diff options
Diffstat (limited to 'storage/xtradb')
425 files changed, 0 insertions, 356480 deletions
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt deleted file mode 100644 index 4f9d2bd2cbb..00000000000 --- a/storage/xtradb/CMakeLists.txt +++ /dev/null @@ -1,515 +0,0 @@ -# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -# This is the CMakeLists for XtraDB -RETURN() # until upgraded to 5.7 XtraDB doesn't compile at all - -INCLUDE(CheckFunctionExists) -INCLUDE(CheckCSourceCompiles) -INCLUDE(CheckCSourceRuns) -INCLUDE(lz4) -INCLUDE(lzo) -INCLUDE(lzma) -INCLUDE(bzip2) -INCLUDE(snappy) -INCLUDE(numa) - -MYSQL_CHECK_LZ4() -MYSQL_CHECK_LZO() -MYSQL_CHECK_LZMA() -MYSQL_CHECK_BZIP2() -MYSQL_CHECK_SNAPPY() -MYSQL_CHECK_NUMA() - -IF(CMAKE_CROSSCOMPILING) - # Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when - # cross-compiling. Not as precise, but usually good enough. - # This only make sense for atomic tests in this file, this trick doesn't - # work in a general case. 
- MACRO(CHECK_C_SOURCE SOURCE VAR) - CHECK_C_SOURCE_COMPILES("${SOURCE}" "${VAR}") - ENDMACRO() -ELSE() - MACRO(CHECK_C_SOURCE SOURCE VAR) - CHECK_C_SOURCE_RUNS("${SOURCE}" "${VAR}") - ENDMACRO() -ENDIF() - -# OS tests -IF(UNIX) - - IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H) - IF (XTRADB_PREFER_STATIC_LIBAIO) - SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) - ENDIF() - FIND_LIBRARY(AIO_LIBRARY aio) - MARK_AS_ADVANCED(AIO_LIBRARY) - IF(AIO_LIBRARY) - CHECK_LIBRARY_EXISTS(${AIO_LIBRARY} io_queue_init "" HAVE_LIBAIO) - IF(HAVE_LIBAIO AND HAVE_LIBAIO_H) - ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) - ENDIF() - LINK_LIBRARIES(${AIO_LIBRARY}) - ENDIF() - ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1") - IF(HAVE_LIBNUMA) - LINK_LIBRARIES(numa) - ENDIF() - ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*") - ADD_DEFINITIONS("-DUNIV_HPUX") - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX") - ADD_DEFINITIONS("-DUNIV_AIX") - ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") - ADD_DEFINITIONS("-DUNIV_SOLARIS") - ENDIF() -ENDIF() - -IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU") -# After: WL#5825 Using C++ Standard Library with MySQL code -# we no longer use -fno-exceptions -# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions") -ENDIF() - -# Enable InnoDB's UNIV_DEBUG and UNIV_SYNC_DEBUG in debug builds -SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEBUG") - -# Add -Wconversion if compiling with GCC -## As of Mar 15 2011 this flag causes 3573+ warnings. 
If you are reading this -## please fix them and enable the following code: -#IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU") -#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion") -#ENDIF() - -CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) -IF(HAVE_SCHED_GETCPU) - ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU) -ENDIF() - -IF(NOT MSVC) - # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not - # workaround for gcc 4.1.2 RHEL5/x86, gcc atomic ops only work under -march=i686 - IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND - CMAKE_C_COMPILER_VERSION VERSION_LESS "4.1.3") - SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") - ENDIF() - CHECK_C_SOURCE( - " - int main() - { - long x; - long y; - long res; - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x, y); - if (!res || x != y) { - return(1); - } - - x = 10; - y = 123; - res = __sync_bool_compare_and_swap(&x, x + 1, y); - if (res || x != 10) { - return(1); - } - x = 10; - y = 123; - res = __sync_add_and_fetch(&x, y); - if (res != 123 + 10 || x != 123 + 10) { - return(1); - } - return(0); - }" - HAVE_IB_GCC_ATOMIC_BUILTINS - ) - CHECK_C_SOURCE( - " - int main() - { - long res; - char c; - - c = 10; - res = __sync_lock_test_and_set(&c, 123); - if (res != 10 || c != 123) { - return(1); - } - return(0); - }" - HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE - ) - CHECK_C_SOURCE( - "#include<stdint.h> - int main() - { - int64_t x,y,res; - - x = 10; - y = 123; - res = __sync_sub_and_fetch(&y, x); - if (res != y || y != 113) { - return(1); - } - res = __sync_add_and_fetch(&y, x); - if (res != y || y != 123) { - return(1); - } - return(0); - }" - HAVE_IB_GCC_ATOMIC_BUILTINS_64 - ) - CHECK_C_SOURCE( - "#include<stdint.h> - int main() - { - __sync_synchronize(); - return(0); - }" - HAVE_IB_GCC_SYNC_SYNCHRONISE - ) - CHECK_C_SOURCE( - "#include<stdint.h> - int main() - { - __atomic_thread_fence(__ATOMIC_ACQUIRE); - __atomic_thread_fence(__ATOMIC_RELEASE); - 
return(0); - }" - HAVE_IB_GCC_ATOMIC_THREAD_FENCE - ) - CHECK_C_SOURCE( - "#include<stdint.h> - int main() - { - unsigned char c; - - __atomic_test_and_set(&c, __ATOMIC_ACQUIRE); - __atomic_clear(&c, __ATOMIC_RELEASE); - return(0); - }" - HAVE_IB_GCC_ATOMIC_TEST_AND_SET - ) - -IF(HAVE_IB_GCC_ATOMIC_BUILTINS) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1) - SET(XTRADB_OK 1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_BYTE=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1) -ENDIF() - -IF(HAVE_IB_GCC_SYNC_SYNCHRONISE) - ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) -ENDIF() - -IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) - ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1) -ENDIF() - -# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not -CHECK_C_SOURCE( -" -#include <pthread.h> -#include <string.h> - -int main() { - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - __sync_bool_compare_and_swap(&x1, x2, x3); - - return(0); -}" -HAVE_IB_ATOMIC_PTHREAD_T_GCC) - -IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC) - ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1) -ENDIF() - -CHECK_CXX_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS) -IF(HAVE_C99_INITIALIZERS) - ADD_DEFINITIONS(-DHAVE_C99_INITIALIZERS) -ENDIF() - -ENDIF(NOT MSVC) - -CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF) - -# Solaris atomics -IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") - CHECK_FUNCTION_EXISTS(atomic_cas_ulong HAVE_ATOMIC_CAS_ULONG) - CHECK_FUNCTION_EXISTS(atomic_cas_32 HAVE_ATOMIC_CAS_32) - CHECK_FUNCTION_EXISTS(atomic_cas_64 HAVE_ATOMIC_CAS_64) - CHECK_FUNCTION_EXISTS(atomic_add_long_nv HAVE_ATOMIC_ADD_LONG_NV) - CHECK_FUNCTION_EXISTS(atomic_swap_uchar 
HAVE_ATOMIC_SWAP_UCHAR) - IF(HAVE_ATOMIC_CAS_ULONG AND - HAVE_ATOMIC_CAS_32 AND - HAVE_ATOMIC_CAS_64 AND - HAVE_ATOMIC_ADD_LONG_NV AND - HAVE_ATOMIC_SWAP_UCHAR) - SET(HAVE_IB_SOLARIS_ATOMICS 1) - ENDIF() - - IF(HAVE_IB_SOLARIS_ATOMICS) - ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1) - SET(XTRADB_OK 1) - ENDIF() - - # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not - CHECK_C_SOURCE_COMPILES( - " #include <pthread.h> - #include <string.h> - - int main(int argc, char** argv) { - pthread_t x1; - pthread_t x2; - pthread_t x3; - - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); - - if (sizeof(pthread_t) == 4) { - - atomic_cas_32(&x1, x2, x3); - - } else if (sizeof(pthread_t) == 8) { - - atomic_cas_64(&x1, x2, x3); - - } else { - - return(1); - } - - return(0); - } - " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) - CHECK_C_SOURCE_COMPILES( - "#include <mbarrier.h> - int main() { - __machine_r_barrier(); - __machine_w_barrier(); - return(0); - }" - HAVE_IB_MACHINE_BARRIER_SOLARIS) - - IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) - ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) - ENDIF() - IF(HAVE_IB_MACHINE_BARRIER_SOLARIS) - ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1) - ENDIF() -ENDIF() - - -IF(UNIX) -# this is needed to know which one of atomic_cas_32() or atomic_cas_64() -# to use in the source -SET(CMAKE_EXTRA_INCLUDE_FILES pthread.h) -CHECK_TYPE_SIZE(pthread_t SIZEOF_PTHREAD_T) -SET(CMAKE_EXTRA_INCLUDE_FILES) -ENDIF() - -IF(SIZEOF_PTHREAD_T) - ADD_DEFINITIONS(-DSIZEOF_PTHREAD_T=${SIZEOF_PTHREAD_T}) -ENDIF() - -IF(MSVC) - ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS) - ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE) - SET(XTRADB_OK 1) -ENDIF() - - -# Include directories under xtradb -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include - ${CMAKE_SOURCE_DIR}/storage/xtradb/handler) - -# Sun Studio bug with -xO2 -IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro" - AND CMAKE_CXX_FLAGS_RELEASE MATCHES "O2" - AND NOT CMAKE_BUILD_TYPE 
STREQUAL "Debug") - # Sun Studio 12 crashes with -xO2 flag, but not with higher optimization - # -xO3 - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.cc - PROPERTIES COMPILE_FLAGS -xO3) -ENDIF() - -# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows -# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297 -IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8) - SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc - PROPERTIES COMPILE_FLAGS -Od) -ENDIF() - -SET(INNOBASE_SOURCES - api/api0api.cc - api/api0misc.cc - btr/btr0btr.cc - btr/btr0cur.cc - btr/btr0pcur.cc - btr/btr0scrub.cc - btr/btr0sea.cc - btr/btr0defragment.cc - buf/buf0buddy.cc - buf/buf0buf.cc - buf/buf0dblwr.cc - buf/buf0checksum.cc - buf/buf0dump.cc - buf/buf0flu.cc - buf/buf0lru.cc - buf/buf0rea.cc - buf/buf0mtflu.cc - data/data0data.cc - data/data0type.cc - dict/dict0boot.cc - dict/dict0crea.cc - dict/dict0dict.cc - dict/dict0load.cc - dict/dict0mem.cc - dict/dict0stats.cc - dict/dict0stats_bg.cc - dyn/dyn0dyn.cc - eval/eval0eval.cc - eval/eval0proc.cc - fil/fil0fil.cc - fil/fil0pagecompress.cc - fil/fil0crypt.cc - fsp/fsp0fsp.cc - fut/fut0fut.cc - fut/fut0lst.cc - ha/ha0ha.cc - ha/ha0storage.cc - ha/hash0hash.cc - fts/fts0fts.cc - fts/fts0ast.cc - fts/fts0blex.cc - fts/fts0config.cc - fts/fts0opt.cc - fts/fts0pars.cc - fts/fts0que.cc - fts/fts0sql.cc - fts/fts0tlex.cc - handler/ha_innodb.cc - handler/handler0alter.cc - handler/i_s.cc - handler/xtradb_i_s.cc - ibuf/ibuf0ibuf.cc - lock/lock0iter.cc - lock/lock0lock.cc - lock/lock0wait.cc - log/log0log.cc - log/log0online.cc - log/log0recv.cc - log/log0crypt.cc - mach/mach0data.cc - mem/mem0mem.cc - mem/mem0pool.cc - mtr/mtr0log.cc - mtr/mtr0mtr.cc - os/os0file.cc - os/os0proc.cc - os/os0sync.cc - os/os0thread.cc - os/os0stacktrace.cc - page/page0cur.cc - page/page0page.cc - page/page0zip.cc - pars/lexyy.cc - pars/pars0grm.cc - pars/pars0opt.cc - pars/pars0pars.cc - pars/pars0sym.cc - que/que0que.cc 
- read/read0read.cc - rem/rem0cmp.cc - rem/rem0rec.cc - row/row0ext.cc - row/row0ftsort.cc - row/row0import.cc - row/row0ins.cc - row/row0merge.cc - row/row0mysql.cc - row/row0log.cc - row/row0purge.cc - row/row0row.cc - row/row0sel.cc - row/row0uins.cc - row/row0umod.cc - row/row0undo.cc - row/row0upd.cc - row/row0quiesce.cc - row/row0vers.cc - srv/srv0conc.cc - srv/srv0mon.cc - srv/srv0srv.cc - srv/srv0start.cc - sync/sync0arr.cc - sync/sync0rw.cc - sync/sync0sync.cc - trx/trx0i_s.cc - trx/trx0purge.cc - trx/trx0rec.cc - trx/trx0roll.cc - trx/trx0rseg.cc - trx/trx0sys.cc - trx/trx0trx.cc - trx/trx0undo.cc - usr/usr0sess.cc - ut/ut0bh.cc - ut/ut0byte.cc - ut/ut0crc32.cc - ut/ut0dbg.cc - ut/ut0list.cc - ut/ut0mem.cc - ut/ut0rbt.cc - ut/ut0rnd.cc - ut/ut0ut.cc - ut/ut0vec.cc - ut/ut0wqueue.cc - ut/ut0timer.cc) - -MYSQL_ADD_PLUGIN(xtradb ${INNOBASE_SOURCES} STORAGE_ENGINE - RECOMPILE_FOR_EMBEDDED - LINK_LIBRARIES - ${ZLIB_LIBRARY} - ${CRC32_VPMSUM_LIBRARY} - ${NUMA_LIBRARY} - ${LINKER_SCRIPT}) - -IF(TARGET xtradb AND NOT XTRADB_OK) - MESSAGE(FATAL_ERROR "Percona XtraDB is not supported on this platform") -ENDIF() - -ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup) - diff --git a/storage/xtradb/COPYING.Google b/storage/xtradb/COPYING.Google deleted file mode 100644 index 5ade2b0e381..00000000000 --- a/storage/xtradb/COPYING.Google +++ /dev/null @@ -1,30 +0,0 @@ -Portions of this software contain modifications contributed by Google, Inc. -These contributions are used with the following license: - -Copyright (c) 2008, Google Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - * Neither the name of the Google Inc. nor the names of its - contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/storage/xtradb/COPYING.Percona b/storage/xtradb/COPYING.Percona deleted file mode 100644 index 8c786811719..00000000000 --- a/storage/xtradb/COPYING.Percona +++ /dev/null @@ -1,30 +0,0 @@ -Portions of this software contain modifications contributed by Percona, Inc. -These contributions are used with the following license: - -Copyright (c) 2008, 2009, Percona Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - * Neither the name of the Percona Inc. nor the names of its - contributors may be used to endorse or promote products - derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/storage/xtradb/Doxyfile b/storage/xtradb/Doxyfile deleted file mode 100644 index 7cf5048fa52..00000000000 --- a/storage/xtradb/Doxyfile +++ /dev/null @@ -1,1419 +0,0 @@ -# Doxyfile 1.5.6 - -# Usage: SVNVERSION=-r$(svnversion) doxygen - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] 
-# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = "InnoDB Plugin" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = 1.0$(SVNVERSION) - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = dox - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. 
Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, -# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, -# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, -# and Ukrainian. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. -# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. 
- -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = YES - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. 
If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the DETAILS_AT_TOP tag is set to YES then Doxygen -# will output the detailed description near the top, like JavaDoc. -# If set to NO, the detailed description appears after the member -# documentation. - -DETAILS_AT_TOP = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 8 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". 
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = YES - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java -# sources only. Doxygen will then generate output that is more tailored for -# Java. For instance, namespaces will be presented as packages, qualified -# scopes will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources only. Doxygen will then generate output that is more tailored for -# Fortran. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for -# VHDL. - -OPTIMIZE_OUTPUT_VHDL = NO - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should -# set this tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. -# func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. 
-# Doxygen will parse them like normal C++ but will assume all classes use public -# instead of private inheritance when no explicit protection keyword is present. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate getter -# and setter methods for a property. Setting this option to YES (the default) -# will make doxygen to replace the get and set methods by a property in the -# documentation. This will only work if the methods are indeed getting or -# setting a simple type. If this is not the case, or you want to show the -# methods anyway, you should set this option to NO. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. - -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum -# is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically -# be useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. 
- -TYPEDEF_HIDES_STRUCT = NO - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. When set to YES local -# methods, which are defined in the implementation section but not in -# the interface are included in the documentation. -# If set to NO (the default) only methods in the interface are included. - -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base -# name of the file that contains the anonymous namespace. By default -# anonymous namespace are hidden. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. 
-# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. 
- -SHOW_INCLUDE_FILES = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. If set to NO (the default) -# the group names will appear in their defined order. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be -# sorted by fully-qualified names, including namespaces. If set to -# NO (the default), the class list will be sorted only by class name, -# not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the -# alphabetical list. - -SORT_BY_SCOPE_NAME = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. 
- -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. -# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. - -SHOW_USED_FILES = YES - -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = NO - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. -# This will remove the Files entry from the Quick Index and from the -# Folder Tree View (if specified). The default is YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the -# Namespaces page. This will remove the Namespaces entry from the Quick Index -# and from the Folder Tree View (if specified). The default is YES. 
- -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command <command> <input-file>, where <command> is the value of -# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file -# provided by doxygen. Whatever the program writes to standard output -# is used as the file version. See the manual for examples. - -FILE_VERSION_FILTER = - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = YES - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = YES - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for -# functions that are documented, but have no documentation for their parameters -# or return value. If set to NO (the default) doxygen will only warn about -# wrong or incomplete parameter documentation, but not about the absence of -# documentation.
- -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. Optionally the format may contain -# $version, which will be replaced by the version of the file (if it could -# be obtained via FILE_VERSION_FILTER) - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = . include/univ.i - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is -# also the default input encoding. Doxygen uses libiconv (or the iconv built -# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for -# the list of possible encodings. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. 
If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS = *.c *.ic *.h - -# The RECURSIVE tag can be used to specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix filesystem feature) are excluded -# from the input. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command).
- -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain images that are included in the documentation (see -# the \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command <filter> <input-file>, where <filter> -# is the value of the INPUT_FILTER tag, and <input-file> is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. If FILTER_PATTERNS is specified, this tag will be -# ignored. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: -# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES).
- -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. Otherwise they will link to the documentation. - -REFERENCES_LINK_SOURCE = YES - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher.
- -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = NO - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_DOCSET tag is set to YES, additional index files -# will be generated that can be used as input for Apple's Xcode 3 -# integrated development environment, introduced with OSX 10.5 (Leopard). -# To create a documentation set, doxygen will generate a Makefile in the -# HTML output directory. Running make will produce the docset in that -# directory and running "make install" will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find -# it at startup. 
- -GENERATE_DOCSET = NO - -# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the -# feed. A documentation feed provides an umbrella under which multiple -# documentation sets from a single provider (such as a company or product suite) -# can be grouped. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that -# should uniquely identify the documentation set bundle. This should be a -# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen -# will append .docset to the name. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). - -HTML_DYNAMIC_SECTIONS = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output directory. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING -# is used to encode HtmlHelp index (hhk), content (hhc) and project file -# content. 
- -CHM_INDEX_ENCODING = - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. - -TOC_EXPAND = NO - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. - -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. -# If the tag value is set to FRAME, a side panel will be generated -# containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, -# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are -# probably better off using the HTML help feature. Other possible values -# for this tag are: HIERARCHIES, which will generate the Groups, Directories, -# and Class Hierarchy pages using a tree view instead of an ordered list; -# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which -# disables this behavior completely. For backwards compatibility with previous -# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE -# respectively. - -GENERATE_TREEVIEW = NONE - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown.
- -TREEVIEW_WIDTH = 250 - -# Use this tag to change the font size of Latex formulas included -# as images in the HTML documentation. The default is 10. Note that -# when you change the font size after a successful doxygen run you need -# to manually remove any form_*.png images from the HTML output directory -# to force them to be regenerated. - -FORMULA_FONTSIZE = 10 - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = NO - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX -# packages that should be included in the LaTeX output.
- -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! - -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = YES - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. - -LATEX_HIDE_INDICES = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimized for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. 
- -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). 
These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. - -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. 
- -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. This is useful -# if you want to understand what is going on. On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. 
Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = YES - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_DEFINED tags. - -EXPAND_ONLY_PREDEF = YES - -# If the SEARCH_INCLUDES tag is set to YES (the default) the include files -# in the INCLUDE_PATH (see below) will be searched if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED = DOXYGEN UNIV_DEBUG UNIV_SYNC_DEBUG __attribute__()= - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition.
- -EXPAND_AS_DEFINED = UT_LIST_BASE_NODE_T UT_LIST_NODE_T - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse -# the parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. 
- -EXTERNAL_GROUPS = NO - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). - -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base -# or super classes. Setting the tag to NO turns the diagrams off. Note that -# this option is superseded by the HAVE_DOT option below. This is only a -# fallback. It is recommended to install and use dot, since it yields more -# powerful graphs. - -CLASS_DIAGRAMS = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = YES - -# By default doxygen will write a font called FreeSans.ttf to the output -# directory and reference it in all dot files that doxygen generates. 
This -# font does not include all possible unicode characters however, so when you need -# these (or just want a differently looking font) you can specify the font name -# using DOT_FONTNAME. You need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. - -DOT_FONTNAME = FreeSans - -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. - -DOT_FONTPATH = - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for groups, showing the direct groups dependencies - -GROUP_GRAPHS = NO - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. - -UML_LOOK = NO - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. 
- -TEMPLATE_RELATIONS = NO - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT options are set to YES then -# doxygen will generate a call dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable call graphs -# for selected functions only using the \callgraph command. - -CALL_GRAPH = NO - -# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then -# doxygen will generate a caller dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable caller -# graphs for selected functions only using the \callergraph command. - -CALLER_GRAPH = NO - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will generate a graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES -# then doxygen will show the dependencies a directory has on other directories -# in a graphical way. The dependency relations are determined by the #include -# relations between the files in the directories. - -DIRECTORY_GRAPH = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. 
Possible values are png, jpg, or gif. -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of -# nodes that will be shown in the graph. If the number of nodes in a graph -# becomes larger than this value, doxygen will truncate the graph, which is -# visualized by representing a node as a red box. Note that if the -# number of direct children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note -# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. - -DOT_GRAPH_MAX_NODES = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes -# that lie further from the root node will be omitted. Note that setting this -# option to 1 or 2 may greatly reduce the computation time needed for large -# code bases. Also note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. - -MAX_DOT_GRAPH_DEPTH = 3 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is enabled by default, which results in a transparent -# background. Warning: Depending on the platform used, enabling this option -# may lead to badly anti-aliased labels on the edges of a graph (i.e. they -# become hard to read). 
- -DOT_TRANSPARENT = YES - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) -# support this, this feature is disabled by default. - -DOT_MULTI_TARGETS = NO - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to the search engine -#--------------------------------------------------------------------------- - -# The SEARCHENGINE tag specifies whether or not a search engine should be -# used. If set to NO the values of all tags below this one will be ignored. - -SEARCHENGINE = NO diff --git a/storage/xtradb/api/api0api.cc b/storage/xtradb/api/api0api.cc deleted file mode 100644 index 2a46dd4b4c1..00000000000 --- a/storage/xtradb/api/api0api.cc +++ /dev/null @@ -1,3886 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2008, 2015, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file api/api0api.cc -InnoDB Native API - -2008-08-01 Created Sunny Bains -3/20/2011 Jimmy Yang extracted from Embedded InnoDB -*******************************************************/ - -#include "univ.i" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif - -#include "api0api.h" -#include "api0misc.h" -#include "srv0start.h" -#include "dict0dict.h" -#include "btr0pcur.h" -#include "row0ins.h" -#include "row0upd.h" -#include "row0vers.h" -#include "trx0roll.h" -#include "dict0crea.h" -#include "row0merge.h" -#include "pars0pars.h" -#include "lock0types.h" -#include "row0sel.h" -#include "lock0lock.h" -#include "rem0cmp.h" -#include "ut0dbg.h" -#include "dict0priv.h" -#include "ut0ut.h" -#include "ha_prototypes.h" -#include "trx0roll.h" - -/** configure variable for binlog option with InnoDB APIs */ -my_bool ib_binlog_enabled = FALSE; - -/** configure variable for MDL option with InnoDB APIs */ -my_bool ib_mdl_enabled = FALSE; - -/** configure variable for disable rowlock with InnoDB APIs */ -my_bool ib_disable_row_lock = FALSE; - -/** configure variable for Transaction isolation levels */ -ulong ib_trx_level_setting = IB_TRX_READ_UNCOMMITTED; - -/** configure variable for background commit interval in seconds */ -ulong ib_bk_commit_interval = 0; - -/** InnoDB tuple types. */ -enum ib_tuple_type_t{ - TPL_TYPE_ROW, /*!< Data row tuple */ - TPL_TYPE_KEY /*!< Index key tuple */ -}; - -/** Query types supported. 
*/ -enum ib_qry_type_t{ - QRY_NON, /*!< None/Sentinel */ - QRY_INS, /*!< Insert operation */ - QRY_UPD, /*!< Update operation */ - QRY_SEL /*!< Select operation */ -}; - -/** Query graph types. */ -struct ib_qry_grph_t { - que_fork_t* ins; /*!< Innobase SQL query graph used - in inserts */ - que_fork_t* upd; /*!< Innobase SQL query graph used - in updates or deletes */ - que_fork_t* sel; /*!< dummy query graph used in - selects */ -}; - -/** Query node types. */ -struct ib_qry_node_t { - ins_node_t* ins; /*!< Innobase SQL insert node - used to perform inserts to the table */ - upd_node_t* upd; /*!< Innobase SQL update node - used to perform updates and deletes */ - sel_node_t* sel; /*!< Innobase SQL select node - used to perform selects on the table */ -}; - -/** Query processing fields. */ -struct ib_qry_proc_t { - - ib_qry_node_t node; /*!< Query node*/ - - ib_qry_grph_t grph; /*!< Query graph */ -}; - -/** Cursor instance for traversing tables/indexes. This will eventually -become row_prebuilt_t. */ -struct ib_cursor_t { - mem_heap_t* heap; /*!< Instance heap */ - - mem_heap_t* query_heap; /*!< Heap to use for query graphs */ - - ib_qry_proc_t q_proc; /*!< Query processing info */ - - ib_match_mode_t match_mode; /*!< ib_cursor_moveto match mode */ - - row_prebuilt_t* prebuilt; /*!< For reading rows */ - - bool valid_trx; /*!< Valid transaction attached */ -}; - -/** InnoDB table columns used during table and index schema creation. */ -struct ib_col_t { - const char* name; /*!< Name of column */ - - ib_col_type_t ib_col_type; /*!< Main type of the column */ - - ulint len; /*!< Length of the column */ - - ib_col_attr_t ib_col_attr; /*!< Column attributes */ - -}; - -/** InnoDB index columns used during index and index schema creation. 
*/ -struct ib_key_col_t { - const char* name; /*!< Name of column */ - - ulint prefix_len; /*!< Column index prefix len or 0 */ -}; - -struct ib_table_def_t; - -/** InnoDB index schema used during index creation */ -struct ib_index_def_t { - mem_heap_t* heap; /*!< Heap used to build this and all - its columns in the list */ - - const char* name; /*!< Index name */ - - dict_table_t* table; /*!< Parent InnoDB table */ - - ib_table_def_t* schema; /*!< Parent table schema that owns - this instance */ - - ibool clustered; /*!< True if clustered index */ - - ibool unique; /*!< True if unique index */ - - ib_vector_t* cols; /*!< Vector of columns */ - - trx_t* usr_trx; /*!< User transacton covering the - DDL operations */ -}; - -/** InnoDB table schema used during table creation */ -struct ib_table_def_t { - mem_heap_t* heap; /*!< Heap used to build this and all - its columns in the list */ - const char* name; /*!< Table name */ - - ib_tbl_fmt_t ib_tbl_fmt; /*!< Row format */ - - ulint page_size; /*!< Page size */ - - ib_vector_t* cols; /*!< Vector of columns */ - - ib_vector_t* indexes; /*!< Vector of indexes */ - - dict_table_t* table; /* Table read from or NULL */ -}; - -/** InnoDB tuple used for key operations. */ -struct ib_tuple_t { - mem_heap_t* heap; /*!< Heap used to build - this and for copying - the column values. */ - - ib_tuple_type_t type; /*!< Tuple discriminitor. */ - - const dict_index_t* index; /*!< Index for tuple can be either - secondary or cluster index. */ - - dtuple_t* ptr; /*!< The internal tuple - instance */ -}; - -/** The following counter is used to convey information to InnoDB -about server activity: in case of normal DML ops it is not -sensible to call srv_active_wake_master_thread after each -operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */ - -#define INNOBASE_WAKE_INTERVAL 32 - -/*****************************************************************//** -Check whether the Innodb persistent cursor is positioned. 
-@return IB_TRUE if positioned */ -UNIV_INLINE -ib_bool_t -ib_btr_cursor_is_positioned( -/*========================*/ - btr_pcur_t* pcur) /*!< in: InnoDB persistent cursor */ -{ - return(pcur->old_stored == BTR_PCUR_OLD_STORED - && (pcur->pos_state == BTR_PCUR_IS_POSITIONED - || pcur->pos_state == BTR_PCUR_WAS_POSITIONED)); -} - - -/********************************************************************//** -Open a table using the table id, if found then increment table ref count. -@return table instance if found */ -static -dict_table_t* -ib_open_table_by_id( -/*================*/ - ib_id_u64_t tid, /*!< in: table id to lookup */ - ib_bool_t locked) /*!< in: TRUE if own dict mutex */ -{ - dict_table_t* table; - table_id_t table_id; - - table_id = tid; - - if (!locked) { - dict_mutex_enter_for_mysql(); - } - - table = dict_table_open_on_id(table_id, TRUE, DICT_TABLE_OP_NORMAL); - - if (table != NULL && table->file_unreadable) { - table = NULL; - } - - if (!locked) { - dict_mutex_exit_for_mysql(); - } - - return(table); -} - -/********************************************************************//** -Open a table using the table name, if found then increment table ref count. -@return table instance if found */ -UNIV_INTERN -void* -ib_open_table_by_name( -/*==================*/ - const char* name) /*!< in: table name to lookup */ -{ - dict_table_t* table; - - table = dict_table_open_on_name(name, FALSE, FALSE, - DICT_ERR_IGNORE_NONE); - - if (table != NULL && table->file_unreadable) { - table = NULL; - } - - return(table); -} - -/********************************************************************//** -Find table using table name. 
-@return table instance if found */ -static -dict_table_t* -ib_lookup_table_by_name( -/*====================*/ - const char* name) /*!< in: table name to lookup */ -{ - dict_table_t* table; - - table = dict_table_get_low(name); - - if (table != NULL && table->file_unreadable) { - table = NULL; - } - - return(table); -} - -/********************************************************************//** -Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth -time calls srv_active_wake_master_thread. This function should be used -when a single database operation may introduce a small need for -server utility activity, like checkpointing. */ -UNIV_INLINE -void -ib_wake_master_thread(void) -/*=======================*/ -{ - static ulint ib_signal_counter = 0; - - ++ib_signal_counter; - - if ((ib_signal_counter % INNOBASE_WAKE_INTERVAL) == 0) { - srv_active_wake_master_thread(); - } -} - -/*****************************************************************//** -Read the columns from a rec into a tuple. 
*/ -static -void -ib_read_tuple( -/*==========*/ - const rec_t* rec, /*!< in: Record to read */ - ib_bool_t page_format, /*!< in: IB_TRUE if compressed format */ - ib_tuple_t* tuple, /*!< in: tuple to read into */ - void** rec_buf, /*!< in/out: row buffer */ - ulint* len) /*!< in/out: buffer len */ -{ - ulint i; - void* ptr; - rec_t* copy; - ulint rec_meta_data; - ulint n_index_fields; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - dtuple_t* dtuple = tuple->ptr; - const dict_index_t* index = tuple->index; - ulint offset_size; - - rec_offs_init(offsets_); - - offsets = rec_get_offsets( - rec, index, offsets, ULINT_UNDEFINED, &tuple->heap); - - rec_meta_data = rec_get_info_bits(rec, page_format); - dtuple_set_info_bits(dtuple, rec_meta_data); - - offset_size = rec_offs_size(offsets); - - if (rec_buf && *rec_buf) { - if (*len < offset_size) { - free(*rec_buf); - *rec_buf = malloc(offset_size); - *len = offset_size; - } - ptr = *rec_buf; - } else { - /* Make a copy of the rec. */ - ptr = mem_heap_alloc(tuple->heap, offset_size); - } - - copy = rec_copy(ptr, rec, offsets); - - n_index_fields = ut_min( - rec_offs_n_fields(offsets), dtuple_get_n_fields(dtuple)); - - for (i = 0; i < n_index_fields; ++i) { - ulint len; - const byte* data; - dfield_t* dfield; - - if (tuple->type == TPL_TYPE_ROW) { - const dict_col_t* col; - ulint col_no; - const dict_field_t* index_field; - - index_field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(index_field); - col_no = dict_col_get_no(col); - - dfield = dtuple_get_nth_field(dtuple, col_no); - } else { - dfield = dtuple_get_nth_field(dtuple, i); - } - - data = rec_get_nth_field(copy, offsets, i, &len); - - /* Fetch and copy any externally stored column. 
*/ - if (rec_offs_nth_extern(offsets, i)) { - - ulint zip_size; - - zip_size = dict_table_zip_size(index->table); - - data = btr_rec_copy_externally_stored_field( - copy, offsets, zip_size, i, &len, - tuple->heap, NULL); - - ut_a(len != UNIV_SQL_NULL); - } - - dfield_set_data(dfield, data, len); - } -} - -/*****************************************************************//** -Create an InnoDB key tuple. -@return tuple instance created, or NULL */ -static -ib_tpl_t -ib_key_tuple_new_low( -/*=================*/ - const dict_index_t* index, /*!< in: index for which tuple - required */ - ulint n_cols, /*!< in: no. of user defined cols */ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ib_tuple_t* tuple; - ulint i; - ulint n_cmp_cols; - - tuple = static_cast<ib_tuple_t*>( - mem_heap_alloc(heap, sizeof(*tuple))); - - if (tuple == NULL) { - mem_heap_free(heap); - return(NULL); - } - - tuple->heap = heap; - tuple->index = index; - tuple->type = TPL_TYPE_KEY; - - /* Is it a generated clustered index ? */ - if (n_cols == 0) { - ++n_cols; - } - - tuple->ptr = dtuple_create(heap, n_cols); - - /* Copy types and set to SQL_NULL. */ - dict_index_copy_types(tuple->ptr, index, n_cols); - - for (i = 0; i < n_cols; i++) { - - dfield_t* dfield; - - dfield = dtuple_get_nth_field(tuple->ptr, i); - dfield_set_null(dfield); - } - - n_cmp_cols = dict_index_get_n_ordering_defined_by_user(index); - - dtuple_set_n_fields_cmp(tuple->ptr, n_cmp_cols); - - return((ib_tpl_t) tuple); -} - -/*****************************************************************//** -Create an InnoDB key tuple. -@return tuple instance created, or NULL */ -static -ib_tpl_t -ib_key_tuple_new( -/*=============*/ - const dict_index_t* index, /*!< in: index of tuple */ - ulint n_cols) /*!< in: no. 
of user defined cols */ -{ - mem_heap_t* heap; - - heap = mem_heap_create(64); - - if (heap == NULL) { - return(NULL); - } - - return(ib_key_tuple_new_low(index, n_cols, heap)); -} - -/*****************************************************************//** -Create an InnoDB row tuple. -@return tuple instance, or NULL */ -static -ib_tpl_t -ib_row_tuple_new_low( -/*=================*/ - const dict_index_t* index, /*!< in: index of tuple */ - ulint n_cols, /*!< in: no. of cols in tuple */ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ib_tuple_t* tuple; - - tuple = static_cast<ib_tuple_t*>(mem_heap_alloc(heap, sizeof(*tuple))); - - if (tuple == NULL) { - mem_heap_free(heap); - return(NULL); - } - - tuple->heap = heap; - tuple->index = index; - tuple->type = TPL_TYPE_ROW; - - tuple->ptr = dtuple_create(heap, n_cols); - - /* Copy types and set to SQL_NULL. */ - dict_table_copy_types(tuple->ptr, index->table); - - return((ib_tpl_t) tuple); -} - -/*****************************************************************//** -Create an InnoDB row tuple. -@return tuple instance, or NULL */ -static -ib_tpl_t -ib_row_tuple_new( -/*=============*/ - const dict_index_t* index, /*!< in: index of tuple */ - ulint n_cols) /*!< in: no. of cols in tuple */ -{ - mem_heap_t* heap; - - heap = mem_heap_create(64); - - if (heap == NULL) { - return(NULL); - } - - return(ib_row_tuple_new_low(index, n_cols, heap)); -} - -/*****************************************************************//** -Begin a transaction. 
-@return innobase txn handle */ -UNIV_INTERN -ib_err_t -ib_trx_start( -/*=========*/ - ib_trx_t ib_trx, /*!< in: transaction to restart */ - ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */ - ib_bool_t read_write, /*!< in: true if read write - transaction */ - ib_bool_t auto_commit, /*!< in: auto commit after each - single DML */ - void* thd) /*!< in: THD */ -{ - ib_err_t err = DB_SUCCESS; - trx_t* trx = (trx_t*) ib_trx; - - ut_a(ib_trx_level <= IB_TRX_SERIALIZABLE); - - trx->api_trx = true; - trx->api_auto_commit = auto_commit; - trx->read_write = read_write; - - trx_start_if_not_started(trx); - - trx->isolation_level = ib_trx_level; - - /* FIXME: This is a place holder, we should add an arg that comes - from the client. */ - trx->mysql_thd = static_cast<THD*>(thd); - - return(err); -} - -/*****************************************************************//** -Begin a transaction. This will allocate a new transaction handle. -put the transaction in the active state. -@return innobase txn handle */ -UNIV_INTERN -ib_trx_t -ib_trx_begin( -/*=========*/ - ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */ - ib_bool_t read_write, /*!< in: true if read write - transaction */ - ib_bool_t auto_commit) /*!< in: auto commit after each - single DML */ -{ - trx_t* trx; - ib_bool_t started; - - trx = trx_allocate_for_mysql(); - - started = ib_trx_start(static_cast<ib_trx_t>(trx), ib_trx_level, - read_write, auto_commit, NULL); - ut_a(started); - - return(static_cast<ib_trx_t>(trx)); -} - - -/*****************************************************************//** -Check if transaction is read_only -@return transaction read_only status */ -UNIV_INTERN -ib_u32_t -ib_trx_read_only( -/*=============*/ - ib_trx_t ib_trx) /*!< in: trx handle */ -{ - trx_t* trx = (trx_t*) ib_trx; - - return(trx->read_only); -} - -/*****************************************************************//** -Get the transaction's state. 
-@return transaction state */ -UNIV_INTERN -ib_trx_state_t -ib_trx_state( -/*=========*/ - ib_trx_t ib_trx) /*!< in: trx handle */ -{ - trx_t* trx = (trx_t*) ib_trx; - - return((ib_trx_state_t) trx->state); -} - -/*****************************************************************//** -Get a trx start time. -@return trx start_time */ -UNIV_INTERN -ib_u64_t -ib_trx_get_start_time( -/*==================*/ - ib_trx_t ib_trx) /*!< in: transaction */ -{ - trx_t* trx = (trx_t*) ib_trx; - return(static_cast<ib_u64_t>(trx->start_time)); -} -/*****************************************************************//** -Release the resources of the transaction. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_trx_release( -/*===========*/ - ib_trx_t ib_trx) /*!< in: trx handle */ -{ - trx_t* trx = (trx_t*) ib_trx; - - ut_ad(trx != NULL); - trx_free_for_mysql(trx); - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Commit a transaction. This function will also release the schema -latches too. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_trx_commit( -/*==========*/ - ib_trx_t ib_trx) /*!< in: trx handle */ -{ - ib_err_t err = DB_SUCCESS; - trx_t* trx = (trx_t*) ib_trx; - - if (trx->state == TRX_STATE_NOT_STARTED) { - return(err); - } - - trx_commit(trx); - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Rollback a transaction. This function will also release the schema -latches too. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_trx_rollback( -/*============*/ - ib_trx_t ib_trx) /*!< in: trx handle */ -{ - ib_err_t err; - trx_t* trx = (trx_t*) ib_trx; - - err = static_cast<ib_err_t>(trx_rollback_for_mysql(trx)); - - /* It should always succeed */ - ut_a(err == DB_SUCCESS); - - return(err); -} - -#ifdef __WIN__ -/*****************************************************************//** -Convert a string to lower case. 
*/ -static -void -ib_to_lower_case( -/*=============*/ - char* ptr) /*!< string to convert to lower case */ -{ - while (*ptr) { - *ptr = tolower(*ptr); - ++ptr; - } -} -#endif /* __WIN__ */ - -/*****************************************************************//** -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. An example: -test/mytable. On Windows normalization puts both the database name and the -table name always to lower case. This function can be called for system -tables and they don't have a database component. For tables that don't have -a database component, we don't normalize them to lower case on Windows. -The assumption is that they are system tables that reside in the system -table space. */ -static -void -ib_normalize_table_name( -/*====================*/ - char* norm_name, /*!< out: normalized name as a - null-terminated string */ - const char* name) /*!< in: table name string */ -{ - const char* ptr = name; - - /* Scan name from the end */ - - ptr += ut_strlen(name) - 1; - - /* Find the start of the table name. */ - while (ptr >= name && *ptr != '\\' && *ptr != '/' && ptr > name) { - --ptr; - } - - - /* For system tables there is no '/' or dbname. */ - ut_a(ptr >= name); - - if (ptr > name) { - const char* db_name; - const char* table_name; - - table_name = ptr + 1; - - --ptr; - - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - - db_name = ptr + 1; - - memcpy(norm_name, db_name, - ut_strlen(name) + 1 - (db_name - name)); - - norm_name[table_name - db_name - 1] = '/'; -#ifdef __WIN__ - ib_to_lower_case(norm_name); -#endif - } else { - ut_strcpy(norm_name, name); - } -} - -/*****************************************************************//** -Check whether the table name conforms to our requirements. Currently -we only do a simple check for the presence of a '/'. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_table_name_check( -/*================*/ - const char* name) /*!< in: table name to check */ -{ - const char* slash = NULL; - ulint len = ut_strlen(name); - - if (len < 2 - || *name == '/' - || name[len - 1] == '/' - || (name[0] == '.' && name[1] == '/') - || (name[0] == '.' && name[1] == '.' && name[2] == '/')) { - - return(DB_DATA_MISMATCH); - } - - for ( ; *name; ++name) { -#ifdef __WIN__ - /* Check for reserved characters in DOS filenames. */ - switch (*name) { - case ':': - case '|': - case '"': - case '*': - case '<': - case '>': - return(DB_DATA_MISMATCH); - } -#endif /* __WIN__ */ - if (*name == '/') { - if (slash) { - return(DB_DATA_MISMATCH); - } - slash = name; - } - } - - return(slash ? DB_SUCCESS : DB_DATA_MISMATCH); -} - - - -/*****************************************************************//** -Get a table id. The caller must have acquired the dictionary mutex. -@return DB_SUCCESS if found */ -static -ib_err_t -ib_table_get_id_low( -/*================*/ - const char* table_name, /*!< in: table to find */ - ib_id_u64_t* table_id) /*!< out: table id if found */ -{ - dict_table_t* table; - ib_err_t err = DB_TABLE_NOT_FOUND; - - *table_id = 0; - - table = ib_lookup_table_by_name(table_name); - - if (table != NULL) { - *table_id = (table->id); - - err = DB_SUCCESS; - } - - return(err); -} - -/*****************************************************************//** -Create an internal cursor instance. 
-@return DB_SUCCESS or err code */ -static -ib_err_t -ib_create_cursor( -/*=============*/ - ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */ - dict_table_t* table, /*!< in: table instance */ - dict_index_t* index, /*!< in: index to use */ - trx_t* trx) /*!< in: transaction */ -{ - mem_heap_t* heap; - ib_cursor_t* cursor; - ib_err_t err = DB_SUCCESS; - - heap = mem_heap_create(sizeof(*cursor) * 2); - - if (heap != NULL) { - row_prebuilt_t* prebuilt; - - cursor = static_cast<ib_cursor_t*>( - mem_heap_zalloc(heap, sizeof(*cursor))); - - cursor->heap = heap; - - cursor->query_heap = mem_heap_create(64); - - if (cursor->query_heap == NULL) { - mem_heap_free(heap); - - return(DB_OUT_OF_MEMORY); - } - - cursor->prebuilt = row_create_prebuilt(table, 0); - - prebuilt = cursor->prebuilt; - - prebuilt->trx = trx; - - cursor->valid_trx = TRUE; - - prebuilt->table = table; - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->innodb_api = TRUE; - - prebuilt->index = index; - - ut_a(prebuilt->index != NULL); - - if (prebuilt->trx != NULL) { - ++prebuilt->trx->n_mysql_tables_in_use; - - prebuilt->index_usable = - row_merge_is_index_usable( - prebuilt->trx, prebuilt->index); - - /* Assign a read view if the transaction does - not have it yet */ - - trx_assign_read_view(prebuilt->trx); - } - - *ib_crsr = (ib_crsr_t) cursor; - } else { - err = DB_OUT_OF_MEMORY; - } - - return(err); -} - -/*****************************************************************//** -Create an internal cursor instance, and set prebuilt->index to index -with supplied index_id. 
-@return DB_SUCCESS or err code */ -static -ib_err_t -ib_create_cursor_with_index_id( -/*===========================*/ - ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */ - dict_table_t* table, /*!< in: table instance */ - ib_id_u64_t index_id, /*!< in: index id or 0 */ - trx_t* trx) /*!< in: transaction */ -{ - dict_index_t* index; - - if (index_id != 0) { - mutex_enter(&dict_sys->mutex); - index = dict_index_find_on_id_low(index_id); - mutex_exit(&dict_sys->mutex); - } else { - index = dict_table_get_first_index(table); - } - - return(ib_create_cursor(ib_crsr, table, index, trx)); -} - -/*****************************************************************//** -Open an InnoDB table and return a cursor handle to it. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_open_table_using_id( -/*==========================*/ - ib_id_u64_t table_id, /*!< in: table id of table to open */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr) /*!< out,own: InnoDB cursor */ -{ - ib_err_t err; - dict_table_t* table; - - if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) { - table = ib_open_table_by_id(table_id, FALSE); - } else { - table = ib_open_table_by_id(table_id, TRUE); - } - - if (table == NULL) { - - return(DB_TABLE_NOT_FOUND); - } - - err = ib_create_cursor_with_index_id(ib_crsr, table, 0, - (trx_t*) ib_trx); - - return(err); -} - -/*****************************************************************//** -Open an InnoDB index and return a cursor handle to it. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_open_index_using_id( -/*==========================*/ - ib_id_u64_t index_id, /*!< in: index id of index to open */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr) /*!< out: InnoDB cursor */ -{ - ib_err_t err; - dict_table_t* table; - ulint table_id = (ulint)( index_id >> 32); - - if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) { - table = ib_open_table_by_id(table_id, FALSE); - } else { - table = ib_open_table_by_id(table_id, TRUE); - } - - if (table == NULL) { - - return(DB_TABLE_NOT_FOUND); - } - - /* We only return the lower 32 bits of the dulint. */ - err = ib_create_cursor_with_index_id( - ib_crsr, table, index_id, (trx_t*) ib_trx); - - if (ib_crsr != NULL) { - const ib_cursor_t* cursor; - - cursor = *(ib_cursor_t**) ib_crsr; - - if (cursor->prebuilt->index == NULL) { - ib_err_t crsr_err; - - crsr_err = ib_cursor_close(*ib_crsr); - ut_a(crsr_err == DB_SUCCESS); - - *ib_crsr = NULL; - } - } - - return(err); -} - -/*****************************************************************//** -Open an InnoDB secondary index cursor and return a cursor handle to it. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_open_index_using_name( -/*============================*/ - ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */ - const char* index_name, /*!< in: secondary index name */ - ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */ - int* idx_type, /*!< out: index is cluster index */ - ib_id_u64_t* idx_id) /*!< out: index id */ -{ - dict_table_t* table; - dict_index_t* index; - index_id_t index_id = 0; - ib_err_t err = DB_TABLE_NOT_FOUND; - ib_cursor_t* cursor = (ib_cursor_t*) ib_open_crsr; - - *idx_type = 0; - *idx_id = 0; - *ib_crsr = NULL; - - /* We want to increment the ref count, so we do a redundant search. 
*/ - table = dict_table_open_on_id(cursor->prebuilt->table->id, - FALSE, DICT_TABLE_OP_NORMAL); - ut_a(table != NULL); - - /* The first index is always the cluster index. */ - index = dict_table_get_first_index(table); - - /* Traverse the user defined indexes. */ - while (index != NULL) { - if (innobase_strcasecmp(index->name, index_name) == 0) { - index_id = index->id; - *idx_type = index->type; - *idx_id = index_id; - break; - } - index = UT_LIST_GET_NEXT(indexes, index); - } - - if (!index_id) { - dict_table_close(table, FALSE, FALSE); - return(DB_ERROR); - } - - if (index_id > 0) { - ut_ad(index->id == index_id); - err = ib_create_cursor( - ib_crsr, table, index, cursor->prebuilt->trx); - } - - if (*ib_crsr != NULL) { - const ib_cursor_t* cursor; - - cursor = *(ib_cursor_t**) ib_crsr; - - if (cursor->prebuilt->index == NULL) { - err = ib_cursor_close(*ib_crsr); - ut_a(err == DB_SUCCESS); - *ib_crsr = NULL; - } - } - - return(err); -} - -/*****************************************************************//** -Open an InnoDB table and return a cursor handle to it. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_open_table( -/*=================*/ - const char* name, /*!< in: table name */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr) /*!< out,own: InnoDB cursor */ -{ - ib_err_t err; - dict_table_t* table; - char* normalized_name; - - normalized_name = static_cast<char*>(mem_alloc(ut_strlen(name) + 1)); - ib_normalize_table_name(normalized_name, name); - - if (ib_trx != NULL) { - if (!ib_schema_lock_is_exclusive(ib_trx)) { - table = (dict_table_t*)ib_open_table_by_name( - normalized_name); - } else { - /* NOTE: We do not acquire MySQL metadata lock */ - table = ib_lookup_table_by_name(normalized_name); - } - } else { - table = (dict_table_t*)ib_open_table_by_name(normalized_name); - } - - mem_free(normalized_name); - normalized_name = NULL; - - /* It can happen that another thread has created the table but - not the cluster index or it's a broken table definition. Refuse to - open if that's the case. */ - if (table != NULL && dict_table_get_first_index(table) == NULL) { - table = NULL; - } - - if (table != NULL) { - err = ib_create_cursor_with_index_id(ib_crsr, table, 0, - (trx_t*) ib_trx); - } else { - err = DB_TABLE_NOT_FOUND; - } - - return(err); -} - -/********************************************************************//** -Free a context struct for a table handle. 
*/ -static -void -ib_qry_proc_free( -/*=============*/ - ib_qry_proc_t* q_proc) /*!< in, own: qproc struct */ -{ - que_graph_free_recursive(q_proc->grph.ins); - que_graph_free_recursive(q_proc->grph.upd); - que_graph_free_recursive(q_proc->grph.sel); - - memset(q_proc, 0x0, sizeof(*q_proc)); -} - -/*****************************************************************//** -set a cursor trx to NULL */ -UNIV_INTERN -void -ib_cursor_clear_trx( -/*================*/ - ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - cursor->prebuilt->trx = NULL; -} - -/*****************************************************************//** -Reset the cursor. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_reset( -/*============*/ - ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - if (cursor->valid_trx && prebuilt->trx != NULL - && prebuilt->trx->n_mysql_tables_in_use > 0) { - - --prebuilt->trx->n_mysql_tables_in_use; - } - - /* The fields in this data structure are allocated from - the query heap and so need to be reset too. 
*/ - ib_qry_proc_free(&cursor->q_proc); - - mem_heap_empty(cursor->query_heap); - - return(DB_SUCCESS); -} - -/*****************************************************************//** -update the cursor with new transactions and also reset the cursor -@return DB_SUCCESS or err code */ -ib_err_t -ib_cursor_new_trx( -/*==============*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_trx_t ib_trx) /*!< in: transaction */ -{ - ib_err_t err = DB_SUCCESS; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - trx_t* trx = (trx_t*) ib_trx; - - row_prebuilt_t* prebuilt = cursor->prebuilt; - - row_update_prebuilt_trx(prebuilt, trx); - - cursor->valid_trx = TRUE; - - trx_assign_read_view(prebuilt->trx); - - ib_qry_proc_free(&cursor->q_proc); - - mem_heap_empty(cursor->query_heap); - - return(err); -} - -/*****************************************************************//** -Commit the transaction in a cursor -@return DB_SUCCESS or err code */ -ib_err_t -ib_cursor_commit_trx( -/*=================*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_trx_t ib_trx) /*!< in: transaction */ -{ - ib_err_t err = DB_SUCCESS; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; -#ifdef UNIV_DEBUG - row_prebuilt_t* prebuilt = cursor->prebuilt; - - ut_ad(prebuilt->trx == (trx_t*) ib_trx); -#endif /* UNIV_DEBUG */ - ib_trx_commit(ib_trx); - cursor->valid_trx = FALSE; - return(err); -} - -/*****************************************************************//** -Close an InnoDB table and free the cursor. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_close( -/*============*/ - ib_crsr_t ib_crsr) /*!< in,own: InnoDB cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt; - trx_t* trx; - - if (!cursor) { - return(DB_SUCCESS); - } - - prebuilt = cursor->prebuilt; - trx = prebuilt->trx; - - ib_qry_proc_free(&cursor->q_proc); - - /* The transaction could have been detached from the cursor. 
*/ - if (cursor->valid_trx && trx != NULL - && trx->n_mysql_tables_in_use > 0) { - --trx->n_mysql_tables_in_use; - } - - row_prebuilt_free(prebuilt, FALSE); - cursor->prebuilt = NULL; - - mem_heap_free(cursor->query_heap); - mem_heap_free(cursor->heap); - cursor = NULL; - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Close the table, decrement n_ref_count count. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_close_table( -/*==================*/ - ib_crsr_t ib_crsr) /*!< in,own: InnoDB cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - if (prebuilt && prebuilt->table) { - dict_table_close(prebuilt->table, FALSE, FALSE); - } - - return(DB_SUCCESS); -} -/**********************************************************************//** -Run the insert query and do error handling. -@return DB_SUCCESS or error code */ -UNIV_INLINE -ib_err_t -ib_insert_row_with_lock_retry( -/*==========================*/ - que_thr_t* thr, /*!< in: insert query graph */ - ins_node_t* node, /*!< in: insert node for the query */ - trx_savept_t* savept) /*!< in: savepoint to rollback to - in case of an error */ -{ - trx_t* trx; - ib_err_t err; - ib_bool_t lock_wait; - - trx = thr_get_trx(thr); - - do { - thr->run_node = node; - thr->prev_node = node; - - row_ins_step(thr); - - err = trx->error_state; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - thr->lock_state = QUE_THR_LOCK_ROW; - lock_wait = static_cast<ib_bool_t>( - ib_handle_errors(&err, trx, thr, savept)); - thr->lock_state = QUE_THR_LOCK_NOLOCK; - } else { - lock_wait = FALSE; - } - } while (lock_wait); - - return(err); -} - -/*****************************************************************//** -Write a row. 
-@return DB_SUCCESS or err code */ -static -ib_err_t -ib_execute_insert_query_graph( -/*==========================*/ - dict_table_t* table, /*!< in: table where to insert */ - que_fork_t* ins_graph, /*!< in: query graph */ - ins_node_t* node) /*!< in: insert node */ -{ - trx_t* trx; - que_thr_t* thr; - trx_savept_t savept; - ib_err_t err = DB_SUCCESS; - - trx = ins_graph->trx; - - savept = trx_savept_take(trx); - - thr = que_fork_get_first_thr(ins_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - - err = ib_insert_row_with_lock_retry(thr, node, &savept); - - if (err == DB_SUCCESS) { - que_thr_stop_for_mysql_no_error(thr, trx); - - dict_table_n_rows_inc(table); - - if (table->is_system_db) { - srv_stats.n_system_rows_inserted.inc(); - } else { - srv_stats.n_rows_inserted.inc(); - } - } - - trx->op_info = ""; - - return(err); -} - -/*****************************************************************//** -Create an insert query graph node. */ -static -void -ib_insert_query_graph_create( -/*==========================*/ - ib_cursor_t* cursor) /*!< in: Cursor instance */ -{ - ib_qry_proc_t* q_proc = &cursor->q_proc; - ib_qry_node_t* node = &q_proc->node; - trx_t* trx = cursor->prebuilt->trx; - - ut_a(trx->state != TRX_STATE_NOT_STARTED); - - if (node->ins == NULL) { - dtuple_t* row; - ib_qry_grph_t* grph = &q_proc->grph; - mem_heap_t* heap = cursor->query_heap; - dict_table_t* table = cursor->prebuilt->table; - - node->ins = ins_node_create(INS_DIRECT, table, heap); - - node->ins->select = NULL; - node->ins->values_list = NULL; - - row = dtuple_create(heap, dict_table_get_n_cols(table)); - dict_table_copy_types(row, table); - - ins_node_set_new_row(node->ins, row); - - grph->ins = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec(node->ins, trx, - heap))); - - grph->ins->state = QUE_FORK_ACTIVE; - } -} - -/*****************************************************************//** -Insert a row to a table. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_insert_row( -/*=================*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */ - const ib_tpl_t ib_tpl) /*!< in: tuple to insert */ -{ - ib_ulint_t i; - ib_qry_node_t* node; - ib_qry_proc_t* q_proc; - ulint n_fields; - dtuple_t* dst_dtuple; - ib_err_t err = DB_SUCCESS; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - const ib_tuple_t* src_tuple = (const ib_tuple_t*) ib_tpl; - - ib_insert_query_graph_create(cursor); - - ut_ad(src_tuple->type == TPL_TYPE_ROW); - - q_proc = &cursor->q_proc; - node = &q_proc->node; - - node->ins->state = INS_NODE_ALLOC_ROW_ID; - dst_dtuple = node->ins->row; - - n_fields = dtuple_get_n_fields(src_tuple->ptr); - ut_ad(n_fields == dtuple_get_n_fields(dst_dtuple)); - - /* Do a shallow copy of the data fields and check for NULL - constraints on columns. */ - for (i = 0; i < n_fields; i++) { - ulint mtype; - dfield_t* src_field; - dfield_t* dst_field; - - src_field = dtuple_get_nth_field(src_tuple->ptr, i); - - mtype = dtype_get_mtype(dfield_get_type(src_field)); - - /* Don't touch the system columns. */ - if (mtype != DATA_SYS) { - ulint prtype; - - prtype = dtype_get_prtype(dfield_get_type(src_field)); - - if ((prtype & DATA_NOT_NULL) - && dfield_is_null(src_field)) { - - err = DB_DATA_MISMATCH; - break; - } - - dst_field = dtuple_get_nth_field(dst_dtuple, i); - ut_ad(mtype - == dtype_get_mtype(dfield_get_type(dst_field))); - - /* Do a shallow copy. */ - dfield_set_data( - dst_field, src_field->data, src_field->len); - - if (dst_field->len != IB_SQL_NULL) { - UNIV_MEM_ASSERT_RW(dst_field->data, - dst_field->len); - } - } - } - - if (err == DB_SUCCESS) { - err = ib_execute_insert_query_graph( - src_tuple->index->table, q_proc->grph.ins, node->ins); - } - - ib_wake_master_thread(); - - return(err); -} - -/*********************************************************************//** -Gets pointer to a prebuilt update vector used in updates. 
-@return update vector */ -UNIV_INLINE -upd_t* -ib_update_vector_create( -/*====================*/ - ib_cursor_t* cursor) /*!< in: current cursor */ -{ - trx_t* trx = cursor->prebuilt->trx; - mem_heap_t* heap = cursor->query_heap; - dict_table_t* table = cursor->prebuilt->table; - ib_qry_proc_t* q_proc = &cursor->q_proc; - ib_qry_grph_t* grph = &q_proc->grph; - ib_qry_node_t* node = &q_proc->node; - - ut_a(trx->state != TRX_STATE_NOT_STARTED); - - if (node->upd == NULL) { - node->upd = static_cast<upd_node_t*>( - row_create_update_node_for_mysql(table, heap)); - } - - grph->upd = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec(node->upd, trx, heap))); - - grph->upd->state = QUE_FORK_ACTIVE; - - return(node->upd->update); -} - -/**********************************************************************//** -Note that a column has changed. */ -static -void -ib_update_col( -/*==========*/ - - ib_cursor_t* cursor, /*!< in: current cursor */ - upd_field_t* upd_field, /*!< in/out: update field */ - ulint col_no, /*!< in: column number */ - dfield_t* dfield) /*!< in: updated dfield */ -{ - ulint data_len; - dict_table_t* table = cursor->prebuilt->table; - dict_index_t* index = dict_table_get_first_index(table); - - data_len = dfield_get_len(dfield); - - if (data_len == UNIV_SQL_NULL) { - dfield_set_null(&upd_field->new_val); - } else { - dfield_copy_data(&upd_field->new_val, dfield); - } - - upd_field->exp = NULL; - - upd_field->orig_len = 0; - - upd_field->field_no = dict_col_get_clust_pos( - &table->cols[col_no], index); -} - -/**********************************************************************//** -Checks which fields have changed in a row and stores the new data -to an update vector. 
-@return DB_SUCCESS or err code */ -static -ib_err_t -ib_calc_diff( -/*=========*/ - ib_cursor_t* cursor, /*!< in: current cursor */ - upd_t* upd, /*!< in/out: update vector */ - const ib_tuple_t*old_tuple, /*!< in: Old tuple in table */ - const ib_tuple_t*new_tuple) /*!< in: New tuple to update */ -{ - ulint i; - ulint n_changed = 0; - ib_err_t err = DB_SUCCESS; - ulint n_fields = dtuple_get_n_fields(new_tuple->ptr); - - ut_a(old_tuple->type == TPL_TYPE_ROW); - ut_a(new_tuple->type == TPL_TYPE_ROW); - ut_a(old_tuple->index->table == new_tuple->index->table); - - for (i = 0; i < n_fields; ++i) { - ulint mtype; - ulint prtype; - upd_field_t* upd_field; - dfield_t* new_dfield; - dfield_t* old_dfield; - - new_dfield = dtuple_get_nth_field(new_tuple->ptr, i); - old_dfield = dtuple_get_nth_field(old_tuple->ptr, i); - - mtype = dtype_get_mtype(dfield_get_type(old_dfield)); - prtype = dtype_get_prtype(dfield_get_type(old_dfield)); - - /* Skip the system columns */ - if (mtype == DATA_SYS) { - continue; - - } else if ((prtype & DATA_NOT_NULL) - && dfield_is_null(new_dfield)) { - - err = DB_DATA_MISMATCH; - break; - } - - if (dfield_get_len(new_dfield) != dfield_get_len(old_dfield) - || (!dfield_is_null(old_dfield) - && memcmp(dfield_get_data(new_dfield), - dfield_get_data(old_dfield), - dfield_get_len(old_dfield)) != 0)) { - - upd_field = &upd->fields[n_changed]; - - ib_update_col(cursor, upd_field, i, new_dfield); - - ++n_changed; - } - } - - if (err == DB_SUCCESS) { - upd->info_bits = 0; - upd->n_fields = n_changed; - } - - return(err); -} - -/**********************************************************************//** -Run the update query and do error handling. 
-@return DB_SUCCESS or error code */ -UNIV_INLINE -ib_err_t -ib_update_row_with_lock_retry( -/*==========================*/ - que_thr_t* thr, /*!< in: Update query graph */ - upd_node_t* node, /*!< in: Update node for the query */ - trx_savept_t* savept) /*!< in: savepoint to rollback to - in case of an error */ - -{ - trx_t* trx; - ib_err_t err; - ib_bool_t lock_wait; - - trx = thr_get_trx(thr); - - do { - thr->run_node = node; - thr->prev_node = node; - - row_upd_step(thr); - - err = trx->error_state; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - if (err != DB_RECORD_NOT_FOUND) { - thr->lock_state = QUE_THR_LOCK_ROW; - - lock_wait = static_cast<ib_bool_t>( - ib_handle_errors(&err, trx, thr, savept)); - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - } else { - lock_wait = FALSE; - } - } else { - lock_wait = FALSE; - } - } while (lock_wait); - - return(err); -} - -/*********************************************************************//** -Does an update or delete of a row. -@return DB_SUCCESS or err code */ -UNIV_INLINE -ib_err_t -ib_execute_update_query_graph( -/*==========================*/ - ib_cursor_t* cursor, /*!< in: Cursor instance */ - btr_pcur_t* pcur) /*!< in: Btree persistent cursor */ -{ - ib_err_t err; - que_thr_t* thr; - upd_node_t* node; - trx_savept_t savept; - trx_t* trx = cursor->prebuilt->trx; - dict_table_t* table = cursor->prebuilt->table; - ib_qry_proc_t* q_proc = &cursor->q_proc; - - /* The transaction must be running. 
*/ - ut_a(trx->state != TRX_STATE_NOT_STARTED); - - node = q_proc->node.upd; - - ut_a(dict_index_is_clust(pcur->btr_cur.index)); - btr_pcur_copy_stored_position(node->pcur, pcur); - - ut_a(node->pcur->rel_pos == BTR_PCUR_ON); - - savept = trx_savept_take(trx); - - thr = que_fork_get_first_thr(q_proc->grph.upd); - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - que_thr_move_to_run_state_for_mysql(thr, trx); - - err = ib_update_row_with_lock_retry(thr, node, &savept); - - if (err == DB_SUCCESS) { - - que_thr_stop_for_mysql_no_error(thr, trx); - - if (node->is_delete) { - - dict_table_n_rows_dec(table); - - if (table->is_system_db) { - srv_stats.n_system_rows_deleted.inc(); - } else { - srv_stats.n_rows_deleted.inc(); - } - } else { - if (table->is_system_db) { - srv_stats.n_system_rows_updated.inc(); - } else { - srv_stats.n_rows_updated.inc(); - } - } - - } else if (err == DB_RECORD_NOT_FOUND) { - trx->error_state = DB_SUCCESS; - } - - trx->op_info = ""; - - return(err); -} - -/*****************************************************************//** -Update a row in a table. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_update_row( -/*=================*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */ - const ib_tpl_t ib_new_tpl) /*!< in: New tuple to update */ -{ - upd_t* upd; - ib_err_t err; - btr_pcur_t* pcur; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - const ib_tuple_t*old_tuple = (const ib_tuple_t*) ib_old_tpl; - const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl; - - if (dict_index_is_clust(prebuilt->index)) { - pcur = &cursor->prebuilt->pcur; - } else if (prebuilt->need_to_access_clustered) { - pcur = &cursor->prebuilt->clust_pcur; - } else { - return(DB_ERROR); - } - - ut_a(old_tuple->type == TPL_TYPE_ROW); - ut_a(new_tuple->type == TPL_TYPE_ROW); - - upd = ib_update_vector_create(cursor); - - err = ib_calc_diff(cursor, upd, old_tuple, new_tuple); - - if (err == DB_SUCCESS) { - /* Note that this is not a delete. */ - cursor->q_proc.node.upd->is_delete = FALSE; - - err = ib_execute_update_query_graph(cursor, pcur); - } - - ib_wake_master_thread(); - - return(err); -} - -/**********************************************************************//** -Build the update query graph to delete a row from an index. 
-@return DB_SUCCESS or err code */ -static -ib_err_t -ib_delete_row( -/*==========*/ - ib_cursor_t* cursor, /*!< in: current cursor */ - btr_pcur_t* pcur, /*!< in: Btree persistent cursor */ - const rec_t* rec) /*!< in: record to delete */ -{ - ulint i; - upd_t* upd; - ib_err_t err; - ib_tuple_t* tuple; - ib_tpl_t ib_tpl; - ulint n_cols; - upd_field_t* upd_field; - ib_bool_t page_format; - dict_table_t* table = cursor->prebuilt->table; - dict_index_t* index = dict_table_get_first_index(table); - - n_cols = dict_index_get_n_ordering_defined_by_user(index); - ib_tpl = ib_key_tuple_new(index, n_cols); - - if (!ib_tpl) { - return(DB_OUT_OF_MEMORY); - } - - tuple = (ib_tuple_t*) ib_tpl; - - upd = ib_update_vector_create(cursor); - - page_format = static_cast<ib_bool_t>( - dict_table_is_comp(index->table)); - ib_read_tuple(rec, page_format, tuple, NULL, NULL); - - upd->n_fields = ib_tuple_get_n_cols(ib_tpl); - - for (i = 0; i < upd->n_fields; ++i) { - dfield_t* dfield; - - upd_field = &upd->fields[i]; - dfield = dtuple_get_nth_field(tuple->ptr, i); - - dfield_copy_data(&upd_field->new_val, dfield); - - upd_field->exp = NULL; - - upd_field->orig_len = 0; - - upd->info_bits = 0; - - upd_field->field_no = dict_col_get_clust_pos( - &table->cols[i], index); - } - - /* Note that this is a delete. */ - cursor->q_proc.node.upd->is_delete = TRUE; - - err = ib_execute_update_query_graph(cursor, pcur); - - ib_tuple_delete(ib_tpl); - - return(err); -} - -/*****************************************************************//** -Delete a row in a table. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_delete_row( -/*=================*/ - ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */ -{ - ib_err_t err; - btr_pcur_t* pcur; - dict_index_t* index; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - index = dict_table_get_first_index(prebuilt->index->table); - - /* Check whether this is a secondary index cursor */ - if (index != prebuilt->index) { - if (prebuilt->need_to_access_clustered) { - pcur = &prebuilt->clust_pcur; - } else { - return(DB_ERROR); - } - } else { - pcur = &prebuilt->pcur; - } - - if (ib_btr_cursor_is_positioned(pcur)) { - const rec_t* rec; - ib_bool_t page_format; - mtr_t mtr; - rec_t* copy = NULL; - byte ptr[UNIV_PAGE_SIZE_MAX]; - - page_format = static_cast<ib_bool_t>( - dict_table_is_comp(index->table)); - - mtr_start(&mtr); - - if (btr_pcur_restore_position( - BTR_SEARCH_LEAF, pcur, &mtr)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - rec_offs_init(offsets_); - - rec = btr_pcur_get_rec(pcur); - - /* Since mtr will be commited, the rec - will not be protected. Make a copy of - the rec. */ - offsets = rec_get_offsets( - rec, index, offsets, ULINT_UNDEFINED, &heap); - ut_ad(rec_offs_size(offsets) < UNIV_PAGE_SIZE_MAX); - copy = rec_copy(ptr, rec, offsets); - } - - mtr_commit(&mtr); - - if (copy && !rec_get_deleted_flag(copy, page_format)) { - err = ib_delete_row(cursor, pcur, copy); - } else { - err = DB_RECORD_NOT_FOUND; - } - } else { - err = DB_RECORD_NOT_FOUND; - } - - ib_wake_master_thread(); - - return(err); -} - -/*****************************************************************//** -Read current row. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_read_row( -/*===============*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_tpl_t ib_tpl, /*!< out: read cols into this tuple */ - void** row_buf, /*!< in/out: row buffer */ - ib_ulint_t* row_len) /*!< in/out: row buffer len */ -{ - ib_err_t err; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED); - - /* When searching with IB_EXACT_MATCH set, row_search_for_mysql() - will not position the persistent cursor but will copy the record - found into the row cache. It should be the only entry. */ - if (!ib_cursor_is_positioned(ib_crsr) ) { - err = DB_RECORD_NOT_FOUND; - } else { - mtr_t mtr; - btr_pcur_t* pcur; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - if (prebuilt->need_to_access_clustered - && tuple->type == TPL_TYPE_ROW) { - pcur = &prebuilt->clust_pcur; - } else { - pcur = &prebuilt->pcur; - } - - if (pcur == NULL) { - return(DB_ERROR); - } - - mtr_start(&mtr); - - if (btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr)) { - const rec_t* rec; - ib_bool_t page_format; - - page_format = static_cast<ib_bool_t>( - dict_table_is_comp(tuple->index->table)); - rec = btr_pcur_get_rec(pcur); - - if (prebuilt->innodb_api_rec && - prebuilt->innodb_api_rec != rec) { - rec = prebuilt->innodb_api_rec; - } - - if (!rec_get_deleted_flag(rec, page_format)) { - ib_read_tuple(rec, page_format, tuple, - row_buf, (ulint*) row_len); - err = DB_SUCCESS; - } else{ - err = DB_RECORD_NOT_FOUND; - } - - } else { - err = DB_RECORD_NOT_FOUND; - } - - mtr_commit(&mtr); - } - - return(err); -} - -/*****************************************************************//** -Move cursor to the first record in the table. 
-@return DB_SUCCESS or err code */ -UNIV_INLINE -ib_err_t -ib_cursor_position( -/*===============*/ - ib_cursor_t* cursor, /*!< in: InnoDB cursor instance */ - ib_srch_mode_t mode) /*!< in: Search mode */ -{ - ib_err_t err; - row_prebuilt_t* prebuilt = cursor->prebuilt; - unsigned char* buf; - - buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE)); - - /* We want to position at one of the ends, row_search_for_mysql() - uses the search_tuple fields to work out what to do. */ - dtuple_set_n_fields(prebuilt->search_tuple, 0); - - err = static_cast<ib_err_t>(row_search_for_mysql( - buf, mode, prebuilt, 0, 0)); - - mem_free(buf); - - return(err); -} - -/*****************************************************************//** -Move cursor to the first record in the table. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_first( -/*============*/ - ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - return(ib_cursor_position(cursor, IB_CUR_G)); -} - -/*****************************************************************//** -Move cursor to the last record in the table. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_last( -/*===========*/ - ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - return(ib_cursor_position(cursor, IB_CUR_L)); -} - -/*****************************************************************//** -Move cursor to the next user record in the table. 
-@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_next( -/*===========*/ - ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */ -{ - ib_err_t err; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - byte buf[UNIV_PAGE_SIZE_MAX]; - - /* We want to move to the next record */ - dtuple_set_n_fields(prebuilt->search_tuple, 0); - - err = static_cast<ib_err_t>(row_search_for_mysql( - buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT)); - - return(err); -} - -/*****************************************************************//** -Search for key. -@return DB_SUCCESS or err code */ -UNIV_INTERN -ib_err_t -ib_cursor_moveto( -/*=============*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_tpl_t ib_tpl, /*!< in: Key to search for */ - ib_srch_mode_t ib_srch_mode) /*!< in: search mode */ -{ - ulint i; - ulint n_fields; - ib_err_t err = DB_SUCCESS; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - dtuple_t* search_tuple = prebuilt->search_tuple; - unsigned char* buf; - - ut_a(tuple->type == TPL_TYPE_KEY); - - n_fields = dict_index_get_n_ordering_defined_by_user(prebuilt->index); - - if (n_fields > dtuple_get_n_fields(tuple->ptr)) { - n_fields = dtuple_get_n_fields(tuple->ptr); - } - - dtuple_set_n_fields(search_tuple, n_fields); - dtuple_set_n_fields_cmp(search_tuple, n_fields); - - /* Do a shallow copy */ - for (i = 0; i < n_fields; ++i) { - dfield_copy(dtuple_get_nth_field(search_tuple, i), - dtuple_get_nth_field(tuple->ptr, i)); - } - - ut_a(prebuilt->select_lock_type <= LOCK_NUM); - - prebuilt->innodb_api_rec = NULL; - - buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE)); - - err = static_cast<ib_err_t>(row_search_for_mysql( - buf, ib_srch_mode, prebuilt, cursor->match_mode, 0)); - - mem_free(buf); - - return(err); -} - -/*****************************************************************//** -Set the 
cursor search mode. */ -UNIV_INTERN -void -ib_cursor_set_match_mode( -/*=====================*/ - ib_crsr_t ib_crsr, /*!< in: Cursor instance */ - ib_match_mode_t match_mode) /*!< in: ib_cursor_moveto match mode */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - cursor->match_mode = match_mode; -} - -/*****************************************************************//** -Get the dfield instance for the column in the tuple. -@return dfield instance in tuple */ -UNIV_INLINE -dfield_t* -ib_col_get_dfield( -/*==============*/ - ib_tuple_t* tuple, /*!< in: tuple instance */ - ulint col_no) /*!< in: col no. in tuple */ -{ - dfield_t* dfield; - - dfield = dtuple_get_nth_field(tuple->ptr, col_no); - - return(dfield); -} - -/*****************************************************************//** -Predicate to check whether a column type contains variable length data. -@return DB_SUCCESS or error code */ -UNIV_INLINE -ib_err_t -ib_col_is_capped( -/*==============*/ - const dtype_t* dtype) /*!< in: column type */ -{ - return(static_cast<ib_err_t>( - (dtype_get_mtype(dtype) == DATA_VARCHAR - || dtype_get_mtype(dtype) == DATA_CHAR - || dtype_get_mtype(dtype) == DATA_MYSQL - || dtype_get_mtype(dtype) == DATA_VARMYSQL - || dtype_get_mtype(dtype) == DATA_FIXBINARY - || dtype_get_mtype(dtype) == DATA_BINARY) - && dtype_get_len(dtype) > 0)); -} - -/*****************************************************************//** -Set a column of the tuple. Make a copy using the tuple's heap. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_col_set_value( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t col_no, /*!< in: column index in tuple */ - const void* src, /*!< in: data value */ - ib_ulint_t len, /*!< in: data value len */ - ib_bool_t need_cpy) /*!< in: if need memcpy */ -{ - const dtype_t* dtype; - dfield_t* dfield; - void* dst = NULL; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - ulint col_len; - - dfield = ib_col_get_dfield(tuple, col_no); - - /* User wants to set the column to NULL. */ - if (len == IB_SQL_NULL) { - dfield_set_null(dfield); - return(DB_SUCCESS); - } - - dtype = dfield_get_type(dfield); - col_len = dtype_get_len(dtype); - - /* Not allowed to update system columns. */ - if (dtype_get_mtype(dtype) == DATA_SYS) { - return(DB_DATA_MISMATCH); - } - - dst = dfield_get_data(dfield); - - /* Since TEXT/CLOB also map to DATA_VARCHAR we need to make an - exception. Perhaps we need to set the precise type and check - for that. 
*/ - if (ib_col_is_capped(dtype)) { - - len = ut_min(len, static_cast<ib_ulint_t>(col_len)); - - if (dst == NULL || len > dfield_get_len(dfield)) { - dst = mem_heap_alloc(tuple->heap, col_len); - ut_a(dst != NULL); - } - } else if (dst == NULL || len > dfield_get_len(dfield)) { - dst = mem_heap_alloc(tuple->heap, len); - } - - if (dst == NULL) { - return(DB_OUT_OF_MEMORY); - } - - switch (dtype_get_mtype(dtype)) { - case DATA_INT: { - - if (col_len == len) { - ibool usign; - - usign = dtype_get_prtype(dtype) & DATA_UNSIGNED; - mach_write_int_type(static_cast<byte*>(dst), - static_cast<const byte*>(src), - len, usign); - - } else { - return(DB_DATA_MISMATCH); - } - break; - } - - case DATA_FLOAT: - if (len == sizeof(float)) { - mach_float_write(static_cast<byte*>(dst), *(float*)src); - } else { - return(DB_DATA_MISMATCH); - } - break; - - case DATA_DOUBLE: - if (len == sizeof(double)) { - mach_double_write(static_cast<byte*>(dst), - *(double*)src); - } else { - return(DB_DATA_MISMATCH); - } - break; - - case DATA_SYS: - ut_error; - break; - - case DATA_CHAR: { - ulint pad_char = ULINT_UNDEFINED; - - pad_char = dtype_get_pad_char( - dtype_get_mtype(dtype), dtype_get_prtype(dtype)); - - ut_a(pad_char != ULINT_UNDEFINED); - - memset((byte*) dst + len, - static_cast<int>(pad_char), - static_cast<size_t>(col_len - len)); - - memcpy(dst, src, len); - - len = static_cast<ib_ulint_t>(col_len); - break; - } - case DATA_BLOB: - case DATA_BINARY: - case DATA_DECIMAL: - case DATA_VARCHAR: - case DATA_FIXBINARY: - if (need_cpy) { - memcpy(dst, src, len); - } else { - dfield_set_data(dfield, src, len); - dst = dfield_get_data(dfield); - } - break; - - case DATA_MYSQL: - case DATA_VARMYSQL: { - ulint cset; - CHARSET_INFO* cs; - int error = 0; - ulint true_len = len; - - /* For multi byte character sets we need to - calculate the true length of the data. 
*/ - cset = dtype_get_charset_coll( - dtype_get_prtype(dtype)); - cs = all_charsets[cset]; - if (cs) { - uint pos = (uint)(col_len / cs->mbmaxlen); - - if (len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) - my_well_formed_length( - cs, - (const char*)src, - (const char*)src + len, - pos, - &error); - - if (true_len < len) { - len = static_cast<ib_ulint_t>(true_len); - } - } - } - - /* All invalid bytes in data need be truncated. - If len == 0, means all bytes of the data is invalid. - In this case, the data will be truncated to empty.*/ - memcpy(dst, src, len); - - /* For DATA_MYSQL, need to pad the unused - space with spaces. */ - if (dtype_get_mtype(dtype) == DATA_MYSQL) { - ulint n_chars; - - if (len < col_len) { - ulint pad_len = col_len - len; - - ut_a(cs != NULL); - ut_a(!(pad_len % cs->mbminlen)); - - cs->cset->fill(cs, (char*)dst + len, - pad_len, - 0x20 /* space */); - } - - /* Why we should do below? See function - row_mysql_store_col_in_innobase_format */ - - ut_a(!(dtype_get_len(dtype) - % dtype_get_mbmaxlen(dtype))); - - n_chars = dtype_get_len(dtype) - / dtype_get_mbmaxlen(dtype); - - /* Strip space padding. */ - while (col_len > n_chars - && ((char*)dst)[col_len - 1] == 0x20) { - col_len--; - } - - len = static_cast<ib_ulint_t>(col_len); - } - break; - } - - default: - ut_error; - } - - if (dst != dfield_get_data(dfield)) { - dfield_set_data(dfield, dst, len); - } else { - dfield_set_len(dfield, len); - } - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Get the size of the data available in a column of the tuple. 
-@return bytes avail or IB_SQL_NULL */ -UNIV_INTERN -ib_ulint_t -ib_col_get_len( -/*===========*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i) /*!< in: column index in tuple */ -{ - const dfield_t* dfield; - ulint data_len; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, i); - - data_len = dfield_get_len(dfield); - - return(static_cast<ib_ulint_t>( - data_len == UNIV_SQL_NULL ? IB_SQL_NULL : data_len)); -} - -/*****************************************************************//** -Copy a column value from the tuple. -@return bytes copied or IB_SQL_NULL */ -UNIV_INLINE -ib_ulint_t -ib_col_copy_value_low( -/*==================*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i, /*!< in: column index in tuple */ - void* dst, /*!< out: copied data value */ - ib_ulint_t len) /*!< in: max data value len to copy */ -{ - const void* data; - const dfield_t* dfield; - ulint data_len; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, i); - - data = dfield_get_data(dfield); - data_len = dfield_get_len(dfield); - - if (data_len != UNIV_SQL_NULL) { - - const dtype_t* dtype = dfield_get_type(dfield); - - switch (dtype_get_mtype(dfield_get_type(dfield))) { - case DATA_INT: { - ibool usign; - ullint ret; - - ut_a(data_len == len); - - usign = dtype_get_prtype(dtype) & DATA_UNSIGNED; - ret = mach_read_int_type(static_cast<const byte*>(data), - data_len, usign); - - if (usign) { - if (len == 1) { - *(ib_i8_t*)dst = (ib_i8_t)ret; - } else if (len == 2) { - *(ib_i16_t*)dst = (ib_i16_t)ret; - } else if (len == 4) { - *(ib_i32_t*)dst = (ib_i32_t)ret; - } else { - *(ib_i64_t*)dst = (ib_i64_t)ret; - } - } else { - if (len == 1) { - *(ib_u8_t*)dst = (ib_i8_t)ret; - } else if (len == 2) { - *(ib_u16_t*)dst = (ib_i16_t)ret; - } else if (len == 4) { - *(ib_u32_t*)dst = (ib_i32_t)ret; - } else { - *(ib_u64_t*)dst = (ib_i64_t)ret; - } - } - - break; - } - case DATA_FLOAT: - if (len == data_len) { - 
float f; - - ut_a(data_len == sizeof(f)); - f = mach_float_read(static_cast<const byte*>( - data)); - memcpy(dst, &f, sizeof(f)); - } else { - data_len = 0; - } - break; - case DATA_DOUBLE: - if (len == data_len) { - double d; - - ut_a(data_len == sizeof(d)); - d = mach_double_read(static_cast<const byte*>( - data)); - memcpy(dst, &d, sizeof(d)); - } else { - data_len = 0; - } - break; - default: - data_len = ut_min(data_len, len); - memcpy(dst, data, data_len); - } - } else { - data_len = IB_SQL_NULL; - } - - return(static_cast<ib_ulint_t>(data_len)); -} - -/*****************************************************************//** -Copy a column value from the tuple. -@return bytes copied or IB_SQL_NULL */ -UNIV_INTERN -ib_ulint_t -ib_col_copy_value( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i, /*!< in: column index in tuple */ - void* dst, /*!< out: copied data value */ - ib_ulint_t len) /*!< in: max data value len to copy */ -{ - return(ib_col_copy_value_low(ib_tpl, i, dst, len)); -} - -/*****************************************************************//** -Get the InnoDB column attribute from the internal column precise type. -@return precise type in api format */ -UNIV_INLINE -ib_col_attr_t -ib_col_get_attr( -/*============*/ - ulint prtype) /*!< in: column definition */ -{ - ib_col_attr_t attr = IB_COL_NONE; - - if (prtype & DATA_UNSIGNED) { - attr = static_cast<ib_col_attr_t>(attr | IB_COL_UNSIGNED); - } - - if (prtype & DATA_NOT_NULL) { - attr = static_cast<ib_col_attr_t>(attr | IB_COL_NOT_NULL); - } - - return(attr); -} - -/*****************************************************************//** -Get a column name from the tuple. 
-@return name of the column */ -UNIV_INTERN -const char* -ib_col_get_name( -/*============*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_ulint_t i) /*!< in: column index in tuple */ -{ - const char* name; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - dict_table_t* table = cursor->prebuilt->table; - dict_col_t* col = dict_table_get_nth_col(table, i); - ulint col_no = dict_col_get_no(col); - - name = dict_table_get_col_name(table, col_no); - - return(name); -} - -/*****************************************************************//** -Get an index field name from the cursor. -@return name of the field */ -UNIV_INTERN -const char* -ib_get_idx_field_name( -/*==================*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_ulint_t i) /*!< in: column index in tuple */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - dict_index_t* index = cursor->prebuilt->index; - dict_field_t* field; - - if (index) { - field = dict_index_get_nth_field(cursor->prebuilt->index, i); - - if (field) { - return(field->name); - } - } - - return(NULL); -} - -/*****************************************************************//** -Get a column type, length and attributes from the tuple. -@return len of column data */ -UNIV_INLINE -ib_ulint_t -ib_col_get_meta_low( -/*================*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i, /*!< in: column index in tuple */ - ib_col_meta_t* ib_col_meta) /*!< out: column meta data */ -{ - ib_u16_t prtype; - const dfield_t* dfield; - ulint data_len; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, i); - - data_len = dfield_get_len(dfield); - - /* We assume 1-1 mapping between the ENUM and internal type codes. 
*/ - ib_col_meta->type = static_cast<ib_col_type_t>( - dtype_get_mtype(dfield_get_type(dfield))); - - ib_col_meta->type_len = static_cast<ib_u32_t>( - dtype_get_len(dfield_get_type(dfield))); - - prtype = (ib_u16_t) dtype_get_prtype(dfield_get_type(dfield)); - - ib_col_meta->attr = ib_col_get_attr(prtype); - ib_col_meta->client_type = prtype & DATA_MYSQL_TYPE_MASK; - - return(static_cast<ib_ulint_t>(data_len)); -} - -/*************************************************************//** -Read a signed int 8 bit column from an InnoDB tuple. */ -UNIV_INLINE -ib_err_t -ib_tuple_check_int( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_bool_t usign, /*!< in: true if unsigned */ - ulint size) /*!< in: size of integer */ -{ - ib_col_meta_t ib_col_meta; - - ib_col_get_meta_low(ib_tpl, i, &ib_col_meta); - - if (ib_col_meta.type != IB_INT) { - return(DB_DATA_MISMATCH); - } else if (ib_col_meta.type_len == IB_SQL_NULL) { - return(DB_UNDERFLOW); - } else if (ib_col_meta.type_len != size) { - return(DB_DATA_MISMATCH); - } else if ((ib_col_meta.attr & IB_COL_UNSIGNED) && !usign) { - return(DB_DATA_MISMATCH); - } - - return(DB_SUCCESS); -} - -/*************************************************************//** -Read a signed int 8 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_i8( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i8_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read an unsigned int 8 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_u8( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u8_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read a signed int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_i16( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i16_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read an unsigned int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_u16( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u16_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read a signed int 32 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_i32( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i32_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read an unsigned int 32 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_u32( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u32_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read a signed int 64 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_i64( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i64_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*************************************************************//** -Read an unsigned int 64 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_u64( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u64_t* ival) /*!< out: integer value */ -{ - ib_err_t err; - - err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival)); - - if (err == DB_SUCCESS) { - ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival)); - } - - return(err); -} - -/*****************************************************************//** -Get a column value pointer from the tuple. -@return NULL or pointer to buffer */ -UNIV_INTERN -const void* -ib_col_get_value( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i) /*!< in: column index in tuple */ -{ - const void* data; - const dfield_t* dfield; - ulint data_len; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, i); - - data = dfield_get_data(dfield); - data_len = dfield_get_len(dfield); - - return(data_len != UNIV_SQL_NULL ? data : NULL); -} - -/*****************************************************************//** -Get a column type, length and attributes from the tuple. -@return len of column data */ -UNIV_INTERN -ib_ulint_t -ib_col_get_meta( -/*============*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i, /*!< in: column index in tuple */ - ib_col_meta_t* ib_col_meta) /*!< out: column meta data */ -{ - return(ib_col_get_meta_low(ib_tpl, i, ib_col_meta)); -} - -/*****************************************************************//** -"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple. 
-@return new tuple, or NULL */ -UNIV_INTERN -ib_tpl_t -ib_tuple_clear( -/*============*/ - ib_tpl_t ib_tpl) /*!< in,own: tuple (will be freed) */ -{ - const dict_index_t* index; - ulint n_cols; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - ib_tuple_type_t type = tuple->type; - mem_heap_t* heap = tuple->heap; - - index = tuple->index; - n_cols = dtuple_get_n_fields(tuple->ptr); - - mem_heap_empty(heap); - - if (type == TPL_TYPE_ROW) { - return(ib_row_tuple_new_low(index, n_cols, heap)); - } else { - return(ib_key_tuple_new_low(index, n_cols, heap)); - } -} - -/*****************************************************************//** -Create a new cluster key search tuple and copy the contents of the -secondary index key tuple columns that refer to the cluster index record -to the cluster key. It does a deep copy of the column data. -@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_tuple_get_cluster_key( -/*=====================*/ - ib_crsr_t ib_crsr, /*!< in: secondary index cursor */ - ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */ - const ib_tpl_t ib_src_tpl) /*!< in: source tuple */ -{ - ulint i; - ulint n_fields; - ib_err_t err = DB_SUCCESS; - ib_tuple_t* dst_tuple = NULL; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - ib_tuple_t* src_tuple = (ib_tuple_t*) ib_src_tpl; - dict_index_t* clust_index; - - clust_index = dict_table_get_first_index(cursor->prebuilt->table); - - /* We need to ensure that the src tuple belongs to the same table - as the open cursor and that it's not a tuple for a cluster index. */ - if (src_tuple->type != TPL_TYPE_KEY) { - return(DB_ERROR); - } else if (src_tuple->index->table != cursor->prebuilt->table) { - return(DB_DATA_MISMATCH); - } else if (src_tuple->index == clust_index) { - return(DB_ERROR); - } - - /* Create the cluster index key search tuple. 
*/ - *ib_dst_tpl = ib_clust_search_tuple_create(ib_crsr); - - if (!*ib_dst_tpl) { - return(DB_OUT_OF_MEMORY); - } - - dst_tuple = (ib_tuple_t*) *ib_dst_tpl; - ut_a(dst_tuple->index == clust_index); - - n_fields = dict_index_get_n_unique(dst_tuple->index); - - /* Do a deep copy of the data fields. */ - for (i = 0; i < n_fields; i++) { - ulint pos; - dfield_t* src_field; - dfield_t* dst_field; - - pos = dict_index_get_nth_field_pos( - src_tuple->index, dst_tuple->index, i); - - ut_a(pos != ULINT_UNDEFINED); - - src_field = dtuple_get_nth_field(src_tuple->ptr, pos); - dst_field = dtuple_get_nth_field(dst_tuple->ptr, i); - - if (!dfield_is_null(src_field)) { - UNIV_MEM_ASSERT_RW(src_field->data, src_field->len); - - dst_field->data = mem_heap_dup( - dst_tuple->heap, - src_field->data, - src_field->len); - - dst_field->len = src_field->len; - } else { - dfield_set_null(dst_field); - } - } - - return(err); -} - -/*****************************************************************//** -Copy the contents of source tuple to destination tuple. The tuples -must be of the same type and belong to the same table/index. -@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_tuple_copy( -/*==========*/ - ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */ - const ib_tpl_t ib_src_tpl) /*!< in: source tuple */ -{ - ulint i; - ulint n_fields; - ib_err_t err = DB_SUCCESS; - const ib_tuple_t*src_tuple = (const ib_tuple_t*) ib_src_tpl; - ib_tuple_t* dst_tuple = (ib_tuple_t*) ib_dst_tpl; - - /* Make sure src and dst are not the same. */ - ut_a(src_tuple != dst_tuple); - - /* Make sure they are the same type and refer to the same index. */ - if (src_tuple->type != dst_tuple->type - || src_tuple->index != dst_tuple->index) { - - return(DB_DATA_MISMATCH); - } - - n_fields = dtuple_get_n_fields(src_tuple->ptr); - ut_ad(n_fields == dtuple_get_n_fields(dst_tuple->ptr)); - - /* Do a deep copy of the data fields. 
*/ - for (i = 0; i < n_fields; ++i) { - dfield_t* src_field; - dfield_t* dst_field; - - src_field = dtuple_get_nth_field(src_tuple->ptr, i); - dst_field = dtuple_get_nth_field(dst_tuple->ptr, i); - - if (!dfield_is_null(src_field)) { - UNIV_MEM_ASSERT_RW(src_field->data, src_field->len); - - dst_field->data = mem_heap_dup( - dst_tuple->heap, - src_field->data, - src_field->len); - - dst_field->len = src_field->len; - } else { - dfield_set_null(dst_field); - } - } - - return(err); -} - -/*****************************************************************//** -Create an InnoDB tuple used for index/table search. -@return own: Tuple for current index */ -UNIV_INTERN -ib_tpl_t -ib_sec_search_tuple_create( -/*=======================*/ - ib_crsr_t ib_crsr) /*!< in: Cursor instance */ -{ - ulint n_cols; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - dict_index_t* index = cursor->prebuilt->index; - - n_cols = dict_index_get_n_unique_in_tree(index); - return(ib_key_tuple_new(index, n_cols)); -} - -/*****************************************************************//** -Create an InnoDB tuple used for index/table search. -@return own: Tuple for current index */ -UNIV_INTERN -ib_tpl_t -ib_sec_read_tuple_create( -/*=====================*/ - ib_crsr_t ib_crsr) /*!< in: Cursor instance */ -{ - ulint n_cols; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - dict_index_t* index = cursor->prebuilt->index; - - n_cols = dict_index_get_n_fields(index); - return(ib_row_tuple_new(index, n_cols)); -} - -/*****************************************************************//** -Create an InnoDB tuple used for table key operations. 
-@return own: Tuple for current table */ -UNIV_INTERN -ib_tpl_t -ib_clust_search_tuple_create( -/*=========================*/ - ib_crsr_t ib_crsr) /*!< in: Cursor instance */ -{ - ulint n_cols; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - dict_index_t* index; - - index = dict_table_get_first_index(cursor->prebuilt->table); - - n_cols = dict_index_get_n_ordering_defined_by_user(index); - return(ib_key_tuple_new(index, n_cols)); -} - -/*****************************************************************//** -Create an InnoDB tuple for table row operations. -@return own: Tuple for current table */ -UNIV_INTERN -ib_tpl_t -ib_clust_read_tuple_create( -/*=======================*/ - ib_crsr_t ib_crsr) /*!< in: Cursor instance */ -{ - ulint n_cols; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - dict_index_t* index; - - index = dict_table_get_first_index(cursor->prebuilt->table); - - n_cols = dict_table_get_n_cols(cursor->prebuilt->table); - return(ib_row_tuple_new(index, n_cols)); -} - -/*****************************************************************//** -Return the number of user columns in the tuple definition. -@return number of user columns */ -UNIV_INTERN -ib_ulint_t -ib_tuple_get_n_user_cols( -/*=====================*/ - const ib_tpl_t ib_tpl) /*!< in: Tuple for current table */ -{ - const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl; - - if (tuple->type == TPL_TYPE_ROW) { - return(static_cast<ib_ulint_t>( - dict_table_get_n_user_cols(tuple->index->table))); - } - - return(static_cast<ib_ulint_t>( - dict_index_get_n_ordering_defined_by_user(tuple->index))); -} - -/*****************************************************************//** -Return the number of columns in the tuple definition. 
-@return number of columns */ -UNIV_INTERN -ib_ulint_t -ib_tuple_get_n_cols( -/*================*/ - const ib_tpl_t ib_tpl) /*!< in: Tuple for table/index */ -{ - const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl; - - return(static_cast<ib_ulint_t>(dtuple_get_n_fields(tuple->ptr))); -} - -/*****************************************************************//** -Destroy an InnoDB tuple. */ -UNIV_INTERN -void -ib_tuple_delete( -/*============*/ - ib_tpl_t ib_tpl) /*!< in,own: Tuple instance to delete */ -{ - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - if (!ib_tpl) { - return; - } - - mem_heap_free(tuple->heap); -} - -/*****************************************************************//** -Get a table id. This function will acquire the dictionary mutex. -@return DB_SUCCESS if found */ -UNIV_INTERN -ib_err_t -ib_table_get_id( -/*============*/ - const char* table_name, /*!< in: table to find */ - ib_id_u64_t* table_id) /*!< out: table id if found */ -{ - ib_err_t err; - - dict_mutex_enter_for_mysql(); - - err = ib_table_get_id_low(table_name, table_id); - - dict_mutex_exit_for_mysql(); - - return(err); -} - -/*****************************************************************//** -Get an index id. 
-@return DB_SUCCESS if found */ -UNIV_INTERN -ib_err_t -ib_index_get_id( -/*============*/ - const char* table_name, /*!< in: find index for this table */ - const char* index_name, /*!< in: index to find */ - ib_id_u64_t* index_id) /*!< out: index id if found */ -{ - dict_table_t* table; - char* normalized_name; - ib_err_t err = DB_TABLE_NOT_FOUND; - - *index_id = 0; - - normalized_name = static_cast<char*>( - mem_alloc(ut_strlen(table_name) + 1)); - ib_normalize_table_name(normalized_name, table_name); - - table = ib_lookup_table_by_name(normalized_name); - - mem_free(normalized_name); - normalized_name = NULL; - - if (table != NULL) { - dict_index_t* index; - - index = dict_table_get_index_on_name(table, index_name); - - if (index != NULL) { - /* We only support 32 bit table and index ids. Because - we need to pack the table id into the index id. */ - - *index_id = (table->id); - *index_id <<= 32; - *index_id |= (index->id); - - err = DB_SUCCESS; - } - } - - return(err); -} - -#ifdef __WIN__ -#define SRV_PATH_SEPARATOR '\\' -#else -#define SRV_PATH_SEPARATOR '/' -#endif - - -/*****************************************************************//** -Check if cursor is positioned. -@return IB_TRUE if positioned */ -UNIV_INTERN -ib_bool_t -ib_cursor_is_positioned( -/*====================*/ - const ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */ -{ - const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - return(ib_btr_cursor_is_positioned(&prebuilt->pcur)); -} - - -/*****************************************************************//** -Checks if the data dictionary is latched in exclusive mode. 
-@return TRUE if exclusive latch */ -UNIV_INTERN -ib_bool_t -ib_schema_lock_is_exclusive( -/*========================*/ - const ib_trx_t ib_trx) /*!< in: transaction */ -{ - const trx_t* trx = (const trx_t*) ib_trx; - - return(trx->dict_operation_lock_mode == RW_X_LATCH); -} - -/*****************************************************************//** -Checks if the data dictionary is latched in shared mode. -@return TRUE if shared latch */ -UNIV_INTERN -ib_bool_t -ib_schema_lock_is_shared( -/*=====================*/ - const ib_trx_t ib_trx) /*!< in: transaction */ -{ - const trx_t* trx = (const trx_t*) ib_trx; - - return(trx->dict_operation_lock_mode == RW_S_LATCH); -} - -/*****************************************************************//** -Set the Lock an InnoDB cursor/table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_cursor_lock( -/*===========*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - trx_t* trx = prebuilt->trx; - dict_table_t* table = prebuilt->table; - - return(ib_trx_lock_table_with_retry( - trx, table, (enum lock_mode) ib_lck_mode)); -} - -/*****************************************************************//** -Set the Lock an InnoDB table using the table id. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_table_lock( -/*==========*/ - ib_trx_t ib_trx, /*!< in/out: transaction */ - ib_id_u64_t table_id, /*!< in: table id */ - ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */ -{ - ib_err_t err; - que_thr_t* thr; - mem_heap_t* heap; - dict_table_t* table; - ib_qry_proc_t q_proc; - trx_t* trx = (trx_t*) ib_trx; - - ut_a(trx->state != TRX_STATE_NOT_STARTED); - - table = ib_open_table_by_id(table_id, FALSE); - - if (table == NULL) { - return(DB_TABLE_NOT_FOUND); - } - - ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM)); - - heap = mem_heap_create(128); - - q_proc.node.sel = sel_node_create(heap); - - thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap); - - q_proc.grph.sel = static_cast<que_fork_t*>(que_node_get_parent(thr)); - q_proc.grph.sel->state = QUE_FORK_ACTIVE; - - trx->op_info = "setting table lock"; - - ut_a(ib_lck_mode == IB_LOCK_IS || ib_lck_mode == IB_LOCK_IX); - err = static_cast<ib_err_t>( - lock_table(0, table, (enum lock_mode) ib_lck_mode, thr)); - - trx->error_state = err; - - mem_heap_free(heap); - - return(err); -} - -/*****************************************************************//** -Unlock an InnoDB table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_cursor_unlock( -/*=============*/ - ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */ -{ - ib_err_t err = DB_SUCCESS; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - if (prebuilt->trx->mysql_n_tables_locked > 0) { - --prebuilt->trx->mysql_n_tables_locked; - } else { - err = DB_ERROR; - } - - return(err); -} - -/*****************************************************************//** -Set the Lock mode of the cursor. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_cursor_set_lock_mode( -/*====================*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */ -{ - ib_err_t err = DB_SUCCESS; - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM)); - - if (ib_lck_mode == IB_LOCK_X) { - err = ib_cursor_lock(ib_crsr, IB_LOCK_IX); - } else if (ib_lck_mode == IB_LOCK_S) { - err = ib_cursor_lock(ib_crsr, IB_LOCK_IS); - } - - if (err == DB_SUCCESS) { - prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode; - ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED); - } - - return(err); -} - -/*****************************************************************//** -Set need to access clustered index record. */ -UNIV_INTERN -void -ib_cursor_set_cluster_access( -/*=========================*/ - ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - prebuilt->need_to_access_clustered = TRUE; -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i8_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. 
-@return DB_SUCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i16( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i16_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i32( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i32_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_i64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i64_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u8_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. 
Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u16( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tupe to write to */ - int col_no, /*!< in: column number */ - ib_u16_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u32( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u32_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_u64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u64_t val) /*!< in: value to write */ -{ - return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true)); -} - -/*****************************************************************//** -Inform the cursor that it's the start of an SQL statement. */ -UNIV_INTERN -void -ib_cursor_stmt_begin( -/*=================*/ - ib_crsr_t ib_crsr) /*!< in: cursor */ -{ - ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr; - - cursor->prebuilt->sql_stat_start = TRUE; -} - -/*****************************************************************//** -Write a double value to a column. 
-@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_double( -/*==================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - double val) /*!< in: value to write */ -{ - const dfield_t* dfield; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, col_no); - - if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) { - return(ib_col_set_value(ib_tpl, col_no, - &val, sizeof(val), true)); - } else { - return(DB_DATA_MISMATCH); - } -} - -/*************************************************************//** -Read a double column value from an InnoDB tuple. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_double( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t col_no, /*!< in: column number */ - double* dval) /*!< out: double value */ -{ - ib_err_t err; - const dfield_t* dfield; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, col_no); - - if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) { - ib_col_copy_value_low(ib_tpl, col_no, dval, sizeof(*dval)); - err = DB_SUCCESS; - } else { - err = DB_DATA_MISMATCH; - } - - return(err); -} - -/*****************************************************************//** -Write a float value to a column. -@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_write_float( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - float val) /*!< in: value to write */ -{ - const dfield_t* dfield; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, col_no); - - if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) { - return(ib_col_set_value(ib_tpl, col_no, - &val, sizeof(val), true)); - } else { - return(DB_DATA_MISMATCH); - } -} - -/*************************************************************//** -Read a float value from an InnoDB tuple. 
-@return DB_SUCCESS or error */ -UNIV_INTERN -ib_err_t -ib_tuple_read_float( -/*================*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t col_no, /*!< in: column number */ - float* fval) /*!< out: float value */ -{ - ib_err_t err; - const dfield_t* dfield; - ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl; - - dfield = ib_col_get_dfield(tuple, col_no); - - if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) { - ib_col_copy_value_low(ib_tpl, col_no, fval, sizeof(*fval)); - err = DB_SUCCESS; - } else { - err = DB_DATA_MISMATCH; - } - - return(err); -} - -/*****************************************************************//** -Truncate a table. The cursor handle will be closed and set to NULL -on success. -@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_cursor_truncate( -/*===============*/ - ib_crsr_t* ib_crsr, /*!< in/out: cursor for table - to truncate */ - ib_id_u64_t* table_id) /*!< out: new table id */ -{ - ib_err_t err; - ib_cursor_t* cursor = *(ib_cursor_t**) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - - *table_id = 0; - - err = ib_cursor_lock(*ib_crsr, IB_LOCK_X); - - if (err == DB_SUCCESS) { - trx_t* trx; - dict_table_t* table = prebuilt->table; - - /* We are going to free the cursor and the prebuilt. Store - the transaction handle locally. */ - trx = prebuilt->trx; - err = ib_cursor_close(*ib_crsr); - ut_a(err == DB_SUCCESS); - - *ib_crsr = NULL; - - /* A temp go around for assertion in trx_start_for_ddl_low - we already start the trx */ - if (trx->state == TRX_STATE_ACTIVE) { -#ifdef UNIV_DEBUG - trx->start_file = 0; -#endif /* UNIV_DEBUG */ - trx->dict_operation = TRX_DICT_OP_TABLE; - } - - /* This function currently commits the transaction - on success. */ - err = static_cast<ib_err_t>( - row_truncate_table_for_mysql(table, trx)); - - if (err == DB_SUCCESS) { - *table_id = (table->id); - } - } - - return(err); -} - -/*****************************************************************//** -Truncate a table. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -ib_err_t -ib_table_truncate( -/*==============*/ - const char* table_name, /*!< in: table name */ - ib_id_u64_t* table_id) /*!< out: new table id */ -{ - ib_err_t err; - dict_table_t* table; - ib_err_t trunc_err; - ib_trx_t ib_trx = NULL; - ib_crsr_t ib_crsr = NULL; - ib_ulint_t memcached_sync = 0; - - ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE, true, false); - - dict_mutex_enter_for_mysql(); - - table = dict_table_open_on_name(table_name, TRUE, FALSE, - DICT_ERR_IGNORE_NONE); - - if (table != NULL && dict_table_get_first_index(table)) { - err = ib_create_cursor_with_index_id(&ib_crsr, table, 0, - (trx_t*) ib_trx); - } else { - err = DB_TABLE_NOT_FOUND; - } - - /* Remember the memcached_sync_count and set it to 0, so the - truncate can be executed. */ - if (table != NULL && err == DB_SUCCESS) { - memcached_sync = static_cast<ib_ulint_t>( - table->memcached_sync_count); - table->memcached_sync_count = 0; - } - - dict_mutex_exit_for_mysql(); - - if (err == DB_SUCCESS) { - trunc_err = ib_cursor_truncate(&ib_crsr, table_id); - ut_a(err == DB_SUCCESS); - } else { - trunc_err = err; - } - - if (ib_crsr != NULL) { - err = ib_cursor_close(ib_crsr); - ut_a(err == DB_SUCCESS); - } - - if (trunc_err == DB_SUCCESS) { - ut_a(ib_trx_state(ib_trx) == static_cast<ib_trx_state_t>( - TRX_STATE_NOT_STARTED)); - } else { - err = ib_trx_rollback(ib_trx); - ut_a(err == DB_SUCCESS); - } - - err = ib_trx_release(ib_trx); - ut_a(err == DB_SUCCESS); - - /* Set the memcached_sync_count back. */ - if (table != NULL && memcached_sync != 0) { - dict_mutex_enter_for_mysql(); - - table->memcached_sync_count = memcached_sync; - - dict_mutex_exit_for_mysql(); - } - - return(trunc_err); -} - -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. 
-@return 0 or error number */ -UNIV_INTERN -ib_err_t -ib_close_thd( -/*=========*/ - void* thd) /*!< in: handle to the MySQL thread of the user - whose resources should be free'd */ -{ - innobase_close_thd(static_cast<THD*>(thd)); - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Return isolation configuration set by "innodb_api_trx_level" -@return trx isolation level*/ -UNIV_INTERN -ib_trx_state_t -ib_cfg_trx_level() -/*==============*/ -{ - return(static_cast<ib_trx_state_t>(ib_trx_level_setting)); -} - -/*****************************************************************//** -Return configure value for background commit interval (in seconds) -@return background commit interval (in seconds) */ -UNIV_INTERN -ib_ulint_t -ib_cfg_bk_commit_interval() -/*=======================*/ -{ - return(static_cast<ib_ulint_t>(ib_bk_commit_interval)); -} - -/*****************************************************************//** -Get generic configure status -@return configure status*/ -UNIV_INTERN -int -ib_cfg_get_cfg() -/*============*/ -{ - int cfg_status; - - cfg_status = (ib_binlog_enabled) ? IB_CFG_BINLOG_ENABLED : 0; - - if (ib_mdl_enabled) { - cfg_status |= IB_CFG_MDL_ENABLED; - } - - if (ib_disable_row_lock) { - cfg_status |= IB_CFG_DISABLE_ROWLOCK; - } - - return(cfg_status); -} - -/*****************************************************************//** -Increase/decrease the memcached sync count of table to sync memcached -DML with SQL DDLs. 
-@return DB_SUCCESS or error number */ -UNIV_INTERN -ib_err_t -ib_cursor_set_memcached_sync( -/*=========================*/ - ib_crsr_t ib_crsr, /*!< in: cursor */ - ib_bool_t flag) /*!< in: true for increase */ -{ - const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr; - row_prebuilt_t* prebuilt = cursor->prebuilt; - dict_table_t* table = prebuilt->table; - ib_err_t err = DB_SUCCESS; - - if (table != NULL) { - /* If memcached_sync_count is -1, means table is - doing DDL, we just return error. */ - if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { - return(DB_ERROR); - } - - if (flag) { -#ifdef HAVE_ATOMIC_BUILTINS - os_atomic_increment_lint(&table->memcached_sync_count, 1); -#else - dict_mutex_enter_for_mysql(); - ++table->memcached_sync_count; - dict_mutex_exit_for_mysql(); -#endif - } else { -#ifdef HAVE_ATOMIC_BUILTINS - os_atomic_decrement_lint(&table->memcached_sync_count, 1); -#else - dict_mutex_enter_for_mysql(); - --table->memcached_sync_count; - dict_mutex_exit_for_mysql(); -#endif - ut_a(table->memcached_sync_count >= 0); - } - } else { - err = DB_TABLE_NOT_FOUND; - } - - return(err); -} diff --git a/storage/xtradb/api/api0misc.cc b/storage/xtradb/api/api0misc.cc deleted file mode 100644 index 5daee5de4c9..00000000000 --- a/storage/xtradb/api/api0misc.cc +++ /dev/null @@ -1,203 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file api/api0misc.cc -InnoDB Native API - -2008-08-01 Created by Sunny Bains -3/20/2011 Jimmy Yang extracted from Embedded InnoDB -*******************************************************/ - -#include <my_config.h> -#include <errno.h> - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif /* HAVE_UNISTD_H */ - -#include "api0misc.h" -#include "trx0roll.h" -#include "srv0srv.h" -#include "dict0mem.h" -#include "dict0dict.h" -#include "pars0pars.h" -#include "row0sel.h" -#include "lock0lock.h" -#include "ha_prototypes.h" -#include <m_ctype.h> -#include <mysys_err.h> -#include <mysql/plugin.h> - -/*********************************************************************//** -Sets a lock on a table. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -ib_trx_lock_table_with_retry( -/*=========================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ -{ - que_thr_t* thr; - dberr_t err; - mem_heap_t* heap; - sel_node_t* node; - - heap = mem_heap_create(512); - - trx->op_info = "setting table lock"; - - node = sel_node_create(heap); - thr = pars_complete_graph_for_exec(node, trx, heap); - thr->graph->state = QUE_FORK_ACTIVE; - - /* We use the select query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr(static_cast<que_fork_t*>( - que_node_get_parent(thr))); - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - err = lock_table(0, table, mode, thr); - - trx->error_state = err; - - if (UNIV_LIKELY(err == DB_SUCCESS)) { - que_thr_stop_for_mysql_no_error(thr, trx); - } else { - que_thr_stop_for_mysql(thr); - - if (err != DB_QUE_THR_SUSPENDED) { - ibool was_lock_wait; - - was_lock_wait = ib_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - } else { - que_thr_t* run_thr; - que_node_t* parent; - - parent = que_node_get_parent(thr); - run_thr = que_fork_start_command( - static_cast<que_fork_t*>(parent)); - - ut_a(run_thr == thr); - - /* There was a lock wait but the thread was not - in a ready to run or running state. */ - trx->error_state = DB_LOCK_WAIT; - - goto run_again; - } - } - - que_graph_free(thr->graph); - trx->op_info = ""; - - return(err); -} -/****************************************************************//** -Handles user errors and lock waits detected by the database engine. 
-@return TRUE if it was a lock wait and we should continue running -the query thread */ -UNIV_INTERN -ibool -ib_handle_errors( -/*=============*/ - dberr_t* new_err,/*!< out: possible new error encountered in - lock wait, or if no new error, the value - of trx->error_state at the entry of this - function */ - trx_t* trx, /*!< in: transaction */ - que_thr_t* thr, /*!< in: query thread */ - trx_savept_t* savept) /*!< in: savepoint or NULL */ -{ - dberr_t err; -handle_new_error: - err = trx->error_state; - - ut_a(err != DB_SUCCESS); - - trx->error_state = DB_SUCCESS; - - switch (err) { - case DB_LOCK_WAIT_TIMEOUT: - trx_rollback_for_mysql(trx); - break; - /* fall through */ - case DB_DUPLICATE_KEY: - case DB_FOREIGN_DUPLICATE_KEY: - case DB_TOO_BIG_RECORD: - case DB_ROW_IS_REFERENCED: - case DB_NO_REFERENCED_ROW: - case DB_CANNOT_ADD_CONSTRAINT: - case DB_TOO_MANY_CONCURRENT_TRXS: - case DB_OUT_OF_FILE_SPACE: - if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ - - trx_rollback_to_savepoint(trx, savept); - } - break; - case DB_LOCK_WAIT: - lock_wait_suspend_thread(thr); - - if (trx->error_state != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - goto handle_new_error; - } - - *new_err = err; - - return(TRUE); /* Operation needs to be retried. 
*/ - - case DB_DEADLOCK: - case DB_LOCK_TABLE_FULL: - /* Roll back the whole transaction; this resolution was added - to version 3.23.43 */ - - trx_rollback_for_mysql(trx); - break; - - case DB_CORRUPTION: - case DB_FOREIGN_EXCEED_MAX_CASCADE: - break; - default: - ut_error; - } - - if (trx->error_state != DB_SUCCESS) { - *new_err = trx->error_state; - } else { - *new_err = err; - } - - trx->error_state = DB_SUCCESS; - - return(FALSE); -} diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc deleted file mode 100644 index 85a083aaee0..00000000000 --- a/storage/xtradb/btr/btr0btr.cc +++ /dev/null @@ -1,5330 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file btr/btr0btr.cc -The B-tree - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#include "btr0btr.h" - -#ifdef UNIV_NONINL -#include "btr0btr.ic" -#endif - -#include "fsp0fsp.h" -#include "page0page.h" -#include "page0zip.h" - -#ifndef UNIV_HOTBACKUP -#include "btr0cur.h" -#include "btr0sea.h" -#include "btr0pcur.h" -#include "btr0defragment.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "ibuf0ibuf.h" -#include "trx0trx.h" -#include "srv0mon.h" - -/**************************************************************//** -Checks if the page in the cursor can be merged with given page. -If necessary, re-organize the merge_page. -@return TRUE if possible to merge. */ -UNIV_INTERN -ibool -btr_can_merge_with_page( -/*====================*/ - btr_cur_t* cursor, /*!< in: cursor on the page to merge */ - ulint page_no, /*!< in: a sibling page */ - buf_block_t** merge_block, /*!< out: the merge block */ - mtr_t* mtr); /*!< in: mini-transaction */ - -#endif /* UNIV_HOTBACKUP */ - -/**************************************************************//** -Report that an index page is corrupted. 
*/ -UNIV_INTERN -void -btr_corruption_report( -/*==================*/ - const buf_block_t* block, /*!< in: corrupted block */ - const dict_index_t* index) /*!< in: index tree */ -{ - fprintf(stderr, "InnoDB: flag mismatch in space %u page %u" - " index %s of table %s\n", - (unsigned) buf_block_get_space(block), - (unsigned) buf_block_get_page_no(block), - index->name, index->table_name); - if (block->page.zip.data) { - buf_page_print(block->page.zip.data, - buf_block_get_zip_size(block), - BUF_PAGE_PRINT_NO_CRASH); - } - buf_page_print(buf_nonnull_block_get_frame(block), 0, 0); -} - -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_BLOB_DEBUG -# include "srv0srv.h" -# include "ut0rbt.h" - -/** TRUE when messages about index->blobs modification are enabled. */ -static ibool btr_blob_dbg_msg; - -/** Issue a message about an operation on index->blobs. -@param op operation -@param b the entry being subjected to the operation -@param ctx the context of the operation */ -#define btr_blob_dbg_msg_issue(op, b, ctx) \ - fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \ - (b)->ref_page_no, (b)->ref_heap_no, \ - (b)->ref_field_no, (b)->blob_page_no, ctx, \ - (b)->owner, (b)->always_owner, (b)->del) - -/** Insert to index->blobs a reference to an off-page column. -@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_insert( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ -{ - if (btr_blob_dbg_msg) { - btr_blob_dbg_msg_issue("insert", b, ctx); - } - mutex_enter(&index->blobs_mutex); - rbt_insert(index->blobs, b, b); - mutex_exit(&index->blobs_mutex); -} - -/** Remove from index->blobs a reference to an off-page column. 
-@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_delete( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ -{ - if (btr_blob_dbg_msg) { - btr_blob_dbg_msg_issue("delete", b, ctx); - } - mutex_enter(&index->blobs_mutex); - ut_a(rbt_delete(index->blobs, b)); - mutex_exit(&index->blobs_mutex); -} - -/**************************************************************//** -Comparator for items (btr_blob_dbg_t) in index->blobs. -The key in index->blobs is (ref_page_no, ref_heap_no, ref_field_no). -@return negative, 0 or positive if *a<*b, *a=*b, *a>*b */ -static -int -btr_blob_dbg_cmp( -/*=============*/ - const void* a, /*!< in: first btr_blob_dbg_t to compare */ - const void* b) /*!< in: second btr_blob_dbg_t to compare */ -{ - const btr_blob_dbg_t* aa = static_cast<const btr_blob_dbg_t*>(a); - const btr_blob_dbg_t* bb = static_cast<const btr_blob_dbg_t*>(b); - - ut_ad(aa != NULL); - ut_ad(bb != NULL); - - if (aa->ref_page_no != bb->ref_page_no) { - return(aa->ref_page_no < bb->ref_page_no ? -1 : 1); - } - if (aa->ref_heap_no != bb->ref_heap_no) { - return(aa->ref_heap_no < bb->ref_heap_no ? -1 : 1); - } - if (aa->ref_field_no != bb->ref_field_no) { - return(aa->ref_field_no < bb->ref_field_no ? -1 : 1); - } - return(0); -} - -/**************************************************************//** -Add a reference to an off-page column to the index->blobs map. 
*/ -UNIV_INTERN -void -btr_blob_dbg_add_blob( -/*==================*/ - const rec_t* rec, /*!< in: clustered index record */ - ulint field_no, /*!< in: off-page column number */ - ulint page_no, /*!< in: start page of the column */ - dict_index_t* index, /*!< in/out: index tree */ - const char* ctx) /*!< in: context (for logging) */ -{ - btr_blob_dbg_t b; - const page_t* page = page_align(rec); - - ut_a(index->blobs); - - b.blob_page_no = page_no; - b.ref_page_no = page_get_page_no(page); - b.ref_heap_no = page_rec_get_heap_no(rec); - b.ref_field_no = field_no; - ut_a(b.ref_field_no >= index->n_uniq); - b.always_owner = b.owner = TRUE; - b.del = FALSE; - ut_a(!rec_get_deleted_flag(rec, page_is_comp(page))); - btr_blob_dbg_rbt_insert(index, &b, ctx); -} - -/**************************************************************//** -Add to index->blobs any references to off-page columns from a record. -@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add_rec( -/*=================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint count = 0; - ulint i; - btr_blob_dbg_t b; - ibool del; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!rec_offs_any_extern(offsets)) { - return(0); - } - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - del = (rec_get_deleted_flag(rec, rec_offs_comp(offsets)) != 0); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i)) { - ulint len; - const byte* field_ref = rec_get_nth_field( - rec, offsets, i, &len); - - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - if (!memcmp(field_ref, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)) { - /* the column has not been stored yet */ - continue; - } - - b.ref_field_no = i; - 
b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - ut_a(b.ref_field_no >= index->n_uniq); - b.always_owner = b.owner - = !(field_ref[BTR_EXTERN_LEN] - & BTR_EXTERN_OWNER_FLAG); - b.del = del; - - btr_blob_dbg_rbt_insert(index, &b, ctx); - count++; - } - } - - return(count); -} - -/**************************************************************//** -Display the references to off-page columns. -This function is to be called from a debugger, -for example when a breakpoint on ut_dbg_assertion_failed is hit. */ -UNIV_INTERN -void -btr_blob_dbg_print( -/*===============*/ - const dict_index_t* index) /*!< in: index tree */ -{ - const ib_rbt_node_t* node; - - if (!index->blobs) { - return; - } - - /* We intentionally do not acquire index->blobs_mutex here. - This function is to be called from a debugger, and the caller - should make sure that the index->blobs_mutex is held. */ - - for (node = rbt_first(index->blobs); - node != NULL; node = rbt_next(index->blobs, node)) { - const btr_blob_dbg_t* b - = rbt_value(btr_blob_dbg_t, node); - fprintf(stderr, "%u:%u:%u->%u%s%s%s\n", - b->ref_page_no, b->ref_heap_no, b->ref_field_no, - b->blob_page_no, - b->owner ? "" : "(disowned)", - b->always_owner ? "" : "(has disowned)", - b->del ? "(deleted)" : ""); - } -} - -/**************************************************************//** -Remove from index->blobs any references to off-page columns from a record. 
-@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove_rec( -/*====================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint i; - ulint count = 0; - btr_blob_dbg_t b; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!rec_offs_any_extern(offsets)) { - return(0); - } - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i)) { - ulint len; - const byte* field_ref = rec_get_nth_field( - rec, offsets, i, &len); - - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - b.ref_field_no = i; - b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - - switch (b.blob_page_no) { - case 0: - /* The column has not been stored yet. - The BLOB pointer must be all zero. - There cannot be a BLOB starting at - page 0, because page 0 is reserved for - the tablespace header. */ - ut_a(!memcmp(field_ref, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)); - /* fall through */ - case FIL_NULL: - /* the column has been freed already */ - continue; - } - - btr_blob_dbg_rbt_delete(index, &b, ctx); - count++; - } - } - - return(count); -} - -/**************************************************************//** -Check that there are no references to off-page columns from or to -the given page. Invoked when freeing or clearing a page. 
-@return TRUE when no orphan references exist */ -UNIV_INTERN -ibool -btr_blob_dbg_is_empty( -/*==================*/ - dict_index_t* index, /*!< in: index */ - ulint page_no) /*!< in: page number */ -{ - const ib_rbt_node_t* node; - ibool success = TRUE; - - if (!index->blobs) { - return(success); - } - - mutex_enter(&index->blobs_mutex); - - for (node = rbt_first(index->blobs); - node != NULL; node = rbt_next(index->blobs, node)) { - const btr_blob_dbg_t* b - = rbt_value(btr_blob_dbg_t, node); - - if (b->ref_page_no != page_no && b->blob_page_no != page_no) { - continue; - } - - fprintf(stderr, - "InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n", - b->owner ? "" : "(disowned)", - b->always_owner ? "" : "(has disowned)", - b->del ? "(deleted)" : "", - b->ref_page_no, b->ref_heap_no, b->ref_field_no, - b->blob_page_no); - - if (b->blob_page_no != page_no || b->owner || !b->del) { - success = FALSE; - } - } - - mutex_exit(&index->blobs_mutex); - return(success); -} - -/**************************************************************//** -Count and process all references to off-page columns on a page. 
-@return number of references processed */ -UNIV_INTERN -ulint -btr_blob_dbg_op( -/*============*/ - const page_t* page, /*!< in: B-tree leaf page */ - const rec_t* rec, /*!< in: record to start from - (NULL to process the whole page) */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx, /*!< in: context (for logging) */ - const btr_blob_dbg_op_f op) /*!< in: operation on records */ -{ - ulint count = 0; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_a(!rec || page_align(rec) == page); - - if (!index->blobs || !page_is_leaf(page) - || !dict_index_is_clust(index)) { - return(0); - } - - if (rec == NULL) { - rec = page_get_infimum_rec(page); - } - - do { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - count += op(rec, index, offsets, ctx); - rec = page_rec_get_next_const(rec); - } while (!page_rec_is_supremum(rec)); - - if (heap) { - mem_heap_free(heap); - } - - return(count); -} - -/**************************************************************//** -Count and add to index->blobs any references to off-page columns -from records on a page. -@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add( -/*=============*/ - const page_t* page, /*!< in: rewritten page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ -{ - btr_blob_dbg_assert_empty(index, page_get_page_no(page)); - - return(btr_blob_dbg_op(page, NULL, index, ctx, btr_blob_dbg_add_rec)); -} - -/**************************************************************//** -Count and remove from index->blobs any references to off-page columns -from records on a page. -Used when reorganizing a page, before copying the records. 
-@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove( -/*================*/ - const page_t* page, /*!< in: b-tree page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint count; - - count = btr_blob_dbg_op(page, NULL, index, ctx, - btr_blob_dbg_remove_rec); - - /* Check that no references exist. */ - btr_blob_dbg_assert_empty(index, page_get_page_no(page)); - - return(count); -} - -/**************************************************************//** -Restore in index->blobs any references to off-page columns -Used when page reorganize fails due to compressed page overflow. */ -UNIV_INTERN -void -btr_blob_dbg_restore( -/*=================*/ - const page_t* npage, /*!< in: page that failed to compress */ - const page_t* page, /*!< in: copy of original page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ -{ - ulint removed; - ulint added; - - ut_a(page_get_page_no(npage) == page_get_page_no(page)); - ut_a(page_get_space_id(npage) == page_get_space_id(page)); - - removed = btr_blob_dbg_remove(npage, index, ctx); - added = btr_blob_dbg_add(page, index, ctx); - ut_a(added == removed); -} - -/**************************************************************//** -Modify the 'deleted' flag of a record. 
*/
UNIV_INTERN
void
btr_blob_dbg_set_deleted_flag(
/*==========================*/
	const rec_t*		rec,	/*!< in: record */
	dict_index_t*		index,	/*!< in/out: index */
	const ulint*		offsets,/*!< in: rec_get_offs(rec, index) */
	ibool			del)	/*!< in: TRUE=deleted, FALSE=exists */
{
	const ib_rbt_node_t*	node;
	btr_blob_dbg_t		b;
	btr_blob_dbg_t*		c;
	ulint			i;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_a(dict_index_is_clust(index));
	ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */

	/* Nothing to update when the record has no externally stored
	columns or when blob tracking is not enabled for the index. */
	if (!rec_offs_any_extern(offsets) || !index->blobs) {

		return;
	}

	/* The lookup key identifies the referencing record by page number
	and heap number; the field number and blob page number are filled
	in per column inside the loop. */
	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);

	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (rec_offs_nth_extern(offsets, i)) {
			ulint		len;
			const byte*	field_ref = rec_get_nth_field(
				rec, offsets, i, &len);

			ut_a(len != UNIV_SQL_NULL);
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
			/* The field reference occupies the last
			BTR_EXTERN_FIELD_REF_SIZE bytes of the column. */
			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

			b.ref_field_no = i;
			b.blob_page_no = mach_read_from_4(
				field_ref + BTR_EXTERN_PAGE_NO);

			/* Both cases end in ut_error: case 0 first asserts
			that the reference is not all zero and then falls
			through deliberately. */
			switch (b.blob_page_no) {
			case 0:
				ut_a(memcmp(field_ref, field_ref_zero,
					    BTR_EXTERN_FIELD_REF_SIZE));
				/* page number 0 is for the
				page allocation bitmap */
			case FIL_NULL:
				/* the column has been freed already */
				ut_error;
			}

			mutex_enter(&index->blobs_mutex);
			node = rbt_lookup(index->blobs, &b);
			ut_a(node);

			c = rbt_value(btr_blob_dbg_t, node);
			/* The flag should be modified. */
			c->del = del;
			if (btr_blob_dbg_msg) {
				/* Copy the entry while the mutex is still
				held so that the message is issued from the
				copy after the mutex has been released. */
				b = *c;
				mutex_exit(&index->blobs_mutex);
				btr_blob_dbg_msg_issue("del_mk", &b, "");
			} else {
				mutex_exit(&index->blobs_mutex);
			}
		}
	}
}

/**************************************************************//**
Change the ownership of an off-page column.
*/
UNIV_INTERN
void
btr_blob_dbg_owner(
/*===============*/
	const rec_t*		rec,	/*!< in: record */
	dict_index_t*		index,	/*!< in/out: index */
	const ulint*		offsets,/*!< in: rec_get_offs(rec, index) */
	ulint			i,	/*!< in: ith field in rec */
	ibool			own)	/*!< in: TRUE=owned, FALSE=disowned */
{
	const ib_rbt_node_t*	node;
	btr_blob_dbg_t		b;
	const byte*		field_ref;
	ulint			len;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_a(rec_offs_nth_extern(offsets, i));

	field_ref = rec_get_nth_field(rec, offsets, i, &len);
	ut_a(len != UNIV_SQL_NULL);
	ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
	/* The field reference occupies the last BTR_EXTERN_FIELD_REF_SIZE
	bytes of the column. */
	field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;

	/* Build the lookup key from the record position and the contents
	of the field reference. */
	b.ref_page_no = page_get_page_no(page_align(rec));
	b.ref_heap_no = page_rec_get_heap_no(rec);
	b.ref_field_no = i;
	/* The record owns the column when BTR_EXTERN_OWNER_FLAG is clear. */
	b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
	b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);

	/* The flag stored in the record must already match the ownership
	requested by the caller. */
	ut_a(b.owner == own);

	mutex_enter(&index->blobs_mutex);
	node = rbt_lookup(index->blobs, &b);
	/* row_ins_clust_index_entry_by_modify() invokes
	btr_cur_unmark_extern_fields() also for the newly inserted
	references, which are all zero bytes until the columns are stored.
	The node lookup must fail if and only if that is the case. */
	ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
	     == !node);

	if (node) {
		btr_blob_dbg_t*	c = rbt_value(btr_blob_dbg_t, node);
		/* Some code sets ownership from TRUE to TRUE.
		We do not allow changing ownership from FALSE to FALSE. */
		ut_a(own || c->owner);

		c->owner = own;
		if (!own) {
			/* Once disowned, the entry can no longer claim to
			have always been the owner. */
			c->always_owner = FALSE;
		}
	}

	mutex_exit(&index->blobs_mutex);
}
#endif /* UNIV_BLOB_DEBUG */

/*
Latching strategy of the InnoDB B-tree
--------------------------------------
A tree latch protects all non-leaf nodes of the tree. Each node of a tree
also has a latch of its own.

A B-tree operation normally first acquires an S-latch on the tree.
It -searches down the tree and releases the tree latch when it has the -leaf node latch. To save CPU time we do not acquire any latch on -non-leaf nodes of the tree during a search; those pages are only buffer-fixed. - -If an operation needs to restructure the tree, it acquires an X-latch on -the tree before searching to a leaf node. If it needs, for example, to -split a leaf, -(1) InnoDB decides the split point in the leaf, -(2) allocates a new page, -(3) inserts the appropriate node pointer to the first non-leaf level, -(4) releases the tree X-latch, -(5) and then moves records from the leaf to the newly allocated page. - -Node pointers -------------- -Leaf pages of a B-tree contain the index records stored in the -tree. On levels n > 0 we store 'node pointers' to pages on level -n - 1. For each page there is exactly one node pointer stored: -thus our tree is an ordinary B-tree, not a B-link tree. - -A node pointer contains a prefix P of an index record. The prefix -is long enough so that it determines an index record uniquely. -The file page number of the child page is added as the last -field. To the child page we can store node pointers or index records -which are >= P in the alphabetical order, but < P1 if there is -a next node pointer on the level, and P1 is its prefix. - -If a node pointer with a prefix P points to a non-leaf child, -then the leftmost record in the child must have the same -prefix P. If it points to a leaf node, the child is not required -to contain any record with a prefix equal to P. The leaf case -is decided this way to allow arbitrary deletions in a leaf node -without touching upper levels of the tree. - -We have predefined a special minimum record which we -define as the smallest record in any alphabetical order. -A minimum record is denoted by setting a bit in the record -header. A minimum record acts as the prefix of a node pointer -which points to a leftmost node on any level of the tree. 
- -File page allocation --------------------- -In the root node of a B-tree there are two file segment headers. -The leaf pages of a tree are allocated from one file segment, to -make them consecutive on disk if possible. From the other file segment -we allocate pages for the non-leaf levels of the tree. -*/ - -#ifdef UNIV_BTR_DEBUG -/**************************************************************//** -Checks a file segment header within a B-tree root page. -@return TRUE if valid */ -static -ibool -btr_root_fseg_validate( -/*===================*/ - const fseg_header_t* seg_header, /*!< in: segment header */ - ulint space) /*!< in: tablespace identifier */ -{ - ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET); - - if (UNIV_UNLIKELY(srv_pass_corrupt_table != 0)) { - return (mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space) - && (offset >= FIL_PAGE_DATA) - && (offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); - } - - ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space); - ut_a(offset >= FIL_PAGE_DATA); - ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); - return(TRUE); -} -#endif /* UNIV_BTR_DEBUG */ - -/**************************************************************//** -Gets the root node of a tree and x- or s-latches it. 
-@return root page, x- or s-latched */ -buf_block_t* -btr_root_block_get( -/*===============*/ - const dict_index_t* index, /*!< in: index tree */ - ulint mode, /*!< in: either RW_S_LATCH - or RW_X_LATCH */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint space; - ulint zip_size; - ulint root_page_no; - buf_block_t* block; - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - root_page_no = dict_index_get_page(index); - - block = btr_block_get(space, zip_size, root_page_no, mode, (dict_index_t*)index, mtr); - - if (!block) { - if (index && index->table) { - index->table->file_unreadable = true; - - ib_push_warning( - static_cast<THD*>(NULL), DB_DECRYPTION_FAILED, - "Table %s in tablespace %lu is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name, space); - } - - return NULL; - } - - SRV_CORRUPT_TABLE_CHECK(block, return(0);); - - btr_assert_not_corrupted(block, index); - -#ifdef UNIV_BTR_DEBUG - if (!dict_index_is_ibuf(index)) { - const page_t* root = buf_block_get_frame(block); - - if (UNIV_UNLIKELY(srv_pass_corrupt_table != 0)) { - if (!btr_root_fseg_validate(FIL_PAGE_DATA - + PAGE_BTR_SEG_LEAF - + root, space)) - return(NULL); - if (!btr_root_fseg_validate(FIL_PAGE_DATA - + PAGE_BTR_SEG_TOP - + root, space)) - return(NULL); - return(block); - } - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, space)); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); - } -#endif /* UNIV_BTR_DEBUG */ - - return(block); -} - -/**************************************************************//** -Gets the root node of a tree and x-latches it. 
-@return root page, x-latched */ -UNIV_INTERN -page_t* -btr_root_get( -/*=========*/ - const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, - mtr); - - if (root && root->page.encrypted == true) { - root = NULL; - } - - return(root ? buf_block_get_frame(root) : NULL); -} - -/**************************************************************//** -Gets the height of the B-tree (the level of the root, when the leaf -level is assumed to be 0). The caller must hold an S or X latch on -the index. -@return tree height (level of the root) */ -UNIV_INTERN -ulint -btr_height_get( -/*===========*/ - dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint height=0; - buf_block_t* root_block; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK) - || mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - - /* S latches the page */ - root_block = btr_root_block_get(index, RW_S_LATCH, mtr); - ut_ad(root_block); // The index must not be corrupted - - if (root_block) { - - height = btr_page_get_level(buf_nonnull_block_get_frame(root_block), - mtr); - /* Release the S latch on the root page. */ - mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX); -#ifdef UNIV_SYNC_DEBUG - sync_thread_reset_level(&root_block->lock); -#endif /* UNIV_SYNC_DEBUG */ - } - - return(height); -} - -/**************************************************************//** -Checks a file segment header within a B-tree root page and updates -the segment header space id. 
-@return TRUE if valid */ -static -bool -btr_root_fseg_adjust_on_import( -/*===========================*/ - fseg_header_t* seg_header, /*!< in/out: segment header */ - page_zip_des_t* page_zip, /*!< in/out: compressed page, - or NULL */ - ulint space, /*!< in: tablespace identifier */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET); - - if (offset < FIL_PAGE_DATA - || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) { - - return(FALSE); - - } else if (page_zip) { - mach_write_to_4(seg_header + FSEG_HDR_SPACE, space); - page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE, - 4, mtr); - } else { - mlog_write_ulint(seg_header + FSEG_HDR_SPACE, - space, MLOG_4BYTES, mtr); - } - - return(TRUE); -} - -/**************************************************************//** -Checks and adjusts the root node of a tree during IMPORT TABLESPACE. -@return error code, or DB_SUCCESS */ -UNIV_INTERN -dberr_t -btr_root_adjust_on_import( -/*======================*/ - const dict_index_t* index) /*!< in: index tree */ -{ - dberr_t err; - mtr_t mtr; - page_t* page; - buf_block_t* block; - page_zip_des_t* page_zip; - dict_table_t* table = index->table; - ulint space_id = dict_index_get_space(index); - ulint zip_size = dict_table_zip_size(table); - ulint root_page_no = dict_index_get_page(index); - - mtr_start(&mtr); - - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - - DBUG_EXECUTE_IF("ib_import_trigger_corruption_3", - return(DB_CORRUPTION);); - - block = btr_block_get( - space_id, zip_size, root_page_no, RW_X_LATCH, (dict_index_t*)index, &mtr); - - page = buf_block_get_frame(block); - page_zip = buf_block_get_page_zip(block); - - /* Check that this is a B-tree page and both the PREV and NEXT - pointers are FIL_NULL, because the root page does not have any - siblings. 
*/ - if (fil_page_get_type(page) != FIL_PAGE_INDEX - || fil_page_get_prev(page) != FIL_NULL - || fil_page_get_next(page) != FIL_NULL) { - - err = DB_CORRUPTION; - - } else if (dict_index_is_clust(index)) { - bool page_is_compact_format; - - page_is_compact_format = page_is_comp(page) > 0; - - /* Check if the page format and table format agree. */ - if (page_is_compact_format != dict_table_is_comp(table)) { - err = DB_CORRUPTION; - } else { - - /* Check that the table flags and the tablespace - flags match. */ - ulint flags = fil_space_get_flags(table->space); - - if (flags - && flags != dict_tf_to_fsp_flags(table->flags)) { - - err = DB_CORRUPTION; - } else { - err = DB_SUCCESS; - } - } - } else { - err = DB_SUCCESS; - } - - /* Check and adjust the file segment headers, if all OK so far. */ - if (err == DB_SUCCESS - && (!btr_root_fseg_adjust_on_import( - FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + page, page_zip, space_id, &mtr) - || !btr_root_fseg_adjust_on_import( - FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + page, page_zip, space_id, &mtr))) { - - err = DB_CORRUPTION; - } - - mtr_commit(&mtr); - - return(err); -} - -/*************************************************************//** -Gets pointer to the previous user record in the tree. It is assumed that -the caller has appropriate latches on the page and its neighbor. 
-@return previous user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_prev_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the previous page */ -{ - page_t* page; - page_t* prev_page; - ulint prev_page_no; - - if (!page_rec_is_infimum(rec)) { - - rec_t* prev_rec = page_rec_get_prev(rec); - - if (!page_rec_is_infimum(prev_rec)) { - - return(prev_rec); - } - } - - page = page_align(rec); - prev_page_no = btr_page_get_prev(page, mtr); - - if (prev_page_no != FIL_NULL) { - - ulint space; - ulint zip_size; - buf_block_t* prev_block; - - space = page_get_space_id(page); - zip_size = fil_space_get_zip_size(space); - - prev_block = buf_page_get_with_no_latch(space, zip_size, - prev_page_no, mtr); - prev_page = buf_block_get_frame(prev_block); - /* The caller must already have a latch to the brother */ - ut_ad(mtr_memo_contains(mtr, prev_block, - MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, prev_block, - MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_page) == page_is_comp(page)); - ut_a(btr_page_get_next(prev_page, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - return(page_rec_get_prev(page_get_supremum_rec(prev_page))); - } - - return(NULL); -} - -/*************************************************************//** -Gets pointer to the next user record in the tree. It is assumed that the -caller has appropriate latches on the page and its neighbor. 
-@return next user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_next_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the next page */ -{ - page_t* page; - page_t* next_page; - ulint next_page_no; - - if (!page_rec_is_supremum(rec)) { - - rec_t* next_rec = page_rec_get_next(rec); - - if (!page_rec_is_supremum(next_rec)) { - - return(next_rec); - } - } - - page = page_align(rec); - next_page_no = btr_page_get_next(page, mtr); - - if (next_page_no != FIL_NULL) { - ulint space; - ulint zip_size; - buf_block_t* next_block; - - space = page_get_space_id(page); - zip_size = fil_space_get_zip_size(space); - - next_block = buf_page_get_with_no_latch(space, zip_size, - next_page_no, mtr); - next_page = buf_block_get_frame(next_block); - /* The caller must already have a latch to the brother */ - ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, next_block, - MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_page) == page_is_comp(page)); - ut_a(btr_page_get_prev(next_page, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - return(page_rec_get_next(page_get_infimum_rec(next_page))); - } - - return(NULL); -} - -/**************************************************************//** -Creates a new index page (not the root, and also not -used in page reorganization). @see btr_page_empty(). 
*/
UNIV_INTERN
void
btr_page_create(
/*============*/
	buf_block_t*	block,	/*!< in/out: page to be created */
	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
	dict_index_t*	index,	/*!< in: index */
	ulint		level,	/*!< in: the B-tree level of the page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	page_t*	frame = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));

	if (page_zip == NULL) {
		/* Uncompressed page: create it in the row format of the
		table and then set the level of the new index page. */
		page_create(block, mtr, dict_table_is_comp(index->table));
		btr_page_set_level(frame, NULL, level, mtr);
	} else {
		/* Compressed page: the level is passed to the creation
		routine directly. */
		page_create_zip(block, index, level, 0, mtr);
	}

	block->check_index_page_at_flush = TRUE;

	btr_page_set_index_id(frame, page_zip, index->id, mtr);
}

/**************************************************************//**
Allocates a new file page to be used in an ibuf tree. Takes the page from
the free list of the tree, which must contain pages!
@return new allocated block, x-latched */
static
buf_block_t*
btr_page_alloc_for_ibuf(
/*====================*/
	dict_index_t*	index,	/*!< in: index tree */
	mtr_t*		mtr)	/*!< in: mtr */
{
	fil_addr_t	node_addr;
	page_t*		root;
	page_t*		new_page;
	buf_block_t*	new_block;

	/* NOTE(review): btr_root_get() can return NULL; the result is used
	unchecked here. Confirm that this cannot happen for the ibuf tree. */
	root = btr_root_get(index, mtr);

	/* Take the first node of the free list kept in the root page.
	The list must not be empty. */
	node_addr = flst_get_first(root + PAGE_HEADER
				   + PAGE_BTR_IBUF_FREE_LIST, mtr);
	ut_a(node_addr.page != FIL_NULL);

	/* X-latch the page that holds that list node. */
	new_block = buf_page_get(dict_index_get_space(index),
				 dict_table_zip_size(index->table),
				 node_addr.page, RW_X_LATCH, mtr);
	new_page = buf_block_get_frame(new_block);
	buf_block_dbg_add_level(new_block, SYNC_IBUF_TREE_NODE_NEW);

	/* Unlink the page from the free list. */
	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
		    new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
		    mtr);
	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
			    mtr));

	return(new_block);
}

/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
@retval NULL if no page could be allocated
@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
buf_block_t*
btr_page_alloc_low(
/*===============*/
	dict_index_t*	index,		/*!< in: index */
	ulint		hint_page_no,	/*!< in: hint of a good page */
	byte		file_direction,	/*!< in: direction where a possible
					page split is made */
	ulint		level,		/*!< in: level where the page is placed
					in the tree */
	mtr_t*		mtr,		/*!< in/out: mini-transaction
					for the allocation */
	mtr_t*		init_mtr)	/*!< in/out: mtr or another
					mini-transaction in which the
					page should be initialized.
					If init_mtr!=mtr, but the page
					is already X-latched in mtr, do
					not initialize the page. */
{
	fseg_header_t*	seg_header;
	page_t*		root;

	/* NOTE(review): btr_root_get() can return NULL, in which case the
	segment header address computed below would be bogus. Confirm
	whether a NULL root is possible on this path. */
	root = btr_root_get(index, mtr);

	/* Level 0 pages come from the leaf segment, all other levels
	from the non-leaf ("top") segment. */
	if (level == 0) {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
	} else {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
	}

	/* Parameter TRUE below states that the caller has made the
	reservation for free extents, and thus we know that a page can
	be allocated: */

	buf_block_t* block = fseg_alloc_free_page_general(
		seg_header, hint_page_no, file_direction,
		TRUE, mtr, init_mtr);

#ifdef UNIV_DEBUG_SCRUBBING
	/* Trace the outcome of the allocation to stderr. */
	if (block != NULL) {
		fprintf(stderr,
			"alloc %lu:%lu to index: %lu root: %lu\n",
			buf_block_get_page_no(block),
			buf_block_get_space(block),
			index->id,
			dict_index_get_page(index));
	} else {
		fprintf(stderr,
			"failed alloc index: %lu root: %lu\n",
			index->id,
			dict_index_get_page(index));
	}
#endif /* UNIV_DEBUG_SCRUBBING */

	return block;
}

/**************************************************************//**
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents!
-@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded -(init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -UNIV_INTERN -buf_block_t* -btr_page_alloc( -/*===========*/ - dict_index_t* index, /*!< in: index */ - ulint hint_page_no, /*!< in: hint of a good page */ - byte file_direction, /*!< in: direction where a possible - page split is made */ - ulint level, /*!< in: level where the page is placed - in the tree */ - mtr_t* mtr, /*!< in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction - for x-latching and initializing - the page */ -{ - buf_block_t* new_block; - - if (dict_index_is_ibuf(index)) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } - - new_block = btr_page_alloc_low( - index, hint_page_no, file_direction, level, mtr, init_mtr); - - if (new_block) { - buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW); - } - - return(new_block); -} - -/**************************************************************//** -Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -UNIV_INTERN -ulint -btr_get_size( -/*=========*/ - dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ -{ - ulint used; - if (flag == BTR_N_LEAF_PAGES) { - btr_get_size_and_reserved(index, flag, &used, mtr); - return used; - } else if (flag == BTR_TOTAL_SIZE) { - return btr_get_size_and_reserved(index, flag, &used, mtr); - } else { - ut_error; - } - return (ULINT_UNDEFINED); -} - -/**************************************************************//** -Gets the number of reserved and used pages in a B-tree. 
-@return number of pages reserved, or ULINT_UNDEFINED if the index -is unavailable */ -UNIV_INTERN -ulint -btr_get_size_and_reserved( -/*======================*/ - dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - ulint* used, /*!< out: number of pages used (<= reserved) */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ -{ - fseg_header_t* seg_header; - page_t* root; - ulint n=ULINT_UNDEFINED; - ulint dummy; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); - - ut_a(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE); - - if (index->page == FIL_NULL || dict_index_is_online_ddl(index) - || *index->name == TEMP_INDEX_PREFIX) { - return(ULINT_UNDEFINED); - } - - root = btr_root_get(index, mtr); - *used = 0; - - if (root) { - - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - n = fseg_n_reserved_pages(seg_header, used, mtr); - - if (flag == BTR_TOTAL_SIZE) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - n += fseg_n_reserved_pages(seg_header, &dummy, mtr); - *used += dummy; - - } - } - - return(n); -} - -/**************************************************************//** -Frees a page used in an ibuf tree. Puts the page to the free list of the -ibuf tree. */ -static -void -btr_page_free_for_ibuf( -/*===================*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* root; - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - root = btr_root_get(index, mtr); - - flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - buf_block_get_frame(block) - + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - - ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr)); -} - -/**************************************************************//** -Frees a file page used in an index tree. 
Can be used also to (BLOB)
external storage pages, because the page level 0 can be given as an
argument. */
UNIV_INTERN
void
btr_page_free_low(
/*==============*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
	ulint		level,	/*!< in: page level */
	bool		blob,	/*!< in: blob page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	fseg_header_t*	seg_header;
	page_t*		root;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	/* The page gets invalid for optimistic searches: increment the frame
	modify clock */

	buf_block_modify_clock_inc(block);
	btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));

	/* BLOB pages are always freed with level 0. */
	if (blob) {
		ut_a(level == 0);
	}

	/* Take one snapshot of the setting so that every decision below
	is based on the same value. */
	bool scrub = srv_immediate_scrub_data_uncompressed;
	/* scrub page */
	if (scrub && blob) {
		/* blob page: scrub entire page */
		// TODO(jonaso): scrub only what is actually needed
		page_t* page = buf_block_get_frame(block);
		/* Zero everything from PAGE_HEADER to the end of the page. */
		memset(page + PAGE_HEADER, 0,
		       UNIV_PAGE_SIZE - PAGE_HEADER);
#ifdef UNIV_DEBUG_SCRUBBING
		fprintf(stderr,
			"btr_page_free_low: scrub blob page %lu/%lu\n",
			buf_block_get_space(block),
			buf_block_get_page_no(block));
#endif /* UNIV_DEBUG_SCRUBBING */
	} else if (scrub) {
		/* scrub records on page */

		/* TODO(jonaso): in theory we could clear the full page,
		 * but the page still remains in the buffer pool and gets
		 * flushed etc., and lots of routines validate its
		 * consistency. In order to remain structurally consistent
		 * we clear each record on its own.
		 *
		 * NOTE: The TODO below mentions removing page from buffer pool
		 * and removing redo entries, once that is done, clearing full
		 * pages should be possible
		 */
		uint cnt = 0;
		uint bytes = 0;
		page_t* page = buf_block_get_frame(block);
		mem_heap_t* heap = NULL;
		ulint* offsets = NULL;
		/* Walk the records after the infimum up to the supremum
		and zero the data bytes of each one. */
		rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
		while (!page_rec_is_supremum(rec)) {
			offsets = rec_get_offsets(rec, index,
						  offsets, ULINT_UNDEFINED,
						  &heap);
			uint size = rec_offs_data_size(offsets);
			memset(rec, 0, size);
			rec = page_rec_get_next(rec);
			cnt++;
			bytes += size;
		}
#ifdef UNIV_DEBUG_SCRUBBING
		fprintf(stderr,
			"btr_page_free_low: scrub %lu/%lu - "
			"%u records %u bytes\n",
			buf_block_get_space(block),
			buf_block_get_page_no(block),
			cnt, bytes);
#endif /* UNIV_DEBUG_SCRUBBING */
		if (heap) {
			mem_heap_free(heap);
		}
	}

#ifdef UNIV_DEBUG_SCRUBBING
	if (scrub == false) {
		fprintf(stderr,
			"btr_page_free_low %lu/%lu blob: %u\n",
			buf_block_get_space(block),
			buf_block_get_page_no(block),
			blob);
	}
#endif /* UNIV_DEBUG_SCRUBBING */

	/* Pages of the insert buffer tree go back to its own free list. */
	if (dict_index_is_ibuf(index)) {

		btr_page_free_for_ibuf(index, block, mtr);

		return;
	}

	/* NOTE(review): btr_root_get() can return NULL; the result is used
	unchecked below. Confirm that this cannot happen on this path. */
	root = btr_root_get(index, mtr);

	/* Level 0 pages belong to the leaf segment, all other levels to
	the non-leaf ("top") segment. */
	if (level == 0) {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
	} else {
		seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
	}

	if (scrub) {
		/**
		* Reset page type so that scrub thread won't try to scrub it
		*/
		mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
				 FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr);
	}

	fseg_free_page(seg_header,
		       buf_block_get_space(block),
		       buf_block_get_page_no(block), mtr);

	/* The page was marked free in the allocation bitmap, but it
	should remain buffer-fixed until mtr_commit(mtr) or until it
	is explicitly freed from the mini-transaction. */
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	/* TODO: Discard any operations on the page from the redo log
	and remove the block from the flush list and the buffer pool.
	This would free up buffer pool earlier and reduce writes to
	both the tablespace and the redo log. */
}

/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
UNIV_INTERN
void
btr_page_free(
/*==========*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: block to be freed, x-latched */
	mtr_t*		mtr)	/*!< in: mtr */
{
	/* The level is read from the page itself, which is why the page
	is asserted to be an index page below. */
	const page_t*	page	= buf_block_get_frame(block);
	ulint		level	= btr_page_get_level(page, mtr);

	ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
	btr_page_free_low(index, block, level, false, mtr);
}

/**************************************************************//**
Sets the child node file address in a node pointer.
*/ -UNIV_INLINE -void -btr_node_ptr_set_child_page_no( -/*===========================*/ - rec_t* rec, /*!< in: node pointer record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed - part will be updated, or NULL */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint page_no,/*!< in: child node address */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* field; - ulint len; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!page_is_leaf(page_align(rec))); - ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); - - /* The child address is in the last field */ - field = rec_get_nth_field(rec, offsets, - rec_offs_n_fields(offsets) - 1, &len); - - ut_ad(len == REC_NODE_PTR_SIZE); - - if (page_zip) { - page_zip_write_node_ptr(page_zip, rec, - rec_offs_data_size(offsets), - page_no, mtr); - } else { - mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr); - } -} - -/************************************************************//** -Returns the child page of a node pointer and x-latches it. -@return child page, x-latched */ -buf_block_t* -btr_node_ptr_get_child( -/*===================*/ - const rec_t* node_ptr,/*!< in: node pointer */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint page_no; - ulint space; - - ut_ad(rec_offs_validate(node_ptr, index, offsets)); - space = page_get_space_id(page_align(node_ptr)); - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - - return(btr_block_get(space, dict_table_zip_size(index->table), - page_no, RW_X_LATCH, index, mtr)); -} - -/************************************************************//** -Returns the upper level node pointer to a page. It is assumed that mtr holds -an x-latch on the tree. 
@return rec_get_offsets() of the node pointer record */
static
ulint*
btr_page_get_father_node_ptr_func(
/*==============================*/
	ulint*		offsets,/*!< in: work area for the return value */
	mem_heap_t*	heap,	/*!< in: memory heap to use */
	btr_cur_t*	cursor,	/*!< in: cursor pointing to user record,
				out: cursor on node pointer record,
				its page x-latched */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dtuple_t*	tuple;
	rec_t*		user_rec;
	rec_t*		node_ptr;
	ulint		level;
	ulint		page_no;
	dict_index_t*	index;

	/* Remember the child page number before the cursor is moved. */
	page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
	index = btr_cur_get_index(cursor);

	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));

	/* The root page has no father. */
	ut_ad(dict_index_get_page(index) != page_no);

	level = btr_page_get_level(btr_cur_get_page(cursor), mtr);

	/* Build a node pointer tuple from the user record and search for
	it one level above the child page. */
	user_rec = btr_cur_get_rec(cursor);
	ut_a(page_rec_is_user_rec(user_rec));
	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);

	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
				    BTR_CONT_MODIFY_TREE, cursor, 0,
				    file, line, mtr);

	node_ptr = btr_cur_get_rec(cursor);
	ut_ad(!page_rec_is_comp(node_ptr)
	      || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
	offsets = rec_get_offsets(node_ptr, index, offsets,
				  ULINT_UNDEFINED, &heap);

	/* The node pointer found must point back to the child page.
	Otherwise dump both pages and the records involved to stderr and
	abort with ut_error. */
	if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
		rec_t*	print_rec;
		fputs("InnoDB: Dump of the child page:\n", stderr);
		buf_page_print(page_align(user_rec), 0,
			       BUF_PAGE_PRINT_NO_CRASH);
		fputs("InnoDB: Dump of the parent page:\n", stderr);
		buf_page_print(page_align(node_ptr), 0,
			       BUF_PAGE_PRINT_NO_CRASH);

		fputs("InnoDB: Corruption of an index tree: table ", stderr);
		ut_print_name(stderr, NULL, TRUE, index->table_name);
		fputs(", index ", stderr);
		ut_print_name(stderr, NULL, FALSE, index->name);
		fprintf(stderr, ",\n"
			"InnoDB: father ptr page no %lu, child page no %lu\n",
			(ulong)
			btr_node_ptr_get_child_page_no(node_ptr, offsets),
			(ulong) page_no);
		/* Print the first record after the infimum of the child
		page, then the node pointer record; 'offsets' is
		recomputed for each record before it is printed. */
		print_rec = page_rec_get_next(
			page_get_infimum_rec(page_align(user_rec)));
		offsets = rec_get_offsets(print_rec, index,
					  offsets, ULINT_UNDEFINED, &heap);
		page_rec_print(print_rec, offsets);
		offsets = rec_get_offsets(node_ptr, index, offsets,
					  ULINT_UNDEFINED, &heap);
		page_rec_print(node_ptr, offsets);

		fputs("InnoDB: You should dump + drop + reimport the table"
		      " to fix the\n"
		      "InnoDB: corruption. If the crash happens at "
		      "the database startup, see\n"
		      "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n"
		      "InnoDB: forcing recovery. "
		      "Then dump + drop + reimport.\n", stderr);

		ut_error;
	}

	return(offsets);
}

/* Convenience wrapper that passes the call site (__FILE__, __LINE__) on to
btr_page_get_father_node_ptr_func(). */
#define btr_page_get_father_node_ptr(of,heap,cur,mtr)			\
	btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)

/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr holds
an x-latch on the tree.
@return rec_get_offsets() of the node pointer record */
static
ulint*
btr_page_get_father_block(
/*======================*/
	ulint*		offsets,/*!< in: work area for the return value */
	mem_heap_t*	heap,	/*!< in: memory heap to use */
	dict_index_t*	index,	/*!< in: b-tree index */
	buf_block_t*	block,	/*!< in: child page in the index */
	mtr_t*		mtr,	/*!< in: mtr */
	btr_cur_t*	cursor)	/*!< out: cursor on node pointer record,
				its page x-latched */
{
	/* Position the cursor on the record that follows the infimum of
	the child page and look up its father node pointer. */
	rec_t*	rec
		= page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
								 block)));
	btr_cur_position(index, rec, block, cursor);
	return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
}

/************************************************************//**
Seeks to the upper level node pointer to a page.
It is assumed that mtr holds an x-latch on the tree.
*/ -static -void -btr_page_get_father( -/*================*/ - dict_index_t* index, /*!< in: b-tree index */ - buf_block_t* block, /*!< in: child page in the index */ - mtr_t* mtr, /*!< in: mtr */ - btr_cur_t* cursor) /*!< out: cursor on node pointer record, - its page x-latched */ -{ - mem_heap_t* heap; - rec_t* rec - = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame( - block))); - btr_cur_position(index, rec, block, cursor); - - heap = mem_heap_create(100); - btr_page_get_father_node_ptr(NULL, heap, cursor, mtr); - mem_heap_free(heap); -} - -/************************************************************//** -Creates the root node for a new index tree. -@return page number of the created root, FIL_NULL if did not succeed */ -UNIV_INTERN -ulint -btr_create( -/*=======*/ - ulint type, /*!< in: type of the index */ - ulint space, /*!< in: space where created */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - index_id_t index_id,/*!< in: index id */ - dict_index_t* index, /*!< in: index */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - buf_block_t* block; - page_t* page; - page_zip_des_t* page_zip; - - /* Create the two new segments (one, in the case of an ibuf tree) for - the index tree; the segment headers are put on the allocated root page - (for an ibuf tree, not in the root, but on a separate ibuf header - page) */ - - if (type & DICT_IBUF) { - /* Allocate first the ibuf header page */ - buf_block_t* ibuf_hdr_block = fseg_create( - space, 0, - IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); - - if (ibuf_hdr_block == NULL) { - return(FIL_NULL); - } - - buf_block_dbg_add_level( - ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW); - - ut_ad(buf_block_get_page_no(ibuf_hdr_block) - == IBUF_HEADER_PAGE_NO); - /* Allocate then the next page to the segment: it will be the - tree root page */ - - block = fseg_alloc_free_page( - buf_block_get_frame(ibuf_hdr_block) - + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - 
IBUF_TREE_ROOT_PAGE_NO, - FSP_UP, mtr); - - if (block == NULL) { - return(FIL_NULL); - } - - ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); - - flst_init(block->frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr); - } else { -#ifdef UNIV_BLOB_DEBUG - if ((type & DICT_CLUSTERED) && !index->blobs) { - mutex_create(PFS_NOT_INSTRUMENTED, - &index->blobs_mutex, SYNC_ANY_LATCH); - index->blobs = rbt_create(sizeof(btr_blob_dbg_t), - btr_blob_dbg_cmp); - } -#endif /* UNIV_BLOB_DEBUG */ - block = fseg_create(space, 0, - PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); - - if (block == NULL) { - return(FIL_NULL); - } - - buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW); - - if (!fseg_create(space, buf_block_get_page_no(block), - PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) { - /* Not enough space for new segment, free root - segment before return. */ - btr_free_root(space, zip_size, - buf_block_get_page_no(block), mtr); - return(FIL_NULL); - } - - /* The fseg create acquires a second latch on the page, - therefore we must declare it: */ - buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW); - } - - /* Create a new index page on the allocated segment page */ - page_zip = buf_block_get_page_zip(block); - - if (page_zip) { - page = page_create_zip(block, index, 0, 0, mtr); - } else { - page = page_create(block, mtr, - dict_table_is_comp(index->table)); - /* Set the level of the new index page */ - btr_page_set_level(page, NULL, 0, mtr); - } - - block->check_index_page_at_flush = TRUE; - - /* Set the index id of the page */ - btr_page_set_index_id(page, page_zip, index_id, mtr); - - /* Set the next node and previous node fields */ - btr_page_set_next(page, page_zip, FIL_NULL, mtr); - btr_page_set_prev(page, page_zip, FIL_NULL, mtr); - - /* We reset the free bits for the page to allow creation of several - trees in the same mtr, otherwise the latch on a bitmap page would - prevent it because of the latching order */ - 
- if (!(type & DICT_CLUSTERED)) { - ibuf_reset_free_bits(block); - } - - /* In the following assertion we test that two records of maximum - allowed size fit on the root page: this fact is needed to ensure - correctness of split algorithms */ - - ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); - - return(buf_block_get_page_no(block)); -} - -/************************************************************//** -Frees a B-tree except the root page, which MUST be freed after this -by calling btr_free_root. */ -UNIV_INTERN -void -btr_free_but_not_root( -/*==================*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no) /*!< in: root page number */ -{ - ibool finished; - page_t* root; - mtr_t mtr; - -leaf_loop: - mtr_start(&mtr); - - root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, - NULL, &mtr); - - if (!root) { - mtr_commit(&mtr); - return; - } - - SRV_CORRUPT_TABLE_CHECK(root, - { - mtr_commit(&mtr); - return; - }); - -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, space)); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); -#endif /* UNIV_BTR_DEBUG */ - - /* NOTE: page hash indexes are dropped when a page is freed inside - fsp0fsp. 
*/ - - finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, - &mtr); - mtr_commit(&mtr); - - if (!finished) { - - goto leaf_loop; - } -top_loop: - mtr_start(&mtr); - - root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, - NULL, &mtr); - - SRV_CORRUPT_TABLE_CHECK(root, - { - mtr_commit(&mtr); - return; - }); - -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); -#endif /* UNIV_BTR_DEBUG */ - - finished = fseg_free_step_not_header( - root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr); - mtr_commit(&mtr); - - if (!finished) { - - goto top_loop; - } -} - -/************************************************************//** -Frees the B-tree root page. Other tree MUST already have been freed. */ -UNIV_INTERN -void -btr_free_root( -/*==========*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no, /*!< in: root page number */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - fseg_header_t* header; - - block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, - NULL, mtr); - - if (block) { - SRV_CORRUPT_TABLE_CHECK(block, return;); - - btr_search_drop_page_hash_index(block); - - header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP; -#ifdef UNIV_BTR_DEBUG - ut_a(btr_root_fseg_validate(header, space)); -#endif /* UNIV_BTR_DEBUG */ - - while (!fseg_free_step(header, mtr)) { - /* Free the entire segment in small steps. */ - } - } -} -#endif /* !UNIV_HOTBACKUP */ - -/*************************************************************//** -Reorganizes an index page. - -IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. This has to -be done either within the same mini-transaction, or by invoking -ibuf_reset_free_bits() before mtr_commit(). 
On uncompressed pages, -IBUF_BITMAP_FREE is unaffected by reorganization. - -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN -bool -btr_page_reorganize_low( -/*====================*/ - bool recovery,/*!< in: true if called in recovery: - locks should not be updated, i.e., - there cannot exist locks on the - page, and a hash index should not be - dropped: it cannot exist */ - ulint z_level,/*!< in: compression level to be used - if dealing with compressed page */ - page_cur_t* cursor, /*!< in/out: page cursor */ - dict_index_t* index, /*!< in: the index tree of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block = page_cur_get_block(cursor); -#ifndef UNIV_HOTBACKUP - buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page); -#endif /* !UNIV_HOTBACKUP */ - page_t* page = buf_block_get_frame(block); - page_zip_des_t* page_zip = buf_block_get_page_zip(block); - buf_block_t* temp_block; - page_t* temp_page; - ulint log_mode; - ulint data_size1; - ulint data_size2; - ulint max_ins_size1; - ulint max_ins_size2; - bool success = false; - ulint pos; - bool log_compressed; - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - btr_assert_not_corrupted(block, index); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - data_size1 = page_get_data_size(page); - max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); - - /* Turn logging off */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); - -#ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(buf_pool); -#else /* !UNIV_HOTBACKUP */ - ut_ad(block == back_block1); - temp_block = back_block2; -#endif /* !UNIV_HOTBACKUP */ - temp_page = temp_block->frame; - - MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS); - - /* Copy the old page to temporary space */ - buf_frame_copy(temp_page, page); - -#ifndef UNIV_HOTBACKUP - if (!recovery) { - 
btr_search_drop_page_hash_index(block); - } - - block->check_index_page_at_flush = TRUE; -#endif /* !UNIV_HOTBACKUP */ - btr_blob_dbg_remove(page, index, "btr_page_reorganize"); - - /* Save the cursor position. */ - pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor)); - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) is preserved intact */ - - page_create(block, mtr, dict_table_is_comp(index->table)); - - /* Copy the records from the temporary space to the recreated page; - do not copy the lock bits yet */ - - page_copy_rec_list_end_no_locks(block, temp_block, - page_get_infimum_rec(temp_page), - index, mtr); - - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) { - /* Copy max trx id to recreated page */ - trx_id_t max_trx_id = page_get_max_trx_id(temp_page); - page_set_max_trx_id(block, NULL, max_trx_id, mtr); - /* In crash recovery, dict_index_is_sec_or_ibuf() always - holds, even for clustered indexes. max_trx_id is - unused in clustered index pages. */ - ut_ad(max_trx_id != 0 || recovery); - } - - /* If innodb_log_compressed_pages is ON, page reorganize should log the - compressed page image.*/ - log_compressed = page_zip && page_zip_log_pages; - - if (log_compressed) { - mtr_set_log_mode(mtr, log_mode); - } - - if (page_zip - && !page_zip_compress(page_zip, page, index, z_level, mtr)) { - - /* Restore the old page and exit. */ - btr_blob_dbg_restore(page, temp_page, index, - "btr_page_reorganize_compress_fail"); - -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG - /* Check that the bytes that we skip are identical. 
*/ - ut_a(!memcmp(page, temp_page, PAGE_HEADER)); - ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page, - PAGE_HEADER + PAGE_N_RECS + temp_page, - PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS))); - ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page, - UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page, - FIL_PAGE_DATA_END)); -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ - - memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page, - PAGE_N_RECS - PAGE_N_DIR_SLOTS); - memcpy(PAGE_DATA + page, PAGE_DATA + temp_page, - UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END); - -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG - ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE)); -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ - - goto func_exit; - } - -#ifndef UNIV_HOTBACKUP - if (!recovery) { - /* Update the record lock bitmaps */ - lock_move_reorganize_page(block, temp_block); - } -#endif /* !UNIV_HOTBACKUP */ - - data_size2 = page_get_data_size(page); - max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); - - if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) { - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH); - - fprintf(stderr, - "InnoDB: Error: page old data size %lu" - " new data size %lu\n" - "InnoDB: Error: page old max ins size %lu" - " new max ins size %lu\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned long) data_size1, (unsigned long) data_size2, - (unsigned long) max_ins_size1, - (unsigned long) max_ins_size2); - ut_ad(0); - } else { - success = true; - } - - /* Restore the cursor position. 
*/ - if (pos > 0) { - cursor->rec = page_rec_get_nth(page, pos); - } else { - ut_ad(cursor->rec == page_get_infimum_rec(page)); - } - -func_exit: -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ -#ifndef UNIV_HOTBACKUP - buf_block_free(temp_block); -#endif /* !UNIV_HOTBACKUP */ - - /* Restore logging mode */ - mtr_set_log_mode(mtr, log_mode); - -#ifndef UNIV_HOTBACKUP - if (success) { - byte type; - byte* log_ptr; - - /* Write the log record */ - if (page_zip) { - ut_ad(page_is_comp(page)); - type = MLOG_ZIP_PAGE_REORGANIZE; - } else if (page_is_comp(page)) { - type = MLOG_COMP_PAGE_REORGANIZE; - } else { - type = MLOG_PAGE_REORGANIZE; - } - - log_ptr = log_compressed - ? NULL - : mlog_open_and_write_index( - mtr, page, index, type, - page_zip ? 1 : 0); - - /* For compressed pages write the compression level. */ - if (log_ptr && page_zip) { - mach_write_to_1(log_ptr, z_level); - mlog_close(mtr, log_ptr + 1); - } - - MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL); - } -#endif /* !UNIV_HOTBACKUP */ - - return(success); -} - -/*************************************************************//** -Reorganizes an index page. - -IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. This has to -be done either within the same mini-transaction, or by invoking -ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, -IBUF_BITMAP_FREE is unaffected by reorganization. 
- -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN -bool -btr_page_reorganize_block( -/*======================*/ - bool recovery,/*!< in: true if called in recovery: - locks should not be updated, i.e., - there cannot exist locks on the - page, and a hash index should not be - dropped: it cannot exist */ - ulint z_level,/*!< in: compression level to be used - if dealing with compressed page */ - buf_block_t* block, /*!< in/out: B-tree page */ - dict_index_t* index, /*!< in: the index tree of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - page_cur_t cur; - page_cur_set_before_first(block, &cur); - - return(btr_page_reorganize_low(recovery, z_level, &cur, index, mtr)); -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Reorganizes an index page. - -IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. This has to -be done either within the same mini-transaction, or by invoking -ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, -IBUF_BITMAP_FREE is unaffected by reorganization. - -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN -bool -btr_page_reorganize( -/*================*/ - page_cur_t* cursor, /*!< in/out: page cursor */ - dict_index_t* index, /*!< in: the index tree of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - return(btr_page_reorganize_low(false, page_zip_level, - cursor, index, mtr)); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses a redo log record of reorganizing a page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_parse_page_reorganize( -/*======================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - dict_index_t* index, /*!< in: record descriptor */ - bool compressed,/*!< in: true if compressed page */ - buf_block_t* block, /*!< in: page to be reorganized, or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ulint level; - - ut_ad(ptr != NULL); - ut_ad(end_ptr != NULL); - - /* If dealing with a compressed page the record has the - compression level used during original compression written in - one byte. Otherwise record is empty. */ - if (compressed) { - if (ptr == end_ptr) { - return(NULL); - } - - level = mach_read_from_1(ptr); - - ut_a(level <= 9); - ++ptr; - } else { - level = page_zip_level; - } - - if (block != NULL) { - btr_page_reorganize_block(true, level, block, index, mtr); - } - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Empties an index page. @see btr_page_create(). */ -UNIV_INTERN -void -btr_page_empty( -/*===========*/ - buf_block_t* block, /*!< in: page to be emptied */ - page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */ - dict_index_t* index, /*!< in: index of the page */ - ulint level, /*!< in: the B-tree level of the page */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* page = buf_block_get_frame(block); - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_zip == buf_block_get_page_zip(block)); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - btr_search_drop_page_hash_index(block); - btr_blob_dbg_remove(page, index, "btr_page_empty"); - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) 
is preserved intact */ - - if (page_zip) { - page_create_zip(block, index, level, 0, mtr); - } else { - page_create(block, mtr, dict_table_is_comp(index->table)); - btr_page_set_level(page, NULL, level, mtr); - } - - block->check_index_page_at_flush = TRUE; -} - -/*************************************************************//** -Makes tree one level higher by splitting the root, and inserts -the tuple. It is assumed that mtr contains an x-latch on the tree. -NOTE that the operation of this function must always succeed, -we cannot reverse it: therefore enough free disk space must be -guaranteed to be available before this function is called. -@return inserted record or NULL if run out of space */ -UNIV_INTERN -rec_t* -btr_root_raise_and_insert( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor at which to insert: must be - on the root page; when the function returns, - the cursor is positioned on the predecessor - of the inserted record */ - ulint** offsets,/*!< out: offsets on inserted record */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ - const dtuple_t* tuple, /*!< in: tuple to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_index_t* index; - page_t* root; - page_t* new_page; - ulint new_page_no; - rec_t* rec; - dtuple_t* node_ptr; - ulint level; - rec_t* node_ptr_rec; - page_cur_t* page_cursor; - page_zip_des_t* root_page_zip; - page_zip_des_t* new_page_zip; - buf_block_t* root_block; - buf_block_t* new_block; - - root = btr_cur_get_page(cursor); - root_block = btr_cur_get_block(cursor); - root_page_zip = buf_block_get_page_zip(root_block); - ut_ad(!page_is_empty(root)); - index = btr_cur_get_index(cursor); -#ifdef UNIV_ZIP_DEBUG - ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index)); -#endif /* UNIV_ZIP_DEBUG */ -#ifdef UNIV_BTR_DEBUG - if (!dict_index_is_ibuf(index)) { - ulint 
space = dict_index_get_space(index); - - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF - + root, space)); - ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP - + root, space)); - } - - ut_a(dict_index_get_page(index) == page_get_page_no(root)); -#endif /* UNIV_BTR_DEBUG */ - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX)); - - /* Allocate a new page to the tree. Root splitting is done by first - moving the root records to the new page, emptying the root, putting - a node pointer to the new page, and then splitting the new page. */ - - level = btr_page_get_level(root, mtr); - - new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr); - - if (new_block == NULL && os_has_said_disk_full) { - return(NULL); - } - - new_page = buf_block_get_frame(new_block); - new_page_zip = buf_block_get_page_zip(new_block); - ut_a(!new_page_zip == !root_page_zip); - ut_a(!new_page_zip - || page_zip_get_size(new_page_zip) - == page_zip_get_size(root_page_zip)); - - btr_page_create(new_block, new_page_zip, index, level, mtr); - - /* Set the next node and previous node fields of new page */ - btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr); - btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr); - - /* Copy the records from root to the new page one by one. */ - - if (0 -#ifdef UNIV_ZIP_COPY - || new_page_zip -#endif /* UNIV_ZIP_COPY */ - || !page_copy_rec_list_end(new_block, root_block, - page_get_infimum_rec(root), - index, mtr)) { - ut_a(new_page_zip); - - /* Copy the page byte for byte. */ - page_zip_copy_recs(new_page_zip, new_page, - root_page_zip, root, index, mtr); - - /* Update the lock table and possible hash index. 
*/ - - lock_move_rec_list_end(new_block, root_block, - page_get_infimum_rec(root)); - - btr_search_move_or_delete_hash_entries(new_block, root_block, - index); - } - - /* If this is a pessimistic insert which is actually done to - perform a pessimistic update then we have stored the lock - information of the record to be inserted on the infimum of the - root page: we cannot discard the lock structs on the root page */ - - lock_update_root_raise(new_block, root_block); - - /* Create a memory heap where the node pointer is stored */ - if (!*heap) { - *heap = mem_heap_create(1000); - } - - rec = page_rec_get_next(page_get_infimum_rec(new_page)); - new_page_no = buf_block_get_page_no(new_block); - - /* Build the node pointer (= node key and page address) for the - child */ - - node_ptr = dict_index_build_node_ptr( - index, rec, new_page_no, *heap, level); - /* The node pointer must be marked as the predefined minimum record, - as there is no lower alphabetical limit to records in the leftmost - node of a level: */ - dtuple_set_info_bits(node_ptr, - dtuple_get_info_bits(node_ptr) - | REC_INFO_MIN_REC_FLAG); - - /* Rebuild the root page to get free space */ - btr_page_empty(root_block, root_page_zip, index, level + 1, mtr); - - /* Set the next node and previous node fields, although - they should already have been set. The previous node field - must be FIL_NULL if root_page_zip != NULL, because the - REC_INFO_MIN_REC_FLAG (of the first user record) will be - set if and only if btr_page_get_prev() == FIL_NULL. */ - btr_page_set_next(root, root_page_zip, FIL_NULL, mtr); - btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr); - - page_cursor = btr_cur_get_page_cur(cursor); - - /* Insert node pointer to the root */ - - page_cur_set_before_first(root_block, page_cursor); - - node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, - index, offsets, heap, 0, mtr); - - /* The root page should only contain the node pointer - to new_page at this point. 
Thus, the data should fit. */ - ut_a(node_ptr_rec); - - /* We play safe and reset the free bits for the new page */ - -#if 0 - fprintf(stderr, "Root raise new page no %lu\n", new_page_no); -#endif - - if (!dict_index_is_clust(index)) { - ibuf_reset_free_bits(new_block); - } - - if (tuple != NULL) { - /* Reposition the cursor to the child node */ - page_cur_search(new_block, index, tuple, - PAGE_CUR_LE, page_cursor); - } else { - /* Set cursor to first record on child node */ - page_cur_set_before_first(new_block, page_cursor); - } - - /* Split the child and insert tuple */ - return(btr_page_split_and_insert(flags, cursor, offsets, heap, - tuple, n_ext, mtr)); -} - -/*************************************************************//** -Decides if the page should be split at the convergence point of inserts -converging to the left. -@return TRUE if split recommended */ -UNIV_INTERN -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec) /*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ -{ - page_t* page; - rec_t* insert_point; - rec_t* infimum; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); - - if (page_header_get_ptr(page, PAGE_LAST_INSERT) - == page_rec_get_next(insert_point)) { - - infimum = page_get_infimum_rec(page); - - /* If the convergence is in the middle of a page, include also - the record immediately before the new insert to the upper - page. Otherwise, we could repeatedly move from page to page - lots of records smaller than the convergence point. 
*/ - - if (infimum != insert_point - && page_rec_get_next(infimum) != insert_point) { - - *split_rec = insert_point; - } else { - *split_rec = page_rec_get_next(insert_point); - } - - return(TRUE); - } - - return(FALSE); -} - -/*************************************************************//** -Decides if the page should be split at the convergence point of inserts -converging to the right. -@return TRUE if split recommended */ -UNIV_INTERN -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec) /*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ -{ - page_t* page; - rec_t* insert_point; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); - - /* We use eager heuristics: if the new insert would be right after - the previous insert on the same page, we assume that there is a - pattern of sequential inserts here. */ - - if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) { - - rec_t* next_rec; - - next_rec = page_rec_get_next(insert_point); - - if (page_rec_is_supremum(next_rec)) { -split_at_new: - /* Split at the new record to insert */ - *split_rec = NULL; - } else { - rec_t* next_next_rec = page_rec_get_next(next_rec); - if (page_rec_is_supremum(next_next_rec)) { - - goto split_at_new; - } - - /* If there are >= 2 user records up from the insert - point, split all but 1 off. We want to keep one because - then sequential inserts can use the adaptive hash - index, as they can do the necessary checks of the right - search position just by looking at the records on this - page. */ - - *split_rec = next_next_rec; - } - - return(TRUE); - } - - return(FALSE); -} - -/*************************************************************//** -Calculates a split record such that the tuple will certainly fit on -its half-page when the split is performed. 
We assume in this function -only that the cursor page has at least one user record. -@return split record, or NULL if tuple will be the first record on -the lower or upper half-page (determined by btr_page_tuple_smaller()) */ -static -rec_t* -btr_page_get_split_rec( -/*===================*/ - btr_cur_t* cursor, /*!< in: cursor at which insert should be made */ - const dtuple_t* tuple, /*!< in: tuple to insert */ - ulint n_ext) /*!< in: number of externally stored columns */ -{ - page_t* page; - page_zip_des_t* page_zip; - ulint insert_size; - ulint free_space; - ulint total_data; - ulint total_n_recs; - ulint total_space; - ulint incl_data; - rec_t* ins_rec; - rec_t* rec; - rec_t* next_rec; - ulint n; - mem_heap_t* heap; - ulint* offsets; - - page = btr_cur_get_page(cursor); - - insert_size = rec_get_converted_size(cursor->index, tuple, n_ext); - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - page_zip = btr_cur_get_page_zip(cursor); - if (page_zip) { - /* Estimate the free space of an empty compressed page. 
*/ - ulint free_space_zip = page_zip_empty_size( - cursor->index->n_fields, - page_zip_get_size(page_zip)); - - if (free_space > (ulint) free_space_zip) { - free_space = (ulint) free_space_zip; - } - } - - /* free_space is now the free space of a created new page */ - - total_data = page_get_data_size(page) + insert_size; - total_n_recs = page_get_n_recs(page) + 1; - ut_ad(total_n_recs >= 2); - total_space = total_data + page_dir_calc_reserved_space(total_n_recs); - - n = 0; - incl_data = 0; - ins_rec = btr_cur_get_rec(cursor); - rec = page_get_infimum_rec(page); - - heap = NULL; - offsets = NULL; - - /* We start to include records to the left half, and when the - space reserved by them exceeds half of total_space, then if - the included records fit on the left page, they will be put there - if something was left over also for the right page, - otherwise the last included record will be the first on the right - half page */ - - do { - /* Decide the next record to include */ - if (rec == ins_rec) { - rec = NULL; /* NULL denotes that tuple is - now included */ - } else if (rec == NULL) { - rec = page_rec_get_next(ins_rec); - } else { - rec = page_rec_get_next(rec); - } - - if (rec == NULL) { - /* Include tuple */ - incl_data += insert_size; - } else { - offsets = rec_get_offsets(rec, cursor->index, - offsets, ULINT_UNDEFINED, - &heap); - incl_data += rec_offs_size(offsets); - } - - n++; - } while (incl_data + page_dir_calc_reserved_space(n) - < total_space / 2); - - if (incl_data + page_dir_calc_reserved_space(n) <= free_space) { - /* The next record will be the first on - the right half page if it is not the - supremum record of page */ - - if (rec == ins_rec) { - rec = NULL; - - goto func_exit; - } else if (rec == NULL) { - next_rec = page_rec_get_next(ins_rec); - } else { - next_rec = page_rec_get_next(rec); - } - ut_ad(next_rec); - if (!page_rec_is_supremum(next_rec)) { - rec = next_rec; - } - } - -func_exit: - if (heap) { - mem_heap_free(heap); - } - 
return(rec); -} - -/*************************************************************//** -Returns TRUE if the insert fits on the appropriate half-page with the -chosen split_rec. -@return true if fits */ -static MY_ATTRIBUTE((nonnull(1,3,4,6), warn_unused_result)) -bool -btr_page_insert_fits( -/*=================*/ - btr_cur_t* cursor, /*!< in: cursor at which insert - should be made */ - const rec_t* split_rec,/*!< in: suggestion for first record - on upper half-page, or NULL if - tuple to be inserted should be first */ - ulint** offsets,/*!< in: rec_get_offsets( - split_rec, cursor->index); out: garbage */ - const dtuple_t* tuple, /*!< in: tuple to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - mem_heap_t** heap) /*!< in: temporary memory heap */ -{ - page_t* page; - ulint insert_size; - ulint free_space; - ulint total_data; - ulint total_n_recs; - const rec_t* rec; - const rec_t* end_rec; - - page = btr_cur_get_page(cursor); - - ut_ad(!split_rec - || !page_is_comp(page) == !rec_offs_comp(*offsets)); - ut_ad(!split_rec - || rec_offs_validate(split_rec, cursor->index, *offsets)); - - insert_size = rec_get_converted_size(cursor->index, tuple, n_ext); - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - /* free_space is now the free space of a created new page */ - - total_data = page_get_data_size(page) + insert_size; - total_n_recs = page_get_n_recs(page) + 1; - - /* We determine which records (from rec to end_rec, not including - end_rec) will end up on the other half page from tuple when it is - inserted. 
*/ - - if (split_rec == NULL) { - rec = page_rec_get_next(page_get_infimum_rec(page)); - end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); - - } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) { - - rec = page_rec_get_next(page_get_infimum_rec(page)); - end_rec = split_rec; - } else { - rec = split_rec; - end_rec = page_get_supremum_rec(page); - } - - if (total_data + page_dir_calc_reserved_space(total_n_recs) - <= free_space) { - - /* Ok, there will be enough available space on the - half page where the tuple is inserted */ - - return(true); - } - - while (rec != end_rec) { - /* In this loop we calculate the amount of reserved - space after rec is removed from page. */ - - *offsets = rec_get_offsets(rec, cursor->index, *offsets, - ULINT_UNDEFINED, heap); - - total_data -= rec_offs_size(*offsets); - total_n_recs--; - - if (total_data + page_dir_calc_reserved_space(total_n_recs) - <= free_space) { - - /* Ok, there will be enough available space on the - half page where the tuple is inserted */ - - return(true); - } - - rec = page_rec_get_next_const(rec); - } - - return(false); -} - -/*******************************************************//** -Inserts a data tuple to a tree on a non-leaf level. It is assumed -that mtr holds an x-latch on the tree. 
*/
UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
	ulint		flags,	/*!< in: undo logging and locking flags */
	dict_index_t*	index,	/*!< in: index */
	ulint		level,	/*!< in: level, must be > 0 */
	dtuple_t*	tuple,	/*!< in: the record to be inserted */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mtr */
{
	big_rec_t*	dummy_big_rec;
	btr_cur_t	cursor;
	dberr_t		err;
	rec_t*		rec;
	ulint*		offsets	= NULL;
	mem_heap_t*	heap	= NULL;

	ut_ad(level > 0);

	/* Position the cursor on the requested level at the last
	record that is <= tuple. */
	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
				    BTR_CONT_MODIFY_TREE,
				    &cursor, 0, file, line, mtr);

	ut_ad(cursor.flag == BTR_CUR_BINARY);

	/* Try the optimistic insert first; the caller's flags are
	extended with the no-locking, keep-sys and no-undo-log flags. */
	err = btr_cur_optimistic_insert(
		flags
		| BTR_NO_LOCKING_FLAG
		| BTR_KEEP_SYS_FLAG
		| BTR_NO_UNDO_LOG_FLAG,
		&cursor, &offsets, &heap,
		tuple, &rec, &dummy_big_rec, 0, NULL, mtr);

	if (err == DB_FAIL) {
		/* Fall back to the pessimistic insert, which must
		succeed. */
		err = btr_cur_pessimistic_insert(flags
						 | BTR_NO_LOCKING_FLAG
						 | BTR_KEEP_SYS_FLAG
						 | BTR_NO_UNDO_LOG_FLAG,
						 &cursor, &offsets, &heap,
						 tuple, &rec,
						 &dummy_big_rec, 0, NULL, mtr);
		ut_a(err == DB_SUCCESS);
	}

	/* heap starts out NULL and is only created inside the insert
	calls, so release it only if it was actually allocated (the same
	guard btr_page_get_split_rec() uses for its offsets heap). */
	if (heap) {
		mem_heap_free(heap);
	}
}

/**************************************************************//**
Attaches the halves of an index page on the appropriate level in an
index tree.
*/ -static MY_ATTRIBUTE((nonnull)) -void -btr_attach_half_pages( -/*==================*/ - ulint flags, /*!< in: undo logging and - locking flags */ - dict_index_t* index, /*!< in: the index tree */ - buf_block_t* block, /*!< in/out: page to be split */ - const rec_t* split_rec, /*!< in: first record on upper - half page */ - buf_block_t* new_block, /*!< in/out: the new half page */ - ulint direction, /*!< in: FSP_UP or FSP_DOWN */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint space; - ulint zip_size; - ulint prev_page_no; - ulint next_page_no; - ulint level; - page_t* page = buf_block_get_frame(block); - page_t* lower_page; - page_t* upper_page; - ulint lower_page_no; - ulint upper_page_no; - page_zip_des_t* lower_page_zip; - page_zip_des_t* upper_page_zip; - dtuple_t* node_ptr_upper; - mem_heap_t* heap; - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX)); - - /* Create a memory heap where the data tuple is stored */ - heap = mem_heap_create(1024); - - /* Based on split direction, decide upper and lower pages */ - if (direction == FSP_DOWN) { - - btr_cur_t cursor; - ulint* offsets; - - lower_page = buf_block_get_frame(new_block); - lower_page_no = buf_block_get_page_no(new_block); - lower_page_zip = buf_block_get_page_zip(new_block); - upper_page = buf_block_get_frame(block); - upper_page_no = buf_block_get_page_no(block); - upper_page_zip = buf_block_get_page_zip(block); - - /* Look up the index for the node pointer to page */ - offsets = btr_page_get_father_block(NULL, heap, index, - block, mtr, &cursor); - - /* Replace the address of the old child node (= page) with the - address of the new lower half */ - - btr_node_ptr_set_child_page_no( - btr_cur_get_rec(&cursor), - btr_cur_get_page_zip(&cursor), - offsets, lower_page_no, mtr); - mem_heap_empty(heap); - } else { - lower_page = buf_block_get_frame(block); - lower_page_no = buf_block_get_page_no(block); - lower_page_zip = 
buf_block_get_page_zip(block); - upper_page = buf_block_get_frame(new_block); - upper_page_no = buf_block_get_page_no(new_block); - upper_page_zip = buf_block_get_page_zip(new_block); - } - - /* Get the level of the split pages */ - level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr); - ut_ad(level - == btr_page_get_level(buf_block_get_frame(new_block), mtr)); - - /* Build the node pointer (= node key and page address) for the upper - half */ - - node_ptr_upper = dict_index_build_node_ptr(index, split_rec, - upper_page_no, heap, level); - - /* Insert it next to the pointer to the lower half. Note that this - may generate recursion leading to a split on the higher level. */ - - btr_insert_on_non_leaf_level(flags, index, level + 1, - node_ptr_upper, mtr); - - /* Free the memory heap */ - mem_heap_free(heap); - - /* Get the previous and next pages of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - space = buf_block_get_space(block); - zip_size = buf_block_get_zip_size(block); - - /* Update page links of the level */ - - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block = btr_block_get( - space, zip_size, prev_page_no, RW_X_LATCH, index, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_block->frame) == page_is_comp(page)); - ut_a(btr_page_get_next(prev_block->frame, mtr) - == buf_block_get_page_no(block)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_next(buf_block_get_frame(prev_block), - buf_block_get_page_zip(prev_block), - lower_page_no, mtr); - } - - if (next_page_no != FIL_NULL) { - buf_block_t* next_block = btr_block_get( - space, zip_size, next_page_no, RW_X_LATCH, index, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_block->frame) == page_is_comp(page)); - ut_a(btr_page_get_prev(next_block->frame, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_prev(buf_block_get_frame(next_block), - buf_block_get_page_zip(next_block), - 
upper_page_no, mtr); - } - - btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr); - btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr); - - btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr); - btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr); -} - -/*************************************************************//** -Determine if a tuple is smaller than any record on the page. -@return TRUE if smaller */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -btr_page_tuple_smaller( -/*===================*/ - btr_cur_t* cursor, /*!< in: b-tree cursor */ - const dtuple_t* tuple, /*!< in: tuple to consider */ - ulint** offsets,/*!< in/out: temporary storage */ - ulint n_uniq, /*!< in: number of unique fields - in the index page records */ - mem_heap_t** heap) /*!< in/out: heap for offsets */ -{ - buf_block_t* block; - const rec_t* first_rec; - page_cur_t pcur; - - /* Read the first user record in the page. */ - block = btr_cur_get_block(cursor); - page_cur_set_before_first(block, &pcur); - page_cur_move_to_next(&pcur); - first_rec = page_cur_get_rec(&pcur); - - *offsets = rec_get_offsets( - first_rec, cursor->index, *offsets, - n_uniq, heap); - - return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0); -} - -/** Insert the tuple into the right sibling page, if the cursor is at the end -of a page. -@param[in] flags undo logging and locking flags -@param[in,out] cursor cursor at which to insert; when the function succeeds, - the cursor is positioned before the insert point. 
-@param[out] offsets offsets on inserted record -@param[in,out] heap memory heap for allocating offsets -@param[in] tuple tuple to insert -@param[in] n_ext number of externally stored columns -@param[in,out] mtr mini-transaction -@return inserted record (first record on the right sibling page); - the cursor will be positioned on the page infimum -@retval NULL if the operation was not performed */ -static -rec_t* -btr_insert_into_right_sibling( - ulint flags, - btr_cur_t* cursor, - ulint** offsets, - mem_heap_t* heap, - const dtuple_t* tuple, - ulint n_ext, - mtr_t* mtr) -{ - buf_block_t* block = btr_cur_get_block(cursor); - page_t* page = buf_block_get_frame(block); - ulint next_page_no = btr_page_get_next(page, mtr); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(heap); - - if (next_page_no == FIL_NULL || !page_rec_is_supremum( - page_rec_get_next(btr_cur_get_rec(cursor)))) { - - return(NULL); - } - - page_cur_t next_page_cursor; - buf_block_t* next_block; - page_t* next_page; - btr_cur_t next_father_cursor; - rec_t* rec = NULL; - ulint zip_size = buf_block_get_zip_size(block); - ulint max_size; - - next_block = btr_block_get( - buf_block_get_space(block), zip_size, - next_page_no, RW_X_LATCH, cursor->index, mtr); - next_page = buf_block_get_frame(next_block); - - bool is_leaf = page_is_leaf(next_page); - - btr_page_get_father( - cursor->index, next_block, mtr, &next_father_cursor); - - page_cur_search( - next_block, cursor->index, tuple, PAGE_CUR_LE, - &next_page_cursor); - - max_size = page_get_max_insert_size_after_reorganize(next_page, 1); - - /* Extends gap lock for the next page */ - lock_update_split_left(next_block, block); - - rec = page_cur_tuple_insert( - &next_page_cursor, tuple, cursor->index, offsets, &heap, - n_ext, mtr); - - if (rec == NULL) { - if (zip_size && is_leaf - && !dict_index_is_clust(cursor->index)) { - /* Reset the 
IBUF_BITMAP_FREE bits, because - page_cur_tuple_insert() will have attempted page - reorganize before failing. */ - ibuf_reset_free_bits(next_block); - } - return(NULL); - } - - ibool compressed; - dberr_t err; - ulint level = btr_page_get_level(next_page, mtr); - - /* adjust cursor position */ - *btr_cur_get_page_cur(cursor) = next_page_cursor; - - ut_ad(btr_cur_get_rec(cursor) == page_get_infimum_rec(next_page)); - ut_ad(page_rec_get_next(page_get_infimum_rec(next_page)) == rec); - - /* We have to change the parent node pointer */ - - compressed = btr_cur_pessimistic_delete( - &err, TRUE, &next_father_cursor, - BTR_CREATE_FLAG, RB_NONE, mtr); - - ut_a(err == DB_SUCCESS); - - if (!compressed) { - btr_cur_compress_if_useful(&next_father_cursor, FALSE, mtr); - } - - dtuple_t* node_ptr = dict_index_build_node_ptr( - cursor->index, rec, buf_block_get_page_no(next_block), - heap, level); - - btr_insert_on_non_leaf_level( - flags, cursor->index, level + 1, node_ptr, mtr); - - ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); - - if (is_leaf && !dict_index_is_clust(cursor->index)) { - /* Update the free bits of the B-tree page in the - insert buffer bitmap. */ - - if (zip_size) { - ibuf_update_free_bits_zip(next_block, mtr); - } else { - ibuf_update_free_bits_if_full( - next_block, max_size, - rec_offs_size(*offsets) + PAGE_DIR_SLOT_SIZE); - } - } - - return(rec); -} - -/*************************************************************//** -Splits an index page to halves and inserts the tuple. It is assumed -that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is -released within this function! NOTE that the operation of this -function must always succeed, we cannot reverse it: therefore enough -free disk space (2 pages) must be guaranteed to be available before -this function is called. - -NOTE: jonaso added support for calling function with tuple == NULL -which cause it to only split a page. 
- -@return inserted record or NULL if run out of space */ -UNIV_INTERN -rec_t* -btr_page_split_and_insert( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor at which to insert; when the - function returns, the cursor is positioned - on the predecessor of the inserted record */ - ulint** offsets,/*!< out: offsets on inserted record */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ - const dtuple_t* tuple, /*!< in: tuple to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - page_t* page; - page_zip_des_t* page_zip; - ulint page_no; - byte direction; - ulint hint_page_no; - buf_block_t* new_block; - page_t* new_page; - page_zip_des_t* new_page_zip; - rec_t* split_rec; - buf_block_t* left_block; - buf_block_t* right_block; - buf_block_t* insert_block; - page_cur_t* page_cursor; - rec_t* first_rec; - byte* buf = 0; /* remove warning */ - rec_t* move_limit; - ibool insert_will_fit; - ibool insert_left; - ulint n_iterations = 0; - rec_t* rec; - ulint n_uniq; - - if (!*heap) { - *heap = mem_heap_create(1024); - } - n_uniq = dict_index_get_n_unique_in_tree(cursor->index); -func_start: - mem_heap_empty(*heap); - *offsets = NULL; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(!dict_index_is_online_ddl(cursor->index) - || (flags & BTR_CREATE_FLAG) - || dict_index_is_clust(cursor->index)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - block = btr_cur_get_block(cursor); - page = buf_block_get_frame(block); - page_zip = buf_block_get_page_zip(block); - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(!page_is_empty(page)); - - /* try to insert to the next page if possible before split */ - rec = btr_insert_into_right_sibling( - flags, cursor, offsets, *heap, 
tuple, n_ext, mtr); - - if (rec != NULL) { - return(rec); - } - - page_no = buf_block_get_page_no(block); - - /* 1. Decide the split record; split_rec == NULL means that the - tuple to be inserted should be the first record on the upper - half-page */ - insert_left = FALSE; - - if (tuple != NULL && n_iterations > 0) { - direction = FSP_UP; - hint_page_no = page_no + 1; - split_rec = btr_page_get_split_rec(cursor, tuple, n_ext); - - if (split_rec == NULL) { - insert_left = btr_page_tuple_smaller( - cursor, tuple, offsets, n_uniq, heap); - } - } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { - direction = FSP_UP; - hint_page_no = page_no + 1; - - } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { - direction = FSP_DOWN; - hint_page_no = page_no - 1; - ut_ad(split_rec); - } else { - direction = FSP_UP; - hint_page_no = page_no + 1; - - /* If there is only one record in the index page, we - can't split the node in the middle by default. We need - to determine whether the new record will be inserted - to the left or right. */ - - if (page_get_n_recs(page) > 1) { - split_rec = page_get_middle_rec(page); - } else if (btr_page_tuple_smaller(cursor, tuple, - offsets, n_uniq, heap)) { - split_rec = page_rec_get_next( - page_get_infimum_rec(page)); - } else { - split_rec = NULL; - } - } - - DBUG_EXECUTE_IF("disk_is_full", - os_has_said_disk_full = true; - return(NULL);); - - /* 2. Allocate a new page to the index */ - new_block = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr, mtr); - - if (new_block == NULL && os_has_said_disk_full) { - return(NULL); - } - - new_page = buf_block_get_frame(new_block); - new_page_zip = buf_block_get_page_zip(new_block); - btr_page_create(new_block, new_page_zip, cursor->index, - btr_page_get_level(page, mtr), mtr); - /* Only record the leaf level page splits. 
*/ - if (page_is_leaf(page)) { - cursor->index->stat_defrag_n_page_split ++; - cursor->index->stat_defrag_modified_counter ++; - btr_defragment_save_defrag_stats_if_needed(cursor->index); - } - - /* 3. Calculate the first record on the upper half-page, and the - first record (move_limit) on original page which ends up on the - upper half */ - - if (split_rec) { - first_rec = move_limit = split_rec; - - *offsets = rec_get_offsets(split_rec, cursor->index, *offsets, - n_uniq, heap); - - if (tuple != NULL) { - insert_left = cmp_dtuple_rec( - tuple, split_rec, *offsets) < 0; - } else { - insert_left = 1; - } - - if (!insert_left && new_page_zip && n_iterations > 0) { - /* If a compressed page has already been split, - avoid further splits by inserting the record - to an empty page. */ - split_rec = NULL; - goto insert_empty; - } - } else if (insert_left) { - ut_a(n_iterations > 0); - first_rec = page_rec_get_next(page_get_infimum_rec(page)); - move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); - } else { -insert_empty: - ut_ad(!split_rec); - ut_ad(!insert_left); - buf = (byte*) mem_alloc(rec_get_converted_size(cursor->index, - tuple, n_ext)); - - first_rec = rec_convert_dtuple_to_rec(buf, cursor->index, - tuple, n_ext); - move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); - } - - /* 4. Do first the modifications in the tree structure */ - - btr_attach_half_pages(flags, cursor->index, block, - first_rec, new_block, direction, mtr); - - /* If the split is made on the leaf level and the insert will fit - on the appropriate half-page, we may release the tree x-latch. - We can then move the records after releasing the tree latch, - thus reducing the tree latch contention. 
*/ - if (tuple == NULL) { - insert_will_fit = 1; - } - else if (split_rec) { - insert_will_fit = !new_page_zip - && btr_page_insert_fits(cursor, split_rec, - offsets, tuple, n_ext, heap); - } else { - if (!insert_left) { - mem_free(buf); - buf = NULL; - } - - insert_will_fit = !new_page_zip - && btr_page_insert_fits(cursor, NULL, - offsets, tuple, n_ext, heap); - } - - if (insert_will_fit && page_is_leaf(page) - && !dict_index_is_online_ddl(cursor->index)) { - - mtr_memo_release(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - } - - /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN) { - /* fputs("Split left\n", stderr); */ - - if (0 -#ifdef UNIV_ZIP_COPY - || page_zip -#endif /* UNIV_ZIP_COPY */ - || !page_move_rec_list_start(new_block, block, move_limit, - cursor->index, mtr)) { - /* For some reason, compressing new_page failed, - even though it should contain fewer records than - the original page. Copy the page byte for byte - and then delete the records from both pages - as appropriate. Deleting will always succeed. */ - ut_a(new_page_zip); - - page_zip_copy_recs(new_page_zip, new_page, - page_zip, page, cursor->index, mtr); - page_delete_rec_list_end(move_limit - page + new_page, - new_block, cursor->index, - ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); - - /* Update the lock table and possible hash index. */ - - lock_move_rec_list_start( - new_block, block, move_limit, - new_page + PAGE_NEW_INFIMUM); - - btr_search_move_or_delete_hash_entries( - new_block, block, cursor->index); - - /* Delete the records from the source page. 
*/ - - page_delete_rec_list_start(move_limit, block, - cursor->index, mtr); - } - - left_block = new_block; - right_block = block; - - lock_update_split_left(right_block, left_block); - } else { - /* fputs("Split right\n", stderr); */ - - if (0 -#ifdef UNIV_ZIP_COPY - || page_zip -#endif /* UNIV_ZIP_COPY */ - || !page_move_rec_list_end(new_block, block, move_limit, - cursor->index, mtr)) { - /* For some reason, compressing new_page failed, - even though it should contain fewer records than - the original page. Copy the page byte for byte - and then delete the records from both pages - as appropriate. Deleting will always succeed. */ - ut_a(new_page_zip); - - page_zip_copy_recs(new_page_zip, new_page, - page_zip, page, cursor->index, mtr); - page_delete_rec_list_start(move_limit - page - + new_page, new_block, - cursor->index, mtr); - - /* Update the lock table and possible hash index. */ - - lock_move_rec_list_end(new_block, block, move_limit); - - btr_search_move_or_delete_hash_entries( - new_block, block, cursor->index); - - /* Delete the records from the source page. */ - - page_delete_rec_list_end(move_limit, block, - cursor->index, - ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); - } - - left_block = block; - right_block = new_block; - - lock_update_split_right(right_block, left_block); - } - -#ifdef UNIV_ZIP_DEBUG - if (page_zip) { - ut_a(page_zip_validate(page_zip, page, cursor->index)); - ut_a(page_zip_validate(new_page_zip, new_page, cursor->index)); - } -#endif /* UNIV_ZIP_DEBUG */ - - /* At this point, split_rec, move_limit and first_rec may point - to garbage on the old page. */ - - /* 6. The split and the tree modification is now completed. Decide the - page where the tuple should be inserted */ - - if (tuple == NULL) { - rec = NULL; - goto func_exit; - } - - if (insert_left) { - insert_block = left_block; - } else { - insert_block = right_block; - } - - /* 7. 
Reposition the cursor for insert and try insertion */ - page_cursor = btr_cur_get_page_cur(cursor); - - page_cur_search(insert_block, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); - - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, - offsets, heap, n_ext, mtr); - -#ifdef UNIV_ZIP_DEBUG - { - page_t* insert_page - = buf_block_get_frame(insert_block); - - page_zip_des_t* insert_page_zip - = buf_block_get_page_zip(insert_block); - - ut_a(!insert_page_zip - || page_zip_validate(insert_page_zip, insert_page, - cursor->index)); - } -#endif /* UNIV_ZIP_DEBUG */ - - if (rec != NULL) { - - goto func_exit; - } - - /* 8. If insert did not fit, try page reorganization. - For compressed pages, page_cur_tuple_insert() will have - attempted this already. */ - - if (page_cur_get_page_zip(page_cursor) - || !btr_page_reorganize(page_cursor, cursor->index, mtr)) { - - goto insert_failed; - } - - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, - offsets, heap, n_ext, mtr); - - if (rec == NULL) { - /* The insert did not fit on the page: loop back to the - start of the function for a new split */ -insert_failed: - /* We play safe and reset the free bits */ - if (!dict_index_is_clust(cursor->index)) { - ibuf_reset_free_bits(new_block); - ibuf_reset_free_bits(block); - } - - /* fprintf(stderr, "Split second round %lu\n", - page_get_page_no(page)); */ - n_iterations++; - ut_ad(n_iterations < 2 - || buf_block_get_page_zip(insert_block)); - ut_ad(!insert_will_fit); - - goto func_start; - } - -func_exit: - /* Insert fit on the page: update the free bits for the - left and right pages in the same mtr */ - - if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) { - ibuf_update_free_bits_for_two_pages_low( - buf_block_get_zip_size(left_block), - left_block, right_block, mtr); - } - -#if 0 - fprintf(stderr, "Split and insert done %lu %lu\n", - buf_block_get_page_no(left_block), - buf_block_get_page_no(right_block)); -#endif - 
MONITOR_INC(MONITOR_INDEX_SPLIT); - - ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index)); - ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index)); - - if (tuple == NULL) { - ut_ad(rec == NULL); - } - ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); - return(rec); -} - -/*************************************************************//** -Removes a page from the level list of pages. */ -UNIV_INTERN -void -btr_level_list_remove_func( -/*=======================*/ - ulint space, /*!< in: space where removed */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - page_t* page, /*!< in/out: page to remove */ - dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint prev_page_no; - ulint next_page_no; - - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); - ut_ad(space == page_get_space_id(page)); - /* Get the previous and next page numbers of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - - /* Update page links of the level */ - - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block - = btr_block_get(space, zip_size, prev_page_no, - RW_X_LATCH, index, mtr); - page_t* prev_page - = buf_block_get_frame(prev_block); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(prev_page) == page_is_comp(page)); - ut_a(btr_page_get_next(prev_page, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_next(prev_page, - buf_block_get_page_zip(prev_block), - next_page_no, mtr); - } - - if (next_page_no != FIL_NULL) { - buf_block_t* next_block - = btr_block_get(space, zip_size, next_page_no, - RW_X_LATCH, index, mtr); - page_t* next_page - = buf_block_get_frame(next_block); -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_page) == page_is_comp(page)); - ut_a(btr_page_get_prev(next_page, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - 
btr_page_set_prev(next_page, - buf_block_get_page_zip(next_block), - prev_page_no, mtr); - } -} - -/****************************************************************//** -Writes the redo log record for setting an index record as the predefined -minimum record. */ -UNIV_INLINE -void -btr_set_min_rec_mark_log( -/*=====================*/ - rec_t* rec, /*!< in: record */ - byte type, /*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(rec, type, mtr); - - /* Write rec offset as a 2-byte ulint */ - mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES); -} -#else /* !UNIV_HOTBACKUP */ -# define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/****************************************************************//** -Parses the redo log record for setting an index record as the predefined -minimum record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_parse_set_min_rec_mark( -/*=======================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - ulint comp, /*!< in: nonzero=compact page format */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - rec_t* rec; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - if (page) { - ut_a(!page_is_comp(page) == !comp); - - rec = page + mach_read_from_2(ptr); - - btr_set_min_rec_mark(rec, mtr); - } - - return(ptr + 2); -} - -/****************************************************************//** -Sets a record as the predefined minimum record. 
*/ -UNIV_INTERN -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /*!< in: record */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint info_bits; - - if (page_rec_is_comp(rec)) { - info_bits = rec_get_info_bits(rec, TRUE); - - rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG); - - btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr); - } else { - info_bits = rec_get_info_bits(rec, FALSE); - - rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG); - - btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr); - } -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Deletes on the upper level the node pointer to a page. */ -UNIV_INTERN -void -btr_node_ptr_delete( -/*================*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: page whose node pointer is deleted */ - mtr_t* mtr) /*!< in: mtr */ -{ - btr_cur_t cursor; - ibool compressed; - dberr_t err; - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - /* Delete node pointer on father page */ - btr_page_get_father(index, block, mtr, &cursor); - - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, - BTR_CREATE_FLAG, RB_NONE, mtr); - ut_a(err == DB_SUCCESS); - - if (!compressed) { - btr_cur_compress_if_useful(&cursor, FALSE, mtr); - } -} - -/*************************************************************//** -If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. 
-@return father block */ -UNIV_INTERN -buf_block_t* -btr_lift_page_up( -/*=============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: page which is the only on its level; - must not be empty: use - btr_discard_only_page_on_level if the last - record from the page should be removed */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* father_block; - page_t* father_page; - ulint page_level; - page_zip_des_t* father_page_zip; - page_t* page = buf_block_get_frame(block); - ulint root_page_no; - buf_block_t* blocks[BTR_MAX_LEVELS]; - ulint n_blocks; /*!< last used index in blocks[] */ - ulint i; - bool lift_father_up; - buf_block_t* block_orig = block; - - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - page_level = btr_page_get_level(page, mtr); - root_page_no = dict_index_get_page(index); - - { - btr_cur_t cursor; - ulint* offsets = NULL; - mem_heap_t* heap = mem_heap_create( - sizeof(*offsets) - * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields)); - buf_block_t* b; - - offsets = btr_page_get_father_block(offsets, heap, index, - block, mtr, &cursor); - father_block = btr_cur_get_block(&cursor); - father_page_zip = buf_block_get_page_zip(father_block); - father_page = buf_block_get_frame(father_block); - - n_blocks = 0; - - /* Store all ancestor pages so we can reset their - levels later on. We have to do all the searches on - the tree now because later on, after we've replaced - the first level, the tree is in an inconsistent state - and can not be searched. 
*/ - for (b = father_block; - buf_block_get_page_no(b) != root_page_no; ) { - ut_a(n_blocks < BTR_MAX_LEVELS); - - offsets = btr_page_get_father_block(offsets, heap, - index, b, - mtr, &cursor); - - blocks[n_blocks++] = b = btr_cur_get_block(&cursor); - } - - lift_father_up = (n_blocks && page_level == 0); - if (lift_father_up) { - /* The father page also should be the only on its level (not - root). We should lift up the father page at first. - Because the leaf page should be lifted up only for root page. - The freeing page is based on page_level (==0 or !=0) - to choose segment. If the page_level is changed ==0 from !=0, - later freeing of the page doesn't find the page allocation - to be freed.*/ - - block = father_block; - page = buf_block_get_frame(block); - page_level = btr_page_get_level(page, mtr); - - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - father_block = blocks[0]; - father_page_zip = buf_block_get_page_zip(father_block); - father_page = buf_block_get_frame(father_block); - } - - mem_heap_free(heap); - } - - btr_search_drop_page_hash_index(block); - - /* Make the father empty */ - btr_page_empty(father_block, father_page_zip, index, page_level, mtr); - page_level++; - - /* Copy the records to the father page one by one. */ - if (0 -#ifdef UNIV_ZIP_COPY - || father_page_zip -#endif /* UNIV_ZIP_COPY */ - || !page_copy_rec_list_end(father_block, block, - page_get_infimum_rec(page), - index, mtr)) { - const page_zip_des_t* page_zip - = buf_block_get_page_zip(block); - ut_a(father_page_zip); - ut_a(page_zip); - - /* Copy the page byte for byte. */ - page_zip_copy_recs(father_page_zip, father_page, - page_zip, page, index, mtr); - - /* Update the lock table and possible hash index. 
*/ - - lock_move_rec_list_end(father_block, block, - page_get_infimum_rec(page)); - - btr_search_move_or_delete_hash_entries(father_block, block, - index); - } - - btr_blob_dbg_remove(page, index, "btr_lift_page_up"); - lock_update_copy_and_discard(father_block, block); - - /* Go upward to root page, decrementing levels by one. */ - for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) { - page_t* page = buf_block_get_frame(blocks[i]); - page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]); - - ut_ad(btr_page_get_level(page, mtr) == page_level + 1); - - btr_page_set_level(page, page_zip, page_level, mtr); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - } - - /* Free the file page */ - btr_page_free(index, block, mtr); - - /* We play it safe and reset the free bits for the father */ - if (!dict_index_is_clust(index)) { - ibuf_reset_free_bits(father_block); - } - ut_ad(page_validate(father_page, index)); - ut_ad(btr_check_node_ptr(index, father_block, mtr)); - - return(lift_father_up ? block_orig : father_block); -} - -/*************************************************************//** -Tries to merge the page first to the left immediate brother if such a -brother exists, and the node pointers to the current page and to the brother -reside on the same page. If the left brother does not satisfy these -conditions, looks at the right brother. If the page is the only one on that -level lifts the records of the page to the father page, thus reducing the -tree height. It is assumed that mtr holds an x-latch on the tree and on the -page. If cursor is on the leaf level, mtr must also hold x-latches to the -brothers, if they exist. 
-@return TRUE on success */ -UNIV_INTERN -ibool -btr_compress( -/*=========*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - dict_index_t* index; - ulint space; - ulint zip_size; - ulint left_page_no; - ulint right_page_no; - buf_block_t* merge_block; - page_t* merge_page = NULL; - page_zip_des_t* merge_page_zip; - ibool is_left; - buf_block_t* block; - page_t* page; - btr_cur_t father_cursor; - mem_heap_t* heap; - ulint* offsets; - ulint nth_rec = 0; /* remove bogus warning */ - DBUG_ENTER("btr_compress"); - - block = btr_cur_get_block(cursor); - page = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - - btr_assert_not_corrupted(block, index); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - - MONITOR_INC(MONITOR_INDEX_MERGE_ATTEMPTS); - - left_page_no = btr_page_get_prev(page, mtr); - right_page_no = btr_page_get_next(page, mtr); - -#ifdef UNIV_DEBUG - if (!page_is_leaf(page) && left_page_no == FIL_NULL) { - ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)), - page_is_comp(page))); - } -#endif /* UNIV_DEBUG */ - - heap = mem_heap_create(100); - offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, - &father_cursor); - - if (adjust) { - nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); - ut_ad(nth_rec > 0); - } - - if (left_page_no == FIL_NULL && right_page_no == FIL_NULL) { - /* The page is the only one on the level, lift the records - to the father */ - - merge_block = btr_lift_page_up(index, block, mtr); - goto 
func_exit; - } - - /* Decide the page to which we try to merge and which will inherit - the locks */ - - is_left = btr_can_merge_with_page(cursor, left_page_no, - &merge_block, mtr); - - DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;); - - if(!is_left - && !btr_can_merge_with_page(cursor, right_page_no, &merge_block, - mtr)) { - goto err_exit; - } - - merge_page = buf_block_get_frame(merge_block); - -#ifdef UNIV_BTR_DEBUG - if (is_left) { - ut_a(btr_page_get_next(merge_page, mtr) - == buf_block_get_page_no(block)); - } else { - ut_a(btr_page_get_prev(merge_page, mtr) - == buf_block_get_page_no(block)); - } -#endif /* UNIV_BTR_DEBUG */ - - ut_ad(page_validate(merge_page, index)); - - merge_page_zip = buf_block_get_page_zip(merge_block); -#ifdef UNIV_ZIP_DEBUG - if (merge_page_zip) { - const page_zip_des_t* page_zip - = buf_block_get_page_zip(block); - ut_a(page_zip); - ut_a(page_zip_validate(merge_page_zip, merge_page, index)); - ut_a(page_zip_validate(page_zip, page, index)); - } -#endif /* UNIV_ZIP_DEBUG */ - - /* Move records to the merge page */ - if (is_left) { - rec_t* orig_pred = page_copy_rec_list_start( - merge_block, block, page_get_supremum_rec(page), - index, mtr); - - if (!orig_pred) { - goto err_exit; - } - - btr_search_drop_page_hash_index(block); - - /* Remove the page from the level list */ - btr_level_list_remove(space, zip_size, page, index, mtr); - - btr_node_ptr_delete(index, block, mtr); - lock_update_merge_left(merge_block, orig_pred, block); - - if (adjust) { - nth_rec += page_rec_get_n_recs_before(orig_pred); - } - } else { - rec_t* orig_succ; - ibool compressed; - dberr_t err; - btr_cur_t cursor2; - /* father cursor pointing to node ptr - of the right sibling */ -#ifdef UNIV_BTR_DEBUG - byte fil_page_prev[4]; -#endif /* UNIV_BTR_DEBUG */ - - btr_page_get_father(index, merge_block, mtr, &cursor2); - - if (merge_page_zip && left_page_no == FIL_NULL) { - - /* The function page_zip_compress(), which will be - invoked by 
page_copy_rec_list_end() below, - requires that FIL_PAGE_PREV be FIL_NULL. - Clear the field, but prepare to restore it. */ -#ifdef UNIV_BTR_DEBUG - memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4); -#endif /* UNIV_BTR_DEBUG */ -#if FIL_NULL != 0xffffffff -# error "FIL_NULL != 0xffffffff" -#endif - memset(merge_page + FIL_PAGE_PREV, 0xff, 4); - } - - orig_succ = page_copy_rec_list_end(merge_block, block, - page_get_infimum_rec(page), - cursor->index, mtr); - - if (!orig_succ) { - ut_a(merge_page_zip); -#ifdef UNIV_BTR_DEBUG - if (left_page_no == FIL_NULL) { - /* FIL_PAGE_PREV was restored from - merge_page_zip. */ - ut_a(!memcmp(fil_page_prev, - merge_page + FIL_PAGE_PREV, 4)); - } -#endif /* UNIV_BTR_DEBUG */ - goto err_exit; - } - - btr_search_drop_page_hash_index(block); - -#ifdef UNIV_BTR_DEBUG - if (merge_page_zip && left_page_no == FIL_NULL) { - - /* Restore FIL_PAGE_PREV in order to avoid an assertion - failure in btr_level_list_remove(), which will set - the field again to FIL_NULL. Even though this makes - merge_page and merge_page_zip inconsistent for a - split second, it is harmless, because the pages - are X-latched. 
*/ - memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4); - } -#endif /* UNIV_BTR_DEBUG */ - - /* Remove the page from the level list */ - btr_level_list_remove(space, zip_size, page, index, mtr); - - /* Replace the address of the old child node (= page) with the - address of the merge page to the right */ - btr_node_ptr_set_child_page_no( - btr_cur_get_rec(&father_cursor), - btr_cur_get_page_zip(&father_cursor), - offsets, right_page_no, mtr); - - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2, - BTR_CREATE_FLAG, - RB_NONE, mtr); - ut_a(err == DB_SUCCESS); - - if (!compressed) { - btr_cur_compress_if_useful(&cursor2, FALSE, mtr); - } - - lock_update_merge_right(merge_block, orig_succ, block); - } - - btr_blob_dbg_remove(page, index, "btr_compress"); - - if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) { - /* Update the free bits of the B-tree page in the - insert buffer bitmap. This has to be done in a - separate mini-transaction that is committed before the - main mini-transaction. We cannot update the insert - buffer bitmap in this mini-transaction, because - btr_compress() can be invoked recursively without - committing the mini-transaction in between. Since - insert buffer bitmap pages have a lower rank than - B-tree pages, we must not access other pages in the - same mini-transaction after accessing an insert buffer - bitmap page. */ - - /* The free bits in the insert buffer bitmap must - never exceed the free space on a page. It is safe to - decrement or reset the bits in the bitmap in a - mini-transaction that is committed before the - mini-transaction that affects the free space. */ - - /* It is unsafe to increment the bits in a separately - committed mini-transaction, because in crash recovery, - the free bits could momentarily be set too high. 
*/ - - if (zip_size) { - /* Because the free bits may be incremented - and we cannot update the insert buffer bitmap - in the same mini-transaction, the only safe - thing we can do here is the pessimistic - approach: reset the free bits. */ - ibuf_reset_free_bits(merge_block); - } else { - /* On uncompressed pages, the free bits will - never increase here. Thus, it is safe to - write the bits accurately in a separate - mini-transaction. */ - ibuf_update_free_bits_if_full(merge_block, - UNIV_PAGE_SIZE, - ULINT_UNDEFINED); - } - } - - ut_ad(page_validate(merge_page, index)); -#ifdef UNIV_ZIP_DEBUG - ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page, - index)); -#endif /* UNIV_ZIP_DEBUG */ - - /* Free the file page */ - btr_page_free(index, block, mtr); - - ut_ad(btr_check_node_ptr(index, merge_block, mtr)); -func_exit: - mem_heap_free(heap); - - if (adjust) { - ut_ad(nth_rec > 0); - btr_cur_position( - index, - page_rec_get_nth(merge_block->frame, nth_rec), - merge_block, cursor); - } - - MONITOR_INC(MONITOR_INDEX_MERGE_SUCCESSFUL); - - DBUG_RETURN(TRUE); - -err_exit: - /* We play it safe and reset the free bits. */ - if (zip_size - && merge_page - && page_is_leaf(merge_page) - && !dict_index_is_clust(index)) { - ibuf_reset_free_bits(merge_block); - } - - mem_heap_free(heap); - DBUG_RETURN(FALSE); -} - -/*************************************************************//** -Discards a page that is the only page on its level. This will empty -the whole B-tree, leaving just an empty root page. This function -should never be reached, because btr_compress(), which is invoked in -delete operations, calls btr_lift_page_up() to flatten the B-tree. 
*/
static
void
btr_discard_only_page_on_level(
/*===========================*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: page which is the only on its level */
	mtr_t*		mtr)	/*!< in: mtr */
{
	/* Remember PAGE_MAX_TRX_ID of the leaf page before it is freed. */
	trx_id_t	max_trx_id
		= page_get_max_trx_id(buf_block_get_frame(block));

	/* Climb from the given page to the root, freeing each page on
	the way; every page visited must be alone on its level and hold
	exactly one record. */
	for (ulint page_level = 0;
	     buf_block_get_page_no(block) != dict_index_get_page(index);
	     page_level++) {
		btr_cur_t	father_cursor;
		buf_block_t*	father_block;
		const page_t*	page	= buf_block_get_frame(block);

		ut_a(page_get_n_recs(page) == 1);
		ut_a(page_level == btr_page_get_level(page, mtr));
		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);

		ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
		btr_search_drop_page_hash_index(block);

		btr_page_get_father(index, block, mtr, &father_cursor);
		father_block = btr_cur_get_block(&father_cursor);

		lock_update_discard(father_block, PAGE_HEAP_NO_SUPREMUM,
				    block);

		/* Release the file page. */
		btr_page_free(index, block, mtr);

		block = father_block;
	}

	/* block now refers to the root page, which holds nothing but
	the node pointer to the block(s) discarded above. */

#ifdef UNIV_BTR_DEBUG
	if (!dict_index_is_ibuf(index)) {
		const page_t*	root	= buf_block_get_frame(block);
		const ulint	space	= dict_index_get_space(index);
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
					    + root, space));
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
					    + root, space));
	}
#endif /* UNIV_BTR_DEBUG */

	/* Turn the root into an empty leaf page. */
	btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
	ut_ad(page_is_leaf(buf_block_get_frame(block)));

	if (!dict_index_is_clust(index)) {
		/* Be conservative: reset the free bits for the root,
		then carry over the saved PAGE_MAX_TRX_ID. */
		ibuf_reset_free_bits(block);

		ut_a(max_trx_id);
		page_set_max_trx_id(block,
				    buf_block_get_page_zip(block),
				    max_trx_id, mtr);
	}
}

/*************************************************************//**
Discards a page from a B-tree. This is used to remove the last record from
a B-tree page: the whole page must be removed at the same time. This cannot
be used for the root page, which is allowed to be empty.
*/ -UNIV_INTERN -void -btr_discard_page( -/*=============*/ - btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on - the root page */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_index_t* index; - ulint space; - ulint zip_size; - ulint left_page_no; - ulint right_page_no; - buf_block_t* merge_block; - page_t* merge_page; - buf_block_t* block; - page_t* page; - rec_t* node_ptr; - - block = btr_cur_get_block(cursor); - index = btr_cur_get_index(cursor); - - ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - - MONITOR_INC(MONITOR_INDEX_DISCARD); - - /* Decide the page which will inherit the locks */ - - left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block), - mtr); - right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block), - mtr); - - if (left_page_no != FIL_NULL) { - merge_block = btr_block_get(space, zip_size, left_page_no, - RW_X_LATCH, index, mtr); - merge_page = buf_block_get_frame(merge_block); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_page, mtr) - == buf_block_get_page_no(block)); -#endif /* UNIV_BTR_DEBUG */ - } else if (right_page_no != FIL_NULL) { - merge_block = btr_block_get(space, zip_size, right_page_no, - RW_X_LATCH, index, mtr); - merge_page = buf_block_get_frame(merge_block); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_page, mtr) - == buf_block_get_page_no(block)); -#endif /* UNIV_BTR_DEBUG */ - } else { - btr_discard_only_page_on_level(index, block, mtr); - - return; - } - - page = buf_block_get_frame(block); - ut_a(page_is_comp(merge_page) == page_is_comp(page)); - btr_search_drop_page_hash_index(block); - - if (left_page_no == FIL_NULL && !page_is_leaf(page)) { - - /* We have to mark the leftmost node pointer on the right - side page as the predefined 
minimum record */ - node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page)); - - ut_ad(page_rec_is_user_rec(node_ptr)); - - /* This will make page_zip_validate() fail on merge_page - until btr_level_list_remove() completes. This is harmless, - because everything will take place within a single - mini-transaction and because writing to the redo log - is an atomic operation (performed by mtr_commit()). */ - btr_set_min_rec_mark(node_ptr, mtr); - } - - btr_node_ptr_delete(index, block, mtr); - - /* Remove the page from the level list */ - btr_level_list_remove(space, zip_size, page, index, mtr); -#ifdef UNIV_ZIP_DEBUG - { - page_zip_des_t* merge_page_zip - = buf_block_get_page_zip(merge_block); - ut_a(!merge_page_zip - || page_zip_validate(merge_page_zip, merge_page, index)); - } -#endif /* UNIV_ZIP_DEBUG */ - - if (left_page_no != FIL_NULL) { - lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM, - block); - } else { - lock_update_discard(merge_block, - lock_get_min_heap_no(merge_block), - block); - } - - btr_blob_dbg_remove(page, index, "btr_discard_page"); - - /* Free the file page */ - btr_page_free(index, block, mtr); - - ut_ad(btr_check_node_ptr(index, merge_block, mtr)); -} - -#ifdef UNIV_BTR_PRINT -/*************************************************************//** -Prints size info of a B-tree. 
*/ -UNIV_INTERN -void -btr_print_size( -/*===========*/ - dict_index_t* index) /*!< in: index tree */ -{ - page_t* root; - fseg_header_t* seg; - mtr_t mtr; - - if (dict_index_is_ibuf(index)) { - fputs("Sorry, cannot print info of an ibuf tree:" - " use ibuf functions\n", stderr); - - return; - } - - mtr_start(&mtr); - - root = btr_root_get(index, &mtr); - - seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr); - fseg_print(seg, &mtr); - - if (!dict_index_is_univ(index)) { - - seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr); - fseg_print(seg, &mtr); - } - - mtr_commit(&mtr); -} - -/************************************************************//** -Prints recursively index tree pages. */ -static -void -btr_print_recursive( -/*================*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: index page */ - ulint width, /*!< in: print this many entries from start - and end */ - mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */ - ulint** offsets,/*!< in/out: buffer for rec_get_offsets() */ - mtr_t* mtr) /*!< in: mtr */ -{ - const page_t* page = buf_block_get_frame(block); - page_cur_t cursor; - ulint n_recs; - ulint i = 0; - mtr_t mtr2; - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", - (ulong) btr_page_get_level(page, mtr), - (ulong) buf_block_get_page_no(block)); - - page_print(block, index, width, width); - - n_recs = page_get_n_recs(page); - - page_cur_set_before_first(block, &cursor); - page_cur_move_to_next(&cursor); - - while (!page_cur_is_after_last(&cursor)) { - - if (page_is_leaf(page)) { - - /* If this is the leaf level, do nothing */ - - } else if ((i <= width) || (i >= n_recs - width)) { - - const rec_t* node_ptr; - - mtr_start(&mtr2); - - node_ptr = page_cur_get_rec(&cursor); - - *offsets = rec_get_offsets(node_ptr, index, *offsets, - 
ULINT_UNDEFINED, heap); - btr_print_recursive(index, - btr_node_ptr_get_child(node_ptr, - index, - *offsets, - &mtr2), - width, heap, offsets, &mtr2); - mtr_commit(&mtr2); - } - - page_cur_move_to_next(&cursor); - i++; - } -} - -/**************************************************************//** -Prints directories and other info of all nodes in the tree. */ -UNIV_INTERN -void -btr_print_index( -/*============*/ - dict_index_t* index, /*!< in: index */ - ulint width) /*!< in: print this many entries from start - and end */ -{ - mtr_t mtr; - buf_block_t* root; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - fputs("--------------------------\n" - "INDEX TREE PRINT\n", stderr); - - mtr_start(&mtr); - - root = btr_root_block_get(index, RW_X_LATCH, &mtr); - - btr_print_recursive(index, root, width, &heap, &offsets, &mtr); - if (heap) { - mem_heap_free(heap); - } - - mtr_commit(&mtr); - - btr_validate_index(index, 0); -} -#endif /* UNIV_BTR_PRINT */ - -#ifdef UNIV_DEBUG -/************************************************************//** -Checks that the node pointer to a page is appropriate. 
-@return TRUE */ -UNIV_INTERN -ibool -btr_check_node_ptr( -/*===============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: index page */ - mtr_t* mtr) /*!< in: mtr */ -{ - mem_heap_t* heap; - dtuple_t* tuple; - ulint* offsets; - btr_cur_t cursor; - page_t* page = buf_block_get_frame(block); - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - if (dict_index_get_page(index) == buf_block_get_page_no(block)) { - - return(TRUE); - } - - heap = mem_heap_create(256); - offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, - &cursor); - - if (page_is_leaf(page)) { - - goto func_exit; - } - - tuple = dict_index_build_node_ptr( - index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap, - btr_page_get_level(page, mtr)); - - ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets)); -func_exit: - mem_heap_free(heap); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/************************************************************//** -Display identification information for a record. */ -static -void -btr_index_rec_validate_report( -/*==========================*/ - const page_t* page, /*!< in: index page */ - const rec_t* rec, /*!< in: index record */ - const dict_index_t* index) /*!< in: index */ -{ - fputs("InnoDB: Record in ", stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, ", page %lu, at offset %lu\n", - page_get_page_no(page), (ulint) page_offset(rec)); -} - -/************************************************************//** -Checks the size and number of fields in a record based on the definition of -the index. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -btr_index_rec_validate( -/*===================*/ - const rec_t* rec, /*!< in: index record */ - const dict_index_t* index, /*!< in: index */ - ibool dump_on_error) /*!< in: TRUE if the function - should print hex dump of record - and page on error */ -{ - ulint len; - ulint n; - ulint i; - const page_t* page; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - page = page_align(rec); - - if (dict_index_is_univ(index)) { - /* The insert buffer index tree can contain records from any - other index: we cannot check the number of fields or - their length */ - - return(TRUE); - } - - if ((ibool)!!page_is_comp(page) != dict_table_is_comp(index->table)) { - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", - (ulong) !!page_is_comp(page), - (ulong) dict_table_is_comp(index->table)); - - return(FALSE); - } - - n = dict_index_get_n_fields(index); - - if (!page_is_comp(page) && rec_get_n_fields_old(rec) != n) { - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields_old(rec), (ulong) n); - - if (dump_on_error) { - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_old(stderr, rec); - putc('\n', stderr); - } - return(FALSE); - } - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - for (i = 0; i < n; i++) { - ulint fixed_size = dict_col_get_fixed_size( - dict_index_get_nth_col(index, i), page_is_comp(page)); - - rec_get_nth_field_offs(offsets, i, &len); - - /* Note that if fixed_size != 0, it equals the - length of a fixed-size column in the clustered index. - A prefix index of the column is of fixed, but different - length. When fixed_size == 0, prefix_len is the maximum - length of the prefix index column. 
*/ - - if ((dict_index_get_nth_field(index, i)->prefix_len == 0 - && len != UNIV_SQL_NULL && fixed_size - && len != fixed_size) - || (dict_index_get_nth_field(index, i)->prefix_len > 0 - && len != UNIV_SQL_NULL - && len - > dict_index_get_nth_field(index, i)->prefix_len)) { - - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, - "InnoDB: field %lu len is %lu," - " should be %lu\n", - (ulong) i, (ulong) len, (ulong) fixed_size); - - if (dump_on_error) { - buf_page_print(page, 0, - BUF_PAGE_PRINT_NO_CRASH); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - } - if (heap) { - mem_heap_free(heap); - } - return(FALSE); - } - } - - if (heap) { - mem_heap_free(heap); - } - return(TRUE); -} - -/************************************************************//** -Checks the size and number of fields in records based on the definition of -the index. -@return TRUE if ok */ -static -ibool -btr_index_page_validate( -/*====================*/ - buf_block_t* block, /*!< in: index page */ - dict_index_t* index) /*!< in: index */ -{ - page_cur_t cur; - ibool ret = TRUE; -#ifndef DBUG_OFF - ulint nth = 1; -#endif /* !DBUG_OFF */ - - page_cur_set_before_first(block, &cur); - - /* Directory slot 0 should only contain the infimum record. */ - DBUG_EXECUTE_IF("check_table_rec_next", - ut_a(page_rec_get_nth_const( - page_cur_get_page(&cur), 0) - == cur.rec); - ut_a(page_dir_slot_get_n_owned( - page_dir_get_nth_slot( - page_cur_get_page(&cur), 0)) - == 1);); - - page_cur_move_to_next(&cur); - - for (;;) { - if (page_cur_is_after_last(&cur)) { - - break; - } - - if (!btr_index_rec_validate(cur.rec, index, TRUE)) { - - return(FALSE); - } - - /* Verify that page_rec_get_nth_const() is correctly - retrieving each record. 
*/ - DBUG_EXECUTE_IF("check_table_rec_next", - ut_a(cur.rec == page_rec_get_nth_const( - page_cur_get_page(&cur), - page_rec_get_n_recs_before( - cur.rec))); - ut_a(nth++ == page_rec_get_n_recs_before( - cur.rec));); - - page_cur_move_to_next(&cur); - } - - return(ret); -} - -/************************************************************//** -Report an error on one page of an index tree. */ -static -void -btr_validate_report1( -/*=================*/ - dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: B-tree level */ - const buf_block_t* block) /*!< in: index page */ -{ - fprintf(stderr, "InnoDB: Error in page %lu of ", - buf_block_get_page_no(block)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); - } - putc('\n', stderr); -} - -/************************************************************//** -Report an error on two pages of an index tree. */ -static -void -btr_validate_report2( -/*=================*/ - const dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: B-tree level */ - const buf_block_t* block1, /*!< in: first index page */ - const buf_block_t* block2) /*!< in: second index page */ -{ - fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ", - buf_block_get_page_no(block1), - buf_block_get_page_no(block2)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); - } - putc('\n', stderr); -} - -/************************************************************//** -Validates index tree level. 
-@return TRUE if ok */ -static -bool -btr_validate_level( -/*===============*/ - dict_index_t* index, /*!< in: index tree */ - const trx_t* trx, /*!< in: transaction or NULL */ - ulint level) /*!< in: level number */ -{ - ulint space; - ulint space_flags; - ulint zip_size; - buf_block_t* block; - page_t* page; - buf_block_t* right_block = 0; /* remove warning */ - page_t* right_page = 0; /* remove warning */ - page_t* father_page; - btr_cur_t node_cur; - btr_cur_t right_node_cur; - rec_t* rec; - ulint right_page_no; - ulint left_page_no; - page_cur_t cursor; - dtuple_t* node_ptr_tuple; - bool ret = true; - mtr_t mtr; - mem_heap_t* heap = mem_heap_create(256); - fseg_header_t* seg; - ulint* offsets = NULL; - ulint* offsets2= NULL; -#ifdef UNIV_ZIP_DEBUG - page_zip_des_t* page_zip; -#endif /* UNIV_ZIP_DEBUG */ - - mtr_start(&mtr); - - mtr_x_lock(dict_index_get_lock(index), &mtr); - - block = btr_root_block_get(index, RW_X_LATCH, &mtr); - page = buf_block_get_frame(block); - seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - - fil_space_get_latch(space, &space_flags); - - if (zip_size != dict_tf_get_zip_size(space_flags)) { - - ib_logf(IB_LOG_LEVEL_WARN, - "Flags mismatch: table=%lu, tablespace=%lu", - (ulint) index->table->flags, (ulint) space_flags); - - mtr_commit(&mtr); - - return(false); - } - - while (level != btr_page_get_level(page, &mtr)) { - const rec_t* node_ptr; - - if (fseg_page_is_free(seg, - block->page.space, block->page.offset)) { - - btr_validate_report1(index, level, block); - - ib_logf(IB_LOG_LEVEL_WARN, "page is free"); - - ret = false; - } - - ut_a(space == buf_block_get_space(block)); - ut_a(space == page_get_space_id(page)); -#ifdef UNIV_ZIP_DEBUG - page_zip = buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - ut_a(!page_is_leaf(page)); - - page_cur_set_before_first(block, &cursor); - 
page_cur_move_to_next(&cursor); - - node_ptr = page_cur_get_rec(&cursor); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr); - page = buf_block_get_frame(block); - } - - /* Now we are on the desired level. Loop through the pages on that - level. */ - - if (level == 0) { - /* Leaf pages are managed in their own file segment. */ - seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF; - } - -loop: - mem_heap_empty(heap); - offsets = offsets2 = NULL; - mtr_x_lock(dict_index_get_lock(index), &mtr); - -#ifdef UNIV_ZIP_DEBUG - page_zip = buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - ut_a(block->page.space == space); - - if (fseg_page_is_free(seg, block->page.space, block->page.offset)) { - - btr_validate_report1(index, level, block); - - ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free"); - ret = false; - - } else if (btr_page_get_index_id(page) != index->id) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Page index id " IB_ID_FMT " != data dictionary " - "index id " IB_ID_FMT, - btr_page_get_index_id(page), index->id); - - ret = false; - - } else if (!page_validate(page, index)) { - - btr_validate_report1(index, level, block); - ret = false; - - } else if (level == 0 && !btr_index_page_validate(block, index)) { - - /* We are on level 0. Check that the records have the right - number of fields, and field lengths are right. 
*/ - - ret = false; - } - - ut_a(btr_page_get_level(page, &mtr) == level); - - right_page_no = btr_page_get_next(page, &mtr); - left_page_no = btr_page_get_prev(page, &mtr); - - ut_a(!page_is_empty(page) - || (level == 0 - && page_get_page_no(page) == dict_index_get_page(index))); - - if (right_page_no != FIL_NULL) { - const rec_t* right_rec; - right_block = btr_block_get(space, zip_size, right_page_no, - RW_X_LATCH, index, &mtr); - right_page = buf_block_get_frame(right_block); - if (btr_page_get_prev(right_page, &mtr) - != page_get_page_no(page)) { - - btr_validate_report2(index, level, block, right_block); - fputs("InnoDB: broken FIL_PAGE_NEXT" - " or FIL_PAGE_PREV links\n", stderr); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); - - ret = false; - } - - if (page_is_comp(right_page) != page_is_comp(page)) { - btr_validate_report2(index, level, block, right_block); - fputs("InnoDB: 'compact' flag mismatch\n", stderr); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); - - ret = false; - - goto node_ptr_fails; - } - - rec = page_rec_get_prev(page_get_supremum_rec(page)); - right_rec = page_rec_get_next(page_get_infimum_rec( - right_page)); - offsets = rec_get_offsets(rec, index, - offsets, ULINT_UNDEFINED, &heap); - offsets2 = rec_get_offsets(right_rec, index, - offsets2, ULINT_UNDEFINED, &heap); - if (cmp_rec_rec(rec, right_rec, offsets, offsets2, - index) >= 0) { - - btr_validate_report2(index, level, block, right_block); - - fputs("InnoDB: records in wrong order" - " on adjacent pages\n", stderr); - - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH); - - fputs("InnoDB: record ", stderr); - rec = page_rec_get_prev(page_get_supremum_rec(page)); - rec_print(stderr, rec, index); - putc('\n', stderr); - fputs("InnoDB: record ", stderr); - rec = page_rec_get_next( - 
page_get_infimum_rec(right_page)); - rec_print(stderr, rec, index); - putc('\n', stderr); - - ret = false; - } - } - - if (level > 0 && left_page_no == FIL_NULL) { - ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)), - page_is_comp(page))); - } - - if (buf_block_get_page_no(block) != dict_index_get_page(index)) { - - /* Check father node pointers */ - - rec_t* node_ptr; - - offsets = btr_page_get_father_block(offsets, heap, index, - block, &mtr, &node_cur); - father_page = btr_cur_get_page(&node_cur); - node_ptr = btr_cur_get_rec(&node_cur); - - btr_cur_position( - index, page_rec_get_prev(page_get_supremum_rec(page)), - block, &node_cur); - offsets = btr_page_get_father_node_ptr(offsets, heap, - &node_cur, &mtr); - - if (node_ptr != btr_cur_get_rec(&node_cur) - || btr_node_ptr_get_child_page_no(node_ptr, offsets) - != buf_block_get_page_no(block)) { - - btr_validate_report1(index, level, block); - - fputs("InnoDB: node pointer to the page is wrong\n", - stderr); - - buf_page_print(father_page, 0, BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - - fputs("InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr, index); - - rec = btr_cur_get_rec(&node_cur); - fprintf(stderr, "\n" - "InnoDB: node ptr child page n:o %lu\n", - (ulong) btr_node_ptr_get_child_page_no( - rec, offsets)); - - fputs("InnoDB: record on page ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - ret = false; - - goto node_ptr_fails; - } - - if (!page_is_leaf(page)) { - node_ptr_tuple = dict_index_build_node_ptr( - index, - page_rec_get_next(page_get_infimum_rec(page)), - 0, heap, btr_page_get_level(page, &mtr)); - - if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, - offsets)) { - const rec_t* first_rec = page_rec_get_next( - page_get_infimum_rec(page)); - - btr_validate_report1(index, level, block); - - buf_page_print(father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(page, 0, - 
BUF_PAGE_PRINT_NO_CRASH); - - fputs("InnoDB: Error: node ptrs differ" - " on levels > 0\n" - "InnoDB: node ptr ", stderr); - rec_print_new(stderr, node_ptr, offsets); - fputs("InnoDB: first rec ", stderr); - rec_print(stderr, first_rec, index); - putc('\n', stderr); - ret = false; - - goto node_ptr_fails; - } - } - - if (left_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_next( - page_get_infimum_rec(father_page))); - ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL); - } - - if (right_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); - ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); - } else { - const rec_t* right_node_ptr - = page_rec_get_next(node_ptr); - - offsets = btr_page_get_father_block( - offsets, heap, index, right_block, - &mtr, &right_node_cur); - if (right_node_ptr - != page_get_supremum_rec(father_page)) { - - if (btr_cur_get_rec(&right_node_cur) - != right_node_ptr) { - ret = false; - fputs("InnoDB: node pointer to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - block); - - buf_page_print( - father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - } - } else { - page_t* right_father_page - = btr_cur_get_page(&right_node_cur); - - if (btr_cur_get_rec(&right_node_cur) - != page_rec_get_next( - page_get_infimum_rec( - right_father_page))) { - ret = false; - fputs("InnoDB: node pointer 2 to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - block); - - buf_page_print( - father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - } - - if (page_get_page_no(right_father_page) - != btr_page_get_next(father_page, &mtr)) { - - ret = 
false; - fputs("InnoDB: node pointer 3 to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - block); - - buf_page_print( - father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_father_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print( - right_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - } - } - } - } - -node_ptr_fails: - /* Commit the mini-transaction to release the latch on 'page'. - Re-acquire the latch on right_page, which will become 'page' - on the next loop. The page has already been checked. */ - mtr_commit(&mtr); - - if (trx_is_interrupted(trx)) { - /* On interrupt, return the current status. */ - } else if (right_page_no != FIL_NULL) { - - mtr_start(&mtr); - - block = btr_block_get( - space, zip_size, right_page_no, - RW_X_LATCH, index, &mtr); - - page = buf_block_get_frame(block); - - goto loop; - } - - mem_heap_free(heap); - - return(ret); -} - -/**************************************************************//** -Checks the consistency of an index tree. 
-@return DB_SUCCESS if ok, error code if not */
-UNIV_INTERN
-dberr_t
-btr_validate_index(
-/*===============*/
-	dict_index_t*	index,	/*!< in: index */
-	const trx_t*	trx)	/*!< in: transaction or NULL */
-{
-	dberr_t	err = DB_SUCCESS;
-
-	/* Full-text indexes are implemented by auxiliary tables,
-	not the B-tree, so there is no tree to check; an index that is
-	under online DDL is skipped as well. Both cases report
-	DB_SUCCESS. */
-	if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
-		return(err);
-	}
-
-	mtr_t		mtr;
-
-	mtr_start(&mtr);
-	/* X-lock the index lock inside this mini-transaction. */
-	mtr_x_lock(dict_index_get_lock(index), &mtr);
-
-	page_t*	root = btr_root_get(index, &mtr);
-	/* No root page and the table is flagged unreadable: report a
-	decryption failure rather than corruption. */
-	if (root == NULL && index->table->file_unreadable) {
-		err = DB_DECRYPTION_FAILED;
-		mtr_commit(&mtr);
-		return err;
-	}
-	/* NOTE(review): SRV_CORRUPT_TABLE_CHECK is defined outside this
-	view; the block passed to it commits the mini-transaction and
-	returns DB_CORRUPTION. */
-	SRV_CORRUPT_TABLE_CHECK(root,
-	{
-		mtr_commit(&mtr);
-		return(DB_CORRUPTION);
-	});
-
-	ulint n = btr_page_get_level(root, &mtr);
-	/* Check the levels one by one, from the root level n down to
-	level 0 (n - i takes the values n, n-1, ..., 0), and stop at the
-	first level that fails validation. */
-	for (ulint i = 0; i <= n; ++i) {
-
-		if (!btr_validate_level(index, trx, n - i)) {
-			err = DB_CORRUPTION;
-			break;
-		}
-	}
-
-	mtr_commit(&mtr);
-
-	return(err);
-}
-
-/**************************************************************//**
-Checks if the page in the cursor can be merged with given page.
-If necessary, re-organize the merge_page.
-@return TRUE if possible to merge.
*/
-UNIV_INTERN
-ibool
-btr_can_merge_with_page(
-/*====================*/
-	btr_cur_t*	cursor,		/*!< in: cursor on the page to merge */
-	ulint		page_no,	/*!< in: a sibling page */
-	buf_block_t**	merge_block,	/*!< out: the merge block */
-	mtr_t*		mtr)		/*!< in: mini-transaction */
-{
-	dict_index_t*	index;
-	page_t*		page;
-	ulint		space;
-	ulint		zip_size;
-	ulint		n_recs;
-	ulint		data_size;
-	ulint		max_ins_size_reorg;
-	ulint		max_ins_size;
-	buf_block_t*	mblock;
-	page_t*		mpage;
-	DBUG_ENTER("btr_can_merge_with_page");
-	/* FIL_NULL: no sibling page was given, nothing to merge with. */
-	if (page_no == FIL_NULL) {
-		goto error;
-	}
-
-	index = btr_cur_get_index(cursor);
-	page = btr_cur_get_page(cursor);
-	space = dict_index_get_space(index);
-	zip_size = dict_table_zip_size(index->table);
-	/* X-latch the sibling page that is the merge candidate. */
-	mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index,
-			       mtr);
-	mpage = buf_block_get_frame(mblock);
-	/* NOTE(review): mblock is dereferenced without a check, while
-	btr_cur_latch_leaves() guards every btr_block_get() result with
-	SRV_CORRUPT_TABLE_CHECK. Confirm that this is intentional. */
-	n_recs = page_get_n_recs(page);
-	data_size = page_get_data_size(page);
-	/* All n_recs records of the cursor page (data_size bytes) must
-	fit into mpage, at the latest after mpage has been reorganized. */
-	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
-		mpage, n_recs);
-
-	if (data_size > max_ins_size_reorg) {
-		goto error;
-	}
-
-	/* If compression padding tells us that merging would produce a
-	page that is packed too densely, i.e. one that is likely to fail
-	compression, then do not merge the pages. The check applies only
-	to compressed tables (zip_size != 0) and leaf pages. */
-	if (zip_size && page_is_leaf(mpage)
-	    && (page_get_data_size(mpage) + data_size
-		>= dict_index_zip_pad_optimal_page_size(index))) {
-
-		goto error;
-	}
-
-
-	max_ins_size = page_get_max_insert_size(mpage, n_recs);
-
-	if (data_size > max_ins_size) {
-
-		/* We have to reorganize mpage */
-
-		if (!btr_page_reorganize_block(
-			    false, page_zip_level, mblock, index, mtr)) {
-
-			goto error;
-		}
-
-		max_ins_size = page_get_max_insert_size(mpage, n_recs);
-
-		ut_ad(page_validate(mpage, index));
-		ut_ad(max_ins_size == max_ins_size_reorg);
-
-		if (data_size > max_ins_size) {
-
-			/* Add fault tolerance, though this should
-			never happen */
-
-			goto error;
-		}
-	}
-
-	*merge_block = mblock;
-	DBUG_RETURN(TRUE);
-
-error:
-	*merge_block = NULL;
-	DBUG_RETURN(FALSE);
-}
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc
deleted file mode 100644
index ffd7ebc7504..00000000000
--- a/storage/xtradb/btr/btr0cur.cc
+++ /dev/null
@@ -1,6148 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file btr/btr0cur.cc -The index tree cursor - -All changes that row operations make to a B-tree or the records -there must go through this module! Undo log records are written here -of every modify or insert of a clustered index record. - - NOTE!!! -To make sure we do not run out of disk space during a pessimistic -insert or update, we have to reserve 2 x the height of the index tree -many pages in the tablespace before we start the operation, because -if leaf splitting has been started, it is difficult to undo, except -by crashing the database and doing a roll-forward. - -Created 10/16/1994 Heikki Tuuri -*******************************************************/ - -#include "btr0cur.h" - -#ifdef UNIV_NONINL -#include "btr0cur.ic" -#endif - -#include "row0upd.h" -#ifndef UNIV_HOTBACKUP -#include "mtr0log.h" -#include "page0page.h" -#include "page0zip.h" -#include "rem0rec.h" -#include "rem0cmp.h" -#include "buf0lru.h" -#include "btr0btr.h" -#include "btr0sea.h" -#include "row0log.h" -#include "row0purge.h" -#include "row0upd.h" -#include "trx0rec.h" -#include "trx0roll.h" /* trx_is_recv() */ -#include "que0que.h" -#include "row0row.h" -#include "srv0srv.h" -#include "ibuf0ibuf.h" -#include "lock0lock.h" -#include "zlib.h" - -/** Buffered B-tree operation types, introduced as part of delete buffering. 
*/
-enum btr_op_t {
-	BTR_NO_OP = 0,			/*!< Not buffered */
-	BTR_INSERT_OP,			/*!< Insert, do not ignore UNIQUE */
-	BTR_INSERT_IGNORE_UNIQUE_OP,	/*!< Insert, ignoring UNIQUE */
-	BTR_DELETE_OP,			/*!< Purge a delete-marked record */
-	BTR_DELMARK_OP			/*!< Mark a record for deletion */
-};
-/* A btr_op_t value is derived in btr_cur_search_to_nth_level() from the
-BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK bits of latch_mode; BTR_INSERT
-becomes BTR_INSERT_IGNORE_UNIQUE_OP when BTR_IGNORE_SEC_UNIQUE is set too. */
-#ifdef UNIV_DEBUG
-/** If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations */
-UNIV_INTERN ibool	btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
-
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint	btr_cur_n_non_sea	= 0;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
-UNIV_INTERN ulint	btr_cur_n_sea		= 0;
-/** Old value of btr_cur_n_non_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint	btr_cur_n_non_sea_old	= 0;
-/** Old value of btr_cur_n_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint	btr_cur_n_sea_old	= 0;
-
-#ifdef UNIV_DEBUG
-/* Flag to limit optimistic insert records */
-UNIV_INTERN uint	btr_cur_limit_optimistic_insert_debug = 0;
-#endif /* UNIV_DEBUG */
-
-/** In the optimistic insert, if the insert does not fit, but this much space
-can be released by page reorganize, then it is reorganized */
-#define BTR_CUR_PAGE_REORGANIZE_LIMIT	(UNIV_PAGE_SIZE / 32)
-
-/** The structure of a BLOB part header.
-NOTE(review): the first two constants look like byte offsets of two fields
-inside the header (0 and 4, with a total size of 8, so presumably 4 bytes
-each); confirm against the code that reads and writes the header. */
-/* @{ */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_PART_LEN		0	/*!< BLOB part len on this
-						page */
-#define BTR_BLOB_HDR_NEXT_PAGE_NO	4	/*!< next BLOB part page no,
-						FIL_NULL if none */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_SIZE		8	/*!< Size of a BLOB
-						part header, in bytes */
-
-/** Estimated table level stats from sampled value.
-@param value sampled stats -@param index index being sampled -@param sample number of sampled rows -@param ext_size external stored data size -@param not_empty table not empty -@return estimated table wide stats from sampled value */ -#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\ - (((value) * (ib_int64_t) index->stat_n_leaf_pages \ - + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size))) - -/* @} */ -#endif /* !UNIV_HOTBACKUP */ - -/** A BLOB field reference full of zero, for use in assertions and tests. -Initially, BLOB field references are set to zero, in -dtuple_convert_big_rec(). */ -const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE] = { - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, -}; - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Marks all extern fields in a record as owned by the record. This function -should be called if the delete mark of a record is removed: a not delete -marked record always owns all its extern fields. */ -static -void -btr_cur_unmark_extern_fields( -/*=========================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed - part will be updated, or NULL */ - rec_t* rec, /*!< in/out: record in a clustered index */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - mtr_t* mtr); /*!< in: mtr, or NULL if not logged */ -/*******************************************************************//** -Adds path information to the cursor for the current page, for which -the binary search has been performed. 
*/ -static -void -btr_cur_add_path_info( -/*==================*/ - btr_cur_t* cursor, /*!< in: cursor positioned on a page */ - ulint height, /*!< in: height of the page in tree; - 0 means leaf node */ - ulint root_height); /*!< in: root node height in tree */ -/***********************************************************//** -Frees the externally stored fields for a record, if the field is mentioned -in the update vector. */ -static -void -btr_rec_free_updated_extern_fields( -/*===============================*/ - dict_index_t* index, /*!< in: index of rec; the index tree MUST be - X-latched */ - rec_t* rec, /*!< in: record */ - page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed - part will be updated, or NULL */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const upd_t* update, /*!< in: update vector */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* mtr); /*!< in: mini-transaction handle which contains - an X-latch to record page and to the tree */ -/***********************************************************//** -Frees the externally stored fields for a record. */ -static -void -btr_rec_free_externally_stored_fields( -/*==================================*/ - dict_index_t* index, /*!< in: index of the data, the index - tree MUST be X-latched */ - rec_t* rec, /*!< in: record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed - part will be updated, or NULL */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* mtr); /*!< in: mini-transaction handle which contains - an X-latch to record page and to the index - tree */ -#endif /* !UNIV_HOTBACKUP */ - -/******************************************************//** -The following function is used to set the deleted bit of a record. 
*/
-UNIV_INLINE
-void
-btr_rec_set_deleted_flag(
-/*=====================*/
-	rec_t*		rec,	/*!< in/out: physical record */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
-	ulint		flag)	/*!< in: nonzero if delete marked */
-{
-	if (page_rec_is_comp(rec)) {
-		rec_set_deleted_flag_new(rec, page_zip, flag);
-	} else {
-		ut_ad(!page_zip);	/* old-format branch expects no page_zip */
-		rec_set_deleted_flag_old(rec, flag);
-	}
-}
-
-#ifndef UNIV_HOTBACKUP
-/*==================== B-TREE SEARCH =========================*/
-
-/********************************************************************//**
-Latches the leaf page or pages requested.
-BTR_SEARCH_LEAF / BTR_MODIFY_LEAF: only page_no is latched (S or X).
-BTR_SEARCH_TREE / BTR_MODIFY_TREE: the left sibling, page_no and the right
-sibling are fetched in that order; BTR_MODIFY_TREE X-latches all of them,
-BTR_SEARCH_TREE S-latches page_no and fetches the siblings with RW_NO_LATCH,
-releasing their buffer-fix at once.
-BTR_SEARCH_PREV / BTR_MODIFY_PREV: the left sibling (stored in
-cursor->left_block) and then page_no are latched (S or X).
-Every fetched block is passed to SRV_CORRUPT_TABLE_CHECK with a plain
-return as its action (the macro is defined outside this view).
-Any other latch_mode ends in ut_error. */
-static
-void
-btr_cur_latch_leaves(
-/*=================*/
-	page_t*		page,		/*!< in: leaf page where the search
-					converged */
-	ulint		space,		/*!< in: space id */
-	ulint		zip_size,	/*!< in: compressed page size in bytes
-					or 0 for uncompressed pages */
-	ulint		page_no,	/*!< in: page number of the leaf */
-	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/*!< in: cursor */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	ulint		mode;
-	ulint		sibling_mode;
-	ulint		left_page_no;
-	ulint		right_page_no;
-	buf_block_t*	get_block;
-
-	ut_ad(page && mtr);
-
-	switch (latch_mode) {
-	case BTR_SEARCH_LEAF:
-	case BTR_MODIFY_LEAF:
-		mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
-		get_block = btr_block_get(
-			space, zip_size, page_no, mode, cursor->index, mtr);
-
-		SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
-		get_block->check_index_page_at_flush = TRUE;
-		return;
-	case BTR_SEARCH_TREE:
-	case BTR_MODIFY_TREE:
-		if (UNIV_UNLIKELY(latch_mode == BTR_SEARCH_TREE)) {
-			mode = RW_S_LATCH;
-			sibling_mode = RW_NO_LATCH;
-		} else {
-			mode = sibling_mode = RW_X_LATCH;
-		}
-		/* Fetch and possibly latch also brothers from left to right */
-		left_page_no = btr_page_get_prev(page, mtr);
-
-		if (left_page_no != FIL_NULL) {
-			get_block = btr_block_get(
-				space, zip_size, left_page_no,
-				sibling_mode, cursor->index, mtr);
-
-			SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
-			ut_a(page_is_comp(get_block->frame)
-			     == page_is_comp(page));
-
-			/* For fake_change mode we avoid a detailed validation,
-			as it operates on a tweaked format in which the
-			validation may fail. */
-			ut_a(sibling_mode == RW_NO_LATCH
-			     || btr_page_get_next(get_block->frame, mtr)
-			     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			if (sibling_mode == RW_NO_LATCH) {
-				/* btr_block_get() called with RW_NO_LATCH will
-				fix the read block in the buffer. This serves
-				no purpose for the fake changes prefetching,
-				thus we unfix the sibling blocks immediately.*/
-				mtr_memo_release(mtr, get_block,
-						 MTR_MEMO_BUF_FIX);
-			} else {
-				get_block->check_index_page_at_flush = TRUE;
-			}
-		}
-
-		get_block = btr_block_get(
-			space, zip_size, page_no,
-			mode, cursor->index, mtr);
-
-		SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
-		get_block->check_index_page_at_flush = TRUE;
-
-		right_page_no = btr_page_get_next(page, mtr);
-
-		if (right_page_no != FIL_NULL) {
-			get_block = btr_block_get(
-				space, zip_size, right_page_no,
-				sibling_mode, cursor->index, mtr);
-
-			SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
-			ut_a(page_is_comp(get_block->frame)
-			     == page_is_comp(page));
-			ut_a(btr_page_get_prev(get_block->frame, mtr)
-			     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			if (sibling_mode == RW_NO_LATCH) {
-				mtr_memo_release(mtr, get_block,
-						 MTR_MEMO_BUF_FIX);
-			} else {
-				get_block->check_index_page_at_flush = TRUE;
-			}
-		}
-
-		return;
-
-	case BTR_SEARCH_PREV:
-	case BTR_MODIFY_PREV:
-		mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
-		/* latch also left brother */
-		left_page_no = btr_page_get_prev(page, mtr);
-
-		if (left_page_no != FIL_NULL) {
-			get_block = btr_block_get(
-				space, zip_size,
-				left_page_no, mode, cursor->index, mtr);
-			cursor->left_block = get_block;
-
-			SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
-			ut_a(page_is_comp(get_block->frame)
-			     == page_is_comp(page));
-			ut_a(btr_page_get_next(get_block->frame, mtr)
-			     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-			get_block->check_index_page_at_flush = TRUE;
-		}
-
-		get_block = btr_block_get(
-			space, zip_size, page_no, mode, cursor->index, mtr);
-
-		SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
-		get_block->check_index_page_at_flush = TRUE;
-		return;
-	}
-	/* No case above matched: latch_mode is not supported here. */
-	ut_error;
-}
-
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will have a sensible value.
-
-If mode is PAGE_CUR_LE, the cursor is left at the place where an insert of
-the search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record. */
-UNIV_INTERN
-dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
-	dict_index_t*	index,	/*!< in: index */
-	ulint		level,	/*!< in: the tree level of search */
-	const dtuple_t*	tuple,	/*!< in: data tuple; NOTE: n_fields_cmp in
-				tuple must be set so that it cannot get
-				compared to the node ptr page number field!
*/
-	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
-				Inserts should always be made using
-				PAGE_CUR_LE to search the position! */
-	ulint		latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
-				at most one of BTR_INSERT, BTR_DELETE_MARK,
-				BTR_DELETE, or BTR_ESTIMATE;
-				cursor->left_block is used to store a pointer
-				to the left neighbor page, in the cases
-				BTR_SEARCH_PREV and BTR_MODIFY_PREV;
-				NOTE that if has_search_latch
-				is != 0, we maybe do not have a latch set
-				on the cursor page, we assume
-				the caller uses his search latch
-				to protect the record! */
-	btr_cur_t*	cursor, /*!< in/out: tree cursor; the cursor page is
-				s- or x-latched, but see also above! */
-	ulint		has_search_latch,/*!< in: info on the latch mode the
-				caller currently has on btr_search_latch:
-				RW_S_LATCH, or 0 */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	page_t*		page;
-	buf_block_t*	block;
-	ulint		space;
-	buf_block_t*	guess;
-	ulint		height;
-	ulint		page_no;
-	ulint		up_match;
-	ulint		up_bytes;
-	ulint		low_match;
-	ulint		low_bytes;
-	ulint		savepoint;
-	ulint		rw_latch;
-	ulint		page_mode;
-	ulint		buf_mode;
-	ulint		estimate;
-	ulint		zip_size;
-	page_cur_t*	page_cursor;
-	btr_op_t	btr_op;
-	ulint		root_height = 0; /* remove warning */
-	dberr_t		err = DB_SUCCESS;
-
-#ifdef BTR_CUR_ADAPT
-	btr_search_t*	info;
-#endif
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	rec_offs_init(offsets_);
-	/* Currently, PAGE_CUR_LE is the only search mode used for searches
-	ending to upper levels */
-
-	ut_ad(level == 0 || mode == PAGE_CUR_LE);
-	ut_ad(dict_index_check_search_tuple(index, tuple));
-	ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
-	ut_ad(dtuple_check_typed(tuple));
-	ut_ad(!(index->type & DICT_FTS));
-	ut_ad(index->page != FIL_NULL);
-
-	UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match);
-	UNIV_MEM_INVALID(&cursor->up_bytes, sizeof cursor->up_bytes);
-	UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match);
-	UNIV_MEM_INVALID(&cursor->low_bytes, sizeof cursor->low_bytes);
-#ifdef UNIV_DEBUG
-	cursor->up_match = ULINT_UNDEFINED;
-	cursor->low_match = ULINT_UNDEFINED;
-#endif
-
-	ibool	s_latch_by_caller;
-	/* Nonzero when the caller passed BTR_ALREADY_S_LATCHED; the
-	assertion below checks that the index lock really is in the mtr
-	memo in S mode in that case. */
-	s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
-
-	ut_ad(!s_latch_by_caller
-	      || mtr_memo_contains(mtr, dict_index_get_lock(index),
-				   MTR_MEMO_S_LOCK));
-
-	/* These flags are mutually exclusive, they are lumped together
-	with the latch mode for historical reasons. It's possible for
-	none of the flags to be set. The flag found is translated into
-	the btr_op_t value that drives the change buffering further down. */
-	switch (UNIV_EXPECT(latch_mode
-			    & (BTR_INSERT | BTR_DELETE | BTR_DELETE_MARK),
-			    0)) {
-	case 0:
-		btr_op = BTR_NO_OP;
-		break;
-	case BTR_INSERT:
-		btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
-			? BTR_INSERT_IGNORE_UNIQUE_OP
-			: BTR_INSERT_OP;
-		break;
-	case BTR_DELETE:
-		btr_op = BTR_DELETE_OP;
-		ut_a(cursor->purge_node);
-		break;
-	case BTR_DELETE_MARK:
-		btr_op = BTR_DELMARK_OP;
-		break;
-	default:
-		/* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK
-		should be specified at a time */
-		ut_error;
-	}
-
-	/* Operations on the insert buffer tree cannot be buffered. */
-	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
-	/* Operations on the clustered index cannot be buffered. */
-	ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
-	/* Remember BTR_ESTIMATE before the flags are stripped below. */
-	estimate = latch_mode & BTR_ESTIMATE;
-
-	/* Turn the flags unrelated to the latch mode off. */
-	latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
-
-	ut_ad(!s_latch_by_caller
-	      || latch_mode == BTR_SEARCH_LEAF
-	      || latch_mode == BTR_MODIFY_LEAF);
-
-	cursor->flag = BTR_CUR_BINARY;
-	cursor->index = index;
-
-#ifndef BTR_CUR_ADAPT
-	guess = NULL;
-#else
-	info = btr_search_get_info(index);
-
-	guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
-
-# ifdef UNIV_SEARCH_PERF_STAT
-	info->n_searches++;
-# endif
-	if (rw_lock_get_writer(btr_search_get_latch(cursor->index)) ==
-	    RW_LOCK_NOT_LOCKED
-	    && latch_mode <= BTR_MODIFY_LEAF
-	    && info->last_hash_succ
-	    && !estimate
-# ifdef PAGE_CUR_LE_OR_EXTENDS
-	    && mode != PAGE_CUR_LE_OR_EXTENDS
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
-	    /* If !has_search_latch, we do a dirty read of
-	    btr_search_enabled below, and btr_search_guess_on_hash()
-	    will have to check it again. */
-	    && UNIV_LIKELY(btr_search_enabled)
-	    && btr_search_guess_on_hash(index, info, tuple, mode,
-					latch_mode, cursor,
-					has_search_latch, mtr)) {
-
-		/* Search using the hash index succeeded */
-
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_GE);
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-		ut_ad(cursor->low_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-		btr_cur_n_sea++;
-
-		return err;
-	}
-# endif /* BTR_CUR_HASH_ADAPT */
-#endif /* BTR_CUR_ADAPT */
-	btr_cur_n_non_sea++;
-
-	/* If the hash search did not succeed, do binary search down the
-	tree */
-
-	if (has_search_latch) {
-		/* Release possible search latch to obey latching order */
-		rw_lock_s_unlock(btr_search_get_latch(cursor->index));
-	}
-
-	/* Store the position of the tree latch we push to mtr so that we
-	know how to release it when we have latched leaf node(s) */
-
-	savepoint = mtr_set_savepoint(mtr);
-	/* Take the index lock: X for BTR_MODIFY_TREE; nothing for
-	BTR_CONT_MODIFY_TREE, where the X lock must already be in the
-	mtr memo; otherwise S, unless the caller already holds it. */
-	switch (latch_mode) {
-	case BTR_MODIFY_TREE:
-		mtr_x_lock(dict_index_get_lock(index), mtr);
-		break;
-	case BTR_CONT_MODIFY_TREE:
-		/* Do nothing */
-		ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
-					MTR_MEMO_X_LOCK));
-		break;
-	default:
-		if (!s_latch_by_caller) {
-			mtr_s_lock(dict_index_get_lock(index), mtr);
-		}
-	}
-
-	page_cursor = btr_cur_get_page_cur(cursor);
-
-	space = dict_index_get_space(index);
-	page_no = dict_index_get_page(index);
-
-	up_match = 0;
-	up_bytes = 0;
-	low_match = 0;
-	low_bytes = 0;
-
-	height = ULINT_UNDEFINED;
-
-	/* We use these modified search modes on non-leaf levels of the
-	B-tree. These let us end up in the right B-tree leaf. In that leaf
-	we use the original search mode. */
-
-	switch (mode) {
-	case PAGE_CUR_GE:
-		page_mode = PAGE_CUR_L;
-		break;
-	case PAGE_CUR_G:
-		page_mode = PAGE_CUR_LE;
-		break;
-	default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
-		      || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
-		ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-		page_mode = mode;
-		break;
-	}
-
-	/* Loop and search until we arrive at the desired level */
-
-search_loop:
-	buf_mode = BUF_GET;
-	rw_latch = RW_NO_LATCH;
-	/* By default the page is fetched without a latch. Only a leaf
-	page (height == 0) reached in a LEAF latch mode is latched directly
-	by buf_page_get_gen(), with latch_mode used as the latch type. */
-	if (height != 0) {
-		/* We are about to fetch the root or a non-leaf page. */
-	} else if (latch_mode <= BTR_MODIFY_LEAF) {
-		rw_latch = latch_mode;
-
-		if (btr_op != BTR_NO_OP
-		    && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
-
-			/* Try to buffer the operation if the leaf
-			page is not in the buffer pool. */
-
-			buf_mode = btr_op == BTR_DELETE_OP
-				? BUF_GET_IF_IN_POOL_OR_WATCH
-				: BUF_GET_IF_IN_POOL;
-		}
-	}
-
-	zip_size = dict_table_zip_size(index->table);
-
-retry_page_get:
-	block = buf_page_get_gen(
-		space, zip_size, page_no, rw_latch, guess, buf_mode,
-		file, line, mtr, &err);
-
-	/* Note that block==NULL signifies either an error or change
-	buffering.
-	NOTE(review): the two adjacent string literals of the decryption
-	warning below meet with a trailing and a leading space, so the
-	emitted message contains two consecutive spaces. */
-	if (err != DB_SUCCESS) {
-		ut_ad(block == NULL);
-		if (err == DB_DECRYPTION_FAILED) {
-			ib_push_warning((void *)NULL,
-				DB_DECRYPTION_FAILED,
-				"Table %s is encrypted but encryption service or"
-				" used key_id is not available.
" - " Can't continue reading table.",
-				index->table->name);
-			index->table->file_unreadable = true;
-		}
-
-		goto func_exit;
-	}
-
-	if (block == NULL) {
-		SRV_CORRUPT_TABLE_CHECK(buf_mode == BUF_GET_IF_IN_POOL ||
-					buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH,
-			{
-				page_cursor->block = 0;
-				page_cursor->rec = 0;
-				if (estimate) {
-
-					cursor->path_arr->nth_rec =
-						ULINT_UNDEFINED;
-				}
-
-				goto func_exit;
-			});
-
-		/* This must be a search to perform an insert/delete
-		mark/ delete; try using the insert/delete buffer */
-
-		ut_ad(height == 0);
-		ut_ad(cursor->thr);
-
-		switch (btr_op) {
-		case BTR_INSERT_OP:
-		case BTR_INSERT_IGNORE_UNIQUE_OP:
-			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
-			if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
-					space, zip_size, page_no,
-					cursor->thr)) {
-
-				cursor->flag = BTR_CUR_INSERT_TO_IBUF;
-
-				goto func_exit;
-			}
-			break;
-
-		case BTR_DELMARK_OP:
-			ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
-			if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
-					index, space, zip_size,
-					page_no, cursor->thr)) {
-
-				cursor->flag = BTR_CUR_DEL_MARK_IBUF;
-
-				goto func_exit;
-			}
-
-			break;
-
-		case BTR_DELETE_OP:
-			ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
-
-			if (!row_purge_poss_sec(cursor->purge_node,
-						index, tuple)) {
-
-				/* The record cannot be purged yet. */
-				cursor->flag = BTR_CUR_DELETE_REF;
-			} else if (ibuf_insert(IBUF_OP_DELETE, tuple,
-					       index, space, zip_size,
-					       page_no,
-					       cursor->thr)) {
-
-				/* The purge was buffered. */
-				cursor->flag = BTR_CUR_DELETE_IBUF;
-			} else {
-				/* The purge could not be buffered. */
-				buf_pool_watch_unset(space, page_no);
-				break;
-			}
-
-			buf_pool_watch_unset(space, page_no);
-			goto func_exit;
-
-		default:
-			ut_error;
-		}
-
-		/* Insert to the insert/delete buffer did not succeed, we
-		must read the page from disk. */
-
-		buf_mode = BUF_GET;
-
-		goto retry_page_get;
-	}
-
-	block->check_index_page_at_flush = TRUE;
-	page = buf_block_get_frame(block);
-
-	SRV_CORRUPT_TABLE_CHECK(page,
-	{
-		page_cursor->block = 0;
-		page_cursor->rec = 0;
-
-		if (estimate) {
-
-			cursor->path_arr->nth_rec = ULINT_UNDEFINED;
-		}
-
-		goto func_exit;
-	});
-
-	if (rw_latch != RW_NO_LATCH) {
-#ifdef UNIV_ZIP_DEBUG
-		const page_zip_des_t*	page_zip
-			= buf_block_get_page_zip(block);
-		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-		buf_block_dbg_add_level(
-			block, dict_index_is_ibuf(index)
-			? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
-	}
-
-	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-	ut_ad(index->id == btr_page_get_index_id(page));
-
-	if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
-		/* We are in the root node */
-
-		height = btr_page_get_level(page, mtr);
-		root_height = height;
-		cursor->tree_height = root_height + 1;
-
-#ifdef BTR_CUR_ADAPT
-		if (block != guess) {
-			info->root_guess = block;
-		}
-#endif
-	}
-
-	if (height == 0) {
-		if (rw_latch == RW_NO_LATCH) {
-
-			btr_cur_latch_leaves(
-				page, space, zip_size, page_no, latch_mode,
-				cursor, mtr);
-		}
-
-		switch (latch_mode) {
-		case BTR_MODIFY_TREE:
-		case BTR_CONT_MODIFY_TREE:
-			break;
-		default:
-			if (!s_latch_by_caller) {
-				/* Release the tree s-latch */
-				mtr_release_s_latch_at_savepoint(
-					mtr, savepoint,
-					dict_index_get_lock(index));
-			}
-		}
-
-		page_mode = mode;
-	}
-
-	page_cur_search_with_match(
-		block, index, tuple, page_mode, &up_match, &up_bytes,
-		&low_match, &low_bytes, page_cursor);
-
-	if (estimate) {
-		btr_cur_add_path_info(cursor, height, root_height);
-	}
-
-	/* If this is the desired level, leave the loop */
-
-	ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
-					   mtr));
-
-	if (level != height) {
-
-		const rec_t*	node_ptr;
-		ut_ad(height > 0);
-
-		height--;
-		guess = NULL;
-
-		node_ptr = page_cur_get_rec(page_cursor);
-
-		offsets = rec_get_offsets(
-			node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
-
-		/* Go to the child node */
-		page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-
-		if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
-			/* We're doing a search on an ibuf tree and we're one
-			level above the leaf page. */
-
-			ut_ad(level == 0);
-
-			buf_mode = BUF_GET;
-			rw_latch = RW_NO_LATCH;
-			goto retry_page_get;
-		}
-
-		goto search_loop;
-	}
-
-	if (level != 0) {
-		/* x-latch the page */
-		buf_block_t*	child_block = btr_block_get(
-			space, zip_size, page_no, RW_X_LATCH, index, mtr);
-
-		page = buf_block_get_frame(child_block);
-		btr_assert_not_corrupted(child_block, index);
-	} else {
-		cursor->low_match = low_match;
-		cursor->low_bytes = low_bytes;
-		cursor->up_match = up_match;
-		cursor->up_bytes = up_bytes;
-
-#ifdef BTR_CUR_ADAPT
-		/* We do a dirty read of btr_search_enabled here. We
-		will properly check btr_search_enabled again in
-		btr_search_build_page_hash_index() before building a
-		page hash index, while holding btr_search_latch. */
-		if (btr_search_enabled) {
-			btr_search_info_update(index, cursor);
-		}
-#endif
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_GE);
-		ut_ad(cursor->up_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-		ut_ad(cursor->low_match != ULINT_UNDEFINED
-		      || mode != PAGE_CUR_LE);
-	}
-
-func_exit:
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	if (has_search_latch) {
-
-		rw_lock_s_lock(btr_search_get_latch(cursor->index));
-	}
-
-	return err;
-}
-
-/*****************************************************************//**
-Opens a cursor at either end of an index.
*/ -UNIV_INTERN -dberr_t -btr_cur_open_at_index_side_func( -/*============================*/ - bool from_left, /*!< in: true if open to the low end, - false if to the high end */ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: latch mode */ - btr_cur_t* cursor, /*!< in/out: cursor */ - ulint level, /*!< in: level to search for - (0=leaf). */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - page_cur_t* page_cursor; - ulint page_no; - ulint space; - ulint zip_size; - ulint height; - ulint root_height = 0; /* remove warning */ - rec_t* node_ptr; - ulint estimate; - ulint savepoint; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - dberr_t err = DB_SUCCESS; - - rec_offs_init(offsets_); - - estimate = latch_mode & BTR_ESTIMATE; - latch_mode &= ~BTR_ESTIMATE; - - ut_ad(level != ULINT_UNDEFINED); - - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched the leaf node */ - - savepoint = mtr_set_savepoint(mtr); - - switch (latch_mode) { - case BTR_CONT_MODIFY_TREE: - break; - case BTR_MODIFY_TREE: - mtr_x_lock(dict_index_get_lock(index), mtr); - break; - case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED: - case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED: - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); - break; - default: - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - cursor->index = index; - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - page_no = dict_index_get_page(index); - - height = ULINT_UNDEFINED; - - for (;;) { - buf_block_t* block=NULL; - page_t* page=NULL; - - block = buf_page_get_gen(space, zip_size, page_no, - RW_NO_LATCH, NULL, BUF_GET, - file, line, mtr, &err); - - ut_ad((block != NULL) == (err == DB_SUCCESS)); - - if (err != 
DB_SUCCESS) { - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name); - index->table->file_unreadable = true; - } - - goto exit_loop; - } - - page = buf_block_get_frame(block); - - SRV_CORRUPT_TABLE_CHECK(page, - { - page_cursor->block = 0; - page_cursor->rec = 0; - - if (estimate) { - - cursor->path_arr->nth_rec = - ULINT_UNDEFINED; - } - /* Can't use break with the macro */ - goto exit_loop; - }); - - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - - ut_ad(index->id == btr_page_get_index_id(page)); - - block->check_index_page_at_flush = TRUE; - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - root_height = height; - ut_a(height >= level); - } else { - /* TODO: flag the index corrupted if this fails */ - ut_ad(height == btr_page_get_level(page, mtr)); - } - - if (height == level) { - btr_cur_latch_leaves( - page, space, zip_size, page_no, - latch_mode & ~BTR_ALREADY_S_LATCHED, - cursor, mtr); - - if (height == 0) { - /* In versions <= 3.23.52 we had - forgotten to release the tree latch - here. If in an index scan we had to - scan far to find a record visible to - the current transaction, that could - starve others waiting for the tree - latch. 
*/ - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED: - case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED: - break; - default: - /* Release the tree s-latch */ - - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - dict_index_get_lock(index)); - } - } - } - - if (from_left) { - page_cur_set_before_first(block, page_cursor); - } else { - page_cur_set_after_last(block, page_cursor); - } - - if (height == level) { - if (estimate) { - btr_cur_add_path_info(cursor, height, - root_height); - } - - break; - } - - ut_ad(height > 0); - - if (from_left) { - page_cur_move_to_next(page_cursor); - } else { - page_cur_move_to_prev(page_cursor); - } - - if (estimate) { - btr_cur_add_path_info(cursor, height, root_height); - } - - height--; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - -exit_loop: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return err; -} - -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. */ -UNIV_INTERN -void -btr_cur_open_at_rnd_pos_func( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ - btr_cur_t* cursor, /*!< in/out: B-tree cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t* page_cursor; - ulint page_no; - ulint space; - ulint zip_size; - ulint height; - rec_t* node_ptr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - switch (latch_mode) { - case BTR_MODIFY_TREE: - mtr_x_lock(dict_index_get_lock(index), mtr); - break; - default: - ut_ad(latch_mode != BTR_CONT_MODIFY_TREE); - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - cursor->index = index; - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - page_no = dict_index_get_page(index); - - height = ULINT_UNDEFINED; - - for (;;) { - buf_block_t* block; - page_t* page; - dberr_t err=DB_SUCCESS; - - block = buf_page_get_gen(space, zip_size, page_no, - RW_NO_LATCH, NULL, BUF_GET, - file, line, mtr, &err); - - ut_ad((block != NULL) == (err == DB_SUCCESS)); - - if (err != DB_SUCCESS) { - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue reading table.", - index->table->name); - index->table->file_unreadable = true; - } - - goto exit_loop; - } - - page = buf_block_get_frame(block); - - SRV_CORRUPT_TABLE_CHECK(page, - { - page_cursor->block = 0; - page_cursor->rec = 0; - - goto exit_loop; - }); - - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - - ut_ad(index->id == btr_page_get_index_id(page)); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - } - - if (height == 0) { - btr_cur_latch_leaves(page, space, zip_size, page_no, - latch_mode, cursor, mtr); - } - - page_cur_open_on_rnd_user_rec(block, page_cursor); - - if (height == 0) { - - break; - } - - ut_ad(height > 0); - - height--; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - -exit_loop: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/*==================== B-TREE INSERT =========================*/ - -/*************************************************************//** -Inserts a record if there is enough space, or if enough space can -be freed by reorganizing. Differs from btr_cur_optimistic_insert because -no heuristics is applied to whether it pays to use CPU time for -reorganizing the page or not. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
- -@return pointer to inserted record if succeed, else NULL */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -rec_t* -btr_cur_insert_if_possible( -/*=======================*/ - btr_cur_t* cursor, /*!< in: cursor on page after which to insert; - cursor stays valid */ - const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not - have been stored to tuple */ - ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - page_cur_t* page_cursor; - rec_t* rec; - - ut_ad(dtuple_check_typed(tuple)); - - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); - page_cursor = btr_cur_get_page_cur(cursor); - - /* Now, try the insert */ - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, - offsets, heap, n_ext, mtr); - - /* If the record did not fit, reorganize. - For compressed pages, page_cur_tuple_insert() - attempted this already. */ - if (!rec && !page_cur_get_page_zip(page_cursor) - && btr_page_reorganize(page_cursor, cursor->index, mtr)) { - rec = page_cur_tuple_insert( - page_cursor, tuple, cursor->index, - offsets, heap, n_ext, mtr); - } - - ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets)); - return(rec); -} - -/*************************************************************//** -For an insert, checks the locks and does the undo logging if desired. 
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ -UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,5,6))) -dberr_t -btr_cur_ins_lock_and_undo( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags: if - not zero, the parameters index and thr - should be specified */ - btr_cur_t* cursor, /*!< in: cursor on page after which to insert */ - dtuple_t* entry, /*!< in/out: entry to insert */ - que_thr_t* thr, /*!< in: query thread or NULL */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - ibool* inherit)/*!< out: TRUE if the inserted new record maybe - should inherit LOCK_GAP type locks from the - successor record */ -{ - dict_index_t* index; - dberr_t err; - rec_t* rec; - roll_ptr_t roll_ptr; - - if (UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes)) { - /* skip LOCK, UNDO */ - return(DB_SUCCESS); - } - - /* Check if we have to wait for a lock: enqueue an explicit lock - request if yes */ - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - - ut_ad(!dict_index_is_online_ddl(index) - || dict_index_is_clust(index) - || (flags & BTR_CREATE_FLAG)); - - err = lock_rec_insert_check_and_lock(flags, rec, - btr_cur_get_block(cursor), - index, thr, mtr, inherit); - - if (err != DB_SUCCESS - || !(~flags | (BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) - || !dict_index_is_clust(index) || dict_index_is_ibuf(index)) { - - return(err); - } - - if (flags & BTR_NO_UNDO_LOG_FLAG) { - roll_ptr = 0; - } else { - err = trx_undo_report_row_operation(thr, index, entry, - NULL, 0, NULL, NULL, - &roll_ptr); - if (err != DB_SUCCESS) { - return(err); - } - } - - /* Now we can fill in the roll ptr field in entry */ - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - - row_upd_index_entry_sys_field(entry, index, - DATA_ROLL_PTR, roll_ptr); - } - - return(DB_SUCCESS); -} - -#ifdef UNIV_DEBUG -/*************************************************************//** -Report information about a transaction. 
*/ -static -void -btr_cur_trx_report( -/*===============*/ - trx_id_t trx_id, /*!< in: transaction id */ - const dict_index_t* index, /*!< in: index */ - const char* op) /*!< in: operation */ -{ - fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id); - fputs(op, stderr); - dict_index_name_print(stderr, NULL, index); - putc('\n', stderr); -} -#endif /* UNIV_DEBUG */ - -/*************************************************************//** -Tries to perform an insert to a page in an index tree, next to cursor. -It is assumed that mtr holds an x-latch on the page. The operation does -not succeed if there is too little space on the page. If there is just -one record on the page, the insert will always succeed; this is to -prevent trying to split a page with just one record. -@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ -UNIV_INTERN -dberr_t -btr_cur_optimistic_insert( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags: if not - zero, the parameters index and thr should be - specified */ - btr_cur_t* cursor, /*!< in: cursor on page after which to insert; - cursor stays valid */ - ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap */ - dtuple_t* entry, /*!< in/out: entry to insert */ - rec_t** rec, /*!< out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller */ - ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in/out: query thread; can be NULL if - !(~flags - & (BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG)) */ - mtr_t* mtr) /*!< in/out: mini-transaction; - if this function returns DB_SUCCESS on - a leaf page of a secondary index in a - compressed tablespace, the caller must - mtr_commit(mtr) before latching - any further pages */ -{ - big_rec_t* big_rec_vec = NULL; - dict_index_t* index; - page_cur_t* page_cursor; - buf_block_t* 
block; - page_t* page; - rec_t* dummy; - ibool leaf; - ibool reorg; - ibool inherit = TRUE; - ulint zip_size; - ulint rec_size; - dberr_t err; - - ut_ad(thr || !(~flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG))); - *big_rec = NULL; - - block = btr_cur_get_block(cursor); - - SRV_CORRUPT_TABLE_CHECK(block, return(DB_CORRUPTION);); - - page = buf_block_get_frame(block); - index = cursor->index; - - const bool fake_changes = (~flags & (BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG)) - && thr_get_trx(thr)->fake_changes; - ut_ad(fake_changes - || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(!dict_index_is_online_ddl(index) - || dict_index_is_clust(index) - || (flags & BTR_CREATE_FLAG)); - ut_ad(dtuple_check_typed(entry)); - - zip_size = buf_block_get_zip_size(block); -#ifdef UNIV_DEBUG_VALGRIND - if (zip_size) { - UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size); - } -#endif /* UNIV_DEBUG_VALGRIND */ - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert "); - dtuple_print(stderr, entry); - } -#endif /* UNIV_DEBUG */ - - leaf = page_is_leaf(page); - - /* Calculate the record size when entry is converted to a record */ - rec_size = rec_get_converted_size(index, entry, n_ext); - - if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), - dtuple_get_n_fields(entry), zip_size)) { - - /* The record is so big that we have to store some fields - externally on separate database pages */ - big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext); - - if (UNIV_UNLIKELY(big_rec_vec == NULL)) { - - return(DB_TOO_BIG_RECORD); - } - - rec_size = rec_get_converted_size(index, entry, n_ext); - } - - if (zip_size) { - /* Estimate the free space of an empty compressed page. - Subtract one byte for the encoded heap_no in the - modification log. 
*/ - ulint free_space_zip = page_zip_empty_size( - cursor->index->n_fields, zip_size); - ulint n_uniq = dict_index_get_n_unique_in_tree(index); - - ut_ad(dict_table_is_comp(index->table)); - - if (free_space_zip == 0) { -too_big: - if (big_rec_vec) { - dtuple_convert_back_big_rec( - index, entry, big_rec_vec); - } - - return(DB_TOO_BIG_RECORD); - } - - /* Subtract one byte for the encoded heap_no in the - modification log. */ - free_space_zip--; - - /* There should be enough room for two node pointer - records on an empty non-leaf page. This prevents - infinite page splits. */ - - if (entry->n_fields >= n_uniq - && (REC_NODE_PTR_SIZE - + rec_get_converted_size_comp_prefix( - index, entry->fields, n_uniq, NULL) - /* On a compressed page, there is - a two-byte entry in the dense - page directory for every record. - But there is no record header. */ - - (REC_N_NEW_EXTRA_BYTES - 2) - > free_space_zip / 2)) { - goto too_big; - } - } - - LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), - goto fail); - - if (leaf && zip_size - && (page_get_data_size(page) + rec_size - >= dict_index_zip_pad_optimal_page_size(index))) { - /* If compression padding tells us that insertion will - result in too packed up page i.e.: which is likely to - cause compression failure then don't do an optimistic - insertion. 
*/ -fail: - err = DB_FAIL; -fail_err: - - if (big_rec_vec) { - dtuple_convert_back_big_rec(index, entry, big_rec_vec); - } - - return(err); - } - - ulint max_size = page_get_max_insert_size_after_reorganize(page, 1); - - if (page_has_garbage(page)) { - if ((max_size < rec_size - || max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT) - && page_get_n_recs(page) > 1 - && page_get_max_insert_size(page, 1) < rec_size) { - - goto fail; - } - } else if (max_size < rec_size) { - goto fail; - } - - /* If there have been many consecutive inserts to the - clustered index leaf page of an uncompressed table, check if - we have to split the page to reserve enough free space for - future updates of records. */ - - if (leaf && !zip_size && dict_index_is_clust(index) - && page_get_n_recs(page) >= 2 - && dict_index_get_space_reserve() + rec_size > max_size - && (btr_page_get_split_rec_to_right(cursor, &dummy) - || btr_page_get_split_rec_to_left(cursor, &dummy))) { - goto fail; - } - - /* Check locks and write to the undo log, if specified */ - err = btr_cur_ins_lock_and_undo(flags, cursor, entry, - thr, mtr, &inherit); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - - goto fail_err; - } - - if (UNIV_UNLIKELY(fake_changes)) { - /* skip CHANGE, LOG */ - *big_rec = big_rec_vec; - return(err); /* == DB_SUCCESS */ - } - - page_cursor = btr_cur_get_page_cur(cursor); - - /* Now, try the insert */ - - { - const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor); - *rec = page_cur_tuple_insert(page_cursor, entry, index, - offsets, heap, n_ext, mtr); - reorg = page_cursor_rec != page_cur_get_rec(page_cursor); - } - - if (*rec) { - } else if (zip_size) { - /* Reset the IBUF_BITMAP_FREE bits, because - page_cur_tuple_insert() will have attempted page - reorganize before failing. 
*/ - if (leaf && !dict_index_is_clust(index)) { - ibuf_reset_free_bits(block); - } - - goto fail; - } else { - ut_ad(!reorg); - - /* If the record did not fit, reorganize */ - if (!btr_page_reorganize(page_cursor, index, mtr)) { - ut_ad(0); - goto fail; - } - - ut_ad(page_get_max_insert_size(page, 1) == max_size); - - reorg = TRUE; - - *rec = page_cur_tuple_insert(page_cursor, entry, index, - offsets, heap, n_ext, mtr); - - if (UNIV_UNLIKELY(!*rec)) { - fputs("InnoDB: Error: cannot insert tuple ", stderr); - dtuple_print(stderr, entry); - fputs(" into ", stderr); - dict_index_name_print(stderr, thr_get_trx(thr), index); - fprintf(stderr, "\nInnoDB: max insert size %lu\n", - (ulong) max_size); - ut_error; - } - } - -#ifdef BTR_CUR_HASH_ADAPT - if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) { - btr_search_update_hash_node_on_insert(cursor); - } else { - btr_search_update_hash_on_insert(cursor); - } -#endif - - if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) { - - lock_update_insert(block, *rec); - } - - if (leaf && !dict_index_is_clust(index)) { - /* Update the free bits of the B-tree page in the - insert buffer bitmap. */ - - /* The free bits in the insert buffer bitmap must - never exceed the free space on a page. It is safe to - decrement or reset the bits in the bitmap in a - mini-transaction that is committed before the - mini-transaction that affects the free space. */ - - /* It is unsafe to increment the bits in a separately - committed mini-transaction, because in crash recovery, - the free bits could momentarily be set too high. */ - - if (zip_size) { - /* Update the bits in the same mini-transaction. */ - ibuf_update_free_bits_zip(block, mtr); - } else { - /* Decrement the bits in a separate - mini-transaction. 
*/ - ibuf_update_free_bits_if_full( - block, max_size, - rec_size + PAGE_DIR_SLOT_SIZE); - } - } - - *big_rec = big_rec_vec; - - return(DB_SUCCESS); -} - -/*************************************************************//** -Performs an insert on a page of an index tree. It is assumed that mtr -holds an x-latch on the tree and on the cursor page. If the insert is -made on the leaf level, to avoid deadlocks, mtr must also own x-latches -to brothers of page, if those brothers exist. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -btr_cur_pessimistic_insert( -/*=======================*/ - ulint flags, /*!< in: undo logging and locking flags: if not - zero, the parameter thr should be - specified; if no undo logging is specified, - then the caller must have reserved enough - free extents in the file space so that the - insertion will certainly succeed */ - btr_cur_t* cursor, /*!< in: cursor after which to insert; - cursor stays valid */ - ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap - that can be emptied */ - dtuple_t* entry, /*!< in/out: entry to insert */ - rec_t** rec, /*!< out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller */ - ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in/out: query thread; can be NULL if - !(~flags - & (BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG)) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - dict_index_t* index = cursor->index; - ulint zip_size = dict_table_zip_size(index->table); - big_rec_t* big_rec_vec = NULL; - dberr_t err; - ibool inherit = FALSE; - ibool success; - ulint n_reserved = 0; - - ut_ad(dtuple_check_typed(entry)); - ut_ad(thr || !(~flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG))); - - *big_rec = NULL; - - const bool fake_changes = (~flags & (BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG)) - && 
thr_get_trx(thr)->fake_changes; - ut_ad(fake_changes || mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(fake_changes || mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(!dict_index_is_online_ddl(index) - || dict_index_is_clust(index) - || (flags & BTR_CREATE_FLAG)); - - cursor->flag = BTR_CUR_BINARY; - - /* Check locks and write to undo log, if specified */ - - err = btr_cur_ins_lock_and_undo(flags, cursor, entry, - thr, mtr, &inherit); - - if (err != DB_SUCCESS) { - - return(err); - } - - if (!(flags & BTR_NO_UNDO_LOG_FLAG)) { - - ut_a(cursor->tree_height != ULINT_UNDEFINED); - - /* First reserve enough free space for the file segments - of the index tree, so that the insert will not fail because - of lack of space */ - - ulint n_extents = cursor->tree_height / 16 + 3; - - success = fsp_reserve_free_extents(&n_reserved, index->space, - n_extents, FSP_NORMAL, mtr); - if (!success) { - return(DB_OUT_OF_FILE_SPACE); - } - } - - if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext), - dict_table_is_comp(index->table), - dtuple_get_n_fields(entry), - zip_size)) { - /* The record is so big that we have to store some fields - externally on separate database pages */ - - if (UNIV_LIKELY_NULL(big_rec_vec)) { - /* This should never happen, but we handle - the situation in a robust manner. 
*/ - ut_ad(0); - dtuple_convert_back_big_rec(index, entry, big_rec_vec); - } - - big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext); - - if (big_rec_vec == NULL) { - - if (n_reserved > 0) { - fil_space_release_free_extents(index->space, - n_reserved); - } - return(DB_TOO_BIG_RECORD); - } - } - - if (UNIV_UNLIKELY(fake_changes)) { - /* skip CHANGE, LOG */ - if (n_reserved > 0) { - fil_space_release_free_extents(index->space, - n_reserved); - } - *big_rec = big_rec_vec; - return(DB_SUCCESS); - } - - if (dict_index_get_page(index) - == buf_block_get_page_no(btr_cur_get_block(cursor))) { - - /* The page is the root page */ - *rec = btr_root_raise_and_insert( - flags, cursor, offsets, heap, entry, n_ext, mtr); - } else { - *rec = btr_page_split_and_insert( - flags, cursor, offsets, heap, entry, n_ext, mtr); - } - - if (*rec == NULL && os_has_said_disk_full) { - return(DB_OUT_OF_FILE_SPACE); - } - - ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec); - - if (!(flags & BTR_NO_LOCKING_FLAG)) { - /* The cursor might be moved to the other page, - and the max trx id field should be updated after - the cursor was fixed. */ - if (!dict_index_is_clust(index)) { - page_update_max_trx_id( - btr_cur_get_block(cursor), - btr_cur_get_page_zip(cursor), - thr_get_trx(thr)->id, mtr); - } - - if (!page_rec_is_infimum(btr_cur_get_rec(cursor))) { - /* split and inserted need to call - lock_update_insert() always. */ - inherit = TRUE; - } - - buf_block_t* block = btr_cur_get_block(cursor); - buf_frame_t* frame = NULL; - - if (block) { - frame = buf_block_get_frame(block); - } - /* split and inserted need to call - lock_update_insert() always. 
*/ - if (frame && btr_page_get_prev(frame, mtr) == FIL_NULL) { - inherit = TRUE; - } - } - -#ifdef BTR_CUR_ADAPT - btr_search_update_hash_on_insert(cursor); -#endif - if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) { - - lock_update_insert(btr_cur_get_block(cursor), *rec); - } - - if (n_reserved > 0) { - fil_space_release_free_extents(index->space, n_reserved); - } - - *big_rec = big_rec_vec; - - return(DB_SUCCESS); -} - -/*==================== B-TREE UPDATE =========================*/ - -/*************************************************************//** -For an update, checks the locks and does the undo logging. -@return DB_SUCCESS, DB_WAIT_LOCK, or error number */ -UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) -dberr_t -btr_cur_upd_lock_and_undo( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor on record to update */ - const ulint* offsets,/*!< in: rec_get_offsets() on cursor */ - const upd_t* update, /*!< in: update vector */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread - (can be NULL if BTR_NO_LOCKING_FLAG) */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - roll_ptr_t* roll_ptr)/*!< out: roll pointer */ -{ - dict_index_t* index; - const rec_t* rec; - dberr_t err; - - ut_ad((thr != NULL) || (flags & BTR_NO_LOCKING_FLAG)); - - if (!(flags & BTR_NO_LOCKING_FLAG) && thr_get_trx(thr)->fake_changes) { - /* skip LOCK, UNDO */ - return(DB_SUCCESS); - } - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!dict_index_is_clust(index)) { - ut_ad(dict_index_is_online_ddl(index) - == !!(flags & BTR_CREATE_FLAG)); - - /* We do undo logging only when we update a clustered index - record */ - return(lock_sec_rec_modify_check_and_lock( - flags, btr_cur_get_block(cursor), rec, - index, thr, mtr)); - } - - /* Check if we have to wait for a lock: enqueue an explicit lock - 
request if yes */ - - if (!(flags & BTR_NO_LOCKING_FLAG)) { - err = lock_clust_rec_modify_check_and_lock( - flags, btr_cur_get_block(cursor), rec, index, - offsets, thr); - if (err != DB_SUCCESS) { - return(err); - } - } - - /* Append the info about the update in the undo log */ - - return((flags & BTR_NO_UNDO_LOG_FLAG) - ? DB_SUCCESS - : trx_undo_report_row_operation( - thr, index, NULL, update, - cmpl_info, rec, offsets, roll_ptr)); -} - -/***********************************************************//** -Writes a redo log record of updating a record in-place. */ -UNIV_INTERN -void -btr_cur_update_in_place_log( -/*========================*/ - ulint flags, /*!< in: flags */ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update, /*!< in: update vector */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr, /*!< in: roll ptr */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* log_ptr; - const page_t* page = page_align(rec); - ut_ad(flags < 256); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page) - ? MLOG_COMP_REC_UPDATE_IN_PLACE - : MLOG_REC_UPDATE_IN_PLACE, - 1 + DATA_ROLL_PTR_LEN + 14 + 2 - + MLOG_BUF_MARGIN); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery */ - return; - } - - /* For secondary indexes, we could skip writing the dummy system fields - to the redo log but we have to change redo log parsing of - MLOG_REC_UPDATE_IN_PLACE/MLOG_COMP_REC_UPDATE_IN_PLACE or we have to add - new redo log record. For now, just write dummy sys fields to the redo - log if we are updating a secondary index record. 
- */ - mach_write_to_1(log_ptr, flags); - log_ptr++; - - if (dict_index_is_clust(index)) { - log_ptr = row_upd_write_sys_vals_to_log( - index, trx_id, roll_ptr, log_ptr, mtr); - } else { - /* Dummy system fields for a secondary index */ - /* TRX_ID Position */ - log_ptr += mach_write_compressed(log_ptr, 0); - /* ROLL_PTR */ - trx_write_roll_ptr(log_ptr, 0); - log_ptr += DATA_ROLL_PTR_LEN; - /* TRX_ID */ - log_ptr += mach_ull_write_compressed(log_ptr, 0); - } - - mach_write_to_2(log_ptr, page_offset(rec)); - log_ptr += 2; - - row_upd_index_write_log(update, log_ptr, mtr); -} -#endif /* UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses a redo log record of updating a record in-place. -@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_cur_parse_update_in_place( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in/out: page or NULL */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - dict_index_t* index) /*!< in: index corresponding to page */ -{ - ulint flags; - rec_t* rec; - upd_t* update; - ulint pos; - trx_id_t trx_id; - roll_ptr_t roll_ptr; - ulint rec_offset; - mem_heap_t* heap; - ulint* offsets; - - if (end_ptr < ptr + 1) { - - return(NULL); - } - - flags = mach_read_from_1(ptr); - ptr++; - - ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr); - - if (ptr == NULL) { - - return(NULL); - } - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - rec_offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(rec_offset <= UNIV_PAGE_SIZE); - - heap = mem_heap_create(256); - - ptr = row_upd_index_parse(ptr, end_ptr, heap, &update); - - if (!ptr || !page) { - - goto func_exit; - } - - ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); - rec = page + rec_offset; - - /* We do not need to reserve btr_search_latch, as the page is only - being recovered, and there cannot be a hash index to it. 
*/ - - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, page_zip, offsets, - pos, trx_id, roll_ptr); - } - - row_upd_rec_in_place(rec, index, offsets, update, page_zip); - -func_exit: - mem_heap_free(heap); - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -See if there is enough place in the page modification log to log -an update-in-place. - -@retval false if out of space; IBUF_BITMAP_FREE will be reset -outside mtr if the page was recompressed -@retval true if enough place; - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is -a secondary index leaf page. This has to be done either within the -same mini-transaction, or by invoking ibuf_reset_free_bits() before -mtr_commit(mtr). */ -UNIV_INTERN -bool -btr_cur_update_alloc_zip_func( -/*==========================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - page_cur_t* cursor, /*!< in/out: B-tree page cursor */ - dict_index_t* index, /*!< in: the index corresponding to cursor */ -#ifdef UNIV_DEBUG - ulint* offsets,/*!< in/out: offsets of the cursor record */ -#endif /* UNIV_DEBUG */ - ulint length, /*!< in: size needed */ - bool create, /*!< in: true=delete-and-insert, - false=update-in-place */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - trx_t* trx) /*!< in: NULL or transaction */ -{ - const page_t* page = page_cur_get_page(cursor); - - ut_ad(page_zip == page_cur_get_page_zip(cursor)); - ut_ad(!dict_index_is_ibuf(index)); - ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets)); - - if (page_zip_available(page_zip, dict_index_is_clust(index), - length, create)) { - return(true); - } - - if (!page_zip->m_nonempty && !page_has_garbage(page)) { - /* The page has been freshly compressed, so - reorganizing it will not help. 
*/ - return(false); - } - - if (create && page_is_leaf(page) - && (length + page_get_data_size(page) - >= dict_index_zip_pad_optimal_page_size(index))) { - return(false); - } - - if (UNIV_UNLIKELY(trx && trx->fake_changes)) { - /* Don't call page_zip_compress_write_log_no_data as that has - assert which would fail. Assume there won't be a compression - failure. */ - - return(true); - } - - if (!btr_page_reorganize(cursor, index, mtr)) { - goto out_of_space; - } - - rec_offs_make_valid(page_cur_get_rec(cursor), index, offsets); - - /* After recompressing a page, we must make sure that the free - bits in the insert buffer bitmap will not exceed the free - space on the page. Because this function will not attempt - recompression unless page_zip_available() fails above, it is - safe to reset the free bits if page_zip_available() fails - again, below. The free bits can safely be reset in a separate - mini-transaction. If page_zip_available() succeeds below, we - can be sure that the btr_page_reorganize() above did not reduce - the free space available on the page. */ - - if (page_zip_available(page_zip, dict_index_is_clust(index), - length, create)) { - return(true); - } - -out_of_space: - ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets)); - - /* Out of space: reset the free bits. */ - if (!dict_index_is_clust(index) && page_is_leaf(page)) { - ibuf_reset_free_bits(page_cur_get_block(cursor)); - } - - return(false); -} - -/*************************************************************//** -Updates a record when the update causes no size changes in its fields. -We assume here that the ordering fields of the record do not change. 
-@return locking or undo log related error code, or -@retval DB_SUCCESS on success -@retval DB_ZIP_OVERFLOW if there is not enough space left -on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */ -UNIV_INTERN -dberr_t -btr_cur_update_in_place( -/*====================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */ - const upd_t* update, /*!< in: update vector */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction; if this - is a secondary index, the caller must - mtr_commit(mtr) before latching any - further pages */ -{ - dict_index_t* index; - buf_block_t* block; - page_zip_des_t* page_zip; - dberr_t err; - rec_t* rec; - roll_ptr_t roll_ptr = 0; - ulint was_delete_marked; - ibool is_hashed; - trx_t* trx; - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - /* The insert buffer tree should never be updated in place. 
*/ - ut_ad(!dict_index_is_ibuf(index)); - ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) - || dict_index_is_clust(index)); - ut_ad(thr_get_trx(thr)->id == trx_id - || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)) - == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG - | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); - ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX); - ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id); - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops) { - btr_cur_trx_report(trx_id, index, "update "); - rec_print_new(stderr, rec, offsets); - } -#endif /* UNIV_DEBUG */ - - block = btr_cur_get_block(cursor); - page_zip = buf_block_get_page_zip(block); - trx = thr_get_trx(thr); - - /* Check that enough space is available on the compressed page. */ - if (page_zip) { - if (!btr_cur_update_alloc_zip( - page_zip, btr_cur_get_page_cur(cursor), - index, offsets, rec_offs_size(offsets), - false, mtr, trx)) { - return(DB_ZIP_OVERFLOW); - } - - rec = btr_cur_get_rec(cursor); - } - - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, offsets, - update, cmpl_info, - thr, mtr, &roll_ptr); - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - /* We may need to update the IBUF_BITMAP_FREE - bits after a reorganize that was done in - btr_cur_update_alloc_zip(). */ - goto func_exit; - } - - if (UNIV_UNLIKELY(trx->fake_changes)) { - /* skip CHANGE, LOG */ - return(err); /* == DB_SUCCESS */ - } - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, NULL, index, offsets, - thr_get_trx(thr), roll_ptr); - } - - was_delete_marked = rec_get_deleted_flag( - rec, page_is_comp(buf_block_get_frame(block))); - - is_hashed = (block->index != NULL); - - if (is_hashed) { - /* TO DO: Can we skip this if none of the fields - index->search_info->curr_n_fields - are being updated? 
*/ - - /* The function row_upd_changes_ord_field_binary works only - if the update vector was built for a clustered index, we must - NOT call it if index is secondary */ - - if (!dict_index_is_clust(index) - || row_upd_changes_ord_field_binary(index, update, thr, - NULL, NULL)) { - - /* Remove possible hash index pointer to this record */ - btr_search_update_hash_on_delete(cursor); - } - - rw_lock_x_lock(btr_search_get_latch(cursor->index)); - } - - row_upd_rec_in_place(rec, index, offsets, update, page_zip); - - if (is_hashed) { - rw_lock_x_unlock(btr_search_get_latch(cursor->index)); - } - - btr_cur_update_in_place_log(flags, rec, index, update, - trx_id, roll_ptr, mtr); - - if (was_delete_marked - && !rec_get_deleted_flag( - rec, page_is_comp(buf_block_get_frame(block)))) { - /* The new updated record owns its possible externally - stored fields */ - - btr_cur_unmark_extern_fields(page_zip, - rec, index, offsets, mtr); - } - - ut_ad(err == DB_SUCCESS); - -func_exit: - if (page_zip - && !(flags & BTR_KEEP_IBUF_BITMAP) - && !dict_index_is_clust(index) - && block) { - buf_frame_t* frame = buf_block_get_frame(block); - if (frame && page_is_leaf(frame)) { - /* Update the free bits in the insert buffer. */ - ibuf_update_free_bits_zip(block, mtr); - } - } - - return(err); -} - -/*************************************************************//** -Tries to update a record on a page in an index tree. It is assumed that mtr -holds an x-latch on the page. The operation does not succeed if there is too -little space on the page or if the update would result in too empty a page, -so that tree compression is recommended. We assume here that the ordering -fields of the record do not change. 
-@return error code, including -@retval DB_SUCCESS on success -@retval DB_OVERFLOW if the updated record does not fit -@retval DB_UNDERFLOW if the page would become too empty -@retval DB_ZIP_OVERFLOW if there is not enough space left -on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */ -UNIV_INTERN -dberr_t -btr_cur_optimistic_update( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ - mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */ - const upd_t* update, /*!< in: update vector; this must also - contain trx id and roll ptr fields */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction; if this - is a secondary index, the caller must - mtr_commit(mtr) before latching any - further pages */ -{ - dict_index_t* index; - page_cur_t* page_cursor; - dberr_t err; - buf_block_t* block; - page_t* page; - page_zip_des_t* page_zip; - rec_t* rec; - ulint max_size; - ulint new_rec_size; - ulint old_rec_size; - ulint max_ins_size = 0; - dtuple_t* new_entry; - roll_ptr_t roll_ptr; - ulint i; - ulint n_ext; - - block = btr_cur_get_block(cursor); - page = buf_block_get_frame(block); - rec = btr_cur_get_rec(cursor); - index = cursor->index; - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - ut_ad(thr_get_trx(thr)->fake_changes - || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - /* The insert buffer tree should never be updated in place. 
*/ - ut_ad(!dict_index_is_ibuf(index)); - ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) - || dict_index_is_clust(index)); - ut_ad(thr_get_trx(thr)->id == trx_id - || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP)) - == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG - | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(btr_page_get_index_id(page) == index->id); - - *offsets = rec_get_offsets(rec, index, *offsets, - ULINT_UNDEFINED, heap); -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(rec, *offsets) - || trx_is_recv(thr_get_trx(thr))); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops) { - btr_cur_trx_report(trx_id, index, "update "); - rec_print_new(stderr, rec, *offsets); - } -#endif /* UNIV_DEBUG */ - - if (!row_upd_changes_field_size_or_external(index, *offsets, update)) { - - /* The simplest and the most common case: the update does not - change the size of any field and none of the updated fields is - externally stored in rec or update, and there is enough space - on the compressed page to log the update. */ - - return(btr_cur_update_in_place( - flags, cursor, *offsets, update, - cmpl_info, thr, trx_id, mtr)); - } - - if (rec_offs_any_extern(*offsets)) { -any_extern: - /* Externally stored fields are treated in pessimistic - update */ - - return(DB_OVERFLOW); - } - - for (i = 0; i < upd_get_n_fields(update); i++) { - if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) { - - goto any_extern; - } - } - - page_cursor = btr_cur_get_page_cur(cursor); - - if (!*heap) { - *heap = mem_heap_create( - rec_offs_size(*offsets) - + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets))); - } - - new_entry = row_rec_to_index_entry(rec, index, *offsets, - &n_ext, *heap); - /* We checked above that there are no externally stored fields. 
*/ - ut_a(!n_ext); - - /* The page containing the clustered index record - corresponding to new_entry is latched in mtr. - Thus the following call is safe. */ - row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, - FALSE, *heap); - old_rec_size = rec_offs_size(*offsets); - new_rec_size = rec_get_converted_size(index, new_entry, 0); - - page_zip = buf_block_get_page_zip(block); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - if (page_zip) { - if (page_zip_rec_needs_ext(new_rec_size, page_is_comp(page), - dict_index_get_n_fields(index), - page_zip_get_size(page_zip))) { - goto any_extern; - } - - if (!btr_cur_update_alloc_zip( - page_zip, page_cursor, index, *offsets, - new_rec_size, true, mtr, thr_get_trx(thr))) { - return(DB_ZIP_OVERFLOW); - } - - rec = page_cur_get_rec(page_cursor); - } - - if (UNIV_UNLIKELY(new_rec_size - >= (page_get_free_space_of_empty(page_is_comp(page)) - / 2))) { - /* We may need to update the IBUF_BITMAP_FREE - bits after a reorganize that was done in - btr_cur_update_alloc_zip(). */ - err = DB_OVERFLOW; - goto func_exit; - } - - if (UNIV_UNLIKELY(page_get_data_size(page) - - old_rec_size + new_rec_size - < BTR_CUR_PAGE_COMPRESS_LIMIT)) { - /* We may need to update the IBUF_BITMAP_FREE - bits after a reorganize that was done in - btr_cur_update_alloc_zip(). */ - - /* The page would become too empty */ - err = DB_UNDERFLOW; - goto func_exit; - } - - /* We do not attempt to reorganize if the page is compressed. - This is because the page may fail to compress after reorganization. */ - max_size = page_zip - ? 
page_get_max_insert_size(page, 1) - : (old_rec_size - + page_get_max_insert_size_after_reorganize(page, 1)); - - if (!page_zip) { - max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); - } - - if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) - && (max_size >= new_rec_size)) - || (page_get_n_recs(page) <= 1))) { - - /* We may need to update the IBUF_BITMAP_FREE - bits after a reorganize that was done in - btr_cur_update_alloc_zip(). */ - - /* There was not enough space, or it did not pay to - reorganize: for simplicity, we decide what to do assuming a - reorganization is needed, though it might not be necessary */ - - err = DB_OVERFLOW; - goto func_exit; - } - - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets, - update, cmpl_info, - thr, mtr, &roll_ptr); - if (err != DB_SUCCESS) { - /* We may need to update the IBUF_BITMAP_FREE - bits after a reorganize that was done in - btr_cur_update_alloc_zip(). */ - goto func_exit; - } - - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - /* skip CHANGE, LOG */ - ut_ad(err == DB_SUCCESS); - return(DB_SUCCESS); - } - - /* Ok, we may do the replacement. Store on the page infimum the - explicit locks on rec, before deleting rec (see the comment in - btr_cur_pessimistic_update). 
*/ - - lock_rec_store_on_page_infimum(block, rec); - - btr_search_update_hash_on_delete(cursor); - - page_cur_delete_rec(page_cursor, index, *offsets, mtr); - - page_cur_move_to_prev(page_cursor); - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx_id); - } - - /* There are no externally stored columns in new_entry */ - rec = btr_cur_insert_if_possible( - cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr); - ut_a(rec); /* <- We calculated above the insert would fit */ - - /* Restore the old explicit lock state on the record */ - - lock_rec_restore_from_page_infimum(block, rec, block); - - page_cur_move_to_next(page_cursor); - ut_ad(err == DB_SUCCESS); - -func_exit: - if (!(flags & BTR_KEEP_IBUF_BITMAP) - && !dict_index_is_clust(index) - && page_is_leaf(page)) { - - if (page_zip) { - ibuf_update_free_bits_zip(block, mtr); - } else { - ibuf_update_free_bits_low(block, max_ins_size, mtr); - } - } - - return(err); -} - -/*************************************************************//** -If, in a split, a new supremum record was created as the predecessor of the -updated record, the supremum record must inherit exactly the locks on the -updated record. In the split it may have inherited locks from the successor -of the updated record, which is not correct. This function restores the -right locks for the new supremum. 
*/ -static -void -btr_cur_pess_upd_restore_supremum( -/*==============================*/ - buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: updated record */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* page; - buf_block_t* prev_block; - ulint space; - ulint zip_size; - ulint prev_page_no; - - page = buf_block_get_frame(block); - - if (page_rec_get_next(page_get_infimum_rec(page)) != rec) { - /* Updated record is not the first user record on its page */ - - return; - } - - space = buf_block_get_space(block); - zip_size = buf_block_get_zip_size(block); - prev_page_no = btr_page_get_prev(page, mtr); - - ut_ad(prev_page_no != FIL_NULL); - prev_block = buf_page_get_with_no_latch(space, zip_size, - prev_page_no, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_block->frame, mtr) - == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - /* We must already have an x-latch on prev_block! */ - ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX)); - - lock_rec_reset_and_inherit_gap_locks(prev_block, block, - PAGE_HEAP_NO_SUPREMUM, - page_rec_get_heap_no(rec)); -} - -/*************************************************************//** -Check if the total length of the modified blob for the row is within 10% -of the total redo log size. This constraint on the blob length is to -avoid overwriting the redo logs beyond the last checkpoint lsn. -@return DB_SUCCESS or DB_TOO_BIG_FOR_REDO. 
*/ -static -dberr_t -btr_check_blob_limit(const big_rec_t* big_rec_vec) -{ - const ib_uint64_t redo_size = srv_n_log_files * srv_log_file_size - * UNIV_PAGE_SIZE; - const ib_uint64_t redo_10p = redo_size / 10; - ib_uint64_t total_blob_len = 0; - dberr_t err = DB_SUCCESS; - - /* Calculate the total number of bytes for blob data */ - for (ulint i = 0; i < big_rec_vec->n_fields; i++) { - total_blob_len += big_rec_vec->fields[i].len; - } - - if (total_blob_len > redo_10p) { - ib_logf(IB_LOG_LEVEL_ERROR, "The total blob data" - " length (" UINT64PF ") is greater than" - " 10%% of the total redo log size (" UINT64PF - "). Please increase total redo log size.", - total_blob_len, redo_size); - err = DB_TOO_BIG_FOR_REDO; - } - - return(err); -} - -/*************************************************************//** -Performs an update of a record on a page of a tree. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. If the -update is made on the leaf level, to avoid deadlocks, mtr must also -own x-latches to brothers of page, if those brothers exist. We assume -here that the ordering fields of the record do not change. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -btr_cur_pessimistic_update( -/*=======================*/ - ulint flags, /*!< in: undo logging, locking, and rollback - flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ - ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ - mem_heap_t** offsets_heap, - /*!< in/out: pointer to memory heap - that can be emptied */ - mem_heap_t* entry_heap, - /*!< in/out: memory heap for allocating - big_rec and the index tuple */ - big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller */ - const upd_t* update, /*!< in: update vector; this is allowed also - contain trx id and roll ptr fields, but - the values in update vector have no effect */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction; must be - committed before latching any further pages */ -{ - big_rec_t* big_rec_vec = NULL; - big_rec_t* dummy_big_rec; - dict_index_t* index; - buf_block_t* block; - page_t* page; - page_zip_des_t* page_zip; - rec_t* rec; - page_cur_t* page_cursor; - dberr_t err; - dberr_t optim_err; - roll_ptr_t roll_ptr; - ibool was_first; - ulint n_reserved = 0; - ulint n_ext; - trx_t* trx; - ulint max_ins_size = 0; - - *offsets = NULL; - *big_rec = NULL; - - block = btr_cur_get_block(cursor); - page = buf_block_get_frame(block); - page_zip = buf_block_get_page_zip(block); - index = cursor->index; - - ut_ad(thr_get_trx(thr)->fake_changes - || mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(thr_get_trx(thr)->fake_changes - || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - /* The 
insert buffer tree should never be updated in place. */ - ut_ad(!dict_index_is_ibuf(index)); - ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG) - || dict_index_is_clust(index)); - ut_ad(thr_get_trx(thr)->id == trx_id - || (flags & ~BTR_KEEP_POS_FLAG) - == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG - | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG)); - - err = optim_err = btr_cur_optimistic_update( - flags | BTR_KEEP_IBUF_BITMAP, - cursor, offsets, offsets_heap, update, - cmpl_info, thr, trx_id, mtr); - - switch (err) { - case DB_ZIP_OVERFLOW: - case DB_UNDERFLOW: - case DB_OVERFLOW: - break; - default: - err_exit: - /* We suppressed this with BTR_KEEP_IBUF_BITMAP. - For DB_ZIP_OVERFLOW, the IBUF_BITMAP_FREE bits were - already reset by btr_cur_update_alloc_zip() if the - page was recompressed. */ - if (page_zip - && optim_err != DB_ZIP_OVERFLOW - && !dict_index_is_clust(index) - && page_is_leaf(page)) { - ibuf_update_free_bits_zip(block, mtr); - } - - return(err); - } - - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets, - update, cmpl_info, - thr, mtr, &roll_ptr); - if (err != DB_SUCCESS) { - goto err_exit; - } - - if (optim_err == DB_OVERFLOW) { - ulint reserve_flag; - ulint n_extents; - - /* First reserve enough free space for the file segments - of the index tree, so that the update will not fail because - of lack of space */ - if (UNIV_UNLIKELY(cursor->tree_height == ULINT_UNDEFINED)) { - /* When the tree height is uninitialized due to fake - changes, reserve some hardcoded number of extents. 
*/ - ut_a(thr_get_trx(thr)->fake_changes); - n_extents = 3; - } - else { - n_extents = cursor->tree_height / 16 + 3; - } - - if (flags & BTR_NO_UNDO_LOG_FLAG) { - reserve_flag = FSP_CLEANING; - } else { - reserve_flag = FSP_NORMAL; - } - - if (!fsp_reserve_free_extents(&n_reserved, index->space, - n_extents, reserve_flag, mtr)) { - err = DB_OUT_OF_FILE_SPACE; - goto err_exit; - } - } - - rec = btr_cur_get_rec(cursor); - - *offsets = rec_get_offsets( - rec, index, *offsets, ULINT_UNDEFINED, offsets_heap); - - dtuple_t* new_entry = row_rec_to_index_entry( - rec, index, *offsets, &n_ext, entry_heap); - - /* The page containing the clustered index record - corresponding to new_entry is latched in mtr. If the - clustered index record is delete-marked, then its externally - stored fields cannot have been purged yet, because then the - purge would also have removed the clustered index record - itself. Thus the following call is safe. */ - row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, - FALSE, entry_heap); - - trx = thr_get_trx(thr); - - if (!(flags & BTR_KEEP_SYS_FLAG) && UNIV_LIKELY(!trx->fake_changes)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx_id); - } - - if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) { - /* We are in a transaction rollback undoing a row - update: we must free possible externally stored fields - which got new values in the update, if they are not - inherited values. They can be inherited if we have - updated the primary key to another value, and then - update it back again. */ - - ut_ad(big_rec_vec == NULL); - - /* fake_changes should not cause undo. so never reaches here */ - ut_ad(!(trx->fake_changes)); - - btr_rec_free_updated_extern_fields( - index, rec, page_zip, *offsets, update, - trx_is_recv(thr_get_trx(thr)) - ? 
RB_RECOVERY : RB_NORMAL, mtr); - } - - /* We have to set appropriate extern storage bits in the new - record to be inserted: we have to remember which fields were such */ - - ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - ut_ad(rec_offs_validate(rec, index, *offsets)); - n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap); - - if (page_zip) { - ut_ad(page_is_comp(page)); - if (page_zip_rec_needs_ext( - rec_get_converted_size(index, new_entry, n_ext), - TRUE, - dict_index_get_n_fields(index), - page_zip_get_size(page_zip))) { - - goto make_external; - } - } else if (page_zip_rec_needs_ext( - rec_get_converted_size(index, new_entry, n_ext), - page_is_comp(page), 0, 0)) { -make_external: - big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext); - if (UNIV_UNLIKELY(big_rec_vec == NULL)) { - - /* We cannot goto return_after_reservations, - because we may need to update the - IBUF_BITMAP_FREE bits, which was suppressed by - BTR_KEEP_IBUF_BITMAP. */ -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip - || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - if (n_reserved > 0) { - fil_space_release_free_extents( - index->space, n_reserved); - } - - err = DB_TOO_BIG_RECORD; - goto err_exit; - } - - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); - } - - if (UNIV_UNLIKELY(trx->fake_changes)) { - /* skip CHANGE, LOG */ - err = DB_SUCCESS; - goto return_after_reservations; - } - - if (big_rec_vec) { - - err = btr_check_blob_limit(big_rec_vec); - - if (err != DB_SUCCESS) { - if (n_reserved > 0) { - fil_space_release_free_extents( - index->space, n_reserved); - } - goto err_exit; - } - } - - if (!page_zip) { - max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); - } - - /* Store state of explicit locks on rec on the page infimum record, - before deleting rec. 
The page infimum acts as a dummy carrier of the - locks, taking care also of lock releases, before we can move the locks - back on the actual record. There is a special case: if we are - inserting on the root page and the insert causes a call of - btr_root_raise_and_insert. Therefore we cannot in the lock system - delete the lock structs set on the root page even if the root - page carries just node pointers. */ - - lock_rec_store_on_page_infimum(block, rec); - - btr_search_update_hash_on_delete(cursor); - -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - page_cursor = btr_cur_get_page_cur(cursor); - - page_cur_delete_rec(page_cursor, index, *offsets, mtr); - - page_cur_move_to_prev(page_cursor); - - rec = btr_cur_insert_if_possible(cursor, new_entry, - offsets, offsets_heap, n_ext, mtr); - - if (rec) { - page_cursor->rec = rec; - - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), - rec, block); - - if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { - /* The new inserted record owns its possible externally - stored fields */ - btr_cur_unmark_extern_fields( - page_zip, rec, index, *offsets, mtr); - } - - bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG); - - if (btr_cur_compress_if_useful(cursor, adjust, mtr)) { - if (adjust) { - rec_offs_make_valid( - page_cursor->rec, index, *offsets); - } - } else if (!dict_index_is_clust(index) - && page_is_leaf(page)) { - - /* Update the free bits in the insert buffer. - This is the same block which was skipped by - BTR_KEEP_IBUF_BITMAP. 
*/ - if (page_zip) { - ibuf_update_free_bits_zip(block, mtr); - } else { - ibuf_update_free_bits_low(block, max_ins_size, - mtr); - } - } - - err = DB_SUCCESS; - goto return_after_reservations; - } else { - /* If the page is compressed and it initially - compresses very well, and there is a subsequent insert - of a badly-compressing record, it is possible for - btr_cur_optimistic_update() to return DB_UNDERFLOW and - btr_cur_insert_if_possible() to return FALSE. */ - ut_a(page_zip || optim_err != DB_UNDERFLOW); - - /* Out of space: reset the free bits. - This is the same block which was skipped by - BTR_KEEP_IBUF_BITMAP. */ - if (!dict_index_is_clust(index) && page_is_leaf(page)) { - ibuf_reset_free_bits(block); - } - } - - if (big_rec_vec) { - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); - - /* btr_page_split_and_insert() in - btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). - We must keep the index->lock when we created a - big_rec, so that row_upd_clust_rec() can store the - big_rec in the same mini-transaction. */ - - mtr_x_lock(dict_index_get_lock(index), mtr); - } - - /* Was the record to be updated positioned as the first user - record on its page? */ - was_first = page_cur_is_before_first(page_cursor); - - /* Lock checks and undo logging were already performed by - btr_cur_upd_lock_and_undo(). We do not try - btr_cur_optimistic_insert() because - btr_cur_insert_if_possible() already failed above. */ - - err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG - | BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG, - cursor, offsets, offsets_heap, - new_entry, &rec, - &dummy_big_rec, n_ext, NULL, mtr); - ut_a(rec); - ut_a(err == DB_SUCCESS); - ut_a(dummy_big_rec == NULL); - ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); - page_cursor->rec = rec; - - if (dict_index_is_sec_or_ibuf(index)) { - /* Update PAGE_MAX_TRX_ID in the index page header. 
- It was not updated by btr_cur_pessimistic_insert() - because of BTR_NO_LOCKING_FLAG. */ - buf_block_t* rec_block; - - rec_block = btr_cur_get_block(cursor); - - page_update_max_trx_id(rec_block, - buf_block_get_page_zip(rec_block), - trx_id, mtr); - } - - if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { - /* The new inserted record owns its possible externally - stored fields */ - buf_block_t* rec_block = btr_cur_get_block(cursor); - -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); - page = buf_block_get_frame(rec_block); -#endif /* UNIV_ZIP_DEBUG */ - page_zip = buf_block_get_page_zip(rec_block); - - btr_cur_unmark_extern_fields(page_zip, - rec, index, *offsets, mtr); - } - - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), - rec, block); - - /* If necessary, restore also the correct lock state for a new, - preceding supremum record created in a page split. While the old - record was nonexistent, the supremum might have inherited its locks - from a wrong record. */ - - if (!was_first) { - btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor), - rec, mtr); - } - -return_after_reservations: -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - if (n_reserved > 0) { - fil_space_release_free_extents(index->space, n_reserved); - } - - *big_rec = big_rec_vec; - - return(err); -} - -/*==================== B-TREE DELETE MARK AND UNMARK ===============*/ - -/****************************************************************//** -Writes the redo log record for delete marking or unmarking of an index -record. 
*/ -UNIV_INLINE -void -btr_cur_del_mark_set_clust_rec_log( -/*===============================*/ - rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index of the record */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* log_ptr; - - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, - page_rec_is_comp(rec) - ? MLOG_COMP_REC_CLUST_DELETE_MARK - : MLOG_REC_CLUST_DELETE_MARK, - 1 + 1 + DATA_ROLL_PTR_LEN - + 14 + 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery */ - return; - } - - *log_ptr++ = 0; - *log_ptr++ = 1; - - log_ptr = row_upd_write_sys_vals_to_log( - index, trx_id, roll_ptr, log_ptr, mtr); - mach_write_to_2(log_ptr, page_offset(rec)); - log_ptr += 2; - - mlog_close(mtr, log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/****************************************************************//** -Parses the redo log record for delete marking or unmarking of a clustered -index record. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_cur_parse_del_mark_set_clust_rec( -/*=================================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in/out: page or NULL */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - dict_index_t* index) /*!< in: index corresponding to page */ -{ - ulint flags; - ulint val; - ulint pos; - trx_id_t trx_id; - roll_ptr_t roll_ptr; - ulint offset; - rec_t* rec; - - ut_ad(!page - || !!page_is_comp(page) == dict_table_is_comp(index->table)); - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - flags = mach_read_from_1(ptr); - ptr++; - val = mach_read_from_1(ptr); - ptr++; - - ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr); - - if (ptr == NULL) { - - return(NULL); - } - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(offset <= UNIV_PAGE_SIZE); - - if (page) { - rec = page + offset; - - /* We do not need to reserve btr_search_latch, as the page - is only being recovered, and there cannot be a hash index to - it. Besides, these fields are being updated in place - and the adaptive hash index does not depend on them. */ - - btr_rec_set_deleted_flag(rec, page_zip, val); - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - - row_upd_rec_sys_fields_in_recovery( - rec, page_zip, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - pos, trx_id, roll_ptr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - } - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************//** -Marks a clustered index record deleted. Writes an undo log record to -undo log on this delete marking. Writes in the trx id field the id -of the deleting transaction, and in the roll ptr field pointer to the -undo log record created. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN -dberr_t -btr_cur_del_mark_set_clust_rec( -/*===========================*/ - buf_block_t* block, /*!< in/out: buffer block of the record */ - rec_t* rec, /*!< in/out: record */ - dict_index_t* index, /*!< in: clustered index of the record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec) */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - roll_ptr_t roll_ptr; - dberr_t err; - page_zip_des_t* page_zip; - trx_t* trx; - - ut_ad(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - ut_ad(buf_block_get_frame(block) == page_align(rec)); - ut_ad(page_is_leaf(page_align(rec))); - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops) { - btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark "); - rec_print_new(stderr, rec, offsets); - } -#endif /* UNIV_DEBUG */ - - ut_ad(dict_index_is_clust(index)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - /* skip LOCK, UNDO, CHANGE, LOG */ - return(DB_SUCCESS); - } - - err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block, - rec, index, offsets, thr); - - if (err != DB_SUCCESS) { - - return(err); - } - - err = trx_undo_report_row_operation(thr, - index, NULL, NULL, 0, rec, offsets, - &roll_ptr); - if (err != DB_SUCCESS) { - - return(err); - } - - /* The btr_search_latch is not needed here, because - the adaptive hash index does not depend on the delete-mark - and the delete-mark is being updated in place. 
*/ - - page_zip = buf_block_get_page_zip(block); - - btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE); - btr_rec_set_deleted_flag(rec, page_zip, TRUE); - - trx = thr_get_trx(thr); - - if (dict_index_is_online_ddl(index)) { - row_log_table_delete(rec, index, offsets, NULL); - } - - row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr); - - btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id, - roll_ptr, mtr); - - return(err); -} - -/****************************************************************//** -Writes the redo log record for a delete mark setting of a secondary -index record. */ -UNIV_INLINE -void -btr_cur_del_mark_set_sec_rec_log( -/*=============================*/ - rec_t* rec, /*!< in: record */ - ibool val, /*!< in: value to set */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* log_ptr; - ut_ad(val <= 1); - - log_ptr = mlog_open(mtr, 11 + 1 + 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - log_ptr = mlog_write_initial_log_record_fast( - rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr); - mach_write_to_1(log_ptr, val); - log_ptr++; - - mach_write_to_2(log_ptr, page_offset(rec)); - log_ptr += 2; - - mlog_close(mtr, log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/****************************************************************//** -Parses the redo log record for delete marking or unmarking of a secondary -index record. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_cur_parse_del_mark_set_sec_rec( -/*===============================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in/out: page or NULL */ - page_zip_des_t* page_zip)/*!< in/out: compressed page, or NULL */ -{ - ulint val; - ulint offset; - rec_t* rec; - - if (end_ptr < ptr + 3) { - - return(NULL); - } - - val = mach_read_from_1(ptr); - ptr++; - - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(offset <= UNIV_PAGE_SIZE); - - if (page) { - rec = page + offset; - - /* We do not need to reserve btr_search_latch, as the page - is only being recovered, and there cannot be a hash index to - it. Besides, the delete-mark flag is being updated in place - and the adaptive hash index does not depend on it. */ - - btr_rec_set_deleted_flag(rec, page_zip, val); - } - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************//** -Sets a secondary index record delete mark to TRUE or FALSE. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN -dberr_t -btr_cur_del_mark_set_sec_rec( -/*=========================*/ - ulint flags, /*!< in: locking flag */ - btr_cur_t* cursor, /*!< in: cursor */ - ibool val, /*!< in: value to set */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - rec_t* rec; - dberr_t err; - - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - /* skip LOCK, CHANGE, LOG */ - return(DB_SUCCESS); - } - - block = btr_cur_get_block(cursor); - rec = btr_cur_get_rec(cursor); - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops) { - btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index, - "del mark "); - rec_print(stderr, rec, cursor->index); - } -#endif /* UNIV_DEBUG */ - - err = lock_sec_rec_modify_check_and_lock(flags, - btr_cur_get_block(cursor), - rec, cursor->index, thr, mtr); - if (err != DB_SUCCESS) { - - return(err); - } - - ut_ad(!!page_rec_is_comp(rec) - == dict_table_is_comp(cursor->index->table)); - - /* We do not need to reserve btr_search_latch, as the - delete-mark flag is being updated in place and the adaptive - hash index does not depend on it. */ - btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val); - - btr_cur_del_mark_set_sec_rec_log(rec, val, mtr); - - return(DB_SUCCESS); -} - -/***********************************************************//** -Sets a secondary index record's delete mark to the given value. This -function is only used by the insert buffer merge mechanism. 
*/
UNIV_INTERN
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
	rec_t*		rec,		/*!< in/out: record */
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page
					corresponding to rec, or NULL
					when the tablespace is
					uncompressed */
	ibool		val,		/*!< in: value to set */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	/* No btr_search_latch is taken here: the page has just been
	read to the buffer pool, so there cannot be a hash index to it.
	Besides, the delete-mark flag is updated in place and the
	adaptive hash index does not depend on it. */

	/* Step 1: change the flag in the record (and in the compressed
	page, if one was passed). */
	btr_rec_set_deleted_flag(rec, page_zip, val);

	/* Step 2: write the matching redo log record. */
	btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
}

/*==================== B-TREE RECORD REMOVE =========================*/

/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done!
-@return TRUE if compression occurred */ -UNIV_INTERN -ibool -btr_cur_compress_if_useful( -/*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); - - return(btr_cur_compress_recommendation(cursor, mtr) - && btr_compress(cursor, adjust, mtr)); -} - -/*******************************************************//** -Removes the record on which the tree cursor is positioned on a leaf page. -It is assumed that the mtr has an x-latch on the page where the cursor is -positioned, but no latch on the whole tree. -@return TRUE if success, i.e., the page did not become too empty */ -UNIV_INTERN -ibool -btr_cur_optimistic_delete_func( -/*===========================*/ - btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to - delete; cursor stays valid: if deletion - succeeds, on function exit it points to the - successor of the deleted record */ -#ifdef UNIV_DEBUG - ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ -#endif /* UNIV_DEBUG */ - mtr_t* mtr) /*!< in: mtr; if this function returns - TRUE on a leaf page of a secondary - index, the mtr must be committed - before latching any further pages */ -{ - buf_block_t* block; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ibool no_compress_needed; - rec_offs_init(offsets_); - - ut_ad(flags == 0 || flags == BTR_CREATE_FLAG); - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); - /* This is intended only for leaf page deletions */ - - block = btr_cur_get_block(cursor); - - 
SRV_CORRUPT_TABLE_CHECK(block, return(DB_CORRUPTION);); - - ut_ad(page_is_leaf(buf_block_get_frame(block))); - ut_ad(!dict_index_is_online_ddl(cursor->index) - || dict_index_is_clust(cursor->index) - || (flags & BTR_CREATE_FLAG)); - - rec = btr_cur_get_rec(cursor); - offsets = rec_get_offsets(rec, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - - no_compress_needed = !rec_offs_any_extern(offsets) - && btr_cur_can_delete_without_compress( - cursor, rec_offs_size(offsets), mtr); - - if (no_compress_needed) { - - page_t* page = buf_block_get_frame(block); - page_zip_des_t* page_zip= buf_block_get_page_zip(block); - - lock_update_delete(block, rec); - - btr_search_update_hash_on_delete(cursor); - - if (page_zip) { -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, cursor->index)); -#endif /* UNIV_ZIP_DEBUG */ - page_cur_delete_rec(btr_cur_get_page_cur(cursor), - cursor->index, offsets, mtr); -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, cursor->index)); -#endif /* UNIV_ZIP_DEBUG */ - - /* On compressed pages, the IBUF_BITMAP_FREE - space is not affected by deleting (purging) - records, because it is defined as the minimum - of space available *without* reorganize, and - space available in the modification log. */ - } else { - const ulint max_ins - = page_get_max_insert_size_after_reorganize( - page, 1); - - page_cur_delete_rec(btr_cur_get_page_cur(cursor), - cursor->index, offsets, mtr); - - /* The change buffer does not handle inserts - into non-leaf pages, into clustered indexes, - or into the change buffer. */ - if (page_is_leaf(page) - && !dict_index_is_clust(cursor->index) - && !dict_index_is_ibuf(cursor->index)) { - ibuf_update_free_bits_low(block, max_ins, mtr); - } - } - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(no_compress_needed); -} - -/*************************************************************//** -Removes the record on which the tree cursor is positioned. 
Tries -to compress the page if its fillfactor drops below a threshold -or if it is the only page on the level. It is assumed that mtr holds -an x-latch on the tree and on the cursor page. To avoid deadlocks, -mtr must also own x-latches to brothers of page, if those brothers -exist. -@return TRUE if compression occurred */ -UNIV_INTERN -ibool -btr_cur_pessimistic_delete( -/*=======================*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; - the latter may occur because we may have - to update node pointers on upper levels, - and in the case of variable length keys - these may actually grow in size */ - ibool has_reserved_extents, /*!< in: TRUE if the - caller has already reserved enough free - extents so that he knows that the operation - will succeed */ - btr_cur_t* cursor, /*!< in: cursor on the record to delete; - if compression does not occur, the cursor - stays valid: it points to successor of - deleted record on function exit */ - ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - page_t* page; - page_zip_des_t* page_zip; - dict_index_t* index; - rec_t* rec; - ulint n_reserved = 0; - ibool success; - ibool ret = FALSE; - ulint level; - mem_heap_t* heap; - ulint* offsets; - - block = btr_cur_get_block(cursor); - page = buf_block_get_frame(block); - index = btr_cur_get_index(cursor); - - ut_ad(flags == 0 || flags == BTR_CREATE_FLAG); - ut_ad(!dict_index_is_online_ddl(index) - || dict_index_is_clust(index) - || (flags & BTR_CREATE_FLAG)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - if (!has_reserved_extents) { - /* First reserve enough free space for the file segments - of the index tree, so that the node pointer updates will - not fail because of lack of space */ - - ut_a(cursor->tree_height != ULINT_UNDEFINED); - - ulint n_extents = 
cursor->tree_height / 32 + 1; - - success = fsp_reserve_free_extents(&n_reserved, - index->space, - n_extents, - FSP_CLEANING, mtr); - if (!success) { - *err = DB_OUT_OF_FILE_SPACE; - - return(FALSE); - } - } - - heap = mem_heap_create(1024); - rec = btr_cur_get_rec(cursor); - page_zip = buf_block_get_page_zip(block); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); - - if (rec_offs_any_extern(offsets)) { - btr_rec_free_externally_stored_fields(index, - rec, offsets, page_zip, - rb_ctx, mtr); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - } - - if (UNIV_UNLIKELY(page_get_n_recs(page) < 2) - && UNIV_UNLIKELY(dict_index_get_page(index) - != buf_block_get_page_no(block))) { - - /* If there is only one record, drop the whole page in - btr_discard_page, if this is not the root page */ - - btr_discard_page(cursor, mtr); - - ret = TRUE; - - goto return_after_reservations; - } - - if (flags == 0) { - lock_update_delete(block, rec); - } - - level = btr_page_get_level(page, mtr); - - if (level > 0 - && UNIV_UNLIKELY(rec == page_rec_get_next( - page_get_infimum_rec(page)))) { - - rec_t* next_rec = page_rec_get_next(rec); - - if (btr_page_get_prev(page, mtr) == FIL_NULL) { - - /* If we delete the leftmost node pointer on a - non-leaf level, we must mark the new leftmost node - pointer as the predefined minimum record */ - - /* This will make page_zip_validate() fail until - page_cur_delete_rec() completes. This is harmless, - because everything will take place within a single - mini-transaction and because writing to the redo log - is an atomic operation (performed by mtr_commit()). 
*/ - btr_set_min_rec_mark(next_rec, mtr); - } else { - /* Otherwise, if we delete the leftmost node pointer - on a page, we have to change the father node pointer - so that it is equal to the new leftmost node pointer - on the page */ - - btr_node_ptr_delete(index, block, mtr); - - dtuple_t* node_ptr = dict_index_build_node_ptr( - index, next_rec, buf_block_get_page_no(block), - heap, level); - - btr_insert_on_non_leaf_level( - flags, index, level + 1, node_ptr, mtr); - } - } - - btr_search_update_hash_on_delete(cursor); - - page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - ut_ad(btr_check_node_ptr(index, block, mtr)); - -return_after_reservations: - *err = DB_SUCCESS; - - mem_heap_free(heap); - - if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); - } - - if (n_reserved > 0) { - fil_space_release_free_extents(index->space, n_reserved); - } - - return(ret); -} - -/*******************************************************************//** -Adds path information to the cursor for the current page, for which -the binary search has been performed. 
*/
static
void
btr_cur_add_path_info(
/*==================*/
	btr_cur_t*	cursor,		/*!< in: cursor positioned on a page */
	ulint		height,		/*!< in: height of the page in tree;
					0 means leaf node */
	ulint		root_height)	/*!< in: root node height in tree */
{
	ut_a(cursor->path_arr);

	btr_path_t*	path = cursor->path_arr;

	if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
		/* Do nothing; return empty path */
		path[0].nth_rec = ULINT_UNDEFINED;

		return;
	}

	if (height == 0) {
		/* Mark end of slots for path */
		path[root_height + 1].nth_rec = ULINT_UNDEFINED;
	}

	const rec_t*	rec = btr_cur_get_rec(cursor);
	const page_t*	page = page_align(rec);

	/* Slot 0 belongs to the root; deeper pages follow in order. */
	btr_path_t*	slot = &path[root_height - height];

	slot->nth_rec = page_rec_get_n_recs_before(rec);
	slot->n_recs = page_get_n_recs(page);
	slot->page_no = page_get_page_no(page);
	slot->page_level = btr_page_get_level_low(page);
}

/*******************************************************************//**
Estimate the number of rows between slot1 and slot2 for any level on a
B-tree. This function starts from slot1->page and reads a few pages to
the right, counting their records. If we reach slot2->page quickly then
we know exactly how many records there are between slot1 and slot2 and
we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
then we calculate the average number of records in the pages scanned
so far and assume that all pages that we did not scan up to slot2->page
contain the same number of records, then we multiply that average to
the number of pages between slot1->page and slot2->page (which is
n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
-@return number of rows (exact or estimated) */ -static -ib_int64_t -btr_estimate_n_rows_in_range_on_level( -/*==================================*/ - dict_index_t* index, /*!< in: index */ - btr_path_t* slot1, /*!< in: left border */ - btr_path_t* slot2, /*!< in: right border */ - ib_int64_t n_rows_on_prev_level, /*!< in: number of rows - on the previous level for the - same descend paths; used to - determine the numbe of pages - on this level */ - ibool* is_n_rows_exact) /*!< out: TRUE if the returned - value is exact i.e. not an - estimation */ -{ - ulint space; - ib_int64_t n_rows; - ulint n_pages_read; - ulint page_no; - ulint zip_size; - ulint level; - - space = dict_index_get_space(index); - - n_rows = 0; - n_pages_read = 0; - - /* Assume by default that we will scan all pages between - slot1->page_no and slot2->page_no */ - *is_n_rows_exact = TRUE; - - /* add records from slot1->page_no which are to the right of - the record which serves as a left border of the range, if any */ - if (slot1->nth_rec < slot1->n_recs) { - n_rows += slot1->n_recs - slot1->nth_rec; - } - - /* add records from slot2->page_no which are to the left of - the record which servers as a right border of the range, if any */ - if (slot2->nth_rec > 1) { - n_rows += slot2->nth_rec - 1; - } - - /* count the records in the pages between slot1->page_no and - slot2->page_no (non inclusive), if any */ - - zip_size = fil_space_get_zip_size(space); - - /* Do not read more than this number of pages in order not to hurt - performance with this code which is just an estimation. If we read - this many pages before reaching slot2->page_no then we estimate the - average from the pages scanned so far */ -# define N_PAGES_READ_LIMIT 10 - - page_no = slot1->page_no; - level = slot1->page_level; - - do { - mtr_t mtr; - page_t* page; - buf_block_t* block; - dberr_t err=DB_SUCCESS; - - mtr_start(&mtr); - - /* Fetch the page. 
Because we are not holding the - index->lock, the tree may have changed and we may be - attempting to read a page that is no longer part of - the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to - silence a debug assertion about this. */ - block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, - NULL, BUF_GET_POSSIBLY_FREED, - __FILE__, __LINE__, &mtr, &err); - - ut_ad((block != NULL) == (err == DB_SUCCESS)); - - if (err != DB_SUCCESS) { - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning((void *)NULL, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - index->table->name); - index->table->file_unreadable = true; - } - - mtr_commit(&mtr); - goto inexact; - } - - page = buf_block_get_frame(block); - - /* It is possible that the tree has been reorganized in the - meantime and this is a different page. If this happens the - calculated estimate will be bogus, which is not fatal as - this is only an estimate. We are sure that a page with - page_no exists because InnoDB never frees pages, only - reuses them. */ - if (fil_page_get_type(page) != FIL_PAGE_INDEX - || btr_page_get_index_id(page) != index->id - || btr_page_get_level_low(page) != level) { - - /* The page got reused for something else */ - mtr_commit(&mtr); - goto inexact; - } - - /* It is possible but highly unlikely that the page was - originally written by an old version of InnoDB that did - not initialize FIL_PAGE_TYPE on other than B-tree pages. - For example, this could be an almost-empty BLOB page - that happens to contain the magic values in the fields - that we checked above. */ - - n_pages_read++; - - if (page_no != slot1->page_no) { - /* Do not count the records on slot1->page_no, - we already counted them before this loop. 
*/ - n_rows += page_get_n_recs(page); - } - - page_no = btr_page_get_next(page, &mtr); - - mtr_commit(&mtr); - - if (n_pages_read == N_PAGES_READ_LIMIT - || page_no == FIL_NULL) { - /* Either we read too many pages or - we reached the end of the level without passing - through slot2->page_no, the tree must have changed - in the meantime */ - goto inexact; - } - - } while (page_no != slot2->page_no); - - return(n_rows); - -inexact: - - *is_n_rows_exact = FALSE; - - /* We did interrupt before reaching slot2->page */ - - if (n_pages_read > 0) { - /* The number of pages on this level is - n_rows_on_prev_level, multiply it by the - average number of recs per page so far */ - n_rows = n_rows_on_prev_level - * n_rows / n_pages_read; - } else { - /* The tree changed before we could even - start with slot1->page_no */ - n_rows = 10; - } - - return(n_rows); -} - -/** If the tree gets changed too much between the two dives for the left -and right boundary then btr_estimate_n_rows_in_range_low() will retry -that many times before giving up and returning the value stored in -rows_in_range_arbitrary_ret_val. */ -static const unsigned rows_in_range_max_retries = 4; - -/** We pretend that a range has that many records if the tree keeps changing -for rows_in_range_max_retries retries while we try to estimate the records -in a given range. */ -static const ib_int64_t rows_in_range_arbitrary_ret_val = 10; - -/** Estimates the number of rows in a given index range. 
-@param[in] index index -@param[in] tuple1 range start, may also be empty tuple -@param[in] mode1 search mode for range start -@param[in] tuple2 range end, may also be empty tuple -@param[in] mode2 search mode for range end -@param[in] trx trx -@param[in] nth_attempt if the tree gets modified too much while -we are trying to analyze it, then we will retry (this function will call -itself, incrementing this parameter) -@return estimated number of rows; if after rows_in_range_max_retries -retries the tree keeps changing, then we will just return -rows_in_range_arbitrary_ret_val as a result (if -nth_attempt >= rows_in_range_max_retries and the tree is modified between -the two dives). */ -static -ib_int64_t -btr_estimate_n_rows_in_range_low( - dict_index_t* index, - const dtuple_t* tuple1, - ulint mode1, - const dtuple_t* tuple2, - ulint mode2, - trx_t* trx, - unsigned nth_attempt) -{ - btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS]; - btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS]; - btr_cur_t cursor; - btr_path_t* slot1; - btr_path_t* slot2; - ibool diverged; - ibool diverged_lot; - ulint divergence_level; - ib_int64_t n_rows; - ibool is_n_rows_exact; - ulint i; - mtr_t mtr; - ib_int64_t table_n_rows; - - table_n_rows = dict_table_get_n_rows(index->table); - - mtr_start_trx(&mtr, trx); - - cursor.path_arr = path1; - - if (dtuple_get_n_fields(tuple1) > 0) { - - btr_cur_search_to_nth_level(index, 0, tuple1, mode1, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, - __FILE__, __LINE__, &mtr); - } else { - btr_cur_open_at_index_side(true, index, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); - } - - mtr_commit(&mtr); - - if (index->table->file_unreadable) { - return (0); - } - - mtr_start_trx(&mtr, trx); - -#ifdef UNIV_DEBUG - if (!strcmp(index->name, "iC")) { - DEBUG_SYNC_C("btr_estimate_n_rows_in_range_between_dives"); - } -#endif - - cursor.path_arr = path2; - - if (dtuple_get_n_fields(tuple2) > 0) { - - btr_cur_search_to_nth_level(index, 0, tuple2, mode2, - 
BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, - __FILE__, __LINE__, &mtr); - } else { - btr_cur_open_at_index_side(false, index, - BTR_SEARCH_LEAF | BTR_ESTIMATE, - &cursor, 0, &mtr); - } - - mtr_commit(&mtr); - - /* We have the path information for the range in path1 and path2 */ - - n_rows = 1; - is_n_rows_exact = TRUE; - diverged = FALSE; /* This becomes true when the path is not - the same any more */ - diverged_lot = FALSE; /* This becomes true when the paths are - not the same or adjacent any more */ - divergence_level = 1000000; /* This is the level where paths diverged - a lot */ - for (i = 0; ; i++) { - ut_ad(i < BTR_PATH_ARRAY_N_SLOTS); - - slot1 = path1 + i; - slot2 = path2 + i; - - if (slot1->nth_rec == ULINT_UNDEFINED - || slot2->nth_rec == ULINT_UNDEFINED) { - - if (i > divergence_level + 1 && !is_n_rows_exact) { - /* In trees whose height is > 1 our algorithm - tends to underestimate: multiply the estimate - by 2: */ - - n_rows = n_rows * 2; - } - - DBUG_EXECUTE_IF("bug14007649", return(n_rows);); - - /* Do not estimate the number of rows in the range - to over 1 / 2 of the estimated rows in the whole - table */ - - if (n_rows > table_n_rows / 2 && !is_n_rows_exact) { - - n_rows = table_n_rows / 2; - - /* If there are just 0 or 1 rows in the table, - then we estimate all rows are in the range */ - - if (n_rows == 0) { - n_rows = table_n_rows; - } - } - - return(n_rows); - } - - if (!diverged && slot1->nth_rec != slot2->nth_rec) { - - /* If both slots do not point to the same page or if - the paths have crossed and the same page on both - apparently contains a different number of records, - this means that the tree must have changed between - the dive for slot1 and the dive for slot2 at the - beginning of this function. 
*/ - if (slot1->page_no != slot2->page_no - || slot1->page_level != slot2->page_level - || (slot1->nth_rec >= slot2->nth_rec - && slot1->n_recs != slot2->n_recs)) { - - /* If the tree keeps changing even after a - few attempts, then just return some arbitrary - number. */ - if (nth_attempt >= rows_in_range_max_retries) { - return(rows_in_range_arbitrary_ret_val); - } - - const ib_int64_t ret = - btr_estimate_n_rows_in_range_low( - index, tuple1, mode1, - tuple2, mode2, trx, - nth_attempt + 1); - - return(ret); - } - - diverged = TRUE; - - if (slot1->nth_rec < slot2->nth_rec) { - n_rows = slot2->nth_rec - slot1->nth_rec; - - if (n_rows > 1) { - diverged_lot = TRUE; - divergence_level = i; - } - } else { - /* It is possible that - slot1->nth_rec >= slot2->nth_rec - if, for example, we have a single page - tree which contains (inf, 5, 6, supr) - and we select where x > 20 and x < 30; - in this case slot1->nth_rec will point - to the supr record and slot2->nth_rec - will point to 6 */ - return(0); - } - - } else if (diverged && !diverged_lot) { - - if (slot1->nth_rec < slot1->n_recs - || slot2->nth_rec > 1) { - - diverged_lot = TRUE; - divergence_level = i; - - n_rows = 0; - - if (slot1->nth_rec < slot1->n_recs) { - n_rows += slot1->n_recs - - slot1->nth_rec; - } - - if (slot2->nth_rec > 1) { - n_rows += slot2->nth_rec - 1; - } - } - } else if (diverged_lot) { - - n_rows = btr_estimate_n_rows_in_range_on_level( - index, slot1, slot2, n_rows, - &is_n_rows_exact); - } - } -} - -/** Estimates the number of rows in a given index range. 
-@param[in] index index -@param[in] tuple1 range start, may also be empty tuple -@param[in] mode1 search mode for range start -@param[in] tuple2 range end, may also be empty tuple -@param[in] mode2 search mode for range end -@param[in] trx trx -@return estimated number of rows */ -ib_int64_t -btr_estimate_n_rows_in_range( - dict_index_t* index, - const dtuple_t* tuple1, - ulint mode1, - const dtuple_t* tuple2, - ulint mode2, - trx_t* trx) -{ - const ib_int64_t ret = btr_estimate_n_rows_in_range_low( - index, tuple1, mode1, tuple2, mode2, trx, - 1 /* first attempt */); - - return(ret); -} - -/*******************************************************************//** -Record the number of non_null key values in a given index for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are eventually stored in the array: -index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */ -static -void -btr_record_not_null_field_in_rec( -/*=============================*/ - ulint n_unique, /*!< in: dict_index_get_n_unique(index), - number of columns uniquely determine - an index entry */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index), - its size could be for all fields or - that of "n_unique" */ - ib_uint64_t* n_not_null) /*!< in/out: array to record number of - not null rows for n-column prefix */ -{ - ulint i; - - ut_ad(rec_offs_n_fields(offsets) >= n_unique); - - if (n_not_null == NULL) { - return; - } - - for (i = 0; i < n_unique; i++) { - if (rec_offs_nth_sql_null(offsets, i)) { - break; - } - - n_not_null[i]++; - } -} - -/*******************************************************************//** -Estimates the number of different key values in a given index, for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). 
-The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed -0..n_uniq-1) and the number of pages that were sampled is saved in -index->stat_n_sample_sizes[]. -If innodb_stats_method is nulls_ignored, we also record the number of -non-null values for each prefix and stored the estimates in -array index->stat_n_non_null_key_vals. */ -UNIV_INTERN -void -btr_estimate_number_of_different_key_vals( -/*======================================*/ - dict_index_t* index) /*!< in: index */ -{ - btr_cur_t cursor; - page_t* page; - rec_t* rec; - ulint n_cols; - ulint matched_fields; - ulint matched_bytes; - ib_uint64_t* n_diff; - ib_uint64_t* n_not_null; - ibool stats_null_not_equal; - ullint n_sample_pages=1; /* number of pages to sample */ - ulint not_empty_flag = 0; - ulint total_external_size = 0; - ulint i; - ulint j; - ullint add_on; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint* offsets_rec = NULL; - ulint* offsets_next_rec = NULL; - - n_cols = dict_index_get_n_unique(index); - - heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null) - * n_cols - + dict_index_get_n_fields(index) - * (sizeof *offsets_rec - + sizeof *offsets_next_rec)); - - n_diff = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof(ib_int64_t)); - - n_not_null = NULL; - - /* Check srv_innodb_stats_method setting, and decide whether we - need to record non-null value and also decide if NULL is - considered equal (by setting stats_null_not_equal value) */ - switch (srv_innodb_stats_method) { - case SRV_STATS_NULLS_IGNORED: - n_not_null = (ib_uint64_t*) mem_heap_zalloc( - heap, n_cols * sizeof *n_not_null); - /* fall through */ - - case SRV_STATS_NULLS_UNEQUAL: - /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL - case, we will treat NULLs as unequal value */ - stats_null_not_equal = TRUE; - break; - - case SRV_STATS_NULLS_EQUAL: - stats_null_not_equal = FALSE; - break; - - default: - ut_error; - } - - if (srv_stats_sample_traditional) { - /* It makes no sense to 
test more pages than are contained - in the index, thus we lower the number if it is too high */ - if (srv_stats_transient_sample_pages > index->stat_index_size) { - if (index->stat_index_size > 0) { - n_sample_pages = index->stat_index_size; - } - } else { - n_sample_pages = srv_stats_transient_sample_pages; - } - } else { - /* New logaritmic number of pages that are estimated. - Number of pages estimated should be between 1 and - index->stat_index_size. - - If we have only 0 or 1 index pages then we can only take 1 - sample. We have already initialized n_sample_pages to 1. - - So taking index size as I and sample as S and log(I)*S as L - - requirement 1) we want the out limit of the expression to not exceed I; - requirement 2) we want the ideal pages to be at least S; - so the current expression is min(I, max( min(S,I), L) - - looking for simplifications: - - case 1: assume S < I - min(I, max( min(S,I), L) -> min(I , max( S, L)) - - but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L. - - so we have: min(I , L) - - case 2: assume I < S - min(I, max( min(S,I), L) -> min(I, max( I, L)) - - case 2a: L > I - min(I, max( I, L)) -> min(I, L) -> I - - case 2b: when L < I - min(I, max( I, L)) -> min(I, I ) -> I - - so taking all case2 paths is I, our expression is: - n_pages = S < I? min(I,L) : I - */ - if (index->stat_index_size > 1) { - n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ? - (ulint) ut_min((double) index->stat_index_size, - log2(index->stat_index_size)*srv_stats_transient_sample_pages) - : index->stat_index_size; - - } - } - - /* Sanity check */ - ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size < 1 ? 
1 : index->stat_index_size)); - - /* We sample some pages in the index to get an estimate */ - - for (i = 0; i < n_sample_pages; i++) { - mtr_start(&mtr); - - btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); - - /* Count the number of different key values for each prefix of - the key on this index page. If the prefix does not determine - the index record uniquely in the B-tree, then we subtract one - because otherwise our algorithm would give a wrong estimate - for an index where there is just one key value. */ - - if (index->table->file_unreadable) { - mtr_commit(&mtr); - goto exit_loop; - } - - page = btr_cur_get_page(&cursor); - - SRV_CORRUPT_TABLE_CHECK(page, goto exit_loop;); - DBUG_EXECUTE_IF("ib_corrupt_page_while_stats_calc", - page = NULL;); - - SRV_CORRUPT_TABLE_CHECK(page, - { - mtr_commit(&mtr); - goto exit_loop; - }); - - rec = page_rec_get_next(page_get_infimum_rec(page)); - - if (!page_rec_is_supremum(rec)) { - not_empty_flag = 1; - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - - if (n_not_null != NULL) { - btr_record_not_null_field_in_rec( - n_cols, offsets_rec, n_not_null); - } - } - - while (!page_rec_is_supremum(rec)) { - rec_t* next_rec = page_rec_get_next(rec); - if (page_rec_is_supremum(next_rec)) { - total_external_size += - btr_rec_get_externally_stored_len( - rec, offsets_rec); - break; - } - - matched_fields = 0; - matched_bytes = 0; - offsets_next_rec = rec_get_offsets(next_rec, index, - offsets_next_rec, - ULINT_UNDEFINED, - &heap); - - cmp_rec_rec_with_match(rec, next_rec, - offsets_rec, offsets_next_rec, - index, stats_null_not_equal, - &matched_fields, - &matched_bytes); - - for (j = matched_fields; j < n_cols; j++) { - /* We add one if this index record has - a different prefix from the previous */ - - n_diff[j]++; - } - - if (n_not_null != NULL) { - btr_record_not_null_field_in_rec( - n_cols, offsets_next_rec, n_not_null); - } - - total_external_size - += 
btr_rec_get_externally_stored_len( - rec, offsets_rec); - - rec = next_rec; - /* Initialize offsets_rec for the next round - and assign the old offsets_rec buffer to - offsets_next_rec. */ - { - ulint* offsets_tmp = offsets_rec; - offsets_rec = offsets_next_rec; - offsets_next_rec = offsets_tmp; - } - } - - - if (n_cols == dict_index_get_n_unique_in_tree(index)) { - - /* If there is more than one leaf page in the tree, - we add one because we know that the first record - on the page certainly had a different prefix than the - last record on the previous index page in the - alphabetical order. Before this fix, if there was - just one big record on each clustered index page, the - algorithm grossly underestimated the number of rows - in the table. */ - - if (btr_page_get_prev(page, &mtr) != FIL_NULL - || btr_page_get_next(page, &mtr) != FIL_NULL) { - - n_diff[n_cols - 1]++; - } - } - - mtr_commit(&mtr); - } - -exit_loop: - /* If we saw k borders between different key values on - n_sample_pages leaf pages, we can estimate how many - there will be in index->stat_n_leaf_pages */ - - /* We must take into account that our sample actually represents - also the pages used for external storage of fields (those pages are - included in index->stat_n_leaf_pages) */ - - for (j = 0; j < n_cols; j++) { - index->stat_n_diff_key_vals[j] - = BTR_TABLE_STATS_FROM_SAMPLE( - n_diff[j], index, n_sample_pages, - total_external_size, not_empty_flag); - - /* If the tree is small, smaller than - 10 * n_sample_pages + total_external_size, then - the above estimate is ok. For bigger trees it is common that we - do not see any borders between key values in the few pages - we pick. But still there may be n_sample_pages - different key values, or even more. 
Let us try to approximate - that: */ - - add_on = index->stat_n_leaf_pages - / (10 * (n_sample_pages - + total_external_size)); - - if (add_on > n_sample_pages) { - add_on = n_sample_pages; - } - - index->stat_n_diff_key_vals[j] += add_on; - - index->stat_n_sample_sizes[j] = n_sample_pages; - - /* Update the stat_n_non_null_key_vals[] with our - sampled result. stat_n_non_null_key_vals[] is created - and initialized to zero in dict_index_add_to_cache(), - along with stat_n_diff_key_vals[] array */ - if (n_not_null != NULL) { - index->stat_n_non_null_key_vals[j] = - BTR_TABLE_STATS_FROM_SAMPLE( - n_not_null[j], index, n_sample_pages, - total_external_size, not_empty_flag); - } - } - - mem_heap_free(heap); -} - -/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ - -/***********************************************************//** -Gets the offset of the pointer to the externally stored part of a field. -@return offset of the pointer to the externally stored part */ -static -ulint -btr_rec_get_field_ref_offs( -/*=======================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: index of the external field */ -{ - ulint field_ref_offs; - ulint local_len; - - ut_a(rec_offs_nth_extern(offsets, n)); - field_ref_offs = rec_get_nth_field_offs(offsets, n, &local_len); - ut_a(local_len != UNIV_SQL_NULL); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - return(field_ref_offs + local_len - BTR_EXTERN_FIELD_REF_SIZE); -} - -/** Gets a pointer to the externally stored part of a field. -@param rec record -@param offsets rec_get_offsets(rec) -@param n index of the externally stored field -@return pointer to the externally stored part */ -#define btr_rec_get_field_ref(rec, offsets, n) \ - ((rec) + btr_rec_get_field_ref_offs(offsets, n)) - -/** Gets the externally stored size of a record, in units of a database page. 
-@param[in] rec record -@param[in] offsets array returned by rec_get_offsets() -@return externally stored part, in units of a database page */ - -ulint -btr_rec_get_externally_stored_len( - const rec_t* rec, - const ulint* offsets) -{ - ulint n_fields; - ulint total_extern_len = 0; - ulint i; - - ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); - - if (!rec_offs_any_extern(offsets)) { - return(0); - } - - n_fields = rec_offs_n_fields(offsets); - - for (i = 0; i < n_fields; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - ulint extern_len = mach_read_from_4( - btr_rec_get_field_ref(rec, offsets, i) - + BTR_EXTERN_LEN + 4); - - total_extern_len += ut_calc_align(extern_len, - UNIV_PAGE_SIZE); - } - } - - return(total_extern_len / UNIV_PAGE_SIZE); -} - -/*******************************************************************//** -Sets the ownership bit of an externally stored field in a record. */ -static -void -btr_cur_set_ownership_of_extern_field( -/*==================================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed - part will be updated, or NULL */ - rec_t* rec, /*!< in/out: clustered index record */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint i, /*!< in: field number */ - ibool val, /*!< in: value to set */ - mtr_t* mtr) /*!< in: mtr, or NULL if not logged */ -{ - byte* data; - ulint local_len; - ulint byte_val; - - data = rec_get_nth_field(rec, offsets, i, &local_len); - ut_ad(rec_offs_nth_extern(offsets, i)); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN); - - if (val) { - byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); - } else { -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!(byte_val & BTR_EXTERN_OWNER_FLAG)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - byte_val = byte_val | 
BTR_EXTERN_OWNER_FLAG; - } - - if (page_zip) { - mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val); - page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr); - } else if (mtr != NULL) { - - mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val, - MLOG_1BYTE, mtr); - } else { - mach_write_to_1(data + local_len + BTR_EXTERN_LEN, byte_val); - } - - btr_blob_dbg_owner(rec, index, offsets, i, val); -} - -/*******************************************************************//** -Marks non-updated off-page fields as disowned by this record. The ownership -must be transferred to the updated record which is inserted elsewhere in the -index tree. In purge only the owner of externally stored field is allowed -to free the field. */ -UNIV_INTERN -void -btr_cur_disown_inherited_fields( -/*============================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed - part will be updated, or NULL */ - rec_t* rec, /*!< in/out: record in a clustered index */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - const upd_t* update, /*!< in: update vector */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); - ut_ad(rec_offs_any_extern(offsets)); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i) - && !upd_get_field_by_field_no(update, i)) { - btr_cur_set_ownership_of_extern_field( - page_zip, rec, index, offsets, i, FALSE, mtr); - } - } -} - -/*******************************************************************//** -Marks all extern fields in a record as owned by the record. This function -should be called if the delete mark of a record is removed: a not delete -marked record always owns all its extern fields. 
*/ -static -void -btr_cur_unmark_extern_fields( -/*=========================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed - part will be updated, or NULL */ - rec_t* rec, /*!< in/out: record in a clustered index */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - mtr_t* mtr) /*!< in: mtr, or NULL if not logged */ -{ - ulint n; - ulint i; - - ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); - n = rec_offs_n_fields(offsets); - - if (!rec_offs_any_extern(offsets)) { - - return; - } - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - btr_cur_set_ownership_of_extern_field( - page_zip, rec, index, offsets, i, TRUE, mtr); - } - } -} - -/*******************************************************************//** -Flags the data tuple fields that are marked as extern storage in the -update vector. We use this function to remember which fields we must -mark as extern storage in a record inserted for an update. -@return number of flagged external columns */ -UNIV_INTERN -ulint -btr_push_update_extern_fields( -/*==========================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const upd_t* update, /*!< in: update vector */ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ulint n_pushed = 0; - ulint n; - const upd_field_t* uf; - - uf = update->fields; - n = upd_get_n_fields(update); - - for (; n--; uf++) { - if (dfield_is_ext(&uf->new_val)) { - dfield_t* field - = dtuple_get_nth_field(tuple, uf->field_no); - - if (!dfield_is_ext(field)) { - dfield_set_ext(field); - n_pushed++; - } - - switch (uf->orig_len) { - byte* data; - ulint len; - byte* buf; - case 0: - break; - case BTR_EXTERN_FIELD_REF_SIZE: - /* Restore the original locally stored - part of the column. In the undo log, - InnoDB writes a longer prefix of externally - stored columns, so that column prefixes - in secondary indexes can be reconstructed. 
*/ - dfield_set_data(field, (byte*) dfield_get_data(field) - + dfield_get_len(field) - - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - dfield_set_ext(field); - break; - default: - /* Reconstruct the original locally - stored part of the column. The data - will have to be copied. */ - ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE); - - data = (byte*) dfield_get_data(field); - len = dfield_get_len(field); - - buf = (byte*) mem_heap_alloc(heap, - uf->orig_len); - /* Copy the locally stored prefix. */ - memcpy(buf, data, - uf->orig_len - - BTR_EXTERN_FIELD_REF_SIZE); - /* Copy the BLOB pointer. */ - memcpy(buf + uf->orig_len - - BTR_EXTERN_FIELD_REF_SIZE, - data + len - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - - dfield_set_data(field, buf, uf->orig_len); - dfield_set_ext(field); - } - } - } - - return(n_pushed); -} - -/*******************************************************************//** -Returns the length of a BLOB part stored on the header page. -@return part length */ -static -ulint -btr_blob_get_part_len( -/*==================*/ - const byte* blob_header) /*!< in: blob header */ -{ - return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN)); -} - -/*******************************************************************//** -Returns the page number where the next BLOB part is stored. -@return page number or FIL_NULL if no more pages */ -static -ulint -btr_blob_get_next_page_no( -/*======================*/ - const byte* blob_header) /*!< in: blob header */ -{ - return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO)); -} - -/*******************************************************************//** -Deallocate a buffer block that was reserved for a BLOB part. 
The mini-transaction is committed first; the block is then freed only if it
still belongs to the same (space, page_no) that it had on entry. */
static
void
btr_blob_free(
/*==========*/
	buf_block_t*	block,	/*!< in: buffer block */
	ibool		all,	/*!< in: TRUE=remove also the compressed page
				if there is one */
	mtr_t*		mtr)	/*!< in: mini-transaction to commit */
{
	buf_pool_t*	buf_pool = buf_pool_from_block(block);
	/* Remember the page identity before the latch is released by the
	commit below, so that it can be re-checked under the mutexes. */
	ulint		space	= buf_block_get_space(block);
	ulint		page_no	= buf_block_get_page_no(block);
	bool		freed	= false;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	mtr_commit(mtr);

	mutex_enter(&buf_pool->LRU_list_mutex);
	mutex_enter(&block->mutex);

	/* Only free the block if it is still allocated to
	the same file page. */

	if (buf_block_get_state(block)
	    == BUF_BLOCK_FILE_PAGE
	    && buf_block_get_space(block) == space
	    && buf_block_get_page_no(block) == page_no) {

		freed = buf_LRU_free_page(&block->page, all);

		if (!freed && all && block->page.zip.data
		    /* Now, buf_LRU_free_page() may release mutexes
		    temporarily */
		    && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
		    && buf_block_get_space(block) == space
		    && buf_block_get_page_no(block) == page_no) {

			/* Attempt to deallocate the uncompressed page
			if the whole block cannot be deallocated. */
			freed = buf_LRU_free_page(&block->page, false);
		}
	}

	/* NOTE(review): the LRU list mutex is released here only when nothing
	was freed; presumably buf_LRU_free_page() releases it itself when it
	returns true -- confirm against its definition. */
	if (!freed) {
		mutex_exit(&buf_pool->LRU_list_mutex);
	}

	mutex_exit(&block->mutex);
}

/*******************************************************************//**
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec.  The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE or DB_TOO_BIG_FOR_REDO */
UNIV_INTERN
dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
	dict_index_t*	index,		/*!< in: index of rec; the index tree
					MUST be X-latched */
	buf_block_t*	rec_block,	/*!< in/out: block containing rec */
	rec_t*		rec,		/*!< in/out: record */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index);
					the "external storage" flags in offsets
					will not correspond to rec when
					this function returns */
	const big_rec_t*big_rec_vec,	/*!< in: vector containing fields
					to be stored externally */
	mtr_t*		btr_mtr,	/*!< in: mtr containing the
					latches to the clustered index */
	enum blob_op	op)		/*!< in: operation code */
{
	ulint		rec_page_no;
	byte*		field_ref;
	ulint		extern_len;
	ulint		store_len;
	ulint		page_no;
	ulint		space_id;
	ulint		zip_size;
	ulint		prev_page_no;
	ulint		hint_page_no;
	ulint		i;
	mtr_t		mtr;
	mtr_t*		alloc_mtr;
	mem_heap_t*	heap = NULL;
	page_zip_des_t*	page_zip;
	z_stream	c_stream;
	buf_block_t**	freed_pages	= NULL;
	ulint		n_freed_pages	= 0;
	dberr_t		error		= DB_SUCCESS;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(rec_offs_any_extern(offsets));
	ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
	ut_a(dict_index_is_clust(index));

	page_zip = buf_block_get_page_zip(rec_block);
	ut_a(dict_table_zip_size(index->table)
	     == buf_block_get_zip_size(rec_block));

	space_id = buf_block_get_space(rec_block);
	zip_size = buf_block_get_zip_size(rec_block);
	rec_page_no = buf_block_get_page_no(rec_block);
	ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);

	/* Reject the whole vector up front if it violates the BLOB size
	limit; nothing has been allocated yet, so a plain return is safe. */
	error = btr_check_blob_limit(big_rec_vec);

	if (error != DB_SUCCESS) {
		ut_ad(op == BTR_STORE_INSERT);
		return(error);
	}

	if (page_zip) {
		int	err;

		/* Zlib deflate needs 128 kilobytes for the default
		window size, plus 512 << memLevel, plus a few
		kilobytes for small objects.  We use reduced memLevel
		to limit the memory consumption, and preallocate the
		heap, hoping to avoid memory fragmentation. */
		heap = mem_heap_create(250000);
		page_zip_set_alloc(&c_stream, heap);

		err = deflateInit2(&c_stream, page_zip_level,
				   Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
		ut_a(err == Z_OK);
	}

	if (btr_blob_op_is_update(op)) {
		/* Avoid reusing pages that have been previously freed
		in btr_mtr. */
		if (btr_mtr->n_freed_pages) {
			if (heap == NULL) {
				heap = mem_heap_create(
					btr_mtr->n_freed_pages
					* sizeof *freed_pages);
			}

			freed_pages = static_cast<buf_block_t**>(
				mem_heap_alloc(
					heap,
					btr_mtr->n_freed_pages
					* sizeof *freed_pages));
			n_freed_pages = 0;
		}

		/* Because btr_mtr will be committed after mtr, it is
		possible that the tablespace has been extended when
		the B-tree record was updated or inserted, or it will
		be extended while allocating pages for big_rec.

		TODO: In mtr (not btr_mtr), write a redo log record
		about extending the tablespace to its current size,
		and remember the current size. Whenever the tablespace
		grows as pages are allocated, write further redo log
		records to mtr. (Currently tablespace extension is not
		covered by the redo log. If it were, the record would
		only be written to btr_mtr, which is committed after
		mtr.) */
		alloc_mtr = btr_mtr;
	} else {
		/* Use the local mtr for allocations. */
		alloc_mtr = &mtr;
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	/* All pointers to externally stored columns in the record
	must either be zero or they must be pointers to inherited
	columns, owned by this record or an earlier record version. */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (!rec_offs_nth_extern(offsets, i)) {
			continue;
		}
		field_ref = btr_rec_get_field_ref(rec, offsets, i);

		ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
		/* Either this must be an update in place,
		or the BLOB must be inherited, or the BLOB pointer
		must be zero (will be written in this function). */
		ut_a(op == BTR_STORE_UPDATE
		     || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
		     || !memcmp(field_ref, field_ref_zero,
				BTR_EXTERN_FIELD_REF_SIZE));
	}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
	/* We have to create a file segment to the tablespace
	for each field and put the pointer to the field in rec */

	for (i = 0; i < big_rec_vec->n_fields; i++) {
		field_ref = btr_rec_get_field_ref(
			rec, offsets, big_rec_vec->fields[i].field_no);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		/* A zero BLOB pointer should have been initially inserted. */
		ut_a(!memcmp(field_ref, field_ref_zero,
			     BTR_EXTERN_FIELD_REF_SIZE));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
		extern_len = big_rec_vec->fields[i].len;
		UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data,
				   extern_len);

		ut_a(extern_len > 0);

		/* FIL_NULL marks "no BLOB page written yet for this
		field"; it is tested below to recognise the first page. */
		prev_page_no = FIL_NULL;

		if (page_zip) {
			int	err = deflateReset(&c_stream);
			ut_a(err == Z_OK);

			c_stream.next_in = (Bytef*)
				big_rec_vec->fields[i].data;
			c_stream.avail_in = static_cast<uInt>(extern_len);
		}

		/* One iteration per BLOB page of this field. */
		for (;;) {
			buf_block_t*	block;
			page_t*		page;

			mtr_start(&mtr);

			/* Ask for the page right after the previous BLOB
			page, or right after the record's page for the
			first one. */
			if (prev_page_no == FIL_NULL) {
				hint_page_no = 1 + rec_page_no;
			} else {
				hint_page_no = prev_page_no + 1;
			}

alloc_another:
			block = btr_page_alloc(index, hint_page_no,
					       FSP_NO_DIR, 0, alloc_mtr, &mtr);
			if (UNIV_UNLIKELY(block == NULL)) {
				mtr_commit(&mtr);
				error = DB_OUT_OF_FILE_SPACE;
				goto func_exit;
			}

			if (rw_lock_get_x_lock_count(&block->lock) > 1) {
				/* This page must have been freed in
				btr_mtr previously. Put it aside, and
				allocate another page for the BLOB data. */
				ut_ad(alloc_mtr == btr_mtr);
				ut_ad(btr_blob_op_is_update(op));
				ut_ad(n_freed_pages < btr_mtr->n_freed_pages);
				freed_pages[n_freed_pages++] = block;
				goto alloc_another;
			}

			page_no = buf_block_get_page_no(block);
			page = buf_block_get_frame(block);

			if (prev_page_no != FIL_NULL) {
				/* Chain the previous BLOB page to the one
				just allocated. */
				buf_block_t*	prev_block;
				page_t*		prev_page;

				prev_block = buf_page_get(space_id, zip_size,
							  prev_page_no,
							  RW_X_LATCH, &mtr);
				buf_block_dbg_add_level(prev_block,
							SYNC_EXTERN_STORAGE);
				prev_page = buf_block_get_frame(prev_block);

				if (page_zip) {
					/* Compressed BLOB pages keep the
					link in FIL_PAGE_NEXT; mirror the
					4 bytes into the compressed copy. */
					mlog_write_ulint(
						prev_page + FIL_PAGE_NEXT,
						page_no, MLOG_4BYTES, &mtr);
					memcpy(buf_block_get_page_zip(
						       prev_block)
					       ->data + FIL_PAGE_NEXT,
					       prev_page + FIL_PAGE_NEXT, 4);
				} else {
					/* Uncompressed BLOB pages keep the
					link in the BLOB header. */
					mlog_write_ulint(
						prev_page + FIL_PAGE_DATA
						+ BTR_BLOB_HDR_NEXT_PAGE_NO,
						page_no, MLOG_4BYTES, &mtr);
				}

			} else if (dict_index_is_online_ddl(index)) {
				row_log_table_blob_alloc(index, page_no);
			}

			if (page_zip) {
				int		err;
				page_zip_des_t*	blob_page_zip;

				/* Write FIL_PAGE_TYPE to the redo log
				separately, before logging any other
				changes to the page, so that the debug
				assertions in
				recv_parse_or_apply_log_rec_body() can
				be made simpler.  Before InnoDB Plugin
				1.0.4, the initialization of
				FIL_PAGE_TYPE was logged as part of
				the mlog_log_string() below. */

				mlog_write_ulint(page + FIL_PAGE_TYPE,
						 prev_page_no == FIL_NULL
						 ? FIL_PAGE_TYPE_ZBLOB
						 : FIL_PAGE_TYPE_ZBLOB2,
						 MLOG_2BYTES, &mtr);

				c_stream.next_out = page
					+ FIL_PAGE_DATA;
				c_stream.avail_out
					= static_cast<uInt>(page_zip_get_size(page_zip))
					- FIL_PAGE_DATA;

				/* Either the stream ended, or this page was
				filled completely and more pages follow. */
				err = deflate(&c_stream, Z_FINISH);
				ut_a(err == Z_OK || err == Z_STREAM_END);
				ut_a(err == Z_STREAM_END
				     || c_stream.avail_out == 0);

				/* Write the "next BLOB page" pointer */
				mlog_write_ulint(page + FIL_PAGE_NEXT,
						 FIL_NULL, MLOG_4BYTES, &mtr);
				/* Initialize the unused "prev page" pointer */
				mlog_write_ulint(page + FIL_PAGE_PREV,
						 FIL_NULL, MLOG_4BYTES, &mtr);
				/* Write a back pointer to the record
				into the otherwise unused area.  This
				information could be useful in
				debugging.  Later, we might want to
				implement the possibility to relocate
				BLOB pages.  Then, we would need to be
				able to adjust the BLOB pointer in the
				record.  We do not store the heap
				number of the record, because it can
				change in page_zip_reorganize() or
				btr_page_reorganize().  However, also
				the page number of the record may
				change when B-tree nodes are split or
				merged. */
				mlog_write_ulint(page
						 + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
						 space_id,
						 MLOG_4BYTES, &mtr);
				mlog_write_ulint(page
						 + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
						 rec_page_no,
						 MLOG_4BYTES, &mtr);

				/* Zero out the unused part of the page. */
				memset(page + page_zip_get_size(page_zip)
				       - c_stream.avail_out,
				       0, c_stream.avail_out);
				mlog_log_string(page
						+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
						page_zip_get_size(page_zip)
						- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
						&mtr);
				/* Copy the page to compressed storage,
				because it will be flushed to disk
				from there. */
				blob_page_zip = buf_block_get_page_zip(block);
				ut_ad(blob_page_zip);
				ut_ad(page_zip_get_size(blob_page_zip)
				      == page_zip_get_size(page_zip));
				memcpy(blob_page_zip->data, page,
				       page_zip_get_size(page_zip));

				/* Intermediate page (neither first nor
				last): the record's BLOB pointer needs no
				update, so skip straight to the commit. */
				if (err == Z_OK && prev_page_no != FIL_NULL) {

					goto next_zip_page;
				}

				if (alloc_mtr == &mtr) {
					rec_block = buf_page_get(
						space_id, zip_size,
						rec_page_no,
						RW_X_LATCH, &mtr);
					buf_block_dbg_add_level(
						rec_block,
						SYNC_NO_ORDER_CHECK);
				}

				/* The length in the BLOB pointer is written
				only once the whole stream has been
				compressed; until then it is kept at zero. */
				if (err == Z_STREAM_END) {
					mach_write_to_4(field_ref
							+ BTR_EXTERN_LEN, 0);
					mach_write_to_4(field_ref
							+ BTR_EXTERN_LEN + 4,
							c_stream.total_in);
				} else {
					memset(field_ref + BTR_EXTERN_LEN,
					       0, 8);
				}

				if (prev_page_no == FIL_NULL) {
					/* First page of the BLOB: record
					where the chain starts. */
					btr_blob_dbg_add_blob(
						rec, big_rec_vec->fields[i]
						.field_no, page_no, index,
						"store");

					mach_write_to_4(field_ref
							+ BTR_EXTERN_SPACE_ID,
							space_id);

					mach_write_to_4(field_ref
							+ BTR_EXTERN_PAGE_NO,
							page_no);

					mach_write_to_4(field_ref
							+ BTR_EXTERN_OFFSET,
							FIL_PAGE_NEXT);
				}

				page_zip_write_blob_ptr(
					page_zip, rec, index, offsets,
					big_rec_vec->fields[i].field_no,
					alloc_mtr);

next_zip_page:
				prev_page_no = page_no;

				/* Commit mtr and release the
				uncompressed page frame to save memory. */
				btr_blob_free(block, FALSE, &mtr);

				if (err == Z_STREAM_END) {
					break;
				}
			} else {
				mlog_write_ulint(page + FIL_PAGE_TYPE,
						 FIL_PAGE_TYPE_BLOB,
						 MLOG_2BYTES, &mtr);

				/* Store as much of the remaining data as
				fits in the payload area of one page. */
				if (extern_len > (UNIV_PAGE_SIZE
						  - FIL_PAGE_DATA
						  - BTR_BLOB_HDR_SIZE
						  - FIL_PAGE_DATA_END)) {
					store_len = UNIV_PAGE_SIZE
						- FIL_PAGE_DATA
						- BTR_BLOB_HDR_SIZE
						- FIL_PAGE_DATA_END;
				} else {
					store_len = extern_len;
				}

				/* extern_len counts the bytes still to be
				written, so (len - extern_len) is the offset
				of the next unwritten byte. */
				mlog_write_string(page + FIL_PAGE_DATA
						  + BTR_BLOB_HDR_SIZE,
						  (const byte*)
						  big_rec_vec->fields[i].data
						  + big_rec_vec->fields[i].len
						  - extern_len,
						  store_len, &mtr);
				mlog_write_ulint(page + FIL_PAGE_DATA
						 + BTR_BLOB_HDR_PART_LEN,
						 store_len, MLOG_4BYTES, &mtr);
				mlog_write_ulint(page + FIL_PAGE_DATA
						 + BTR_BLOB_HDR_NEXT_PAGE_NO,
						 FIL_NULL, MLOG_4BYTES, &mtr);

				extern_len -= store_len;

				if (alloc_mtr == &mtr) {
					rec_block = buf_page_get(
						space_id, zip_size,
						rec_page_no,
						RW_X_LATCH, &mtr);
					buf_block_dbg_add_level(
						rec_block,
						SYNC_NO_ORDER_CHECK);
				}

				/* Update the length in the BLOB pointer to
				the number of bytes stored so far. */
				mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
						 MLOG_4BYTES, alloc_mtr);
				mlog_write_ulint(field_ref
						 + BTR_EXTERN_LEN + 4,
						 big_rec_vec->fields[i].len
						 - extern_len,
						 MLOG_4BYTES, alloc_mtr);

				if (prev_page_no == FIL_NULL) {
					/* First page of the BLOB: record
					where the chain starts. */
					btr_blob_dbg_add_blob(
						rec, big_rec_vec->fields[i]
						.field_no, page_no, index,
						"store");

					mlog_write_ulint(field_ref
							 + BTR_EXTERN_SPACE_ID,
							 space_id, MLOG_4BYTES,
							 alloc_mtr);

					mlog_write_ulint(field_ref
							 + BTR_EXTERN_PAGE_NO,
							 page_no, MLOG_4BYTES,
							 alloc_mtr);

					mlog_write_ulint(field_ref
							 + BTR_EXTERN_OFFSET,
							 FIL_PAGE_DATA,
							 MLOG_4BYTES,
							 alloc_mtr);
				}

				prev_page_no = page_no;

				mtr_commit(&mtr);

				if (extern_len == 0) {
					break;
				}
			}
		}

		/* Debug-only fault injection: pretend the tablespace ran
		out of space after this field was stored. */
		DBUG_EXECUTE_IF("btr_store_big_rec_extern",
				error = DB_OUT_OF_FILE_SPACE;
				goto func_exit;);
	}

func_exit:
	if (page_zip) {
		deflateEnd(&c_stream);
	}

	/* Release the pages that were set aside above because they had
	already been freed in btr_mtr. */
	if (n_freed_pages) {
		ulint	i;

		ut_ad(alloc_mtr == btr_mtr);
		ut_ad(btr_blob_op_is_update(op));

		for (i = 0; i < n_freed_pages; i++) {
			btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr);
		}
	}

	if (heap != NULL) {
		mem_heap_free(heap);
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	/* All pointers to externally stored columns in the record
	must be valid. */
	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
		if (!rec_offs_nth_extern(offsets, i)) {
			continue;
		}

		field_ref = btr_rec_get_field_ref(rec, offsets, i);

		/* The pointer must not be zero if the operation
		succeeded. */
		ut_a(0 != memcmp(field_ref, field_ref_zero,
				 BTR_EXTERN_FIELD_REF_SIZE)
		     || error != DB_SUCCESS);
		/* The column must not be disowned by this record. */
		ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
	}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
	return(error);
}

/*******************************************************************//**
Check the FIL_PAGE_TYPE on an uncompressed BLOB page.
Asserts that the page carries the expected space id and page number. If the
type is not FIL_PAGE_TYPE_BLOB, a diagnostic line is printed to stderr and
the server is aborted, except that in non-debug builds the mismatch is
silently accepted for tablespaces in UNIV_FORMAT_A. */
static
void
btr_check_blob_fil_page_type(
/*=========================*/
	ulint		space_id,	/*!< in: space id */
	ulint		page_no,	/*!< in: page number */
	const page_t*	page,		/*!< in: page */
	ibool		read)		/*!< in: TRUE=read, FALSE=purge */
{
	ulint	type = fil_page_get_type(page);

	ut_a(space_id == page_get_space_id(page));
	ut_a(page_no == page_get_page_no(page));

	if (UNIV_UNLIKELY(type != FIL_PAGE_TYPE_BLOB)) {
		ulint	flags = fil_space_get_flags(space_id);

#ifndef UNIV_DEBUG /* Improve debug test coverage */
		if (dict_tf_get_format(flags) == UNIV_FORMAT_A) {
			/* Old versions of InnoDB did not initialize
			FIL_PAGE_TYPE on BLOB pages.  Do not print
			anything about the type mismatch when reading
			a BLOB page that is in Antelope format.*/
			return;
		}
#endif /* !UNIV_DEBUG */

		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: FIL_PAGE_TYPE=%lu"
			" on BLOB %s space %lu page %lu flags %lx\n",
			(ulong) type, read ? "read" : "purge",
			(ulong) space_id, (ulong) page_no, (ulong) flags);
		ut_error;
	}
}

/*******************************************************************//**
Frees the space in an externally stored field to the file space
management if the field in data is owned by the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
UNIV_INTERN
void
btr_free_externally_stored_field(
/*=============================*/
	dict_index_t*	index,		/*!< in: index of the data, the index
					tree MUST be X-latched; if the tree
					height is 1, then also the root page
					must be X-latched! (this is relevant
					in the case this function is called
					from purge where 'data' is located on
					an undo log page, not an index
					page) */
	byte*		field_ref,	/*!< in/out: field reference */
	const rec_t*	rec,		/*!< in: record containing field_ref, for
					page_zip_write_blob_ptr(), or NULL */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index),
					or NULL */
	page_zip_des_t*	page_zip,	/*!< in: compressed page corresponding
					to rec, or NULL if rec == NULL */
	ulint		i,		/*!< in: field number of field_ref;
					ignored if rec == NULL */
	enum trx_rb_ctx	rb_ctx,		/*!< in: rollback context */
	mtr_t*		local_mtr MY_ATTRIBUTE((unused))) /*!< in: mtr
					containing the latch to data an an
					X-latch to the index tree */
{
	page_t*		page;
	const ulint	space_id	= mach_read_from_4(
		field_ref + BTR_EXTERN_SPACE_ID);
	const ulint	start_page	= mach_read_from_4(
		field_ref + BTR_EXTERN_PAGE_NO);
	ulint		rec_zip_size = dict_table_zip_size(index->table);
	ulint		ext_zip_size;
	ulint		page_no;
	ulint		next_page_no;
	mtr_t		mtr;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
				     MTR_MEMO_PAGE_X_FIX));
	ut_ad(!rec || rec_offs_validate(rec, index, offsets));
	ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec,
offsets, i)); - - if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE))) { - /* In the rollback, we may encounter a clustered index - record with some unwritten off-page columns. There is - nothing to free then. */ - if (rb_ctx == RB_NONE) { - char buf[3 * 512]; - char *bufend; - ulint ispace = dict_index_get_space(index); - bufend = innobase_convert_name(buf, sizeof buf, - index->name, strlen(index->name), - NULL, - FALSE); - buf[bufend - buf]='\0'; - ib_logf(IB_LOG_LEVEL_ERROR, "Unwritten off-page columns in " - "rollback context %d. Table %s index %s space_id %lu " - "index space %lu.", - rb_ctx, index->table->name, buf, space_id, ispace); - } - - ut_a(rb_ctx != RB_NONE); - return; - } - - ut_ad(space_id == index->space); - - if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) { - ext_zip_size = fil_space_get_zip_size(space_id); - /* This must be an undo log record in the system tablespace, - that is, in row_purge_upd_exist_or_extern(). - Currently, externally stored records are stored in the - same tablespace as the referring records. */ - ut_ad(!page_get_space_id(page_align(field_ref))); - ut_ad(!rec); - ut_ad(!page_zip); - } else { - ext_zip_size = rec_zip_size; - } - - if (!rec) { - /* This is a call from row_purge_upd_exist_or_extern(). */ - ut_ad(!page_zip); - rec_zip_size = 0; - } - -#ifdef UNIV_BLOB_DEBUG - if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) - && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG) - && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) { - /* This off-page column will be freed. - Check that no references remain. */ - - btr_blob_dbg_t b; - - b.blob_page_no = start_page; - - if (rec) { - /* Remove the reference from the record to the - BLOB. If the BLOB were not freed, the - reference would be removed when the record is - removed. 
Freeing the BLOB will overwrite the - BTR_EXTERN_PAGE_NO in the field_ref of the - record with FIL_NULL, which would make the - btr_blob_dbg information inconsistent with the - record. */ - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - b.ref_field_no = i; - btr_blob_dbg_rbt_delete(index, &b, "free"); - } - - btr_blob_dbg_assert_empty(index, b.blob_page_no); - } -#endif /* UNIV_BLOB_DEBUG */ - - for (;;) { -#ifdef UNIV_SYNC_DEBUG - buf_block_t* rec_block; -#endif /* UNIV_SYNC_DEBUG */ - buf_block_t* ext_block; - - mtr_start(&mtr); - -#ifdef UNIV_SYNC_DEBUG - rec_block = -#endif /* UNIV_SYNC_DEBUG */ - buf_page_get(page_get_space_id(page_align(field_ref)), - rec_zip_size, - page_get_page_no(page_align(field_ref)), - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK); - page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO); - - if (/* There is no external storage data */ - page_no == FIL_NULL - /* This field does not own the externally stored field */ - || (mach_read_from_1(field_ref + BTR_EXTERN_LEN) - & BTR_EXTERN_OWNER_FLAG) - /* Rollback and inherited field */ - || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY) - && (mach_read_from_1(field_ref + BTR_EXTERN_LEN) - & BTR_EXTERN_INHERITED_FLAG))) { - - /* Do not free */ - mtr_commit(&mtr); - - return; - } - - if (page_no == start_page && dict_index_is_online_ddl(index)) { - row_log_table_blob_free(index, start_page); - } - - ext_block = buf_page_get(space_id, ext_zip_size, page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE); - page = buf_block_get_frame(ext_block); - - if (ext_zip_size) { - /* Note that page_zip will be NULL - in row_purge_upd_exist_or_extern(). 
*/ - switch (fil_page_get_type(page)) { - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - break; - default: - ut_error; - } - next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT); - - btr_page_free_low(index, ext_block, 0, true, &mtr); - - if (page_zip != NULL) { - mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO, - next_page_no); - mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4, - 0); - page_zip_write_blob_ptr(page_zip, rec, index, - offsets, i, &mtr); - } else { - mlog_write_ulint(field_ref - + BTR_EXTERN_PAGE_NO, - next_page_no, - MLOG_4BYTES, &mtr); - mlog_write_ulint(field_ref - + BTR_EXTERN_LEN + 4, 0, - MLOG_4BYTES, &mtr); - } - } else { - ut_a(!page_zip); - btr_check_blob_fil_page_type(space_id, page_no, page, - FALSE); - - next_page_no = mach_read_from_4( - page + FIL_PAGE_DATA - + BTR_BLOB_HDR_NEXT_PAGE_NO); - - /* We must supply the page level (= 0) as an argument - because we did not store it on the page (we save the - space overhead from an index page header. */ - - btr_page_free_low(index, ext_block, 0, true, &mtr); - - mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, - next_page_no, - MLOG_4BYTES, &mtr); - /* Zero out the BLOB length. If the server - crashes during the execution of this function, - trx_rollback_or_clean_all_recovered() could - dereference the half-deleted BLOB, fetching a - wrong prefix for the BLOB. */ - mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4, - 0, - MLOG_4BYTES, &mtr); - } - - /* Commit mtr and release the BLOB block to save memory. */ - btr_blob_free(ext_block, TRUE, &mtr); - } -} - -/***********************************************************//** -Frees the externally stored fields for a record. 
*/
static
void
btr_rec_free_externally_stored_fields(
/*==================================*/
	dict_index_t*	index,	/*!< in: index of the data, the index
				tree MUST be X-latched */
	rec_t*		rec,	/*!< in/out: record */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
				part will be updated, or NULL */
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
				an X-latch to record page and to the index
				tree */
{
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));

	const ulint	field_count = rec_offs_n_fields(offsets);

	/* Walk all fields of the record; only those flagged as externally
	stored have a field reference to hand to the BLOB deallocator. */
	for (ulint field_no = 0; field_no < field_count; field_no++) {
		if (!rec_offs_nth_extern(offsets, field_no)) {
			continue;
		}

		byte*	field_ref = btr_rec_get_field_ref(rec, offsets,
							  field_no);

		btr_free_externally_stored_field(index, field_ref,
						 rec, offsets, page_zip,
						 field_no, rb_ctx, mtr);
	}
}

/***********************************************************//**
Frees the externally stored fields for a record, if the field is mentioned
in the update vector.
*/
static
void
btr_rec_free_updated_extern_fields(
/*===============================*/
	dict_index_t*	index,	/*!< in: index of rec; the index tree MUST be
				X-latched */
	rec_t*		rec,	/*!< in/out: record */
	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
				part will be updated, or NULL */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	const upd_t*	update,	/*!< in: update vector */
	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
				an X-latch to record page and to the tree */
{
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));

	const ulint	n_updates = upd_get_n_fields(update);

	/* Only the fields named by the update vector are considered, and of
	those only the ones that are stored externally in rec. */
	for (ulint pos = 0; pos < n_updates; pos++) {
		const ulint	field_no
			= upd_get_nth_field(update, pos)->field_no;

		if (!rec_offs_nth_extern(offsets, field_no)) {
			continue;
		}

		ulint	field_len;
		byte*	field = rec_get_nth_field(rec, offsets, field_no,
						  &field_len);

		ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);

		/* The field reference occupies the last
		BTR_EXTERN_FIELD_REF_SIZE bytes of the local field data. */
		byte*	field_ref = field + field_len
			- BTR_EXTERN_FIELD_REF_SIZE;

		btr_free_externally_stored_field(index, field_ref,
						 rec, offsets, page_zip,
						 field_no, rb_ctx, mtr);
	}
}

/*******************************************************************//**
Copies the prefix of an uncompressed BLOB. The clustered index record
that points to this BLOB must be protected by a lock or a page latch.
-@return number of bytes written to buf */ -static -ulint -btr_copy_blob_prefix( -/*=================*/ - byte* buf, /*!< out: the externally stored part of - the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint space_id,/*!< in: space id of the BLOB pages */ - ulint page_no,/*!< in: page number of the first BLOB page */ - ulint offset, /*!< in: offset on the first BLOB page */ - trx_t* trx) /*!< in: transaction handle */ -{ - ulint copied_len = 0; - - for (;;) { - mtr_t mtr; - buf_block_t* block; - const page_t* page; - const byte* blob_header; - ulint part_len; - ulint copy_len; - - mtr_start_trx(&mtr, trx); - - block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr); - buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE); - page = buf_block_get_frame(block); - - btr_check_blob_fil_page_type(space_id, page_no, page, TRUE); - - blob_header = page + offset; - part_len = btr_blob_get_part_len(blob_header); - copy_len = ut_min(part_len, len - copied_len); - - memcpy(buf + copied_len, - blob_header + BTR_BLOB_HDR_SIZE, copy_len); - copied_len += copy_len; - - page_no = btr_blob_get_next_page_no(blob_header); - - mtr_commit(&mtr); - - if (page_no == FIL_NULL || copy_len != part_len) { - UNIV_MEM_ASSERT_RW(buf, copied_len); - return(copied_len); - } - - /* On other BLOB pages except the first the BLOB header - always is at the page data start: */ - - offset = FIL_PAGE_DATA; - - ut_ad(copied_len <= len); - } -} - -/*******************************************************************//** -Copies the prefix of a compressed BLOB. The clustered index record -that points to this BLOB must be protected by a lock or a page latch. 
-@return number of bytes written to buf */ -static -ulint -btr_copy_zblob_prefix( -/*==================*/ - byte* buf, /*!< out: the externally stored part of - the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: compressed BLOB page size */ - ulint space_id,/*!< in: space id of the BLOB pages */ - ulint page_no,/*!< in: page number of the first BLOB page */ - ulint offset) /*!< in: offset on the first BLOB page */ -{ - ulint page_type = FIL_PAGE_TYPE_ZBLOB; - mem_heap_t* heap; - int err; - z_stream d_stream; - - /* Inflate straight into the caller's buffer; the input side is attached page by page inside the loop. */ d_stream.next_out = buf; - d_stream.avail_out = static_cast<uInt>(len); - d_stream.next_in = Z_NULL; - d_stream.avail_in = 0; - - /* Zlib inflate needs 32 kilobytes for the default - window size, plus a few kilobytes for small objects. */ - heap = mem_heap_create(40000); - page_zip_set_alloc(&d_stream, heap); - - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size >= UNIV_ZIP_SIZE_MIN); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_ad(space_id); - - err = inflateInit(&d_stream); - ut_a(err == Z_OK); - - /* Follow the chain of compressed BLOB pages, passing the payload of each page to inflate(). */ for (;;) { - buf_page_t* bpage; - ulint next_page_no; - - /* There is no latch on bpage directly. Instead, - bpage is protected by the B-tree page latch that - is being held on the clustered index record, or, - in row_merge_copy_blobs(), by an exclusive table lock. 
*/ - bpage = buf_page_get_zip(space_id, zip_size, page_no); - - if (UNIV_UNLIKELY(!bpage)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Cannot load" - " compressed BLOB" - " page %lu space %lu\n", - (ulong) page_no, (ulong) space_id); - /* No page is held here, so bypass end_of_blob, which would release bpage. */ goto func_exit; - } - - if (UNIV_UNLIKELY - (fil_page_get_type(bpage->zip.data) != page_type)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Unexpected type %lu of" - " compressed BLOB" - " page %lu space %lu\n", - (ulong) fil_page_get_type(bpage->zip.data), - (ulong) page_no, (ulong) space_id); - ut_ad(0); - goto end_of_blob; - } - - next_page_no = mach_read_from_4(bpage->zip.data + offset); - - if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) { - /* When the BLOB begins at page header, - the compressed data payload does not - immediately follow the next page pointer. */ - offset = FIL_PAGE_DATA; - } else { - offset += 4; - } - - d_stream.next_in = bpage->zip.data + offset; - d_stream.avail_in = static_cast<uInt>(zip_size - offset); - - err = inflate(&d_stream, Z_NO_FLUSH); - switch (err) { - case Z_OK: - /* Output buffer full: the requested prefix is complete. */ if (!d_stream.avail_out) { - goto end_of_blob; - } - break; - case Z_STREAM_END: - if (next_page_no == FIL_NULL) { - goto end_of_blob; - } - /* fall through */ - default: -inflate_error: - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: inflate() of" - " compressed BLOB" - " page %lu space %lu returned %d (%s)\n", - (ulong) page_no, (ulong) space_id, - err, d_stream.msg); - /* fall through: after reporting the error, stop copying */ case Z_BUF_ERROR: - goto end_of_blob; - } - - if (next_page_no == FIL_NULL) { - if (!d_stream.avail_in) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: unexpected end of" - " compressed BLOB" - " page %lu space %lu\n", - (ulong) page_no, - (ulong) space_id); - } else { - err = inflate(&d_stream, Z_FINISH); - switch (err) { - case Z_STREAM_END: - case Z_BUF_ERROR: - break; - default: - goto inflate_error; - } - } - -end_of_blob: /* shared exit for every path that still holds bpage, including the gotos above */ - buf_page_release_zip(bpage); - goto func_exit; - } - - buf_page_release_zip(bpage); 
- - /* On other BLOB pages except the first - the BLOB header always is at the page header: */ - - page_no = next_page_no; - offset = FIL_PAGE_NEXT; - /* Every page after the first is checked against this type at the top of the loop. */ page_type = FIL_PAGE_TYPE_ZBLOB2; - } - -func_exit: - inflateEnd(&d_stream); - mem_heap_free(heap); - UNIV_MEM_ASSERT_RW(buf, d_stream.total_out); - return(d_stream.total_out); -} - -/*******************************************************************//** -Copies the prefix of an externally stored field of a record. The -clustered index record that points to this BLOB must be protected by a -lock or a page latch. Dispatches on zip_size to the compressed or the uncompressed BLOB reader. -@return number of bytes written to buf */ -static -ulint -btr_copy_externally_stored_field_prefix_low( -/*========================================*/ - byte* buf, /*!< out: the externally stored part of - the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint space_id,/*!< in: space id of the first BLOB page */ - ulint page_no,/*!< in: page number of the first BLOB page */ - ulint offset, /*!< in: offset on the first BLOB page */ - trx_t* trx) /*!< in: transaction handle; only forwarded to the uncompressed reader */ -{ - /* Nothing to copy into an empty buffer; no page is read. */ if (UNIV_UNLIKELY(len == 0)) { - return(0); - } - - if (zip_size) { - return(btr_copy_zblob_prefix(buf, len, zip_size, - space_id, page_no, offset)); - } else { - return(btr_copy_blob_prefix(buf, len, space_id, - page_no, offset, trx)); - } -} - -/*******************************************************************//** -Copies the prefix of an externally stored field of a record. The -clustered index record must be protected by a lock or a page latch. 
-@return the length of the copied field, or 0 if the column was being -or has been deleted */ -UNIV_INTERN -ulint -btr_copy_externally_stored_field_prefix( -/*====================================*/ - byte* buf, /*!< out: the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint local_len,/*!< in: length of data, in bytes */ - trx_t* trx) /*!< in: transaction handle */ -{ - ulint space_id; - ulint page_no; - ulint offset; - - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - if (UNIV_UNLIKELY(local_len >= len)) { - memcpy(buf, data, len); - return(len); - } - - memcpy(buf, data, local_len); - data += local_len; - - ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); - - if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) { - /* The externally stored part of the column has been - (partially) deleted. Signal the half-deleted BLOB - to the caller. */ - - return(0); - } - - space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID); - - page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO); - - offset = mach_read_from_4(data + BTR_EXTERN_OFFSET); - - return(local_len - + btr_copy_externally_stored_field_prefix_low(buf + local_len, - len - local_len, - zip_size, - space_id, page_no, - offset, trx)); -} - -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. The -clustered index record must be protected by a lock or a page latch. 
-@return the whole field copied to heap */ -UNIV_INTERN -byte* -btr_copy_externally_stored_field( -/*=============================*/ - ulint* len, /*!< out: length of the whole field */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint local_len,/*!< in: length of data */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx) /*!< in: transaction handle */ -{ - ulint space_id; - ulint page_no; - ulint offset; - ulint extern_len; - byte* buf; - - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID); - - page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO); - - offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET); - - /* Currently a BLOB cannot be bigger than 4 GB; we - leave the 4 upper bytes in the length field unused */ - - extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4); - - buf = (byte*) mem_heap_alloc(heap, local_len + extern_len); - - memcpy(buf, data, local_len); - *len = local_len - + btr_copy_externally_stored_field_prefix_low(buf + local_len, - extern_len, - zip_size, - space_id, - page_no, offset, - trx); - - return(buf); -} - -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. 
-@return the field copied to heap, or NULL if the field is incomplete */ -UNIV_INTERN -byte* -btr_rec_copy_externally_stored_field( -/*=================================*/ - const rec_t* rec, /*!< in: record in a clustered index; - must be protected by a lock or a page latch */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint no, /*!< in: field number */ - ulint* len, /*!< out: length of the field */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx) /*!< in: transaction handle */ -{ - ulint local_len; - const byte* data; - - ut_a(rec_offs_nth_extern(offsets, no)); - - /* An externally stored field can contain some initial - data from the field, and in the last 20 bytes it has the - space id, page number, and offset where the rest of the - field data is stored, and the data length in addition to - the data stored locally. We may need to store some data - locally to get the local record length above the 128 byte - limit so that field offsets are stored in two bytes, and - the extern bit is available in those two bytes. */ - - data = rec_get_nth_field(rec, offsets, no, &local_len); - - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - if (UNIV_UNLIKELY - (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE, - field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { - /* The externally stored field was not written yet. - This record should only be seen by - recv_recovery_rollback_active() or any - TRX_ISO_READ_UNCOMMITTED transactions. 
*/ - return(NULL); - } - - return(btr_copy_externally_stored_field(len, data, - zip_size, local_len, heap, - trx)); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/btr/btr0defragment.cc b/storage/xtradb/btr/btr0defragment.cc deleted file mode 100644 index c2f58a8e1cf..00000000000 --- a/storage/xtradb/btr/btr0defragment.cc +++ /dev/null @@ -1,833 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2015, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ -/**************************************************//** -@file btr/btr0defragment.cc -Index defragmentation. 
- -Created 05/29/2014 Rongrong Zhong -Modified 16/07/2014 Sunguck Lee -Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com -*******************************************************/ - -#include "btr0defragment.h" -#ifndef UNIV_HOTBACKUP -#include "btr0cur.h" -#include "btr0sea.h" -#include "btr0pcur.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" -#include "ibuf0ibuf.h" -#include "lock0lock.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "ut0timer.h" - -#include <list> - -/**************************************************//** -Custom nullptr implementation for under g++ 4.6 -*******************************************************/ -/* -// #pragma once -namespace std -{ - // based on SC22/WG21/N2431 = J16/07-0301 - struct nullptr_t - { - template<typename any> operator any * () const - { - return 0; - } - template<class any, typename T> operator T any:: * () const - { - return 0; - } - -#ifdef _MSC_VER - struct pad {}; - pad __[sizeof(void*)/sizeof(pad)]; -#else - char __[sizeof(void*)]; -#endif -private: - // nullptr_t();// {} - // nullptr_t(const nullptr_t&); - // void operator = (const nullptr_t&); - void operator &() const; - template<typename any> void operator +(any) const - { - // I Love MSVC 2005! - } - template<typename any> void operator -(any) const - { - // I Love MSVC 2005! - } - }; -static const nullptr_t __nullptr = {}; -} - -#ifndef nullptr -#define nullptr std::__nullptr -#endif -*/ -/**************************************************//** -End of Custom nullptr implementation for under g++ 4.6 -*******************************************************/ - -/* When there's no work, either because defragment is disabled, or because no -query is submitted, thread checks state every BTR_DEFRAGMENT_SLEEP_IN_USECS.*/ -#define BTR_DEFRAGMENT_SLEEP_IN_USECS 1000000 -/* Reduce the target page size by this amount when compression failure happens -during defragmentaiton. 
512 is chosen because it's a power of 2 and it is about -3% of the page size. When there are compression failures in defragmentation, -our goal is to get a decent defrag ratio with as few compression failure as -possible. From experimentation it seems that reduce the target size by 512 every -time will make sure the page is compressible within a couple of iterations. */ -#define BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE 512 - -/* Work queue for defragmentation. */ -typedef std::list<btr_defragment_item_t*> btr_defragment_wq_t; -static btr_defragment_wq_t btr_defragment_wq; - -/* Mutex protecting the defragmentation work queue.*/ -ib_mutex_t btr_defragment_mutex; -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t btr_defragment_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/* Number of compression failures caused by defragmentation since server -start. */ -ulint btr_defragment_compression_failures = 0; -/* Number of btr_defragment_n_pages calls that altered page but didn't -manage to release any page. */ -ulint btr_defragment_failures = 0; -/* Total number of btr_defragment_n_pages calls that altered page. -The difference between btr_defragment_count and btr_defragment_failures shows -the amount of effort wasted. */ -ulint btr_defragment_count = 0; - -/******************************************************************//** -Constructor for btr_defragment_item_t. */ -btr_defragment_item_t::btr_defragment_item_t( - btr_pcur_t* pcur, - os_event_t event) -{ - this->pcur = pcur; - this->event = event; - this->removed = false; - this->last_processed = 0; -} - -/******************************************************************//** -Destructor for btr_defragment_item_t. */ -btr_defragment_item_t::~btr_defragment_item_t() { - if (this->pcur) { - btr_pcur_free_for_mysql(this->pcur); - } - if (this->event) { - os_event_set(this->event); - } -} - -/******************************************************************//** -Initialize defragmentation. 
*/ -void -btr_defragment_init() -{ - srv_defragment_interval = ut_microseconds_to_timer( - (ulonglong) (1000000.0 / srv_defragment_frequency)); - mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex, - SYNC_ANY_LATCH); -} - -/******************************************************************//** -Shutdown defragmentation. Release all resources. */ -void -btr_defragment_shutdown() -{ - mutex_enter(&btr_defragment_mutex); - list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); - while(iter != btr_defragment_wq.end()) { - btr_defragment_item_t* item = *iter; - iter = btr_defragment_wq.erase(iter); - delete item; - } - mutex_exit(&btr_defragment_mutex); - mutex_free(&btr_defragment_mutex); -} - - -/******************************************************************//** -Functions used by the query threads: btr_defragment_xxx_index -Query threads find/add/remove index. */ -/******************************************************************//** -Check whether the given index is in btr_defragment_wq. We use index->id -to identify indices. */ -bool -btr_defragment_find_index( - dict_index_t* index) /*!< Index to find. */ -{ - mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); - iter != btr_defragment_wq.end(); - ++iter) { - btr_defragment_item_t* item = *iter; - btr_pcur_t* pcur = item->pcur; - btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur); - dict_index_t* idx = btr_cur_get_index(cursor); - if (index->id == idx->id) { - mutex_exit(&btr_defragment_mutex); - return true; - } - } - mutex_exit(&btr_defragment_mutex); - return false; -} - -/******************************************************************//** -Query thread uses this function to add an index to btr_defragment_wq. -Return a pointer to os_event for the query thread to wait on if this is a -synchronized defragmentation. 
*/ -os_event_t -btr_defragment_add_index( - dict_index_t* index, /*!< index to be added */ - bool async, /*!< whether this is an async - defragmentation */ - dberr_t* err) /*!< out: error code */ -{ - mtr_t mtr; - ulint space = dict_index_get_space(index); - ulint zip_size = dict_table_zip_size(index->table); - ulint page_no = dict_index_get_page(index); - *err = DB_SUCCESS; - - mtr_start(&mtr); - // Load index rood page. - buf_block_t* block = btr_block_get(space, zip_size, page_no, RW_NO_LATCH, index, &mtr); - page_t* page = NULL; - - if (block) { - page = buf_block_get_frame(block); - } - - if (page == NULL && index->table->file_unreadable) { - mtr_commit(&mtr); - *err = DB_DECRYPTION_FAILED; - return NULL; - } - - if (page_is_leaf(page)) { - // Index root is a leaf page, no need to defragment. - mtr_commit(&mtr); - return NULL; - } - btr_pcur_t* pcur = btr_pcur_create_for_mysql(); - os_event_t event = NULL; - if (!async) { - event = os_event_create(); - } - btr_pcur_open_at_index_side(true, index, BTR_SEARCH_LEAF, pcur, - true, 0, &mtr); - btr_pcur_move_to_next(pcur, &mtr); - btr_pcur_store_position(pcur, &mtr); - mtr_commit(&mtr); - dict_stats_empty_defrag_summary(index); - btr_defragment_item_t* item = new btr_defragment_item_t(pcur, event); - mutex_enter(&btr_defragment_mutex); - btr_defragment_wq.push_back(item); - mutex_exit(&btr_defragment_mutex); - return event; -} - -/******************************************************************//** -When table is dropped, this function is called to mark a table as removed in -btr_efragment_wq. The difference between this function and the remove_index -function is this will not NULL the event. */ -void -btr_defragment_remove_table( - dict_table_t* table) /*!< Index to be removed. 
*/ -{ - mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); - iter != btr_defragment_wq.end(); - ++iter) { - btr_defragment_item_t* item = *iter; - btr_pcur_t* pcur = item->pcur; - btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur); - dict_index_t* idx = btr_cur_get_index(cursor); - if (table->id == idx->table->id) { - item->removed = true; - } - } - mutex_exit(&btr_defragment_mutex); -} - -/******************************************************************//** -Query thread uses this function to mark an index as removed in -btr_efragment_wq. */ -void -btr_defragment_remove_index( - dict_index_t* index) /*!< Index to be removed. */ -{ - mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); - iter != btr_defragment_wq.end(); - ++iter) { - btr_defragment_item_t* item = *iter; - btr_pcur_t* pcur = item->pcur; - btr_cur_t* cursor = btr_pcur_get_btr_cur(pcur); - dict_index_t* idx = btr_cur_get_index(cursor); - if (index->id == idx->id) { - item->removed = true; - item->event = NULL; - break; - } - } - mutex_exit(&btr_defragment_mutex); -} - -/******************************************************************//** -Functions used by defragmentation thread: btr_defragment_xxx_item. -Defragmentation thread operates on the work *item*. It gets/removes -item from the work queue. */ -/******************************************************************//** -Defragment thread uses this to remove an item from btr_defragment_wq. -When an item is removed from the work queue, all resources associated with it -are free as well. */ -void -btr_defragment_remove_item( - btr_defragment_item_t* item) /*!< Item to be removed. 
*/ -{ - mutex_enter(&btr_defragment_mutex); - for (list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); - iter != btr_defragment_wq.end(); - ++iter) { - if (item == *iter) { - btr_defragment_wq.erase(iter); - delete item; - break; - } - } - mutex_exit(&btr_defragment_mutex); -} - -/******************************************************************//** -Defragment thread uses this to get an item from btr_defragment_wq to work on. -The item is not removed from the work queue so query threads can still access -this item. We keep it this way so query threads can find and kill a -defragmentation even if that index is being worked on. Be aware that while you -work on this item you have no lock protection on it whatsoever. This is OK as -long as the query threads and defragment thread won't modify the same fields -without lock protection. -*/ -btr_defragment_item_t* -btr_defragment_get_item() -{ - if (btr_defragment_wq.empty()) { - return NULL; - //return nullptr; - } - mutex_enter(&btr_defragment_mutex); - list< btr_defragment_item_t* >::iterator iter = btr_defragment_wq.begin(); - if (iter == btr_defragment_wq.end()) { - iter = btr_defragment_wq.begin(); - } - btr_defragment_item_t* item = *iter; - iter++; - mutex_exit(&btr_defragment_mutex); - return item; -} - -/*********************************************************************//** -Check whether we should save defragmentation statistics to persistent storage. -Currently we save the stats to persistent storage every 100 updates. 
*/ -UNIV_INTERN -void -btr_defragment_save_defrag_stats_if_needed( - dict_index_t* index) /*!< in: index */ -{ - if (srv_defragment_stats_accuracy != 0 // stats tracking disabled - && dict_index_get_space(index) != 0 // do not track system tables - && index->stat_defrag_modified_counter - >= srv_defragment_stats_accuracy) { - dict_stats_defrag_pool_add(index); - index->stat_defrag_modified_counter = 0; - } -} - -/*********************************************************************//** -Main defragment functionalities used by defragment thread.*/ -/*************************************************************//** -Calculate number of records from beginning of block that can -fit into size_limit -@return number of records */ -UNIV_INTERN -ulint -btr_defragment_calc_n_recs_for_size( - buf_block_t* block, /*!< in: B-tree page */ - dict_index_t* index, /*!< in: index of the page */ - ulint size_limit, /*!< in: size limit to fit records in */ - ulint* n_recs_size) /*!< out: actual size of the records that fit - in size_limit. */ -{ - page_t* page = buf_block_get_frame(block); - ulint n_recs = 0; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - mem_heap_t* heap = NULL; - ulint size = 0; - page_cur_t cur; - - page_cur_set_before_first(block, &cur); - page_cur_move_to_next(&cur); - while (page_cur_get_rec(&cur) != page_get_supremum_rec(page)) { - rec_t* cur_rec = page_cur_get_rec(&cur); - offsets = rec_get_offsets(cur_rec, index, offsets, - ULINT_UNDEFINED, &heap); - ulint rec_size = rec_offs_size(offsets); - size += rec_size; - if (size > size_limit) { - size = size - rec_size; - break; - } - n_recs ++; - page_cur_move_to_next(&cur); - } - *n_recs_size = size; - return n_recs; -} - -/*************************************************************//** -Merge as many records from the from_block to the to_block. Delete -the from_block if all records are successfully merged to to_block. 
-@return the to_block to target for next merge operation. */ -UNIV_INTERN -buf_block_t* -btr_defragment_merge_pages( - dict_index_t* index, /*!< in: index tree */ - buf_block_t* from_block, /*!< in: origin of merge */ - buf_block_t* to_block, /*!< in: destination of merge */ - ulint zip_size, /*!< in: zip size of the block */ - ulint reserved_space, /*!< in: space reserved for future - insert to avoid immediate page split */ - ulint* max_data_size, /*!< in/out: max data size to - fit in a single compressed page. */ - mem_heap_t* heap, /*!< in/out: pointer to memory heap */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - page_t* from_page = buf_block_get_frame(from_block); - page_t* to_page = buf_block_get_frame(to_block); - ulint space = dict_index_get_space(index); - ulint level = btr_page_get_level(from_page, mtr); - ulint n_recs = page_get_n_recs(from_page); - ulint new_data_size = page_get_data_size(to_page); - ulint max_ins_size = - page_get_max_insert_size(to_page, n_recs); - ulint max_ins_size_reorg = - page_get_max_insert_size_after_reorganize( - to_page, n_recs); - ulint max_ins_size_to_use = max_ins_size_reorg > reserved_space - ? max_ins_size_reorg - reserved_space : 0; - ulint move_size = 0; - ulint n_recs_to_move = 0; - rec_t* rec = NULL; - ulint target_n_recs = 0; - rec_t* orig_pred; - - // Estimate how many records can be moved from the from_page to - // the to_page. - if (zip_size) { - ulint page_diff = UNIV_PAGE_SIZE - *max_data_size; - max_ins_size_to_use = (max_ins_size_to_use > page_diff) - ? max_ins_size_to_use - page_diff : 0; - } - n_recs_to_move = btr_defragment_calc_n_recs_for_size( - from_block, index, max_ins_size_to_use, &move_size); - - // If max_ins_size >= move_size, we can move the records without - // reorganizing the page, otherwise we need to reorganize the page - // first to release more space. 
- if (move_size > max_ins_size) { - if (!btr_page_reorganize_block(false, page_zip_level, - to_block, index, - mtr)) { - if (!dict_index_is_clust(index) - && page_is_leaf(to_page)) { - ibuf_reset_free_bits(to_block); - } - // If reorganization fails, that means page is - // not compressable. There's no point to try - // merging into this page. Continue to the - // next page. - return from_block; - } - ut_ad(page_validate(to_page, index)); - max_ins_size = page_get_max_insert_size(to_page, n_recs); - ut_a(max_ins_size >= move_size); - } - - // Move records to pack to_page more full. - orig_pred = NULL; - target_n_recs = n_recs_to_move; - while (n_recs_to_move > 0) { - rec = page_rec_get_nth(from_page, - n_recs_to_move + 1); - orig_pred = page_copy_rec_list_start( - to_block, from_block, rec, index, mtr); - if (orig_pred) - break; - // If we reach here, that means compression failed after packing - // n_recs_to_move number of records to to_page. We try to reduce - // the targeted data size on the to_page by - // BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE and try again. - os_atomic_increment_ulint( - &btr_defragment_compression_failures, 1); - max_ins_size_to_use = - move_size > BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE - ? move_size - BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE - : 0; - if (max_ins_size_to_use == 0) { - n_recs_to_move = 0; - move_size = 0; - break; - } - n_recs_to_move = btr_defragment_calc_n_recs_for_size( - from_block, index, max_ins_size_to_use, &move_size); - } - // If less than target_n_recs are moved, it means there are - // compression failures during page_copy_rec_list_start. Adjust - // the max_data_size estimation to reduce compression failures - // in the following runs. - if (target_n_recs > n_recs_to_move - && *max_data_size > new_data_size + move_size) { - *max_data_size = new_data_size + move_size; - } - // Set ibuf free bits if necessary. 
- if (!dict_index_is_clust(index) - && page_is_leaf(to_page)) { - if (zip_size) { - ibuf_reset_free_bits(to_block); - } else { - ibuf_update_free_bits_if_full( - to_block, - UNIV_PAGE_SIZE, - ULINT_UNDEFINED); - } - } - if (n_recs_to_move == n_recs) { - /* The whole page is merged with the previous page, - free it. */ - lock_update_merge_left(to_block, orig_pred, - from_block); - btr_search_drop_page_hash_index(from_block); - btr_level_list_remove(space, zip_size, from_page, - index, mtr); - btr_node_ptr_delete(index, from_block, mtr); - btr_blob_dbg_remove(from_page, index, - "btr_defragment_n_pages"); - btr_page_free(index, from_block, mtr); - } else { - // There are still records left on the page, so - // increment n_defragmented. Node pointer will be changed - // so remove the old node pointer. - if (n_recs_to_move > 0) { - // Part of the page is merged to left, remove - // the merged records, update record locks and - // node pointer. - dtuple_t* node_ptr; - page_delete_rec_list_start(rec, from_block, - index, mtr); - lock_update_split_and_merge(to_block, - orig_pred, - from_block); - btr_node_ptr_delete(index, from_block, mtr); - rec = page_rec_get_next( - page_get_infimum_rec(from_page)); - node_ptr = dict_index_build_node_ptr( - index, rec, page_get_page_no(from_page), - heap, level + 1); - btr_insert_on_non_leaf_level(0, index, level+1, - node_ptr, mtr); - } - to_block = from_block; - } - return to_block; -} - -/*************************************************************//** -Tries to merge N consecutive pages, starting from the page pointed by the -cursor. Skip space 0. Only consider leaf pages. -This function first loads all N pages into memory, then for each of -the pages other than the first page, it tries to move as many records -as possible to the left sibling to keep the left sibling full. During -the process, if any page becomes empty, that page will be removed from -the level list. 
Record locks, hash, and node pointers are updated after -page reorganization. -@return pointer to the last block processed, or NULL if reaching end of index */ -UNIV_INTERN -buf_block_t* -btr_defragment_n_pages( - buf_block_t* block, /*!< in: starting block for defragmentation */ - dict_index_t* index, /*!< in: index tree */ - uint n_pages,/*!< in: number of pages to defragment */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint space; - ulint zip_size; - /* We will need to load the n+1 block because if the last page is freed - and we need to modify the prev_page_no of that block. */ - buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1]; - page_t* first_page; - buf_block_t* current_block; - ulint total_data_size = 0; - ulint total_n_recs = 0; - ulint data_size_per_rec; - ulint optimal_page_size; - ulint reserved_space; - ulint level; - ulint max_data_size = 0; - uint n_defragmented = 0; - uint n_new_slots; - mem_heap_t* heap; - ibool end_of_index = FALSE; - - /* It doesn't make sense to call this function with n_pages = 1. */ - ut_ad(n_pages > 1); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - space = dict_index_get_space(index); - if (space == 0) { - /* Ignore space 0. */ - return NULL; - } - - if (n_pages > BTR_DEFRAGMENT_MAX_N_PAGES) { - n_pages = BTR_DEFRAGMENT_MAX_N_PAGES; - } - - zip_size = dict_table_zip_size(index->table); - first_page = buf_block_get_frame(block); - level = btr_page_get_level(first_page, mtr); - - if (level != 0) { - return NULL; - } - - /* 1. Load the pages and calculate the total data size. 
*/ - blocks[0] = block; - for (uint i = 1; i <= n_pages; i++) { - page_t* page = buf_block_get_frame(blocks[i-1]); - ulint page_no = btr_page_get_next(page, mtr); - total_data_size += page_get_data_size(page); - total_n_recs += page_get_n_recs(page); - if (page_no == FIL_NULL) { - n_pages = i; - end_of_index = TRUE; - break; - } - blocks[i] = btr_block_get(space, zip_size, page_no, - RW_X_LATCH, index, mtr); - } - - if (n_pages == 1) { - if (btr_page_get_prev(first_page, mtr) == FIL_NULL) { - /* last page in the index */ - if (dict_index_get_page(index) - == page_get_page_no(first_page)) - return NULL; - /* given page is the last page. - Lift the records to father. */ - btr_lift_page_up(index, block, mtr); - } - return NULL; - } - - /* 2. Calculate how many pages data can fit in. If not compressable, - return early. */ - ut_a(total_n_recs != 0); - data_size_per_rec = total_data_size / total_n_recs; - // For uncompressed pages, the optimal data size if the free space of a - // empty page. - optimal_page_size = page_get_free_space_of_empty( - page_is_comp(first_page)); - // For compressed pages, we take compression failures into account. - if (zip_size) { - ulint size = 0; - int i = 0; - // We estimate the optimal data size of the index use samples of - // data size. These samples are taken when pages failed to - // compress due to insertion on the page. We use the average - // of all samples we have as the estimation. Different pages of - // the same index vary in compressibility. Average gives a good - // enough estimation. 
- for (;i < STAT_DEFRAG_DATA_SIZE_N_SAMPLE; i++) { - if (index->stat_defrag_data_size_sample[i] == 0) { - break; - } - size += index->stat_defrag_data_size_sample[i]; - } - if (i != 0) { - size = size / i; - optimal_page_size = min(optimal_page_size, size); - } - max_data_size = optimal_page_size; - } - - reserved_space = min((ulint)(optimal_page_size - * (1 - srv_defragment_fill_factor)), - (data_size_per_rec - * srv_defragment_fill_factor_n_recs)); - optimal_page_size -= reserved_space; - n_new_slots = (total_data_size + optimal_page_size - 1) - / optimal_page_size; - if (n_new_slots >= n_pages) { - /* Can't defragment. */ - if (end_of_index) - return NULL; - return blocks[n_pages-1]; - } - - /* 3. Defragment pages. */ - heap = mem_heap_create(256); - // First defragmented page will be the first page. - current_block = blocks[0]; - // Start from the second page. - for (uint i = 1; i < n_pages; i ++) { - buf_block_t* new_block = btr_defragment_merge_pages( - index, blocks[i], current_block, zip_size, - reserved_space, &max_data_size, heap, mtr); - if (new_block != current_block) { - n_defragmented ++; - current_block = new_block; - } - } - mem_heap_free(heap); - n_defragmented ++; - os_atomic_increment_ulint( - &btr_defragment_count, 1); - if (n_pages == n_defragmented) { - os_atomic_increment_ulint( - &btr_defragment_failures, 1); - } else { - index->stat_defrag_n_pages_freed += (n_pages - n_defragmented); - } - if (end_of_index) - return NULL; - return current_block; -} - -/** Whether btr_defragment_thread is active */ -bool btr_defragment_thread_active; - -/** Merge consecutive b-tree pages into fewer pages to defragment indexes */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(btr_defragment_thread)(void*) -{ - btr_pcur_t* pcur; - btr_cur_t* cursor; - dict_index_t* index; - mtr_t mtr; - buf_block_t* first_block; - buf_block_t* last_block; - - while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - ut_ad(btr_defragment_thread_active); - - /* If 
defragmentation is disabled, sleep before - checking whether it's enabled. */ - if (!srv_defragment) { - os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS); - continue; - } - /* The following call won't remove the item from work queue. - We only get a pointer to it to work on. This will make sure - when user issue a kill command, all indices are in the work - queue to be searched. This also means that the user thread - cannot directly remove the item from queue (since we might be - using it). So user thread only marks index as removed. */ - btr_defragment_item_t* item = btr_defragment_get_item(); - /* If work queue is empty, sleep and check later. */ - if (!item) { - os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS); - continue; - } - /* If an index is marked as removed, we remove it from the work - queue. No other thread could be using this item at this point so - it's safe to remove now. */ - if (item->removed) { - btr_defragment_remove_item(item); - continue; - } - - pcur = item->pcur; - ulonglong now = ut_timer_now(); - ulonglong elapsed = now - item->last_processed; - - if (elapsed < srv_defragment_interval) { - /* If we see an index again before the interval - determined by the configured frequency is reached, - we just sleep until the interval pass. Since - defragmentation of all indices queue up on a single - thread, it's likely other indices that follow this one - don't need to sleep again. 
*/ - os_thread_sleep(((ulint)ut_timer_to_microseconds( - srv_defragment_interval - elapsed))); - } - - now = ut_timer_now(); - mtr_start(&mtr); - btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr); - cursor = btr_pcur_get_btr_cur(pcur); - index = btr_cur_get_index(cursor); - first_block = btr_cur_get_block(cursor); - last_block = btr_defragment_n_pages(first_block, index, - srv_defragment_n_pages, - &mtr); - if (last_block) { - /* If we haven't reached the end of the index, - place the cursor on the last record of last page, - store the cursor position, and put back in queue. */ - page_t* last_page = buf_block_get_frame(last_block); - rec_t* rec = page_rec_get_prev( - page_get_supremum_rec(last_page)); - ut_a(page_rec_is_user_rec(rec)); - page_cur_position(rec, last_block, - btr_cur_get_page_cur(cursor)); - btr_pcur_store_position(pcur, &mtr); - mtr_commit(&mtr); - /* Update the last_processed time of this index. */ - item->last_processed = now; - } else { - mtr_commit(&mtr); - /* Reaching the end of the index. */ - dict_stats_empty_defrag_stats(index); - dict_stats_save_defrag_stats(index); - dict_stats_save_defrag_summary(index); - btr_defragment_remove_item(item); - } - } - - btr_defragment_thread_active = false; - os_thread_exit(NULL); - OS_THREAD_DUMMY_RETURN; -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/btr/btr0pcur.cc b/storage/xtradb/btr/btr0pcur.cc deleted file mode 100644 index 0b970e1cf49..00000000000 --- a/storage/xtradb/btr/btr0pcur.cc +++ /dev/null @@ -1,620 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file btr/btr0pcur.cc -The index tree persistent cursor - -Created 2/23/1996 Heikki Tuuri -*******************************************************/ - -#include "btr0pcur.h" - -#ifdef UNIV_NONINL -#include "btr0pcur.ic" -#endif - -#include "ut0byte.h" -#include "rem0cmp.h" -#include "trx0trx.h" -#include "srv0srv.h" -/**************************************************************//** -Allocates memory for a persistent cursor object and initializes the cursor. -@return own: persistent cursor */ -UNIV_INTERN -btr_pcur_t* -btr_pcur_create_for_mysql(void) -/*============================*/ -{ - btr_pcur_t* pcur; - - pcur = (btr_pcur_t*) mem_alloc(sizeof(btr_pcur_t)); - - pcur->btr_cur.index = NULL; - btr_pcur_init(pcur); - pcur->btr_cur.tree_height = ULINT_UNDEFINED; - - return(pcur); -} - -/**************************************************************//** -Resets a persistent cursor object, freeing ::old_rec_buf if it is -allocated and resetting the other members to their initial values. 
*/ -UNIV_INTERN -void -btr_pcur_reset( -/*===========*/ - btr_pcur_t* cursor) /*!< in, out: persistent cursor */ -{ - if (cursor->old_rec_buf != NULL) { - - mem_free(cursor->old_rec_buf); - - cursor->old_rec_buf = NULL; - } - - cursor->btr_cur.index = NULL; - cursor->btr_cur.page_cur.rec = NULL; - cursor->old_rec = NULL; - cursor->old_n_fields = 0; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->latch_mode = BTR_NO_LATCHES; - cursor->pos_state = BTR_PCUR_NOT_POSITIONED; -} - -/**************************************************************//** -Frees the memory for a persistent cursor object. */ -UNIV_INTERN -void -btr_pcur_free_for_mysql( -/*====================*/ - btr_pcur_t* cursor) /*!< in, own: persistent cursor */ -{ - btr_pcur_reset(cursor); - mem_free(cursor); -} - -/**************************************************************//** -The position of the cursor is stored by taking an initial segment of the -record the cursor is positioned on, before, or after, and copying it to the -cursor data structure, or just setting a flag if the cursor id before the -first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the -page where the cursor is positioned must not be empty if the index tree is -not totally empty! 
*/ -UNIV_INTERN -void -btr_pcur_store_position( -/*====================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t* page_cursor; - buf_block_t* block; - rec_t* rec; - dict_index_t* index; - page_t* page; - ulint offs; - - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - block = btr_pcur_get_block(cursor); - - SRV_CORRUPT_TABLE_CHECK(block, return;); - - index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); - - page_cursor = btr_pcur_get_page_cur(cursor); - - rec = page_cur_get_rec(page_cursor); - page = page_align(rec); - offs = page_offset(rec); - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - if (page_is_empty(page)) { - /* It must be an empty index tree; NOTE that in this case - we do not store the modify_clock, but always do a search - if we restore the cursor position */ - - ut_a(btr_page_get_next(page, mtr) == FIL_NULL); - ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(page_is_leaf(page)); - ut_ad(page_get_page_no(page) == index->page); - - cursor->old_stored = BTR_PCUR_OLD_STORED; - - if (page_rec_is_supremum_low(offs)) { - - cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; - } else { - cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE; - } - - return; - } - - if (page_rec_is_supremum_low(offs)) { - - rec = page_rec_get_prev(rec); - - cursor->rel_pos = BTR_PCUR_AFTER; - - } else if (page_rec_is_infimum_low(offs)) { - - rec = page_rec_get_next(rec); - - cursor->rel_pos = BTR_PCUR_BEFORE; - } else { - cursor->rel_pos = BTR_PCUR_ON; - } - - cursor->old_stored = BTR_PCUR_OLD_STORED; - cursor->old_rec = dict_index_copy_rec_order_prefix( - index, rec, &cursor->old_n_fields, - &cursor->old_rec_buf, &cursor->buf_size); - - cursor->block_when_stored = block; - cursor->modify_clock = buf_block_get_modify_clock(block); -} - 
-/**************************************************************//** -Copies the stored position of a pcur to another pcur. */ -UNIV_INTERN -void -btr_pcur_copy_stored_position( -/*==========================*/ - btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the - position info */ - btr_pcur_t* pcur_donate) /*!< in: pcur from which the info is - copied */ -{ - if (pcur_receive->old_rec_buf) { - mem_free(pcur_receive->old_rec_buf); - } - - ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t)); - - if (pcur_donate->old_rec_buf) { - - pcur_receive->old_rec_buf = (byte*) - mem_alloc(pcur_donate->buf_size); - - ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, - pcur_donate->buf_size); - pcur_receive->old_rec = pcur_receive->old_rec_buf - + (pcur_donate->old_rec - pcur_donate->old_rec_buf); - } - - pcur_receive->old_n_fields = pcur_donate->old_n_fields; -} - -/**************************************************************//** -Restores the stored position of a persistent cursor bufferfixing the page and -obtaining the specified latches. If the cursor position was saved when the -(1) cursor was positioned on a user record: this function restores the position -to the last record LESS OR EQUAL to the stored record; -(2) cursor was positioned on a page infimum record: restores the position to -the last record LESS than the user record which was the successor of the page -infimum; -(3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. -(4) cursor was positioned before the first or after the last in an empty tree: -restores to before first or after the last in the tree. 
-@return TRUE if the cursor position was stored when it was on a user -record and it can be restored on a user record whose ordering fields -are identical to the ones of the original user record */ -UNIV_INTERN -ibool -btr_pcur_restore_position_func( -/*===========================*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: detached persistent cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_index_t* index; - dtuple_t* tuple; - ulint mode; - ulint old_mode; - mem_heap_t* heap; - - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); - ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED - || cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); - - if (UNIV_UNLIKELY - (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE - || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { - - /* In these cases we do not try an optimistic restoration, - but always do a search */ - - btr_cur_open_at_index_side( - cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE, - index, latch_mode, - btr_pcur_get_btr_cur(cursor), 0, mtr); - - cursor->latch_mode = latch_mode; - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->block_when_stored = btr_pcur_get_block(cursor); - - return(FALSE); - } - - ut_a(cursor->old_rec); - ut_a(cursor->old_n_fields); - - if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF) - || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) { - /* Try optimistic restoration. */ - - if (buf_page_optimistic_get(latch_mode, - cursor->block_when_stored, - cursor->modify_clock, - file, line, mtr)) { - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->latch_mode = latch_mode; - - buf_block_dbg_add_level( - btr_pcur_get_block(cursor), - dict_index_is_ibuf(index) - ? 
SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE); - - if (cursor->rel_pos == BTR_PCUR_ON) { -#ifdef UNIV_DEBUG - const rec_t* rec; - const ulint* offsets1; - const ulint* offsets2; - rec = btr_pcur_get_rec(cursor); - - heap = mem_heap_create(256); - offsets1 = rec_get_offsets( - cursor->old_rec, index, NULL, - cursor->old_n_fields, &heap); - offsets2 = rec_get_offsets( - rec, index, NULL, - cursor->old_n_fields, &heap); - - ut_ad(!cmp_rec_rec(cursor->old_rec, - rec, offsets1, offsets2, - index)); - mem_heap_free(heap); -#endif /* UNIV_DEBUG */ - return(TRUE); - } - /* This is the same record as stored, - may need to be adjusted for BTR_PCUR_BEFORE/AFTER, - depending on search mode and direction. */ - if (btr_pcur_is_on_user_rec(cursor)) { - cursor->pos_state - = BTR_PCUR_IS_POSITIONED_OPTIMISTIC; - } - return(FALSE); - } - } - - /* If optimistic restoration did not succeed, open the cursor anew */ - - heap = mem_heap_create(256); - - tuple = dict_index_build_data_tuple(index, cursor->old_rec, - cursor->old_n_fields, heap); - - /* Save the old search mode of the cursor */ - old_mode = cursor->search_mode; - - switch (cursor->rel_pos) { - case BTR_PCUR_ON: - mode = PAGE_CUR_LE; - break; - case BTR_PCUR_AFTER: - mode = PAGE_CUR_G; - break; - case BTR_PCUR_BEFORE: - mode = PAGE_CUR_L; - break; - default: - ut_error; - mode = 0; - } - - btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode, - cursor, 0, file, line, mtr); - - /* Restore the old search mode */ - cursor->search_mode = old_mode; - - switch (cursor->rel_pos) { - case BTR_PCUR_ON: - if (btr_pcur_is_on_user_rec(cursor) - && !cmp_dtuple_rec( - tuple, btr_pcur_get_rec(cursor), - rec_get_offsets(btr_pcur_get_rec(cursor), - index, NULL, - ULINT_UNDEFINED, &heap))) { - - /* We have to store the NEW value for - the modify clock, since the cursor can - now be on a different page! 
But we can - retain the value of old_rec */ - - cursor->block_when_stored = - btr_pcur_get_block(cursor); - cursor->modify_clock = - buf_block_get_modify_clock( - cursor->block_when_stored); - cursor->old_stored = BTR_PCUR_OLD_STORED; - - mem_heap_free(heap); - - return(TRUE); - } -#ifdef UNIV_DEBUG - /* fall through */ - case BTR_PCUR_BEFORE: - case BTR_PCUR_AFTER: - break; - default: - ut_error; -#endif /* UNIV_DEBUG */ - } - - mem_heap_free(heap); - - /* We have to store new position information, modify_clock etc., - to the cursor because it can now be on a different page, the record - under it may have been removed, etc. */ - - btr_pcur_store_position(cursor, mtr); - - return(FALSE); -} - -/*********************************************************//** -Moves the persistent cursor to the first record on the next page. Releases the -latch on the current page, and bufferunfixes it. Note that there must not be -modifications on the current page, as then the x-latch can be released only in -mtr_commit. 
*/ -UNIV_INTERN -void -btr_pcur_move_to_next_page( -/*=======================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the - last record of the current page */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint next_page_no; - ulint space; - ulint zip_size; - page_t* page; - buf_block_t* next_block; - page_t* next_page; - - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - ut_ad(btr_pcur_is_after_last_on_page(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - page = btr_pcur_get_page(cursor); - - if (UNIV_UNLIKELY(!page)) { - return; - } - - next_page_no = btr_page_get_next(page, mtr); - space = buf_block_get_space(btr_pcur_get_block(cursor)); - zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor)); - - ut_ad(next_page_no != FIL_NULL); - - next_block = btr_block_get(space, zip_size, next_page_no, - cursor->latch_mode, - btr_pcur_get_btr_cur(cursor)->index, mtr); - - if (UNIV_UNLIKELY(!next_block)) { - return; - } - - next_page = buf_block_get_frame(next_block); - - SRV_CORRUPT_TABLE_CHECK(next_page, - { - btr_leaf_page_release(btr_pcur_get_block(cursor), - cursor->latch_mode, mtr); - btr_pcur_get_page_cur(cursor)->block = 0; - btr_pcur_get_page_cur(cursor)->rec = 0; - - return; - }); - -#ifdef UNIV_BTR_DEBUG - ut_a(page_is_comp(next_page) == page_is_comp(page)); - ut_a(btr_page_get_prev(next_page, mtr) - == buf_block_get_page_no(btr_pcur_get_block(cursor))); -#endif /* UNIV_BTR_DEBUG */ - next_block->check_index_page_at_flush = TRUE; - - btr_leaf_page_release(btr_pcur_get_block(cursor), - cursor->latch_mode, mtr); - - page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor)); - - page_check_dir(next_page); -} - -/*********************************************************//** -Moves the persistent cursor backward if it is on the first record of the page. -Commits mtr. 
Note that to prevent a possible deadlock, the operation -first stores the position of the cursor, commits mtr, acquires the necessary -latches and restores the cursor position again before returning. The -alphabetical position of the cursor is guaranteed to be sensible on -return, but it may happen that the cursor is not positioned on the last -record of any page, because the structure of the tree may have changed -during the time when the cursor had no latches. */ -UNIV_INTERN -void -btr_pcur_move_backward_from_page( -/*=============================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the first - record of the current page */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint prev_page_no; - page_t* page; - buf_block_t* prev_block; - ulint latch_mode; - ulint latch_mode2; - - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - ut_ad(btr_pcur_is_before_first_on_page(cursor)); - ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr)); - - latch_mode = cursor->latch_mode; - - if (latch_mode == BTR_SEARCH_LEAF) { - - latch_mode2 = BTR_SEARCH_PREV; - - } else if (latch_mode == BTR_MODIFY_LEAF) { - - latch_mode2 = BTR_MODIFY_PREV; - } else { - latch_mode2 = 0; /* To eliminate compiler warning */ - ut_error; - } - - btr_pcur_store_position(cursor, mtr); - - mtr_commit(mtr); - - mtr_start_trx(mtr, mtr->trx); - - btr_pcur_restore_position(latch_mode2, cursor, mtr); - - page = btr_pcur_get_page(cursor); - - prev_page_no = btr_page_get_prev(page, mtr); - - if (prev_page_no == FIL_NULL) { - } else if (btr_pcur_is_before_first_on_page(cursor)) { - - prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - - btr_leaf_page_release(btr_pcur_get_block(cursor), - latch_mode, mtr); - - page_cur_set_after_last(prev_block, - btr_pcur_get_page_cur(cursor)); - } else { - - /* The repositioned cursor did not end on an infimum record on - a page. Cursor repositioning acquired a latch also on the - previous page, but we do not need the latch: release it. 
*/ - - prev_block = btr_pcur_get_btr_cur(cursor)->left_block; - - btr_leaf_page_release(prev_block, latch_mode, mtr); - } - - cursor->latch_mode = latch_mode; - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/*********************************************************//** -Moves the persistent cursor to the previous record in the tree. If no records -are left, the cursor stays 'before first in tree'. -@return TRUE if the cursor was not before first in tree */ -UNIV_INTERN -ibool -btr_pcur_move_to_prev( -/*==================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - if (btr_pcur_is_before_first_on_page(cursor)) { - - if (btr_pcur_is_before_first_in_tree(cursor, mtr)) { - - return(FALSE); - } - - btr_pcur_move_backward_from_page(cursor, mtr); - - return(TRUE); - } - - btr_pcur_move_to_prev_on_page(cursor); - - return(TRUE); -} - -/**************************************************************//** -If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first -user record satisfying the search condition, in the case PAGE_CUR_L or -PAGE_CUR_LE, on the last user record. If no such user record exists, then -in the first case sets the cursor after last in tree, and in the latter case -before first in tree. The latching mode must be BTR_SEARCH_LEAF or -BTR_MODIFY_LEAF. */ -UNIV_INTERN -void -btr_pcur_open_on_user_rec_func( -/*===========================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ... 
*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent - cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - btr_pcur_open_low(index, 0, tuple, mode, latch_mode, cursor, - file, line, mtr); - - if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) { - - if (btr_pcur_is_after_last_on_page(cursor)) { - - btr_pcur_move_to_next_user_rec(cursor, mtr); - } - } else { - ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L)); - - /* Not implemented yet */ - - ut_error; - } -} diff --git a/storage/xtradb/btr/btr0scrub.cc b/storage/xtradb/btr/btr0scrub.cc deleted file mode 100644 index 24c84ed301b..00000000000 --- a/storage/xtradb/btr/btr0scrub.cc +++ /dev/null @@ -1,931 +0,0 @@ -// Copyright (c) 2014, Google Inc. - -/**************************************************//** -@file btr/btr0scrub.cc -Scrubbing of btree pages - -*******************************************************/ - -#include "btr0btr.h" -#include "btr0cur.h" -#include "btr0scrub.h" -#include "ibuf0ibuf.h" -#include "fsp0fsp.h" -#include "dict0dict.h" -#include "mtr0mtr.h" - -/* used when trying to acquire dict-lock */ -UNIV_INTERN bool fil_crypt_is_closing(ulint space); - -/** -* scrub data at delete time (e.g purge thread) -*/ -my_bool srv_immediate_scrub_data_uncompressed = false; - -/** -* background scrub uncompressed data -* -* if srv_immediate_scrub_data_uncompressed is enabled -* this is only needed to handle "old" data -*/ -my_bool srv_background_scrub_data_uncompressed = false; - -/** -* backgrounds scrub compressed data -* -* reorganize compressed page for scrubbing -* (only way to scrub compressed data) -*/ -my_bool srv_background_scrub_data_compressed = false; - -/* check spaces once per hour */ -UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60); - -/* default to scrub spaces that hasn't been scrubbed in a week */ -UNIV_INTERN 
uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60); - -/** -* statistics for scrubbing by background threads -*/ -static btr_scrub_stat_t scrub_stat; -static ib_mutex_t scrub_stat_mutex; -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key; -#endif - -#ifdef UNIV_DEBUG -/** -* srv_scrub_force_testing -* -* - force scrubbing using background threads even for uncompressed tables -* - force pessimistic scrubbing (page split) even if not needed -* (see test_pessimistic_scrub_pct) -*/ -my_bool srv_scrub_force_testing = true; - -/** -* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only) -*/ -static int test_pessimistic_scrub_pct = 50; - -#endif -static uint scrub_compression_level = page_zip_level; - -/**************************************************************//** -Log a scrubbing failure */ -static -void -log_scrub_failure( -/*===============*/ - btr_scrub_t* scrub_data, /*!< in: data to store statistics on */ - buf_block_t* block, /*!< in: block */ - dberr_t err) /*!< in: error */ -{ - const char* reason = "unknown"; - switch(err) { - case DB_UNDERFLOW: - reason = "too few records on page"; - scrub_data->scrub_stat.page_split_failures_underflow++; - break; - case DB_INDEX_CORRUPT: - reason = "unable to find index!"; - scrub_data->scrub_stat.page_split_failures_missing_index++; - break; - case DB_OUT_OF_FILE_SPACE: - reason = "out of filespace"; - scrub_data->scrub_stat.page_split_failures_out_of_filespace++; - break; - default: - ut_ad(0); - reason = "unknown"; - scrub_data->scrub_stat.page_split_failures_unknown++; - } - fprintf(stderr, - "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n", - buf_block_get_page_no(block), - buf_block_get_space(block), - reason); -} - -/**************************************************************** -Lock dict mutexes */ -static -bool -btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table, - const char * file, uint line) -{ - time_t start = time(0); - time_t last 
= start; - - /* FIXME: this is not the proper way of doing things. The - dict_sys->mutex should not be held by any thread for longer - than a few microseconds. It must not be held during I/O, - for example. So, what is the purpose for this busy-waiting? - This function should be rewritten as part of MDEV-8139: - Fix scrubbing tests. */ - - while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) { - /* if we lock to close a table, we wait forever - * if we don't lock to close a table, we check if space - * is closing, and then instead give up - */ - if (lock_to_close_table) { - } else if (fil_space_t* space = fil_space_acquire(space_id)) { - bool stopping = space->is_stopping(); - fil_space_release(space); - if (stopping) { - return false; - } - } else { - return false; - } - - os_thread_sleep(250000); - - time_t now = time(0); - if (now >= last + 30) { - fprintf(stderr, - "WARNING: %s:%u waited %ld seconds for" - " dict_sys lock, space: %lu" - " lock_to_close_table: %d\n", - file, line, now - start, space_id, - lock_to_close_table); - - last = now; - } - } - - ut_ad(mutex_own(&dict_sys->mutex)); - return true; -} - -#define btr_scrub_lock_dict(space, lock_to_close_table) \ - btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__) - -/**************************************************************** -Unlock dict mutexes */ -static -void -btr_scrub_unlock_dict() -{ - dict_mutex_exit_for_mysql(); -} - -/**************************************************************** -Release reference to table -*/ -static -void -btr_scrub_table_close( -/*==================*/ - dict_table_t* table) /*!< in: table */ -{ - bool dict_locked = true; - bool try_drop = false; - table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS; - dict_table_close(table, dict_locked, try_drop); -} - -/**************************************************************** -Release reference to table -*/ -static -void -btr_scrub_table_close_for_thread( - btr_scrub_t *scrub_data) -{ - if 
(scrub_data->current_table == NULL) { - return; - } - - if (fil_space_t* space = fil_space_acquire(scrub_data->space)) { - /* If tablespace is not marked as stopping perform - the actual close. */ - if (!space->is_stopping()) { - mutex_enter(&dict_sys->mutex); - /* perform the actual closing */ - btr_scrub_table_close(scrub_data->current_table); - mutex_exit(&dict_sys->mutex); - } - fil_space_release(space); - } - - scrub_data->current_table = NULL; - scrub_data->current_index = NULL; -} - -/**************************************************************//** -Check if scrubbing is turned ON or OFF */ -static -bool -check_scrub_setting( -/*=====================*/ - btr_scrub_t* scrub_data) /*!< in: scrub data */ -{ - if (scrub_data->compressed) - return srv_background_scrub_data_compressed; - else - return srv_background_scrub_data_uncompressed; -} - -#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID) - -/**************************************************************//** -Check if a page needs scrubbing */ -UNIV_INTERN -int -btr_page_needs_scrubbing( -/*=====================*/ - btr_scrub_t* scrub_data, /*!< in: scrub data */ - buf_block_t* block, /*!< in: block to check, latched */ - btr_scrub_page_allocation_status_t allocated) /*!< in: is block known - to be allocated */ -{ - /** - * Check if scrubbing has been turned OFF. - * - * at start of space, we check if scrubbing is ON or OFF - * here we only check if scrubbing is turned OFF. - * - * Motivation is that it's only valueable to have a full table (space) - * scrubbed. 
- */ - if (!check_scrub_setting(scrub_data)) { - bool before_value = scrub_data->scrubbing; - scrub_data->scrubbing = false; - - if (before_value == true) { - /* we toggle scrubbing from on to off */ - return BTR_SCRUB_TURNED_OFF; - } - } - - if (scrub_data->scrubbing == false) { - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - page_t* page = buf_block_get_frame(block); - uint type = fil_page_get_type(page); - - if (allocated == BTR_SCRUB_PAGE_ALLOCATED) { - if (type != FIL_PAGE_INDEX) { - /* this function is called from fil-crypt-threads. - * these threads iterate all pages of all tablespaces - * and don't know about fil_page_type. - * But scrubbing is only needed for index-pages. */ - - /** - * NOTE: scrubbing is also needed for UNDO pages, - * but they are scrubbed at purge-time, since they are - * uncompressed - */ - - /* if encountering page type not needing scrubbing - release reference to table object */ - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - if (page_has_garbage(page) == false) { - /* no garbage (from deleted/shrunken records) */ - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - } else if (allocated == BTR_SCRUB_PAGE_FREE || - allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) { - - if (! 
(type == FIL_PAGE_INDEX || - type == FIL_PAGE_TYPE_BLOB || - type == FIL_PAGE_TYPE_ZBLOB || - type == FIL_PAGE_TYPE_ZBLOB2)) { - - /** - * If this is a dropped page, we also need to scrub - * BLOB pages - */ - - /* if encountering page type not needing scrubbing - release reference to table object */ - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - } - - if (btr_page_get_index_id(page) == IBUF_INDEX_ID) { - /* skip ibuf */ - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - return BTR_SCRUB_PAGE; -} - -/**************************************************************** -Handle a skipped page -*/ -UNIV_INTERN -void -btr_scrub_skip_page( -/*==================*/ - btr_scrub_t* scrub_data, /*!< in: data with scrub state */ - int needs_scrubbing) /*!< in: return code from - btr_page_needs_scrubbing */ -{ - switch(needs_scrubbing) { - case BTR_SCRUB_SKIP_PAGE: - /* nothing todo */ - return; - case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE: - btr_scrub_table_close_for_thread(scrub_data); - return; - case BTR_SCRUB_TURNED_OFF: - case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE: - btr_scrub_complete_space(scrub_data); - return; - } - - /* unknown value. 
should not happen */ - ut_a(0); -} - -/**************************************************************** -Try to scrub a page using btr_page_reorganize_low -return DB_SUCCESS on success or DB_OVERFLOW on failure */ -static -dberr_t -btr_optimistic_scrub( -/*==================*/ - btr_scrub_t* scrub_data, /*!< in: data with scrub state */ - buf_block_t* block, /*!< in: block to scrub */ - dict_index_t* index, /*!< in: index */ - mtr_t* mtr) /*!< in: mtr */ -{ -#ifdef UNIV_DEBUG - if (srv_scrub_force_testing && - page_get_n_recs(buf_block_get_frame(block)) > 2 && - (rand() % 100) < test_pessimistic_scrub_pct) { - - fprintf(stderr, - "scrub: simulate btr_page_reorganize failed %lu:%lu " - " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n", - buf_block_get_space(block), - buf_block_get_page_no(block), - (ulonglong)scrub_data->current_table->id, - scrub_data->current_table->name, - (ulonglong)scrub_data->current_index->id, - scrub_data->current_index->name, - page_get_n_recs(buf_block_get_frame(block))); - return DB_OVERFLOW; - } -#endif - - page_cur_t cur; - page_cur_set_before_first(block, &cur); - bool recovery = false; - if (!btr_page_reorganize_low(recovery, scrub_compression_level, - &cur, index, mtr)) { - return DB_OVERFLOW; - } - - /* We play safe and reset the free bits */ - if (!dict_index_is_clust(index) && - block != NULL) { - buf_frame_t* frame = buf_block_get_frame(block); - if (frame && - page_is_leaf(frame)) { - - ibuf_reset_free_bits(block); - } - } - - scrub_data->scrub_stat.page_reorganizations++; - - return DB_SUCCESS; -} - -/**************************************************************** -Try to scrub a page by splitting it -return DB_SUCCESS on success -DB_UNDERFLOW if page has too few records -DB_OUT_OF_FILE_SPACE if we can't find space for split */ -static -dberr_t -btr_pessimistic_scrub( -/*==================*/ - btr_scrub_t* scrub_data, /*!< in: data with scrub state */ - buf_block_t* block, /*!< in: block to scrub */ - dict_index_t* index, 
/*!< in: index */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* page = buf_block_get_frame(block); - if (page_get_n_recs(page) < 2) { - /** - * There is no way we can split a page with < 2 records - */ - log_scrub_failure(scrub_data, block, DB_UNDERFLOW); - return DB_UNDERFLOW; - } - - /** - * Splitting page needs new space, allocate it here - * so that splitting won't fail due to this */ - ulint n_extents = 3; - ulint n_reserved = 0; - if (!fsp_reserve_free_extents(&n_reserved, index->space, - n_extents, FSP_NORMAL, mtr)) { - log_scrub_failure(scrub_data, block, - DB_OUT_OF_FILE_SPACE); - return DB_OUT_OF_FILE_SPACE; - } - - /* read block variables */ - ulint space = buf_block_get_space(block); - ulint page_no = buf_block_get_page_no(block); - ulint zip_size = buf_block_get_zip_size(block); - ulint left_page_no = btr_page_get_prev(page, mtr); - ulint right_page_no = btr_page_get_next(page, mtr); - - /** - * When splitting page, we need X-latches on left/right brothers - * see e.g btr_cur_latch_leaves - */ - - if (left_page_no != FIL_NULL) { - /** - * pages needs to be locked left-to-right, release block - * and re-lock. 
We still have x-lock on index - * so this should be safe - */ - mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint, - block); - - buf_block_t* get_block = btr_block_get( - space, zip_size, left_page_no, - RW_X_LATCH, index, mtr); - get_block->check_index_page_at_flush = TRUE; - - /** - * Refetch block and re-initialize page - */ - block = btr_block_get( - space, zip_size, page_no, - RW_X_LATCH, index, mtr); - - page = buf_block_get_frame(block); - - /** - * structure should be unchanged - */ - ut_a(left_page_no == btr_page_get_prev(page, mtr)); - ut_a(right_page_no == btr_page_get_next(page, mtr)); - } - - if (right_page_no != FIL_NULL) { - buf_block_t* get_block = btr_block_get( - space, zip_size, right_page_no, - RW_X_LATCH, index, mtr); - get_block->check_index_page_at_flush = TRUE; - } - - /* arguments to btr_page_split_and_insert */ - mem_heap_t* heap = NULL; - dtuple_t* entry = NULL; - ulint* offsets = NULL; - ulint n_ext = 0; - ulint flags = BTR_MODIFY_TREE; - - /** - * position a cursor on first record on page - */ - rec_t* rec = page_rec_get_next(page_get_infimum_rec(page)); - btr_cur_t cursor; - btr_cur_position(index, rec, block, &cursor); - - /** - * call split page with NULL as argument for entry to insert - */ - if (dict_index_get_page(index) == buf_block_get_page_no(block)) { - /* The page is the root page - * NOTE: ibuf_reset_free_bits is called inside - * btr_root_raise_and_insert */ - rec = btr_root_raise_and_insert( - flags, &cursor, &offsets, &heap, entry, n_ext, mtr); - } else { - /* We play safe and reset the free bits - * NOTE: need to call this prior to btr_page_split_and_insert */ - if (!dict_index_is_clust(index) && - block != NULL) { - buf_frame_t* frame = buf_block_get_frame(block); - if (frame && - page_is_leaf(frame)) { - - ibuf_reset_free_bits(block); - } - } - - rec = btr_page_split_and_insert( - flags, &cursor, &offsets, &heap, entry, n_ext, mtr); - } - - if (heap) { - mem_heap_free(heap); - } - - if (n_reserved > 0) { - 
fil_space_release_free_extents(index->space, n_reserved); - } - - scrub_data->scrub_stat.page_splits++; - return DB_SUCCESS; -} - -/**************************************************************** -Location index by id for a table -return index or NULL */ -static -dict_index_t* -find_index( -/*========*/ - dict_table_t* table, /*!< in: table */ - index_id_t index_id) /*!< in: index id */ -{ - if (table != NULL) { - dict_index_t* index = dict_table_get_first_index(table); - while (index != NULL) { - if (index->id == index_id) - return index; - index = dict_table_get_next_index(index); - } - } - - return NULL; -} - -/**************************************************************** -Check if table should be scrubbed -*/ -static -bool -btr_scrub_table_needs_scrubbing( -/*============================*/ - dict_table_t* table) /*!< in: table */ -{ - if (table == NULL) - return false; - - if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) { - return false; - } - - if (table->to_be_dropped) { - return false; - } - - if (!table->is_readable()) { - return false; - } - - return true; -} - -/**************************************************************** -Check if index should be scrubbed -*/ -static -bool -btr_scrub_index_needs_scrubbing( -/*============================*/ - dict_index_t* index) /*!< in: index */ -{ - if (index == NULL) - return false; - - if (dict_index_is_ibuf(index)) { - return false; - } - - if (dict_index_is_online_ddl(index)) { - return false; - } - - return true; -} - -/**************************************************************** -Get table and index and store it on scrub_data -*/ -static -void -btr_scrub_get_table_and_index( -/*=========================*/ - btr_scrub_t* scrub_data, /*!< in/out: scrub data */ - index_id_t index_id) /*!< in: index id */ -{ - /* first check if it's an index to current table */ - scrub_data->current_index = find_index(scrub_data->current_table, - index_id); - - if (scrub_data->current_index != NULL) { - /* yes it was */ - 
return; - } - - if (!btr_scrub_lock_dict(scrub_data->space, false)) { - btr_scrub_complete_space(scrub_data); - return; - } - - /* close current table (if any) */ - if (scrub_data->current_table != NULL) { - btr_scrub_table_close(scrub_data->current_table); - scrub_data->current_table = NULL; - } - - /* argument to dict_table_open_on_index_id */ - bool dict_locked = true; - - /* open table based on index_id */ - dict_table_t* table = dict_table_open_on_index_id( - index_id, - dict_locked); - - if (table != NULL) { - /* mark table as being scrubbed */ - table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS; - - if (!btr_scrub_table_needs_scrubbing(table)) { - btr_scrub_table_close(table); - btr_scrub_unlock_dict(); - return; - } - } - - btr_scrub_unlock_dict(); - scrub_data->current_table = table; - scrub_data->current_index = find_index(table, index_id); -} - -/**************************************************************** -Handle free page */ -UNIV_INTERN -int -btr_scrub_free_page( -/*====================*/ - btr_scrub_t* scrub_data, /*!< in/out: scrub data */ - buf_block_t* block, /*!< in: block to scrub */ - mtr_t* mtr) /*!< in: mtr */ -{ - // TODO(jonaso): scrub only what is actually needed - - { - /* note: perform both the memset and setting of FIL_PAGE_TYPE - * wo/ logging. 
so that if we crash before page is flushed - * it will be found by scrubbing thread again - */ - memset(buf_block_get_frame(block) + PAGE_HEADER, 0, - UNIV_PAGE_SIZE - PAGE_HEADER); - - mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_ALLOCATED); - } - - ulint compact = 1; - page_create(block, mtr, compact); - - mtr_commit(mtr); - - /* page doesn't need further processing => SKIP - * and close table/index so that we don't keep references too long */ - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; -} - -/**************************************************************** -Recheck if a page needs scrubbing, and if it does load appropriate -table and index */ -UNIV_INTERN -int -btr_scrub_recheck_page( -/*====================*/ - btr_scrub_t* scrub_data, /*!< inut: scrub data */ - buf_block_t* block, /*!< in: block */ - btr_scrub_page_allocation_status_t allocated, /*!< in: is block - allocated or free */ - mtr_t* mtr) /*!< in: mtr */ -{ - /* recheck if page needs scrubbing (knowing allocation status) */ - int needs_scrubbing = btr_page_needs_scrubbing( - scrub_data, block, allocated); - - if (needs_scrubbing != BTR_SCRUB_PAGE) { - mtr_commit(mtr); - return needs_scrubbing; - } - - if (allocated == BTR_SCRUB_PAGE_FREE) { - /** we don't need to load table/index for free pages - * so scrub directly here */ - /* mtr is committed inside btr_scrub_page_free */ - return btr_scrub_free_page(scrub_data, - block, - mtr); - } - - page_t* page = buf_block_get_frame(block); - index_id_t index_id = btr_page_get_index_id(page); - - if (scrub_data->current_index == NULL || - scrub_data->current_index->id != index_id) { - - /** - * commit mtr (i.e release locks on block) - * and try to get table&index potentially loading it - * from disk - */ - mtr_commit(mtr); - btr_scrub_get_table_and_index(scrub_data, index_id); - } else { - /* we already have correct index - * commit mtr so that we can lock index before fetching page - */ - mtr_commit(mtr); - } - - /* check 
if table is about to be dropped */ - if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) { - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - /* check if index is scrubbable */ - if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) { - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - mtr_start(mtr); - mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr); - /** set savepoint for X-latch of block */ - scrub_data->savepoint = mtr_set_savepoint(mtr); - return BTR_SCRUB_PAGE; -} - -/**************************************************************** -Perform actual scrubbing of page */ -UNIV_INTERN -int -btr_scrub_page( -/*============*/ - btr_scrub_t* scrub_data, /*!< in/out: scrub data */ - buf_block_t* block, /*!< in: block */ - btr_scrub_page_allocation_status_t allocated, /*!< in: is block - allocated or free */ - mtr_t* mtr) /*!< in: mtr */ -{ - /* recheck if page needs scrubbing (knowing allocation status) */ - int needs_scrubbing = BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - - if (block) { - btr_page_needs_scrubbing(scrub_data, block, allocated); - } - - if (!block || needs_scrubbing != BTR_SCRUB_PAGE) { - mtr_commit(mtr); - return needs_scrubbing; - } - - if (allocated == BTR_SCRUB_PAGE_FREE) { - /* mtr is committed inside btr_scrub_page_free */ - return btr_scrub_free_page(scrub_data, - block, - mtr); - } - - /* check that table/index still match now that they are loaded */ - - if (scrub_data->current_table->space != scrub_data->space) { - /* this is truncate table */ - mtr_commit(mtr); - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - if (scrub_data->current_index->space != scrub_data->space) { - /* this is truncate table */ - mtr_commit(mtr); - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - if (scrub_data->current_index->page == FIL_NULL) { - /* this is truncate table */ - mtr_commit(mtr); - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - buf_frame_t* frame = buf_block_get_frame(block); - - if (!frame || 
btr_page_get_index_id(frame) != - scrub_data->current_index->id) { - /* page has been reallocated to new index */ - mtr_commit(mtr); - return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE; - } - - /* check if I can scrub (reorganize) page wo/ overflow */ - if (btr_optimistic_scrub(scrub_data, - block, - scrub_data->current_index, - mtr) != DB_SUCCESS) { - - /** - * Can't reorganize page...need to split it - */ - btr_pessimistic_scrub(scrub_data, - block, - scrub_data->current_index, - mtr); - } - mtr_commit(mtr); - - return BTR_SCRUB_SKIP_PAGE; // no further action needed -} - -/**************************************************************//** -Start iterating a space */ -UNIV_INTERN -bool -btr_scrub_start_space( -/*===================*/ - ulint space, /*!< in: space */ - btr_scrub_t* scrub_data) /*!< in/out: scrub data */ -{ - scrub_data->space = space; - scrub_data->current_table = NULL; - scrub_data->current_index = NULL; - - scrub_data->compressed = fil_space_get_zip_size(space) > 0; - scrub_data->scrubbing = check_scrub_setting(scrub_data); - return scrub_data->scrubbing; -} - -/*********************************************************************** -Update global statistics with thread statistics */ -static -void -btr_scrub_update_total_stat(btr_scrub_t *scrub_data) -{ - mutex_enter(&scrub_stat_mutex); - scrub_stat.page_reorganizations += - scrub_data->scrub_stat.page_reorganizations; - scrub_stat.page_splits += - scrub_data->scrub_stat.page_splits; - scrub_stat.page_split_failures_underflow += - scrub_data->scrub_stat.page_split_failures_underflow; - scrub_stat.page_split_failures_out_of_filespace += - scrub_data->scrub_stat.page_split_failures_out_of_filespace; - scrub_stat.page_split_failures_missing_index += - scrub_data->scrub_stat.page_split_failures_missing_index; - scrub_stat.page_split_failures_unknown += - scrub_data->scrub_stat.page_split_failures_unknown; - mutex_exit(&scrub_stat_mutex); - - // clear stat - memset(&scrub_data->scrub_stat, 0, 
sizeof(scrub_data->scrub_stat)); -} - -/**************************************************************//** -Complete iterating a space */ -UNIV_INTERN -bool -btr_scrub_complete_space( -/*=====================*/ - btr_scrub_t* scrub_data) /*!< in/out: scrub data */ -{ - btr_scrub_table_close_for_thread(scrub_data); - btr_scrub_update_total_stat(scrub_data); - return scrub_data->scrubbing; -} - -/********************************************************************* -Return scrub statistics */ -void -btr_scrub_total_stat(btr_scrub_stat_t *stat) -{ - mutex_enter(&scrub_stat_mutex); - *stat = scrub_stat; - mutex_exit(&scrub_stat_mutex); -} - -/********************************************************************* -Init global variables */ -UNIV_INTERN -void -btr_scrub_init() -{ - mutex_create(scrub_stat_mutex_key, - &scrub_stat_mutex, SYNC_NO_ORDER_CHECK); - - memset(&scrub_stat, 0, sizeof(scrub_stat)); -} - -/********************************************************************* -Cleanup globals */ -UNIV_INTERN -void -btr_scrub_cleanup() -{ - mutex_free(&scrub_stat_mutex); -} diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc deleted file mode 100644 index 2f0428747d5..00000000000 --- a/storage/xtradb/btr/btr0sea.cc +++ /dev/null @@ -1,2085 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file btr/btr0sea.cc -The index tree adaptive search - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#include "btr0sea.h" -#ifdef UNIV_NONINL -#include "btr0sea.ic" -#endif - -#include "buf0buf.h" -#include "page0page.h" -#include "page0cur.h" -#include "btr0cur.h" -#include "btr0pcur.h" -#include "btr0btr.h" -#include "ha0ha.h" -#include "srv0srv.h" -/** Flag: has the search system been enabled? -Protected by btr_search_latch. */ -UNIV_INTERN char btr_search_enabled = TRUE; - -/** Number of adaptive hash index partitions */ -UNIV_INTERN ulint btr_search_index_num; - -/** A dummy variable to fool the compiler */ -UNIV_INTERN ulint btr_search_this_is_zero = 0; - -/** padding to prevent other memory update -hotspots from residing on the same memory -cache line as btr_search_latch */ -UNIV_INTERN byte btr_sea_pad1[CACHE_LINE_SIZE]; - -/** Array of latches protecting individual AHI partitions. The latches -protect: (1) positions of records on those pages where a hash index from the -corresponding AHI partition has been built. 
-NOTE: They do not protect values of non-ordering fields within a record from -being updated in-place! We can use fact (1) to perform unique searches to -indexes. */ - -UNIV_INTERN prio_rw_lock_t* btr_search_latch_arr; - -/** padding to prevent other memory update hotspots from residing on -the same memory cache line */ -UNIV_INTERN byte btr_sea_pad2[CACHE_LINE_SIZE]; - -/** The adaptive hash index */ -UNIV_INTERN btr_search_sys_t* btr_search_sys; - -#ifdef UNIV_PFS_RWLOCK -/* Key to register btr_search_sys with performance schema */ -UNIV_INTERN mysql_pfs_key_t btr_search_latch_key; -#endif /* UNIV_PFS_RWLOCK */ - -/** If the number of records on the page divided by this parameter -would have been successfully accessed using a hash index, the index -is then built on the page, assuming the global limit has been reached */ -#define BTR_SEARCH_PAGE_BUILD_LIMIT 16 - -/** The global limit for consecutive potentially successful hash searches, -before hash index building is started */ -#define BTR_SEARCH_BUILD_LIMIT 100 - -/********************************************************************//** -Builds a hash index on a page with the given parameters. If the page already -has a hash index with different parameters, the old hash index is removed. -If index is non-NULL, this function checks if n_fields and n_bytes are -sensible values, and does not build a hash index if not. */ -static -void -btr_search_build_page_hash_index( -/*=============================*/ - dict_index_t* index, /*!< in: index for which to build, or NULL if - not known */ - buf_block_t* block, /*!< in: index page, s- or x-latched */ - ulint n_fields,/*!< in: hash this many full fields */ - ulint n_bytes,/*!< in: hash this many bytes from the next - field */ - ibool left_side);/*!< in: hash for searches from left side? 
*/ - -/*****************************************************************//** -This function should be called before reserving any btr search mutex, if -the intended operation might add nodes to the search system hash table. -Because of the latching order, once we have reserved the btr search system -latch, we cannot allocate a free frame from the buffer pool. Checks that -there is a free buffer frame allocated for hash table heap in the btr search -system. If not, allocates a free frames for the heap. This check makes it -probable that, when have reserved the btr search system latch and we need to -allocate a new node to the hash table, it will succeed. However, the check -will not guarantee success. */ -static -void -btr_search_check_free_space_in_heap( -/*================================*/ - dict_index_t* index) -{ - hash_table_t* table; - mem_heap_t* heap; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - table = btr_search_get_hash_table(index); - - heap = table->heap; - - /* Note that we peek the value of heap->free_block without reserving - the latch: this is ok, because we will not guarantee that there will - be enough free space in the hash table. */ - - if (heap->free_block == NULL) { - buf_block_t* block = buf_block_alloc(NULL); - - rw_lock_x_lock(btr_search_get_latch(index)); - - if (heap->free_block == NULL) { - heap->free_block = block; - } else { - buf_block_free(block); - } - - rw_lock_x_unlock(btr_search_get_latch(index)); - } -} - -/*****************************************************************//** -Creates and initializes the adaptive search system at a database start. 
*/ -UNIV_INTERN -void -btr_search_sys_create( -/*==================*/ - ulint hash_size) /*!< in: hash index hash table size */ -{ - ulint i; - - /* PS bug lp:1018264 - Multiple hash index partitions causes overly - large hash index: When multiple adaptive hash index partitions are - specified, _each_ partition was being created with hash_size which - should be 1/64 of the total size of all buffer pools which is - incorrect and can cause overly high memory usage. hash_size - should be representing the _total_ size of all partitions, not the - individual size of each partition. */ - hash_size /= btr_search_index_num; - - /* We allocate the search latch from dynamic memory: - see above at the global variable definition */ - - /* btr_search_index_num is constrained to machine word size for - historical reasons. This limitation can be easily removed later. */ - - btr_search_latch_arr = (prio_rw_lock_t *) - mem_alloc(sizeof(prio_rw_lock_t) * btr_search_index_num); - - btr_search_sys = (btr_search_sys_t*) - mem_alloc(sizeof(btr_search_sys_t)); - - btr_search_sys->hash_tables = (hash_table_t **) - mem_alloc(sizeof(hash_table_t *) * btr_search_index_num); - - for (i = 0; i < btr_search_index_num; i++) { - - rw_lock_create(btr_search_latch_key, - &btr_search_latch_arr[i], SYNC_SEARCH_SYS); - - btr_search_sys->hash_tables[i] - = ib_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - btr_search_sys->hash_tables[i]->adaptive = TRUE; -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - } -} - -/*****************************************************************//** -Frees the adaptive search system at a database shutdown. 
*/ -UNIV_INTERN -void -btr_search_sys_free(void) -/*=====================*/ -{ - ulint i; - - for (i = 0; i < btr_search_index_num; i++) { - - rw_lock_free(&btr_search_latch_arr[i]); - - mem_heap_free(btr_search_sys->hash_tables[i]->heap); - - hash_table_free(btr_search_sys->hash_tables[i]); - - } - - mem_free(btr_search_latch_arr); - btr_search_latch_arr = NULL; - - mem_free(btr_search_sys->hash_tables); - - mem_free(btr_search_sys); - btr_search_sys = NULL; -} - -/********************************************************************//** -Set index->ref_count = 0 on all indexes of a table. */ -static -void -btr_search_disable_ref_count( -/*=========================*/ - dict_table_t* table) /*!< in/out: table */ -{ - dict_index_t* index; - - ut_ad(mutex_own(&dict_sys->mutex)); - - for (index = dict_table_get_first_index(table); index; - index = dict_table_get_next_index(index)) { - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(btr_search_get_latch(index), - RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - index->search_info->ref_count = 0; - } -} - -/********************************************************************//** -Disable the adaptive hash search system and empty the index. */ -UNIV_INTERN -void -btr_search_disable(void) -/*====================*/ -{ - dict_table_t* table; - ulint i; - - mutex_enter(&dict_sys->mutex); - btr_search_x_lock_all(); - - btr_search_enabled = FALSE; - - /* Clear the index->search_info->ref_count of every index in - the data dictionary cache. */ - for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - btr_search_disable_ref_count(table); - } - - for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); table; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - btr_search_disable_ref_count(table); - } - - mutex_exit(&dict_sys->mutex); - - /* Set all block->index = NULL. */ - buf_pool_clear_hash_index(); - - /* Clear the adaptive hash index. 
*/ - for (i = 0; i < btr_search_index_num; i++) { - hash_table_clear(btr_search_sys->hash_tables[i]); - mem_heap_empty(btr_search_sys->hash_tables[i]->heap); - } - - btr_search_x_unlock_all(); -} - -/********************************************************************//** -Enable the adaptive hash search system. */ -UNIV_INTERN -void -btr_search_enable(void) -/*====================*/ -{ - btr_search_x_lock_all(); - - btr_search_enabled = TRUE; - - btr_search_x_unlock_all(); -} - -/*****************************************************************//** -Creates and initializes a search info struct. -@return own: search info struct */ -UNIV_INTERN -btr_search_t* -btr_search_info_create( -/*===================*/ - mem_heap_t* heap) /*!< in: heap where created */ -{ - btr_search_t* info; - - info = (btr_search_t*) mem_heap_alloc(heap, sizeof(btr_search_t)); - -#ifdef UNIV_DEBUG - info->magic_n = BTR_SEARCH_MAGIC_N; -#endif /* UNIV_DEBUG */ - - info->ref_count = 0; - info->root_guess = NULL; - - info->hash_analysis = 0; - info->n_hash_potential = 0; - - info->last_hash_succ = FALSE; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_succ = 0; - info->n_hash_fail = 0; - info->n_patt_succ = 0; - info->n_searches = 0; -#endif /* UNIV_SEARCH_PERF_STAT */ - - /* Set some sensible values */ - info->n_fields = 1; - info->n_bytes = 0; - - info->left_side = TRUE; - - return(info); -} - -/*****************************************************************//** -Returns the value of ref_count. The value is protected by -the latch of the AHI partition corresponding to this index. -@return ref_count value. */ -UNIV_INTERN -ulint -btr_search_info_get_ref_count( -/*==========================*/ - btr_search_t* info, /*!< in: search info. 
*/ - dict_index_t* index) /*!< in: index */ -{ - ulint ret; - - ut_ad(info); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(btr_search_get_latch(index)); - ret = info->ref_count; - rw_lock_s_unlock(btr_search_get_latch(index)); - - return(ret); -} - -/*********************************************************************//** -Updates the search info of an index about hash successes. NOTE that info -is NOT protected by any semaphore, to save CPU time! Do not assume its fields -are consistent. */ -static -void -btr_search_info_update_hash( -/*========================*/ - btr_search_t* info, /*!< in/out: search info */ - btr_cur_t* cursor) /*!< in: cursor which was just positioned */ -{ - dict_index_t* index = cursor->index; - ulint n_unique; - int cmp; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (dict_index_is_ibuf(index)) { - /* So many deletes are performed on an insert buffer tree - that we do not consider a hash index useful on it: */ - - return; - } - - n_unique = dict_index_get_n_unique_in_tree(index); - - if (info->n_hash_potential == 0) { - - goto set_new_recomm; - } - - /* Test if the search would have succeeded using the recommended - hash prefix */ - - if (info->n_fields >= n_unique && cursor->up_match >= n_unique) { -increment_potential: - info->n_hash_potential++; - - return; - } - - cmp = ut_pair_cmp(info->n_fields, info->n_bytes, - cursor->low_match, cursor->low_bytes); - - if (info->left_side ? cmp <= 0 : cmp > 0) { - - goto set_new_recomm; - } - - cmp = ut_pair_cmp(info->n_fields, info->n_bytes, - cursor->up_match, cursor->up_bytes); - - if (info->left_side ? 
cmp <= 0 : cmp > 0) { - - goto increment_potential; - } - -set_new_recomm: - /* We have to set a new recommendation; skip the hash analysis - for a while to avoid unnecessary CPU time usage when there is no - chance for success */ - - info->hash_analysis = 0; - - cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes, - cursor->low_match, cursor->low_bytes); - if (cmp == 0) { - info->n_hash_potential = 0; - - /* For extra safety, we set some sensible values here */ - - info->n_fields = 1; - info->n_bytes = 0; - - info->left_side = TRUE; - - } else if (cmp > 0) { - info->n_hash_potential = 1; - - if (cursor->up_match >= n_unique) { - - info->n_fields = n_unique; - info->n_bytes = 0; - - } else if (cursor->low_match < cursor->up_match) { - - info->n_fields = cursor->low_match + 1; - info->n_bytes = 0; - } else { - info->n_fields = cursor->low_match; - info->n_bytes = cursor->low_bytes + 1; - } - - info->left_side = TRUE; - } else { - info->n_hash_potential = 1; - - if (cursor->low_match >= n_unique) { - - info->n_fields = n_unique; - info->n_bytes = 0; - - } else if (cursor->low_match > cursor->up_match) { - - info->n_fields = cursor->up_match + 1; - info->n_bytes = 0; - } else { - info->n_fields = cursor->up_match; - info->n_bytes = cursor->up_bytes + 1; - } - - info->left_side = FALSE; - } -} - -/*********************************************************************//** -Updates the block search info on hash successes. NOTE that info and -block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any -semaphore, to save CPU time! Do not assume the fields are consistent. 
-@return TRUE if building a (new) hash index on the block is recommended */ -static -ibool -btr_search_update_block_hash_info( -/*==============================*/ - btr_search_t* info, /*!< in: search info */ - buf_block_t* block, /*!< in: buffer block */ - btr_cur_t* cursor MY_ATTRIBUTE((unused))) - /*!< in: cursor */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index), - RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index), - RW_LOCK_EX)); - ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED) - || rw_lock_own(&block->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(cursor); - - info->last_hash_succ = FALSE; - - ut_a(buf_block_state_valid(block)); - ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N); - - if ((block->n_hash_helps > 0) - && (info->n_hash_potential > 0) - && (block->n_fields == info->n_fields) - && (block->n_bytes == info->n_bytes) - && (block->left_side == info->left_side)) { - - if ((block->index) - && (block->curr_n_fields == info->n_fields) - && (block->curr_n_bytes == info->n_bytes) - && (block->curr_left_side == info->left_side)) { - - /* The search would presumably have succeeded using - the hash index */ - - info->last_hash_succ = TRUE; - } - - block->n_hash_helps++; - } else { - block->n_hash_helps = 1; - block->n_fields = info->n_fields; - block->n_bytes = info->n_bytes; - block->left_side = info->left_side; - } - -#ifdef UNIV_DEBUG - if (cursor->index->table->does_not_fit_in_memory) { - block->n_hash_helps = 0; - } -#endif /* UNIV_DEBUG */ - - if ((block->n_hash_helps > page_get_n_recs(block->frame) - / BTR_SEARCH_PAGE_BUILD_LIMIT) - && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) { - - if ((!block->index) - || (block->n_hash_helps - > 2 * page_get_n_recs(block->frame)) - || (block->n_fields != block->curr_n_fields) - || (block->n_bytes != block->curr_n_bytes) - || (block->left_side != block->curr_left_side)) { - - /* Build a new hash index on the page */ - - return(TRUE); - } - } - 
- return(FALSE); -} - -/*********************************************************************//** -Updates a hash node reference when it has been unsuccessfully used in a -search which could have succeeded with the used hash parameters. This can -happen because when building a hash index for a page, we do not check -what happens at page boundaries, and therefore there can be misleading -hash nodes. Also, collisions in the fold value can lead to misleading -references. This function lazily fixes these imperfections in the hash -index. */ -static -void -btr_search_update_hash_ref( -/*=======================*/ - btr_search_t* info, /*!< in: search info */ - buf_block_t* block, /*!< in: buffer block where cursor positioned */ - btr_cur_t* cursor) /*!< in: cursor */ -{ - dict_index_t* index; - ulint fold; - const rec_t* rec; - - ut_ad(cursor->flag == BTR_CUR_HASH_FAIL); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(btr_search_get_latch(cursor->index), - RW_LOCK_EX)); - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(page_align(btr_cur_get_rec(cursor)) - == buf_block_get_frame(block)); - - index = block->index; - - if (!index) { - - return; - } - - ut_a(index == cursor->index); - ut_a(!dict_index_is_ibuf(index)); - - if ((info->n_hash_potential > 0) - && (block->curr_n_fields == info->n_fields) - && (block->curr_n_bytes == info->n_bytes) - && (block->curr_left_side == info->left_side)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - - rec = btr_cur_get_rec(cursor); - - if (!page_rec_is_user_rec(rec)) { - - return; - } - - fold = rec_fold(rec, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - block->curr_n_fields, - block->curr_n_bytes, index->id); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(btr_search_get_latch(cursor->index), - RW_LOCK_EX)); -#endif /* 
UNIV_SYNC_DEBUG */ - - ha_insert_for_fold(btr_search_get_hash_table(cursor->index), - fold, block, rec); - - MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); - } -} - -/*********************************************************************//** -Updates the search info. */ -UNIV_INTERN -void -btr_search_info_update_slow( -/*========================*/ - btr_search_t* info, /*!< in/out: search info */ - btr_cur_t* cursor) /*!< in: cursor which was just positioned */ -{ - buf_block_t* block; - ibool build_index; - ulint* params; - ulint* params2; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index), - RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index), - RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - block = btr_cur_get_block(cursor); - - SRV_CORRUPT_TABLE_CHECK(block, return;); - - /* NOTE that the following two function calls do NOT protect - info or block->n_fields etc. with any semaphore, to save CPU time! - We cannot assume the fields are consistent when we return from - those functions! */ - - btr_search_info_update_hash(info, cursor); - - build_index = btr_search_update_block_hash_info(info, block, cursor); - - if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) { - - btr_search_check_free_space_in_heap(cursor->index); - } - - if (cursor->flag == BTR_CUR_HASH_FAIL) { - /* Update the hash node reference, if appropriate */ - - rw_lock_x_lock(btr_search_get_latch(cursor->index)); - - btr_search_update_hash_ref(info, block, cursor); - - rw_lock_x_unlock(btr_search_get_latch(cursor->index)); - } - - if (build_index) { - /* Note that since we did not protect block->n_fields etc. - with any semaphore, the values can be inconsistent. We have - to check inside the function call that they make sense. We - also malloc an array and store the values there to make sure - the compiler does not let the function call parameters change - inside the called function. 
It might be that the compiler - would optimize the call just to pass pointers to block. */ - - params = (ulint*) mem_alloc(3 * sizeof(ulint)); - params[0] = block->n_fields; - params[1] = block->n_bytes; - params[2] = block->left_side; - - /* Make sure the compiler cannot deduce the values and do - optimizations */ - - params2 = params + btr_search_this_is_zero; - - btr_search_build_page_hash_index(cursor->index, - block, - params2[0], - params2[1], - params2[2]); - mem_free(params); - } -} - -/******************************************************************//** -Checks if a guessed position for a tree cursor is right. Note that if -mode is PAGE_CUR_LE, which is used in inserts, and the function returns -TRUE, then cursor->up_match and cursor->low_match both have sensible values. -@return TRUE if success */ -static -ibool -btr_search_check_guess( -/*===================*/ - btr_cur_t* cursor, /*!< in: guessed cursor position */ - ibool can_only_compare_to_cursor_rec, - /*!< in: if we do not have a latch on the page - of cursor, but only a latch on - btr_search_latch, then ONLY the columns - of the record UNDER the cursor are - protected, not the next or previous record - in the chain: we cannot look at the next or - previous record to check our guess! 
*/ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - mtr_t* mtr) /*!< in: mtr */ -{ - rec_t* rec; - ulint n_unique; - ulint match; - ulint bytes; - int cmp; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ibool success = FALSE; - rec_offs_init(offsets_); - - n_unique = dict_index_get_n_unique_in_tree(cursor->index); - - rec = btr_cur_get_rec(cursor); - - ut_ad(page_rec_is_user_rec(rec)); - - match = 0; - bytes = 0; - - offsets = rec_get_offsets(rec, cursor->index, offsets, - n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, - offsets, &match, &bytes); - - if (mode == PAGE_CUR_GE) { - if (cmp == 1) { - goto exit_func; - } - - cursor->up_match = match; - - if (match >= n_unique) { - success = TRUE; - goto exit_func; - } - } else if (mode == PAGE_CUR_LE) { - if (cmp == -1) { - goto exit_func; - } - - cursor->low_match = match; - - } else if (mode == PAGE_CUR_G) { - if (cmp != -1) { - goto exit_func; - } - } else if (mode == PAGE_CUR_L) { - if (cmp != 1) { - goto exit_func; - } - } - - if (can_only_compare_to_cursor_rec) { - /* Since we could not determine if our guess is right just by - looking at the record under the cursor, return FALSE */ - goto exit_func; - } - - match = 0; - bytes = 0; - - if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) { - rec_t* prev_rec; - - ut_ad(!page_rec_is_infimum(rec)); - - prev_rec = page_rec_get_prev(rec); - - if (page_rec_is_infimum(prev_rec)) { - success = btr_page_get_prev(page_align(prev_rec), mtr) - == FIL_NULL; - - goto exit_func; - } - - offsets = rec_get_offsets(prev_rec, cursor->index, offsets, - n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec, - offsets, &match, &bytes); - if (mode == PAGE_CUR_GE) { - success = cmp == 1; - } else { - success = cmp != -1; - } - - goto exit_func; - } else { - rec_t* next_rec; - - ut_ad(!page_rec_is_supremum(rec)); - - 
next_rec = page_rec_get_next(rec); - - if (page_rec_is_supremum(next_rec)) { - if (btr_page_get_next(page_align(next_rec), mtr) - == FIL_NULL) { - - cursor->up_match = 0; - success = TRUE; - } - - goto exit_func; - } - - offsets = rec_get_offsets(next_rec, cursor->index, offsets, - n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, - offsets, &match, &bytes); - if (mode == PAGE_CUR_LE) { - success = cmp == -1; - cursor->up_match = match; - } else { - success = cmp != 1; - } - } -exit_func: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(success); -} - -/******************************************************************//** -Tries to guess the right search position based on the hash search info -of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, -and the function returns TRUE, then cursor->up_match and cursor->low_match -both have sensible values. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -btr_search_guess_on_hash( -/*=====================*/ - dict_index_t* index, /*!< in: index */ - btr_search_t* info, /*!< in: index search info */ - const dtuple_t* tuple, /*!< in: logical record */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ...; - NOTE that only if has_search_latch - is 0, we will have a latch set on - the cursor page, otherwise we assume - the caller uses his search latch - to protect the record! 
*/ - btr_cur_t* cursor, /*!< out: tree cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, RW_X_LATCH, or 0 */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_pool_t* buf_pool; - buf_block_t* block; - const rec_t* rec; - ulint fold; - index_id_t index_id; -#ifdef notdefined - btr_cur_t cursor2; - btr_pcur_t pcur; -#endif - ut_ad(index && info && tuple && cursor && mtr); - ut_ad(!dict_index_is_ibuf(index)); - ut_ad((latch_mode == BTR_SEARCH_LEAF) - || (latch_mode == BTR_MODIFY_LEAF)); - - /* Note that, for efficiency, the struct info may not be protected by - any latch here! */ - - if (UNIV_UNLIKELY(info->n_hash_potential == 0)) { - - return(FALSE); - } - - cursor->n_fields = info->n_fields; - cursor->n_bytes = info->n_bytes; - - if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple) - < cursor->n_fields + (cursor->n_bytes > 0))) { - - return(FALSE); - } - - index_id = index->id; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_succ++; -#endif - fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id); - - cursor->fold = fold; - cursor->flag = BTR_CUR_HASH; - - if (UNIV_LIKELY(!has_search_latch)) { - rw_lock_s_lock(btr_search_get_latch(index)); - - if (UNIV_UNLIKELY(!btr_search_enabled)) { - goto failure_unlock; - } - } - - ut_ad(rw_lock_get_writer(btr_search_get_latch(index)) != RW_LOCK_EX); - ut_ad(rw_lock_get_reader_count(btr_search_get_latch(index)) > 0); - - rec = (rec_t*) ha_search_and_get_data( - btr_search_get_hash_table(index), fold); - - if (UNIV_UNLIKELY(!rec)) { - goto failure_unlock; - } - - block = buf_block_align(rec); - - if (UNIV_LIKELY(!has_search_latch)) { - - if (UNIV_UNLIKELY( - !buf_page_get_known_nowait(latch_mode, block, - BUF_MAKE_YOUNG, - __FILE__, __LINE__, - mtr))) { - goto failure_unlock; - } - - rw_lock_s_unlock(btr_search_get_latch(index)); - - buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH); - } - - if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) 
{ - ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH); - - if (UNIV_LIKELY(!has_search_latch)) { - - btr_leaf_page_release(block, latch_mode, mtr); - } - - goto failure; - } - - ut_ad(page_rec_is_user_rec(rec)); - - btr_cur_position(index, (rec_t*) rec, block, cursor); - - /* Check the validity of the guess within the page */ - - /* If we only have the latch on btr_search_latch, not on the - page, it only protects the columns of the record the cursor - is positioned on. We cannot look at the next of the previous - record to determine if our guess for the cursor position is - right. */ - if (UNIV_UNLIKELY(index_id != btr_page_get_index_id(block->frame)) - || !btr_search_check_guess(cursor, - has_search_latch, - tuple, mode, mtr)) { - if (UNIV_LIKELY(!has_search_latch)) { - btr_leaf_page_release(block, latch_mode, mtr); - } - - goto failure; - } - - if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) { - - info->n_hash_potential++; - } - -#ifdef notdefined - /* These lines of code can be used in a debug version to check - the correctness of the searched cursor position: */ - - info->last_hash_succ = FALSE; - - /* Currently, does not work if the following fails: */ - ut_ad(!has_search_latch); - - btr_leaf_page_release(block, latch_mode, mtr); - - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - &cursor2, 0, mtr); - if (mode == PAGE_CUR_GE - && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) { - - /* If mode is PAGE_CUR_GE, then the binary search - in the index tree may actually take us to the supremum - of the previous page */ - - info->last_hash_succ = FALSE; - - btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode, - &pcur, mtr); - ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor)); - } else { - ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor)); - } - - /* NOTE that it is theoretically possible that the above assertions - fail if the page of the cursor gets removed from the buffer pool - meanwhile! 
Thus it might not be a bug. */ -#endif - info->last_hash_succ = TRUE; - -#ifdef UNIV_SEARCH_PERF_STAT -#endif - if (UNIV_LIKELY(!has_search_latch) - && buf_page_peek_if_too_old(&block->page)) { - - buf_page_make_young(&block->page); - } - - /* Increment the page get statistics though we did not really - fix the page: for user info only */ - buf_pool = buf_pool_from_bpage(&block->page); - buf_pool->stat.n_page_gets++; - - return(TRUE); - - /*-------------------------------------------*/ -failure_unlock: - if (UNIV_LIKELY(!has_search_latch)) { - rw_lock_s_unlock(btr_search_get_latch(index)); - } -failure: - cursor->flag = BTR_CUR_HASH_FAIL; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_fail++; - - if (info->n_hash_succ > 0) { - info->n_hash_succ--; - } -#endif - info->last_hash_succ = FALSE; - - return(FALSE); -} - -/********************************************************************//** -Drops a page hash index. */ -UNIV_INTERN -void -btr_search_drop_page_hash_index( -/*============================*/ - buf_block_t* block) /*!< in: block containing index page, - s- or x-latched, or an index page - for which we know that - block->buf_fix_count == 0 or it is an - index page which has already been - removed from the buf_pool->page_hash - i.e.: it is in state - BUF_BLOCK_REMOVE_HASH */ -{ - hash_table_t* table; - ulint n_fields; - ulint n_bytes; - const page_t* page; - const rec_t* rec; - ulint fold; - ulint prev_fold; - index_id_t index_id; - ulint n_cached; - ulint n_recs; - ulint* folds; - ulint i; - mem_heap_t* heap; - const dict_index_t* index; - ulint* offsets; - btr_search_t* info; - -retry: - /* Do a dirty check on block->index, return if the block is not in the - adaptive hash index. This is to avoid acquiring an AHI latch for - performance considerations. 
*/ - - index = block->index; - if (!index) { - - return; - } - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - rw_lock_s_lock(btr_search_get_latch(index)); - - if (UNIV_UNLIKELY(index != block->index)) { - - rw_lock_s_unlock(btr_search_get_latch(index)); - - goto retry; - } - - ut_a(!dict_index_is_ibuf(index)); -#ifdef UNIV_DEBUG - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_CREATION: - /* The index is being created (bulk loaded). */ - case ONLINE_INDEX_COMPLETE: - /* The index has been published. */ - case ONLINE_INDEX_ABORTED: - /* Either the index creation was aborted due to an - error observed by InnoDB (in which case there should - not be any adaptive hash index entries), or it was - completed and then flagged aborted in - rollback_inplace_alter_table(). */ - break; - case ONLINE_INDEX_ABORTED_DROPPED: - /* The index should have been dropped from the tablespace - already, and the adaptive hash index entries should have - been dropped as well. */ - ut_error; - } -#endif /* UNIV_DEBUG */ - - table = btr_search_get_hash_table(index); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX) - || block->page.buf_fix_count == 0 - || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH); -#endif /* UNIV_SYNC_DEBUG */ - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - - /* NOTE: The fields of block must not be accessed after - releasing btr_search_latch, as the index page might only - be s-latched! 
*/ - - rw_lock_s_unlock(btr_search_get_latch(index)); - - ut_a(n_fields + n_bytes > 0); - - page = block->frame; - n_recs = page_get_n_recs(page); - - /* Calculate and cache fold values into an array for fast deletion - from the hash index */ - - folds = (ulint*) mem_alloc(n_recs * sizeof(ulint)); - - n_cached = 0; - - rec = page_get_infimum_rec(page); - rec = page_rec_get_next_low(rec, page_is_comp(page)); - - index_id = btr_page_get_index_id(page); - - ut_a(index_id == index->id); - - prev_fold = 0; - - heap = NULL; - offsets = NULL; - - while (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); - fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); - - if (fold == prev_fold && prev_fold != 0) { - - goto next_rec; - } - - /* Remove all hash nodes pointing to this page from the - hash chain */ - - folds[n_cached] = fold; - n_cached++; -next_rec: - rec = page_rec_get_next_low(rec, page_rec_is_comp(rec)); - prev_fold = fold; - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - rw_lock_x_lock(btr_search_get_latch(index)); - - if (UNIV_UNLIKELY(!block->index)) { - /* Someone else has meanwhile dropped the hash index */ - - goto cleanup; - } - - ut_a(block->index == index); - - if (UNIV_UNLIKELY(block->curr_n_fields != n_fields) - || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) { - - /* Someone else has meanwhile built a new hash index on the - page, with different parameters */ - - rw_lock_x_unlock(btr_search_get_latch(index)); - - mem_free(folds); - goto retry; - } - - for (i = 0; i < n_cached; i++) { - - ha_remove_all_nodes_to_page(table, folds[i], page); - } - - info = btr_search_get_info(block->index); - ut_a(info->ref_count > 0); - info->ref_count--; - - block->index = NULL; - - MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_REMOVED); - MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached); - -cleanup: -#if 
defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (UNIV_UNLIKELY(block->n_pointers)) { - /* Corruption */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Corruption of adaptive hash index." - " After dropping\n" - "InnoDB: the hash index to a page of %s," - " still %lu hash nodes remain.\n", - index->name, (ulong) block->n_pointers); - rw_lock_x_unlock(btr_search_get_latch(index)); - - ut_ad(btr_search_validate()); - } else { - rw_lock_x_unlock(btr_search_get_latch(index)); - } -#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - rw_lock_x_unlock(btr_search_get_latch(index)); -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - - mem_free(folds); -} - -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. */ -UNIV_INTERN -void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no) /*!< in: page number */ -{ - buf_block_t* block; - mtr_t mtr; - - mtr_start(&mtr); - - /* If the caller has a latch on the page, then the caller must - have a x-latch on the page and it must have already dropped - the hash index for the page. Because of the x-latch that we - are possibly holding, we cannot s-latch the page, but must - (recursively) x-latch it, even though we are only reading. */ - - block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL, - BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__, - &mtr); - - if (block && block->index) { - - buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH); - - btr_search_drop_page_hash_index(block); - } - - mtr_commit(&mtr); -} - -/********************************************************************//** -Builds a hash index on a page with the given parameters. 
If the page already -has a hash index with different parameters, the old hash index is removed. -If index is non-NULL, this function checks if n_fields and n_bytes are -sensible values, and does not build a hash index if not. */ -static -void -btr_search_build_page_hash_index( -/*=============================*/ - dict_index_t* index, /*!< in: index for which to build */ - buf_block_t* block, /*!< in: index page, s- or x-latched */ - ulint n_fields,/*!< in: hash this many full fields */ - ulint n_bytes,/*!< in: hash this many bytes from the next - field */ - ibool left_side)/*!< in: hash for searches from left side? */ -{ - hash_table_t* table; - page_t* page; - rec_t* rec; - rec_t* next_rec; - ulint fold; - ulint next_fold; - ulint n_cached; - ulint n_recs; - ulint* folds; - rec_t** recs; - ulint i; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(index); - ut_a(!dict_index_is_ibuf(index)); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX)); - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(btr_search_get_latch(index)); - - if (!btr_search_enabled) { - rw_lock_s_unlock(btr_search_get_latch(index)); - return; - } - - table = btr_search_get_hash_table(index); - page = buf_block_get_frame(block); - - if (block->index && ((block->curr_n_fields != n_fields) - || (block->curr_n_bytes != n_bytes) - || (block->curr_left_side != left_side))) { - - rw_lock_s_unlock(btr_search_get_latch(index)); - - btr_search_drop_page_hash_index(block); - } else { - rw_lock_s_unlock(btr_search_get_latch(index)); - } - - n_recs = page_get_n_recs(page); - - if (n_recs == 0) { - - return; - } - - /* Check that the values for hash index build are sensible */ - - if (n_fields + n_bytes == 0) { - - return; - } - - if (dict_index_get_n_unique_in_tree(index) < n_fields - || 
(dict_index_get_n_unique_in_tree(index) == n_fields - && n_bytes > 0)) { - return; - } - - /* Calculate and cache fold values and corresponding records into - an array for fast insertion to the hash index */ - - folds = (ulint*) mem_alloc(n_recs * sizeof(ulint)); - recs = (rec_t**) mem_alloc(n_recs * sizeof(rec_t*)); - - n_cached = 0; - - ut_a(index->id == btr_page_get_index_id(page)); - - rec = page_rec_get_next(page_get_infimum_rec(page)); - - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - - if (!page_rec_is_supremum(rec)) { - ut_a(n_fields <= rec_offs_n_fields(offsets)); - - if (n_bytes > 0) { - ut_a(n_fields < rec_offs_n_fields(offsets)); - } - } - - fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id); - - if (left_side) { - - folds[n_cached] = fold; - recs[n_cached] = rec; - n_cached++; - } - - for (;;) { - next_rec = page_rec_get_next(rec); - - if (page_rec_is_supremum(next_rec)) { - - if (!left_side) { - - folds[n_cached] = fold; - recs[n_cached] = rec; - n_cached++; - } - - break; - } - - offsets = rec_get_offsets(next_rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - next_fold = rec_fold(next_rec, offsets, n_fields, - n_bytes, index->id); - - if (fold != next_fold) { - /* Insert an entry into the hash index */ - - if (left_side) { - - folds[n_cached] = next_fold; - recs[n_cached] = next_rec; - n_cached++; - } else { - folds[n_cached] = fold; - recs[n_cached] = rec; - n_cached++; - } - } - - rec = next_rec; - fold = next_fold; - } - - btr_search_check_free_space_in_heap(index); - - rw_lock_x_lock(btr_search_get_latch(index)); - - if (UNIV_UNLIKELY(!btr_search_enabled)) { - goto exit_func; - } - - if (block->index && ((block->curr_n_fields != n_fields) - || (block->curr_n_bytes != n_bytes) - || (block->curr_left_side != left_side))) { - goto exit_func; - } - - /* This counter is decremented every time we drop page - hash index entries and is incremented here. 
Since we can - rebuild hash index for a page that is already hashed, we - have to take care not to increment the counter in that - case. */ - if (!block->index) { - index->search_info->ref_count++; - } - - block->n_hash_helps = 0; - - block->curr_n_fields = n_fields; - block->curr_n_bytes = n_bytes; - block->curr_left_side = left_side; - block->index = index; - - for (i = 0; i < n_cached; i++) { - - ha_insert_for_fold(table, folds[i], block, recs[i]); - } - - MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED); - MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached); -exit_func: - rw_lock_x_unlock(btr_search_get_latch(index)); - - mem_free(folds); - mem_free(recs); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/********************************************************************//** -Moves or deletes hash entries for moved records. If new_page is already hashed, -then the hash index for page, if any, is dropped. If new_page is not hashed, -and page is hashed, then a new hash index is built to new_page with the same -parameters as page (this often happens when a page is split). 
*/ -UNIV_INTERN -void -btr_search_move_or_delete_hash_entries( -/*===================================*/ - buf_block_t* new_block, /*!< in: records are copied - to this page */ - buf_block_t* block, /*!< in: index page from which - records were copied, and the - copied records will be deleted - from this page */ - dict_index_t* index) /*!< in: record descriptor */ -{ - ulint n_fields; - ulint n_bytes; - ibool left_side; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); - ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(btr_search_get_latch(index)); - - ut_a(!new_block->index || new_block->index == index); - ut_a(!block->index || block->index == index); - ut_a(!(new_block->index || block->index) - || !dict_index_is_ibuf(index)); - - if (new_block->index) { - - rw_lock_s_unlock(btr_search_get_latch(index)); - - btr_search_drop_page_hash_index(block); - - return; - } - - if (block->index) { - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - left_side = block->curr_left_side; - - new_block->n_fields = block->curr_n_fields; - new_block->n_bytes = block->curr_n_bytes; - new_block->left_side = left_side; - - rw_lock_s_unlock(btr_search_get_latch(index)); - - ut_a(n_fields + n_bytes > 0); - - btr_search_build_page_hash_index(index, new_block, n_fields, - n_bytes, left_side); - ut_ad(n_fields == block->curr_n_fields); - ut_ad(n_bytes == block->curr_n_bytes); - ut_ad(left_side == block->curr_left_side); - return; - } - - rw_lock_s_unlock(btr_search_get_latch(index)); -} - -/********************************************************************//** -Updates the page hash index when a single record is deleted from a page. 
*/ -UNIV_INTERN -void -btr_search_update_hash_on_delete( -/*=============================*/ - btr_cur_t* cursor) /*!< in: cursor which was positioned on the - record to delete using btr_cur_search_..., - the record is not yet deleted */ -{ - hash_table_t* table; - buf_block_t* block; - const rec_t* rec; - ulint fold; - dict_index_t* index; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - mem_heap_t* heap = NULL; - rec_offs_init(offsets_); - - block = btr_cur_get_block(cursor); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - index = block->index; - - if (!index) { - - return; - } - - ut_a(index == cursor->index); - ut_a(block->curr_n_fields + block->curr_n_bytes > 0); - ut_a(!dict_index_is_ibuf(index)); - - table = btr_search_get_hash_table(cursor->index); - - rec = btr_cur_get_rec(cursor); - - fold = rec_fold(rec, rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - block->curr_n_fields, block->curr_n_bytes, index->id); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - rw_lock_x_lock(btr_search_get_latch(cursor->index)); - - if (block->index) { - ut_a(block->index == index); - - if (ha_search_and_delete_if_found(table, fold, rec)) { - MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_REMOVED); - } else { - MONITOR_INC( - MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND); - } - } - - rw_lock_x_unlock(btr_search_get_latch(cursor->index)); -} - -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. 
*/ -UNIV_INTERN -void -btr_search_update_hash_node_on_insert( -/*==================================*/ - btr_cur_t* cursor) /*!< in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -{ - hash_table_t* table; - buf_block_t* block; - dict_index_t* index; - rec_t* rec; - - rec = btr_cur_get_rec(cursor); - - block = btr_cur_get_block(cursor); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - index = block->index; - - if (!index) { - - return; - } - - ut_a(cursor->index == index); - ut_a(!dict_index_is_ibuf(index)); - - rw_lock_x_lock(btr_search_get_latch(cursor->index)); - - if (!block->index) { - - goto func_exit; - } - - ut_a(block->index == index); - - if ((cursor->flag == BTR_CUR_HASH) - && (cursor->n_fields == block->curr_n_fields) - && (cursor->n_bytes == block->curr_n_bytes) - && !block->curr_left_side) { - - table = btr_search_get_hash_table(cursor->index); - - if (ha_search_and_update_if_found( - table, cursor->fold, rec, block, - page_rec_get_next(rec))) { - MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_UPDATED); - } - -func_exit: - rw_lock_x_unlock(btr_search_get_latch(cursor->index)); - } else { - rw_lock_x_unlock(btr_search_get_latch(cursor->index)); - - btr_search_update_hash_on_insert(cursor); - } -} - -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. 
*/ -UNIV_INTERN -void -btr_search_update_hash_on_insert( -/*=============================*/ - btr_cur_t* cursor) /*!< in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -{ - hash_table_t* table; - buf_block_t* block; - dict_index_t* index; - const rec_t* rec; - const rec_t* ins_rec; - const rec_t* next_rec; - ulint fold; - ulint ins_fold; - ulint next_fold = 0; /* remove warning (??? bug ???) */ - ulint n_fields; - ulint n_bytes; - ibool left_side; - ibool locked = FALSE; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - block = btr_cur_get_block(cursor); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - index = block->index; - - if (!index) { - - return; - } - - btr_search_check_free_space_in_heap(cursor->index); - - table = btr_search_get_hash_table(cursor->index); - - rec = btr_cur_get_rec(cursor); - - ut_a(index == cursor->index); - ut_a(!dict_index_is_ibuf(index)); - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - left_side = block->curr_left_side; - - ins_rec = page_rec_get_next_const(rec); - next_rec = page_rec_get_next_const(ins_rec); - - offsets = rec_get_offsets(ins_rec, index, offsets, - ULINT_UNDEFINED, &heap); - ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index->id); - - if (!page_rec_is_supremum(next_rec)) { - offsets = rec_get_offsets(next_rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - next_fold = rec_fold(next_rec, offsets, n_fields, - n_bytes, index->id); - } - - if (!page_rec_is_infimum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id); - } else { - if (left_side) { - - rw_lock_x_lock(btr_search_get_latch(index)); - - locked = TRUE; - - if (!btr_search_enabled) { - goto 
function_exit; - } - - ha_insert_for_fold(table, ins_fold, block, ins_rec); - } - - goto check_next_rec; - } - - if (fold != ins_fold) { - - if (!locked) { - - rw_lock_x_lock(btr_search_get_latch(index)); - - locked = TRUE; - - if (!btr_search_enabled) { - goto function_exit; - } - } - - if (!left_side) { - ha_insert_for_fold(table, fold, block, rec); - } else { - ha_insert_for_fold(table, ins_fold, block, ins_rec); - } - } - -check_next_rec: - if (page_rec_is_supremum(next_rec)) { - - if (!left_side) { - - if (!locked) { - rw_lock_x_lock(btr_search_get_latch(index)); - - locked = TRUE; - - if (!btr_search_enabled) { - goto function_exit; - } - } - - ha_insert_for_fold(table, ins_fold, block, ins_rec); - } - - goto function_exit; - } - - if (ins_fold != next_fold) { - - if (!locked) { - - rw_lock_x_lock(btr_search_get_latch(index)); - - locked = TRUE; - - if (!btr_search_enabled) { - goto function_exit; - } - } - - if (!left_side) { - - ha_insert_for_fold(table, ins_fold, block, ins_rec); - /* - fputs("Hash insert for ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " fold %lu\n", ins_fold); - */ - } else { - ha_insert_for_fold(table, next_fold, block, next_rec); - } - } - -function_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - if (locked) { - rw_lock_x_unlock(btr_search_get_latch(index)); - } -} - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/********************************************************************//** -Validates one hash table in the search system. -@return TRUE if ok */ -static -ibool -btr_search_validate_one_table( -/*==========================*/ - ulint t) -{ - ha_node_t* node; - ulint n_page_dumps = 0; - ibool ok = TRUE; - ulint i; - ulint cell_count; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - /* How many cells to check before temporarily releasing - btr_search_latch. 
*/ - ulint chunk_size = 10000; - - rec_offs_init(offsets_); - - cell_count = hash_get_n_cells(btr_search_sys->hash_tables[t]); - - for (i = 0; i < cell_count; i++) { - /* We release btr_search_latch every once in a while to - give other queries a chance to run. */ - if ((i != 0) && ((i % chunk_size) == 0)) { - btr_search_x_unlock_all(); - os_thread_yield(); - btr_search_x_lock_all(); - } - - node = (ha_node_t*) - hash_get_nth_cell(btr_search_sys->hash_tables[t], - i)->node; - - for (; node != NULL; node = node->next) { - buf_block_t* block - = buf_block_align((byte*) node->data); - const buf_block_t* hash_block; - buf_pool_t* buf_pool; - index_id_t page_index_id; - - buf_pool = buf_pool_from_bpage((buf_page_t *) block); - /* Prevent BUF_BLOCK_FILE_PAGE -> BUF_BLOCK_REMOVE_HASH - transition until we lock the block mutex */ - mutex_enter(&buf_pool->LRU_list_mutex); - - if (UNIV_LIKELY(buf_block_get_state(block) - == BUF_BLOCK_FILE_PAGE)) { - - /* The space and offset are only valid - for file blocks. It is possible that - the block is being freed - (BUF_BLOCK_REMOVE_HASH, see the - assertion and the comment below) */ - hash_block = buf_block_hash_get( - buf_pool, - buf_block_get_space(block), - buf_block_get_page_no(block)); - } else { - hash_block = NULL; - } - - if (hash_block) { - ut_a(hash_block == block); - } else { - /* When a block is being freed, - buf_LRU_search_and_free_block() first - removes the block from - buf_pool->page_hash by calling - buf_LRU_block_remove_hashed_page(). - After that, it invokes - btr_search_drop_page_hash_index() to - remove the block from - btr_search_sys->hash_index. 
*/ - - ut_a(buf_block_get_state(block) - == BUF_BLOCK_REMOVE_HASH); - } - - mutex_enter(&block->mutex); - mutex_exit(&buf_pool->LRU_list_mutex); - - ut_a(!dict_index_is_ibuf(block->index)); - - page_index_id = btr_page_get_index_id(block->frame); - - offsets = rec_get_offsets(node->data, - block->index, offsets, - block->curr_n_fields - + (block->curr_n_bytes > 0), - &heap); - - if (!block->index || node->fold - != rec_fold(node->data, - offsets, - block->curr_n_fields, - block->curr_n_bytes, - page_index_id)) { - const page_t* page = block->frame; - - ok = FALSE; - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error in an adaptive hash" - " index pointer to page %lu\n" - "InnoDB: ptr mem address %p" - " index id %llu," - " node fold %lu, rec fold %lu\n", - (ulong) page_get_page_no(page), - node->data, - (ullint) page_index_id, - (ulong) node->fold, - (ulong) rec_fold(node->data, - offsets, - block->curr_n_fields, - block->curr_n_bytes, - page_index_id)); - - fputs("InnoDB: Record ", stderr); - rec_print_new(stderr, node->data, offsets); - fprintf(stderr, "\nInnoDB: on that page." - " Page mem address %p, is hashed %p," - " n fields %lu, n bytes %lu\n" - "InnoDB: side %lu\n", - (void*) page, (void*) block->index, - (ulong) block->curr_n_fields, - (ulong) block->curr_n_bytes, - (ulong) block->curr_left_side); - - if (n_page_dumps < 20) { - buf_page_print( - page, 0, - BUF_PAGE_PRINT_NO_CRASH); - n_page_dumps++; - } - } - - mutex_exit(&block->mutex); - } - } - - for (i = 0; i < cell_count; i += chunk_size) { - ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1); - - /* We release btr_search_latch every once in a while to - give other queries a chance to run. 
*/ - if (i != 0) { - btr_search_x_unlock_all(); - os_thread_yield(); - btr_search_x_lock_all(); - } - - if (!ha_validate(btr_search_sys->hash_tables[t], i, - end_index)) { - ok = FALSE; - } - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(ok); -} - -/********************************************************************//** -Validates the search system. -@return TRUE if ok */ -UNIV_INTERN -ibool -btr_search_validate(void) -/*=====================*/ -{ - ulint i; - ibool ok = TRUE; - - btr_search_x_lock_all(); - - for (i = 0; i < btr_search_index_num; i++) { - - if (!btr_search_validate_one_table(i)) - ok = FALSE; - } - - btr_search_x_unlock_all(); - - return(ok); -} - - -#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ diff --git a/storage/xtradb/buf/buf0buddy.cc b/storage/xtradb/buf/buf0buddy.cc deleted file mode 100644 index 2ee39c6c992..00000000000 --- a/storage/xtradb/buf/buf0buddy.cc +++ /dev/null @@ -1,741 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0buddy.cc -Binary buddy allocator for compressed pages - -Created December 2006 by Marko Makela -*******************************************************/ - -#define THIS_MODULE -#include "buf0buddy.h" -#ifdef UNIV_NONINL -# include "buf0buddy.ic" -#endif -#undef THIS_MODULE -#include "buf0buf.h" -#include "buf0lru.h" -#include "buf0flu.h" -#include "page0zip.h" -#include "srv0start.h" - -/** When freeing a buf we attempt to coalesce by looking at its buddy -and deciding whether it is free or not. To ascertain if the buddy is -free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET -within the buddy. The question is how we can be sure that it is -safe to look at BUF_BUDDY_STAMP_OFFSET. -The answer lies in following invariants: -* All blocks allocated by buddy allocator are used for compressed -page frame. -* A compressed table always have space_id < SRV_LOG_SPACE_FIRST_ID -* BUF_BUDDY_STAMP_OFFSET always points to the space_id field in -a frame. - -- The above is true because we look at these fields when the - corresponding buddy block is free which implies that: - * The block we are looking at must have an address aligned at - the same size that its free buddy has. For example, if we have - a free block of 8K then its buddy's address must be aligned at - 8K as well. - * It is possible that the block we are looking at may have been - further divided into smaller sized blocks but its starting - address must still remain the start of a page frame i.e.: it - cannot be middle of a block. 
For example, if we have a free - block of size 8K then its buddy may be divided into blocks - of, say, 1K, 1K, 2K, 4K but the buddy's address will still be - the starting address of first 1K compressed page. - * What is important to note is that for any given block, the - buddy's address cannot be in the middle of a larger block i.e.: - in above example, our 8K block cannot have a buddy whose address - is aligned on 8K but it is part of a larger 16K block. -*/ - -/** Offset within buf_buddy_free_t where free or non_free stamps -are written.*/ -#define BUF_BUDDY_STAMP_OFFSET FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID - -/** Value that we stamp on all buffers that are currently on the zip_free -list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */ -#define BUF_BUDDY_STAMP_FREE (SRV_LOG_SPACE_FIRST_ID) - -/** Stamp value for non-free buffers. Will be overwritten by a non-zero -value by the consumer of the block */ -#define BUF_BUDDY_STAMP_NONFREE (0XFFFFFFFF) - -#if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE -# error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE" -#endif - -/** Return type of buf_buddy_is_free() */ -enum buf_buddy_state_t { - BUF_BUDDY_STATE_FREE, /*!< If the buddy to completely free */ - BUF_BUDDY_STATE_USED, /*!< Buddy currently in used */ - BUF_BUDDY_STATE_PARTIALLY_USED/*!< Some sub-blocks in the buddy - are in use */ -}; - -#ifdef UNIV_DEBUG_VALGRIND -/**********************************************************************//** -Invalidate memory area that we won't access while page is free */ -UNIV_INLINE -void -buf_buddy_mem_invalid( -/*==================*/ - buf_buddy_free_t* buf, /*!< in: block to check */ - ulint i) /*!< in: index of zip_free[] */ -{ - const size_t size = BUF_BUDDY_LOW << i; - ut_ad(i <= BUF_BUDDY_SIZES); - - UNIV_MEM_ASSERT_W(buf, size); - UNIV_MEM_INVALID(buf, size); -} -#else /* UNIV_DEBUG_VALGRIND */ -# define buf_buddy_mem_invalid(buf, i) ut_ad((i) <= BUF_BUDDY_SIZES) -#endif /* UNIV_DEBUG_VALGRIND */ - 
-/**********************************************************************//** -Check if a buddy is stamped free. -@return whether the buddy is free */ -UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) -bool -buf_buddy_stamp_is_free( -/*====================*/ - const buf_buddy_free_t* buf) /*!< in: block to check */ -{ - return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET) - == BUF_BUDDY_STAMP_FREE); -} - -/**********************************************************************//** -Stamps a buddy free. */ -UNIV_INLINE -void -buf_buddy_stamp_free( -/*=================*/ - buf_buddy_free_t* buf, /*!< in/out: block to stamp */ - ulint i) /*!< in: block size */ -{ - ut_d(memset(buf, static_cast<int>(i), BUF_BUDDY_LOW << i)); - buf_buddy_mem_invalid(buf, i); - mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, - BUF_BUDDY_STAMP_FREE); - buf->stamp.size = i; -} - -/**********************************************************************//** -Stamps a buddy nonfree. -@param[in/out] buf block to stamp -@param[in] i block size */ -#define buf_buddy_stamp_nonfree(buf, i) do { \ - buf_buddy_mem_invalid(buf, i); \ - memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4); \ -} while (0) -#if BUF_BUDDY_STAMP_NONFREE != 0xffffffff -# error "BUF_BUDDY_STAMP_NONFREE != 0xffffffff" -#endif - -/**********************************************************************//** -Get the offset of the buddy of a compressed page frame. -@return the buddy relative of page */ -UNIV_INLINE -void* -buf_buddy_get( -/*==========*/ - byte* page, /*!< in: compressed page */ - ulint size) /*!< in: page size in bytes */ -{ - ut_ad(ut_is_2pow(size)); - ut_ad(size >= BUF_BUDDY_LOW); - ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN); - ut_ad(size < BUF_BUDDY_HIGH); - ut_ad(BUF_BUDDY_HIGH == UNIV_PAGE_SIZE); - ut_ad(!ut_align_offset(page, size)); - - if (((ulint) page) & size) { - return(page - size); - } else { - return(page + size); - } -} - -/** Validate a given zip_free list. 
*/ -struct CheckZipFree { - ulint i; - CheckZipFree(ulint i) : i (i) {} - - void operator()(const buf_buddy_free_t* elem) const - { - ut_a(buf_buddy_stamp_is_free(elem)); - ut_a(elem->stamp.size <= i); - } -}; - -#define BUF_BUDDY_LIST_VALIDATE(bp, i) \ - UT_LIST_VALIDATE(list, buf_buddy_free_t, \ - bp->zip_free[i], CheckZipFree(i)) - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Debug function to validate that a buffer is indeed free i.e.: in the -zip_free[]. -@return true if free */ -UNIV_INLINE -bool -buf_buddy_check_free( -/*=================*/ - buf_pool_t* buf_pool,/*!< in: buffer pool instance */ - const buf_buddy_free_t* buf, /*!< in: block to check */ - ulint i) /*!< in: index of buf_pool->zip_free[] */ -{ - const ulint size = BUF_BUDDY_LOW << i; - - ut_ad(mutex_own(&buf_pool->zip_free_mutex)); - ut_ad(!ut_align_offset(buf, size)); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - - buf_buddy_free_t* itr; - - for (itr = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); - itr && itr != buf; - itr = UT_LIST_GET_NEXT(list, itr)) { - } - - return(itr == buf); -} -#endif /* UNIV_DEBUG */ - -/**********************************************************************//** -Checks if a buf is free i.e.: in the zip_free[]. -@retval BUF_BUDDY_STATE_FREE if fully free -@retval BUF_BUDDY_STATE_USED if currently in use -@retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */ -static MY_ATTRIBUTE((warn_unused_result)) -buf_buddy_state_t -buf_buddy_is_free( -/*==============*/ - buf_buddy_free_t* buf, /*!< in: block to check */ - ulint i) /*!< in: index of - buf_pool->zip_free[] */ -{ -#ifdef UNIV_DEBUG - const ulint size = BUF_BUDDY_LOW << i; - ut_ad(!ut_align_offset(buf, size)); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); -#endif /* UNIV_DEBUG */ - - /* We assume that all memory from buf_buddy_alloc() - is used for compressed page frames. 
*/ - - /* We look inside the allocated objects returned by - buf_buddy_alloc() and assume that each block is a compressed - page that contains one of the following in space_id. - * BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or - * BUF_BUDDY_STAMP_NONFREE if the block has been allocated but - not initialized yet or - * A valid space_id of a compressed tablespace - - The call below attempts to read from free memory. The memory - is "owned" by the buddy allocator (and it has been allocated - from the buffer pool), so there is nothing wrong about this. */ - if (!buf_buddy_stamp_is_free(buf)) { - return(BUF_BUDDY_STATE_USED); - } - - /* A block may be free but a fragment of it may still be in use. - To guard against that we write the free block size in terms of - zip_free index at start of stamped block. Note that we can - safely rely on this value only if the buf is free. */ - ut_ad(buf->stamp.size <= i); - return(buf->stamp.size == i - ? BUF_BUDDY_STATE_FREE - : BUF_BUDDY_STATE_PARTIALLY_USED); -} - -/**********************************************************************//** -Add a block to the head of the appropriate buddy free list. */ -UNIV_INLINE -void -buf_buddy_add_to_free( -/*==================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_buddy_free_t* buf, /*!< in,own: block to be freed */ - ulint i) /*!< in: index of - buf_pool->zip_free[] */ -{ - ut_ad(mutex_own(&buf_pool->zip_free_mutex)); - ut_ad(buf_pool->zip_free[i].start != buf); - - buf_buddy_stamp_free(buf, i); - UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf); - ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); -} - -/**********************************************************************//** -Remove a block from the appropriate buddy free list. 
*/ -UNIV_INLINE -void -buf_buddy_remove_from_free( -/*=======================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_buddy_free_t* buf, /*!< in,own: block to be freed */ - ulint i) /*!< in: index of - buf_pool->zip_free[] */ -{ - ut_ad(mutex_own(&buf_pool->zip_free_mutex)); - ut_ad(buf_buddy_check_free(buf_pool, buf, i)); - - UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf); - buf_buddy_stamp_nonfree(buf, i); -} - -/**********************************************************************//** -Try to allocate a block from buf_pool->zip_free[]. -@return allocated block, or NULL if buf_pool->zip_free[] was empty */ -static -buf_buddy_free_t* -buf_buddy_alloc_zip( -/*================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint i) /*!< in: index of buf_pool->zip_free[] */ -{ - buf_buddy_free_t* buf; - - ut_ad(mutex_own(&buf_pool->zip_free_mutex)); - ut_a(i < BUF_BUDDY_SIZES); - ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - - ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); - - buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); - - if (buf) { - buf_buddy_remove_from_free(buf_pool, buf, i); - } else if (i + 1 < BUF_BUDDY_SIZES) { - /* Attempt to split. */ - buf = buf_buddy_alloc_zip(buf_pool, i + 1); - - if (buf) { - buf_buddy_free_t* buddy = - reinterpret_cast<buf_buddy_free_t*>( - buf->stamp.bytes - + (BUF_BUDDY_LOW << i)); - - ut_ad(!buf_pool_contains_zip(buf_pool, buddy)); - buf_buddy_add_to_free(buf_pool, buddy, i); - } - } - - if (buf) { - /* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */ - UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET); - UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4 - + buf->stamp.bytes, ~i, - (BUF_BUDDY_LOW << i) - - (BUF_BUDDY_STAMP_OFFSET + 4)); - ut_ad(mach_read_from_4(buf->stamp.bytes - + BUF_BUDDY_STAMP_OFFSET) - == BUF_BUDDY_STAMP_NONFREE); - } - - return(buf); -} - -/**********************************************************************//** -Deallocate a buffer frame of UNIV_PAGE_SIZE. 
*/ -static -void -buf_buddy_block_free( -/*=================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - void* buf) /*!< in: buffer frame to deallocate */ -{ - const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf); - buf_page_t* bpage; - buf_block_t* block; - - ut_ad(!mutex_own(&buf_pool->zip_mutex)); - ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE)); - - mutex_enter(&buf_pool->zip_hash_mutex); - - HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage, - ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY - && bpage->in_zip_hash && !bpage->in_page_hash), - ((buf_block_t*) bpage)->frame == buf); - ut_a(bpage); - ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY); - ut_ad(!bpage->in_page_hash); - ut_ad(bpage->in_zip_hash); - ut_d(bpage->in_zip_hash = FALSE); - HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage); - - mutex_exit(&buf_pool->zip_hash_mutex); - - ut_d(memset(buf, 0, UNIV_PAGE_SIZE)); - UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE); - - block = (buf_block_t*) bpage; - mutex_enter(&block->mutex); - buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); - - ut_ad(buf_pool->buddy_n_frames > 0); - ut_d(buf_pool->buddy_n_frames--); -} - -/**********************************************************************//** -Allocate a buffer block to the buddy allocator. 
*/ -static -void -buf_buddy_block_register( -/*=====================*/ - buf_block_t* block) /*!< in: buffer frame to allocate */ -{ - buf_pool_t* buf_pool = buf_pool_from_block(block); - const ulint fold = BUF_POOL_ZIP_FOLD(block); - ut_ad(!mutex_own(&buf_pool->zip_mutex)); - ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE); - - buf_block_set_state(block, BUF_BLOCK_MEMORY); - - ut_a(block->frame); - ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE)); - - ut_ad(!block->page.in_page_hash); - ut_ad(!block->page.in_zip_hash); - ut_d(block->page.in_zip_hash = TRUE); - - mutex_enter(&buf_pool->zip_hash_mutex); - HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page); - mutex_exit(&buf_pool->zip_hash_mutex); - - ut_d(buf_pool->buddy_n_frames++); -} - -/**********************************************************************//** -Allocate a block from a bigger object. -@return allocated block */ -static -void* -buf_buddy_alloc_from( -/*=================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - void* buf, /*!< in: a block that is free to use */ - ulint i, /*!< in: index of - buf_pool->zip_free[] */ - ulint j) /*!< in: size of buf as an index - of buf_pool->zip_free[] */ -{ - ulint offs = BUF_BUDDY_LOW << j; - ut_ad(mutex_own(&buf_pool->zip_free_mutex)); - ut_ad(j <= BUF_BUDDY_SIZES); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - ut_ad(j >= i); - ut_ad(!ut_align_offset(buf, offs)); - - /* Add the unused parts of the block to the free lists. */ - while (j > i) { - buf_buddy_free_t* zip_buf; - - offs >>= 1; - j--; - - zip_buf = reinterpret_cast<buf_buddy_free_t*>( - reinterpret_cast<byte*>(buf) + offs); - buf_buddy_add_to_free(buf_pool, zip_buf, j); - } - - buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i); - return(buf); -} - -/**********************************************************************//** -Allocate a block. 
The thread calling this function must hold -buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any -block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired. -@return allocated block, never NULL */ -UNIV_INTERN -void* -buf_buddy_alloc_low( -/*================*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint i, /*!< in: index of buf_pool->zip_free[], - or BUF_BUDDY_SIZES */ - ibool* lru) /*!< in: pointer to a variable that - will be assigned TRUE if storage was - allocated from the LRU list and - buf_pool->LRU_list_mutex was - temporarily released */ -{ - buf_block_t* block; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(!mutex_own(&buf_pool->zip_mutex)); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - - if (i < BUF_BUDDY_SIZES) { - /* Try to allocate from the buddy system. */ - mutex_enter(&buf_pool->zip_free_mutex); - block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i); - - if (block) { - goto func_exit; - } - mutex_exit(&buf_pool->zip_free_mutex); - } - - /* Try allocating from the buf_pool->free list. */ - block = buf_LRU_get_free_only(buf_pool); - - if (block) { - - goto alloc_big; - } - - /* Try replacing an uncompressed page in the buffer pool. */ - mutex_exit(&buf_pool->LRU_list_mutex); - block = buf_LRU_get_free_block(buf_pool); - *lru = TRUE; - mutex_enter(&buf_pool->LRU_list_mutex); - -alloc_big: - buf_buddy_block_register(block); - - mutex_enter(&buf_pool->zip_free_mutex); - block = (buf_block_t*) buf_buddy_alloc_from( - buf_pool, block->frame, i, BUF_BUDDY_SIZES); - -func_exit: - buf_pool->buddy_stat[i].used++; - mutex_exit(&buf_pool->zip_free_mutex); - - return(block); -} - -/**********************************************************************//** -Try to relocate a block. The caller must hold zip_free_mutex, and this -function will release and lock it again. 
-@return true if relocated */ -static -bool -buf_buddy_relocate( -/*===============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - void* src, /*!< in: block to relocate */ - void* dst, /*!< in: free block to relocate to */ - ulint i) /*!< in: index of - buf_pool->zip_free[] */ -{ - buf_page_t* bpage; - const ulint size = BUF_BUDDY_LOW << i; - ulint space; - ulint offset; - - ut_ad(mutex_own(&buf_pool->zip_free_mutex)); - ut_ad(!mutex_own(&buf_pool->zip_mutex)); - ut_ad(!ut_align_offset(src, size)); - ut_ad(!ut_align_offset(dst, size)); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - UNIV_MEM_ASSERT_W(dst, size); - - space = mach_read_from_4((const byte*) src - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - offset = mach_read_from_4((const byte*) src - + FIL_PAGE_OFFSET); - - /* Suppress Valgrind warnings about conditional jump - on uninitialized value. */ - UNIV_MEM_VALID(&space, sizeof space); - UNIV_MEM_VALID(&offset, sizeof offset); - - ut_ad(space != BUF_BUDDY_STAMP_FREE); - - mutex_exit(&buf_pool->zip_free_mutex); - - ulint fold = buf_page_address_fold(space, offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - - rw_lock_x_lock(hash_lock); - - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - - if (!bpage || bpage->zip.data != src) { - /* The block has probably been freshly - allocated by buf_LRU_get_free_block() but not - added to buf_pool->page_hash yet. Obviously, - it cannot be relocated. */ - - rw_lock_x_unlock(hash_lock); - - mutex_enter(&buf_pool->zip_free_mutex); - return(false); - } - - if (page_zip_get_size(&bpage->zip) != size) { - /* The block is of different size. We would - have to relocate all blocks covered by src. - For the sake of simplicity, give up. */ - ut_ad(page_zip_get_size(&bpage->zip) < size); - - rw_lock_x_unlock(hash_lock); - - mutex_enter(&buf_pool->zip_free_mutex); - return(false); - } - - /* The block must have been allocated, but it may - contain uninitialized data. 
*/ - UNIV_MEM_ASSERT_W(src, size); - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - mutex_enter(&buf_pool->zip_free_mutex); - - if (buf_page_can_relocate(bpage)) { - /* Relocate the compressed page. */ - ullint usec = ut_time_us(NULL); - - ut_a(bpage->zip.data == src); - - /* Note: This is potentially expensive, we need a better - solution here. We go with correctness for now. */ - ::memcpy(dst, src, size); - - bpage->zip.data = reinterpret_cast<page_zip_t*>(dst); - - rw_lock_x_unlock(hash_lock); - - mutex_exit(block_mutex); - - buf_buddy_mem_invalid( - reinterpret_cast<buf_buddy_free_t*>(src), i); - - buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; - - ++buddy_stat->relocated; - - buddy_stat->relocated_usec += ut_time_us(NULL) - usec; - - return(true); - } - - rw_lock_x_unlock(hash_lock); - - mutex_exit(block_mutex); - - return(false); -} - -/**********************************************************************//** -Deallocate a block. */ -UNIV_INTERN -void -buf_buddy_free_low( -/*===============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - void* buf, /*!< in: block to be freed, must not be - pointed to by the buffer pool */ - ulint i) /*!< in: index of buf_pool->zip_free[], - or BUF_BUDDY_SIZES */ -{ - buf_buddy_free_t* buddy; - - ut_ad(!mutex_own(&buf_pool->zip_mutex)); - ut_ad(i <= BUF_BUDDY_SIZES); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - - mutex_enter(&buf_pool->zip_free_mutex); - - ut_ad(buf_pool->buddy_stat[i].used > 0); - buf_pool->buddy_stat[i].used--; -recombine: - UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i); - - if (i == BUF_BUDDY_SIZES) { - mutex_exit(&buf_pool->zip_free_mutex); - buf_buddy_block_free(buf_pool, buf); - return; - } - - ut_ad(i < BUF_BUDDY_SIZES); - ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool_contains_zip(buf_pool, buf)); - - /* Do not recombine blocks if there are few free blocks. 
- We may waste up to 15360*max_len bytes to free blocks - (1024 + 2048 + 4096 + 8192 = 15360) */ - if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) { - goto func_exit; - } - - /* Try to combine adjacent blocks. */ - buddy = reinterpret_cast<buf_buddy_free_t*>( - buf_buddy_get(reinterpret_cast<byte*>(buf), - BUF_BUDDY_LOW << i)); - - switch (buf_buddy_is_free(buddy, i)) { - case BUF_BUDDY_STATE_FREE: - /* The buddy is free: recombine */ - buf_buddy_remove_from_free(buf_pool, buddy, i); -buddy_is_free: - ut_ad(!buf_pool_contains_zip(buf_pool, buddy)); - i++; - buf = ut_align_down(buf, BUF_BUDDY_LOW << i); - - goto recombine; - - case BUF_BUDDY_STATE_USED: - ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i)); - - /* The buddy is not free. Is there a free block of - this size? */ - if (buf_buddy_free_t* zip_buf = - UT_LIST_GET_FIRST(buf_pool->zip_free[i])) { - - /* Remove the block from the free list, because - a successful buf_buddy_relocate() will overwrite - zip_free->list. */ - buf_buddy_remove_from_free(buf_pool, zip_buf, i); - - /* Try to relocate the buddy of buf to the free - block. */ - if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) { - - goto buddy_is_free; - } - - buf_buddy_add_to_free(buf_pool, zip_buf, i); - } - - break; - case BUF_BUDDY_STATE_PARTIALLY_USED: - /* Some sub-blocks in the buddy are still in use. - Relocation will fail. No need to try. */ - break; - } - -func_exit: - /* Free the block to the buddy list. */ - buf_buddy_add_to_free(buf_pool, - reinterpret_cast<buf_buddy_free_t*>(buf), - i); - mutex_exit(&buf_pool->zip_free_mutex); -} diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc deleted file mode 100644 index 01bec11d2ed..00000000000 --- a/storage/xtradb/buf/buf0buf.cc +++ /dev/null @@ -1,6504 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. 
-Copyright (c) 2013, 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0buf.cc -The database buffer buf_pool - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0buf.h" - -#ifdef UNIV_NONINL -#include "buf0buf.ic" -#endif - -#include "mem0mem.h" -#include "btr0btr.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#ifndef UNIV_HOTBACKUP -#include "buf0buddy.h" -#include "lock0lock.h" -#include "btr0sea.h" -#include "ibuf0ibuf.h" -#include "trx0undo.h" -#include "log0log.h" -#endif /* !UNIV_HOTBACKUP */ -#include "srv0srv.h" -#include "dict0dict.h" -#include "log0recv.h" -#include "page0zip.h" -#include "srv0mon.h" -#include "buf0checksum.h" -#ifdef HAVE_LIBNUMA -#include <numa.h> -#include <numaif.h> -#endif // HAVE_LIBNUMA -#include "trx0trx.h" -#include "srv0start.h" -#include "ut0byte.h" -#include "fil0pagecompress.h" 
-#include "ha_prototypes.h" - -#ifdef UNIV_LINUX -#include <stdlib.h> -#endif - -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif - -#ifdef HAVE_SNAPPY -#include "snappy-c.h" -#endif - -/** Decrypt a page. -@param[in,out] bpage Page control block -@param[in,out] space tablespace -@return whether the operation was successful */ -static -bool -buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) - MY_ATTRIBUTE((nonnull)); - -/********************************************************************//** -Mark a table with the specified space pointed by bpage->space corrupted. -Also remove the bpage from LRU list. -@param[in,out] bpage Block */ -static -void -buf_mark_space_corrupt( - buf_page_t* bpage); - -/* prototypes for new functions added to ha_innodb.cc */ -trx_t* innobase_get_trx(); - -inline void* aligned_malloc(size_t size, size_t align) { - void *result; -#ifdef _MSC_VER - result = _aligned_malloc(size, align); -#else - if(posix_memalign(&result, align, size)) { - result = 0; - } -#endif - return result; -} - -inline void aligned_free(void *ptr) { -#ifdef _MSC_VER - _aligned_free(ptr); -#else - free(ptr); -#endif -} - -static inline -void -_increment_page_get_statistics(buf_block_t* block, trx_t* trx) -{ - ulint block_hash; - ulint block_hash_byte; - byte block_hash_offset; - - ut_ad(block); - ut_ad(trx && trx->take_stats); - - if (!trx->distinct_page_access_hash) { - trx->distinct_page_access_hash - = static_cast<byte *>(mem_alloc(DPAH_SIZE)); - memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); - } - - block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space + - block->page.offset, DPAH_SIZE << 3); - block_hash_byte = block_hash >> 3; - block_hash_offset = (byte) block_hash & 0x07; - if (block_hash_byte >= DPAH_SIZE) - fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); - if (block_hash_offset > 7) - fprintf(stderr, "!!! 
block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset); - if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0) - trx->distinct_page_access++; - trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset; - return; -} - -/* - IMPLEMENTATION OF THE BUFFER POOL - ================================= - -Performance improvement: ------------------------- -Thread scheduling in NT may be so slow that the OS wait mechanism should -not be used even in waiting for disk reads to complete. -Rather, we should put waiting query threads to the queue of -waiting jobs, and let the OS thread do something useful while the i/o -is processed. In this way we could remove most OS thread switches in -an i/o-intensive benchmark like TPC-C. - -A possibility is to put a user space thread library between the database -and NT. User space thread libraries might be very fast. - -SQL Server 7.0 can be configured to use 'fibers' which are lightweight -threads in NT. These should be studied. - - Buffer frames and blocks - ------------------------ -Following the terminology of Gray and Reuter, we call the memory -blocks where file pages are loaded buffer frames. For each buffer -frame there is a control block, or shortly, a block, in the buffer -control array. The control info which does not need to be stored -in the file along with the file page, resides in the control block. - - Buffer pool struct - ------------------ -The buffer buf_pool contains several mutexes which protect all the -control data structures of the buf_pool. The content of a buffer frame is -protected by a separate read-write lock in its control block, though. - - Control blocks - -------------- - -The control block contains, for instance, the bufferfix count -which is incremented when a thread wants a file page to be fixed -in a buffer frame. The bufferfix operation does not lock the -contents of the frame, however. 
For this purpose, the control -block contains a read-write lock. - -The buffer frames have to be aligned so that the start memory -address of a frame is divisible by the universal page size, which -is a power of two. - -We intend to make the buffer buf_pool size on-line reconfigurable, -that is, the buf_pool size can be changed without closing the database. -Then the database administarator may adjust it to be bigger -at night, for example. The control block array must -contain enough control blocks for the maximum buffer buf_pool size -which is used in the particular database. -If the buf_pool size is cut, we exploit the virtual memory mechanism of -the OS, and just refrain from using frames at high addresses. Then the OS -can swap them to disk. - -The control blocks containing file pages are put to a hash table -according to the file address of the page. -We could speed up the access to an individual page by using -"pointer swizzling": we could replace the page references on -non-leaf index pages by direct pointers to the page, if it exists -in the buf_pool. We could make a separate hash table where we could -chain all the page references in non-leaf pages residing in the buf_pool, -using the page reference as the hash key, -and at the time of reading of a page update the pointers accordingly. -Drawbacks of this solution are added complexity and, -possibly, extra space required on non-leaf pages for memory pointers. -A simpler solution is just to speed up the hash table mechanism -in the database, using tables whose size is a power of 2. - - Lists of blocks - --------------- - -There are several lists of control blocks. - -The free list (buf_pool->free) contains blocks which are currently not -used. - -The common LRU list contains all the blocks holding a file page -except those for which the bufferfix count is non-zero. -The pages are in the LRU list roughly in the order of the last -access to the page, so that the oldest pages are at the end of the -list. 
We also keep a pointer to near the end of the LRU list, -which we can use when we want to artificially age a page in the -buf_pool. This is used if we know that some page is not needed -again for some time: we insert the block right after the pointer, -causing it to be replaced sooner than would normally be the case. -Currently this aging mechanism is used for read-ahead mechanism -of pages, and it can also be used when there is a scan of a full -table which cannot fit in the memory. Putting the pages near the -end of the LRU list, we make sure that most of the buf_pool stays -in the main memory, undisturbed. - -The unzip_LRU list contains a subset of the common LRU list. The -blocks on the unzip_LRU list hold a compressed file page and the -corresponding uncompressed page frame. A block is in unzip_LRU if and -only if the predicate buf_page_belongs_to_unzip_LRU(&block->page) -holds. The blocks in unzip_LRU will be in same order as they are in -the common LRU list. That is, each manipulation of the common LRU -list will result in the same manipulation of the unzip_LRU list. - -The chain of modified blocks (buf_pool->flush_list) contains the blocks -holding file pages that have been modified in the memory -but not written to disk yet. The block with the oldest modification -which has not yet been written to disk is at the end of the chain. -The access to this list is protected by buf_pool->flush_list_mutex. - -The chain of unmodified compressed blocks (buf_pool->zip_clean) -contains the control blocks (buf_page_t) of those compressed pages -that are not in buf_pool->flush_list and for which no uncompressed -page has been allocated in the buffer pool. The control blocks for -uncompressed pages are accessible via buf_block_t objects that are -reachable via buf_pool->chunks[]. 
- -The chains of free memory blocks (buf_pool->zip_free[]) are used by -the buddy allocator (buf0buddy.cc) to keep track of currently unused -memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These -blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type -BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer -pool. The buddy allocator is solely used for allocating control -blocks for compressed pages (buf_page_t) and compressed page frames. - - Loading a file page - ------------------- - -First, a victim block for replacement has to be found in the -buf_pool. It is taken from the free list or searched for from the -end of the LRU-list. An exclusive lock is reserved for the frame, -the io_fix field is set in the block fixing the block in buf_pool, -and the io-operation for loading the page is queued. The io-handler thread -releases the X-lock on the frame and resets the io_fix field -when the io operation completes. - -A thread may request the above operation using the function -buf_page_get(). It may then continue to request a lock on the frame. -The lock is granted when the io-handler releases the x-lock. - - Read-ahead - ---------- - -The read-ahead mechanism is intended to be intelligent and -isolated from the semantically higher levels of the database -index management. From the higher level we only need the -information if a file page has a natural successor or -predecessor page. On the leaf level of a B-tree index, -these are the next and previous pages in the natural -order of the pages. - -Let us first explain the read-ahead mechanism when the leafs -of a B-tree are scanned in an ascending or descending order. -When a read page is the first time referenced in the buf_pool, -the buffer manager checks if it is at the border of a so-called -linear read-ahead area. The tablespace is divided into these -areas of size 64 blocks, for example. 
So if the page is at the -border of such an area, the read-ahead mechanism checks if -all the other blocks in the area have been accessed in an -ascending or descending order. If this is the case, the system -looks at the natural successor or predecessor of the page, -checks if that is at the border of another area, and in this case -issues read-requests for all the pages in that area. Maybe -we could relax the condition that all the pages in the area -have to be accessed: if data is deleted from a table, there may -appear holes of unused pages in the area. - -A different read-ahead mechanism is used when there appears -to be a random access pattern to a file. -If a new page is referenced in the buf_pool, and several pages -of its random access area (for instance, 32 consecutive pages -in a tablespace) have recently been referenced, we may predict -that the whole area may be needed in the near future, and issue -the read requests for the whole area. -*/ - -#ifndef UNIV_HOTBACKUP -/** Value in microseconds */ -static const int WAIT_FOR_READ = 100; -/** Number of attemtps made to read in a page in the buffer pool */ -static const ulint BUF_PAGE_READ_MAX_RETRIES = 100; - -/** The buffer pools of the database */ -UNIV_INTERN buf_pool_t* buf_pool_ptr; - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -static ulint buf_dbg_counter = 0; /*!< This is used to insert validation - operations in execution in the - debug version */ -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG -/** If this is set TRUE, the program prints info whenever -read-ahead or flush occurs */ -UNIV_INTERN ibool buf_debug_prints = FALSE; -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_PFS_RWLOCK -/* Keys to register buffer block related rwlocks and mutexes with -performance schema */ -UNIV_INTERN mysql_pfs_key_t buf_block_lock_key; -# ifdef UNIV_SYNC_DEBUG -UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key; -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX 
-UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_flush_state_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key; -UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key; -UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK -# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK - -/* Buffer block mutexes and rwlocks can be registered -in one group rather than individually. If PFS_GROUP_BUFFER_SYNC -is defined, register buffer block mutex and rwlock -in one group after their initialization. */ -# define PFS_GROUP_BUFFER_SYNC - -/* This define caps the number of mutexes/rwlocks can -be registered with performance schema. Developers can -modify this define if necessary. Please note, this would -be effective only if PFS_GROUP_BUFFER_SYNC is defined. */ -# define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX - -# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ -#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */ - -/** Macro to determine whether the read of write counter is used depending -on the io_type */ -#define MONITOR_RW_COUNTER(io_type, counter) \ - ((io_type == BUF_IO_READ) \ - ? (counter##_READ) \ - : (counter##_WRITTEN)) - -/********************************************************************//** -Gets the smallest oldest_modification lsn for any page in the pool. Returns -zero if all modified pages have been flushed to disk. -@return oldest modification in pool, zero if none */ -UNIV_INTERN -lsn_t -buf_pool_get_oldest_modification(void) -/*==================================*/ -{ - ulint i; - buf_page_t* bpage; - lsn_t lsn = 0; - lsn_t oldest_lsn = 0; - - /* When we traverse all the flush lists we don't want another - thread to add a dirty page to any flush list. 
*/ - if (srv_buf_pool_instances > 1) - log_flush_order_mutex_enter(); - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - buf_flush_list_mutex_enter(buf_pool); - - bpage = UT_LIST_GET_LAST(buf_pool->flush_list); - - if (bpage != NULL) { - ut_ad(bpage->in_flush_list); - lsn = bpage->oldest_modification; - } - - buf_flush_list_mutex_exit(buf_pool); - - if (!oldest_lsn || oldest_lsn > lsn) { - oldest_lsn = lsn; - } - } - - if (srv_buf_pool_instances > 1) - log_flush_order_mutex_exit(); - - /* The returned answer may be out of date: the flush_list can - change after the mutex has been released. */ - - return(oldest_lsn); -} - -/********************************************************************//** -Gets the smallest oldest_modification lsn for any page in the pool. Returns -zero if all modified pages have been flushed to disk. -@return oldest modification in pool, zero if none */ -UNIV_INTERN -lsn_t -buf_pool_get_oldest_modification_peek(void) -/*=======================================*/ -{ - ulint i; - buf_page_t* bpage; - lsn_t lsn = 0; - lsn_t oldest_lsn = 0; - - /* Dirsty read to buffer pool array */ - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - buf_flush_list_mutex_enter(buf_pool); - - bpage = UT_LIST_GET_LAST(buf_pool->flush_list); - - if (bpage != NULL) { - ut_ad(bpage->in_flush_list); - lsn = bpage->oldest_modification; - } - - buf_flush_list_mutex_exit(buf_pool); - - if (!oldest_lsn || oldest_lsn > lsn) { - oldest_lsn = lsn; - } - } - - /* The returned answer may be out of date: the flush_list can - change after the mutex has been released. */ - - return(oldest_lsn); -} - -/********************************************************************//** -Get total buffer pool statistics. 
*/ -UNIV_INTERN -void -buf_get_total_list_len( -/*===================*/ - ulint* LRU_len, /*!< out: length of all LRU lists */ - ulint* free_len, /*!< out: length of all free lists */ - ulint* flush_list_len) /*!< out: length of all flush lists */ -{ - ulint i; - - *LRU_len = 0; - *free_len = 0; - *flush_list_len = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU); - *free_len += UT_LIST_GET_LEN(buf_pool->free); - *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list); - } -} - -/********************************************************************//** -Get total list size in bytes from all buffer pools. */ -UNIV_INTERN -void -buf_get_total_list_size_in_bytes( -/*=============================*/ - buf_pools_list_size_t* buf_pools_list_size) /*!< out: list sizes - in all buffer pools */ -{ - ut_ad(buf_pools_list_size); - memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size)); - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - /* We don't need mutex protection since this is - for statistics purpose */ - buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes; - buf_pools_list_size->unzip_LRU_bytes += - UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE; - buf_pools_list_size->flush_list_bytes += - buf_pool->stat.flush_list_bytes; - } -} - -/********************************************************************//** -Get total buffer pool statistics. 
*/ -UNIV_INTERN -void -buf_get_total_stat( -/*===============*/ - buf_pool_stat_t* tot_stat) /*!< out: buffer pool stats */ -{ - ulint i; - - memset(tot_stat, 0, sizeof(*tot_stat)); - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_stat_t*buf_stat; - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - buf_stat = &buf_pool->stat; - tot_stat->n_page_gets += buf_stat->n_page_gets; - tot_stat->n_pages_read += buf_stat->n_pages_read; - tot_stat->n_pages_written += buf_stat->n_pages_written; - tot_stat->n_pages_created += buf_stat->n_pages_created; - tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd; - tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read; - tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted; - tot_stat->n_pages_made_young += buf_stat->n_pages_made_young; - - tot_stat->n_pages_not_made_young += - buf_stat->n_pages_not_made_young; - } -} - -/********************************************************************//** -Allocates a buffer block. -@return own: the allocated block, in state BUF_BLOCK_MEMORY */ -UNIV_INTERN -buf_block_t* -buf_block_alloc( -/*============*/ - buf_pool_t* buf_pool) /*!< in/out: buffer pool instance, - or NULL for round-robin selection - of the buffer pool */ -{ - buf_block_t* block; - ulint index; - static ulint buf_pool_index; - - if (buf_pool == NULL) { - /* We are allocating memory from any buffer pool, ensure - we spread the grace on all buffer pool instances. */ - index = buf_pool_index++ % srv_buf_pool_instances; - buf_pool = buf_pool_from_array(index); - } - - block = buf_LRU_get_free_block(buf_pool); - - buf_block_set_state(block, BUF_BLOCK_MEMORY); - - return(block); -} -#endif /* !UNIV_HOTBACKUP */ - -/** Check if a page is all zeroes. 
-@param[in] read_buf database page -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@return whether the page is all zeroes */ -UNIV_INTERN -bool -buf_page_is_zeroes(const byte* read_buf, ulint zip_size) -{ - const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE; - - for (ulint i = 0; i < page_size; i++) { - if (read_buf[i] != 0) { - return(false); - } - } - return(true); -} - -/** Checks if the page is in crc32 checksum format. -@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in crc32 checksum format */ -UNIV_INTERN -bool -buf_page_is_checksum_valid_crc32( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) -{ - ib_uint32_t crc32 = buf_calc_page_crc32(read_buf); - - if (!(checksum_field1 == crc32 && checksum_field2 == crc32)) { - DBUG_PRINT("buf_checksum", - ("Page checksum crc32 not valid field1 " ULINTPF - " field2 " ULINTPF " crc32 %u.", - checksum_field1, checksum_field2, crc32)); - return (false); - } - - return (true); -} - -/** Checks if the page is in innodb checksum format. -@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in innodb checksum format */ -UNIV_INTERN -bool -buf_page_is_checksum_valid_innodb( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) -{ - /* There are 2 valid formulas for - checksum_field2 (old checksum field) which algo=innodb could have - written to the page: - - 1. Very old versions of InnoDB only stored 8 byte lsn to the - start and the end of the page. - - 2. Newer InnoDB versions store the old formula checksum - (buf_calc_page_old_checksum()). 
*/ - - if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN) - && checksum_field2 != buf_calc_page_old_checksum(read_buf)) { - - DBUG_PRINT("buf_checksum", - ("Page checksum innodb not valid field1 " ULINTPF - " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".", - checksum_field1, checksum_field2, buf_calc_page_old_checksum(read_buf), - mach_read_from_4(read_buf + FIL_PAGE_LSN))); - - return(false); - } - - /* old field is fine, check the new field */ - - /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id - (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */ - - if (checksum_field1 != 0 - && checksum_field1 != buf_calc_page_new_checksum(read_buf)) { - - DBUG_PRINT("buf_checksum", - ("Page checksum innodb not valid field1 " ULINTPF - " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".", - checksum_field1, checksum_field2, buf_calc_page_new_checksum(read_buf), - mach_read_from_4(read_buf + FIL_PAGE_LSN))); - - return(false); - } - - return(true); -} - -/** Checks if the page is in none checksum format. -@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in none checksum format */ -UNIV_INTERN -bool -buf_page_is_checksum_valid_none( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) -{ - - if (!(checksum_field1 == checksum_field2 && checksum_field1 == BUF_NO_CHECKSUM_MAGIC)) { - DBUG_PRINT("buf_checksum", - ("Page checksum none not valid field1 " ULINTPF - " field2 " ULINTPF "crc32 " ULINTPF " lsn " ULINTPF ".", - checksum_field1, checksum_field2, BUF_NO_CHECKSUM_MAGIC, - mach_read_from_4(read_buf + FIL_PAGE_LSN))); - } - - return(checksum_field1 == checksum_field2 - && checksum_field1 == BUF_NO_CHECKSUM_MAGIC); -} - -/** Check if a page is corrupt. 
-@param[in] check_lsn true if LSN should be checked -@param[in] read_buf Page to be checked -@param[in] zip_size compressed size or 0 -@param[in] space Pointer to tablespace -@return true if corrupted, false if not */ -UNIV_INTERN -bool -buf_page_is_corrupted( - bool check_lsn, - const byte* read_buf, - ulint zip_size, - const fil_space_t* space) -{ - ulint checksum_field1; - ulint checksum_field2; - ulint space_id = mach_read_from_4( - read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint page_type = mach_read_from_2( - read_buf + FIL_PAGE_TYPE); - - /* We can trust page type if page compression is set on tablespace - flags because page compression flag means file must have been - created with 10.1 (later than 5.5 code base). In 10.1 page - compressed tables do not contain post compression checksum and - FIL_PAGE_END_LSN_OLD_CHKSUM field stored. Note that space can - be null if we are in fil_check_first_page() and first page - is not compressed or encrypted. Page checksum is verified - after decompression (i.e. normally pages are already - decompressed at this stage). */ - if ((page_type == FIL_PAGE_PAGE_COMPRESSED || - page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) - && space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)) { - return (false); - } - - if (!zip_size - && memcmp(read_buf + FIL_PAGE_LSN + 4, - read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) { - - /* Stored log sequence numbers at the start and the end - of page do not match */ - - ib_logf(IB_LOG_LEVEL_INFO, - "Log sequence number at the start %lu and the end %lu do not match.", - mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), - mach_read_from_4(read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)); - - return(TRUE); - } - -#ifndef UNIV_HOTBACKUP - if (check_lsn && recv_lsn_checks_on) { - lsn_t current_lsn; - - /* Since we are going to reset the page LSN during the import - phase it makes no sense to spam the log with error messages. 
*/ - - if (log_peek_lsn(¤t_lsn) - && current_lsn - < mach_read_from_8(read_buf + FIL_PAGE_LSN)) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: page %lu log sequence number" - " " LSN_PF "\n" - "InnoDB: is in the future! Current system " - "log sequence number " LSN_PF ".\n" - "InnoDB: Your database may be corrupt or " - "you may have copied the InnoDB\n" - "InnoDB: tablespace but not the InnoDB " - "log files. See\n" - "InnoDB: " REFMAN - "forcing-innodb-recovery.html\n" - "InnoDB: for more information.\n", - (ulint) mach_read_from_4( - read_buf + FIL_PAGE_OFFSET), - (lsn_t) mach_read_from_8( - read_buf + FIL_PAGE_LSN), - current_lsn); - } - } -#endif - - /* Check whether the checksum fields have correct values */ - - if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { - return(false); - } - - if (zip_size) { - return(!page_zip_verify_checksum(read_buf, zip_size)); - } - - checksum_field1 = mach_read_from_4( - read_buf + FIL_PAGE_SPACE_OR_CHKSUM); - - checksum_field2 = mach_read_from_4( - read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); - -#if FIL_PAGE_LSN % 8 -#error "FIL_PAGE_LSN must be 64 bit aligned" -#endif - - /* declare empty pages non-corrupted */ - if (checksum_field1 == 0 && checksum_field2 == 0 - && *reinterpret_cast<const ib_uint64_t*>(read_buf + - FIL_PAGE_LSN) == 0) { - /* make sure that the page is really empty */ - for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) { - if (read_buf[i] != 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "Checksum fields zero but page is not empty."); - - return(true); - } - } - - return(false); - } - - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(true); ); - - ulint page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET); - - const srv_checksum_algorithm_t curr_algo = - static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); - - switch (curr_algo) { - case SRV_CHECKSUM_ALGORITHM_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - - if (buf_page_is_checksum_valid_crc32(read_buf, - 
checksum_field1, checksum_field2)) { - return(false); - } - - if (buf_page_is_checksum_valid_none(read_buf, - checksum_field1, checksum_field2)) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); - } - - return(false); - } - - if (buf_page_is_checksum_valid_innodb(read_buf, - checksum_field1, checksum_field2)) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); - } - - return(false); - } - - return(true); - - case SRV_CHECKSUM_ALGORITHM_INNODB: - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - - if (buf_page_is_checksum_valid_innodb(read_buf, - checksum_field1, checksum_field2)) { - return(false); - } - - if (buf_page_is_checksum_valid_none(read_buf, - checksum_field1, checksum_field2)) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); - } - - return(false); - } - - if (buf_page_is_checksum_valid_crc32(read_buf, - checksum_field1, checksum_field2)) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); - } - - return(false); - } - - return(true); - - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - - if (buf_page_is_checksum_valid_none(read_buf, - checksum_field1, checksum_field2)) { - return(false); - } - - if (buf_page_is_checksum_valid_crc32(read_buf, - checksum_field1, checksum_field2)) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); - return(false); - } - - if (buf_page_is_checksum_valid_innodb(read_buf, - checksum_field1, checksum_field2)) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); - return(false); - } - - return(true); - - case 
SRV_CHECKSUM_ALGORITHM_NONE: - /* should have returned FALSE earlier */ - break; - /* no default so the compiler will emit a warning if new enum - is added and not handled here */ - } - - ut_error; - return(false); -} - -/********************************************************************//** -Prints a page to stderr. */ -UNIV_INTERN -void -buf_page_print( -/*===========*/ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size, /*!< in: compressed page size, or - 0 for uncompressed pages */ - ulint flags) /*!< in: 0 or - BUF_PAGE_PRINT_NO_CRASH or - BUF_PAGE_PRINT_NO_FULL */ - -{ -#ifndef UNIV_HOTBACKUP - dict_index_t* index; -#endif /* !UNIV_HOTBACKUP */ - ulint size = zip_size; - - if (!size) { - size = UNIV_PAGE_SIZE; - } - - if (!(flags & BUF_PAGE_PRINT_NO_FULL)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Page dump in ascii and hex (%lu bytes):\n", - size); - ut_print_buf(stderr, read_buf, size); - fputs("\nInnoDB: End of page dump\n", stderr); - } - - if (zip_size) { - /* Print compressed page. 
*/ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Compressed page type (" ULINTPF "); " - "stored checksum in field1 " ULINTPF "; " - "calculated checksums for field1: " - "%s " ULINTPF ", " - "%s " ULINTPF ", " - "%s " ULINTPF "; " - "page LSN " LSN_PF "; " - "page number (if stored to page already) " ULINTPF "; " - "space id (if stored to page already) " ULINTPF "\n", - fil_page_get_type(read_buf), - mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM), - buf_checksum_algorithm_name( - SRV_CHECKSUM_ALGORITHM_CRC32), - page_zip_calc_checksum(read_buf, zip_size, - SRV_CHECKSUM_ALGORITHM_CRC32), - buf_checksum_algorithm_name( - SRV_CHECKSUM_ALGORITHM_INNODB), - page_zip_calc_checksum(read_buf, zip_size, - SRV_CHECKSUM_ALGORITHM_INNODB), - buf_checksum_algorithm_name( - SRV_CHECKSUM_ALGORITHM_NONE), - page_zip_calc_checksum(read_buf, zip_size, - SRV_CHECKSUM_ALGORITHM_NONE), - mach_read_from_8(read_buf + FIL_PAGE_LSN), - mach_read_from_4(read_buf + FIL_PAGE_OFFSET), - mach_read_from_4(read_buf - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: uncompressed page, " - "stored checksum in field1 " ULINTPF ", " - "calculated checksums for field1: " - "%s " UINT32PF ", " - "%s " ULINTPF ", " - "%s " ULINTPF ", " - - "stored checksum in field2 " ULINTPF ", " - "calculated checksums for field2: " - "%s " UINT32PF ", " - "%s " ULINTPF ", " - "%s " ULINTPF ", " - - "page LSN " ULINTPF " " ULINTPF ", " - "low 4 bytes of LSN at page end " ULINTPF ", " - "page number (if stored to page already) " ULINTPF ", " - "space id (if created with >= MySQL-4.1.1 " - "and stored already) %lu\n", - mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32), - buf_calc_page_crc32(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB), - buf_calc_page_new_checksum(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE), - 
BUF_NO_CHECKSUM_MAGIC, - - mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32), - buf_calc_page_crc32(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB), - buf_calc_page_old_checksum(read_buf), - buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE), - BUF_NO_CHECKSUM_MAGIC, - - mach_read_from_4(read_buf + FIL_PAGE_LSN), - mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), - mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), - mach_read_from_4(read_buf + FIL_PAGE_OFFSET), - mach_read_from_4(read_buf - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); - - ulint page_type = fil_page_get_type(read_buf); - - fprintf(stderr, "InnoDB: page type %ld meaning %s\n", page_type, - fil_get_page_type_name(page_type)); - } - -#ifndef UNIV_HOTBACKUP - if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_INSERT) { - fprintf(stderr, - "InnoDB: Page may be an insert undo log page\n"); - } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_UPDATE) { - fprintf(stderr, - "InnoDB: Page may be an update undo log page\n"); - } -#endif /* !UNIV_HOTBACKUP */ - - switch (fil_page_get_type(read_buf)) { - index_id_t index_id; - case FIL_PAGE_INDEX: - index_id = btr_page_get_index_id(read_buf); - fprintf(stderr, - "InnoDB: Page may be an index page where" - " index id is %llu\n", - (ullint) index_id); -#ifndef UNIV_HOTBACKUP - index = dict_index_find_on_id_low(index_id); - if (index) { - fputs("InnoDB: (", stderr); - dict_index_name_print(stderr, NULL, index); - fputs(")\n", stderr); - } -#endif /* !UNIV_HOTBACKUP */ - break; - case FIL_PAGE_INODE: - fputs("InnoDB: Page may be an 'inode' page\n", stderr); - break; - case FIL_PAGE_IBUF_FREE_LIST: - fputs("InnoDB: Page may be an insert buffer free list page\n", - stderr); - break; - case FIL_PAGE_TYPE_ALLOCATED: - fputs("InnoDB: Page may be a freshly 
allocated page\n", - stderr); - break; - case FIL_PAGE_IBUF_BITMAP: - fputs("InnoDB: Page may be an insert buffer bitmap page\n", - stderr); - break; - case FIL_PAGE_TYPE_SYS: - fputs("InnoDB: Page may be a system page\n", - stderr); - break; - case FIL_PAGE_TYPE_TRX_SYS: - fputs("InnoDB: Page may be a transaction system page\n", - stderr); - break; - case FIL_PAGE_TYPE_FSP_HDR: - fputs("InnoDB: Page may be a file space header page\n", - stderr); - break; - case FIL_PAGE_TYPE_XDES: - fputs("InnoDB: Page may be an extent descriptor page\n", - stderr); - break; - case FIL_PAGE_TYPE_BLOB: - fputs("InnoDB: Page may be a BLOB page\n", - stderr); - break; - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - fputs("InnoDB: Page may be a compressed BLOB page\n", - stderr); - break; - } - - ut_ad(flags & BUF_PAGE_PRINT_NO_CRASH); -} - -#ifndef UNIV_HOTBACKUP - -# ifdef PFS_GROUP_BUFFER_SYNC -/********************************************************************//** -This function registers mutexes and rwlocks in buffer blocks with -performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is -defined to be a value less than chunk->size, then only mutexes -and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER -blocks are registered. */ -static -void -pfs_register_buffer_block( -/*======================*/ - buf_chunk_t* chunk) /*!< in/out: chunk of buffers */ -{ - ulint i; - ulint num_to_register; - buf_block_t* block; - - block = chunk->blocks; - - num_to_register = ut_min(chunk->size, - PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER); - - for (i = 0; i < num_to_register; i++) { - ib_mutex_t* mutex; - rw_lock_t* rwlock; - -# ifdef UNIV_PFS_MUTEX - mutex = &block->mutex; - ut_a(!mutex->pfs_psi); - mutex->pfs_psi = (PSI_server) - ? PSI_server->init_mutex(buffer_block_mutex_key, mutex) - : NULL; -# endif /* UNIV_PFS_MUTEX */ - -# ifdef UNIV_PFS_RWLOCK - rwlock = &block->lock; - ut_a(!rwlock->pfs_psi); - rwlock->pfs_psi = (PSI_server) - ? 
PSI_server->init_rwlock(buf_block_lock_key, rwlock) - : NULL; - -# ifdef UNIV_SYNC_DEBUG - rwlock = &block->debug_latch; - ut_a(!rwlock->pfs_psi); - rwlock->pfs_psi = (PSI_server) - ? PSI_server->init_rwlock(buf_block_debug_latch_key, - rwlock) - : NULL; -# endif /* UNIV_SYNC_DEBUG */ - -# endif /* UNIV_PFS_RWLOCK */ - block++; - } -} -# endif /* PFS_GROUP_BUFFER_SYNC */ - -/********************************************************************//** -Initializes a buffer control block when the buf_pool is created. */ -static -void -buf_block_init( -/*===========*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_block_t* block, /*!< in: pointer to control block */ - byte* frame) /*!< in: pointer to buffer frame */ -{ - UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE); - - block->frame = frame; - - block->page.buf_pool_index = buf_pool_index(buf_pool); - block->page.flush_type = BUF_FLUSH_LRU; - block->page.state = BUF_BLOCK_NOT_USED; - block->page.buf_fix_count = 0; - block->page.io_fix = BUF_IO_NONE; - block->page.encrypted = false; - block->page.real_size = 0; - block->page.write_size = 0; - block->modify_clock = 0; - block->page.slot = NULL; - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ - - block->check_index_page_at_flush = FALSE; - block->index = NULL; - -#ifdef UNIV_DEBUG - block->page.in_page_hash = FALSE; - block->page.in_zip_hash = FALSE; - block->page.in_flush_list = FALSE; - block->page.in_free_list = FALSE; - block->page.in_LRU_list = FALSE; - block->in_unzip_LRU_list = FALSE; -#endif /* UNIV_DEBUG */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - block->n_pointers = 0; -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - page_zip_des_init(&block->page.zip); - -#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC - /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration - of buffer block mutex/rwlock with performance 
schema. If - PFS_GROUP_BUFFER_SYNC is defined, skip the registration - since buffer block mutex/rwlock will be registered later in - pfs_register_buffer_block() */ - - mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK); - rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING); - -# ifdef UNIV_SYNC_DEBUG - rw_lock_create(PFS_NOT_INSTRUMENTED, - &block->debug_latch, SYNC_NO_ORDER_CHECK); -# endif /* UNIV_SYNC_DEBUG */ - -#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ - mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK); - rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING); - -# ifdef UNIV_SYNC_DEBUG - rw_lock_create(buf_block_debug_latch_key, - &block->debug_latch, SYNC_NO_ORDER_CHECK); -# endif /* UNIV_SYNC_DEBUG */ -#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */ - - ut_ad(rw_lock_validate(&(block->lock))); -} - -/********************************************************************//** -Allocates a chunk of buffer frames. -@return chunk, or NULL on failure */ -static -buf_chunk_t* -buf_chunk_init( -/*===========*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_chunk_t* chunk, /*!< out: chunk of buffers */ - ulint mem_size) /*!< in: requested size in bytes */ -{ - buf_block_t* block; - byte* frame; - ulint i; - ulint size_target; - - /* Round down to a multiple of page size, - although it already should be. */ - mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE); - size_target = (mem_size / UNIV_PAGE_SIZE) - 1; - /* Reserve space for the block descriptors. 
*/ - mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block) - + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); - - chunk->mem_size = mem_size; - chunk->mem = os_mem_alloc_large(&chunk->mem_size); - - if (UNIV_UNLIKELY(chunk->mem == NULL)) { - - return(NULL); - } - -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) { - struct bitmask *numa_mems_allowed = numa_get_mems_allowed(); - int st = mbind(chunk->mem, chunk->mem_size, - MPOL_INTERLEAVE, - numa_mems_allowed->maskp, - numa_mems_allowed->size, - MPOL_MF_MOVE); - if (st != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set NUMA memory policy of buffer" - " pool page frames to MPOL_INTERLEAVE" - " (error: %s).", strerror(errno)); - } - } -#endif // HAVE_LIBNUMA - - /* Allocate the block descriptors from - the start of the memory block. */ - chunk->blocks = (buf_block_t*) chunk->mem; - - /* Align a pointer to the first frame. Note that when - os_large_page_size is smaller than UNIV_PAGE_SIZE, - we may allocate one fewer block than requested. When - it is bigger, we may allocate more blocks than requested. */ - - frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE); - chunk->size = chunk->mem_size / UNIV_PAGE_SIZE - - (frame != chunk->mem); - - /* Subtract the space needed for block descriptors. */ - { - ulint size = chunk->size; - - while (frame < (byte*) (chunk->blocks + size)) { - frame += UNIV_PAGE_SIZE; - size--; - } - - chunk->size = size; - } - - if (chunk->size > size_target) { - chunk->size = size_target; - } - - /* Init block structs and assign frames for them. Then we - assign the frames to the first blocks (we already mapped the - memory above). 
*/ - - block = chunk->blocks; - - for (i = chunk->size; i--; ) { - - buf_block_init(buf_pool, block, frame); - UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); - - /* Add the block to the free list */ - UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page)); - - ut_d(block->page.in_free_list = TRUE); - ut_ad(buf_pool_from_block(block) == buf_pool); - - block++; - frame += UNIV_PAGE_SIZE; - } - -#ifdef PFS_GROUP_BUFFER_SYNC - pfs_register_buffer_block(chunk); -#endif - return(chunk); -} - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Finds a block in the given buffer chunk that points to a -given compressed page. -@return buffer block pointing to the compressed page, or NULL */ -static -buf_block_t* -buf_chunk_contains_zip( -/*===================*/ - buf_chunk_t* chunk, /*!< in: chunk being checked */ - const void* data) /*!< in: pointer to compressed page */ -{ - buf_block_t* block; - ulint i; - - block = chunk->blocks; - - for (i = chunk->size; i--; block++) { - if (block->page.zip.data == data) { - - return(block); - } - } - - return(NULL); -} - -/*********************************************************************//** -Finds a block in the buffer pool that points to a -given compressed page. -@return buffer block pointing to the compressed page, or NULL */ -UNIV_INTERN -buf_block_t* -buf_pool_contains_zip( -/*==================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - const void* data) /*!< in: pointer to compressed page */ -{ - ulint n; - buf_chunk_t* chunk = buf_pool->chunks; - - ut_ad(buf_pool); - for (n = buf_pool->n_chunks; n--; chunk++) { - - buf_block_t* block = buf_chunk_contains_zip(chunk, data); - - if (block) { - return(block); - } - } - - return(NULL); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Checks that all file pages in the buffer chunk are in a replaceable state. 
-@return address of a non-free block, or NULL if all freed */ -static -const buf_block_t* -buf_chunk_not_freed( -/*================*/ - buf_chunk_t* chunk) /*!< in: chunk being checked */ -{ - buf_block_t* block; - ulint i; - - block = chunk->blocks; - - for (i = chunk->size; i--; block++) { - ibool ready; - - switch (buf_block_get_state(block)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - /* The uncompressed buffer pool should never - contain compressed block descriptors. */ - ut_error; - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - /* Skip blocks that are not being used for - file pages. */ - break; - case BUF_BLOCK_FILE_PAGE: - mutex_enter(&block->mutex); - ready = buf_flush_ready_for_replace(&block->page); - mutex_exit(&block->mutex); - - if (UNIV_UNLIKELY(block->page.is_corrupt)) { - /* corrupt page may remain, it can be - skipped */ - break; - } - - if (!ready) { - - return(block); - } - - break; - } - } - - return(NULL); -} - -/********************************************************************//** -Set buffer pool size variables after resizing it */ -static -void -buf_pool_set_sizes(void) -/*====================*/ -{ - ulint i; - ulint curr_size = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - curr_size += buf_pool->curr_pool_size; - } - - srv_buf_pool_curr_size = curr_size; - srv_buf_pool_old_size = srv_buf_pool_size; -} - -/********************************************************************//** -Initialize a buffer pool instance. -@return DB_SUCCESS if all goes well. */ -UNIV_INTERN -ulint -buf_pool_init_instance( -/*===================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint buf_pool_size, /*!< in: size in bytes */ - ulint instance_no) /*!< in: id of the instance */ -{ - ulint i; - buf_chunk_t* chunk; - - /* 1. 
Initialize general fields - ------------------------------- */ - mutex_create(buf_pool_LRU_list_mutex_key, - &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST); - mutex_create(buf_pool_free_list_mutex_key, - &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST); - mutex_create(buf_pool_zip_free_mutex_key, - &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE); - mutex_create(buf_pool_zip_hash_mutex_key, - &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH); - mutex_create(buf_pool_zip_mutex_key, - &buf_pool->zip_mutex, SYNC_BUF_BLOCK); - mutex_create(buf_pool_flush_state_mutex_key, - &buf_pool->flush_state_mutex, SYNC_BUF_FLUSH_STATE); - - if (buf_pool_size > 0) { - buf_pool->n_chunks = 1; - - buf_pool->chunks = chunk = - (buf_chunk_t*) mem_zalloc(sizeof *chunk); - - if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) { - mem_free(chunk); - mem_free(buf_pool); - - return(DB_ERROR); - } - - buf_pool->instance_no = instance_no; - buf_pool->old_pool_size = buf_pool_size; - buf_pool->curr_size = chunk->size; - buf_pool->read_ahead_area - = ut_min(64, ut_2_power_up(buf_pool->curr_size / 32)); - buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE; - - /* Number of locks protecting page_hash must be a - power of two */ - srv_n_page_hash_locks = static_cast<ulong>( - ut_2_power_up(srv_n_page_hash_locks)); - ut_a(srv_n_page_hash_locks != 0); - ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS); - - buf_pool->page_hash = ib_create(2 * buf_pool->curr_size, - srv_n_page_hash_locks, - MEM_HEAP_FOR_PAGE_HASH, - SYNC_BUF_PAGE_HASH); - - buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size); - - buf_pool->last_printout_time = ut_time(); - } - /* 2. 
Initialize flushing fields - -------------------------------- */ - - mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex, - SYNC_BUF_FLUSH_LIST); - - for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { - buf_pool->no_flush[i] = os_event_create(); - } - - buf_pool->watch = (buf_page_t*) mem_zalloc( - sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE); - - /* All fields are initialized by mem_zalloc(). */ - - /* Initialize the temporal memory array and slots */ - buf_pool->tmp_arr = (buf_tmp_array_t *)mem_zalloc(sizeof(buf_tmp_array_t)); - ulint n_slots = (srv_n_read_io_threads + srv_n_write_io_threads) * (8 * OS_AIO_N_PENDING_IOS_PER_THREAD); - buf_pool->tmp_arr->n_slots = n_slots; - buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * n_slots); - - buf_pool->try_LRU_scan = TRUE; - - DBUG_EXECUTE_IF("buf_pool_init_instance_force_oom", - return(DB_ERROR); ); - - return(DB_SUCCESS); -} - -/********************************************************************//** -free one buffer pool instance */ -static -void -buf_pool_free_instance( -/*===================*/ - buf_pool_t* buf_pool) /* in,own: buffer pool instance - to free */ -{ - buf_chunk_t* chunk; - buf_chunk_t* chunks; - buf_page_t* bpage; - ulint i; - - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - while (bpage != NULL) { - buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - enum buf_page_state state = buf_page_get_state(bpage); - - ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - - if (state != BUF_BLOCK_FILE_PAGE) { - /* We must not have any dirty block except - when doing a fast shutdown. 
*/ - ut_ad(state == BUF_BLOCK_ZIP_PAGE - || srv_fast_shutdown == 2); - buf_page_free_descriptor(bpage); - } - - bpage = prev_bpage; - } - - mem_free(buf_pool->watch); - buf_pool->watch = NULL; - - for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { - os_event_free(buf_pool->no_flush[i]); - } - mutex_free(&buf_pool->LRU_list_mutex); - mutex_free(&buf_pool->free_list_mutex); - mutex_free(&buf_pool->zip_free_mutex); - mutex_free(&buf_pool->zip_hash_mutex); - mutex_free(&buf_pool->zip_mutex); - mutex_free(&buf_pool->flush_state_mutex); - mutex_free(&buf_pool->flush_list_mutex); - - chunks = buf_pool->chunks; - chunk = chunks + buf_pool->n_chunks; - - while (--chunk >= chunks) { - buf_block_t* block = chunk->blocks; - for (i = 0; i < chunk->size; i++, block++) { - mutex_free(&block->mutex); - rw_lock_free(&block->lock); -#ifdef UNIV_SYNC_DEBUG - rw_lock_free(&block->debug_latch); -#endif - } - os_mem_free_large(chunk->mem, chunk->mem_size); - } - - mem_free(buf_pool->chunks); - ha_clear(buf_pool->page_hash); - hash_table_free(buf_pool->page_hash); - hash_table_free(buf_pool->zip_hash); - - /* Free all used temporary slots */ - if (buf_pool->tmp_arr) { - for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { - buf_tmp_buffer_t* slot = &(buf_pool->tmp_arr->slots[i]); - if (slot && slot->crypt_buf) { - aligned_free(slot->crypt_buf); - slot->crypt_buf = NULL; - } - - if (slot && slot->comp_buf) { - aligned_free(slot->comp_buf); - slot->comp_buf = NULL; - } - } - } - - mem_free(buf_pool->tmp_arr->slots); - mem_free(buf_pool->tmp_arr); - buf_pool->tmp_arr = NULL; -} - -/********************************************************************//** -Creates the buffer pool. 
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ -UNIV_INTERN -dberr_t -buf_pool_init( -/*==========*/ - ulint total_size, /*!< in: size of the total pool in bytes */ - ulint n_instances) /*!< in: number of instances */ -{ - ulint i; - const ulint size = total_size / n_instances; - - ut_ad(n_instances > 0); - ut_ad(n_instances <= MAX_BUFFER_POOLS); - ut_ad(n_instances == srv_buf_pool_instances); - -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) { - struct bitmask *numa_mems_allowed = numa_get_mems_allowed(); - - ib_logf(IB_LOG_LEVEL_INFO, - "Setting NUMA memory policy to MPOL_INTERLEAVE"); - if (set_mempolicy(MPOL_INTERLEAVE, - numa_mems_allowed->maskp, - numa_mems_allowed->size) != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set NUMA memory policy to" - " MPOL_INTERLEAVE (error: %s).", - strerror(errno)); - } - } -#endif // HAVE_LIBNUMA - - buf_pool_ptr = (buf_pool_t*) mem_zalloc( - n_instances * sizeof *buf_pool_ptr); - - for (i = 0; i < n_instances; i++) { - buf_pool_t* ptr = &buf_pool_ptr[i]; - - if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) { - - /* Free all the instances created so far. */ - buf_pool_free(i); - - return(DB_ERROR); - } - } - - buf_pool_set_sizes(); - buf_LRU_old_ratio_update(100 * 3/ 8, FALSE); - - btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); - -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) { - ib_logf(IB_LOG_LEVEL_INFO, - "Setting NUMA memory policy to MPOL_DEFAULT"); - if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set NUMA memory policy to" - " MPOL_DEFAULT (error: %s).", strerror(errno)); - } - } -#endif // HAVE_LIBNUMA - - return(DB_SUCCESS); -} - -/********************************************************************//** -Frees the buffer pool at shutdown. This must not be invoked before -freeing all mutexes. 
*/ -UNIV_INTERN -void -buf_pool_free( -/*==========*/ - ulint n_instances) /*!< in: numbere of instances to free */ -{ - ulint i; - - for (i = 0; i < n_instances; i++) { - buf_pool_free_instance(buf_pool_from_array(i)); - } - - mem_free(buf_pool_ptr); - buf_pool_ptr = NULL; -} - -/********************************************************************//** -Clears the adaptive hash index on all pages in the buffer pool. */ -UNIV_INTERN -void -buf_pool_clear_hash_index(void) -/*===========================*/ -{ - ulint p; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(btr_search_own_all(RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!btr_search_enabled); - - for (p = 0; p < srv_buf_pool_instances; p++) { - buf_pool_t* buf_pool = buf_pool_from_array(p); - buf_chunk_t* chunks = buf_pool->chunks; - buf_chunk_t* chunk = chunks + buf_pool->n_chunks; - - while (--chunk >= chunks) { - buf_block_t* block = chunk->blocks; - ulint i = chunk->size; - - for (; i--; block++) { - dict_index_t* index = block->index; - - /* We can set block->index = NULL - when we have an x-latch on btr_search_latch; - see the comment in buf0buf.h */ - - if (!index) { - /* Not hashed */ - continue; - } - - block->index = NULL; -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - block->n_pointers = 0; -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - } - } - } -} - -/********************************************************************//** -Relocate a buffer control block. Relocates the block on the LRU list -and in buf_pool->page_hash. Does not relocate bpage->list. -The caller must take care of relocating bpage->list. 
*/ -UNIV_INTERN -void -buf_relocate( -/*=========*/ - buf_page_t* bpage, /*!< in/out: control block being relocated; - buf_page_get_state(bpage) must be - BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */ - buf_page_t* dpage) /*!< in/out: destination control block */ -{ - buf_page_t* b; - ulint fold; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - fold = buf_page_address_fold(bpage->space, bpage->offset); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage)); - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); - ut_a(bpage->buf_fix_count == 0); - ut_ad(bpage->in_LRU_list); - ut_ad(!bpage->in_zip_hash); - ut_ad(bpage->in_page_hash); - ut_ad(bpage == buf_page_hash_get_low(buf_pool, - bpage->space, - bpage->offset, - fold)); - - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); -#ifdef UNIV_DEBUG - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_FILE_PAGE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - case BUF_BLOCK_ZIP_DIRTY: - case BUF_BLOCK_ZIP_PAGE: - break; - } -#endif /* UNIV_DEBUG */ - - memcpy(dpage, bpage, sizeof *dpage); - - ut_d(bpage->in_LRU_list = FALSE); - ut_d(bpage->in_page_hash = FALSE); - - /* relocate buf_pool->LRU */ - b = UT_LIST_GET_PREV(LRU, bpage); - UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage); - - if (b) { - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage); - } else { - UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage); - } - - if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) { - buf_pool->LRU_old = dpage; -#ifdef UNIV_LRU_DEBUG - /* buf_pool->LRU_old must be the first item in the LRU list - whose "old" flag is set. 
*/ - ut_a(buf_pool->LRU_old->old); - ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old) - || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old); - ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old) - || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old); - } else { - /* Check that the "old" flag is consistent in - the block and its neighbours. */ - buf_page_set_old(dpage, buf_page_is_old(dpage)); -#endif /* UNIV_LRU_DEBUG */ - } - - ut_d(UT_LIST_VALIDATE( - LRU, buf_page_t, buf_pool->LRU, CheckInLRUList())); - - /* relocate buf_pool->page_hash */ - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage); -} - -/********************************************************************//** -Determine if a block is a sentinel for a buffer pool watch. -@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ -UNIV_INTERN -ibool -buf_pool_watch_is_sentinel( -/*=======================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - const buf_page_t* bpage) /*!< in: block */ -{ - /* We must also own the appropriate hash lock. */ - ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage)); - ut_ad(buf_page_in_file(bpage)); - - if (bpage < &buf_pool->watch[0] - || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) { - - ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE - || bpage->zip.data != NULL); - - return(FALSE); - } - - ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE); - ut_ad(!bpage->in_zip_hash); - ut_ad(bpage->in_page_hash); - ut_ad(bpage->zip.data == NULL); - ut_ad(bpage->buf_fix_count > 0); - return(TRUE); -} - -/****************************************************************//** -Add watch for the given page to be read in. Caller must have -appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race -condition with buf_LRU_free_page inserting the same page into the page hash. -This function may release the hash_lock and reacquire it. 
-@return NULL if watch set, block if the page is in the buffer pool */ -UNIV_INTERN -buf_page_t* -buf_pool_watch_set( -/*===============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - ulint fold) /*!< in: buf_page_address_fold(space, offset) */ -{ - buf_page_t* bpage; - ulint i; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - prio_rw_lock_t* hash_lock; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - hash_lock = buf_page_hash_lock_get(buf_pool, fold); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - - if (bpage != NULL) { -page_found: - if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { - /* The page was loaded meanwhile. */ - return(bpage); - } - - /* Add to an existing watch. */ -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32(&bpage->buf_fix_count, 1); -#else - ++bpage->buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - return(NULL); - } - - /* From this point this function becomes fairly heavy in terms - of latching. We acquire all the hash_locks. They are needed - because we don't want to read any stale information in - buf_pool->watch[]. However, it is not in the critical code path - as this function will be called only by the purge thread. */ - - - /* To obey latching order first release the hash_lock. */ - rw_lock_x_unlock(hash_lock); - - hash_lock_x_all(buf_pool->page_hash); - - /* We have to recheck that the page - was not loaded or a watch set by some other - purge thread. This is because of the small - time window between when we release the - hash_lock to acquire all the hash locks above. */ - - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (UNIV_LIKELY_NULL(bpage)) { - hash_unlock_x_all_but(buf_pool->page_hash, hash_lock); - goto page_found; - } - - /* The maximum number of purge threads should never exceed - BUF_POOL_WATCH_SIZE. 
So there is no way for purge thread - instance to hold a watch when setting another watch. */ - for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) { - bpage = &buf_pool->watch[i]; - - ut_ad(bpage->access_time == 0); - ut_ad(bpage->newest_modification == 0); - ut_ad(bpage->oldest_modification == 0); - ut_ad(bpage->zip.data == NULL); - ut_ad(!bpage->in_zip_hash); - - switch (bpage->state) { - case BUF_BLOCK_POOL_WATCH: - ut_ad(!bpage->in_page_hash); - ut_ad(bpage->buf_fix_count == 0); - - bpage->state = BUF_BLOCK_ZIP_PAGE; - bpage->space = static_cast<ib_uint32_t>(space); - bpage->offset = static_cast<ib_uint32_t>(offset); - bpage->buf_fix_count = 1; - bpage->buf_pool_index = buf_pool_index(buf_pool); - - ut_d(bpage->in_page_hash = TRUE); - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, - fold, bpage); - - /* Once the sentinel is in the page_hash we can - safely release all locks except just the - relevant hash_lock */ - hash_unlock_x_all_but(buf_pool->page_hash, - hash_lock); - - return(NULL); - case BUF_BLOCK_ZIP_PAGE: - ut_ad(bpage->in_page_hash); - ut_ad(bpage->buf_fix_count > 0); - break; - default: - ut_error; - } - } - - /* Allocation failed. Either the maximum number of purge - threads should never exceed BUF_POOL_WATCH_SIZE, or this code - should be modified to return a special non-NULL value and the - caller should purge the record directly. */ - ut_error; - - /* Fix compiler warning */ - return(NULL); -} - -/****************************************************************//** -Remove the sentinel block for the watch before replacing it with a real block. -buf_page_watch_clear() or buf_page_watch_occurred() will notice that -the block has been replaced with the real block. 
-@return reference count, to be added to the replacement block */ -static -void -buf_pool_watch_remove( -/*==================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint fold, /*!< in: buf_page_address_fold( - space, offset) */ - buf_page_t* watch) /*!< in/out: sentinel for watch */ -{ -#ifdef UNIV_SYNC_DEBUG - /* We must also own the appropriate hash_bucket mutex. */ - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(buf_page_get_state(watch) == BUF_BLOCK_ZIP_PAGE); - - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch); - ut_d(watch->in_page_hash = FALSE); - watch->buf_fix_count = 0; - watch->state = BUF_BLOCK_POOL_WATCH; -} - -/****************************************************************//** -Stop watching if the page has been read in. -buf_pool_watch_set(space,offset) must have returned NULL before. */ -UNIV_INTERN -void -buf_pool_watch_unset( -/*=================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ulint fold = buf_page_address_fold(space, offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - - rw_lock_x_lock(hash_lock); - - /* The page must exist because buf_pool_watch_set() increments - buf_fix_count. 
*/ - - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - - if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { - buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage)); - } else { - - ut_ad(bpage->buf_fix_count > 0); - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_decrement_uint32(&bpage->buf_fix_count, 1); -#else - --bpage->buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - - if (bpage->buf_fix_count == 0) { - buf_pool_watch_remove(buf_pool, fold, bpage); - } - } - - rw_lock_x_unlock(hash_lock); -} - -/****************************************************************//** -Check if the page has been read in. -This may only be called after buf_pool_watch_set(space,offset) -has returned NULL and before invoking buf_pool_watch_unset(space,offset). -@return FALSE if the given page was not read in, TRUE if it was */ -UNIV_INTERN -ibool -buf_pool_watch_occurred( -/*====================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - ibool ret; - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ulint fold = buf_page_address_fold(space, offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, - fold); - - rw_lock_s_lock(hash_lock); - - /* The page must exist because buf_pool_watch_set() - increments buf_fix_count. */ - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - - ret = !buf_pool_watch_is_sentinel(buf_pool, bpage); - rw_lock_s_unlock(hash_lock); - - return(ret); -} - -/********************************************************************//** -Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from slipping out of -the buffer pool. 
*/ -UNIV_INTERN -void -buf_page_make_young( -/*================*/ - buf_page_t* bpage) /*!< in: buffer block of a file page */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); - - ut_a(buf_page_in_file(bpage)); - - buf_LRU_make_block_young(bpage); - - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/********************************************************************//** -Moves a page to the start of the buffer pool LRU list if it is too old. -This high-level function can be used to prevent an important page from -slipping out of the buffer pool. */ -static -void -buf_page_make_young_if_needed( -/*==========================*/ - buf_page_t* bpage) /*!< in/out: buffer block of a - file page */ -{ - ut_a(buf_page_in_file(bpage)); - - if (buf_page_peek_if_too_old(bpage)) { - buf_page_make_young(bpage); - } -} - -/********************************************************************//** -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. */ -UNIV_INTERN -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_block_t* block; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset); - - if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) { - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); - block->check_index_page_at_flush = FALSE; - } -} - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG -/********************************************************************//** -Sets file_page_was_freed TRUE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. 
-@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN -buf_page_t* -buf_page_set_file_page_was_freed( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - prio_rw_lock_t* hash_lock; - - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, - &hash_lock); - - if (bpage) { - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); - mutex_enter(block_mutex); - rw_lock_s_unlock(hash_lock); - /* bpage->file_page_was_freed can already hold - when this code is invoked from dict_drop_index_tree() */ - bpage->file_page_was_freed = TRUE; - mutex_exit(block_mutex); - } - - return(bpage); -} - -/********************************************************************//** -Sets file_page_was_freed FALSE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. 
-@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN -buf_page_t* -buf_page_reset_file_page_was_freed( -/*===============================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - prio_rw_lock_t* hash_lock; - - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, - &hash_lock); - if (bpage) { - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); - mutex_enter(block_mutex); - rw_lock_s_unlock(hash_lock); - bpage->file_page_was_freed = FALSE; - mutex_exit(block_mutex); - } - - return(bpage); -} -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ - -/********************************************************************//** -Attempts to discard the uncompressed frame of a compressed page. The -caller should not be holding any mutexes when this function is called. -@return TRUE if successful, FALSE otherwise. */ -static -void -buf_block_try_discard_uncompressed( -/*===============================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - /* Since we need to acquire buf_pool->LRU_list_mutex to discard - the uncompressed frame and because page_hash mutex resides below - buf_pool->LRU_list_mutex in sync ordering therefore we must first - release the page_hash mutex. This means that the block in question - can move out of page_hash. Therefore we need to check again if the - block is still in page_hash. 
*/ - - mutex_enter(&buf_pool->LRU_list_mutex); - - bpage = buf_page_hash_get(buf_pool, space, offset); - - if (bpage) { - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - if (buf_LRU_free_page(bpage, false)) { - - mutex_exit(block_mutex); - return; - } - mutex_exit(block_mutex); - } - - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/********************************************************************//** -Get read access to a compressed page (usually of type -FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). -The page must be released with buf_page_release_zip(). -NOTE: the page is not protected by any latch. Mutual exclusion has to -be implemented at a higher level. In other words, all possible -accesses to a given page through this function must be protected by -the same set of mutexes or latches. -@return pointer to the block */ -UNIV_INTERN -buf_page_t* -buf_page_get_zip( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size */ - ulint offset) /*!< in: page number */ -{ - buf_page_t* bpage; - ib_mutex_t* block_mutex; - prio_rw_lock_t* hash_lock; - ibool discard_attempted = FALSE; - ibool must_read; - trx_t* trx = NULL; - ulint sec; - ulint ms; - ib_uint64_t start_time; - ib_uint64_t finish_time; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - if (UNIV_UNLIKELY(innobase_get_slow_log())) { - trx = innobase_get_trx(); - } - buf_pool->stat.n_page_gets++; - - for (;;) { -lookup: - - /* The following call will also grab the page_hash - mutex if the page is found. 
*/ - bpage = buf_page_hash_get_s_locked(buf_pool, space, - offset, &hash_lock); - if (bpage) { - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); - break; - } - - /* Page not in buf_pool: needs to be read from file */ - - ut_ad(!hash_lock); - dberr_t err = buf_read_page(space, zip_size, offset, trx); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Reading compressed page " ULINTPF - ":" ULINTPF - " failed with error: %s.", - space, offset, ut_strerr(err)); - - goto err_exit; - } - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - } - - ut_ad(buf_page_hash_lock_held_s(buf_pool, bpage)); - - if (!bpage->zip.data) { - /* There is no compressed page. */ -err_exit: - rw_lock_s_unlock(hash_lock); - return(NULL); - } - - if (UNIV_UNLIKELY(bpage->is_corrupt && srv_pass_corrupt_table <= 1)) { - - rw_lock_s_unlock(hash_lock); - - return(NULL); - } - - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage)); - - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - block_mutex = &buf_pool->zip_mutex; - mutex_enter(block_mutex); -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32(&bpage->buf_fix_count, 1); -#else - ++bpage->buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - goto got_block; - case BUF_BLOCK_FILE_PAGE: - /* Discard the uncompressed page frame if possible. 
*/ - if (!discard_attempted) { - rw_lock_s_unlock(hash_lock); - buf_block_try_discard_uncompressed(space, offset); - discard_attempted = TRUE; - goto lookup; - } - - block_mutex = &((buf_block_t*) bpage)->mutex; - - mutex_enter(block_mutex); - - buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__); - goto got_block; - } - - ut_error; - goto err_exit; - -got_block: - must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; - - rw_lock_s_unlock(hash_lock); -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - ut_a(!bpage->file_page_was_freed); -#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */ - - buf_page_set_accessed(bpage); - - mutex_exit(block_mutex); - - buf_page_make_young_if_needed(bpage); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(bpage->buf_fix_count > 0); - ut_a(buf_page_in_file(bpage)); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - - if (must_read) { - /* Let us wait until the read operation - completes */ - - if (UNIV_UNLIKELY(trx && trx->take_stats)) - { - ut_usectime(&sec, &ms); - start_time = (ib_uint64_t)sec * 1000000 + ms; - } else { - start_time = 0; - } - for (;;) { - enum buf_io_fix io_fix; - - mutex_enter(block_mutex); - io_fix = buf_page_get_io_fix(bpage); - mutex_exit(block_mutex); - - if (io_fix == BUF_IO_READ) { - - os_thread_sleep(WAIT_FOR_READ); - } else { - break; - } - } - if (UNIV_UNLIKELY(start_time != 0)) - { - ut_usectime(&sec, &ms); - finish_time = (ib_uint64_t)sec * 1000000 + ms; - trx->io_reads_wait_timer += (ulint)(finish_time - start_time); - } - } - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_page_get_space(bpage), - buf_page_get_page_no(bpage)) == 0); -#endif - return(bpage); -} - -/********************************************************************//** -Initialize some fields of a control block. 
*/ -UNIV_INLINE -void -buf_block_init_low( -/*===============*/ - buf_block_t* block) /*!< in: block to init */ -{ - block->check_index_page_at_flush = FALSE; - block->index = NULL; - - block->n_hash_helps = 0; - block->n_fields = 1; - block->n_bytes = 0; - block->left_side = TRUE; -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************************//** -Decompress a block. -@return TRUE if successful */ -UNIV_INTERN -ibool -buf_zip_decompress( -/*===============*/ - buf_block_t* block, /*!< in/out: block */ - ibool check) /*!< in: TRUE=verify the page checksum */ -{ - const byte* frame = block->page.zip.data; - ulint size = page_zip_get_size(&block->page.zip); - /* Space is not found if this function is called during IMPORT */ - fil_space_t* space = fil_space_acquire_for_io(block->page.space); - const unsigned key_version = mach_read_from_4(frame + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL; - const bool encrypted = crypt_data - && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED - && (!crypt_data->is_default_encryption() - || srv_encrypt_tables); - - ut_ad(buf_block_get_zip_size(block)); - ut_a(buf_block_get_space(block) != 0); - - if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Compressed page checksum mismatch" - " for %s [%u:%u]: stored: " ULINTPF ", crc32: " ULINTPF - " innodb: " ULINTPF ", none: " ULINTPF ".", - space ? 
space->chain.start->name : "N/A", - block->page.space, block->page.offset, - mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM), - page_zip_calc_checksum(frame, size, - SRV_CHECKSUM_ALGORITHM_CRC32), - page_zip_calc_checksum(frame, size, - SRV_CHECKSUM_ALGORITHM_INNODB), - page_zip_calc_checksum(frame, size, - SRV_CHECKSUM_ALGORITHM_NONE)); - goto err_exit; - } - - switch (fil_page_get_type(frame)) { - case FIL_PAGE_INDEX: { - - if (page_zip_decompress(&block->page.zip, - block->frame, TRUE)) { - if (space) { - fil_space_release_for_io(space); - } - return(TRUE); - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to decompress space %s [%u:%u]", - space ? space->chain.start->name : "N/A", - block->page.space, - block->page.offset); - - goto err_exit; - } - case FIL_PAGE_TYPE_ALLOCATED: - case FIL_PAGE_INODE: - case FIL_PAGE_IBUF_BITMAP: - case FIL_PAGE_TYPE_FSP_HDR: - case FIL_PAGE_TYPE_XDES: - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - /* Copy to uncompressed storage. */ - memcpy(block->frame, frame, - buf_block_get_zip_size(block)); - - if (space) { - fil_space_release_for_io(space); - } - - return(TRUE); - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown compressed page in %s [%u:%u]" - " type %s [" ULINTPF "].", - space ? space->chain.start->name : "N/A", - block->page.space, block->page.offset, - fil_get_page_type_name(fil_page_get_type(frame)), fil_page_get_type(frame)); - -err_exit: - if (encrypted) { - ib_logf(IB_LOG_LEVEL_INFO, - "Row compressed page could be encrypted with key_version %u.", - key_version); - block->page.encrypted = true; - dict_set_encrypted_by_space(block->page.space); - } else { - dict_set_corrupted_by_space(block->page.space); - } - - if (space) { - fil_space_release_for_io(space); - } - - return(FALSE); -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Gets the block to whose frame the pointer is pointing to if found -in this buffer pool instance. 
-@return pointer to block */ -UNIV_INTERN -buf_block_t* -buf_block_align_instance( -/*=====================*/ - buf_pool_t* buf_pool, /*!< in: buffer in which the block - resides */ - const byte* ptr) /*!< in: pointer to a frame */ -{ - buf_chunk_t* chunk; - ulint i; - - /* TODO: protect buf_pool->chunks with a mutex (it will - currently remain constant after buf_pool_init()) */ - for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) { - ulint offs; - - if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) { - - continue; - } - /* else */ - - offs = ptr - chunk->blocks->frame; - - offs >>= UNIV_PAGE_SIZE_SHIFT; - - if (UNIV_LIKELY(offs < chunk->size)) { - buf_block_t* block = &chunk->blocks[offs]; - - /* The function buf_chunk_init() invokes - buf_block_init() so that block[n].frame == - block->frame + n * UNIV_PAGE_SIZE. Check it. */ - ut_ad(block->frame == page_align(ptr)); -#ifdef UNIV_DEBUG - /* A thread that updates these fields must - hold one of the buf_pool mutexes, depending on the - page state, and block->mutex. Acquire - only the latter. */ - mutex_enter(&block->mutex); - - switch (buf_block_get_state(block)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - /* These types should only be used in - the compressed buffer pool, whose - memory is allocated from - buf_pool->chunks, in UNIV_PAGE_SIZE - blocks flagged as BUF_BLOCK_MEMORY. */ - ut_error; - break; - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - /* Some data structures contain - "guess" pointers to file pages. The - file pages may have been freed and - reused. Do not complain. */ - break; - case BUF_BLOCK_REMOVE_HASH: - /* buf_LRU_block_remove_hashed_page() - will overwrite the FIL_PAGE_OFFSET and - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with - 0xff and set the state to - BUF_BLOCK_REMOVE_HASH. 
*/ - ut_ad(page_get_space_id(page_align(ptr)) - == 0xffffffff); - ut_ad(page_get_page_no(page_align(ptr)) - == 0xffffffff); - break; - case BUF_BLOCK_FILE_PAGE: { - ulint space = page_get_space_id(page_align(ptr)); - ulint offset = page_get_page_no(page_align(ptr)); - - if (block->page.space != space || - block->page.offset != offset) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Block space_id %lu != page space_id %lu or " - "Block offset %lu != page offset %lu", - (ulint)block->page.space, space, - (ulint)block->page.offset, offset); - } - - ut_ad(block->page.space - == page_get_space_id(page_align(ptr))); - ut_ad(block->page.offset - == page_get_page_no(page_align(ptr))); - break; - } - } - - mutex_exit(&block->mutex); -#endif /* UNIV_DEBUG */ - - return(block); - } - } - - return(NULL); -} - -/*******************************************************************//** -Gets the block to whose frame the pointer is pointing to. -@return pointer to block, never NULL */ -UNIV_INTERN -buf_block_t* -buf_block_align( -/*============*/ - const byte* ptr) /*!< in: pointer to a frame */ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_block_t* block; - - block = buf_block_align_instance( - buf_pool_from_array(i), ptr); - if (block) { - return(block); - } - } - - /* The block should always be found. */ - ut_error; - return(NULL); -} - -/********************************************************************//** -Find out if a pointer belongs to a buf_block_t. It can be a pointer to -the buf_block_t itself or a member of it. This functions checks one of -the buffer pool instances. 
-@return TRUE if ptr belongs to a buf_block_t struct */ -static -ibool -buf_pointer_is_block_field_instance( -/*================================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - const void* ptr) /*!< in: pointer not dereferenced */ -{ - const buf_chunk_t* chunk = buf_pool->chunks; - const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks; - - /* TODO: protect buf_pool->chunks with a mutex (it will - currently remain constant after buf_pool_init()) */ - while (chunk < echunk) { - if (ptr >= (void*) chunk->blocks - && ptr < (void*) (chunk->blocks + chunk->size)) { - - return(TRUE); - } - - chunk++; - } - - return(FALSE); -} - -/********************************************************************//** -Find out if a pointer belongs to a buf_block_t. It can be a pointer to -the buf_block_t itself or a member of it -@return TRUE if ptr belongs to a buf_block_t struct */ -UNIV_INTERN -ibool -buf_pointer_is_block_field( -/*=======================*/ - const void* ptr) /*!< in: pointer not dereferenced */ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - ibool found; - - found = buf_pointer_is_block_field_instance( - buf_pool_from_array(i), ptr); - if (found) { - return(TRUE); - } - } - - return(FALSE); -} - -/********************************************************************//** -Find out if a buffer block was created by buf_chunk_init(). -@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */ -static -ibool -buf_block_is_uncompressed( -/*======================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - const buf_block_t* block) /*!< in: pointer to block, - not dereferenced */ -{ - if ((((ulint) block) % sizeof *block) != 0) { - /* The pointer should be aligned. 
*/ - return(FALSE); - } - - return(buf_pointer_is_block_field_instance(buf_pool, (void*) block)); -} - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/********************************************************************//** -Return true if probe is enabled. -@return true if probe enabled. */ -static -bool -buf_debug_execute_is_force_flush() -/*==============================*/ -{ - DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); ); - - /* This is used during queisce testing, we want to ensure maximum - buffering by the change buffer. */ - - if (srv_ibuf_disable_background_merge) { - return(true); - } - - return(false); -} -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - -/** -Wait for the block to be read in. -@param block The block to check -@param trx Transaction to account the I/Os to */ -static -void -buf_wait_for_read(buf_block_t* block, trx_t* trx) -{ - /* Note: For the PAGE_ATOMIC_REF_COUNT case: - - We are using the block->lock to check for IO state (and a dirty read). - We set the IO_READ state under the protection of the hash_lock - (and block->mutex). This is safe because another thread can only - access the block (and check for IO state) after the block has been - added to the page hashtable. 
*/ - - if (buf_block_get_io_fix_unlocked(block) == BUF_IO_READ) { - - ib_uint64_t start_time; - ulint sec; - ulint ms; - - /* Wait until the read operation completes */ - - ib_mutex_t* mutex = buf_page_get_mutex(&block->page); - - if (UNIV_UNLIKELY(trx && trx->take_stats)) - { - ut_usectime(&sec, &ms); - start_time = (ib_uint64_t)sec * 1000000 + ms; - } else { - start_time = 0; - } - - for (;;) { - buf_io_fix io_fix; - - mutex_enter(mutex); - - io_fix = buf_block_get_io_fix(block); - - mutex_exit(mutex); - - if (io_fix == BUF_IO_READ) { - /* Wait by temporaly s-latch */ - rw_lock_s_lock(&block->lock); - rw_lock_s_unlock(&block->lock); - } else { - break; - } - } - - if (UNIV_UNLIKELY(start_time != 0)) - { - ut_usectime(&sec, &ms); - ib_uint64_t finish_time - = (ib_uint64_t)sec * 1000000 + ms; - trx->io_reads_wait_timer - += (ulint)(finish_time - start_time); - } - - } -} - -/********************************************************************//** -This is the general function used to get access to a database page. 
-@return pointer to the block or NULL */ -UNIV_INTERN -buf_block_t* -buf_page_get_gen( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint offset, /*!< in: page number */ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_block_t* guess, /*!< in: guessed block or NULL */ - ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or - BUF_GET_IF_IN_POOL_OR_WATCH */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr, /*!< in: mini-transaction */ - dberr_t* err) /*!< out: error code */ -{ - buf_block_t* block; - ulint fold; - unsigned access_time; - ulint fix_type; - prio_rw_lock_t* hash_lock; - ulint retries = 0; - trx_t* trx = NULL; - buf_block_t* fix_block; - ib_mutex_t* fix_mutex = NULL; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad((rw_latch == RW_S_LATCH) - || (rw_latch == RW_X_LATCH) - || (rw_latch == RW_NO_LATCH)); - - if (err) { - *err = DB_SUCCESS; - } - -#ifdef UNIV_DEBUG - switch (mode) { - case BUF_GET_NO_LATCH: - ut_ad(rw_latch == RW_NO_LATCH); - break; - case BUF_GET: - case BUF_GET_IF_IN_POOL: - case BUF_PEEK_IF_IN_POOL: - case BUF_GET_IF_IN_POOL_OR_WATCH: - case BUF_GET_POSSIBLY_FREED: - break; - default: - ut_error; - } -#endif /* UNIV_DEBUG */ - ut_ad(zip_size == fil_space_get_zip_size(space)); - ut_ad(ut_is_2pow(zip_size)); -#ifndef UNIV_LOG_DEBUG - ut_ad(!ibuf_inside(mtr) - || ibuf_page_low(space, zip_size, offset, - FALSE, file, line, NULL)); -#endif - if (UNIV_UNLIKELY(innobase_get_slow_log())) { - trx = innobase_get_trx(); - } - buf_pool->stat.n_page_gets++; - fold = buf_page_address_fold(space, offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); -loop: - block = guess; - - rw_lock_s_lock(hash_lock); - - if (block != NULL) { - - /* If the guess is a compressed page 
descriptor that - has been allocated by buf_page_alloc_descriptor(), - it may have been freed by buf_relocate(). */ - - if (!buf_block_is_uncompressed(buf_pool, block) - || offset != block->page.offset - || space != block->page.space - || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { - - /* Our guess was bogus or things have changed - since. */ - block = guess = NULL; - } else { - ut_ad(!block->page.in_zip_hash); - } - } - - if (block == NULL) { - block = (buf_block_t*) buf_page_hash_get_low( - buf_pool, space, offset, fold); - } - - if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) { - rw_lock_s_unlock(hash_lock); - block = NULL; - } - - if (block == NULL) { - - /* Page not in buf_pool: needs to be read from file */ - - if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(hash_lock); - block = (buf_block_t*) buf_pool_watch_set( - space, offset, fold); - mutex_exit(&buf_pool->LRU_list_mutex); - - if (UNIV_LIKELY_NULL(block)) { - /* We can release hash_lock after we - increment the fix count to make - sure that no state change takes place. */ - fix_block = block; - buf_block_fix(fix_block); - - /* Now safe to release page_hash mutex */ - rw_lock_x_unlock(hash_lock); - goto got_block; - } - - rw_lock_x_unlock(hash_lock); - } - - if (mode == BUF_GET_IF_IN_POOL - || mode == BUF_PEEK_IF_IN_POOL - || mode == BUF_GET_IF_IN_POOL_OR_WATCH) { -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - return(NULL); - } - - /* Call path is buf_read_page() -> buf_read_page_low() - (_fil_io()) -> buf_page_io_complete() -> - buf_decrypt_after_read() here fil_space_t* is used - and we decrypt -> buf_page_check_corrupt() where - page checksums are compared. 
Decryption/decompression - is handled lower level, error handling is handled on lower - level, here we need only to know is page really corrupted - or encrypted page with correct checksum. */ - - dberr_t local_err = buf_read_page(space, zip_size, offset, trx); - - if (local_err == DB_SUCCESS) { - buf_read_ahead_random(space, zip_size, offset, - ibuf_inside(mtr), trx); - - retries = 0; - } else if (retries < BUF_PAGE_READ_MAX_RETRIES) { - ++retries; - - DBUG_EXECUTE_IF( - "innodb_page_corruption_retries", - retries = BUF_PAGE_READ_MAX_RETRIES; - ); - } else { - if (err) { - *err = local_err; - } - - /* Pages whose encryption key is unavailable or used - key, encryption algorithm or encryption method is - incorrect are marked as encrypted in - buf_page_check_corrupt(). Unencrypted page could be - corrupted in a way where the key_id field is - nonzero. There is no checksum on field - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */ - if (local_err == DB_DECRYPTION_FAILED) { - return (NULL); - } - - /* Try to set table as corrupted instead of - asserting. */ - if (space > TRX_SYS_SPACE && - dict_set_corrupted_by_space(space)) { - return (NULL); - } - - ib_logf(IB_LOG_LEVEL_FATAL, "Unable" - " to read tablespace " ULINTPF " page no " - ULINTPF " into the buffer pool after " - ULINTPF " attempts." - " The most probable cause" - " of this error may be that the" - " table has been corrupted." - " You can try to fix this" - " problem by using" - " innodb_force_recovery." - " Please see " REFMAN " for more" - " details. 
Aborting...", - space, offset, - BUF_PAGE_READ_MAX_RETRIES); - } - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - goto loop; - } else { - fix_block = block; - } - - buf_block_fix(fix_block); - - /* Now safe to release page_hash mutex */ - rw_lock_s_unlock(hash_lock); - -got_block: - - fix_mutex = buf_page_get_mutex(&fix_block->page); - - ut_ad(page_zip_get_size(&block->page.zip) == zip_size); - - if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) { - - bool must_read; - - { - buf_page_t* fix_page = &fix_block->page; - - mutex_enter(fix_mutex); - - buf_io_fix io_fix = buf_page_get_io_fix(fix_page); - - must_read = (io_fix == BUF_IO_READ); - - mutex_exit(fix_mutex); - } - - if (must_read) { - /* The page is being read to buffer pool, - but we cannot wait around for the read to - complete. */ - buf_block_unfix(fix_block); - - return(NULL); - } - } - - if (UNIV_UNLIKELY(fix_block->page.is_corrupt && - srv_pass_corrupt_table <= 1)) { - - buf_block_unfix(fix_block); - - return(NULL); - } - - switch(buf_block_get_state(fix_block)) { - buf_page_t* bpage; - - case BUF_BLOCK_FILE_PAGE: - ut_ad(fix_mutex != &buf_pool->zip_mutex); - break; - - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - if (mode == BUF_PEEK_IF_IN_POOL) { - /* This mode is only used for dropping an - adaptive hash index. There cannot be an - adaptive hash index for a compressed-only - page, so do not bother decompressing the page. */ - buf_block_unfix(fix_block); - - return(NULL); - } - - bpage = &block->page; - ut_ad(fix_mutex == &buf_pool->zip_mutex); - - /* Note: We have already buffer fixed this block. */ - if (bpage->buf_fix_count > 1 - || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { - - /* This condition often occurs when the buffer - is not buffer-fixed, but I/O-fixed by - buf_page_init_for_read(). 
*/ - - buf_block_unfix(fix_block); - - /* The block is buffer-fixed or I/O-fixed. - Try again later. */ - os_thread_sleep(WAIT_FOR_READ); - - goto loop; - } - - /* Buffer-fix the block so that it cannot be evicted - or relocated while we are attempting to allocate an - uncompressed page. */ - - /* Allocate an uncompressed page. */ - - block = buf_LRU_get_free_block(buf_pool); - - mutex_enter(&buf_pool->LRU_list_mutex); - - rw_lock_x_lock(hash_lock); - - /* Buffer-fixing prevents the page_hash from changing. */ - ut_ad(bpage == buf_page_hash_get_low( - buf_pool, space, offset, fold)); - - buf_block_mutex_enter(block); - - mutex_enter(&buf_pool->zip_mutex); - - ut_ad(fix_block->page.buf_fix_count > 0); - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1); -#else - --fix_block->page.buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - - fix_block = block; - - if (bpage->buf_fix_count > 0 - || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { - - mutex_exit(&buf_pool->zip_mutex); - /* The block was buffer-fixed or I/O-fixed while - buf_pool->mutex was not held by this thread. - Free the block that was allocated and retry. - This should be extremely unlikely, for example, - if buf_page_get_zip() was invoked. */ - - buf_LRU_block_free_non_file_page(block); - mutex_exit(&buf_pool->LRU_list_mutex); - rw_lock_x_unlock(hash_lock); - buf_block_mutex_exit(block); - - /* Try again */ - goto loop; - } - - /* Move the compressed page from bpage to block, - and uncompress it. */ - - /* Note: this is the uncompressed block and it is not - accessible by other threads yet because it is not in - any list or hash table */ - buf_relocate(bpage, &block->page); - - buf_block_init_low(block); - - /* Set after relocate(). 
*/ - block->page.buf_fix_count = 1; - - block->lock_hash_val = lock_rec_hash(space, offset); - - UNIV_MEM_DESC(&block->page.zip.data, - page_zip_get_size(&block->page.zip)); - - if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) { -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_REMOVE(list, buf_pool->zip_clean, - &block->page); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - ut_ad(!block->page.in_flush_list); - } else { - /* Relocate buf_pool->flush_list. */ - buf_flush_relocate_on_flush_list(bpage, &block->page); - } - - /* Buffer-fix, I/O-fix, and X-latch the block - for the duration of the decompression. - Also add the block to the unzip_LRU list. */ - block->page.state = BUF_BLOCK_FILE_PAGE; - - /* Insert at the front of unzip_LRU list */ - buf_unzip_LRU_add_block(block, FALSE); - - mutex_exit(&buf_pool->LRU_list_mutex); - - buf_block_set_io_fix(block, BUF_IO_READ); - rw_lock_x_lock_inline(&block->lock, 0, file, line); - - UNIV_MEM_INVALID(bpage, sizeof *bpage); - - rw_lock_x_unlock(hash_lock); - - os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1); - - mutex_exit(&buf_pool->zip_mutex); - - access_time = buf_page_is_accessed(&block->page); - - buf_block_mutex_exit(block); - - buf_page_free_descriptor(bpage); - - /* Decompress the page while not holding - any buf_pool or block->mutex. 
*/ - - { - bool success = buf_zip_decompress(block, TRUE); - - if (!success) { - buf_block_mutex_enter(fix_block); - buf_block_set_io_fix(fix_block, BUF_IO_NONE); - buf_block_mutex_exit(fix_block); - - os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); - rw_lock_x_unlock(&fix_block->lock); - mutex_enter(&buf_pool->LRU_list_mutex); - buf_block_unfix(fix_block); - mutex_exit(&buf_pool->LRU_list_mutex); - return NULL; - } - } - - if (!recv_no_ibuf_operations) { - if (access_time) { -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(space, offset) == 0); -#endif /* UNIV_IBUF_COUNT_DEBUG */ - } else { - ibuf_merge_or_delete_for_page( - block, space, offset, zip_size, TRUE); - } - } - - /* Unfix and unlatch the block. */ - buf_block_mutex_enter(fix_block); - - buf_block_set_io_fix(fix_block, BUF_IO_NONE); - - buf_block_mutex_exit(fix_block); - - os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); - - rw_lock_x_unlock(&block->lock); - - break; - - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; - } - - ut_ad(block == fix_block); - ut_ad(fix_block->page.buf_fix_count > 0); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - - if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH) - && (ibuf_debug || buf_debug_execute_is_force_flush())) { - - /* Try to evict the block from the buffer pool, to use the - insert buffer (change buffer) as much as possible. */ - - mutex_enter(&buf_pool->LRU_list_mutex); - - buf_block_unfix(fix_block); - - /* Now we are only holding the buf_pool->LRU_list_mutex, - not block->mutex or hash_lock. 
Blocks cannot be - relocated or enter or exit the buf_pool while we - are holding the buf_pool->LRU_list_mutex. */ - - fix_mutex = buf_page_get_mutex(&fix_block->page); - mutex_enter(fix_mutex); - - if (buf_LRU_free_page(&fix_block->page, true)) { - - mutex_exit(fix_mutex); - - if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(hash_lock); - - /* Set the watch, as it would have - been set if the page were not in the - buffer pool in the first place. */ - block = (buf_block_t*) buf_pool_watch_set( - space, offset, fold); - mutex_exit(&buf_pool->LRU_list_mutex); - } else { - rw_lock_x_lock(hash_lock); - block = (buf_block_t*) buf_page_hash_get_low( - buf_pool, space, offset, fold); - } - - rw_lock_x_unlock(hash_lock); - - if (block != NULL) { - /* Either the page has been read in or - a watch was set on that in the window - where we released the buf_pool::mutex - and before we acquire the hash_lock - above. Try again. */ - guess = block; - goto loop; - } - - return(NULL); - } - - if (buf_flush_page_try(buf_pool, fix_block)) { - guess = fix_block; - goto loop; - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - buf_block_mutex_exit(fix_block); - - buf_block_fix(fix_block); - - /* Failed to evict the page; change it directly */ - } -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - - ut_ad(fix_block->page.buf_fix_count > 0); - -#ifdef UNIV_SYNC_DEBUG - /* We have already buffer fixed the page, and we are committed to - returning this page to the caller. Register for debugging. 
*/ - { - ibool ret; - ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line); - ut_a(ret); - } -#endif /* UNIV_SYNC_DEBUG */ - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - ut_a(mode == BUF_GET_POSSIBLY_FREED - || !fix_block->page.file_page_was_freed); -#endif - /* Check if this is the first access to the page */ - access_time = buf_page_is_accessed(&fix_block->page); - - /* This is a heuristic and we don't care about ordering issues. */ - if (access_time == 0) { - buf_block_mutex_enter(fix_block); - - buf_page_set_accessed(&fix_block->page); - - buf_block_mutex_exit(fix_block); - } - - if (mode != BUF_PEEK_IF_IN_POOL) { - buf_page_make_young_if_needed(&fix_block->page); - } - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(fix_block->page.buf_fix_count > 0); - ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -#ifdef PAGE_ATOMIC_REF_COUNT - /* We have to wait here because the IO_READ state was set - under the protection of the hash_lock and the block->mutex - but not the block->lock. 
*/ - buf_wait_for_read(fix_block, trx); -#endif /* PAGE_ATOMIC_REF_COUNT */ - - switch (rw_latch) { - case RW_NO_LATCH: - -#ifndef PAGE_ATOMIC_REF_COUNT - buf_wait_for_read(fix_block, trx); -#endif /* !PAGE_ATOMIC_REF_COUNT */ - - fix_type = MTR_MEMO_BUF_FIX; - break; - - case RW_S_LATCH: - rw_lock_s_lock_inline(&fix_block->lock, 0, file, line); - - fix_type = MTR_MEMO_PAGE_S_FIX; - break; - - default: - ut_ad(rw_latch == RW_X_LATCH); - rw_lock_x_lock_inline(&fix_block->lock, 0, file, line); - - fix_type = MTR_MEMO_PAGE_X_FIX; - break; - } - - mtr_memo_push(mtr, fix_block, fix_type); - - if (mode != BUF_PEEK_IF_IN_POOL && !access_time) { - /* In the case of a first access, try to apply linear - read-ahead */ - - buf_read_ahead_linear( - space, zip_size, offset, ibuf_inside(mtr), trx); - } - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(fix_block), - buf_block_get_page_no(fix_block)) == 0); -#endif -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - if (UNIV_UNLIKELY(trx && trx->take_stats)) { - _increment_page_get_statistics(block, trx); - } - - return(fix_block); -} - -/********************************************************************//** -This is the general function used to get optimistic access to a database -page. 
-@return TRUE if success */ -UNIV_INTERN -ibool -buf_page_optimistic_get( -/*====================*/ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /*!< in: guessed buffer block */ - ib_uint64_t modify_clock,/*!< in: modify clock value */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - buf_pool_t* buf_pool; - unsigned access_time; - ibool success; - ulint fix_type; - trx_t* trx = NULL; - - ut_ad(block); - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - - mutex_enter(&block->mutex); - - if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) { - - mutex_exit(&block->mutex); - - return(FALSE); - } - - buf_block_buf_fix_inc(block, file, line); - - access_time = buf_page_is_accessed(&block->page); - - buf_page_set_accessed(&block->page); - - mutex_exit(&block->mutex); - - buf_page_make_young_if_needed(&block->page); - - ut_ad(!ibuf_inside(mtr) - || ibuf_page(buf_block_get_space(block), - buf_block_get_zip_size(block), - buf_block_get_page_no(block), NULL)); - - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - success = rw_lock_x_lock_func_nowait_inline(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_X_FIX; - } - - if (UNIV_UNLIKELY(!success)) { - buf_block_buf_fix_dec(block); - - return(FALSE); - } - - if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) { - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - - if (rw_latch == RW_S_LATCH) { - rw_lock_s_unlock(&(block->lock)); - } else { - rw_lock_x_unlock(&(block->lock)); - } - - buf_block_buf_fix_dec(block); - - return(FALSE); - } - - mtr_memo_push(mtr, block, fix_type); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(block->page.buf_fix_count > 0); - 
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); -#endif - if (UNIV_UNLIKELY(innobase_get_slow_log())) { - trx = innobase_get_trx(); - } - - if (!access_time) { - /* In the case of a first access, try to apply linear - read-ahead */ - - buf_read_ahead_linear(buf_block_get_space(block), - buf_block_get_zip_size(block), - buf_block_get_page_no(block), - ibuf_inside(mtr), trx); - } - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); -#endif - buf_pool = buf_pool_from_block(block); - buf_pool->stat.n_page_gets++; - - if (UNIV_UNLIKELY(trx && trx->take_stats)) { - _increment_page_get_statistics(block, trx); - } - return(TRUE); -} - -/********************************************************************//** -This is used to get access to a known database page, when no waiting can be -done. For example, if a search in an adaptive hash index leads us to this -frame. 
-@return TRUE if success */ -UNIV_INTERN -ibool -buf_page_get_known_nowait( -/*======================*/ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /*!< in: the known page */ - ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - buf_pool_t* buf_pool; - ibool success; - ulint fix_type; - trx_t* trx = NULL; - - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - - mutex_enter(&block->mutex); - - if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) { - /* Another thread is just freeing the block from the LRU list - of the buffer pool: do not try to access this page; this - attempt to access the page can only come through the hash - index because when the buffer block state is ..._REMOVE_HASH, - we have already removed it from the page address hash table - of the buffer pool. 
*/ - - mutex_exit(&block->mutex); - - return(FALSE); - } - - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - - buf_block_buf_fix_inc(block, file, line); - - buf_page_set_accessed(&block->page); - - mutex_exit(&block->mutex); - - buf_pool = buf_pool_from_block(block); - - if (mode == BUF_MAKE_YOUNG) { - buf_page_make_young_if_needed(&block->page); - } - - ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD); - - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - success = rw_lock_x_lock_func_nowait_inline(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_X_FIX; - } - - if (!success) { - buf_block_buf_fix_dec(block); - - return(FALSE); - } - - mtr_memo_push(mtr, block, fix_type); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(block->page.buf_fix_count > 0); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - if (mode != BUF_KEEP_OLD) { - /* If mode == BUF_KEEP_OLD, we are executing an I/O - completion routine. Avoid a bogus assertion failure - when ibuf_merge_or_delete_for_page() is processing a - page that was just freed due to DROP INDEX, or - deleting a record from SYS_INDEXES. This check will be - skipped in recv_recover_page() as well. 
*/ - - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); - } -#endif - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a((mode == BUF_KEEP_OLD) - || (ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0)); -#endif - buf_pool->stat.n_page_gets++; - - if (UNIV_UNLIKELY(innobase_get_slow_log())) { - - trx = innobase_get_trx(); - if (trx != NULL && trx->take_stats) { - - _increment_page_get_statistics(block, trx); - } - } - - return(TRUE); -} - -/*******************************************************************//** -Given a tablespace id and page number tries to get that page. If the -page is not in the buffer pool it is not loaded and NULL is returned. -Suitable for using when holding the lock_sys_t::mutex. -@return pointer to a page or NULL */ -UNIV_INTERN -buf_block_t* -buf_page_try_get_func( -/*==================*/ - ulint space_id,/*!< in: tablespace id */ - ulint page_no,/*!< in: page number */ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - bool possibly_freed, - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - buf_block_t* block; - ibool success; - ulint fix_type; - buf_pool_t* buf_pool = buf_pool_get(space_id, page_no); - prio_rw_lock_t* hash_lock; - - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - - block = buf_block_hash_get_s_locked(buf_pool, space_id, - page_no, &hash_lock); - - if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) { - if (block) { - rw_lock_s_unlock(hash_lock); - } - return(NULL); - } - - ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page)); - - mutex_enter(&block->mutex); - rw_lock_s_unlock(hash_lock); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_a(buf_block_get_space(block) == space_id); - ut_a(buf_block_get_page_no(block) == page_no); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - - 
buf_block_buf_fix_inc(block, file, line); - mutex_exit(&block->mutex); - - if (rw_latch == RW_S_LATCH) { - fix_type = MTR_MEMO_PAGE_S_FIX; - success = rw_lock_s_lock_nowait(&block->lock, file, line); - } else { - success = false; - } - - if (!success) { - /* Let us try to get an X-latch. If the current thread - is holding an X-latch on the page, we cannot get an - S-latch. */ - - fix_type = MTR_MEMO_PAGE_X_FIX; - success = rw_lock_x_lock_func_nowait_inline(&block->lock, - file, line); - } - - if (!success) { - buf_block_buf_fix_dec(block); - - return(NULL); - } - - mtr_memo_push(mtr, block, fix_type); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(block->page.buf_fix_count > 0); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - if (!possibly_freed) { - mutex_enter(&block->mutex); - ut_a(!block->page.file_page_was_freed); - mutex_exit(&block->mutex); - } -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - - buf_pool->stat.n_page_gets++; - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); -#endif - - return(block); -} - -/********************************************************************//** -Initialize some fields of a control block. 
*/ -UNIV_INLINE -void -buf_page_init_low( -/*==============*/ - buf_page_t* bpage) /*!< in: block to init */ -{ - bpage->flush_type = BUF_FLUSH_LRU; - bpage->io_fix = BUF_IO_NONE; - bpage->buf_fix_count = 0; - bpage->freed_page_clock = 0; - bpage->access_time = 0; - bpage->newest_modification = 0; - bpage->oldest_modification = 0; - bpage->write_size = 0; - bpage->encrypted = false; - bpage->real_size = 0; - - HASH_INVALIDATE(bpage, hash); - bpage->is_corrupt = FALSE; -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - bpage->file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ -} - -/********************************************************************//** -Inits a page to the buffer buf_pool. */ -static MY_ATTRIBUTE((nonnull)) -void -buf_page_init( -/*==========*/ - buf_pool_t* buf_pool,/*!< in/out: buffer pool */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space - in units of a page */ - ulint fold, /*!< in: buf_page_address_fold(space,offset) */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - buf_block_t* block) /*!< in/out: block to init */ -{ - buf_page_t* hash_page; - - ut_ad(buf_pool == buf_pool_get(space, offset)); - - ut_ad(mutex_own(&(block->mutex))); - ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, fold), - RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - /* Set the state of the block */ - buf_block_set_file_page(block, space, offset); - - buf_block_init_low(block); - - block->lock_hash_val = lock_rec_hash(space, offset); - - buf_page_init_low(&block->page); - - /* Insert into the hash table of file pages */ - - hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold); - - if (hash_page == NULL) { - /* Block not found in the hash table */ - } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) { - - mutex_enter(&buf_pool->zip_mutex); - - ib_uint32_t buf_fix_count 
= hash_page->buf_fix_count; - - ut_a(buf_fix_count > 0); - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32( - &block->page.buf_fix_count, buf_fix_count); -#else - block->page.buf_fix_count += ulint(buf_fix_count); -#endif /* PAGE_ATOMIC_REF_COUNT */ - - buf_pool_watch_remove(buf_pool, fold, hash_page); - - mutex_exit(&buf_pool->zip_mutex); - - } else { - fprintf(stderr, - "InnoDB: Error: page %lu %lu already found" - " in the hash table: %p, %p\n", - space, - offset, - (const void*) hash_page, (const void*) block); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - mutex_exit(&block->mutex); - buf_print(); - buf_LRU_print(); - buf_validate(); - buf_LRU_validate(); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - ut_error; - } - - ut_ad(!block->page.in_zip_hash); - ut_ad(!block->page.in_page_hash); - ut_d(block->page.in_page_hash = TRUE); - - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page); - - if (zip_size) { - page_zip_set_size(&block->page.zip, zip_size); - } -} - -/********************************************************************//** -Function which inits a page for read to the buffer buf_pool. If the page is -(1) already in buf_pool, or -(2) if we specify to read only ibuf pages and the page is not an ibuf page, or -(3) if the space is deleted or being deleted, -then this function does nothing. -Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock -on the buffer frame. The io-handler must take care that the flag is cleared -and the lock released later. -@return pointer to the block or NULL */ -UNIV_INTERN -buf_page_t* -buf_page_init_for_read( -/*===================*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... 
*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - ibool unzip, /*!< in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version, - /*!< in: prevents reading from a wrong - version of the tablespace in case we have done - DISCARD + IMPORT */ - ulint offset) /*!< in: page number */ -{ - buf_block_t* block; - buf_page_t* bpage = NULL; - buf_page_t* watch_page; - prio_rw_lock_t* hash_lock; - mtr_t mtr; - ulint fold; - ibool lru; - void* data; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - ut_ad(buf_pool); - - *err = DB_SUCCESS; - - if (mode == BUF_READ_IBUF_PAGES_ONLY) { - /* It is a read-ahead within an ibuf routine */ - - ut_ad(!ibuf_bitmap_page(zip_size, offset)); - - ibuf_mtr_start(&mtr); - - if (!recv_no_ibuf_operations - && !ibuf_page(space, zip_size, offset, &mtr)) { - - ibuf_mtr_commit(&mtr); - - return(NULL); - } - } else { - ut_ad(mode == BUF_READ_ANY_PAGE); - } - - if (zip_size && !unzip && !recv_recovery_is_on()) { - block = NULL; - } else { - block = buf_LRU_get_free_block(buf_pool); - ut_ad(block); - ut_ad(buf_pool_from_block(block) == buf_pool); - } - - fold = buf_page_address_fold(space, offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(hash_lock); - - watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) { - /* The page is already in the buffer pool. */ - watch_page = NULL; -err_exit: - mutex_exit(&buf_pool->LRU_list_mutex); - rw_lock_x_unlock(hash_lock); - if (block) { - mutex_enter(&block->mutex); - buf_LRU_block_free_non_file_page(block); - mutex_exit(&block->mutex); - } - - bpage = NULL; - goto func_exit; - } - - if (fil_tablespace_deleted_or_being_deleted_in_mem( - space, tablespace_version)) { - /* The page belongs to a space which has been - deleted or is being deleted. 
*/ - *err = DB_TABLESPACE_DELETED; - - goto err_exit; - } - - if (block) { - bpage = &block->page; - - mutex_enter(&block->mutex); - - ut_ad(buf_pool_from_bpage(bpage) == buf_pool); - - buf_page_init(buf_pool, space, offset, fold, zip_size, block); - -#ifdef PAGE_ATOMIC_REF_COUNT - /* Note: We set the io state without the protection of - the block->lock. This is because other threads cannot - access this block unless it is in the hash table. */ - - buf_page_set_io_fix(bpage, BUF_IO_READ); -#endif /* PAGE_ATOMIC_REF_COUNT */ - - /* The block must be put to the LRU list, to the old blocks */ - buf_LRU_add_block(bpage, TRUE/* to old blocks */); - mutex_exit(&buf_pool->LRU_list_mutex); - - /* We set a pass-type x-lock on the frame because then - the same thread which called for the read operation - (and is running now at this point of code) can wait - for the read to complete by waiting for the x-lock on - the frame; if the x-lock were recursive, the same - thread would illegally get the x-lock before the page - read is completed. The x-lock is cleared by the - io-handler thread. */ - - rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); - -#ifndef PAGE_ATOMIC_REF_COUNT - buf_page_set_io_fix(bpage, BUF_IO_READ); -#endif /* !PAGE_ATOMIC_REF_COUNT */ - - rw_lock_x_unlock(hash_lock); - - if (zip_size) { - /* buf_pool->LRU_list_mutex may be released and - reacquired by buf_buddy_alloc(). Thus, we - must release block->mutex in order not to - break the latching order in the reacquisition - of buf_pool->LRU_list_mutex. We also must defer this - operation until after the block descriptor has - been added to buf_pool->LRU and - buf_pool->page_hash. 
*/ - mutex_exit(&block->mutex); - mutex_enter(&buf_pool->LRU_list_mutex); - data = buf_buddy_alloc(buf_pool, zip_size, &lru); - mutex_enter(&block->mutex); - block->page.zip.data = (page_zip_t*) data; - - /* To maintain the invariant - block->in_unzip_LRU_list - == buf_page_belongs_to_unzip_LRU(&block->page) - we have to add this block to unzip_LRU - after block->page.zip.data is set. */ - ut_ad(buf_page_belongs_to_unzip_LRU(&block->page)); - buf_unzip_LRU_add_block(block, TRUE); - mutex_exit(&buf_pool->LRU_list_mutex); - } - - mutex_exit(&block->mutex); - } else { - rw_lock_x_unlock(hash_lock); - - /* The compressed page must be allocated before the - control block (bpage), in order to avoid the - invocation of buf_buddy_relocate_block() on - uninitialized data. */ - data = buf_buddy_alloc(buf_pool, zip_size, &lru); - - rw_lock_x_lock(hash_lock); - - /* We must check the page_hash again, as it may have been - modified. */ - - watch_page = buf_page_hash_get_low( - buf_pool, space, offset, fold); - - if (UNIV_UNLIKELY(watch_page - && !buf_pool_watch_is_sentinel(buf_pool, - watch_page))) { - - /* The block was added by some other thread. */ - mutex_exit(&buf_pool->LRU_list_mutex); - rw_lock_x_unlock(hash_lock); - watch_page = NULL; - buf_buddy_free(buf_pool, data, zip_size); - - bpage = NULL; - goto func_exit; - } - - bpage = buf_page_alloc_descriptor(); - - /* Initialize the buf_pool pointer. 
*/ - bpage->buf_pool_index = buf_pool_index(buf_pool); - - page_zip_des_init(&bpage->zip); - page_zip_set_size(&bpage->zip, zip_size); - bpage->zip.data = (page_zip_t*) data; - - bpage->slot = NULL; - - mutex_enter(&buf_pool->zip_mutex); - UNIV_MEM_DESC(bpage->zip.data, - page_zip_get_size(&bpage->zip)); - - buf_page_init_low(bpage); - - bpage->state = BUF_BLOCK_ZIP_PAGE; - bpage->space = static_cast<ib_uint32_t>(space); - bpage->offset = static_cast<ib_uint32_t>(offset); - -#ifdef UNIV_DEBUG - bpage->in_page_hash = FALSE; - bpage->in_zip_hash = FALSE; - bpage->in_flush_list = FALSE; - bpage->in_free_list = FALSE; - bpage->in_LRU_list = FALSE; -#endif /* UNIV_DEBUG */ - - ut_d(bpage->in_page_hash = TRUE); - - if (watch_page != NULL) { - - /* Preserve the reference count. */ - ib_uint32_t buf_fix_count; - - buf_fix_count = watch_page->buf_fix_count; - - ut_a(buf_fix_count > 0); - - ut_ad(buf_own_zip_mutex_for_page(bpage)); - -#ifdef PAGE_ATOMIC_REF_COUNT - os_atomic_increment_uint32( - &bpage->buf_fix_count, buf_fix_count); -#else - bpage->buf_fix_count += buf_fix_count; -#endif /* PAGE_ATOMIC_REF_COUNT */ - - ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page)); - buf_pool_watch_remove(buf_pool, fold, watch_page); - } - - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, - bpage); - - rw_lock_x_unlock(hash_lock); - - /* The block must be put to the LRU list, to the old blocks. 
- The zip_size is already set into the page zip */ - buf_LRU_add_block(bpage, TRUE/* to old blocks */); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - buf_LRU_insert_zip_clean(bpage); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - mutex_exit(&buf_pool->LRU_list_mutex); - - buf_page_set_io_fix(bpage, BUF_IO_READ); - - mutex_exit(&buf_pool->zip_mutex); - } - - os_atomic_increment_ulint(&buf_pool->n_pend_reads, 1); -func_exit: - - if (mode == BUF_READ_IBUF_PAGES_ONLY) { - - ibuf_mtr_commit(&mtr); - } - - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(!bpage || buf_page_in_file(bpage)); - return(bpage); -} - -/********************************************************************//** -Initializes a page to the buffer buf_pool. The page is usually not read -from a file even if it cannot be found in the buffer buf_pool. This is one -of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_get_gen). 
-@return pointer to the block, page bufferfixed */ -UNIV_INTERN -buf_block_t* -buf_page_create( -/*============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space in units of - a page */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - buf_frame_t* frame; - buf_block_t* block; - ulint fold; - buf_block_t* free_block = NULL; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - prio_rw_lock_t* hash_lock; - - ut_ad(mtr); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(space || !zip_size); - - free_block = buf_LRU_get_free_block(buf_pool); - - fold = buf_page_address_fold(space, offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(hash_lock); - - block = (buf_block_t*) buf_page_hash_get_low( - buf_pool, space, offset, fold); - - if (block - && buf_page_in_file(&block->page) - && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) { -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(space, offset) == 0); -#endif -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - block->page.file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ - - /* Page can be found in buf_pool */ - rw_lock_x_unlock(hash_lock); - mutex_exit(&buf_pool->LRU_list_mutex); - - buf_block_free(free_block); - - return(buf_page_get_with_no_latch(space, zip_size, offset, mtr)); - } - - /* If we get here, the page was not in buf_pool: init it there */ - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Creating space %lu page %lu to buffer\n", - space, offset); - } -#endif /* UNIV_DEBUG */ - - block = free_block; - - mutex_enter(&block->mutex); - - buf_page_init(buf_pool, space, offset, fold, zip_size, block); - - rw_lock_x_unlock(hash_lock); - - /* The block must be put to the LRU list */ - buf_LRU_add_block(&block->page, FALSE); - - 
buf_block_buf_fix_inc(block, __FILE__, __LINE__); - buf_pool->stat.n_pages_created++; - - if (zip_size) { - void* data; - ibool lru; - - /* Prevent race conditions during buf_buddy_alloc(), - which may release and reacquire buf_pool->LRU_list_mutex, - by IO-fixing and X-latching the block. */ - - buf_page_set_io_fix(&block->page, BUF_IO_READ); - rw_lock_x_lock(&block->lock); - - mutex_exit(&block->mutex); - /* buf_pool->LRU_list_mutex may be released and reacquired by - buf_buddy_alloc(). Thus, we must release block->mutex - in order not to break the latching order in - the reacquisition of buf_pool->LRU_list_mutex. We also must - defer this operation until after the block descriptor - has been added to buf_pool->LRU and buf_pool->page_hash. */ - data = buf_buddy_alloc(buf_pool, zip_size, &lru); - mutex_enter(&block->mutex); - block->page.zip.data = (page_zip_t*) data; - - /* To maintain the invariant - block->in_unzip_LRU_list - == buf_page_belongs_to_unzip_LRU(&block->page) - we have to add this block to unzip_LRU after - block->page.zip.data is set. 
*/ - ut_ad(buf_page_belongs_to_unzip_LRU(&block->page)); - buf_unzip_LRU_add_block(block, FALSE); - - buf_page_set_io_fix(&block->page, BUF_IO_NONE); - rw_lock_x_unlock(&block->lock); - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); - - buf_page_set_accessed(&block->page); - - mutex_exit(&block->mutex); - - /* Delete possible entries for the page from the insert buffer: - such can exist if the page belonged to an index which was dropped */ - - ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE); - - frame = block->frame; - - memset(frame + FIL_PAGE_PREV, 0xff, 4); - memset(frame + FIL_PAGE_NEXT, 0xff, 4); - mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED); - - /* FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is only used on the - following pages: - (1) The first page of the InnoDB system tablespace (page 0:0) - (2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages - (3) key_version on encrypted pages (not page 0:0) */ - - memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(++buf_dbg_counter % 5771 || buf_validate()); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); -#endif - return(block); -} - -/********************************************************************//** -Monitor the buffer page read/write activity, and increment corresponding -counter value if MONITOR_MODULE_BUF_PAGE (module_buf_page) module is -enabled. 
*/ -static -void -buf_page_monitor( -/*=============*/ - const buf_page_t* bpage, /*!< in: pointer to the block */ - enum buf_io_fix io_type)/*!< in: io_fix types */ -{ - const byte* frame; - monitor_id_t counter; - - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - - /* If the counter module is not turned on, just return */ - if (!MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)) { - return; - } - - ut_a(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE); - - frame = bpage->zip.data - ? bpage->zip.data - : ((buf_block_t*) bpage)->frame; - - switch (fil_page_get_type(frame)) { - ulint level; - - case FIL_PAGE_INDEX: - level = btr_page_get_level_low(frame); - - /* Check if it is an index page for insert buffer */ - if (btr_page_get_index_id(frame) - == (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) { - if (level == 0) { - counter = MONITOR_RW_COUNTER( - io_type, MONITOR_INDEX_IBUF_LEAF_PAGE); - } else { - counter = MONITOR_RW_COUNTER( - io_type, - MONITOR_INDEX_IBUF_NON_LEAF_PAGE); - } - } else { - if (level == 0) { - counter = MONITOR_RW_COUNTER( - io_type, MONITOR_INDEX_LEAF_PAGE); - } else { - counter = MONITOR_RW_COUNTER( - io_type, MONITOR_INDEX_NON_LEAF_PAGE); - } - } - break; - - case FIL_PAGE_UNDO_LOG: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE); - break; - - case FIL_PAGE_INODE: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE); - break; - - case FIL_PAGE_IBUF_FREE_LIST: - counter = MONITOR_RW_COUNTER(io_type, - MONITOR_IBUF_FREELIST_PAGE); - break; - - case FIL_PAGE_IBUF_BITMAP: - counter = MONITOR_RW_COUNTER(io_type, - MONITOR_IBUF_BITMAP_PAGE); - break; - - case FIL_PAGE_TYPE_SYS: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE); - break; - - case FIL_PAGE_TYPE_TRX_SYS: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE); - break; - - case FIL_PAGE_TYPE_FSP_HDR: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE); - break; - - case FIL_PAGE_TYPE_XDES: - counter = MONITOR_RW_COUNTER(io_type, 
MONITOR_XDES_PAGE); - break; - - case FIL_PAGE_TYPE_BLOB: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE); - break; - - case FIL_PAGE_TYPE_ZBLOB: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE); - break; - - case FIL_PAGE_TYPE_ZBLOB2: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE); - break; - - default: - counter = MONITOR_RW_COUNTER(io_type, MONITOR_OTHER_PAGE); - } - - MONITOR_INC_NOCHECK(counter); -} - -/********************************************************************//** -Mark a table with the specified space pointed by bpage->space corrupted. -Also remove the bpage from LRU list. -@param[in,out] bpage Block */ -static -void -buf_mark_space_corrupt( - buf_page_t* bpage) -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ibool uncompressed = (buf_page_get_state(bpage) - == BUF_BLOCK_FILE_PAGE); - ulint space = bpage->space; - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - /* First unfix and release lock on the bpage */ - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - - mutex_enter(&buf_pool->LRU_list_mutex); - rw_lock_x_lock(hash_lock); - mutex_enter(block_mutex); - ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ); - ut_ad(bpage->buf_fix_count == 0); - - /* Set BUF_IO_NONE before we remove the block from LRU list */ - buf_page_set_io_fix(bpage, BUF_IO_NONE); - - if (uncompressed) { - rw_lock_x_unlock_gen( - &((buf_block_t*) bpage)->lock, - BUF_IO_READ); - } - - /* If block is not encrypted find the table with specified - space id, and mark it corrupted. Encrypted tables - are marked unusable later e.g. in ::open(). */ - if (!bpage->encrypted) { - dict_set_corrupted_by_space(space); - } else { - dict_set_encrypted_by_space(space); - } - - /* After this point bpage can't be referenced. This - function will release the hash_lock acquired above. 
*/ - buf_LRU_free_one_page(bpage); - - ut_ad(buf_pool->n_pend_reads > 0); - os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/** Check if page is maybe compressed, encrypted or both when we encounter -corrupted page. Note that we can't be 100% sure if page is corrupted -or decrypt/decompress just failed. -@param[in,out] bpage page -@param[in,out] space tablespace from fil_space_acquire_for_io() -@return whether the operation succeeded -@retval DB_SUCCESS if page has been read and is not corrupted -@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted -@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but -after decryption normal page checksum does not match. -@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */ -static -dberr_t -buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) -{ - ut_ad(space->n_pending_ios > 0); - - ulint zip_size = buf_page_get_zip_size(bpage); - byte* dst_frame = (zip_size) ? bpage->zip.data : - ((buf_block_t*) bpage)->frame; - bool still_encrypted = false; - dberr_t err = DB_SUCCESS; - bool corrupted = false; - fil_space_crypt_t* crypt_data = space->crypt_data; - - /* In buf_decrypt_after_read we have either decrypted the page if - page post encryption checksum matches and used key_id is found - from the encryption plugin. If checksum did not match page was - not decrypted and it could be either encrypted and corrupted - or corrupted or good page. If we decrypted, there page could - still be corrupted if used key does not match. */ - still_encrypted = (crypt_data && - crypt_data->type != CRYPT_SCHEME_UNENCRYPTED && - !bpage->encrypted && - fil_space_verify_crypt_checksum(dst_frame, zip_size, - space, bpage->offset)); - - if (!still_encrypted) { - /* If traditional checksums match, we assume that page is - not anymore encrypted. 
*/ - corrupted = buf_page_is_corrupted(true, dst_frame, zip_size, - space); - - if (!corrupted) { - bpage->encrypted = false; - } else { - err = DB_PAGE_CORRUPTED; - } - } - - /* Pages that we think are unencrypted but do not match the checksum - checks could be corrupted or encrypted or both. */ - if (corrupted && !bpage->encrypted) { - /* An error will be reported by - buf_page_io_complete(). */ - } else if (still_encrypted || (bpage->encrypted && corrupted)) { - bpage->encrypted = true; - err = DB_DECRYPTION_FAILED; - - ib_logf(IB_LOG_LEVEL_ERROR, - "The page [page id: space=%u" - ", page number=%u]" - " in file %s cannot be decrypted.", - bpage->space, bpage->offset, - space->name); - - ib_logf(IB_LOG_LEVEL_INFO, - "However key management plugin or used key_version " ULINTPF - " is not found or" - " used encryption algorithm or method does not match.", - mach_read_from_4(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION)); - - if (bpage->space > TRX_SYS_SPACE) { - ib_logf(IB_LOG_LEVEL_INFO, - "Marking tablespace as missing. You may drop this table or" - " install correct key management plugin and key file."); - } - } - - return (err); -} - -/** Complete a read or write request of a file page to or from the buffer pool. 
-@param[in,out] bpage Page to complete -@return whether the operation succeeded -@retval DB_SUCCESS always when writing, or if a read page was OK -@retval DB_PAGE_CORRUPTED if the checksum fails on a page read -@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but - after decryption normal page checksum does - not match */ -UNIV_INTERN -dberr_t -buf_page_io_complete(buf_page_t* bpage) -{ - enum buf_io_fix io_type; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ibool uncompressed = (buf_page_get_state(bpage) - == BUF_BLOCK_FILE_PAGE); - bool have_LRU_mutex = false; - byte* frame = NULL; - dberr_t err = DB_SUCCESS; - - ut_a(buf_page_in_file(bpage)); - - /* We do not need protect io_fix here by mutex to read - it because this is the only function where we can change the value - from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code - ensures that this is the only thread that handles the i/o for this - block. */ - - io_type = buf_page_get_io_fix_unlocked(bpage); - ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE); - - if (io_type == BUF_IO_READ) { - ulint read_page_no = 0; - ulint read_space_id = 0; - uint key_version = 0; - - ut_ad(bpage->zip.data || ((buf_block_t*)bpage)->frame); - fil_space_t* space = fil_space_acquire_for_io(bpage->space); - if (!space) { - return(DB_TABLESPACE_DELETED); - } - - buf_page_decrypt_after_read(bpage, space); - - if (buf_page_get_zip_size(bpage)) { - frame = bpage->zip.data; - } else { - frame = ((buf_block_t*) bpage)->frame; - } - - if (buf_page_get_zip_size(bpage)) { - frame = bpage->zip.data; - os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1); - if (uncompressed - && !buf_zip_decompress((buf_block_t*) bpage, - FALSE)) { - - os_atomic_decrement_ulint( - &buf_pool->n_pend_unzip, 1); - - ib_logf(IB_LOG_LEVEL_INFO, - "Page %u in tablespace %u zip_decompress failure.", - bpage->offset, bpage->space); - - err = DB_PAGE_CORRUPTED; - - goto database_corrupted; - } - - 
os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); - } else { - ut_a(uncompressed); - frame = ((buf_block_t*) bpage)->frame; - } - - /* If this page is not uninitialized and not in the - doublewrite buffer, then the page number and space id - should be the same as in block. */ - read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET); - read_space_id = mach_read_from_4( - frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - key_version = mach_read_from_4( - frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - if (bpage->space == TRX_SYS_SPACE - && buf_dblwr_page_inside(bpage->offset)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: reading page %u\n" - "InnoDB: which is in the" - " doublewrite buffer!\n", - bpage->offset); - } else if (!read_space_id && !read_page_no) { - /* This is likely an uninitialized page. */ - } else if ((bpage->space - && bpage->space != read_space_id) - || bpage->offset != read_page_no) { - /* We did not compare space_id to read_space_id - if bpage->space == 0, because the field on the - page may contain garbage in MySQL < 4.1.1, - which only supported bpage->space == 0. 
*/ - - ib_logf(IB_LOG_LEVEL_ERROR, - "Space id and page n:o" - " stored in the page" - " read in are " ULINTPF ":" ULINTPF "," - " should be %u:%u!", - read_space_id, - read_page_no, - bpage->space, - bpage->offset); - } - - if (UNIV_LIKELY(!bpage->is_corrupt || - !srv_pass_corrupt_table)) { - err = buf_page_check_corrupt(bpage, space); - } - -database_corrupted: - - if (err != DB_SUCCESS) { - /* Not a real corruption if it was triggered by - error injection */ - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", - if (bpage->space > TRX_SYS_SPACE) { - buf_mark_space_corrupt(bpage); - ib_logf(IB_LOG_LEVEL_INFO, - "Simulated page corruption"); - fil_space_release_for_io(space); - return(err); - } - err = DB_SUCCESS; - goto page_not_corrupt; - ); - - if (err == DB_PAGE_CORRUPTED) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Database page corruption on disk" - " or a failed file read of tablespace %s" - " page [page id: space=%u" - ", page number=%u]" - ". You may have to recover from " - "a backup.", - space->name, - bpage->space, bpage->offset); - - buf_page_print(frame, buf_page_get_zip_size(bpage), - BUF_PAGE_PRINT_NO_CRASH); - - ib_logf(IB_LOG_LEVEL_INFO, - "It is also possible that your" - " operating system has corrupted" - " its own file cache and rebooting" - " your computer removes the error." - " If the corrupt page is an index page." - " You can also try to fix the" - " corruption by dumping, dropping," - " and reimporting the corrupt table." - " You can use CHECK TABLE to scan" - " your table for corruption. 
" - "Please refer to " REFMAN "forcing-innodb-recovery.html" - " for information about forcing recovery."); - } - - if (srv_pass_corrupt_table && bpage->space != 0 - && bpage->space < SRV_LOG_SPACE_FIRST_ID) { - - ib_logf(IB_LOG_LEVEL_INFO, - "InnoDB: space %u will be treated as corrupt.", - bpage->space); - fil_space_set_corrupt(bpage->space); - - dict_set_corrupted_by_space(bpage->space); - bpage->is_corrupt = TRUE; - } - - if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { - /* If page space id is larger than TRX_SYS_SPACE - (0), we will attempt to mark the corresponding - table as corrupted instead of crashing server */ - if (bpage->space > TRX_SYS_SPACE) { - buf_mark_space_corrupt(bpage); - fil_space_release_for_io(space); - return(err); - } else { - ib_logf(IB_LOG_LEVEL_FATAL, - "Ending processing because of a corrupt database page."); - } - } - } - - DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", - page_not_corrupt: bpage = bpage; ); - - if (recv_recovery_is_on()) { - /* Pages must be uncompressed for crash recovery. */ - ut_a(uncompressed); - recv_recover_page(TRUE, (buf_block_t*) bpage); - } - - if (uncompressed && !recv_no_ibuf_operations - && fil_page_get_type(frame) == FIL_PAGE_INDEX - && page_is_leaf(frame)) { - - if (bpage && bpage->encrypted) { - ib_logf(IB_LOG_LEVEL_WARN, - "Table in tablespace " ULINTPF " encrypted." - "However key management plugin or used " - " key_version %u is not found or" - " used encryption algorithm or method does not match." 
- " Can't continue opening the table.", - read_space_id, key_version); - } else { - - ibuf_merge_or_delete_for_page( - (buf_block_t*)bpage, bpage->space, - bpage->offset, buf_page_get_zip_size(bpage), - TRUE); - } - - } - - fil_space_release_for_io(space); - } else { - /* io_type == BUF_IO_WRITE */ - if (bpage->slot) { - /* Mark slot free */ - bpage->slot->reserved = false; - bpage->slot = NULL; - } - } - - if (io_type == BUF_IO_WRITE - && ( -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - /* to keep consistency at buf_LRU_insert_zip_clean() */ - buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY || -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) { - - have_LRU_mutex = true; /* optimistic */ - } -retry_mutex: - if (have_LRU_mutex) { - mutex_enter(&buf_pool->LRU_list_mutex); - } - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); - - if (io_type == BUF_IO_WRITE - && ( -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY - || -#endif - buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) - && !have_LRU_mutex) { - - mutex_exit(block_mutex); - have_LRU_mutex = true; - goto retry_mutex; - } - -#ifdef UNIV_IBUF_COUNT_DEBUG - if (io_type == BUF_IO_WRITE || uncompressed) { - /* For BUF_IO_READ of compressed-only blocks, the - buffered operations will be merged by buf_page_get_gen() - after the block has been uncompressed. */ - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); - } -#endif - /* Because this thread which does the unlocking is not the same that - did the locking, we use a pass value != 0 in unlock, which simply - removes the newest lock debug record, without checking the thread - id. */ - - switch (io_type) { - case BUF_IO_READ: - - buf_page_set_io_fix(bpage, BUF_IO_NONE); - - /* NOTE that the call to ibuf may have moved the ownership of - the x-latch to this OS thread: do not let this confuse you in - debugging! 
*/ - - ut_ad(buf_pool->n_pend_reads > 0); - os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1); - os_atomic_increment_ulint(&buf_pool->stat.n_pages_read, 1); - - ut_ad(!have_LRU_mutex); - - if (uncompressed) { - rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_READ); - } - - break; - - case BUF_IO_WRITE: - /* Write means a flush operation: call the completion - routine in the flush system */ - - buf_flush_write_complete(bpage); - - os_atomic_increment_ulint(&buf_pool->stat.n_pages_written, 1); - - if (have_LRU_mutex) { - mutex_exit(&buf_pool->LRU_list_mutex); - } - - if (uncompressed) { - rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_WRITE); - } - - break; - - default: - ut_error; - } - - buf_page_monitor(bpage, io_type); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Has %s page space %lu page no %lu\n", - io_type == BUF_IO_READ ? "read" : "written", - buf_page_get_space(bpage), - buf_page_get_page_no(bpage)); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(block_mutex); - - return(err); -} - -/*********************************************************************//** -Asserts that all file pages in the buffer are in a replaceable state. 
-@return TRUE */ -static -ibool -buf_all_freed_instance( -/*===================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instancce */ -{ - ulint i; - buf_chunk_t* chunk; - - ut_ad(buf_pool); - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - chunk = buf_pool->chunks; - - for (i = buf_pool->n_chunks; i--; chunk++) { - - mutex_enter(&buf_pool->LRU_list_mutex); - - const buf_block_t* block = buf_chunk_not_freed(chunk); - - mutex_exit(&buf_pool->LRU_list_mutex); - - if (UNIV_LIKELY_NULL(block)) { - fil_space_t* space = fil_space_get(block->page.space); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page %u %u still fixed or dirty.", - block->page.space, - block->page.offset); - ib_logf(IB_LOG_LEVEL_ERROR, - "Page oldest_modification " LSN_PF - " fix_count %d io_fix %d.", - block->page.oldest_modification, - block->page.buf_fix_count, - buf_page_get_io_fix(&block->page)); - ib_logf(IB_LOG_LEVEL_FATAL, - "Page space_id %u name %s.", - block->page.space, - (space && space->name) ? space->name : "NULL"); - } - } - - return(TRUE); -} - -/*********************************************************************//** -Invalidates file pages in one buffer pool instance */ -static -void -buf_pool_invalidate_instance( -/*=========================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - ulint i; - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - - mutex_enter(&buf_pool->flush_state_mutex); - - for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) { - - /* As this function is called during startup and - during redo application phase during recovery, InnoDB - is single threaded (apart from IO helper threads) at - this stage. No new write batch can be in intialization - stage at this point. */ - ut_ad(buf_pool->init_flush[i] == FALSE); - - /* However, it is possible that a write batch that has - been posted earlier is still not complete. For buffer - pool invalidation to proceed we must ensure there is NO - write activity happening. 
*/ - if (buf_pool->n_flush[i] > 0) { - buf_flush_t type = static_cast<buf_flush_t>(i); - - mutex_exit(&buf_pool->flush_state_mutex); - buf_flush_wait_batch_end(buf_pool, type); - mutex_enter(&buf_pool->flush_state_mutex); - } - } - mutex_exit(&buf_pool->flush_state_mutex); - - ut_ad(buf_all_freed_instance(buf_pool)); - - while (buf_LRU_scan_and_free_block(buf_pool, TRUE)) { - } - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); - - ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); - ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0); - - buf_pool->freed_page_clock = 0; - buf_pool->LRU_old = NULL; - buf_pool->LRU_old_len = 0; - - mutex_exit(&buf_pool->LRU_list_mutex); - - memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat)); - buf_refresh_io_stats(buf_pool); -} - -/*********************************************************************//** -Invalidates the file pages in the buffer pool when an archive recovery is -completed. All the file pages buffered must be in a replaceable state when -this function is called: not latched and not modified. 
*/ -UNIV_INTERN -void -buf_pool_invalidate(void) -/*=====================*/ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_invalidate_instance(buf_pool_from_array(i)); - } -} - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/*********************************************************************//** -Validates data in one buffer pool instance -@return TRUE */ -static -ibool -buf_pool_validate_instance( -/*=======================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - buf_page_t* b; - buf_chunk_t* chunk; - ulint i; - ulint n_lru_flush = 0; - ulint n_page_flush = 0; - ulint n_list_flush = 0; - ulint n_lru = 0; - ulint n_flush = 0; - ulint n_free = 0; - ulint n_zip = 0; - ulint fold = 0; - ulint space = 0; - ulint offset = 0; - - ut_ad(buf_pool); - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); - hash_lock_x_all(buf_pool->page_hash); - mutex_enter(&buf_pool->zip_mutex); - mutex_enter(&buf_pool->free_list_mutex); - mutex_enter(&buf_pool->flush_state_mutex); - - chunk = buf_pool->chunks; - - /* Check the uncompressed blocks. */ - - for (i = buf_pool->n_chunks; i--; chunk++) { - - ulint j; - buf_block_t* block = chunk->blocks; - - for (j = chunk->size; j--; block++) { - - switch (buf_block_get_state(block)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - /* These should only occur on - zip_clean, zip_free[], or flush_list. 
*/ - ut_error; - break; - - case BUF_BLOCK_FILE_PAGE: - - space = buf_block_get_space(block); - offset = buf_block_get_page_no(block); - fold = buf_page_address_fold(space, offset); - ut_a(buf_page_hash_get_low(buf_pool, - space, - offset, - fold) - == &block->page); - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(buf_page_get_io_fix_unlocked(&block->page) - == BUF_IO_READ - || !ibuf_count_get(buf_block_get_space( - block), - buf_block_get_page_no( - block))); -#endif - switch (buf_page_get_io_fix_unlocked( - &block->page)) { - case BUF_IO_NONE: - break; - - case BUF_IO_WRITE: - switch (buf_page_get_flush_type( - &block->page)) { - case BUF_FLUSH_LRU: - case BUF_FLUSH_SINGLE_PAGE: - case BUF_FLUSH_LIST: - break; - default: - ut_error; - } - - break; - - case BUF_IO_READ: - - ut_a(rw_lock_is_locked(&block->lock, - RW_LOCK_EX)); - break; - - case BUF_IO_PIN: - break; - } - - n_lru++; - break; - - case BUF_BLOCK_NOT_USED: - n_free++; - break; - - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - /* do nothing */ - break; - } - } - } - - /* Check clean compressed-only blocks. */ - - for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; - b = UT_LIST_GET_NEXT(list, b)) { - ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); - switch (buf_page_get_io_fix(b)) { - case BUF_IO_NONE: - case BUF_IO_PIN: - /* All clean blocks should be I/O-unfixed. */ - break; - case BUF_IO_READ: - /* In buf_LRU_free_page(), we temporarily set - b->io_fix = BUF_IO_READ for a newly allocated - control block in order to prevent - buf_page_get_gen() from decompressing the block. */ - break; - default: - ut_error; - break; - } - - /* It is OK to read oldest_modification here because - we have acquired buf_pool->zip_mutex above which acts - as the 'block->mutex' for these bpages. 
*/ - ut_a(!b->oldest_modification); - fold = buf_page_address_fold(b->space, b->offset); - ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset, - fold) == b); - n_lru++; - n_zip++; - } - - /* Check dirty blocks. */ - - buf_flush_list_mutex_enter(buf_pool); - for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; - b = UT_LIST_GET_NEXT(list, b)) { - ut_ad(b->in_flush_list); - ut_a(b->oldest_modification); - n_flush++; - - switch (buf_page_get_state(b)) { - case BUF_BLOCK_ZIP_DIRTY: - n_lru++; - n_zip++; - /* fallthrough */ - case BUF_BLOCK_FILE_PAGE: - switch (buf_page_get_io_fix_unlocked(b)) { - case BUF_IO_NONE: - case BUF_IO_READ: - case BUF_IO_PIN: - break; - case BUF_IO_WRITE: - switch (buf_page_get_flush_type(b)) { - case BUF_FLUSH_LRU: - n_lru_flush++; - break; - case BUF_FLUSH_SINGLE_PAGE: - n_page_flush++; - break; - case BUF_FLUSH_LIST: - n_list_flush++; - break; - default: - ut_error; - } - break; - default: - ut_error; - } - break; - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; - } - fold = buf_page_address_fold(b->space, b->offset); - ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset, - fold) == b); - } - - ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); - - hash_unlock_x_all(buf_pool->page_hash); - buf_flush_list_mutex_exit(buf_pool); - - mutex_exit(&buf_pool->zip_mutex); - - if (n_lru + n_free > buf_pool->curr_size + n_zip) { - fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n", - n_lru, n_free, - buf_pool->curr_size, n_zip); - ut_error; - } - - ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); - - mutex_exit(&buf_pool->LRU_list_mutex); - - if (UT_LIST_GET_LEN(buf_pool->free) != n_free) { - fprintf(stderr, "Free list len %lu, free blocks %lu\n", - UT_LIST_GET_LEN(buf_pool->free), - n_free); - ut_error; - } - - mutex_exit(&buf_pool->free_list_mutex); - - 
ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); - ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); - ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush); - - mutex_exit(&buf_pool->flush_state_mutex); - - ut_a(buf_LRU_validate()); - ut_a(buf_flush_validate(buf_pool)); - - return(TRUE); -} - -/*********************************************************************//** -Validates the buffer buf_pool data structure. -@return TRUE */ -UNIV_INTERN -ibool -buf_validate(void) -/*==============*/ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - buf_pool_validate_instance(buf_pool); - } - return(TRUE); -} - -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/*********************************************************************//** -Prints info of the buffer buf_pool data structure for one instance. */ -static -void -buf_print_instance( -/*===============*/ - buf_pool_t* buf_pool) -{ - index_id_t* index_ids; - ulint* counts; - ulint size; - ulint i; - ulint j; - index_id_t id; - ulint n_found; - buf_chunk_t* chunk; - dict_index_t* index; - - ut_ad(buf_pool); - - size = buf_pool->curr_size; - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - index_ids = static_cast<index_id_t*>( - mem_alloc(size * sizeof *index_ids)); - - counts = static_cast<ulint*>(mem_alloc(sizeof(ulint) * size)); - - /* Dirty reads below */ - - fprintf(stderr, - "buf_pool size %lu\n" - "database pages %lu\n" - "free pages %lu\n" - "modified database pages %lu\n" - "n pending decompressions %lu\n" - "n pending reads %lu\n" - "n pending flush LRU %lu list %lu single page %lu\n" - "pages made young %lu, not young %lu\n" - "pages read %lu, created %lu, written %lu\n", - (ulint) size, - (ulint) UT_LIST_GET_LEN(buf_pool->LRU), - (ulint) UT_LIST_GET_LEN(buf_pool->free), - (ulint) UT_LIST_GET_LEN(buf_pool->flush_list), - (ulint) 
buf_pool->n_pend_unzip, - (ulint) buf_pool->n_pend_reads, - (ulint) buf_pool->n_flush[BUF_FLUSH_LRU], - (ulint) buf_pool->n_flush[BUF_FLUSH_LIST], - (ulint) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE], - (ulint) buf_pool->stat.n_pages_made_young, - (ulint) buf_pool->stat.n_pages_not_made_young, - (ulint) buf_pool->stat.n_pages_read, - (ulint) buf_pool->stat.n_pages_created, - (ulint) buf_pool->stat.n_pages_written); - - /* Count the number of blocks belonging to each index in the buffer */ - - n_found = 0; - - mutex_enter(&buf_pool->LRU_list_mutex); - - chunk = buf_pool->chunks; - - for (i = buf_pool->n_chunks; i--; chunk++) { - buf_block_t* block = chunk->blocks; - ulint n_blocks = chunk->size; - - for (; n_blocks--; block++) { - const buf_frame_t* frame = block->frame; - - if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { - - id = btr_page_get_index_id(frame); - - /* Look for the id in the index_ids array */ - j = 0; - - while (j < n_found) { - - if (index_ids[j] == id) { - counts[j]++; - - break; - } - j++; - } - - if (j == n_found) { - n_found++; - index_ids[j] = id; - counts[j] = 1; - } - } - } - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - for (i = 0; i < n_found; i++) { - index = dict_index_get_if_in_cache(index_ids[i]); - - fprintf(stderr, - "Block count for index %llu in buffer is about %lu", - (ullint) index_ids[i], - (ulint) counts[i]); - - if (index) { - putc(' ', stderr); - dict_index_name_print(stderr, NULL, index); - } - - putc('\n', stderr); - } - - mem_free(index_ids); - mem_free(counts); - - ut_a(buf_pool_validate_instance(buf_pool)); -} - -/*********************************************************************//** -Prints info of the buffer buf_pool data structure. 
*/ -UNIV_INTERN -void -buf_print(void) -/*===========*/ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - buf_print_instance(buf_pool); - } -} -#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Returns the number of latched pages in the buffer pool. -@return number of latched pages */ -UNIV_INTERN -ulint -buf_get_latched_pages_number_instance( -/*==================================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - buf_page_t* b; - ulint i; - buf_chunk_t* chunk; - ulint fixed_pages_number = 0; - - /* The LRU list mutex is enough to protect the required fields below */ - mutex_enter(&buf_pool->LRU_list_mutex); - - chunk = buf_pool->chunks; - - for (i = buf_pool->n_chunks; i--; chunk++) { - buf_block_t* block; - ulint j; - - block = chunk->blocks; - - for (j = chunk->size; j--; block++) { - if (buf_block_get_state(block) - != BUF_BLOCK_FILE_PAGE) { - - continue; - } - - if (block->page.buf_fix_count != 0 - || buf_page_get_io_fix_unlocked(&block->page) - != BUF_IO_NONE) { - fixed_pages_number++; - } - - } - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - mutex_enter(&buf_pool->zip_mutex); - - /* Traverse the lists of clean and dirty compressed-only blocks. 
*/ - - for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b; - b = UT_LIST_GET_NEXT(list, b)) { - ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE); - ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE); - - if (b->buf_fix_count != 0 - || buf_page_get_io_fix(b) != BUF_IO_NONE) { - fixed_pages_number++; - } - } - - buf_flush_list_mutex_enter(buf_pool); - for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b; - b = UT_LIST_GET_NEXT(list, b)) { - ut_ad(b->in_flush_list); - - switch (buf_page_get_state(b)) { - case BUF_BLOCK_ZIP_DIRTY: - if (b->buf_fix_count != 0 - || buf_page_get_io_fix(b) != BUF_IO_NONE) { - fixed_pages_number++; - } - break; - case BUF_BLOCK_FILE_PAGE: - /* uncompressed page */ - case BUF_BLOCK_REMOVE_HASH: - /* We hold flush list but not LRU list mutex here. - Thus encountering BUF_BLOCK_REMOVE_HASH pages is - possible. */ - break; - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - ut_error; - break; - } - } - - buf_flush_list_mutex_exit(buf_pool); - mutex_exit(&buf_pool->zip_mutex); - - return(fixed_pages_number); -} - -/*********************************************************************//** -Returns the number of latched pages in all the buffer pools. -@return number of latched pages */ -UNIV_INTERN -ulint -buf_get_latched_pages_number(void) -/*==============================*/ -{ - ulint i; - ulint total_latched_pages = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - total_latched_pages += buf_get_latched_pages_number_instance( - buf_pool); - } - - return(total_latched_pages); -} - -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Returns the number of pending buf pool read ios. 
-@return number of pending read I/O operations */ -UNIV_INTERN -ulint -buf_get_n_pending_read_ios(void) -/*============================*/ -{ - ulint i; - ulint pend_ios = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - pend_ios += buf_pool_from_array(i)->n_pend_reads; - } - - return(pend_ios); -} - -/*********************************************************************//** -Returns the ratio in percents of modified pages in the buffer pool / -database pages in the buffer pool. -@return modified page percentage ratio */ -UNIV_INTERN -double -buf_get_modified_ratio_pct(void) -/*============================*/ -{ - double percentage = 0.0; - ulint lru_len = 0; - ulint free_len = 0; - ulint flush_list_len = 0; - - buf_get_total_list_len(&lru_len, &free_len, &flush_list_len); - - percentage = (100.0 * flush_list_len) / (1.0 + lru_len + free_len); - - /* 1 + is there to avoid division by zero */ - - return(percentage); -} - -/*******************************************************************//** -Aggregates a pool stats information with the total buffer pool stats */ -static -void -buf_stats_aggregate_pool_info( -/*==========================*/ - buf_pool_info_t* total_info, /*!< in/out: the buffer pool - info to store aggregated - result */ - const buf_pool_info_t* pool_info) /*!< in: individual buffer pool - stats info */ -{ - ut_a(total_info && pool_info); - - /* Nothing to copy if total_info is the same as pool_info */ - if (total_info == pool_info) { - return; - } - - total_info->pool_size += pool_info->pool_size; - total_info->pool_size_bytes += pool_info->pool_size_bytes; - total_info->lru_len += pool_info->lru_len; - total_info->old_lru_len += pool_info->old_lru_len; - total_info->free_list_len += pool_info->free_list_len; - total_info->flush_list_len += pool_info->flush_list_len; - total_info->n_pend_unzip += pool_info->n_pend_unzip; - total_info->n_pend_reads += pool_info->n_pend_reads; - total_info->n_pending_flush_lru += pool_info->n_pending_flush_lru; 
- total_info->n_pending_flush_list += pool_info->n_pending_flush_list; - total_info->n_pages_made_young += pool_info->n_pages_made_young; - total_info->n_pages_not_made_young += pool_info->n_pages_not_made_young; - total_info->n_pages_read += pool_info->n_pages_read; - total_info->n_pages_created += pool_info->n_pages_created; - total_info->n_pages_written += pool_info->n_pages_written; - total_info->n_page_gets += pool_info->n_page_gets; - total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd; - total_info->n_ra_pages_read += pool_info->n_ra_pages_read; - total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted; - total_info->page_made_young_rate += pool_info->page_made_young_rate; - total_info->page_not_made_young_rate += - pool_info->page_not_made_young_rate; - total_info->pages_read_rate += pool_info->pages_read_rate; - total_info->pages_created_rate += pool_info->pages_created_rate; - total_info->pages_written_rate += pool_info->pages_written_rate; - total_info->n_page_get_delta += pool_info->n_page_get_delta; - total_info->page_read_delta += pool_info->page_read_delta; - total_info->young_making_delta += pool_info->young_making_delta; - total_info->not_young_making_delta += pool_info->not_young_making_delta; - total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate; - total_info->pages_readahead_rate += pool_info->pages_readahead_rate; - total_info->pages_evicted_rate += pool_info->pages_evicted_rate; - total_info->unzip_lru_len += pool_info->unzip_lru_len; - total_info->io_sum += pool_info->io_sum; - total_info->io_cur += pool_info->io_cur; - total_info->unzip_sum += pool_info->unzip_sum; - total_info->unzip_cur += pool_info->unzip_cur; -} -/*******************************************************************//** -Collect buffer pool stats information for a buffer pool. 
Also -record aggregated stats if there are more than one buffer pool -in the server */ -UNIV_INTERN -void -buf_stats_get_pool_info( -/*====================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool */ - ulint pool_id, /*!< in: buffer pool ID */ - buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info - to fill */ -{ - buf_pool_info_t* pool_info; - time_t current_time; - double time_elapsed; - - /* Find appropriate pool_info to store stats for this buffer pool */ - pool_info = &all_pool_info[pool_id]; - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - - pool_info->pool_unique_id = pool_id; - - pool_info->pool_size = buf_pool->curr_size; - - pool_info->pool_size_bytes = buf_pool->curr_pool_size; - - pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU); - - pool_info->old_lru_len = buf_pool->LRU_old_len; - - pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free); - - pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list); - - pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU); - - pool_info->n_pend_reads = buf_pool->n_pend_reads; - - mutex_enter(&buf_pool->flush_state_mutex); - - pool_info->n_pending_flush_lru = - (buf_pool->n_flush[BUF_FLUSH_LRU] - + buf_pool->init_flush[BUF_FLUSH_LRU]); - - pool_info->n_pending_flush_list = - (buf_pool->n_flush[BUF_FLUSH_LIST] - + buf_pool->init_flush[BUF_FLUSH_LIST]); - - pool_info->n_pending_flush_single_page = - (buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] - + buf_pool->init_flush[BUF_FLUSH_SINGLE_PAGE]); - - mutex_exit(&buf_pool->flush_state_mutex); - - current_time = time(NULL); - time_elapsed = 0.001 + difftime(current_time, - buf_pool->last_printout_time); - - pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young; - - pool_info->n_pages_not_made_young = - buf_pool->stat.n_pages_not_made_young; - - pool_info->n_pages_read = buf_pool->stat.n_pages_read; - - pool_info->n_pages_created = buf_pool->stat.n_pages_created; - - pool_info->n_pages_written = buf_pool->stat.n_pages_written; - 
- pool_info->n_page_gets = buf_pool->stat.n_page_gets; - - pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd; - pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read; - - pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted; - - pool_info->page_made_young_rate = - (buf_pool->stat.n_pages_made_young - - buf_pool->old_stat.n_pages_made_young) / time_elapsed; - - pool_info->page_not_made_young_rate = - (buf_pool->stat.n_pages_not_made_young - - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed; - - pool_info->pages_read_rate = - (buf_pool->stat.n_pages_read - - buf_pool->old_stat.n_pages_read) / time_elapsed; - - pool_info->pages_created_rate = - (buf_pool->stat.n_pages_created - - buf_pool->old_stat.n_pages_created) / time_elapsed; - - pool_info->pages_written_rate = - (buf_pool->stat.n_pages_written - - buf_pool->old_stat.n_pages_written) / time_elapsed; - - pool_info->n_page_get_delta = buf_pool->stat.n_page_gets - - buf_pool->old_stat.n_page_gets; - - if (pool_info->n_page_get_delta) { - pool_info->page_read_delta = buf_pool->stat.n_pages_read - - buf_pool->old_stat.n_pages_read; - - pool_info->young_making_delta = - buf_pool->stat.n_pages_made_young - - buf_pool->old_stat.n_pages_made_young; - - pool_info->not_young_making_delta = - buf_pool->stat.n_pages_not_made_young - - buf_pool->old_stat.n_pages_not_made_young; - } - pool_info->pages_readahead_rnd_rate = - (buf_pool->stat.n_ra_pages_read_rnd - - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed; - - - pool_info->pages_readahead_rate = - (buf_pool->stat.n_ra_pages_read - - buf_pool->old_stat.n_ra_pages_read) / time_elapsed; - - pool_info->pages_evicted_rate = - (buf_pool->stat.n_ra_pages_evicted - - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed; - - pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU); - - pool_info->io_sum = buf_LRU_stat_sum.io; - - pool_info->io_cur = buf_LRU_stat_cur.io; - - pool_info->unzip_sum = buf_LRU_stat_sum.unzip; 
- - pool_info->unzip_cur = buf_LRU_stat_cur.unzip; - - buf_refresh_io_stats(buf_pool); -} - -/*********************************************************************//** -Prints info of the buffer i/o. */ -UNIV_INTERN -void -buf_print_io_instance( -/*==================*/ - buf_pool_info_t*pool_info, /*!< in: buffer pool info */ - FILE* file) /*!< in/out: buffer where to print */ -{ - ut_ad(pool_info); - - fprintf(file, - "Buffer pool size %lu\n" - "Buffer pool size, bytes " ULINTPF "\n" - "Free buffers %lu\n" - "Database pages %lu\n" - "Old database pages %lu\n" - "Modified db pages %lu\n" - "Percent of dirty pages(LRU & free pages): %.3f\n" - "Max dirty pages percent: %.3f\n" - "Pending reads %lu\n" - "Pending writes: LRU %lu, flush list %lu, single page %lu\n", - pool_info->pool_size, - pool_info->pool_size_bytes, - pool_info->free_list_len, - pool_info->lru_len, - pool_info->old_lru_len, - pool_info->flush_list_len, - (((double) pool_info->flush_list_len) / - (pool_info->lru_len + pool_info->free_list_len + 1.0)) * 100.0, - srv_max_buf_pool_modified_pct, - pool_info->n_pend_reads, - pool_info->n_pending_flush_lru, - pool_info->n_pending_flush_list, - pool_info->n_pending_flush_single_page); - - fprintf(file, - "Pages made young %lu, not young %lu\n" - "%.2f youngs/s, %.2f non-youngs/s\n" - "Pages read %lu, created %lu, written %lu\n" - "%.2f reads/s, %.2f creates/s, %.2f writes/s\n", - pool_info->n_pages_made_young, - pool_info->n_pages_not_made_young, - pool_info->page_made_young_rate, - pool_info->page_not_made_young_rate, - pool_info->n_pages_read, - pool_info->n_pages_created, - pool_info->n_pages_written, - pool_info->pages_read_rate, - pool_info->pages_created_rate, - pool_info->pages_written_rate); - - if (pool_info->n_page_get_delta) { - double hit_rate = double(pool_info->page_read_delta) - / pool_info->n_page_get_delta; - - if (hit_rate > 1) { - hit_rate = 1; - } - - fprintf(file, - "Buffer pool hit rate " ULINTPF " / 1000," - " young-making rate " 
ULINTPF " / 1000 not " - ULINTPF " / 1000\n", - ulint(1000 * (1 - hit_rate)), - ulint(1000 * double(pool_info->young_making_delta) - / pool_info->n_page_get_delta), - ulint(1000 * double(pool_info->not_young_making_delta) - / pool_info->n_page_get_delta)); - } else { - fputs("No buffer pool page gets since the last printout\n", - file); - } - - /* Statistics about read ahead algorithm */ - fprintf(file, "Pages read ahead %.2f/s," - " evicted without access %.2f/s," - " Random read ahead %.2f/s\n", - - pool_info->pages_readahead_rate, - pool_info->pages_evicted_rate, - pool_info->pages_readahead_rnd_rate); - - /* Print some values to help us with visualizing what is - happening with LRU eviction. */ - fprintf(file, - "LRU len: %lu, unzip_LRU len: %lu\n" - "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n", - pool_info->lru_len, pool_info->unzip_lru_len, - pool_info->io_sum, pool_info->io_cur, - pool_info->unzip_sum, pool_info->unzip_cur); -} - -/*********************************************************************//** -Prints info of the buffer i/o. 
*/ -UNIV_INTERN -void -buf_print_io( -/*=========*/ - FILE* file) /*!< in/out: buffer where to print */ -{ - ulint i; - buf_pool_info_t* pool_info; - buf_pool_info_t* pool_info_total; - - /* If srv_buf_pool_instances is greater than 1, allocate - one extra buf_pool_info_t, the last one stores - aggregated/total values from all pools */ - if (srv_buf_pool_instances > 1) { - pool_info = (buf_pool_info_t*) mem_zalloc(( - srv_buf_pool_instances + 1) * sizeof *pool_info); - - pool_info_total = &pool_info[srv_buf_pool_instances]; - } else { - ut_a(srv_buf_pool_instances == 1); - - pool_info_total = pool_info = - static_cast<buf_pool_info_t*>( - mem_zalloc(sizeof *pool_info)); - } - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - /* Fetch individual buffer pool info and calculate - aggregated stats along the way */ - buf_stats_get_pool_info(buf_pool, i, pool_info); - - /* If we have more than one buffer pool, store - the aggregated stats */ - if (srv_buf_pool_instances > 1) { - buf_stats_aggregate_pool_info(pool_info_total, - &pool_info[i]); - } - } - - /* Print the aggreate buffer pool info */ - buf_print_io_instance(pool_info_total, file); - - /* If there are more than one buffer pool, print each individual pool - info */ - if (srv_buf_pool_instances > 1) { - fputs("----------------------\n" - "INDIVIDUAL BUFFER POOL INFO\n" - "----------------------\n", file); - - for (i = 0; i < srv_buf_pool_instances; i++) { - fprintf(file, "---BUFFER POOL %lu\n", i); - buf_print_io_instance(&pool_info[i], file); - } - } - - mem_free(pool_info); -} - -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. 
*/ -UNIV_INTERN -void -buf_refresh_io_stats( -/*=================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - buf_pool->last_printout_time = ut_time(); - buf_pool->old_stat = buf_pool->stat; -} - -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN -void -buf_refresh_io_stats_all(void) -/*==========================*/ -{ - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - buf_refresh_io_stats(buf_pool); - } -} - -/**********************************************************************//** -Check if all pages in all buffer pools are in a replacable state. -@return FALSE if not */ -UNIV_INTERN -ibool -buf_all_freed(void) -/*===============*/ -{ - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - if (!buf_all_freed_instance(buf_pool)) { - return(FALSE); - } - } - - return(TRUE); -} - -/*********************************************************************//** -Checks that there currently are no pending i/o-operations for the buffer -pool. -@return number of pending i/o */ -UNIV_INTERN -ulint -buf_pool_check_no_pending_io(void) -/*==============================*/ -{ - ulint i; - ulint pending_io = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - pending_io += buf_pool->n_pend_reads; - - mutex_enter(&buf_pool->flush_state_mutex); - - pending_io += buf_pool->n_flush[BUF_FLUSH_LRU]; - pending_io += buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]; - pending_io += buf_pool->n_flush[BUF_FLUSH_LIST]; - - mutex_exit(&buf_pool->flush_state_mutex); - } - - return(pending_io); -} - -#if 0 -Code currently not used -/*********************************************************************//** -Gets the current length of the free list of buffer blocks. 
-@return length of the free list */ -UNIV_INTERN -ulint -buf_get_free_list_len(void) -/*=======================*/ -{ - ulint len; - - mutex_enter(&buf_pool->free_list_mutex); - - len = UT_LIST_GET_LEN(buf_pool->free); - - mutex_exit(&buf_pool->free_list_mutex); - - return(len); -} -#endif - -#else /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */ -UNIV_INTERN -void -buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space - in units of a page */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - buf_block_t* block) /*!< in: block to init */ -{ - block->page.state = BUF_BLOCK_FILE_PAGE; - block->page.space = space; - block->page.offset = offset; - - page_zip_des_init(&block->page.zip); - - /* We assume that block->page.data has been allocated - with zip_size == UNIV_PAGE_SIZE. 
*/ - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_ad(ut_is_2pow(zip_size)); - page_zip_set_size(&block->page.zip, zip_size); - if (zip_size) { - block->page.zip.data = block->frame + UNIV_PAGE_SIZE; - } -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Aquire LRU list mutex */ -void -buf_pool_mutex_enter( -/*=================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool */ -{ - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); -} -/*********************************************************************//** -Exit LRU list mutex */ -void -buf_pool_mutex_exit( -/*================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool */ -{ - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/********************************************************************//** -Reserve unused slot from temporary memory array and allocate necessary -temporary memory if not yet allocated. 
-@return reserved slot */ -UNIV_INTERN -buf_tmp_buffer_t* -buf_pool_reserve_tmp_slot( -/*======================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool where to - reserve */ - bool compressed) /*!< in: is file space compressed */ -{ - buf_tmp_buffer_t *free_slot=NULL; - - /* Array is protected by buf_pool mutex */ - buf_pool_mutex_enter(buf_pool); - - for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { - buf_tmp_buffer_t *slot = &buf_pool->tmp_arr->slots[i]; - - if(slot->reserved == false) { - free_slot = slot; - break; - } - } - - /* We assume that free slot is found */ - ut_a(free_slot != NULL); - free_slot->reserved = true; - /* Now that we have reserved this slot we can release - buf_pool mutex */ - buf_pool_mutex_exit(buf_pool); - - /* Allocate temporary memory for encryption/decryption */ - if (free_slot->crypt_buf == NULL) { - free_slot->crypt_buf = static_cast<byte*>(aligned_malloc(UNIV_PAGE_SIZE, UNIV_PAGE_SIZE)); - memset(free_slot->crypt_buf, 0, UNIV_PAGE_SIZE); - } - - /* For page compressed tables allocate temporary memory for - compression/decompression */ - if (compressed && free_slot->comp_buf == NULL) { - ulint size = UNIV_PAGE_SIZE; - - /* Both snappy and lzo compression methods require that - output buffer used for compression is bigger than input - buffer. Increase the allocated buffer size accordingly. */ -#if HAVE_SNAPPY - size = snappy_max_compressed_length(size); -#endif -#if HAVE_LZO - size += LZO1X_1_15_MEM_COMPRESS; -#endif - free_slot->comp_buf = static_cast<byte*>(aligned_malloc(size, UNIV_PAGE_SIZE)); - memset(free_slot->comp_buf, 0, size); - } - - return (free_slot); -} - -/** Encryption and page_compression hook that is called just before -a page is written to disk. 
-@param[in,out] space tablespace -@param[in,out] bpage buffer page -@param[in] src_frame physical page frame that is being encrypted -@return page frame to be written to file -(may be src_frame or an encrypted/compressed copy of it) */ -UNIV_INTERN -byte* -buf_page_encrypt_before_write( - fil_space_t* space, - buf_page_t* bpage, - byte* src_frame) -{ - ut_ad(space->id == bpage->space); - bpage->real_size = UNIV_PAGE_SIZE; - - fil_page_type_validate(src_frame); - - switch (bpage->offset) { - case 0: - /* Page 0 of a tablespace is not encrypted/compressed */ - return src_frame; - case TRX_SYS_PAGE_NO: - if (bpage->space == TRX_SYS_SPACE) { - /* don't encrypt/compress page as it contains - address to dblwr buffer */ - return src_frame; - } - } - - fil_space_crypt_t* crypt_data = space->crypt_data; - - const bool encrypted = crypt_data - && !crypt_data->not_encrypted() - && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED - && (!crypt_data->is_default_encryption() - || srv_encrypt_tables); - - bool page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); - - if (!encrypted && !page_compressed) { - /* No need to encrypt or page compress the page. - Clear key-version & crypt-checksum. */ - memset(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); - return src_frame; - } - - ulint zip_size = buf_page_get_zip_size(bpage); - ulint page_size = (zip_size) ? 
zip_size : UNIV_PAGE_SIZE; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - /* Find free slot from temporary memory array */ - buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - slot->out_buf = NULL; - bpage->slot = slot; - - byte *dst_frame = slot->crypt_buf; - - if (!page_compressed) { - /* Encrypt page content */ - byte* tmp = fil_space_encrypt(space, - bpage->offset, - bpage->newest_modification, - src_frame, - dst_frame); - - bpage->real_size = page_size; - slot->out_buf = dst_frame = tmp; - - ut_d(fil_page_type_validate(tmp)); - } else { - /* First we compress the page content */ - ulint out_len = 0; - - byte *tmp = fil_compress_page( - space, - (byte *)src_frame, - slot->comp_buf, - page_size, - fsp_flags_get_page_compression_level(space->flags), - fil_space_get_block_size(space, bpage->offset), - encrypted, - &out_len); - - bpage->real_size = out_len; - -#ifdef UNIV_DEBUG - fil_page_type_validate(tmp); -#endif - - if(encrypted) { - - /* And then we encrypt the page content */ - tmp = fil_space_encrypt(space, - bpage->offset, - bpage->newest_modification, - tmp, - dst_frame); - } - - slot->out_buf = dst_frame = tmp; - } - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - - // return dst_frame which will be written - return dst_frame; -} - -/** Decrypt a page. -@param[in,out] bpage Page control block -@param[in,out] space tablespace -@return whether the operation was successful */ -static -bool -buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) -{ - ut_ad(space->n_pending_ios > 0); - ut_ad(space->id == bpage->space); - - ulint zip_size = buf_page_get_zip_size(bpage); - ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; - - byte* dst_frame = (zip_size) ? 
bpage->zip.data : - ((buf_block_t*) bpage)->frame; - unsigned key_version = - mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - bool page_compressed = fil_page_is_compressed(dst_frame); - bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame); - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - bool success = true; - - if (bpage->offset == 0) { - /* File header pages are not encrypted/compressed */ - return (true); - } - - /* Page is encrypted if encryption information is found from - tablespace and page contains used key_version. This is true - also for pages first compressed and then encrypted. */ - if (!space->crypt_data) { - key_version = 0; - } - - if (page_compressed) { - /* the page we read is unencrypted */ - /* Find free slot from temporary memory array */ - buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - - /* decompress using comp_buf to dst_frame */ - fil_decompress_page(slot->comp_buf, - dst_frame, - ulong(size), - &bpage->write_size); - - /* Mark this slot as free */ - slot->reserved = false; - key_version = 0; - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - } else { - buf_tmp_buffer_t* slot = NULL; - - if (key_version) { - /* Verify encryption checksum before we even try to - decrypt. */ - if (!fil_space_verify_crypt_checksum(dst_frame, - zip_size, NULL, bpage->offset)) { - - /* Mark page encrypted in case it should - be. 
*/ - if (space->crypt_data->type - != CRYPT_SCHEME_UNENCRYPTED) { - bpage->encrypted = true; - } - - return (false); - } - - /* Find free slot from temporary memory array */ - slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - - /* decrypt using crypt_buf to dst_frame */ - if (!fil_space_decrypt(space, slot->crypt_buf, - dst_frame, &bpage->encrypted)) { - success = false; - } - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - } - - if (page_compressed_encrypted && success) { - if (!slot) { - slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed); - } - -#ifdef UNIV_DEBUG - fil_page_type_validate(dst_frame); -#endif - /* decompress using comp_buf to dst_frame */ - fil_decompress_page(slot->comp_buf, - dst_frame, - ulong(size), - &bpage->write_size); - ut_d(fil_page_type_validate(dst_frame)); - } - - /* Mark this slot as free */ - if (slot) { - slot->reserved = false; - } - } - - ut_ad(space->n_pending_ios > 0); - return (success); -} diff --git a/storage/xtradb/buf/buf0checksum.cc b/storage/xtradb/buf/buf0checksum.cc deleted file mode 100644 index 01b646a78e0..00000000000 --- a/storage/xtradb/buf/buf0checksum.cc +++ /dev/null @@ -1,156 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0checksum.cc -Buffer pool checksum functions, also linked from /extra/innochecksum.cc - -Created Aug 11, 2011 Vasil Dimov -*******************************************************/ - -#include "univ.i" -#include "fil0fil.h" /* FIL_* */ -#include "ut0crc32.h" /* ut_crc32() */ -#include "ut0rnd.h" /* ut_fold_binary() */ -#include "buf0types.h" - -#ifndef UNIV_INNOCHECKSUM - -#include "srv0srv.h" /* SRV_CHECKSUM_* */ - -#endif /* !UNIV_INNOCHECKSUM */ - -/** the macro MYSQL_SYSVAR_ENUM() requires "long unsigned int" and if we -use srv_checksum_algorithm_t here then we get a compiler error: -ha_innodb.cc:12251: error: cannot convert 'srv_checksum_algorithm_t*' to - 'long unsigned int*' in initialization */ -UNIV_INTERN ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB; - -/********************************************************************//** -Calculates a page CRC32 which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN -ib_uint32_t -buf_calc_page_crc32( -/*================*/ - const byte* page) /*!< in: buffer page */ -{ - ib_uint32_t checksum; - - /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool - to the first pages of data files, we have to skip them in the page - checksum calculation. - We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the - checksum is stored, and also the last 8 bytes of page because - there we store the old formula checksum. 
*/ - - checksum = ut_crc32(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION - - FIL_PAGE_OFFSET) - ^ ut_crc32(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN_OLD_CHKSUM); - - return(checksum); -} - -/********************************************************************//** -Calculates a page checksum which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN -ulint -buf_calc_page_new_checksum( -/*=======================*/ - const byte* page) /*!< in: buffer page */ -{ - ulint checksum; - - /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool - to the first pages of data files, we have to skip them in the page - checksum calculation. - We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the - checksum is stored, and also the last 8 bytes of page because - there we store the old formula checksum. */ - - checksum = ut_fold_binary(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION - - FIL_PAGE_OFFSET) - + ut_fold_binary(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN_OLD_CHKSUM); - checksum = checksum & 0xFFFFFFFFUL; - - return(checksum); -} - -/********************************************************************//** -In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only -looked at the first few bytes of the page. This calculates that old -checksum. -NOTE: we must first store the new formula checksum to -FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum -because this takes that field as an input! 
-@return checksum */ -UNIV_INTERN -ulint -buf_calc_page_old_checksum( -/*=======================*/ - const byte* page) /*!< in: buffer page */ -{ - ulint checksum; - - checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - checksum = checksum & 0xFFFFFFFFUL; - - return(checksum); -} - -/********************************************************************//** -Return a printable string describing the checksum algorithm. -@return algorithm name */ -UNIV_INTERN -const char* -buf_checksum_algorithm_name( -/*========================*/ - srv_checksum_algorithm_t algo) /*!< in: algorithm */ -{ - switch (algo) { - case SRV_CHECKSUM_ALGORITHM_CRC32: - return("crc32"); - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - return("strict_crc32"); - case SRV_CHECKSUM_ALGORITHM_INNODB: - return("innodb"); - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - return("strict_innodb"); - case SRV_CHECKSUM_ALGORITHM_NONE: - return("none"); - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - return("strict_none"); - } - - ut_error; - return(NULL); -} diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc deleted file mode 100644 index 49371f9a6f1..00000000000 --- a/storage/xtradb/buf/buf0dblwr.cc +++ /dev/null @@ -1,1288 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0dblwr.cc -Doublwrite buffer module - -Created 2011/12/19 -*******************************************************/ - -#include "buf0dblwr.h" - -#ifdef UNIV_NONINL -#include "buf0buf.ic" -#endif - -#include "buf0buf.h" -#include "buf0checksum.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "page0zip.h" -#include "trx0sys.h" -#include "fil0crypt.h" -#include "fil0pagecompress.h" - -#ifndef UNIV_HOTBACKUP - -#ifdef UNIV_PFS_MUTEX -/* Key to register the mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key; -#endif /* UNIV_PFS_RWLOCK */ - -/** The doublewrite buffer */ -UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL; - -/** Set to TRUE when the doublewrite buffer is being created */ -UNIV_INTERN ibool buf_dblwr_being_created = FALSE; - -#define TRX_SYS_DOUBLEWRITE_BLOCKS 2 - -/****************************************************************//** -Determines if a page number is located inside the doublewrite buffer. 
-@return TRUE if the location is inside the two blocks of the -doublewrite buffer */ -UNIV_INTERN -ibool -buf_dblwr_page_inside( -/*==================*/ - ulint page_no) /*!< in: page number */ -{ - if (buf_dblwr == NULL) { - - return(FALSE); - } - - if (page_no >= buf_dblwr->block1 - && page_no < buf_dblwr->block1 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - return(TRUE); - } - - if (page_no >= buf_dblwr->block2 - && page_no < buf_dblwr->block2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - return(TRUE); - } - - return(FALSE); -} - -/****************************************************************//** -Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the -doublewrite buffer within it. -@return pointer to the doublewrite buffer within the filespace header -page. */ -UNIV_INLINE -byte* -buf_dblwr_get( -/*==========*/ - mtr_t* mtr) /*!< in/out: MTR to hold the page latch */ -{ - buf_block_t* block; - - block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, - RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); - - return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE); -} - -/********************************************************************//** -Flush a batch of writes to the datafiles that have already been -written to the dblwr buffer on disk. */ -UNIV_INLINE -void -buf_dblwr_sync_datafiles() -/*======================*/ -{ - /* Wake possible simulated aio thread to actually post the - writes to the operating system */ - os_aio_simulated_wake_handler_threads(); - - /* Wait that all async writes to tablespaces have been posted to - the OS */ - os_aio_wait_until_no_pending_writes(); - - /* Now we flush the data to disk (for example, with fsync) */ - fil_flush_file_spaces(FIL_TABLESPACE); -} - -/****************************************************************//** -Creates or initialializes the doublewrite buffer at a database start. 
*/ -static -void -buf_dblwr_init( -/*===========*/ - byte* doublewrite) /*!< in: pointer to the doublewrite buf - header on trx sys page */ -{ - ulint buf_size; - - buf_dblwr = static_cast<buf_dblwr_t*>( - mem_zalloc(sizeof(buf_dblwr_t))); - - /* There are two blocks of same size in the doublewrite - buffer. */ - buf_size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; - - /* There must be atleast one buffer for single page writes - and one buffer for batch writes. */ - ut_a(srv_doublewrite_batch_size > 0 - && srv_doublewrite_batch_size < buf_size); - - mutex_create(buf_dblwr_mutex_key, - &buf_dblwr->mutex, SYNC_DOUBLEWRITE); - - buf_dblwr->b_event = os_event_create(); - buf_dblwr->s_event = os_event_create(); - buf_dblwr->first_free = 0; - buf_dblwr->s_reserved = 0; - buf_dblwr->b_reserved = 0; - - buf_dblwr->block1 = mach_read_from_4( - doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1); - buf_dblwr->block2 = mach_read_from_4( - doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2); - - buf_dblwr->in_use = static_cast<bool*>( - mem_zalloc(buf_size * sizeof(bool))); - - buf_dblwr->write_buf_unaligned = static_cast<byte*>( - ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE)); - - buf_dblwr->write_buf = static_cast<byte*>( - ut_align(buf_dblwr->write_buf_unaligned, - UNIV_PAGE_SIZE)); - - buf_dblwr->buf_block_arr = static_cast<buf_page_t**>( - mem_zalloc(buf_size * sizeof(void*))); -} - -/** Create the doublewrite buffer if the doublewrite buffer header -is not present in the TRX_SYS page. 
-@return whether the operation succeeded -@retval true if the doublewrite buffer exists or was created -@retval false if the creation failed (too small first data file) */ -UNIV_INTERN -bool -buf_dblwr_create() -{ - buf_block_t* block2; - buf_block_t* new_block; - byte* doublewrite; - byte* fseg_header; - ulint page_no; - ulint prev_page_no; - ulint i; - mtr_t mtr; - - if (buf_dblwr) { - /* Already inited */ - return(true); - } - -start_again: - mtr_start(&mtr); - buf_dblwr_being_created = TRUE; - - doublewrite = buf_dblwr_get(&mtr); - - if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) - == TRX_SYS_DOUBLEWRITE_MAGIC_N) { - /* The doublewrite buffer has already been created: - just read in some numbers */ - - buf_dblwr_init(doublewrite); - - mtr_commit(&mtr); - buf_dblwr_being_created = FALSE; - return(true); - } - - if (buf_pool_get_curr_size() - < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE - + FSP_EXTENT_SIZE / 2 + 100) - * UNIV_PAGE_SIZE)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create doublewrite buffer: " - "innodb_buffer_pool_size is too small."); - mtr_commit(&mtr); - return(false); - } else { - fil_space_t* space = fil_space_acquire(TRX_SYS_SPACE); - const bool fail = UT_LIST_GET_FIRST(space->chain)->size - < 3 * FSP_EXTENT_SIZE; - fil_space_release(space); - - if (fail) { - goto too_small; - } - } - - block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, - TRX_SYS_DOUBLEWRITE - + TRX_SYS_DOUBLEWRITE_FSEG, &mtr); - - if (block2 == NULL) { -too_small: - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create doublewrite buffer: " - "the first file in innodb_data_file_path" - " must be at least %luM.", - 3 * (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) >> 20); - mtr_commit(&mtr); - return(false); - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Doublewrite buffer not found: creating new"); - - /* FIXME: After this point, the doublewrite buffer creation - is not atomic. 
The doublewrite buffer should not exist in - the InnoDB system tablespace file in the first place. - It could be located in separate optional file(s) in a - user-specified location. */ - - /* fseg_create acquires a second latch on the page, - therefore we must declare it: */ - - buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK); - - fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG; - prev_page_no = 0; - - for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE - + FSP_EXTENT_SIZE / 2; i++) { - new_block = fseg_alloc_free_page( - fseg_header, prev_page_no + 1, FSP_UP, &mtr); - if (new_block == NULL) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Cannot create doublewrite buffer: you must " - "increase your tablespace size. " - "Cannot continue operation."); - } - - /* We read the allocated pages to the buffer pool; - when they are written to disk in a flush, the space - id and page number fields are also written to the - pages. When we at database startup read pages - from the doublewrite buffer, we know that if the - space id and page number in them are the same as - the page position in the tablespace, then the page - has not been written to in doublewrite. 
*/ - - ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1); - page_no = buf_block_get_page_no(new_block); - - if (i == FSP_EXTENT_SIZE / 2) { - ut_a(page_no == FSP_EXTENT_SIZE); - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_BLOCK1, - page_no, MLOG_4BYTES, &mtr); - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_REPEAT - + TRX_SYS_DOUBLEWRITE_BLOCK1, - page_no, MLOG_4BYTES, &mtr); - - } else if (i == FSP_EXTENT_SIZE / 2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - ut_a(page_no == 2 * FSP_EXTENT_SIZE); - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_BLOCK2, - page_no, MLOG_4BYTES, &mtr); - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_REPEAT - + TRX_SYS_DOUBLEWRITE_BLOCK2, - page_no, MLOG_4BYTES, &mtr); - - } else if (i > FSP_EXTENT_SIZE / 2) { - ut_a(page_no == prev_page_no + 1); - } - - if (((i + 1) & 15) == 0) { - /* rw_locks can only be recursively x-locked - 2048 times. (on 32 bit platforms, - (lint) 0 - (X_LOCK_DECR * 2049) - is no longer a negative number, and thus - lock_word becomes like a shared lock). - For 4k page size this loop will - lock the fseg header too many times. Since - this code is not done while any other threads - are active, restart the MTR occasionally. 
*/ - mtr_commit(&mtr); - mtr_start(&mtr); - doublewrite = buf_dblwr_get(&mtr); - fseg_header = doublewrite - + TRX_SYS_DOUBLEWRITE_FSEG; - } - - prev_page_no = page_no; - } - - mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC, - TRX_SYS_DOUBLEWRITE_MAGIC_N, - MLOG_4BYTES, &mtr); - mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC - + TRX_SYS_DOUBLEWRITE_REPEAT, - TRX_SYS_DOUBLEWRITE_MAGIC_N, - MLOG_4BYTES, &mtr); - - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED, - TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, - MLOG_4BYTES, &mtr); - mtr_commit(&mtr); - - /* Flush the modified pages to disk and make a checkpoint */ - log_make_checkpoint_at(LSN_MAX, TRUE); - - /* Remove doublewrite pages from LRU */ - buf_pool_invalidate(); - - ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created"); - - goto start_again; -} - -/****************************************************************//** -At a database startup initializes the doublewrite buffer memory structure if -we already have a doublewrite buffer created in the data files. If we are -upgrading to an InnoDB version which supports multiple tablespaces, then this -function performs the necessary update operations. If we are in a crash -recovery, this function loads the pages from double write buffer into memory. 
*/ -void -buf_dblwr_init_or_load_pages( -/*=========================*/ - pfs_os_file_t file, - char* path, - bool load_corrupt_pages) -{ - byte* buf; - byte* read_buf; - byte* unaligned_read_buf; - ulint block1; - ulint block2; - byte* page; - ibool reset_space_ids = FALSE; - byte* doublewrite; - ulint space_id; - ulint i; - ulint block_bytes = 0; - recv_dblwr_t& recv_dblwr = recv_sys->dblwr; - - /* We do the file i/o past the buffer pool */ - - unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); - - read_buf = static_cast<byte*>( - ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); - - /* Read the trx sys header to check if we are using the doublewrite - buffer */ - off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE; - os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE); - - doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; - - /* TRX_SYS_PAGE_NO is not encrypted see fil_crypt_rotate_page() */ - - if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) - == TRX_SYS_DOUBLEWRITE_MAGIC_N) { - /* The doublewrite buffer has been created */ - - buf_dblwr_init(doublewrite); - - block1 = buf_dblwr->block1; - block2 = buf_dblwr->block2; - - buf = buf_dblwr->write_buf; - } else { - goto leave_func; - } - - if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED) - != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) { - - /* We are upgrading from a version < 4.1.x to a version where - multiple tablespaces are supported. We must reset the space id - field in the pages in the doublewrite buffer because starting - from this version the space id is stored to - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. 
*/ - - reset_space_ids = TRUE; - - ib_logf(IB_LOG_LEVEL_INFO, - "Resetting space id's in the doublewrite buffer"); - } - - /* Read the pages from the doublewrite buffer to memory */ - - block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - - os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes); - os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE, - block_bytes); - - /* Check if any of these pages is half-written in data files, in the - intended position */ - - page = buf; - - for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * TRX_SYS_DOUBLEWRITE_BLOCKS; i++) { - - ulint source_page_no; - - if (reset_space_ids) { - - space_id = 0; - mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); - /* We do not need to calculate new checksums for the - pages because the field .._SPACE_ID does not affect - them. Write the page back to where we read it from. */ - - if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - source_page_no = block1 + i; - } else { - source_page_no = block2 - + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; - } - - os_file_write(path, file, page, - source_page_no * UNIV_PAGE_SIZE, - UNIV_PAGE_SIZE); - } else if (load_corrupt_pages - && !buf_page_is_zeroes(page, FIL_PAGE_DATA)) { - /* Each valid page header must contain some - nonzero bytes, such as FIL_PAGE_OFFSET - or FIL_PAGE_LSN. */ - recv_dblwr.add(page); - } - - page += UNIV_PAGE_SIZE; - } - - if (reset_space_ids) { - os_file_flush(file); - } - -leave_func: - ut_free(unaligned_read_buf); -} - -/****************************************************************//** -Process the double write buffer pages. 
*/ -void -buf_dblwr_process() -/*===============*/ -{ - ulint space_id; - ulint page_no; - ulint page_no_dblwr = 0; - byte* page; - byte* read_buf; - byte* unaligned_read_buf; - recv_dblwr_t& recv_dblwr = recv_sys->dblwr; - - if (!buf_dblwr) { - return; - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Restoring possible half-written data pages " - "from the doublewrite buffer..."); - - unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - - read_buf = static_cast<byte*>( - ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); - - for (std::list<byte*>::iterator i = recv_dblwr.pages.begin(); - i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) { - page = *i; - page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); - space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID); - - FilSpace space(space_id, true); - - if (!space()) { - /* Maybe we have dropped the single-table tablespace - and this page once belonged to it: do nothing */ - continue; - } - - if (!fil_check_adress_in_tablespace(space_id, page_no)) { - ib_logf(IB_LOG_LEVEL_WARN, - "A copy of page " ULINTPF ":" ULINTPF - " in the doublewrite buffer slot " ULINTPF - " is not within space bounds", - space_id, page_no, page_no_dblwr); - continue; - } - - ulint zip_size = fsp_flags_get_zip_size(space()->flags); - ut_ad(!buf_page_is_zeroes(page, zip_size)); - - /* Read in the actual page from the file */ - fil_io(OS_FILE_READ, - true, - space_id, - zip_size, - page_no, - 0, - zip_size ? zip_size : UNIV_PAGE_SIZE, - read_buf, - NULL, - 0); - - const bool is_all_zero = buf_page_is_zeroes( - read_buf, zip_size); - - if (is_all_zero) { - /* We will check if the copy in the - doublewrite buffer is valid. If not, we will - ignore this page (there should be redo log - records to initialize it). */ - } else { - if (fil_page_is_compressed_encrypted(read_buf) || - fil_page_is_compressed(read_buf)) { - /* Decompress the page before - validating the checksum. 
*/ - fil_decompress_page( - NULL, read_buf, srv_page_size, - NULL, true); - } - - if (fil_space_verify_crypt_checksum( - read_buf, zip_size, NULL, page_no) - || !buf_page_is_corrupted( - true, read_buf, zip_size, space())) { - /* The page is good; there is no need - to consult the doublewrite buffer. */ - continue; - } - - /* We intentionally skip this message for - is_all_zero pages. */ - ib_logf(IB_LOG_LEVEL_INFO, - "Trying to recover page " ULINTPF ":" ULINTPF - " from the doublewrite buffer.", - space_id, page_no); - } - - /* Next, validate the doublewrite page. */ - if (fil_page_is_compressed_encrypted(page) || - fil_page_is_compressed(page)) { - /* Decompress the page before - validating the checksum. */ - fil_decompress_page( - NULL, page, srv_page_size, NULL, true); - } - - if (!fil_space_verify_crypt_checksum(page, zip_size, NULL, page_no) - && buf_page_is_corrupted(true, page, zip_size, space)) { - if (!is_all_zero) { - ib_logf(IB_LOG_LEVEL_WARN, - "A doublewrite copy of page " - ULINTPF ":" ULINTPF " is corrupted.", - space_id, page_no); - } - /* Theoretically we could have another good - copy for this page in the doublewrite - buffer. If not, we will report a fatal error - for a corrupted page somewhere else if that - page was truly needed. */ - continue; - } - - if (page_no == 0) { - /* Check the FSP_SPACE_FLAGS. */ - ulint flags = fsp_header_get_flags(page); - if (!fsp_flags_is_valid(flags) - && fsp_flags_convert_from_101(flags) - == ULINT_UNDEFINED) { - ib_logf(IB_LOG_LEVEL_WARN, - "Ignoring a doublewrite copy of page " - ULINTPF ":0 due to invalid flags 0x%x", - space_id, int(flags)); - continue; - } - /* The flags on the page should be converted later. */ - } - - /* Write the good page from the doublewrite buffer to - the intended position. */ - - fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0, - zip_size ? 
zip_size : UNIV_PAGE_SIZE, - page, NULL, 0); - - ib_logf(IB_LOG_LEVEL_INFO, - "Recovered page " ULINTPF ":" ULINTPF " from" - " the doublewrite buffer.", - space_id, page_no); - } - - ut_free(unaligned_read_buf); - fil_flush_file_spaces(FIL_TABLESPACE); - - { - size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - byte *unaligned_buf = static_cast<byte*>( - ut_malloc(bytes + UNIV_PAGE_SIZE - 1)); - - byte *buf = static_cast<byte*>( - ut_align(unaligned_buf, UNIV_PAGE_SIZE)); - memset(buf, 0, bytes); - - fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, - buf_dblwr->block1, 0, bytes, buf, NULL, NULL); - fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, - buf_dblwr->block2, 0, bytes, buf, NULL, NULL); - - ut_free(unaligned_buf); - } -} - -/****************************************************************//** -Frees doublewrite buffer. */ -UNIV_INTERN -void -buf_dblwr_free(void) -/*================*/ -{ - /* Free the double write data structures. */ - ut_a(buf_dblwr != NULL); - ut_ad(buf_dblwr->s_reserved == 0); - ut_ad(buf_dblwr->b_reserved == 0); - - os_event_free(buf_dblwr->b_event); - os_event_free(buf_dblwr->s_event); - ut_free(buf_dblwr->write_buf_unaligned); - buf_dblwr->write_buf_unaligned = NULL; - - mem_free(buf_dblwr->buf_block_arr); - buf_dblwr->buf_block_arr = NULL; - - mem_free(buf_dblwr->in_use); - buf_dblwr->in_use = NULL; - - mutex_free(&buf_dblwr->mutex); - mem_free(buf_dblwr); - buf_dblwr = NULL; -} - -/********************************************************************//** -Updates the doublewrite buffer when an IO request is completed. 
*/ -UNIV_INTERN -void -buf_dblwr_update( -/*=============*/ - const buf_page_t* bpage, /*!< in: buffer block descriptor */ - buf_flush_t flush_type)/*!< in: flush type */ -{ - if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { - return; - } - - switch (flush_type) { - case BUF_FLUSH_LIST: - case BUF_FLUSH_LRU: - mutex_enter(&buf_dblwr->mutex); - - ut_ad(buf_dblwr->batch_running); - ut_ad(buf_dblwr->b_reserved > 0); - ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free); - - buf_dblwr->b_reserved--; - - if (buf_dblwr->b_reserved == 0) { - mutex_exit(&buf_dblwr->mutex); - /* This will finish the batch. Sync data files - to the disk. */ - fil_flush_file_spaces(FIL_TABLESPACE); - mutex_enter(&buf_dblwr->mutex); - - /* We can now reuse the doublewrite memory buffer: */ - buf_dblwr->first_free = 0; - buf_dblwr->batch_running = false; - os_event_set(buf_dblwr->b_event); - } - - mutex_exit(&buf_dblwr->mutex); - break; - case BUF_FLUSH_SINGLE_PAGE: - { - const ulint size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; - ulint i; - mutex_enter(&buf_dblwr->mutex); - for (i = srv_doublewrite_batch_size; i < size; ++i) { - if (buf_dblwr->buf_block_arr[i] == bpage) { - buf_dblwr->s_reserved--; - buf_dblwr->buf_block_arr[i] = NULL; - buf_dblwr->in_use[i] = false; - break; - } - } - - /* The block we are looking for must exist as a - reserved block. */ - ut_a(i < size); - } - os_event_set(buf_dblwr->s_event); - mutex_exit(&buf_dblwr->mutex); - break; - case BUF_FLUSH_N_TYPES: - ut_error; - } -} - -/********************************************************************//** -Check the LSN values on the page. 
*/ -static -void -buf_dblwr_check_page_lsn( -/*=====================*/ - const page_t* page) /*!< in: page to check */ -{ - ibool page_compressed = (mach_read_from_2(page+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); - uint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - /* Ignore page compressed or encrypted pages */ - if (page_compressed || key_version) { - return; - } - - if (memcmp(page + (FIL_PAGE_LSN + 4), - page + (UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), - 4)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be written" - " seems corrupt!\n" - "InnoDB: The low 4 bytes of LSN fields do not match " - "(" ULINTPF " != " ULINTPF ")!" - " Noticed in the buffer pool.\n", - mach_read_from_4( - page + FIL_PAGE_LSN + 4), - mach_read_from_4( - page + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)); - } -} - -/********************************************************************//** -Asserts when a corrupt block is find during writing out data to the -disk. */ -static -void -buf_dblwr_assert_on_corrupt_block( -/*==============================*/ - const buf_block_t* block) /*!< in: block to check */ -{ - buf_page_print(block->frame, 0, BUF_PAGE_PRINT_NO_CRASH); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Apparent corruption of an" - " index page n:o %lu in space %lu\n" - "InnoDB: to be written to data file." - " We intentionally crash server\n" - "InnoDB: to prevent corrupt data" - " from ending up in data\n" - "InnoDB: files.\n", - (ulong) buf_block_get_page_no(block), - (ulong) buf_block_get_space(block)); - - ut_error; -} - -/********************************************************************//** -Check the LSN values on the page with which this block is associated. -Also validate the page if the option is set. 
*/ -static -void -buf_dblwr_check_block( -/*==================*/ - const buf_block_t* block) /*!< in: block to check */ -{ - if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE - || block->page.zip.data) { - /* No simple validate for compressed pages exists. */ - return; - } - - buf_dblwr_check_page_lsn(block->frame); - - if (!block->check_index_page_at_flush) { - return; - } - - if (page_is_comp(block->frame)) { - if (!page_simple_validate_new(block->frame)) { - buf_dblwr_assert_on_corrupt_block(block); - } - } else if (!page_simple_validate_old(block->frame)) { - - buf_dblwr_assert_on_corrupt_block(block); - } -} - -/********************************************************************//** -Writes a page that has already been written to the doublewrite buffer -to the datafile. It is the job of the caller to sync the datafile. */ -static -void -buf_dblwr_write_block_to_datafile( -/*==============================*/ - const buf_page_t* bpage, /*!< in: page to write */ - bool sync) /*!< in: true if sync IO - is requested */ -{ - ut_a(bpage); - ut_a(buf_page_in_file(bpage)); - - const ulint flags = sync - ? 
OS_FILE_WRITE - : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER; - - void * frame = buf_page_get_frame(bpage); - - if (bpage->zip.data) { - fil_io(flags, - sync, - buf_page_get_space(bpage), - buf_page_get_zip_size(bpage), - buf_page_get_page_no(bpage), - 0, - buf_page_get_zip_size(bpage), - frame, - (void*) bpage, - 0); - - return; - } - - - const buf_block_t* block = (buf_block_t*) bpage; - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - buf_dblwr_check_page_lsn(block->frame); - - fil_io(flags, - sync, - buf_block_get_space(block), - 0, - buf_block_get_page_no(block), - 0, - bpage->real_size, - frame, - (void*) block, - (ulint *)&bpage->write_size); -} - -/********************************************************************//** -Flushes possible buffered writes from the doublewrite memory buffer to disk, -and also wakes up the aio thread if simulated aio is used. It is very -important to call this function after a batch of writes has been posted, -and also when we may have to wait for a page latch! Otherwise a deadlock -of threads can occur. */ -UNIV_INTERN -void -buf_dblwr_flush_buffered_writes(void) -/*=================================*/ -{ - byte* write_buf; - ulint first_free; - ulint len; - - if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { - /* Sync the writes to the disk. */ - buf_dblwr_sync_datafiles(); - return; - } - -try_again: - mutex_enter(&buf_dblwr->mutex); - - /* Write first to doublewrite buffer blocks. We use synchronous - aio and thus know that file write has been completed when the - control returns. */ - - if (buf_dblwr->first_free == 0) { - - mutex_exit(&buf_dblwr->mutex); - - return; - } - - if (buf_dblwr->batch_running) { - /* Another thread is running the batch right now. Wait - for it to finish. 
*/ - ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event); - mutex_exit(&buf_dblwr->mutex); - - os_event_wait_low(buf_dblwr->b_event, sig_count); - goto try_again; - } - - ut_a(!buf_dblwr->batch_running); - ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved); - - /* Disallow anyone else to post to doublewrite buffer or to - start another batch of flushing. */ - buf_dblwr->batch_running = true; - first_free = buf_dblwr->first_free; - - /* Now safe to release the mutex. Note that though no other - thread is allowed to post to the doublewrite batch flushing - but any threads working on single page flushes are allowed - to proceed. */ - mutex_exit(&buf_dblwr->mutex); - - write_buf = buf_dblwr->write_buf; - - for (ulint len2 = 0, i = 0; - i < buf_dblwr->first_free; - len2 += UNIV_PAGE_SIZE, i++) { - - const buf_block_t* block; - - block = (buf_block_t*) buf_dblwr->buf_block_arr[i]; - - if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE - || block->page.zip.data) { - /* No simple validate for compressed - pages exists. */ - continue; - } - - /* Check that the actual page in the buffer pool is - not corrupt and the LSN values are sane. */ - buf_dblwr_check_block(block); - - /* Check that the page as written to the doublewrite - buffer has sane LSN values. */ - buf_dblwr_check_page_lsn(write_buf + len2); - } - - /* Write out the first block of the doublewrite buffer */ - len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE, - buf_dblwr->first_free) * UNIV_PAGE_SIZE; - - fil_io(OS_FILE_WRITE, - true, - TRX_SYS_SPACE, - 0, - buf_dblwr->block1, - 0, - len, - (void*) - write_buf, - NULL, - 0); - - if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - /* No unwritten pages in the second block. */ - goto flush; - } - - /* Write out the second block of the doublewrite buffer. 
*/ - len = (buf_dblwr->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) - * UNIV_PAGE_SIZE; - - write_buf = buf_dblwr->write_buf - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - - fil_io(OS_FILE_WRITE, - true, - TRX_SYS_SPACE, - 0, - buf_dblwr->block2, - 0, - len, - (void*) write_buf, - NULL, - 0); - -flush: - /* increment the doublewrite flushed pages counter */ - srv_stats.dblwr_pages_written.add(buf_dblwr->first_free); - srv_stats.dblwr_writes.inc(); - - /* Now flush the doublewrite buffer data to disk */ - fil_flush(ulint(TRX_SYS_SPACE)); - - /* We know that the writes have been flushed to disk now - and in recovery we will find them in the doublewrite buffer - blocks. Next do the writes to the intended positions. */ - - /* Up to this point first_free and buf_dblwr->first_free are - same because we have set the buf_dblwr->batch_running flag - disallowing any other thread to post any request but we - can't safely access buf_dblwr->first_free in the loop below. - This is so because it is possible that after we are done with - the last iteration and before we terminate the loop, the batch - gets finished in the IO helper thread and another thread posts - a new batch setting buf_dblwr->first_free to a higher value. - If this happens and we are using buf_dblwr->first_free in the - loop termination condition then we'll end up dispatching - the same block twice from two different threads. */ - ut_ad(first_free == buf_dblwr->first_free); - for (ulint i = 0; i < first_free; i++) { - buf_dblwr_write_block_to_datafile( - buf_dblwr->buf_block_arr[i], false); - } - - /* Wake possible simulated aio thread to actually post the - writes to the operating system. We don't flush the files - at this point. We leave it to the IO helper thread to flush - datafiles when the whole batch has been processed. */ - os_aio_simulated_wake_handler_threads(); -} - -/********************************************************************//** -Posts a buffer page for writing. 
If the doublewrite memory buffer is -full, calls buf_dblwr_flush_buffered_writes and waits for for free -space to appear. */ -UNIV_INTERN -void -buf_dblwr_add_to_batch( -/*====================*/ - buf_page_t* bpage) /*!< in: buffer block to write */ -{ - ulint zip_size; - - ut_a(buf_page_in_file(bpage)); - ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex)); - -try_again: - mutex_enter(&buf_dblwr->mutex); - - ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size); - - if (buf_dblwr->batch_running) { - - /* This not nearly as bad as it looks. There is only - page_cleaner thread which does background flushing - in batches therefore it is unlikely to be a contention - point. The only exception is when a user thread is - forced to do a flush batch because of a sync - checkpoint. */ - ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event); - mutex_exit(&buf_dblwr->mutex); - - os_event_wait_low(buf_dblwr->b_event, sig_count); - goto try_again; - } - - if (buf_dblwr->first_free == srv_doublewrite_batch_size) { - mutex_exit(&(buf_dblwr->mutex)); - - buf_dblwr_flush_buffered_writes(); - - goto try_again; - } - - zip_size = buf_page_get_zip_size(bpage); - void * frame = buf_page_get_frame(bpage); - - if (zip_size) { - UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size); - /* Copy the compressed page and clear the rest. 
*/ - memcpy(buf_dblwr->write_buf - + UNIV_PAGE_SIZE * buf_dblwr->first_free, - frame, zip_size); - memset(buf_dblwr->write_buf - + UNIV_PAGE_SIZE * buf_dblwr->first_free - + zip_size, 0, UNIV_PAGE_SIZE - zip_size); - } else { - ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame, - UNIV_PAGE_SIZE); - - memcpy(buf_dblwr->write_buf - + UNIV_PAGE_SIZE * buf_dblwr->first_free, - frame, UNIV_PAGE_SIZE); - } - - buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage; - - buf_dblwr->first_free++; - buf_dblwr->b_reserved++; - - ut_ad(!buf_dblwr->batch_running); - ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved); - ut_ad(buf_dblwr->b_reserved <= srv_doublewrite_batch_size); - - if (buf_dblwr->first_free == srv_doublewrite_batch_size) { - mutex_exit(&(buf_dblwr->mutex)); - - buf_dblwr_flush_buffered_writes(); - - return; - } - - mutex_exit(&(buf_dblwr->mutex)); -} - -/********************************************************************//** -Writes a page to the doublewrite buffer on disk, sync it, then write -the page to the datafile and sync the datafile. This function is used -for single page flushes. If all the buffers allocated for single page -flushes in the doublewrite buffer are in use we wait here for one to -become free. We are guaranteed that a slot will become free because any -thread that is using a slot must also release the slot before leaving -this function. */ -UNIV_INTERN -void -buf_dblwr_write_single_page( -/*========================*/ - buf_page_t* bpage, /*!< in: buffer block to write */ - bool sync) /*!< in: true if sync IO requested */ -{ - ulint n_slots; - ulint size; - ulint zip_size; - ulint offset; - ulint i; - - ut_a(buf_page_in_file(bpage)); - ut_a(srv_use_doublewrite_buf); - ut_a(buf_dblwr != NULL); - - /* total number of slots available for single page flushes - starts from srv_doublewrite_batch_size to the end of the - buffer. 
*/ - size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; - ut_a(size > srv_doublewrite_batch_size); - n_slots = size - srv_doublewrite_batch_size; - - if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { - - /* Check that the actual page in the buffer pool is - not corrupt and the LSN values are sane. */ - buf_dblwr_check_block((buf_block_t*) bpage); - - /* Check that the page as written to the doublewrite - buffer has sane LSN values. */ - if (!bpage->zip.data) { - buf_dblwr_check_page_lsn( - ((buf_block_t*) bpage)->frame); - } - } - -retry: - mutex_enter(&buf_dblwr->mutex); - if (buf_dblwr->s_reserved == n_slots) { - - /* All slots are reserved. */ - ib_int64_t sig_count = - os_event_reset(buf_dblwr->s_event); - mutex_exit(&buf_dblwr->mutex); - os_event_wait_low(buf_dblwr->s_event, sig_count); - - goto retry; - } - - for (i = srv_doublewrite_batch_size; i < size; ++i) { - - if (!buf_dblwr->in_use[i]) { - break; - } - } - - /* We are guaranteed to find a slot. */ - ut_a(i < size); - buf_dblwr->in_use[i] = true; - buf_dblwr->s_reserved++; - buf_dblwr->buf_block_arr[i] = bpage; - - /* increment the doublewrite flushed pages counter */ - srv_stats.dblwr_pages_written.inc(); - srv_stats.dblwr_writes.inc(); - - mutex_exit(&buf_dblwr->mutex); - - /* Lets see if we are going to write in the first or second - block of the doublewrite buffer. */ - if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - offset = buf_dblwr->block1 + i; - } else { - offset = buf_dblwr->block2 + i - - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; - } - - /* We deal with compressed and uncompressed pages a little - differently here. In case of uncompressed pages we can - directly write the block to the allocated slot in the - doublewrite buffer in the system tablespace and then after - syncing the system table space we can proceed to write the page - in the datafile. - In case of compressed page we first do a memcpy of the block - to the in-memory buffer of doublewrite before proceeding to - write it. 
This is so because we want to pad the remaining - bytes in the doublewrite page with zeros. */ - - zip_size = buf_page_get_zip_size(bpage); - void * frame = buf_page_get_frame(bpage); - - if (zip_size) { - memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i, - frame, zip_size); - memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i - + zip_size, 0, UNIV_PAGE_SIZE - zip_size); - - fil_io(OS_FILE_WRITE, - true, - TRX_SYS_SPACE, - 0, - offset, - 0, - UNIV_PAGE_SIZE, - (void*) (buf_dblwr->write_buf + UNIV_PAGE_SIZE * i), - NULL, - 0); - } else { - /* It is a regular page. Write it directly to the - doublewrite buffer */ - fil_io(OS_FILE_WRITE, - true, - TRX_SYS_SPACE, - 0, - offset, - 0, - bpage->real_size, - frame, - NULL, - 0); - } - - /* Now flush the doublewrite buffer data to disk */ - fil_flush(ulint(TRX_SYS_SPACE)); - - /* We know that the write has been flushed to disk now - and during recovery we will find it in the doublewrite buffer - blocks. Next do the write to the intended position. */ - buf_dblwr_write_block_to_datafile(bpage, sync); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc deleted file mode 100644 index 71b97b770e1..00000000000 --- a/storage/xtradb/buf/buf0dump.cc +++ /dev/null @@ -1,732 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0dump.cc
-Implements a buffer pool dump/load.
-
-Created April 08, 2011 Vasil Dimov
-*******************************************************/
-
-#include "univ.i"
-
-#include <stdarg.h> /* va_* */
-#include <string.h> /* strerror() */
-
-#include "buf0buf.h" /* srv_buf_pool_instances */
-#include "buf0dump.h"
-#include "db0err.h"
-#include "dict0dict.h" /* dict_operation_lock */
-#include "os0file.h" /* OS_FILE_MAX_PATH */
-#include "os0sync.h" /* os_event* */
-#include "os0thread.h" /* os_thread_* */
-#include "srv0srv.h" /* srv_fast_shutdown, srv_buf_dump* */
-#include "srv0start.h" /* srv_shutdown_state */
-#include "sync0rw.h" /* rw_lock_s_lock() */
-#include "ut0byte.h" /* ut_ull_create() */
-#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
-
-/* Severity of a dump/load status message. buf_dump_status() casts the value
-to ib_log_level_t when logging; buf_load_status() prints only STATUS_NOTICE
-and STATUS_ERR messages to stderr. */
-enum status_severity {
-	STATUS_INFO,
-	STATUS_NOTICE,
-	STATUS_ERR
-};
-
-/* True once srv_shutdown_state has left SRV_SHUTDOWN_NONE. */
-#define SHUTTING_DOWN()		(UNIV_UNLIKELY(srv_shutdown_state \
-				 != SRV_SHUTDOWN_NONE))
-
-/* Flags that tell the buffer pool dump/load thread which action it should
-take after being woken up. They are set by buf_dump_start() and
-buf_load_start() and cleared by buf_dump_thread(). */
-static volatile bool buf_dump_should_start;
-static volatile bool buf_load_should_start;
-
-/* Set by buf_load_abort(); polled and cleared by buf_load(). */
-static ibool buf_load_abort_flag = FALSE;
-
-/* Used to temporarily store dump info in order to avoid IO while holding
-buffer pool LRU list mutex during dump and also to sort the contents of the
-dump before reading the pages from disk during load.
-We store the space id in the high 32 bits and page no in low 32 bits.
*/ -typedef ib_uint64_t buf_dump_t; - -/* Aux macros to create buf_dump_t and to extract space and page from it */ -#define BUF_DUMP_CREATE(space, page) ut_ull_create(space, page) -#define BUF_DUMP_SPACE(a) ((ulint) ((a) >> 32)) -#define BUF_DUMP_PAGE(a) ((ulint) ((a) & 0xFFFFFFFFUL)) - -/*****************************************************************//** -Wakes up the buffer pool dump/load thread and instructs it to start -a dump. This function is called by MySQL code via buffer_pool_dump_now() -and it should return immediately because the whole MySQL is frozen during -its execution. */ -UNIV_INTERN -void -buf_dump_start() -/*============*/ -{ - buf_dump_should_start = true; - os_event_set(srv_buf_dump_event); -} - -/*****************************************************************//** -Wakes up the buffer pool dump/load thread and instructs it to start -a load. This function is called by MySQL code via buffer_pool_load_now() -and it should return immediately because the whole MySQL is frozen during -its execution. */ -UNIV_INTERN -void -buf_load_start() -/*============*/ -{ - buf_load_should_start = true; - os_event_set(srv_buf_dump_event); -} - -/*****************************************************************//** -Sets the global variable that feeds MySQL's innodb_buffer_pool_dump_status -to the specified string. The format and the following parameters are the -same as the ones used for printf(3). The value of this variable can be -retrieved by: -SELECT variable_value FROM information_schema.global_status WHERE -variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS'; -or by: -SHOW STATUS LIKE 'innodb_buffer_pool_dump_status'; */ -static MY_ATTRIBUTE((nonnull, format(printf, 2, 3))) -void -buf_dump_status( -/*============*/ - enum status_severity severity,/*!< in: status severity */ - const char* fmt, /*!< in: format */ - ...) 
/*!< in: extra parameters according - to fmt */ -{ - va_list ap; - - va_start(ap, fmt); - - ut_vsnprintf( - export_vars.innodb_buffer_pool_dump_status, - sizeof(export_vars.innodb_buffer_pool_dump_status), - fmt, ap); - - ib_logf((ib_log_level_t) severity, "%s", export_vars.innodb_buffer_pool_dump_status); - - va_end(ap); -} - -/*****************************************************************//** -Sets the global variable that feeds MySQL's innodb_buffer_pool_load_status -to the specified string. The format and the following parameters are the -same as the ones used for printf(3). The value of this variable can be -retrieved by: -SELECT variable_value FROM information_schema.global_status WHERE -variable_name = 'INNODB_BUFFER_POOL_LOAD_STATUS'; -or by: -SHOW STATUS LIKE 'innodb_buffer_pool_load_status'; */ -static MY_ATTRIBUTE((nonnull, format(printf, 2, 3))) -void -buf_load_status( -/*============*/ - enum status_severity severity,/*!< in: status severity */ - const char* fmt, /*!< in: format */ - ...) /*!< in: extra parameters according to fmt */ -{ - va_list ap; - - va_start(ap, fmt); - - ut_vsnprintf( - export_vars.innodb_buffer_pool_load_status, - sizeof(export_vars.innodb_buffer_pool_load_status), - fmt, ap); - - if (severity == STATUS_NOTICE || severity == STATUS_ERR) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", - export_vars.innodb_buffer_pool_load_status); - } - - va_end(ap); -} - -/** Returns the directory path where the buffer pool dump file will be created. -@return directory path */ -static -const char* -get_buf_dump_dir() -{ - const char* dump_dir; - - /* The dump file should be created in the default data directory if - innodb_data_home_dir is set as an empty string. 
*/ - if (strcmp(srv_data_home, "") == 0) { - dump_dir = fil_path_to_mysql_datadir; - } else { - dump_dir = srv_data_home; - } - - return(dump_dir); -} - -/*****************************************************************//** -Perform a buffer pool dump into the file specified by -innodb_buffer_pool_filename. If any errors occur then the value of -innodb_buffer_pool_dump_status will be set accordingly, see buf_dump_status(). -The dump filename can be specified by (relative to srv_data_home): -SET GLOBAL innodb_buffer_pool_filename='filename'; */ -static -void -buf_dump( -/*=====*/ - ibool obey_shutdown) /*!< in: quit if we are in a shutting down - state */ -{ -#define SHOULD_QUIT() (SHUTTING_DOWN() && obey_shutdown) - - char full_filename[OS_FILE_MAX_PATH]; - char tmp_filename[OS_FILE_MAX_PATH]; - char now[32]; - FILE* f; - ulint i; - int ret; - - ut_snprintf(full_filename, sizeof(full_filename), - "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR, - srv_buf_dump_filename); - - ut_snprintf(tmp_filename, sizeof(tmp_filename), - "%s.incomplete", full_filename); - - buf_dump_status(STATUS_NOTICE, "Dumping buffer pool(s) to %s", - full_filename); - - f = fopen(tmp_filename, "w"); - if (f == NULL) { - buf_dump_status(STATUS_ERR, - "Cannot open '%s' for writing: %s", - tmp_filename, strerror(errno)); - return; - } - /* else */ - - /* walk through each buffer pool */ - for (i = 0; i < srv_buf_pool_instances && !SHOULD_QUIT(); i++) { - buf_pool_t* buf_pool; - const buf_page_t* bpage; - buf_dump_t* dump; - ulint n_pages; - ulint j; - ulint limit; - ulint counter; - - buf_pool = buf_pool_from_array(i); - - /* obtain buf_pool LRU list mutex before allocate, since - UT_LIST_GET_LEN(buf_pool->LRU) could change */ - mutex_enter(&buf_pool->LRU_list_mutex); - - n_pages = UT_LIST_GET_LEN(buf_pool->LRU); - - /* skip empty buffer pools */ - if (n_pages == 0) { - mutex_exit(&buf_pool->LRU_list_mutex); - continue; - } - - if (srv_buf_pool_dump_pct != 100) { - ut_ad(srv_buf_pool_dump_pct 
< 100); - - n_pages = n_pages * srv_buf_pool_dump_pct / 100; - - if (n_pages == 0) { - n_pages = 1; - } - } - - dump = static_cast<buf_dump_t*>( - ut_malloc(n_pages * sizeof(*dump))) ; - - if (dump == NULL) { - mutex_exit(&buf_pool->LRU_list_mutex); - fclose(f); - buf_dump_status(STATUS_ERR, - "Cannot allocate " ULINTPF " bytes: %s", - (ulint) (n_pages * sizeof(*dump)), - strerror(errno)); - /* leave tmp_filename to exist */ - return; - } - - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU), j = 0; - bpage != NULL && j < n_pages; - bpage = UT_LIST_GET_NEXT(LRU, bpage), j++) { - - ut_a(buf_page_in_file(bpage)); - - dump[j] = BUF_DUMP_CREATE(buf_page_get_space(bpage), - buf_page_get_page_no(bpage)); - } - - ut_a(j == n_pages); - - mutex_exit(&buf_pool->LRU_list_mutex); - - limit = (ulint)((double)n_pages * ((double)srv_buf_dump_status_frequency / (double)100)); - counter = 0; - - for (j = 0; j < n_pages && !SHOULD_QUIT(); j++) { - ret = fprintf(f, ULINTPF "," ULINTPF "\n", - BUF_DUMP_SPACE(dump[j]), - BUF_DUMP_PAGE(dump[j])); - if (ret < 0) { - ut_free(dump); - fclose(f); - buf_dump_status(STATUS_ERR, - "Cannot write to '%s': %s", - tmp_filename, strerror(errno)); - /* leave tmp_filename to exist */ - return; - } - - counter++; - - /* Print buffer pool dump status only if - srv_buf_dump_status_frequency is > 0 and - we have processed that amount of pages. 
*/ - if (srv_buf_dump_status_frequency && - counter == limit) { - counter = 0; - buf_dump_status( - STATUS_INFO, - "Dumping buffer pool " - ULINTPF "/" ULINTPF ", " - "page " ULINTPF "/" ULINTPF, - i + 1, srv_buf_pool_instances, - j + 1, n_pages); - } - } - - ut_free(dump); - } - - ret = fclose(f); - if (ret != 0) { - buf_dump_status(STATUS_ERR, - "Cannot close '%s': %s", - tmp_filename, strerror(errno)); - return; - } - /* else */ - - ret = unlink(full_filename); - if (ret != 0 && errno != ENOENT) { - buf_dump_status(STATUS_ERR, - "Cannot delete '%s': %s", - full_filename, strerror(errno)); - /* leave tmp_filename to exist */ - return; - } - /* else */ - - ret = rename(tmp_filename, full_filename); - if (ret != 0) { - buf_dump_status(STATUS_ERR, - "Cannot rename '%s' to '%s': %s", - tmp_filename, full_filename, - strerror(errno)); - /* leave tmp_filename to exist */ - return; - } - /* else */ - - /* success */ - - ut_sprintf_timestamp(now); - - buf_dump_status(STATUS_NOTICE, - "Buffer pool(s) dump completed at %s", now); -} - -/*****************************************************************//** -Compare two buffer pool dump entries, used to sort the dump on -space_no,page_no before loading in order to increase the chance for -sequential IO. -@return -1/0/1 if entry 1 is smaller/equal/bigger than entry 2 */ -static -lint -buf_dump_cmp( -/*=========*/ - const buf_dump_t d1, /*!< in: buffer pool dump entry 1 */ - const buf_dump_t d2) /*!< in: buffer pool dump entry 2 */ -{ - if (d1 < d2) { - return(-1); - } else if (d1 == d2) { - return(0); - } else { - return(1); - } -} - -/*****************************************************************//** -Sort a buffer pool dump on space_no, page_no. 
*/
-static
-void
-buf_dump_sort(
-/*==========*/
-	buf_dump_t*	dump,	/*!< in/out: buffer pool dump to sort */
-	buf_dump_t*	tmp,	/*!< in/out: temp storage */
-	ulint		low,	/*!< in: lowest index (inclusive) */
-	ulint		high)	/*!< in: highest index (non-inclusive) */
-{
-	/* The whole body comes from the generic sort macro, which is
-	given this function's own name and buf_dump_cmp() as the
-	ordering. */
-	UT_SORT_FUNCTION_BODY(buf_dump_sort, dump, tmp, low, high,
-			      buf_dump_cmp);
-}
-
-/*****************************************************************//**
-Slows the buffer pool load down when the server is busy with other work,
-so that the load does not hog IO and slow down normal client queries
-too much. */
-UNIV_INLINE
-void
-buf_load_throttle_if_needed(
-/*========================*/
-	ulint*	last_check_time,	/*!< in/out: milliseconds since epoch
-					of the last time we did check if
-					throttling is needed, we do the check
-					every srv_io_capacity IO ops. */
-	ulint*	last_activity_count,
-	ulint	n_io)			/*!< in: number of IO ops done since
-					buffer pool load has started */
-{
-	/* Only the last IO operation of every srv_io_capacity-sized group
-	gets past this point. */
-	const ulint	pos_in_group = n_io % srv_io_capacity;
-
-	if (pos_in_group < srv_io_capacity - 1) {
-		return;
-	}
-
-	const bool	have_baseline =
-		!(*last_check_time == 0 || *last_activity_count == 0);
-
-	if (have_baseline) {
-		/* srv_io_capacity IO operations have been performed by
-		buffer pool load since the last time we were here. With no
-		other server activity in the meantime, carry on at full
-		speed and leave the baseline untouched. */
-		if (srv_get_activity_count() == *last_activity_count) {
-			return;
-		}
-
-		/* There has been other activity, throttle.
-
-		The measured interval starts at the last call that either
-		set the baseline or saw other activity, not at the start of
-		the most recent group of IO operations. After a long quiet
-		period the first call that notices activity therefore sees
-		a large interval and only refreshes the baseline; sleeping
-		begins with the following group. Refreshing the timestamp
-		before the activity check would close that gap, but would
-		mean calling ut_time_ms() for every group, which may turn
-		out to be too expensive. */
-		const ulint	elapsed_ms = ut_time_ms() - *last_check_time;
-
-		if (elapsed_ms < 1000 /* 1 sec (1000 milli secs) */) {
-			/* Sleep for the rest of the second. */
-			os_thread_sleep((1000 - elapsed_ms)
-					* 1000 /* micro secs */);
-		}
-	}
-
-	/* Record a fresh baseline: on the first check this is all that
-	happens, afterwards it closes a throttling round. */
-	*last_check_time = ut_time_ms();
-	*last_activity_count = srv_get_activity_count();
-}
-
-/*****************************************************************//**
-Perform a buffer pool load from the file specified by
-innodb_buffer_pool_filename. If any errors occur then the value of
-innodb_buffer_pool_load_status will be set accordingly, see buf_load_status().
-The dump filename can be specified by (relative to srv_data_home):
-SET GLOBAL innodb_buffer_pool_filename='filename';
-The file is read twice: once to count its entries and once to parse them.
-The entries are then sorted and each page is requested with
-buf_read_page_async(). The load stops early on shutdown or when
-buf_load_abort() has been called. */
-static
-void
-buf_load()
-/*======*/
-{
-	char		full_filename[OS_FILE_MAX_PATH];
-	char		now[32];
-	FILE*		f;
-	buf_dump_t*	dump;
-	buf_dump_t*	dump_tmp;
-	ulint		dump_n;
-	ulint		total_buffer_pools_pages;
-	ulint		i;
-	ulint		space_id;
-	ulint		page_no;
-	int		fscanf_ret;
-
-	/* Ignore any leftovers from before */
-	buf_load_abort_flag = FALSE;
-
-	ut_snprintf(full_filename, sizeof(full_filename),
-		    "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
-		    srv_buf_dump_filename);
-
-	buf_load_status(STATUS_NOTICE,
-			"Loading buffer pool(s) from %s", full_filename);
-
-	f = fopen(full_filename, "r");
-	if (f == NULL) {
-		buf_load_status(STATUS_ERR,
-				"Cannot open '%s' for reading: %s",
-				full_filename, strerror(errno));
-		return;
-	}
-	/* else */
-
-	/* First scan the file to estimate how many entries are in it.
-	This file is tiny (approx 500KB per 1GB buffer pool), reading it
-	two times is fine. */
-	dump_n = 0;
-	while (fscanf(f, ULINTPF "," ULINTPF, &space_id, &page_no) == 2
-	       && !SHUTTING_DOWN()) {
-		dump_n++;
-	}
-
-	if (!SHUTTING_DOWN() && !feof(f)) {
-		/* fscanf() returned != 2 */
-		const char*	what;
-		if (ferror(f)) {
-			what = "reading";
-		} else {
-			what = "parsing";
-		}
-		fclose(f);
-		buf_load_status(STATUS_ERR, "Error %s '%s', "
-				"unable to load buffer pool (stage 1)",
-				what, full_filename);
-		return;
-	}
-
-	/* If dump is larger than the buffer pool(s), then we ignore the
-	extra trailing. This could happen if a dump is made, then buffer
-	pool is shrunk and then load it attempted. */
-	total_buffer_pools_pages = buf_pool_get_n_pages()
-		* srv_buf_pool_instances;
-	if (dump_n > total_buffer_pools_pages) {
-		dump_n = total_buffer_pools_pages;
-	}
-
-	if (dump_n == 0) {
-		/* Nothing to load. Return before asking the allocator for
-		zero bytes, whose result is implementation-defined. The
-		status message matches the one used further below for a
-		file that turns out to be empty on the second pass. */
-		fclose(f);
-		ut_sprintf_timestamp(now);
-		buf_load_status(STATUS_NOTICE,
-				"Buffer pool(s) load completed at %s "
-				"(%s was empty)", now, full_filename);
-		return;
-	}
-
-	dump = static_cast<buf_dump_t*>(ut_malloc(dump_n * sizeof(*dump)));
-
-	if (dump == NULL) {
-		/* Remember errno before fclose() can overwrite it. */
-		const int	alloc_errno = errno;
-
-		fclose(f);
-		buf_load_status(STATUS_ERR,
-				"Cannot allocate " ULINTPF " bytes: %s",
-				(ulint) (dump_n * sizeof(*dump)),
-				strerror(alloc_errno));
-		return;
-	}
-
-	/* Scratch array required by buf_dump_sort(). */
-	dump_tmp = static_cast<buf_dump_t*>(
-		ut_malloc(dump_n * sizeof(*dump_tmp)));
-
-	if (dump_tmp == NULL) {
-		/* Remember errno before ut_free() and fclose() can
-		overwrite it. */
-		const int	alloc_errno = errno;
-
-		ut_free(dump);
-		fclose(f);
-		buf_load_status(STATUS_ERR,
-				"Cannot allocate " ULINTPF " bytes: %s",
-				(ulint) (dump_n * sizeof(*dump_tmp)),
-				strerror(alloc_errno));
-		return;
-	}
-
-	/* Second pass: parse the entries into the array. */
-	rewind(f);
-
-	for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
-		fscanf_ret = fscanf(f, ULINTPF "," ULINTPF,
-				    &space_id, &page_no);
-
-		if (fscanf_ret != 2) {
-			if (feof(f)) {
-				break;
-			}
-			/* else */
-
-			ut_free(dump);
-			ut_free(dump_tmp);
-			fclose(f);
-			buf_load_status(STATUS_ERR,
-					"Error parsing '%s', unable "
-					"to load buffer pool (stage 2)",
-					full_filename);
-			return;
-		}
-
-		/* Both values must fit in 32 bits to be packed into one
-		buf_dump_t entry. */
-		if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) {
-			ut_free(dump);
-			ut_free(dump_tmp);
-			fclose(f);
-			buf_load_status(STATUS_ERR,
-					"Error parsing '%s': bogus "
-					"space,page " ULINTPF "," ULINTPF
-					" at line " ULINTPF ", "
-					"unable to load buffer pool",
-					full_filename,
-					space_id, page_no,
-					i);
-			return;
-		}
-
-		dump[i] = BUF_DUMP_CREATE(space_id, page_no);
-	}
-
-	/* Set dump_n to the actual number of initialized elements,
-	i could be smaller than dump_n here if the file got truncated after
-	we read it the first time. */
-	dump_n = i;
-
-	fclose(f);
-
-	if (dump_n == 0) {
-		ut_free(dump);
-		ut_free(dump_tmp);
-		ut_sprintf_timestamp(now);
-		buf_load_status(STATUS_NOTICE,
-				"Buffer pool(s) load completed at %s "
-				"(%s was empty)", now, full_filename);
-		return;
-	}
-
-	if (!SHUTTING_DOWN()) {
-		buf_dump_sort(dump, dump_tmp, 0, dump_n);
-	}
-
-	ut_free(dump_tmp);
-
-	ulint	last_check_time = 0;
-	ulint	last_activity_cnt = 0;
-
-	for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
-
-		buf_read_page_async(BUF_DUMP_SPACE(dump[i]),
-				    BUF_DUMP_PAGE(dump[i]));
-
-		/* Wake the simulated AIO handler threads after every 64
-		requested pages. */
-		if (i % 64 == 63) {
-			os_aio_simulated_wake_handler_threads();
-		}
-
-		/* Refresh the progress status every 128 pages. */
-		if (i % 128 == 0) {
-			buf_load_status(STATUS_INFO,
-					"Loaded " ULINTPF "/" ULINTPF " pages",
-					i + 1, dump_n);
-		}
-
-		if (buf_load_abort_flag) {
-			buf_load_abort_flag = FALSE;
-			ut_free(dump);
-			buf_load_status(
-				STATUS_NOTICE,
-				"Buffer pool(s) load aborted on request");
-			return;
-		}
-
-		buf_load_throttle_if_needed(
-			&last_check_time, &last_activity_cnt, i);
-	}
-
-	ut_free(dump);
-
-	ut_sprintf_timestamp(now);
-
-	buf_load_status(STATUS_NOTICE,
-			"Buffer pool(s) load completed at %s", now);
-}
-
-/*****************************************************************//**
-Aborts a currently running buffer pool load. This function is called by
-MySQL code via buffer_pool_load_abort() and it should return immediately
-because the whole MySQL is frozen during its execution. */
-UNIV_INTERN
-void
-buf_load_abort()
-/*============*/
-{
-	/* buf_load() polls this flag once per requested page and clears
-	it when it stops. */
-	buf_load_abort_flag = TRUE;
-}
-
-/*****************************************************************//**
-This is the main thread for buffer pool dump/load. It waits for an
-event and when waked up either performs a dump or load and sleeps
-again.
-@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_dump_thread)(void*)
-{
-	my_thread_init();
-	ut_ad(!srv_read_only_mode);
-
-	/* Give both status variables an initial value. */
-	buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) not yet started");
-	buf_load_status(STATUS_INFO, "Loading buffer pool(s) not yet started");
-
-	if (srv_buffer_pool_load_at_startup) {
-		buf_load();
-	}
-
-	while (!SHUTTING_DOWN()) {
-
-		os_event_wait(srv_buf_dump_event);
-
-		/* Each request flag is cleared before the work starts. */
-		if (buf_dump_should_start) {
-			buf_dump_should_start = false;
-			buf_dump(TRUE /* quit on shutdown */);
-		}
-
-		if (buf_load_should_start) {
-			buf_load_should_start = false;
-			buf_load();
-		}
-
-		/* A request flag that is set again at this point is served
-		on the next iteration without resetting the event first. */
-		if (buf_dump_should_start || buf_load_should_start) {
-			continue;
-		}
-		os_event_reset(srv_buf_dump_event);
-	}
-
-	/* NOTE(review): srv_fast_shutdown == 2 presumably selects a
-	shutdown mode in which the final dump must be skipped; confirm
-	against the definition of srv_fast_shutdown. */
-	if (srv_buffer_pool_dump_at_shutdown && srv_fast_shutdown != 2) {
-		buf_dump(FALSE /* ignore shutdown down flag,
-		keep going even if we are in a shutdown state */);
-	}
-
-	srv_buf_dump_thread_active = false;
-
-	my_thread_end();
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit and not use return() to exit. */
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
deleted file mode 100644
index 84eea3bc692..00000000000
--- a/storage/xtradb/buf/buf0flu.cc
+++ /dev/null
@@ -1,3133 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-Copyright (c) 2013, 2014, Fusion-io
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0flu.cc -The database buffer buf_pool flush algorithm - -Created 11/11/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0flu.h" - -#ifdef UNIV_NONINL -#include "buf0flu.ic" -#endif - -#include "buf0buf.h" -#include "buf0mtflu.h" -#include "buf0checksum.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "page0zip.h" -#ifndef UNIV_HOTBACKUP -#include "ut0byte.h" -#include "ut0lst.h" -#include "page0page.h" -#include "fil0fil.h" -#include "buf0lru.h" -#include "buf0rea.h" -#include "ibuf0ibuf.h" -#include "log0log.h" -#include "os0file.h" -#include "os0sync.h" -#include "trx0sys.h" -#include "srv0mon.h" -#include "mysql/plugin.h" -#include "mysql/service_thd_wait.h" -#include "fil0pagecompress.h" - -/** Number of pages flushed through non flush_list flushes. */ -// static ulint buf_lru_flush_page_count = 0; - -/** Flag indicating if the page_cleaner is in active state. This flag -is set to TRUE by the page_cleaner thread when it is spawned and is set -back to FALSE at shutdown by the page_cleaner as well. Therefore no -need to protect it by a mutex. It is only ever read by the thread -doing the shutdown */ -UNIV_INTERN bool buf_page_cleaner_is_active; - -/** Flag indicating if the lru_manager is in active state. 
*/ -UNIV_INTERN bool buf_lru_manager_is_active; - -#ifdef UNIV_PFS_THREAD -UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; -UNIV_INTERN mysql_pfs_key_t buf_lru_manager_thread_key; -#endif /* UNIV_PFS_THREAD */ - -/* @} */ - -/******************************************************************//** -Increases flush_list size in bytes with zip_size for compressed page, -UNIV_PAGE_SIZE for uncompressed page in inline function */ -static inline -void -incr_flush_list_size_in_bytes( -/*==========================*/ - buf_block_t* block, /*!< in: control block */ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - ut_ad(buf_flush_list_mutex_own(buf_pool)); - ulint zip_size = page_zip_get_size(&block->page.zip); - buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE; - ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size); -} - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/******************************************************************//** -Validates the flush list. -@return TRUE if ok */ -static -ibool -buf_flush_validate_low( -/*===================*/ - buf_pool_t* buf_pool); /*!< in: Buffer pool instance */ - -/******************************************************************//** -Validates the flush list some of the time. -@return TRUE if ok or the check was skipped */ -static -ibool -buf_flush_validate_skip( -/*====================*/ - buf_pool_t* buf_pool) /*!< in: Buffer pool instance */ -{ -/** Try buf_flush_validate_low() every this many times */ -# define BUF_FLUSH_VALIDATE_SKIP 23 - - /** The buf_flush_validate_low() call skip counter. - Use a signed type because of the race condition below. */ - static int buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP; - - /* There is a race condition below, but it does not matter, - because this call is only for heuristic purposes. We want to - reduce the call frequency of the costly buf_flush_validate_low() - check in debug builds. 
*/ - if (--buf_flush_validate_count > 0) { - return(TRUE); - } - - buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP; - return(buf_flush_validate_low(buf_pool)); -} -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -/*******************************************************************//** -Sets hazard pointer during flush_list iteration. */ -UNIV_INLINE -void -buf_flush_set_hp( -/*=============*/ - buf_pool_t* buf_pool,/*!< in/out: buffer pool instance */ - const buf_page_t* bpage) /*!< in: buffer control block */ -{ - ut_ad(buf_flush_list_mutex_own(buf_pool)); - ut_ad(buf_pool->flush_list_hp == NULL || bpage == NULL); - ut_ad(!bpage || buf_page_in_file(bpage) - || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); - ut_ad(!bpage || bpage->in_flush_list); - ut_ad(!bpage || buf_pool_from_bpage(bpage) == buf_pool); - - buf_pool->flush_list_hp = bpage; -} - -/*******************************************************************//** -Checks if the given block is a hazard pointer -@return true if bpage is hazard pointer */ -UNIV_INLINE -bool -buf_flush_is_hp( -/*============*/ - buf_pool_t* buf_pool,/*!< in: buffer pool instance */ - const buf_page_t* bpage) /*!< in: buffer control block */ -{ - ut_ad(buf_flush_list_mutex_own(buf_pool)); - - return(buf_pool->flush_list_hp == bpage); -} - -/*******************************************************************//** -Whenever we move a block in flush_list (either to remove it or to -relocate it) we check the hazard pointer set by some other thread -doing the flush list scan. If the hazard pointer is the same as the -one we are about going to move then we set it to NULL to force a rescan -in the thread doing the batch. 
*/ -UNIV_INLINE -void -buf_flush_update_hp( -/*================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_page_t* bpage) /*!< in: buffer control block */ -{ - ut_ad(buf_flush_list_mutex_own(buf_pool)); - - if (buf_flush_is_hp(buf_pool, bpage)) { - buf_flush_set_hp(buf_pool, NULL); - MONITOR_INC(MONITOR_FLUSH_HP_RESCAN); - } -} - -/******************************************************************//** -Insert a block in the flush_rbt and returns a pointer to its -predecessor or NULL if no predecessor. The ordering is maintained -on the basis of the <oldest_modification, space, offset> key. -@return pointer to the predecessor or NULL if no predecessor. */ -static -buf_page_t* -buf_flush_insert_in_flush_rbt( -/*==========================*/ - buf_page_t* bpage) /*!< in: bpage to be inserted. */ -{ - const ib_rbt_node_t* c_node; - const ib_rbt_node_t* p_node; - buf_page_t* prev = NULL; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(buf_flush_list_mutex_own(buf_pool)); - - /* Insert this buffer into the rbt. */ - c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage); - ut_a(c_node != NULL); - - /* Get the predecessor. */ - p_node = rbt_prev(buf_pool->flush_rbt, c_node); - - if (p_node != NULL) { - buf_page_t** value; - value = rbt_value(buf_page_t*, p_node); - prev = *value; - ut_a(prev != NULL); - } - - return(prev); -} - -/*********************************************************//** -Delete a bpage from the flush_rbt. */ -static -void -buf_flush_delete_from_flush_rbt( -/*============================*/ - buf_page_t* bpage) /*!< in: bpage to be removed. 
*/ -{ -#ifdef UNIV_DEBUG - ibool ret = FALSE; -#endif /* UNIV_DEBUG */ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(buf_flush_list_mutex_own(buf_pool)); - -#ifdef UNIV_DEBUG - ret = -#endif /* UNIV_DEBUG */ - rbt_delete(buf_pool->flush_rbt, &bpage); - - ut_ad(ret); -} - -/*****************************************************************//** -Compare two modified blocks in the buffer pool. The key for comparison -is: -key = <oldest_modification, space, offset> -This comparison is used to maintian ordering of blocks in the -buf_pool->flush_rbt. -Note that for the purpose of flush_rbt, we only need to order blocks -on the oldest_modification. The other two fields are used to uniquely -identify the blocks. -@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */ -static -int -buf_flush_block_cmp( -/*================*/ - const void* p1, /*!< in: block1 */ - const void* p2) /*!< in: block2 */ -{ - int ret; - const buf_page_t* b1 = *(const buf_page_t**) p1; - const buf_page_t* b2 = *(const buf_page_t**) p2; -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(b1); -#endif /* UNIV_DEBUG */ - - ut_ad(b1 != NULL); - ut_ad(b2 != NULL); - - ut_ad(buf_flush_list_mutex_own(buf_pool)); - - ut_ad(b1->in_flush_list); - ut_ad(b2->in_flush_list); - - if (b2->oldest_modification > b1->oldest_modification) { - return(1); - } else if (b2->oldest_modification < b1->oldest_modification) { - return(-1); - } - - /* If oldest_modification is same then decide on the space. */ - ret = (int)(b2->space - b1->space); - - /* Or else decide ordering on the offset field. */ - return(ret ? ret : (int)(b2->offset - b1->offset)); -} - -/********************************************************************//** -Initialize the red-black tree to speed up insertions into the flush_list -during recovery process. Should be called at the start of recovery -process before any page has been read/written. 
*/
-UNIV_INTERN
-void
-buf_flush_init_flush_rbt(void)
-/*==========================*/
-{
-	ulint	i;
-
-	/* Each buffer pool instance gets its own tree, created under
-	that instance's flush list mutex. */
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
-
-		buf_pool = buf_pool_from_array(i);
-
-		buf_flush_list_mutex_enter(buf_pool);
-
-		ut_ad(buf_pool->flush_rbt == NULL);
-
-		/* Create red black tree for speedy insertions in flush list. */
-		buf_pool->flush_rbt = rbt_create(
-			sizeof(buf_page_t*), buf_flush_block_cmp);
-
-		buf_flush_list_mutex_exit(buf_pool);
-	}
-}
-
-/********************************************************************//**
-Frees up the red-black tree of every buffer pool instance and resets
-the flush_rbt pointer to NULL. */
-UNIV_INTERN
-void
-buf_flush_free_flush_rbt(void)
-/*==========================*/
-{
-	ulint	i;
-
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool;
-
-		buf_pool = buf_pool_from_array(i);
-
-		buf_flush_list_mutex_enter(buf_pool);
-
-		/* In debug builds, validate the flush list one last time
-		before the tree is freed. */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-		ut_a(buf_flush_validate_low(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-		rbt_free(buf_pool->flush_rbt);
-		buf_pool->flush_rbt = NULL;
-
-		buf_flush_list_mutex_exit(buf_pool);
-	}
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list. The block is added at the
-head of the list, unless the flush_rbt exists, in which case the insert
-is delegated to buf_flush_insert_sorted_into_flush_list(). */
-UNIV_INTERN
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	buf_block_t*	block,		/*!< in/out: block which is modified */
-	lsn_t		lsn)		/*!< in: oldest modification */
-{
-	ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
-	ut_ad(log_flush_order_mutex_own());
-	ut_ad(mutex_own(&block->mutex));
-
-	buf_flush_list_mutex_enter(buf_pool);
-
-	/* The new lsn must not be older than that of the current head of
-	the list. */
-	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
-	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
-		  <= lsn));
-
-	/* If we are in the recovery then we need to update the flush
-	red-black tree as well. The flush list mutex is released first
-	because the sorted insert acquires it itself. */
-	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
-		buf_flush_list_mutex_exit(buf_pool);
-		buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
-		return;
-	}
-
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	ut_ad(!block->page.in_flush_list);
-
-	ut_d(block->page.in_flush_list = TRUE);
-	block->page.oldest_modification = lsn;
-	UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-	incr_flush_list_size_in_bytes(block, buf_pool);
-
-#ifdef UNIV_DEBUG_VALGRIND
-	{
-		ulint	zip_size = buf_block_get_zip_size(block);
-
-		if (zip_size) {
-			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-		} else {
-			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-		}
-	}
-#endif /* UNIV_DEBUG_VALGRIND */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(buf_flush_validate_skip(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-	buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-UNIV_INTERN
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_block_t*	block,		/*!< in/out: block which is modified */
-	lsn_t		lsn)		/*!< in: oldest modification */
-{
-	buf_page_t*	prev_b;
-	buf_page_t*	b;
-
-	ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
-	ut_ad(log_flush_order_mutex_own());
-	ut_ad(mutex_own(&block->mutex));
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-	buf_flush_list_mutex_enter(buf_pool);
-
-	/* The field in_LRU_list is protected by buf_pool->LRU_list_mutex,
-	which we are not holding. However, while a block is in the flush
-	list, it is dirty and cannot be discarded, not from the
-	page_hash or from the LRU list. At most, the uncompressed
-	page frame of a compressed block may be discarded or created
-	(copying the block->page to or from a buf_page_t that is
-	dynamically allocated from buf_buddy_alloc()). Because those
-	transitions hold block->mutex and the flush list mutex (via
-	buf_flush_relocate_on_flush_list()), there is no possibility
-	of a race condition in the assertions below. */
-	ut_ad(block->page.in_LRU_list);
-	ut_ad(block->page.in_page_hash);
-	/* buf_buddy_block_register() will take a block in the
-	BUF_BLOCK_MEMORY state, not a file page. */
-	ut_ad(!block->page.in_zip_hash);
-
-	ut_ad(!block->page.in_flush_list);
-	ut_d(block->page.in_flush_list = TRUE);
-	block->page.oldest_modification = lsn;
-
-#ifdef UNIV_DEBUG_VALGRIND
-	{
-		ulint	zip_size = buf_block_get_zip_size(block);
-
-		if (zip_size) {
-			UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
-		} else {
-			UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
-		}
-	}
-#endif /* UNIV_DEBUG_VALGRIND */
-
-	prev_b = NULL;
-
-	/* For the most part when this function is called the flush_rbt
-	should not be NULL. In a very rare boundary case it is possible
-	that the flush_rbt has already been freed by the recovery thread
-	before the last page was hooked up in the flush_list by the
-	io-handler thread. In that case we'll just do a simple
-	linear search in the else block. */
-	if (buf_pool->flush_rbt) {
-
-		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
-
-	} else {
-
-		/* Walk from the head of the list past every block whose
-		oldest_modification is greater than that of the new
-		block; prev_b ends up as the last such block. */
-		b = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
-		while (b && b->oldest_modification
-		       > block->page.oldest_modification) {
-			ut_ad(b->in_flush_list);
-			prev_b = b;
-			b = UT_LIST_GET_NEXT(list, b);
-		}
-	}
-
-	/* Without a predecessor the block becomes the new head of the
-	list; otherwise it is linked in right after the predecessor. */
-	if (prev_b == NULL) {
-		UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
-	} else {
-		UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
-				     prev_b, &block->page);
-	}
-
-	incr_flush_list_size_in_bytes(block, buf_pool);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(buf_flush_validate_low(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-	buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed.
-If the block is not a file page, an error with the block state and a dump
-of the control block is printed to stderr and FALSE is returned.
-@return TRUE if can replace immediately */
-UNIV_INTERN
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
-	buf_page_t*	bpage)	/*!< in: buffer control block, must be
-				buf_page_in_file(bpage) and in the LRU list */
-{
-#ifdef UNIV_DEBUG
-	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-#endif /* UNIV_DEBUG */
-	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-	ut_ad(bpage->in_LRU_list);
-
-	if (buf_page_in_file(bpage)) {
-
-		/* Replaceable only when the page has no recorded
-		modification, is not buffer-fixed and has no IO fix. */
-		return(bpage->oldest_modification == 0
-		       && bpage->buf_fix_count == 0
-		       && buf_page_get_io_fix(bpage) == BUF_IO_NONE);
-	}
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		" InnoDB: Error: buffer block state %lu"
-		" in the LRU list!\n",
-		(ulong) buf_page_get_state(bpage));
-	ut_print_buf(stderr, bpage, sizeof(buf_page_t));
-	putc('\n', stderr);
-
-	return(FALSE);
-}
-
-/********************************************************************//**
-Returns true if the block is modified and ready for flushing.
-@return true if can flush immediately */ -UNIV_INTERN -bool -buf_flush_ready_for_flush( -/*======================*/ - buf_page_t* bpage, /*!< in: buffer control block, must be - buf_page_in_file(bpage) */ - buf_flush_t flush_type)/*!< in: type of flush */ -{ - ut_ad(flush_type < BUF_FLUSH_N_TYPES); - ut_ad(mutex_own(buf_page_get_mutex(bpage)) - || flush_type == BUF_FLUSH_LIST); - ut_a(buf_page_in_file(bpage) || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); - - if (bpage->oldest_modification == 0 - || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { - return(false); - } - - ut_ad(bpage->in_flush_list); - - switch (flush_type) { - case BUF_FLUSH_LIST: - return(buf_page_get_state(bpage) != BUF_BLOCK_REMOVE_HASH); - case BUF_FLUSH_LRU: - case BUF_FLUSH_SINGLE_PAGE: - return(true); - - case BUF_FLUSH_N_TYPES: - break; - } - - ut_error; - return(false); -} - -/********************************************************************//** -Remove a block from the flush list of modified blocks. 
*/ -UNIV_INTERN -void -buf_flush_remove( -/*=============*/ - buf_page_t* bpage) /*!< in: pointer to the block in question */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ulint zip_size; - - ut_ad(mutex_own(buf_page_get_mutex(bpage))); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_DIRTY - || mutex_own(&buf_pool->LRU_list_mutex)); -#endif - ut_ad(bpage->in_flush_list); - - buf_flush_list_mutex_enter(buf_pool); - - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - /* Clean compressed pages should not be on the flush list */ - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - return; - case BUF_BLOCK_ZIP_DIRTY: - buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE); - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - buf_LRU_insert_zip_clean(bpage); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - break; - case BUF_BLOCK_FILE_PAGE: - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); - break; - } - - /* If the flush_rbt is active then delete from there as well. */ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { - buf_flush_delete_from_flush_rbt(bpage); - } - - /* Must be done after we have removed it from the flush_rbt - because we assert on in_flush_list in comparison function. */ - ut_d(bpage->in_flush_list = FALSE); - - zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE; - - bpage->oldest_modification = 0; - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(buf_flush_validate_skip(buf_pool)); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - - buf_flush_update_hp(buf_pool, bpage); - buf_flush_list_mutex_exit(buf_pool); -} - -/*******************************************************************//** -Relocates a buffer control block on the flush_list. 
-Note that it is assumed that the contents of bpage have already been -copied to dpage. -IMPORTANT: When this function is called bpage and dpage are not -exact copies of each other. For example, they both will have different -::state. Also the ::list pointers in dpage may be stale. We need to -use the current list node (bpage) to do the list manipulation because -the list pointers could have changed between the time that we copied -the contents of bpage to the dpage and the flush list manipulation -below. */ -UNIV_INTERN -void -buf_flush_relocate_on_flush_list( -/*=============================*/ - buf_page_t* bpage, /*!< in/out: control block being moved */ - buf_page_t* dpage) /*!< in/out: destination block */ -{ - buf_page_t* prev; - buf_page_t* prev_b = NULL; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - /* Must reside in the same buffer pool. */ - ut_ad(buf_pool == buf_pool_from_bpage(dpage)); - - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - - buf_flush_list_mutex_enter(buf_pool); - - ut_ad(bpage->in_flush_list); - ut_ad(dpage->in_flush_list); - - /* If recovery is active we must swap the control blocks in - the flush_rbt as well. */ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { - buf_flush_delete_from_flush_rbt(bpage); - prev_b = buf_flush_insert_in_flush_rbt(dpage); - } - - /* Must be done after we have removed it from the flush_rbt - because we assert on in_flush_list in comparison function. */ - ut_d(bpage->in_flush_list = FALSE); - - prev = UT_LIST_GET_PREV(list, bpage); - UT_LIST_REMOVE(list, buf_pool->flush_list, bpage); - - if (prev) { - ut_ad(prev->in_flush_list); - UT_LIST_INSERT_AFTER( - list, - buf_pool->flush_list, - prev, dpage); - } else { - UT_LIST_ADD_FIRST( - list, - buf_pool->flush_list, - dpage); - } - - /* Just an extra check. Previous in flush_list - should be the same control block as in flush_rbt. 
*/ - ut_a(!buf_pool->flush_rbt || prev_b == prev); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - ut_a(buf_flush_validate_low(buf_pool)); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - - buf_flush_update_hp(buf_pool, bpage); - buf_flush_list_mutex_exit(buf_pool); -} - -/********************************************************************//** -Updates the flush system data structures when a write is completed. */ -UNIV_INTERN -void -buf_flush_write_complete( -/*=====================*/ - buf_page_t* bpage) /*!< in: pointer to the block in question */ -{ - buf_flush_t flush_type = buf_page_get_flush_type(bpage); - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - mutex_enter(&buf_pool->flush_state_mutex); - - buf_flush_remove(bpage); - - buf_page_set_io_fix(bpage, BUF_IO_NONE); - - buf_pool->n_flush[flush_type]--; - ut_ad(buf_pool->n_flush[flush_type] != ULINT_MAX); - -#ifdef UNIV_MTFLUSH_DEBUG - fprintf(stderr, "n pending flush %lu\n", - buf_pool->n_flush[flush_type]); -#endif - - if (buf_pool->n_flush[flush_type] == 0 - && buf_pool->init_flush[flush_type] == FALSE) { - - /* The running flush batch has ended */ - - os_event_set(buf_pool->no_flush[flush_type]); - } - - buf_dblwr_update(bpage, flush_type); - - mutex_exit(&buf_pool->flush_state_mutex); -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************************//** -Calculate the checksum of a page from compressed table and update the page. 
*/ -UNIV_INTERN -void -buf_flush_update_zip_checksum( -/*==========================*/ - buf_frame_t* page, /*!< in/out: Page to update */ - ulint zip_size, /*!< in: Compressed page size */ - lsn_t lsn) /*!< in: Lsn to stamp on the page */ -{ - ut_a(zip_size > 0); - - ib_uint32_t checksum = static_cast<ib_uint32_t>( - page_zip_calc_checksum( - page, zip_size, - static_cast<srv_checksum_algorithm_t>( - srv_checksum_algorithm))); - - mach_write_to_8(page + FIL_PAGE_LSN, lsn); - memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); -} - -/********************************************************************//** -Initializes a page for writing to the tablespace. */ -UNIV_INTERN -void -buf_flush_init_for_writing( -/*=======================*/ - byte* page, /*!< in/out: page */ - void* page_zip_, /*!< in/out: compressed page, or NULL */ - lsn_t newest_lsn) /*!< in: newest modification lsn - to the page */ -{ - ib_uint32_t checksum = 0 /* silence bogus gcc warning */; - - ut_ad(page); - - if (page_zip_) { - page_zip_des_t* page_zip; - ulint zip_size; - - page_zip = static_cast<page_zip_des_t*>(page_zip_); - zip_size = page_zip_get_size(page_zip); - - ut_ad(zip_size); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - - switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) { - case FIL_PAGE_TYPE_ALLOCATED: - case FIL_PAGE_INODE: - case FIL_PAGE_IBUF_BITMAP: - case FIL_PAGE_TYPE_FSP_HDR: - case FIL_PAGE_TYPE_XDES: - /* These are essentially uncompressed pages. 
*/ - memcpy(page_zip->data, page, zip_size); - /* fall through */ - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - case FIL_PAGE_INDEX: - - buf_flush_update_zip_checksum( - page_zip->data, zip_size, newest_lsn); - - return; - } - - ut_print_timestamp(stderr); - fputs(" InnoDB: ERROR: The compressed page to be written" - " seems corrupt:", stderr); - ut_print_buf(stderr, page, zip_size); - fputs("\nInnoDB: Possibly older version of the page:", stderr); - ut_print_buf(stderr, page_zip->data, zip_size); - putc('\n', stderr); - ut_error; - } - - /* Write the newest modification lsn to the page header and trailer */ - mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn); - - mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - newest_lsn); - - /* Store the new formula checksum */ - - switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - checksum = buf_calc_page_crc32(page); - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - break; - case SRV_CHECKSUM_ALGORITHM_INNODB: - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - checksum = (ib_uint32_t) buf_calc_page_new_checksum(page); - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - checksum = (ib_uint32_t) buf_calc_page_old_checksum(page); - break; - case SRV_CHECKSUM_ALGORITHM_NONE: - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - checksum = BUF_NO_CHECKSUM_MAGIC; - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum); - break; - /* no default so the compiler will emit a warning if new enum - is added and not handled here */ - } - - /* With the InnoDB checksum, we overwrite the first 4 bytes of - the end lsn field to store the old formula checksum. Since it - depends also on the field FIL_PAGE_SPACE_OR_CHKSUM, it has to - be calculated after storing the new formula checksum. - - In other cases we write the same value to both fields. 
- If CRC32 is used then it is faster to use that checksum - (calculated above) instead of calculating another one. - We can afford to store something other than - buf_calc_page_old_checksum() or BUF_NO_CHECKSUM_MAGIC in - this field because the file will not be readable by old - versions of MySQL/InnoDB anyway (older than MySQL 5.6.3) */ - - mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - checksum); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Does an asynchronous write of a buffer page. NOTE: in simulated aio and -also when the doublewrite buffer is used, we must call -buf_dblwr_flush_buffered_writes after we have posted a batch of -writes! */ -static -void -buf_flush_write_block_low( -/*======================*/ - buf_page_t* bpage, /*!< in: buffer block to write */ - buf_flush_t flush_type, /*!< in: type of flush */ - bool sync) /*!< in: true if sync IO request */ -{ - fil_space_t* space = fil_space_acquire_for_io(bpage->space); - if (!space) { - return; - } - ulint zip_size = buf_page_get_zip_size(bpage); - page_t* frame = NULL; -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); -#endif - -#ifdef UNIV_LOG_DEBUG - static ibool univ_log_debug_warned; -#endif /* UNIV_LOG_DEBUG */ - - ut_ad(buf_page_in_file(bpage)); - - /* We are not holding block_mutex here. - Nevertheless, it is safe to access bpage, because it is - io_fixed and oldest_modification != 0. Thus, it cannot be - relocated in the buffer pool or removed from flush_list or - LRU_list. 
*/ - ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(!mutex_own(buf_page_get_mutex(bpage))); - ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE); - ut_ad(bpage->oldest_modification != 0); - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); -#endif - ut_ad(bpage->newest_modification != 0); - -#ifdef UNIV_LOG_DEBUG - if (!univ_log_debug_warned) { - univ_log_debug_warned = TRUE; - fputs("Warning: cannot force log to disk if" - " UNIV_LOG_DEBUG is defined!\n" - "Crash recovery will not work!\n", - stderr); - } -#else - /* Force the log to the disk before writing the modified block */ - log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); -#endif - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */ - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; - case BUF_BLOCK_ZIP_DIRTY: - frame = bpage->zip.data; - mach_write_to_8(frame + FIL_PAGE_LSN, - bpage->newest_modification); - - ut_a(page_zip_verify_checksum(frame, zip_size)); - - memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); - break; - case BUF_BLOCK_FILE_PAGE: - frame = bpage->zip.data; - if (!frame) { - frame = ((buf_block_t*) bpage)->frame; - } - - buf_flush_init_for_writing(((buf_block_t*) bpage)->frame, - bpage->zip.data - ? &bpage->zip : NULL, - bpage->newest_modification); - break; - } - - frame = buf_page_encrypt_before_write(space, bpage, frame); - - if (!srv_use_doublewrite_buf || !buf_dblwr) { - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - sync, - buf_page_get_space(bpage), - zip_size, - buf_page_get_page_no(bpage), - 0, - zip_size ? zip_size : bpage->real_size, - frame, - bpage, - &bpage->write_size); - } else { - /* InnoDB uses doublewrite buffer and doublewrite buffer - is initialized. User can define do we use atomic writes - on a file space (table) or not. 
If atomic writes are - not used we should use doublewrite buffer and if - atomic writes should be used, no doublewrite buffer - is used. */ - - if (fsp_flags_get_atomic_writes(space->flags) - == ATOMIC_WRITES_ON) { - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, - buf_page_get_space(bpage), - zip_size, - buf_page_get_page_no(bpage), - 0, - zip_size ? zip_size : bpage->real_size, - frame, - bpage, - &bpage->write_size); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { - buf_dblwr_write_single_page(bpage, sync); - } else { - buf_dblwr_add_to_batch(bpage); - } - } - - /* When doing single page flushing the IO is done synchronously - and we flush the changes to disk only for the tablespace we - are working on. */ - if (sync) { - ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE); - fil_flush(space); - - /* The tablespace could already have been dropped, - because fil_io(request, sync) would already have - decremented the node->n_pending. However, - buf_page_io_complete() only needs to look up the - tablespace during read requests, not during writes. */ - ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE); - -#ifdef UNIV_DEBUG - dberr_t err = -#endif - buf_page_io_complete(bpage); - - ut_ad(err == DB_SUCCESS); - } - - fil_space_release_for_io(space); - - /* Increment the counter of I/O operations used - for selecting LRU policy. */ - buf_LRU_stat_inc_io(); -} - -/********************************************************************//** -Writes a flushable page asynchronously from the buffer pool to a file. -NOTE: in simulated aio we must call -os_aio_simulated_wake_handler_threads after we have posted a batch of -writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this -function, and it will be released by this function if it returns true. -LRU_list_mutex must be held iff performing a single page flush and will be -released by the function if it returns true. 
-@return TRUE if the page was flushed */ -UNIV_INTERN -bool -buf_flush_page( -/*===========*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_page_t* bpage, /*!< in: buffer control block */ - buf_flush_t flush_type, /*!< in: type of flush */ - bool sync) /*!< in: true if sync IO request */ -{ - ut_ad(flush_type < BUF_FLUSH_N_TYPES); - /* Hold the LRU list mutex iff called for a single page LRU - flush. A single page LRU flush is already non-performant, and holding - the LRU list mutex allows us to avoid having to store the previous LRU - list page or to restart the LRU scan in - buf_flush_single_page_from_LRU(). */ - ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE || - !mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(flush_type != BUF_FLUSH_SINGLE_PAGE || - mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(buf_page_in_file(bpage)); - ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE); - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(mutex_own(block_mutex)); - - ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); - - bool is_uncompressed; - - is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); - - ibool flush; - rw_lock_t* rw_lock; - bool no_fix_count = bpage->buf_fix_count == 0; - - if (!is_uncompressed) { - flush = TRUE; - rw_lock = NULL; - - } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) { - /* This is a heuristic, to avoid expensive S attempts. 
*/ - flush = FALSE; - } else { - - rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock; - - if (flush_type != BUF_FLUSH_LIST) { - flush = rw_lock_s_lock_gen_nowait( - rw_lock, BUF_IO_WRITE); - } else { - /* Will S lock later */ - flush = TRUE; - } - } - - if (flush) { - - /* We are committed to flushing by the time we get here */ - - mutex_enter(&buf_pool->flush_state_mutex); - - buf_page_set_io_fix(bpage, BUF_IO_WRITE); - - buf_page_set_flush_type(bpage, flush_type); - - if (buf_pool->n_flush[flush_type] == 0) { - - os_event_reset(buf_pool->no_flush[flush_type]); - } - - ++buf_pool->n_flush[flush_type]; - ut_ad(buf_pool->n_flush[flush_type] != 0); - - mutex_exit(&buf_pool->flush_state_mutex); - - mutex_exit(block_mutex); - - if (flush_type == BUF_FLUSH_SINGLE_PAGE) - mutex_exit(&buf_pool->LRU_list_mutex); - - if (flush_type == BUF_FLUSH_LIST - && is_uncompressed - && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) { - /* avoiding deadlock possibility involves doublewrite - buffer, should flush it, because it might hold the - another block->lock. */ - buf_dblwr_flush_buffered_writes(); - - rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE); - } - - /* Even though bpage is not protected by any mutex at this - point, it is safe to access bpage, because it is io_fixed and - oldest_modification != 0. Thus, it cannot be relocated in the - buffer pool or removed from flush_list or LRU_list. */ - - buf_flush_write_block_low(bpage, flush_type, sync); - } - - return(flush); -} - -# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/********************************************************************//** -Writes a flushable page asynchronously from the buffer pool to a file. -NOTE: block and LRU list mutexes must be held upon entering this function, and -they will be released by this function after flushing. This is loosely based on -buf_flush_batch() and buf_flush_page(). 
-@return TRUE if the page was flushed and the mutexes released */ -UNIV_INTERN -ibool -buf_flush_page_try( -/*===============*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - buf_block_t* block) /*!< in/out: buffer control block */ -{ - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(mutex_own(&block->mutex)); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) { - return(FALSE); - } - - /* The following call will release the LRU list and - block mutex if successful. */ - return(buf_flush_page( - buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true)); -} -# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -/***********************************************************//** -Check the page is in buffer pool and can be flushed. -@return true if the page can be flushed. */ -static -bool -buf_flush_check_neighbor( -/*=====================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset */ - buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST */ -{ - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - bool ret; - prio_rw_lock_t* hash_lock; - ib_mutex_t* block_mutex; - - ut_ad(flush_type == BUF_FLUSH_LRU - || flush_type == BUF_FLUSH_LIST); - - /* We only want to flush pages from this buffer pool. 
*/ - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, - &hash_lock); - - if (!bpage) { - - return(false); - } - - block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - rw_lock_s_unlock(hash_lock); - - ut_a(buf_page_in_file(bpage)); - - /* We avoid flushing 'non-old' blocks in an LRU flush, - because the flushed blocks are soon freed */ - - ret = false; - if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) { - - if (buf_flush_ready_for_flush(bpage, flush_type)) { - ret = true; - } - } - - mutex_exit(block_mutex); - - return(ret); -} - -/***********************************************************//** -Flushes to disk all flushable pages within the flush area. -@return number of pages flushed */ -static -ulint -buf_flush_try_neighbors( -/*====================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page offset */ - buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST */ - ulint n_flushed, /*!< in: number of pages - flushed so far in this batch */ - ulint n_to_flush) /*!< in: maximum number of pages - we are allowed to flush */ -{ - ulint i; - ulint low; - ulint high; - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(!buf_flush_list_mutex_own(buf_pool)); - - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN - || srv_flush_neighbors == 0) { - /* If there is little space or neighbor flushing is - not enabled then just flush the victim. */ - low = offset; - high = offset + 1; - } else { - /* When flushed, dirty blocks are searched in - neighborhoods of this size, and flushed along with the - original page. 
*/ - - ulint buf_flush_area; - - buf_flush_area = ut_min( - BUF_READ_AHEAD_AREA(buf_pool), - buf_pool->curr_size / 16); - - low = (offset / buf_flush_area) * buf_flush_area; - high = (offset / buf_flush_area + 1) * buf_flush_area; - - if (srv_flush_neighbors == 1) { - /* adjust 'low' and 'high' to limit - for contiguous dirty area */ - if (offset > low) { - for (i = offset - 1; - i >= low - && buf_flush_check_neighbor( - space, i, flush_type); - i--) { - /* do nothing */ - } - low = i + 1; - } - - for (i = offset + 1; - i < high - && buf_flush_check_neighbor( - space, i, flush_type); - i++) { - /* do nothing */ - } - high = i; - } - } - - /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */ - - if (high > fil_space_get_size(space)) { - high = fil_space_get_size(space); - } - - ulint count = 0; - - for (i = low; i < high; i++) { - - prio_rw_lock_t* hash_lock; - ib_mutex_t* block_mutex; - - if ((count + n_flushed) >= n_to_flush) { - - /* We have already flushed enough pages and - should call it a day. There is, however, one - exception. If the page whose neighbors we - are flushing has not been flushed yet then - we'll try to flush the victim that we - selected originally. */ - if (i <= offset) { - i = offset; - } else { - break; - } - } - - buf_pool = buf_pool_get(space, i); - - /* We only want to flush pages from this buffer pool. 
*/ - buf_page_t* bpage = buf_page_hash_get_s_locked(buf_pool, - space, i, &hash_lock); - - if (bpage == NULL) { - - continue; - } - - block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - rw_lock_s_unlock(hash_lock); - - ut_a(buf_page_in_file(bpage)); - - /* We avoid flushing 'non-old' blocks in an LRU flush, - because the flushed blocks are soon freed */ - - if (flush_type != BUF_FLUSH_LRU - || i == offset - || buf_page_is_old(bpage)) { - - if (buf_flush_ready_for_flush(bpage, flush_type) - && (i == offset || bpage->buf_fix_count == 0) - && buf_flush_page( - buf_pool, bpage, flush_type, false)) { - - ++count; - - continue; - } - } - - mutex_exit(block_mutex); - } - - if (count > 0) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, - MONITOR_FLUSH_NEIGHBOR_COUNT, - MONITOR_FLUSH_NEIGHBOR_PAGES, - (count - 1)); - } - - return(count); -} - -/********************************************************************//** -Check if the block is modified and ready for flushing. If the the block -is ready to flush then flush the page and try o flush its neighbors. - -@return TRUE if, depending on the flush type, either LRU or flush list -mutex was released during this function. This does not guarantee that some -pages were written as well. -Number of pages written are incremented to the count. 
*/
static
ibool
buf_flush_page_and_try_neighbors(
/*=============================*/
	buf_page_t*	bpage,		/*!< in: buffer control block,
					must be
					buf_page_in_file(bpage) */
	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU
					or BUF_FLUSH_LIST */
	ulint		n_to_flush,	/*!< in: number of pages to
					flush */
	ulint*		count)		/*!< in/out: number of pages
					flushed */
{
	ibool		flushed;
	ib_mutex_t*	block_mutex = NULL;
	/* Declared once for both the debug assertions and the flushing
	path, instead of a debug-only declaration that a second, inner
	declaration then shadowed. */
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad((flush_type == BUF_FLUSH_LRU
	       && mutex_own(&buf_pool->LRU_list_mutex))
	      || (flush_type == BUF_FLUSH_LIST
		  && buf_flush_list_mutex_own(buf_pool)));

	/* Only an LRU flush takes the block mutex here. */
	if (flush_type == BUF_FLUSH_LRU) {
		block_mutex = buf_page_get_mutex(bpage);
		mutex_enter(block_mutex);
	}

	ut_a(buf_page_in_file(bpage)
	     || (buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH
	     ));

	if (buf_flush_ready_for_flush(bpage, flush_type)) {

		if (flush_type == BUF_FLUSH_LRU) {
			mutex_exit(&buf_pool->LRU_list_mutex);
		}

		/* These fields are protected by the buf_page_get_mutex()
		mutex. */
		/* Read the fields directly in order to avoid asserting on
		BUF_BLOCK_REMOVE_HASH pages. */
		ulint	space = bpage->space;
		ulint	offset = bpage->offset;

		if (flush_type == BUF_FLUSH_LRU) {
			mutex_exit(block_mutex);
		} else {
			buf_flush_list_mutex_exit(buf_pool);
		}

		/* Try to flush also all the neighbors */
		*count += buf_flush_try_neighbors(
			space, offset, flush_type, *count, n_to_flush);

		/* Reacquire the list mutex the caller held on entry. */
		if (flush_type == BUF_FLUSH_LRU) {
			mutex_enter(&buf_pool->LRU_list_mutex);
		} else {
			buf_flush_list_mutex_enter(buf_pool);
		}
		flushed = TRUE;

	} else if (flush_type == BUF_FLUSH_LRU) {
		mutex_exit(block_mutex);
		flushed = FALSE;
	} else {
		flushed = FALSE;
	}

	ut_ad((flush_type == BUF_FLUSH_LRU
	       && mutex_own(&buf_pool->LRU_list_mutex))
	      || (flush_type == BUF_FLUSH_LIST
		  && buf_flush_list_mutex_own(buf_pool)));

	return(flushed);
}

/*******************************************************************//**
This utility moves the uncompressed frames of pages to the free list.
Note that this function does not actually flush any data to disk. It
just detaches the uncompressed frames from the compressed pages at the
tail of the unzip_LRU and puts those freed frames in the free list.
Note that it is a best effort attempt and it is not guaranteed that
after a call to this function there will be 'max' blocks in the free
list.
@return number of blocks moved to the free list.
*/
static
ulint
buf_free_from_unzip_LRU_list_batch(
/*===============================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	ulint		max)		/*!< in: desired number of
					blocks in the free_list */
{
	ulint	n_scanned = 0;
	ulint	n_freed = 0;
	ulint	free_len = UT_LIST_GET_LEN(buf_pool->free);
	ulint	unzip_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);

	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));

	buf_block_t*	block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);

	/* Keep going while blocks remain, fewer than 'max' were freed,
	the free list is shorter than srv_LRU_scan_depth and the
	unzip_LRU is longer than a tenth of the LRU. */
	while (block != NULL && n_freed < max
	       && free_len < srv_LRU_scan_depth
	       && unzip_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) {

		ib_mutex_t* const	block_mutex
			= buf_page_get_mutex(&block->page);

		++n_scanned;

		mutex_enter(block_mutex);

		const bool	freed = buf_LRU_free_page(&block->page, false);

		mutex_exit(block_mutex);

		if (freed) {
			/* Block was freed. LRU list mutex potentially
			released and reacquired, so restart from the
			tail. */
			++n_freed;
			mutex_enter(&buf_pool->LRU_list_mutex);
			block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
		} else {
			block = UT_LIST_GET_PREV(unzip_LRU, block);
		}

		free_len = UT_LIST_GET_LEN(buf_pool->free);
		unzip_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
	}

	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));

	if (n_scanned) {
		MONITOR_INC_VALUE_CUMULATIVE(
			MONITOR_LRU_BATCH_SCANNED,
			MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
			MONITOR_LRU_BATCH_SCANNED_PER_CALL,
			n_scanned);
	}

	return(n_freed);
}

/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list.
The calling thread is not allowed to own any latches on pages!
It attempts to make 'max' blocks available in the free list. Note that
it is a best effort attempt and it is not guaranteed that after a call
to this function there will be 'max' blocks in the free list.
The function returns nothing; the flushed and evicted page counts are
reported through the output parameter 'n'.
*/ -MY_ATTRIBUTE((nonnull)) -static -void -buf_flush_LRU_list_batch( -/*=====================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint max, /*!< in: desired number of - blocks in the free_list */ - bool limited_scan, /*!< in: if true, allow to scan only up - to srv_LRU_scan_depth pages in total */ - flush_counters_t* n) /*!< out: flushed/evicted page - counts */ -{ - buf_page_t* bpage; - ulint scanned = 0; - ulint lru_position = 0; - ulint max_lru_position; - ulint max_scanned_pages; - ulint free_len = UT_LIST_GET_LEN(buf_pool->free); - ulint lru_len = UT_LIST_GET_LEN(buf_pool->LRU); - - n->flushed = 0; - n->evicted = 0; - n->unzip_LRU_evicted = 0; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - max_scanned_pages = limited_scan ? srv_LRU_scan_depth : lru_len * max; - max_lru_position = ut_min(srv_LRU_scan_depth, lru_len); - - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - while (bpage != NULL - && (srv_cleaner_eviction_factor ? n->evicted : n->flushed) < max - && free_len < srv_LRU_scan_depth - && lru_len > BUF_LRU_MIN_LEN - && lru_position < max_lru_position - && scanned < max_scanned_pages) { - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - ibool evict; - ulint failed_acquire; - - ++scanned; - ++lru_position; - - failed_acquire = mutex_enter_nowait(block_mutex); - - evict = UNIV_LIKELY(!failed_acquire) - && buf_flush_ready_for_replace(bpage); - - if (UNIV_LIKELY(!failed_acquire) && !evict) { - - mutex_exit(block_mutex); - } - - /* If the block is ready to be replaced we try to - free it i.e.: put it on the free list. - Otherwise we try to flush the block and its - neighbors. In this case we'll put it on the - free list in the next pass. We do this extra work - of putting blocks to the free list instead of - just flushing them because after every flush - we have to restart the scan from the tail of - the LRU list and if we don't clear the tail - of the flushed pages then the scan becomes - O(n*n). 
*/ - if (evict) { - - if (buf_LRU_free_page(bpage, true)) { - - mutex_exit(block_mutex); - n->evicted++; - lru_position = 0; - mutex_enter(&buf_pool->LRU_list_mutex); - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - } else { - - bpage = UT_LIST_GET_PREV(LRU, bpage); - mutex_exit(block_mutex); - } - } else if (UNIV_LIKELY(!failed_acquire)) { - - ulint space; - ulint offset; - buf_page_t* prev_bpage; - - prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - - /* Save the previous bpage */ - - if (prev_bpage != NULL) { - space = prev_bpage->space; - offset = prev_bpage->offset; - } else { - space = ULINT_UNDEFINED; - offset = ULINT_UNDEFINED; - } - - if (buf_flush_page_and_try_neighbors( - bpage, - BUF_FLUSH_LRU, max, &n->flushed)) { - - /* LRU list mutex was released. - reposition the iterator. Note: the - prev block could have been repositioned - too but that should be rare. */ - - if (prev_bpage != NULL) { - - ut_ad(space != ULINT_UNDEFINED); - ut_ad(offset != ULINT_UNDEFINED); - - prev_bpage = buf_page_hash_get( - buf_pool, space, offset); - } - } - - bpage = prev_bpage; - } - - free_len = UT_LIST_GET_LEN(buf_pool->free); - lru_len = UT_LIST_GET_LEN(buf_pool->LRU); - } - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - /* We keep track of all flushes happening as part of LRU - flush. When estimating the desired rate at which flush_list - should be flushed, we factor in this value. */ - buf_pool->stat.buf_lru_flush_page_count += n->flushed; - - if (scanned) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_SCANNED, - MONITOR_LRU_BATCH_SCANNED_NUM_CALL, - MONITOR_LRU_BATCH_SCANNED_PER_CALL, - scanned); - } -} - -/*******************************************************************//** -Flush and move pages from LRU or unzip_LRU list to the free list. -Whether LRU or unzip_LRU is used depends on the state of the system. 
-@return number of blocks for which either the write request was queued -or in case of unzip_LRU the number of blocks actually moved to the -free list */ -MY_ATTRIBUTE((nonnull)) -static -void -buf_do_LRU_batch( -/*=============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint max, /*!< in: desired number of - blocks in the free_list */ - bool limited_scan, /*!< in: if true, allow to scan only up - to srv_LRU_scan_depth pages in total */ - flush_counters_t* n) /*!< out: flushed/evicted page - counts */ -{ - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - if (buf_LRU_evict_from_unzip_LRU(buf_pool)) { - n->unzip_LRU_evicted - = buf_free_from_unzip_LRU_list_batch(buf_pool, max); - } else { - n->unzip_LRU_evicted = 0; - } - - if (max > n->unzip_LRU_evicted) { - buf_flush_LRU_list_batch(buf_pool, max - n->unzip_LRU_evicted, - limited_scan, n); - } else { - n->evicted = 0; - n->flushed = 0; - } - - n->evicted += n->unzip_LRU_evicted; -} - -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush_list. -the calling thread is not allowed to own any latches on pages! -@return number of blocks for which the write request was queued; -ULINT_UNDEFINED if there was a flush of the same type already -running */ -static -ulint -buf_do_flush_list_batch( -/*====================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint min_n, /*!< in: wished minimum mumber - of blocks flushed (it is not - guaranteed that the actual - number is that big, though) */ - lsn_t lsn_limit) /*!< all blocks whose - oldest_modification is smaller - than this should be flushed (if - their number does not exceed - min_n) */ -{ - ulint count = 0; - ulint scanned = 0; - - /* Start from the end of the list looking for a suitable - block to be flushed. 
*/ - buf_flush_list_mutex_enter(buf_pool); - ulint len = UT_LIST_GET_LEN(buf_pool->flush_list); - - /* In order not to degenerate this scan to O(n*n) we attempt - to preserve pointer of previous block in the flush list. To do - so we declare it a hazard pointer. Any thread working on the - flush list must check the hazard pointer and if it is removing - the same block then it must reset it. */ - for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->flush_list); - count < min_n && bpage != NULL && len > 0 - && bpage->oldest_modification < lsn_limit; - ++scanned) { - - buf_page_t* prev; - - ut_a(bpage->oldest_modification > 0); - ut_ad(bpage->in_flush_list); - - prev = UT_LIST_GET_PREV(list, bpage); - buf_flush_set_hp(buf_pool, prev); - -#ifdef UNIV_DEBUG - bool flushed = -#endif /* UNIV_DEBUG */ - buf_flush_page_and_try_neighbors( - bpage, BUF_FLUSH_LIST, min_n, &count); - - ut_ad(flushed || buf_flush_is_hp(buf_pool, prev)); - - if (!buf_flush_is_hp(buf_pool, prev)) { - /* The hazard pointer was reset by some other - thread. Restart the scan. */ - ut_ad(buf_flush_is_hp(buf_pool, NULL)); - bpage = UT_LIST_GET_LAST(buf_pool->flush_list); - len = UT_LIST_GET_LEN(buf_pool->flush_list); - } else { - bpage = prev; - --len; - buf_flush_set_hp(buf_pool, NULL); - } - - ut_ad(!bpage || bpage->in_flush_list); - } - - buf_flush_list_mutex_exit(buf_pool); - - MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED, - MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, - MONITOR_FLUSH_BATCH_SCANNED_PER_CALL, - scanned); - - return(count); -} - -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list or flush_list. -NOTE 1: in the case of an LRU flush the calling thread may own latches to -pages: to avoid deadlocks, this function must be written so that it cannot -end up waiting for these latches! NOTE 2: in the case of a flush list flush, -the calling thread is not allowed to own any latches on pages! 
-@return number of blocks for which the write request was queued */ -MY_ATTRIBUTE((nonnull)) -void -buf_flush_batch( -/*============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST; if BUF_FLUSH_LIST, - then the caller must not own any - latches on pages */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST - all blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - bool limited_lru_scan,/*!< in: for LRU flushes, if true, - allow to scan only up to - srv_LRU_scan_depth pages in total */ - flush_counters_t* n) /*!< out: flushed/evicted page - counts */ -{ - ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); -#ifdef UNIV_SYNC_DEBUG - ut_ad((flush_type != BUF_FLUSH_LIST) - || sync_thread_levels_empty_except_dict()); -#endif /* UNIV_SYNC_DEBUG */ - - /* Note: The buffer pool mutexes are released and reacquired within - the flush functions. */ - switch (flush_type) { - case BUF_FLUSH_LRU: - mutex_enter(&buf_pool->LRU_list_mutex); - buf_do_LRU_batch(buf_pool, min_n, limited_lru_scan, n); - mutex_exit(&buf_pool->LRU_list_mutex); - break; - case BUF_FLUSH_LIST: - ut_ad(!limited_lru_scan); - n->flushed = buf_do_flush_list_batch(buf_pool, min_n, - lsn_limit); - n->evicted = 0; - break; - default: - ut_error; - } - -#ifdef UNIV_DEBUG - if (buf_debug_prints && n->flushed > 0) { - fprintf(stderr, flush_type == BUF_FLUSH_LRU - ? 
"Flushed %lu pages in LRU flush\n" - : "Flushed %lu pages in flush list flush\n", - (ulong) n->flushed); - } -#endif /* UNIV_DEBUG */ -} - -/******************************************************************//** -Gather the aggregated stats for both flush list and LRU list flushing */ -void -buf_flush_common( -/*=============*/ - buf_flush_t flush_type, /*!< in: type of flush */ - ulint page_count) /*!< in: number of pages flushed */ -{ - if (page_count) { - buf_dblwr_flush_buffered_writes(); - } - - ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && page_count > 0) { - fprintf(stderr, flush_type == BUF_FLUSH_LRU - ? "Flushed %lu pages in LRU flush\n" - : "Flushed %lu pages in flush list flush\n", - (ulong) page_count); - } -#endif /* UNIV_DEBUG */ - - srv_stats.buf_pool_flushed.add(page_count); -} - -/******************************************************************//** -Start a buffer flush batch for LRU or flush list */ -ibool -buf_flush_start( -/*============*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_flush_t flush_type) /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ -{ - mutex_enter(&buf_pool->flush_state_mutex); - - if (buf_pool->n_flush[flush_type] > 0 - || buf_pool->init_flush[flush_type] == TRUE) { - - /* There is already a flush batch of the same type running */ - -#ifdef UNIV_PAGECOMPRESS_DEBUG - fprintf(stderr, "Error: flush_type %d n_flush %lu init_flush %lu\n", - flush_type, buf_pool->n_flush[flush_type], buf_pool->init_flush[flush_type]); -#endif - - mutex_exit(&buf_pool->flush_state_mutex); - - return(FALSE); - } - - buf_pool->init_flush[flush_type] = TRUE; - - mutex_exit(&buf_pool->flush_state_mutex); - - return(TRUE); -} - -/******************************************************************//** -End a buffer flush batch for LRU or flush list */ -void -buf_flush_end( -/*==========*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_flush_t flush_type) /*!< in: 
BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ -{ - mutex_enter(&buf_pool->flush_state_mutex); - - buf_pool->init_flush[flush_type] = FALSE; - - buf_pool->try_LRU_scan = TRUE; - - if (buf_pool->n_flush[flush_type] == 0) { - - /* The running flush batch has ended */ - - os_event_set(buf_pool->no_flush[flush_type]); - } - - mutex_exit(&buf_pool->flush_state_mutex); -} - -/******************************************************************//** -Waits until a flush batch of the given type ends */ -UNIV_INTERN -void -buf_flush_wait_batch_end( -/*=====================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_flush_t type) /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ -{ - ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST); - - if (buf_pool == NULL) { - ulint i; - - for (i = 0; i < srv_buf_pool_instances; ++i) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - thd_wait_begin(NULL, THD_WAIT_DISKIO); - os_event_wait(buf_pool->no_flush[type]); - thd_wait_end(NULL); - } - } else { - thd_wait_begin(NULL, THD_WAIT_DISKIO); - os_event_wait(buf_pool->no_flush[type]); - thd_wait_end(NULL); - } -} - -/* JAN: TODO: */ - -void buf_pool_enter_LRU_mutex( - buf_pool_t* buf_pool) -{ - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - mutex_enter(&buf_pool->LRU_list_mutex); -} - -void buf_pool_exit_LRU_mutex( - buf_pool_t* buf_pool) -{ - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/* JAN: TODO: END: */ - -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list and also -puts replaceable clean pages from the end of the LRU list to the free -list. -NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully. false if another batch -of same type was already running. 
*/ -MY_ATTRIBUTE((nonnull)) -static -bool -buf_flush_LRU( -/*==========*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - bool limited_scan, /*!< in: if true, allow to scan - only up to srv_LRU_scan_depth - pages in total */ - flush_counters_t *n) /*!< out: flushed/evicted page - counts */ -{ - if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { - n->flushed = 0; - n->evicted = 0; - n->unzip_LRU_evicted = 0; - return(false); - } - - buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0, limited_scan, n); - - buf_flush_end(buf_pool, BUF_FLUSH_LRU); - - buf_flush_common(BUF_FLUSH_LRU, n->flushed); - - return(true); -} - -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush list of -all buffer pool instances. -NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully for each buffer pool -instance. false if another batch of same type was already running in -at least one of the buffer pool instance */ -UNIV_INTERN -bool -buf_flush_list( -/*===========*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. 
Ignored if NULL */ - -{ - ulint i; - - ulint requested_pages[MAX_BUFFER_POOLS]; - bool active_instance[MAX_BUFFER_POOLS]; - ulint remaining_instances = srv_buf_pool_instances; - bool timeout = false; - ulint flush_start_time = 0; - - if (buf_mtflu_init_done()) { - return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed)); - } - - for (i = 0; i < srv_buf_pool_instances; i++) { - requested_pages[i] = 0; - active_instance[i] = true; - } - - if (n_processed) { - *n_processed = 0; - } - - if (min_n != ULINT_MAX) { - /* Ensure that flushing is spread evenly amongst the - buffer pool instances. When min_n is ULINT_MAX - we need to flush everything up to the lsn limit - so no limit here. */ - min_n = (min_n + srv_buf_pool_instances - 1) - / srv_buf_pool_instances; - if (lsn_limit != LSN_MAX) { - flush_start_time = ut_time_ms(); - } - } - - /* Flush to lsn_limit in all buffer pool instances */ - while (remaining_instances && !timeout) { - - ulint flush_common_batch = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - - if (flush_start_time - && (ut_time_ms() - flush_start_time - >= srv_cleaner_max_flush_time)) { - - timeout = true; - break; - } - - if (active_instance[i]) { - - buf_pool_t* buf_pool; - ulint chunk_size; - flush_counters_t n; - - chunk_size = ut_min( - srv_cleaner_flush_chunk_size, - min_n - requested_pages[i]); - - buf_pool = buf_pool_from_array(i); - - if (!buf_flush_start(buf_pool, - BUF_FLUSH_LIST)) { - - continue; - } - - buf_flush_batch(buf_pool, BUF_FLUSH_LIST, - chunk_size, lsn_limit, false, - &n); - - buf_flush_end(buf_pool, BUF_FLUSH_LIST); - - flush_common_batch += n.flushed; - - if (n_processed) { - *n_processed += n.flushed; - } - - requested_pages[i] += chunk_size; - - if (requested_pages[i] >= min_n - || !n.flushed) { - - active_instance[i] = false; - remaining_instances--; - } - - if (n.flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - n.flushed); - } 
- } - } - - buf_flush_common(BUF_FLUSH_LIST, flush_common_batch); - } - - /* If we haven't flushed all the instances due to timeout or a repeat - failure to start a flush, return failure */ - for (i = 0; i < srv_buf_pool_instances; i++) { - if (active_instance[i]) { - return(false); - } - } - - return(true); -} - -/******************************************************************//** -This function picks up a single dirty page from the tail of the LRU -list, flushes it, removes it from page_hash and LRU list and puts -it on the free list. It is called from user threads when they are -unable to find a replaceable page at the tail of the LRU list i.e.: -when the background LRU flushing in the page_cleaner thread is not -fast enough to keep pace with the workload. -@return TRUE if success. */ -UNIV_INTERN -ibool -buf_flush_single_page_from_LRU( -/*===========================*/ - buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ -{ - ulint scanned; - buf_page_t* bpage; - ibool flushed = FALSE; - - mutex_enter(&buf_pool->LRU_list_mutex); - - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), scanned = 1; - bpage != NULL; - bpage = UT_LIST_GET_PREV(LRU, bpage), ++scanned) { - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { - - /* The following call will release the LRU list - and block mutex. */ - - flushed = buf_flush_page(buf_pool, bpage, - BUF_FLUSH_SINGLE_PAGE, true); - - if (flushed) { - /* buf_flush_page() will release the - block mutex */ - break; - } - } - - mutex_exit(block_mutex); - } - - if (!flushed) - mutex_exit(&buf_pool->LRU_list_mutex); - - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_SINGLE_FLUSH_SCANNED, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL, - scanned); - - if (bpage == NULL) { - /* Can't find a single flushable page. 
*/ - return(FALSE); - } - - - ibool freed = FALSE; - - /* At this point the page has been written to the disk. - As we are not holding LRU list or buf_page_get_mutex() mutex therefore - we cannot use the bpage safely. It may have been plucked out - of the LRU list by some other thread or it may even have - relocated in case of a compressed page. We need to start - the scan of LRU list again to remove the block from the LRU - list and put it on the free list. */ - mutex_enter(&buf_pool->LRU_list_mutex); - - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); - bpage != NULL; - bpage = UT_LIST_GET_PREV(LRU, bpage)) { - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - mutex_enter(block_mutex); - - ibool ready = buf_flush_ready_for_replace(bpage); - - if (ready) { - bool evict_zip; - - evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool); - - freed = buf_LRU_free_page(bpage, evict_zip); - - mutex_exit(block_mutex); - - break; - } - - mutex_exit(block_mutex); - - } - - if (!freed) - mutex_exit(&buf_pool->LRU_list_mutex); - - return(freed); -} - -/*********************************************************************//** -Clears up tail of the LRU lists: -* Put replaceable pages at the tail of LRU to the free list -* Flush dirty pages at the tail of LRU to the disk -The depth to which we scan each buffer pool is controlled by dynamic -config parameter innodb_LRU_scan_depth. 
-@return number of flushed and evicted pages */ -UNIV_INTERN -ulint -buf_flush_LRU_tail(void) -/*====================*/ -{ - ulint total_flushed = 0; - ulint total_evicted = 0; - ulint start_time = ut_time_ms(); - ulint scan_depth[MAX_BUFFER_POOLS]; - ulint requested_pages[MAX_BUFFER_POOLS]; - bool active_instance[MAX_BUFFER_POOLS]; - bool limited_scan[MAX_BUFFER_POOLS]; - ulint previous_evicted[MAX_BUFFER_POOLS]; - ulint remaining_instances = srv_buf_pool_instances; - ulint lru_chunk_size = srv_cleaner_lru_chunk_size; - ulint free_list_lwm = srv_LRU_scan_depth / 100 - * srv_cleaner_free_list_lwm; - - if(buf_mtflu_init_done()) - { - return(buf_mtflu_flush_LRU_tail()); - } - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - - const buf_pool_t* buf_pool = buf_pool_from_array(i); - - scan_depth[i] = ut_min(srv_LRU_scan_depth, - UT_LIST_GET_LEN(buf_pool->LRU)); - requested_pages[i] = 0; - active_instance[i] = true; - limited_scan[i] = true; - previous_evicted[i] = 0; - } - - while (remaining_instances) { - - if (ut_time_ms() - start_time >= srv_cleaner_max_lru_time) { - - break; - } - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - - if (!active_instance[i]) { - continue; - } - - ulint free_len = free_list_lwm; - buf_pool_t* buf_pool = buf_pool_from_array(i); - - do { - flush_counters_t n; - - ut_ad(requested_pages[i] <= scan_depth[i]); - - /* Currently page_cleaner is the only thread - that can trigger an LRU flush. It is possible - that a batch triggered during last iteration is - still running, */ - if (buf_flush_LRU(buf_pool, lru_chunk_size, - limited_scan[i], &n)) { - - /* Allowed only one batch per - buffer pool instance. 
*/ - buf_flush_wait_batch_end( - buf_pool, BUF_FLUSH_LRU); - } - - total_flushed += n.flushed; - - /* When we evict less pages than we did on a - previous try we relax the LRU scan limit in - order to attempt to evict more */ - limited_scan[i] - = (previous_evicted[i] > n.evicted); - previous_evicted[i] = n.evicted; - total_evicted += n.evicted; - - requested_pages[i] += lru_chunk_size; - - /* If we failed to flush or evict this - instance, do not bother anymore. But take into - account that we might have zero flushed pages - because the flushing request was fully - satisfied by unzip_LRU evictions. */ - if (requested_pages[i] >= scan_depth[i] - || !(srv_cleaner_eviction_factor - ? n.evicted - : (n.flushed + n.unzip_LRU_evicted))) { - - active_instance[i] = false; - remaining_instances--; - } else { - - free_len = UT_LIST_GET_LEN( - buf_pool->free); - } - if (n.flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_COUNT, - MONITOR_LRU_BATCH_FLUSH_PAGES, - n.flushed); - } - - if (n.evicted) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT, - MONITOR_LRU_BATCH_EVICT_PAGES, - n.evicted); - } - } while (active_instance[i] - && free_len <= free_list_lwm); - } - } - - return(total_flushed + total_evicted); -} - -/*********************************************************************//** -Wait for any possible LRU flushes that are in progress to end. 
*/ -UNIV_INTERN -void -buf_flush_wait_LRU_batch_end(void) -/*==============================*/ -{ - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - mutex_enter(&buf_pool->flush_state_mutex); - - if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0 - || buf_pool->init_flush[BUF_FLUSH_LRU]) { - - mutex_exit(&buf_pool->flush_state_mutex); - buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU); - } else { - mutex_exit(&buf_pool->flush_state_mutex); - } - } -} - -/*********************************************************************//** -Flush a batch of dirty pages from the flush list -@return number of pages flushed, 0 if no page is flushed or if another -flush_list type batch is running */ -static -ulint -page_cleaner_do_flush_batch( -/*========================*/ - ulint n_to_flush, /*!< in: number of pages that - we should attempt to flush. */ - lsn_t lsn_limit) /*!< in: LSN up to which flushing - must happen */ -{ - ulint n_flushed; - - buf_flush_list(n_to_flush, lsn_limit, &n_flushed); - - return(n_flushed); -} - -/*********************************************************************//** -Calculates if flushing is required based on number of dirty pages in -the buffer pool. -@return percent of io_capacity to flush to manage dirty page ratio */ -static -ulint -af_get_pct_for_dirty() -/*==================*/ -{ - ulint dirty_pct = (ulint) buf_get_modified_ratio_pct(); - - if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) { - return(100); - } - - ut_a(srv_max_dirty_pages_pct_lwm - <= srv_max_buf_pool_modified_pct); - - if (srv_max_dirty_pages_pct_lwm == 0) { - /* The user has not set the option to preflush dirty - pages as we approach the high water mark. */ - if (dirty_pct > srv_max_buf_pool_modified_pct) { - /* We have crossed the high water mark of dirty - pages In this case we start flushing at 100% of - innodb_io_capacity. 
*/ - return(100); - } - } else if (dirty_pct > srv_max_dirty_pages_pct_lwm) { - /* We should start flushing pages gradually. */ - return (ulint) ((dirty_pct * 100) - / (srv_max_buf_pool_modified_pct + 1)); - } - - return(0); -} - -/*********************************************************************//** -Calculates if flushing is required based on redo generation rate. -@return percent of io_capacity to flush to manage redo space */ -static -ulint -af_get_pct_for_lsn( -/*===============*/ - lsn_t age) /*!< in: current age of LSN. */ -{ - lsn_t max_async_age; - lsn_t lsn_age_factor; - lsn_t af_lwm = (lsn_t) ((srv_adaptive_flushing_lwm - * log_get_capacity()) / 100); - - if (age < af_lwm) { - /* No adaptive flushing. */ - return(0); - } - - max_async_age = log_get_max_modified_age_async(); - - if (age < max_async_age && !srv_adaptive_flushing) { - /* We have still not reached the max_async point and - the user has disabled adaptive flushing. */ - return(0); - } - - /* If we are here then we know that either: - 1) User has enabled adaptive flushing - 2) User may have disabled adaptive flushing but we have reached - max_async_age. */ - lsn_age_factor = (age * 100) / max_async_age; - - ut_ad(srv_max_io_capacity >= srv_io_capacity); - switch ((srv_cleaner_lsn_age_factor_t)srv_cleaner_lsn_age_factor) { - case SRV_CLEANER_LSN_AGE_FACTOR_LEGACY: - return(static_cast<ulint>( - ((srv_max_io_capacity / srv_io_capacity) - * (lsn_age_factor - * sqrt((double)lsn_age_factor))) - / 7.5)); - case SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT: - return(static_cast<ulint>( - ((srv_max_io_capacity / srv_io_capacity) - * (lsn_age_factor * lsn_age_factor - * sqrt((double)lsn_age_factor))) - / 700.5)); - default: - ut_error; - } -} - -/*********************************************************************//** -This function is called approximately once every second by the -page_cleaner thread. Based on various factors it decides if there is a -need to do flushing. 
If flushing is needed it is performed and the -number of pages flushed is returned. -@return number of pages flushed */ -static -ulint -page_cleaner_flush_pages_if_needed(void) -/*====================================*/ -{ - static lsn_t lsn_avg_rate = 0; - static lsn_t prev_lsn = 0; - static lsn_t last_lsn = 0; - static ulint sum_pages = 0; - static ulint last_pages = 0; - static ulint prev_pages = 0; - static ulint avg_page_rate = 0; - static ulint n_iterations = 0; - lsn_t oldest_lsn; - lsn_t cur_lsn; - lsn_t age; - lsn_t lsn_rate; - ulint n_pages = 0; - ulint pct_for_dirty = 0; - ulint pct_for_lsn = 0; - ulint pct_total = 0; - int age_factor = 0; - - cur_lsn = log_get_lsn_nowait(); - - /* log_get_lsn_nowait tries to get log_sys->mutex with - mutex_enter_nowait, if this does not succeed function - returns 0, do not use that value to update stats. */ - if (cur_lsn == 0) { - return(0); - } - - if (prev_lsn == 0) { - /* First time around. */ - prev_lsn = cur_lsn; - return(0); - } - - if (prev_lsn == cur_lsn) { - return(0); - } - - /* We update our variables every srv_flushing_avg_loops - iterations to smooth out transition in workload. */ - if (++n_iterations >= srv_flushing_avg_loops) { - - avg_page_rate = ((sum_pages / srv_flushing_avg_loops) - + avg_page_rate) / 2; - - /* How much LSN we have generated since last call. */ - lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops; - - lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2; - - prev_lsn = cur_lsn; - - n_iterations = 0; - - sum_pages = 0; - } - - oldest_lsn = buf_pool_get_oldest_modification(); - - ut_ad(oldest_lsn <= log_get_lsn()); - - age = cur_lsn > oldest_lsn ? cur_lsn - oldest_lsn : 0; - - pct_for_dirty = af_get_pct_for_dirty(); - pct_for_lsn = af_get_pct_for_lsn(age); - - pct_total = ut_max(pct_for_dirty, pct_for_lsn); - - /* Cap the maximum IO capacity that we are going to use by - max_io_capacity. 
*/ - n_pages = PCT_IO(pct_total); - if (age < log_get_max_modified_age_async()) - n_pages = (n_pages + avg_page_rate) / 2; - - if (n_pages > srv_max_io_capacity) { - n_pages = srv_max_io_capacity; - } - - if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) { - age_factor = static_cast<int>(prev_pages / last_pages); - } - - MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages); - - prev_pages = n_pages; - n_pages = page_cleaner_do_flush_batch( - n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1)); - - last_lsn= cur_lsn; - last_pages= n_pages + 1; - - MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate); - MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate); - MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty); - MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn); - - if (n_pages) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, - MONITOR_FLUSH_ADAPTIVE_COUNT, - MONITOR_FLUSH_ADAPTIVE_PAGES, - n_pages); - - sum_pages += n_pages; - } - - return(n_pages); -} - -/*********************************************************************//** -Puts the page_cleaner thread to sleep if it has finished work in less -than a second */ -static -void -page_cleaner_sleep_if_needed( -/*=========================*/ - ulint next_loop_time) /*!< in: time when next loop iteration - should start */ -{ - /* No sleep if we are cleaning the buffer pool during the shutdown - with everything else finished */ - if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE) - return; - - ulint cur_time = ut_time_ms(); - - if (next_loop_time > cur_time) { - /* Get sleep interval in micro seconds. We use - ut_min() to avoid long sleep in case of - wrap around. */ - os_thread_sleep(ut_min(1000000, - (next_loop_time - cur_time) - * 1000)); - } -} - -/*********************************************************************//** -Returns the aggregate free list length over all buffer pool instances. -@return total free list length. 
*/ -MY_ATTRIBUTE((warn_unused_result)) -static -ulint -buf_get_total_free_list_length(void) -/*================================*/ -{ - ulint result = 0; - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - - result += UT_LIST_GET_LEN(buf_pool_from_array(i)->free); - } - - return result; -} - -/** Returns the aggregate LRU list length over all buffer pool instances. -@return total LRU list length. */ -MY_ATTRIBUTE((warn_unused_result)) -static -ulint -buf_get_total_LRU_list_length(void) -{ - ulint result = 0; - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - - result += UT_LIST_GET_LEN(buf_pool_from_array(i)->LRU); - } - - return result; -} - -/*********************************************************************//** -Adjust the desired page cleaner thread sleep time for LRU flushes. */ -MY_ATTRIBUTE((nonnull)) -static -void -page_cleaner_adapt_lru_sleep_time( -/*==============================*/ - ulint* lru_sleep_time, /*!< in/out: desired page cleaner thread sleep - time for LRU flushes */ - ulint lru_n_flushed) /*!< in: number of flushed in previous batch */ - -{ - ulint free_len = buf_get_total_free_list_length(); - ulint max_free_len = ut_min(buf_get_total_LRU_list_length(), - srv_LRU_scan_depth * srv_buf_pool_instances); - - if (free_len < max_free_len / 100 && lru_n_flushed) { - - /* Free lists filled less than 1% - and iteration was able to flush, no sleep */ - *lru_sleep_time = 0; - } else if (free_len > max_free_len / 5 - || (free_len < max_free_len / 100 && lru_n_flushed == 0)) { - - /* Free lists filled more than 20% - or no pages flushed in previous batch, sleep a bit more */ - *lru_sleep_time += 1; - if (*lru_sleep_time > srv_cleaner_max_lru_time) - *lru_sleep_time = srv_cleaner_max_lru_time; - } else if (free_len < max_free_len / 20 && *lru_sleep_time >= 50) { - - /* Free lists filled less than 5%, sleep a bit less */ - *lru_sleep_time -= 50; - } else { - - /* Free lists filled between 5% and 20%, no change */ - } -} - 
-/*********************************************************************//** -Get the desired page cleaner thread sleep time for flush list flushes. -@return desired sleep time */ -MY_ATTRIBUTE((warn_unused_result)) -static -ulint -page_cleaner_adapt_flush_sleep_time(void) -/*=====================================*/ -{ - lsn_t age = log_get_lsn() - log_sys->last_checkpoint_lsn; - - if (age > log_sys->max_modified_age_sync) { - - /* No sleep if in sync preflush zone */ - return(0); - } - - /* In all other cases flush list factors do not influence the page - cleaner sleep time */ - return(srv_cleaner_max_flush_time); -} - -/******************************************************************//** -page_cleaner thread tasked with flushing dirty pages from the buffer -pool flush lists. As of now we'll have only one instance of this thread. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(buf_flush_page_cleaner_thread)( -/*==========================================*/ - void* arg MY_ATTRIBUTE((unused))) - /*!< in: a dummy parameter required by - os_thread_create */ -{ - my_thread_init(); - ulint next_loop_time = ut_time_ms() + 1000; - ulint n_flushed = 0; - ulint last_activity = srv_get_activity_count(); - ulint last_activity_time = ut_time_ms(); - - ut_ad(!srv_read_only_mode); - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(buf_page_cleaner_thread_key); -#endif /* UNIV_PFS_THREAD */ - - srv_cleaner_tid = os_thread_get_tid(); - - os_thread_set_priority(srv_cleaner_tid, srv_sched_priority_cleaner); - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - - ulint page_cleaner_sleep_time; - ibool server_active; - - srv_current_thread_priority = srv_cleaner_thread_priority; - - page_cleaner_sleep_if_needed(next_loop_time); - - page_cleaner_sleep_time - = 
page_cleaner_adapt_flush_sleep_time(); - - next_loop_time = ut_time_ms() + page_cleaner_sleep_time; - - server_active = srv_check_activity(last_activity); - - if (server_active - || ut_time_ms() - last_activity_time < 1000) { - - if (server_active) { - - last_activity = srv_get_activity_count(); - last_activity_time = ut_time_ms(); - } - - /* Flush pages from flush_list if required */ - page_cleaner_flush_pages_if_needed(); - } else if (srv_idle_flush_pct) { - n_flushed = page_cleaner_do_flush_batch( - PCT_IO(100), - LSN_MAX); - - if (n_flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, - MONITOR_FLUSH_BACKGROUND_COUNT, - MONITOR_FLUSH_BACKGROUND_PAGES, - n_flushed); - } - } - - /* Flush pages from end of LRU if required */ - buf_flush_LRU_tail(); - } - - ut_ad(srv_shutdown_state > 0); - if (srv_fast_shutdown == 2) { - /* In very fast shutdown we simulate a crash of - buffer pool. We are not required to do any flushing */ - goto thread_exit; - } - - /* In case of normal and slow shutdown the page_cleaner thread - must wait for all other activity in the server to die down. - Note that we can start flushing the buffer pool as soon as the - server enters shutdown phase but we must stay alive long enough - to ensure that any work done by the master or purge threads is - also flushed. - During shutdown we pass through two stages. In the first stage, - when SRV_SHUTDOWN_CLEANUP is set other threads like the master - and the purge threads may be working as well. We start flushing - the buffer pool but can't be sure that no new pages are being - dirtied until we enter SRV_SHUTDOWN_FLUSH_PHASE phase. */ - - do { - n_flushed = page_cleaner_do_flush_batch(PCT_IO(100), LSN_MAX); - - /* We sleep only if there are no pages to flush */ - if (n_flushed == 0) { - os_thread_sleep(100000); - } - } while (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); - - /* At this point all threads including the master and the purge - thread must have been suspended. 
*/ - ut_a(srv_get_active_thread_type() == SRV_NONE); - ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE); - - /* We can now make a final sweep on flushing the buffer pool - and exit after we have cleaned the whole buffer pool. - It is important that we wait for any running batch that has - been triggered by us to finish. Otherwise we can end up - considering end of that batch as a finish of our final - sweep and we'll come out of the loop leaving behind dirty pages - in the flush_list */ - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - buf_flush_wait_LRU_batch_end(); - - bool success; - - do { - - success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed); - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - } while (!success || n_flushed > 0 || (IS_XTRABACKUP() && buf_get_n_pending_read_ios() > 0)); - - /* Some sanity checks */ - ut_a(srv_get_active_thread_type() == SRV_NONE); - ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE); - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0); - } - - /* We have lived our life. Time to die. */ - -thread_exit: - buf_page_cleaner_is_active = false; - - my_thread_end(); - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/******************************************************************//** -lru_manager thread tasked with performing LRU flushes and evictions to refill -the buffer pool free lists. As of now we'll have only one instance of this -thread. 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(buf_flush_lru_manager_thread)( -/*==========================================*/ - void* arg MY_ATTRIBUTE((unused))) - /*!< in: a dummy parameter required by - os_thread_create */ -{ - ulint next_loop_time = ut_time_ms() + 1000; - ulint lru_sleep_time = srv_cleaner_max_lru_time; - ulint lru_n_flushed = 1; - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(buf_lru_manager_thread_key); -#endif /* UNIV_PFS_THREAD */ - - srv_lru_manager_tid = os_thread_get_tid(); - - os_thread_set_priority(srv_lru_manager_tid, - srv_sched_priority_cleaner); - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "InnoDB: lru_manager thread running, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - /* On server shutdown, the LRU manager thread runs through cleanup - phase to provide free pages for the master and purge threads. */ - while (srv_shutdown_state == SRV_SHUTDOWN_NONE - || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP) { - - srv_current_thread_priority = srv_cleaner_thread_priority; - - page_cleaner_sleep_if_needed(next_loop_time); - - page_cleaner_adapt_lru_sleep_time(&lru_sleep_time, lru_n_flushed); - - next_loop_time = ut_time_ms() + lru_sleep_time; - - lru_n_flushed = buf_flush_LRU_tail(); - } - - buf_lru_manager_is_active = false; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - -/** Functor to validate the flush list. */ -struct Check { - void operator()(const buf_page_t* elem) - { - ut_a(elem->in_flush_list); - } -}; - -/******************************************************************//** -Validates the flush list. 
-@return TRUE if ok */ -static -ibool -buf_flush_validate_low( -/*===================*/ - buf_pool_t* buf_pool) /*!< in: Buffer pool instance */ -{ - buf_page_t* bpage; - const ib_rbt_node_t* rnode = NULL; - - ut_ad(buf_flush_list_mutex_own(buf_pool)); - - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, Check()); - - bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); - - /* If we are in recovery mode i.e.: flush_rbt != NULL - then each block in the flush_list must also be present - in the flush_rbt. */ - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { - rnode = rbt_first(buf_pool->flush_rbt); - } - - while (bpage != NULL) { - const lsn_t om = bpage->oldest_modification; - - ut_ad(buf_pool_from_bpage(bpage) == buf_pool); - - ut_ad(bpage->in_flush_list); - - /* A page in buf_pool->flush_list can be in - BUF_BLOCK_REMOVE_HASH state. This happens when a page - is in the middle of being relocated. In that case the - original descriptor can have this state and still be - in the flush list waiting to acquire the - buf_pool->flush_list_mutex to complete the relocation. */ - ut_a(buf_page_in_file(bpage) - || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); - ut_a(om > 0); - - if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) { - buf_page_t** prpage; - - ut_a(rnode); - prpage = rbt_value(buf_page_t*, rnode); - - ut_a(*prpage); - ut_a(*prpage == bpage); - rnode = rbt_next(buf_pool->flush_rbt, rnode); - } - - bpage = UT_LIST_GET_NEXT(list, bpage); - - ut_a(!bpage || om >= bpage->oldest_modification); - } - - /* By this time we must have exhausted the traversal of - flush_rbt (if active) as well. */ - ut_a(rnode == NULL); - - return(TRUE); -} - -/******************************************************************//** -Validates the flush list. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -buf_flush_validate( -/*===============*/ - buf_pool_t* buf_pool) /*!< buffer pool instance */ -{ - ibool ret; - - buf_flush_list_mutex_enter(buf_pool); - - ret = buf_flush_validate_low(buf_pool); - - buf_flush_list_mutex_exit(buf_pool); - - return(ret); -} -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/******************************************************************//** -Check if there are any dirty pages that belong to a space id in the flush -list in a particular buffer pool. -@return number of dirty pages present in a single buffer pool */ -UNIV_INTERN -ulint -buf_pool_get_dirty_pages_count( -/*===========================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool */ - ulint id) /*!< in: space id to check */ - -{ - ulint count = 0; - - buf_flush_list_mutex_enter(buf_pool); - - buf_page_t* bpage; - - for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list); - bpage != 0; - bpage = UT_LIST_GET_NEXT(list, bpage)) { - - ut_ad(buf_page_in_file(bpage) - || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); - ut_ad(bpage->in_flush_list); - ut_ad(bpage->oldest_modification > 0); - - if (bpage->space == id) { - ++count; - } - } - - buf_flush_list_mutex_exit(buf_pool); - - return(count); -} - -/******************************************************************//** -Check if there are any dirty pages that belong to a space id in the flush list. 
-@return number of dirty pages present in all the buffer pools */ -UNIV_INTERN -ulint -buf_flush_get_dirty_pages_count( -/*============================*/ - ulint id) /*!< in: space id to check */ - -{ - ulint count = 0; - - for (ulint i = 0; i < srv_buf_pool_instances; ++i) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - count += buf_pool_get_dirty_pages_count(buf_pool, id); - } - - return(count); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc deleted file mode 100644 index d979eb44a96..00000000000 --- a/storage/xtradb/buf/buf0lru.cc +++ /dev/null @@ -1,3016 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0lru.cc -The database buffer replacement algorithm - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0lru.h" - -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_NONINL -#include "buf0lru.ic" -#endif - -#include "ut0byte.h" -#include "ut0lst.h" -#include "ut0rnd.h" -#include "sync0sync.h" -#include "sync0rw.h" -#include "hash0hash.h" -#include "os0sync.h" -#include "fil0fil.h" -#include "btr0btr.h" -#include "buf0buddy.h" -#include "buf0buf.h" -#include "buf0dblwr.h" -#include "buf0flu.h" -#include "buf0rea.h" -#include "btr0sea.h" -#include "ibuf0ibuf.h" -#include "os0file.h" -#include "page0zip.h" -#include "log0recv.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "srv0mon.h" -#include "lock0lock.h" - -#include "ha_prototypes.h" - -/** The number of blocks from the LRU_old pointer onward, including -the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV -of the whole LRU list length, except that the tolerance defined below -is allowed. Note that the tolerance must be small enough such that for -even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not -allowed to point to either end of the LRU list. */ - -#define BUF_LRU_OLD_TOLERANCE 20 - -/** The minimum amount of non-old blocks when the LRU_old list exists -(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks). 
-@see buf_LRU_old_adjust_len */ -#define BUF_LRU_NON_OLD_MIN_LEN 5 -#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN -# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN" -#endif - -/** When dropping the search hash index entries before deleting an ibd -file, we build a local array of pages belonging to that tablespace -in the buffer pool. Following is the size of that array. -We also release buf_pool->LRU_list_mutex after scanning this many pages of the -flush_list when dropping a table. This is to ensure that other threads -are not blocked for extended period of time when using very large -buffer pools. */ -#define BUF_LRU_DROP_SEARCH_SIZE 1024 - -/** If we switch on the InnoDB monitor because there are too few available -frames in the buffer pool, we set this to TRUE */ -static ibool buf_lru_switched_on_innodb_mon = FALSE; - -/******************************************************************//** -These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O -and page_zip_decompress() operations. Based on the statistics, -buf_LRU_evict_from_unzip_LRU() decides if we want to evict from -unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the -uncompressed frame (meaning we can evict dirty blocks as well). From -the regular LRU, we will evict the entire block (i.e.: both the -uncompressed and compressed data), which must be clean. */ - -/* @{ */ - -/** Number of intervals for which we keep the history of these stats. -Each interval is 1 second, defined by the rate at which -srv_error_monitor_thread() calls buf_LRU_stat_update(). */ -#define BUF_LRU_STAT_N_INTERVAL 50 - -/** Co-efficient with which we multiply I/O operations to equate them -with page_zip_decompress() operations. */ -#define BUF_LRU_IO_TO_UNZIP_FACTOR 50 - -/** Sampled values buf_LRU_stat_cur. -Not protected by any mutex. Updated by buf_LRU_stat_update(). 
*/ -static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL]; - -/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */ -static ulint buf_LRU_stat_arr_ind; - -/** Current operation counters. Not protected by any mutex. Cleared -by buf_LRU_stat_update(). */ -UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur; - -/** Running sum of past values of buf_LRU_stat_cur. -Updated by buf_LRU_stat_update(). Not Protected by any mutex. */ -UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum; - -/* @} */ - -/** @name Heuristics for detecting index scan @{ */ -/** Move blocks to "new" LRU list only if the first access was at -least this many milliseconds ago. Not protected by any mutex or latch. */ -UNIV_INTERN uint buf_LRU_old_threshold_ms; -/* @} */ - -/******************************************************************//** -Takes a block out of the LRU list and page hash table. -If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), -the object will be freed. - -The caller must hold buf_pool->LRU_list_mutex, the buf_page_get_mutex() mutex -and the appropriate hash_lock. This function will release the -buf_page_get_mutex() and the hash_lock. - -If a compressed page is freed other compressed pages may be relocated. -@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The -caller needs to free the page to the free list -@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In -this case the block is already returned to the buddy allocator. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -buf_LRU_block_remove_hashed( -/*========================*/ - buf_page_t* bpage, /*!< in: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ - bool zip); /*!< in: true if should remove also the - compressed page of an uncompressed page */ -/******************************************************************//** -Puts a file page whose has no hash index to the free list. 
*/ -static -void -buf_LRU_block_free_hashed_page( -/*===========================*/ - buf_block_t* block); /*!< in: block, must contain a file page and - be in a state where it can be freed */ - -/******************************************************************//** -Increases LRU size in bytes with zip_size for compressed page, -UNIV_PAGE_SIZE for uncompressed page in inline function */ -static inline -void -incr_LRU_size_in_bytes( -/*===================*/ - buf_page_t* bpage, /*!< in: control block */ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ulint zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE; - ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size); -} - -/******************************************************************//** -Determines if the unzip_LRU list should be used for evicting a victim -instead of the general LRU list. -@return TRUE if should use unzip_LRU */ -UNIV_INTERN -ibool -buf_LRU_evict_from_unzip_LRU( -/*=========================*/ - buf_pool_t* buf_pool) -{ - ulint io_avg; - ulint unzip_avg; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - /* If the unzip_LRU list is empty, we can only use the LRU. */ - if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) { - return(FALSE); - } - - /* If unzip_LRU is at most 10% of the size of the LRU list, - then use the LRU. This slack allows us to keep hot - decompressed pages in the buffer pool. */ - if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) - <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) { - return(FALSE); - } - - /* If eviction hasn't started yet, we assume by default - that a workload is disk bound. */ - if (buf_pool->freed_page_clock == 0) { - return(TRUE); - } - - /* Calculate the average over past intervals, and add the values - of the current interval. 
*/ - io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL - + buf_LRU_stat_cur.io; - unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL - + buf_LRU_stat_cur.unzip; - - /* Decide based on our formula. If the load is I/O bound - (unzip_avg is smaller than the weighted io_avg), evict an - uncompressed frame from unzip_LRU. Otherwise we assume that - the load is CPU bound and evict from the regular LRU. */ - return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR); -} - -/******************************************************************//** -Attempts to drop page hash index on a batch of pages belonging to a -particular space id. */ -static -void -buf_LRU_drop_page_hash_batch( -/*=========================*/ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - const ulint* arr, /*!< in: array of page_no */ - ulint count) /*!< in: number of entries in array */ -{ - ulint i; - - ut_ad(arr != NULL); - ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE); - - for (i = 0; i < count; ++i) { - btr_search_drop_page_hash_when_freed(space_id, zip_size, - arr[i]); - } -} - -/******************************************************************//** -When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page -hash index entries belonging to that table. This function tries to -do that in batch. Note that this is a 'best effort' attempt and does -not guarantee that ALL hash entries will be removed. */ -static -void -buf_LRU_drop_page_hash_for_tablespace( -/*==================================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint id) /*!< in: space id */ -{ - buf_page_t* bpage; - ulint* page_arr; - ulint num_entries; - ulint zip_size; - - zip_size = fil_space_get_zip_size(id); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* Somehow, the tablespace does not exist. Nothing to drop. 
*/ - ut_ad(0); - return; - } - - page_arr = static_cast<ulint*>(ut_malloc( - sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE)); - - mutex_enter(&buf_pool->LRU_list_mutex); - num_entries = 0; - -scan_again: - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - - while (bpage != NULL) { - buf_page_t* prev_bpage; - ibool is_fixed; - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - - ut_a(buf_page_in_file(bpage)); - - if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE - || bpage->space != id - || bpage->io_fix != BUF_IO_NONE) { - /* Compressed pages are never hashed. - Skip blocks of other tablespaces. - Skip I/O-fixed blocks (to be dealt with later). */ -next_page: - bpage = prev_bpage; - continue; - } - - mutex_enter(block_mutex); - is_fixed = bpage->buf_fix_count > 0 - || !((buf_block_t*) bpage)->index; - mutex_exit(block_mutex); - - if (is_fixed) { - goto next_page; - } - - /* Store the page number so that we can drop the hash - index in a batch later. */ - page_arr[num_entries] = bpage->offset; - ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE); - ++num_entries; - - if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) { - goto next_page; - } - - /* Array full. We release the buf_pool->LRU_list_mutex to obey - the latching order. */ - mutex_exit(&buf_pool->LRU_list_mutex); - - buf_LRU_drop_page_hash_batch( - id, zip_size, page_arr, num_entries); - - num_entries = 0; - - mutex_enter(&buf_pool->LRU_list_mutex); - - /* Note that we released the buf_pool->LRU_list_mutex above - after reading the prev_bpage during processing of a - page_hash_batch (i.e.: when the array was full). - Because prev_bpage could belong to a compressed-only - block, it may have been relocated, and thus the - pointer cannot be trusted. Because bpage is of type - buf_block_t, it is safe to dereference. - - bpage can change in the LRU list. 
This is OK because - this function is a 'best effort' to drop as many - search hash entries as possible and it does not - guarantee that ALL such entries will be dropped. */ - - /* If, however, bpage has been removed from LRU list - to the free list then we should restart the scan. */ - - if (bpage - && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { - goto scan_again; - } - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - /* Drop any remaining batch of search hashed pages. */ - buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); - ut_free(page_arr); -} - -/******************************************************************//** -While flushing (or removing dirty) pages from a tablespace we don't -want to hog the CPU and resources. Release the buffer pool and block -mutex and try to force a context switch. Then reacquire the same mutexes. -The current page is "fixed" before the release of the mutexes and then -"unfixed" again once we have reacquired the mutexes. */ -static MY_ATTRIBUTE((nonnull)) -void -buf_flush_yield( -/*============*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - buf_page_t* bpage) /*!< in/out: current page */ -{ - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(mutex_own(block_mutex)); - ut_ad(buf_page_in_file(bpage)); - - /* "Fix" the block so that the position cannot be - changed after we release the buffer pool and - block mutexes. */ - buf_page_set_sticky(bpage); - - /* Now it is safe to release the LRU list mutex */ - mutex_exit(&buf_pool->LRU_list_mutex); - - mutex_exit(block_mutex); - /* Try and force a context switch. */ - os_thread_yield(); - - mutex_enter(&buf_pool->LRU_list_mutex); - - mutex_enter(block_mutex); - /* "Unfix" the block now that we have both the - buffer pool and block mutex again. 
*/ - buf_page_unset_sticky(bpage); - mutex_exit(block_mutex); -} - -/******************************************************************//** -If we have hogged the resources for too long then release the buffer -pool and flush list mutex and do a thread yield. Set the current page -to "sticky" so that it is not relocated during the yield. -@return true if yielded */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) -bool -buf_flush_try_yield( -/*================*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - buf_page_t* bpage, /*!< in/out: bpage to remove */ - ulint processed, /*!< in: number of pages processed */ - bool* must_restart) /*!< in/out: if true, we have to - restart the flush list scan */ -{ - /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the - loop we release buf_pool->LRU_list_mutex to let other threads - do their job but only if the block is not IO fixed. This - ensures that the block stays in its position in the - flush_list. */ - - if (bpage != NULL - && processed >= BUF_LRU_DROP_SEARCH_SIZE - && buf_page_get_io_fix_unlocked(bpage) == BUF_IO_NONE) { - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - buf_flush_list_mutex_exit(buf_pool); - - /* We don't have to worry about bpage becoming a dangling - pointer by a compressed page flush list relocation because - buf_page_get_gen() won't be called for pages from this - tablespace. */ - - mutex_enter(block_mutex); - /* Recheck the I/O fix and the flush list presence now that we - hold the right mutex */ - if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE - || bpage->oldest_modification == 0)) { - - mutex_exit(block_mutex); - - *must_restart = true; - - buf_flush_list_mutex_enter(buf_pool); - - return false; - } - - *must_restart = false; - - /* Release the LRU list and buf_page_get_mutex() mutex - to give the other threads a go. 
*/ - - buf_flush_yield(buf_pool, bpage); - - buf_flush_list_mutex_enter(buf_pool); - - /* Should not have been removed from the flush - list during the yield. However, this check is - not sufficient to catch a remove -> add. */ - - ut_ad(bpage->in_flush_list); - - return(true); - } - - return(false); -} - -/******************************************************************//** -Removes a single page from a given tablespace inside a specific -buffer pool instance. -@return true if page was removed. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -buf_flush_or_remove_page( -/*=====================*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - buf_page_t* bpage, /*!< in/out: bpage to remove */ - bool flush, /*!< in: flush to disk if true but - don't remove else remove without - flushing to disk */ - bool* must_restart) /*!< in/out: if true, must restart the - flush list scan */ -{ - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(buf_flush_list_mutex_own(buf_pool)); - - /* It is safe to check bpage->space and bpage->io_fix while holding - buf_pool->LRU_list_mutex only. */ - - if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage) - != BUF_IO_NONE)) { - - /* We cannot remove this page during this scan - yet; maybe the system is currently reading it - in, or flushing the modifications to the file */ - return(false); - } - - buf_flush_list_mutex_exit(buf_pool); - - /* We don't have to worry about bpage becoming a dangling - pointer by a compressed page flush list relocation because - buf_page_get_gen() won't be called for pages from this - tablespace. */ - bool processed; - - mutex_enter(block_mutex); - - /* Recheck the page I/O fix and the flush list presence now - that we hold the right mutex. 
*/ - if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE - || bpage->oldest_modification == 0)) { - - /* The page became I/O-fixed or is not on the flush - list anymore, this invalidates any flush-list-page - pointers we have. */ - - mutex_exit(block_mutex); - - *must_restart = true; - processed = false; - - } else if (!flush) { - - buf_flush_remove(bpage); - - mutex_exit(block_mutex); - - processed = true; - - } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { - - if (buf_flush_page( - buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) { - - /* Wake possible simulated aio thread to actually - post the writes to the operating system */ - os_aio_simulated_wake_handler_threads(); - - mutex_enter(&buf_pool->LRU_list_mutex); - - processed = true; - - } else { - mutex_exit(block_mutex); - - processed = false; - } - - } else { - mutex_exit(block_mutex); - - processed = false; - } - - buf_flush_list_mutex_enter(buf_pool); - - ut_ad(!mutex_own(block_mutex)); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - return(processed); -} - -/******************************************************************//** -Remove all dirty pages belonging to a given tablespace inside a specific -buffer pool instance when we are deleting the data file(s) of that -tablespace. The pages still remain a part of LRU and are evicted from -the list as they age towards the tail of the LRU. 
-@retval DB_SUCCESS if all freed -@retval DB_FAIL if not all freed -@retval DB_INTERRUPTED if the transaction was interrupted */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) -dberr_t -buf_flush_or_remove_pages( -/*======================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint id, /*!< in: target space id for which - to remove or flush pages */ - bool flush, /*!< in: flush to disk if true but - don't remove else remove without - flushing to disk */ - const trx_t* trx) /*!< to check if the operation must - be interrupted, can be 0 */ -{ - buf_page_t* prev; - buf_page_t* bpage; - ulint processed = 0; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - buf_flush_list_mutex_enter(buf_pool); - -rescan: - bool must_restart = false; - bool all_freed = true; - - for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list); - bpage != NULL; - bpage = prev) { - - ut_ad(!must_restart); - ut_a(buf_page_in_file(bpage)); - - /* Save the previous link because once we free the - page we can't rely on the links. */ - - prev = UT_LIST_GET_PREV(list, bpage); - - if (buf_page_get_space(bpage) != id) { - - /* Skip this block, as it does not belong to - the target space. */ - - } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush, - &must_restart)) { - - /* Remove was unsuccessful, we have to try again - by scanning the entire list from the end. - buf_flush_or_remove_page() released the - flush list mutex but not the LRU list mutex. - Therefore it is possible that a new page was - added to the flush list. For example, in case - where we are at the head of the flush list and - prev == NULL. That is OK because we have the - tablespace quiesced and no new pages for this - space-id should enter flush_list. This is - because the only callers of this function are - DROP TABLE and FLUSH TABLE FOR EXPORT. - We know that we'll have to do at least one more - scan but we don't break out of loop here and - try to do as much work as we can in this - iteration. 
*/ - - all_freed = false; - if (UNIV_UNLIKELY(must_restart)) { - - /* Cannot trust the prev pointer */ - break; - } - } else if (flush) { - - /* The processing was successful. And during the - processing we have released all the buf_pool mutexes - when calling buf_page_flush(). We cannot trust - prev pointer. */ - goto rescan; - } - - ut_ad(!must_restart); - ++processed; - - /* Yield if we have hogged the CPU and mutexes for too long. */ - if (buf_flush_try_yield(buf_pool, prev, processed, - &must_restart)) { - - ut_ad(!must_restart); - /* Reset the batch size counter if we had to yield. */ - - processed = 0; - } else if (UNIV_UNLIKELY(must_restart)) { - - /* Cannot trust the prev pointer */ - all_freed = false; - break; - } - -#ifdef DBUG_OFF - if (flush) { - DBUG_EXECUTE_IF("ib_export_flush_crash", - static ulint n_pages; - if (++n_pages == 4) {DBUG_SUICIDE();}); - } -#endif /* DBUG_OFF */ - - /* The check for trx is interrupted is expensive, we want - to check every N iterations. */ - if (!processed && trx && trx_is_interrupted(trx)) { - buf_flush_list_mutex_exit(buf_pool); - return(DB_INTERRUPTED); - } - } - - buf_flush_list_mutex_exit(buf_pool); - - return(all_freed ? DB_SUCCESS : DB_FAIL); -} - -/******************************************************************//** -Remove or flush all the dirty pages that belong to a given tablespace -inside a specific buffer pool instance. The pages will remain in the LRU -list and will be evicted from the LRU list as they age and move towards -the tail of the LRU list. 
*/ -static MY_ATTRIBUTE((nonnull(1))) -void -buf_flush_dirty_pages( -/*==================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint id, /*!< in: space id */ - bool flush, /*!< in: flush to disk if true otherwise - remove the pages without flushing */ - const trx_t* trx) /*!< to check if the operation must - be interrupted */ -{ - dberr_t err; - - do { - mutex_enter(&buf_pool->LRU_list_mutex); - - err = buf_flush_or_remove_pages(buf_pool, id, flush, trx); - - mutex_exit(&buf_pool->LRU_list_mutex); - - ut_ad(buf_flush_validate(buf_pool)); - - if (err == DB_FAIL) { - os_thread_sleep(2000); - } - - /* DB_FAIL is a soft error, it means that the task wasn't - completed, needs to be retried. */ - - ut_ad(buf_flush_validate(buf_pool)); - - } while (err == DB_FAIL); - - ut_ad(err == DB_INTERRUPTED - || buf_pool_get_dirty_pages_count(buf_pool, id) == 0); -} - -/******************************************************************//** -Remove all pages that belong to a given tablespace inside a specific -buffer pool instance when we are DISCARDing the tablespace. */ -static MY_ATTRIBUTE((nonnull)) -void -buf_LRU_remove_all_pages( -/*=====================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint id) /*!< in: space id */ -{ - buf_page_t* bpage; - ibool all_freed; - -scan_again: - mutex_enter(&buf_pool->LRU_list_mutex); - - all_freed = TRUE; - - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); - bpage != NULL; - /* No op */) { - - prio_rw_lock_t* hash_lock; - buf_page_t* prev_bpage; - ib_mutex_t* block_mutex = NULL; - - ut_a(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - - prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - - /* It is safe to check bpage->space and bpage->io_fix while - holding buf_pool->LRU_list_mutex only and later recheck - while holding the buf_page_get_mutex() mutex. */ - - if (buf_page_get_space(bpage) != id) { - /* Skip this block, as it does not belong to - the space that is being invalidated. 
*/ - goto next_page; - } else if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage) - != BUF_IO_NONE)) { - /* We cannot remove this page during this scan - yet; maybe the system is currently reading it - in, or flushing the modifications to the file */ - - all_freed = FALSE; - goto next_page; - } else { - ulint fold = buf_page_address_fold( - bpage->space, bpage->offset); - - hash_lock = buf_page_hash_lock_get(buf_pool, fold); - - rw_lock_x_lock(hash_lock); - - block_mutex = buf_page_get_mutex(bpage); - mutex_enter(block_mutex); - - if (UNIV_UNLIKELY( - buf_page_get_space(bpage) != id - || bpage->buf_fix_count > 0 - || (buf_page_get_io_fix(bpage) - != BUF_IO_NONE))) { - - mutex_exit(block_mutex); - - rw_lock_x_unlock(hash_lock); - - /* We cannot remove this page during - this scan yet; maybe the system is - currently reading it in, or flushing - the modifications to the file */ - - all_freed = FALSE; - - goto next_page; - } - } - - ut_ad(mutex_own(block_mutex)); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Dropping space %lu page %lu\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif - if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { - /* Do nothing, because the adaptive hash index - covers uncompressed pages only. */ - } else if (((buf_block_t*) bpage)->index) { - ulint page_no; - ulint zip_size; - - mutex_exit(&buf_pool->LRU_list_mutex); - - zip_size = buf_page_get_zip_size(bpage); - page_no = buf_page_get_page_no(bpage); - - mutex_exit(block_mutex); - - rw_lock_x_unlock(hash_lock); - - /* Note that the following call will acquire - and release block->lock X-latch. */ - - btr_search_drop_page_hash_when_freed( - id, zip_size, page_no); - - goto scan_again; - } - - if (bpage->oldest_modification != 0) { - - buf_flush_remove(bpage); - } - - ut_ad(!bpage->in_flush_list); - - /* Remove from the LRU list. 
*/ - - if (buf_LRU_block_remove_hashed(bpage, true)) { - - mutex_enter(block_mutex); - buf_LRU_block_free_hashed_page((buf_block_t*) bpage); - mutex_exit(block_mutex); - } else { - ut_ad(block_mutex == &buf_pool->zip_mutex); - } - - ut_ad(!mutex_own(block_mutex)); - -#ifdef UNIV_SYNC_DEBUG - /* buf_LRU_block_remove_hashed() releases the hash_lock */ - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - -next_page: - bpage = prev_bpage; - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - if (!all_freed) { - os_thread_sleep(20000); - - goto scan_again; - } -} - -/******************************************************************//** -Remove pages belonging to a given tablespace inside a specific -buffer pool instance when we are deleting the data file(s) of that -tablespace. The pages still remain a part of LRU and are evicted from -the list as they age towards the tail of the LRU only if buf_remove -is BUF_REMOVE_FLUSH_NO_WRITE. */ -static MY_ATTRIBUTE((nonnull(1))) -void -buf_LRU_remove_pages( -/*=================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint id, /*!< in: space id */ - buf_remove_t buf_remove, /*!< in: remove or flush strategy */ - const trx_t* trx) /*!< to check if the operation must - be interrupted */ -{ - switch (buf_remove) { - case BUF_REMOVE_ALL_NO_WRITE: - buf_LRU_remove_all_pages(buf_pool, id); - break; - - case BUF_REMOVE_FLUSH_NO_WRITE: - ut_a(trx == 0); - buf_flush_dirty_pages(buf_pool, id, false, NULL); - break; - - case BUF_REMOVE_FLUSH_WRITE: - ut_a(trx != 0); - buf_flush_dirty_pages(buf_pool, id, true, trx); - /* Ensure that all asynchronous IO is completed. */ - os_aio_wait_until_no_pending_writes(); - fil_flush(id); - break; - } -} - -/******************************************************************//** -Flushes all dirty pages or removes all pages belonging -to a given tablespace. 
A PROBLEM: if readahead is being started, what -guarantees that it will not try to read in pages after this operation -has completed? */ -UNIV_INTERN -void -buf_LRU_flush_or_remove_pages( -/*==========================*/ - ulint id, /*!< in: space id */ - buf_remove_t buf_remove, /*!< in: remove or flush strategy */ - const trx_t* trx) /*!< to check if the operation must - be interrupted */ -{ - ulint i; - - /* Before we attempt to drop pages one by one we first - attempt to drop page hash index entries in batches to make - it more efficient. The batching attempt is a best effort - attempt and does not guarantee that all pages hash entries - will be dropped. We get rid of remaining page hash entries - one by one below. */ - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - switch (buf_remove) { - case BUF_REMOVE_ALL_NO_WRITE: - buf_LRU_drop_page_hash_for_tablespace(buf_pool, id); - break; - - case BUF_REMOVE_FLUSH_NO_WRITE: - /* It is a DROP TABLE for a single table - tablespace. No AHI entries exist because - we already dealt with them when freeing up - extents. */ - case BUF_REMOVE_FLUSH_WRITE: - /* We allow read-only queries against the - table, there is no need to drop the AHI entries. */ - break; - } - - buf_LRU_remove_pages(buf_pool, id, buf_remove, trx); - } -} - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/********************************************************************//** -Insert a compressed block into buf_pool->zip_clean in the LRU order. 
*/ -UNIV_INTERN -void -buf_LRU_insert_zip_clean( -/*=====================*/ - buf_page_t* bpage) /*!< in: pointer to the block in question */ -{ - buf_page_t* b; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(mutex_own(&buf_pool->zip_mutex)); - ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE); - - /* Find the first successor of bpage in the LRU list - that is in the zip_clean list. */ - b = bpage; - do { - b = UT_LIST_GET_NEXT(LRU, b); - } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE); - - /* Insert bpage before b, i.e., after the predecessor of b. */ - if (b) { - b = UT_LIST_GET_PREV(list, b); - } - - if (b) { - UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage); - } else { - UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage); - } -} -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -/******************************************************************//** -Try to free an uncompressed page of a compressed block from the unzip -LRU list. The compressed page is preserved, and it need not be clean. -@return TRUE if freed */ -UNIV_INLINE -ibool -buf_LRU_free_from_unzip_LRU_list( -/*=============================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - srv_LRU_scan_depth / 2 blocks. 
*/ -{ - buf_block_t* block; - ibool freed; - ulint scanned; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) { - return(FALSE); - } - - for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU), - scanned = 1, freed = FALSE; - block != NULL && !freed - && (scan_all || scanned < srv_LRU_scan_depth); - ++scanned) { - - buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, - block); - - mutex_enter(&block->mutex); - - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(block->in_unzip_LRU_list); - ut_ad(block->page.in_LRU_list); - - freed = buf_LRU_free_page(&block->page, false); - - mutex_exit(&block->mutex); - - block = prev_block; - } - - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_UNZIP_SEARCH_SCANNED, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL, - scanned); - return(freed); -} - -/******************************************************************//** -Try to free a clean page from the common LRU list. -@return TRUE if freed */ -UNIV_INLINE -ibool -buf_LRU_free_from_common_LRU_list( -/*==============================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - srv_LRU_scan_depth / 2 blocks. 
*/ -{ - buf_page_t* bpage; - ibool freed; - ulint scanned; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), - scanned = 1, freed = FALSE; - bpage != NULL && !freed - && (scan_all || scanned < srv_LRU_scan_depth); - ++scanned) { - - unsigned accessed; - buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, - bpage); - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - - accessed = buf_page_is_accessed(bpage); - - mutex_enter(block_mutex); - - freed = buf_LRU_free_page(bpage, true); - - mutex_exit(block_mutex); - - if (freed && !accessed) { - /* Keep track of pages that are evicted without - ever being accessed. This gives us a measure of - the effectiveness of readahead */ - ++buf_pool->stat.n_ra_pages_evicted; - } - - bpage = prev_bpage; - } - - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_SEARCH_SCANNED, - MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, - MONITOR_LRU_SEARCH_SCANNED_PER_CALL, - scanned); - - return(freed); -} - -/******************************************************************//** -Try to free a replaceable block. -@return TRUE if found and freed */ -UNIV_INTERN -ibool -buf_LRU_scan_and_free_block( -/*========================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - 'old' blocks. */ -{ - ibool freed = FALSE; - bool use_unzip_list = UT_LIST_GET_LEN(buf_pool->unzip_LRU) > 0; - - mutex_enter(&buf_pool->LRU_list_mutex); - - if (use_unzip_list) { - freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all); - } - - if (!freed) { - freed = buf_LRU_free_from_common_LRU_list(buf_pool, scan_all); - } - - if (!freed) { - mutex_exit(&buf_pool->LRU_list_mutex); - } - - return(freed); -} - -/******************************************************************//** -Returns TRUE if less than 25 % of the buffer pool in any instance is -available. 
This can be used in heuristics to prevent huge transactions -eating up the whole buffer pool for their locks. -@return TRUE if less than 25 % of buffer pool left */ -UNIV_INTERN -ibool -buf_LRU_buf_pool_running_out(void) -/*==============================*/ -{ - ulint i; - ibool ret = FALSE; - - for (i = 0; i < srv_buf_pool_instances && !ret; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - if (!recv_recovery_on - && UT_LIST_GET_LEN(buf_pool->free) - + UT_LIST_GET_LEN(buf_pool->LRU) - < buf_pool->curr_size / 4) { - - ret = TRUE; - } - } - - return(ret); -} - -/******************************************************************//** -Returns a free block from the buf_pool. The block is taken off the -free list. If it is empty, returns NULL. -@return a free control block, or NULL if the buf_block->free list is empty */ -UNIV_INTERN -buf_block_t* -buf_LRU_get_free_only( -/*==================*/ - buf_pool_t* buf_pool) -{ - buf_block_t* block; - - mutex_enter_last(&buf_pool->free_list_mutex); - - block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free); - - if (block) { - - ut_ad(block->page.in_free_list); - ut_d(block->page.in_free_list = FALSE); - ut_ad(!block->page.in_flush_list); - ut_ad(!block->page.in_LRU_list); - ut_a(!buf_page_in_file(&block->page)); - UT_LIST_REMOVE(list, buf_pool->free, (&block->page)); - buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); - - mutex_exit(&buf_pool->free_list_mutex); - - mutex_enter(&block->mutex); - - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); - - ut_ad(buf_pool_from_block(block) == buf_pool); - - mutex_exit(&block->mutex); - return(block); - } - - mutex_exit(&buf_pool->free_list_mutex); - - return(NULL); -} - -/******************************************************************//** -Checks how much of buf_pool is occupied by non-data objects like -AHI, lock heaps etc. Depending on the size of non-data objects this -function will either assert or issue a warning and switch on the -status monitor. 
*/ -static -void -buf_LRU_check_size_of_non_data_objects( -/*===================================*/ - const buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) - + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: ERROR: over 95 percent of the buffer pool" - " is occupied by\n" - "InnoDB: lock heaps or the adaptive hash index!" - " Check that your\n" - "InnoDB: transactions do not set too many row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." - " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: We intentionally generate a seg fault" - " to print a stack trace\n" - "InnoDB: on Linux!\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); - - ut_error; - - } else if (!recv_recovery_on - && (UT_LIST_GET_LEN(buf_pool->free) - + UT_LIST_GET_LEN(buf_pool->LRU)) - < buf_pool->curr_size / 3) { - - if (!buf_lru_switched_on_innodb_mon) { - - /* Over 67 % of the buffer pool is occupied by lock - heaps or the adaptive hash index. This may be a memory - leak! */ - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: WARNING: over 67 percent of" - " the buffer pool is occupied by\n" - "InnoDB: lock heaps or the adaptive" - " hash index! Check that your\n" - "InnoDB: transactions do not set too many" - " row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." 
- " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: Starting the InnoDB Monitor to print" - " diagnostics, including\n" - "InnoDB: lock heap and hash index sizes.\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); - - buf_lru_switched_on_innodb_mon = TRUE; - srv_print_innodb_monitor = TRUE; - os_event_set(srv_monitor_event); - } - } else if (buf_lru_switched_on_innodb_mon) { - - /* Switch off the InnoDB Monitor; this is a simple way - to stop the monitor if the situation becomes less urgent, - but may also surprise users if the user also switched on the - monitor! */ - - buf_lru_switched_on_innodb_mon = FALSE; - srv_print_innodb_monitor = FALSE; - } -} - -/** Diagnose failure to get a free page and request InnoDB monitor output in -the error log if more than two seconds have been spent already. -@param[in] n_iterations how many buf_LRU_get_free_page iterations - already completed -@param[in] started_ms timestamp in ms of when the attempt to get the - free page started -@param[in] flush_failures how many times single-page flush, if allowed, - has failed -@param[out] mon_value_was previous srv_print_innodb_monitor value -@param[out] started_monitor whether InnoDB monitor print has been requested -*/ -static -void -buf_LRU_handle_lack_of_free_blocks(ulint n_iterations, ulint started_ms, - ulint flush_failures, - ibool *mon_value_was, - ibool *started_monitor) -{ - static ulint last_printout_ms = 0; - - /* Legacy algorithm started warning after at least 2 seconds, we - emulate this. */ - const ulint current_ms = ut_time_ms(); - - if ((current_ms > started_ms + 2000) - && (current_ms > last_printout_ms + 2000)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: difficult to find free blocks in\n" - "InnoDB: the buffer pool (%lu search iterations)!\n" - "InnoDB: %lu failed attempts to flush a page!" 
- " Consider\n" - "InnoDB: increasing the buffer pool size.\n" - "InnoDB: It is also possible that" - " in your Unix version\n" - "InnoDB: fsync is very slow, or" - " completely frozen inside\n" - "InnoDB: the OS kernel. Then upgrading to" - " a newer version\n" - "InnoDB: of your operating system may help." - " Look at the\n" - "InnoDB: number of fsyncs in diagnostic info below.\n" - "InnoDB: Pending flushes (fsync) log: %lu;" - " buffer pool: %lu\n" - "InnoDB: %lu OS file reads, %lu OS file writes," - " %lu OS fsyncs\n" - "InnoDB: Starting InnoDB Monitor to print further\n" - "InnoDB: diagnostics to the standard output.\n", - (ulong) n_iterations, - (ulong) flush_failures, - (ulong) fil_n_pending_log_flushes, - (ulong) fil_n_pending_tablespace_flushes, - (ulong) os_n_file_reads, (ulong) os_n_file_writes, - (ulong) os_n_fsyncs); - - last_printout_ms = current_ms; - *mon_value_was = srv_print_innodb_monitor; - *started_monitor = TRUE; - srv_print_innodb_monitor = TRUE; - os_event_set(lock_sys->timeout_event); - } - -} - -/** The maximum allowed backoff sleep time duration, microseconds */ -#define MAX_FREE_LIST_BACKOFF_SLEEP 10000 - -/** The sleep reduction factor for high-priority waiter backoff sleeps */ -#define FREE_LIST_BACKOFF_HIGH_PRIO_DIVIDER 100 - -/** The sleep reduction factor for low-priority waiter backoff sleeps */ -#define FREE_LIST_BACKOFF_LOW_PRIO_DIVIDER 1 - -/******************************************************************//** -Returns a free block from the buf_pool. The block is taken off the -free list. If free list is empty, blocks are moved from the end of the -LRU list to the free list. -This function is called from a user thread when it needs a clean -block to read in a page. Note that we only ever get a block from -the free list. Even when we flush a page or find a page in LRU scan -we put it to free list to be used. 
-* iteration 0: - * get a block from free list, success:done - * if there is an LRU flush batch in progress: - * wait for batch to end: retry free list - * if buf_pool->try_LRU_scan is set - * scan LRU up to srv_LRU_scan_depth to find a clean block - * the above will put the block on free list - * success:retry the free list - * flush one dirty page from tail of LRU to disk - * the above will put the block on free list - * success: retry the free list -* iteration 1: - * same as iteration 0 except: - * scan whole LRU list - * scan LRU list even if buf_pool->try_LRU_scan is not set -* iteration > 1: - * same as iteration 1 but sleep 100ms -@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ -UNIV_INTERN -buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ -{ - buf_block_t* block = NULL; - ibool freed = FALSE; - ulint n_iterations = 0; - ulint flush_failures = 0; - ibool mon_value_was = FALSE; - ibool started_monitor = FALSE; - ulint started_ms = 0; - - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - - MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); -loop: - buf_LRU_check_size_of_non_data_objects(buf_pool); - - /* If there is a block in the free list, take it */ - if (DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) { - - block = NULL; - - if (srv_debug_monitor_printed) - DBUG_SET("-d,simulate_lack_of_pages"); - - } else if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages", - recv_recovery_on, false)) { - - block = NULL; - - if (srv_debug_monitor_printed) - DBUG_SUICIDE(); - } else { - - block = buf_LRU_get_free_only(buf_pool); - } - - if (block) { - - ut_ad(buf_pool_from_block(block) == buf_pool); - memset(&block->page.zip, 0, sizeof block->page.zip); - - if (started_monitor) { - srv_print_innodb_monitor = - static_cast<my_bool>(mon_value_was); - } - - return(block); - } - - if (!started_ms) - started_ms = ut_time_ms(); - - if (srv_empty_free_list_algorithm == 
SRV_EMPTY_FREE_LIST_BACKOFF - && buf_lru_manager_is_active - && (srv_shutdown_state == SRV_SHUTDOWN_NONE - || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP)) { - - /* Backoff to minimize the free list mutex contention while the - free list is empty */ - ulint priority = srv_current_thread_priority; - - if (n_iterations < 3) { - - os_thread_yield(); - if (!priority) { - os_thread_yield(); - } - } else { - - ulint i, b; - - if (n_iterations < 6) { - i = n_iterations - 3; - } else if (n_iterations < 8) { - i = 4; - } else if (n_iterations < 11) { - i = 5; - } else { - i = n_iterations - 5; - } - b = 1 << i; - if (b > MAX_FREE_LIST_BACKOFF_SLEEP) { - b = MAX_FREE_LIST_BACKOFF_SLEEP; - } - os_thread_sleep(b / (priority - ? FREE_LIST_BACKOFF_HIGH_PRIO_DIVIDER - : FREE_LIST_BACKOFF_LOW_PRIO_DIVIDER)); - } - - buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms, - flush_failures, - &mon_value_was, - &started_monitor); - - n_iterations++; - - srv_stats.buf_pool_wait_free.inc(); - - /* In case of backoff, do not ever attempt single page flushes - and wait for the cleaner to free some pages instead. */ - goto loop; - } else { - - /* The LRU manager is not running or Oracle MySQL 5.6 algorithm - was requested, will perform a single page flush */ - ut_ad((srv_empty_free_list_algorithm - == SRV_EMPTY_FREE_LIST_LEGACY) - || !buf_lru_manager_is_active - || (srv_shutdown_state != SRV_SHUTDOWN_NONE - && srv_shutdown_state != SRV_SHUTDOWN_CLEANUP)); - } - - mutex_enter(&buf_pool->flush_state_mutex); - - if (buf_pool->init_flush[BUF_FLUSH_LRU] - && srv_use_doublewrite_buf - && buf_dblwr != NULL) { - - mutex_exit(&buf_pool->flush_state_mutex); - - /* If there is an LRU flush happening in the background - then we wait for it to end instead of trying a single - page flush. If, however, we are not using doublewrite - buffer then it is better to do our own single page - flush instead of waiting for LRU flush to end. 
*/ - buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU); - goto loop; - } - - mutex_exit(&buf_pool->flush_state_mutex); - - if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages", true, false) - || DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) { - - buf_pool->try_LRU_scan = false; - } - - freed = FALSE; - if (buf_pool->try_LRU_scan || n_iterations > 0) { - - /* If no block was in the free list, search from the - end of the LRU list and try to free a block there. - If we are doing for the first time we'll scan only - tail of the LRU list otherwise we scan the whole LRU - list. */ - freed = buf_LRU_scan_and_free_block(buf_pool, - n_iterations > 0); - - if (!freed && n_iterations == 0) { - /* Tell other threads that there is no point - in scanning the LRU list. This flag is set to - TRUE again when we flush a batch from this - buffer pool. */ - buf_pool->try_LRU_scan = FALSE; - } - } - - if (freed) { - goto loop; - - } - - buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms, - flush_failures, &mon_value_was, - &started_monitor); - - /* If we have scanned the whole LRU and still are unable to - find a free block then we should sleep here to let the - page_cleaner do an LRU batch for us. - TODO: It'd be better if we can signal the page_cleaner. Perhaps - we should use timed wait for page_cleaner. */ - if (n_iterations > 1) { - - os_thread_sleep(100000); - } - - /* No free block was found: try to flush the LRU list. - This call will flush one page from the LRU and put it on the - free list. That means that the free block is up for grabs for - all user threads. - TODO: A more elegant way would have been to return the freed - up block to the caller here but the code that deals with - removing the block from page_hash and LRU_list is fairly - involved (particularly in case of compressed pages). We - can do that in a separate patch sometime in future. 
*/ - if (!buf_flush_single_page_from_LRU(buf_pool)) { - MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); - ++flush_failures; - } - - srv_stats.buf_pool_wait_free.inc(); - - n_iterations++; - - goto loop; -} - -/*******************************************************************//** -Moves the LRU_old pointer so that the length of the old blocks list -is inside the allowed limits. */ -UNIV_INLINE -void -buf_LRU_old_adjust_len( -/*===================*/ - buf_pool_t* buf_pool) /*!< in: buffer pool instance */ -{ - ulint old_len; - ulint new_len; - - ut_a(buf_pool->LRU_old); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN); - ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX); -#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5) -# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)" -#endif -#ifdef UNIV_LRU_DEBUG - /* buf_pool->LRU_old must be the first item in the LRU list - whose "old" flag is set. 
*/ - ut_a(buf_pool->LRU_old->old); - ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old) - || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old); - ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old) - || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old); -#endif /* UNIV_LRU_DEBUG */ - - old_len = buf_pool->LRU_old_len; - new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU) - * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV, - UT_LIST_GET_LEN(buf_pool->LRU) - - (BUF_LRU_OLD_TOLERANCE - + BUF_LRU_NON_OLD_MIN_LEN)); - - for (;;) { - buf_page_t* LRU_old = buf_pool->LRU_old; - - ut_a(LRU_old); - ut_ad(LRU_old->in_LRU_list); -#ifdef UNIV_LRU_DEBUG - ut_a(LRU_old->old); -#endif /* UNIV_LRU_DEBUG */ - - /* Update the LRU_old pointer if necessary */ - - if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) { - - buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV( - LRU, LRU_old); -#ifdef UNIV_LRU_DEBUG - ut_a(!LRU_old->old); -#endif /* UNIV_LRU_DEBUG */ - old_len = ++buf_pool->LRU_old_len; - buf_page_set_old(LRU_old, TRUE); - - } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) { - - buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old); - old_len = --buf_pool->LRU_old_len; - buf_page_set_old(LRU_old, FALSE); - } else { - return; - } - } -} - -/*******************************************************************//** -Initializes the old blocks pointer in the LRU list. This function should be -called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. 
*/ -static -void -buf_LRU_old_init( -/*=============*/ - buf_pool_t* buf_pool) -{ - buf_page_t* bpage; - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); - - /* We first initialize all blocks in the LRU list as old and then use - the adjust function to move the LRU_old pointer to the right - position */ - - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL; - bpage = UT_LIST_GET_PREV(LRU, bpage)) { - ut_ad(bpage->in_LRU_list); - ut_ad(buf_page_in_file(bpage)); - /* This loop temporarily violates the - assertions of buf_page_set_old(). */ - bpage->old = TRUE; - } - - buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU); - buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU); - - buf_LRU_old_adjust_len(buf_pool); -} - -/******************************************************************//** -Remove a block from the unzip_LRU list if it belonged to the list. */ -static -void -buf_unzip_LRU_remove_block_if_needed( -/*=================================*/ - buf_page_t* bpage) /*!< in/out: control block */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(buf_pool); - ut_ad(bpage); - ut_ad(buf_page_in_file(bpage)); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - if (buf_page_belongs_to_unzip_LRU(bpage)) { - buf_block_t* block = (buf_block_t*) bpage; - - ut_ad(block->in_unzip_LRU_list); - ut_d(block->in_unzip_LRU_list = FALSE); - - UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block); - } -} - -/******************************************************************//** -Removes a block from the LRU list. 
*/ -UNIV_INLINE -void -buf_LRU_remove_block( -/*=================*/ - buf_page_t* bpage) /*!< in: control block */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ulint zip_size; - - ut_ad(buf_pool); - ut_ad(bpage); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - ut_a(buf_page_in_file(bpage)); - - ut_ad(bpage->in_LRU_list); - - /* If the LRU_old pointer is defined and points to just this block, - move it backward one step */ - - if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) { - - /* Below: the previous block is guaranteed to exist, - because the LRU_old pointer is only allowed to differ - by BUF_LRU_OLD_TOLERANCE from strict - buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU - list length. */ - buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage); - - ut_a(prev_bpage); -#ifdef UNIV_LRU_DEBUG - ut_a(!prev_bpage->old); -#endif /* UNIV_LRU_DEBUG */ - buf_pool->LRU_old = prev_bpage; - buf_page_set_old(prev_bpage, TRUE); - - buf_pool->LRU_old_len++; - } - - /* Remove the block from the LRU list */ - UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage); - ut_d(bpage->in_LRU_list = FALSE); - - zip_size = page_zip_get_size(&bpage->zip); - buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE; - - buf_unzip_LRU_remove_block_if_needed(bpage); - - /* If the LRU list is so short that LRU_old is not defined, - clear the "old" flags and return */ - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { - - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL; - bpage = UT_LIST_GET_NEXT(LRU, bpage)) { - /* This loop temporarily violates the - assertions of buf_page_set_old(). 
*/ - bpage->old = FALSE; - } - - buf_pool->LRU_old = NULL; - buf_pool->LRU_old_len = 0; - - return; - } - - ut_ad(buf_pool->LRU_old); - - /* Update the LRU_old_len field if necessary */ - if (buf_page_is_old(bpage)) { - - buf_pool->LRU_old_len--; - } - - /* Adjust the length of the old block list if necessary */ - buf_LRU_old_adjust_len(buf_pool); -} - -/******************************************************************//** -Adds a block to the LRU list of decompressed zip pages. */ -UNIV_INTERN -void -buf_unzip_LRU_add_block( -/*====================*/ - buf_block_t* block, /*!< in: control block */ - ibool old) /*!< in: TRUE if should be put to the end - of the list, else put to the start */ -{ - buf_pool_t* buf_pool = buf_pool_from_block(block); - - ut_ad(buf_pool); - ut_ad(block); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); - - ut_ad(!block->in_unzip_LRU_list); - ut_d(block->in_unzip_LRU_list = TRUE); - - if (old) { - UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block); - } else { - UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block); - } -} - -/******************************************************************//** -Adds a block to the LRU list end. 
Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INLINE -void -buf_LRU_add_block_to_end_low( -/*=========================*/ - buf_page_t* bpage) /*!< in: control block */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(buf_pool); - ut_ad(bpage); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - ut_a(buf_page_in_file(bpage)); - - ut_ad(!bpage->in_LRU_list); - UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage); - ut_d(bpage->in_LRU_list = TRUE); - - incr_LRU_size_in_bytes(bpage, buf_pool); - - if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { - - ut_ad(buf_pool->LRU_old); - - /* Adjust the length of the old block list if necessary */ - - buf_page_set_old(bpage, TRUE); - buf_pool->LRU_old_len++; - buf_LRU_old_adjust_len(buf_pool); - - } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { - - /* The LRU list is now long enough for LRU_old to become - defined: init it */ - - buf_LRU_old_init(buf_pool); - } else { - buf_page_set_old(bpage, buf_pool->LRU_old != NULL); - } - - /* If this is a zipped block with decompressed frame as well - then put it on the unzip_LRU list */ - if (buf_page_belongs_to_unzip_LRU(bpage)) { - buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE); - } -} - -/******************************************************************//** -Adds a block to the LRU list. 
Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INLINE -void -buf_LRU_add_block_low( -/*==================*/ - buf_page_t* bpage, /*!< in: control block */ - ibool old) /*!< in: TRUE if should be put to the old blocks - in the LRU list, else put to the start; if the - LRU list is very short, the block is added to - the start, regardless of this parameter */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - ut_a(buf_page_in_file(bpage)); - ut_ad(!bpage->in_LRU_list); - - if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { - - UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage); - - bpage->freed_page_clock = buf_pool->freed_page_clock; - } else { -#ifdef UNIV_LRU_DEBUG - /* buf_pool->LRU_old must be the first item in the LRU list - whose "old" flag is set. */ - ut_a(buf_pool->LRU_old->old); - ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old) - || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old); - ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old) - || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old); -#endif /* UNIV_LRU_DEBUG */ - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, - bpage); - buf_pool->LRU_old_len++; - } - - ut_d(bpage->in_LRU_list = TRUE); - - incr_LRU_size_in_bytes(bpage, buf_pool); - - if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { - - ut_ad(buf_pool->LRU_old); - - /* Adjust the length of the old block list if necessary */ - - buf_page_set_old(bpage, old); - buf_LRU_old_adjust_len(buf_pool); - - } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { - - /* The LRU list is now long enough for LRU_old to become - defined: init it */ - - buf_LRU_old_init(buf_pool); - } else { - buf_page_set_old(bpage, buf_pool->LRU_old != NULL); - } - - /* If this is a zipped block with decompressed frame as well - then put it on 
the unzip_LRU list */ - if (buf_page_belongs_to_unzip_LRU(bpage)) { - buf_unzip_LRU_add_block((buf_block_t*) bpage, old); - } -} - -/******************************************************************//** -Adds a block to the LRU list. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INTERN -void -buf_LRU_add_block( -/*==============*/ - buf_page_t* bpage, /*!< in: control block */ - ibool old) /*!< in: TRUE if should be put to the old - blocks in the LRU list, else put to the start; - if the LRU list is very short, the block is - added to the start, regardless of this - parameter */ -{ - buf_LRU_add_block_low(bpage, old); -} - -/******************************************************************//** -Moves a block to the start of the LRU list. */ -UNIV_INTERN -void -buf_LRU_make_block_young( -/*=====================*/ - buf_page_t* bpage) /*!< in: control block */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - - if (bpage->old) { - buf_pool->stat.n_pages_made_young++; - } - - buf_LRU_remove_block(bpage); - buf_LRU_add_block_low(bpage, FALSE); -} - -/******************************************************************//** -Moves a block to the end of the LRU list. */ -UNIV_INTERN -void -buf_LRU_make_block_old( -/*===================*/ - buf_page_t* bpage) /*!< in: control block */ -{ - buf_LRU_remove_block(bpage); - buf_LRU_add_block_to_end_low(bpage); -} - -/******************************************************************//** -Try to free a block. If bpage is a descriptor of a compressed-only -page, the descriptor object will be freed as well. - -NOTE: If this function returns true, it will release the LRU list mutex, -and temporarily release and relock the buf_page_get_mutex() mutex. -Furthermore, the page frame will no longer be accessible via bpage. 
If this -function returns false, the buf_page_get_mutex() might be temporarily released -and relocked too. - -The caller must hold the LRU list and buf_page_get_mutex() mutexes. - -@return true if freed, false otherwise. */ -UNIV_INTERN -bool -buf_LRU_free_page( -/*===============*/ - buf_page_t* bpage, /*!< in: block to be freed */ - bool zip) /*!< in: true if should remove also the - compressed page of an uncompressed page */ -{ - buf_page_t* b = NULL; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); - - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(mutex_own(block_mutex)); - ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - - if (!buf_page_can_relocate(bpage)) { - - /* Do not free buffer fixed or I/O-fixed blocks. */ - return(false); - } - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0); -#endif /* UNIV_IBUF_COUNT_DEBUG */ - - if (zip || !bpage->zip.data) { - /* This would completely free the block. */ - /* Do not completely free dirty blocks. 
*/ - - if (bpage->oldest_modification) { - return(false); - } - } else if (bpage->oldest_modification > 0 - && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { - - ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY); - - return(false); - - } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { - b = buf_page_alloc_descriptor(); - ut_a(b); - } - - ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - ut_ad(!bpage->in_flush_list == !bpage->oldest_modification); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Putting space %lu page %lu to free list\n", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - } -#endif /* UNIV_DEBUG */ - - mutex_exit(block_mutex); - - rw_lock_x_lock(hash_lock); - mutex_enter(block_mutex); - - if (UNIV_UNLIKELY(!buf_page_can_relocate(bpage) - || ((zip || !bpage->zip.data) - && bpage->oldest_modification))) { - -not_freed: - rw_lock_x_unlock(hash_lock); - if (b) { - buf_page_free_descriptor(b); - } - - return(false); - } else if (UNIV_UNLIKELY(bpage->oldest_modification - && (buf_page_get_state(bpage) - != BUF_BLOCK_FILE_PAGE))) { - - ut_ad(buf_page_get_state(bpage) - == BUF_BLOCK_ZIP_DIRTY); - goto not_freed; - } - - if (b) { - memcpy(b, bpage, sizeof *b); - } - - if (!buf_LRU_block_remove_hashed(bpage, zip)) { - - mutex_exit(&buf_pool->LRU_list_mutex); - - if (b) { - buf_page_free_descriptor(b); - } - - mutex_enter(block_mutex); - - return(true); - } - -#ifdef UNIV_SYNC_DEBUG - /* buf_LRU_block_remove_hashed() releases the hash_lock */ - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL - then it was a compressed page with an uncompressed frame and - we are interested in freeing only the uncompressed frame. - Therefore we have to reinsert the compressed page descriptor - into the LRU and page_hash (and possibly flush_list). 
- if b == NULL then it was a regular page that has been freed */ - - if (b) { - buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b); - - rw_lock_x_lock(hash_lock); - mutex_enter(block_mutex); - - ut_a(!buf_page_hash_get_low( - buf_pool, b->space, b->offset, fold)); - - b->state = b->oldest_modification - ? BUF_BLOCK_ZIP_DIRTY - : BUF_BLOCK_ZIP_PAGE; - UNIV_MEM_DESC(b->zip.data, - page_zip_get_size(&b->zip)); - - /* The fields in_page_hash and in_LRU_list of - the to-be-freed block descriptor should have - been cleared in - buf_LRU_block_remove_hashed(), which - invokes buf_LRU_remove_block(). */ - ut_ad(!bpage->in_page_hash); - ut_ad(!bpage->in_LRU_list); - /* bpage->state was BUF_BLOCK_FILE_PAGE because - b != NULL. The type cast below is thus valid. */ - ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list); - - /* The fields of bpage were copied to b before - buf_LRU_block_remove_hashed() was invoked. */ - ut_ad(!b->in_zip_hash); - ut_ad(b->in_page_hash); - ut_ad(b->in_LRU_list); - - HASH_INSERT(buf_page_t, hash, - buf_pool->page_hash, fold, b); - - /* Insert b where bpage was in the LRU list. */ - if (UNIV_LIKELY(prev_b != NULL)) { - ulint lru_len; - - ut_ad(prev_b->in_LRU_list); - ut_ad(buf_page_in_file(prev_b)); - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, - prev_b, b); - - incr_LRU_size_in_bytes(b, buf_pool); - - if (buf_page_is_old(b)) { - buf_pool->LRU_old_len++; - if (UNIV_UNLIKELY - (buf_pool->LRU_old - == UT_LIST_GET_NEXT(LRU, b))) { - - buf_pool->LRU_old = b; - } - } - - lru_len = UT_LIST_GET_LEN(buf_pool->LRU); - - if (lru_len > BUF_LRU_OLD_MIN_LEN) { - ut_ad(buf_pool->LRU_old); - /* Adjust the length of the - old block list if necessary */ - buf_LRU_old_adjust_len(buf_pool); - } else if (lru_len == BUF_LRU_OLD_MIN_LEN) { - /* The LRU list is now long - enough for LRU_old to become - defined: init it */ - buf_LRU_old_init(buf_pool); - } -#ifdef UNIV_LRU_DEBUG - /* Check that the "old" flag is consistent - in the block and its neighbours. 
*/ - buf_page_set_old(b, buf_page_is_old(b)); -#endif /* UNIV_LRU_DEBUG */ - } else { - ut_d(b->in_LRU_list = FALSE); - buf_LRU_add_block_low(b, buf_page_is_old(b)); - } - - mutex_enter(&buf_pool->zip_mutex); - rw_lock_x_unlock(hash_lock); - if (b->state == BUF_BLOCK_ZIP_PAGE) { -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - buf_LRU_insert_zip_clean(b); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - } else { - /* Relocate on buf_pool->flush_list. */ - buf_flush_relocate_on_flush_list(bpage, b); - } - - bpage->zip.data = NULL; - page_zip_set_size(&bpage->zip, 0); - - /* Prevent buf_page_get_gen() from - decompressing the block while we release block_mutex. */ - buf_page_set_sticky(b); - mutex_exit(&buf_pool->zip_mutex); - mutex_exit(block_mutex); - - } - - mutex_exit(&buf_pool->LRU_list_mutex); - - /* Remove possible adaptive hash index on the page. - The page was declared uninitialized by - buf_LRU_block_remove_hashed(). We need to flag - the contents of the page valid (which it still is) in - order to avoid bogus Valgrind warnings.*/ - - UNIV_MEM_VALID(((buf_block_t*) bpage)->frame, - UNIV_PAGE_SIZE); - btr_search_drop_page_hash_index((buf_block_t*) bpage); - UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame, - UNIV_PAGE_SIZE); - - if (b) { - ib_uint32_t checksum; - /* Compute and stamp the compressed page - checksum while not holding any mutex. The - block is already half-freed - (BUF_BLOCK_REMOVE_HASH) and removed from - buf_pool->page_hash, thus inaccessible by any - other thread. 
*/ - - checksum = static_cast<ib_uint32_t>( - page_zip_calc_checksum( - b->zip.data, - page_zip_get_size(&b->zip), - static_cast<srv_checksum_algorithm_t>( - srv_checksum_algorithm))); - - mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM, - checksum); - } - - mutex_enter(block_mutex); - - if (b) { - mutex_enter(&buf_pool->zip_mutex); - buf_page_unset_sticky(b); - mutex_exit(&buf_pool->zip_mutex); - } - - buf_LRU_block_free_hashed_page((buf_block_t*) bpage); - ut_ad(mutex_own(block_mutex)); - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); - return(true); -} - -/******************************************************************//** -Puts a block back to the free list. */ -UNIV_INTERN -void -buf_LRU_block_free_non_file_page( -/*=============================*/ - buf_block_t* block) /*!< in: block, must not contain a file page */ -{ - void* data; - buf_pool_t* buf_pool = buf_pool_from_block(block); - - ut_ad(block); - ut_ad(mutex_own(&block->mutex)); - - switch (buf_block_get_state(block)) { - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_READY_FOR_USE: - break; - default: - fprintf(stderr, "InnoDB: Error: Block %p incorrect state %s in buf_LRU_block_free_non_file_page()\n", - block, buf_get_state_name(block)); - return; /* Continue */ - } - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(block->n_pointers == 0); -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - ut_ad(!block->page.in_free_list); - ut_ad(!block->page.in_flush_list); - ut_ad(!block->page.in_LRU_list); - - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); -#ifdef UNIV_DEBUG - /* Wipe contents of page to reveal possible stale pointers to it */ - memset(block->frame, '\0', UNIV_PAGE_SIZE); -#else - /* Wipe page_no and space_id */ - memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4); - memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4); -#endif - data = block->page.zip.data; - - if (data) { - block->page.zip.data = NULL; - mutex_exit(&block->mutex); - - buf_buddy_free( - buf_pool, data, 
page_zip_get_size(&block->page.zip)); - - mutex_enter(&block->mutex); - page_zip_set_size(&block->page.zip, 0); - } - - mutex_enter_first(&buf_pool->free_list_mutex); - buf_block_set_state(block, BUF_BLOCK_NOT_USED); - UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page)); - ut_d(block->page.in_free_list = TRUE); - mutex_exit(&buf_pool->free_list_mutex); - - UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); -} - -/******************************************************************//** -Takes a block out of the LRU list and page hash table. -If the block is compressed-only (BUF_BLOCK_ZIP_PAGE), -the object will be freed. - -The caller must hold buf_pool->LRU_list_mutex, the buf_page_get_mutex() mutex -and the appropriate hash_lock. This function will release the -buf_page_get_mutex() and the hash_lock. - -If a compressed page is freed other compressed pages may be relocated. -@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The -caller needs to free the page to the free list -@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In -this case the block is already returned to the buddy allocator. 
*/ -static -bool -buf_LRU_block_remove_hashed( -/*========================*/ - buf_page_t* bpage, /*!< in: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ - bool zip) /*!< in: true if should remove also the - compressed page of an uncompressed page */ -{ - ulint fold; - const buf_page_t* hashed_bpage; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - prio_rw_lock_t* hash_lock; - - ut_ad(bpage); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - - fold = buf_page_address_fold(bpage->space, bpage->offset); - hash_lock = buf_page_hash_lock_get(buf_pool, fold); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); - ut_a(bpage->buf_fix_count == 0); - - buf_LRU_remove_block(bpage); - - buf_pool->freed_page_clock += 1; - - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_FILE_PAGE: - UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t)); - UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame, - UNIV_PAGE_SIZE); - buf_block_modify_clock_inc((buf_block_t*) bpage); - if (bpage->zip.data) { - const page_t* page = ((buf_block_t*) bpage)->frame; - const ulint zip_size - = page_zip_get_size(&bpage->zip); - - ut_a(!zip || bpage->oldest_modification == 0); - - switch (UNIV_EXPECT(fil_page_get_type(page), - FIL_PAGE_INDEX)) { - case FIL_PAGE_TYPE_ALLOCATED: - case FIL_PAGE_INODE: - case FIL_PAGE_IBUF_BITMAP: - case FIL_PAGE_TYPE_FSP_HDR: - case FIL_PAGE_TYPE_XDES: - /* These are essentially uncompressed pages. */ - if (!zip) { - /* InnoDB writes the data to the - uncompressed page frame. Copy it - to the compressed page, which will - be preserved. 
*/ - memcpy(bpage->zip.data, page, - zip_size); - } - break; - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - break; - case FIL_PAGE_INDEX: -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate( - &bpage->zip, page, - ((buf_block_t*) bpage)->index)); -#endif /* UNIV_ZIP_DEBUG */ - break; - default: - ut_print_timestamp(stderr); - fputs(" InnoDB: ERROR: The compressed page" - " to be evicted seems corrupt:", stderr); - ut_print_buf(stderr, page, zip_size); - fputs("\nInnoDB: Possibly older version" - " of the page:", stderr); - ut_print_buf(stderr, bpage->zip.data, - zip_size); - putc('\n', stderr); - ut_error; - } - - break; - } - /* fall through */ - case BUF_BLOCK_ZIP_PAGE: - ut_a(bpage->oldest_modification == 0); - UNIV_MEM_ASSERT_W(bpage->zip.data, - page_zip_get_size(&bpage->zip)); - break; - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_DIRTY: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; - } - - hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space, - bpage->offset, fold); - - if (UNIV_UNLIKELY(bpage != hashed_bpage)) { - fprintf(stderr, - "InnoDB: Error: page %lu %lu not found" - " in the hash table\n", - (ulong) bpage->space, - (ulong) bpage->offset); - -#ifdef UNIV_DEBUG - fprintf(stderr, - "InnoDB: in_page_hash %lu in_zip_hash %lu\n" - " in_free_list %lu in_flush_list %lu in_LRU_list %lu\n" - " zip.data %p zip_size %lu page_state %d\n", - bpage->in_page_hash, bpage->in_zip_hash, - bpage->in_free_list, bpage->in_flush_list, - bpage->in_LRU_list, bpage->zip.data, - buf_page_get_zip_size(bpage), - buf_page_get_state(bpage)); -#else - fprintf(stderr, - "InnoDB: zip.data %p zip_size %lu page_state %d\n", - bpage->zip.data, - buf_page_get_zip_size(bpage), - buf_page_get_state(bpage)); -#endif - - if (hashed_bpage) { - fprintf(stderr, - "InnoDB: In hash table we find block" - " %p of %lu %lu which is not %p\n", - (const void*) hashed_bpage, - (ulong) 
hashed_bpage->space, - (ulong) hashed_bpage->offset, - (const void*) bpage); - } - - ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE); - ut_a(bpage->buf_fix_count == 0); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - mutex_exit(buf_page_get_mutex(bpage)); - rw_lock_x_unlock(hash_lock); - mutex_exit(&buf_pool->LRU_list_mutex); - buf_print(); - buf_LRU_print(); - buf_validate(); - buf_LRU_validate(); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - ut_error; - } - - ut_ad(!bpage->in_zip_hash); - ut_ad(bpage->in_page_hash); - ut_d(bpage->in_page_hash = FALSE); - HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage); - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_ZIP_PAGE: - ut_ad(!bpage->in_free_list); - ut_ad(!bpage->in_flush_list); - ut_ad(!bpage->in_LRU_list); - ut_a(bpage->zip.data); - ut_a(buf_page_get_zip_size(bpage)); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - - mutex_exit(&buf_pool->zip_mutex); - rw_lock_x_unlock(hash_lock); - - buf_buddy_free( - buf_pool, bpage->zip.data, - page_zip_get_size(&bpage->zip)); - - buf_page_free_descriptor(bpage); - return(false); - - case BUF_BLOCK_FILE_PAGE: - memset(((buf_block_t*) bpage)->frame - + FIL_PAGE_OFFSET, 0xff, 4); - memset(((buf_block_t*) bpage)->frame - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); - UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame, - UNIV_PAGE_SIZE); - buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); - - if (buf_pool->flush_rbt == NULL) { - bpage->space = ULINT32_UNDEFINED; - bpage->offset = ULINT32_UNDEFINED; - } - - /* Question: If we release bpage and hash mutex here - then what protects us against: - 1) Some other thread buffer fixing this page - 2) Some other thread trying to read this page and - not finding it in buffer pool attempting to read it - from the disk. - Answer: - 1) Cannot happen because the page is no longer in the - page_hash. 
Only possibility is when while invalidating - a tablespace we buffer fix the prev_page in LRU to - avoid relocation during the scan. But that is not - possible because we are holding LRU list mutex. - - 2) Not possible because in buf_page_init_for_read() - we do a look up of page_hash while holding LRU list - mutex and since we are holding LRU list mutex here - and by the time we'll release it in the caller we'd - have inserted the compressed only descriptor in the - page_hash. */ - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - rw_lock_x_unlock(hash_lock); - mutex_exit(&((buf_block_t*) bpage)->mutex); - - if (zip && bpage->zip.data) { - /* Free the compressed page. */ - void* data = bpage->zip.data; - bpage->zip.data = NULL; - - ut_ad(!bpage->in_free_list); - ut_ad(!bpage->in_flush_list); - ut_ad(!bpage->in_LRU_list); - - buf_buddy_free( - buf_pool, data, - page_zip_get_size(&bpage->zip)); - - page_zip_set_size(&bpage->zip, 0); - } - - return(true); - - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_DIRTY: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - break; - } - - ut_error; - return(false); -} - -/******************************************************************//** -Puts a file page whose has no hash index to the free list. 
*/ -static -void -buf_LRU_block_free_hashed_page( -/*===========================*/ - buf_block_t* block) /*!< in: block, must contain a file page and - be in a state where it can be freed */ -{ - ut_ad(mutex_own(&block->mutex)); - - buf_block_set_state(block, BUF_BLOCK_MEMORY); - - buf_LRU_block_free_non_file_page(block); -} - -/******************************************************************//** -Remove one page from LRU list and put it to free list */ -UNIV_INTERN -void -buf_LRU_free_one_page( -/*==================*/ - buf_page_t* bpage) /*!< in/out: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ -{ -#if defined(UNIV_DEBUG) || defined(UNIV_SYNC_DEBUG) - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); -#endif -#ifdef UNIV_SYNC_DEBUG - const ulint fold = buf_page_address_fold(bpage->space, - bpage->offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); -#endif - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(mutex_own(block_mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)); -#endif - - if (buf_LRU_block_remove_hashed(bpage, true)) { - mutex_enter(block_mutex); - buf_LRU_block_free_hashed_page((buf_block_t*) bpage); - mutex_exit(block_mutex); - } - - /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) - && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!mutex_own(block_mutex)); -} - -/**********************************************************************//** -Updates buf_pool->LRU_old_ratio for one buffer pool instance. 
-@return updated old_pct */ -static -uint -buf_LRU_old_ratio_update_instance( -/*==============================*/ - buf_pool_t* buf_pool,/*!< in: buffer pool instance */ - uint old_pct,/*!< in: Reserve this percentage of - the buffer pool for "old" blocks. */ - ibool adjust) /*!< in: TRUE=adjust the LRU list; - FALSE=just assign buf_pool->LRU_old_ratio - during the initialization of InnoDB */ -{ - uint ratio; - - ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100; - if (ratio < BUF_LRU_OLD_RATIO_MIN) { - ratio = BUF_LRU_OLD_RATIO_MIN; - } else if (ratio > BUF_LRU_OLD_RATIO_MAX) { - ratio = BUF_LRU_OLD_RATIO_MAX; - } - - if (adjust) { - mutex_enter(&buf_pool->LRU_list_mutex); - - if (ratio != buf_pool->LRU_old_ratio) { - buf_pool->LRU_old_ratio = ratio; - - if (UT_LIST_GET_LEN(buf_pool->LRU) - >= BUF_LRU_OLD_MIN_LEN) { - - buf_LRU_old_adjust_len(buf_pool); - } - } - - mutex_exit(&buf_pool->LRU_list_mutex); - } else { - buf_pool->LRU_old_ratio = ratio; - } - /* the reverse of - ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */ - return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5)); -} - -/**********************************************************************//** -Updates buf_pool->LRU_old_ratio. -@return updated old_pct */ -UNIV_INTERN -ulint -buf_LRU_old_ratio_update( -/*=====================*/ - uint old_pct,/*!< in: Reserve this percentage of - the buffer pool for "old" blocks. */ - ibool adjust) /*!< in: TRUE=adjust the LRU list; - FALSE=just assign buf_pool->LRU_old_ratio - during the initialization of InnoDB */ -{ - ulint i; - ulint new_ratio = 0; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - new_ratio = buf_LRU_old_ratio_update_instance( - buf_pool, old_pct, adjust); - } - - return(new_ratio); -} - -/********************************************************************//** -Update the historical stats that we are collecting for LRU eviction -policy at the end of each interval. 
*/ -UNIV_INTERN -void -buf_LRU_stat_update(void) -/*=====================*/ -{ - ulint i; - buf_LRU_stat_t* item; - buf_pool_t* buf_pool; - ibool evict_started = FALSE; - buf_LRU_stat_t cur_stat; - - /* If we haven't started eviction yet then don't update stats. */ - for (i = 0; i < srv_buf_pool_instances; i++) { - - buf_pool = buf_pool_from_array(i); - - if (buf_pool->freed_page_clock != 0) { - evict_started = TRUE; - break; - } - } - - if (!evict_started) { - goto func_exit; - } - - /* Update the index. */ - item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind]; - buf_LRU_stat_arr_ind++; - buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL; - - /* Add the current value and subtract the obsolete entry. - Since buf_LRU_stat_cur is not protected by any mutex, - it can be changing between adding to buf_LRU_stat_sum - and copying to item. Assign it to local variables to make - sure the same value assign to the buf_LRU_stat_sum - and item */ - cur_stat = buf_LRU_stat_cur; - - buf_LRU_stat_sum.io += cur_stat.io - item->io; - buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip; - - /* Put current entry in the array. */ - memcpy(item, &cur_stat, sizeof *item); - -func_exit: - /* Clear the current entry. */ - memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur); -} - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/**********************************************************************//** -Validates the LRU list for one buffer pool instance. 
*/ -static -void -buf_LRU_validate_instance( -/*======================*/ - buf_pool_t* buf_pool) -{ - buf_page_t* bpage; - buf_block_t* block; - ulint old_len; - ulint new_len; - - ut_ad(buf_pool); - mutex_enter(&buf_pool->LRU_list_mutex); - - if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { - - ut_a(buf_pool->LRU_old); - old_len = buf_pool->LRU_old_len; - new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU) - * buf_pool->LRU_old_ratio - / BUF_LRU_OLD_RATIO_DIV, - UT_LIST_GET_LEN(buf_pool->LRU) - - (BUF_LRU_OLD_TOLERANCE - + BUF_LRU_NON_OLD_MIN_LEN)); - ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE); - ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); - } - - UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()); - - old_len = 0; - - for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); - bpage != NULL; - bpage = UT_LIST_GET_NEXT(LRU, bpage)) { - - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - ut_error; - break; - case BUF_BLOCK_FILE_PAGE: - ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list - == buf_page_belongs_to_unzip_LRU(bpage)); - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - break; - } - - if (buf_page_is_old(bpage)) { - const buf_page_t* prev - = UT_LIST_GET_PREV(LRU, bpage); - const buf_page_t* next - = UT_LIST_GET_NEXT(LRU, bpage); - - if (!old_len++) { - ut_a(buf_pool->LRU_old == bpage); - } else { - ut_a(!prev || buf_page_is_old(prev)); - } - - ut_a(!next || buf_page_is_old(next)); - } - } - - ut_a(buf_pool->LRU_old_len == old_len); - - mutex_exit(&buf_pool->LRU_list_mutex); - - mutex_enter(&buf_pool->free_list_mutex); - - UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, CheckInFreeList()); - - for (bpage = UT_LIST_GET_FIRST(buf_pool->free); - bpage != NULL; - bpage = UT_LIST_GET_NEXT(list, bpage)) { - - ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED); - } - - 
mutex_exit(&buf_pool->free_list_mutex); - - mutex_enter(&buf_pool->LRU_list_mutex); - - UT_LIST_VALIDATE( - unzip_LRU, buf_block_t, buf_pool->unzip_LRU, - CheckUnzipLRUAndLRUList()); - - for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU); - block; - block = UT_LIST_GET_NEXT(unzip_LRU, block)) { - - ut_ad(block->in_unzip_LRU_list); - ut_ad(block->page.in_LRU_list); - ut_a(buf_page_belongs_to_unzip_LRU(&block->page)); - } - - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/**********************************************************************//** -Validates the LRU list. -@return TRUE */ -UNIV_INTERN -ibool -buf_LRU_validate(void) -/*==================*/ -{ - ulint i; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - buf_LRU_validate_instance(buf_pool); - } - - return(TRUE); -} -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/**********************************************************************//** -Prints the LRU list for one buffer pool instance. */ -UNIV_INTERN -void -buf_LRU_print_instance( -/*===================*/ - buf_pool_t* buf_pool) -{ - const buf_page_t* bpage; - - ut_ad(buf_pool); - mutex_enter(&buf_pool->LRU_list_mutex); - - bpage = UT_LIST_GET_FIRST(buf_pool->LRU); - - while (bpage != NULL) { - - mutex_enter(buf_page_get_mutex(bpage)); - fprintf(stderr, "BLOCK space %lu page %lu ", - (ulong) buf_page_get_space(bpage), - (ulong) buf_page_get_page_no(bpage)); - - if (buf_page_is_old(bpage)) { - fputs("old ", stderr); - } - - if (bpage->buf_fix_count) { - fprintf(stderr, "buffix count %lu ", - (ulong) bpage->buf_fix_count); - } - - if (buf_page_get_io_fix(bpage)) { - fprintf(stderr, "io_fix %lu ", - (ulong) buf_page_get_io_fix(bpage)); - } - - if (bpage->oldest_modification) { - fputs("modif. 
", stderr); - } - - switch (buf_page_get_state(bpage)) { - const byte* frame; - case BUF_BLOCK_FILE_PAGE: - frame = buf_block_get_frame((buf_block_t*) bpage); - fprintf(stderr, "\ntype %lu" - " index id %llu\n", - (ulong) fil_page_get_type(frame), - (ullint) btr_page_get_index_id(frame)); - break; - case BUF_BLOCK_ZIP_PAGE: - frame = bpage->zip.data; - fprintf(stderr, "\ntype %lu size %lu" - " index id %llu\n", - (ulong) fil_page_get_type(frame), - (ulong) buf_page_get_zip_size(bpage), - (ullint) btr_page_get_index_id(frame)); - break; - - default: - fprintf(stderr, "\n!state %lu!\n", - (ulong) buf_page_get_state(bpage)); - break; - } - - mutex_exit(buf_page_get_mutex(bpage)); - bpage = UT_LIST_GET_NEXT(LRU, bpage); - } - - mutex_exit(&buf_pool->LRU_list_mutex); -} - -/**********************************************************************//** -Prints the LRU list. */ -UNIV_INTERN -void -buf_LRU_print(void) -/*===============*/ -{ - ulint i; - buf_pool_t* buf_pool; - - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool = buf_pool_from_array(i); - buf_LRU_print_instance(buf_pool); - } -} -#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc deleted file mode 100644 index f90b1e46c1e..00000000000 --- a/storage/xtradb/buf/buf0mtflu.cc +++ /dev/null @@ -1,756 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved. -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file buf/buf0mtflu.cc -Multi-threaded flush method implementation - -Created 06/11/2013 Dhananjoy Das DDas@fusionio.com -Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com -Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com -Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com -***********************************************************************/ - -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0mtflu.h" -#include "buf0checksum.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "page0zip.h" -#include "ut0byte.h" -#include "ut0lst.h" -#include "page0page.h" -#include "fil0fil.h" -#include "buf0lru.h" -#include "buf0rea.h" -#include "ibuf0ibuf.h" -#include "log0log.h" -#include "os0file.h" -#include "os0sync.h" -#include "trx0sys.h" -#include "srv0mon.h" -#include "mysql/plugin.h" -#include "mysql/service_thd_wait.h" -#include "fil0pagecompress.h" - -#define MT_COMP_WATER_MARK 50 -/** Time to wait for a message. 
*/ -#define MT_WAIT_IN_USECS 5000000 - -/* Work item status */ -typedef enum wrk_status { - WRK_ITEM_UNSET=0, /*!< Work item is not set */ - WRK_ITEM_START=1, /*!< Processing of work item has started */ - WRK_ITEM_DONE=2, /*!< Processing is done usually set to - SUCCESS/FAILED */ - WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ - WRK_ITEM_FAILED=3, /*!< Work item process failed */ - WRK_ITEM_EXIT=4, /*!< Exiting */ - WRK_ITEM_SET=5, /*!< Work item is set */ - WRK_ITEM_STATUS_UNDEFINED -} wrk_status_t; - -/* Work item task type */ -typedef enum mt_wrk_tsk { - MT_WRK_NONE=0, /*!< Exit queue-wait */ - MT_WRK_WRITE=1, /*!< Flush operation */ - MT_WRK_READ=2, /*!< Read operation */ - MT_WRK_UNDEFINED -} mt_wrk_tsk_t; - -/* Work thread status */ -typedef enum wthr_status { - WTHR_NOT_INIT=0, /*!< Work thread not initialized */ - WTHR_INITIALIZED=1, /*!< Work thread initialized */ - WTHR_SIG_WAITING=2, /*!< Work thread wating signal */ - WTHR_RUNNING=3, /*!< Work thread running */ - WTHR_NO_WORK=4, /*!< Work thread has no work */ - WTHR_KILL_IT=5, /*!< Work thread should exit */ - WTHR_STATUS_UNDEFINED -} wthr_status_t; - -/* Write work task */ -typedef struct wr_tsk { - buf_pool_t *buf_pool; /*!< buffer-pool instance */ - buf_flush_t flush_type; /*!< flush-type for buffer-pool - flush operation */ - ulint min; /*!< minimum number of pages - requested to be flushed */ - lsn_t lsn_limit; /*!< lsn limit for the buffer-pool - flush operation */ -} wr_tsk_t; - -/* Read work task */ -typedef struct rd_tsk { - buf_pool_t *page_pool; /*!< list of pages to decompress; */ -} rd_tsk_t; - -/* Work item */ -typedef struct wrk_itm -{ - mt_wrk_tsk_t tsk; /*!< Task type. 
Based on task-type - one of the entries wr_tsk/rd_tsk - will be used */ - wr_tsk_t wr; /*!< Flush page list */ - rd_tsk_t rd; /*!< Decompress page list */ - ulint n_flushed; /*!< Number of flushed pages */ - ulint n_evicted; /*!< Number of evicted pages */ - os_thread_id_t id_usr; /*!< Thread-id currently working */ - wrk_status_t wi_status; /*!< Work item status */ - mem_heap_t *wheap; /*!< Heap were to allocate memory - for queue nodes */ - mem_heap_t *rheap; -} wrk_t; - -typedef struct thread_data -{ - os_thread_id_t wthread_id; /*!< Identifier */ - os_thread_t wthread; /*!< Thread id */ - wthr_status_t wt_status; /*!< Worker thread status */ -} thread_data_t; - -/* Thread syncronization data */ -typedef struct thread_sync -{ - /* Global variables used by all threads */ - os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below - variables */ - ulint n_threads; /*!< Number of threads */ - ib_wqueue_t *wq; /*!< Work Queue */ - ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ - ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ - mem_heap_t* wheap; /*!< Work heap where memory - is allocated */ - mem_heap_t* rheap; /*!< Work heap where memory - is allocated */ - wthr_status_t gwt_status; /*!< Global thread status */ - - /* Variables used by only one thread at a time */ - thread_data_t* thread_data; /*!< Thread specific data */ - -} thread_sync_t; - -static int mtflush_work_initialized = -1; -static thread_sync_t* mtflush_ctx=NULL; -static os_fast_mutex_t mtflush_mtx; - -/******************************************************************//** -Set multi-threaded flush work initialized. 
*/ -static inline -void -buf_mtflu_work_init(void) -/*=====================*/ -{ - mtflush_work_initialized = 1; -} - -/******************************************************************//** -Return true if multi-threaded flush is initialized -@return true if initialized */ -bool -buf_mtflu_init_done(void) -/*=====================*/ -{ - return(mtflush_work_initialized == 1); -} - -/******************************************************************//** -Fush buffer pool instance. -@return number of flushed pages, or 0 if error happened -*/ -static -ulint -buf_mtflu_flush_pool_instance( -/*==========================*/ - wrk_t *work_item) /*!< inout: work item to be flushed */ -{ - flush_counters_t n; - ut_a(work_item != NULL); - ut_a(work_item->wr.buf_pool != NULL); - - if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) { - /* We have two choices here. If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. */ -#ifdef UNIV_MTFLUSH_DEBUG - fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); -#endif - return 0; - } - - memset(&n, 0, sizeof(flush_counters_t)); - - if (work_item->wr.flush_type == BUF_FLUSH_LRU) { - /* srv_LRU_scan_depth can be arbitrarily large value. - * We cap it with current LRU size. 
- */ - buf_pool_mutex_enter(work_item->wr.buf_pool); - work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU); - buf_pool_mutex_exit(work_item->wr.buf_pool); - work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min); - } - - buf_flush_batch(work_item->wr.buf_pool, - work_item->wr.flush_type, - work_item->wr.min, - work_item->wr.lsn_limit, - false, - &n); - - work_item->n_flushed = n.flushed; - buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); - buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); - - return work_item->n_flushed; -} - -/******************************************************************//** -Worker function to wait for work items and processing them and -sending reply back. -*/ -static -void -mtflush_service_io( -/*===============*/ - thread_sync_t* mtflush_io, /*!< inout: multi-threaded flush - syncronization data */ - thread_data_t* thread_data) /* Thread status data */ -{ - wrk_t *work_item = NULL; - ulint n_flushed=0; - - ut_a(mtflush_io != NULL); - ut_a(thread_data != NULL); - - thread_data->wt_status = WTHR_SIG_WAITING; - - work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); - - if (work_item == NULL) { - work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); - } - - if (work_item) { - thread_data->wt_status = WTHR_RUNNING; - } else { - /* Thread did not get any work */ - thread_data->wt_status = WTHR_NO_WORK; - return; - } - - if (work_item->wi_status != WRK_ITEM_EXIT) { - work_item->wi_status = WRK_ITEM_SET; - } - -#ifdef UNIV_MTFLUSH_DEBUG - ut_a(work_item->id_usr == 0); -#endif - work_item->id_usr = os_thread_get_curr_id(); - - /* This works as a producer/consumer model, where in tasks are - * inserted into the work-queue (wq) and completions are based - * on the type of operations performed and as a result the WRITE/ - * compression/flush operation completions get posted to wr_cq. - * And READ/decompress operations completions get posted to rd_cq. - * in future we may have others. 
- */ - - switch(work_item->tsk) { - case MT_WRK_NONE: - ut_a(work_item->wi_status == WRK_ITEM_EXIT); - work_item->wi_status = WRK_ITEM_EXIT; - ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); - thread_data->wt_status = WTHR_KILL_IT; - break; - - case MT_WRK_WRITE: - ut_a(work_item->wi_status == WRK_ITEM_SET); - work_item->wi_status = WRK_ITEM_START; - /* Process work item */ - if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { - work_item->wi_status = WRK_ITEM_FAILED; - } - work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); - break; - - case MT_WRK_READ: - ut_a(0); - break; - - default: - /* None other than Write/Read handling planned */ - ut_a(0); - break; - } -} - -/******************************************************************//** -Thead used to flush dirty pages when multi-threaded flush is -used. -@return a dummy parameter*/ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(mtflush_io_thread)( -/*==============================*/ - void * arg) -{ - thread_sync_t *mtflush_io = ((thread_sync_t *)arg); - thread_data_t *this_thread_data = NULL; - ulint i; - - /* Find correct slot for this thread */ - os_fast_mutex_lock(&(mtflush_io->thread_global_mtx)); - for(i=0; i < mtflush_io->n_threads; i ++) { - if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) { - break; - } - } - - ut_a(i <= mtflush_io->n_threads); - this_thread_data = &mtflush_io->thread_data[i]; - os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx)); - - while (TRUE) { - -#ifdef UNIV_MTFLUSH_DEBUG - fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", - os_thread_get_curr_id(), - ib_wqueue_len(mtflush_io->wq), - ib_wqueue_len(mtflush_io->wr_cq)); -#endif /* UNIV_MTFLUSH_DEBUG */ - - mtflush_service_io(mtflush_io, this_thread_data); - - - if (this_thread_data->wt_status == WTHR_KILL_IT) { - break; - } - } - - os_thread_exit(NULL); - OS_THREAD_DUMMY_RETURN; -} - -/******************************************************************//** -Add exit work item to work queue to signal multi-threded flush -threads that they should exit. -*/ -void -buf_mtflu_io_thread_exit(void) -/*==========================*/ -{ - ulint i; - thread_sync_t* mtflush_io = mtflush_ctx; - wrk_t* work_item = NULL; - - ut_a(mtflush_io != NULL); - - /* Allocate work items for shutdown message */ - work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); - - /* Confirm if the io-thread KILL is in progress, bailout */ - if (mtflush_io->gwt_status == WTHR_KILL_IT) { - return; - } - - mtflush_io->gwt_status = WTHR_KILL_IT; - - /* This lock is to safequard against timing bug: flush request take - this mutex before sending work items to be processed by flush - threads. Inside flush thread we assume that work queue contains only - a constant number of items. Thus, we may not install new work items - below before all previous ones are processed. This mutex is released - by flush request after all work items sent to flush threads have - been processed. Thus, we can get this mutex if and only if work - queue is empty. 
*/ - - os_fast_mutex_lock(&mtflush_mtx); - - /* Make sure the work queue is empty */ - ut_a(ib_wqueue_is_empty(mtflush_io->wq)); - - /* Send one exit work item/thread */ - for (i=0; i < (ulint)srv_mtflush_threads; i++) { - work_item[i].tsk = MT_WRK_NONE; - work_item[i].wi_status = WRK_ITEM_EXIT; - work_item[i].wheap = mtflush_io->wheap; - work_item[i].rheap = mtflush_io->rheap; - work_item[i].id_usr = 0; - - ib_wqueue_add(mtflush_io->wq, - (void *)&(work_item[i]), - mtflush_io->wheap); - } - - /* Requests sent */ - os_fast_mutex_unlock(&mtflush_mtx); - - /* Wait until all work items on a work queue are processed */ - while(!ib_wqueue_is_empty(mtflush_io->wq)) { - /* Wait */ - os_thread_sleep(MT_WAIT_IN_USECS); - } - - ut_a(ib_wqueue_is_empty(mtflush_io->wq)); - - /* Collect all work done items */ - for (i=0; i < (ulint)srv_mtflush_threads;) { - wrk_t* work_item = NULL; - - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); - - /* If we receive reply to work item and it's status is exit, - thead has processed this message and existed */ - if (work_item && work_item->wi_status == WRK_ITEM_EXIT) { - i++; - } - } - - /* Wait about 1/2 sec to allow threads really exit */ - os_thread_sleep(MT_WAIT_IN_USECS); - - /* Make sure that work queue is empty */ - while(!ib_wqueue_is_empty(mtflush_io->wq)) - { - ib_wqueue_nowait(mtflush_io->wq); - } - - os_fast_mutex_lock(&mtflush_mtx); - - ut_a(ib_wqueue_is_empty(mtflush_io->wq)); - ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); - ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); - - /* Free all queues */ - ib_wqueue_free(mtflush_io->wq); - ib_wqueue_free(mtflush_io->wr_cq); - ib_wqueue_free(mtflush_io->rd_cq); - - mtflush_io->wq = NULL; - mtflush_io->wr_cq = NULL; - mtflush_io->rd_cq = NULL; - mtflush_work_initialized = 0; - - /* Free heap */ - mem_heap_free(mtflush_io->wheap); - mem_heap_free(mtflush_io->rheap); - - os_fast_mutex_unlock(&mtflush_mtx); - os_fast_mutex_free(&mtflush_mtx); - 
os_fast_mutex_free(&mtflush_io->thread_global_mtx); -} - -/******************************************************************//** -Initialize multi-threaded flush thread syncronization data. -@return Initialized multi-threaded flush thread syncroniztion data. */ -void* -buf_mtflu_handler_init( -/*===================*/ - ulint n_threads, /*!< in: Number of threads to create */ - ulint wrk_cnt) /*!< in: Number of work items */ -{ - ulint i; - mem_heap_t* mtflush_heap; - mem_heap_t* mtflush_heap2; - - /* Create heap, work queue, write completion queue, read - completion queue for multi-threaded flush, and init - handler. */ - mtflush_heap = mem_heap_create(0); - ut_a(mtflush_heap != NULL); - mtflush_heap2 = mem_heap_create(0); - ut_a(mtflush_heap2 != NULL); - - mtflush_ctx = (thread_sync_t *)mem_heap_zalloc(mtflush_heap, - sizeof(thread_sync_t)); - - ut_a(mtflush_ctx != NULL); - mtflush_ctx->thread_data = (thread_data_t*)mem_heap_zalloc( - mtflush_heap, sizeof(thread_data_t) * n_threads); - ut_a(mtflush_ctx->thread_data); - - mtflush_ctx->n_threads = n_threads; - mtflush_ctx->wq = ib_wqueue_create(); - ut_a(mtflush_ctx->wq); - mtflush_ctx->wr_cq = ib_wqueue_create(); - ut_a(mtflush_ctx->wr_cq); - mtflush_ctx->rd_cq = ib_wqueue_create(); - ut_a(mtflush_ctx->rd_cq); - mtflush_ctx->wheap = mtflush_heap; - mtflush_ctx->rheap = mtflush_heap2; - - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx); - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); - - /* Create threads for page-compression-flush */ - for(i=0; i < n_threads; i++) { - os_thread_id_t new_thread_id; - - mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED; - - mtflush_ctx->thread_data[i].wthread = os_thread_create( - mtflush_io_thread, - ((void *) mtflush_ctx), - &new_thread_id); - - mtflush_ctx->thread_data[i].wthread_id = new_thread_id; - } - - buf_mtflu_work_init(); - - return((void *)mtflush_ctx); -} - 
-/******************************************************************//** -Flush buffer pool instances. -@return number of pages flushed. */ -ulint -buf_mtflu_flush_work_items( -/*=======================*/ - ulint buf_pool_inst, /*!< in: Number of buffer pool instances */ - flush_counters_t *per_pool_cnt, /*!< out: Number of pages - flushed or evicted /instance */ - buf_flush_t flush_type, /*!< in: Type of flush */ - ulint min_n, /*!< in: Wished minimum number of - blocks to be flushed */ - lsn_t lsn_limit) /*!< in: All blocks whose - oldest_modification is smaller than - this should be flushed (if their - number does not exceed min_n) */ -{ - ulint n_flushed=0, i; - mem_heap_t* work_heap; - mem_heap_t* reply_heap; - wrk_t work_item[MTFLUSH_MAX_WORKER]; - - if (mtflush_ctx->gwt_status == WTHR_KILL_IT) { - return 0; - } - - /* Allocate heap where all work items used and queue - node items areallocated */ - work_heap = mem_heap_create(0); - reply_heap = mem_heap_create(0); - - - for(i=0;i<buf_pool_inst; i++) { - work_item[i].tsk = MT_WRK_WRITE; - work_item[i].wr.buf_pool = buf_pool_from_array(i); - work_item[i].wr.flush_type = flush_type; - work_item[i].wr.min = min_n; - work_item[i].wr.lsn_limit = lsn_limit; - work_item[i].wi_status = WRK_ITEM_UNSET; - work_item[i].wheap = work_heap; - work_item[i].rheap = reply_heap; - work_item[i].n_flushed = 0; - work_item[i].n_evicted = 0; - work_item[i].id_usr = 0; - - ib_wqueue_add(mtflush_ctx->wq, - (void *)(work_item + i), - work_heap); - } - - /* wait on the completion to arrive */ - for(i=0; i< buf_pool_inst;) { - wrk_t *done_wi = NULL; - done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); - - if (done_wi != NULL) { - per_pool_cnt[i].flushed = done_wi->n_flushed; - per_pool_cnt[i].evicted = done_wi->n_evicted; - -#ifdef UNIV_MTFLUSH_DEBUG - if((int)done_wi->id_usr == 0 && - (done_wi->wi_status == WRK_ITEM_SET || - done_wi->wi_status == WRK_ITEM_UNSET)) { - fprintf(stderr, - "**Set/Unused work_item[%lu] flush_type=%d\n", 
- i, - done_wi->wr.flush_type); - ut_a(0); - } -#endif - - n_flushed+= done_wi->n_flushed+done_wi->n_evicted; - i++; - } - } - - /* Release used work_items and queue nodes */ - mem_heap_free(work_heap); - mem_heap_free(reply_heap); - - return(n_flushed); -} - -/*******************************************************************//** -Multi-threaded version of buf_flush_list -*/ -bool -buf_mtflu_flush_list( -/*=================*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ - -{ - ulint i; - bool success = true; - flush_counters_t cnt[MTFLUSH_MAX_WORKER]; - - if (n_processed) { - *n_processed = 0; - } - - if (min_n != ULINT_MAX) { - /* Ensure that flushing is spread evenly amongst the - buffer pool instances. When min_n is ULINT_MAX - we need to flush everything up to the lsn limit - so no limit here. */ - min_n = (min_n + srv_buf_pool_instances - 1) - / srv_buf_pool_instances; - } - - /* This lock is to safequard against re-entry if any. 
*/ - os_fast_mutex_lock(&mtflush_mtx); - buf_mtflu_flush_work_items(srv_buf_pool_instances, - cnt, BUF_FLUSH_LIST, - min_n, lsn_limit); - os_fast_mutex_unlock(&mtflush_mtx); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (n_processed) { - *n_processed += cnt[i].flushed+cnt[i].evicted; - } - - if (cnt[i].flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - cnt[i].flushed); - } - - if(cnt[i].evicted) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT, - MONITOR_LRU_BATCH_EVICT_PAGES, - cnt[i].evicted); - } - } -#ifdef UNIV_MTFLUSH_DEBUG - fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", - __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); -#endif - return(success); -} - -/*********************************************************************//** -Clears up tail of the LRU lists: -* Put replaceable pages at the tail of LRU to the free list -* Flush dirty pages at the tail of LRU to the disk -The depth to which we scan each buffer pool is controlled by dynamic -config parameter innodb_LRU_scan_depth. 
-@return total pages flushed */ -UNIV_INTERN -ulint -buf_mtflu_flush_LRU_tail(void) -/*==========================*/ -{ - ulint total_flushed=0, i; - flush_counters_t cnt[MTFLUSH_MAX_WORKER]; - - ut_a(buf_mtflu_init_done()); - - /* At shutdown do not send requests anymore */ - if (!mtflush_ctx || mtflush_ctx->gwt_status == WTHR_KILL_IT) { - return (total_flushed); - } - - /* This lock is to safeguard against re-entry if any */ - os_fast_mutex_lock(&mtflush_mtx); - buf_mtflu_flush_work_items(srv_buf_pool_instances, - cnt, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); - os_fast_mutex_unlock(&mtflush_mtx); - - for (i = 0; i < srv_buf_pool_instances; i++) { - total_flushed += cnt[i].flushed+cnt[i].evicted; - - if (cnt[i].flushed) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_COUNT, - MONITOR_LRU_BATCH_FLUSH_PAGES, - cnt[i].flushed); - } - if(cnt[i].evicted) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT, - MONITOR_LRU_BATCH_EVICT_PAGES, - cnt[i].evicted); - } - } - -#if UNIV_MTFLUSH_DEBUG - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( - srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); -#endif - - return(total_flushed); -} - -/*********************************************************************//** -Set correct thread identifiers to io thread array based on -information we have. 
*/ -void -buf_mtflu_set_thread_ids( -/*=====================*/ - ulint n_threads, /*!<in: Number of threads to fill */ - void* ctx, /*!<in: thread context */ - os_thread_id_t* thread_ids) /*!<in: thread id array */ -{ - thread_sync_t *mtflush_io = ((thread_sync_t *)ctx); - ulint i; - ut_a(mtflush_io != NULL); - ut_a(thread_ids != NULL); - - for(i = 0; i < n_threads; i++) { - thread_ids[i] = mtflush_io->thread_data[i].wthread_id; - } -} diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc deleted file mode 100644 index b2b737b8d40..00000000000 --- a/storage/xtradb/buf/buf0rea.cc +++ /dev/null @@ -1,1125 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0rea.cc -The database buffer read - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0rea.h" - -#include "fil0fil.h" -#include "mtr0mtr.h" - -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0lru.h" -#include "buf0dblwr.h" -#include "ibuf0ibuf.h" -#include "log0recv.h" -#include "trx0sys.h" -#include "os0file.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "mysql/plugin.h" -#include "mysql/service_thd_wait.h" - -/** There must be at least this many pages in buf_pool in the area to start -a random read-ahead */ -#define BUF_READ_AHEAD_RANDOM_THRESHOLD(b) \ - (5 + BUF_READ_AHEAD_AREA(b) / 8) - -/** If there are buf_pool->curr_size per the number below pending reads, then -read-ahead is not done: this is to prevent flooding the buffer pool with -i/o-fixed buffer blocks */ -#define BUF_READ_AHEAD_PEND_LIMIT 2 - -/********************************************************************//** -Unfixes the pages, unlatches the page, -removes it from page_hash and removes it from LRU. 
*/
static
void
buf_read_page_handle_error(
/*=======================*/
	buf_page_t*	bpage)	/*!< in: pointer to the block */
{
	buf_pool_t*	pool = buf_pool_from_bpage(bpage);
	/* Sample the page state before any latch is taken, as the
	original code does. */
	const bool	is_file_page
		= (BUF_BLOCK_FILE_PAGE == buf_page_get_state(bpage));
	const ulint	page_fold
		= buf_page_address_fold(bpage->space, bpage->offset);
	prio_rw_lock_t*	page_hash_lock
		= buf_page_hash_lock_get(pool, page_fold);

	/* Acquisition order: LRU list mutex, page hash lock, page mutex */
	mutex_enter(&pool->LRU_list_mutex);
	rw_lock_x_lock(page_hash_lock);
	mutex_enter(buf_page_get_mutex(bpage));

	/* The page must still be io-fixed for read and not buffer-fixed */
	ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
	ut_ad(bpage->buf_fix_count == 0);

	/* Clear the io-fix before the block leaves the LRU list */
	buf_page_set_io_fix(bpage, BUF_IO_NONE);

	if (is_file_page) {
		buf_block_t*	block = (buf_block_t*) bpage;

		rw_lock_x_unlock_gen(&block->lock, BUF_IO_READ);
	}

	/* Take the block out of the LRU list.
	NOTE(review): the page hash lock and the page mutex are not
	released in this function; presumably buf_LRU_free_one_page()
	releases them - confirm. */
	buf_LRU_free_one_page(bpage);

	mutex_exit(&pool->LRU_list_mutex);

	/* One pending read less */
	ut_ad(pool->n_pend_reads > 0);
	os_atomic_decrement_ulint(&pool->n_pend_reads, 1);
}

/********************************************************************//**
Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.
-@return 1 if a read request was queued, 0 if the page already resided -in buf_pool, or if the page is in the doublewrite buffer blocks in -which case it is never read into the pool, or if the tablespace does -not exist or is being dropped - -@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED if we are - trying to read from a non-existent tablespace, or a - tablespace which is just now being dropped, - DB_PAGE_CORRUPTED if page based on checksum - check is corrupted, or DB_DECRYPTION_FAILED - if page post encryption checksum matches but - after decryption normal page checksum does not match. -@param[in] sync true if synchronous aio is desired -@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ..., - ORed to OS_AIO_SIMULATED_WAKE_LATER (see below - at read-ahead functions) -@param[in] space space id -@param[in] zip_size compressed page size, or 0 -@param[in] unzip TRUE=request uncompressed page -@param[in] tablespace_version if the space memory object has - this timestamp different from what we are giving here, - treat the tablespace as dropped; this is a timestamp we - use to stop dangling page reads from a tablespace - which we have DISCARDed + IMPORTed back -@param[in] offset page number -@param[in] trx transaction -@return 1 if read request is issued. 
0 if it is not */ -static -ulint -buf_read_page_low( - dberr_t* err, - bool sync, - ulint mode, - ulint space, - ulint zip_size, - ibool unzip, - ib_int64_t tablespace_version, - ulint offset, - trx_t* trx = NULL) -{ - buf_page_t* bpage; - ulint wake_later; - ibool ignore_nonexistent_pages; - - wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; - mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; - - ignore_nonexistent_pages = mode & BUF_READ_IGNORE_NONEXISTENT_PAGES; - mode &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES; - - if (space == TRX_SYS_SPACE && buf_dblwr_page_inside(offset)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: trying to read" - " doublewrite buffer page %lu\n", - (ulong) offset); - - return(0); - } - - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { - - /* Trx sys header is so low in the latching order that we play - safe and do not leave the i/o-completion to an asynchronous - i/o-thread. Ibuf bitmap pages must always be read with - syncronous i/o, to make sure they do not get involved in - thread deadlocks. 
*/ - - sync = true; - } - - /* The following call will also check if the tablespace does not exist - or is being dropped; if we succeed in initing the page in the buffer - pool for read, then DISCARD cannot proceed until the read has - completed */ - bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip, - tablespace_version, offset); - if (bpage == NULL) { - /* bugfix: http://bugs.mysql.com/bug.php?id=43948 */ - if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) { - /* hashed log recs must be treated here */ - recv_addr_t* recv_addr; - - mutex_enter(&(recv_sys->mutex)); - - if (recv_sys->apply_log_recs == FALSE) { - mutex_exit(&(recv_sys->mutex)); - goto not_to_recover; - } - - /* recv_get_fil_addr_struct() */ - recv_addr = (recv_addr_t*)HASH_GET_FIRST(recv_sys->addr_hash, - hash_calc_hash(ut_fold_ulint_pair(space, offset), - recv_sys->addr_hash)); - while (recv_addr) { - if ((recv_addr->space == space) - && (recv_addr->page_no == offset)) { - break; - } - recv_addr = (recv_addr_t*)HASH_GET_NEXT(addr_hash, recv_addr); - } - - if ((recv_addr == NULL) - || (recv_addr->state == RECV_BEING_PROCESSED) - || (recv_addr->state == RECV_PROCESSED)) { - mutex_exit(&(recv_sys->mutex)); - goto not_to_recover; - } - - fprintf(stderr, " (cannot find space: %lu)", space); - recv_addr->state = RECV_PROCESSED; - - ut_a(recv_sys->n_addrs); - recv_sys->n_addrs--; - - mutex_exit(&(recv_sys->mutex)); - } -not_to_recover: - - return(0); - } - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Posting read request for page %lu, sync %s\n", - (ulong) offset, sync ? "true" : "false"); - } -#endif - - ut_ad(buf_page_in_file(bpage)); - ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex)); - - byte* frame = zip_size ? 
bpage->zip.data : ((buf_block_t*) bpage)->frame; - - if (sync) { - thd_wait_begin(NULL, THD_WAIT_DISKIO); - } - - if (zip_size) { - *err = _fil_io(OS_FILE_READ | wake_later - | ignore_nonexistent_pages, - sync, space, zip_size, offset, 0, zip_size, - frame, bpage, 0, trx); - } else { - ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - - *err = _fil_io(OS_FILE_READ | wake_later - | ignore_nonexistent_pages, - sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - frame, bpage, &bpage->write_size, trx); - } - - if (sync) { - thd_wait_end(NULL); - } - - if (*err != DB_SUCCESS) { - if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) { - buf_read_page_handle_error(bpage); - return(0); - } - SRV_CORRUPT_TABLE_CHECK(*err == DB_SUCCESS, - bpage->is_corrupt = TRUE;); - } - - if (sync) { - /* The i/o is already completed when we arrive from - fil_read */ - *err = buf_page_io_complete(bpage); - - if (*err != DB_SUCCESS) { - return(0); - } - } - - return(1); -} - -/********************************************************************//** -Applies a random read-ahead in buf_pool if there are at least a threshold -value of accessed pages from the random read-ahead area. Does not read any -page, not even the one at the position (space, offset), if the read-ahead -mechanism is not activated. NOTE 1: the calling thread may own latches on -pages: to avoid deadlocks this function must be written such that it cannot -end up waiting for these latches! NOTE 2: the calling thread must want -access to the page given: this rule is set to prevent unintended read-aheads -performed by ibuf routines, a situation which could result in a deadlock if -the OS does not support asynchronous i/o. -@return number of page read requests issued; NOTE that if we read ibuf -pages, it may happen that the page at the given page number does not -get read even if we return a positive value! 
-@return number of page read requests issued */ -UNIV_INTERN -ulint -buf_read_ahead_random( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, - or 0 */ - ulint offset, /*!< in: page number of a page which - the current thread wants to access */ - ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf - routine */ - trx_t* trx) -{ - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ib_int64_t tablespace_version; - ulint recent_blocks = 0; - ulint ibuf_mode; - ulint count; - ulint low, high; - dberr_t err = DB_SUCCESS; - ulint i; - const ulint buf_read_ahead_random_area - = BUF_READ_AHEAD_AREA(buf_pool); - - if (!srv_random_read_ahead) { - /* Disabled by user */ - return(0); - } - - if (srv_startup_is_before_trx_rollback_phase) { - /* No read-ahead to avoid thread deadlocks */ - return(0); - } - - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { - - /* If it is an ibuf bitmap page or trx sys hdr, we do - no read-ahead, as that could break the ibuf page access - order */ - - return(0); - } - - /* Remember the tablespace version before we ask te tablespace size - below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we - do not try to read outside the bounds of the tablespace! */ - - tablespace_version = fil_space_get_version(space); - - low = (offset / buf_read_ahead_random_area) - * buf_read_ahead_random_area; - high = (offset / buf_read_ahead_random_area + 1) - * buf_read_ahead_random_area; - if (high > fil_space_get_size(space)) { - - high = fil_space_get_size(space); - } - - if (buf_pool->n_pend_reads - > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - - return(0); - } - - /* Count how many blocks in the area have been recently accessed, - that is, reside near the start of the LRU list. 
*/ - - for (i = low; i < high; i++) { - - prio_rw_lock_t* hash_lock; - - const buf_page_t* bpage = - buf_page_hash_get_s_locked(buf_pool, space, i, - &hash_lock); - - if (bpage - && buf_page_is_accessed(bpage) - && buf_page_peek_if_young(bpage)) { - - recent_blocks++; - - if (recent_blocks - >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) { - - rw_lock_s_unlock(hash_lock); - goto read_ahead; - } - } - - if (bpage) { - rw_lock_s_unlock(hash_lock); - } - } - - /* Do nothing */ - return(0); - -read_ahead: - /* Read all the suitable blocks within the area */ - - if (inside_ibuf) { - ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; - } else { - ibuf_mode = BUF_READ_ANY_PAGE; - } - - count = 0; - - for (i = low; i < high; i++) { - /* It is only sensible to do read-ahead in the non-sync aio - mode: hence FALSE as the first parameter */ - - if (!ibuf_bitmap_page(zip_size, i)) { - - count += buf_read_page_low( - &err, false, - ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, FALSE, - tablespace_version, i, trx); - - switch(err) { - case DB_SUCCESS: - case DB_ERROR: - break; - case DB_TABLESPACE_DELETED: - ib_logf(IB_LOG_LEVEL_WARN, - "In random" - " readahead trying to access" - " tablespace " ULINTPF ":" ULINTPF - " but the tablespace does not" - " exist or is just being dropped.", - space, i); - break; - case DB_DECRYPTION_FAILED: - ib_logf(IB_LOG_LEVEL_ERROR, - "Random readahead failed to decrypt page " - ULINTPF ":" ULINTPF ".", - i, space); - break; - default: - ut_error; - } - } - } - - /* In simulated aio we wake the aio handler threads only after - queuing all aio requests, in native aio the following call does - nothing: */ - - os_aio_simulated_wake_handler_threads(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && (count > 0)) { - fprintf(stderr, - "Random read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, - (ulong) count); - } -#endif /* UNIV_DEBUG */ - - /* Read ahead is considered one I/O operation for the purpose of - LRU policy 
decision. */ - buf_LRU_stat_inc_io(); - - buf_pool->stat.n_ra_pages_read_rnd += count; - srv_stats.buf_pool_reads.add(count); - return(count); -} - -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. - -@param[in] space_id space_id -@param[in] zip_size compressed page size in bytes, or 0 -@param[in] offset page number -@param[in] trx transaction -@param[out] encrypted true if page encrypted -@return DB_SUCCESS if page has been read and is not corrupted, -@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted, -@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but -after decryption normal page checksum does not match. -@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */ -UNIV_INTERN -dberr_t -buf_read_page( - ulint space_id, - ulint zip_size, - ulint offset, - trx_t* trx) -{ - ib_int64_t tablespace_version; - ulint count; - dberr_t err = DB_SUCCESS; - - tablespace_version = fil_space_get_version(space_id); - - FilSpace space(space_id, true); - - if (space()) { - - /* We do the i/o in the synchronous aio mode to save thread - switches: hence TRUE */ - count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space_id, - zip_size, FALSE, - tablespace_version, offset, trx); - - srv_stats.buf_pool_reads.add(count); - } - - /* Page corruption and decryption failures are already reported - in above function. 
*/ - if (!space() || err == DB_TABLESPACE_DELETED) { - err = DB_TABLESPACE_DELETED; - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to access" - " tablespace [space=" ULINTPF ": page=" ULINTPF - "] but the tablespace does not exist" - " or is just being dropped.", - space_id, offset); - } - - /* Increment number of I/O operations used for LRU policy. */ - buf_LRU_stat_inc_io(); - - return(err); -} - -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. -@param[in] space Tablespace id -@param[in] offset Page no */ -UNIV_INTERN -void -buf_read_page_async( - ulint space, - ulint offset) -{ - ulint zip_size; - ib_int64_t tablespace_version; - ulint count; - dberr_t err = DB_SUCCESS; - - zip_size = fil_space_get_zip_size(space); - - if (zip_size == ULINT_UNDEFINED) { - return; - } - - tablespace_version = fil_space_get_version(space); - - count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE - | OS_AIO_SIMULATED_WAKE_LATER - | BUF_READ_IGNORE_NONEXISTENT_PAGES, - space, zip_size, FALSE, - tablespace_version, offset); - - switch(err) { - case DB_SUCCESS: - case DB_ERROR: - break; - case DB_TABLESPACE_DELETED: - ib_logf(IB_LOG_LEVEL_ERROR, - "In async page read " - "trying to access " - "page " ULINTPF ":" ULINTPF - " in nonexisting or being-dropped tablespace", - space, offset); - break; - - case DB_DECRYPTION_FAILED: - ib_logf(IB_LOG_LEVEL_ERROR, - "Async page read failed to decrypt page " - ULINTPF ":" ULINTPF ".", - space, offset); - break; - default: - ut_error; - } - - srv_stats.buf_pool_reads.add(count); - - /* We do not increment number of I/O operations used for LRU policy - here (buf_LRU_stat_inc_io()). 
We use this in heuristics to decide - about evicting uncompressed version of compressed pages from the - buffer pool. Since this function is called from buffer pool load - these IOs are deliberate and are not part of normal workload we can - ignore these in our heuristics. */ -} - -/********************************************************************//** -Applies linear read-ahead if in the buf_pool the page is a border page of -a linear read-ahead area and all the pages in the area have been accessed. -Does not read any page if the read-ahead mechanism is not activated. Note -that the algorithm looks at the 'natural' adjacent successor and -predecessor of the page, which on the leaf level of a B-tree are the next -and previous page in the chain of leaves. To know these, the page specified -in (space, offset) must already be present in the buf_pool. Thus, the -natural way to use this function is to call it when a page in the buf_pool -is accessed the first time, calling this function just after it has been -bufferfixed. -NOTE 1: as this function looks at the natural predecessor and successor -fields on the page, what happens, if these are not initialized to any -sensible value? No problem, before applying read-ahead we check that the -area to read is within the span of the space, if not, read-ahead is not -applied. An uninitialized value may result in a useless read operation, but -only very improbably. -NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this -function must be written such that it cannot end up waiting for these -latches! -NOTE 3: the calling thread must want access to the page given: this rule is -set to prevent unintended read-aheads performed by ibuf routines, a situation -which could result in a deadlock if the OS does not support asynchronous io. 
-@return number of page read requests issued */ -UNIV_INTERN -ulint -buf_read_ahead_linear( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ - ulint offset, /*!< in: page number; see NOTE 3 above */ - ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */ - trx_t* trx) -{ - buf_pool_t* buf_pool = buf_pool_get(space, offset); - ib_int64_t tablespace_version; - buf_page_t* bpage; - buf_frame_t* frame; - buf_page_t* pred_bpage = NULL; - unsigned pred_bpage_is_accessed = 0; - ulint pred_offset; - ulint succ_offset; - ulint count; - int asc_or_desc; - ulint new_offset; - ulint fail_count; - ulint ibuf_mode; - ulint low, high; - dberr_t err = DB_SUCCESS; - ulint i; - const ulint buf_read_ahead_linear_area - = BUF_READ_AHEAD_AREA(buf_pool); - ulint threshold; - - /* check if readahead is disabled */ - if (!srv_read_ahead_threshold) { - return(0); - } - - if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) { - /* No read-ahead to avoid thread deadlocks */ - return(0); - } - - low = (offset / buf_read_ahead_linear_area) - * buf_read_ahead_linear_area; - high = (offset / buf_read_ahead_linear_area + 1) - * buf_read_ahead_linear_area; - - if ((offset != low) && (offset != high - 1)) { - /* This is not a border page of the area: return */ - - return(0); - } - - if (ibuf_bitmap_page(zip_size, offset) - || trx_sys_hdr_page(space, offset)) { - - /* If it is an ibuf bitmap page or trx sys hdr, we do - no read-ahead, as that could break the ibuf page access - order */ - - return(0); - } - - /* Remember the tablespace version before we ask te tablespace size - below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we - do not try to read outside the bounds of the tablespace! 
*/ - - tablespace_version = fil_space_get_version(space); - - if (high > fil_space_get_size(space)) { - /* The area is not whole, return */ - - return(0); - } - - if (buf_pool->n_pend_reads - > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - - return(0); - } - - /* Check that almost all pages in the area have been accessed; if - offset == low, the accesses must be in a descending order, otherwise, - in an ascending order. */ - - asc_or_desc = 1; - - if (offset == low) { - asc_or_desc = -1; - } - - /* How many out of order accessed pages can we ignore - when working out the access pattern for linear readahead */ - threshold = ut_min((64 - srv_read_ahead_threshold), - BUF_READ_AHEAD_AREA(buf_pool)); - - fail_count = 0; - - prio_rw_lock_t* hash_lock; - - for (i = low; i < high; i++) { - - bpage = buf_page_hash_get_s_locked(buf_pool, space, i, - &hash_lock); - - if (bpage == NULL || !buf_page_is_accessed(bpage)) { - /* Not accessed */ - fail_count++; - - } else if (pred_bpage) { - /* Note that buf_page_is_accessed() returns - the time of the first access. If some blocks - of the extent existed in the buffer pool at - the time of a linear access pattern, the first - access times may be nonmonotonic, even though - the latest access times were linear. The - threshold (srv_read_ahead_factor) should help - a little against this. 
*/ - int res = ut_ulint_cmp( - buf_page_is_accessed(bpage), - pred_bpage_is_accessed); - /* Accesses not in the right order */ - if (res != 0 && res != asc_or_desc) { - fail_count++; - } - } - - if (fail_count > threshold) { - /* Too many failures: return */ - if (bpage) { - rw_lock_s_unlock(hash_lock); - } - return(0); - } - - if (bpage) { - if (buf_page_is_accessed(bpage)) { - pred_bpage = bpage; - pred_bpage_is_accessed - = buf_page_is_accessed(bpage); - } - - rw_lock_s_unlock(hash_lock); - } - } - - /* If we got this far, we know that enough pages in the area have - been accessed in the right order: linear read-ahead can be sensible */ - - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, &hash_lock); - - if (bpage == NULL) { - - return(0); - } - - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_ZIP_PAGE: - frame = bpage->zip.data; - break; - case BUF_BLOCK_FILE_PAGE: - frame = ((buf_block_t*) bpage)->frame; - break; - default: - ut_error; - break; - } - - /* Read the natural predecessor and successor page addresses from - the page; NOTE that because the calling thread may have an x-latch - on the page, we do not acquire an s-latch on the page, this is to - prevent deadlocks. Even if we read values which are nonsense, the - algorithm will work. 
*/ - - pred_offset = fil_page_get_prev(frame); - succ_offset = fil_page_get_next(frame); - - rw_lock_s_unlock(hash_lock); - - if ((offset == low) && (succ_offset == offset + 1)) { - - /* This is ok, we can continue */ - new_offset = pred_offset; - - } else if ((offset == high - 1) && (pred_offset == offset - 1)) { - - /* This is ok, we can continue */ - new_offset = succ_offset; - } else { - /* Successor or predecessor not in the right order */ - - return(0); - } - - low = (new_offset / buf_read_ahead_linear_area) - * buf_read_ahead_linear_area; - high = (new_offset / buf_read_ahead_linear_area + 1) - * buf_read_ahead_linear_area; - - if ((new_offset != low) && (new_offset != high - 1)) { - /* This is not a border page of the area: return */ - - return(0); - } - - if (high > fil_space_get_size(space)) { - /* The area is not whole, return */ - - return(0); - } - - /* If we got this far, read-ahead can be sensible: do it */ - - ibuf_mode = inside_ibuf - ? BUF_READ_IBUF_PAGES_ONLY | OS_AIO_SIMULATED_WAKE_LATER - : BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER; - - count = 0; - - /* Since Windows XP seems to schedule the i/o handler thread - very eagerly, and consequently it does not wait for the - full read batch to be posted, we use special heuristics here */ - - os_aio_simulated_put_read_threads_to_sleep(); - - for (i = low; i < high; i++) { - /* It is only sensible to do read-ahead in the non-sync - aio mode: hence FALSE as the first parameter */ - - if (!ibuf_bitmap_page(zip_size, i)) { - - count += buf_read_page_low( - &err, false, - ibuf_mode, - space, zip_size, FALSE, tablespace_version, - i, trx); - - switch(err) { - case DB_SUCCESS: - case DB_ERROR: - break; - case DB_TABLESPACE_DELETED: - ib_logf(IB_LOG_LEVEL_WARN, - "In linear" - " readahead trying to access" - " tablespace " ULINTPF ":" ULINTPF - " but the tablespace does not" - " exist or is just being dropped.", - space, i); - break; - - case DB_DECRYPTION_FAILED: - ib_logf(IB_LOG_LEVEL_ERROR, - 
"Linear readahead failed to decrypt page " - ULINTPF ":" ULINTPF ".", - i, space); - break; - default: - ut_error; - } - } - } - - /* In simulated aio we wake the aio handler threads only after - queuing all aio requests, in native aio the following call does - nothing: */ - - os_aio_simulated_wake_handler_threads(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && (count > 0)) { - fprintf(stderr, - "LINEAR read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, (ulong) count); - } -#endif /* UNIV_DEBUG */ - - /* Read ahead is considered one I/O operation for the purpose of - LRU policy decision. */ - buf_LRU_stat_inc_io(); - - buf_pool->stat.n_ra_pages_read += count; - return(count); -} - -/********************************************************************//** -Issues read requests for pages which the ibuf module wants to read in, in -order to contract the insert buffer tree. Technically, this function is like -a read-ahead function. */ -UNIV_INTERN -void -buf_read_ibuf_merge_pages( -/*======================*/ - bool sync, /*!< in: true if the caller - wants this function to wait - for the highest address page - to get read in, before this - function returns */ - const ulint* space_ids, /*!< in: array of space ids */ - const ib_int64_t* space_versions,/*!< in: the spaces must have - this version number - (timestamp), otherwise we - discard the read; we use this - to cancel reads if DISCARD + - IMPORT may have changed the - tablespace size */ - const ulint* page_nos, /*!< in: array of page numbers - to read, with the highest page - number the last in the - array */ - ulint n_stored) /*!< in: number of elements - in the arrays */ -{ - ulint i; - -#ifdef UNIV_IBUF_DEBUG - ut_a(n_stored < UNIV_PAGE_SIZE); -#endif - - for (i = 0; i < n_stored; i++) { - buf_pool_t* buf_pool; - ulint zip_size = fil_space_get_zip_size(space_ids[i]); - dberr_t err = DB_SUCCESS; - - buf_pool = buf_pool_get(space_ids[i], page_nos[i]); - - while (buf_pool->n_pend_reads 
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - os_thread_sleep(500000); - } - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - goto tablespace_deleted; - } - - buf_read_page_low(&err, sync && (i + 1 == n_stored), - BUF_READ_ANY_PAGE, space_ids[i], - zip_size, TRUE, space_versions[i], - page_nos[i], NULL); - - switch(err) { - case DB_SUCCESS: - case DB_ERROR: - break; - case DB_TABLESPACE_DELETED: - -tablespace_deleted: - /* We have deleted or are deleting the single-table - tablespace: remove the entries for tablespace. */ - ibuf_delete_for_discarded_space(space_ids[i]); - break; - case DB_DECRYPTION_FAILED: - ib_logf(IB_LOG_LEVEL_ERROR, - "Failed to decrypt insert buffer page " - ULINTPF ":" ULINTPF ".", - space_ids[i], page_nos[i]); - break; - default: - ut_error; - } - } - - os_aio_simulated_wake_handler_threads(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Ibuf merge read-ahead space %lu pages %lu\n", - (ulong) space_ids[0], (ulong) n_stored); - } -#endif /* UNIV_DEBUG */ -} - -/********************************************************************//** -Issues read requests for pages which recovery wants to read in. 
*/ -UNIV_INTERN -void -buf_read_recv_pages( -/*================*/ - ibool sync, /*!< in: TRUE if the caller - wants this function to wait - for the highest address page - to get read in, before this - function returns */ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in - bytes, or 0 */ - const ulint* page_nos, /*!< in: array of page numbers - to read, with the highest page - number the last in the - array */ - ulint n_stored) /*!< in: number of page numbers - in the array */ -{ - ib_int64_t tablespace_version; - ulint count; - dberr_t err = DB_SUCCESS; - ulint i; - - zip_size = fil_space_get_zip_size(space); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ - - /* the log records should be treated here same reason - for http://bugs.mysql.com/bug.php?id=43948 */ - - if (recv_recovery_is_on()) { - recv_addr_t* recv_addr; - - mutex_enter(&(recv_sys->mutex)); - - if (recv_sys->apply_log_recs == FALSE) { - mutex_exit(&(recv_sys->mutex)); - goto not_to_recover; - } - - for (i = 0; i < n_stored; i++) { - /* recv_get_fil_addr_struct() */ - recv_addr = (recv_addr_t*)HASH_GET_FIRST(recv_sys->addr_hash, - hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]), - recv_sys->addr_hash)); - while (recv_addr) { - if ((recv_addr->space == space) - && (recv_addr->page_no == page_nos[i])) { - break; - } - recv_addr = (recv_addr_t*)HASH_GET_NEXT(addr_hash, recv_addr); - } - - if ((recv_addr == NULL) - || (recv_addr->state == RECV_BEING_PROCESSED) - || (recv_addr->state == RECV_PROCESSED)) { - continue; - } - - recv_addr->state = RECV_PROCESSED; - - ut_a(recv_sys->n_addrs); - recv_sys->n_addrs--; - } - - mutex_exit(&(recv_sys->mutex)); - - fprintf(stderr, " (cannot find space: %lu)", space); - } -not_to_recover: - - return; - } - - tablespace_version = fil_space_get_version(space); - - for (i = 0; i < n_stored; i++) { - buf_pool_t* buf_pool; - - count = 0; - - 
os_aio_print_debug = FALSE; - buf_pool = buf_pool_get(space, page_nos[i]); - while (buf_pool->n_pend_reads - >= recv_n_pool_free_frames / 2) { - - os_aio_simulated_wake_handler_threads(); - os_thread_sleep(10000); - - count++; - - if (count > 1000) { - ib_logf(IB_LOG_LEVEL_ERROR, - "waited for 10 seconds for " ULINTPF - " pending reads to the buffer pool to" - " be finished", - buf_pool->n_pend_reads); - - os_aio_print_debug = TRUE; - } - } - - os_aio_print_debug = FALSE; - - if ((i + 1 == n_stored) && sync) { - buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space, - zip_size, TRUE, tablespace_version, - page_nos[i], NULL); - } else { - buf_read_page_low(&err, false, BUF_READ_ANY_PAGE - | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, TRUE, - tablespace_version, page_nos[i], - NULL); - } - - if (err == DB_DECRYPTION_FAILED) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Recovery failed to decrypt read page " - ULINTPF ":" ULINTPF ".", - space, page_nos[i]); - } - } - - os_aio_simulated_wake_handler_threads(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Recovery applies read-ahead pages %lu\n", - (ulong) n_stored); - } -#endif /* UNIV_DEBUG */ -} diff --git a/storage/xtradb/data/data0data.cc b/storage/xtradb/data/data0data.cc deleted file mode 100644 index 593af089b00..00000000000 --- a/storage/xtradb/data/data0data.cc +++ /dev/null @@ -1,751 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file data/data0data.cc -SQL data field and tuple - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "data0data.h" - -#ifdef UNIV_NONINL -#include "data0data.ic" -#endif - -#ifndef UNIV_HOTBACKUP -#include "rem0rec.h" -#include "rem0cmp.h" -#include "page0page.h" -#include "page0zip.h" -#include "dict0dict.h" -#include "btr0cur.h" - -#include <ctype.h> -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/** Dummy variable to catch access to uninitialized fields. In the -debug version, dtuple_create() will make all fields of dtuple_t point -to data_error. */ -UNIV_INTERN byte data_error; - -# ifndef UNIV_DEBUG_VALGRIND -/** this is used to fool the compiler in dtuple_validate */ -UNIV_INTERN ulint data_dummy; -# endif /* !UNIV_DEBUG_VALGRIND */ -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Compare two data tuples, respecting the collation of character fields. 
-@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively, -than tuple2 */ -UNIV_INTERN -int -dtuple_coll_cmp( -/*============*/ - const dtuple_t* tuple1, /*!< in: tuple 1 */ - const dtuple_t* tuple2) /*!< in: tuple 2 */ -{ - ulint n_fields; - ulint i; - - ut_ad(tuple1 != NULL); - ut_ad(tuple2 != NULL); - ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(tuple1)); - ut_ad(dtuple_check_typed(tuple2)); - - n_fields = dtuple_get_n_fields(tuple1); - - if (n_fields != dtuple_get_n_fields(tuple2)) { - - return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1); - } - - for (i = 0; i < n_fields; i++) { - int cmp; - const dfield_t* field1 = dtuple_get_nth_field(tuple1, i); - const dfield_t* field2 = dtuple_get_nth_field(tuple2, i); - - cmp = cmp_dfield_dfield(field1, field2); - - if (cmp) { - return(cmp); - } - } - - return(0); -} - -/*********************************************************************//** -Sets number of fields used in a tuple. Normally this is set in -dtuple_create, but if you want later to set it smaller, you can use this. */ -UNIV_INTERN -void -dtuple_set_n_fields( -/*================*/ - dtuple_t* tuple, /*!< in: tuple */ - ulint n_fields) /*!< in: number of fields */ -{ - ut_ad(tuple); - - tuple->n_fields = n_fields; - tuple->n_fields_cmp = n_fields; -} - -/**********************************************************//** -Checks that a data field is typed. 
-@return TRUE if ok */ -static -ibool -dfield_check_typed_no_assert( -/*=========================*/ - const dfield_t* field) /*!< in: data field */ -{ - if (dfield_get_type(field)->mtype > DATA_MYSQL - || dfield_get_type(field)->mtype < DATA_VARCHAR) { - - fprintf(stderr, - "InnoDB: Error: data field type %lu, len %lu\n", - (ulong) dfield_get_type(field)->mtype, - (ulong) dfield_get_len(field)); - return(FALSE); - } - - return(TRUE); -} - -/**********************************************************//** -Checks that a data tuple is typed. -@return TRUE if ok */ -UNIV_INTERN -ibool -dtuple_check_typed_no_assert( -/*=========================*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - const dfield_t* field; - ulint i; - - if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) { - fprintf(stderr, - "InnoDB: Error: index entry has %lu fields\n", - (ulong) dtuple_get_n_fields(tuple)); -dump: - fputs("InnoDB: Tuple contents: ", stderr); - dtuple_print(stderr, tuple); - putc('\n', stderr); - - return(FALSE); - } - - for (i = 0; i < dtuple_get_n_fields(tuple); i++) { - - field = dtuple_get_nth_field(tuple, i); - - if (!dfield_check_typed_no_assert(field)) { - goto dump; - } - } - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/**********************************************************//** -Checks that a data field is typed. Asserts an error if not. -@return TRUE if ok */ -UNIV_INTERN -ibool -dfield_check_typed( -/*===============*/ - const dfield_t* field) /*!< in: data field */ -{ - if (dfield_get_type(field)->mtype > DATA_MYSQL - || dfield_get_type(field)->mtype < DATA_VARCHAR) { - - fprintf(stderr, - "InnoDB: Error: data field type %lu, len %lu\n", - (ulong) dfield_get_type(field)->mtype, - (ulong) dfield_get_len(field)); - - ut_error; - } - - return(TRUE); -} - -/**********************************************************//** -Checks that a data tuple is typed. Asserts an error if not. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -dtuple_check_typed( -/*===============*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - const dfield_t* field; - ulint i; - - for (i = 0; i < dtuple_get_n_fields(tuple); i++) { - - field = dtuple_get_nth_field(tuple, i); - - ut_a(dfield_check_typed(field)); - } - - return(TRUE); -} - -/**********************************************************//** -Validates the consistency of a tuple which must be complete, i.e, -all fields must have been set. -@return TRUE if ok */ -UNIV_INTERN -ibool -dtuple_validate( -/*============*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - const dfield_t* field; - ulint n_fields; - ulint len; - ulint i; - - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - - n_fields = dtuple_get_n_fields(tuple); - - /* We dereference all the data of each field to test - for memory traps */ - - for (i = 0; i < n_fields; i++) { - - field = dtuple_get_nth_field(tuple, i); - len = dfield_get_len(field); - - if (!dfield_is_null(field)) { - - const byte* data; - - data = static_cast<const byte*>(dfield_get_data(field)); -#ifndef UNIV_DEBUG_VALGRIND - ulint j; - - for (j = 0; j < len; j++) { - - data_dummy += *data; /* fool the compiler not - to optimize out this - code */ - data++; - } -#endif /* !UNIV_DEBUG_VALGRIND */ - - UNIV_MEM_ASSERT_RW(data, len); - } - } - - ut_a(dtuple_check_typed(tuple)); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Pretty prints a dfield value according to its data type. 
*/ -UNIV_INTERN -void -dfield_print( -/*=========*/ - const dfield_t* dfield) /*!< in: dfield */ -{ - const byte* data; - ulint len; - ulint i; - - len = dfield_get_len(dfield); - data = static_cast<const byte*>(dfield_get_data(dfield)); - - if (dfield_is_null(dfield)) { - fputs("NULL", stderr); - - return; - } - - switch (dtype_get_mtype(dfield_get_type(dfield))) { - case DATA_CHAR: - case DATA_VARCHAR: - for (i = 0; i < len; i++) { - int c = *data++; - putc(isprint(c) ? c : ' ', stderr); - } - - if (dfield_is_ext(dfield)) { - fputs("(external)", stderr); - } - break; - case DATA_INT: - ut_a(len == 4); /* only works for 32-bit integers */ - fprintf(stderr, "%d", (int) mach_read_from_4(data)); - break; - default: - ut_error; - } -} - -/*************************************************************//** -Pretty prints a dfield value according to its data type. Also the hex string -is printed if a string contains non-printable characters. */ -UNIV_INTERN -void -dfield_print_also_hex( -/*==================*/ - const dfield_t* dfield) /*!< in: dfield */ -{ - const byte* data; - ulint len; - ulint prtype; - ulint i; - ibool print_also_hex; - - len = dfield_get_len(dfield); - data = static_cast<const byte*>(dfield_get_data(dfield)); - - if (dfield_is_null(dfield)) { - fputs("NULL", stderr); - - return; - } - - prtype = dtype_get_prtype(dfield_get_type(dfield)); - - switch (dtype_get_mtype(dfield_get_type(dfield))) { - ib_id_t id; - case DATA_INT: - switch (len) { - ulint val; - case 1: - val = mach_read_from_1(data); - - if (!(prtype & DATA_UNSIGNED)) { - val &= ~0x80; - fprintf(stderr, "%ld", (long) val); - } else { - fprintf(stderr, "%lu", (ulong) val); - } - break; - - case 2: - val = mach_read_from_2(data); - - if (!(prtype & DATA_UNSIGNED)) { - val &= ~0x8000; - fprintf(stderr, "%ld", (long) val); - } else { - fprintf(stderr, "%lu", (ulong) val); - } - break; - - case 3: - val = mach_read_from_3(data); - - if (!(prtype & DATA_UNSIGNED)) { - val &= ~0x800000; - 
fprintf(stderr, "%ld", (long) val); - } else { - fprintf(stderr, "%lu", (ulong) val); - } - break; - - case 4: - val = mach_read_from_4(data); - - if (!(prtype & DATA_UNSIGNED)) { - val &= ~0x80000000; - fprintf(stderr, "%ld", (long) val); - } else { - fprintf(stderr, "%lu", (ulong) val); - } - break; - - case 6: - id = mach_read_from_6(data); - fprintf(stderr, "%llu", (ullint) id); - break; - - case 7: - id = mach_read_from_7(data); - fprintf(stderr, "%llu", (ullint) id); - break; - case 8: - id = mach_read_from_8(data); - fprintf(stderr, "%llu", (ullint) id); - break; - default: - goto print_hex; - } - break; - - case DATA_SYS: - switch (prtype & DATA_SYS_PRTYPE_MASK) { - case DATA_TRX_ID: - id = mach_read_from_6(data); - - fprintf(stderr, "trx_id " TRX_ID_FMT, id); - break; - - case DATA_ROLL_PTR: - id = mach_read_from_7(data); - - fprintf(stderr, "roll_ptr " TRX_ID_FMT, id); - break; - - case DATA_ROW_ID: - id = mach_read_from_6(data); - - fprintf(stderr, "row_id " TRX_ID_FMT, id); - break; - - default: - id = mach_ull_read_compressed(data); - - fprintf(stderr, "mix_id " TRX_ID_FMT, id); - } - break; - - case DATA_CHAR: - case DATA_VARCHAR: - print_also_hex = FALSE; - - for (i = 0; i < len; i++) { - int c = *data++; - - if (!isprint(c)) { - print_also_hex = TRUE; - - fprintf(stderr, "\\x%02x", (unsigned char) c); - } else { - putc(c, stderr); - } - } - - if (dfield_is_ext(dfield)) { - fputs("(external)", stderr); - } - - if (!print_also_hex) { - break; - } - - data = static_cast<byte*>(dfield_get_data(dfield)); - /* fall through */ - - case DATA_BINARY: - default: -print_hex: - fputs(" Hex: ",stderr); - - for (i = 0; i < len; i++) { - fprintf(stderr, "%02lx", (ulint) *data++); - } - - if (dfield_is_ext(dfield)) { - fputs("(external)", stderr); - } - } -} - -/*************************************************************//** -Print a dfield value using ut_print_buf. 
*/ -static -void -dfield_print_raw( -/*=============*/ - FILE* f, /*!< in: output stream */ - const dfield_t* dfield) /*!< in: dfield */ -{ - ulint len = dfield_get_len(dfield); - if (!dfield_is_null(dfield)) { - ulint print_len = ut_min(len, 1000); - ut_print_buf(f, dfield_get_data(dfield), print_len); - if (len != print_len) { - fprintf(f, "(total %lu bytes%s)", - (ulong) len, - dfield_is_ext(dfield) ? ", external" : ""); - } - } else { - fputs(" SQL NULL", f); - } -} - -/**********************************************************//** -The following function prints the contents of a tuple. */ -UNIV_INTERN -void -dtuple_print( -/*=========*/ - FILE* f, /*!< in: output stream */ - const dtuple_t* tuple) /*!< in: tuple */ -{ - ulint n_fields; - ulint i; - - n_fields = dtuple_get_n_fields(tuple); - - fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields); - - for (i = 0; i < n_fields; i++) { - fprintf(f, " %lu:", (ulong) i); - - dfield_print_raw(f, dtuple_get_nth_field(tuple, i)); - - putc(';', f); - putc('\n', f); - } - - ut_ad(dtuple_validate(tuple)); -} - -/**************************************************************//** -Moves parts of long fields in entry to the big record vector so that -the size of tuple drops below the maximum record size allowed in the -database. Moves data only from those fields which are not necessary -to determine uniquely the insertion place of the tuple in the index. 
-@return own: created big record vector, NULL if we are not able to -shorten the entry enough, i.e., if there are too many fixed-length or -short fields in entry or the index is clustered */ -UNIV_INTERN -big_rec_t* -dtuple_convert_big_rec( -/*===================*/ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in/out: index entry */ - ulint* n_ext) /*!< in/out: number of - externally stored columns */ -{ - mem_heap_t* heap; - big_rec_t* vector; - dfield_t* dfield; - dict_field_t* ifield; - ulint size; - ulint n_fields; - ulint local_len; - ulint local_prefix_len; - - if (!dict_index_is_clust(index)) { - return(NULL); - } - - if (dict_table_get_format(index->table) < UNIV_FORMAT_B) { - /* up to MySQL 5.1: store a 768-byte prefix locally */ - local_len = BTR_EXTERN_FIELD_REF_SIZE - + DICT_ANTELOPE_MAX_INDEX_COL_LEN; - } else { - /* new-format table: do not store any BLOB prefix locally */ - local_len = BTR_EXTERN_FIELD_REF_SIZE; - } - - ut_a(dtuple_check_typed_no_assert(entry)); - - size = rec_get_converted_size(index, entry, *n_ext); - - if (UNIV_UNLIKELY(size > 1000000000)) { - fprintf(stderr, - "InnoDB: Warning: tuple size very big: %lu\n", - (ulong) size); - fputs("InnoDB: Tuple contents: ", stderr); - dtuple_print(stderr, entry); - putc('\n', stderr); - } - - heap = mem_heap_create(size + dtuple_get_n_fields(entry) - * sizeof(big_rec_field_t) + 1000); - - vector = static_cast<big_rec_t*>( - mem_heap_alloc(heap, sizeof(big_rec_t))); - - vector->heap = heap; - - vector->fields = static_cast<big_rec_field_t*>( - mem_heap_alloc( - heap, - dtuple_get_n_fields(entry) * sizeof(big_rec_field_t))); - - /* Decide which fields to shorten: the algorithm is to look for - a variable-length field that yields the biggest savings when - stored externally */ - - n_fields = 0; - - while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, - *n_ext), - dict_table_is_comp(index->table), - dict_index_get_n_fields(index), - 
dict_table_zip_size(index->table))) { - ulint i; - ulint longest = 0; - ulint longest_i = ULINT_MAX; - byte* data; - big_rec_field_t* b; - - for (i = dict_index_get_n_unique_in_tree(index); - i < dtuple_get_n_fields(entry); i++) { - ulint savings; - - dfield = dtuple_get_nth_field(entry, i); - ifield = dict_index_get_nth_field(index, i); - - /* Skip fixed-length, NULL, externally stored, - or short columns */ - - if (ifield->fixed_len - || dfield_is_null(dfield) - || dfield_is_ext(dfield) - || dfield_get_len(dfield) <= local_len - || dfield_get_len(dfield) - <= BTR_EXTERN_FIELD_REF_SIZE * 2) { - goto skip_field; - } - - savings = dfield_get_len(dfield) - local_len; - - /* Check that there would be savings */ - if (longest >= savings) { - goto skip_field; - } - - /* In DYNAMIC and COMPRESSED format, store - locally any non-BLOB columns whose maximum - length does not exceed 256 bytes. This is - because there is no room for the "external - storage" flag when the maximum length is 255 - bytes or less. This restriction trivially - holds in REDUNDANT and COMPACT format, because - there we always store locally columns whose - length is up to local_len == 788 bytes. - @see rec_init_offsets_comp_ordinary */ - if (ifield->col->mtype != DATA_BLOB - && ifield->col->len < 256) { - goto skip_field; - } - - longest_i = i; - longest = savings; - -skip_field: - continue; - } - - if (!longest) { - /* Cannot shorten more */ - - mem_heap_free(heap); - - return(NULL); - } - - /* Move data from field longest_i to big rec vector. - - We store the first bytes locally to the record. Then - we can calculate all ordering fields in all indexes - from locally stored data. 
*/ - - dfield = dtuple_get_nth_field(entry, longest_i); - ifield = dict_index_get_nth_field(index, longest_i); - local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE; - - b = &vector->fields[n_fields]; - b->field_no = longest_i; - b->len = dfield_get_len(dfield) - local_prefix_len; - b->data = (char*) dfield_get_data(dfield) + local_prefix_len; - - /* Allocate the locally stored part of the column. */ - data = static_cast<byte*>(mem_heap_alloc(heap, local_len)); - - /* Copy the local prefix. */ - memcpy(data, dfield_get_data(dfield), local_prefix_len); - /* Clear the extern field reference (BLOB pointer). */ - memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE); -#if 0 - /* The following would fail the Valgrind checks in - page_cur_insert_rec_low() and page_cur_insert_rec_zip(). - The BLOB pointers in the record will be initialized after - the record and the BLOBs have been written. */ - UNIV_MEM_ALLOC(data + local_prefix_len, - BTR_EXTERN_FIELD_REF_SIZE); -#endif - - dfield_set_data(dfield, data, local_len); - dfield_set_ext(dfield); - - n_fields++; - (*n_ext)++; - ut_ad(n_fields < dtuple_get_n_fields(entry)); - } - - vector->n_fields = n_fields; - return(vector); -} - -/**************************************************************//** -Puts back to entry the data stored in vector. Note that to ensure the -fields in entry can accommodate the data, vector must have been created -from entry with dtuple_convert_big_rec. 
*/ -UNIV_INTERN -void -dtuple_convert_back_big_rec( -/*========================*/ - dict_index_t* index MY_ATTRIBUTE((unused)), /*!< in: index */ - dtuple_t* entry, /*!< in: entry whose data was put to vector */ - big_rec_t* vector) /*!< in, own: big rec vector; it is - freed in this function */ -{ - big_rec_field_t* b = vector->fields; - const big_rec_field_t* const end = b + vector->n_fields; - - for (; b < end; b++) { - dfield_t* dfield; - ulint local_len; - - dfield = dtuple_get_nth_field(entry, b->field_no); - local_len = dfield_get_len(dfield); - - ut_ad(dfield_is_ext(dfield)); - ut_ad(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - /* Only in REDUNDANT and COMPACT format, we store - up to DICT_ANTELOPE_MAX_INDEX_COL_LEN (768) bytes - locally */ - ut_ad(local_len <= DICT_ANTELOPE_MAX_INDEX_COL_LEN); - - dfield_set_data(dfield, - (char*) b->data - local_len, - b->len + local_len); - } - - mem_heap_free(vector->heap); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/data/data0type.cc b/storage/xtradb/data/data0type.cc deleted file mode 100644 index 0b9e08544a5..00000000000 --- a/storage/xtradb/data/data0type.cc +++ /dev/null @@ -1,298 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file data/data0type.cc -Data types - -Created 1/16/1996 Heikki Tuuri -*******************************************************/ - -#include "data0type.h" - -#ifdef UNIV_NONINL -#include "data0type.ic" -#endif - -#ifndef UNIV_HOTBACKUP -# include "ha_prototypes.h" - -/* At the database startup we store the default-charset collation number of -this MySQL installation to this global variable. If we have < 4.1.2 format -column definitions, or records in the insert buffer, we use this -charset-collation code for them. */ - -UNIV_INTERN ulint data_mysql_default_charset_coll; - -/*********************************************************************//** -Determine how many bytes the first n characters of the given string occupy. -If the string is shorter than n characters, returns the number of bytes -the characters in the string occupy. 
-@return length of the prefix, in bytes */ -UNIV_INTERN -ulint -dtype_get_at_most_n_mbchars( -/*========================*/ - ulint prtype, /*!< in: precise type */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multi-byte character */ - ulint prefix_len, /*!< in: length of the requested - prefix, in characters, multiplied by - dtype_get_mbmaxlen(dtype) */ - ulint data_len, /*!< in: length of str (in bytes) */ - const char* str) /*!< in: the string whose prefix - length is being determined */ -{ - ulint mbminlen = DATA_MBMINLEN(mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen); - - ut_a(data_len != UNIV_SQL_NULL); - ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen)); - - if (mbminlen != mbmaxlen) { - ut_a(!(prefix_len % mbmaxlen)); - return(innobase_get_at_most_n_mbchars( - dtype_get_charset_coll(prtype), - prefix_len, data_len, str)); - } - - if (prefix_len < data_len) { - - return(prefix_len); - - } - - return(data_len); -} -#endif /* UNIV_HOTBACKUP */ - -/*********************************************************************//** -Checks if a data main type is a string type. Also a BLOB is considered a -string type. -@return TRUE if string type */ -UNIV_INTERN -ibool -dtype_is_string_type( -/*=================*/ - ulint mtype) /*!< in: InnoDB main data type code: DATA_CHAR, ... */ -{ - if (mtype <= DATA_BLOB - || mtype == DATA_MYSQL - || mtype == DATA_VARMYSQL) { - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************//** -Checks if a type is a binary string type. Note that for tables created with -< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For -those DATA_BLOB columns this function currently returns FALSE. 
-@return TRUE if binary string type */ -UNIV_INTERN -ibool -dtype_is_binary_string_type( -/*========================*/ - ulint mtype, /*!< in: main data type */ - ulint prtype) /*!< in: precise type */ -{ - if ((mtype == DATA_FIXBINARY) - || (mtype == DATA_BINARY) - || (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))) { - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************//** -Checks if a type is a non-binary string type. That is, dtype_is_string_type is -TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created -with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. -For those DATA_BLOB columns this function currently returns TRUE. -@return TRUE if non-binary string type */ -UNIV_INTERN -ibool -dtype_is_non_binary_string_type( -/*============================*/ - ulint mtype, /*!< in: main data type */ - ulint prtype) /*!< in: precise type */ -{ - if (dtype_is_string_type(mtype) == TRUE - && dtype_is_binary_string_type(mtype, prtype) == FALSE) { - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************//** -Forms a precise type from the < 4.1.2 format precise type plus the -charset-collation code. -@return precise type, including the charset-collation code */ -UNIV_INTERN -ulint -dtype_form_prtype( -/*==============*/ - ulint old_prtype, /*!< in: the MySQL type code and the flags - DATA_BINARY_TYPE etc. */ - ulint charset_coll) /*!< in: MySQL charset-collation code */ -{ - ut_a(old_prtype < 256 * 256); - ut_a(charset_coll <= MAX_CHAR_COLL_NUM); - - return(old_prtype + (charset_coll << 16)); -} - -/*********************************************************************//** -Validates a data type structure. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -dtype_validate( -/*===========*/ - const dtype_t* type) /*!< in: type struct to validate */ -{ - ut_a(type); - ut_a(type->mtype >= DATA_VARCHAR); - ut_a(type->mtype <= DATA_MYSQL); - - if (type->mtype == DATA_SYS) { - ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS); - } - -#ifndef UNIV_HOTBACKUP - ut_a(dtype_get_mbminlen(type) <= dtype_get_mbmaxlen(type)); -#endif /* !UNIV_HOTBACKUP */ - - return(TRUE); -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Prints a data type structure. */ -UNIV_INTERN -void -dtype_print( -/*========*/ - const dtype_t* type) /*!< in: type */ -{ - ulint mtype; - ulint prtype; - ulint len; - - ut_a(type); - - mtype = type->mtype; - prtype = type->prtype; - - switch (mtype) { - case DATA_VARCHAR: - fputs("DATA_VARCHAR", stderr); - break; - - case DATA_CHAR: - fputs("DATA_CHAR", stderr); - break; - - case DATA_BINARY: - fputs("DATA_BINARY", stderr); - break; - - case DATA_FIXBINARY: - fputs("DATA_FIXBINARY", stderr); - break; - - case DATA_BLOB: - fputs("DATA_BLOB", stderr); - break; - - case DATA_INT: - fputs("DATA_INT", stderr); - break; - - case DATA_MYSQL: - fputs("DATA_MYSQL", stderr); - break; - - case DATA_SYS: - fputs("DATA_SYS", stderr); - break; - - case DATA_FLOAT: - fputs("DATA_FLOAT", stderr); - break; - - case DATA_DOUBLE: - fputs("DATA_DOUBLE", stderr); - break; - - case DATA_DECIMAL: - fputs("DATA_DECIMAL", stderr); - break; - - case DATA_VARMYSQL: - fputs("DATA_VARMYSQL", stderr); - break; - - default: - fprintf(stderr, "type %lu", (ulong) mtype); - break; - } - - len = type->len; - - if ((type->mtype == DATA_SYS) - || (type->mtype == DATA_VARCHAR) - || (type->mtype == DATA_CHAR)) { - putc(' ', stderr); - if (prtype == DATA_ROW_ID) { - fputs("DATA_ROW_ID", stderr); - len = DATA_ROW_ID_LEN; - } else if (prtype == DATA_ROLL_PTR) { - fputs("DATA_ROLL_PTR", stderr); - len = DATA_ROLL_PTR_LEN; - } else if (prtype == 
DATA_TRX_ID) { - fputs("DATA_TRX_ID", stderr); - len = DATA_TRX_ID_LEN; - } else if (prtype == DATA_ENGLISH) { - fputs("DATA_ENGLISH", stderr); - } else { - fprintf(stderr, "prtype %lu", (ulong) prtype); - } - } else { - if (prtype & DATA_UNSIGNED) { - fputs(" DATA_UNSIGNED", stderr); - } - - if (prtype & DATA_BINARY_TYPE) { - fputs(" DATA_BINARY_TYPE", stderr); - } - - if (prtype & DATA_NOT_NULL) { - fputs(" DATA_NOT_NULL", stderr); - } - } - - fprintf(stderr, " len %lu", (ulong) len); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc deleted file mode 100644 index 9731211ff5f..00000000000 --- a/storage/xtradb/dict/dict0boot.cc +++ /dev/null @@ -1,531 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dict/dict0boot.cc -Data dictionary creation and booting - -Created 4/18/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0boot.h" - -#ifdef UNIV_NONINL -#include "dict0boot.ic" -#endif - -#include "dict0crea.h" -#include "btr0btr.h" -#include "btr0sea.h" -#include "dict0load.h" -#include "trx0trx.h" -#include "srv0srv.h" -#include "ibuf0ibuf.h" -#include "buf0flu.h" -#include "log0recv.h" -#include "os0file.h" - -/**********************************************************************//** -Gets a pointer to the dictionary header and x-latches its page. -@return pointer to the dictionary header, page x-latched */ -UNIV_INTERN -dict_hdr_t* -dict_hdr_get( -/*=========*/ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - dict_hdr_t* header; - - block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO, - RW_X_LATCH, mtr); - header = DICT_HDR + buf_block_get_frame(block); - - buf_block_dbg_add_level(block, SYNC_DICT_HEADER); - - return(header); -} - -/**********************************************************************//** -Returns a new table, index, or space id. 
*/ -UNIV_INTERN -void -dict_hdr_get_new_id( -/*================*/ - table_id_t* table_id, /*!< out: table id - (not assigned if NULL) */ - index_id_t* index_id, /*!< out: index id - (not assigned if NULL) */ - ulint* space_id) /*!< out: space id - (not assigned if NULL) */ -{ - dict_hdr_t* dict_hdr; - ib_id_t id; - mtr_t mtr; - - mtr_start(&mtr); - - dict_hdr = dict_hdr_get(&mtr); - - if (table_id) { - id = mach_read_from_8(dict_hdr + DICT_HDR_TABLE_ID); - id++; - mlog_write_ull(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr); - *table_id = id; - } - - if (index_id) { - id = mach_read_from_8(dict_hdr + DICT_HDR_INDEX_ID); - id++; - mlog_write_ull(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr); - *index_id = id; - } - - if (space_id) { - *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, - MLOG_4BYTES, &mtr); - if (fil_assign_new_space_id(space_id)) { - mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, - *space_id, MLOG_4BYTES, &mtr); - } - } - - mtr_commit(&mtr); -} - -/**********************************************************************//** -Writes the current value of the row id counter to the dictionary header file -page. */ -UNIV_INTERN -void -dict_hdr_flush_row_id(void) -/*=======================*/ -{ - dict_hdr_t* dict_hdr; - row_id_t id; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - id = dict_sys->row_id; - - mtr_start(&mtr); - - dict_hdr = dict_hdr_get(&mtr); - - mlog_write_ull(dict_hdr + DICT_HDR_ROW_ID, id, &mtr); - - mtr_commit(&mtr); -} - -/*****************************************************************//** -Creates the file page for the dictionary header. This function is -called only at the database creation. 
-@return TRUE if succeed */ -static -ibool -dict_hdr_create( -/*============*/ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - dict_hdr_t* dict_header; - ulint root_page_no; - - ut_ad(mtr); - - /* Create the dictionary header file block in a new, allocated file - segment in the system tablespace */ - block = fseg_create(DICT_HDR_SPACE, 0, - DICT_HDR + DICT_HDR_FSEG_HEADER, mtr); - - ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block)); - - dict_header = dict_hdr_get(mtr); - - /* Start counting row, table, index, and tree ids from - DICT_HDR_FIRST_ID */ - mlog_write_ull(dict_header + DICT_HDR_ROW_ID, - DICT_HDR_FIRST_ID, mtr); - - mlog_write_ull(dict_header + DICT_HDR_TABLE_ID, - DICT_HDR_FIRST_ID, mtr); - - mlog_write_ull(dict_header + DICT_HDR_INDEX_ID, - DICT_HDR_FIRST_ID, mtr); - - mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID, - 0, MLOG_4BYTES, mtr); - - /* Obsolete, but we must initialize it anyway. */ - mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW, - DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr); - - /* Create the B-tree roots for the clustered indexes of the basic - system tables */ - - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_TABLES_ID, - dict_ind_redundant, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0, - DICT_TABLE_IDS_ID, - dict_ind_redundant, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_COLUMNS_ID, - dict_ind_redundant, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no, - 
MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_INDEXES_ID, - dict_ind_redundant, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, 0, DICT_FIELDS_ID, - dict_ind_redundant, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - - return(TRUE); -} - -/*****************************************************************//** -Initializes the data dictionary memory structures when the database is -started. This function is also called when the data dictionary is created. -@return DB_SUCCESS or error code. */ -UNIV_INTERN -dberr_t -dict_boot(void) -/*===========*/ -{ - dict_table_t* table; - dict_index_t* index; - dict_hdr_t* dict_hdr; - mem_heap_t* heap; - mtr_t mtr; - dberr_t error; - - /* Be sure these constants do not ever change. To avoid bloat, - only check the *NUM_FIELDS* in each table */ - - ut_ad(DICT_NUM_COLS__SYS_TABLES == 8); - ut_ad(DICT_NUM_FIELDS__SYS_TABLES == 10); - ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2); - ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7); - ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9); - ut_ad(DICT_NUM_COLS__SYS_INDEXES == 7); - ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 9); - ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3); - ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5); - ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4); - ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN == 6); - ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2); - ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4); - ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6); - - mtr_start(&mtr); - - /* Create the hash tables etc. 
*/ - dict_init(); - - heap = mem_heap_create(450); - - mutex_enter(&(dict_sys->mutex)); - - /* Get the dictionary header */ - dict_hdr = dict_hdr_get(&mtr); - - /* Because we only write new row ids to disk-based data structure - (dictionary header) when it is divisible by - DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover - the latest value of the row id counter. Therefore we advance - the counter at the database startup to avoid overlapping values. - Note that when a user after database startup first time asks for - a new row id, then because the counter is now divisible by - ..._MARGIN, it will immediately be updated to the disk-based - header. */ - - dict_sys->row_id = DICT_HDR_ROW_ID_WRITE_MARGIN - + ut_uint64_align_up(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID), - DICT_HDR_ROW_ID_WRITE_MARGIN); - - /* Insert into the dictionary cache the descriptions of the basic - system tables */ - /*-------------------------*/ - table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0); - - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); - /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */ - dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4); - /* The low order bit of TYPE is always set to 1. If the format - is UNIV_FORMAT_B or higher, this field matches table->flags. */ - dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0); - /* MIX_LEN may contain additional table flags when - ROW_FORMAT!=REDUNDANT. Currently, these flags include - DICT_TF2_TEMPORARY. 
*/ - dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); - - table->id = DICT_TABLES_ID; - - dict_table_add_to_cache(table, FALSE, heap); - dict_sys->sys_tables = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_TABLES", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 1); - - dict_mem_index_add_field(index, "NAME", 0); - - index->id = DICT_TABLES_ID; - btr_search_index_init(index); - - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_TABLES, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); - - /*-------------------------*/ - index = dict_mem_index_create("SYS_TABLES", "ID_IND", - DICT_HDR_SPACE, DICT_UNIQUE, 1); - dict_mem_index_add_field(index, "ID", 0); - - index->id = DICT_TABLE_IDS_ID; - btr_search_index_init(index); - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_TABLE_IDS, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); - - /*-------------------------*/ - table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0); - - dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4); - - table->id = DICT_COLUMNS_ID; - - dict_table_add_to_cache(table, FALSE, heap); - dict_sys->sys_columns = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "TABLE_ID", 0); - 
dict_mem_index_add_field(index, "POS", 0); - - index->id = DICT_COLUMNS_ID; - btr_search_index_init(index); - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_COLUMNS, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); - - /*-------------------------*/ - table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0); - - dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4); - - table->id = DICT_INDEXES_ID; - - dict_table_add_to_cache(table, FALSE, heap); - dict_sys->sys_indexes = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "TABLE_ID", 0); - dict_mem_index_add_field(index, "ID", 0); - - index->id = DICT_INDEXES_ID; - btr_search_index_init(index); - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_INDEXES, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); - - /*-------------------------*/ - table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0); - - dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0); - - table->id = DICT_FIELDS_ID; - - dict_table_add_to_cache(table, FALSE, heap); - dict_sys->sys_fields = table; - mem_heap_free(heap); - - index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - 
dict_mem_index_add_field(index, "INDEX_ID", 0); - dict_mem_index_add_field(index, "POS", 0); - - index->id = DICT_FIELDS_ID; - btr_search_index_init(index); - error = dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr - + DICT_HDR_FIELDS, - MLOG_4BYTES, &mtr), - FALSE); - ut_a(error == DB_SUCCESS); - - mtr_commit(&mtr); - - /*-------------------------*/ - - /* Initialize the insert buffer table and index for each tablespace */ - - dberr_t err = DB_SUCCESS; - - err = ibuf_init_at_db_start(); - - if (err == DB_SUCCESS) { - if (srv_read_only_mode && !ibuf_is_empty()) { - - if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Change buffer must be empty when --innodb-read-only " - "is set!" - "You can try to recover the database with innodb_force_recovery=5"); - - err = DB_ERROR; - } else { - ib_logf(IB_LOG_LEVEL_WARN, - "Change buffer not empty when --innodb-read-only " - "is set! but srv_force_recovery = %lu, ignoring.", - srv_force_recovery); - } - } - - if (err == DB_SUCCESS) { - /* Load definitions of other indexes on system tables */ - - dict_load_sys_table(dict_sys->sys_tables); - dict_load_sys_table(dict_sys->sys_columns); - dict_load_sys_table(dict_sys->sys_indexes); - dict_load_sys_table(dict_sys->sys_fields); - } - - mutex_exit(&(dict_sys->mutex)); - } - - return(err); -} - -/*****************************************************************//** -Inserts the basic system table data into themselves in the database -creation. */ -static -void -dict_insert_initial_data(void) -/*==========================*/ -{ - /* Does nothing yet */ -} - -/*****************************************************************//** -Creates and initializes the data dictionary at the server bootstrap. -@return DB_SUCCESS or error code. 
*/ -UNIV_INTERN -dberr_t -dict_create(void) -/*=============*/ -{ - mtr_t mtr; - - mtr_start(&mtr); - - dict_hdr_create(&mtr); - - mtr_commit(&mtr); - - dberr_t err = dict_boot(); - - if (err == DB_SUCCESS) { - dict_insert_initial_data(); - } - - return(err); -} diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc deleted file mode 100644 index 6d5b12474eb..00000000000 --- a/storage/xtradb/dict/dict0crea.cc +++ /dev/null @@ -1,1992 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dict/dict0crea.cc -Database object creation - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0crea.h" - -#ifdef UNIV_NONINL -#include "dict0crea.ic" -#endif - -#include "btr0pcur.h" -#include "btr0btr.h" -#include "page0page.h" -#include "mach0data.h" -#include "dict0boot.h" -#include "dict0dict.h" -#include "que0que.h" -#include "row0ins.h" -#include "row0mysql.h" -#include "row0sel.h" -#include "pars0pars.h" -#include "trx0roll.h" -#include "usr0sess.h" -#include "ut0vec.h" -#include "dict0priv.h" -#include "fts0priv.h" -#include "ha_prototypes.h" - -/*****************************************************************//** -Based on a table object, this function builds the entry to be inserted -in the SYS_TABLES system table. 
-@return the tuple which should be inserted */ -static -dtuple_t* -dict_create_sys_tables_tuple( -/*=========================*/ - const dict_table_t* table, /*!< in: table */ - mem_heap_t* heap) /*!< in: memory heap from - which the memory for the built - tuple is allocated */ -{ - dict_table_t* sys_tables; - dtuple_t* entry; - dfield_t* dfield; - byte* ptr; - ulint type; - - ut_ad(table); - ut_ad(heap); - - sys_tables = dict_sys->sys_tables; - - entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS); - - dict_table_copy_types(entry, sys_tables); - - /* 0: NAME -----------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__NAME); - - dfield_set_data(dfield, table->name, ut_strlen(table->name)); - - /* 1: DB_TRX_ID added later */ - /* 2: DB_ROLL_PTR added later */ - /* 3: ID -------------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__ID); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(ptr, table->id); - - dfield_set_data(dfield, ptr, 8); - - /* 4: N_COLS ---------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__N_COLS); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, table->n_def - | ((table->flags & DICT_TF_COMPACT) << 31)); - dfield_set_data(dfield, ptr, 4); - - /* 5: TYPE (table flags) -----------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__TYPE); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - /* Validate the table flags and convert them to what is saved in - SYS_TABLES.TYPE. Table flag values 0 and 1 are both written to - SYS_TABLES.TYPE as 1. 
*/ - type = dict_tf_to_sys_tables_type(table->flags); - mach_write_to_4(ptr, type); - - dfield_set_data(dfield, ptr, 4); - - /* 6: MIX_ID (obsolete) ---------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__MIX_ID); - - ptr = static_cast<byte*>(mem_heap_zalloc(heap, 8)); - - dfield_set_data(dfield, ptr, 8); - - /* 7: MIX_LEN (additional flags) --------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__MIX_LEN); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - /* Be sure all non-used bits are zero. */ - ut_a(!(table->flags2 & ~DICT_TF2_BIT_MASK)); - mach_write_to_4(ptr, table->flags2); - - dfield_set_data(dfield, ptr, 4); - - /* 8: CLUSTER_NAME ---------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__CLUSTER_ID); - dfield_set_null(dfield); /* not supported */ - - /* 9: SPACE ----------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_TABLES__SPACE); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, table->space); - - dfield_set_data(dfield, ptr, 4); - /*----------------------------------*/ - - return(entry); -} - -/*****************************************************************//** -Based on a table object, this function builds the entry to be inserted -in the SYS_COLUMNS system table. 
-@return the tuple which should be inserted */ -static -dtuple_t* -dict_create_sys_columns_tuple( -/*==========================*/ - const dict_table_t* table, /*!< in: table */ - ulint i, /*!< in: column number */ - mem_heap_t* heap) /*!< in: memory heap from - which the memory for the built - tuple is allocated */ -{ - dict_table_t* sys_columns; - dtuple_t* entry; - const dict_col_t* column; - dfield_t* dfield; - byte* ptr; - const char* col_name; - - ut_ad(table); - ut_ad(heap); - - column = dict_table_get_nth_col(table, i); - - sys_columns = dict_sys->sys_columns; - - entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); - - dict_table_copy_types(entry, sys_columns); - - /* 0: TABLE_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__TABLE_ID); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(ptr, table->id); - - dfield_set_data(dfield, ptr, 8); - - /* 1: POS ----------------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, i); - - dfield_set_data(dfield, ptr, 4); - - /* 2: DB_TRX_ID added later */ - /* 3: DB_ROLL_PTR added later */ - /* 4: NAME ---------------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME); - - col_name = dict_table_get_col_name(table, i); - dfield_set_data(dfield, col_name, ut_strlen(col_name)); - - /* 5: MTYPE --------------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__MTYPE); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, column->mtype); - - dfield_set_data(dfield, ptr, 4); - - /* 6: PRTYPE -------------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PRTYPE); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, column->prtype); - - dfield_set_data(dfield, ptr, 4); - - /* 7: LEN ----------------------------*/ - dfield = 
dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__LEN); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, column->len); - - dfield_set_data(dfield, ptr, 4); - - /* 8: PREC ---------------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, 0/* unused */); - - dfield_set_data(dfield, ptr, 4); - /*---------------------------------*/ - - return(entry); -} - -/***************************************************************//** -Builds a table definition to insert. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -dict_build_table_def_step( -/*======================*/ - que_thr_t* thr, /*!< in: query thread */ - tab_node_t* node) /*!< in: table create node */ -{ - dict_table_t* table; - dtuple_t* row; - dberr_t error; - const char* path; - mtr_t mtr; - ulint space = 0; - bool use_tablespace; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = node->table; - use_tablespace = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE); - - dict_hdr_get_new_id(&table->id, NULL, NULL); - - thr_get_trx(thr)->table_id = table->id; - - /* Always set this bit for all new created tables */ - DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); - DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - DICT_TF2_FLAG_UNSET(table, - DICT_TF2_FTS_AUX_HEX_NAME);); - - if (use_tablespace) { - /* This table will not use the system tablespace. - Get a new space id. */ - dict_hdr_get_new_id(NULL, NULL, &space); - - DBUG_EXECUTE_IF( - "ib_create_table_fail_out_of_space_ids", - space = ULINT_UNDEFINED; - ); - - if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { - return(DB_ERROR); - } - - /* We create a new single-table tablespace for the table. 
- We initially let it be 4 pages: - - page 0 is the fsp header and an extent descriptor page, - - page 1 is an ibuf bitmap page, - - page 2 is the first inode page, - - page 3 will contain the root of the clustered index of the - table we create here. */ - - path = table->data_dir_path ? table->data_dir_path - : table->dir_path_of_temp_table; - - ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX); - ut_ad(!dict_table_zip_size(table) - || dict_table_get_format(table) >= UNIV_FORMAT_B); - - error = fil_create_new_single_table_tablespace( - space, table->name, path, - dict_tf_to_fsp_flags(table->flags), - table->flags2, - FIL_IBD_FILE_INITIAL_SIZE, - node->mode, node->key_id); - - table->space = (unsigned int) space; - - if (error != DB_SUCCESS) { - - return(error); - } - - mtr_start(&mtr); - - fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr); - - mtr_commit(&mtr); - } else { - /* Create in the system tablespace: disallow Barracuda - features by keeping only the first bit which says whether - the row format is redundant or compact */ - table->flags &= DICT_TF_COMPACT; - } - - row = dict_create_sys_tables_tuple(table, node->heap); - - ins_node_set_new_row(node->tab_def, row); - - return(DB_SUCCESS); -} - -/***************************************************************//** -Builds a column definition to insert. */ -static -void -dict_build_col_def_step( -/*====================*/ - tab_node_t* node) /*!< in: table create node */ -{ - dtuple_t* row; - - row = dict_create_sys_columns_tuple(node->table, node->col_no, - node->heap); - ins_node_set_new_row(node->col_def, row); -} - -/*****************************************************************//** -Based on an index object, this function builds the entry to be inserted -in the SYS_INDEXES system table. 
-@return the tuple which should be inserted */ -static -dtuple_t* -dict_create_sys_indexes_tuple( -/*==========================*/ - const dict_index_t* index, /*!< in: index */ - mem_heap_t* heap) /*!< in: memory heap from - which the memory for the built - tuple is allocated */ -{ - dict_table_t* sys_indexes; - dict_table_t* table; - dtuple_t* entry; - dfield_t* dfield; - byte* ptr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(index); - ut_ad(heap); - - sys_indexes = dict_sys->sys_indexes; - - table = dict_table_get_low(index->table_name); - - entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); - - dict_table_copy_types(entry, sys_indexes); - - /* 0: TABLE_ID -----------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_INDEXES__TABLE_ID); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(ptr, table->id); - - dfield_set_data(dfield, ptr, 8); - - /* 1: ID ----------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_INDEXES__ID); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(ptr, index->id); - - dfield_set_data(dfield, ptr, 8); - - /* 2: DB_TRX_ID added later */ - /* 3: DB_ROLL_PTR added later */ - /* 4: NAME --------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_INDEXES__NAME); - - dfield_set_data(dfield, index->name, ut_strlen(index->name)); - - /* 5: N_FIELDS ----------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_INDEXES__N_FIELDS); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, index->n_fields); - - dfield_set_data(dfield, ptr, 4); - - /* 6: TYPE --------------------------*/ - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_INDEXES__TYPE); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, index->type); - - dfield_set_data(dfield, ptr, 4); - - /* 7: SPACE --------------------------*/ - - dfield = dtuple_get_nth_field( - entry, 
DICT_COL__SYS_INDEXES__SPACE); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, index->space); - - dfield_set_data(dfield, ptr, 4); - - /* 8: PAGE_NO --------------------------*/ - - dfield = dtuple_get_nth_field( - entry, DICT_COL__SYS_INDEXES__PAGE_NO); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(ptr, FIL_NULL); - - dfield_set_data(dfield, ptr, 4); - - /*--------------------------------*/ - - return(entry); -} - -/*****************************************************************//** -Based on an index object, this function builds the entry to be inserted -in the SYS_FIELDS system table. -@return the tuple which should be inserted */ -static -dtuple_t* -dict_create_sys_fields_tuple( -/*=========================*/ - const dict_index_t* index, /*!< in: index */ - ulint fld_no, /*!< in: field number */ - mem_heap_t* heap) /*!< in: memory heap from - which the memory for the built - tuple is allocated */ -{ - dict_table_t* sys_fields; - dtuple_t* entry; - dict_field_t* field; - dfield_t* dfield; - byte* ptr; - ibool index_contains_column_prefix_field = FALSE; - ulint j; - - ut_ad(index); - ut_ad(heap); - - for (j = 0; j < index->n_fields; j++) { - if (dict_index_get_nth_field(index, j)->prefix_len > 0) { - index_contains_column_prefix_field = TRUE; - break; - } - } - - field = dict_index_get_nth_field(index, fld_no); - - sys_fields = dict_sys->sys_fields; - - entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); - - dict_table_copy_types(entry, sys_fields); - - /* 0: INDEX_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__INDEX_ID); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(ptr, index->id); - - dfield_set_data(dfield, ptr, 8); - - /* 1: POS; FIELD NUMBER & PREFIX LENGTH -----------------------*/ - - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__POS); - - ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - if 
(index_contains_column_prefix_field) { - /* If there are column prefix fields in the index, then - we store the number of the field to the 2 HIGH bytes - and the prefix length to the 2 low bytes, */ - - mach_write_to_4(ptr, (fld_no << 16) + field->prefix_len); - } else { - /* Else we store the number of the field to the 2 LOW bytes. - This is to keep the storage format compatible with - InnoDB versions < 4.0.14. */ - - mach_write_to_4(ptr, fld_no); - } - - dfield_set_data(dfield, ptr, 4); - - /* 2: DB_TRX_ID added later */ - /* 3: DB_ROLL_PTR added later */ - /* 4: COL_NAME -------------------------*/ - dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__COL_NAME); - - dfield_set_data(dfield, field->name, - ut_strlen(field->name)); - /*---------------------------------*/ - - return(entry); -} - -/*****************************************************************//** -Creates the tuple with which the index entry is searched for writing the index -tree root page number, if such a tree is created. -@return the tuple for search */ -static -dtuple_t* -dict_create_search_tuple( -/*=====================*/ - const dtuple_t* tuple, /*!< in: the tuple inserted in the SYS_INDEXES - table */ - mem_heap_t* heap) /*!< in: memory heap from which the memory for - the built tuple is allocated */ -{ - dtuple_t* search_tuple; - const dfield_t* field1; - dfield_t* field2; - - ut_ad(tuple && heap); - - search_tuple = dtuple_create(heap, 2); - - field1 = dtuple_get_nth_field(tuple, 0); - field2 = dtuple_get_nth_field(search_tuple, 0); - - dfield_copy(field2, field1); - - field1 = dtuple_get_nth_field(tuple, 1); - field2 = dtuple_get_nth_field(search_tuple, 1); - - dfield_copy(field2, field1); - - ut_ad(dtuple_validate(search_tuple)); - - return(search_tuple); -} - -/***************************************************************//** -Builds an index definition row to insert. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -dict_build_index_def_step( -/*======================*/ - que_thr_t* thr, /*!< in: query thread */ - ind_node_t* node) /*!< in: index create node */ -{ - dict_table_t* table; - dict_index_t* index; - dtuple_t* row; - trx_t* trx; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = thr_get_trx(thr); - - index = node->index; - - table = dict_table_get_low(index->table_name); - - if (table == NULL) { - return(DB_TABLE_NOT_FOUND); - } - - if (!trx->table_id) { - /* Record only the first table id. */ - trx->table_id = table->id; - } - - node->table = table; - - ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) - || dict_index_is_clust(index)); - - dict_hdr_get_new_id(NULL, &index->id, NULL); - - /* Inherit the space id from the table; we store all indexes of a - table in the same tablespace */ - - index->space = table->space; - node->page_no = FIL_NULL; - row = dict_create_sys_indexes_tuple(index, node->heap); - node->ind_row = row; - - ins_node_set_new_row(node->ind_def, row); - - /* Note that the index was created by this transaction. */ - index->trx_id = trx->id; - ut_ad(table->def_trx_id <= trx->id); - table->def_trx_id = trx->id; - - return(DB_SUCCESS); -} - -/***************************************************************//** -Builds a field definition row to insert. */ -static -void -dict_build_field_def_step( -/*======================*/ - ind_node_t* node) /*!< in: index create node */ -{ - dict_index_t* index; - dtuple_t* row; - - index = node->index; - - row = dict_create_sys_fields_tuple(index, node->field_no, node->heap); - - ins_node_set_new_row(node->field_def, row); -} - -/***************************************************************//** -Creates an index tree for the index if it is not a member of a cluster. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -dict_create_index_tree_step( -/*========================*/ - ind_node_t* node) /*!< in: index create node */ -{ - dict_index_t* index; - dict_table_t* sys_indexes; - dtuple_t* search_tuple; - btr_pcur_t pcur; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - index = node->index; - - sys_indexes = dict_sys->sys_indexes; - - if (index->type == DICT_FTS) { - /* FTS index does not need an index tree */ - return(DB_SUCCESS); - } - - /* Run a mini-transaction in which the index tree is allocated for - the index and its root address is written to the index entry in - sys_indexes */ - - mtr_start(&mtr); - - search_tuple = dict_create_search_tuple(node->ind_row, node->heap); - - btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes), - search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, &mtr); - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - - dberr_t err = DB_SUCCESS; - ulint zip_size = dict_table_zip_size(index->table); - - if (node->index->table->file_unreadable - || dict_table_is_discarded(node->index->table)) { - - node->page_no = FIL_NULL; - } else { - node->page_no = btr_create( - index->type, index->space, zip_size, - index->id, index, &mtr); - - if (node->page_no == FIL_NULL) { - err = DB_OUT_OF_FILE_SPACE; - } - - DBUG_EXECUTE_IF("ib_import_create_index_failure_1", - node->page_no = FIL_NULL; - err = DB_OUT_OF_FILE_SPACE; ); - } - - page_rec_write_field( - btr_pcur_get_rec(&pcur), DICT_FLD__SYS_INDEXES__PAGE_NO, - node->page_no, &mtr); - - btr_pcur_close(&pcur); - - mtr_commit(&mtr); - - return(err); -} - -/*******************************************************************//** -Drops the index tree associated with a row in SYS_INDEXES table. 
*/ -UNIV_INTERN -void -dict_drop_index_tree( -/*=================*/ - rec_t* rec, /*!< in/out: record in the clustered index - of SYS_INDEXES table */ - mtr_t* mtr) /*!< in: mtr having the latch on the record page */ -{ - ulint root_page_no; - ulint space; - ulint zip_size; - const byte* ptr; - ulint len; - - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); - - ut_ad(len == 4); - - root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (root_page_no == FIL_NULL) { - /* The tree has already been freed */ - - return; - } - - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__SPACE, &len); - - ut_ad(len == 4); - - space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - zip_size = fil_space_get_zip_size(space); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ - - return; - } - - /* We free all the pages but the root page first; this operation - may span several mini-transactions */ - - btr_free_but_not_root(space, zip_size, root_page_no); - - /* Then we free the root page in the same mini-transaction where - we write FIL_NULL to the appropriate field in the SYS_INDEXES - record: this mini-transaction marks the B-tree totally freed */ - - /* printf("Dropping index tree in space %lu root page %lu\n", space, - root_page_no); */ - btr_free_root(space, zip_size, root_page_no, mtr); - - page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, - FIL_NULL, mtr); -} - -/*******************************************************************//** -Truncates the index tree associated with a row in SYS_INDEXES table. 
-@return new root page number, or FIL_NULL on failure */ -UNIV_INTERN -ulint -dict_truncate_index_tree( -/*=====================*/ - dict_table_t* table, /*!< in: the table the index belongs to */ - ulint space, /*!< in: 0=truncate, - nonzero=create the index tree in the - given tablespace */ - btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to - record in the clustered index of - SYS_INDEXES table. The cursor may be - repositioned in this call. */ - mtr_t* mtr) /*!< in: mtr having the latch - on the record page. The mtr may be - committed and restarted in this call. */ -{ - ulint root_page_no; - ibool drop = !space; - ulint zip_size; - ulint type; - index_id_t index_id; - rec_t* rec; - const byte* ptr; - ulint len; - dict_index_t* index; - bool has_been_dropped = false; - - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); - rec = btr_pcur_get_rec(pcur); - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); - - ut_ad(len == 4); - - root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (drop && root_page_no == FIL_NULL) { - has_been_dropped = true; - drop = FALSE; - } - - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__SPACE, &len); - - ut_ad(len == 4); - - if (drop) { - space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - } - - zip_size = fil_space_get_zip_size(space); - - if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Trying to TRUNCATE" - " a missing .ibd file of table %s!\n", table->name); - return(FIL_NULL); - } - - ptr = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TYPE, &len); - ut_ad(len == 4); - type = mach_read_from_4(ptr); - - ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len); - ut_ad(len == 8); - index_id = mach_read_from_8(ptr); - - if (!drop) { - - goto create; - } - - /* We free all the pages but 
the root page first; this operation - may span several mini-transactions */ - - btr_free_but_not_root(space, zip_size, root_page_no); - - /* Then we free the root page in the same mini-transaction where - we create the b-tree and write its new root page number to the - appropriate field in the SYS_INDEXES record: this mini-transaction - marks the B-tree totally truncated */ - - btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr); - - btr_free_root(space, zip_size, root_page_no, mtr); -create: - /* We will temporarily write FIL_NULL to the PAGE_NO field - in SYS_INDEXES, so that the database will not get into an - inconsistent state in case it crashes between the mtr_commit() - below and the following mtr_commit() call. */ - page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, - FIL_NULL, mtr); - - /* We will need to commit the mini-transaction in order to avoid - deadlocks in the btr_create() call, because otherwise we would - be freeing and allocating pages in the same mini-transaction. */ - btr_pcur_store_position(pcur, mtr); - mtr_commit(mtr); - - mtr_start(mtr); - btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); - - /* Find the index corresponding to this SYS_INDEXES record. 
*/ - for (index = UT_LIST_GET_FIRST(table->indexes); - index; - index = UT_LIST_GET_NEXT(indexes, index)) { - if (index->id == index_id) { - if (index->type & DICT_FTS) { - return(FIL_NULL); - } else { - if (has_been_dropped) { - fprintf(stderr, " InnoDB: Trying to" - " TRUNCATE a missing index of" - " table %s!\n", - index->table->name); - } - - root_page_no = btr_create(type, space, zip_size, - index_id, index, mtr); - index->page = (unsigned int) root_page_no; - return(root_page_no); - } - } - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Index %llu of table %s is missing\n" - "InnoDB: from the data dictionary during TRUNCATE!\n", - (ullint) index_id, - table->name); - - return(FIL_NULL); -} - -/*********************************************************************//** -Creates a table create graph. -@return own: table create node */ -UNIV_INTERN -tab_node_t* -tab_create_graph_create( -/*====================*/ - dict_table_t* table, /*!< in: table to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit, /*!< in: true if the commit node should be - added to the query graph */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ -{ - tab_node_t* node; - - node = static_cast<tab_node_t*>( - mem_heap_alloc(heap, sizeof(tab_node_t))); - - node->common.type = QUE_NODE_CREATE_TABLE; - - node->table = table; - - node->state = TABLE_BUILD_TABLE_DEF; - node->heap = mem_heap_create(256); - node->mode = mode; - node->key_id = key_id; - - node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables, - heap); - node->tab_def->common.parent = node; - - node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns, - heap); - node->col_def->common.parent = node; - - if (commit) { - node->commit_node = trx_commit_node_create(heap); - node->commit_node->common.parent = node; - } else { - node->commit_node = 0; - } - - return(node); -} - 
-/*********************************************************************//** -Creates an index create graph. -@return own: index create node */ -UNIV_INTERN -ind_node_t* -ind_create_graph_create( -/*====================*/ - dict_index_t* index, /*!< in: index to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit) /*!< in: true if the commit node should be - added to the query graph */ -{ - ind_node_t* node; - - node = static_cast<ind_node_t*>( - mem_heap_alloc(heap, sizeof(ind_node_t))); - - node->common.type = QUE_NODE_CREATE_INDEX; - - node->index = index; - - node->state = INDEX_BUILD_INDEX_DEF; - node->page_no = FIL_NULL; - node->heap = mem_heap_create(256); - - node->ind_def = ins_node_create(INS_DIRECT, - dict_sys->sys_indexes, heap); - node->ind_def->common.parent = node; - - node->field_def = ins_node_create(INS_DIRECT, - dict_sys->sys_fields, heap); - node->field_def->common.parent = node; - - if (commit) { - node->commit_node = trx_commit_node_create(heap); - node->commit_node->common.parent = node; - } else { - node->commit_node = 0; - } - - return(node); -} - -/***********************************************************//** -Creates a table. This is a high-level function used in SQL execution graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -dict_create_table_step( -/*===================*/ - que_thr_t* thr) /*!< in: query thread */ -{ - tab_node_t* node; - dberr_t err = DB_ERROR; - trx_t* trx; - - ut_ad(thr); - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = thr_get_trx(thr); - - node = static_cast<tab_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = TABLE_BUILD_TABLE_DEF; - } - - if (node->state == TABLE_BUILD_TABLE_DEF) { - - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - - err = dict_build_table_def_step(thr, node); - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->state = TABLE_BUILD_COL_DEF; - node->col_no = 0; - - thr->run_node = node->tab_def; - - return(thr); - } - - if (node->state == TABLE_BUILD_COL_DEF) { - - if (node->col_no < (node->table)->n_def) { - - dict_build_col_def_step(node); - - node->col_no++; - - thr->run_node = node->col_def; - - return(thr); - } else { - node->state = TABLE_COMMIT_WORK; - } - } - - if (node->state == TABLE_COMMIT_WORK) { - - /* Table was correctly defined: do NOT commit the transaction - (CREATE TABLE does NOT do an implicit commit of the current - transaction) */ - - node->state = TABLE_ADD_TO_CACHE; - - /* thr->run_node = node->commit_node; - - return(thr); */ - } - - if (node->state == TABLE_ADD_TO_CACHE) { - - dict_table_add_to_cache(node->table, TRUE, node->heap); - - err = DB_SUCCESS; - } - -function_exit: - trx->error_state = err; - - if (err == DB_SUCCESS) { - /* Ok: do nothing */ - - } else if (err == DB_LOCK_WAIT) { - - return(NULL); - } else { - /* SQL error detected */ - - return(NULL); - } - - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/***********************************************************//** -Creates an index. This is a high-level function used in SQL execution -graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -dict_create_index_step( -/*===================*/ - que_thr_t* thr) /*!< in: query thread */ -{ - ind_node_t* node; - dberr_t err = DB_ERROR; - trx_t* trx; - - ut_ad(thr); - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = thr_get_trx(thr); - - node = static_cast<ind_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = INDEX_BUILD_INDEX_DEF; - } - - if (node->state == INDEX_BUILD_INDEX_DEF) { - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - err = dict_build_index_def_step(thr, node); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->state = INDEX_BUILD_FIELD_DEF; - node->field_no = 0; - - thr->run_node = node->ind_def; - - return(thr); - } - - if (node->state == INDEX_BUILD_FIELD_DEF) { - - if (node->field_no < (node->index)->n_fields) { - - dict_build_field_def_step(node); - - node->field_no++; - - thr->run_node = node->field_def; - - return(thr); - } else { - node->state = INDEX_ADD_TO_CACHE; - } - } - - if (node->state == INDEX_ADD_TO_CACHE) { - - index_id_t index_id = node->index->id; - - err = dict_index_add_to_cache( - node->table, node->index, FIL_NULL, - trx_is_strict(trx) - || dict_table_get_format(node->table) - >= UNIV_FORMAT_B); - - node->index = dict_index_get_if_in_cache_low(index_id); - ut_a((node->index == 0) == (err != DB_SUCCESS)); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->state = INDEX_CREATE_INDEX_TREE; - } - - if (node->state == INDEX_CREATE_INDEX_TREE) { - - err = dict_create_index_tree_step(node); - - DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail", - err = DB_OUT_OF_MEMORY;); - - if (err != DB_SUCCESS) { - /* If this is a FTS index, we will need to remove - it from fts->cache->indexes list as well */ - if ((node->index->type & DICT_FTS) - && node->table->fts) { - fts_index_cache_t* index_cache; - - rw_lock_x_lock( - 
&node->table->fts->cache->init_lock); - - index_cache = (fts_index_cache_t*) - fts_find_index_cache( - node->table->fts->cache, - node->index); - - if (index_cache->words) { - rbt_free(index_cache->words); - index_cache->words = 0; - } - - ib_vector_remove( - node->table->fts->cache->indexes, - *reinterpret_cast<void**>(index_cache)); - - rw_lock_x_unlock( - &node->table->fts->cache->init_lock); - } - - dict_index_remove_from_cache(node->table, node->index); - node->index = NULL; - - goto function_exit; - } - - node->index->page = node->page_no; - /* These should have been set in - dict_build_index_def_step() and - dict_index_add_to_cache(). */ - ut_ad(node->index->trx_id == trx->id); - ut_ad(node->index->table->def_trx_id == trx->id); - node->state = INDEX_COMMIT_WORK; - } - - if (node->state == INDEX_COMMIT_WORK) { - - /* Index was correctly defined: do NOT commit the transaction - (CREATE INDEX does NOT currently do an implicit commit of - the current transaction) */ - - node->state = INDEX_CREATE_INDEX_TREE; - - /* thr->run_node = node->commit_node; - - return(thr); */ - } - -function_exit: - trx->error_state = err; - - if (err == DB_SUCCESS) { - /* Ok: do nothing */ - - } else if (err == DB_LOCK_WAIT) { - - return(NULL); - } else { - /* SQL error detected */ - - return(NULL); - } - - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/****************************************************************//** -Check whether a system table exists. Additionally, if it exists, -move it to the non-LRU end of the table LRU list. This is oly used -for system tables that can be upgraded or added to an older database, -which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and -SYS_DATAFILES. 
-@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists -but is not current, DB_TABLE_NOT_FOUND if it does not exist*/ -static -dberr_t -dict_check_if_system_table_exists( -/*==============================*/ - const char* tablename, /*!< in: name of table */ - ulint num_fields, /*!< in: number of fields */ - ulint num_indexes) /*!< in: number of indexes */ -{ - dict_table_t* sys_table; - dberr_t error = DB_SUCCESS; - - ut_a(srv_get_active_thread_type() == SRV_NONE); - - mutex_enter(&dict_sys->mutex); - - sys_table = dict_table_get_low(tablename); - - if (sys_table == NULL) { - error = DB_TABLE_NOT_FOUND; - - } else if (UT_LIST_GET_LEN(sys_table->indexes) != num_indexes - || sys_table->n_cols != num_fields) { - error = DB_CORRUPTION; - - } else { - /* This table has already been created, and it is OK. - Ensure that it can't be evicted from the table LRU cache. */ - - dict_table_move_from_lru_to_non_lru(sys_table); - } - - mutex_exit(&dict_sys->mutex); - - return(error); -} - -/****************************************************************//** -Creates the foreign key constraints system tables inside InnoDB -at server bootstrap or server start if they are not found or are -not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_create_or_check_foreign_constraint_tables(void) -/*================================================*/ -{ - trx_t* trx; - my_bool srv_file_per_table_backup; - dberr_t err; - dberr_t sys_foreign_err; - dberr_t sys_foreign_cols_err; - - ut_a(srv_get_active_thread_type() == SRV_NONE); - - /* Note: The master thread has not been started at this point. 
*/ - - - sys_foreign_err = dict_check_if_system_table_exists( - "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3); - sys_foreign_cols_err = dict_check_if_system_table_exists( - "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1); - - if (sys_foreign_err == DB_SUCCESS - && sys_foreign_cols_err == DB_SUCCESS) { - return(DB_SUCCESS); - } - - trx = trx_allocate_for_mysql(); - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - trx->op_info = "creating foreign key sys tables"; - - row_mysql_lock_data_dictionary(trx); - - /* Check which incomplete table definition to drop. */ - - if (sys_foreign_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_FOREIGN table."); - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); - } - - if (sys_foreign_cols_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_FOREIGN_COLS table."); - - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); - } - - ib_logf(IB_LOG_LEVEL_WARN, - "Creating foreign key constraint system tables."); - - /* NOTE: in dict_load_foreigns we use the fact that - there are 2 secondary indexes on SYS_FOREIGN, and they - are defined just like below */ - - /* NOTE: when designing InnoDB's foreign key support in 2001, we made - an error and made the table names and the foreign key id of type - 'CHAR' (internally, really a VARCHAR). We should have made the type - VARBINARY, like in other InnoDB system tables, to get a clean - design. */ - - srv_file_per_table_backup = srv_file_per_table; - - /* We always want SYSTEM tables to be created inside the system - tablespace. 
*/ - - srv_file_per_table = 0; - - err = que_eval_sql( - NULL, - "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n" - "BEGIN\n" - "CREATE TABLE\n" - "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR," - " REF_NAME CHAR, N_COLS INT);\n" - "CREATE UNIQUE CLUSTERED INDEX ID_IND" - " ON SYS_FOREIGN (ID);\n" - "CREATE INDEX FOR_IND" - " ON SYS_FOREIGN (FOR_NAME);\n" - "CREATE INDEX REF_IND" - " ON SYS_FOREIGN (REF_NAME);\n" - "CREATE TABLE\n" - "SYS_FOREIGN_COLS(ID CHAR, POS INT," - " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n" - "CREATE UNIQUE CLUSTERED INDEX ID_IND" - " ON SYS_FOREIGN_COLS (ID, POS);\n" - "END;\n", - FALSE, trx); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS " - "has failed with error %lu. Tablespace is full. " - "Dropping incompletely created tables.", - (ulong) err); - - ut_ad(err == DB_OUT_OF_FILE_SPACE - || err == DB_TOO_MANY_CONCURRENT_TRXS); - - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); - - if (err == DB_OUT_OF_FILE_SPACE) { - err = DB_MUST_GET_MORE_FILE_SPACE; - } - } - - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx_free_for_mysql(trx); - - srv_file_per_table = srv_file_per_table_backup; - - if (err == DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_INFO, - "Foreign key constraint system tables created"); - } - - /* Note: The master thread has not been started at this point. */ - /* Confirm and move to the non-LRU part of the table LRU list. */ - sys_foreign_err = dict_check_if_system_table_exists( - "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3); - ut_a(sys_foreign_err == DB_SUCCESS); - - sys_foreign_cols_err = dict_check_if_system_table_exists( - "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1); - ut_a(sys_foreign_cols_err == DB_SUCCESS); - - return(err); -} - -/****************************************************************//** -Evaluate the given foreign key SQL statement. 
-@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -dict_foreign_eval_sql( -/*==================*/ - pars_info_t* info, /*!< in: info struct */ - const char* sql, /*!< in: SQL string to evaluate */ - const char* name, /*!< in: table name (for diagnostics) */ - const char* id, /*!< in: foreign key id */ - trx_t* trx) /*!< in/out: transaction */ -{ - dberr_t error; - FILE* ef = dict_foreign_err_file; - - error = que_eval_sql(info, sql, FALSE, trx); - - if (error == DB_DUPLICATE_KEY) { - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - fputs(" Error in foreign key constraint creation for table ", - ef); - ut_print_name(ef, trx, TRUE, name); - fputs(".\nA foreign key constraint of name ", ef); - ut_print_name(ef, trx, TRUE, id); - fputs("\nalready exists." - " (Note that internally InnoDB adds 'databasename'\n" - "in front of the user-defined constraint name.)\n" - "Note that InnoDB's FOREIGN KEY system tables store\n" - "constraint names as case-insensitive, with the\n" - "MySQL standard latin1_swedish_ci collation. If you\n" - "create tables or databases whose names differ only in\n" - "the character case, then collisions in constraint\n" - "names can occur. 
Workaround: name your constraints\n" - "explicitly with unique names.\n", - ef); - - mutex_exit(&dict_foreign_err_mutex); - - return(error); - } - - if (error != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Foreign key constraint creation failed:\n" - "InnoDB: internal error number %lu\n", (ulong) error); - - mutex_enter(&dict_foreign_err_mutex); - ut_print_timestamp(ef); - fputs(" Internal error in foreign key constraint creation" - " for table ", ef); - ut_print_name(ef, trx, TRUE, name); - fputs(".\n" - "See the MySQL .err log in the datadir" - " for more information.\n", ef); - mutex_exit(&dict_foreign_err_mutex); - - return(error); - } - - return(DB_SUCCESS); -} - -/********************************************************************//** -Add a single foreign key field definition to the data dictionary tables in -the database. -@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -dict_create_add_foreign_field_to_dictionary( -/*========================================*/ - ulint field_nr, /*!< in: field number */ - const char* table_name, /*!< in: table name */ - const dict_foreign_t* foreign, /*!< in: foreign */ - trx_t* trx) /*!< in/out: transaction */ -{ - pars_info_t* info = pars_info_create(); - - pars_info_add_str_literal(info, "id", foreign->id); - - pars_info_add_int4_literal(info, "pos", field_nr); - - pars_info_add_str_literal(info, "for_col_name", - foreign->foreign_col_names[field_nr]); - - pars_info_add_str_literal(info, "ref_col_name", - foreign->referenced_col_names[field_nr]); - - return(dict_foreign_eval_sql( - info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_FOREIGN_COLS VALUES" - "(:id, :pos, :for_col_name, :ref_col_name);\n" - "END;\n", - table_name, foreign->id, trx)); -} - -/********************************************************************//** -Construct foreign key constraint defintion from data dictionary information. 
-*/ -UNIV_INTERN -char* -dict_foreign_def_get( -/*=================*/ - dict_foreign_t* foreign,/*!< in: foreign */ - trx_t* trx) /*!< in: trx */ -{ - char* fk_def = (char *)mem_heap_alloc(foreign->heap, 4*1024); - const char* tbname; - char tablebuf[MAX_TABLE_NAME_LEN + 1] = ""; - int i; - char* bufend; - - tbname = dict_remove_db_name(foreign->id); - bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN, - tbname, strlen(tbname), trx->mysql_thd, FALSE); - tablebuf[bufend - tablebuf] = '\0'; - - sprintf(fk_def, - (char *)"CONSTRAINT %s FOREIGN KEY (", (char *)tablebuf); - - for(i = 0; i < foreign->n_fields; i++) { - char buf[MAX_TABLE_NAME_LEN + 1] = ""; - innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - foreign->foreign_col_names[i], - strlen(foreign->foreign_col_names[i]), - trx->mysql_thd, FALSE); - strcat(fk_def, buf); - if (i < foreign->n_fields-1) { - strcat(fk_def, (char *)","); - } - } - - strcat(fk_def,(char *)") REFERENCES "); - - bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN, - foreign->referenced_table_name, - strlen(foreign->referenced_table_name), - trx->mysql_thd, TRUE); - tablebuf[bufend - tablebuf] = '\0'; - - strcat(fk_def, tablebuf); - strcat(fk_def, " ("); - - for(i = 0; i < foreign->n_fields; i++) { - char buf[MAX_TABLE_NAME_LEN + 1] = ""; - bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - foreign->referenced_col_names[i], - strlen(foreign->referenced_col_names[i]), - trx->mysql_thd, FALSE); - buf[bufend - buf] = '\0'; - strcat(fk_def, buf); - if (i < foreign->n_fields-1) { - strcat(fk_def, (char *)","); - } - } - strcat(fk_def, (char *)")"); - - return fk_def; -} - -/********************************************************************//** -Convert foreign key column names from data dictionary to SQL-layer. 
-*/ -static -void -dict_foreign_def_get_fields( -/*========================*/ - dict_foreign_t* foreign,/*!< in: foreign */ - trx_t* trx, /*!< in: trx */ - char** field, /*!< out: foreign column */ - char** field2, /*!< out: referenced column */ - int col_no) /*!< in: column number */ -{ - char* bufend; - char* fieldbuf = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1); - char* fieldbuf2 = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1); - - bufend = innobase_convert_name(fieldbuf, MAX_TABLE_NAME_LEN, - foreign->foreign_col_names[col_no], - strlen(foreign->foreign_col_names[col_no]), - trx->mysql_thd, FALSE); - - fieldbuf[bufend - fieldbuf] = '\0'; - - bufend = innobase_convert_name(fieldbuf2, MAX_TABLE_NAME_LEN, - foreign->referenced_col_names[col_no], - strlen(foreign->referenced_col_names[col_no]), - trx->mysql_thd, FALSE); - - fieldbuf2[bufend - fieldbuf2] = '\0'; - *field = fieldbuf; - *field2 = fieldbuf2; -} - -/********************************************************************//** -Add a foreign key definition to the data dictionary tables. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_add_foreign_to_dictionary( -/*==================================*/ - dict_table_t* table, - const char* name, /*!< in: table name */ - const dict_foreign_t* foreign,/*!< in: foreign key */ - trx_t* trx) /*!< in/out: dictionary transaction */ -{ - dberr_t error; - pars_info_t* info = pars_info_create(); - - pars_info_add_str_literal(info, "id", foreign->id); - - pars_info_add_str_literal(info, "for_name", name); - - pars_info_add_str_literal(info, "ref_name", - foreign->referenced_table_name); - - pars_info_add_int4_literal(info, "n_cols", - foreign->n_fields + (foreign->type << 24)); - - error = dict_foreign_eval_sql(info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_FOREIGN VALUES" - "(:id, :for_name, :ref_name, :n_cols);\n" - "END;\n" - , name, foreign->id, trx); - - if (error != DB_SUCCESS) { - if (error == DB_DUPLICATE_KEY) { - char buf[MAX_TABLE_NAME_LEN + 1] = ""; - char tablename[MAX_TABLE_NAME_LEN + 1] = ""; - char* fk_def; - - innobase_convert_name(tablename, MAX_TABLE_NAME_LEN, - table->name, strlen(table->name), - trx->mysql_thd, TRUE); - - innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE); - - fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx); - - ib_push_warning(trx, error, - "Create or Alter table %s with foreign key constraint" - " failed. Foreign key constraint %s" - " already exists on data dictionary." - " Foreign key constraint names need to be unique in database." 
- " Error in foreign key definition: %s.", - tablename, buf, fk_def); - } - - return(error); - } - - for (ulint i = 0; i < foreign->n_fields; i++) { - error = dict_create_add_foreign_field_to_dictionary( - i, name, foreign, trx); - - if (error != DB_SUCCESS) { - char buf[MAX_TABLE_NAME_LEN + 1] = ""; - char tablename[MAX_TABLE_NAME_LEN + 1] = ""; - char* field=NULL; - char* field2=NULL; - char* fk_def; - - innobase_convert_name(tablename, MAX_TABLE_NAME_LEN, - table->name, strlen(table->name), - trx->mysql_thd, TRUE); - innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE); - fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx); - dict_foreign_def_get_fields((dict_foreign_t*)foreign, trx, &field, &field2, i); - - ib_push_warning(trx, error, - "Create or Alter table %s with foreign key constraint" - " failed. Error adding foreign key constraint name %s" - " fields %s or %s to the dictionary." - " Error in foreign key definition: %s.", - tablename, buf, i+1, fk_def); - - return(error); - } - } - - return(error); -} - -/** Adds the given set of foreign key objects to the dictionary tables -in the database. This function does not modify the dictionary cache. The -caller must ensure that all foreign key objects contain a valid constraint -name in foreign->id. 
-@param[in] local_fk_set set of foreign key objects, to be added to -the dictionary tables -@param[in] table table to which the foreign key objects in -local_fk_set belong to -@param[in,out] trx transaction -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_add_foreigns_to_dictionary( -/*===================================*/ - const dict_foreign_set& local_fk_set, - const dict_table_t* table, - trx_t* trx) -{ - dict_foreign_t* foreign; - dberr_t error; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - if (NULL == dict_table_get_low("SYS_FOREIGN")) { - fprintf(stderr, - "InnoDB: table SYS_FOREIGN not found" - " in internal data dictionary\n"); - - return(DB_ERROR); - } - - for (dict_foreign_set::const_iterator it = local_fk_set.begin(); - it != local_fk_set.end(); - ++it) { - - foreign = *it; - ut_ad(foreign->id != NULL); - - error = dict_create_add_foreign_to_dictionary((dict_table_t*)table, table->name, - foreign, trx); - - if (error != DB_SUCCESS) { - - return(error); - } - } - - trx->op_info = "committing foreign key definitions"; - - trx_commit(trx); - - trx->op_info = ""; - - return(DB_SUCCESS); -} - -/****************************************************************//** -Creates the tablespaces and datafiles system tables inside InnoDB -at server bootstrap or server start if they are not found or are -not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_create_or_check_sys_tablespace(void) -/*=====================================*/ -{ - trx_t* trx; - my_bool srv_file_per_table_backup; - dberr_t err; - dberr_t sys_tablespaces_err; - dberr_t sys_datafiles_err; - - ut_a(srv_get_active_thread_type() == SRV_NONE); - - /* Note: The master thread has not been started at this point. 
*/ - - sys_tablespaces_err = dict_check_if_system_table_exists( - "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1); - sys_datafiles_err = dict_check_if_system_table_exists( - "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1); - - if (sys_tablespaces_err == DB_SUCCESS - && sys_datafiles_err == DB_SUCCESS) { - return(DB_SUCCESS); - } - - trx = trx_allocate_for_mysql(); - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - trx->op_info = "creating tablepace and datafile sys tables"; - - row_mysql_lock_data_dictionary(trx); - - /* Check which incomplete table definition to drop. */ - - if (sys_tablespaces_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_TABLESPACES table."); - row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); - } - - if (sys_datafiles_err == DB_CORRUPTION) { - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping incompletely created " - "SYS_DATAFILES table."); - - row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Creating tablespace and datafile system tables."); - - /* We always want SYSTEM tables to be created inside the system - tablespace. */ - srv_file_per_table_backup = srv_file_per_table; - srv_file_per_table = 0; - - err = que_eval_sql( - NULL, - "PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n" - "BEGIN\n" - "CREATE TABLE SYS_TABLESPACES(\n" - " SPACE INT, NAME CHAR, FLAGS INT);\n" - "CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE" - " ON SYS_TABLESPACES (SPACE);\n" - "CREATE TABLE SYS_DATAFILES(\n" - " SPACE INT, PATH CHAR);\n" - "CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE" - " ON SYS_DATAFILES (SPACE);\n" - "END;\n", - FALSE, trx); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Creation of SYS_TABLESPACES and SYS_DATAFILES " - "has failed with error %lu. Tablespace is full. 
" - "Dropping incompletely created tables.", - (ulong) err); - - ut_a(err == DB_OUT_OF_FILE_SPACE - || err == DB_TOO_MANY_CONCURRENT_TRXS); - - row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); - row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); - - if (err == DB_OUT_OF_FILE_SPACE) { - err = DB_MUST_GET_MORE_FILE_SPACE; - } - } - - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx_free_for_mysql(trx); - - srv_file_per_table = srv_file_per_table_backup; - - if (err == DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_INFO, - "Tablespace and datafile system tables created."); - } - - /* Note: The master thread has not been started at this point. */ - /* Confirm and move to the non-LRU part of the table LRU list. */ - - sys_tablespaces_err = dict_check_if_system_table_exists( - "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1); - ut_a(sys_tablespaces_err == DB_SUCCESS); - - sys_datafiles_err = dict_check_if_system_table_exists( - "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1); - ut_a(sys_datafiles_err == DB_SUCCESS); - - return(err); -} - -/********************************************************************//** -Add a single tablespace definition to the data dictionary tables in the -database. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_add_tablespace_to_dictionary( -/*=====================================*/ - ulint space, /*!< in: tablespace id */ - const char* name, /*!< in: tablespace name */ - ulint flags, /*!< in: tablespace flags */ - const char* path, /*!< in: tablespace path */ - trx_t* trx, /*!< in/out: transaction */ - bool commit) /*!< in: if true then commit the - transaction */ -{ - dberr_t error; - - pars_info_t* info = pars_info_create(); - - ut_a(space > TRX_SYS_SPACE); - - pars_info_add_int4_literal(info, "space", space); - - pars_info_add_str_literal(info, "name", name); - - pars_info_add_int4_literal(info, "flags", flags); - - pars_info_add_str_literal(info, "path", path); - - error = que_eval_sql(info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_TABLESPACES VALUES" - "(:space, :name, :flags);\n" - "INSERT INTO SYS_DATAFILES VALUES" - "(:space, :path);\n" - "END;\n", - FALSE, trx); - - if (error != DB_SUCCESS) { - return(error); - } - - if (commit) { - trx->op_info = "committing tablespace and datafile definition"; - trx_commit(trx); - } - - trx->op_info = ""; - - return(error); -} diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc deleted file mode 100644 index a1cfeb3860f..00000000000 --- a/storage/xtradb/dict/dict0dict.cc +++ /dev/null @@ -1,7325 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file dict/dict0dict.cc -Data dictionary system - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - -#include "dict0dict.h" -#include "fts0fts.h" -#include "fil0fil.h" -#include <algorithm> -#include <string> - -#ifdef UNIV_NONINL -#include "dict0dict.ic" -#include "dict0priv.ic" -#endif - -/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ -UNIV_INTERN dict_index_t* dict_ind_redundant; -/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */ -UNIV_INTERN dict_index_t* dict_ind_compact; - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/** Flag to control insert buffer debugging. */ -extern UNIV_INTERN uint ibuf_debug; -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - -/********************************************************************** -Issue a warning that the row is too big. 
*/ -void -ib_warn_row_too_big(const dict_table_t* table); - -#ifndef UNIV_HOTBACKUP -#include "buf0buf.h" -#include "data0type.h" -#include "mach0data.h" -#include "dict0boot.h" -#include "dict0mem.h" -#include "dict0crea.h" -#include "dict0stats.h" -#include "trx0undo.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "os0once.h" -#include "page0zip.h" -#include "page0page.h" -#include "pars0pars.h" -#include "pars0sym.h" -#include "que0que.h" -#include "rem0cmp.h" -#include "fts0fts.h" -#include "fts0types.h" -#include "m_ctype.h" /* my_isspace() */ -#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */ -#include "srv0mon.h" -#include "srv0start.h" -#include "lock0lock.h" -#include "dict0priv.h" -#include "row0upd.h" -#include "row0mysql.h" -#include "row0merge.h" -#include "row0log.h" -#include "ut0ut.h" /* ut_format_name() */ -#include "m_string.h" -#include "my_sys.h" -#include "mysqld.h" /* system_charset_info */ -#include "strfunc.h" /* strconvert() */ - -#include <ctype.h> - -/** the dictionary system */ -UNIV_INTERN dict_sys_t* dict_sys = NULL; - -/** @brief the data dictionary rw-latch protecting dict_sys - -table create, drop, etc. 
reserve this in X-mode; implicit or -backround operations purge, rollback, foreign key checks reserve this -in S-mode; we cannot trust that MySQL protects implicit or background -operations a table drop since MySQL does not know of them; therefore -we need this; NOTE: a transaction which reserves this must keep book -on the mode in trx_t::dict_operation_lock_mode */ -UNIV_INTERN rw_lock_t dict_operation_lock; - -/** Percentage of compression failures that are allowed in a single -round */ -UNIV_INTERN ulong zip_failure_threshold_pct = 5; - -/** Maximum percentage of a page that can be allowed as a pad to avoid -compression failures */ -UNIV_INTERN ulong zip_pad_max = 50; - -/* Keys to register rwlocks and mutexes with performance schema */ -#ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key; -UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key; -UNIV_INTERN mysql_pfs_key_t index_online_log_key; -UNIV_INTERN mysql_pfs_key_t dict_table_stats_key; -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key; -UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key; -UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when - creating a table or index object */ -#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table - hash table fixed size in bytes */ -#define DICT_POOL_PER_VARYING 4 /*!< buffer pool max size per data - dictionary varying size in bytes */ - -/** Identifies generated InnoDB foreign key names */ -static char dict_ibfk[] = "_ibfk_"; - -bool innodb_table_stats_not_found = false; -bool innodb_index_stats_not_found = false; -static bool innodb_table_stats_not_found_reported = false; -static bool innodb_index_stats_not_found_reported = false; - -/*******************************************************************//** -Tries to find column names for the index and sets the col field of the -index. 
-@return TRUE if the column names were found */ -static -ibool -dict_index_find_cols( -/*=================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index); /*!< in: index */ -/*******************************************************************//** -Builds the internal dictionary cache representation for a clustered -index, containing also system fields not defined by the user. -@return own: the internal representation of the clustered index */ -static -dict_index_t* -dict_index_build_internal_clust( -/*============================*/ - const dict_table_t* table, /*!< in: table */ - dict_index_t* index); /*!< in: user representation of - a clustered index */ -/*******************************************************************//** -Builds the internal dictionary cache representation for a non-clustered -index, containing also system fields not defined by the user. -@return own: the internal representation of the non-clustered index */ -static -dict_index_t* -dict_index_build_internal_non_clust( -/*================================*/ - const dict_table_t* table, /*!< in: table */ - dict_index_t* index); /*!< in: user representation of - a non-clustered index */ -/**********************************************************************//** -Builds the internal dictionary cache representation for an FTS index. -@return own: the internal representation of the FTS index */ -static -dict_index_t* -dict_index_build_internal_fts( -/*==========================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index); /*!< in: user representation of an FTS index */ -/**********************************************************************//** -Prints a column data. */ -static -void -dict_col_print_low( -/*===============*/ - const dict_table_t* table, /*!< in: table */ - const dict_col_t* col); /*!< in: column */ -/**********************************************************************//** -Prints an index data. 
*/ -static -void -dict_index_print_low( -/*=================*/ - dict_index_t* index); /*!< in: index */ -/**********************************************************************//** -Prints a field data. */ -static -void -dict_field_print_low( -/*=================*/ - const dict_field_t* field); /*!< in: field */ - -/**********************************************************************//** -Removes an index from the dictionary cache. */ -static -void -dict_index_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index, /*!< in, own: index */ - ibool lru_evict); /*!< in: TRUE if page being evicted - to make room in the table LRU list */ -#ifdef UNIV_DEBUG -/**********************************************************************//** -Validate the dictionary table LRU list. -@return TRUE if validate OK */ -static -ibool -dict_lru_validate(void); -/*===================*/ -/**********************************************************************//** -Check if table is in the dictionary table LRU list. -@return TRUE if table found */ -static -ibool -dict_lru_find_table( -/*================*/ - const dict_table_t* find_table); /*!< in: table to find */ -/**********************************************************************//** -Check if a table exists in the dict table non-LRU list. -@return TRUE if table found */ -static -ibool -dict_non_lru_find_table( -/*====================*/ - const dict_table_t* find_table); /*!< in: table to find */ -#endif /* UNIV_DEBUG */ - -/* Stream for storing detailed information about the latest foreign key -and unique key errors. Only created if !srv_read_only_mode */ -UNIV_INTERN FILE* dict_foreign_err_file = NULL; -/* mutex protecting the foreign and unique error buffers */ -UNIV_INTERN ib_mutex_t dict_foreign_err_mutex; - -/******************************************************************//** -Makes all characters in a NUL-terminated UTF-8 string lower case. 
*/ -UNIV_INTERN -void -dict_casedn_str( -/*============*/ - char* a) /*!< in/out: string to put in lower case */ -{ - innobase_casedn_str(a); -} - -/********************************************************************//** -Checks if the database name in two table names is the same. -@return TRUE if same db name */ -UNIV_INTERN -ibool -dict_tables_have_same_db( -/*=====================*/ - const char* name1, /*!< in: table name in the form - dbname '/' tablename */ - const char* name2) /*!< in: table name in the form - dbname '/' tablename */ -{ - for (; *name1 == *name2; name1++, name2++) { - if (*name1 == '/') { - return(TRUE); - } - ut_a(*name1); /* the names must contain '/' */ - } - return(FALSE); -} - -/********************************************************************//** -Return the end of table name where we have removed dbname and '/'. -@return table name */ -UNIV_INTERN -const char* -dict_remove_db_name( -/*================*/ - const char* name) /*!< in: table name in the form - dbname '/' tablename */ -{ - const char* s = strchr(name, '/'); - ut_a(s); - - return(s + 1); -} - -/********************************************************************//** -Get the database name length in a table name. -@return database name length */ -UNIV_INTERN -ulint -dict_get_db_name_len( -/*=================*/ - const char* name) /*!< in: table name in the form - dbname '/' tablename */ -{ - const char* s; - s = strchr(name, '/'); - ut_a(s); - return(s - name); -} - -/********************************************************************//** -Reserves the dictionary system mutex for MySQL. */ -UNIV_INTERN -void -dict_mutex_enter_for_mysql_func(const char * file, ulint line) -/*============================*/ -{ - mutex_enter_func(&(dict_sys->mutex), file, line); -} - -/********************************************************************//** -Releases the dictionary system mutex for MySQL. 
*/ -UNIV_INTERN -void -dict_mutex_exit_for_mysql(void) -/*===========================*/ -{ - mutex_exit(&(dict_sys->mutex)); -} - -/** Allocate and init a dict_table_t's stats latch. -This function must not be called concurrently on the same table object. -@param[in,out] table_void table whose stats latch to create */ -static -void -dict_table_stats_latch_alloc( - void* table_void) -{ - dict_table_t* table = static_cast<dict_table_t*>(table_void); - - table->stats_latch = new(std::nothrow) rw_lock_t; - - ut_a(table->stats_latch != NULL); - - rw_lock_create(dict_table_stats_key, table->stats_latch, - SYNC_INDEX_TREE); -} - -/** Deinit and free a dict_table_t's stats latch. -This function must not be called concurrently on the same table object. -@param[in,out] table table whose stats latch to free */ -static -void -dict_table_stats_latch_free( - dict_table_t* table) -{ - rw_lock_free(table->stats_latch); - delete table->stats_latch; -} - -/** Create a dict_table_t's stats latch or delay for lazy creation. -This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. -@param[in,out] table table whose stats latch to create -@param[in] enabled if false then the latch is disabled -and dict_table_stats_lock()/unlock() become noop on this table. */ - -void -dict_table_stats_latch_create( - dict_table_t* table, - bool enabled) -{ - if (!enabled) { - table->stats_latch = NULL; - table->stats_latch_created = os_once::DONE; - return; - } - -#ifdef HAVE_ATOMIC_BUILTINS - /* We create this lazily the first time it is used. */ - table->stats_latch = NULL; - table->stats_latch_created = os_once::NEVER_DONE; -#else /* HAVE_ATOMIC_BUILTINS */ - - dict_table_stats_latch_alloc(table); - - table->stats_latch_created = os_once::DONE; -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/** Destroy a dict_table_t's stats latch. 
-This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. -@param[in,out] table table whose stats latch to destroy */ - -void -dict_table_stats_latch_destroy( - dict_table_t* table) -{ - if (table->stats_latch_created == os_once::DONE - && table->stats_latch != NULL) { - - dict_table_stats_latch_free(table); - } -} - -/**********************************************************************//** -Lock the appropriate latch to protect a given table's statistics. */ -UNIV_INTERN -void -dict_table_stats_lock( -/*==================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */ -{ - ut_ad(table != NULL); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - -#ifdef HAVE_ATOMIC_BUILTINS - os_once::do_or_wait_for_done( - &table->stats_latch_created, - dict_table_stats_latch_alloc, table); -#else /* HAVE_ATOMIC_BUILTINS */ - ut_ad(table->stats_latch_created == os_once::DONE); -#endif /* HAVE_ATOMIC_BUILTINS */ - - if (table->stats_latch == NULL) { - /* This is a dummy table object that is private in the current - thread and is not shared between multiple threads, thus we - skip any locking. 
*/ - return; - } - - switch (latch_mode) { - case RW_S_LATCH: - rw_lock_s_lock(table->stats_latch); - break; - case RW_X_LATCH: - rw_lock_x_lock(table->stats_latch); - break; - case RW_NO_LATCH: - /* fall through */ - default: - ut_error; - } -} - -/**********************************************************************//** -Unlock the latch that has been locked by dict_table_stats_lock() */ -UNIV_INTERN -void -dict_table_stats_unlock( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ -{ - ut_ad(table != NULL); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - if (table->stats_latch == NULL) { - /* This is a dummy table object that is private in the current - thread and is not shared between multiple threads, thus we - skip any locking. */ - return; - } - - switch (latch_mode) { - case RW_S_LATCH: - rw_lock_s_unlock(table->stats_latch); - break; - case RW_X_LATCH: - rw_lock_x_unlock(table->stats_latch); - break; - case RW_NO_LATCH: - /* fall through */ - default: - ut_error; - } -} - -/**********************************************************************//** -Try to drop any indexes after an aborted index creation. -This can also be after a server kill during DROP INDEX. 
*/ -static -void -dict_table_try_drop_aborted( -/*========================*/ - dict_table_t* table, /*!< in: table, or NULL if it - needs to be looked up again */ - table_id_t table_id, /*!< in: table identifier */ - ulint ref_count) /*!< in: expected table->n_ref_count */ -{ - trx_t* trx; - - trx = trx_allocate_for_background(); - trx->op_info = "try to drop any indexes after an aborted index creation"; - row_mysql_lock_data_dictionary(trx); - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - - if (table == NULL) { - table = dict_table_open_on_id_low( - table_id, DICT_ERR_IGNORE_NONE, FALSE); - } else { - ut_ad(table->id == table_id); - } - - if (table && table->n_ref_count == ref_count && table->drop_aborted) { - /* Silence a debug assertion in row_merge_drop_indexes(). */ - ut_d(table->n_ref_count++); - row_merge_drop_indexes(trx, table, TRUE); - ut_d(table->n_ref_count--); - ut_ad(table->n_ref_count == ref_count); - trx_commit_for_mysql(trx); - } - - row_mysql_unlock_data_dictionary(trx); - trx_free_for_background(trx); -} - -/**********************************************************************//** -When opening a table, -try to drop any indexes after an aborted index creation. -Release the dict_sys->mutex. */ -static -void -dict_table_try_drop_aborted_and_mutex_exit( -/*=======================================*/ - dict_table_t* table, /*!< in: table (may be NULL) */ - ibool try_drop) /*!< in: FALSE if should try to - drop indexes whose online creation - was aborted */ -{ - if (try_drop - && table != NULL - && table->drop_aborted - && table->n_ref_count == 1 - && dict_table_get_first_index(table)) { - - /* Attempt to drop the indexes whose online creation - was aborted. 
*/ - table_id_t table_id = table->id; - - mutex_exit(&dict_sys->mutex); - - dict_table_try_drop_aborted(table, table_id, 1); - } else { - mutex_exit(&dict_sys->mutex); - } -} - -/********************************************************************//** -Decrements the count of open handles to a table. */ -UNIV_INTERN -void -dict_table_close( -/*=============*/ - dict_table_t* table, /*!< in/out: table */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - ibool try_drop) /*!< in: TRUE=try to drop any orphan - indexes after an aborted online - index creation */ -{ - if (!dict_locked) { - mutex_enter(&dict_sys->mutex); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - ut_a(table->n_ref_count > 0); - - --table->n_ref_count; - - /* Force persistent stats re-read upon next open of the table - so that FLUSH TABLE can be used to forcibly fetch stats from disk - if they have been manually modified. We reset table->stat_initialized - only if table reference count is 0 because we do not want too frequent - stats re-reads (e.g. in other cases than FLUSH TABLE). */ - if (strchr(table->name, '/') != NULL - && table->n_ref_count == 0 - && dict_stats_is_persistent_enabled(table)) { - - dict_stats_deinit(table); - } - - MONITOR_DEC(MONITOR_TABLE_REFERENCE); - - ut_ad(dict_lru_validate()); - -#ifdef UNIV_DEBUG - if (table->can_be_evicted) { - ut_ad(dict_lru_find_table(table)); - } else { - ut_ad(dict_non_lru_find_table(table)); - } -#endif /* UNIV_DEBUG */ - - if (!dict_locked) { - table_id_t table_id = table->id; - ibool drop_aborted; - - drop_aborted = try_drop - && table->drop_aborted - && table->n_ref_count == 1 - && dict_table_get_first_index(table); - - mutex_exit(&dict_sys->mutex); - - if (drop_aborted) { - dict_table_try_drop_aborted(NULL, table_id, 0); - } - } -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Returns a column's name. -@return column name. 
NOTE: not guaranteed to stay valid if table is -modified in any way (columns added, etc.). */ -UNIV_INTERN -const char* -dict_table_get_col_name( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - ulint col_nr) /*!< in: column number */ -{ - ulint i; - const char* s; - - ut_ad(table); - ut_ad(col_nr < table->n_def); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - s = table->col_names; - if (s) { - for (i = 0; i < col_nr; i++) { - s += strlen(s) + 1; - } - } - - return(s); -} - -/**********************************************************************//** -Returns a column's name. -@return column name. NOTE: not guaranteed to stay valid if table is -modified in any way (columns added, etc.). */ -UNIV_INTERN -const char* -dict_table_get_col_name_for_mysql( -/*==============================*/ - const dict_table_t* table, /*!< in: table */ - const char* col_name)/*! in: MySQL table column name */ -{ - ulint i; - const char* s; - - ut_ad(table); - ut_ad(col_name); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - s = table->col_names; - if (s) { - /* If we have many virtual columns MySQL key_part->fieldnr - could be larger than number of columns in InnoDB table - when creating new indexes. */ - for (i = 0; i < table->n_def; i++) { - - if (!innobase_strcasecmp(s, col_name)) { - break; /* Found */ - } - s += strlen(s) + 1; - } - } - - return(s); -} -#ifndef UNIV_HOTBACKUP -/** Allocate and init the autoinc latch of a given table. -This function must not be called concurrently on the same table object. -@param[in,out] table_void table whose autoinc latch to create */ -void -dict_table_autoinc_alloc( - void* table_void) -{ - dict_table_t* table = static_cast<dict_table_t*>(table_void); - table->autoinc_mutex = new (std::nothrow) ib_mutex_t(); - ut_a(table->autoinc_mutex != NULL); - mutex_create(autoinc_mutex_key, - table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX); -} - -/** Allocate and init the zip_pad_mutex of a given index. 
-This function must not be called concurrently on the same index object. -@param[in,out] index_void index whose zip_pad_mutex to create */ -void -dict_index_zip_pad_alloc( - void* index_void) -{ - dict_index_t* index = static_cast<dict_index_t*>(index_void); - index->zip_pad.mutex = new (std::nothrow) os_fast_mutex_t; - ut_a(index->zip_pad.mutex != NULL); - os_fast_mutex_init(zip_pad_mutex_key, index->zip_pad.mutex); -} - -/********************************************************************//** -Acquire the autoinc lock. */ -UNIV_INTERN -void -dict_table_autoinc_lock( -/*====================*/ - dict_table_t* table) /*!< in/out: table */ -{ -#ifdef HAVE_ATOMIC_BUILTINS - os_once::do_or_wait_for_done( - &table->autoinc_mutex_created, - dict_table_autoinc_alloc, table); -#else /* HAVE_ATOMIC_BUILTINS */ - ut_ad(table->autoinc_mutex_created == os_once::DONE); -#endif /* HAVE_ATOMIC_BUILTINS */ - - mutex_enter(table->autoinc_mutex); -} - -/** Acquire the zip_pad_mutex latch. -@param[in,out] index the index whose zip_pad_mutex to acquire.*/ -void -dict_index_zip_pad_lock( - dict_index_t* index) -{ -#ifdef HAVE_ATOMIC_BUILTINS - os_once::do_or_wait_for_done( - &index->zip_pad.mutex_created, - dict_index_zip_pad_alloc, index); -#else /* HAVE_ATOMIC_BUILTINS */ - ut_ad(index->zip_pad.mutex_created == os_once::DONE); -#endif /* HAVE_ATOMIC_BUILTINS */ - - os_fast_mutex_lock(index->zip_pad.mutex); -} - -/********************************************************************//** -Unconditionally set the autoinc counter. */ -UNIV_INTERN -void -dict_table_autoinc_initialize( -/*==========================*/ - dict_table_t* table, /*!< in/out: table */ - ib_uint64_t value) /*!< in: next value to assign to a row */ -{ - ut_ad(dict_table_autoinc_own(table)); - - table->autoinc = value; -} - -/************************************************************************ -Get all the FTS indexes on a table. 
-@return number of FTS indexes */ -UNIV_INTERN -ulint -dict_table_get_all_fts_indexes( -/*===========================*/ - dict_table_t* table, /*!< in: table */ - ib_vector_t* indexes) /*!< out: all FTS indexes on this - table */ -{ - dict_index_t* index; - - ut_a(ib_vector_size(indexes) == 0); - - for (index = dict_table_get_first_index(table); - index; - index = dict_table_get_next_index(index)) { - - if (index->type == DICT_FTS) { - ib_vector_push(indexes, &index); - } - } - - return(ib_vector_size(indexes)); -} - -/** Store autoinc value when the table is evicted. -@param[in] table table evicted */ -UNIV_INTERN -void -dict_table_autoinc_store( - const dict_table_t* table) -{ - ut_ad(mutex_own(&dict_sys->mutex)); - - if (table->autoinc != 0) { - ut_ad(dict_sys->autoinc_map->find(table->id) - == dict_sys->autoinc_map->end()); - - dict_sys->autoinc_map->insert( - std::pair<table_id_t, ib_uint64_t>( - table->id, table->autoinc)); - } -} - -/** Restore autoinc value when the table is loaded. -@param[in] table table loaded */ -UNIV_INTERN -void -dict_table_autoinc_restore( - dict_table_t* table) -{ - ut_ad(mutex_own(&dict_sys->mutex)); - - autoinc_map_t::iterator it; - it = dict_sys->autoinc_map->find(table->id); - - if (it != dict_sys->autoinc_map->end()) { - table->autoinc = it->second; - ut_ad(table->autoinc != 0); - - dict_sys->autoinc_map->erase(it); - } -} - -/********************************************************************//** -Reads the next autoinc value (== autoinc counter value), 0 if not yet -initialized. -@return value for a new row, or 0 */ -UNIV_INTERN -ib_uint64_t -dict_table_autoinc_read( -/*====================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(dict_table_autoinc_own(table)); - - return(table->autoinc); -} - -/********************************************************************//** -Updates the autoinc counter if the value supplied is greater than the -current value. 
*/ -UNIV_INTERN -void -dict_table_autoinc_update_if_greater( -/*=================================*/ - - dict_table_t* table, /*!< in/out: table */ - ib_uint64_t value) /*!< in: value which was assigned to a row */ -{ - ut_ad(dict_table_autoinc_own(table)); - - if (value > table->autoinc) { - - table->autoinc = value; - } -} - -/********************************************************************//** -Release the autoinc lock. */ -UNIV_INTERN -void -dict_table_autoinc_unlock( -/*======================*/ - dict_table_t* table) /*!< in/out: table */ -{ - mutex_exit(table->autoinc_mutex); -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************************//** -Looks for column n in an index. -@return position in internal representation of the index; -ULINT_UNDEFINED if not contained */ -UNIV_INTERN -ulint -dict_index_get_nth_col_or_prefix_pos( -/*=================================*/ - const dict_index_t* index, /*!< in: index */ - ulint n, /*!< in: column number */ - ibool inc_prefix, /*!< in: TRUE=consider - column prefixes too */ - ulint* prefix_col_pos) /*!< out: col num if prefix */ -{ - const dict_field_t* field; - const dict_col_t* col; - ulint pos; - ulint n_fields; - ulint prefixed_pos_dummy; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad((inc_prefix && !prefix_col_pos) || (!inc_prefix)); - - if (!prefix_col_pos) { - prefix_col_pos = &prefixed_pos_dummy; - } - *prefix_col_pos = ULINT_UNDEFINED; - - if (!prefix_col_pos) { - prefix_col_pos = &prefixed_pos_dummy; - } - *prefix_col_pos = ULINT_UNDEFINED; - - col = dict_table_get_nth_col(index->table, n); - - if (dict_index_is_clust(index)) { - - return(dict_col_get_clust_pos(col, index)); - } - - n_fields = dict_index_get_n_fields(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (col == field->col) { - *prefix_col_pos = pos; - if (inc_prefix || field->prefix_len == 0) { - return(pos); - } - } - } 
- - return(ULINT_UNDEFINED); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Returns TRUE if the index contains a column or a prefix of that column. -@return TRUE if contains the column or its prefix */ -UNIV_INTERN -ibool -dict_index_contains_col_or_prefix( -/*==============================*/ - const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ -{ - const dict_field_t* field; - const dict_col_t* col; - ulint pos; - ulint n_fields; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - if (dict_index_is_clust(index)) { - - return(TRUE); - } - - col = dict_table_get_nth_col(index->table, n); - - n_fields = dict_index_get_n_fields(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (col == field->col) { - - return(TRUE); - } - } - - return(FALSE); -} - -/********************************************************************//** -Looks for a matching field in an index. The column has to be the same. The -column in index must be complete, or must contain a prefix longer than the -column in index2. That is, we must be able to construct the prefix in index2 -from the prefix in index. 
-@return position in internal representation of the index; -ULINT_UNDEFINED if not contained */ -UNIV_INTERN -ulint -dict_index_get_nth_field_pos( -/*=========================*/ - const dict_index_t* index, /*!< in: index from which to search */ - const dict_index_t* index2, /*!< in: index */ - ulint n) /*!< in: field number in index2 */ -{ - const dict_field_t* field; - const dict_field_t* field2; - ulint n_fields; - ulint pos; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - field2 = dict_index_get_nth_field(index2, n); - - n_fields = dict_index_get_n_fields(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (field->col == field2->col - && (field->prefix_len == 0 - || (field->prefix_len >= field2->prefix_len - && field2->prefix_len != 0))) { - - return(pos); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INTERN -dict_table_t* -dict_table_open_on_id( -/*==================*/ - table_id_t table_id, /*!< in: table id */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - dict_table_op_t table_op) /*!< in: operation to perform */ -{ - dict_table_t* table; - - if (!dict_locked) { - mutex_enter(&dict_sys->mutex); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - - table = dict_table_open_on_id_low( - table_id, - table_op == DICT_TABLE_OP_LOAD_TABLESPACE - ? 
DICT_ERR_IGNORE_RECOVER_LOCK - : DICT_ERR_IGNORE_NONE, - table_op == DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); - - if (table != NULL) { - - if (table->can_be_evicted) { - dict_move_to_mru(table); - } - - ++table->n_ref_count; - - MONITOR_INC(MONITOR_TABLE_REFERENCE); - } - - if (!dict_locked) { - dict_table_try_drop_aborted_and_mutex_exit( - table, table_op == DICT_TABLE_OP_DROP_ORPHAN); - } - - return(table); -} - -/********************************************************************//** -Looks for column n position in the clustered index. -@return position in internal representation of the clustered index */ -UNIV_INTERN -ulint -dict_table_get_nth_col_pos( -/*=======================*/ - const dict_table_t* table, /*!< in: table */ - ulint n) /*!< in: column number */ -{ - return(dict_index_get_nth_col_pos(dict_table_get_first_index(table), - n, NULL)); -} - -/********************************************************************//** -Checks if a column is in the ordering columns of the clustered index of a -table. Column prefixes are treated like whole columns. -@return TRUE if the column, or its prefix, is in the clustered key */ -UNIV_INTERN -ibool -dict_table_col_in_clustered_key( -/*============================*/ - const dict_table_t* table, /*!< in: table */ - ulint n) /*!< in: column number */ -{ - const dict_index_t* index; - const dict_field_t* field; - const dict_col_t* col; - ulint pos; - ulint n_fields; - - ut_ad(table); - - col = dict_table_get_nth_col(table, n); - - index = dict_table_get_first_index(table); - - n_fields = dict_index_get_n_unique(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (col == field->col) { - - return(TRUE); - } - } - - return(FALSE); -} - -/**********************************************************************//** -Inits the data dictionary module. 
*/ -UNIV_INTERN -void -dict_init(void) -/*===========*/ -{ - dict_sys = static_cast<dict_sys_t*>(mem_zalloc(sizeof(*dict_sys))); - - mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT); - - dict_sys->table_hash = hash_create(buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH - * UNIV_WORD_SIZE)); - dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size() - / (DICT_POOL_PER_TABLE_HASH - * UNIV_WORD_SIZE)); - rw_lock_create(dict_operation_lock_key, - &dict_operation_lock, SYNC_DICT_OPERATION); - - if (!srv_read_only_mode) { - dict_foreign_err_file = os_file_create_tmpfile(NULL); - ut_a(dict_foreign_err_file); - - mutex_create(dict_foreign_err_mutex_key, - &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); - } - - dict_sys->autoinc_map = new autoinc_map_t(); -} - -/**********************************************************************//** -Move to the most recently used segment of the LRU list. */ -UNIV_INTERN -void -dict_move_to_mru( -/*=============*/ - dict_table_t* table) /*!< in: table to move to MRU */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(dict_lru_validate()); - ut_ad(dict_lru_find_table(table)); - - ut_a(table->can_be_evicted); - - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - - ut_ad(dict_lru_validate()); -} - -/**********************************************************************//** -Returns a table object and increment its open handle count. -NOTE! This is a high-level function to be used mainly from outside the -'dict' module. Inside this directory dict_table_get_low -is usually the appropriate function. 
-@return table, NULL if does not exist */ -UNIV_INTERN -dict_table_t* -dict_table_open_on_name( -/*====================*/ - const char* table_name, /*!< in: table name */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - ibool try_drop, /*!< in: TRUE=try to drop any orphan - indexes after an aborted online - index creation */ - dict_err_ignore_t - ignore_err) /*!< in: error to be ignored when - loading a table definition */ -{ - dict_table_t* table; - - if (!dict_locked) { - mutex_enter(&(dict_sys->mutex)); - } - - ut_ad(table_name); - ut_ad(mutex_own(&dict_sys->mutex)); - - table = dict_table_check_if_in_cache_low(table_name); - - if (table == NULL) { - table = dict_load_table(table_name, TRUE, ignore_err); - } - - ut_ad(!table || table->cached); - - if (table != NULL) { - - /* If table is encrypted or corrupted */ - if (ignore_err == DICT_ERR_IGNORE_NONE - && !table->is_readable()) { - /* Make life easy for drop table. */ - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } - - if (table->corrupted) { - - if (!dict_locked) { - mutex_exit(&dict_sys->mutex); - } - - char buf[MAX_FULL_NAME_LEN]; - ut_format_name(table->name, TRUE, buf, sizeof(buf)); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Table %s is corrupted. Please " - "drop the table and recreate.", - buf); - - return(NULL); - } - - if (table->can_be_evicted) { - dict_move_to_mru(table); - } - - ++table->n_ref_count; - - if (!dict_locked) { - mutex_exit(&dict_sys->mutex); - } - - return (table); - } - - if (table->can_be_evicted) { - dict_move_to_mru(table); - } - - ++table->n_ref_count; - - MONITOR_INC(MONITOR_TABLE_REFERENCE); - } - - ut_ad(dict_lru_validate()); - - if (!dict_locked) { - dict_table_try_drop_aborted_and_mutex_exit(table, try_drop); - } - - return(table); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Adds system columns to a table object. 
*/ -UNIV_INTERN -void -dict_table_add_system_columns( -/*==========================*/ - dict_table_t* table, /*!< in/out: table */ - mem_heap_t* heap) /*!< in: temporary heap */ -{ - ut_ad(table); - ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(!table->cached); - - /* NOTE: the system columns MUST be added in the following order - (so that they can be indexed by the numerical value of DATA_ROW_ID, - etc.) and as the last columns of the table memory object. - The clustered index will not always physically contain all - system columns. */ - - dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS, - DATA_ROW_ID | DATA_NOT_NULL, - DATA_ROW_ID_LEN); -#if DATA_ROW_ID != 0 -#error "DATA_ROW_ID != 0" -#endif - dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS, - DATA_TRX_ID | DATA_NOT_NULL, - DATA_TRX_ID_LEN); -#if DATA_TRX_ID != 1 -#error "DATA_TRX_ID != 1" -#endif - dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS, - DATA_ROLL_PTR | DATA_NOT_NULL, - DATA_ROLL_PTR_LEN); -#if DATA_ROLL_PTR != 2 -#error "DATA_ROLL_PTR != 2" -#endif - - /* This check reminds that if a new system column is added to - the program, it should be dealt with here */ -#if DATA_N_SYS_COLS != 3 -#error "DATA_N_SYS_COLS != 3" -#endif -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Adds a table object to the dictionary cache. 
*/ -UNIV_INTERN -void -dict_table_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ibool can_be_evicted, /*!< in: TRUE if can be evicted */ - mem_heap_t* heap) /*!< in: temporary heap */ -{ - ulint fold; - ulint id_fold; - ulint i; - ulint row_len; - - ut_ad(dict_lru_validate()); - - /* The lower limit for what we consider a "big" row */ -#define BIG_ROW_SIZE 1024 - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_table_add_system_columns(table, heap); - - table->cached = TRUE; - - fold = ut_fold_string(table->name); - id_fold = ut_fold_ull(table->id); - - row_len = 0; - for (i = 0; i < table->n_def; i++) { - ulint col_len = dict_col_get_max_size( - dict_table_get_nth_col(table, i)); - - row_len += col_len; - - /* If we have a single unbounded field, or several gigantic - fields, mark the maximum row size as BIG_ROW_SIZE. */ - if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) { - row_len = BIG_ROW_SIZE; - - break; - } - } - - table->big_rows = row_len >= BIG_ROW_SIZE; - - /* Look for a table with the same name: error if such exists */ - { - dict_table_t* table2; - HASH_SEARCH(name_hash, dict_sys->table_hash, fold, - dict_table_t*, table2, ut_ad(table2->cached), - ut_strcmp(table2->name, table->name) == 0); - ut_a(table2 == NULL); - -#ifdef UNIV_DEBUG - /* Look for the same table pointer with a different name */ - HASH_SEARCH_ALL(name_hash, dict_sys->table_hash, - dict_table_t*, table2, ut_ad(table2->cached), - table2 == table); - ut_ad(table2 == NULL); -#endif /* UNIV_DEBUG */ - } - - /* Look for a table with the same id: error if such exists */ - { - dict_table_t* table2; - HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, - dict_table_t*, table2, ut_ad(table2->cached), - table2->id == table->id); - ut_a(table2 == NULL); - -#ifdef UNIV_DEBUG - /* Look for the same table pointer with a different id */ - HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash, - dict_table_t*, table2, ut_ad(table2->cached), - table2 == table); - 
ut_ad(table2 == NULL); -#endif /* UNIV_DEBUG */ - } - - /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, - table); - - /* Add table to hash table of tables based on table id */ - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold, - table); - - table->can_be_evicted = can_be_evicted; - - if (table->can_be_evicted) { - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - } else { - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); - } - - dict_table_autoinc_restore(table); - - ut_ad(dict_lru_validate()); - - dict_sys->size += mem_heap_get_size(table->heap) - + strlen(table->name) + 1; -} - -/**********************************************************************//** -Test whether a table can be evicted from the LRU cache. -@return TRUE if table can be evicted. */ -static -ibool -dict_table_can_be_evicted( -/*======================*/ - const dict_table_t* table) /*!< in: table to test */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_a(table->can_be_evicted); - ut_a(table->foreign_set.empty()); - ut_a(table->referenced_set.empty()); - - if (table->n_ref_count == 0) { - dict_index_t* index; - - /* The transaction commit and rollback are called from - outside the handler interface. This means that there is - a window where the table->n_ref_count can be zero but - the table instance is in "use". */ - - if (lock_table_has_locks(table)) { - return(FALSE); - } - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - btr_search_t* info = btr_search_get_info(index); - - /* We are not allowed to free the in-memory index - struct dict_index_t until all entries in the adaptive - hash index that point to any of the page belonging to - his b-tree index are dropped. 
This is so because - dropping of these entries require access to - dict_index_t struct. To avoid such scenario we keep - a count of number of such pages in the search_info and - only free the dict_index_t struct when this count - drops to zero. - - See also: dict_index_remove_from_cache_low() */ - - if (btr_search_info_get_ref_count(info, index) > 0) { - return(FALSE); - } - } - - return(TRUE); - } - - return(FALSE); -} - -/**********************************************************************//** -Make room in the table cache by evicting an unused table. The unused table -should not be part of FK relationship and currently not used in any user -transaction. There is no guarantee that it will remove a table. -@return number of tables evicted. If the number of tables in the dict_LRU -is less than max_tables it will not do anything. */ -UNIV_INTERN -ulint -dict_make_room_in_cache( -/*====================*/ - ulint max_tables, /*!< in: max tables allowed in cache */ - ulint pct_check) /*!< in: max percent to check */ -{ - ulint i; - ulint len; - dict_table_t* table; - ulint check_up_to; - ulint n_evicted = 0; - - ut_a(pct_check > 0); - ut_a(pct_check <= 100); - ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(dict_lru_validate()); - - i = len = UT_LIST_GET_LEN(dict_sys->table_LRU); - - if (len < max_tables) { - return(0); - } - - check_up_to = len - ((len * pct_check) / 100); - - /* Check for overflow */ - ut_a(i == 0 || check_up_to <= i); - - /* Find a suitable candidate to evict from the cache. Don't scan the - entire LRU list. Only scan pct_check list entries. 
*/ - - for (table = UT_LIST_GET_LAST(dict_sys->table_LRU); - table != NULL - && i > check_up_to - && (len - n_evicted) > max_tables; - --i) { - - dict_table_t* prev_table; - - prev_table = UT_LIST_GET_PREV(table_LRU, table); - - if (dict_table_can_be_evicted(table)) { - - dict_table_remove_from_cache_low(table, TRUE); - - ++n_evicted; - } - - table = prev_table; - } - - return(n_evicted); -} - -/**********************************************************************//** -Move a table to the non-LRU list from the LRU list. */ -UNIV_INTERN -void -dict_table_move_from_lru_to_non_lru( -/*================================*/ - dict_table_t* table) /*!< in: table to move from LRU to non-LRU */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(dict_lru_find_table(table)); - - ut_a(table->can_be_evicted); - - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - - UT_LIST_ADD_LAST(table_LRU, dict_sys->table_non_LRU, table); - - table->can_be_evicted = FALSE; -} - -/**********************************************************************//** -Move a table to the LRU list from the non-LRU list. */ -UNIV_INTERN -void -dict_table_move_from_non_lru_to_lru( -/*================================*/ - dict_table_t* table) /*!< in: table to move from non-LRU to LRU */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(dict_non_lru_find_table(table)); - - ut_a(!table->can_be_evicted); - - UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table); - - UT_LIST_ADD_LAST(table_LRU, dict_sys->table_LRU, table); - - table->can_be_evicted = TRUE; -} - -/**********************************************************************//** -Looks for an index with the given id given a table instance. 
-@return index or NULL */ -UNIV_INTERN -dict_index_t* -dict_table_find_index_on_id( -/*========================*/ - const dict_table_t* table, /*!< in: table instance */ - index_id_t id) /*!< in: index id */ -{ - dict_index_t* index; - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (id == index->id) { - /* Found */ - - return(index); - } - } - - return(NULL); -} - -/**********************************************************************//** -Looks for an index with the given id. NOTE that we do not reserve -the dictionary mutex: this function is for emergency purposes like -printing info of a corrupt database page! -@return index or NULL if not found in cache */ -UNIV_INTERN -dict_index_t* -dict_index_find_on_id_low( -/*======================*/ - index_id_t id) /*!< in: index id */ -{ - dict_table_t* table; - - /* This can happen if the system tablespace is the wrong page size */ - if (dict_sys == NULL) { - return(NULL); - } - - for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - table != NULL; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - dict_index_t* index = dict_table_find_index_on_id(table, id); - - if (index != NULL) { - return(index); - } - } - - for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); - table != NULL; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - dict_index_t* index = dict_table_find_index_on_id(table, id); - - if (index != NULL) { - return(index); - } - } - - return(NULL); -} - -/** Function object to remove a foreign key constraint from the -referenced_set of the referenced table. The foreign key object is -also removed from the dictionary cache. The foreign key constraint -is not removed from the foreign_set of the table containing the -constraint. 
*/ -struct dict_foreign_remove_partial -{ - void operator()(dict_foreign_t* foreign) { - dict_table_t* table = foreign->referenced_table; - if (table != NULL) { - table->referenced_set.erase(foreign); - } - dict_foreign_free(foreign); - } -}; - -/**********************************************************************//** -Renames a table object. -@return TRUE if success */ -UNIV_INTERN -dberr_t -dict_table_rename_in_cache( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - const char* new_name, /*!< in: new name */ - ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want - to preserve the original table name - in constraints which reference it */ -{ - dberr_t err; - dict_foreign_t* foreign; - dict_index_t* index; - ulint fold; - char old_name[MAX_FULL_NAME_LEN + 1]; - os_file_type_t ftype; - ibool exists; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* store the old/current name to an automatic variable */ - if (strlen(table->name) + 1 <= sizeof(old_name)) { - memcpy(old_name, table->name, strlen(table->name) + 1); - } else { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: too long table name: '%s', " - "max length is %d\n", table->name, - MAX_FULL_NAME_LEN); - ut_error; - } - - fold = ut_fold_string(new_name); - - /* Look for a table with the same name: error if such exists */ - dict_table_t* table2; - HASH_SEARCH(name_hash, dict_sys->table_hash, fold, - dict_table_t*, table2, ut_ad(table2->cached), - (ut_strcmp(table2->name, new_name) == 0)); - DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure", - if (table2 == NULL) { - table2 = (dict_table_t*) -1; - } ); - if (table2) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot rename table '%s' to '%s' since the " - "dictionary cache already contains '%s'.", - old_name, new_name, new_name); - return(DB_ERROR); - } - - /* If the table is stored in a single-table tablespace, rename the - .ibd file and rebuild the .isl file if needed. 
*/ - - if (dict_table_is_discarded(table)) { - char* filepath; - - ut_ad(table->space != TRX_SYS_SPACE); - - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - - dict_get_and_save_data_dir_path(table, true); - ut_a(table->data_dir_path); - - filepath = os_file_make_remote_pathname( - table->data_dir_path, table->name, "ibd"); - } else { - filepath = fil_make_ibd_name(table->name, false); - } - - fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE); - - /* Delete any temp file hanging around. */ - if (os_file_status(filepath, &exists, &ftype) - && exists - && !os_file_delete_if_exists(innodb_file_temp_key, - filepath)) { - - ib_logf(IB_LOG_LEVEL_INFO, - "Delete of %s failed.", filepath); - } - - mem_free(filepath); - - } else if (table->space != TRX_SYS_SPACE) { - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: trying to rename a" - " TEMPORARY TABLE ", stderr); - ut_print_name(stderr, NULL, TRUE, old_name); - if (table->dir_path_of_temp_table != NULL) { - fputs(" (", stderr); - ut_print_filename( - stderr, table->dir_path_of_temp_table); - fputs(" )\n", stderr); - } - - return(DB_ERROR); - } - - char* new_path = NULL; - char* old_path = fil_space_get_first_path(table->space); - - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - new_path = os_file_make_new_pathname( - old_path, new_name); - - err = fil_create_link_file(new_name, new_path); - if (err != DB_SUCCESS) { - mem_free(new_path); - mem_free(old_path); - return(DB_TABLESPACE_EXISTS); - } - } else { - new_path = fil_make_ibd_name(new_name, false); - } - - /* New filepath must not exist. 
*/ - err = fil_rename_tablespace_check( - table->space, old_path, new_path, false); - if (err != DB_SUCCESS) { - mem_free(old_path); - mem_free(new_path); - return(err); - } - - ibool success = fil_rename_tablespace( - old_name, table->space, new_name, new_path); - - mem_free(old_path); - mem_free(new_path); - - /* If the tablespace is remote, a new .isl file was created - If success, delete the old one. If not, delete the new one. */ - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - fil_delete_link_file(success ? old_name : new_name); - } - - if (!success) { - return(DB_ERROR); - } - } - - /* Remove table from the hash tables of tables */ - HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, - ut_fold_string(old_name), table); - - if (strlen(new_name) > strlen(table->name)) { - /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid - memory fragmentation, we assume a repeated calls of - ut_realloc() with the same size do not cause fragmentation */ - ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN); - - table->name = static_cast<char*>( - ut_realloc(table->name, MAX_FULL_NAME_LEN + 1)); - } - memcpy(table->name, new_name, strlen(new_name) + 1); - - /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, - table); - - dict_sys->size += strlen(new_name) - strlen(old_name); - ut_a(dict_sys->size > 0); - - /* Update the table_name field in indexes */ - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - index->table_name = table->name; - } - - if (!rename_also_foreigns) { - /* In ALTER TABLE we think of the rename table operation - in the direction table -> temporary table (#sql...) - as dropping the table with the old name and creating - a new with the new name. Thus we kind of drop the - constraints from the dictionary cache here. 
The foreign key - constraints will be inherited to the new table from the - system tables through a call of dict_load_foreigns. */ - - /* Remove the foreign constraints from the cache */ - std::for_each(table->foreign_set.begin(), - table->foreign_set.end(), - dict_foreign_remove_partial()); - table->foreign_set.clear(); - - /* Reset table field in referencing constraints */ - for (dict_foreign_set::iterator it - = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - foreign->referenced_table = NULL; - foreign->referenced_index = NULL; - - } - - /* Make the set of referencing constraints empty */ - table->referenced_set.clear(); - - return(DB_SUCCESS); - } - - /* Update the table name fields in foreign constraints, and update also - the constraint id of new format >= 4.0.18 constraints. Note that at - this point we have already changed table->name to the new name. */ - - dict_foreign_set fk_set; - - for (;;) { - - dict_foreign_set::iterator it - = table->foreign_set.begin(); - - if (it == table->foreign_set.end()) { - break; - } - - foreign = *it; - - if (foreign->referenced_table) { - foreign->referenced_table->referenced_set.erase(foreign); - } - - if (ut_strlen(foreign->foreign_table_name) - < ut_strlen(table->name)) { - /* Allocate a longer name buffer; - TODO: store buf len to save memory */ - - foreign->foreign_table_name = mem_heap_strdup( - foreign->heap, table->name); - dict_mem_foreign_table_name_lookup_set(foreign, TRUE); - } else { - strcpy(foreign->foreign_table_name, table->name); - dict_mem_foreign_table_name_lookup_set(foreign, FALSE); - } - if (strchr(foreign->id, '/')) { - /* This is a >= 4.0.18 format id */ - - ulint db_len; - char* old_id; - char old_name_cs_filename[MAX_TABLE_NAME_LEN+20]; - uint errors = 0; - - /* All table names are internally stored in charset - my_charset_filename (except the temp tables and the - partition identifier suffix in partition tables). 
The - foreign key constraint names are internally stored - in UTF-8 charset. The variable fkid here is used - to store foreign key constraint name in charset - my_charset_filename for comparison further below. */ - char fkid[MAX_TABLE_NAME_LEN+20]; - ibool on_tmp = FALSE; - - /* The old table name in my_charset_filename is stored - in old_name_cs_filename */ - - strncpy(old_name_cs_filename, old_name, - MAX_TABLE_NAME_LEN); - if (strstr(old_name, TEMP_TABLE_PATH_PREFIX) == NULL) { - - innobase_convert_to_system_charset( - strchr(old_name_cs_filename, '/') + 1, - strchr(old_name, '/') + 1, - MAX_TABLE_NAME_LEN, &errors); - - if (errors) { - /* There has been an error to convert - old table into UTF-8. This probably - means that the old table name is - actually in UTF-8. */ - innobase_convert_to_filename_charset( - strchr(old_name_cs_filename, - '/') + 1, - strchr(old_name, '/') + 1, - MAX_TABLE_NAME_LEN); - } else { - /* Old name already in - my_charset_filename */ - strncpy(old_name_cs_filename, old_name, - MAX_TABLE_NAME_LEN); - } - } - - strncpy(fkid, foreign->id, MAX_TABLE_NAME_LEN); - - if (strstr(fkid, TEMP_TABLE_PATH_PREFIX) == NULL) { - innobase_convert_to_filename_charset( - strchr(fkid, '/') + 1, - strchr(foreign->id, '/') + 1, - MAX_TABLE_NAME_LEN+20); - } else { - on_tmp = TRUE; - } - - old_id = mem_strdup(foreign->id); - - if (ut_strlen(fkid) > ut_strlen(old_name_cs_filename) - + ((sizeof dict_ibfk) - 1) - && !memcmp(fkid, old_name_cs_filename, - ut_strlen(old_name_cs_filename)) - && !memcmp(fkid + ut_strlen(old_name_cs_filename), - dict_ibfk, (sizeof dict_ibfk) - 1)) { - - /* This is a generated >= 4.0.18 format id */ - - char table_name[MAX_TABLE_NAME_LEN] = ""; - uint errors = 0; - - if (strlen(table->name) > strlen(old_name)) { - foreign->id = static_cast<char*>( - mem_heap_alloc( - foreign->heap, - strlen(table->name) - + strlen(old_id) + 1)); - } - - /* Convert the table name to UTF-8 */ - strncpy(table_name, table->name, - MAX_TABLE_NAME_LEN); - 
innobase_convert_to_system_charset( - strchr(table_name, '/') + 1, - strchr(table->name, '/') + 1, - MAX_TABLE_NAME_LEN, &errors); - - if (errors) { - /* Table name could not be converted - from charset my_charset_filename to - UTF-8. This means that the table name - is already in UTF-8 (#mysql#50). */ - strncpy(table_name, table->name, - MAX_TABLE_NAME_LEN); - } - - /* Replace the prefix 'databasename/tablename' - with the new names */ - strcpy(foreign->id, table_name); - if (on_tmp) { - strcat(foreign->id, - old_id + ut_strlen(old_name)); - } else { - sprintf(strchr(foreign->id, '/') + 1, - "%s%s", - strchr(table_name, '/') +1, - strstr(old_id, "_ibfk_") ); - } - - } else { - /* This is a >= 4.0.18 format id where the user - gave the id name */ - db_len = dict_get_db_name_len(table->name) + 1; - - if (dict_get_db_name_len(table->name) - > dict_get_db_name_len(foreign->id)) { - - foreign->id = static_cast<char*>( - mem_heap_alloc( - foreign->heap, - db_len + strlen(old_id) + 1)); - } - - /* Replace the database prefix in id with the - one from table->name */ - - ut_memcpy(foreign->id, table->name, db_len); - - strcpy(foreign->id + db_len, - dict_remove_db_name(old_id)); - } - - mem_free(old_id); - } - - table->foreign_set.erase(it); - fk_set.insert(foreign); - - if (foreign->referenced_table) { - foreign->referenced_table->referenced_set.insert(foreign); - } - } - - ut_a(table->foreign_set.empty()); - table->foreign_set.swap(fk_set); - - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - if (ut_strlen(foreign->referenced_table_name) - < ut_strlen(table->name)) { - /* Allocate a longer name buffer; - TODO: store buf len to save memory */ - - foreign->referenced_table_name = mem_heap_strdup( - foreign->heap, table->name); - - dict_mem_referenced_table_name_lookup_set( - foreign, TRUE); - } else { - /* Use the same buffer */ - strcpy(foreign->referenced_table_name, table->name); - 
- dict_mem_referenced_table_name_lookup_set( - foreign, FALSE); - } - } - - return(DB_SUCCESS); -} - -/**********************************************************************//** -Change the id of a table object in the dictionary cache. This is used in -DISCARD TABLESPACE. */ -UNIV_INTERN -void -dict_table_change_id_in_cache( -/*==========================*/ - dict_table_t* table, /*!< in/out: table object already in cache */ - table_id_t new_id) /*!< in: new id to set */ -{ - ut_ad(table); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - /* Remove the table from the hash table of id's */ - - HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_ull(table->id), table); - table->id = new_id; - - /* Add the table back to the hash table */ - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_ull(table->id), table); -} - -/**********************************************************************//** -Removes a table object from the dictionary cache. 
*/ -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict) /*!< in: TRUE if table being evicted - to make room in the table LRU list */ -{ - dict_foreign_t* foreign; - dict_index_t* index; - ulint size; - - ut_ad(table); - ut_ad(dict_lru_validate()); - ut_a(table->n_ref_count == 0); - ut_a(table->n_rec_locks == 0); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - /* Remove the foreign constraints from the cache */ - std::for_each(table->foreign_set.begin(), table->foreign_set.end(), - dict_foreign_remove_partial()); - table->foreign_set.clear(); - - /* Reset table field in referencing constraints */ - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - foreign->referenced_table = NULL; - foreign->referenced_index = NULL; - } - - /* Remove the indexes from the cache */ - - for (index = UT_LIST_GET_LAST(table->indexes); - index != NULL; - index = UT_LIST_GET_LAST(table->indexes)) { - - dict_index_remove_from_cache_low(table, index, lru_evict); - } - - /* Remove table from the hash tables of tables */ - - HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, - ut_fold_string(table->name), table); - - HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_ull(table->id), table); - - /* Remove table from LRU or non-LRU list. */ - if (table->can_be_evicted) { - ut_ad(dict_lru_find_table(table)); - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - } else { - ut_ad(dict_non_lru_find_table(table)); - UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table); - } - - ut_ad(dict_lru_validate()); - - if (lru_evict) { - dict_table_autoinc_store(table); - } - - if (lru_evict && table->drop_aborted) { - /* Do as dict_table_try_drop_aborted() does. 
*/ - - trx_t* trx = trx_allocate_for_background(); - - ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - /* Mimic row_mysql_lock_data_dictionary(). */ - trx->dict_operation_lock_mode = RW_X_LATCH; - - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - - /* Silence a debug assertion in row_merge_drop_indexes(). */ - ut_d(table->n_ref_count++); - row_merge_drop_indexes(trx, table, TRUE); - ut_d(table->n_ref_count--); - ut_ad(table->n_ref_count == 0); - trx_commit_for_mysql(trx); - trx->dict_operation_lock_mode = 0; - trx_free_for_background(trx); - } - - size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; - - ut_ad(dict_sys->size >= size); - - dict_sys->size -= size; - - dict_mem_table_free(table); -} - -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -UNIV_INTERN -void -dict_table_remove_from_cache( -/*=========================*/ - dict_table_t* table) /*!< in, own: table */ -{ - dict_table_remove_from_cache_low(table, FALSE); -} - -/****************************************************************//** -If the given column name is reserved for InnoDB system columns, return -TRUE. -@return TRUE if name is reserved */ -UNIV_INTERN -ibool -dict_col_name_is_reserved( -/*======================*/ - const char* name) /*!< in: column name */ -{ - /* This check reminds that if a new system column is added to - the program, it should be dealt with here. */ -#if DATA_N_SYS_COLS != 3 -#error "DATA_N_SYS_COLS != 3" -#endif - - static const char* reserved_names[] = { - "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR" - }; - - ulint i; - - for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) { - if (innobase_strcasecmp(name, reserved_names[i]) == 0) { - - return(TRUE); - } - } - - return(FALSE); -} - -#if 1 /* This function is not very accurate at determining - whether an UNDO record will be too big. 
See innodb_4k.test, - Bug 13336585, for a testcase that shows an index that can - be created but cannot be updated. */ - -/****************************************************************//** -If an undo log record for this table might not fit on a single page, -return TRUE. -@return TRUE if the undo log record could become too big */ -static -ibool -dict_index_too_big_for_undo( -/*========================*/ - const dict_table_t* table, /*!< in: table */ - const dict_index_t* new_index) /*!< in: index */ -{ - /* Make sure that all column prefixes will fit in the undo log record - in trx_undo_page_report_modify() right after trx_undo_page_init(). */ - - ulint i; - const dict_index_t* clust_index - = dict_table_get_first_index(table); - ulint undo_page_len - = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE - + 2 /* next record pointer */ - + 1 /* type_cmpl */ - + 11 /* trx->undo_no */ + 11 /* table->id */ - + 1 /* rec_get_info_bits() */ - + 11 /* DB_TRX_ID */ - + 11 /* DB_ROLL_PTR */ - + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */ - + 2/* pointer to previous undo log record */; - - /* FTS index consists of auxiliary tables, they shall be excluded from - index row size check */ - if (new_index->type & DICT_FTS) { - return(false); - } - - if (!clust_index) { - ut_a(dict_index_is_clust(new_index)); - clust_index = new_index; - } - - /* Add the size of the ordering columns in the - clustered index. */ - for (i = 0; i < clust_index->n_uniq; i++) { - const dict_col_t* col - = dict_index_get_nth_col(clust_index, i); - - /* Use the maximum output size of - mach_write_compressed(), although the encoded - length should always fit in 2 bytes. */ - undo_page_len += 5 + dict_col_get_max_size(col); - } - - /* Add the old values of the columns to be updated. - First, the amount and the numbers of the columns. - These are written by mach_write_compressed() whose - maximum output length is 5 bytes. 
However, given that - the quantities are below REC_MAX_N_FIELDS (10 bits), - the maximum length is 2 bytes per item. */ - undo_page_len += 2 * (dict_table_get_n_cols(table) + 1); - - for (i = 0; i < clust_index->n_def; i++) { - const dict_col_t* col - = dict_index_get_nth_col(clust_index, i); - ulint max_size - = dict_col_get_max_size(col); - ulint fixed_size - = dict_col_get_fixed_size(col, - dict_table_is_comp(table)); - ulint max_prefix - = col->max_prefix; - - if (fixed_size) { - /* Fixed-size columns are stored locally. */ - max_size = fixed_size; - } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) { - /* Short columns are stored locally. */ - } else if (!col->ord_part - || (col->max_prefix - < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) { - /* See if col->ord_part would be set - because of new_index. Also check if the new - index could have longer prefix on columns - that already had ord_part set */ - ulint j; - - for (j = 0; j < new_index->n_uniq; j++) { - if (dict_index_get_nth_col( - new_index, j) == col) { - const dict_field_t* field - = dict_index_get_nth_field( - new_index, j); - - if (field->prefix_len - > col->max_prefix) { - max_prefix = - field->prefix_len; - } - - goto is_ord_part; - } - } - - if (col->ord_part) { - goto is_ord_part; - } - - /* This is not an ordering column in any index. - Thus, it can be stored completely externally. */ - max_size = BTR_EXTERN_FIELD_REF_SIZE; - } else { - ulint max_field_len; -is_ord_part: - max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); - - /* This is an ordering column in some index. - A long enough prefix must be written to the - undo log. See trx_undo_page_fetch_ext(). 
*/ - max_size = ut_min(max_size, max_field_len); - - /* We only store the needed prefix length in undo log */ - if (max_prefix) { - ut_ad(dict_table_get_format(table) - >= UNIV_FORMAT_B); - - max_size = ut_min(max_prefix, max_size); - } - - max_size += BTR_EXTERN_FIELD_REF_SIZE; - } - - undo_page_len += 5 + max_size; - } - - return(undo_page_len >= UNIV_PAGE_SIZE); -} -#endif - -/****************************************************************//** -If a record of this index might not fit on a single B-tree page, -return TRUE. -@return TRUE if the index record could become too big */ -static -ibool -dict_index_too_big_for_tree( -/*========================*/ - const dict_table_t* table, /*!< in: table */ - const dict_index_t* new_index) /*!< in: index */ -{ - ulint zip_size; - ulint comp; - ulint i; - /* maximum possible storage size of a record */ - ulint rec_max_size; - /* maximum allowed size of a record on a leaf page */ - ulint page_rec_max; - /* maximum allowed size of a node pointer record */ - ulint page_ptr_max; - - /* FTS index consists of auxiliary tables, they shall be excluded from - index row size check */ - if (new_index->type & DICT_FTS) { - return(false); - } - - DBUG_EXECUTE_IF( - "ib_force_create_table", - return(FALSE);); - - comp = dict_table_is_comp(table); - zip_size = dict_table_zip_size(table); - - if (zip_size && zip_size < UNIV_PAGE_SIZE) { - /* On a compressed page, two records must fit in the - uncompressed page modification log. On compressed - pages with zip_size == UNIV_PAGE_SIZE, this limit will - never be reached. */ - ut_ad(comp); - /* The maximum allowed record size is the size of - an empty page, minus a byte for recoding the heap - number in the page modification log. The maximum - allowed node pointer size is half that. 
*/ - page_rec_max = page_zip_empty_size(new_index->n_fields, - zip_size); - if (page_rec_max) { - page_rec_max--; - } - page_ptr_max = page_rec_max / 2; - /* On a compressed page, there is a two-byte entry in - the dense page directory for every record. But there - is no record header. */ - rec_max_size = 2; - } else { - /* The maximum allowed record size is half a B-tree - page. No additional sparse page directory entry will - be generated for the first few user records. */ - page_rec_max = page_get_free_space_of_empty(comp) / 2; - page_ptr_max = page_rec_max; - /* Each record has a header. */ - rec_max_size = comp - ? REC_N_NEW_EXTRA_BYTES - : REC_N_OLD_EXTRA_BYTES; - } - - if (comp) { - /* Include the "null" flags in the - maximum possible record size. */ - rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable); - } else { - /* For each column, include a 2-byte offset and a - "null" flag. The 1-byte format is only used in short - records that do not contain externally stored columns. - Such records could never exceed the page limit, even - when using the 2-byte format. */ - rec_max_size += 2 * new_index->n_fields; - } - - /* Compute the maximum possible record size. */ - for (i = 0; i < new_index->n_fields; i++) { - const dict_field_t* field - = dict_index_get_nth_field(new_index, i); - const dict_col_t* col - = dict_field_get_col(field); - ulint field_max_size; - ulint field_ext_max_size; - - /* In dtuple_convert_big_rec(), variable-length columns - that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2 - may be chosen for external storage. - - Fixed-length columns, and all columns of secondary - index records are always stored inline. */ - - /* Determine the maximum length of the index field. - The field_ext_max_size should be computed as the worst - case in rec_get_converted_size_comp() for - REC_STATUS_ORDINARY records. 
*/ - - field_max_size = dict_col_get_fixed_size(col, comp); - if (field_max_size) { - /* dict_index_add_col() should guarantee this */ - ut_ad(!field->prefix_len - || field->fixed_len == field->prefix_len); - /* Fixed lengths are not encoded - in ROW_FORMAT=COMPACT. */ - field_ext_max_size = 0; - goto add_field_size; - } - - field_max_size = dict_col_get_max_size(col); - field_ext_max_size = field_max_size < 256 ? 1 : 2; - - if (field->prefix_len) { - if (field->prefix_len < field_max_size) { - field_max_size = field->prefix_len; - } - } else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2 - && dict_index_is_clust(new_index)) { - - /* In the worst case, we have a locally stored - column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes. - The length can be stored in one byte. If the - column were stored externally, the lengths in - the clustered index page would be - BTR_EXTERN_FIELD_REF_SIZE and 2. */ - field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2; - field_ext_max_size = 1; - } - - if (comp) { - /* Add the extra size for ROW_FORMAT=COMPACT. - For ROW_FORMAT=REDUNDANT, these bytes were - added to rec_max_size before this loop. */ - rec_max_size += field_ext_max_size; - } -add_field_size: - rec_max_size += field_max_size; - - /* Check the size limit on leaf pages. */ - if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) { - - return(TRUE); - } - - /* Check the size limit on non-leaf pages. Records - stored in non-leaf B-tree pages consist of the unique - columns of the record (the key columns of the B-tree) - and a node pointer field. When we have processed the - unique columns, rec_max_size equals the size of the - node pointer record minus the node pointer column. */ - if (i + 1 == dict_index_get_n_unique_in_tree(new_index) - && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) { - - return(TRUE); - } - } - - return(FALSE); -} - -/**********************************************************************//** -Adds an index to the dictionary cache. 
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ -UNIV_INTERN -dberr_t -dict_index_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table on which the index is */ - dict_index_t* index, /*!< in, own: index; NOTE! The index memory - object is freed in this function! */ - ulint page_no,/*!< in: root page number of the index */ - ibool strict) /*!< in: TRUE=refuse to create the index - if records could be too big to fit in - an B-tree page */ -{ - dict_index_t* new_index; - ulint n_ord; - ulint i; - - ut_ad(index); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(index->n_def == index->n_fields); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(!dict_index_is_online_ddl(index)); - - ut_ad(mem_heap_validate(index->heap)); - ut_a(!dict_index_is_clust(index) - || UT_LIST_GET_LEN(table->indexes) == 0); - - if (!dict_index_find_cols(table, index)) { - - dict_mem_index_free(index); - return(DB_CORRUPTION); - } - - /* Build the cache internal representation of the index, - containing also the added system fields */ - - if (index->type == DICT_FTS) { - new_index = dict_index_build_internal_fts(table, index); - } else if (dict_index_is_clust(index)) { - new_index = dict_index_build_internal_clust(table, index); - } else { - new_index = dict_index_build_internal_non_clust(table, index); - } - - /* Set the n_fields value in new_index to the actual defined - number of fields in the cache internal representation */ - - new_index->n_fields = new_index->n_def; - new_index->trx_id = index->trx_id; - - if (dict_index_too_big_for_tree(table, new_index)) { - - if (strict) { -too_big: - dict_mem_index_free(new_index); - dict_mem_index_free(index); - return(DB_TOO_BIG_RECORD); - } else if (current_thd != NULL) { - /* Avoid the warning to be printed - during recovery. 
*/ - ib_warn_row_too_big(table); - } - } - - if (dict_index_is_univ(index)) { - n_ord = new_index->n_fields; - } else { - n_ord = new_index->n_uniq; - } - -#if 1 /* The following code predetermines whether to call - dict_index_too_big_for_undo(). This function is not - accurate. See innodb_4k.test, Bug 13336585, for a - testcase that shows an index that can be created but - cannot be updated. */ - - switch (dict_table_get_format(table)) { - case UNIV_FORMAT_A: - /* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store - prefixes of externally stored columns locally within - the record. There are no special considerations for - the undo log record size. */ - goto undo_size_ok; - - case UNIV_FORMAT_B: - /* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED, - column prefix indexes require that prefixes of - externally stored columns are written to the undo log. - This may make the undo log record bigger than the - record on the B-tree page. The maximum size of an - undo log record is the page size. That must be - checked for below. */ - break; - -#if UNIV_FORMAT_B != UNIV_FORMAT_MAX -# error "UNIV_FORMAT_B != UNIV_FORMAT_MAX" -#endif - } - - for (i = 0; i < n_ord; i++) { - const dict_field_t* field - = dict_index_get_nth_field(new_index, i); - const dict_col_t* col - = dict_field_get_col(field); - - /* In dtuple_convert_big_rec(), variable-length columns - that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2 - may be chosen for external storage. If the column appears - in an ordering column of an index, a longer prefix determined - by dict_max_field_len_store_undo() will be copied to the undo - log by trx_undo_page_report_modify() and - trx_undo_page_fetch_ext(). It suffices to check the - capacity of the undo log whenever new_index includes - a column prefix on a column that may be stored externally. 
*/ - - if (field->prefix_len /* prefix index */ - && (!col->ord_part /* not yet ordering column */ - || field->prefix_len > col->max_prefix) - && !dict_col_get_fixed_size(col, TRUE) /* variable-length */ - && dict_col_get_max_size(col) - > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) { - - if (dict_index_too_big_for_undo(table, new_index)) { - /* An undo log record might not fit in - a single page. Refuse to create this index. */ - - goto too_big; - } - - break; - } - } - -undo_size_ok: -#endif - /* Flag the ordering columns and also set column max_prefix */ - - for (i = 0; i < n_ord; i++) { - const dict_field_t* field - = dict_index_get_nth_field(new_index, i); - - field->col->ord_part = 1; - - if (field->prefix_len > field->col->max_prefix) { - field->col->max_prefix = field->prefix_len; - } - } - - if (!dict_index_is_univ(new_index)) { - - new_index->stat_n_diff_key_vals = - static_cast<ib_uint64_t*>(mem_heap_zalloc( - new_index->heap, - dict_index_get_n_unique(new_index) - * sizeof(*new_index->stat_n_diff_key_vals))); - - new_index->stat_n_sample_sizes = - static_cast<ib_uint64_t*>(mem_heap_zalloc( - new_index->heap, - dict_index_get_n_unique(new_index) - * sizeof(*new_index->stat_n_sample_sizes))); - - new_index->stat_n_non_null_key_vals = - static_cast<ib_uint64_t*>(mem_heap_zalloc( - new_index->heap, - dict_index_get_n_unique(new_index) - * sizeof(*new_index->stat_n_non_null_key_vals))); - } - - new_index->stat_index_size = 1; - new_index->stat_n_leaf_pages = 1; - - new_index->stat_defrag_n_pages_freed = 0; - new_index->stat_defrag_n_page_split = 0; - - new_index->stat_defrag_sample_next_slot = 0; - memset(&new_index->stat_defrag_data_size_sample, - 0x0, sizeof(ulint) * STAT_DEFRAG_DATA_SIZE_N_SAMPLE); - - /* Add the new index as the last index for the table */ - - UT_LIST_ADD_LAST(indexes, table->indexes, new_index); - new_index->table = table; - new_index->table_name = table->name; - new_index->search_info = btr_search_info_create(new_index->heap); - - 
new_index->page = page_no; - rw_lock_create(index_tree_rw_lock_key, &new_index->lock, - dict_index_is_ibuf(index) - ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE); - - dict_sys->size += mem_heap_get_size(new_index->heap); - - dict_mem_index_free(index); - - return(DB_SUCCESS); -} - -/**********************************************************************//** -Removes an index from the dictionary cache. */ -static -void -dict_index_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index, /*!< in, own: index */ - ibool lru_evict) /*!< in: TRUE if index being evicted - to make room in the table LRU list */ -{ - ulint size; - ulint retries = 0; - btr_search_t* info; - - ut_ad(table && index); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* No need to acquire the dict_index_t::lock here because - there can't be any active operations on this index (or table). */ - - if (index->online_log) { - ut_ad(index->online_status == ONLINE_INDEX_CREATION); - row_log_free(index->online_log); - } - - /* We always create search info whether or not adaptive - hash index is enabled or not. */ - info = btr_search_get_info(index); - ut_ad(info); - - /* We are not allowed to free the in-memory index struct - dict_index_t until all entries in the adaptive hash index - that point to any of the page belonging to his b-tree index - are dropped. This is so because dropping of these entries - require access to dict_index_t struct. To avoid such scenario - We keep a count of number of such pages in the search_info and - only free the dict_index_t struct when this count drops to - zero. See also: dict_table_can_be_evicted() */ - - do { - ulint ref_count = btr_search_info_get_ref_count(info, - index); - - if (ref_count == 0) { - break; - } - - /* Sleep for 10ms before trying again. 
*/ - os_thread_sleep(10000); - ++retries; - - if (retries % 500 == 0) { - /* No luck after 5 seconds of wait. */ - fprintf(stderr, "InnoDB: Error: Waited for" - " %lu secs for hash index" - " ref_count (%lu) to drop" - " to 0.\n" - "index: \"%s\"" - " table: \"%s\"\n", - retries/100, - ref_count, - index->name, - table->name); - } - - /* To avoid a hang here we commit suicide if the - ref_count doesn't drop to zero in 600 seconds. */ - if (retries >= 60000) { - ut_error; - } - } while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict); - - rw_lock_free(&index->lock); - - /* Remove the index from the list of indexes of the table */ - UT_LIST_REMOVE(indexes, table->indexes, index); - - size = mem_heap_get_size(index->heap); - - ut_ad(dict_sys->size >= size); - - dict_sys->size -= size; - - dict_mem_index_free(index); -} - -/**********************************************************************//** -Removes an index from the dictionary cache. */ -UNIV_INTERN -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ -{ - dict_index_remove_from_cache_low(table, index, FALSE); -} - -/*******************************************************************//** -Tries to find column names for the index and sets the col field of the -index. 
-@return TRUE if the column names were found */ -static -ibool -dict_index_find_cols( -/*=================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index) /*!< in: index */ -{ - ulint i; - - ut_ad(table && index); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(mutex_own(&(dict_sys->mutex))); - - for (i = 0; i < index->n_fields; i++) { - ulint j; - dict_field_t* field = dict_index_get_nth_field(index, i); - - for (j = 0; j < table->n_cols; j++) { - if (!innobase_strcasecmp(dict_table_get_col_name(table, j), - field->name)) { - field->col = dict_table_get_nth_col(table, j); - - goto found; - } - } - -#ifdef UNIV_DEBUG - /* It is an error not to find a matching column. */ - fputs("InnoDB: Error: no matching column for ", stderr); - ut_print_name(stderr, NULL, FALSE, field->name); - fputs(" in ", stderr); - dict_index_name_print(stderr, NULL, index); - fputs("!\n", stderr); -#endif /* UNIV_DEBUG */ - return(FALSE); - -found: - ; - } - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ - -/*******************************************************************//** -Adds a column to index. */ -UNIV_INTERN -void -dict_index_add_col( -/*===============*/ - dict_index_t* index, /*!< in/out: index */ - const dict_table_t* table, /*!< in: table */ - dict_col_t* col, /*!< in: column */ - ulint prefix_len) /*!< in: column prefix length */ -{ - dict_field_t* field; - const char* col_name; - - col_name = dict_table_get_col_name(table, dict_col_get_no(col)); - - dict_mem_index_add_field(index, col_name, prefix_len); - - field = dict_index_get_nth_field(index, index->n_def - 1); - - field->col = col; - field->fixed_len = (unsigned int) dict_col_get_fixed_size( - col, dict_table_is_comp(table)); - - if (prefix_len && field->fixed_len > prefix_len) { - field->fixed_len = (unsigned int) prefix_len; - } - - /* Long fixed-length fields that need external storage are treated as - variable-length fields, so that the extern flag can be embedded in - the length word. 
*/ - - if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) { - field->fixed_len = 0; - } -#if DICT_MAX_FIXED_COL_LEN != 768 - /* The comparison limit above must be constant. If it were - changed, the disk format of some fixed-length columns would - change, which would be a disaster. */ -# error "DICT_MAX_FIXED_COL_LEN != 768" -#endif - - if (!(col->prtype & DATA_NOT_NULL)) { - index->n_nullable++; - } -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Copies fields contained in index2 to index1. */ -static -void -dict_index_copy( -/*============*/ - dict_index_t* index1, /*!< in: index to copy to */ - dict_index_t* index2, /*!< in: index to copy from */ - const dict_table_t* table, /*!< in: table */ - ulint start, /*!< in: first position to copy */ - ulint end) /*!< in: last position to copy */ -{ - dict_field_t* field; - ulint i; - - /* Copy fields contained in index2 */ - - for (i = start; i < end; i++) { - - field = dict_index_get_nth_field(index2, i); - dict_index_add_col(index1, table, field->col, - field->prefix_len); - } -} - -/*******************************************************************//** -Copies types of fields contained in index to tuple. 
*/ -UNIV_INTERN -void -dict_index_copy_types( -/*==================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const dict_index_t* index, /*!< in: index */ - ulint n_fields) /*!< in: number of - field types to copy */ -{ - ulint i; - - if (dict_index_is_univ(index)) { - dtuple_set_types_binary(tuple, n_fields); - - return; - } - - for (i = 0; i < n_fields; i++) { - const dict_field_t* ifield; - dtype_t* dfield_type; - - ifield = dict_index_get_nth_field(index, i); - dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i)); - dict_col_copy_type(dict_field_get_col(ifield), dfield_type); - } -} - -/*******************************************************************//** -Copies types of columns contained in table to tuple and sets all -fields of the tuple to the SQL NULL value. This function should -be called right after dtuple_create(). */ -UNIV_INTERN -void -dict_table_copy_types( -/*==================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const dict_table_t* table) /*!< in: table */ -{ - ulint i; - - for (i = 0; i < dtuple_get_n_fields(tuple); i++) { - - dfield_t* dfield = dtuple_get_nth_field(tuple, i); - dtype_t* dtype = dfield_get_type(dfield); - - dfield_set_null(dfield); - dict_col_copy_type(dict_table_get_nth_col(table, i), dtype); - } -} - -/******************************************************************** -Wait until all the background threads of the given table have exited, i.e., -bg_threads == 0. Note: bg_threads_mutex must be reserved when -calling this. */ -UNIV_INTERN -void -dict_table_wait_for_bg_threads_to_exit( -/*===================================*/ - dict_table_t* table, /*< in: table */ - ulint delay) /*< in: time in microseconds to wait between - checks of bg_threads. 
*/ -{ - fts_t* fts = table->fts; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(mutex_own(&fts->bg_threads_mutex)); -#endif /* UNIV_SYNC_DEBUG */ - - while (fts->bg_threads > 0) { - mutex_exit(&fts->bg_threads_mutex); - - os_thread_sleep(delay); - - mutex_enter(&fts->bg_threads_mutex); - } -} - -/*******************************************************************//** -Builds the internal dictionary cache representation for a clustered -index, containing also system fields not defined by the user. -@return own: the internal representation of the clustered index */ -static -dict_index_t* -dict_index_build_internal_clust( -/*============================*/ - const dict_table_t* table, /*!< in: table */ - dict_index_t* index) /*!< in: user representation of - a clustered index */ -{ - dict_index_t* new_index; - dict_field_t* field; - ulint trx_id_pos; - ulint i; - ibool* indexed; - - ut_ad(table && index); - ut_ad(dict_index_is_clust(index)); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - /* Create a new index object with certainly enough fields */ - new_index = dict_mem_index_create(table->name, - index->name, table->space, - index->type, - index->n_fields + table->n_cols); - - /* Copy other relevant data from the old index struct to the new - struct: it inherits the values */ - - new_index->n_user_defined_cols = index->n_fields; - - new_index->id = index->id; - btr_search_index_init(new_index); - - /* Copy the fields of index */ - dict_index_copy(new_index, index, table, 0, index->n_fields); - - if (dict_index_is_univ(index)) { - /* No fixed number of fields determines an entry uniquely */ - - new_index->n_uniq = REC_MAX_N_FIELDS; - - } else if (dict_index_is_unique(index)) { - /* Only the fields defined so far are needed to identify - the index entry uniquely */ - - new_index->n_uniq = new_index->n_def; - } else { - /* Also the row id is needed to identify the entry */ - new_index->n_uniq = 1 + new_index->n_def; - } - - 
new_index->trx_id_offset = 0; - - if (!dict_index_is_ibuf(index)) { - /* Add system columns, trx id first */ - - trx_id_pos = new_index->n_def; - -#if DATA_ROW_ID != 0 -# error "DATA_ROW_ID != 0" -#endif -#if DATA_TRX_ID != 1 -# error "DATA_TRX_ID != 1" -#endif -#if DATA_ROLL_PTR != 2 -# error "DATA_ROLL_PTR != 2" -#endif - - if (!dict_index_is_unique(index)) { - dict_index_add_col(new_index, table, - dict_table_get_sys_col( - table, DATA_ROW_ID), - 0); - trx_id_pos++; - } - - dict_index_add_col(new_index, table, - dict_table_get_sys_col(table, DATA_TRX_ID), - 0); - - dict_index_add_col(new_index, table, - dict_table_get_sys_col(table, - DATA_ROLL_PTR), - 0); - - for (i = 0; i < trx_id_pos; i++) { - - ulint fixed_size = dict_col_get_fixed_size( - dict_index_get_nth_col(new_index, i), - dict_table_is_comp(table)); - - if (fixed_size == 0) { - new_index->trx_id_offset = 0; - - break; - } - - if (dict_index_get_nth_field(new_index, i)->prefix_len - > 0) { - new_index->trx_id_offset = 0; - - break; - } - - /* Add fixed_size to new_index->trx_id_offset. - Because the latter is a bit-field, an overflow - can theoretically occur. Check for it. */ - fixed_size += new_index->trx_id_offset; - - new_index->trx_id_offset = fixed_size; - - if (new_index->trx_id_offset != fixed_size) { - /* Overflow. Pretend that this is a - variable-length PRIMARY KEY. 
*/ - ut_ad(0); - new_index->trx_id_offset = 0; - break; - } - } - - } - - /* Remember the table columns already contained in new_index */ - indexed = static_cast<ibool*>( - mem_zalloc(table->n_cols * sizeof *indexed)); - - /* Mark the table columns already contained in new_index */ - for (i = 0; i < new_index->n_def; i++) { - - field = dict_index_get_nth_field(new_index, i); - - /* If there is only a prefix of the column in the index - field, do not mark the column as contained in the index */ - - if (field->prefix_len == 0) { - - indexed[field->col->ind] = TRUE; - } - } - - /* Add to new_index non-system columns of table not yet included - there */ - for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) { - - dict_col_t* col = dict_table_get_nth_col(table, i); - ut_ad(col->mtype != DATA_SYS); - - if (!indexed[col->ind]) { - dict_index_add_col(new_index, table, col, 0); - } - } - - mem_free(indexed); - - ut_ad(dict_index_is_ibuf(index) - || (UT_LIST_GET_LEN(table->indexes) == 0)); - - new_index->cached = TRUE; - - return(new_index); -} - -/*******************************************************************//** -Builds the internal dictionary cache representation for a non-clustered -index, containing also system fields not defined by the user. 
-@return own: the internal representation of the non-clustered index */ -static -dict_index_t* -dict_index_build_internal_non_clust( -/*================================*/ - const dict_table_t* table, /*!< in: table */ - dict_index_t* index) /*!< in: user representation of - a non-clustered index */ -{ - dict_field_t* field; - dict_index_t* new_index; - dict_index_t* clust_index; - ulint i; - ibool* indexed; - - ut_ad(table && index); - ut_ad(!dict_index_is_clust(index)); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - /* The clustered index should be the first in the list of indexes */ - clust_index = UT_LIST_GET_FIRST(table->indexes); - - ut_ad(clust_index); - ut_ad(dict_index_is_clust(clust_index)); - ut_ad(!dict_index_is_univ(clust_index)); - - /* Create a new index */ - new_index = dict_mem_index_create( - table->name, index->name, index->space, index->type, - index->n_fields + 1 + clust_index->n_uniq); - - /* Copy other relevant data from the old index - struct to the new struct: it inherits the values */ - - new_index->n_user_defined_cols = index->n_fields; - - new_index->id = index->id; - btr_search_index_init(new_index); - - /* Copy fields from index to new_index */ - dict_index_copy(new_index, index, table, 0, index->n_fields); - - /* Remember the table columns already contained in new_index */ - indexed = static_cast<ibool*>( - mem_zalloc(table->n_cols * sizeof *indexed)); - - /* Mark the table columns already contained in new_index */ - for (i = 0; i < new_index->n_def; i++) { - - field = dict_index_get_nth_field(new_index, i); - - /* If there is only a prefix of the column in the index - field, do not mark the column as contained in the index */ - - if (field->prefix_len == 0) { - - indexed[field->col->ind] = TRUE; - } - } - - /* Add to new_index the columns necessary to determine the clustered - index entry uniquely */ - - for (i = 0; i < clust_index->n_uniq; i++) { - - field = 
dict_index_get_nth_field(clust_index, i); - - if (!indexed[field->col->ind]) { - dict_index_add_col(new_index, table, field->col, - field->prefix_len); - } - } - - mem_free(indexed); - - if (dict_index_is_unique(index)) { - new_index->n_uniq = index->n_fields; - } else { - new_index->n_uniq = new_index->n_def; - } - - /* Set the n_fields value in new_index to the actual defined - number of fields */ - - new_index->n_fields = new_index->n_def; - - new_index->cached = TRUE; - - return(new_index); -} - -/*********************************************************************** -Builds the internal dictionary cache representation for an FTS index. -@return own: the internal representation of the FTS index */ -static -dict_index_t* -dict_index_build_internal_fts( -/*==========================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index) /*!< in: user representation of an FTS index */ -{ - dict_index_t* new_index; - - ut_ad(table && index); - ut_ad(index->type == DICT_FTS); -#ifdef UNIV_SYNC_DEBUG - ut_ad(mutex_own(&(dict_sys->mutex))); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - /* Create a new index */ - new_index = dict_mem_index_create( - table->name, index->name, index->space, index->type, - index->n_fields); - - /* Copy other relevant data from the old index struct to the new - struct: it inherits the values */ - - new_index->n_user_defined_cols = index->n_fields; - - new_index->id = index->id; - btr_search_index_init(new_index); - - /* Copy fields from index to new_index */ - dict_index_copy(new_index, index, table, 0, index->n_fields); - - new_index->n_uniq = 0; - new_index->cached = TRUE; - - if (table->fts->cache == NULL) { - table->fts->cache = fts_cache_create(table); - } - - rw_lock_x_lock(&table->fts->cache->init_lock); - /* Notify the FTS cache about this index. 
*/ - fts_cache_index_cache_create(table, new_index); - rw_lock_x_unlock(&table->fts->cache->init_lock); - - return(new_index); -} -/*====================== FOREIGN KEY PROCESSING ========================*/ - -#define DB_FOREIGN_KEY_IS_PREFIX_INDEX 200 -#define DB_FOREIGN_KEY_COL_NOT_NULL 201 -#define DB_FOREIGN_KEY_COLS_NOT_EQUAL 202 -#define DB_FOREIGN_KEY_INDEX_NOT_FOUND 203 - -/*********************************************************************//** -Checks if a table is referenced by foreign keys. -@return TRUE if table is referenced by a foreign key */ -UNIV_INTERN -ibool -dict_table_is_referenced_by_foreign_key( -/*====================================*/ - const dict_table_t* table) /*!< in: InnoDB table */ -{ - return(!table->referenced_set.empty()); -} - -/**********************************************************************//** -Removes a foreign constraint struct from the dictionary cache. */ -UNIV_INTERN -void -dict_foreign_remove_from_cache( -/*===========================*/ - dict_foreign_t* foreign) /*!< in, own: foreign constraint */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_a(foreign); - - if (foreign->referenced_table != NULL) { - foreign->referenced_table->referenced_set.erase(foreign); - } - - if (foreign->foreign_table != NULL) { - foreign->foreign_table->foreign_set.erase(foreign); - } - - dict_foreign_free(foreign); -} - -/**********************************************************************//** -Looks for the foreign constraint from the foreign and referenced lists -of a table. 
-@return foreign constraint */ -static -dict_foreign_t* -dict_foreign_find( -/*==============*/ - dict_table_t* table, /*!< in: table object */ - dict_foreign_t* foreign) /*!< in: foreign constraint */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - - ut_ad(dict_foreign_set_validate(table->foreign_set)); - ut_ad(dict_foreign_set_validate(table->referenced_set)); - - dict_foreign_set::iterator it = table->foreign_set.find(foreign); - - if (it != table->foreign_set.end()) { - return(*it); - } - - it = table->referenced_set.find(foreign); - - if (it != table->referenced_set.end()) { - return(*it); - } - - return(NULL); -} - - -/*********************************************************************//** -Tries to find an index whose first fields are the columns in the array, -in the same order and is not marked for deletion and is not the same -as types_idx. -@return matching index, NULL if not found */ -UNIV_INTERN -dict_index_t* -dict_foreign_find_index( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - const char** columns,/*!< in: array of column names */ - ulint n_cols, /*!< in: number of columns */ - const dict_index_t* types_idx, - /*!< in: NULL or an index - whose types the column types - must match */ - bool check_charsets, - /*!< in: whether to check - charsets. 
only has an effect - if types_idx != NULL */ - ulint check_null, - /*!< in: nonzero if none of - the columns must be declared - NOT NULL */ - ulint* error, /*!< out: error code */ - ulint* err_col_no, - /*!< out: column number where - error happened */ - dict_index_t** err_index) - /*!< out: index where error - happened */ -{ - dict_index_t* index; - - ut_ad(mutex_own(&dict_sys->mutex)); - - if (error) { - *error = DB_FOREIGN_KEY_INDEX_NOT_FOUND; - } - - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (types_idx != index - && !(index->type & DICT_FTS) - && !index->to_be_dropped - && dict_foreign_qualify_index( - table, col_names, columns, n_cols, - index, types_idx, - check_charsets, check_null, - error, err_col_no,err_index)) { - if (error) { - *error = DB_SUCCESS; - } - - return(index); - } - - index = dict_table_get_next_index(index); - } - - return(NULL); -} -#ifdef WITH_WSREP -dict_index_t* -wsrep_dict_foreign_find_index( -/*====================*/ - dict_table_t* table, /*!< in: table */ - const char** col_names, /*!< in: column names, or NULL - to use table->col_names */ - const char** columns,/*!< in: array of column names */ - ulint n_cols, /*!< in: number of columns */ - dict_index_t* types_idx, /*!< in: NULL or an index to whose types the - column types must match */ - ibool check_charsets, - /*!< in: whether to check charsets. - only has an effect if types_idx != NULL */ - ulint check_null) - /*!< in: nonzero if none of the columns must - be declared NOT NULL */ -{ - return dict_foreign_find_index( - table, col_names, columns, n_cols, types_idx, check_charsets, - check_null, NULL, NULL, NULL); -} -#endif /* WITH_WSREP */ -/**********************************************************************//** -Report an error in a foreign key definition. 
*/ -static -void -dict_foreign_error_report_low( -/*==========================*/ - FILE* file, /*!< in: output stream */ - const char* name) /*!< in: table name */ -{ - rewind(file); - ut_print_timestamp(file); - fprintf(file, " Error in foreign key constraint of table %s:\n", - name); -} - -/**********************************************************************//** -Report an error in a foreign key definition. */ -static -void -dict_foreign_error_report( -/*======================*/ - FILE* file, /*!< in: output stream */ - dict_foreign_t* fk, /*!< in: foreign key constraint */ - const char* msg) /*!< in: the error message */ -{ - std::string fk_str; - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(file, fk->foreign_table_name); - fputs(msg, file); - fputs(" Constraint:\n", file); - fk_str = dict_print_info_on_foreign_key_in_create_format(NULL, fk, TRUE); - fputs(fk_str.c_str(), file); - putc('\n', file); - if (fk->foreign_index) { - fputs("The index in the foreign key in table is ", file); - ut_print_name(file, NULL, FALSE, fk->foreign_index->name); - fputs("\n" - "See " REFMAN "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - file); - } - mutex_exit(&dict_foreign_err_mutex); -} - -/**********************************************************************//** -Adds a foreign key constraint object to the dictionary cache. May free -the object if there already is an object with the same identifier in. -At least one of the foreign table and the referenced table must already -be in the dictionary cache! 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_foreign_add_to_cache( -/*======================*/ - dict_foreign_t* foreign, - /*!< in, own: foreign key constraint */ - const char** col_names, - /*!< in: column names, or NULL to use - foreign->foreign_table->col_names */ - bool check_charsets, - /*!< in: whether to check charset - compatibility */ - dict_err_ignore_t ignore_err) - /*!< in: error to be ignored */ -{ - dict_table_t* for_table; - dict_table_t* ref_table; - dict_foreign_t* for_in_cache = NULL; - dict_index_t* index; - ibool added_to_referenced_list= FALSE; - FILE* ef = dict_foreign_err_file; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - for_table = dict_table_check_if_in_cache_low( - foreign->foreign_table_name_lookup); - - ref_table = dict_table_check_if_in_cache_low( - foreign->referenced_table_name_lookup); - ut_a(for_table || ref_table); - - if (for_table) { - for_in_cache = dict_foreign_find(for_table, foreign); - } - - if (!for_in_cache && ref_table) { - for_in_cache = dict_foreign_find(ref_table, foreign); - } - - if (for_in_cache) { - dict_foreign_free(foreign); - } else { - for_in_cache = foreign; - } - - if (ref_table && !for_in_cache->referenced_table) { - index = dict_foreign_find_index( - ref_table, NULL, - for_in_cache->referenced_col_names, - for_in_cache->n_fields, for_in_cache->foreign_index, - check_charsets, false, NULL, NULL, NULL); - - if (index == NULL - && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) { - dict_foreign_error_report( - ef, for_in_cache, - "there is no index in referenced table" - " which would contain\n" - "the columns as the first columns," - " or the data types in the\n" - "referenced table do not match" - " the ones in table."); - - if (for_in_cache == foreign) { - dict_foreign_free(foreign); - } - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - for_in_cache->referenced_table = ref_table; - for_in_cache->referenced_index = index; - - std::pair<dict_foreign_set::iterator, bool> ret - = 
ref_table->referenced_set.insert(for_in_cache); - - ut_a(ret.second); /* second is true if the insertion - took place */ - added_to_referenced_list = TRUE; - } - - if (for_table && !for_in_cache->foreign_table) { - ulint index_error; - ulint err_col; - dict_index_t *err_index=NULL; - - index = dict_foreign_find_index( - for_table, col_names, - for_in_cache->foreign_col_names, - for_in_cache->n_fields, - for_in_cache->referenced_index, check_charsets, - for_in_cache->type - & (DICT_FOREIGN_ON_DELETE_SET_NULL - | DICT_FOREIGN_ON_UPDATE_SET_NULL), - &index_error, &err_col, &err_index); - - if (index == NULL - && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) { - dict_foreign_error_report( - ef, for_in_cache, - "there is no index in the table" - " which would contain\n" - "the columns as the first columns," - " or the data types in the\n" - "table do not match" - " the ones in the referenced table\n" - "or one of the ON ... SET NULL columns" - " is declared NOT NULL."); - - if (for_in_cache == foreign) { - if (added_to_referenced_list) { - const dict_foreign_set::size_type n - = ref_table->referenced_set - .erase(for_in_cache); - - ut_a(n == 1); /* the number of - elements removed must - be one */ - } - - dict_foreign_free(foreign); - } - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - for_in_cache->foreign_table = for_table; - for_in_cache->foreign_index = index; - std::pair<dict_foreign_set::iterator, bool> ret - = for_table->foreign_set.insert(for_in_cache); - - ut_a(ret.second); /* second is true if the insertion - took place */ - } - - /* We need to move the table to the non-LRU end of the table LRU - list. Otherwise it will be evicted from the cache. 
*/ - - if (ref_table != NULL && ref_table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(ref_table); - } - - if (for_table != NULL && for_table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(for_table); - } - - ut_ad(dict_lru_validate()); - - return(DB_SUCCESS); -} - -/*********************************************************************//** -Scans from pointer onwards. Stops if is at the start of a copy of -'string' where characters are compared without case sensitivity, and -only outside `` or "" quotes. Stops also at NUL. -@return scanned up to this */ -static -const char* -dict_scan_to( -/*=========*/ - const char* ptr, /*!< in: scan from */ - const char* string) /*!< in: look for this */ -{ - char quote = '\0'; - bool escape = false; - - for (; *ptr; ptr++) { - if (*ptr == quote) { - /* Closing quote character: do not look for - starting quote or the keyword. */ - - /* If the quote character is escaped by a - backslash, ignore it. */ - if (escape) { - escape = false; - } else { - quote = '\0'; - } - } else if (quote) { - /* Within quotes: do nothing. */ - if (escape) { - escape = false; - } else if (*ptr == '\\') { - escape = true; - } - } else if (*ptr == '`' || *ptr == '"' || *ptr == '\'') { - /* Starting quote: remember the quote character. */ - quote = *ptr; - } else { - /* Outside quotes: look for the keyword. */ - ulint i; - for (i = 0; string[i]; i++) { - if (toupper((int)(unsigned char)(ptr[i])) - != toupper((int)(unsigned char) - (string[i]))) { - goto nomatch; - } - } - break; -nomatch: - ; - } - } - - return(ptr); -} - -/*********************************************************************//** -Accepts a specified string. Comparisons are case-insensitive. 
-@return if string was accepted, the pointer is moved after that, else -ptr is returned */ -static -const char* -dict_accept( -/*========*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ - const char* ptr, /*!< in: scan from this */ - const char* string, /*!< in: accept only this string as the next - non-whitespace string */ - ibool* success)/*!< out: TRUE if accepted */ -{ - const char* old_ptr = ptr; - const char* old_ptr2; - - *success = FALSE; - - while (my_isspace(cs, *ptr)) { - ptr++; - } - - old_ptr2 = ptr; - - ptr = dict_scan_to(ptr, string); - - if (*ptr == '\0' || old_ptr2 != ptr) { - return(old_ptr); - } - - *success = TRUE; - - return(ptr + ut_strlen(string)); -} - -/*********************************************************************//** -Scans an id. For the lexical definition of an 'id', see the code below. -Strips backquotes or double quotes from around the id. -@return scanned to */ -static -const char* -dict_scan_id( -/*=========*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ - const char* ptr, /*!< in: scanned to */ - mem_heap_t* heap, /*!< in: heap where to allocate the id - (NULL=id will not be allocated, but it - will point to string near ptr) */ - const char** id, /*!< out,own: the id; NULL if no id was - scannable */ - ibool table_id,/*!< in: TRUE=convert the allocated id - as a table name; FALSE=convert to UTF-8 */ - ibool accept_also_dot) - /*!< in: TRUE if also a dot can appear in a - non-quoted id; in a quoted id it can appear - always */ -{ - char quote = '\0'; - ulint len = 0; - const char* s; - char* str; - char* dst; - - *id = NULL; - - while (my_isspace(cs, *ptr)) { - ptr++; - } - - if (*ptr == '\0') { - - return(ptr); - } - - if (*ptr == '`' || *ptr == '"') { - quote = *ptr++; - } - - s = ptr; - - if (quote) { - for (;;) { - if (!*ptr) { - /* Syntax error */ - return(ptr); - } - if (*ptr == quote) { - ptr++; - if (*ptr != quote) { - break; - } - } - ptr++; - len++; - } - } else { - while 
(!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')' - && (accept_also_dot || *ptr != '.') - && *ptr != ',' && *ptr != '\0') { - - ptr++; - } - - len = ptr - s; - } - - if (UNIV_UNLIKELY(!heap)) { - /* no heap given: id will point to source string */ - *id = s; - return(ptr); - } - - if (quote) { - char* d; - - str = d = static_cast<char*>( - mem_heap_alloc(heap, len + 1)); - - while (len--) { - if ((*d++ = *s++) == quote) { - s++; - } - } - *d++ = 0; - len = d - str; - ut_ad(*s == quote); - ut_ad(s + 1 == ptr); - } else { - str = mem_heap_strdupl(heap, s, len); - } - - if (!table_id) { -convert_id: - /* Convert the identifier from connection character set - to UTF-8. */ - len = 3 * len + 1; - *id = dst = static_cast<char*>(mem_heap_alloc(heap, len)); - - innobase_convert_from_id(cs, dst, str, len); - } else if (!strncmp(str, srv_mysql50_table_name_prefix, - sizeof(srv_mysql50_table_name_prefix) - 1)) { - /* This is a pre-5.1 table name - containing chars other than [A-Za-z0-9]. - Discard the prefix and use raw UTF-8 encoding. */ - str += sizeof(srv_mysql50_table_name_prefix) - 1; - len -= sizeof(srv_mysql50_table_name_prefix) - 1; - goto convert_id; - } else { - /* Encode using filename-safe characters. */ - len = 5 * len + 1; - *id = dst = static_cast<char*>(mem_heap_alloc(heap, len)); - - innobase_convert_from_table_id(cs, dst, str, len); - } - - return(ptr); -} - -/*********************************************************************//** -Tries to scan a column name. 
-@return scanned to */ -static -const char* -dict_scan_col( -/*==========*/ - struct charset_info_st* cs, /*!< in: the character set of ptr */ - const char* ptr, /*!< in: scanned to */ - ibool* success,/*!< out: TRUE if success */ - dict_table_t* table, /*!< in: table in which the column is */ - const dict_col_t** column, /*!< out: pointer to column if success */ - mem_heap_t* heap, /*!< in: heap where to allocate */ - const char** name) /*!< out,own: the column name; - NULL if no name was scannable */ -{ - ulint i; - - *success = FALSE; - - ptr = dict_scan_id(cs, ptr, heap, name, FALSE, TRUE); - - if (*name == NULL) { - - return(ptr); /* Syntax error */ - } - - if (table == NULL) { - *success = TRUE; - *column = NULL; - } else { - for (i = 0; i < dict_table_get_n_cols(table); i++) { - - const char* col_name = dict_table_get_col_name( - table, i); - - if (0 == innobase_strcasecmp(col_name, *name)) { - /* Found */ - - *success = TRUE; - *column = dict_table_get_nth_col(table, i); - strcpy((char*) *name, col_name); - - break; - } - } - } - - return(ptr); -} - - -/*********************************************************************//** -Open a table from its database and table name, this is currently used by -foreign constraint parser to get the referenced table. 
-@return complete table name with database and table name, allocated from -heap memory passed in */ -UNIV_INTERN -char* -dict_get_referenced_table( -/*======================*/ - const char* name, /*!< in: foreign key table name */ - const char* database_name, /*!< in: table db name */ - ulint database_name_len, /*!< in: db name length */ - const char* table_name, /*!< in: table name */ - ulint table_name_len, /*!< in: table name length */ - dict_table_t** table, /*!< out: table object or NULL */ - mem_heap_t* heap) /*!< in/out: heap memory */ -{ - char* ref; - const char* db_name; - - if (!database_name) { - /* Use the database name of the foreign key table */ - - db_name = name; - database_name_len = dict_get_db_name_len(name); - } else { - db_name = database_name; - } - - /* Copy database_name, '/', table_name, '\0' */ - ref = static_cast<char*>( - mem_heap_alloc(heap, database_name_len + table_name_len + 2)); - - memcpy(ref, db_name, database_name_len); - ref[database_name_len] = '/'; - memcpy(ref + database_name_len + 1, table_name, table_name_len + 1); - - /* Values; 0 = Store and compare as given; case sensitive - 1 = Store and compare in lower; case insensitive - 2 = Store as given, compare in lower; case semi-sensitive */ - if (innobase_get_lower_case_table_names() == 2) { - innobase_casedn_str(ref); - *table = dict_table_get_low(ref); - memcpy(ref, db_name, database_name_len); - ref[database_name_len] = '/'; - memcpy(ref + database_name_len + 1, table_name, table_name_len + 1); - - } else { -#ifndef __WIN__ - if (innobase_get_lower_case_table_names() == 1) { - innobase_casedn_str(ref); - } -#else - innobase_casedn_str(ref); -#endif /* !__WIN__ */ - *table = dict_table_get_low(ref); - } - - return(ref); -} -/*********************************************************************//** -Scans a table name from an SQL string. 
-@return scanned to */ -static -const char* -dict_scan_table_name( -/*=================*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ - const char* ptr, /*!< in: scanned to */ - dict_table_t** table, /*!< out: table object or NULL */ - const char* name, /*!< in: foreign key table name */ - ibool* success,/*!< out: TRUE if ok name found */ - mem_heap_t* heap, /*!< in: heap where to allocate the id */ - const char** ref_name)/*!< out,own: the table name; - NULL if no name was scannable */ -{ - const char* database_name = NULL; - ulint database_name_len = 0; - const char* table_name = NULL; - const char* scan_name; - - *success = FALSE; - *table = NULL; - - ptr = dict_scan_id(cs, ptr, heap, &scan_name, TRUE, FALSE); - - if (scan_name == NULL) { - - return(ptr); /* Syntax error */ - } - - if (*ptr == '.') { - /* We scanned the database name; scan also the table name */ - - ptr++; - - database_name = scan_name; - database_name_len = strlen(database_name); - - ptr = dict_scan_id(cs, ptr, heap, &table_name, TRUE, FALSE); - - if (table_name == NULL) { - - return(ptr); /* Syntax error */ - } - } else { - /* To be able to read table dumps made with InnoDB-4.0.17 or - earlier, we must allow the dot separator between the database - name and the table name also to appear within a quoted - identifier! InnoDB used to print a constraint as: - ... REFERENCES `databasename.tablename` ... - starting from 4.0.18 it is - ... REFERENCES `databasename`.`tablename` ... */ - const char* s; - - for (s = scan_name; *s; s++) { - if (*s == '.') { - database_name = scan_name; - database_name_len = s - scan_name; - scan_name = ++s; - break;/* to do: multiple dots? */ - } - } - - table_name = scan_name; - } - - *ref_name = dict_get_referenced_table( - name, database_name, database_name_len, - table_name, strlen(table_name), table, heap); - - *success = TRUE; - return(ptr); -} - -/*********************************************************************//** -Skips one id. 
The id is allowed to contain also '.'. -@return scanned to */ -static -const char* -dict_skip_word( -/*===========*/ - struct charset_info_st* cs,/*!< in: the character set of ptr */ - const char* ptr, /*!< in: scanned to */ - ibool* success)/*!< out: TRUE if success, FALSE if just spaces - left in string or a syntax error */ -{ - const char* start; - - *success = FALSE; - - ptr = dict_scan_id(cs, ptr, NULL, &start, FALSE, TRUE); - - if (start) { - *success = TRUE; - } - - return(ptr); -} - -/*********************************************************************//** -Removes MySQL comments from an SQL string. A comment is either -(a) '#' to the end of the line, -(b) '--[space]' to the end of the line, or -(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar -C comment syntax). -@return own: SQL string stripped from comments; the caller must free -this with mem_free()! */ -static -char* -dict_strip_comments( -/*================*/ - const char* sql_string, /*!< in: SQL string */ - size_t sql_length) /*!< in: length of sql_string */ -{ - char* str; - const char* sptr; - const char* eptr = sql_string + sql_length; - char* ptr; - /* unclosed quote character (0 if none) */ - char quote = 0; - bool escape = false; - - DBUG_ENTER("dict_strip_comments"); - - DBUG_PRINT("dict_strip_comments", ("%s", sql_string)); - - str = static_cast<char*>(mem_alloc(sql_length + 1)); - - sptr = sql_string; - ptr = str; - - for (;;) { -scan_more: - if (sptr >= eptr || *sptr == '\0') { -end_of_string: - *ptr = '\0'; - - ut_a(ptr <= str + sql_length); - - DBUG_PRINT("dict_strip_comments", ("%s", str)); - DBUG_RETURN(str); - } - - if (*sptr == quote) { - /* Closing quote character: do not look for - starting quote or comments. */ - - /* If the quote character is escaped by a - backslash, ignore it. */ - if (escape) { - escape = false; - } else { - quote = 0; - } - } else if (quote) { - /* Within quotes: do not look for - starting quotes or comments. 
*/ - if (escape) { - escape = false; - } else if (*sptr == '\\') { - escape = true; - } - } else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') { - /* Starting quote: remember the quote character. */ - quote = *sptr; - } else if (*sptr == '#' - || (sptr[0] == '-' && sptr[1] == '-' - && sptr[2] == ' ')) { - for (;;) { - if (++sptr >= eptr) { - goto end_of_string; - } - - /* In Unix a newline is 0x0A while in Windows - it is 0x0D followed by 0x0A */ - - switch (*sptr) { - case (char) 0X0A: - case (char) 0x0D: - case '\0': - goto scan_more; - } - } - } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { - sptr += 2; - for (;;) { - if (sptr >= eptr) { - goto end_of_string; - } - - switch (*sptr) { - case '\0': - goto scan_more; - case '*': - if (sptr[1] == '/') { - sptr += 2; - goto scan_more; - } - } - - sptr++; - } - } - - *ptr = *sptr; - - ptr++; - sptr++; - } -} - -/*********************************************************************//** -Finds the highest [number] for foreign key constraints of the table. Looks -only at the >= 4.0.18-format id's, which are of the form -databasename/tablename_ibfk_[number]. -@return highest number, 0 if table has no new format foreign key constraints */ -UNIV_INTERN -ulint -dict_table_get_highest_foreign_id( -/*==============================*/ - dict_table_t* table) /*!< in: table in the dictionary memory cache */ -{ - dict_foreign_t* foreign; - char* endp; - ulint biggest_id = 0; - ulint id; - ulint len; - - ut_a(table); - - len = ut_strlen(table->name); - - for (dict_foreign_set::iterator it = table->foreign_set.begin(); - it != table->foreign_set.end(); - ++it) { - char fkid[MAX_TABLE_NAME_LEN+20]; - foreign = *it; - - strcpy(fkid, foreign->id); - /* Convert foreign key identifier on dictionary memory - cache to filename charset. 
*/ - innobase_convert_to_filename_charset( - strchr(fkid, '/') + 1, - strchr(foreign->id, '/') + 1, - MAX_TABLE_NAME_LEN); - - if (ut_strlen(fkid) > ((sizeof dict_ibfk) - 1) + len - && 0 == ut_memcmp(fkid, table->name, len) - && 0 == ut_memcmp(fkid + len, - dict_ibfk, (sizeof dict_ibfk) - 1) - && fkid[len + ((sizeof dict_ibfk) - 1)] != '0') { - /* It is of the >= 4.0.18 format */ - - id = strtoul(fkid + len - + ((sizeof dict_ibfk) - 1), - &endp, 10); - if (*endp == '\0') { - ut_a(id != biggest_id); - - if (id > biggest_id) { - biggest_id = id; - } - } - } - } - - return(biggest_id); -} - -/*********************************************************************//** -Reports a simple foreign key create clause syntax error. */ -static -void -dict_foreign_report_syntax_err( -/*===========================*/ - const char* fmt, /*!< in: syntax err msg */ - const char* oper, /*!< in: operation */ - const char* name, /*!< in: table name */ - const char* start_of_latest_foreign, - /*!< in: start of the foreign key clause - in the SQL string */ - const char* ptr) /*!< in: place of the syntax error */ -{ - ut_ad(!srv_read_only_mode); - - FILE* ef = dict_foreign_err_file; - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, fmt, oper, name, start_of_latest_foreign, ptr); - mutex_exit(&dict_foreign_err_mutex); -} - -/*********************************************************************//** -Push warning message to SQL-layer based on foreign key constraint -index match error. 
*/ -static -void -dict_foreign_push_index_error( -/*==========================*/ - trx_t* trx, /*!< in: trx */ - const char* operation, /*!< in: operation create or alter - */ - const char* create_name, /*!< in: table name in create or - alter table */ - const char* latest_foreign, /*!< in: start of latest foreign key - constraint name */ - const char** columns, /*!< in: foreign key columns */ - ulint index_error, /*!< in: error code */ - ulint err_col, /*!< in: column where error happened - */ - dict_index_t* err_index, /*!< in: index where error happened - */ - dict_table_t* table, /*!< in: table */ - FILE* ef) /*!< in: output stream */ -{ - switch (index_error) { - case DB_FOREIGN_KEY_INDEX_NOT_FOUND: { - fprintf(ef, - "%s table '%s' with foreign key constraint" - " failed. There is no index in the referenced" - " table where the referenced columns appear" - " as the first columns near '%s'.\n", - operation, create_name, latest_foreign); - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table '%s' with foreign key constraint" - " failed. There is no index in the referenced" - " table where the referenced columns appear" - " as the first columns near '%s'.", - operation, create_name, latest_foreign); - break; - } - case DB_FOREIGN_KEY_IS_PREFIX_INDEX: { - fprintf(ef, - "%s table '%s' with foreign key constraint" - " failed. There is only prefix index in the referenced" - " table where the referenced columns appear" - " as the first columns near '%s'.\n", - operation, create_name, latest_foreign); - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table '%s' with foreign key constraint" - " failed. There is only prefix index in the referenced" - " table where the referenced columns appear" - " as the first columns near '%s'.", - operation, create_name, latest_foreign); - break; - } - case DB_FOREIGN_KEY_COL_NOT_NULL: { - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. 
You have defined a SET NULL condition but " - "column '%s' on index is defined as NOT NULL near '%s'.\n", - operation, create_name, columns[err_col], latest_foreign); - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. You have defined a SET NULL condition but " - "column '%s' on index is defined as NOT NULL near '%s'.", - operation, create_name, columns[err_col], latest_foreign); - break; - } - case DB_FOREIGN_KEY_COLS_NOT_EQUAL: { - dict_field_t* field; - const char* col_name; - field = dict_index_get_nth_field(err_index, err_col); - - col_name = dict_table_get_col_name( - table, dict_col_get_no(field->col)); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. Field type or character set for column '%s' " - "does not mach referenced column '%s' near '%s'.\n", - operation, create_name, columns[err_col], col_name, latest_foreign); - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Field type or character set for column '%s' " - "does not mach referenced column '%s' near '%s'.", - operation, create_name, columns[err_col], col_name, latest_foreign); - break; - } - default: - ut_error; - } -} - -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary the foreign -key constraints declared in the string. This function should be called after -the indexes for a table have been created. Each foreign key constraint must -be accompanied with indexes in both participating tables. The indexes are -allowed to contain more fields than mentioned in the constraint. 
-@return error code or DB_SUCCESS */ -static -dberr_t -dict_create_foreign_constraints_low( -/*================================*/ - trx_t* trx, /*!< in: transaction */ - mem_heap_t* heap, /*!< in: memory heap */ - struct charset_info_st* cs,/*!< in: the character set of sql_string */ - const char* sql_string, - /*!< in: CREATE TABLE or ALTER TABLE statement - where foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the database - name before it: test.table2; the default - database is the database of parameter name */ - const char* name, /*!< in: table full name in the normalized form - database_name/table_name */ - ibool reject_fks) - /*!< in: if TRUE, fail with error code - DB_CANNOT_ADD_CONSTRAINT if any foreign - keys are found. */ -{ - dict_table_t* table = NULL; - dict_table_t* referenced_table = NULL; - dict_table_t* table_to_alter = NULL; - dict_table_t* table_to_create = NULL; - ulint highest_id_so_far = 0; - ulint number = 1; - dict_index_t* index = NULL; - dict_foreign_t* foreign = NULL; - const char* ptr = sql_string; - const char* start_of_latest_foreign = sql_string; - const char* start_of_latest_set = NULL; - FILE* ef = dict_foreign_err_file; - ulint index_error = DB_SUCCESS; - dict_index_t* err_index = NULL; - ulint err_col; - const char* constraint_name; - ibool success; - dberr_t error; - const char* ptr1; - const char* ptr2; - ulint i; - ulint j; - ibool is_on_delete; - ulint n_on_deletes; - ulint n_on_updates; - const dict_col_t*columns[500]; - const char* column_names[500]; - const char* ref_column_names[500]; - const char* referenced_table_name; - dict_foreign_set local_fk_set; - dict_foreign_set_free local_fk_set_free(local_fk_set); - const char* create_table_name; - const char* orig; - char create_name[MAX_TABLE_NAME_LEN + 1]; - char operation[8]; - - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = dict_table_get_low(name); - /* First check if we are 
actually doing an ALTER TABLE, and in that - case look for the table being altered */ - orig = ptr; - ptr = dict_accept(cs, ptr, "ALTER", &success); - - strcpy((char *)operation, success ? "Alter " : "Create "); - - if (!success) { - orig = ptr; - ptr = dict_scan_to(ptr, "CREATE"); - ptr = dict_scan_to(ptr, "TABLE"); - ptr = dict_accept(cs, ptr, "TABLE", &success); - - if (success) { - ptr = dict_scan_table_name(cs, ptr, &table_to_create, name, - &success, heap, &create_table_name); - } - - if (success) { - char *bufend; - bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, - create_table_name, strlen(create_table_name), - trx->mysql_thd, TRUE); - create_name[bufend-create_name]='\0'; - ptr = orig; - } else { - char *bufend; - ptr = orig; - bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, - name, strlen(name), trx->mysql_thd, TRUE); - create_name[bufend-create_name]='\0'; - } - - goto loop; - } - - if (table == NULL) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, "%s table %s with foreign key constraint" - " failed. Table %s not found from data dictionary." - " Error close to %s.\n", - operation, create_name, create_name, start_of_latest_foreign); - mutex_exit(&dict_foreign_err_mutex); - ib_push_warning(trx, DB_ERROR, - "%s table %s with foreign key constraint" - " failed. Table %s not found from data dictionary." 
- " Error close to %s.", - operation, create_name, create_name, start_of_latest_foreign); - - return(DB_ERROR); - } - - /* If not alter table jump to loop */ - if (!success) { - - goto loop; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "TABLE", &success); - - if (!success) { - - goto loop; - } - - /* We are doing an ALTER TABLE: scan the table name we are altering */ - - orig = ptr; - ptr = dict_scan_table_name(cs, ptr, &table_to_alter, name, - &success, heap, &referenced_table_name); - - if (table_to_alter) { - char *bufend; - bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, - table_to_alter->name, strlen(table_to_alter->name), - trx->mysql_thd, TRUE); - create_name[bufend-create_name]='\0'; - } else { - char *bufend; - bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN, - referenced_table_name, strlen(referenced_table_name), - trx->mysql_thd, TRUE); - create_name[bufend-create_name]='\0'; - - } - - if (!success) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. Table %s not found from data dictionary." - " Error close to %s.\n", - operation, create_name, create_name, orig); - mutex_exit(&dict_foreign_err_mutex); - - ib_push_warning(trx, DB_ERROR, - "%s table %s with foreign key constraint" - " failed. Table %s not found from data dictionary." - " Error close to %s.", - operation, create_name, create_name, orig); - - return(DB_ERROR); - } - - /* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the - format databasename/tablename_ibfk_[number], where [number] is local - to the table; look for the highest [number] for table_to_alter, so - that we can assign to new constraints higher numbers. */ - - /* If we are altering a temporary table, the table name after ALTER - TABLE does not correspond to the internal table name, and - table_to_alter is NULL. TODO: should we fix this somehow? 
*/ - - if (table_to_alter == NULL) { - highest_id_so_far = 0; - } else { - highest_id_so_far = dict_table_get_highest_foreign_id( - table_to_alter); - } - - number = highest_id_so_far + 1; - /* Scan for foreign key declarations in a loop */ -loop: - /* Scan either to "CONSTRAINT" or "FOREIGN", whichever is closer */ - - ptr1 = dict_scan_to(ptr, "CONSTRAINT"); - ptr2 = dict_scan_to(ptr, "FOREIGN"); - - constraint_name = NULL; - - if (ptr1 < ptr2) { - /* The user may have specified a constraint name. Pick it so - that we can store 'databasename/constraintname' as the id of - of the constraint to system tables. */ - ptr = ptr1; - - orig = ptr; - ptr = dict_accept(cs, ptr, "CONSTRAINT", &success); - - ut_a(success); - - if (!my_isspace(cs, *ptr) && *ptr != '"' && *ptr != '`') { - goto loop; - } - - while (my_isspace(cs, *ptr)) { - ptr++; - } - - /* read constraint name unless got "CONSTRAINT FOREIGN" */ - if (ptr != ptr2) { - ptr = dict_scan_id(cs, ptr, heap, - &constraint_name, FALSE, FALSE); - } - } else { - ptr = ptr2; - } - - if (*ptr == '\0') { - /* The proper way to reject foreign keys for temporary - tables would be to split the lexing and syntactical - analysis of foreign key clauses from the actual adding - of them, so that ha_innodb.cc could first parse the SQL - command, determine if there are any foreign keys, and - if so, immediately reject the command if the table is a - temporary one. For now, this kludge will work. */ - if (reject_fks && !local_fk_set.empty()) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, "%s table %s with foreign key constraint" - " failed. Temporary tables can't have foreign key constraints." - " Error close to %s.\n", - operation, create_name, start_of_latest_foreign); - mutex_exit(&dict_foreign_err_mutex); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Temporary tables can't have foreign key constraints." 
- " Error close to %s.", - operation, create_name, start_of_latest_foreign); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /**********************************************************/ - /* The following call adds the foreign key constraints - to the data dictionary system tables on disk */ - - error = dict_create_add_foreigns_to_dictionary( - local_fk_set, table, trx); - - if (error == DB_SUCCESS) { - - table->foreign_set.insert(local_fk_set.begin(), - local_fk_set.end()); - std::for_each(local_fk_set.begin(), - local_fk_set.end(), - dict_foreign_add_to_referenced_table()); - local_fk_set.clear(); - } - return(error); - } - - start_of_latest_foreign = ptr; - - orig = ptr; - ptr = dict_accept(cs, ptr, "FOREIGN", &success); - - if (!success) { - goto loop; - } - - if (!my_isspace(cs, *ptr)) { - goto loop; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "KEY", &success); - - if (!success) { - goto loop; - } - - if (my_isspace(cs, *ptr)) { - ptr1 = dict_accept(cs, ptr, "IF", &success); - - if (success) { - if (!my_isspace(cs, *ptr1)) { - goto loop; - } - ptr1 = dict_accept(cs, ptr1, "NOT", &success); - if (!success) { - goto loop; - } - ptr1 = dict_accept(cs, ptr1, "EXISTS", &success); - if (!success) { - goto loop; - } - ptr = ptr1; - } - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "(", &success); - - if (!success) { - if (constraint_name) { - /* MySQL allows also an index id before the '('; we - skip it */ - ptr = dict_skip_word(cs, ptr, &success); - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. 
Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - return(DB_CANNOT_ADD_CONSTRAINT); - } - } - else { - while (my_isspace(cs, *ptr)) { - ptr++; - } - - ptr = dict_scan_id(cs, ptr, heap, - &constraint_name, FALSE, FALSE); - } - - ptr = dict_accept(cs, ptr, "(", &success); - - if (!success) { - /* We do not flag a syntax error here because in an - ALTER TABLE we may also have DROP FOREIGN KEY abc */ - - goto loop; - } - } - - i = 0; - - /* Scan the columns in the first list */ -col_loop1: - ut_a(i < (sizeof column_names) / sizeof *column_names); - orig = ptr; - ptr = dict_scan_col(cs, ptr, &success, table, columns + i, - heap, column_names + i); - if (!success) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); - - mutex_exit(&dict_foreign_err_mutex); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - i++; - - ptr = dict_accept(cs, ptr, ",", &success); - - if (success) { - goto col_loop1; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, ")", &success); - - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Try to find an index which contains the columns - as the first fields and in the right order. 
There is - no need to check column type match (on types_idx), since - the referenced table can be NULL if foreign_key_checks is - set to 0 */ - - index = dict_foreign_find_index( - table, NULL, column_names, i, - NULL, TRUE, FALSE, &index_error, &err_col, &err_index); - - if (!index) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fputs("There is no index in table ", ef); - ut_print_name(ef, NULL, TRUE, create_name); - fprintf(ef, " where the columns appear\n" - "as the first columns. Constraint:\n%s\n" - "See " REFMAN "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - start_of_latest_foreign); - dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign, - column_names, index_error, err_col, err_index, table, ef); - - mutex_exit(&dict_foreign_err_mutex); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "REFERENCES", &success); - - if (!success || !my_isspace(cs, *ptr)) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Let us create a constraint struct */ - - foreign = dict_mem_foreign_create(); - - if (constraint_name) { - ulint db_len; - - /* Catenate 'databasename/' to the constraint name specified - by the user: we conceive the constraint as belonging to the - same MySQL 'database' as the table itself. We store the name - to foreign->id. 
*/ - - db_len = dict_get_db_name_len(table->name); - - foreign->id = static_cast<char*>(mem_heap_alloc( - foreign->heap, db_len + strlen(constraint_name) + 2)); - - ut_memcpy(foreign->id, table->name, db_len); - foreign->id[db_len] = '/'; - strcpy(foreign->id + db_len + 1, constraint_name); - } - - if (foreign->id == NULL) { - error = dict_create_add_foreign_id(&number, - table->name, foreign); - if (error != DB_SUCCESS) { - dict_foreign_free(foreign); - return(error); - } - } - - std::pair<dict_foreign_set::iterator, bool> ret - = local_fk_set.insert(foreign); - - if (!ret.second) { - /* A duplicate foreign key name has been found */ - dict_foreign_free(foreign); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - foreign->foreign_table = table; - foreign->foreign_table_name = mem_heap_strdup( - foreign->heap, table->name); - dict_mem_foreign_table_name_lookup_set(foreign, TRUE); - - foreign->foreign_index = index; - foreign->n_fields = (unsigned int) i; - - foreign->foreign_col_names = static_cast<const char**>( - mem_heap_alloc(foreign->heap, i * sizeof(void*))); - - for (i = 0; i < foreign->n_fields; i++) { - foreign->foreign_col_names[i] = mem_heap_strdup( - foreign->heap, - dict_table_get_col_name(table, - dict_col_get_no(columns[i]))); - } - - ptr = dict_scan_table_name(cs, ptr, &referenced_table, name, - &success, heap, &referenced_table_name); - - /* Note that referenced_table can be NULL if the user has suppressed - checking of foreign key constraints! */ - - if (!success || (!referenced_table && trx->check_foreigns)) { - char buf[MAX_TABLE_NAME_LEN + 1] = ""; - char* bufend; - - bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN, - referenced_table_name, strlen(referenced_table_name), - trx->mysql_thd, TRUE); - buf[bufend - buf] = '\0'; - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint failed. 
Referenced table %s not found in the data dictionary " - "near '%s'.", - operation, create_name, buf, start_of_latest_foreign); - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint failed. Referenced table %s not found in the data dictionary " - "near '%s'.\n", - operation, create_name, buf, start_of_latest_foreign); - - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "(", &success); - - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Scan the columns in the second list */ - i = 0; - -col_loop2: - orig = ptr; - ptr = dict_scan_col(cs, ptr, &success, referenced_table, columns + i, - heap, ref_column_names + i); - i++; - - if (!success) { - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, orig); - mutex_exit(&dict_foreign_err_mutex); - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. 
Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, orig); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - orig = ptr; - ptr = dict_accept(cs, ptr, ",", &success); - - if (success) { - goto col_loop2; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, ")", &success); - - if (!success || foreign->n_fields != i) { - - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s' near '%s'. Referencing column count does not match referenced column count.\n", - operation, create_name, start_of_latest_foreign, orig); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s' near '%s'. Referencing column count %d does not match referenced column count %d.\n", - operation, create_name, start_of_latest_foreign, orig, i, foreign->n_fields); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - n_on_deletes = 0; - n_on_updates = 0; - -scan_on_conditions: - /* Loop here as long as we can find ON ... conditions */ - - start_of_latest_set = ptr; - ptr = dict_accept(cs, ptr, "ON", &success); - - if (!success) { - - goto try_find_index; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "DELETE", &success); - - if (!success) { - orig = ptr; - ptr = dict_accept(cs, ptr, "UPDATE", &success); - - if (!success) { - - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. 
Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - is_on_delete = FALSE; - n_on_updates++; - } else { - is_on_delete = TRUE; - n_on_deletes++; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "RESTRICT", &success); - - if (success) { - goto scan_on_conditions; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "CASCADE", &success); - - if (success) { - if (is_on_delete) { - foreign->type |= DICT_FOREIGN_ON_DELETE_CASCADE; - } else { - foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE; - } - - goto scan_on_conditions; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "NO", &success); - - if (success) { - orig = ptr; - ptr = dict_accept(cs, ptr, "ACTION", &success); - - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - if (is_on_delete) { - foreign->type |= DICT_FOREIGN_ON_DELETE_NO_ACTION; - } else { - foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION; - } - - goto scan_on_conditions; - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "SET", &success); - - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. 
Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - orig = ptr; - ptr = dict_accept(cs, ptr, "NULL", &success); - - if (!success) { - dict_foreign_report_syntax_err( - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.\n", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. Parse error in '%s'" - " near '%s'.", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - for (j = 0; j < foreign->n_fields; j++) { - if ((dict_index_get_nth_col(foreign->foreign_index, j)->prtype) - & DATA_NOT_NULL) { - const dict_col_t* col - = dict_index_get_nth_col(foreign->foreign_index, j); - const char* col_name = dict_table_get_col_name(foreign->foreign_index->table, - dict_col_get_no(col)); - - /* It is not sensible to define SET NULL - if the column is not allowed to be NULL! */ - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. You have defined a SET NULL condition but column '%s' is defined as NOT NULL" - " in '%s' near '%s'.\n", - operation, create_name, col_name, start_of_latest_foreign, start_of_latest_set); - mutex_exit(&dict_foreign_err_mutex); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. 
You have defined a SET NULL condition but column '%s' is defined as NOT NULL" - " in '%s' near '%s'.", - operation, create_name, col_name, start_of_latest_foreign, start_of_latest_set); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - } - - if (is_on_delete) { - foreign->type |= DICT_FOREIGN_ON_DELETE_SET_NULL; - } else { - foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL; - } - - goto scan_on_conditions; - -try_find_index: - if (n_on_deletes > 1 || n_on_updates > 1) { - /* It is an error to define more than 1 action */ - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, - "%s table %s with foreign key constraint" - " failed. You have more than one on delete or on update clause" - " in '%s' near '%s'.\n", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, - "%s table %s with foreign key constraint" - " failed. You have more than one on delete or on update clause" - " in '%s' near '%s'.", - operation, create_name, start_of_latest_foreign, start_of_latest_set); - - dict_foreign_free(foreign); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Try to find an index which contains the columns as the first fields - and in the right order, and the types are the same as in - foreign->foreign_index */ - - if (referenced_table) { - index = dict_foreign_find_index(referenced_table, NULL, - ref_column_names, i, - foreign->foreign_index, - TRUE, FALSE, &index_error, &err_col, &err_index); - if (!index) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, create_name); - fprintf(ef, "%s:\n" - "Cannot find an index in the" - " referenced table where the\n" - "referenced columns appear as the" - " first columns, or column types\n" - "in the table and the referenced table" - " do not match for constraint.\n" - "Note that the internal storage type of" - " ENUM and SET changed in\n" - "tables created 
with >= InnoDB-4.1.12," - " and such columns in old tables\n" - "cannot be referenced by such columns" - " in new tables.\n" - "See " REFMAN - "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - start_of_latest_foreign); - - dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign, - column_names, index_error, err_col, err_index, referenced_table, ef); - - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - } else { - ut_a(trx->check_foreigns == FALSE); - index = NULL; - } - - foreign->referenced_index = index; - foreign->referenced_table = referenced_table; - - foreign->referenced_table_name = mem_heap_strdup( - foreign->heap, referenced_table_name); - dict_mem_referenced_table_name_lookup_set(foreign, TRUE); - - foreign->referenced_col_names = static_cast<const char**>( - mem_heap_alloc(foreign->heap, i * sizeof(void*))); - - for (i = 0; i < foreign->n_fields; i++) { - foreign->referenced_col_names[i] - = mem_heap_strdup(foreign->heap, ref_column_names[i]); - } - - goto loop; -} - -/************************************************************************** -Determines whether a string starts with the specified keyword. -@return TRUE if str starts with keyword */ -UNIV_INTERN -ibool -dict_str_starts_with_keyword( -/*=========================*/ - THD* thd, /*!< in: MySQL thread handle */ - const char* str, /*!< in: string to scan for keyword */ - const char* keyword) /*!< in: keyword to look for */ -{ - struct charset_info_st* cs = innobase_get_charset(thd); - ibool success; - - dict_accept(cs, str, keyword, &success); - return(success); -} - -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary the foreign -key constraints declared in the string. This function should be called after -the indexes for a table have been created. 
Each foreign key constraint must -be accompanied with indexes in both participating tables. The indexes are -allowed to contain more fields than mentioned in the constraint. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_foreign_constraints( -/*============================*/ - trx_t* trx, /*!< in: transaction */ - const char* sql_string, /*!< in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES - table2(c, d), table2 can be written - also with the database - name before it: test.table2; the - default database id the database of - parameter name */ - size_t sql_length, /*!< in: length of sql_string */ - const char* name, /*!< in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /*!< in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ -{ - char* str; - dberr_t err; - mem_heap_t* heap; - - ut_a(trx); - ut_a(trx->mysql_thd); - - str = dict_strip_comments(sql_string, sql_length); - heap = mem_heap_create(10000); - - err = dict_create_foreign_constraints_low( - trx, heap, innobase_get_charset(trx->mysql_thd), str, name, - reject_fks); - - mem_heap_free(heap); - mem_free(str); - - return(err); -} - -/**********************************************************************//** -Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. 
-@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the -constraint id does not match */ -UNIV_INTERN -dberr_t -dict_foreign_parse_drop_constraints( -/*================================*/ - mem_heap_t* heap, /*!< in: heap from which we can - allocate memory */ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table, /*!< in: table */ - ulint* n, /*!< out: number of constraints - to drop */ - const char*** constraints_to_drop) /*!< out: id's of the - constraints to drop */ -{ - ibool success; - char* str; - size_t len; - const char* ptr; - const char* ptr1; - const char* id; - struct charset_info_st* cs; - - ut_a(trx); - ut_a(trx->mysql_thd); - - cs = innobase_get_charset(trx->mysql_thd); - - *n = 0; - - *constraints_to_drop = static_cast<const char**>( - mem_heap_alloc(heap, 1000 * sizeof(char*))); - - ptr = innobase_get_stmt(trx->mysql_thd, &len); - - str = dict_strip_comments(ptr, len); - - ptr = str; - - ut_ad(mutex_own(&(dict_sys->mutex))); -loop: - ptr = dict_scan_to(ptr, "DROP"); - - if (*ptr == '\0') { - mem_free(str); - - return(DB_SUCCESS); - } - - ptr = dict_accept(cs, ptr, "DROP", &success); - - if (!my_isspace(cs, *ptr)) { - - goto loop; - } - - ptr = dict_accept(cs, ptr, "FOREIGN", &success); - - if (!success || !my_isspace(cs, *ptr)) { - - goto loop; - } - - ptr = dict_accept(cs, ptr, "KEY", &success); - - if (!success) { - - goto syntax_error; - } - - ptr1 = dict_accept(cs, ptr, "IF", &success); - - if (success && my_isspace(cs, *ptr1)) { - ptr1 = dict_accept(cs, ptr1, "EXISTS", &success); - if (success) { - - ptr = ptr1; - } - } - - ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE); - - if (id == NULL) { - - goto syntax_error; - } - - ut_a(*n < 1000); - (*constraints_to_drop)[*n] = id; - (*n)++; - - if (std::find_if(table->foreign_set.begin(), - table->foreign_set.end(), - dict_foreign_matches_id(id)) - == table->foreign_set.end()) { - - if (!srv_read_only_mode) { - FILE* ef = dict_foreign_err_file; - - 
mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - fputs(" Error in dropping of a foreign key " - "constraint of table ", ef); - ut_print_name(ef, NULL, TRUE, table->name); - fputs(",\nin SQL command\n", ef); - fputs(str, ef); - fputs("\nCannot find a constraint with the " - "given id ", ef); - ut_print_name(ef, NULL, FALSE, id); - fputs(".\n", ef); - mutex_exit(&dict_foreign_err_mutex); - } - - mem_free(str); - - return(DB_CANNOT_DROP_CONSTRAINT); - } - - goto loop; - -syntax_error: - if (!srv_read_only_mode) { - FILE* ef = dict_foreign_err_file; - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - fputs(" Syntax error in dropping of a" - " foreign key constraint of table ", ef); - ut_print_name(ef, NULL, TRUE, table->name); - fprintf(ef, ",\n" - "close to:\n%s\n in SQL command\n%s\n", ptr, str); - mutex_exit(&dict_foreign_err_mutex); - } - - mem_free(str); - - return(DB_CANNOT_DROP_CONSTRAINT); -} - -/*==================== END OF FOREIGN KEY PROCESSING ====================*/ - -/**********************************************************************//** -Returns an index object if it is found in the dictionary cache. -Assumes that dict_sys->mutex is already being held. -@return index, NULL if not found */ -UNIV_INTERN -dict_index_t* -dict_index_get_if_in_cache_low( -/*===========================*/ - index_id_t index_id) /*!< in: index id */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - - return(dict_index_find_on_id_low(index_id)); -} - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/**********************************************************************//** -Returns an index object if it is found in the dictionary cache. 
-@return index, NULL if not found */ -UNIV_INTERN -dict_index_t* -dict_index_get_if_in_cache( -/*=======================*/ - index_id_t index_id) /*!< in: index id */ -{ - dict_index_t* index; - - if (dict_sys == NULL) { - return(NULL); - } - - mutex_enter(&(dict_sys->mutex)); - - index = dict_index_get_if_in_cache_low(index_id); - - mutex_exit(&(dict_sys->mutex)); - - return(index); -} -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Checks that a tuple has n_fields_cmp value in a sensible range, so that -no comparison can occur with the page number field in a node pointer. -@return TRUE if ok */ -UNIV_INTERN -ibool -dict_index_check_search_tuple( -/*==========================*/ - const dict_index_t* index, /*!< in: index tree */ - const dtuple_t* tuple) /*!< in: tuple used in a search */ -{ - ut_a(index); - ut_a(dtuple_get_n_fields_cmp(tuple) - <= dict_index_get_n_unique_in_tree(index)); - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/**********************************************************************//** -Builds a node pointer out of a physical record and a page number. 
-@return own: node pointer */ -UNIV_INTERN -dtuple_t* -dict_index_build_node_ptr( -/*======================*/ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record for which to build node - pointer */ - ulint page_no,/*!< in: page number to put in node - pointer */ - mem_heap_t* heap, /*!< in: memory heap where pointer - created */ - ulint level) /*!< in: level of rec in tree: - 0 means leaf level */ -{ - dtuple_t* tuple; - dfield_t* field; - byte* buf; - ulint n_unique; - - if (dict_index_is_univ(index)) { - /* In a universal index tree, we take the whole record as - the node pointer if the record is on the leaf level, - on non-leaf levels we remove the last field, which - contains the page number of the child page */ - - ut_a(!dict_table_is_comp(index->table)); - n_unique = rec_get_n_fields_old(rec); - - if (level > 0) { - ut_a(n_unique > 1); - n_unique--; - } - } else { - n_unique = dict_index_get_n_unique_in_tree(index); - } - - tuple = dtuple_create(heap, n_unique + 1); - - /* When searching in the tree for the node pointer, we must not do - comparison on the last field, the page number field, as on upper - levels in the tree there may be identical node pointers with a - different page number; therefore, we set the n_fields_cmp to one - less: */ - - dtuple_set_n_fields_cmp(tuple, n_unique); - - dict_index_copy_types(tuple, index, n_unique); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - mach_write_to_4(buf, page_no); - - field = dtuple_get_nth_field(tuple, n_unique); - dfield_set_data(field, buf, 4); - - dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4); - - rec_copy_prefix_to_dtuple(tuple, rec, index, n_unique, heap); - dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple) - | REC_STATUS_NODE_PTR); - - ut_ad(dtuple_check_typed(tuple)); - - return(tuple); -} - -/**********************************************************************//** -Copies an initial segment of a physical record, long enough to 
specify an -index entry uniquely. -@return pointer to the prefix record */ -UNIV_INTERN -rec_t* -dict_index_copy_rec_order_prefix( -/*=============================*/ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record for which to - copy prefix */ - ulint* n_fields,/*!< out: number of fields copied */ - byte** buf, /*!< in/out: memory buffer for the - copied prefix, or NULL */ - ulint* buf_size)/*!< in/out: buffer size */ -{ - ulint n; - - UNIV_PREFETCH_R(rec); - - if (dict_index_is_univ(index)) { - ut_a(!dict_table_is_comp(index->table)); - n = rec_get_n_fields_old(rec); - } else { - n = dict_index_get_n_unique_in_tree(index); - } - - *n_fields = n; - return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size)); -} - -/**********************************************************************//** -Builds a typed data tuple out of a physical record. -@return own: data tuple */ -UNIV_INTERN -dtuple_t* -dict_index_build_data_tuple( -/*========================*/ - dict_index_t* index, /*!< in: index tree */ - rec_t* rec, /*!< in: record for which to build data tuple */ - ulint n_fields,/*!< in: number of data fields */ - mem_heap_t* heap) /*!< in: memory heap where tuple created */ -{ - dtuple_t* tuple; - - ut_ad(dict_table_is_comp(index->table) - || n_fields <= rec_get_n_fields_old(rec)); - - tuple = dtuple_create(heap, n_fields); - - dict_index_copy_types(tuple, index, n_fields); - - rec_copy_prefix_to_dtuple(tuple, rec, index, n_fields, heap); - - ut_ad(dtuple_check_typed(tuple)); - - return(tuple); -} - -/*********************************************************************//** -Calculates the minimum record length in an index. 
*/ -UNIV_INTERN -ulint -dict_index_calc_min_rec_len( -/*========================*/ - const dict_index_t* index) /*!< in: index */ -{ - ulint sum = 0; - ulint i; - ulint comp = dict_table_is_comp(index->table); - - if (comp) { - ulint nullable = 0; - sum = REC_N_NEW_EXTRA_BYTES; - for (i = 0; i < dict_index_get_n_fields(index); i++) { - const dict_col_t* col - = dict_index_get_nth_col(index, i); - ulint size = dict_col_get_fixed_size(col, comp); - sum += size; - if (!size) { - size = col->len; - sum += size < 128 ? 1 : 2; - } - if (!(col->prtype & DATA_NOT_NULL)) { - nullable++; - } - } - - /* round the NULL flags up to full bytes */ - sum += UT_BITS_IN_BYTES(nullable); - - return(sum); - } - - for (i = 0; i < dict_index_get_n_fields(index); i++) { - sum += dict_col_get_fixed_size( - dict_index_get_nth_col(index, i), comp); - } - - if (sum > 127) { - sum += 2 * dict_index_get_n_fields(index); - } else { - sum += dict_index_get_n_fields(index); - } - - sum += REC_N_OLD_EXTRA_BYTES; - - return(sum); -} - -/**********************************************************************//** -Prints info of a foreign key constraint. */ -static -void -dict_foreign_print_low( -/*===================*/ - dict_foreign_t* foreign) /*!< in: foreign key constraint */ -{ - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (", - foreign->id, foreign->foreign_table_name); - - for (i = 0; i < foreign->n_fields; i++) { - fprintf(stderr, " %s", foreign->foreign_col_names[i]); - } - - fprintf(stderr, " )\n" - " REFERENCES %s (", - foreign->referenced_table_name); - - for (i = 0; i < foreign->n_fields; i++) { - fprintf(stderr, " %s", foreign->referenced_col_names[i]); - } - - fputs(" )\n", stderr); -} - -/**********************************************************************//** -Prints a table data. 
*/ -UNIV_INTERN -void -dict_table_print( -/*=============*/ - dict_table_t* table) /*!< in: table */ -{ - dict_index_t* index; - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_table_stats_lock(table, RW_X_LATCH); - - if (!table->stat_initialized) { - dict_stats_update_transient(table); - } - - fprintf(stderr, - "--------------------------------------\n" - "TABLE: name %s, id %llu, flags %lx, columns %lu," - " indexes %lu, appr.rows " UINT64PF "\n" - " COLUMNS: ", - table->name, - (ullint) table->id, - (ulong) table->flags, - (ulong) table->n_cols, - (ulong) UT_LIST_GET_LEN(table->indexes), - table->stat_n_rows); - - for (i = 0; i < (ulint) table->n_cols; i++) { - dict_col_print_low(table, dict_table_get_nth_col(table, i)); - fputs("; ", stderr); - } - - putc('\n', stderr); - - index = UT_LIST_GET_FIRST(table->indexes); - - while (index != NULL) { - dict_index_print_low(index); - index = UT_LIST_GET_NEXT(indexes, index); - } - - dict_table_stats_unlock(table, RW_X_LATCH); - - std::for_each(table->foreign_set.begin(), - table->foreign_set.end(), - dict_foreign_print_low); - - std::for_each(table->referenced_set.begin(), - table->referenced_set.end(), - dict_foreign_print_low); -} - -/**********************************************************************//** -Prints a column data. */ -static -void -dict_col_print_low( -/*===============*/ - const dict_table_t* table, /*!< in: table */ - const dict_col_t* col) /*!< in: column */ -{ - dtype_t type; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_col_copy_type(col, &type); - fprintf(stderr, "%s: ", dict_table_get_col_name(table, - dict_col_get_no(col))); - - dtype_print(&type); -} - -/**********************************************************************//** -Prints an index data. 
*/ -static -void -dict_index_print_low( -/*=================*/ - dict_index_t* index) /*!< in: index */ -{ - ib_int64_t n_vals; - ulint i; - - ut_a(index->table->stat_initialized); - - ut_ad(mutex_own(&(dict_sys->mutex))); - - if (index->n_user_defined_cols > 0) { - n_vals = index->stat_n_diff_key_vals[ - index->n_user_defined_cols - 1]; - } else { - n_vals = index->stat_n_diff_key_vals[0]; - } - - fprintf(stderr, - " INDEX: name %s, id %llu, fields %lu/%lu," - " uniq %lu, type %lu\n" - " root page %lu, appr.key vals %lu," - " leaf pages %lu, size pages %lu\n" - " FIELDS: ", - index->name, - (ullint) index->id, - (ulong) index->n_user_defined_cols, - (ulong) index->n_fields, - (ulong) index->n_uniq, - (ulong) index->type, - (ulong) index->page, - (ulong) n_vals, - (ulong) index->stat_n_leaf_pages, - (ulong) index->stat_index_size); - - for (i = 0; i < index->n_fields; i++) { - dict_field_print_low(dict_index_get_nth_field(index, i)); - } - - putc('\n', stderr); - -#ifdef UNIV_BTR_PRINT - btr_print_size(index); - - btr_print_index(index, 7); -#endif /* UNIV_BTR_PRINT */ -} - -/**********************************************************************//** -Prints a field data. */ -static -void -dict_field_print_low( -/*=================*/ - const dict_field_t* field) /*!< in: field */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - - fprintf(stderr, " %s", field->name); - - if (field->prefix_len != 0) { - fprintf(stderr, "(%lu)", (ulong) field->prefix_len); - } -} - -/**********************************************************************//** -Outputs info on a foreign key of a table in a format suitable for -CREATE TABLE. 
*/ -UNIV_INTERN -std::string -dict_print_info_on_foreign_key_in_create_format( -/*============================================*/ - trx_t* trx, /*!< in: transaction */ - dict_foreign_t* foreign, /*!< in: foreign key constraint */ - ibool add_newline) /*!< in: whether to add a newline */ -{ - const char* stripped_id; - ulint i; - std::string str; - - if (strchr(foreign->id, '/')) { - /* Strip the preceding database name from the constraint id */ - stripped_id = foreign->id + 1 - + dict_get_db_name_len(foreign->id); - } else { - stripped_id = foreign->id; - } - - str.append(","); - - if (add_newline) { - /* SHOW CREATE TABLE wants constraints each printed nicely - on its own line, while error messages want no newlines - inserted. */ - str.append("\n "); - } - - str.append(" CONSTRAINT "); - - str.append(ut_get_name(trx, FALSE, stripped_id)); - str.append(" FOREIGN KEY ("); - - for (i = 0;;) { - str.append(ut_get_name(trx, FALSE, foreign->foreign_col_names[i])); - if (++i < foreign->n_fields) { - str.append(", "); - } else { - break; - } - } - - str.append(") REFERENCES "); - - if (dict_tables_have_same_db(foreign->foreign_table_name_lookup, - foreign->referenced_table_name_lookup)) { - /* Do not print the database name of the referenced table */ - str.append(ut_get_name(trx, TRUE, - dict_remove_db_name( - foreign->referenced_table_name))); - } else { - str.append(ut_get_name(trx, TRUE, - foreign->referenced_table_name)); - } - - str.append(" ("); - - for (i = 0;;) { - str.append(ut_get_name(trx, FALSE, - foreign->referenced_col_names[i])); - - if (++i < foreign->n_fields) { - str.append(", "); - } else { - break; - } - } - - str.append(")"); - - if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) { - str.append(" ON DELETE CASCADE"); - } - - if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) { - str.append(" ON DELETE SET NULL"); - } - - if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - str.append(" ON DELETE NO ACTION"); - } - - if (foreign->type & 
DICT_FOREIGN_ON_UPDATE_CASCADE) { - str.append(" ON UPDATE CASCADE"); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - str.append(" ON UPDATE SET NULL"); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - str.append(" ON UPDATE NO ACTION"); - } - - return str; -} - -/**********************************************************************//** -Outputs info on foreign keys of a table. */ -UNIV_INTERN -std::string -dict_print_info_on_foreign_keys( -/*============================*/ - ibool create_table_format, /*!< in: if TRUE then print in - a format suitable to be inserted into - a CREATE TABLE, otherwise in the format - of SHOW TABLE STATUS */ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table */ -{ - dict_foreign_t* foreign; - std::string str; - - mutex_enter(&(dict_sys->mutex)); - - for (dict_foreign_set::iterator it = table->foreign_set.begin(); - it != table->foreign_set.end(); - ++it) { - - foreign = *it; - - if (create_table_format) { - str.append( - dict_print_info_on_foreign_key_in_create_format( - trx, foreign, TRUE)); - } else { - ulint i; - str.append("; ("); - - for (i = 0; i < foreign->n_fields; i++) { - if (i) { - str.append(" "); - } - - str.append(ut_get_name(trx, FALSE, - foreign->foreign_col_names[i])); - } - - str.append(") REFER "); - str.append(ut_get_name(trx, TRUE, - foreign->referenced_table_name)); - str.append(")"); - - for (i = 0; i < foreign->n_fields; i++) { - if (i) { - str.append(" "); - } - str.append(ut_get_name( - trx, FALSE, - foreign->referenced_col_names[i])); - } - - str.append(")"); - - if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) { - str.append(" ON DELETE CASCADE"); - } - - if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) { - str.append(" ON DELETE SET NULL"); - } - - if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - str.append(" ON DELETE NO ACTION"); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { - str.append(" ON UPDATE CASCADE"); - 
} - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - str.append(" ON UPDATE SET NULL"); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - str.append(" ON UPDATE NO ACTION"); - } - } - } - - mutex_exit(&(dict_sys->mutex)); - - return str; -} - -/********************************************************************//** -Displays the names of the index and the table. */ -UNIV_INTERN -void -dict_index_name_print( -/*==================*/ - FILE* file, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: index to print */ -{ - fputs("index ", file); - ut_print_name(file, trx, FALSE, index->name); - fputs(" of table ", file); - ut_print_name(file, trx, TRUE, index->table_name); -} - -/**********************************************************************//** -Find a table in dict_sys->table_LRU list with specified space id -@return table if found, NULL if not */ -static -dict_table_t* -dict_find_table_by_space( -/*=====================*/ - ulint space_id) /*!< in: space ID */ -{ - dict_table_t* table; - ulint num_item; - ulint count = 0; - - ut_ad(space_id > 0); - - if (dict_sys == NULL) { - /* This could happen when it's in redo processing. */ - return(NULL); - } - - table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - num_item = UT_LIST_GET_LEN(dict_sys->table_LRU); - - /* This function intentionally does not acquire mutex as it is used - by error handling code in deep call stack as last means to avoid - killing the server, so it worth to risk some consequencies for - the action. 
*/ - while (table && count < num_item) { - if (table->space == space_id) { - return(table); - } - - table = UT_LIST_GET_NEXT(table_LRU, table); - count++; - } - - return(NULL); -} - -/**********************************************************************//** -Flags a table with specified space_id corrupted in the data dictionary -cache -@return TRUE if successful */ -UNIV_INTERN -ibool -dict_set_corrupted_by_space( -/*========================*/ - ulint space_id) /*!< in: space ID */ -{ - dict_table_t* table; - - table = dict_find_table_by_space(space_id); - - if (!table) { - return(FALSE); - } - - /* mark the table->corrupted bit only, since the caller - could be too deep in the stack for SYS_INDEXES update */ - table->corrupted = true; - table->file_unreadable = true; - - return(TRUE); -} - - -/** Flags a table with specified space_id encrypted in the data dictionary -cache -@param[in] space_id Tablespace id */ -UNIV_INTERN -void -dict_set_encrypted_by_space(ulint space_id) -{ - dict_table_t* table; - - table = dict_find_table_by_space(space_id); - - if (table) { - table->file_unreadable = true; - } -} - -/**********************************************************************//** -Flags an index corrupted both in the data dictionary cache -and in the SYS_INDEXES */ -UNIV_INTERN -void -dict_set_corrupted( -/*===============*/ - dict_index_t* index, /*!< in/out: index */ - trx_t* trx, /*!< in/out: transaction */ - const char* ctx) /*!< in: context */ -{ - mem_heap_t* heap; - mtr_t mtr; - dict_index_t* sys_index; - dtuple_t* tuple; - dfield_t* dfield; - byte* buf; - char* table_name; - const char* status; - btr_cur_t cursor; - bool locked = RW_X_LATCH == trx->dict_operation_lock_mode; - - if (!locked) { - row_mysql_lock_data_dictionary(trx); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); - ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_except_dict()); 
-#endif - - /* Mark the table as corrupted only if the clustered index - is corrupted */ - if (dict_index_is_clust(index)) { - index->table->corrupted = TRUE; - } - - if (index->type & DICT_CORRUPT) { - /* The index was already flagged corrupted. */ - ut_ad(!dict_index_is_clust(index) || index->table->corrupted); - goto func_exit; - } - - heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t) - + sizeof(que_fork_t) + sizeof(upd_node_t) - + sizeof(upd_t) + 12)); - mtr_start(&mtr); - index->type |= DICT_CORRUPT; - - sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes); - - /* Find the index row in SYS_INDEXES */ - tuple = dtuple_create(heap, 2); - - dfield = dtuple_get_nth_field(tuple, 0); - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, index->table->id); - dfield_set_data(dfield, buf, 8); - - dfield = dtuple_get_nth_field(tuple, 1); - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, index->id); - dfield_set_data(dfield, buf, 8); - - dict_index_copy_types(tuple, sys_index, 2); - - btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE, - BTR_MODIFY_LEAF, - &cursor, 0, __FILE__, __LINE__, &mtr); - - if (cursor.low_match == dtuple_get_n_fields(tuple)) { - /* UPDATE SYS_INDEXES SET TYPE=index->type - WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */ - ulint len; - byte* field = rec_get_nth_field_old( - btr_cur_get_rec(&cursor), - DICT_FLD__SYS_INDEXES__TYPE, &len); - if (len != 4) { - goto fail; - } - mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr); - status = "Flagged"; - } else { -fail: - status = "Unable to flag"; - } - - mtr_commit(&mtr); - mem_heap_empty(heap); - table_name = static_cast<char*>(mem_heap_alloc(heap, FN_REFLEN + 1)); - *innobase_convert_name( - table_name, FN_REFLEN, - index->table_name, strlen(index->table_name), - NULL, TRUE) = 0; - - ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s", - status, index->name, table_name, ctx); - - 
mem_heap_free(heap); - -func_exit: - if (!locked) { - row_mysql_unlock_data_dictionary(trx); - } -} - -/**********************************************************************//** -Flags an index corrupted in the data dictionary cache only. This -is used mostly to mark a corrupted index when index's own dictionary -is corrupted, and we force to load such index for repair purpose */ -UNIV_INTERN -void -dict_set_corrupted_index_cache_only( -/*================================*/ - dict_index_t* index, /*!< in/out: index */ - dict_table_t* table) /*!< in/out: table */ -{ - ut_ad(index != NULL); - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); - ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); - - /* Mark the table as corrupted only if the clustered index - is corrupted */ - if (dict_index_is_clust(index)) { - ut_ad((index->table != NULL) || (table != NULL) - || index->table == table); - - table->corrupted = TRUE; - } - - index->type |= DICT_CORRUPT; -} - -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Inits dict_ind_redundant and dict_ind_compact. 
*/ -UNIV_INTERN -void -dict_ind_init(void) -/*===============*/ -{ - dict_table_t* table; - - /* create dummy table and index for REDUNDANT infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - - dict_ind_redundant = dict_mem_index_create("SYS_DUMMY1", "SYS_DUMMY1", - DICT_HDR_SPACE, 0, 1); - dict_index_add_col(dict_ind_redundant, table, - dict_table_get_nth_col(table, 0), 0); - dict_ind_redundant->table = table; - - /* create dummy table and index for COMPACT infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY2", - DICT_HDR_SPACE, 1, - DICT_TF_COMPACT, 0); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2", - DICT_HDR_SPACE, 0, 1); - dict_index_add_col(dict_ind_compact, table, - dict_table_get_nth_col(table, 0), 0); - dict_ind_compact->table = table; - - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - dict_ind_redundant->cached = dict_ind_compact->cached = TRUE; -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Frees dict_ind_redundant and dict_ind_compact. 
*/ -static -void -dict_ind_free(void) -/*===============*/ -{ - dict_table_t* table; - - table = dict_ind_compact->table; - dict_mem_index_free(dict_ind_compact); - dict_ind_compact = NULL; - dict_mem_table_free(table); - - table = dict_ind_redundant->table; - dict_mem_index_free(dict_ind_redundant); - dict_ind_redundant = NULL; - dict_mem_table_free(table); -} - -/**********************************************************************//** -Get index by name -@return index, NULL if does not exist */ -UNIV_INTERN -dict_index_t* -dict_table_get_index_on_name( -/*=========================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ -{ - dict_index_t* index; - - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (innobase_strcasecmp(index->name, name) == 0) { - - return(index); - } - - index = dict_table_get_next_index(index); - } - - return(NULL); -} - -/**********************************************************************//** -Replace the index passed in with another equivalent index in the -foreign key lists of the table. 
-@return whether all replacements were found */ -UNIV_INTERN -bool -dict_foreign_replace_index( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - const dict_index_t* index) /*!< in: index to be replaced */ -{ - bool found = true; - dict_foreign_t* foreign; - - ut_ad(index->to_be_dropped); - ut_ad(index->table == table); - - for (dict_foreign_set::iterator it = table->foreign_set.begin(); - it != table->foreign_set.end(); - ++it) { - - foreign = *it; - if (foreign->foreign_index == index) { - ut_ad(foreign->foreign_table == index->table); - - dict_index_t* new_index = dict_foreign_find_index( - foreign->foreign_table, col_names, - foreign->foreign_col_names, - foreign->n_fields, index, - /*check_charsets=*/TRUE, /*check_null=*/FALSE, - NULL, NULL, NULL); - if (new_index) { - ut_ad(new_index->table == index->table); - ut_ad(!new_index->to_be_dropped); - } else { - found = false; - } - - foreign->foreign_index = new_index; - } - } - - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - if (foreign->referenced_index == index) { - ut_ad(foreign->referenced_table == index->table); - - dict_index_t* new_index = dict_foreign_find_index( - foreign->referenced_table, NULL, - foreign->referenced_col_names, - foreign->n_fields, index, - /*check_charsets=*/TRUE, /*check_null=*/FALSE, - NULL, NULL, NULL); - /* There must exist an alternative index, - since this must have been checked earlier. */ - if (new_index) { - ut_ad(new_index->table == index->table); - ut_ad(!new_index->to_be_dropped); - } else { - found = false; - } - - foreign->referenced_index = new_index; - } - } - - return(found); -} - -/**********************************************************************//** -In case there is more than one index with the same name return the index -with the min(id). 
-@return index, NULL if does not exist */ -UNIV_INTERN -dict_index_t* -dict_table_get_index_on_name_and_min_id( -/*=====================================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ -{ - dict_index_t* index; - dict_index_t* min_index; /* Index with matching name and min(id) */ - - min_index = NULL; - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (ut_strcmp(index->name, name) == 0) { - if (!min_index || index->id < min_index->id) { - - min_index = index; - } - } - - index = dict_table_get_next_index(index); - } - - return(min_index); - -} - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Check for duplicate index entries in a table [using the index name] */ -UNIV_INTERN -void -dict_table_check_for_dup_indexes( -/*=============================*/ - const dict_table_t* table, /*!< in: Check for dup indexes - in this table */ - enum check_name check) /*!< in: whether and when to allow - temporary index names */ -{ - /* Check for duplicates, ignoring indexes that are marked - as to be dropped */ - - const dict_index_t* index1; - const dict_index_t* index2; - - ut_ad(mutex_own(&dict_sys->mutex)); - - /* The primary index _must_ exist */ - ut_a(UT_LIST_GET_LEN(table->indexes) > 0); - - index1 = UT_LIST_GET_FIRST(table->indexes); - - do { - if (*index1->name == TEMP_INDEX_PREFIX) { - ut_a(!dict_index_is_clust(index1)); - - switch (check) { - case CHECK_ALL_COMPLETE: - ut_error; - case CHECK_ABORTED_OK: - switch (dict_index_get_online_status(index1)) { - case ONLINE_INDEX_COMPLETE: - case ONLINE_INDEX_CREATION: - ut_error; - break; - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - break; - } - /* fall through */ - case CHECK_PARTIAL_OK: - break; - } - } - - for (index2 = UT_LIST_GET_NEXT(indexes, index1); - index2 != NULL; - index2 = UT_LIST_GET_NEXT(indexes, index2)) { - ut_ad(ut_strcmp(index1->name, 
index2->name)); - } - - index1 = UT_LIST_GET_NEXT(indexes, index1); - } while (index1); -} -#endif /* UNIV_DEBUG */ - -/** Auxiliary macro used inside dict_table_schema_check(). */ -#define CREATE_TYPES_NAMES() \ - dtype_sql_name((unsigned) req_schema->columns[i].mtype, \ - (unsigned) req_schema->columns[i].prtype_mask, \ - (unsigned) req_schema->columns[i].len, \ - req_type, sizeof(req_type)); \ - dtype_sql_name(table->cols[j].mtype, \ - table->cols[j].prtype, \ - table->cols[j].len, \ - actual_type, sizeof(actual_type)) - -/*********************************************************************//** -Checks whether a table exists and whether it has the given structure. -The table must have the same number of columns with the same names and -types. The order of the columns does not matter. -The caller must own the dictionary mutex. -dict_table_schema_check() @{ -@return DB_SUCCESS if the table exists and contains the necessary columns */ -UNIV_INTERN -dberr_t -dict_table_schema_check( -/*====================*/ - dict_table_schema_t* req_schema, /*!< in/out: required table - schema */ - char* errstr, /*!< out: human readable error - message if != DB_SUCCESS is - returned */ - size_t errstr_sz) /*!< in: errstr size */ -{ - char buf[MAX_FULL_NAME_LEN]; - char req_type[64]; - char actual_type[64]; - dict_table_t* table; - ulint i; - - ut_ad(mutex_own(&dict_sys->mutex)); - - table = dict_table_get_low(req_schema->table_name); - - if (table == NULL) { - bool should_print=true; - /* no such table */ - - if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_table_stats") == 0) { - if (innodb_table_stats_not_found_reported == false) { - innodb_table_stats_not_found = true; - innodb_table_stats_not_found_reported = true; - } else { - should_print = false; - } - } else if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_index_stats") == 0 ) { - if (innodb_index_stats_not_found_reported == false) { - innodb_index_stats_not_found = true; - 
innodb_index_stats_not_found_reported = true; - } else { - should_print = false; - } - } - - if (should_print) { - ut_snprintf(errstr, errstr_sz, - "Table %s not found.", - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf))); - return(DB_TABLE_NOT_FOUND); - } else { - return(DB_STATS_DO_NOT_EXIST); - } - } - - if (!table->is_readable() && - fil_space_get(table->space) == NULL) { - /* missing tablespace */ - - ut_snprintf(errstr, errstr_sz, - "Tablespace for table %s is missing.", - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf))); - - return(DB_TABLE_NOT_FOUND); - } - - if ((ulint) table->n_def - DATA_N_SYS_COLS != req_schema->n_cols) { - /* the table has a different number of columns than - required */ - - ut_snprintf(errstr, errstr_sz, - "%s has %d columns but should have %lu.", - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - table->n_def - DATA_N_SYS_COLS, - req_schema->n_cols); - - return(DB_ERROR); - } - - /* For each column from req_schema->columns[] search - whether it is present in table->cols[]. - The following algorithm is O(n_cols^2), but is optimized to - be O(n_cols) if the columns are in the same order in both arrays. 
*/ - - for (i = 0; i < req_schema->n_cols; i++) { - ulint j; - - /* check if i'th column is the same in both arrays */ - if (innobase_strcasecmp(req_schema->columns[i].name, - dict_table_get_col_name(table, i)) == 0) { - - /* we found the column in table->cols[] quickly */ - j = i; - } else { - - /* columns in both arrays are not in the same order, - do a full scan of the second array */ - for (j = 0; j < table->n_def; j++) { - const char* name; - - name = dict_table_get_col_name(table, j); - - if (innobase_strcasecmp(name, - req_schema->columns[i].name) == 0) { - - /* found the column on j'th - position */ - break; - } - } - - if (j == table->n_def) { - - ut_snprintf(errstr, errstr_sz, - "required column %s " - "not found in table %s.", - req_schema->columns[i].name, - ut_format_name( - req_schema->table_name, - TRUE, buf, sizeof(buf))); - - return(DB_ERROR); - } - } - - /* we found a column with the same name on j'th position, - compare column types and flags */ - - /* check length for exact match */ - if (req_schema->columns[i].len != table->cols[j].len) { - - CREATE_TYPES_NAMES(); - - ut_snprintf(errstr, errstr_sz, - "Column %s in table %s is %s " - "but should be %s (length mismatch).", - req_schema->columns[i].name, - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - actual_type, req_type); - - return(DB_ERROR); - } - - /* check mtype for exact match */ - if (req_schema->columns[i].mtype != table->cols[j].mtype) { - - CREATE_TYPES_NAMES(); - - ut_snprintf(errstr, errstr_sz, - "Column %s in table %s is %s " - "but should be %s (type mismatch).", - req_schema->columns[i].name, - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - actual_type, req_type); - - return(DB_ERROR); - } - - /* check whether required prtype mask is set */ - if (req_schema->columns[i].prtype_mask != 0 - && (table->cols[j].prtype - & req_schema->columns[i].prtype_mask) - != req_schema->columns[i].prtype_mask) { - - CREATE_TYPES_NAMES(); - - 
ut_snprintf(errstr, errstr_sz, - "Column %s in table %s is %s " - "but should be %s (flags mismatch).", - req_schema->columns[i].name, - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - actual_type, req_type); - - return(DB_ERROR); - } - } - - if (req_schema->n_foreign != table->foreign_set.size()) { - ut_snprintf( - errstr, errstr_sz, - "Table %s has " ULINTPF " foreign key(s) pointing" - " to other tables, but it must have %lu.", - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - static_cast<ulint>(table->foreign_set.size()), - req_schema->n_foreign); - return(DB_ERROR); - } - - if (req_schema->n_referenced != table->referenced_set.size()) { - ut_snprintf( - errstr, errstr_sz, - "There are " ULINTPF " foreign key(s) pointing to %s, " - "but there must be %lu.", - static_cast<ulint>(table->referenced_set.size()), - ut_format_name(req_schema->table_name, - TRUE, buf, sizeof(buf)), - req_schema->n_referenced); - return(DB_ERROR); - } - - return(DB_SUCCESS); -} -/* @} */ - -/*********************************************************************//** -Converts a database and table name from filesystem encoding -(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two -strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be -at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */ -UNIV_INTERN -void -dict_fs2utf8( -/*=========*/ - const char* db_and_table, /*!< in: database and table names, - e.g. d@i1b/a@q1b@1Kc */ - char* db_utf8, /*!< out: database name, e.g. dцb */ - size_t db_utf8_size, /*!< in: dbname_utf8 size */ - char* table_utf8, /*!< out: table name, e.g. 
aюbØc */ - size_t table_utf8_size)/*!< in: table_utf8 size */ -{ - char db[MAX_DATABASE_NAME_LEN + 1]; - ulint db_len; - uint errors; - - db_len = dict_get_db_name_len(db_and_table); - - ut_a(db_len <= sizeof(db)); - - memcpy(db, db_and_table, db_len); - db[db_len] = '\0'; - - strconvert( - &my_charset_filename, db, db_len, system_charset_info, - db_utf8, static_cast<uint>(db_utf8_size), &errors); - - /* convert each # to @0023 in table name and store the result in buf */ - const char* table = dict_remove_db_name(db_and_table); - const char* table_p; - char buf[MAX_TABLE_NAME_LEN * 5 + 1]; - char* buf_p; - for (table_p = table, buf_p = buf; table_p[0] != '\0'; table_p++) { - if (table_p[0] != '#') { - buf_p[0] = table_p[0]; - buf_p++; - } else { - buf_p[0] = '@'; - buf_p[1] = '0'; - buf_p[2] = '0'; - buf_p[3] = '2'; - buf_p[4] = '3'; - buf_p += 5; - } - ut_a((size_t) (buf_p - buf) < sizeof(buf)); - } - buf_p[0] = '\0'; - - errors = 0; - strconvert( - &my_charset_filename, buf, buf_p - buf, system_charset_info, - table_utf8, static_cast<uint>(table_utf8_size), - &errors); - - if (errors != 0) { - ut_snprintf(table_utf8, table_utf8_size, "%s%s", - srv_mysql50_table_name_prefix, table); - } -} - -/**********************************************************************//** -Closes the data dictionary module. */ -UNIV_INTERN -void -dict_close(void) -/*============*/ -{ - ulint i; - - /* Free the hash elements. We don't remove them from the table - because we are going to destroy the table anyway. */ - for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) { - dict_table_t* table; - - table = static_cast<dict_table_t*>( - HASH_GET_FIRST(dict_sys->table_hash, i)); - - while (table) { - dict_table_t* prev_table = table; - - table = static_cast<dict_table_t*>( - HASH_GET_NEXT(name_hash, prev_table)); -#ifdef UNIV_DEBUG - ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N); -#endif - /* Acquire only because it's a pre-condition. 
*/ - mutex_enter(&dict_sys->mutex); - - dict_table_remove_from_cache(prev_table); - - mutex_exit(&dict_sys->mutex); - } - } - - hash_table_free(dict_sys->table_hash); - - /* The elements are the same instance as in dict_sys->table_hash, - therefore we don't delete the individual elements. */ - hash_table_free(dict_sys->table_id_hash); - - dict_ind_free(); - - mutex_free(&dict_sys->mutex); - - rw_lock_free(&dict_operation_lock); - memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock)); - - if (!srv_read_only_mode) { - mutex_free(&dict_foreign_err_mutex); - } - - delete dict_sys->autoinc_map; - - mem_free(dict_sys); - dict_sys = NULL; -} - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Validate the dictionary table LRU list. -@return TRUE if valid */ -static -ibool -dict_lru_validate(void) -/*===================*/ -{ - dict_table_t* table; - - ut_ad(mutex_own(&dict_sys->mutex)); - - for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - table != NULL; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - ut_a(table->can_be_evicted); - } - - for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); - table != NULL; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - ut_a(!table->can_be_evicted); - } - - return(TRUE); -} - -/**********************************************************************//** -Check if a table exists in the dict table LRU list. 
-@return TRUE if table found in LRU list */ -static -ibool -dict_lru_find_table( -/*================*/ - const dict_table_t* find_table) /*!< in: table to find */ -{ - dict_table_t* table; - - ut_ad(find_table != NULL); - ut_ad(mutex_own(&dict_sys->mutex)); - - for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - table != NULL; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - ut_a(table->can_be_evicted); - - if (table == find_table) { - return(TRUE); - } - } - - return(FALSE); -} - -/**********************************************************************//** -Check if a table exists in the dict table non-LRU list. -@return TRUE if table found in non-LRU list */ -static -ibool -dict_non_lru_find_table( -/*====================*/ - const dict_table_t* find_table) /*!< in: table to find */ -{ - dict_table_t* table; - - ut_ad(find_table != NULL); - ut_ad(mutex_own(&dict_sys->mutex)); - - for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); - table != NULL; - table = UT_LIST_GET_NEXT(table_LRU, table)) { - - ut_a(!table->can_be_evicted); - - if (table == find_table) { - return(TRUE); - } - } - - return(FALSE); -} -#endif /* UNIV_DEBUG */ -/*********************************************************************//** -Check an index to see whether its first fields are the columns in the array, -in the same order and is not marked for deletion and is not the same -as types_idx. -@return true if the index qualifies, otherwise false */ -UNIV_INTERN -bool -dict_foreign_qualify_index( -/*=======================*/ - const dict_table_t* table, /*!< in: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - const char** columns,/*!< in: array of column names */ - ulint n_cols, /*!< in: number of columns */ - const dict_index_t* index, /*!< in: index to check */ - const dict_index_t* types_idx, - /*!< in: NULL or an index - whose types the column types - must match */ - bool check_charsets, - /*!< in: whether to check - charsets. 
only has an effect - if types_idx != NULL */ - ulint check_null, - /*!< in: nonzero if none of - the columns must be declared - NOT NULL */ - ulint* error, /*!< out: error code */ - ulint* err_col_no, - /*!< out: column number where error happened */ - dict_index_t** err_index) - /*!< out: index where error happened */ -{ - if (dict_index_get_n_fields(index) < n_cols) { - return(false); - } - - for (ulint i = 0; i < n_cols; i++) { - dict_field_t* field; - const char* col_name; - ulint col_no; - - field = dict_index_get_nth_field(index, i); - col_no = dict_col_get_no(field->col); - - if (field->prefix_len != 0) { - /* We do not accept column prefix - indexes here */ - if (error && err_col_no && err_index) { - *error = DB_FOREIGN_KEY_IS_PREFIX_INDEX; - *err_col_no = i; - *err_index = (dict_index_t*)index; - } - return(false); - } - - if (check_null - && (field->col->prtype & DATA_NOT_NULL)) { - if (error && err_col_no && err_index) { - *error = DB_FOREIGN_KEY_COL_NOT_NULL; - *err_col_no = i; - *err_index = (dict_index_t*)index; - } - return(false); - } - - col_name = col_names - ? col_names[col_no] - : dict_table_get_col_name(table, col_no); - - if (0 != innobase_strcasecmp(columns[i], col_name)) { - return(false); - } - - if (types_idx && !cmp_cols_are_equal( - dict_index_get_nth_col(index, i), - dict_index_get_nth_col(types_idx, i), - check_charsets)) { - if (error && err_col_no && err_index) { - *error = DB_FOREIGN_KEY_COLS_NOT_EQUAL; - *err_col_no = i; - *err_index = (dict_index_t*)index; - } - - return(false); - } - } - - return(true); -} - -/*********************************************************************//** -Update the state of compression failure padding heuristics. This is -called whenever a compression operation succeeds or fails. -The caller must be holding info->mutex */ -static -void -dict_index_zip_pad_update( -/*======================*/ - zip_pad_info_t* info, /*<! in/out: info to be updated */ - ulint zip_threshold) /*<! 
in: zip threshold value */ -{ - ulint total; - ulint fail_pct; - - ut_ad(info); - - total = info->success + info->failure; - - ut_ad(total > 0); - - if(zip_threshold == 0) { - /* User has just disabled the padding. */ - return; - } - - if (total < ZIP_PAD_ROUND_LEN) { - /* We are in middle of a round. Do nothing. */ - return; - } - - /* We are at a 'round' boundary. Reset the values but first - calculate fail rate for our heuristic. */ - fail_pct = (info->failure * 100) / total; - info->failure = 0; - info->success = 0; - - if (fail_pct > zip_threshold) { - /* Compression failures are more then user defined - threshold. Increase the pad size to reduce chances of - compression failures. */ - ut_ad(info->pad % ZIP_PAD_INCR == 0); - - /* Only do increment if it won't increase padding - beyond max pad size. */ - if (info->pad + ZIP_PAD_INCR - < (UNIV_PAGE_SIZE * zip_pad_max) / 100) { -#ifdef HAVE_ATOMIC_BUILTINS - /* Use atomics even though we have the mutex. - This is to ensure that we are able to read - info->pad atomically where atomics are - supported. */ - os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR); -#else /* HAVE_ATOMIC_BUILTINS */ - info->pad += ZIP_PAD_INCR; -#endif /* HAVE_ATOMIC_BUILTINS */ - - MONITOR_INC(MONITOR_PAD_INCREMENTS); - } - - info->n_rounds = 0; - - } else { - /* Failure rate was OK. Another successful round - completed. */ - ++info->n_rounds; - - /* If enough successful rounds are completed with - compression failure rate in control, decrease the - padding. */ - if (info->n_rounds >= ZIP_PAD_SUCCESSFUL_ROUND_LIMIT - && info->pad > 0) { - - ut_ad(info->pad % ZIP_PAD_INCR == 0); -#ifdef HAVE_ATOMIC_BUILTINS - /* Use atomics even though we have the mutex. - This is to ensure that we are able to read - info->pad atomically where atomics are - supported. 
*/ - os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR); -#else /* HAVE_ATOMIC_BUILTINS */ - info->pad -= ZIP_PAD_INCR; -#endif /* HAVE_ATOMIC_BUILTINS */ - - info->n_rounds = 0; - - MONITOR_INC(MONITOR_PAD_DECREMENTS); - } - } -} - -/*********************************************************************//** -This function should be called whenever a page is successfully -compressed. Updates the compression padding information. */ -UNIV_INTERN -void -dict_index_zip_success( -/*===================*/ - dict_index_t* index) /*!< in/out: index to be updated. */ -{ - ut_ad(index); - - ulint zip_threshold = zip_failure_threshold_pct; - if (!zip_threshold) { - /* Disabled by user. */ - return; - } - - dict_index_zip_pad_lock(index); - ++index->zip_pad.success; - dict_index_zip_pad_update(&index->zip_pad, zip_threshold); - dict_index_zip_pad_unlock(index); -} - -/*********************************************************************//** -This function should be called whenever a page compression attempt -fails. Updates the compression padding information. */ -UNIV_INTERN -void -dict_index_zip_failure( -/*===================*/ - dict_index_t* index) /*!< in/out: index to be updated. */ -{ - ut_ad(index); - - ulint zip_threshold = zip_failure_threshold_pct; - if (!zip_threshold) { - /* Disabled by user. */ - return; - } - - dict_index_zip_pad_lock(index); - ++index->zip_pad.failure; - dict_index_zip_pad_update(&index->zip_pad, zip_threshold); - dict_index_zip_pad_unlock(index); -} - - -/*********************************************************************//** -Return the optimal page size, for which page will likely compress. -@return page size beyond which page might not compress */ -UNIV_INTERN -ulint -dict_index_zip_pad_optimal_page_size( -/*=================================*/ - dict_index_t* index) /*!< in: index for which page size - is requested */ -{ - ulint pad; - ulint min_sz; - ulint sz; - - ut_ad(index); - - if (!zip_failure_threshold_pct) { - /* Disabled by user. 
*/ - return(UNIV_PAGE_SIZE); - } - - /* We use atomics to read index->zip_pad.pad. Here we use zero - as increment as are not changing the value of the 'pad'. On - platforms where atomics are not available we grab the mutex. */ - -#ifdef HAVE_ATOMIC_BUILTINS - pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0); -#else /* HAVE_ATOMIC_BUILTINS */ - dict_index_zip_pad_lock(index); - pad = index->zip_pad.pad; - dict_index_zip_pad_unlock(index); -#endif /* HAVE_ATOMIC_BUILTINS */ - - ut_ad(pad < UNIV_PAGE_SIZE); - sz = UNIV_PAGE_SIZE - pad; - - /* Min size allowed by user. */ - ut_ad(zip_pad_max < 100); - min_sz = (UNIV_PAGE_SIZE * (100 - zip_pad_max)) / 100; - - return(ut_max(sz, min_sz)); -} - -/*************************************************************//** -Convert table flag to row format string. -@return row format name. */ -UNIV_INTERN -const char* -dict_tf_to_row_format_string( -/*=========================*/ - ulint table_flag) /*!< in: row format setting */ -{ - switch (dict_tf_get_rec_format(table_flag)) { - case REC_FORMAT_REDUNDANT: - return("ROW_TYPE_REDUNDANT"); - case REC_FORMAT_COMPACT: - return("ROW_TYPE_COMPACT"); - case REC_FORMAT_COMPRESSED: - return("ROW_TYPE_COMPRESSED"); - case REC_FORMAT_DYNAMIC: - return("ROW_TYPE_DYNAMIC"); - } - - ut_error; - return(0); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc deleted file mode 100644 index 4c3dd47761f..00000000000 --- a/storage/xtradb/dict/dict0load.cc +++ /dev/null @@ -1,3275 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dict/dict0load.cc -Loads to the memory cache database object definitions -from dictionary tables - -Created 4/24/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0load.h" -#include "mysql_version.h" - -#ifdef UNIV_NONINL -#include "dict0load.ic" -#endif - -#include "btr0pcur.h" -#include "btr0btr.h" -#include "page0page.h" -#include "mach0data.h" -#include "dict0dict.h" -#include "dict0boot.h" -#include "dict0stats.h" -#include "rem0cmp.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "dict0crea.h" -#include "dict0priv.h" -#include "ha_prototypes.h" /* innobase_casedn_str() */ -#include "fts0priv.h" - -/** Following are the InnoDB system tables. The positions in -this array are referenced by enum dict_system_table_id. */ -static const char* SYSTEM_TABLE_NAME[] = { - "SYS_TABLES", - "SYS_INDEXES", - "SYS_COLUMNS", - "SYS_FIELDS", - "SYS_FOREIGN", - "SYS_FOREIGN_COLS", - "SYS_TABLESPACES", - "SYS_DATAFILES" -}; - -/* If this flag is TRUE, then we will load the cluster index's (and tables') -metadata even if it is marked as "corrupted". */ -UNIV_INTERN my_bool srv_load_corrupted = FALSE; - -#ifdef UNIV_DEBUG -/****************************************************************//** -Compare the name of an index column. -@return TRUE if the i'th column of index is 'name'. 
*/ -static -ibool -name_of_col_is( -/*===========*/ - const dict_table_t* table, /*!< in: table */ - const dict_index_t* index, /*!< in: index */ - ulint i, /*!< in: index field offset */ - const char* name) /*!< in: name to compare to */ -{ - ulint tmp = dict_col_get_no(dict_field_get_col( - dict_index_get_nth_field( - index, i))); - - return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0); -} -#endif /* UNIV_DEBUG */ - -/********************************************************************//** -Finds the first table name in the given database. -@return own: table name, NULL if does not exist; the caller must free -the memory in the string! */ -UNIV_INTERN -char* -dict_get_first_table_name_in_db( -/*============================*/ - const char* name) /*!< in: database name which ends in '/' */ -{ - dict_table_t* sys_tables; - btr_pcur_t pcur; - dict_index_t* sys_index; - dtuple_t* tuple; - mem_heap_t* heap; - dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap = mem_heap_create(1000); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_ad(!dict_table_is_comp(sys_tables)); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, name, ut_strlen(name)); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); -loop: - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur)) { - /* Not found */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - - if (len < strlen(name) - || ut_memcmp(name, field, strlen(name)) != 0) { - /* Not found */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - if 
(!rec_get_deleted_flag(rec, 0)) { - - /* We found one */ - - char* table_name = mem_strdupl((char*) field, len); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(table_name); - } - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - goto loop; -} - -/********************************************************************//** -Prints to the standard output information on all tables found in the data -dictionary system table. */ -UNIV_INTERN -void -dict_print(void) -/*============*/ -{ - dict_table_t* table; - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - mtr_t mtr; - - /* Enlarge the fatal semaphore wait timeout during the InnoDB table - monitor printout */ - - os_increment_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, - SRV_SEMAPHORE_WAIT_EXTENSION); - - heap = mem_heap_create(1000); - mutex_enter(&(dict_sys->mutex)); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); - - while (rec) { - const char* err_msg; - - err_msg = static_cast<const char*>( - dict_process_sys_tables_rec_and_mtr_commit( - heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE, - &mtr)); - - if (!err_msg) { - dict_table_print(table); - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", err_msg); - } - - mem_heap_empty(heap); - - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&(dict_sys->mutex)); - mem_heap_free(heap); - - /* Restore the fatal semaphore wait timeout */ - os_decrement_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, - SRV_SEMAPHORE_WAIT_EXTENSION); -} - -/********************************************************************//** -This function gets the next system table record as it scans the table. 
-@return the next record if found, NULL if end of scan */ -static -const rec_t* -dict_getnext_system_low( -/*====================*/ - btr_pcur_t* pcur, /*!< in/out: persistent cursor to the - record*/ - mtr_t* mtr) /*!< in: the mini-transaction */ -{ - rec_t* rec = NULL; - - while (!rec || rec_get_deleted_flag(rec, 0)) { - btr_pcur_move_to_next_user_rec(pcur, mtr); - - rec = btr_pcur_get_rec(pcur); - - if (!btr_pcur_is_on_user_rec(pcur)) { - /* end of index */ - btr_pcur_close(pcur); - - return(NULL); - } - } - - /* Get a record, let's save the position */ - btr_pcur_store_position(pcur, mtr); - - return(rec); -} - -/********************************************************************//** -This function opens a system table, and returns the first record. -@return first record of the system table */ -UNIV_INTERN -const rec_t* -dict_startscan_system( -/*==================*/ - btr_pcur_t* pcur, /*!< out: persistent cursor to - the record */ - mtr_t* mtr, /*!< in: the mini-transaction */ - dict_system_id_t system_id) /*!< in: which system table to open */ -{ - dict_table_t* system_table; - dict_index_t* clust_index; - const rec_t* rec; - - ut_a(system_id < SYS_NUM_SYSTEM_TABLES); - - system_table = dict_table_get_low(SYSTEM_TABLE_NAME[system_id]); - - clust_index = UT_LIST_GET_FIRST(system_table->indexes); - - btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, pcur, - true, 0, mtr); - - rec = dict_getnext_system_low(pcur, mtr); - - return(rec); -} - -/********************************************************************//** -This function gets the next system table record as it scans the table. 
-@return the next record if found, NULL if end of scan */ -UNIV_INTERN -const rec_t* -dict_getnext_system( -/*================*/ - btr_pcur_t* pcur, /*!< in/out: persistent cursor - to the record */ - mtr_t* mtr) /*!< in: the mini-transaction */ -{ - const rec_t* rec; - - /* Restore the position */ - btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); - - /* Get the next record */ - rec = dict_getnext_system_low(pcur, mtr); - - return(rec); -} - -/********************************************************************//** -This function processes one SYS_TABLES record and populate the dict_table_t -struct for the table. Extracted out of dict_print() to be used by -both monitor table output and information schema innodb_sys_tables output. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_tables_rec_and_mtr_commit( -/*=======================================*/ - mem_heap_t* heap, /*!< in/out: temporary memory heap */ - const rec_t* rec, /*!< in: SYS_TABLES record */ - dict_table_t** table, /*!< out: dict_table_t to fill */ - dict_table_info_t status, /*!< in: status bit controls - options such as whether we shall - look for dict_table_t from cache - first */ - mtr_t* mtr) /*!< in/out: mini-transaction, - will be committed */ -{ - ulint len; - const char* field; - const char* err_msg = NULL; - char* table_name; - - field = (const char*) rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - - ut_a(!rec_get_deleted_flag(rec, 0)); - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - - /* Get the table name */ - table_name = mem_heap_strdupl(heap, field, len); - - /* If DICT_TABLE_LOAD_FROM_CACHE is set, first check - whether there is cached dict_table_t struct */ - if (status & DICT_TABLE_LOAD_FROM_CACHE) { - - /* Commit before load the table again */ - mtr_commit(mtr); - - *table = dict_table_get_low(table_name); - - if (!(*table)) { - err_msg = "Table not found in cache"; - } - } else { - err_msg = 
dict_load_table_low(table_name, rec, table); - mtr_commit(mtr); - } - - if (err_msg) { - return(err_msg); - } - - return(NULL); -} - -/********************************************************************//** -This function parses a SYS_INDEXES record and populate a dict_index_t -structure with the information from the record. For detail information -about SYS_INDEXES fields, please refer to dict_boot() function. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_indexes_rec( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_INDEXES rec */ - dict_index_t* index, /*!< out: index to be filled */ - table_id_t* table_id) /*!< out: index table id */ -{ - const char* err_msg; - byte* buf; - - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - - /* Parse the record, and get "dict_index_t" struct filled */ - err_msg = dict_load_index_low(buf, NULL, - heap, rec, FALSE, &index); - - *table_id = mach_read_from_8(buf); - - return(err_msg); -} - -/********************************************************************//** -This function parses a SYS_COLUMNS record and populate a dict_column_t -structure with the information from the record. 
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_columns_rec( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_COLUMNS rec */ - dict_col_t* column, /*!< out: dict_col_t to be filled */ - table_id_t* table_id, /*!< out: table id */ - const char** col_name) /*!< out: column name */ -{ - const char* err_msg; - - /* Parse the record, and get "dict_col_t" struct filled */ - err_msg = dict_load_column_low(NULL, heap, column, - table_id, col_name, rec); - - return(err_msg); -} - -/********************************************************************//** -This function parses a SYS_FIELDS record and populates a dict_field_t -structure with the information from the record. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_fields_rec( -/*========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_FIELDS rec */ - dict_field_t* sys_field, /*!< out: dict_field_t to be - filled */ - ulint* pos, /*!< out: Field position */ - index_id_t* index_id, /*!< out: current index id */ - index_id_t last_id) /*!< in: previous index id */ -{ - byte* buf; - byte* last_index_id; - const char* err_msg; - - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - - last_index_id = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(last_index_id, last_id); - - err_msg = dict_load_field_low(buf, NULL, sys_field, - pos, last_index_id, heap, rec); - - *index_id = mach_read_from_8(buf); - - return(err_msg); - -} - -/********************************************************************//** -This function parses a SYS_FOREIGN record and populate a dict_foreign_t -structure with the information from the record. For detail information -about SYS_FOREIGN fields, please refer to dict_load_foreign() function. 
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_foreign_rec( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_FOREIGN rec */ - dict_foreign_t* foreign) /*!< out: dict_foreign_t struct - to be filled */ -{ - ulint len; - const byte* field; - ulint n_fields_and_type; - - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_FOREIGN"); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN) { - return("wrong number of columns in SYS_FOREIGN record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__ID, &len); - if (len == 0 || len == UNIV_SQL_NULL) { -err_len: - return("incorrect column length in SYS_FOREIGN"); - } - - /* This receives a dict_foreign_t* that points to a stack variable. - So dict_foreign_free(foreign) is not used as elsewhere. - Since the heap used here is freed elsewhere, foreign->heap - is not assigned. */ - foreign->id = mem_heap_strdupl(heap, (const char*) field, len); - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_FOREIGN__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - /* The _lookup versions of the referenced and foreign table names - are not assigned since they are not used in this dict_foreign_t */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - foreign->foreign_table_name = mem_heap_strdupl( - heap, (const char*) field, len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - foreign->referenced_table_name = mem_heap_strdupl( - heap, (const char*) field, len); - - field = 
rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len); - if (len != 4) { - goto err_len; - } - n_fields_and_type = mach_read_from_4(field); - - foreign->type = (unsigned int) (n_fields_and_type >> 24); - foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); - - return(NULL); -} - -/********************************************************************//** -This function parses a SYS_FOREIGN_COLS record and extract necessary -information from the record and return to caller. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_foreign_col_rec( -/*=============================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_FOREIGN_COLS rec */ - const char** name, /*!< out: foreign key constraint name */ - const char** for_col_name, /*!< out: referencing column name */ - const char** ref_col_name, /*!< out: referenced column name - in referenced table */ - ulint* pos) /*!< out: column position */ -{ - ulint len; - const byte* field; - - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_FOREIGN_COLS"); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN_COLS) { - return("wrong number of columns in SYS_FOREIGN_COLS record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len); - if (len == 0 || len == UNIV_SQL_NULL) { -err_len: - return("incorrect column length in SYS_FOREIGN_COLS"); - } - *name = mem_heap_strdupl(heap, (char*) field, len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len); - if (len != 4) { - goto err_len; - } - *pos = mach_read_from_4(field); - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - 
goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - *for_col_name = mem_heap_strdupl(heap, (char*) field, len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - *ref_col_name = mem_heap_strdupl(heap, (char*) field, len); - - return(NULL); -} - -/********************************************************************//** -This function parses a SYS_TABLESPACES record, extracts necessary -information from the record and returns to caller. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_tablespaces( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */ - ulint* space, /*!< out: space id */ - const char** name, /*!< out: tablespace name */ - ulint* flags) /*!< out: tablespace flags */ -{ - ulint len; - const byte* field; - - /* Initialize the output values */ - *space = ULINT_UNDEFINED; - *name = NULL; - *flags = ULINT_UNDEFINED; - - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_TABLESPACES"); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLESPACES) { - return("wrong number of columns in SYS_TABLESPACES record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len); - if (len != DICT_FLD_LEN_SPACE) { -err_len: - return("incorrect column length in SYS_TABLESPACES"); - } - *space = mach_read_from_4(field); - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - field = 
rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLESPACES__NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - *name = mem_heap_strdupl(heap, (char*) field, len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len); - if (len != DICT_FLD_LEN_FLAGS) { - goto err_len; - } - *flags = mach_read_from_4(field); - - return(NULL); -} - -/********************************************************************//** -This function parses a SYS_DATAFILES record, extracts necessary -information from the record and returns it to the caller. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_datafiles( -/*=======================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_DATAFILES rec */ - ulint* space, /*!< out: space id */ - const char** path) /*!< out: datafile paths */ -{ - ulint len; - const byte* field; - - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_DATAFILES"); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_DATAFILES) { - return("wrong number of columns in SYS_DATAFILES record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_DATAFILES__SPACE, &len); - if (len != DICT_FLD_LEN_SPACE) { -err_len: - return("incorrect column length in SYS_DATAFILES"); - } - *space = mach_read_from_4(field); - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_DATAFILES__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_DATAFILES__PATH, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - *path = mem_heap_strdupl(heap, (char*) field, len); - - return(NULL); -} - 
-/********************************************************************//** -Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS. -@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */ -static -ulint -dict_sys_tables_get_flags( -/*======================*/ - const rec_t* rec) /*!< in: a record of SYS_TABLES */ -{ - const byte* field; - ulint len; - ulint type; - ulint n_cols; - - /* read the 4 byte flags from the TYPE field */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__TYPE, &len); - ut_a(len == 4); - type = mach_read_from_4(field); - - /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in - dict_table_t::flags the low order bit is used to determine if the - row format is Redundant or Compact when the format is Antelope. - Read the 4 byte N_COLS field and look at the high order bit. It - should be set for COMPACT and later. It should not be set for - REDUNDANT. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - ut_a(len == 4); - n_cols = mach_read_from_4(field); - - /* This validation function also combines the DICT_N_COLS_COMPACT - flag in n_cols into the type field to effectively make it a - dict_table_t::flags. */ - - if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, n_cols)) { - return(ULINT_UNDEFINED); - } - - return(dict_sys_tables_type_to_tf(type, n_cols)); -} - -/********************************************************************//** -Gets the filepath for a spaceid from SYS_DATAFILES and checks it against -the contents of a link file. This function is called when there is no -fil_node_t entry for this space ID so both durable locations on disk -must be checked and compared. -We use a temporary heap here for the table lookup, but not for the path -returned which the caller must free. -This function can return NULL if the space ID is not found in SYS_DATAFILES, -then the caller will assume that the ibd file is in the normal datadir. 
-@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for -the given space ID. NULL if space ID is zero or not found. */ -UNIV_INTERN -char* -dict_get_first_path( -/*================*/ - ulint space, /*!< in: space id */ - const char* name) /*!< in: tablespace name (not referenced in the body) */ -{ - mtr_t mtr; - dict_table_t* sys_datafiles; - dict_index_t* sys_index; - dtuple_t* tuple; - dfield_t* dfield; - byte* buf; - btr_pcur_t pcur; - const rec_t* rec; - const byte* field; - ulint len; - char* dict_filepath = NULL; - mem_heap_t* heap = mem_heap_create(1024); - - ut_ad(mutex_own(&(dict_sys->mutex))); - - mtr_start(&mtr); - - sys_datafiles = dict_table_get_low("SYS_DATAFILES"); - sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes); - ut_ad(!dict_table_is_comp(sys_datafiles)); - ut_ad(name_of_col_is(sys_datafiles, sys_index, - DICT_FLD__SYS_DATAFILES__SPACE, "SPACE")); - ut_ad(name_of_col_is(sys_datafiles, sys_index, - DICT_FLD__SYS_DATAFILES__PATH, "PATH")); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); - mach_write_to_4(buf, space); - - dfield_set_data(dfield, buf, 4); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - /* If the file-per-table tablespace was created with - an earlier version of InnoDB, then this record is not - in SYS_DATAFILES. But a link file still might exist. */ - - if (btr_pcur_is_on_user_rec(&pcur)) { 
- /* The PAGE_CUR_GE search leaves the cursor on the first - record whose SPACE is >= the search key, so the record may - belong to a larger space ID. Compare SPACE with the - requested id before trusting PATH. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_DATAFILES__SPACE, &len); - ut_a(len == 4); - - if (space == mach_read_from_4(field)) { - /* A record for this space ID was found. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_DATAFILES__PATH, &len); - ut_a(len > 0 || len == UNIV_SQL_NULL); - ut_a(len < OS_FILE_MAX_PATH); - dict_filepath = mem_strdupl((char*) field, len); - ut_a(dict_filepath); - } - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(dict_filepath); -} - -/********************************************************************//** -Update the record for space_id in SYS_DATAFILES to this filepath. -@return DB_SUCCESS if OK, dberr_t if the update failed */ -UNIV_INTERN -dberr_t -dict_update_filepath( -/*=================*/ - ulint space_id, /*!< in: space id */ - const char* filepath) /*!< in: filepath */ -{ - dberr_t err = DB_SUCCESS; - trx_t* trx; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = trx_allocate_for_background(); - trx->op_info = "update filepath"; - trx->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - - pars_info_t* info = pars_info_create(); - - pars_info_add_int4_literal(info, "space", space_id); - pars_info_add_str_literal(info, "path", filepath); - - err = que_eval_sql(info, - "PROCEDURE UPDATE_FILEPATH () IS\n" - "BEGIN\n" - "UPDATE SYS_DATAFILES" - " SET PATH = :path\n" - " WHERE SPACE = :space;\n" - "END;\n", FALSE, trx); - - trx_commit_for_mysql(trx); - trx->dict_operation_lock_mode = 0; - trx_free_for_background(trx); - - if (err == DB_SUCCESS) { - /* We just updated SYS_DATAFILES due to the contents in - a link file. Make a note that we did this. */ 
- ib_logf(IB_LOG_LEVEL_INFO, - "The InnoDB data dictionary table SYS_DATAFILES " - "for tablespace ID %lu was updated to use file %s.", - (ulong) space_id, filepath); - } else { - ib_logf(IB_LOG_LEVEL_WARN, - "Problem updating InnoDB data dictionary table " - "SYS_DATAFILES for tablespace ID %lu to file %s.", - (ulong) space_id, filepath); - } - - return(err); -} - -/********************************************************************//** -Insert records into SYS_TABLESPACES and SYS_DATAFILES. -@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN -dberr_t -dict_insert_tablespace_and_filepath( -/*================================*/ - ulint space, /*!< in: space id */ - const char* name, /*!< in: tablespace name */ - const char* filepath, /*!< in: filepath */ - ulint fsp_flags) /*!< in: tablespace flags */ -{ - dberr_t err = DB_SUCCESS; - trx_t* trx; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(filepath); - - trx = trx_allocate_for_background(); - trx->op_info = "insert tablespace and filepath"; - trx->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - - /* A record for this space ID was not found in - SYS_DATAFILES. Assume the record is also missing in - SYS_TABLESPACES. Insert records into them both. */ - err = dict_create_add_tablespace_to_dictionary( - space, name, fsp_flags, filepath, trx, false); - - trx_commit_for_mysql(trx); - trx->dict_operation_lock_mode = 0; - trx_free_for_background(trx); - - return(err); -} - -/* Set by Xtrabackup */ -my_bool (*dict_check_if_skip_table)(const char* name) = 0; - - -/********************************************************************//** -This function looks at each table defined in SYS_TABLES. It checks the -tablespace for any table with a space_id > 0. It looks up the tablespace -in SYS_DATAFILES to ensure the correct path. 
- -In a crash recovery we already have all the tablespace objects created. -This function compares the space id information in the InnoDB data dictionary -to what we already read with fil_load_single_table_tablespaces(). - -In a normal startup, we create the tablespace objects for every table in -InnoDB's data dictionary, if the corresponding .ibd file exists. -We also scan the biggest space id, and store it to fil_system. */ -UNIV_INTERN -void -dict_check_tablespaces_and_store_max_id( -/*====================================*/ - dict_check_t dict_check) /*!< in: how to check */ -{ - dict_table_t* sys_tables; - dict_index_t* sys_index; - btr_pcur_t pcur; - const rec_t* rec; - ulint max_space_id; - mtr_t mtr; - - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&(dict_sys->mutex)); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_ad(!dict_table_is_comp(sys_tables)); - - max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) - + DICT_HDR_MAX_SPACE_ID, - MLOG_4BYTES, &mtr); - fil_set_max_space_id_if_bigger(max_space_id); - - btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur, - true, 0, &mtr); -loop: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur)) { - /* end of index */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - /* We must make the tablespace cache aware of the biggest - known space id */ - - /* printf("Biggest space id in data dictionary %lu\n", - max_space_id); */ - fil_set_max_space_id_if_bigger(max_space_id); - - mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&dict_operation_lock); - - return; - } - - if (!rec_get_deleted_flag(rec, 0)) { - - /* We found one */ - const byte* field; - ulint len; - ulint space_id; - ulint flags; - char* name; - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - - name = mem_strdupl((char*) field, len); - - char 
table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), name, FALSE); - - flags = dict_sys_tables_get_flags(rec); - if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { - /* Read again the 4 bytes from rec. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__TYPE, &len); - ut_ad(len == 4); /* this was checked earlier */ - flags = mach_read_from_4(field); - - /* ulint is not guaranteed to be unsigned long on - every platform, so cast explicitly to match the %lx - conversion, as the other messages in this file do. */ - ib_logf(IB_LOG_LEVEL_ERROR, - "Table '%s' in InnoDB data dictionary" - " has unknown type %lx", table_name, (ulong) flags); - mem_free(name); - goto loop; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__SPACE, &len); - ut_a(len == 4); - - space_id = mach_read_from_4(field); - - btr_pcur_store_position(&pcur, &mtr); - - /* For tables created with old versions of InnoDB, - SYS_TABLES.MIX_LEN may contain garbage. Such tables - would always be in ROW_FORMAT=REDUNDANT. Pretend that - all such tables are non-temporary. That is, do not - suppress error printouts about temporary or discarded - tablespaces not being found. */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); - - bool is_temp = false; - bool discarded = false; - bool print_error_if_does_not_exist; - bool remove_from_data_dict_if_does_not_exist; - - ib_uint32_t flags2 = static_cast<ib_uint32_t>( - mach_read_from_4(field)); - - /* Check that the tablespace (the .ibd file) really - exists; print a warning to the .err log if not. - Do not print warnings for temporary tables or for - tablespaces that have been discarded. */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - - /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */ - if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) { - - is_temp = !!(flags2 & DICT_TF2_TEMPORARY); - discarded = !!(flags2 & DICT_TF2_DISCARDED); - } - - if (space_id == 0) { - /* The system tablespace always exists. 
*/ - ut_ad(!discarded); - mem_free(name); - goto loop; - } - - - ut_a(!IS_XTRABACKUP() || dict_check_if_skip_table); - - if (is_temp || discarded || - (IS_XTRABACKUP() && dict_check_if_skip_table(name))) { - print_error_if_does_not_exist = false; - } - else { - print_error_if_does_not_exist = true; - } - - remove_from_data_dict_if_does_not_exist = IS_XTRABACKUP() && !(is_temp || discarded); - - mtr_commit(&mtr); - - switch (dict_check) { - case DICT_CHECK_ALL_LOADED: - /* All tablespaces should have been found in - fil_load_single_table_tablespaces(). */ - if (fil_space_for_table_exists_in_mem( - space_id, name, print_error_if_does_not_exist, - remove_from_data_dict_if_does_not_exist , false, NULL, 0, flags) - && !(is_temp || discarded)) { - /* If user changes the path of .ibd files in - *.isl files before doing crash recovery , - then this leads to inconsistency in - SYS_DATAFILES system table because the - tables are loaded from the updated path - but the SYS_DATAFILES still points to the - old path.Therefore after crash recovery - update SYS_DATAFILES with the updated path.*/ - ut_ad(space_id); - ut_ad(recv_needed_recovery); - char *dict_path = dict_get_first_path(space_id, - name); - char *remote_path = fil_read_link_file(name); - if(dict_path && remote_path) { - if(strcmp(dict_path,remote_path)) { - dict_update_filepath(space_id, - remote_path); - } - } - if(dict_path) - mem_free(dict_path); - if(remote_path) - mem_free(remote_path); - } - break; - - case DICT_CHECK_SOME_LOADED: - /* Some tablespaces may have been opened in - trx_resurrect_table_locks(). */ - if (fil_space_for_table_exists_in_mem( - space_id, name, false, - false, false, NULL, 0, flags)) { - break; - } - /* fall through */ - case DICT_CHECK_NONE_LOADED: - if (discarded) { - ib_logf(IB_LOG_LEVEL_INFO, - "DISCARD flag set for table '%s'," - " ignored.", - table_name); - break; - } - - /* It is a normal database startup: create the - space object and check that the .ibd file exists. 
- If the table uses a remote tablespace, look for the - space_id in SYS_DATAFILES to find the filepath */ - - /* Use the remote filepath if known. */ - char* filepath = NULL; - if (DICT_TF_HAS_DATA_DIR(flags)) { - filepath = dict_get_first_path( - space_id, name); - } - - /* We could read page 0 to get (optional) IV - if encryption is turned on, if it's off - we will read the page 0 later and find out - if we should decrypt a potentially - already encrypted table - bool read_page_0 = srv_encrypt_tables; */ - - bool read_page_0 = false; - - /* We set the 2nd param (fix_dict = true) - here because we already have an x-lock on - dict_operation_lock and dict_sys->mutex. Besides, - this is at startup and we are now single threaded. - If the filepath is not known, it will need to - be discovered. */ - dberr_t err = fil_open_single_table_tablespace( - read_page_0, srv_read_only_mode ? false : true, - space_id, dict_tf_to_fsp_flags(flags), - name, filepath); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace open failed for '%s', " - "ignored.", table_name); - } - - if (filepath) { - mem_free(filepath); - } - - break; - } - - if (space_id > max_space_id) { - max_space_id = space_id; - } - - mem_free(name); - mtr_start(&mtr); - - btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); - } - - goto loop; -} - -/********************************************************************//** -Loads a table column definition from a SYS_COLUMNS record to -dict_table_t. 
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_column_low( -/*=================*/ - dict_table_t* table, /*!< in/out: table, could be NULL - if we just populate a dict_column_t - struct with information from - a SYS_COLUMNS record */ - mem_heap_t* heap, /*!< in/out: memory heap - for temporary storage */ - dict_col_t* column, /*!< out: dict_column_t to fill, - or NULL if table != NULL */ - table_id_t* table_id, /*!< out: table id */ - const char** col_name, /*!< out: column name */ - const rec_t* rec) /*!< in: SYS_COLUMNS record */ -{ - char* name; - const byte* field; - ulint len; - ulint mtype; - ulint prtype; - ulint col_len; - ulint pos; - - ut_ad(table || column); - - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_COLUMNS"); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) { - return("wrong number of columns in SYS_COLUMNS record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); - if (len != 8) { -err_len: - return("incorrect column length in SYS_COLUMNS"); - } - - if (table_id) { - *table_id = mach_read_from_8(field); - } else if (table->id != mach_read_from_8(field)) { - return("SYS_COLUMNS.TABLE_ID mismatch"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__POS, &len); - if (len != 4) { - - goto err_len; - } - - pos = mach_read_from_4(field); - - if (table && table->n_def != pos) { - return("SYS_COLUMNS.POS mismatch"); - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_COLUMNS__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - - name = mem_heap_strdupl(heap, (const char*) 
field, len); - - if (col_name) { - *col_name = name; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__MTYPE, &len); - if (len != 4) { - goto err_len; - } - - mtype = mach_read_from_4(field); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__PRTYPE, &len); - if (len != 4) { - goto err_len; - } - prtype = mach_read_from_4(field); - - if (dtype_get_charset_coll(prtype) == 0 - && dtype_is_string_type(mtype)) { - /* The table was created with < 4.1.2. */ - - if (dtype_is_binary_string_type(mtype, prtype)) { - /* Use the binary collation for - string columns of binary type. */ - - prtype = dtype_form_prtype( - prtype, - DATA_MYSQL_BINARY_CHARSET_COLL); - } else { - /* Use the default charset for - other than binary columns. */ - - prtype = dtype_form_prtype( - prtype, - data_mysql_default_charset_coll); - } - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__LEN, &len); - if (len != 4) { - goto err_len; - } - col_len = mach_read_from_4(field); - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_COLUMNS__PREC, &len); - if (len != 4) { - goto err_len; - } - - if (!column) { - dict_mem_table_add_col(table, heap, name, mtype, - prtype, col_len); - } else { - dict_mem_fill_column_struct(column, pos, mtype, - prtype, col_len); - } - - return(NULL); -} - -/********************************************************************//** -Loads definitions for table columns. 
*/ -static -void -dict_load_columns( -/*==============*/ - dict_table_t* table, /*!< in/out: table */ - mem_heap_t* heap) /*!< in/out: memory heap - for temporary storage */ -{ - dict_table_t* sys_columns; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - byte* buf; - ulint i; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - mtr_start(&mtr); - - sys_columns = dict_table_get_low("SYS_COLUMNS"); - sys_index = UT_LIST_GET_FIRST(sys_columns->indexes); - ut_ad(!dict_table_is_comp(sys_columns)); - - ut_ad(name_of_col_is(sys_columns, sys_index, - DICT_FLD__SYS_COLUMNS__NAME, "NAME")); - ut_ad(name_of_col_is(sys_columns, sys_index, - DICT_FLD__SYS_COLUMNS__PREC, "PREC")); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) { - const char* err_msg; - const char* name = NULL; - - rec = btr_pcur_get_rec(&pcur); - - ut_a(btr_pcur_is_on_user_rec(&pcur)); - - err_msg = dict_load_column_low(table, heap, NULL, NULL, - &name, rec); - - if (err_msg) { - fprintf(stderr, "InnoDB: %s\n", err_msg); - ut_error; - } - - /* Note: Currently we have one DOC_ID column that is - shared by all FTS indexes on a table. */ - if (innobase_strcasecmp(name, - FTS_DOC_ID_COL_NAME) == 0) { - dict_col_t* col; - /* As part of normal loading of tables the - table->flag is not set for tables with FTS - till after the FTS indexes are loaded. So we - create the fts_t instance here if there isn't - one already created. - - This case does not arise for table create as - the flag is set before the table is created. 
*/ - if (table->fts == NULL) { - table->fts = fts_create(table); - fts_optimize_add_table(table); - } - - ut_a(table->fts->doc_col == ULINT_UNDEFINED); - - col = dict_table_get_nth_col(table, i); - - ut_ad(col->len == sizeof(doc_id_t)); - - if (col->prtype & DATA_FTS_DOC_ID) { - DICT_TF2_FLAG_SET( - table, DICT_TF2_FTS_HAS_DOC_ID); - DICT_TF2_FLAG_UNSET( - table, DICT_TF2_FTS_ADD_DOC_ID); - } - - table->fts->doc_col = i; - } - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); -} - -/** Error message for a delete-marked record in dict_load_field_low() */ -static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS"; - -/********************************************************************//** -Loads an index field definition from a SYS_FIELDS record to -dict_index_t. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_field_low( -/*================*/ - byte* index_id, /*!< in/out: index id (8 bytes) - an "in" value if index != NULL - and "out" if index == NULL */ - dict_index_t* index, /*!< in/out: index, could be NULL - if we just populate a dict_field_t - struct with information from - a SYS_FIELDS record */ - dict_field_t* sys_field, /*!< out: dict_field_t to be - filled */ - ulint* pos, /*!< out: Field position */ - byte* last_index_id, /*!< in: last index id */ - mem_heap_t* heap, /*!< in/out: memory heap - for temporary storage */ - const rec_t* rec) /*!< in: SYS_FIELDS record */ -{ - const byte* field; - ulint len; - ulint pos_and_prefix_len; - ulint prefix_len; - ibool first_field; - ulint position; - - /* Either index or sys_field is supplied, not both */ - ut_a((!index) || (!sys_field)); - - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_field_del); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FIELDS) { - return("wrong number of columns in SYS_FIELDS record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FIELDS__INDEX_ID, 
&len); - if (len != 8) { -err_len: - return("incorrect column length in SYS_FIELDS"); - } - - if (!index) { - ut_a(last_index_id); - memcpy(index_id, (const char*) field, 8); - first_field = memcmp(index_id, last_index_id, 8); - } else { - first_field = (index->n_def == 0); - if (memcmp(field, index_id, 8)) { - return("SYS_FIELDS.INDEX_ID mismatch"); - } - } - - /* The next field stores the field position in the index and a - possible column prefix length if the index field does not - contain the whole column. The storage format is like this: if - there is at least one prefix field in the index, then the HIGH - 2 bytes contain the field number (index->n_def) and the low 2 - bytes the prefix length for the field. Otherwise the field - number (index->n_def) is contained in the 2 LOW bytes. */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FIELDS__POS, &len); - if (len != 4) { - goto err_len; - } - - pos_and_prefix_len = mach_read_from_4(field); - - if (index && UNIV_UNLIKELY - ((pos_and_prefix_len & 0xFFFFUL) != index->n_def - && (pos_and_prefix_len >> 16 & 0xFFFF) != index->n_def)) { - return("SYS_FIELDS.POS mismatch"); - } - - if (first_field || pos_and_prefix_len > 0xFFFFUL) { - prefix_len = pos_and_prefix_len & 0xFFFFUL; - position = (pos_and_prefix_len & 0xFFFF0000UL) >> 16; - } else { - prefix_len = 0; - position = pos_and_prefix_len & 0xFFFFUL; - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_FIELDS__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_FIELDS__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { - goto err_len; - } - - if (index) { - dict_mem_index_add_field( - index, mem_heap_strdupl(heap, (const char*) field, len), - prefix_len); - } else { - ut_a(sys_field); - 
ut_a(pos); - - sys_field->name = mem_heap_strdupl( - heap, (const char*) field, len); - sys_field->prefix_len = prefix_len; - *pos = position; - } - - return(NULL); -} - -/********************************************************************//** -Loads definitions for index fields. -@return DB_SUCCESS if ok, DB_CORRUPTION if corruption */ -static -ulint -dict_load_fields( -/*=============*/ - dict_index_t* index, /*!< in/out: index whose fields to load */ - mem_heap_t* heap) /*!< in: memory heap for temporary storage */ -{ - dict_table_t* sys_fields; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - byte* buf; - ulint i; - mtr_t mtr; - dberr_t error; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - mtr_start(&mtr); - - sys_fields = dict_table_get_low("SYS_FIELDS"); - sys_index = UT_LIST_GET_FIRST(sys_fields->indexes); - ut_ad(!dict_table_is_comp(sys_fields)); - ut_ad(name_of_col_is(sys_fields, sys_index, - DICT_FLD__SYS_FIELDS__COL_NAME, "COL_NAME")); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, index->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i < index->n_fields; i++) { - const char* err_msg; - - rec = btr_pcur_get_rec(&pcur); - - ut_a(btr_pcur_is_on_user_rec(&pcur)); - - err_msg = dict_load_field_low(buf, index, NULL, NULL, NULL, - heap, rec); - - if (err_msg == dict_load_field_del) { - /* There could be delete marked records in - SYS_FIELDS because SYS_FIELDS.INDEX_ID can be - updated by ALTER TABLE ADD INDEX. 
*/ - - goto next_rec; - } else if (err_msg) { - fprintf(stderr, "InnoDB: %s\n", err_msg); - error = DB_CORRUPTION; - goto func_exit; - } -next_rec: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - error = DB_SUCCESS; -func_exit: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - return(error); -} - -/** Error message for a delete-marked record in dict_load_index_low() */ -static const char* dict_load_index_del = "delete-marked record in SYS_INDEXES"; -/** Error message for table->id mismatch in dict_load_index_low() */ -static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch"; - -/********************************************************************//** -Loads an index definition from a SYS_INDEXES record to dict_index_t. -If allocate=TRUE, we will create a dict_index_t structure and fill it -accordingly. If allocated=FALSE, the dict_index_t will be supplied by -the caller and filled with information read from the record. @return -error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_index_low( -/*================*/ - byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if allocate=TRUE - and "out" when allocate=FALSE */ - const char* table_name, /*!< in: table name */ - mem_heap_t* heap, /*!< in/out: temporary memory heap */ - const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool allocate, /*!< in: TRUE=allocate *index, - FALSE=fill in a pre-allocated - *index */ - dict_index_t** index) /*!< out,own: index, or NULL */ -{ - const byte* field; - ulint len; - ulint name_len; - char* name_buf; - index_id_t id; - ulint n_fields; - ulint type; - ulint space; - - if (allocate) { - /* If allocate=TRUE, no dict_index_t will - be supplied. 
Initialize "*index" to NULL */ - *index = NULL; - } - - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_index_del); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_INDEXES) { - return("wrong number of columns in SYS_INDEXES record"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len); - if (len != 8) { -err_len: - return("incorrect column length in SYS_INDEXES"); - } - - if (!allocate) { - /* We are reading a SYS_INDEXES record. Copy the table_id */ - memcpy(table_id, (const char*) field, 8); - } else if (memcmp(field, table_id, 8)) { - /* Caller supplied table_id, verify it is the same - id as on the index record */ - return(dict_load_index_id_err); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__ID, &len); - if (len != 8) { - goto err_len; - } - - id = mach_read_from_8(field); - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_INDEXES__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_INDEXES__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__NAME, &name_len); - if (name_len == UNIV_SQL_NULL) { - goto err_len; - } - - name_buf = mem_heap_strdupl(heap, (const char*) field, - name_len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__N_FIELDS, &len); - if (len != 4) { - goto err_len; - } - n_fields = mach_read_from_4(field); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TYPE, &len); - if (len != 4) { - goto err_len; - } - type = mach_read_from_4(field); - if (type & (~0U << DICT_IT_BITS)) { - return("unknown SYS_INDEXES.TYPE bits"); - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__SPACE, &len); - if (len != 4) { - goto err_len; - } - space = mach_read_from_4(field); - - field = rec_get_nth_field_old( - rec, 
DICT_FLD__SYS_INDEXES__PAGE_NO, &len); - if (len != 4) { - goto err_len; - } - - if (allocate) { - *index = dict_mem_index_create(table_name, name_buf, - space, type, n_fields); - } else { - ut_a(*index); - - dict_mem_fill_index_struct(*index, NULL, NULL, name_buf, - space, type, n_fields); - } - - (*index)->id = id; - (*index)->page = mach_read_from_4(field); - btr_search_index_init(*index); - ut_ad((*index)->page); - - return(NULL); -} - -/********************************************************************//** -Loads definitions for table indexes. Adds them to the data dictionary -cache. -@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary -table or DB_UNSUPPORTED if table has unknown index type */ -static MY_ATTRIBUTE((nonnull)) -dberr_t -dict_load_indexes( -/*==============*/ - dict_table_t* table, /*!< in/out: table */ - mem_heap_t* heap, /*!< in: memory heap for temporary storage */ - dict_err_ignore_t ignore_err) - /*!< in: error to be ignored when - loading the index definition */ -{ - dict_table_t* sys_indexes; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - byte* buf; - mtr_t mtr; - dberr_t error = DB_SUCCESS; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - mtr_start(&mtr); - - sys_indexes = dict_table_get_low("SYS_INDEXES"); - sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes); - ut_ad(!dict_table_is_comp(sys_indexes)); - ut_ad(name_of_col_is(sys_indexes, sys_index, - DICT_FLD__SYS_INDEXES__NAME, "NAME")); - ut_ad(name_of_col_is(sys_indexes, sys_index, - DICT_FLD__SYS_INDEXES__PAGE_NO, "PAGE_NO")); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (;;) { - dict_index_t* index = NULL; 
- const char* err_msg; - - if (!btr_pcur_is_on_user_rec(&pcur)) { - - /* We should allow the table to open even - without index when DICT_ERR_IGNORE_CORRUPT is set. - DICT_ERR_IGNORE_CORRUPT is currently only set - for drop table */ - if (dict_table_get_first_index(table) == NULL - && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Cannot load table %s " - "because it has no indexes in " - "InnoDB internal data dictionary.", - table->name); - error = DB_CORRUPTION; - goto func_exit; - } - - break; - } - - rec = btr_pcur_get_rec(&pcur); - - if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK) - && rec_get_n_fields_old(rec) - == DICT_NUM_FIELDS__SYS_INDEXES) { - const byte* field; - ulint len; - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__NAME, &len); - - if (len != UNIV_SQL_NULL - && char(*field) == char(TEMP_INDEX_PREFIX)) { - /* Skip indexes whose name starts with - TEMP_INDEX_PREFIX, because they will - be dropped during crash recovery. */ - goto next_rec; - } - } - - err_msg = dict_load_index_low(buf, table->name, heap, rec, - TRUE, &index); - ut_ad((index == NULL && err_msg != NULL) - || (index != NULL && err_msg == NULL)); - - if (err_msg == dict_load_index_id_err) { - /* TABLE_ID mismatch means that we have - run out of index definitions for the table. */ - - if (dict_table_get_first_index(table) == NULL - && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to load the " - "clustered index for table %s " - "because of the following error: %s. " - "Refusing to load the rest of the " - "indexes (if any) and the whole table " - "altogether.", table->name, err_msg); - error = DB_CORRUPTION; - goto func_exit; - } - - break; - } else if (err_msg == dict_load_index_del) { - /* Skip delete-marked records. 
*/ - goto next_rec; - } else if (err_msg) { - fprintf(stderr, "InnoDB: %s\n", err_msg); - if (ignore_err & DICT_ERR_IGNORE_CORRUPT) { - goto next_rec; - } - error = DB_CORRUPTION; - goto func_exit; - } - - ut_ad(index); - - /* Check whether the index is corrupted */ - if (dict_index_is_corrupted(index)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: ", stderr); - dict_index_name_print(stderr, NULL, index); - fputs(" is corrupted\n", stderr); - - if (!srv_load_corrupted - && !(ignore_err & DICT_ERR_IGNORE_CORRUPT) - && dict_index_is_clust(index)) { - dict_mem_index_free(index); - - error = DB_INDEX_CORRUPT; - goto func_exit; - } else { - /* We will load the index if - 1) srv_load_corrupted is TRUE - 2) ignore_err is set with - DICT_ERR_IGNORE_CORRUPT - 3) if the index corrupted is a secondary - index */ - ut_print_timestamp(stderr); - fputs(" InnoDB: load corrupted index ", stderr); - dict_index_name_print(stderr, NULL, index); - putc('\n', stderr); - } - } - - if (index->type & DICT_FTS - && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) { - /* This should have been created by now. */ - ut_a(table->fts != NULL); - DICT_TF2_FLAG_SET(table, DICT_TF2_FTS); - } - - /* We check for unsupported types first, so that the - subsequent checks are relevant for the supported types. 
*/ - if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE - | DICT_CORRUPT | DICT_FTS)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown type %lu of index %s of table %s", - (ulong) index->type, index->name, table->name); - - error = DB_UNSUPPORTED; - dict_mem_index_free(index); - goto func_exit; - } else if (index->page == FIL_NULL - && !table->file_unreadable - && (!(index->type & DICT_FTS))) { - - fprintf(stderr, - "InnoDB: Error: trying to load index %s" - " for table %s\n" - "InnoDB: but the index tree has been freed!\n", - index->name, table->name); - - if (ignore_err & DICT_ERR_IGNORE_INDEX_ROOT) { - /* If caller can tolerate this error, - we will continue to load the index and - let caller deal with this error. However - mark the index and table corrupted. We - only need to mark such in the index - dictionary cache for such metadata corruption, - since we would always be able to set it - when loading the dictionary cache */ - dict_set_corrupted_index_cache_only( - index, table); - - fprintf(stderr, - "InnoDB: Index is corrupt but forcing" - " load into data dictionary\n"); - } else { -corrupted: - dict_mem_index_free(index); - error = DB_CORRUPTION; - goto func_exit; - } - } else if (!dict_index_is_clust(index) - && NULL == dict_table_get_first_index(table)) { - - fputs("InnoDB: Error: trying to load index ", - stderr); - ut_print_name(stderr, NULL, FALSE, index->name); - fputs(" for table ", stderr); - ut_print_name(stderr, NULL, TRUE, table->name); - fputs("\nInnoDB: but the first index" - " is not clustered!\n", stderr); - - goto corrupted; - } else if (dict_is_sys_table(table->id) - && (dict_index_is_clust(index) - || ((table == dict_sys->sys_tables) - && !strcmp("ID_IND", index->name)))) { - - /* The index was created in memory already at booting - of the database server */ - dict_mem_index_free(index); - } else { - dict_load_fields(index, heap); - - error = dict_index_add_to_cache( - table, index, index->page, FALSE); - - /* The data dictionary tables should 
never contain - invalid index definitions. If we ignored this error - and simply did not load this index definition, the - .frm file would disagree with the index definitions - inside InnoDB. */ - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - - goto func_exit; - } - } -next_rec: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - /* If the table contains FTS indexes, populate table->fts->indexes */ - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) { - /* table->fts->indexes should have been created. */ - ut_a(table->fts->indexes != NULL); - dict_table_get_all_fts_indexes(table, table->fts->indexes); - } - -func_exit: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(error); -} - -/********************************************************************//** -Loads a table definition from a SYS_TABLES record to dict_table_t. -Does not load any columns or indexes. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_table_low( -/*================*/ - const char* name, /*!< in: table name */ - const rec_t* rec, /*!< in: SYS_TABLES record */ - dict_table_t** table) /*!< out,own: table, or NULL */ -{ - const byte* field; - ulint len; - ulint space; - ulint n_cols; - ulint flags = 0; - ulint flags2; - - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_TABLES"); - } - - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) { - return("wrong number of columns in SYS_TABLES record"); - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - if (len == 0 || len == UNIV_SQL_NULL) { -err_len: - return("incorrect column length in SYS_TABLES"); - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); - if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len); - if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { - goto err_len; - } - - rec_get_nth_field_offs_old(rec, 
DICT_FLD__SYS_TABLES__ID, &len); - if (len != 8) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - if (len != 4) { - goto err_len; - } - - n_cols = mach_read_from_4(field); - - rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len); - if (len != 4) { - goto err_len; - } - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__MIX_ID, &len); - if (len != 8) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); - if (len != 4) { - goto err_len; - } - - /* MIX_LEN may hold additional flags in post-antelope file formats. */ - flags2 = mach_read_from_4(field); - - /* DICT_TF2_FTS will be set when indexes is being loaded */ - flags2 &= ~DICT_TF2_FTS; - - rec_get_nth_field_offs_old( - rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len); - if (len != UNIV_SQL_NULL) { - goto err_len; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__SPACE, &len); - if (len != 4) { - goto err_len; - } - - space = mach_read_from_4(field); - - /* Check if the tablespace exists and has the right name */ - flags = dict_sys_tables_get_flags(rec); - - if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__TYPE, &len); - ut_ad(len == 4); /* this was checked earlier */ - flags = mach_read_from_4(field); - - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary" - " has unknown type %lx.\n", - (ulong) flags); - return("incorrect flags in SYS_TABLES"); - } - - /* The high-order bit of N_COLS is the "compact format" flag. - For tables in that format, MIX_LEN may hold additional flags. 
*/ - if (n_cols & DICT_N_COLS_COMPACT) { - ut_ad(flags & DICT_TF_COMPACT); - - if (flags2 & ~DICT_TF2_BIT_MASK) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary" - " has unknown flags %lx.\n", - (ulong) flags2); - - /* Clean it up and keep going */ - flags2 &= DICT_TF2_BIT_MASK; - } - } else { - /* Do not trust the MIX_LEN field when the - row format is Redundant. */ - flags2 = 0; - } - - /* See if the tablespace is available. */ - *table = dict_mem_table_create( - name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2); - - field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len); - ut_ad(len == 8); /* this was checked earlier */ - - (*table)->id = mach_read_from_8(field); - - (*table)->file_unreadable = false; - - return(NULL); -} - -/********************************************************************//** -Using the table->heap, copy the null-terminated filepath into -table->data_dir_path and replace the 'databasename/tablename.ibd' -portion with 'tablename'. -This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path. -Make this data directory path only if it has not yet been saved. */ -UNIV_INTERN -void -dict_save_data_dir_path( -/*====================*/ - dict_table_t* table, /*!< in/out: table */ - char* filepath) /*!< in: filepath of tablespace */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_a(DICT_TF_HAS_DATA_DIR(table->flags)); - - ut_a(!table->data_dir_path); - ut_a(filepath); - - /* Be sure this filepath is not the default filepath. 
*/ - char* default_filepath = fil_make_ibd_name(table->name, false); - if (strcmp(filepath, default_filepath)) { - ulint pathlen = strlen(filepath); - ut_a(pathlen < OS_FILE_MAX_PATH); - ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd")); - - table->data_dir_path = mem_heap_strdup(table->heap, filepath); - os_file_make_data_dir_path(table->data_dir_path); - } else { - /* This does not change SYS_DATAFILES or SYS_TABLES - or FSP_FLAGS on the header page of the tablespace, - but it makes dict_table_t consistent */ - table->flags &= ~DICT_TF_MASK_DATA_DIR; - } - mem_free(default_filepath); -} - -/*****************************************************************//** -Make sure the data_file_name is saved in dict_table_t if needed. Try to -read it from the file dictionary first, then from SYS_DATAFILES. */ -UNIV_INTERN -void -dict_get_and_save_data_dir_path( -/*============================*/ - dict_table_t* table, /*!< in/out: table */ - bool dict_mutex_own) /*!< in: true if dict_sys->mutex - is owned already */ -{ - bool is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY); - - if (!is_temp && !table->data_dir_path && table->space) { - char* path = fil_space_get_first_path(table->space); - - if (!dict_mutex_own) { - dict_mutex_enter_for_mysql(); - } - if (!path) { - path = dict_get_first_path( - table->space, table->name); - } - - if (path) { - table->flags |= (1 << DICT_TF_POS_DATA_DIR); - dict_save_data_dir_path(table, path); - mem_free(path); - } - - if (!dict_mutex_own) { - dict_mutex_exit_for_mysql(); - } - } -} - -/********************************************************************//** -Loads a table definition and also all its index definitions, and also -the cluster definition if the table is a member in a cluster. Also loads -all foreign key constraints where the foreign key is in the table or where -a foreign key references columns in this table. Adds all these to the data -dictionary cache. 
-@return table, NULL if does not exist; if the table is stored in an -.ibd file, but the file does not exist, then we set the -ibd_file_missing flag TRUE in the table object we return */ -UNIV_INTERN -dict_table_t* -dict_load_table( -/*============*/ - const char* name, /*!< in: table name in the - databasename/tablename format */ - ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */ - dict_err_ignore_t ignore_err) - /*!< in: error to be ignored when loading - table and its indexes' definition */ -{ - dberr_t err; - dict_table_t* table; - dict_table_t* sys_tables; - btr_pcur_t pcur; - dict_index_t* sys_index; - dtuple_t* tuple; - mem_heap_t* heap; - dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - char* filepath = NULL; - const char* err_msg; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap = mem_heap_create(32000); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_ad(!dict_table_is_comp(sys_tables)); - ut_ad(name_of_col_is(sys_tables, sys_index, - DICT_FLD__SYS_TABLES__ID, "ID")); - ut_ad(name_of_col_is(sys_tables, sys_index, - DICT_FLD__SYS_TABLES__N_COLS, "N_COLS")); - ut_ad(name_of_col_is(sys_tables, sys_index, - DICT_FLD__SYS_TABLES__TYPE, "TYPE")); - ut_ad(name_of_col_is(sys_tables, sys_index, - DICT_FLD__SYS_TABLES__MIX_LEN, "MIX_LEN")); - ut_ad(name_of_col_is(sys_tables, sys_index, - DICT_FLD__SYS_TABLES__SPACE, "SPACE")); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, name, ut_strlen(name)); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ -err_exit: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - field = 
rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - - /* Check if the table name in record is the searched one */ - if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { - - goto err_exit; - } - - err_msg = dict_load_table_low(name, rec, &table); - - if (err_msg) { - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", err_msg); - goto err_exit; - } - - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name(table_name, sizeof(table_name), name, FALSE); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - if (table->space == 0) { - /* The system tablespace is always available. */ - } else if (table->flags2 & DICT_TF2_DISCARDED) { - - ib_logf(IB_LOG_LEVEL_WARN, - "Table '%s' tablespace is set as discarded.", - table_name); - - table->file_unreadable = true; - - } else if (!fil_space_for_table_exists_in_mem( - table->space, name, false, IS_XTRABACKUP(), true, heap, - table->id, table->flags)) { - - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) { - /* Do not bother to retry opening temporary tables. */ - table->file_unreadable = true; - - } else { - if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Failed to find tablespace for " - "table '%s' in the cache. " - "Attempting to load the tablespace " - "with space id %lu.", - table_name, (ulong) table->space); - } - - /* Use the remote filepath if needed. */ - /* This needs to be added to the table - from SYS_DATAFILES */ - dict_get_and_save_data_dir_path(table, true); - - if (table->data_dir_path) { - filepath = os_file_make_remote_pathname( - table->data_dir_path, - table->name, "ibd"); - } - - /* Try to open the tablespace. 
We set the - 2nd param (fix_dict = false) here because we - do not have an x-lock on dict_operation_lock */ - err = fil_open_single_table_tablespace( - true, false, table->space, - dict_tf_to_fsp_flags(table->flags), - name, filepath); - - if (err != DB_SUCCESS) { - /* We failed to find a sensible - tablespace file */ - - table->file_unreadable = true; - } - - if (filepath) { - mem_free(filepath); - } - } - } - - dict_load_columns(table, heap); - - if (cached) { - dict_table_add_to_cache(table, TRUE, heap); - } else { - dict_table_add_system_columns(table, heap); - } - - mem_heap_empty(heap); - - /* If there is no tablespace for the table then we only need to - load the index definitions. So that we can IMPORT the tablespace - later. When recovering table locks for resurrected incomplete - transactions, the tablespace should exist, because DDL operations - were not allowed while the table is being locked by a transaction. */ - dict_err_ignore_t index_load_err = - !(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK) - && table->file_unreadable - ? DICT_ERR_IGNORE_ALL - : ignore_err; - - err = dict_load_indexes(table, heap, index_load_err); - - if (err == DB_INDEX_CORRUPT) { - /* Refuse to load the table if the table has a corrupted - cluster index */ - if (!srv_load_corrupted) { - fprintf(stderr, "InnoDB: Error: Load table "); - ut_print_name(stderr, NULL, TRUE, table->name); - fprintf(stderr, " failed, the table has corrupted" - " clustered indexes. Turn on" - " 'innodb_force_load_corrupted'" - " to drop it\n"); - - dict_table_remove_from_cache(table); - table = NULL; - goto func_exit; - } else { - dict_index_t* clust_index; - clust_index = dict_table_get_first_index(table); - - if (dict_index_is_corrupted(clust_index)) { - table->corrupted = TRUE; - } - } - } - - /* Initialize table foreign_child value. 
Its value could be - changed when dict_load_foreigns() is called below */ - table->fk_max_recusive_level = 0; - - /* If the force recovery flag is set, we open the table irrespective - of the error condition, since the user may want to dump data from the - clustered index. However we load the foreign key information only if - all indexes were loaded. */ - if (!cached || table->file_unreadable) { - /* Don't attempt to load the indexes from disk. */ - } else if (err == DB_SUCCESS) { - err = dict_load_foreigns(table->name, NULL, true, true, - ignore_err); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Load table '%s' failed, the table has missing " - "foreign key indexes. Turn off " - "'foreign_key_checks' and try again.", - table->name); - - dict_table_remove_from_cache(table); - table = NULL; - } else { - table->fk_max_recusive_level = 0; - } - } else { - dict_index_t* index; - - /* Make sure that at least the clustered index was loaded. - Otherwise refuse to load the table */ - index = dict_table_get_first_index(table); - - if (!srv_force_recovery - || !index - || !dict_index_is_clust(index)) { - - dict_table_remove_from_cache(table); - table = NULL; - - } else if (dict_index_is_corrupted(index) - && !table->file_unreadable) { - - /* It is possible we force to load a corrupted - clustered index if srv_load_corrupted is set. 
- Mark the table as corrupted in this case */ - table->corrupted = true; - } - } - -func_exit: - mem_heap_free(heap); - - ut_ad(!table - || ignore_err != DICT_ERR_IGNORE_NONE - || table->file_unreadable - || !table->corrupted); - - if (table && table->fts) { - if (!(dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID))) { - /* the table->fts could be created in dict_load_column - when a user defined FTS_DOC_ID is present, but no - FTS */ - fts_optimize_remove_table(table); - fts_free(table); - } else { - fts_optimize_add_table(table); - } - } - - ut_ad(err != DB_SUCCESS || dict_foreign_set_validate(*table)); - - return(table); -} - -/***********************************************************************//** -Loads a table object based on the table id. -@return table; NULL if table does not exist */ -UNIV_INTERN -dict_table_t* -dict_load_table_on_id( -/*==================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err) /*!< in: errors to ignore - when loading the table */ -{ - byte id_buf[8]; - btr_pcur_t pcur; - mem_heap_t* heap; - dtuple_t* tuple; - dfield_t* dfield; - dict_index_t* sys_table_ids; - dict_table_t* sys_tables; - const rec_t* rec; - const byte* field; - ulint len; - dict_table_t* table; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = NULL; - - /* NOTE that the operation of this function is protected by - the dictionary mutex, and therefore no deadlocks can occur - with other dictionary operations. 
*/ - - mtr_start(&mtr); - /*---------------------------------------------------*/ - /* Get the secondary index based on ID for table SYS_TABLES */ - sys_tables = dict_sys->sys_tables; - sys_table_ids = dict_table_get_next_index( - dict_table_get_first_index(sys_tables)); - ut_ad(!dict_table_is_comp(sys_tables)); - ut_ad(!dict_index_is_clust(sys_table_ids)); - heap = mem_heap_create(256); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - /* Write the table id in byte format to id_buf */ - mach_write_to_8(id_buf, table_id); - - dfield_set_data(dfield, id_buf, 8); - dict_index_copy_types(tuple, sys_table_ids, 1); - - btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - if (page_rec_is_user_rec(rec)) { - /*---------------------------------------------------*/ - /* Now we have the record in the secondary index - containing the table ID and NAME */ -check_rec: - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLE_IDS__ID, &len); - ut_ad(len == 8); - - /* Check if the table id in record is the one searched for */ - if (table_id == mach_read_from_8(field)) { - if (rec_get_deleted_flag(rec, 0)) { - /* Until purge has completed, there - may be delete-marked duplicate records - for the same SYS_TABLES.ID, but different - SYS_TABLES.NAME. 
*/ - while (btr_pcur_move_to_next(&pcur, &mtr)) { - rec = btr_pcur_get_rec(&pcur); - - if (page_rec_is_user_rec(rec)) { - goto check_rec; - } - } - } else { - /* Now we get the table name from the record */ - field = rec_get_nth_field_old(rec, - DICT_FLD__SYS_TABLE_IDS__NAME, &len); - /* Load the table definition to memory */ - table = dict_load_table( - mem_heap_strdupl( - heap, (char*) field, len), - TRUE, ignore_err); - } - } - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(table); -} - -/***********************************************************************//** -Loads a table id based on the index id. -@return true if found */ -static -bool -dict_load_table_id_on_index_id( -/*==================*/ - index_id_t index_id, /*!< in: index id */ - table_id_t* table_id) /*!< out: table id */ -{ - /* check hard coded indexes */ - switch(index_id) { - case DICT_TABLES_ID: - case DICT_COLUMNS_ID: - case DICT_INDEXES_ID: - case DICT_FIELDS_ID: - *table_id = index_id; - return true; - case DICT_TABLE_IDS_ID: - /* The following is a secondary index on SYS_TABLES */ - *table_id = DICT_TABLES_ID; - return true; - } - - bool found = false; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* NOTE that the operation of this function is protected by - the dictionary mutex, and therefore no deadlocks can occur - with other dictionary operations. 
*/ - - mtr_start(&mtr); - - btr_pcur_t pcur; - const rec_t* rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES); - - while (rec) { - ulint len; - const byte* field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__ID, &len); - ut_ad(len == 8); - - /* Check if the index id is the one searched for */ - if (index_id == mach_read_from_8(field)) { - found = true; - /* Now we get the table id */ - const byte* field = rec_get_nth_field_old( - rec, - DICT_FLD__SYS_INDEXES__TABLE_ID, - &len); - *table_id = mach_read_from_8(field); - break; - } - mtr_commit(&mtr); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(found); -} - -UNIV_INTERN -dict_table_t* -dict_table_open_on_index_id( -/*==================*/ - index_id_t index_id, /*!< in: index id */ - bool dict_locked) /*!< in: dict locked */ -{ - if (!dict_locked) { - mutex_enter(&dict_sys->mutex); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - table_id_t table_id; - dict_table_t * table = NULL; - if (dict_load_table_id_on_index_id(index_id, &table_id)) { - bool local_dict_locked = true; - table = dict_table_open_on_id(table_id, - local_dict_locked, - DICT_TABLE_OP_LOAD_TABLESPACE); - } - - if (!dict_locked) { - mutex_exit(&dict_sys->mutex); - } - return table; -} - -/********************************************************************//** -This function is called when the database is booted. Loads system table -index definitions except for the clustered index which is added to the -dictionary cache at booting before calling this function. 
*/ -UNIV_INTERN -void -dict_load_sys_table( -/*================*/ - dict_table_t* table) /*!< in: system table */ -{ - mem_heap_t* heap; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap = mem_heap_create(1000); - - dict_load_indexes(table, heap, DICT_ERR_IGNORE_NONE); - - mem_heap_free(heap); -} - -/********************************************************************//** -Loads foreign key constraint col names (also for the referenced table). -Members that must be set (and valid) in foreign: -foreign->heap -foreign->n_fields -foreign->id ('\0'-terminated) -Members that will be created and set by this function: -foreign->foreign_col_names[i] -foreign->referenced_col_names[i] -(for i=0..foreign->n_fields-1) */ -static -void -dict_load_foreign_cols( -/*===================*/ - dict_foreign_t* foreign)/*!< in/out: foreign constraint object */ -{ - dict_table_t* sys_foreign_cols; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - ulint i; - mtr_t mtr; - size_t id_len; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - id_len = strlen(foreign->id); - - foreign->foreign_col_names = static_cast<const char**>( - mem_heap_alloc(foreign->heap, - foreign->n_fields * sizeof(void*))); - - foreign->referenced_col_names = static_cast<const char**>( - mem_heap_alloc(foreign->heap, - foreign->n_fields * sizeof(void*))); - - mtr_start(&mtr); - - sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); - - sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); - ut_ad(!dict_table_is_comp(sys_foreign_cols)); - - tuple = dtuple_create(foreign->heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, foreign->id, id_len); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i < foreign->n_fields; i++) { - - rec = btr_pcur_get_rec(&pcur); - - 
ut_a(btr_pcur_is_on_user_rec(&pcur)); - ut_a(!rec_get_deleted_flag(rec, 0)); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len); - - if (len != id_len || ut_memcmp(foreign->id, field, len) != 0) { - const rec_t* pos; - ulint pos_len; - const rec_t* for_col_name; - ulint for_col_name_len; - const rec_t* ref_col_name; - ulint ref_col_name_len; - - pos = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__POS, - &pos_len); - - for_col_name = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, - &for_col_name_len); - - ref_col_name = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, - &ref_col_name_len); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to load columns names for foreign " - "key '%s' because it was not found in " - "InnoDB internal table SYS_FOREIGN_COLS. The " - "closest entry we found is: " - "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', " - "REF_COL_NAME='%.*s')", - foreign->id, - (int) len, field, - mach_read_from_4(pos), - (int) for_col_name_len, for_col_name, - (int) ref_col_name_len, ref_col_name); - - ut_error; - } - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len); - ut_a(len == 4); - ut_a(i == mach_read_from_4(field)); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len); - foreign->foreign_col_names[i] = mem_heap_strdupl( - foreign->heap, (char*) field, len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len); - foreign->referenced_col_names[i] = mem_heap_strdupl( - foreign->heap, (char*) field, len); - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); -} - -/***********************************************************************//** -Loads a foreign key constraint to the dictionary cache. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) -dberr_t -dict_load_foreign( -/*==============*/ - const char* id, - /*!< in: foreign constraint id, must be - '\0'-terminated */ - const char** col_names, - /*!< in: column names, or NULL - to use foreign->foreign_table->col_names */ - bool check_recursive, - /*!< in: whether to record the foreign table - parent count to avoid unlimited recursive - load of chained foreign tables */ - bool check_charsets, - /*!< in: whether to check charset - compatibility */ - dict_err_ignore_t ignore_err) - /*!< in: error to be ignored */ -{ - dict_foreign_t* foreign; - dict_table_t* sys_foreign; - btr_pcur_t pcur; - dict_index_t* sys_index; - dtuple_t* tuple; - mem_heap_t* heap2; - dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - ulint n_fields_and_type; - mtr_t mtr; - dict_table_t* for_table; - dict_table_t* ref_table; - size_t id_len; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - id_len = strlen(id); - - heap2 = mem_heap_create(1000); - - mtr_start(&mtr); - - sys_foreign = dict_table_get_low("SYS_FOREIGN"); - - sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); - ut_ad(!dict_table_is_comp(sys_foreign)); - - tuple = dtuple_create(heap2, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, id, id_len); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ - - fprintf(stderr, - "InnoDB: Error: cannot load foreign constraint " - "%s: could not find the relevant record in " - "SYS_FOREIGN\n", id); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - return(DB_ERROR); - } - - field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len); - - /* Check if the id in record is the searched one */ - if (len != 
id_len || ut_memcmp(id, field, len) != 0) { - - fprintf(stderr, - "InnoDB: Error: cannot load foreign constraint " - "%s: found %.*s instead in SYS_FOREIGN\n", - id, (int) len, field); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - return(DB_ERROR); - } - - /* Read the table names and the number of columns associated - with the constraint */ - - mem_heap_free(heap2); - - foreign = dict_mem_foreign_create(); - - n_fields_and_type = mach_read_from_4( - rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len)); - - ut_a(len == 4); - - /* We store the type in the bits 24..29 of n_fields_and_type. */ - - foreign->type = (unsigned int) (n_fields_and_type >> 24); - foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); - - foreign->id = mem_heap_strdupl(foreign->heap, id, id_len); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len); - - foreign->foreign_table_name = mem_heap_strdupl( - foreign->heap, (char*) field, len); - dict_mem_foreign_table_name_lookup_set(foreign, TRUE); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); - foreign->referenced_table_name = mem_heap_strdupl( - foreign->heap, (char*) field, len); - dict_mem_referenced_table_name_lookup_set(foreign, TRUE); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - dict_load_foreign_cols(foreign); - - ref_table = dict_table_check_if_in_cache_low( - foreign->referenced_table_name_lookup); - - /* We could possibly wind up in a deep recursive calls if - we call dict_table_get_low() again here if there - is a chain of tables concatenated together with - foreign constraints. In such case, each table is - both a parent and child of the other tables, and - act as a "link" in such table chains. - To avoid such scenario, we would need to check the - number of ancesters the current table has. If that - exceeds DICT_FK_MAX_CHAIN_LEN, we will stop loading - the child table. 
- Foreign constraints are loaded in a Breath First fashion, - that is, the index on FOR_NAME is scanned first, and then - index on REF_NAME. So foreign constrains in which - current table is a child (foreign table) are loaded first, - and then those constraints where current table is a - parent (referenced) table. - Thus we could check the parent (ref_table) table's - reference count (fk_max_recusive_level) to know how deep the - recursive call is. If the parent table (ref_table) is already - loaded, and its fk_max_recusive_level is larger than - DICT_FK_MAX_CHAIN_LEN, we will stop the recursive loading - by skipping loading the child table. It will not affect foreign - constraint check for DMLs since child table will be loaded - at that time for the constraint check. */ - if (!ref_table - || ref_table->fk_max_recusive_level < DICT_FK_MAX_RECURSIVE_LOAD) { - - /* If the foreign table is not yet in the dictionary cache, we - have to load it so that we are able to make type comparisons - in the next function call. */ - - for_table = dict_table_get_low(foreign->foreign_table_name_lookup); - - if (for_table && ref_table && check_recursive) { - /* This is to record the longest chain of ancesters - this table has, if the parent has more ancesters - than this table has, record it after add 1 (for this - parent */ - if (ref_table->fk_max_recusive_level - >= for_table->fk_max_recusive_level) { - for_table->fk_max_recusive_level = - ref_table->fk_max_recusive_level + 1; - } - } - } - - /* Note that there may already be a foreign constraint object in - the dictionary cache for this constraint: then the following - call only sets the pointers in it to point to the appropriate table - and index objects and frees the newly created object foreign. - Adding to the cache should always succeed since we are not creating - a new foreign key constraint but loading one from the data - dictionary. 
*/ - - return(dict_foreign_add_to_cache(foreign, col_names, check_charsets, - ignore_err)); -} - -/***********************************************************************//** -Loads foreign key constraints where the table is either the foreign key -holder or where the table is referenced by a foreign key. Adds these -constraints to the data dictionary. Note that we know that the dictionary -cache already contains all constraints where the other relevant table is -already in the dictionary cache. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_load_foreigns( -/*===============*/ - const char* table_name, /*!< in: table name */ - const char** col_names, /*!< in: column names, or NULL - to use table->col_names */ - bool check_recursive,/*!< in: Whether to check - recursive load of tables - chained by FK */ - bool check_charsets, /*!< in: whether to check - charset compatibility */ - dict_err_ignore_t ignore_err) /*!< in: error to be ignored */ -{ - ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1) - / sizeof(ulint)]; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - dict_index_t* sec_index; - dict_table_t* sys_foreign; - const rec_t* rec; - const byte* field; - ulint len; - dberr_t err; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - sys_foreign = dict_table_get_low("SYS_FOREIGN"); - - if (sys_foreign == NULL) { - /* No foreign keys defined yet in this database */ - - fprintf(stderr, - "InnoDB: Error: no foreign key system tables" - " in the database\n"); - - return(DB_ERROR); - } - - ut_ad(!dict_table_is_comp(sys_foreign)); - mtr_start(&mtr); - - /* Get the secondary index based on FOR_NAME from table - SYS_FOREIGN */ - - sec_index = dict_table_get_next_index( - dict_table_get_first_index(sys_foreign)); - ut_ad(!dict_index_is_clust(sec_index)); -start_load: - - tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, table_name, 
ut_strlen(table_name)); - dict_index_copy_types(tuple, sec_index, 1); - - btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); -loop: - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur)) { - /* End of index */ - - goto load_next_index; - } - - /* Now we have the record in the secondary index containing a table - name and a foreign constraint ID */ - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len); - - /* Check if the table name in the record is the one searched for; the - following call does the comparison in the latin1_swedish_ci - charset-collation, in a case-insensitive way. */ - - if (0 != cmp_data_data(dfield_get_type(dfield)->mtype, - dfield_get_type(dfield)->prtype, - static_cast<const byte*>( - dfield_get_data(dfield)), - dfield_get_len(dfield), - field, len)) { - - goto load_next_index; - } - - /* Since table names in SYS_FOREIGN are stored in a case-insensitive - order, we have to check that the table name matches also in a binary - string comparison. On Unix, MySQL allows table names that only differ - in character case. If lower_case_table_names=2 then what is stored - may not be the same case, but the previous comparison showed that they - match with no-case. */ - - if (rec_get_deleted_flag(rec, 0)) { - goto next_rec; - } - - if ((innobase_get_lower_case_table_names() != 2) - && (0 != ut_memcmp(field, table_name, len))) { - goto next_rec; - } - - /* Now we get a foreign key constraint id */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__ID, &len); - - /* Copy the string because the page may be modified or evicted - after mtr_commit() below. 
*/ - char fk_id[MAX_TABLE_NAME_LEN + 1]; - - ut_a(len <= MAX_TABLE_NAME_LEN); - memcpy(fk_id, field, len); - fk_id[len] = '\0'; - - btr_pcur_store_position(&pcur, &mtr); - - mtr_commit(&mtr); - - /* Load the foreign constraint definition to the dictionary cache */ - - err = dict_load_foreign(fk_id, col_names, - check_recursive, check_charsets, ignore_err); - - if (err != DB_SUCCESS) { - btr_pcur_close(&pcur); - - return(err); - } - - mtr_start(&mtr); - - btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); -next_rec: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - goto loop; - -load_next_index: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - sec_index = dict_table_get_next_index(sec_index); - - if (sec_index != NULL) { - - mtr_start(&mtr); - - /* Switch to scan index on REF_NAME, fk_max_recusive_level - already been updated when scanning FOR_NAME index, no need to - update again */ - check_recursive = FALSE; - - goto start_load; - } - - return(DB_SUCCESS); -} diff --git a/storage/xtradb/dict/dict0mem.cc b/storage/xtradb/dict/dict0mem.cc deleted file mode 100644 index 125d7d78a1f..00000000000 --- a/storage/xtradb/dict/dict0mem.cc +++ /dev/null @@ -1,823 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file dict/dict0mem.cc -Data dictionary memory object creation - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - -#include "dict0mem.h" - -#ifdef UNIV_NONINL -#include "dict0mem.ic" -#endif - -#include "rem0rec.h" -#include "data0type.h" -#include "mach0data.h" -#include "dict0dict.h" -#include "fts0priv.h" -#include "ut0crc32.h" -#ifndef UNIV_HOTBACKUP -# include "ha_prototypes.h" /* innobase_casedn_str(), - innobase_get_lower_case_table_names */ -# include "mysql_com.h" /* NAME_LEN */ -# include "lock0lock.h" -#endif /* !UNIV_HOTBACKUP */ -#ifdef UNIV_BLOB_DEBUG -# include "ut0rbt.h" -#endif /* UNIV_BLOB_DEBUG */ -#include <iostream> - -#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when - creating a table or index object */ - -#ifdef UNIV_PFS_MUTEX -/* Key to register autoinc_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/** System databases */ -static const char* innobase_system_databases[] = { - "mysql/", - "information_schema/", - "performance_schema/", - NullS -}; - -/** An interger randomly initialized at startup used to make a temporary -table name as unique as possible. */ -static ib_uint32_t dict_temp_file_num; - -/**********************************************************************//** -Creates a table memory object. 
-@return own: table object */ -UNIV_INTERN -dict_table_t* -dict_mem_table_create( -/*==================*/ - const char* name, /*!< in: table name */ - ulint space, /*!< in: space where the clustered index of - the table is placed */ - ulint n_cols, /*!< in: number of columns */ - ulint flags, /*!< in: table flags */ - ulint flags2) /*!< in: table flags2 */ -{ - dict_table_t* table; - mem_heap_t* heap; - - ut_ad(name); - ut_a(dict_tf_is_valid(flags)); - ut_a(!(flags2 & ~DICT_TF2_BIT_MASK)); - - heap = mem_heap_create(DICT_HEAP_SIZE); - - table = static_cast<dict_table_t*>( - mem_heap_zalloc(heap, sizeof(dict_table_t))); - - table->heap = heap; - - table->flags = (unsigned int) flags; - table->flags2 = (unsigned int) flags2; - table->name = static_cast<char*>(ut_malloc(strlen(name) + 1)); - memcpy(table->name, name, strlen(name) + 1); - table->is_system_db = dict_mem_table_is_system(table->name); - table->space = (unsigned int) space; - table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); - - table->cols = static_cast<dict_col_t*>( - mem_heap_alloc(heap, - (n_cols + DATA_N_SYS_COLS) - * sizeof(dict_col_t))); - - ut_d(table->magic_n = DICT_TABLE_MAGIC_N); - - /* true means that the stats latch will be enabled - - dict_table_stats_lock() will not be noop. */ - dict_table_stats_latch_create(table, true); - -#ifndef UNIV_HOTBACKUP - table->autoinc_lock = static_cast<ib_lock_t*>( - mem_heap_alloc(heap, lock_get_size())); - - dict_table_autoinc_create_lazy(table); - - table->autoinc = 0; - - /* The number of transactions that are either waiting on the - AUTOINC lock or have been granted the lock. 
*/ - table->n_waiting_or_granted_auto_inc_locks = 0; - - /* If the table has an FTS index or we are in the process - of building one, create the table->fts */ - if (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { - table->fts = fts_create(table); - table->fts->cache = fts_cache_create(table); - } else { - table->fts = NULL; - } - -#endif /* !UNIV_HOTBACKUP */ - - new(&table->foreign_set) dict_foreign_set(); - new(&table->referenced_set) dict_foreign_set(); - - return(table); -} - -/****************************************************************//** -Determines if a table belongs to a system database -@return */ -UNIV_INTERN -bool -dict_mem_table_is_system( -/*================*/ - char *name) /*!< in: table name */ -{ - ut_ad(name); - - /* table has the following format: database/table - and some system table are of the form SYS_* */ - if (strchr(name, '/')) { - int table_len = strlen(name); - const char *system_db; - int i = 0; - while ((system_db = innobase_system_databases[i++]) - && (system_db != NullS)) { - int len = strlen(system_db); - if (table_len > len && !strncmp(name, system_db, len)) { - return true; - } - } - return false; - } else { - return true; - } -} - -/****************************************************************//** -Free a table memory object. 
*/ -UNIV_INTERN -void -dict_mem_table_free( -/*================*/ - dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_d(table->cached = FALSE); - - if (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { - if (table->fts) { - if (table->cached) { - fts_optimize_remove_table(table); - } - - fts_free(table); - } - } -#ifndef UNIV_HOTBACKUP - dict_table_autoinc_destroy(table); -#endif /* UNIV_HOTBACKUP */ - - dict_table_stats_latch_destroy(table); - - table->foreign_set.~dict_foreign_set(); - table->referenced_set.~dict_foreign_set(); - - ut_free(table->name); - mem_heap_free(table->heap); -} - -/****************************************************************//** -Append 'name' to 'col_names'. @see dict_table_t::col_names -@return new column names array */ -static -const char* -dict_add_col_name( -/*==============*/ - const char* col_names, /*!< in: existing column names, or - NULL */ - ulint cols, /*!< in: number of existing columns */ - const char* name, /*!< in: new column name */ - mem_heap_t* heap) /*!< in: heap */ -{ - ulint old_len; - ulint new_len; - ulint total_len; - char* res; - - ut_ad(!cols == !col_names); - - /* Find out length of existing array. */ - if (col_names) { - const char* s = col_names; - ulint i; - - for (i = 0; i < cols; i++) { - s += strlen(s) + 1; - } - - old_len = s - col_names; - } else { - old_len = 0; - } - - new_len = strlen(name) + 1; - total_len = old_len + new_len; - - res = static_cast<char*>(mem_heap_alloc(heap, total_len)); - - if (old_len > 0) { - memcpy(res, col_names, old_len); - } - - memcpy(res + old_len, name, new_len); - - return(res); -} - -/**********************************************************************//** -Adds a column definition to a table. 
*/ -UNIV_INTERN -void -dict_mem_table_add_col( -/*===================*/ - dict_table_t* table, /*!< in: table */ - mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */ - const char* name, /*!< in: column name, or NULL */ - ulint mtype, /*!< in: main datatype */ - ulint prtype, /*!< in: precise type */ - ulint len) /*!< in: precision */ -{ - dict_col_t* col; - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(!heap == !name); - - i = table->n_def++; - - if (name) { - if (UNIV_UNLIKELY(table->n_def == table->n_cols)) { - heap = table->heap; - } - if (UNIV_LIKELY(i != 0) && UNIV_UNLIKELY(table->col_names == NULL)) { - /* All preceding column names are empty. */ - char* s = static_cast<char*>( - mem_heap_zalloc(heap, table->n_def)); - - table->col_names = s; - } - - table->col_names = dict_add_col_name(table->col_names, - i, name, heap); - } - - col = dict_table_get_nth_col(table, i); - - dict_mem_fill_column_struct(col, i, mtype, prtype, len); -} - -/**********************************************************************//** -Renames a column of a table in the data dictionary cache. */ -static MY_ATTRIBUTE((nonnull)) -void -dict_mem_table_col_rename_low( -/*==========================*/ - dict_table_t* table, /*!< in/out: table */ - unsigned i, /*!< in: column offset corresponding to s */ - const char* to, /*!< in: new column name */ - const char* s) /*!< in: pointer to table->col_names */ -{ - size_t from_len = strlen(s), to_len = strlen(to); - - ut_ad(i < table->n_def); - ut_ad(from_len <= NAME_LEN); - ut_ad(to_len <= NAME_LEN); - - char from[NAME_LEN + 1]; - strncpy(from, s, NAME_LEN + 1); - - if (from_len == to_len) { - /* The easy case: simply replace the column name in - table->col_names. */ - strcpy(const_cast<char*>(s), to); - } else { - /* We need to adjust all affected index->field - pointers, as in dict_index_add_col(). First, copy - table->col_names. 
*/ - ulint prefix_len = s - table->col_names; - - for (; i < table->n_def; i++) { - s += strlen(s) + 1; - } - - ulint full_len = s - table->col_names; - char* col_names; - - if (to_len > from_len) { - col_names = static_cast<char*>( - mem_heap_alloc( - table->heap, - full_len + to_len - from_len)); - - memcpy(col_names, table->col_names, prefix_len); - } else { - col_names = const_cast<char*>(table->col_names); - } - - memcpy(col_names + prefix_len, to, to_len); - memmove(col_names + prefix_len + to_len, - table->col_names + (prefix_len + from_len), - full_len - (prefix_len + from_len)); - - /* Replace the field names in every index. */ - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - ulint n_fields = dict_index_get_n_fields(index); - - for (ulint i = 0; i < n_fields; i++) { - dict_field_t* field - = dict_index_get_nth_field( - index, i); - ulint name_ofs - = field->name - table->col_names; - if (name_ofs <= prefix_len) { - field->name = col_names + name_ofs; - } else { - ut_a(name_ofs < full_len); - field->name = col_names - + name_ofs + to_len - from_len; - } - } - } - - table->col_names = col_names; - } - - dict_foreign_t* foreign; - - /* Replace the field names in every foreign key constraint. */ - for (dict_foreign_set::iterator it = table->foreign_set.begin(); - it != table->foreign_set.end(); - ++it) { - - foreign = *it; - - if (foreign->foreign_index == NULL) { - /* We may go here when we set foreign_key_checks to 0, - and then try to rename a column and modify the - corresponding foreign key constraint. 
The index - would have been dropped, we have to find an equivalent - one */ - for (unsigned f = 0; f < foreign->n_fields; f++) { - if (strcmp(foreign->foreign_col_names[f], from) - == 0) { - - char** rc = const_cast<char**>( - foreign->foreign_col_names - + f); - - if (to_len <= strlen(*rc)) { - memcpy(*rc, to, to_len + 1); - } else { - *rc = static_cast<char*>( - mem_heap_dup( - foreign->heap, - to, - to_len + 1)); - } - } - } - - dict_index_t* new_index = dict_foreign_find_index( - foreign->foreign_table, NULL, - foreign->foreign_col_names, - foreign->n_fields, NULL, true, false, - NULL, NULL, NULL); - /* There must be an equivalent index in this case. */ - ut_ad(new_index != NULL); - - foreign->foreign_index = new_index; - - } else { - - for (unsigned f = 0; f < foreign->n_fields; f++) { - /* These can point straight to - table->col_names, because the foreign key - constraints will be freed at the same time - when the table object is freed. */ - foreign->foreign_col_names[f] - = dict_index_get_nth_field( - foreign->foreign_index, - f)->name; - } - } - } - - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - ut_ad(foreign->referenced_index != NULL); - - for (unsigned f = 0; f < foreign->n_fields; f++) { - /* foreign->referenced_col_names[] need to be - copies, because the constraint may become - orphan when foreign_key_checks=0 and the - parent table is dropped. 
*/ - - const char* col_name = dict_index_get_nth_field( - foreign->referenced_index, f)->name; - - if (strcmp(foreign->referenced_col_names[f], - col_name)) { - char** rc = const_cast<char**>( - foreign->referenced_col_names + f); - size_t col_name_len_1 = strlen(col_name) + 1; - - if (col_name_len_1 <= strlen(*rc) + 1) { - memcpy(*rc, col_name, col_name_len_1); - } else { - *rc = static_cast<char*>( - mem_heap_dup( - foreign->heap, - col_name, - col_name_len_1)); - } - } - } - } -} - -/**********************************************************************//** -Renames a column of a table in the data dictionary cache. */ -UNIV_INTERN -void -dict_mem_table_col_rename( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - unsigned nth_col,/*!< in: column index */ - const char* from, /*!< in: old column name */ - const char* to) /*!< in: new column name */ -{ - const char* s = table->col_names; - - ut_ad(nth_col < table->n_def); - - for (unsigned i = 0; i < nth_col; i++) { - size_t len = strlen(s); - ut_ad(len > 0); - s += len + 1; - } - - /* This could fail if the data dictionaries are out of sync. - Proceed with the renaming anyway. */ - ut_ad(!strcmp(from, s)); - - dict_mem_table_col_rename_low(table, nth_col, to, s); -} - -/**********************************************************************//** -This function populates a dict_col_t memory structure with -supplied information. 
*/ -UNIV_INTERN -void -dict_mem_fill_column_struct( -/*========================*/ - dict_col_t* column, /*!< out: column struct to be - filled */ - ulint col_pos, /*!< in: column position */ - ulint mtype, /*!< in: main data type */ - ulint prtype, /*!< in: precise type */ - ulint col_len) /*!< in: column length */ -{ -#ifndef UNIV_HOTBACKUP - ulint mbminlen; - ulint mbmaxlen; -#endif /* !UNIV_HOTBACKUP */ - - column->ind = (unsigned int) col_pos; - column->ord_part = 0; - column->max_prefix = 0; - column->mtype = (unsigned int) mtype; - column->prtype = (unsigned int) prtype; - column->len = (unsigned int) col_len; -#ifndef UNIV_HOTBACKUP - dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen); - dict_col_set_mbminmaxlen(column, mbminlen, mbmaxlen); -#endif /* !UNIV_HOTBACKUP */ -} - -/**********************************************************************//** -Creates an index memory object. -@return own: index object */ -UNIV_INTERN -dict_index_t* -dict_mem_index_create( -/*==================*/ - const char* table_name, /*!< in: table name */ - const char* index_name, /*!< in: index name */ - ulint space, /*!< in: space where the index tree is - placed, ignored if the index is of - the clustered type */ - ulint type, /*!< in: DICT_UNIQUE, - DICT_CLUSTERED, ... ORed */ - ulint n_fields) /*!< in: number of fields */ -{ - dict_index_t* index; - mem_heap_t* heap; - - ut_ad(table_name && index_name); - - heap = mem_heap_create(DICT_HEAP_SIZE); - - index = static_cast<dict_index_t*>( - mem_heap_zalloc(heap, sizeof(*index))); - - dict_mem_fill_index_struct(index, heap, table_name, index_name, - space, type, n_fields); - - dict_index_zip_pad_mutex_create_lazy(index); - return(index); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Creates and initializes a foreign constraint memory object. 
-@return own: foreign constraint struct */ -UNIV_INTERN -dict_foreign_t* -dict_mem_foreign_create(void) -/*=========================*/ -{ - dict_foreign_t* foreign; - mem_heap_t* heap; - - heap = mem_heap_create(100); - - foreign = static_cast<dict_foreign_t*>( - mem_heap_zalloc(heap, sizeof(dict_foreign_t))); - - foreign->heap = heap; - - return(foreign); -} - -/**********************************************************************//** -Sets the foreign_table_name_lookup pointer based on the value of -lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup -will point to foreign_table_name. If 2, then another string is -allocated from foreign->heap and set to lower case. */ -UNIV_INTERN -void -dict_mem_foreign_table_name_lookup_set( -/*===================================*/ - dict_foreign_t* foreign, /*!< in/out: foreign struct */ - ibool do_alloc) /*!< in: is an alloc needed */ -{ - if (innobase_get_lower_case_table_names() == 2) { - if (do_alloc) { - ulint len; - - len = strlen(foreign->foreign_table_name) + 1; - - foreign->foreign_table_name_lookup = - static_cast<char*>( - mem_heap_alloc(foreign->heap, len)); - } - strcpy(foreign->foreign_table_name_lookup, - foreign->foreign_table_name); - innobase_casedn_str(foreign->foreign_table_name_lookup); - } else { - foreign->foreign_table_name_lookup - = foreign->foreign_table_name; - } -} - -/**********************************************************************//** -Sets the referenced_table_name_lookup pointer based on the value of -lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup -will point to referenced_table_name. If 2, then another string is -allocated from foreign->heap and set to lower case. 
*/ -UNIV_INTERN -void -dict_mem_referenced_table_name_lookup_set( -/*======================================*/ - dict_foreign_t* foreign, /*!< in/out: foreign struct */ - ibool do_alloc) /*!< in: is an alloc needed */ -{ - if (innobase_get_lower_case_table_names() == 2) { - if (do_alloc) { - ulint len; - - len = strlen(foreign->referenced_table_name) + 1; - - foreign->referenced_table_name_lookup = - static_cast<char*>( - mem_heap_alloc(foreign->heap, len)); - } - strcpy(foreign->referenced_table_name_lookup, - foreign->referenced_table_name); - innobase_casedn_str(foreign->referenced_table_name_lookup); - } else { - foreign->referenced_table_name_lookup - = foreign->referenced_table_name; - } -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Adds a field definition to an index. NOTE: does not take a copy -of the column name if the field is a column. The memory occupied -by the column name may be released only after publishing the index. */ -UNIV_INTERN -void -dict_mem_index_add_field( -/*=====================*/ - dict_index_t* index, /*!< in: index */ - const char* name, /*!< in: column name */ - ulint prefix_len) /*!< in: 0 or the column prefix length - in a MySQL index like - INDEX (textcol(25)) */ -{ - dict_field_t* field; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - index->n_def++; - - field = dict_index_get_nth_field(index, index->n_def - 1); - - field->name = name; - field->prefix_len = (unsigned int) prefix_len; -} - -/**********************************************************************//** -Frees an index memory object. 
*/ -UNIV_INTERN -void -dict_mem_index_free( -/*================*/ - dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); -#ifdef UNIV_BLOB_DEBUG - if (index->blobs) { - mutex_free(&index->blobs_mutex); - rbt_free(index->blobs); - } -#endif /* UNIV_BLOB_DEBUG */ - - dict_index_zip_pad_mutex_destroy(index); - - mem_heap_free(index->heap); -} - -/** Create a temporary tablename like "#sql-ibtid-inc where - tid = the Table ID - inc = a randomly initialized number that is incremented for each file -The table ID is a 64 bit integer, can use up to 20 digits, and is -initialized at bootstrap. The second number is 32 bits, can use up to 10 -digits, and is initialized at startup to a randomly distributed number. -It is hoped that the combination of these two numbers will provide a -reasonably unique temporary file name. -@param[in] heap A memory heap -@param[in] dbtab Table name in the form database/table name -@param[in] id Table id -@return A unique temporary tablename suitable for InnoDB use */ -UNIV_INTERN -char* -dict_mem_create_temporary_tablename( - mem_heap_t* heap, - const char* dbtab, - table_id_t id) -{ - size_t size; - char* name; - const char* dbend = strchr(dbtab, '/'); - ut_ad(dbend); - size_t dblen = dbend - dbtab + 1; - -#ifdef HAVE_ATOMIC_BUILTINS - /* Increment a randomly initialized number for each temp file. 
*/ - os_atomic_increment_uint32(&dict_temp_file_num, 1); -#else /* HAVE_ATOMIC_BUILTINS */ - dict_temp_file_num++; -#endif /* HAVE_ATOMIC_BUILTINS */ - - size = tmp_file_prefix_length + 3 + 20 + 1 + 10 + dblen; - name = static_cast<char*>(mem_heap_alloc(heap, size)); - memcpy(name, dbtab, dblen); - ut_snprintf(name + dblen, size - dblen, - TEMP_FILE_PREFIX_INNODB UINT64PF "-" UINT32PF, - id, dict_temp_file_num); - - return(name); -} - -/** Initialize dict memory variables */ - -void -dict_mem_init(void) -{ - /* Initialize a randomly distributed temporary file number */ - ib_uint32_t now = static_cast<ib_uint32_t>(ut_time()); - - const byte* buf = reinterpret_cast<const byte*>(&now); - ut_ad(ut_crc32 != NULL); - - dict_temp_file_num = ut_crc32(buf, sizeof(now)); - - DBUG_PRINT("dict_mem_init", - ("Starting Temporary file number is " UINT32PF, - dict_temp_file_num)); -} - -/** Validate the search order in the foreign key set. -@param[in] fk_set the foreign key set to be validated -@return true if search order is fine in the set, false otherwise. */ -bool -dict_foreign_set_validate( - const dict_foreign_set& fk_set) -{ - dict_foreign_not_exists not_exists(fk_set); - - dict_foreign_set::const_iterator it = std::find_if( - fk_set.begin(), fk_set.end(), not_exists); - - if (it == fk_set.end()) { - return(true); - } - - dict_foreign_t* foreign = *it; - std::cerr << "Foreign key lookup failed: " << *foreign; - std::cerr << fk_set; - ut_ad(0); - return(false); -} - -/** Validate the search order in the foreign key sets of the table -(foreign_set and referenced_set). -@param[in] table table whose foreign key sets are to be validated -@return true if foreign key sets are fine, false otherwise. 
*/ -bool -dict_foreign_set_validate( - const dict_table_t& table) -{ - return(dict_foreign_set_validate(table.foreign_set) - && dict_foreign_set_validate(table.referenced_set)); -} - -std::ostream& -operator<< (std::ostream& out, const dict_foreign_t& foreign) -{ - out << "[dict_foreign_t: id='" << foreign.id << "'"; - - if (foreign.foreign_table_name != NULL) { - out << ",for: '" << foreign.foreign_table_name << "'"; - } - - out << "]"; - return(out); -} - -std::ostream& -operator<< (std::ostream& out, const dict_foreign_set& fk_set) -{ - out << "[dict_foreign_set:"; - std::for_each(fk_set.begin(), fk_set.end(), dict_foreign_print(out)); - out << "]" << std::endl; - return(out); -} - diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc deleted file mode 100644 index c1463e98ce0..00000000000 --- a/storage/xtradb/dict/dict0stats.cc +++ /dev/null @@ -1,4463 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dict/dict0stats.cc -Code used for calculating and manipulating table statistics. 
- -Created Jan 06, 2010 Vasil Dimov -*******************************************************/ - -#ifndef UNIV_HOTBACKUP - -#include "univ.i" - -#include "btr0btr.h" /* btr_get_size() */ -#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */ -#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */ -#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */ -#include "dict0stats.h" -#include "data0type.h" /* dtype_t */ -#include "db0err.h" /* dberr_t */ -#include "page0page.h" /* page_align() */ -#include "pars0pars.h" /* pars_info_create() */ -#include "pars0types.h" /* pars_info_t */ -#include "que0que.h" /* que_eval_sql() */ -#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */ -#include "row0sel.h" /* sel_node_t */ -#include "row0types.h" /* sel_node_t */ -#include "trx0trx.h" /* trx_create() */ -#include "trx0roll.h" /* trx_rollback_to_savepoint() */ -#include "ut0rnd.h" /* ut_rnd_interval() */ -#include "ut0ut.h" /* ut_format_name(), ut_time() */ - -#include <algorithm> -#include <map> -#include <vector> - -/* Sampling algorithm description @{ - -The algorithm is controlled by one number - N_SAMPLE_PAGES(index), -let it be A, which is the number of leaf pages to analyze for a given index -for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be -analyzed). - -Let the total number of leaf pages in the table be T. -Level 0 - leaf pages, level H - root. - -Definition: N-prefix-boring record is a record on a non-leaf page that equals -the next (to the right, cross page boundaries, skipping the supremum and -infimum) record on the same level when looking at the fist n-prefix columns. -The last (user) record on a level is not boring (it does not match the -non-existent user record to the right). We call the records boring because all -the records on the page below a boring record are equal to that boring record. 
- -We avoid diving below boring records when searching for a leaf page to -estimate the number of distinct records because we know that such a leaf -page will have number of distinct records == 1. - -For each n-prefix: start from the root level and full scan subsequent lower -levels until a level that contains at least A*10 distinct records is found. -Lets call this level LA. -As an optimization the search is canceled if it has reached level 1 (never -descend to the level 0 (leaf)) and also if the next level to be scanned -would contain more than A pages. The latter is because the user has asked -to analyze A leaf pages and it does not make sense to scan much more than -A non-leaf pages with the sole purpose of finding a good sample of A leaf -pages. - -After finding the appropriate level LA with >A*10 distinct records (or less in -the exceptions described above), divide it into groups of equal records and -pick A such groups. Then pick the last record from each group. For example, -let the level be: - -index: 0,1,2,3,4,5,6,7,8,9,10 -record: 1,1,1,2,2,7,7,7,7,7,9 - -There are 4 groups of distinct records and if A=2 random ones are selected, -e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected. - -After selecting A records as described above, dive below them to find A leaf -pages and analyze them, finding the total number of distinct records. The -dive to the leaf level is performed by selecting a non-boring record from -each page and diving below it. - -This way, a total of A leaf pages are analyzed for the given n-prefix. - -Let the number of different key values found in each leaf page i be Pi (i=1..A). -Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A. -Let the number of different key values on level LA be N_DIFF_LA. -Let the total number of records on level LA be TOTAL_LA. -Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the -leaf level. -Let the number of leaf pages be N. 
-Then the total number of different key values on the leaf level is: -N * R * N_DIFF_AVG_LEAF. -See REF01 for the implementation. - -The above describes how to calculate the cardinality of an index. -This algorithm is executed for each n-prefix of a multi-column index -where n=1..n_uniq. -@} */ - -/* names of the tables from the persistent statistics storage */ -#define TABLE_STATS_NAME "mysql/innodb_table_stats" -#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats" -#define INDEX_STATS_NAME "mysql/innodb_index_stats" -#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats" - -#ifdef UNIV_STATS_DEBUG -#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#else /* UNIV_STATS_DEBUG */ -#define DEBUG_PRINTF(fmt, ...) /* noop */ -#endif /* UNIV_STATS_DEBUG */ - -/* Gets the number of leaf pages to sample in persistent stats estimation */ -#define N_SAMPLE_PAGES(index) \ - static_cast<ib_uint64_t>( \ - (index)->table->stats_sample_pages != 0 \ - ? (index)->table->stats_sample_pages \ - : srv_stats_persistent_sample_pages) - -/* number of distinct records on a given level that are required to stop -descending to lower levels and fetch N_SAMPLE_PAGES(index) records -from that level */ -#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10) - -/* A dynamic array where we store the boundaries of each distinct group -of keys. For example if a btree level is: -index: 0,1,2,3,4,5,6,7,8,9,10,11,12 -data: b,b,b,b,b,b,g,g,j,j,j, x, y -then we would store 5,7,10,11,12 in the array. */ -typedef std::vector<ib_uint64_t> boundaries_t; - -/* This is used to arrange the index based on the index name. -@return true if index_name1 is smaller than index_name2. 
*/ -struct index_cmp -{ - bool operator()(const char* index_name1, const char* index_name2) const { - return(strcmp(index_name1, index_name2) < 0); - } -}; - -typedef std::map<const char*, dict_index_t*, index_cmp> index_map_t; - -/*********************************************************************//** -Checks whether an index should be ignored in stats manipulations: -* stats fetch -* stats recalc -* stats save -@return true if exists and all tables are ok */ -UNIV_INLINE -bool -dict_stats_should_ignore_index( -/*===========================*/ - const dict_index_t* index) /*!< in: index */ -{ - return((index->type & DICT_FTS) - || dict_index_is_corrupted(index) - || index->to_be_dropped - || *index->name == TEMP_INDEX_PREFIX); -} - -/*********************************************************************//** -Checks whether the persistent statistics storage exists and that all -tables have the proper structure. -@return true if exists and all tables are ok */ -static -bool -dict_stats_persistent_storage_check( -/*================================*/ - bool caller_has_dict_sys_mutex) /*!< in: true if the caller - owns dict_sys->mutex */ -{ - /* definition for the table TABLE_STATS_NAME */ - dict_col_meta_t table_stats_columns[] = { - {"database_name", DATA_VARMYSQL, - DATA_NOT_NULL, 192}, - - {"table_name", DATA_VARMYSQL, - DATA_NOT_NULL, 192}, - - {"last_update", DATA_FIXBINARY, - DATA_NOT_NULL, 4}, - - {"n_rows", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, 8}, - - {"clustered_index_size", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, 8}, - - {"sum_of_other_index_sizes", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, 8} - }; - dict_table_schema_t table_stats_schema = { - TABLE_STATS_NAME, - UT_ARR_SIZE(table_stats_columns), - table_stats_columns, - 0 /* n_foreign */, - 0 /* n_referenced */ - }; - - /* definition for the table INDEX_STATS_NAME */ - dict_col_meta_t index_stats_columns[] = { - {"database_name", DATA_VARMYSQL, - DATA_NOT_NULL, 192}, - - {"table_name", 
DATA_VARMYSQL, - DATA_NOT_NULL, 192}, - - {"index_name", DATA_VARMYSQL, - DATA_NOT_NULL, 192}, - - {"last_update", DATA_FIXBINARY, - DATA_NOT_NULL, 4}, - - {"stat_name", DATA_VARMYSQL, - DATA_NOT_NULL, 64*3}, - - {"stat_value", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, 8}, - - {"sample_size", DATA_INT, - DATA_UNSIGNED, 8}, - - {"stat_description", DATA_VARMYSQL, - DATA_NOT_NULL, 1024*3} - }; - dict_table_schema_t index_stats_schema = { - INDEX_STATS_NAME, - UT_ARR_SIZE(index_stats_columns), - index_stats_columns, - 0 /* n_foreign */, - 0 /* n_referenced */ - }; - - char errstr[512]; - dberr_t ret; - - if (!caller_has_dict_sys_mutex) { - mutex_enter(&(dict_sys->mutex)); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - - /* first check table_stats */ - ret = dict_table_schema_check(&table_stats_schema, errstr, - sizeof(errstr)); - if (ret == DB_SUCCESS) { - /* if it is ok, then check index_stats */ - ret = dict_table_schema_check(&index_stats_schema, errstr, - sizeof(errstr)); - } - - if (!caller_has_dict_sys_mutex) { - mutex_exit(&(dict_sys->mutex)); - } - - if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: %s\n", errstr); - return(false); - } else if (ret == DB_STATS_DO_NOT_EXIST) { - return false; - } - /* else */ - - return(true); -} - -/** Executes a given SQL statement using the InnoDB internal SQL parser. -This function will free the pinfo object. -@param[in,out] pinfo pinfo to pass to que_eval_sql() must already -have any literals bound to it -@param[in] sql SQL string to execute -@param[in,out] trx in case of NULL the function will allocate and -free the trx object. If it is not NULL then it will be rolled back -only in the case of error, but not freed. 
-@return DB_SUCCESS or error code */ -static -dberr_t -dict_stats_exec_sql( - pars_info_t* pinfo, - const char* sql, - trx_t* trx) -{ - dberr_t err; - bool trx_started = false; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&dict_sys->mutex)); - - if (!dict_stats_persistent_storage_check(true)) { - pars_info_free(pinfo); - return(DB_STATS_DO_NOT_EXIST); - } - - if (trx == NULL) { - trx = trx_allocate_for_background(); - trx_start_if_not_started(trx); - trx_started = true; - } - - err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */ - - DBUG_EXECUTE_IF("stats_index_error", - if (!trx_started) { - err = DB_STATS_DO_NOT_EXIST; - trx->error_state = DB_STATS_DO_NOT_EXIST; - }); - - if (!trx_started && err == DB_SUCCESS) { - return(DB_SUCCESS); - } - - if (err == DB_SUCCESS) { - trx_commit_for_mysql(trx); - } else { - trx->op_info = "rollback of internal trx on stats tables"; - trx->dict_operation_lock_mode = RW_X_LATCH; - trx_rollback_to_savepoint(trx, NULL); - trx->dict_operation_lock_mode = 0; - trx->op_info = ""; - ut_a(trx->error_state == DB_SUCCESS); - } - - if (trx_started) { - trx_free_for_background(trx); - } - - return(err); -} - -/*********************************************************************//** -Duplicate a table object and its indexes. 
-This function creates a dummy dict_table_t object and initializes the -following table and index members: -dict_table_t::id (copied) -dict_table_t::heap (newly created) -dict_table_t::name (copied) -dict_table_t::corrupted (copied) -dict_table_t::indexes<> (newly created) -dict_table_t::magic_n -for each entry in dict_table_t::indexes, the following are initialized: -(indexes that have DICT_FTS set in index->type are skipped) -dict_index_t::id (copied) -dict_index_t::name (copied) -dict_index_t::table_name (points to the copied table name) -dict_index_t::table (points to the above semi-initialized object) -dict_index_t::type (copied) -dict_index_t::to_be_dropped (copied) -dict_index_t::online_status (copied) -dict_index_t::n_uniq (copied) -dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name) -dict_index_t::indexes<> (newly created) -dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized) -dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized) -dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized) -dict_index_t::magic_n -The returned object should be freed with dict_stats_table_clone_free() -when no longer needed. 
-@return incomplete table object */ -static -dict_table_t* -dict_stats_table_clone_create( -/*==========================*/ - const dict_table_t* table) /*!< in: table whose stats to copy */ -{ - size_t heap_size; - dict_index_t* index; - - /* Estimate the size needed for the table and all of its indexes */ - - heap_size = 0; - heap_size += sizeof(dict_table_t); - heap_size += strlen(table->name) + 1; - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (dict_stats_should_ignore_index(index)) { - continue; - } - - ut_ad(!dict_index_is_univ(index)); - - ulint n_uniq = dict_index_get_n_unique(index); - - heap_size += sizeof(dict_index_t); - heap_size += strlen(index->name) + 1; - heap_size += n_uniq * sizeof(index->fields[0]); - for (ulint i = 0; i < n_uniq; i++) { - heap_size += strlen(index->fields[i].name) + 1; - } - heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]); - heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]); - heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]); - } - - /* Allocate the memory and copy the members */ - - mem_heap_t* heap; - - heap = mem_heap_create(heap_size); - - dict_table_t* t; - - t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id)); - t->id = table->id; - - t->heap = heap; - - UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1); - t->name = (char*) mem_heap_strdup(heap, table->name); - - t->corrupted = table->corrupted; - - /* This private object "t" is not shared with other threads, so - we do not need the stats_latch (thus we pass false below). The - dict_table_stats_lock()/unlock() routines will do nothing. 
*/ - dict_table_stats_latch_create(t, false); - - UT_LIST_INIT(t->indexes); - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (dict_stats_should_ignore_index(index)) { - continue; - } - - ut_ad(!dict_index_is_univ(index)); - - dict_index_t* idx; - - idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx)); - - UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id)); - idx->id = index->id; - - UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1); - idx->name = (char*) mem_heap_strdup(heap, index->name); - - idx->table_name = t->name; - - idx->table = t; - - idx->type = index->type; - - idx->to_be_dropped = 0; - - idx->online_status = ONLINE_INDEX_COMPLETE; - - idx->n_uniq = index->n_uniq; - - idx->fields = (dict_field_t*) mem_heap_alloc( - heap, idx->n_uniq * sizeof(idx->fields[0])); - - for (ulint i = 0; i < idx->n_uniq; i++) { - UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1); - idx->fields[i].name = (char*) mem_heap_strdup( - heap, index->fields[i].name); - } - - /* hook idx into t->indexes */ - UT_LIST_ADD_LAST(indexes, t->indexes, idx); - - idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc( - heap, - idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0])); - - idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc( - heap, - idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0])); - - idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc( - heap, - idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0])); - ut_d(idx->magic_n = DICT_INDEX_MAGIC_N); - - idx->stat_defrag_n_page_split = 0; - idx->stat_defrag_n_pages_freed = 0; - } - - ut_d(t->magic_n = DICT_TABLE_MAGIC_N); - - return(t); -} - -/*********************************************************************//** -Free the resources occupied by an object returned by -dict_stats_table_clone_create(). 
*/ -static -void -dict_stats_table_clone_free( -/*========================*/ - dict_table_t* t) /*!< in: dummy table object to free */ -{ - dict_table_stats_latch_destroy(t); - mem_heap_free(t->heap); -} - -/*********************************************************************//** -Write all zeros (or 1 where it makes sense) into an index -statistics members. The resulting stats correspond to an empty index. -The caller must own index's table stats latch in X mode -(dict_table_stats_lock(table, RW_X_LATCH)) */ -static -void -dict_stats_empty_index( -/*===================*/ - dict_index_t* index, /*!< in/out: index */ - bool empty_defrag_stats) - /*!< in: whether to empty defrag stats */ -{ - ut_ad(!(index->type & DICT_FTS)); - ut_ad(!dict_index_is_univ(index)); - - ulint n_uniq = index->n_uniq; - - for (ulint i = 0; i < n_uniq; i++) { - index->stat_n_diff_key_vals[i] = 0; - index->stat_n_sample_sizes[i] = 1; - index->stat_n_non_null_key_vals[i] = 0; - } - - index->stat_index_size = 1; - index->stat_n_leaf_pages = 1; - - if (empty_defrag_stats) { - dict_stats_empty_defrag_stats(index); - dict_stats_empty_defrag_summary(index); - } -} - -/**********************************************************************//** -Clear defragmentation summary. */ -UNIV_INTERN -void -dict_stats_empty_defrag_summary( -/*==================*/ - dict_index_t* index) /*!< in: index to clear defragmentation stats */ -{ - index->stat_defrag_n_pages_freed = 0; -} - -/**********************************************************************//** -Clear defragmentation related index stats. */ -UNIV_INTERN -void -dict_stats_empty_defrag_stats( -/*==================*/ - dict_index_t* index) /*!< in: index to clear defragmentation stats */ -{ - index->stat_defrag_modified_counter = 0; - index->stat_defrag_n_page_split = 0; -} - -/*********************************************************************//** -Write all zeros (or 1 where it makes sense) into a table and its indexes' -statistics members. 
The resulting stats correspond to an empty table. */ -static -void -dict_stats_empty_table( -/*===================*/ - dict_table_t* table, /*!< in/out: table */ - bool empty_defrag_stats) - /*!< in: whether to empty defrag stats */ -{ - /* Zero the stats members */ - - dict_table_stats_lock(table, RW_X_LATCH); - - table->stat_n_rows = 0; - table->stat_clustered_index_size = 1; - /* 1 page for each index, not counting the clustered */ - table->stat_sum_of_other_index_sizes - = UT_LIST_GET_LEN(table->indexes) - 1; - table->stat_modified_counter = 0; - - dict_index_t* index; - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (index->type & DICT_FTS) { - continue; - } - - ut_ad(!dict_index_is_univ(index)); - - dict_stats_empty_index(index, empty_defrag_stats); - } - - table->stat_initialized = TRUE; - - dict_table_stats_unlock(table, RW_X_LATCH); -} - -/*********************************************************************//** -Check whether index's stats are initialized (assert if they are not). */ -static -void -dict_stats_assert_initialized_index( -/*================================*/ - const dict_index_t* index) /*!< in: index */ -{ - UNIV_MEM_ASSERT_RW_ABORT( - index->stat_n_diff_key_vals, - index->n_uniq * sizeof(index->stat_n_diff_key_vals[0])); - - UNIV_MEM_ASSERT_RW_ABORT( - index->stat_n_sample_sizes, - index->n_uniq * sizeof(index->stat_n_sample_sizes[0])); - - UNIV_MEM_ASSERT_RW_ABORT( - index->stat_n_non_null_key_vals, - index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0])); - - UNIV_MEM_ASSERT_RW_ABORT( - &index->stat_index_size, - sizeof(index->stat_index_size)); - - UNIV_MEM_ASSERT_RW_ABORT( - &index->stat_n_leaf_pages, - sizeof(index->stat_n_leaf_pages)); -} - -/*********************************************************************//** -Check whether table's stats are initialized (assert if they are not). 
*/ -static -void -dict_stats_assert_initialized( -/*==========================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_a(table->stat_initialized); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc, - sizeof(table->stats_last_recalc)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent, - sizeof(table->stat_persistent)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc, - sizeof(table->stats_auto_recalc)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages, - sizeof(table->stats_sample_pages)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows, - sizeof(table->stat_n_rows)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size, - sizeof(table->stat_clustered_index_size)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes, - sizeof(table->stat_sum_of_other_index_sizes)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter, - sizeof(table->stat_modified_counter)); - - UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag, - sizeof(table->stats_bg_flag)); - - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (!dict_stats_should_ignore_index(index)) { - dict_stats_assert_initialized_index(index); - } - } -} - -#define INDEX_EQ(i1, i2) \ - ((i1) != NULL \ - && (i2) != NULL \ - && (i1)->id == (i2)->id \ - && strcmp((i1)->name, (i2)->name) == 0) - -/*********************************************************************//** -Copy table and index statistics from one table to another, including index -stats. Extra indexes in src are ignored and extra indexes in dst are -initialized to correspond to an empty index. 
*/ -static -void -dict_stats_copy( -/*============*/ - dict_table_t* dst, /*!< in/out: destination table */ - const dict_table_t* src, /*!< in: source table */ - bool reset_ignored_indexes) /*!< in: if true, set ignored indexes - to have the same statistics as if - the table was empty */ -{ - dst->stats_last_recalc = src->stats_last_recalc; - dst->stat_n_rows = src->stat_n_rows; - dst->stat_clustered_index_size = src->stat_clustered_index_size; - dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes; - dst->stat_modified_counter = src->stat_modified_counter; - - dict_index_t* dst_idx; - dict_index_t* src_idx; - - for (dst_idx = dict_table_get_first_index(dst), - src_idx = dict_table_get_first_index(src); - dst_idx != NULL; - dst_idx = dict_table_get_next_index(dst_idx), - (src_idx != NULL - && (src_idx = dict_table_get_next_index(src_idx)))) { - - if (dict_stats_should_ignore_index(dst_idx)) { - if (reset_ignored_indexes) { - /* Reset index statistics for all ignored indexes, - unless they are FT indexes (these have no statistics)*/ - if (dst_idx->type & DICT_FTS) { - continue; - } - dict_stats_empty_index(dst_idx, true); - } else { - continue; - } - } - - ut_ad(!dict_index_is_univ(dst_idx)); - - if (!INDEX_EQ(src_idx, dst_idx)) { - for (src_idx = dict_table_get_first_index(src); - src_idx != NULL; - src_idx = dict_table_get_next_index(src_idx)) { - - if (INDEX_EQ(src_idx, dst_idx)) { - break; - } - } - } - - if (!INDEX_EQ(src_idx, dst_idx)) { - dict_stats_empty_index(dst_idx, true); - continue; - } - - ulint n_copy_el; - - if (dst_idx->n_uniq > src_idx->n_uniq) { - n_copy_el = src_idx->n_uniq; - /* Since src is smaller some elements in dst - will remain untouched by the following memmove(), - thus we init all of them here. 
*/ - dict_stats_empty_index(dst_idx, true); - } else { - n_copy_el = dst_idx->n_uniq; - } - - memmove(dst_idx->stat_n_diff_key_vals, - src_idx->stat_n_diff_key_vals, - n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0])); - - memmove(dst_idx->stat_n_sample_sizes, - src_idx->stat_n_sample_sizes, - n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0])); - - memmove(dst_idx->stat_n_non_null_key_vals, - src_idx->stat_n_non_null_key_vals, - n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0])); - - dst_idx->stat_index_size = src_idx->stat_index_size; - - dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages; - - dst_idx->stat_defrag_modified_counter = - src_idx->stat_defrag_modified_counter; - dst_idx->stat_defrag_n_pages_freed = - src_idx->stat_defrag_n_pages_freed; - dst_idx->stat_defrag_n_page_split = - src_idx->stat_defrag_n_page_split; - } - - dst->stat_initialized = TRUE; -} - -/*********************************************************************//** -Duplicate the stats of a table and its indexes. -This function creates a dummy dict_table_t object and copies the input -table's stats into it. The returned table object is not in the dictionary -cache and cannot be accessed by any other threads. In addition to the -members copied in dict_stats_table_clone_create() this function initializes -the following: -dict_table_t::stat_initialized -dict_table_t::stat_persistent -dict_table_t::stat_n_rows -dict_table_t::stat_clustered_index_size -dict_table_t::stat_sum_of_other_index_sizes -dict_table_t::stat_modified_counter -dict_index_t::stat_n_diff_key_vals[] -dict_index_t::stat_n_sample_sizes[] -dict_index_t::stat_n_non_null_key_vals[] -dict_index_t::stat_index_size -dict_index_t::stat_n_leaf_pages -dict_index_t::stat_defrag_modified_counter -dict_index_t::stat_defrag_n_pages_freed -dict_index_t::stat_defrag_n_page_split -The returned object should be freed with dict_stats_snapshot_free() -when no longer needed. 
-@return incomplete table object */ -static -dict_table_t* -dict_stats_snapshot_create( -/*=======================*/ - dict_table_t* table) /*!< in: table whose stats to copy */ -{ - mutex_enter(&dict_sys->mutex); - - dict_table_stats_lock(table, RW_S_LATCH); - - dict_stats_assert_initialized(table); - - dict_table_t* t; - - t = dict_stats_table_clone_create(table); - - dict_stats_copy(t, table, false); - - t->stat_persistent = table->stat_persistent; - t->stats_auto_recalc = table->stats_auto_recalc; - t->stats_sample_pages = table->stats_sample_pages; - t->stats_bg_flag = table->stats_bg_flag; - - dict_table_stats_unlock(table, RW_S_LATCH); - - mutex_exit(&dict_sys->mutex); - - return(t); -} - -/*********************************************************************//** -Free the resources occupied by an object returned by -dict_stats_snapshot_create(). */ -static -void -dict_stats_snapshot_free( -/*=====================*/ - dict_table_t* t) /*!< in: dummy table object to free */ -{ - dict_stats_table_clone_free(t); -} - -/*********************************************************************//** -Calculates new estimates for index statistics. This function is -relatively quick and is used to calculate transient statistics that -are not saved on disk. This was the only way to calculate statistics -before the Persistent Statistics feature was introduced. -This function doesn't update the defragmentation related stats. -Only persistent statistics supports defragmentation stats. */ -static -void -dict_stats_update_transient_for_index( -/*==================================*/ - dict_index_t* index) /*!< in/out: index */ -{ - if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO - && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO - || !dict_index_is_clust(index))) { - /* If we have set a high innodb_force_recovery - level, do not calculate statistics, as a badly - corrupted index can cause a crash in it. 
- Initialize some bogus index cardinality - statistics, so that the data can be queried in - various means, also via secondary indexes. */ - dict_stats_empty_index(index, false); -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - } else if (ibuf_debug && !dict_index_is_clust(index)) { - dict_stats_empty_index(index, false); -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - } else { - mtr_t mtr; - ulint size; - mtr_start(&mtr); - mtr_s_lock(dict_index_get_lock(index), &mtr); - - size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr); - - if (size != ULINT_UNDEFINED) { - index->stat_index_size = size; - - size = btr_get_size( - index, BTR_N_LEAF_PAGES, &mtr); - } - - mtr_commit(&mtr); - - switch (size) { - case ULINT_UNDEFINED: - dict_stats_empty_index(index, false); - return; - case 0: - /* The root node of the tree is a leaf */ - size = 1; - } - - index->stat_n_leaf_pages = size; - - /* Do not continue if table decryption has failed or - table is already marked as corrupted. */ - if (index->is_readable()) { - btr_estimate_number_of_different_key_vals(index); - } - } -} - -/*********************************************************************//** -Calculates new estimates for table and index statistics. This function -is relatively quick and is used to calculate transient statistics that -are not saved on disk. -This was the only way to calculate statistics before the -Persistent Statistics feature was introduced. */ -UNIV_INTERN -void -dict_stats_update_transient( -/*========================*/ - dict_table_t* table) /*!< in/out: table */ -{ - dict_index_t* index; - ulint sum_of_index_sizes = 0; - - /* Find out the sizes of the indexes and how many different values - for the key they approximately have */ - - index = dict_table_get_first_index(table); - - if (dict_table_is_discarded(table)) { - /* Nothing to do. 
*/ - dict_stats_empty_table(table, false); - return; - } else if (index == NULL) { - /* Table definition is corrupt */ - - char buf[MAX_FULL_NAME_LEN]; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: table %s has no indexes. " - "Cannot calculate statistics.\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf))); - dict_stats_empty_table(table, false); - return; - } - - for (; index != NULL; index = dict_table_get_next_index(index)) { - - ut_ad(!dict_index_is_univ(index)); - - if (index->type & DICT_FTS) { - continue; - } - - dict_stats_empty_index(index, false); - - if (dict_stats_should_ignore_index(index)) { - continue; - } - - /* Do not continue if table decryption has failed or - table is already marked as corrupted. */ - if (!index->is_readable()) { - break; - } - - dict_stats_update_transient_for_index(index); - - sum_of_index_sizes += index->stat_index_size; - } - - index = dict_table_get_first_index(table); - - table->stat_n_rows = index->stat_n_diff_key_vals[ - dict_index_get_n_unique(index) - 1]; - - table->stat_clustered_index_size = index->stat_index_size; - - table->stat_sum_of_other_index_sizes = sum_of_index_sizes - - index->stat_index_size; - - table->stats_last_recalc = ut_time(); - - table->stat_modified_counter = 0; - - table->stat_initialized = TRUE; -} - -/* @{ Pseudo code about the relation between the following functions - -let N = N_SAMPLE_PAGES(index) - -dict_stats_analyze_index() - for each n_prefix - search for good enough level: - dict_stats_analyze_index_level() // only called if level has <= N pages - // full scan of the level in one mtr - collect statistics about the given level - if we are not satisfied with the level, search next lower level - we have found a good enough level here - dict_stats_analyze_index_for_n_prefix(that level, stats collected above) - // full scan of the level in one mtr - dive below some records and analyze the leaf page there: - dict_stats_analyze_index_below_cur() -@} */ - 
-/*********************************************************************//** -Find the total number and the number of distinct keys on a given level in -an index. Each of the 1..n_uniq prefixes are looked up and the results are -saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of -records on the level is saved in total_recs. -Also, the index of the last record in each group of equal records is saved -in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost -record on the level and continues cross pages boundaries, counting from 0. */ -static -void -dict_stats_analyze_index_level( -/*===========================*/ - dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: level */ - ib_uint64_t* n_diff, /*!< out: array for number of - distinct keys for all prefixes */ - ib_uint64_t* total_recs, /*!< out: total number of records */ - ib_uint64_t* total_pages, /*!< out: total number of pages */ - boundaries_t* n_diff_boundaries,/*!< out: boundaries of the groups - of distinct keys */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint n_uniq; - mem_heap_t* heap; - btr_pcur_t pcur; - const page_t* page; - const rec_t* rec; - const rec_t* prev_rec; - bool prev_rec_is_copied; - byte* prev_rec_buf = NULL; - ulint prev_rec_buf_size = 0; - ulint* rec_offsets; - ulint* prev_rec_offsets; - ulint i; - - DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__, - index->table->name, index->name, level); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); - - n_uniq = dict_index_get_n_unique(index); - - /* elements in the n_diff array are 0..n_uniq-1 (inclusive) */ - memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0])); - - /* Allocate space for the offsets header (the allocation size at - offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1, - so that this will never be less than the size calculated in - rec_get_offsets_func(). 
*/ - i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields; - - heap = mem_heap_create((2 * sizeof *rec_offsets) * i); - rec_offsets = static_cast<ulint*>( - mem_heap_alloc(heap, i * sizeof *rec_offsets)); - prev_rec_offsets = static_cast<ulint*>( - mem_heap_alloc(heap, i * sizeof *prev_rec_offsets)); - rec_offs_set_n_alloc(rec_offsets, i); - rec_offs_set_n_alloc(prev_rec_offsets, i); - - /* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */ - if (n_diff_boundaries != NULL) { - for (i = 0; i < n_uniq; i++) { - n_diff_boundaries[i].erase( - n_diff_boundaries[i].begin(), - n_diff_boundaries[i].end()); - } - } - - /* Position pcur on the leftmost record on the leftmost page - on the desired level. */ - - btr_pcur_open_at_index_side( - true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, - &pcur, true, level, mtr); - btr_pcur_move_to_next_on_page(&pcur); - - page = btr_pcur_get_page(&pcur); - - /* The page must not be empty, except when - it is the root page (and the whole index is empty). 
*/ - ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page)); - ut_ad(btr_pcur_get_rec(&pcur) - == page_rec_get_next_const(page_get_infimum_rec(page))); - - /* check that we are indeed on the desired level */ - ut_a(btr_page_get_level(page, mtr) == level); - - /* there should not be any pages on the left */ - ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); - - /* check whether the first record on the leftmost page is marked - as such, if we are on a non-leaf level */ - ut_a((level == 0) - == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - btr_pcur_get_rec(&pcur), page_is_comp(page)))); - - prev_rec = NULL; - prev_rec_is_copied = false; - - /* no records by default */ - *total_recs = 0; - - *total_pages = 0; - - /* iterate over all user records on this level - and compare each two adjacent ones, even the last on page - X and the fist on page X+1 */ - for (; - btr_pcur_is_on_user_rec(&pcur); - btr_pcur_move_to_next_user_rec(&pcur, mtr)) { - - ulint matched_fields = 0; - ulint matched_bytes = 0; - bool rec_is_last_on_page; - - rec = btr_pcur_get_rec(&pcur); - - /* If rec and prev_rec are on different pages, then prev_rec - must have been copied, because we hold latch only on the page - where rec resides. */ - if (prev_rec != NULL - && page_align(rec) != page_align(prev_rec)) { - - ut_a(prev_rec_is_copied); - } - - rec_is_last_on_page = - page_rec_is_supremum(page_rec_get_next_const(rec)); - - /* increment the pages counter at the end of each page */ - if (rec_is_last_on_page) { - - (*total_pages)++; - } - - /* Skip delete-marked records on the leaf level. If we - do not skip them, then ANALYZE quickly after DELETE - could count them or not (purge may have already wiped - them away) which brings non-determinism. We skip only - leaf-level delete marks because delete marks on - non-leaf level do not make sense. 
*/ - - if (level == 0 - && !srv_stats_include_delete_marked - && rec_get_deleted_flag( - rec, - page_is_comp(btr_pcur_get_page(&pcur)))) { - - if (rec_is_last_on_page - && !prev_rec_is_copied - && prev_rec != NULL) { - /* copy prev_rec */ - - prev_rec_offsets = rec_get_offsets( - prev_rec, index, prev_rec_offsets, - n_uniq, &heap); - - prev_rec = rec_copy_prefix_to_buf( - prev_rec, index, - rec_offs_n_fields(prev_rec_offsets), - &prev_rec_buf, &prev_rec_buf_size); - - prev_rec_is_copied = true; - } - - continue; - } - - rec_offsets = rec_get_offsets( - rec, index, rec_offsets, n_uniq, &heap); - - (*total_recs)++; - - if (prev_rec != NULL) { - prev_rec_offsets = rec_get_offsets( - prev_rec, index, prev_rec_offsets, - n_uniq, &heap); - - cmp_rec_rec_with_match(rec, - prev_rec, - rec_offsets, - prev_rec_offsets, - index, - FALSE, - &matched_fields, - &matched_bytes); - - for (i = matched_fields; i < n_uniq; i++) { - - if (n_diff_boundaries != NULL) { - /* push the index of the previous - record, that is - the last one from - a group of equal keys */ - - ib_uint64_t idx; - - /* the index of the current record - is total_recs - 1, the index of the - previous record is total_recs - 2; - we know that idx is not going to - become negative here because if we - are in this branch then there is a - previous record and thus - total_recs >= 2 */ - idx = *total_recs - 2; - - n_diff_boundaries[i].push_back(idx); - } - - /* increment the number of different keys - for n_prefix=i+1 (e.g. 
if i=0 then we increment - for n_prefix=1 which is stored in n_diff[0]) */ - n_diff[i]++; - } - } else { - /* this is the first non-delete marked record */ - for (i = 0; i < n_uniq; i++) { - n_diff[i] = 1; - } - } - - if (rec_is_last_on_page) { - /* end of a page has been reached */ - - /* we need to copy the record instead of assigning - like prev_rec = rec; because when we traverse the - records on this level at some point we will jump from - one page to the next and then rec and prev_rec will - be on different pages and - btr_pcur_move_to_next_user_rec() will release the - latch on the page that prev_rec is on */ - prev_rec = rec_copy_prefix_to_buf( - rec, index, rec_offs_n_fields(rec_offsets), - &prev_rec_buf, &prev_rec_buf_size); - prev_rec_is_copied = true; - - } else { - /* still on the same page, the next call to - btr_pcur_move_to_next_user_rec() will not jump - on the next page, we can simply assign pointers - instead of copying the records like above */ - - prev_rec = rec; - prev_rec_is_copied = false; - } - } - - /* if *total_pages is left untouched then the above loop was not - entered at all and there is one page in the whole tree which is - empty or the loop was entered but this is level 0, contains one page - and all records are delete-marked */ - if (*total_pages == 0) { - - ut_ad(level == 0); - ut_ad(*total_recs == 0); - - *total_pages = 1; - } - - /* if there are records on this level and boundaries - should be saved */ - if (*total_recs > 0 && n_diff_boundaries != NULL) { - - /* remember the index of the last record on the level as the - last one from the last group of equal keys; this holds for - all possible prefixes */ - for (i = 0; i < n_uniq; i++) { - ib_uint64_t idx; - - idx = *total_recs - 1; - - n_diff_boundaries[i].push_back(idx); - } - } - - /* now in n_diff_boundaries[i] there are exactly n_diff[i] integers, - for i=0..n_uniq-1 */ - -#ifdef UNIV_STATS_DEBUG - for (i = 0; i < n_uniq; i++) { - - DEBUG_PRINTF(" %s(): total recs: " 
UINT64PF - ", total pages: " UINT64PF - ", n_diff[%lu]: " UINT64PF "\n", - __func__, *total_recs, - *total_pages, - i, n_diff[i]); - -#if 0 - if (n_diff_boundaries != NULL) { - ib_uint64_t j; - - DEBUG_PRINTF(" %s(): boundaries[%lu]: ", - __func__, i); - - for (j = 0; j < n_diff[i]; j++) { - ib_uint64_t idx; - - idx = n_diff_boundaries[i][j]; - - DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ", - j, idx); - } - DEBUG_PRINTF("\n"); - } -#endif - } -#endif /* UNIV_STATS_DEBUG */ - - /* Release the latch on the last page, because that is not done by - btr_pcur_close(). This function works also for non-leaf pages. */ - btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr); - - btr_pcur_close(&pcur); - - if (prev_rec_buf != NULL) { - - mem_free(prev_rec_buf); - } - - mem_heap_free(heap); -} - -/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */ -enum page_scan_method_t { - COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on - the given page and count the number of - distinct ones, also ignore delete marked - records */ - QUIT_ON_FIRST_NON_BORING,/* quit when the first record that differs - from its right neighbor is found */ - COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED/* scan all records on - the given page and count the number of - distinct ones, include delete marked - records */ -}; -/* @} */ - -/** Scan a page, reading records from left to right and counting the number -of distinct records (looking only at the first n_prefix -columns) and the number of external pages pointed by records from this page. -If scan_method is QUIT_ON_FIRST_NON_BORING then the function -will return as soon as it finds a record that does not match its neighbor -to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the -returned n_diff can either be 0 (empty page), 1 (the whole page has all keys -equal) or 2 (the function found a non-boring record and returned). 
-@param[out] out_rec record, or NULL -@param[out] offsets1 rec_get_offsets() working space (must -be big enough) -@param[out] offsets2 rec_get_offsets() working space (must -be big enough) -@param[in] index index of the page -@param[in] page the page to scan -@param[in] n_prefix look at the first n_prefix columns -@param[in] scan_method scan to the end of the page or not -@param[out] n_diff number of distinct records encountered -@param[out] n_external_pages if this is non-NULL then it will be set -to the number of externally stored pages which were encountered -@return offsets1 or offsets2 (the offsets of *out_rec), -or NULL if the page is empty and does not contain user records. */ -UNIV_INLINE -ulint* -dict_stats_scan_page( - const rec_t** out_rec, - ulint* offsets1, - ulint* offsets2, - dict_index_t* index, - const page_t* page, - ulint n_prefix, - page_scan_method_t scan_method, - ib_uint64_t* n_diff, - ib_uint64_t* n_external_pages) -{ - ulint* offsets_rec = offsets1; - ulint* offsets_next_rec = offsets2; - const rec_t* rec; - const rec_t* next_rec; - /* A dummy heap, to be passed to rec_get_offsets(). - Because offsets1,offsets2 should be big enough, - this memory heap should never be used. 
*/ - mem_heap_t* heap = NULL; - const rec_t* (*get_next)(const rec_t*); - - if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) { - get_next = page_rec_get_next_non_del_marked; - } else { - get_next = page_rec_get_next_const; - } - - const bool should_count_external_pages = n_external_pages != NULL; - - if (should_count_external_pages) { - *n_external_pages = 0; - } - - rec = get_next(page_get_infimum_rec(page)); - - if (page_rec_is_supremum(rec)) { - /* the page is empty or contains only delete-marked records */ - *n_diff = 0; - *out_rec = NULL; - return(NULL); - } - - offsets_rec = rec_get_offsets(rec, index, offsets_rec, - ULINT_UNDEFINED, &heap); - - if (should_count_external_pages) { - *n_external_pages += btr_rec_get_externally_stored_len( - rec, offsets_rec); - } - - next_rec = get_next(rec); - - *n_diff = 1; - - while (!page_rec_is_supremum(next_rec)) { - - ulint matched_fields = 0; - ulint matched_bytes = 0; - - offsets_next_rec = rec_get_offsets(next_rec, index, - offsets_next_rec, - ULINT_UNDEFINED, - &heap); - - /* check whether rec != next_rec when looking at - the first n_prefix fields */ - cmp_rec_rec_with_match(rec, next_rec, - offsets_rec, offsets_next_rec, - index, FALSE, &matched_fields, - &matched_bytes); - - if (matched_fields < n_prefix) { - /* rec != next_rec, => rec is non-boring */ - - (*n_diff)++; - - if (scan_method == QUIT_ON_FIRST_NON_BORING) { - goto func_exit; - } - } - - rec = next_rec; - { - /* Assign offsets_rec = offsets_next_rec - so that offsets_rec matches with rec which - was just assigned rec = next_rec above. - Also need to point offsets_next_rec to the - place where offsets_rec was pointing before - because we have just 2 placeholders where - data is actually stored: - offsets_onstack1 and offsets_onstack2 and we - are using them in circular fashion - (offsets[_next]_rec are just pointers to - those placeholders). 
*/ - ulint* offsets_tmp; - offsets_tmp = offsets_rec; - offsets_rec = offsets_next_rec; - offsets_next_rec = offsets_tmp; - } - - if (should_count_external_pages) { - *n_external_pages += btr_rec_get_externally_stored_len( - rec, offsets_rec); - } - - next_rec = get_next(next_rec); - } - -func_exit: - /* offsets1,offsets2 should have been big enough */ - ut_a(heap == NULL); - *out_rec = rec; - return(offsets_rec); -} - -/** Dive below the current position of a cursor and calculate the number of -distinct records on the leaf page, when looking at the fist n_prefix -columns. Also calculate the number of external pages pointed by records -on the leaf page. -@param[in] cur cursor -@param[in] n_prefix look at the first n_prefix columns -when comparing records -@param[out] n_diff number of distinct records -@param[out] n_external_pages number of external pages -@return number of distinct records on the leaf page */ -static -void -dict_stats_analyze_index_below_cur( - const btr_cur_t* cur, - ulint n_prefix, - ib_uint64_t* n_diff, - ib_uint64_t* n_external_pages) -{ - dict_index_t* index; - ulint space; - ulint zip_size; - buf_block_t* block; - ulint page_no; - const page_t* page; - mem_heap_t* heap; - const rec_t* rec; - ulint* offsets1; - ulint* offsets2; - ulint* offsets_rec; - ulint size; - mtr_t mtr; - - index = btr_cur_get_index(cur); - - /* Allocate offsets for the record and the node pointer, for - node pointer records. In a secondary index, the node pointer - record will consist of all index fields followed by a child - page number. - Allocate space for the offsets header (the allocation size at - offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1, - so that this will never be less than the size calculated in - rec_get_offsets_func(). 
*/ - size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index); - - heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2)); - - offsets1 = static_cast<ulint*>(mem_heap_alloc( - heap, size * sizeof *offsets1)); - - offsets2 = static_cast<ulint*>(mem_heap_alloc( - heap, size * sizeof *offsets2)); - - rec_offs_set_n_alloc(offsets1, size); - rec_offs_set_n_alloc(offsets2, size); - - space = dict_index_get_space(index); - zip_size = dict_table_zip_size(index->table); - - rec = btr_cur_get_rec(cur); - - offsets_rec = rec_get_offsets(rec, index, offsets1, - ULINT_UNDEFINED, &heap); - - page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec); - - /* assume no external pages by default - in case we quit from this - function without analyzing any leaf pages */ - *n_external_pages = 0; - - mtr_start(&mtr); - - /* descend to the leaf level on the B-tree */ - for (;;) { - - block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, - NULL /* no guessed block */, - BUF_GET, __FILE__, __LINE__, &mtr); - - page = buf_block_get_frame(block); - - if (page_is_leaf(page)) { - /* leaf level */ - break; - } - /* else */ - - /* search for the first non-boring record on the page */ - offsets_rec = dict_stats_scan_page( - &rec, offsets1, offsets2, index, page, n_prefix, - QUIT_ON_FIRST_NON_BORING, n_diff, NULL); - - /* pages on level > 0 are not allowed to be empty */ - ut_a(offsets_rec != NULL); - /* if page is not empty (offsets_rec != NULL) then n_diff must - be > 0, otherwise there is a bug in dict_stats_scan_page() */ - ut_a(*n_diff > 0); - - if (*n_diff == 1) { - mtr_commit(&mtr); - - /* page has all keys equal and the end of the page - was reached by dict_stats_scan_page(), no need to - descend to the leaf level */ - mem_heap_free(heap); - /* can't get an estimate for n_external_pages here - because we do not dive to the leaf level, assume no - external pages (*n_external_pages was assigned to 0 - above). 
*/ - return; - } - /* else */ - - /* when we instruct dict_stats_scan_page() to quit on the - first non-boring record it finds, then the returned n_diff - can either be 0 (empty page), 1 (page has all keys equal) or - 2 (non-boring record was found) */ - ut_a(*n_diff == 2); - - /* we have a non-boring record in rec, descend below it */ - - page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec); - } - - /* make sure we got a leaf page as a result from the above loop */ - ut_ad(page_is_leaf(page)); - - /* scan the leaf page and find the number of distinct keys, - when looking only at the first n_prefix columns; also estimate - the number of externally stored pages pointed by records on this - page */ - - offsets_rec = dict_stats_scan_page( - &rec, offsets1, offsets2, index, page, n_prefix, - srv_stats_include_delete_marked ? - COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED: - COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff, - n_external_pages); - -#if 0 - DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n", - __func__, page_no, n_diff); -#endif - - mtr_commit(&mtr); - mem_heap_free(heap); -} - -/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[] -for each n-columns prefix (n from 1 to n_uniq). */ -struct n_diff_data_t { - /** Index of the level on which the descent through the btree - stopped. level 0 is the leaf level. This is >= 1 because we - avoid scanning the leaf level because it may contain too many - pages and doing so is useless when combined with the random dives - - if we are to scan the leaf level, this means a full scan and we can - simply do that instead of fiddling with picking random records higher - in the tree and to dive below them. At the start of the analyzing - we may decide to do full scan of the leaf level, but then this - structure is not used in that code path. */ - ulint level; - - /** Number of records on the level where the descend through the btree - stopped. 
When we scan the btree from the root, we stop at some mid - level, choose some records from it and dive below them towards a leaf - page to analyze. */ - ib_uint64_t n_recs_on_level; - - /** Number of different key values that were found on the mid level. */ - ib_uint64_t n_diff_on_level; - - /** Number of leaf pages that are analyzed. This is also the same as - the number of records that we pick from the mid level and dive below - them. */ - ib_uint64_t n_leaf_pages_to_analyze; - - /** Cumulative sum of the number of different key values that were - found on all analyzed pages. */ - ib_uint64_t n_diff_all_analyzed_pages; - - /** Cumulative sum of the number of external pages (stored outside of - the btree but in the same file segment). */ - ib_uint64_t n_external_pages_sum; -}; - -/** Estimate the number of different key values in an index when looking at -the first n_prefix columns. For a given level in an index select -n_diff_data->n_leaf_pages_to_analyze records from that level and dive below -them to the corresponding leaf pages, then scan those leaf pages and save the -sampling results in n_diff_data->n_diff_all_analyzed_pages. -@param[in] index index -@param[in] n_prefix look at first 'n_prefix' columns when -comparing records -@param[in] boundaries a vector that contains -n_diff_data->n_diff_on_level integers each of which represents the index (on -level 'level', counting from left/smallest to right/biggest from 0) of the -last record from each group of distinct keys -@param[in,out] n_diff_data n_diff_all_analyzed_pages and -n_external_pages_sum in this structure will be set by this function. 
The -members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the -caller in advance - they are used by some calculations inside this function -@param[in,out] mtr mini-transaction */ -static -void -dict_stats_analyze_index_for_n_prefix( - dict_index_t* index, - ulint n_prefix, - const boundaries_t* boundaries, - n_diff_data_t* n_diff_data, - mtr_t* mtr) -{ - btr_pcur_t pcur; - const page_t* page; - ib_uint64_t rec_idx; - ib_uint64_t i; - -#if 0 - DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, " - "n_diff_on_level=" UINT64PF ")\n", - __func__, index->table->name, index->name, level, - n_prefix, n_diff_data->n_diff_on_level); -#endif - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_S_LOCK)); - - /* Position pcur on the leftmost record on the leftmost page - on the desired level. */ - - btr_pcur_open_at_index_side( - true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, - &pcur, true, n_diff_data->level, mtr); - btr_pcur_move_to_next_on_page(&pcur); - - page = btr_pcur_get_page(&pcur); - - const rec_t* first_rec = btr_pcur_get_rec(&pcur); - - /* We shouldn't be scanning the leaf level. The caller of this function - should have stopped the descend on level 1 or higher. */ - ut_ad(n_diff_data->level > 0); - ut_ad(!page_is_leaf(page)); - - /* The page must not be empty, except when - it is the root page (and the whole index is empty). 
*/ - ut_ad(btr_pcur_is_on_user_rec(&pcur)); - ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page))); - - /* check that we are indeed on the desired level */ - ut_a(btr_page_get_level(page, mtr) == n_diff_data->level); - - /* there should not be any pages on the left */ - ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); - - /* check whether the first record on the leftmost page is marked - as such; we are on a non-leaf level */ - ut_a(rec_get_info_bits(first_rec, page_is_comp(page)) - & REC_INFO_MIN_REC_FLAG); - - const ib_uint64_t last_idx_on_level = boundaries->at( - static_cast<unsigned>(n_diff_data->n_diff_on_level - 1)); - - rec_idx = 0; - - n_diff_data->n_diff_all_analyzed_pages = 0; - n_diff_data->n_external_pages_sum = 0; - - for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) { - /* there are n_diff_on_level elements - in 'boundaries' and we divide those elements - into n_leaf_pages_to_analyze segments, for example: - - let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then: - segment i=0: [0, 24] - segment i=1: [25, 49] - segment i=2: [50, 74] - segment i=3: [75, 99] or - - let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then: - segment i=0: [0, 0] or - - let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then: - segment i=0: [0, 0] - segment i=1: [1, 1] or - - let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then: - segment i=0: [0, 0] - segment i=1: [1, 2] - segment i=2: [3, 4] - segment i=3: [5, 6] - segment i=4: [7, 8] - segment i=5: [9, 10] - segment i=6: [11, 12] - - then we select a random record from each segment and dive - below it */ - const ib_uint64_t n_diff = n_diff_data->n_diff_on_level; - const ib_uint64_t n_pick - = n_diff_data->n_leaf_pages_to_analyze; - - const ib_uint64_t left = n_diff * i / n_pick; - const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1; - - ut_a(left <= right); - ut_a(right <= last_idx_on_level); - - /* we do not pass (left, right) because we do not want to ask - ut_rnd_interval() to work 
with too big numbers since - ib_uint64_t could be bigger than ulint */ - const ulint rnd = ut_rnd_interval( - 0, static_cast<ulint>(right - left)); - - const ib_uint64_t dive_below_idx - = boundaries->at(static_cast<unsigned>(left + rnd)); - -#if 0 - DEBUG_PRINTF(" %s(): dive below record with index=" - UINT64PF "\n", __func__, dive_below_idx); -#endif - - /* seek to the record with index dive_below_idx */ - while (rec_idx < dive_below_idx - && btr_pcur_is_on_user_rec(&pcur)) { - - btr_pcur_move_to_next_user_rec(&pcur, mtr); - rec_idx++; - } - - /* if the level has finished before the record we are - searching for, this means that the B-tree has changed in - the meantime, quit our sampling and use whatever stats - we have collected so far */ - if (rec_idx < dive_below_idx) { - - ut_ad(!btr_pcur_is_on_user_rec(&pcur)); - break; - } - - /* it could be that the tree has changed in such a way that - the record under dive_below_idx is the supremum record, in - this case rec_idx == dive_below_idx and pcur is positioned - on the supremum, we do not want to dive below it */ - if (!btr_pcur_is_on_user_rec(&pcur)) { - break; - } - - ut_a(rec_idx == dive_below_idx); - - ib_uint64_t n_diff_on_leaf_page; - ib_uint64_t n_external_pages; - - dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur), - n_prefix, - &n_diff_on_leaf_page, - &n_external_pages); - - /* We adjust n_diff_on_leaf_page here to avoid counting - one record twice - once as the last on some page and once - as the first on another page. Consider the following example: - Leaf level: - page: (2,2,2,2,3,3) - ... many pages like (3,3,3,3,3,3) ... - page: (3,3,3,3,5,5) - ... many pages like (5,5,5,5,5,5) ... - page: (5,5,5,5,8,8) - page: (8,8,8,8,9,9) - our algo would (correctly) get an estimate that there are - 2 distinct records per page (average). Having 4 pages below - non-boring records, it would (wrongly) estimate the number - of distinct records to 8. 
*/ - if (n_diff_on_leaf_page > 0) { - n_diff_on_leaf_page--; - } - - n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page; - - n_diff_data->n_external_pages_sum += n_external_pages; - } - - btr_pcur_close(&pcur); -} - -/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[]. -@param[in] n_diff_data input data to use to derive the results -@param[in,out] index index whose stat_n_diff_key_vals[] to set */ -UNIV_INLINE -void -dict_stats_index_set_n_diff( - const n_diff_data_t* n_diff_data, - dict_index_t* index) -{ - for (ulint n_prefix = dict_index_get_n_unique(index); - n_prefix >= 1; - n_prefix--) { - /* n_diff_all_analyzed_pages can be 0 here if - all the leaf pages sampled contained only - delete-marked records. In this case we should assign - 0 to index->stat_n_diff_key_vals[n_prefix - 1], which - the formula below does. */ - - const n_diff_data_t* data = &n_diff_data[n_prefix - 1]; - - ut_ad(data->n_leaf_pages_to_analyze > 0); - ut_ad(data->n_recs_on_level > 0); - - ulint n_ordinary_leaf_pages; - - if (data->level == 1) { - /* If we know the number of records on level 1, then - this number is the same as the number of pages on - level 0 (leaf). */ - n_ordinary_leaf_pages = data->n_recs_on_level; - } else { - /* If we analyzed D ordinary leaf pages and found E - external pages in total linked from those D ordinary - leaf pages, then this means that the ratio - ordinary/external is D/E. Then the ratio ordinary/total - is D / (D + E). Knowing that the total number of pages - is T (including ordinary and external) then we estimate - that the total number of ordinary leaf pages is - T * D / (D + E). 
*/ - n_ordinary_leaf_pages - = index->stat_n_leaf_pages - * data->n_leaf_pages_to_analyze - / (data->n_leaf_pages_to_analyze - + data->n_external_pages_sum); - } - - /* See REF01 for an explanation of the algorithm */ - index->stat_n_diff_key_vals[n_prefix - 1] - = n_ordinary_leaf_pages - - * data->n_diff_on_level - / data->n_recs_on_level - - * data->n_diff_all_analyzed_pages - / data->n_leaf_pages_to_analyze; - - index->stat_n_sample_sizes[n_prefix - 1] - = data->n_leaf_pages_to_analyze; - - DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu" - " (%lu" - " * " UINT64PF " / " UINT64PF - " * " UINT64PF " / " UINT64PF ")\n", - __func__, - index->stat_n_diff_key_vals[n_prefix - 1], - n_prefix, - index->stat_n_leaf_pages, - data->n_diff_on_level, - data->n_recs_on_level, - data->n_diff_all_analyzed_pages, - data->n_leaf_pages_to_analyze); - } -} - -/*********************************************************************//** -Calculates new statistics for a given index and saves them to the index -members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and -stat_n_leaf_pages. This function could be slow. 
*/
static
void
dict_stats_analyze_index(
/*=====================*/
	dict_index_t*	index)	/*!< in/out: index to analyze */
{
	ulint		root_level;
	ulint		level;
	bool		level_is_analyzed;
	ulint		n_uniq;
	ulint		n_prefix;
	ib_uint64_t	total_recs;
	ib_uint64_t	total_pages;
	mtr_t		mtr;
	ulint		size;
	DBUG_ENTER("dict_stats_analyze_index");

	DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
			    dict_index_get_online_status(index)));

	DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);

	/* Start from empty stats so that every early return below leaves
	the index with initialized values. */
	dict_stats_empty_index(index, false);

	mtr_start(&mtr);

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	if (size != ULINT_UNDEFINED) {
		index->stat_index_size = size;
		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	}

	/* Release the X locks on the root page taken by btr_get_size() */
	mtr_commit(&mtr);

	switch (size) {
	case ULINT_UNDEFINED:
		/* btr_get_size() could not report a size; keep the empty
		stats set above and give up. */
		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	case 0:
		/* The root node of the tree is a leaf */
		size = 1;
	}

	index->stat_n_leaf_pages = size;

	mtr_start(&mtr);

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	root_level = btr_height_get(index, &mtr);

	n_uniq = dict_index_get_n_unique(index);

	/* If the tree has just one level (and one page) or if the user
	has requested to sample too many pages then do full scan.

	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
	pages will be sampled. If that number is bigger than the total
	number of leaf pages then do full scan of the leaf level instead
	since it will be faster and will give better results. */

	if (root_level == 0
	    || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {

		if (root_level == 0) {
			DEBUG_PRINTF(" %s(): just one page, "
				     "doing full scan\n", __func__);
		} else {
			DEBUG_PRINTF(" %s(): too many pages requested for "
				     "sampling, doing full scan\n", __func__);
		}

		/* do full scan of level 0; save results directly
		into the index */

		dict_stats_analyze_index_level(index,
					       0 /* leaf level */,
					       index->stat_n_diff_key_vals,
					       &total_recs,
					       &total_pages,
					       NULL /* boundaries not needed */,
					       &mtr);

		/* The whole leaf level was read, so the sample size of
		every prefix is the number of pages scanned. */
		for (ulint i = 0; i < n_uniq; i++) {
			index->stat_n_sample_sizes[i] = total_pages;
		}

		mtr_commit(&mtr);

		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	}

	/* The three arrays below are heap-allocated here and released
	with delete[] after the n_prefix loop; the loop exits only via
	'break' or normal termination, so all of them are always freed. */

	/* For each level that is being scanned in the btree, this contains the
	number of different key values for all possible n-column prefixes. */
	ib_uint64_t*	n_diff_on_level = new ib_uint64_t[n_uniq];

	/* For each level that is being scanned in the btree, this contains the
	index of the last record from each group of equal records (when
	comparing only the first n columns, n=1..n_uniq). */
	boundaries_t*	n_diff_boundaries = new boundaries_t[n_uniq];

	/* For each n-column prefix this array contains the input data that is
	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
	n_diff_data_t*	n_diff_data = new n_diff_data_t[n_uniq];

	/* total_recs is also used to estimate the number of pages on one
	level below, so at the start we have 1 page (the root) */
	total_recs = 1;

	/* Here we use the following optimization:
	If we find that level L is the first one (searching from the
	root) that contains at least D distinct keys when looking at
	the first n_prefix columns, then:
	if we look at the first n_prefix-1 columns then the first
	level that contains D distinct keys will be either L or a
	lower one.
	So if we find that the first level containing D distinct
	keys (on n_prefix columns) is L, we continue from L when
	searching for D distinct keys on n_prefix-1 columns. */
	level = root_level;
	level_is_analyzed = false;

	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {

		DEBUG_PRINTF(" %s(): searching level with >=%llu "
			     "distinct records, n_prefix=%lu\n",
			     __func__, N_DIFF_REQUIRED(index), n_prefix);

		/* Commit the mtr to release the tree S lock to allow
		other threads to do some work too. */
		mtr_commit(&mtr);
		mtr_start(&mtr);
		mtr_s_lock(dict_index_get_lock(index), &mtr);
		if (root_level != btr_height_get(index, &mtr)) {
			/* Just quit if the tree has changed beyond
			recognition here. The old stats from previous
			runs will remain in the values that we have
			not calculated yet. Initially when the index
			object is created the stats members are given
			some sensible values so leaving them untouched
			here even the first time will not cause us to
			read uninitialized memory later. */
			break;
		}

		/* check whether we should pick the current level;
		we pick level 1 even if it does not have enough
		distinct records because we do not want to scan the
		leaf level because it may contain too many records */
		if (level_is_analyzed
		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
			|| level == 1)) {

			goto found_level;
		}

		/* search for a level that contains enough distinct records */

		if (level_is_analyzed && level > 1) {

			/* if this does not hold we should be on
			"found_level" instead of here */
			ut_ad(n_diff_on_level[n_prefix - 1]
			      < N_DIFF_REQUIRED(index));

			level--;
			level_is_analyzed = false;
		}

		/* descend into the tree, searching for "good enough" level */
		for (;;) {

			/* make sure we do not scan the leaf level
			accidentally, it may contain too many pages */
			ut_ad(level > 0);

			/* scanning the same level twice is an optimization
			bug */
			ut_ad(!level_is_analyzed);

			/* Do not scan if this would read too many pages.
			Here we use the following fact:
			the number of pages on level L equals the number
			of records on level L+1, thus we deduce that the
			following call would scan total_recs pages, because
			total_recs is left from the previous iteration when
			we scanned one level upper or we have not scanned any
			levels yet in which case total_recs is 1. */
			if (total_recs > N_SAMPLE_PAGES(index)) {

				/* if the above cond is true then we are
				not at the root level since on the root
				level total_recs == 1 (set before we
				enter the n-prefix loop) and cannot
				be > N_SAMPLE_PAGES(index) */
				ut_a(level != root_level);

				/* step one level back and be satisfied with
				whatever it contains */
				level++;
				level_is_analyzed = true;

				break;
			}

			dict_stats_analyze_index_level(index,
						       level,
						       n_diff_on_level,
						       &total_recs,
						       &total_pages,
						       n_diff_boundaries,
						       &mtr);

			level_is_analyzed = true;

			if (level == 1
			    || n_diff_on_level[n_prefix - 1]
			    >= N_DIFF_REQUIRED(index)) {
				/* we have reached the last level we could scan
				or we found a good level with many distinct
				records */
				break;
			}

			level--;
			level_is_analyzed = false;
		}
found_level:

		DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
			     " distinct records for n_prefix=%lu\n",
			     __func__, level, n_diff_on_level[n_prefix - 1],
			     n_prefix);
		/* here we are either on level 1 or the level that we are on
		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
		deeper levels because they would contain too many pages */

		ut_ad(level > 0);

		ut_ad(level_is_analyzed);

		/* if any of these is 0 then there is exactly one page in the
		B-tree and it is empty and we should have done full scan and
		should not be here */
		ut_ad(total_recs > 0);
		ut_ad(n_diff_on_level[n_prefix - 1] > 0);

		ut_ad(N_SAMPLE_PAGES(index) > 0);

		/* Record the inputs that dict_stats_index_set_n_diff()
		reads for this prefix. */
		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		data->level = level;

		data->n_recs_on_level = total_recs;

		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];

		/* Never plan to analyze more leaf pages than there are
		distinct keys on the chosen level. */
		data->n_leaf_pages_to_analyze = std::min(
			N_SAMPLE_PAGES(index),
			n_diff_on_level[n_prefix - 1]);

		/* pick some records from this level and dive below them for
		the given n_prefix */

		dict_stats_analyze_index_for_n_prefix(
			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
			data, &mtr);
	}

	mtr_commit(&mtr);

	delete[] n_diff_boundaries;

	delete[] n_diff_on_level;

	/* n_prefix == 0 means that the above loop did not end up prematurely
	due to tree being changed and so n_diff_data[] is set up. */
	if (n_prefix == 0) {
		dict_stats_index_set_n_diff(n_diff_data, index);
	}

	delete[] n_diff_data;

	dict_stats_assert_initialized_index(index);
	DBUG_VOID_RETURN;
}

/*********************************************************************//**
Calculates new estimates for table and index statistics. This function
is relatively slow and is used to calculate persistent statistics that
will be saved on disk.
The clustered index is analyzed first and provides stat_n_rows and
stat_clustered_index_size; the sizes of the remaining indexes are summed
into stat_sum_of_other_index_sizes.
@return DB_SUCCESS, or DB_CORRUPTION if the first index is missing,
corrupted or not a clustered index */
static
dberr_t
dict_stats_update_persistent(
/*=========================*/
	dict_table_t*	table)		/*!< in/out: table */
{
	dict_index_t*	index;

	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);

	dict_table_stats_lock(table, RW_X_LATCH);

	/* analyze the clustered index first */

	index = dict_table_get_first_index(table);

	/* OR-ing DICT_UNIQUE into both sides makes the type comparison
	ignore that bit: apart from it, the type must be exactly
	DICT_CLUSTERED. */
	if (index == NULL
	    || dict_index_is_corrupted(index)
	    || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {

		/* Table definition is corrupt */
		dict_table_stats_unlock(table, RW_X_LATCH);
		dict_stats_empty_table(table, true);

		return(DB_CORRUPTION);
	}

	ut_ad(!dict_index_is_univ(index));

	dict_stats_analyze_index(index);

	ulint	n_unique = dict_index_get_n_unique(index);

	/* The number of distinct values of the full unique prefix of the
	clustered index is used as the row count estimate. */
	table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];

	table->stat_clustered_index_size = index->stat_index_size;

	/* analyze other indexes from the table, if any */

	table->stat_sum_of_other_index_sizes = 0;

	for (index = dict_table_get_next_index(index);
	     index != NULL;
	     index = dict_table_get_next_index(index)) {

		ut_ad(!dict_index_is_univ(index));

		/* DICT_FTS indexes are skipped entirely: not emptied, not
		analyzed and not counted in the size sum. */
		if (index->type & DICT_FTS) {
			continue;
		}

		dict_stats_empty_index(index, false);

		if (dict_stats_should_ignore_index(index)) {
			continue;
		}

		/* When BG_STAT_SHOULD_QUIT is set the analysis is skipped
		and the index keeps the empty stats assigned just above. */
		if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
			dict_stats_analyze_index(index);
		}

		table->stat_sum_of_other_index_sizes
			+= index->stat_index_size;
	}

	table->stats_last_recalc = ut_time();

	table->stat_modified_counter = 0;

	table->stat_initialized = TRUE;

	dict_stats_assert_initialized(table);

	dict_table_stats_unlock(table, RW_X_LATCH);

	return(DB_SUCCESS);
}

#include "mysql_com.h"
/** Save an individual index's statistic into the persistent statistics
storage.
Any existing row for the same (database_name, table_name, index_name,
stat_name) is deleted and a new row is inserted. A failure is reported on
stderr at most once per index (see index->stats_error_printed).
@param[in]	index			index to be updated
@param[in]	last_update		timestamp of the stat
@param[in]	stat_name		name of the stat
@param[in]	stat_value		value of the stat
@param[in]	sample_size		n pages sampled or NULL
@param[in]	stat_description	description of the stat
@param[in,out]	trx			in case of NULL the function will
allocate and free the trx object. If it is not NULL then it will be
rolled back only in the case of error, but not freed.
@return DB_SUCCESS or error code */
static
dberr_t
dict_stats_save_index_stat(
	dict_index_t*	index,
	lint		last_update,
	const char*	stat_name,
	ib_uint64_t	stat_value,
	ib_uint64_t*	sample_size,
	const char*	stat_description,
	trx_t*		trx)
{
	pars_info_t*	pinfo;
	dberr_t		ret;
	char		db_utf8[MAX_DB_UTF8_LEN];
	char		table_utf8[MAX_TABLE_UTF8_LEN];

	/* The caller must hold dict_operation_lock in exclusive mode
	(checked only with UNIV_SYNC_DEBUG) and dict_sys->mutex. */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(mutex_own(&dict_sys->mutex));

	dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8),
		     table_utf8, sizeof(table_utf8));

	/* Bind every value referenced as :name in the SQL text below. */
	pinfo = pars_info_create();
	pars_info_add_str_literal(pinfo, "database_name", db_utf8);
	pars_info_add_str_literal(pinfo, "table_name", table_utf8);
	UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
	pars_info_add_str_literal(pinfo, "index_name", index->name);
	UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
	pars_info_add_int4_literal(pinfo, "last_update", last_update);
	UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
	pars_info_add_str_literal(pinfo, "stat_name", stat_name);
	UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
	pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
	if (sample_size != NULL) {
		UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
		pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
	} else {
		/* No sample size: bind an SQL NULL instead of a number. */
		pars_info_add_literal(pinfo, "sample_size", NULL,
				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
	}
	UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
	pars_info_add_str_literal(pinfo, "stat_description",
				  stat_description);

	ret = dict_stats_exec_sql(
		pinfo,
		"PROCEDURE INDEX_STATS_SAVE () IS\n"
		"BEGIN\n"

		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
		"WHERE\n"
		"database_name = :database_name AND\n"
		"table_name = :table_name AND\n"
		"index_name = :index_name AND\n"
		"stat_name = :stat_name;\n"

		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
		"VALUES\n"
		"(\n"
		":database_name,\n"
		":table_name,\n"
		":index_name,\n"
		":last_update,\n"
		":stat_name,\n"
		":stat_value,\n"
		":sample_size,\n"
		":stat_description\n"
		");\n"
		"END;", trx);

	if (ret != DB_SUCCESS) {
		/* Print the error only once per index and not at all when
		innodb_index_stats_not_found is set. */
		if (innodb_index_stats_not_found == false &&
		    index->stats_error_printed == false) {
			char	buf_table[MAX_FULL_NAME_LEN];
			char	buf_index[MAX_FULL_NAME_LEN];
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Cannot save index statistics for table "
				"%s, index %s, stat name \"%s\": %s\n",
				ut_format_name(index->table->name, TRUE,
					       buf_table, sizeof(buf_table)),
				ut_format_name(index->name, FALSE,
					       buf_index, sizeof(buf_index)),
				stat_name, ut_strerr(ret));
			index->stats_error_printed = true;
		}
	}

	return(ret);
}

/** Report error if statistic update for a table failed because
.ibd file is missing, table decryption failed or table is corrupted.
-@param[in,out] table Table -@param[in] defragment true if statistics is for defragment -@return DB_DECRYPTION_FAILED, DB_TABLESPACE_DELETED or DB_CORRUPTION -@retval DB_DECRYPTION_FAILED if decryption of the table failed -@retval DB_TABLESPACE_DELETED if .ibd file is missing -@retval DB_CORRUPTION if table is marked as corrupted */ -static -dberr_t -dict_stats_report_error( - dict_table_t* table, - bool defragment = false) -{ - char buf[3 * NAME_LEN]; - dberr_t err; - - innobase_format_name(buf, sizeof buf, - table->name, - true); - - FilSpace space(table->space); - - if (space()) { - if (table->corrupted) { - ib_logf(IB_LOG_LEVEL_INFO, - "Cannot save%s statistics because " - " table %s in file %s is corrupted.", - defragment ? " defragment" : " ", - buf, space()->chain.start->name); - err = DB_CORRUPTION; - } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Cannot save%s statistics because " - " table %s in file %s can't be decrypted.", - defragment ? " defragment" : " ", - buf, space()->chain.start->name); - err = DB_DECRYPTION_FAILED; - } - } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Cannot save%s statistics for " - " table %s because .ibd file is missing." - " For help, please " - "refer to " REFMAN "innodb-troubleshooting.html.", - defragment ? " defragment" : " ", - buf); - err = DB_TABLESPACE_DELETED; - } - - dict_stats_empty_table(table, defragment); - - return (err); -} - -/** Save the table's statistics into the persistent statistics storage. 
-@param[in] table_orig table whose stats to save -@param[in] only_for_index if this is non-NULL, then stats for indexes -that are not equal to it will not be saved, if NULL, then all -indexes' stats are saved -@return DB_SUCCESS or error code */ -static -dberr_t -dict_stats_save( -/*============*/ - dict_table_t* table_orig, - const index_id_t* only_for_index) -{ - pars_info_t* pinfo; - lint now; - dberr_t ret; - dict_table_t* table; - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - - if (table_orig->is_readable()) { - } else { - return (dict_stats_report_error(table_orig)); - } - - table = dict_stats_snapshot_create(table_orig); - - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&dict_sys->mutex); - - /* MySQL's timestamp is 4 byte, so we use - pars_info_add_int4_literal() which takes a lint arg, so "now" is - lint */ - now = (lint) ut_time(); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "database_name", db_utf8); - pars_info_add_str_literal(pinfo, "table_name", table_utf8); - pars_info_add_int4_literal(pinfo, "last_update", now); - pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows); - pars_info_add_ull_literal(pinfo, "clustered_index_size", - table->stat_clustered_index_size); - pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes", - table->stat_sum_of_other_index_sizes); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE TABLE_STATS_SAVE () IS\n" - "BEGIN\n" - - "DELETE FROM \"" TABLE_STATS_NAME "\"\n" - "WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name;\n" - - "INSERT INTO \"" TABLE_STATS_NAME "\"\n" - "VALUES\n" - "(\n" - ":database_name,\n" - ":table_name,\n" - ":last_update,\n" - ":n_rows,\n" - ":clustered_index_size,\n" - ":sum_of_other_index_sizes\n" - ");\n" - "END;", NULL); - - if (ret != DB_SUCCESS) { - char buf[MAX_FULL_NAME_LEN]; - ut_print_timestamp(stderr); - 
fprintf(stderr, - " InnoDB: Cannot save table statistics for table " - "%s: %s\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf)), - ut_strerr(ret)); - - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - - dict_stats_snapshot_free(table); - - return(ret); - } - - trx_t* trx = trx_allocate_for_background(); - trx_start_if_not_started(trx); - - dict_index_t* index; - index_map_t indexes; - - /* Below we do all the modifications in innodb_index_stats in a single - transaction for performance reasons. Modifying more than one row in a - single transaction may deadlock with other transactions if they - lock the rows in different order. Other transaction could be for - example when we DROP a table and do - DELETE FROM innodb_index_stats WHERE database_name = '...' - AND table_name = '...'; which will affect more than one row. To - prevent deadlocks we always lock the rows in the same order - the - order of the PK, which is (database_name, table_name, index_name, - stat_name). This is why below we sort the indexes by name and then - for each index, do the mods ordered by stat_name. 
*/ - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - indexes[index->name] = index; - } - - index_map_t::const_iterator it; - - for (it = indexes.begin(); it != indexes.end(); ++it) { - - index = it->second; - - if (only_for_index != NULL && index->id != *only_for_index) { - continue; - } - - if (dict_stats_should_ignore_index(index)) { - continue; - } - - ut_ad(!dict_index_is_univ(index)); - - for (ulint i = 0; i < index->n_uniq; i++) { - - char stat_name[16]; - char stat_description[1024]; - ulint j; - - ut_snprintf(stat_name, sizeof(stat_name), - "n_diff_pfx%02lu", i + 1); - - /* craft a string that contains the columns names */ - ut_snprintf(stat_description, - sizeof(stat_description), - "%s", index->fields[0].name); - for (j = 1; j <= i; j++) { - size_t len; - - len = strlen(stat_description); - - ut_snprintf(stat_description + len, - sizeof(stat_description) - len, - ",%s", index->fields[j].name); - } - - ret = dict_stats_save_index_stat( - index, now, stat_name, - index->stat_n_diff_key_vals[i], - &index->stat_n_sample_sizes[i], - stat_description, trx); - - if (ret != DB_SUCCESS) { - goto end; - } - } - - ret = dict_stats_save_index_stat(index, now, "n_leaf_pages", - index->stat_n_leaf_pages, - NULL, - "Number of leaf pages " - "in the index", trx); - if (ret != DB_SUCCESS) { - goto end; - } - - ret = dict_stats_save_index_stat(index, now, "size", - index->stat_index_size, - NULL, - "Number of pages " - "in the index", trx); - if (ret != DB_SUCCESS) { - goto end; - } - } - - trx_commit_for_mysql(trx); - -end: - trx_free_for_background(trx); - - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - - dict_stats_snapshot_free(table); - - return(ret); -} - -/*********************************************************************//** -Called for the row that is selected by -SELECT ... FROM mysql.innodb_table_stats WHERE table='...' 
-The second argument is a pointer to the table and the fetched stats are -written to it. -@return non-NULL dummy */ -static -ibool -dict_stats_fetch_table_stats_step( -/*==============================*/ - void* node_void, /*!< in: select node */ - void* table_void) /*!< out: table */ -{ - sel_node_t* node = (sel_node_t*) node_void; - dict_table_t* table = (dict_table_t*) table_void; - que_common_t* cnode; - int i; - - /* this should loop exactly 3 times - for - n_rows,clustered_index_size,sum_of_other_index_sizes */ - for (cnode = static_cast<que_common_t*>(node->select_list), i = 0; - cnode != NULL; - cnode = static_cast<que_common_t*>(que_node_get_next(cnode)), - i++) { - - const byte* data; - dfield_t* dfield = que_node_get_val(cnode); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - data = static_cast<const byte*>(dfield_get_data(dfield)); - - switch (i) { - case 0: /* mysql.innodb_table_stats.n_rows */ - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(len == 8); - - table->stat_n_rows = mach_read_from_8(data); - - break; - - case 1: /* mysql.innodb_table_stats.clustered_index_size */ - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(len == 8); - - table->stat_clustered_index_size - = (ulint) mach_read_from_8(data); - - break; - - case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */ - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(len == 8); - - table->stat_sum_of_other_index_sizes - = (ulint) mach_read_from_8(data); - - break; - - default: - - /* someone changed SELECT - n_rows,clustered_index_size,sum_of_other_index_sizes - to select more columns from innodb_table_stats without - adjusting here */ - ut_error; - } - } - - /* if i < 3 this means someone changed the - SELECT n_rows,clustered_index_size,sum_of_other_index_sizes - to select less columns from innodb_table_stats without adjusting here; - if i > 3 we would have ut_error'ed earlier */ - ut_a(i == 3 
/*n_rows,clustered_index_size,sum_of_other_index_sizes*/); - - /* XXX this is not used but returning non-NULL is necessary */ - return(TRUE); -} - -/** Aux struct used to pass a table and a boolean to -dict_stats_fetch_index_stats_step(). */ -struct index_fetch_t { - dict_table_t* table; /*!< table whose indexes are to be modified */ - bool stats_were_modified; /*!< will be set to true if at - least one index stats were modified */ -}; - -/*********************************************************************//** -Called for the rows that are selected by -SELECT ... FROM mysql.innodb_index_stats WHERE table='...' -The second argument is a pointer to the table and the fetched stats are -written to its indexes. -Let a table has N indexes and each index has Ui unique columns for i=1..N, -then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table. -So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude -N*AVG(Ui). In each call it searches for the currently fetched index into -table->indexes linearly, assuming this list is not sorted. Thus, overall, -fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N -is the number of indexes. -This can be improved if we sort table->indexes in a temporary area just once -and then search in that sorted list. Then the complexity will be O(N*log(N)). -We assume a table will not have more than 100 indexes, so we go with the -simpler N^2 algorithm. 
-@return non-NULL dummy */ -static -ibool -dict_stats_fetch_index_stats_step( -/*==============================*/ - void* node_void, /*!< in: select node */ - void* arg_void) /*!< out: table + a flag that tells if we - modified anything */ -{ - sel_node_t* node = (sel_node_t*) node_void; - index_fetch_t* arg = (index_fetch_t*) arg_void; - dict_table_t* table = arg->table; - dict_index_t* index = NULL; - que_common_t* cnode; - const char* stat_name = NULL; - ulint stat_name_len = ULINT_UNDEFINED; - ib_uint64_t stat_value = UINT64_UNDEFINED; - ib_uint64_t sample_size = UINT64_UNDEFINED; - int i; - - /* this should loop exactly 4 times - for the columns that - were selected: index_name,stat_name,stat_value,sample_size */ - for (cnode = static_cast<que_common_t*>(node->select_list), i = 0; - cnode != NULL; - cnode = static_cast<que_common_t*>(que_node_get_next(cnode)), - i++) { - - const byte* data; - dfield_t* dfield = que_node_get_val(cnode); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - data = static_cast<const byte*>(dfield_get_data(dfield)); - - switch (i) { - case 0: /* mysql.innodb_index_stats.index_name */ - - ut_a(dtype_get_mtype(type) == DATA_VARMYSQL); - - /* search for index in table's indexes whose name - matches data; the fetched index name is in data, - has no terminating '\0' and has length len */ - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (strlen(index->name) == len - && memcmp(index->name, data, len) == 0) { - /* the corresponding index was found */ - break; - } - } - - /* if index is NULL here this means that - mysql.innodb_index_stats contains more rows than the - number of indexes in the table; this is ok, we just - return ignoring those extra rows; in other words - dict_stats_fetch_index_stats_step() has been called - for a row from index_stats with unknown index_name - column */ - if (index == NULL) { - - return(TRUE); - } - - 
break; - - case 1: /* mysql.innodb_index_stats.stat_name */ - - ut_a(dtype_get_mtype(type) == DATA_VARMYSQL); - - ut_a(index != NULL); - - stat_name = (const char*) data; - stat_name_len = len; - - break; - - case 2: /* mysql.innodb_index_stats.stat_value */ - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(len == 8); - - ut_a(index != NULL); - ut_a(stat_name != NULL); - ut_a(stat_name_len != ULINT_UNDEFINED); - - stat_value = mach_read_from_8(data); - - break; - - case 3: /* mysql.innodb_index_stats.sample_size */ - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(len == 8 || len == UNIV_SQL_NULL); - - ut_a(index != NULL); - ut_a(stat_name != NULL); - ut_a(stat_name_len != ULINT_UNDEFINED); - ut_a(stat_value != UINT64_UNDEFINED); - - if (len == UNIV_SQL_NULL) { - break; - } - /* else */ - - sample_size = mach_read_from_8(data); - - break; - - default: - - /* someone changed - SELECT index_name,stat_name,stat_value,sample_size - to select more columns from innodb_index_stats without - adjusting here */ - ut_error; - } - } - - /* if i < 4 this means someone changed the - SELECT index_name,stat_name,stat_value,sample_size - to select less columns from innodb_index_stats without adjusting here; - if i > 4 we would have ut_error'ed earlier */ - ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */); - - ut_a(index != NULL); - ut_a(stat_name != NULL); - ut_a(stat_name_len != ULINT_UNDEFINED); - ut_a(stat_value != UINT64_UNDEFINED); - /* sample_size could be UINT64_UNDEFINED here, if it is NULL */ - -#define PFX "n_diff_pfx" -#define PFX_LEN 10 - - if (stat_name_len == 4 /* strlen("size") */ - && strncasecmp("size", stat_name, stat_name_len) == 0) { - index->stat_index_size = (ulint) stat_value; - arg->stats_were_modified = true; - } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */ - && strncasecmp("n_leaf_pages", stat_name, stat_name_len) - == 0) { - index->stat_n_leaf_pages = (ulint) stat_value; - arg->stats_were_modified = true; - } else if 
(stat_name_len == 12 /* strlen("n_page_split") */ - && strncasecmp("n_page_split", stat_name, stat_name_len) - == 0) { - index->stat_defrag_n_page_split = (ulint) stat_value; - arg->stats_were_modified = true; - } else if (stat_name_len == 13 /* strlen("n_pages_freed") */ - && strncasecmp("n_pages_freed", stat_name, stat_name_len) - == 0) { - index->stat_defrag_n_pages_freed = (ulint) stat_value; - arg->stats_were_modified = true; - } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */ - && strncasecmp(PFX, stat_name, PFX_LEN) == 0) { - - const char* num_ptr; - unsigned long n_pfx; - - /* point num_ptr into "1" from "n_diff_pfx12..." */ - num_ptr = stat_name + PFX_LEN; - - /* stat_name should have exactly 2 chars appended to PFX - and they should be digits */ - if (stat_name_len != PFX_LEN + 2 - || num_ptr[0] < '0' || num_ptr[0] > '9' - || num_ptr[1] < '0' || num_ptr[1] > '9') { - - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Ignoring strange row from " - "%s WHERE " - "database_name = '%s' AND " - "table_name = '%s' AND " - "index_name = '%s' AND " - "stat_name = '%.*s'; because stat_name " - "is malformed\n", - INDEX_STATS_NAME_PRINT, - db_utf8, - table_utf8, - index->name, - (int) stat_name_len, - stat_name); - return(TRUE); - } - /* else */ - - /* extract 12 from "n_diff_pfx12..." 
into n_pfx - note that stat_name does not have a terminating '\0' */ - n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0'); - - ulint n_uniq = index->n_uniq; - - if (n_pfx == 0 || n_pfx > n_uniq) { - - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Ignoring strange row from " - "%s WHERE " - "database_name = '%s' AND " - "table_name = '%s' AND " - "index_name = '%s' AND " - "stat_name = '%.*s'; because stat_name is " - "out of range, the index has %lu unique " - "columns\n", - INDEX_STATS_NAME_PRINT, - db_utf8, - table_utf8, - index->name, - (int) stat_name_len, - stat_name, - n_uniq); - return(TRUE); - } - /* else */ - - index->stat_n_diff_key_vals[n_pfx - 1] = stat_value; - - if (sample_size != UINT64_UNDEFINED) { - index->stat_n_sample_sizes[n_pfx - 1] = sample_size; - } else { - /* hmm, strange... the user must have UPDATEd the - table manually and SET sample_size = NULL */ - index->stat_n_sample_sizes[n_pfx - 1] = 0; - } - - index->stat_n_non_null_key_vals[n_pfx - 1] = 0; - - arg->stats_were_modified = true; - } else { - /* silently ignore rows with unknown stat_name, the - user may have developed her own stats */ - } - - /* XXX this is not used but returning non-NULL is necessary */ - return(TRUE); -} - -/*********************************************************************//** -Read table's statistics from the persistent statistics storage. 
-@return DB_SUCCESS or error code */ -static -dberr_t -dict_stats_fetch_from_ps( -/*=====================*/ - dict_table_t* table) /*!< in/out: table */ -{ - index_fetch_t index_fetch_arg; - trx_t* trx; - pars_info_t* pinfo; - dberr_t ret; - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - - ut_ad(!mutex_own(&dict_sys->mutex)); - - /* Initialize all stats to dummy values before fetching because if - the persistent storage contains incomplete stats (e.g. missing stats - for some index) then we would end up with (partially) uninitialized - stats. */ - dict_stats_empty_table(table, true); - - trx = trx_allocate_for_background(); - - /* Use 'read-uncommitted' so that the SELECTs we execute - do not get blocked in case some user has locked the rows we - are SELECTing */ - - trx->isolation_level = TRX_ISO_READ_UNCOMMITTED; - - trx_start_if_not_started(trx); - - dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "database_name", db_utf8); - - pars_info_add_str_literal(pinfo, "table_name", table_utf8); - - pars_info_bind_function(pinfo, - "fetch_table_stats_step", - dict_stats_fetch_table_stats_step, - table); - - index_fetch_arg.table = table; - index_fetch_arg.stats_were_modified = false; - pars_info_bind_function(pinfo, - "fetch_index_stats_step", - dict_stats_fetch_index_stats_step, - &index_fetch_arg); - - ret = que_eval_sql(pinfo, - "PROCEDURE FETCH_STATS () IS\n" - "found INT;\n" - "DECLARE FUNCTION fetch_table_stats_step;\n" - "DECLARE FUNCTION fetch_index_stats_step;\n" - "DECLARE CURSOR table_stats_cur IS\n" - " SELECT\n" - /* if you change the selected fields, be - sure to adjust - dict_stats_fetch_table_stats_step() */ - " n_rows,\n" - " clustered_index_size,\n" - " sum_of_other_index_sizes\n" - " FROM \"" TABLE_STATS_NAME "\"\n" - " WHERE\n" - " database_name = :database_name AND\n" - " table_name = :table_name;\n" - "DECLARE CURSOR 
index_stats_cur IS\n" - " SELECT\n" - /* if you change the selected fields, be - sure to adjust - dict_stats_fetch_index_stats_step() */ - " index_name,\n" - " stat_name,\n" - " stat_value,\n" - " sample_size\n" - " FROM \"" INDEX_STATS_NAME "\"\n" - " WHERE\n" - " database_name = :database_name AND\n" - " table_name = :table_name;\n" - - "BEGIN\n" - - "OPEN table_stats_cur;\n" - "FETCH table_stats_cur INTO\n" - " fetch_table_stats_step();\n" - "IF (SQL % NOTFOUND) THEN\n" - " CLOSE table_stats_cur;\n" - " RETURN;\n" - "END IF;\n" - "CLOSE table_stats_cur;\n" - - "OPEN index_stats_cur;\n" - "found := 1;\n" - "WHILE found = 1 LOOP\n" - " FETCH index_stats_cur INTO\n" - " fetch_index_stats_step();\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE index_stats_cur;\n" - - "END;", - TRUE, trx); - /* pinfo is freed by que_eval_sql() */ - - trx_commit_for_mysql(trx); - - trx_free_for_background(trx); - - if (!index_fetch_arg.stats_were_modified) { - return(DB_STATS_DO_NOT_EXIST); - } - - return(ret); -} - -/*********************************************************************//** -Clear defragmentation stats modified counter for all indices in table. */ -static -void -dict_stats_empty_defrag_modified_counter( - dict_table_t* table) /*!< in: table */ -{ - dict_index_t* index; - ut_a(table); - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - index->stat_defrag_modified_counter = 0; - } -} - -/*********************************************************************//** -Fetches or calculates new estimates for index statistics. 
*/
UNIV_INTERN
void
dict_stats_update_for_index(
/*========================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	DBUG_ENTER("dict_stats_update_for_index");

	/* The callees below take dict_operation_lock / dict_sys->mutex
	themselves, so the caller must not already own the mutex. */
	ut_ad(!mutex_own(&dict_sys->mutex));

	if (dict_stats_is_persistent_enabled(index->table)) {

		if (dict_stats_persistent_storage_check(false)) {
			/* Persistent path: recompute the stats of this one
			index under the table stats X-latch, then write them
			to the persistent storage. */
			dict_table_stats_lock(index->table, RW_X_LATCH);
			dict_stats_analyze_index(index);
			dict_table_stats_unlock(index->table, RW_X_LATCH);
			dict_stats_save(index->table, &index->id);
			DBUG_VOID_RETURN;
		}
		/* else */

		if (innodb_index_stats_not_found == false &&
		    index->stats_error_printed == false) {
			/* Fall back to transient stats since the persistent
			storage is not present or is corrupted */
			char	buf_table[MAX_FULL_NAME_LEN];
			char	buf_index[MAX_FULL_NAME_LEN];
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Recalculation of persistent statistics "
				"requested for table %s index %s but the required "
				"persistent statistics storage is not present or is "
				"corrupted. Using transient stats instead.\n",
				ut_format_name(index->table->name, TRUE,
					       buf_table, sizeof(buf_table)),
				ut_format_name(index->name, FALSE,
					       buf_index, sizeof(buf_index)));
			/* Remember that the warning has been emitted so that
			the guard above suppresses it on later calls for this
			index (the table-level code path in
			dict_stats_update() sets its flag to true the same
			way). Previously the flag was reset to false here,
			which made the guard ineffective. */
			index->stats_error_printed = true;
		}
	}

	/* Transient path: either persistent stats are disabled for the
	table or the persistent storage is unusable. */
	dict_table_stats_lock(index->table, RW_X_LATCH);
	dict_stats_update_transient_for_index(index);
	dict_table_stats_unlock(index->table, RW_X_LATCH);

	DBUG_VOID_RETURN;
}

/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_update( -/*==============*/ - dict_table_t* table, /*!< in/out: table */ - dict_stats_upd_option_t stats_upd_option) - /*!< in: whether to (re) calc - the stats or to fetch them from - the persistent statistics - storage */ -{ - char buf[MAX_FULL_NAME_LEN]; - - ut_ad(!mutex_own(&dict_sys->mutex)); - - if (!table->is_readable()) { - return (dict_stats_report_error(table)); - } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - /* If we have set a high innodb_force_recovery level, do - not calculate statistics, as a badly corrupted index can - cause a crash in it. */ - dict_stats_empty_table(table, false); - return(DB_SUCCESS); - } - - switch (stats_upd_option) { - case DICT_STATS_RECALC_PERSISTENT: - - if (srv_read_only_mode) { - goto transient; - } - - /* Persistent recalculation requested, called from - 1) ANALYZE TABLE, or - 2) the auto recalculation background thread, or - 3) open table if stats do not exist on disk and auto recalc - is enabled */ - - /* InnoDB internal tables (e.g. 
SYS_TABLES) cannot have - persistent stats enabled */ - ut_a(strchr(table->name, '/') != NULL); - - /* check if the persistent statistics storage exists - before calling the potentially slow function - dict_stats_update_persistent(); that is a - prerequisite for dict_stats_save() succeeding */ - if (dict_stats_persistent_storage_check(false)) { - - dberr_t err; - - err = dict_stats_update_persistent(table); - - if (err != DB_SUCCESS) { - return(err); - } - - err = dict_stats_save(table, NULL); - - return(err); - } - - /* Fall back to transient stats since the persistent - storage is not present or is corrupted */ - - if (innodb_table_stats_not_found == false && - table->stats_error_printed == false) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Recalculation of persistent statistics " - "requested for table %s but the required persistent " - "statistics storage is not present or is corrupted. " - "Using transient stats instead.\n", - ut_format_name(table->name, TRUE, buf, sizeof(buf))); - table->stats_error_printed = true; - } - - goto transient; - - case DICT_STATS_RECALC_TRANSIENT: - - goto transient; - - case DICT_STATS_EMPTY_TABLE: - - dict_stats_empty_table(table, true); - - /* If table is using persistent stats, - then save the stats on disk */ - - if (dict_stats_is_persistent_enabled(table)) { - - if (dict_stats_persistent_storage_check(false)) { - - return(dict_stats_save(table, NULL)); - } - - return(DB_STATS_DO_NOT_EXIST); - } - - return(DB_SUCCESS); - - case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY: - - /* fetch requested, either fetch from persistent statistics - storage or use the old method */ - - if (table->stat_initialized) { - return(DB_SUCCESS); - } - - /* InnoDB internal tables (e.g. 
SYS_TABLES) cannot have - persistent stats enabled */ - ut_a(strchr(table->name, '/') != NULL); - - if (!dict_stats_persistent_storage_check(false)) { - /* persistent statistics storage does not exist - or is corrupted, calculate the transient stats */ - - if (innodb_table_stats_not_found == false && - table->stats_error_printed == false) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: Fetch of persistent " - "statistics requested for table %s but the " - "required system tables %s and %s are not " - "present or have unexpected structure. " - "Using transient stats instead.\n", - ut_format_name(table->name, TRUE, - buf, sizeof(buf)), - TABLE_STATS_NAME_PRINT, - INDEX_STATS_NAME_PRINT); - table->stats_error_printed = true; - } - - goto transient; - } - - dict_table_t* t; - - /* Create a dummy table object with the same name and - indexes, suitable for fetching the stats into it. */ - t = dict_stats_table_clone_create(table); - - dberr_t err = dict_stats_fetch_from_ps(t); - - t->stats_last_recalc = table->stats_last_recalc; - t->stat_modified_counter = 0; - dict_stats_empty_defrag_modified_counter(t); - - switch (err) { - case DB_SUCCESS: - - dict_table_stats_lock(table, RW_X_LATCH); - - /* Pass reset_ignored_indexes=true as parameter - to dict_stats_copy. 
This will cause statictics - for corrupted indexes to be set to empty values */ - dict_stats_copy(table, t, true); - - dict_stats_assert_initialized(table); - - dict_table_stats_unlock(table, RW_X_LATCH); - - dict_stats_table_clone_free(t); - - return(DB_SUCCESS); - case DB_STATS_DO_NOT_EXIST: - - dict_stats_table_clone_free(t); - - if (srv_read_only_mode) { - goto transient; - } - - if (dict_stats_auto_recalc_is_enabled(table)) { - return(dict_stats_update( - table, - DICT_STATS_RECALC_PERSISTENT)); - } - - ut_format_name(table->name, TRUE, buf, sizeof(buf)); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Trying to use table %s which has " - "persistent statistics enabled, but auto " - "recalculation turned off and the statistics " - "do not exist in %s and %s. Please either run " - "\"ANALYZE TABLE %s;\" manually or enable the " - "auto recalculation with " - "\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". " - "InnoDB will now use transient statistics for " - "%s.\n", - buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf, - buf, buf); - - goto transient; - default: - - dict_stats_table_clone_free(t); - - if (innodb_table_stats_not_found == false && - table->stats_error_printed == false) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error fetching persistent statistics " - "for table %s from %s and %s: %s. 
" - "Using transient stats method instead.\n", - ut_format_name(table->name, TRUE, buf, - sizeof(buf)), - TABLE_STATS_NAME, - INDEX_STATS_NAME, - ut_strerr(err)); - } - - goto transient; - } - /* no "default:" in order to produce a compilation warning - about unhandled enumeration value */ - } - -transient: - - dict_table_stats_lock(table, RW_X_LATCH); - - dict_stats_update_transient(table); - - dict_table_stats_unlock(table, RW_X_LATCH); - - return(DB_SUCCESS); -} - -/*********************************************************************//** -Removes the information for a particular index's stats from the persistent -storage if it exists and if there is data stored for this index. -This function creates its own trx and commits it. -A note from Marko why we cannot edit user and sys_* tables in one trx: -marko: The problem is that ibuf merges should be disabled while we are -rolling back dict transactions. -marko: If ibuf merges are not disabled, we need to scan the *.ibd files. -But we shouldn't open *.ibd files before we have rolled back dict -transactions and opened the SYS_* records for the *.ibd files. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_drop_index( -/*==================*/ - const char* db_and_table,/*!< in: db and table, e.g. 'db/table' */ - const char* iname, /*!< in: index name */ - char* errstr, /*!< out: error message if != DB_SUCCESS - is returned */ - ulint errstr_sz)/*!< in: size of the errstr buffer */ -{ - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - pars_info_t* pinfo; - dberr_t ret; - - ut_ad(!mutex_own(&dict_sys->mutex)); - - /* skip indexes whose table names do not contain a database name - e.g. 
if we are dropping an index from SYS_TABLES */ - if (strchr(db_and_table, '/') == NULL) { - - return(DB_SUCCESS); - } - - dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "database_name", db_utf8); - - pars_info_add_str_literal(pinfo, "table_name", table_utf8); - - pars_info_add_str_literal(pinfo, "index_name", iname); - - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&dict_sys->mutex); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE DROP_INDEX_STATS () IS\n" - "BEGIN\n" - "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name AND\n" - "index_name = :index_name;\n" - "END;\n", NULL); - - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - - if (ret == DB_STATS_DO_NOT_EXIST) { - ret = DB_SUCCESS; - } - - if (ret != DB_SUCCESS) { - ut_snprintf(errstr, errstr_sz, - "Unable to delete statistics for index %s " - "from %s%s: %s. They can be deleted later using " - "DELETE FROM %s WHERE " - "database_name = '%s' AND " - "table_name = '%s' AND " - "index_name = '%s';", - iname, - INDEX_STATS_NAME_PRINT, - (ret == DB_LOCK_WAIT_TIMEOUT - ? " because the rows are locked" - : ""), - ut_strerr(ret), - INDEX_STATS_NAME_PRINT, - db_utf8, - table_utf8, - iname); - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", errstr); - } - - return(ret); -} - -/*********************************************************************//** -Executes -DELETE FROM mysql.innodb_table_stats -WHERE database_name = '...' AND table_name = '...'; -Creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INLINE -dberr_t -dict_stats_delete_from_table_stats( -/*===============================*/ - const char* database_name, /*!< in: database name, e.g. 'db' */ - const char* table_name) /*!< in: table name, e.g. 
'table' */ -{ - pars_info_t* pinfo; - dberr_t ret; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&dict_sys->mutex)); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "database_name", database_name); - pars_info_add_str_literal(pinfo, "table_name", table_name); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n" - "BEGIN\n" - "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name;\n" - "END;\n", NULL); - - return(ret); -} - -/*********************************************************************//** -Executes -DELETE FROM mysql.innodb_index_stats -WHERE database_name = '...' AND table_name = '...'; -Creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INLINE -dberr_t -dict_stats_delete_from_index_stats( -/*===============================*/ - const char* database_name, /*!< in: database name, e.g. 'db' */ - const char* table_name) /*!< in: table name, e.g. 'table' */ -{ - pars_info_t* pinfo; - dberr_t ret; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&dict_sys->mutex)); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "database_name", database_name); - pars_info_add_str_literal(pinfo, "table_name", table_name); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n" - "BEGIN\n" - "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name;\n" - "END;\n", NULL); - - return(ret); -} - -/*********************************************************************//** -Removes the statistics for a table and all of its indexes from the -persistent statistics storage if it exists and if there is data stored for -the table. 
This function creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_drop_table( -/*==================*/ - const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */ - char* errstr, /*!< out: error message - if != DB_SUCCESS is returned */ - ulint errstr_sz) /*!< in: size of errstr buffer */ -{ - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - dberr_t ret; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&dict_sys->mutex)); - - /* skip tables that do not contain a database name - e.g. if we are dropping SYS_TABLES */ - if (strchr(db_and_table, '/') == NULL) { - - return(DB_SUCCESS); - } - - /* skip innodb_table_stats and innodb_index_stats themselves */ - if (strcmp(db_and_table, TABLE_STATS_NAME) == 0 - || strcmp(db_and_table, INDEX_STATS_NAME) == 0) { - - return(DB_SUCCESS); - } - - dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8); - - if (ret == DB_SUCCESS) { - ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8); - } - - if (ret == DB_STATS_DO_NOT_EXIST) { - ret = DB_SUCCESS; - } - - if (ret != DB_SUCCESS) { - - ut_snprintf(errstr, errstr_sz, - "Unable to delete statistics for table %s.%s: %s. " - "They can be deleted later using " - - "DELETE FROM %s WHERE " - "database_name = '%s' AND " - "table_name = '%s'; " - - "DELETE FROM %s WHERE " - "database_name = '%s' AND " - "table_name = '%s';", - - db_utf8, table_utf8, - ut_strerr(ret), - - INDEX_STATS_NAME_PRINT, - db_utf8, table_utf8, - - TABLE_STATS_NAME_PRINT, - db_utf8, table_utf8); - } - - return(ret); -} - -/*********************************************************************//** -Executes -UPDATE mysql.innodb_table_stats SET -database_name = '...', table_name = '...' -WHERE database_name = '...' 
AND table_name = '...'; -Creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INLINE -dberr_t -dict_stats_rename_in_table_stats( -/*=============================*/ - const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */ - const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */ - const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */ - const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */ -{ - pars_info_t* pinfo; - dberr_t ret; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&dict_sys->mutex)); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8); - pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8); - pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8); - pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE RENAME_IN_TABLE_STATS () IS\n" - "BEGIN\n" - "UPDATE \"" TABLE_STATS_NAME "\" SET\n" - "database_name = :new_dbname_utf8,\n" - "table_name = :new_tablename_utf8\n" - "WHERE\n" - "database_name = :old_dbname_utf8 AND\n" - "table_name = :old_tablename_utf8;\n" - "END;\n", NULL); - - return(ret); -} - -/*********************************************************************//** -Executes -UPDATE mysql.innodb_index_stats SET -database_name = '...', table_name = '...' -WHERE database_name = '...' AND table_name = '...'; -Creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INLINE -dberr_t -dict_stats_rename_in_index_stats( -/*=============================*/ - const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */ - const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */ - const char* new_dbname_utf8,/*!< in: database name, e.g. 
'newdb' */ - const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */ -{ - pars_info_t* pinfo; - dberr_t ret; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&dict_sys->mutex)); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8); - pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8); - pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8); - pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE RENAME_IN_INDEX_STATS () IS\n" - "BEGIN\n" - "UPDATE \"" INDEX_STATS_NAME "\" SET\n" - "database_name = :new_dbname_utf8,\n" - "table_name = :new_tablename_utf8\n" - "WHERE\n" - "database_name = :old_dbname_utf8 AND\n" - "table_name = :old_tablename_utf8;\n" - "END;\n", NULL); - - return(ret); -} - -/*********************************************************************//** -Renames a table in InnoDB persistent stats storage. -This function creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_rename_table( -/*====================*/ - const char* old_name, /*!< in: old name, e.g. 'db/table' */ - const char* new_name, /*!< in: new name, e.g. 
'db/table' */ - char* errstr, /*!< out: error string if != DB_SUCCESS - is returned */ - size_t errstr_sz) /*!< in: errstr size */ -{ - char old_db_utf8[MAX_DB_UTF8_LEN]; - char new_db_utf8[MAX_DB_UTF8_LEN]; - char old_table_utf8[MAX_TABLE_UTF8_LEN]; - char new_table_utf8[MAX_TABLE_UTF8_LEN]; - dberr_t ret; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!mutex_own(&dict_sys->mutex)); - - /* skip innodb_table_stats and innodb_index_stats themselves */ - if (strcmp(old_name, TABLE_STATS_NAME) == 0 - || strcmp(old_name, INDEX_STATS_NAME) == 0 - || strcmp(new_name, TABLE_STATS_NAME) == 0 - || strcmp(new_name, INDEX_STATS_NAME) == 0) { - - return(DB_SUCCESS); - } - - dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8), - old_table_utf8, sizeof(old_table_utf8)); - - dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8), - new_table_utf8, sizeof(new_table_utf8)); - - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&dict_sys->mutex); - - ulint n_attempts = 0; - do { - n_attempts++; - - ret = dict_stats_rename_in_table_stats( - old_db_utf8, old_table_utf8, - new_db_utf8, new_table_utf8); - - if (ret == DB_DUPLICATE_KEY) { - dict_stats_delete_from_table_stats( - new_db_utf8, new_table_utf8); - } - - if (ret == DB_STATS_DO_NOT_EXIST) { - ret = DB_SUCCESS; - } - - if (ret != DB_SUCCESS) { - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - os_thread_sleep(200000 /* 0.2 sec */); - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&dict_sys->mutex); - } - } while ((ret == DB_DEADLOCK - || ret == DB_DUPLICATE_KEY - || ret == DB_LOCK_WAIT_TIMEOUT) - && n_attempts < 5); - - if (ret != DB_SUCCESS) { - ut_snprintf(errstr, errstr_sz, - "Unable to rename statistics from " - "%s.%s to %s.%s in %s: %s. 
" - "They can be renamed later using " - - "UPDATE %s SET " - "database_name = '%s', " - "table_name = '%s' " - "WHERE " - "database_name = '%s' AND " - "table_name = '%s';", - - old_db_utf8, old_table_utf8, - new_db_utf8, new_table_utf8, - TABLE_STATS_NAME_PRINT, - ut_strerr(ret), - - TABLE_STATS_NAME_PRINT, - new_db_utf8, new_table_utf8, - old_db_utf8, old_table_utf8); - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - return(ret); - } - /* else */ - - n_attempts = 0; - do { - n_attempts++; - - ret = dict_stats_rename_in_index_stats( - old_db_utf8, old_table_utf8, - new_db_utf8, new_table_utf8); - - if (ret == DB_DUPLICATE_KEY) { - dict_stats_delete_from_index_stats( - new_db_utf8, new_table_utf8); - } - - if (ret == DB_STATS_DO_NOT_EXIST) { - ret = DB_SUCCESS; - } - - if (ret != DB_SUCCESS) { - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - os_thread_sleep(200000 /* 0.2 sec */); - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&dict_sys->mutex); - } - } while ((ret == DB_DEADLOCK - || ret == DB_DUPLICATE_KEY - || ret == DB_LOCK_WAIT_TIMEOUT) - && n_attempts < 5); - - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - - if (ret != DB_SUCCESS) { - ut_snprintf(errstr, errstr_sz, - "Unable to rename statistics from " - "%s.%s to %s.%s in %s: %s. " - "They can be renamed later using " - - "UPDATE %s SET " - "database_name = '%s', " - "table_name = '%s' " - "WHERE " - "database_name = '%s' AND " - "table_name = '%s';", - - old_db_utf8, old_table_utf8, - new_db_utf8, new_table_utf8, - INDEX_STATS_NAME_PRINT, - ut_strerr(ret), - - INDEX_STATS_NAME_PRINT, - new_db_utf8, new_table_utf8, - old_db_utf8, old_table_utf8); - } - - return(ret); -} - -/*********************************************************************//** -Save defragmentation result. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_save_defrag_summary( - dict_index_t* index) /*!< in: index */ -{ - dberr_t ret; - lint now = (lint) ut_time(); - if (dict_index_is_univ(index)) { - return DB_SUCCESS; - } - rw_lock_x_lock(&dict_operation_lock); - mutex_enter(&dict_sys->mutex); - ret = dict_stats_save_index_stat(index, now, "n_pages_freed", - index->stat_defrag_n_pages_freed, - NULL, - "Number of pages freed during" - " last defragmentation run.", - NULL); - - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - return (ret); -} - -/*********************************************************************//** -Save defragmentation stats for a given index. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_save_defrag_stats( - dict_index_t* index) /*!< in: index */ -{ - dberr_t ret; - - - if (index->is_readable()) { - } else { - return (dict_stats_report_error(index->table, true)); - } - - if (dict_index_is_univ(index)) { - return DB_SUCCESS; - } - - lint now = (lint) ut_time(); - mtr_t mtr; - ulint n_leaf_pages; - ulint n_leaf_reserved; - mtr_start(&mtr); - mtr_s_lock(dict_index_get_lock(index), &mtr); - n_leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES, - &n_leaf_pages, &mtr); - mtr_commit(&mtr); - - if (n_leaf_reserved == ULINT_UNDEFINED) { - // The index name is different during fast index creation, - // so the stats won't be associated with the right index - // for later use. We just return without saving. 
- return DB_SUCCESS; - } - - rw_lock_x_lock(&dict_operation_lock); - - mutex_enter(&dict_sys->mutex); - ret = dict_stats_save_index_stat(index, now, "n_page_split", - index->stat_defrag_n_page_split, - NULL, - "Number of new page splits on leaves" - " since last defragmentation.", - NULL); - if (ret != DB_SUCCESS) { - goto end; - } - - ret = dict_stats_save_index_stat( - index, now, "n_leaf_pages_defrag", - n_leaf_pages, - NULL, - "Number of leaf pages when this stat is saved to disk", - NULL); - if (ret != DB_SUCCESS) { - goto end; - } - - ret = dict_stats_save_index_stat( - index, now, "n_leaf_pages_reserved", - n_leaf_reserved, - NULL, - "Number of pages reserved for this index leaves when this stat " - "is saved to disk", - NULL); - -end: - mutex_exit(&dict_sys->mutex); - rw_lock_x_unlock(&dict_operation_lock); - - return (ret); -} - -/* tests @{ */ -#ifdef UNIV_COMPILE_TEST_FUNCS - -/* The following unit tests test some of the functions in this file -individually, such testing cannot be performed by the mysql-test framework -via SQL. 
*/ - -/* test_dict_table_schema_check() @{ */ -void -test_dict_table_schema_check() -{ - /* - CREATE TABLE tcheck ( - c01 VARCHAR(123), - c02 INT, - c03 INT NOT NULL, - c04 INT UNSIGNED, - c05 BIGINT, - c06 BIGINT UNSIGNED NOT NULL, - c07 TIMESTAMP - ) ENGINE=INNODB; - */ - /* definition for the table 'test/tcheck' */ - dict_col_meta_t columns[] = { - {"c01", DATA_VARCHAR, 0, 123}, - {"c02", DATA_INT, 0, 4}, - {"c03", DATA_INT, DATA_NOT_NULL, 4}, - {"c04", DATA_INT, DATA_UNSIGNED, 4}, - {"c05", DATA_INT, 0, 8}, - {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8}, - {"c07", DATA_INT, 0, 4}, - {"c_extra", DATA_INT, 0, 4} - }; - dict_table_schema_t schema = { - "test/tcheck", - 0 /* will be set individually for each test below */, - columns - }; - char errstr[512]; - - ut_snprintf(errstr, sizeof(errstr), "Table not found"); - - /* prevent any data dictionary modifications while we are checking - the tables' structure */ - - mutex_enter(&(dict_sys->mutex)); - - /* check that a valid table is reported as valid */ - schema.n_cols = 7; - if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) - == DB_SUCCESS) { - printf("OK: test.tcheck ok\n"); - } else { - printf("ERROR: %s\n", errstr); - printf("ERROR: test.tcheck not present or corrupted\n"); - goto test_dict_table_schema_check_end; - } - - /* check columns with wrong length */ - schema.columns[1].len = 8; - if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) - != DB_SUCCESS) { - printf("OK: test.tcheck.c02 has different length and is " - "reported as corrupted\n"); - } else { - printf("OK: test.tcheck.c02 has different length but is " - "reported as ok\n"); - goto test_dict_table_schema_check_end; - } - schema.columns[1].len = 4; - - /* request that c02 is NOT NULL while actually it does not have - this flag set */ - schema.columns[1].prtype_mask |= DATA_NOT_NULL; - if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) - != DB_SUCCESS) { - printf("OK: test.tcheck.c02 does not have NOT NULL while 
" - "it should and is reported as corrupted\n"); - } else { - printf("ERROR: test.tcheck.c02 does not have NOT NULL while " - "it should and is not reported as corrupted\n"); - goto test_dict_table_schema_check_end; - } - schema.columns[1].prtype_mask &= ~DATA_NOT_NULL; - - /* check a table that contains some extra columns */ - schema.n_cols = 6; - if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) - == DB_SUCCESS) { - printf("ERROR: test.tcheck has more columns but is not " - "reported as corrupted\n"); - goto test_dict_table_schema_check_end; - } else { - printf("OK: test.tcheck has more columns and is " - "reported as corrupted\n"); - } - - /* check a table that has some columns missing */ - schema.n_cols = 8; - if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) - != DB_SUCCESS) { - printf("OK: test.tcheck has missing columns and is " - "reported as corrupted\n"); - } else { - printf("ERROR: test.tcheck has missing columns but is " - "reported as ok\n"); - goto test_dict_table_schema_check_end; - } - - /* check non-existent table */ - schema.table_name = "test/tcheck_nonexistent"; - if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) - != DB_SUCCESS) { - printf("OK: test.tcheck_nonexistent is not present\n"); - } else { - printf("ERROR: test.tcheck_nonexistent is present!?\n"); - goto test_dict_table_schema_check_end; - } - -test_dict_table_schema_check_end: - - mutex_exit(&(dict_sys->mutex)); -} -/* @} */ - -/* save/fetch aux macros @{ */ -#define TEST_DATABASE_NAME "foobardb" -#define TEST_TABLE_NAME "test_dict_stats" - -#define TEST_N_ROWS 111 -#define TEST_CLUSTERED_INDEX_SIZE 222 -#define TEST_SUM_OF_OTHER_INDEX_SIZES 333 - -#define TEST_IDX1_NAME "tidx1" -#define TEST_IDX1_COL1_NAME "tidx1_col1" -#define TEST_IDX1_INDEX_SIZE 123 -#define TEST_IDX1_N_LEAF_PAGES 234 -#define TEST_IDX1_N_DIFF1 50 -#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500 - -#define TEST_IDX2_NAME "tidx2" -#define TEST_IDX2_COL1_NAME "tidx2_col1" -#define 
TEST_IDX2_COL2_NAME "tidx2_col2" -#define TEST_IDX2_COL3_NAME "tidx2_col3" -#define TEST_IDX2_COL4_NAME "tidx2_col4" -#define TEST_IDX2_INDEX_SIZE 321 -#define TEST_IDX2_N_LEAF_PAGES 432 -#define TEST_IDX2_N_DIFF1 60 -#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600 -#define TEST_IDX2_N_DIFF2 61 -#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610 -#define TEST_IDX2_N_DIFF3 62 -#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620 -#define TEST_IDX2_N_DIFF4 63 -#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630 -/* @} */ - -/* test_dict_stats_save() @{ */ -void -test_dict_stats_save() -{ - dict_table_t table; - dict_index_t index1; - dict_field_t index1_fields[1]; - ib_uint64_t index1_stat_n_diff_key_vals[1]; - ib_uint64_t index1_stat_n_sample_sizes[1]; - dict_index_t index2; - dict_field_t index2_fields[4]; - ib_uint64_t index2_stat_n_diff_key_vals[4]; - ib_uint64_t index2_stat_n_sample_sizes[4]; - dberr_t ret; - - /* craft a dummy dict_table_t */ - table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); - table.stat_n_rows = TEST_N_ROWS; - table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE; - table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES; - UT_LIST_INIT(table.indexes); - UT_LIST_ADD_LAST(indexes, table.indexes, &index1); - UT_LIST_ADD_LAST(indexes, table.indexes, &index2); - ut_d(table.magic_n = DICT_TABLE_MAGIC_N); - ut_d(index1.magic_n = DICT_INDEX_MAGIC_N); - - index1.name = TEST_IDX1_NAME; - index1.table = &table; - index1.cached = 1; - index1.n_uniq = 1; - index1.fields = index1_fields; - index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals; - index1.stat_n_sample_sizes = index1_stat_n_sample_sizes; - index1.stat_index_size = TEST_IDX1_INDEX_SIZE; - index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES; - index1_fields[0].name = TEST_IDX1_COL1_NAME; - index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1; - index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE; - - ut_d(index2.magic_n = DICT_INDEX_MAGIC_N); - index2.name = TEST_IDX2_NAME; - 
index2.table = &table; - index2.cached = 1; - index2.n_uniq = 4; - index2.fields = index2_fields; - index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals; - index2.stat_n_sample_sizes = index2_stat_n_sample_sizes; - index2.stat_index_size = TEST_IDX2_INDEX_SIZE; - index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES; - index2_fields[0].name = TEST_IDX2_COL1_NAME; - index2_fields[1].name = TEST_IDX2_COL2_NAME; - index2_fields[2].name = TEST_IDX2_COL3_NAME; - index2_fields[3].name = TEST_IDX2_COL4_NAME; - index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1; - index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2; - index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3; - index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4; - index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE; - index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE; - index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE; - index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE; - - ret = dict_stats_save(&table, NULL); - - ut_a(ret == DB_SUCCESS); - - printf("\nOK: stats saved successfully, now go ahead and read " - "what's inside %s and %s:\n\n", - TABLE_STATS_NAME_PRINT, - INDEX_STATS_NAME_PRINT); - - printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n" - "FROM %s\n" - "WHERE\n" - "database_name = '%s' AND\n" - "table_name = '%s' AND\n" - "n_rows = %d AND\n" - "clustered_index_size = %d AND\n" - "sum_of_other_index_sizes = %d;\n" - "\n", - TABLE_STATS_NAME_PRINT, - TEST_DATABASE_NAME, - TEST_TABLE_NAME, - TEST_N_ROWS, - TEST_CLUSTERED_INDEX_SIZE, - TEST_SUM_OF_OTHER_INDEX_SIZES); - - printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n" - "FROM %s\n" - "WHERE\n" - "database_name = '%s' AND\n" - "table_name = '%s' AND\n" - "index_name = '%s' AND\n" - "(\n" - " (stat_name = 'size' AND stat_value = %d AND" - " sample_size IS NULL) OR\n" - " (stat_name = 'n_leaf_pages' AND stat_value = %d AND" - " sample_size IS NULL) OR\n" - " (stat_name = 
'n_diff_pfx01' AND stat_value = %d AND" - " sample_size = '%d' AND stat_description = '%s')\n" - ");\n" - "\n", - INDEX_STATS_NAME_PRINT, - TEST_DATABASE_NAME, - TEST_TABLE_NAME, - TEST_IDX1_NAME, - TEST_IDX1_INDEX_SIZE, - TEST_IDX1_N_LEAF_PAGES, - TEST_IDX1_N_DIFF1, - TEST_IDX1_N_DIFF1_SAMPLE_SIZE, - TEST_IDX1_COL1_NAME); - - printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n" - "FROM %s\n" - "WHERE\n" - "database_name = '%s' AND\n" - "table_name = '%s' AND\n" - "index_name = '%s' AND\n" - "(\n" - " (stat_name = 'size' AND stat_value = %d AND" - " sample_size IS NULL) OR\n" - " (stat_name = 'n_leaf_pages' AND stat_value = %d AND" - " sample_size IS NULL) OR\n" - " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND" - " sample_size = '%d' AND stat_description = '%s') OR\n" - " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND" - " sample_size = '%d' AND stat_description = '%s,%s') OR\n" - " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND" - " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n" - " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND" - " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n" - ");\n" - "\n", - INDEX_STATS_NAME_PRINT, - TEST_DATABASE_NAME, - TEST_TABLE_NAME, - TEST_IDX2_NAME, - TEST_IDX2_INDEX_SIZE, - TEST_IDX2_N_LEAF_PAGES, - TEST_IDX2_N_DIFF1, - TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME, - TEST_IDX2_N_DIFF2, - TEST_IDX2_N_DIFF2_SAMPLE_SIZE, - TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, - TEST_IDX2_N_DIFF3, - TEST_IDX2_N_DIFF3_SAMPLE_SIZE, - TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME, - TEST_IDX2_N_DIFF4, - TEST_IDX2_N_DIFF4_SAMPLE_SIZE, - TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME, - TEST_IDX2_COL4_NAME); -} -/* @} */ - -/* test_dict_stats_fetch_from_ps() @{ */ -void -test_dict_stats_fetch_from_ps() -{ - dict_table_t table; - dict_index_t index1; - ib_uint64_t index1_stat_n_diff_key_vals[1]; - ib_uint64_t index1_stat_n_sample_sizes[1]; - dict_index_t 
index2; - ib_uint64_t index2_stat_n_diff_key_vals[4]; - ib_uint64_t index2_stat_n_sample_sizes[4]; - dberr_t ret; - - /* craft a dummy dict_table_t */ - table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); - UT_LIST_INIT(table.indexes); - UT_LIST_ADD_LAST(indexes, table.indexes, &index1); - UT_LIST_ADD_LAST(indexes, table.indexes, &index2); - ut_d(table.magic_n = DICT_TABLE_MAGIC_N); - - index1.name = TEST_IDX1_NAME; - ut_d(index1.magic_n = DICT_INDEX_MAGIC_N); - index1.cached = 1; - index1.n_uniq = 1; - index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals; - index1.stat_n_sample_sizes = index1_stat_n_sample_sizes; - - index2.name = TEST_IDX2_NAME; - ut_d(index2.magic_n = DICT_INDEX_MAGIC_N); - index2.cached = 1; - index2.n_uniq = 4; - index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals; - index2.stat_n_sample_sizes = index2_stat_n_sample_sizes; - - ret = dict_stats_fetch_from_ps(&table); - - ut_a(ret == DB_SUCCESS); - - ut_a(table.stat_n_rows == TEST_N_ROWS); - ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE); - ut_a(table.stat_sum_of_other_index_sizes - == TEST_SUM_OF_OTHER_INDEX_SIZES); - - ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE); - ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES); - ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1); - ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE); - - ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE); - ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES); - ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1); - ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE); - ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2); - ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE); - ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3); - ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE); - ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4); - 
ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE); - - printf("OK: fetch successful\n"); -} -/* @} */ - -/* test_dict_stats_all() @{ */ -void -test_dict_stats_all() -{ - test_dict_table_schema_check(); - - test_dict_stats_save(); - - test_dict_stats_fetch_from_ps(); -} -/* @} */ - -#endif /* UNIV_COMPILE_TEST_FUNCS */ -/* @} */ - -#endif /* UNIV_HOTBACKUP */ diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc deleted file mode 100644 index ba6fd115551..00000000000 --- a/storage/xtradb/dict/dict0stats_bg.cc +++ /dev/null @@ -1,585 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dict/dict0stats_bg.cc -Code used for background table and index stats gathering. 
- -Created Apr 25, 2012 Vasil Dimov -*******************************************************/ - -#include "row0mysql.h" -#include "srv0start.h" -#include "dict0dict.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" - -#ifdef UNIV_NONINL -# include "dict0stats_bg.ic" -#endif - -#include <vector> - -/** Minimum time interval between stats recalc for a given table */ -#define MIN_RECALC_INTERVAL 10 /* seconds */ - -/** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add() -or shutdown. Not protected by any mutex. */ -UNIV_INTERN os_event_t dict_stats_event; - -/** Variable to initiate shutdown the dict stats thread. Note we don't -use 'srv_shutdown_state' because we want to shutdown dict stats thread -before purge thread. */ -static bool dict_stats_start_shutdown; - -/** Event to wait for shutdown of the dict stats thread */ -static os_event_t dict_stats_shutdown_event; - -/** This mutex protects the "recalc_pool" variable. */ -static ib_mutex_t recalc_pool_mutex; -static ib_mutex_t defrag_pool_mutex; -#ifdef HAVE_PSI_INTERFACE -static mysql_pfs_key_t recalc_pool_mutex_key; -static mysql_pfs_key_t defrag_pool_mutex_key; -#endif /* HAVE_PSI_INTERFACE */ - -/** The number of tables that can be added to "recalc_pool" before -it is enlarged */ -static const ulint RECALC_POOL_INITIAL_SLOTS = 128; - -/** The multitude of tables whose stats are to be automatically -recalculated - an STL vector */ -typedef std::vector<table_id_t> recalc_pool_t; -static recalc_pool_t recalc_pool; - -typedef recalc_pool_t::iterator recalc_pool_iterator_t; - -/** Indices whose defrag stats need to be saved to persistent storage.*/ -struct defrag_pool_item_t { - table_id_t table_id; - index_id_t index_id; -}; -typedef std::vector<defrag_pool_item_t> defrag_pool_t; -static defrag_pool_t defrag_pool; -typedef defrag_pool_t::iterator defrag_pool_iterator_t; - -/*****************************************************************//** -Initialize the recalc pool, called once during 
thread initialization. */ -static -void -dict_stats_pool_init() -/*=========================*/ -{ - ut_ad(!srv_read_only_mode); - - recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS); - defrag_pool.reserve(RECALC_POOL_INITIAL_SLOTS); -} - -/*****************************************************************//** -Free the resources occupied by the recalc pool, called once during -thread de-initialization. */ -static -void -dict_stats_pool_deinit() -/*===========================*/ -{ - ut_ad(!srv_read_only_mode); - - recalc_pool.clear(); - defrag_pool.clear(); - - /* - recalc_pool may still have its buffer allocated. It will free it when - its destructor is called. - The problem is, memory leak detector is run before the recalc_pool's - destructor is invoked, and will report recalc_pool's buffer as leaked - memory. To avoid that, we force recalc_pool to surrender its buffer - to empty_pool object, which will free it when leaving this function: - */ - recalc_pool_t recalc_empty_pool; - defrag_pool_t defrag_empty_pool; - memset(&recalc_empty_pool, 0, sizeof(recalc_pool_t)); - memset(&defrag_empty_pool, 0, sizeof(defrag_pool_t)); - recalc_pool.swap(recalc_empty_pool); - defrag_pool.swap(defrag_empty_pool); -} - -/*****************************************************************//** -Add a table to the recalc pool, which is processed by the -background stats gathering thread. Only the table id is added to the -list, so the table can be closed after being enqueued and it will be -opened when needed. If the table does not exist later (has been DROPped), -then it will be removed from the pool and skipped. 
*/ -UNIV_INTERN -void -dict_stats_recalc_pool_add( -/*=======================*/ - const dict_table_t* table) /*!< in: table to add */ -{ - ut_ad(!srv_read_only_mode); - - mutex_enter(&recalc_pool_mutex); - - /* quit if already in the list */ - for (recalc_pool_iterator_t iter = recalc_pool.begin(); - iter != recalc_pool.end(); - ++iter) { - - if (*iter == table->id) { - mutex_exit(&recalc_pool_mutex); - return; - } - } - - recalc_pool.push_back(table->id); - - mutex_exit(&recalc_pool_mutex); - - os_event_set(dict_stats_event); -} - -/*****************************************************************//** -Get a table from the auto recalc pool. The returned table id is removed -from the pool. -@return true if the pool was non-empty and "id" was set, false otherwise */ -static -bool -dict_stats_recalc_pool_get( -/*=======================*/ - table_id_t* id) /*!< out: table id, or unmodified if list is - empty */ -{ - ut_ad(!srv_read_only_mode); - - mutex_enter(&recalc_pool_mutex); - - if (recalc_pool.empty()) { - mutex_exit(&recalc_pool_mutex); - return(false); - } - - *id = recalc_pool[0]; - - recalc_pool.erase(recalc_pool.begin()); - - mutex_exit(&recalc_pool_mutex); - - return(true); -} - -/*****************************************************************//** -Delete a given table from the auto recalc pool. 
-dict_stats_recalc_pool_del() */ -UNIV_INTERN -void -dict_stats_recalc_pool_del( -/*=======================*/ - const dict_table_t* table) /*!< in: table to remove */ -{ - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&dict_sys->mutex)); - - mutex_enter(&recalc_pool_mutex); - - ut_ad(table->id > 0); - - for (recalc_pool_iterator_t iter = recalc_pool.begin(); - iter != recalc_pool.end(); - ++iter) { - - if (*iter == table->id) { - /* erase() invalidates the iterator */ - recalc_pool.erase(iter); - break; - } - } - - mutex_exit(&recalc_pool_mutex); -} - -/*****************************************************************//** -Add an index in a table to the defrag pool, which is processed by the -background stats gathering thread. Only the table id and index id are -added to the list, so the table can be closed after being enqueued and -it will be opened when needed. If the table or index does not exist later -(has been DROPped), then it will be removed from the pool and skipped. */ -UNIV_INTERN -void -dict_stats_defrag_pool_add( -/*=======================*/ - const dict_index_t* index) /*!< in: table to add */ -{ - defrag_pool_item_t item; - - ut_ad(!srv_read_only_mode); - - mutex_enter(&defrag_pool_mutex); - - /* quit if already in the list */ - for (defrag_pool_iterator_t iter = defrag_pool.begin(); - iter != defrag_pool.end(); - ++iter) { - if ((*iter).table_id == index->table->id - && (*iter).index_id == index->id) { - mutex_exit(&defrag_pool_mutex); - return; - } - } - - item.table_id = index->table->id; - item.index_id = index->id; - defrag_pool.push_back(item); - - mutex_exit(&defrag_pool_mutex); - - os_event_set(dict_stats_event); -} - -/*****************************************************************//** -Get an index from the auto defrag pool. The returned index id is removed -from the pool. 
-@return true if the pool was non-empty and "id" was set, false otherwise */ -static -bool -dict_stats_defrag_pool_get( -/*=======================*/ - table_id_t* table_id, /*!< out: table id, or unmodified if - list is empty */ - index_id_t* index_id) /*!< out: index id, or unmodified if - list is empty */ -{ - ut_ad(!srv_read_only_mode); - - mutex_enter(&defrag_pool_mutex); - - if (defrag_pool.empty()) { - mutex_exit(&defrag_pool_mutex); - return(false); - } - - defrag_pool_item_t& item = defrag_pool.back(); - *table_id = item.table_id; - *index_id = item.index_id; - - defrag_pool.pop_back(); - - mutex_exit(&defrag_pool_mutex); - - return(true); -} - -/*****************************************************************//** -Delete a given index from the auto defrag pool. */ -UNIV_INTERN -void -dict_stats_defrag_pool_del( -/*=======================*/ - const dict_table_t* table, /*!<in: if given, remove - all entries for the table */ - const dict_index_t* index) /*!< in: if given, remove this index */ -{ - ut_a((table && !index) || (!table && index)); - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&dict_sys->mutex)); - - mutex_enter(&defrag_pool_mutex); - - defrag_pool_iterator_t iter = defrag_pool.begin(); - while (iter != defrag_pool.end()) { - if ((table && (*iter).table_id == table->id) - || (index - && (*iter).table_id == index->table->id - && (*iter).index_id == index->id)) { - /* erase() invalidates the iterator */ - iter = defrag_pool.erase(iter); - if (index) - break; - } else { - iter++; - } - } - - mutex_exit(&defrag_pool_mutex); -} - -/*****************************************************************//** -Wait until background stats thread has stopped using the specified table. -The caller must have locked the data dictionary using -row_mysql_lock_data_dictionary() and this function may unlock it temporarily -and restore the lock before it exits. 
-The background stats thread is guaranteed not to start using the specified -table after this function returns and before the caller unlocks the data -dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag -under dict_sys->mutex. */ -UNIV_INTERN -void -dict_stats_wait_bg_to_stop_using_table( -/*===================================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx) /*!< in/out: transaction to use for - unlocking/locking the data dict */ -{ - while (!dict_stats_stop_bg(table)) { - DICT_STATS_BG_YIELD(trx); - } -} - -/*****************************************************************//** -Initialize global variables needed for the operation of dict_stats_thread() -Must be called before dict_stats_thread() is started. */ -UNIV_INTERN -void -dict_stats_thread_init() -{ - ut_a(!srv_read_only_mode); - - dict_stats_event = os_event_create(); - dict_stats_shutdown_event = os_event_create(); - - /* The recalc_pool_mutex is acquired from: - 1) the background stats gathering thread before any other latch - and released without latching anything else in between (thus - any level would do here) - 2) from row_update_statistics_if_needed() - and released without latching anything else in between. We know - that dict_sys->mutex (SYNC_DICT) is not acquired when - row_update_statistics_if_needed() is called and it may be acquired - inside that function (thus a level <=SYNC_DICT would do). - 3) from row_drop_table_for_mysql() after dict_sys->mutex (SYNC_DICT) - and dict_operation_lock (SYNC_DICT_OPERATION) have been locked - (thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do) - So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */ - mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex, - SYNC_STATS_AUTO_RECALC); - - /* We choose SYNC_STATS_DEFRAG to be below SYNC_FSP_PAGE. 
*/ - mutex_create(defrag_pool_mutex_key, &defrag_pool_mutex, - SYNC_STATS_DEFRAG); - dict_stats_pool_init(); -} - -/*****************************************************************//** -Free resources allocated by dict_stats_thread_init(), must be called -after dict_stats_thread() has exited. */ -UNIV_INTERN -void -dict_stats_thread_deinit() -/*======================*/ -{ - ut_a(!srv_read_only_mode); - ut_ad(!srv_dict_stats_thread_active); - - dict_stats_pool_deinit(); - - mutex_free(&recalc_pool_mutex); - memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex)); - - mutex_free(&defrag_pool_mutex); - memset(&defrag_pool_mutex, 0x0, sizeof(defrag_pool_mutex)); - - os_event_free(dict_stats_event); - dict_stats_event = NULL; - os_event_free(dict_stats_shutdown_event); - dict_stats_shutdown_event = NULL; - dict_stats_start_shutdown = false; -} - -/*****************************************************************//** -Get the first table that has been added for auto recalc and eventually -update its stats. 
*/
static
void
dict_stats_process_entry_from_recalc_pool()
/*=======================================*/
{
	table_id_t	queued_id;

	ut_ad(!srv_read_only_mode);

	/* Take the first table id off the auto recalc pool. */
	if (!dict_stats_recalc_pool_get(&queued_id)) {
		/* the pool is empty, nothing to recalculate */
		return;
	}

	mutex_enter(&dict_sys->mutex);

	dict_table_t*	table = dict_table_open_on_id(
		queued_id, TRUE, DICT_TABLE_OP_NORMAL);

	if (table == NULL || table->corrupted) {
		/* Either the table does not exist (it must have been
		DROPped after its id was enqueued) or it is corrupted.
		In the latter case the handle has to be given back. */
		if (table != NULL) {
			dict_table_close(table, TRUE, FALSE);
		}

		mutex_exit(&dict_sys->mutex);
		return;
	}

	/* Mark the table as being in use by the background stats thread;
	the flag is set while dict_sys->mutex is held. */
	table->stats_bg_flag |= BG_STAT_IN_PROGRESS;

	mutex_exit(&dict_sys->mutex);

	/* ut_time() could be expensive: this function is called once
	every time a table has been changed more than 10%, and on a system
	with lots of small tables this could become hot. Should that turn
	out to be a problem, the check below could eventually be replaced
	with something else, though a time interval is the natural
	approach. */
	const bool	recalculated_recently
		= ut_difftime(ut_time(), table->stats_last_recalc)
		< MIN_RECALC_INTERVAL;

	if (!recalculated_recently) {

		dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);

	} else {

		/* The stats were (re)calculated not long ago. To avoid
		too frequent stats updates the table is put back on the
		auto recalc list and nothing else is done. */

		dict_stats_recalc_pool_add(table);
	}

	mutex_enter(&dict_sys->mutex);

	table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS;

	dict_table_close(table, TRUE, FALSE);

	mutex_exit(&dict_sys->mutex);
}

/*****************************************************************//**
Get the first index that has been added for updating persistent defrag
stats and eventually save its stats. 
*/ -static -void -dict_stats_process_entry_from_defrag_pool() -/*=======================================*/ -{ - table_id_t table_id; - index_id_t index_id; - - ut_ad(!srv_read_only_mode); - - /* pop the first index from the auto defrag pool */ - if (!dict_stats_defrag_pool_get(&table_id, &index_id)) { - /* no index in defrag pool */ - return; - } - - dict_table_t* table; - - mutex_enter(&dict_sys->mutex); - - /* If the table is no longer cached, we've already lost the in - memory stats so there's nothing really to write to disk. */ - table = dict_table_open_on_id(table_id, TRUE, - DICT_TABLE_OP_OPEN_ONLY_IF_CACHED); - - if (table == NULL) { - mutex_exit(&dict_sys->mutex); - return; - } - - /* Check whether table is corrupted */ - if (table->corrupted) { - dict_table_close(table, TRUE, FALSE); - mutex_exit(&dict_sys->mutex); - return; - } - mutex_exit(&dict_sys->mutex); - - dict_index_t* index = dict_table_find_index_on_id(table, index_id); - - if (index == NULL) { - return; - } - - /* Check whether index is corrupted */ - if (dict_index_is_corrupted(index)) { - dict_table_close(table, FALSE, FALSE); - return; - } - - dict_stats_save_defrag_stats(index); - dict_table_close(table, FALSE, FALSE); -} - -/*****************************************************************//** -This is the thread for background stats gathering. It pops tables, from -the auto recalc list and proceeds them, eventually recalculating their -statistics. -@return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(dict_stats_thread)(void*) -{ - my_thread_init(); - ut_a(!srv_read_only_mode); - - while (!dict_stats_start_shutdown) { - - /* Wake up periodically even if not signaled. This is - because we may lose an event - if the below call to - dict_stats_process_entry_from_recalc_pool() puts the entry back - in the list, the os_event_set() will be lost by the subsequent - os_event_reset(). 
*/ - os_event_wait_time( - dict_stats_event, MIN_RECALC_INTERVAL * 1000000); - - if (dict_stats_start_shutdown) { - break; - } - - dict_stats_process_entry_from_recalc_pool(); - - while (defrag_pool.size()) - dict_stats_process_entry_from_defrag_pool(); - - os_event_reset(dict_stats_event); - } - - srv_dict_stats_thread_active = false; - - os_event_set(dict_stats_shutdown_event); - my_thread_end(); - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit instead of return(). */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/** Shut down the dict_stats_thread. */ -void -dict_stats_shutdown() -{ - dict_stats_start_shutdown = true; - os_event_set(dict_stats_event); - os_event_wait(dict_stats_shutdown_event); -} diff --git a/storage/xtradb/dyn/dyn0dyn.cc b/storage/xtradb/dyn/dyn0dyn.cc deleted file mode 100644 index dd1f6863c14..00000000000 --- a/storage/xtradb/dyn/dyn0dyn.cc +++ /dev/null @@ -1,65 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file dyn/dyn0dyn.cc -The dynamically allocated array - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#include "dyn0dyn.h" -#ifdef UNIV_NONINL -#include "dyn0dyn.ic" -#endif - -/************************************************************//** -Adds a new block to a dyn array. -@return created block */ -UNIV_INTERN -dyn_block_t* -dyn_array_add_block( -/*================*/ - dyn_array_t* arr) /*!< in/out: dyn array */ -{ - mem_heap_t* heap; - dyn_block_t* block; - - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - if (arr->heap == NULL) { - UT_LIST_INIT(arr->base); - UT_LIST_ADD_FIRST(list, arr->base, arr); - - arr->heap = mem_heap_create(sizeof(dyn_block_t)); - } - - block = dyn_array_get_last_block(arr); - block->used = block->used | DYN_BLOCK_FULL_FLAG; - - heap = arr->heap; - - block = static_cast<dyn_block_t*>( - mem_heap_alloc(heap, sizeof(dyn_block_t))); - - block->used = 0; - - UT_LIST_ADD_LAST(list, arr->base, block); - - return(block); -} diff --git a/storage/xtradb/eval/eval0eval.cc b/storage/xtradb/eval/eval0eval.cc deleted file mode 100644 index ccc54781102..00000000000 --- a/storage/xtradb/eval/eval0eval.cc +++ /dev/null @@ -1,950 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file eval/eval0eval.cc -SQL evaluator: evaluates simple data structures, like expressions, in -a query graph - -Created 12/29/1997 Heikki Tuuri -*******************************************************/ - -#include "eval0eval.h" - -#ifdef UNIV_NONINL -#include "eval0eval.ic" -#endif - -#include "data0data.h" -#include "row0sel.h" -#include "rem0cmp.h" - -/** The RND function seed */ -static ulint eval_rnd = 128367121; - -/** Dummy adress used when we should allocate a buffer of size 0 in -eval_node_alloc_val_buf */ - -static byte eval_dummy; - -/************************************************************************* -Gets the like node from the node */ -UNIV_INLINE -que_node_t* -que_node_get_like_node( -/*===================*/ - /* out: next node in a list of nodes */ - que_node_t* node) /* in: node in a list */ -{ - return(((sym_node_t*) node)->like_node); -} - -/*****************************************************************//** -Allocate a buffer from global dynamic memory for a value of a que_node. -NOTE that this memory must be explicitly freed when the query graph is -freed. If the node already has an allocated buffer, that buffer is freed -here. NOTE that this is the only function where dynamic memory should be -allocated for a query node val field. 
-@return pointer to allocated buffer */ -UNIV_INTERN -byte* -eval_node_alloc_val_buf( -/*====================*/ - que_node_t* node, /*!< in: query graph node; sets the val field - data field to point to the new buffer, and - len field equal to size */ - ulint size) /*!< in: buffer size */ -{ - dfield_t* dfield; - byte* data; - - ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL - || que_node_get_type(node) == QUE_NODE_FUNC); - - dfield = que_node_get_val(node); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - if (data && data != &eval_dummy) { - mem_free(data); - } - - if (size == 0) { - data = &eval_dummy; - } else { - data = static_cast<byte*>(mem_alloc(size)); - } - - que_node_set_val_buf_size(node, size); - - dfield_set_data(dfield, data, size); - - return(data); -} - -/*****************************************************************//** -Free the buffer from global dynamic memory for a value of a que_node, -if it has been allocated in the above function. The freeing for pushed -column values is done in sel_col_prefetch_buf_free. */ -UNIV_INTERN -void -eval_node_free_val_buf( -/*===================*/ - que_node_t* node) /*!< in: query graph node */ -{ - dfield_t* dfield; - byte* data; - - ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL - || que_node_get_type(node) == QUE_NODE_FUNC); - - dfield = que_node_get_val(node); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - if (que_node_get_val_buf_size(node) > 0) { - ut_a(data); - - mem_free(data); - } -} - -/********************************************************************* -Evaluates a LIKE comparison node. 
-@return the result of the comparison */ -UNIV_INLINE -ibool -eval_cmp_like( -/*==========*/ - que_node_t* arg1, /* !< in: left operand */ - que_node_t* arg2) /* !< in: right operand */ -{ - ib_like_t op; - int res; - que_node_t* arg3; - que_node_t* arg4; - dfield_t* dfield; - dtype_t* dtype; - ibool val = TRUE; - - arg3 = que_node_get_like_node(arg2); - - /* Get the comparison type operator */ - ut_a(arg3); - - dfield = que_node_get_val(arg3); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_INT); - op = static_cast<ib_like_t>(mach_read_from_4(static_cast<const unsigned char*>(dfield_get_data(dfield)))); - - switch (op) { - case IB_LIKE_PREFIX: - - arg4 = que_node_get_next(arg3); - res = cmp_dfield_dfield_like_prefix( - que_node_get_val(arg1), - que_node_get_val(arg4)); - break; - - case IB_LIKE_SUFFIX: - - arg4 = que_node_get_next(arg3); - res = cmp_dfield_dfield_like_suffix( - que_node_get_val(arg1), - que_node_get_val(arg4)); - break; - - case IB_LIKE_SUBSTR: - - arg4 = que_node_get_next(arg3); - res = cmp_dfield_dfield_like_substr( - que_node_get_val(arg1), - que_node_get_val(arg4)); - break; - - case IB_LIKE_EXACT: - res = cmp_dfield_dfield( - que_node_get_val(arg1), - que_node_get_val(arg2)); - break; - - default: - ut_error; - } - - if (res != 0) { - val = FALSE; - } - - return(val); -} - -/********************************************************************* -Evaluates a comparison node. 
-@return the result of the comparison */ -ibool -eval_cmp( -/*=====*/ - func_node_t* cmp_node) /*!< in: comparison node */ -{ - que_node_t* arg1; - que_node_t* arg2; - int res; - int func; - ibool val = TRUE; - - ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC); - - arg1 = cmp_node->args; - arg2 = que_node_get_next(arg1); - - func = cmp_node->func; - - if (func == PARS_LIKE_TOKEN_EXACT - || func == PARS_LIKE_TOKEN_PREFIX - || func == PARS_LIKE_TOKEN_SUFFIX - || func == PARS_LIKE_TOKEN_SUBSTR) { - - val = eval_cmp_like(arg1, arg2); - } else { - res = cmp_dfield_dfield( - que_node_get_val(arg1), que_node_get_val(arg2)); - - if (func == '=') { - if (res != 0) { - val = FALSE; - } - } else if (func == '<') { - if (res != -1) { - val = FALSE; - } - } else if (func == PARS_LE_TOKEN) { - if (res == 1) { - val = FALSE; - } - } else if (func == PARS_NE_TOKEN) { - if (res == 0) { - val = FALSE; - } - } else if (func == PARS_GE_TOKEN) { - if (res == -1) { - val = FALSE; - } - } else { - ut_ad(func == '>'); - - if (res != 1) { - val = FALSE; - } - } - } - - eval_node_set_ibool_val(cmp_node, val); - - return(val); -} - -/*****************************************************************//** -Evaluates a logical operation node. 
*/ -UNIV_INLINE -void -eval_logical( -/*=========*/ - func_node_t* logical_node) /*!< in: logical operation node */ -{ - que_node_t* arg1; - que_node_t* arg2; - ibool val1; - ibool val2 = 0; /* remove warning */ - ibool val = 0; /* remove warning */ - int func; - - ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC); - - arg1 = logical_node->args; - arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is 'NOT' */ - - val1 = eval_node_get_ibool_val(arg1); - - if (arg2) { - val2 = eval_node_get_ibool_val(arg2); - } - - func = logical_node->func; - - if (func == PARS_AND_TOKEN) { - val = val1 & val2; - } else if (func == PARS_OR_TOKEN) { - val = val1 | val2; - } else if (func == PARS_NOT_TOKEN) { - val = TRUE - val1; - } else { - ut_error; - } - - eval_node_set_ibool_val(logical_node, val); -} - -/*****************************************************************//** -Evaluates an arithmetic operation node. */ -UNIV_INLINE -void -eval_arith( -/*=======*/ - func_node_t* arith_node) /*!< in: arithmetic operation node */ -{ - que_node_t* arg1; - que_node_t* arg2; - lint val1; - lint val2 = 0; /* remove warning */ - lint val; - int func; - - ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC); - - arg1 = arith_node->args; - arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */ - - val1 = eval_node_get_int_val(arg1); - - if (arg2) { - val2 = eval_node_get_int_val(arg2); - } - - func = arith_node->func; - - if (func == '+') { - val = val1 + val2; - } else if ((func == '-') && arg2) { - val = val1 - val2; - } else if (func == '-') { - val = -val1; - } else if (func == '*') { - val = val1 * val2; - } else { - ut_ad(func == '/'); - val = val1 / val2; - } - - eval_node_set_int_val(arith_node, val); -} - -/*****************************************************************//** -Evaluates an aggregate operation node. 
*/ -UNIV_INLINE -void -eval_aggregate( -/*===========*/ - func_node_t* node) /*!< in: aggregate operation node */ -{ - que_node_t* arg; - lint val; - lint arg_val; - int func; - - ut_ad(que_node_get_type(node) == QUE_NODE_FUNC); - - val = eval_node_get_int_val(node); - - func = node->func; - - if (func == PARS_COUNT_TOKEN) { - - val = val + 1; - } else { - ut_ad(func == PARS_SUM_TOKEN); - - arg = node->args; - arg_val = eval_node_get_int_val(arg); - - val = val + arg_val; - } - - eval_node_set_int_val(node, val); -} - -/*****************************************************************//** -Evaluates a predefined function node where the function is not relevant -in benchmarks. */ -static -void -eval_predefined_2( -/*==============*/ - func_node_t* func_node) /*!< in: predefined function node */ -{ - que_node_t* arg; - que_node_t* arg1; - que_node_t* arg2 = 0; /* remove warning (??? bug ???) */ - lint int_val; - byte* data; - ulint len1; - ulint len2; - int func; - ulint i; - - ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC); - - arg1 = func_node->args; - - if (arg1) { - arg2 = que_node_get_next(arg1); - } - - func = func_node->func; - - if (func == PARS_PRINTF_TOKEN) { - - arg = arg1; - - while (arg) { - dfield_print(que_node_get_val(arg)); - - arg = que_node_get_next(arg); - } - - putc('\n', stderr); - - } else if (func == PARS_ASSERT_TOKEN) { - - if (!eval_node_get_ibool_val(arg1)) { - fputs("SQL assertion fails in a stored procedure!\n", - stderr); - } - - ut_a(eval_node_get_ibool_val(arg1)); - - /* This function, or more precisely, a debug procedure, - returns no value */ - - } else if (func == PARS_RND_TOKEN) { - - len1 = (ulint) eval_node_get_int_val(arg1); - len2 = (ulint) eval_node_get_int_val(arg2); - - ut_ad(len2 >= len1); - - if (len2 > len1) { - int_val = (lint) (len1 - + (eval_rnd % (len2 - len1 + 1))); - } else { - int_val = (lint) len1; - } - - eval_rnd = ut_rnd_gen_next_ulint(eval_rnd); - - eval_node_set_int_val(func_node, int_val); - - } else 
if (func == PARS_RND_STR_TOKEN) { - - len1 = (ulint) eval_node_get_int_val(arg1); - - data = eval_node_ensure_val_buf(func_node, len1); - - for (i = 0; i < len1; i++) { - data[i] = (byte)(97 + (eval_rnd % 3)); - - eval_rnd = ut_rnd_gen_next_ulint(eval_rnd); - } - } else { - ut_error; - } -} - -/*****************************************************************//** -Evaluates a notfound-function node. */ -UNIV_INLINE -void -eval_notfound( -/*==========*/ - func_node_t* func_node) /*!< in: function node */ -{ - sym_node_t* cursor; - sel_node_t* sel_node; - ibool ibool_val; - - ut_ad(func_node->func == PARS_NOTFOUND_TOKEN); - - cursor = static_cast<sym_node_t*>(func_node->args); - - ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL); - - if (cursor->token_type == SYM_LIT) { - - ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)), - "SQL", 3) == 0); - - sel_node = cursor->sym_table->query_graph->last_sel_node; - } else { - sel_node = cursor->alias->cursor_def; - } - - if (sel_node->state == SEL_NODE_NO_MORE_ROWS) { - ibool_val = TRUE; - } else { - ibool_val = FALSE; - } - - eval_node_set_ibool_val(func_node, ibool_val); -} - -/*****************************************************************//** -Evaluates a substr-function node. */ -UNIV_INLINE -void -eval_substr( -/*========*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - que_node_t* arg3; - dfield_t* dfield; - byte* str1; - ulint len1; - ulint len2; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - ut_ad(func_node->func == PARS_SUBSTR_TOKEN); - - arg3 = que_node_get_next(arg2); - - str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1))); - - len1 = (ulint) eval_node_get_int_val(arg2); - len2 = (ulint) eval_node_get_int_val(arg3); - - dfield = que_node_get_val(func_node); - - dfield_set_data(dfield, str1 + len1, len2); -} - -/*****************************************************************//** -Evaluates a replstr-procedure node. 
*/ -static -void -eval_replstr( -/*=========*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - que_node_t* arg3; - que_node_t* arg4; - byte* str1; - byte* str2; - ulint len1; - ulint len2; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL); - - arg3 = que_node_get_next(arg2); - arg4 = que_node_get_next(arg3); - - str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1))); - str2 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg2))); - - len1 = (ulint) eval_node_get_int_val(arg3); - len2 = (ulint) eval_node_get_int_val(arg4); - - if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2) - || (dfield_get_len(que_node_get_val(arg2)) < len2)) { - - ut_error; - } - - ut_memcpy(str1 + len1, str2, len2); -} - -/*****************************************************************//** -Evaluates an instr-function node. */ -static -void -eval_instr( -/*=======*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - dfield_t* dfield1; - dfield_t* dfield2; - lint int_val; - byte* str1; - byte* str2; - byte match_char; - ulint len1; - ulint len2; - ulint i; - ulint j; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - dfield1 = que_node_get_val(arg1); - dfield2 = que_node_get_val(arg2); - - str1 = static_cast<byte*>(dfield_get_data(dfield1)); - str2 = static_cast<byte*>(dfield_get_data(dfield2)); - - len1 = dfield_get_len(dfield1); - len2 = dfield_get_len(dfield2); - - if (len2 == 0) { - ut_error; - } - - match_char = str2[0]; - - for (i = 0; i < len1; i++) { - /* In this outer loop, the number of matched characters is 0 */ - - if (str1[i] == match_char) { - - if (i + len2 > len1) { - - break; - } - - for (j = 1;; j++) { - /* We have already matched j characters */ - - if (j == len2) { - int_val = i + 1; - - goto match_found; - } - - if (str1[i + j] != str2[j]) { - - break; - } - } - } - } - - 
int_val = 0; - -match_found: - eval_node_set_int_val(func_node, int_val); -} - -/*****************************************************************//** -Evaluates a predefined function node. */ -UNIV_INLINE -void -eval_binary_to_number( -/*==================*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg1; - dfield_t* dfield; - byte* str1; - byte* str2; - ulint len1; - ulint int_val; - - arg1 = func_node->args; - - dfield = que_node_get_val(arg1); - - str1 = static_cast<byte*>(dfield_get_data(dfield)); - len1 = dfield_get_len(dfield); - - if (len1 > 4) { - ut_error; - } - - if (len1 == 4) { - str2 = str1; - } else { - int_val = 0; - str2 = (byte*) &int_val; - - ut_memcpy(str2 + (4 - len1), str1, len1); - } - - eval_node_copy_and_alloc_val(func_node, str2, 4); -} - -/*****************************************************************//** -Evaluates a predefined function node. */ -static -void -eval_concat( -/*========*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg; - dfield_t* dfield; - byte* data; - ulint len; - ulint len1; - - arg = func_node->args; - len = 0; - - while (arg) { - len1 = dfield_get_len(que_node_get_val(arg)); - - len += len1; - - arg = que_node_get_next(arg); - } - - data = eval_node_ensure_val_buf(func_node, len); - - arg = func_node->args; - len = 0; - - while (arg) { - dfield = que_node_get_val(arg); - len1 = dfield_get_len(dfield); - - ut_memcpy(data + len, dfield_get_data(dfield), len1); - - len += len1; - - arg = que_node_get_next(arg); - } -} - -/*****************************************************************//** -Evaluates a predefined function node. If the first argument is an integer, -this function looks at the second argument which is the integer length in -bytes, and converts the integer to a VARCHAR. -If the first argument is of some other type, this function converts it to -BINARY. 
*/ -UNIV_INLINE -void -eval_to_binary( -/*===========*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - dfield_t* dfield; - byte* str1; - ulint len; - ulint len1; - - arg1 = func_node->args; - - str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1))); - - if (dtype_get_mtype(que_node_get_data_type(arg1)) != DATA_INT) { - - len = dfield_get_len(que_node_get_val(arg1)); - - dfield = que_node_get_val(func_node); - - dfield_set_data(dfield, str1, len); - - return; - } - - arg2 = que_node_get_next(arg1); - - len1 = (ulint) eval_node_get_int_val(arg2); - - if (len1 > 4) { - - ut_error; - } - - dfield = que_node_get_val(func_node); - - dfield_set_data(dfield, str1 + (4 - len1), len1); -} - -/*****************************************************************//** -Evaluates a predefined function node. */ -UNIV_INLINE -void -eval_predefined( -/*============*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg1; - lint int_val; - byte* data; - int func; - - func = func_node->func; - - arg1 = func_node->args; - - if (func == PARS_LENGTH_TOKEN) { - - int_val = (lint) dfield_get_len(que_node_get_val(arg1)); - - } else if (func == PARS_TO_CHAR_TOKEN) { - - /* Convert number to character string as a - signed decimal integer. */ - - ulint uint_val; - int int_len; - - int_val = eval_node_get_int_val(arg1); - - /* Determine the length of the string. 
*/ - - if (int_val == 0) { - int_len = 1; /* the number 0 occupies 1 byte */ - } else { - int_len = 0; - if (int_val < 0) { - uint_val = ((ulint) -int_val - 1) + 1; - int_len++; /* reserve space for minus sign */ - } else { - uint_val = (ulint) int_val; - } - for (; uint_val > 0; int_len++) { - uint_val /= 10; - } - } - - /* allocate the string */ - data = eval_node_ensure_val_buf(func_node, int_len + 1); - - /* add terminating NUL character */ - data[int_len] = 0; - - /* convert the number */ - - if (int_val == 0) { - data[0] = '0'; - } else { - int tmp; - if (int_val < 0) { - data[0] = '-'; /* preceding minus sign */ - uint_val = ((ulint) -int_val - 1) + 1; - } else { - uint_val = (ulint) int_val; - } - for (tmp = int_len; uint_val > 0; uint_val /= 10) { - data[--tmp] = (byte) - ('0' + (byte)(uint_val % 10)); - } - } - - dfield_set_len(que_node_get_val(func_node), int_len); - - return; - - } else if (func == PARS_TO_NUMBER_TOKEN) { - - int_val = atoi((char*) - dfield_get_data(que_node_get_val(arg1))); - - } else if (func == PARS_SYSDATE_TOKEN) { - int_val = (lint) ut_time(); - } else { - eval_predefined_2(func_node); - - return; - } - - eval_node_set_int_val(func_node, int_val); -} - -/*****************************************************************//** -Evaluates a function node. 
*/ -UNIV_INTERN -void -eval_func( -/*======*/ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg; - ulint fclass; - ulint func; - - ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC); - - fclass = func_node->fclass; - func = func_node->func; - - arg = func_node->args; - - /* Evaluate first the argument list */ - while (arg) { - eval_exp(arg); - - /* The functions are not defined for SQL null argument - values, except for eval_cmp and notfound */ - - if (dfield_is_null(que_node_get_val(arg)) - && (fclass != PARS_FUNC_CMP) - && (func != PARS_NOTFOUND_TOKEN) - && (func != PARS_PRINTF_TOKEN)) { - ut_error; - } - - arg = que_node_get_next(arg); - } - - switch (fclass) { - case PARS_FUNC_CMP: - eval_cmp(func_node); - return; - case PARS_FUNC_ARITH: - eval_arith(func_node); - return; - case PARS_FUNC_AGGREGATE: - eval_aggregate(func_node); - return; - case PARS_FUNC_PREDEFINED: - switch (func) { - case PARS_NOTFOUND_TOKEN: - eval_notfound(func_node); - return; - case PARS_SUBSTR_TOKEN: - eval_substr(func_node); - return; - case PARS_REPLSTR_TOKEN: - eval_replstr(func_node); - return; - case PARS_INSTR_TOKEN: - eval_instr(func_node); - return; - case PARS_BINARY_TO_NUMBER_TOKEN: - eval_binary_to_number(func_node); - return; - case PARS_CONCAT_TOKEN: - eval_concat(func_node); - return; - case PARS_TO_BINARY_TOKEN: - eval_to_binary(func_node); - return; - default: - eval_predefined(func_node); - return; - } - case PARS_FUNC_LOGICAL: - eval_logical(func_node); - return; - } - - ut_error; -} diff --git a/storage/xtradb/eval/eval0proc.cc b/storage/xtradb/eval/eval0proc.cc deleted file mode 100644 index e6f3a32cd48..00000000000 --- a/storage/xtradb/eval/eval0proc.cc +++ /dev/null @@ -1,296 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file eval/eval0proc.cc -Executes SQL stored procedures and their control structures - -Created 1/20/1998 Heikki Tuuri -*******************************************************/ - -#include "eval0proc.h" - -#ifdef UNIV_NONINL -#include "eval0proc.ic" -#endif - -/**********************************************************************//** -Performs an execution step of an if-statement node. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -if_step( -/*====*/ - que_thr_t* thr) /*!< in: query thread */ -{ - if_node_t* node; - elsif_node_t* elsif_node; - - ut_ad(thr); - - node = static_cast<if_node_t*>(thr->run_node); - ut_ad(que_node_get_type(node) == QUE_NODE_IF); - - if (thr->prev_node == que_node_get_parent(node)) { - - /* Evaluate the condition */ - - eval_exp(node->cond); - - if (eval_node_get_ibool_val(node->cond)) { - - /* The condition evaluated to TRUE: start execution - from the first statement in the statement list */ - - thr->run_node = node->stat_list; - - } else if (node->else_part) { - thr->run_node = node->else_part; - - } else if (node->elsif_list) { - elsif_node = node->elsif_list; - - for (;;) { - eval_exp(elsif_node->cond); - - if (eval_node_get_ibool_val( - elsif_node->cond)) { - - /* The condition evaluated to TRUE: - start execution from the first - statement in the statement list */ - - thr->run_node = elsif_node->stat_list; - - break; - } - - elsif_node = static_cast<elsif_node_t*>( - que_node_get_next(elsif_node)); - - if (elsif_node == NULL) { - thr->run_node = NULL; - - break; - } - } - } else { - thr->run_node = NULL; - } - } else { - /* Move to the next statement */ - ut_ad(que_node_get_next(thr->prev_node) == NULL); - - thr->run_node = NULL; - } - - if (thr->run_node == NULL) { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/**********************************************************************//** -Performs an execution step of a while-statement node. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -while_step( -/*=======*/ - que_thr_t* thr) /*!< in: query thread */ -{ - while_node_t* node; - - ut_ad(thr); - - node = static_cast<while_node_t*>(thr->run_node); - ut_ad(que_node_get_type(node) == QUE_NODE_WHILE); - - ut_ad((thr->prev_node == que_node_get_parent(node)) - || (que_node_get_next(thr->prev_node) == NULL)); - - /* Evaluate the condition */ - - eval_exp(node->cond); - - if (eval_node_get_ibool_val(node->cond)) { - - /* The condition evaluated to TRUE: start execution - from the first statement in the statement list */ - - thr->run_node = node->stat_list; - } else { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/**********************************************************************//** -Performs an execution step of an assignment statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -assign_step( -/*========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - assign_node_t* node; - - ut_ad(thr); - - node = static_cast<assign_node_t*>(thr->run_node); - ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT); - - /* Evaluate the value to assign */ - - eval_exp(node->val); - - eval_node_copy_val(node->var->alias, node->val); - - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/**********************************************************************//** -Performs an execution step of a for-loop node. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -for_step( -/*=====*/ - que_thr_t* thr) /*!< in: query thread */ -{ - for_node_t* node; - que_node_t* parent; - lint loop_var_value; - - ut_ad(thr); - - node = static_cast<for_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_FOR); - - parent = que_node_get_parent(node); - - if (thr->prev_node != parent) { - - /* Move to the next statement */ - thr->run_node = que_node_get_next(thr->prev_node); - - if (thr->run_node != NULL) { - - return(thr); - } - - /* Increment the value of loop_var */ - - loop_var_value = 1 + eval_node_get_int_val(node->loop_var); - } else { - /* Initialize the loop */ - - eval_exp(node->loop_start_limit); - eval_exp(node->loop_end_limit); - - loop_var_value = eval_node_get_int_val(node->loop_start_limit); - - node->loop_end_value - = (int) eval_node_get_int_val(node->loop_end_limit); - } - - /* Check if we should do another loop */ - - if (loop_var_value > node->loop_end_value) { - - /* Enough loops done */ - - thr->run_node = parent; - } else { - eval_node_set_int_val(node->loop_var, loop_var_value); - - thr->run_node = node->stat_list; - } - - return(thr); -} - -/**********************************************************************//** -Performs an execution step of an exit statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -exit_step( -/*======*/ - que_thr_t* thr) /*!< in: query thread */ -{ - exit_node_t* node; - que_node_t* loop_node; - - ut_ad(thr); - - node = static_cast<exit_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_EXIT); - - /* Loops exit by setting thr->run_node as the loop node's parent, so - find our containing loop node and get its parent. */ - - loop_node = que_node_get_containing_loop_node(node); - - /* If someone uses an EXIT statement outside of a loop, this will - trigger. 
*/ - ut_a(loop_node); - - thr->run_node = que_node_get_parent(loop_node); - - return(thr); -} - -/**********************************************************************//** -Performs an execution step of a return-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -return_step( -/*========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - return_node_t* node; - que_node_t* parent; - - ut_ad(thr); - - node = static_cast<return_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_RETURN); - - parent = node; - - while (que_node_get_type(parent) != QUE_NODE_PROC) { - - parent = que_node_get_parent(parent); - } - - ut_a(parent); - - thr->run_node = que_node_get_parent(parent); - - return(thr); -} diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc deleted file mode 100644 index e73d600d2ca..00000000000 --- a/storage/xtradb/fil/fil0crypt.cc +++ /dev/null @@ -1,2662 +0,0 @@ -/***************************************************************************** -Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ -/**************************************************//** -@file fil0crypt.cc -Innodb file space encrypt/decrypt - -Created Jonas Oreland Google -Modified Jan Lindström jan.lindstrom@mariadb.com -*******************************************************/ - -#include "fil0fil.h" -#include "fil0crypt.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "mach0data.h" -#include "log0recv.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "page0zip.h" -#include "ut0ut.h" -#include "btr0scrub.h" -#include "fsp0fsp.h" -#include "fil0pagecompress.h" -#include "ha_prototypes.h" // IB_LOG_ -#include <my_crypt.h> - -/** Mutex for keys */ -static ib_mutex_t fil_crypt_key_mutex; - -static bool fil_crypt_threads_inited = false; - -#ifdef UNIV_PFS_MUTEX -static mysql_pfs_key_t fil_crypt_key_mutex_key; -#endif - -/** Is encryption enabled/disabled */ -UNIV_INTERN ulong srv_encrypt_tables = 0; - -/** No of key rotation threads requested */ -UNIV_INTERN uint srv_n_fil_crypt_threads = 0; - -/** No of key rotation threads started */ -UNIV_INTERN uint srv_n_fil_crypt_threads_started = 0; - -/** At this age or older a space/page will be rotated */ -UNIV_INTERN uint srv_fil_crypt_rotate_key_age; - -/** Event to signal FROM the key rotation threads. */ -static os_event_t fil_crypt_event; - -/** Event to signal TO the key rotation threads. */ -UNIV_INTERN os_event_t fil_crypt_threads_event; - -/** Event for waking up threads throttle. */ -static os_event_t fil_crypt_throttle_sleep_event; - -/** Mutex for key rotation threads. 
*/ -UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex; - -#ifdef UNIV_PFS_MUTEX -static mysql_pfs_key_t fil_crypt_threads_mutex_key; -#endif - -/** Variable ensuring only 1 thread at time does initial conversion */ -static bool fil_crypt_start_converting = false; - -/** Variables for throttling */ -UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop -static uint srv_alloc_time = 3; // allocate iops for 3s at a time -static uint n_fil_crypt_iops_allocated = 0; - -/** Variables for scrubbing */ -extern uint srv_background_scrub_data_interval; -extern uint srv_background_scrub_data_check_interval; - -#define DEBUG_KEYROTATION_THROTTLING 0 - -/** Statistics variables */ -static fil_crypt_stat_t crypt_stat; -static ib_mutex_t crypt_stat_mutex; - -#ifdef UNIV_PFS_MUTEX -static mysql_pfs_key_t fil_crypt_stat_mutex_key; - -/** - * key for crypt data mutex -*/ -UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key; -#endif - -/** Is background scrubbing enabled, defined on btr0scrub.cc */ -extern my_bool srv_background_scrub_data_uncompressed; -extern my_bool srv_background_scrub_data_compressed; - -static bool -fil_crypt_needs_rotation( - fil_encryption_t encrypt_mode, /*!< in: Encryption - mode */ - uint key_version, /*!< in: Key version */ - uint latest_key_version, /*!< in: Latest key version */ - uint rotate_key_age); /*!< in: When to rotate */ - -/********************************************************************* -Init space crypt */ -UNIV_INTERN -void -fil_space_crypt_init() -{ - mutex_create(fil_crypt_key_mutex_key, - &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK); - - fil_crypt_throttle_sleep_event = os_event_create(); - - mutex_create(fil_crypt_stat_mutex_key, - &crypt_stat_mutex, SYNC_NO_ORDER_CHECK); - - memset(&crypt_stat, 0, sizeof(crypt_stat)); -} - -/********************************************************************* -Cleanup space crypt */ -UNIV_INTERN -void -fil_space_crypt_cleanup() -{ - os_event_free(fil_crypt_throttle_sleep_event); - 
fil_crypt_throttle_sleep_event = NULL; - mutex_free(&fil_crypt_key_mutex); - mutex_free(&crypt_stat_mutex); -} - -/** -Get latest key version from encryption plugin. -@return key version or ENCRYPTION_KEY_VERSION_INVALID */ -uint -fil_space_crypt_t::key_get_latest_version(void) -{ - uint key_version = key_found; - - if (is_key_found()) { - key_version = encryption_key_get_latest_version(key_id); - srv_stats.n_key_requests.inc(); - key_found = key_version; - } - - return key_version; -} - -/****************************************************************** -Get the latest(key-version), waking the encrypt thread, if needed -@param[in,out] crypt_data Crypt data */ -static inline -uint -fil_crypt_get_latest_key_version( - fil_space_crypt_t* crypt_data) -{ - ut_ad(crypt_data != NULL); - - uint key_version = crypt_data->key_get_latest_version(); - - if (crypt_data->is_key_found()) { - - if (fil_crypt_needs_rotation(crypt_data->encryption, - crypt_data->min_key_version, - key_version, - srv_fil_crypt_rotate_key_age)) { - os_event_set(fil_crypt_threads_event); - } - } - - return key_version; -} - -/****************************************************************** -Mutex helper for crypt_data->scheme */ -void -crypt_data_scheme_locker( -/*=====================*/ - st_encryption_scheme* scheme, - int exit) -{ - fil_space_crypt_t* crypt_data = - static_cast<fil_space_crypt_t*>(scheme); - - if (exit) { - mutex_exit(&crypt_data->mutex); - } else { - mutex_enter(&crypt_data->mutex); - } -} - -/****************************************************************** -Create a fil_space_crypt_t object -@param[in] type CRYPT_SCHEME_UNENCRYPTE or - CRYPT_SCHEME_1 -@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or - FIL_ENCRYPTION_ON or - FIL_ENCRYPTION_OFF -@param[in] min_key_version key_version or 0 -@param[in] key_id Used key id -@return crypt object */ -static -fil_space_crypt_t* -fil_space_create_crypt_data( - uint type, - fil_encryption_t encrypt_mode, - uint min_key_version, - uint 
key_id) -{ - void* buf = mem_zalloc(sizeof(fil_space_crypt_t)); - fil_space_crypt_t* crypt_data = NULL; - - if (buf) { - crypt_data = new(buf) - fil_space_crypt_t( - type, - min_key_version, - key_id, - encrypt_mode); - } - - return crypt_data; -} - -/****************************************************************** -Create a fil_space_crypt_t object -@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or - FIL_ENCRYPTION_ON or - FIL_ENCRYPTION_OFF - -@param[in] key_id Encryption key id -@return crypt object */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_create_crypt_data( - fil_encryption_t encrypt_mode, - uint key_id) -{ - return (fil_space_create_crypt_data(0, encrypt_mode, 0, key_id)); -} - -/****************************************************************** -Merge fil_space_crypt_t object -@param[in,out] dst Destination cryp data -@param[in] src Source crypt data */ -UNIV_INTERN -void -fil_space_merge_crypt_data( - fil_space_crypt_t* dst, - const fil_space_crypt_t* src) -{ - mutex_enter(&dst->mutex); - - /* validate that they are mergeable */ - ut_a(src->type == CRYPT_SCHEME_UNENCRYPTED || - src->type == CRYPT_SCHEME_1); - - ut_a(dst->type == CRYPT_SCHEME_UNENCRYPTED || - dst->type == CRYPT_SCHEME_1); - - dst->encryption = src->encryption; - dst->type = src->type; - dst->min_key_version = src->min_key_version; - dst->keyserver_requests += src->keyserver_requests; - - mutex_exit(&dst->mutex); -} - -/****************************************************************** -Read crypt data from a page (0) -@param[in] space space_id -@param[in] page Page 0 -@param[in] offset Offset to crypt data -@return crypt data from page 0 or NULL. */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_read_crypt_data( - ulint space, - const byte* page, - ulint offset) -{ - if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) { - /* Crypt data is not stored. */ - return NULL; - } - - ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0); - - if (! 
(type == CRYPT_SCHEME_UNENCRYPTED || - type == CRYPT_SCHEME_1)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Found non sensible crypt scheme: " ULINTPF " for space " ULINTPF - " offset: " ULINTPF " bytes: " - "[ %.2x %.2x %.2x %.2x %.2x %.2x ].", - type, space, offset, - page[offset + 0 + MAGIC_SZ], - page[offset + 1 + MAGIC_SZ], - page[offset + 2 + MAGIC_SZ], - page[offset + 3 + MAGIC_SZ], - page[offset + 4 + MAGIC_SZ], - page[offset + 5 + MAGIC_SZ]); - ut_error; - } - - fil_space_crypt_t* crypt_data; - ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1); - - if (! (iv_length == sizeof(crypt_data->iv))) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Found non sensible iv length: %lu for space %lu " - " offset: %lu type: %lu bytes: " - "[ %.2x %.2x %.2x %.2x %.2x %.2x ].", - iv_length, space, offset, type, - page[offset + 0 + MAGIC_SZ], - page[offset + 1 + MAGIC_SZ], - page[offset + 2 + MAGIC_SZ], - page[offset + 3 + MAGIC_SZ], - page[offset + 4 + MAGIC_SZ], - page[offset + 5 + MAGIC_SZ]); - ut_error; - } - - uint min_key_version = mach_read_from_4 - (page + offset + MAGIC_SZ + 2 + iv_length); - - uint key_id = mach_read_from_4 - (page + offset + MAGIC_SZ + 2 + iv_length + 4); - - fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1( - page + offset + MAGIC_SZ + 2 + iv_length + 8); - - crypt_data = fil_space_create_crypt_data(encryption, key_id); - /* We need to overwrite these as above function will initialize - members */ - crypt_data->type = type; - crypt_data->min_key_version = min_key_version; - crypt_data->page0_offset = offset; - memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length); - - return crypt_data; -} - -/****************************************************************** -Free a crypt data object -@param[in,out] crypt_data crypt data to be freed */ -UNIV_INTERN -void -fil_space_destroy_crypt_data( - fil_space_crypt_t **crypt_data) -{ - if (crypt_data != NULL && (*crypt_data) != NULL) { - fil_space_crypt_t* c = *crypt_data; - 
c->~fil_space_crypt_t(); - mem_free(c); - *crypt_data = NULL; - } -} - -/****************************************************************** -Write crypt data to a page (0) -@param[in,out] page0 Page 0 where to write -@param[in,out] mtr Minitransaction */ -UNIV_INTERN -void -fil_space_crypt_t::write_page0( - byte* page, - mtr_t* mtr) -{ - ulint space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - const uint len = sizeof(iv); - ulint zip_size = fsp_header_get_zip_size(page); - const ulint offset = fsp_header_get_crypt_offset(zip_size); - page0_offset = offset; - - /* - redo log this as bytewise updates to page 0 - followed by an MLOG_FILE_WRITE_CRYPT_DATA - (that will during recovery update fil_space_t) - */ - mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr); - mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr); - mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr); - mlog_write_string(page + offset + MAGIC_SZ + 2, iv, len, - mtr); - mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version, - MLOG_4BYTES, mtr); - mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len + 4, key_id, - MLOG_4BYTES, mtr); - mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len + 8, encryption, - MLOG_1BYTE, mtr); - - byte* log_ptr = mlog_open(mtr, 11 + 17 + len); - - if (log_ptr != NULL) { - log_ptr = mlog_write_initial_log_record_fast( - page, - MLOG_FILE_WRITE_CRYPT_DATA, - log_ptr, mtr); - mach_write_to_4(log_ptr, space_id); - log_ptr += 4; - mach_write_to_2(log_ptr, offset); - log_ptr += 2; - mach_write_to_1(log_ptr, type); - log_ptr += 1; - mach_write_to_1(log_ptr, len); - log_ptr += 1; - mach_write_to_4(log_ptr, min_key_version); - log_ptr += 4; - mach_write_to_4(log_ptr, key_id); - log_ptr += 4; - mach_write_to_1(log_ptr, encryption); - log_ptr += 1; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, iv, len); - } -} - -/****************************************************************** -Set crypt 
data for a tablespace -@param[in,out] space Tablespace -@param[in,out] crypt_data Crypt data to be set -@return crypt_data in tablespace */ -static -fil_space_crypt_t* -fil_space_set_crypt_data( - fil_space_t* space, - fil_space_crypt_t* crypt_data) -{ - fil_space_crypt_t* free_crypt_data = NULL; - fil_space_crypt_t* ret_crypt_data = NULL; - - /* Provided space is protected using fil_space_acquire() - from concurrent operations. */ - if (space->crypt_data != NULL) { - /* There is already crypt data present, - merge new crypt_data */ - fil_space_merge_crypt_data(space->crypt_data, - crypt_data); - ret_crypt_data = space->crypt_data; - free_crypt_data = crypt_data; - } else { - space->crypt_data = crypt_data; - ret_crypt_data = space->crypt_data; - } - - if (free_crypt_data != NULL) { - /* there was already crypt data present and the new crypt - * data provided as argument to this function has been merged - * into that => free new crypt data - */ - fil_space_destroy_crypt_data(&free_crypt_data); - } - - return ret_crypt_data; -} - -/****************************************************************** -Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry -@param[in] ptr Log entry start -@param[in] end_ptr Log entry end -@param[in] block buffer block -@return position on log buffer */ -UNIV_INTERN -byte* -fil_parse_write_crypt_data( - byte* ptr, - const byte* end_ptr, - const buf_block_t* block, - dberr_t* err) -{ - /* check that redo log entry is complete */ - size_t entry_size = - 4 + // size of space_id - 2 + // size of offset - 1 + // size of type - 1 + // size of iv-len - 4 + // size of min_key_version - 4 + // size of key_id - 1; // fil_encryption_t - - *err = DB_SUCCESS; - - if (ptr + entry_size > end_ptr) { - return NULL; - } - - ulint space_id = mach_read_from_4(ptr); - ptr += 4; - uint offset = mach_read_from_2(ptr); - ptr += 2; - uint type = mach_read_from_1(ptr); - ptr += 1; - size_t len = mach_read_from_1(ptr); - ptr += 1; - - ut_a(type == CRYPT_SCHEME_UNENCRYPTED 
|| - type == CRYPT_SCHEME_1); // only supported - - ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported - uint min_key_version = mach_read_from_4(ptr); - ptr += 4; - - uint key_id = mach_read_from_4(ptr); - ptr += 4; - - fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(ptr); - ptr +=1; - - if (ptr + len > end_ptr) { - return NULL; - } - - fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(encryption, key_id); - /* Need to overwrite these as above will initialize fields. */ - crypt_data->page0_offset = offset; - crypt_data->min_key_version = min_key_version; - crypt_data->encryption = encryption; - memcpy(crypt_data->iv, ptr, len); - ptr += len; - - /* update fil_space memory cache with crypt_data */ - if (fil_space_t* space = fil_space_acquire_silent(space_id)) { - crypt_data = fil_space_set_crypt_data(space, crypt_data); - fil_space_release(space); - /* Check is used key found from encryption plugin */ - if (crypt_data->should_encrypt() - && !crypt_data->is_key_found()) { - *err = DB_DECRYPTION_FAILED; - } - } else { - fil_space_destroy_crypt_data(&crypt_data); - } - - return ptr; -} - -/****************************************************************** -Encrypt a buffer -@param[in,out] crypt_data Crypt data -@param[in] space space_id -@param[in] offset Page offset -@param[in] lsn Log sequence number -@param[in] src_frame Page to encrypt -@param[in] zip_size Compressed size or 0 -@param[in,out] dst_frame Output buffer -@return encrypted buffer or NULL */ -UNIV_INTERN -byte* -fil_encrypt_buf( - fil_space_crypt_t* crypt_data, - ulint space, - ulint offset, - lsn_t lsn, - const byte* src_frame, - ulint zip_size, - byte* dst_frame) -{ - ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; - uint key_version = fil_crypt_get_latest_key_version(crypt_data); - - if (key_version == ENCRYPTION_KEY_VERSION_INVALID) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Unknown key id %u. 
Can't continue!\n", - crypt_data->key_id); - ut_error; - } - - ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); - ibool page_compressed = (orig_page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - ulint header_len = FIL_PAGE_DATA; - - if (page_compressed) { - header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE); - } - - /* FIL page header is not encrypted */ - memcpy(dst_frame, src_frame, header_len); - - /* Store key version */ - mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, key_version); - - /* Calculate the start offset in a page */ - ulint unencrypted_bytes = header_len + FIL_PAGE_DATA_END; - ulint srclen = page_size - unencrypted_bytes; - const byte* src = src_frame + header_len; - byte* dst = dst_frame + header_len; - uint32 dstlen = 0; - - if (page_compressed) { - srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA); - } - - int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen, - crypt_data, key_version, - space, offset, lsn); - - if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Unable to encrypt data-block " - " src: %p srclen: %ld buf: %p buflen: %d." - " return-code: %d. Can't continue!\n", - src, (long)srclen, - dst, dstlen, rc); - ut_error; - } - - /* For compressed tables we do not store the FIL header because - the whole page is not stored to the disk. In compressed tables only - the FIL header + compressed (and now encrypted) payload alligned - to sector boundary is written. 
*/ - if (!page_compressed) { - /* FIL page trailer is also not encrypted */ - memcpy(dst_frame + page_size - FIL_PAGE_DATA_END, - src_frame + page_size - FIL_PAGE_DATA_END, - FIL_PAGE_DATA_END); - } else { - /* Clean up rest of buffer */ - memset(dst_frame+header_len+srclen, 0, page_size - (header_len+srclen)); - } - - /* handle post encryption checksum */ - ib_uint32_t checksum = 0; - - checksum = fil_crypt_calculate_checksum(zip_size, dst_frame); - - // store the post-encryption checksum after the key-version - mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, checksum); - - ut_ad(fil_space_verify_crypt_checksum(dst_frame, zip_size, NULL, offset)); - - srv_stats.pages_encrypted.inc(); - - return dst_frame; -} - -/****************************************************************** -Encrypt a page - -@param[in] space Tablespace -@param[in] offset Page offset -@param[in] lsn Log sequence number -@param[in] src_frame Page to encrypt -@param[in,out] dst_frame Output buffer -@return encrypted buffer or NULL */ -UNIV_INTERN -byte* -fil_space_encrypt( - const fil_space_t* space, - ulint offset, - lsn_t lsn, - byte* src_frame, - byte* dst_frame) -{ - ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); - - if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR - || orig_page_type==FIL_PAGE_TYPE_XDES) { - /* File space header or extent descriptor do not need to be - encrypted. 
*/ - return (src_frame); - } - - if (!space->crypt_data || !space->crypt_data->is_encrypted()) { - return (src_frame); - } - - fil_space_crypt_t* crypt_data = space->crypt_data; - ut_ad(space->n_pending_ios > 0); - ulint zip_size = fsp_flags_get_zip_size(space->flags); - byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn, src_frame, zip_size, dst_frame); - -#ifdef UNIV_DEBUG - if (tmp) { - /* Verify that encrypted buffer is not corrupted */ - byte* tmp_mem = (byte *)malloc(UNIV_PAGE_SIZE); - dberr_t err = DB_SUCCESS; - byte* src = src_frame; - bool page_compressed_encrypted = (mach_read_from_2(tmp+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - byte* comp_mem = NULL; - byte* uncomp_mem = NULL; - ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE; - - if (page_compressed_encrypted) { - comp_mem = (byte *)malloc(UNIV_PAGE_SIZE); - uncomp_mem = (byte *)malloc(UNIV_PAGE_SIZE); - memcpy(comp_mem, src_frame, UNIV_PAGE_SIZE); - fil_decompress_page(uncomp_mem, comp_mem, - srv_page_size, NULL); - src = uncomp_mem; - } - - bool corrupted1 = buf_page_is_corrupted(true, src, zip_size, space); - bool ok = fil_space_decrypt(crypt_data, tmp_mem, size, tmp, &err); - - /* Need to decompress the page if it was also compressed */ - if (page_compressed_encrypted) { - memcpy(comp_mem, tmp_mem, UNIV_PAGE_SIZE); - fil_decompress_page(tmp_mem, comp_mem, - srv_page_size, NULL); - } - - bool corrupted = buf_page_is_corrupted(true, tmp_mem, zip_size, space); - memcpy(tmp_mem+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, src+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 8); - bool different = memcmp(src, tmp_mem, size); - - if (!ok || corrupted || corrupted1 || err != DB_SUCCESS || different) { - fprintf(stderr, "ok %d corrupted %d corrupted1 %d err %d different %d\n", - ok , corrupted, corrupted1, err, different); - fprintf(stderr, "src_frame\n"); - buf_page_print(src_frame, zip_size, BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, "encrypted_frame\n"); - buf_page_print(tmp, zip_size, 
BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, "decrypted_frame\n"); - buf_page_print(tmp_mem, zip_size, 0); - } - - free(tmp_mem); - - if (comp_mem) { - free(comp_mem); - } - - if (uncomp_mem) { - free(uncomp_mem); - } - } - -#endif /* UNIV_DEBUG */ - - return tmp; -} - -/****************************************************************** -Decrypt a page -@param[in] crypt_data crypt_data -@param[in] tmp_frame Temporary buffer -@param[in] page_size Page size -@param[in,out] src_frame Page to decrypt -@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED -@return true if page decrypted, false if not.*/ -UNIV_INTERN -bool -fil_space_decrypt( - fil_space_crypt_t* crypt_data, - byte* tmp_frame, - ulint page_size, - byte* src_frame, - dberr_t* err) -{ - ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE); - uint key_version = mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET); - ulint space = mach_read_from_4(src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN); - - *err = DB_SUCCESS; - - if (key_version == ENCRYPTION_KEY_NOT_ENCRYPTED) { - return false; - } - - ut_a(crypt_data != NULL && crypt_data->is_encrypted()); - - /* read space & lsn */ - ulint header_len = FIL_PAGE_DATA; - - if (page_compressed) { - header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE); - } - - /* Copy FIL page header, it is not encrypted */ - memcpy(tmp_frame, src_frame, header_len); - - /* Calculate the offset where decryption starts */ - const byte* src = src_frame + header_len; - byte* dst = tmp_frame + header_len; - uint32 dstlen = 0; - ulint srclen = page_size - (header_len + FIL_PAGE_DATA_END); - - if (page_compressed) { - srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA); - } - - int rc = encryption_scheme_decrypt(src, srclen, dst, &dstlen, - 
crypt_data, key_version, - space, offset, lsn); - - if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) { - - if (rc == -1) { - *err = DB_DECRYPTION_FAILED; - return false; - } - - ib_logf(IB_LOG_LEVEL_FATAL, - "Unable to decrypt data-block " - " src: %p srclen: %ld buf: %p buflen: %d." - " return-code: %d. Can't continue!\n", - src, (long)srclen, - dst, dstlen, rc); - ut_error; - } - - /* For compressed tables we do not store the FIL header because - the whole page is not stored to the disk. In compressed tables only - the FIL header + compressed (and now encrypted) payload alligned - to sector boundary is written. */ - if (!page_compressed) { - /* Copy FIL trailer */ - memcpy(tmp_frame + page_size - FIL_PAGE_DATA_END, - src_frame + page_size - FIL_PAGE_DATA_END, - FIL_PAGE_DATA_END); - } - - srv_stats.pages_decrypted.inc(); - - return true; /* page was decrypted */ -} - -/****************************************************************** -Decrypt a page -@param[in] space Tablespace -@param[in] tmp_frame Temporary buffer used for decrypting -@param[in] page_size Page size -@param[in,out] src_frame Page to decrypt -@param[out] decrypted true if page was decrypted -@return decrypted page, or original not encrypted page if decryption is -not needed.*/ -UNIV_INTERN -byte* -fil_space_decrypt( - const fil_space_t* space, - byte* tmp_frame, - byte* src_frame, - bool* decrypted) -{ - dberr_t err = DB_SUCCESS; - byte* res = NULL; - ulint zip_size = fsp_flags_get_zip_size(space->flags); - ulint size = zip_size ? zip_size : UNIV_PAGE_SIZE; - *decrypted = false; - - ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted()); - ut_ad(space->n_pending_ios > 0); - - bool encrypted = fil_space_decrypt( - space->crypt_data, - tmp_frame, - size, - src_frame, - &err); - - if (err == DB_SUCCESS) { - if (encrypted) { - *decrypted = true; - /* Copy the decrypted page back to page buffer, not - really any other options. 
*/ - memcpy(src_frame, tmp_frame, size); - } - - res = src_frame; - } - - return res; -} - -/****************************************************************** -Calculate post encryption checksum -@param[in] zip_size zip_size or 0 -@param[in] dst_frame Block where checksum is calculated -@return page checksum -not needed. */ -UNIV_INTERN -ulint -fil_crypt_calculate_checksum( - ulint zip_size, - const byte* dst_frame) -{ - ib_uint32_t checksum = 0; - - /* For encrypted tables we use only crc32 and strict_crc32 */ - if (zip_size == 0) { - checksum = buf_calc_page_crc32(dst_frame); - } else { - checksum = page_zip_calc_checksum(dst_frame, zip_size, - SRV_CHECKSUM_ALGORITHM_CRC32); - } - - return checksum; -} - -/********************************************************************* -Verify that post encryption checksum match calculated checksum. -This function should be called only if tablespace contains crypt_data -metadata (this is strong indication that tablespace is encrypted). -Function also verifies that traditional checksum does not match -calculated checksum as if it does page could be valid unencrypted, -encrypted, or corrupted. - -@param[in] page Page to verify -@param[in] zip_size zip size -@param[in] space Tablespace -@param[in] pageno Page no -@return true if page is encrypted AND OK, false otherwise */ -UNIV_INTERN -bool -fil_space_verify_crypt_checksum( - byte* page, - ulint zip_size, - const fil_space_t* space, - ulint pageno) -{ - uint key_version = mach_read_from_4(page+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - /* If page is not encrypted, return false */ - if (key_version == 0) { - return(false); - } - - /* Read stored post encryption checksum. 
*/ - ib_uint32_t checksum = mach_read_from_4( - page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); - - /* Declare empty pages non-corrupted */ - if (checksum == 0 - && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_LSN) == 0 - && buf_page_is_zeroes(page, zip_size)) { - return(true); - } - - /* Compressed and encrypted pages do not have checksum. Assume not - corrupted. Page verification happens after decompression in - buf_page_io_complete() using buf_page_is_corrupted(). */ - if (mach_read_from_2(page+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - return (true); - } - - ib_uint32_t cchecksum1 = 0; - ib_uint32_t cchecksum2 = 0; - - /* Calculate checksums */ - if (zip_size) { - cchecksum1 = page_zip_calc_checksum( - page, zip_size, SRV_CHECKSUM_ALGORITHM_CRC32); - - if(cchecksum1 != checksum) { - cchecksum2 = page_zip_calc_checksum( - page, zip_size, - SRV_CHECKSUM_ALGORITHM_INNODB); - } - } else { - cchecksum1 = buf_calc_page_crc32(page); - - if (cchecksum1 != checksum) { - cchecksum2 = (ib_uint32_t) buf_calc_page_new_checksum( - page); - } - } - - /* If stored checksum matches one of the calculated checksums - page is not corrupted. */ - - bool encrypted = (checksum == cchecksum1 || checksum == cchecksum2 - || checksum == BUF_NO_CHECKSUM_MAGIC); - - /* MySQL 5.6 and MariaDB 10.0 and 10.1 will write an LSN to the - first page of each system tablespace file at - FIL_PAGE_FILE_FLUSH_LSN offset. On other pages and in other files, - the field might have been uninitialized until MySQL 5.5. In MySQL 5.7 - (and MariaDB Server 10.2.2) WL#7990 stopped writing the field for other - than page 0 of the system tablespace. - - Starting from MariaDB 10.1 the field has been repurposed for - encryption key_version. - - Starting with MySQL 5.7 (and MariaDB Server 10.2), the - field has been repurposed for SPATIAL INDEX pages for - FIL_RTREE_SPLIT_SEQ_NUM. - - Note that FIL_PAGE_FILE_FLUSH_LSN is not included in the InnoDB page - checksum. 
- - Thus, FIL_PAGE_FILE_FLUSH_LSN could contain any value. While the - field would usually be 0 for pages that are not encrypted, we cannot - assume that a nonzero value means that the page is encrypted. - Therefore we must validate the page both as encrypted and unencrypted - when FIL_PAGE_FILE_FLUSH_LSN does not contain 0. - */ - - ulint checksum1 = mach_read_from_4( - page + FIL_PAGE_SPACE_OR_CHKSUM); - - ulint checksum2 = checksum1; - - bool valid; - - if (zip_size) { - valid = (checksum1 == cchecksum1); - } else { - checksum1 = mach_read_from_4( - page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); - valid = (buf_page_is_checksum_valid_crc32(page,checksum1,checksum2) - || buf_page_is_checksum_valid_innodb(page,checksum1, checksum2)); - } - - if (encrypted && valid) { - /* If page is encrypted and traditional checksums match, - page could be still encrypted, or not encrypted and valid or - corrupted. */ - ib_logf(IB_LOG_LEVEL_ERROR, - " Page %lu in space %s (%lu) maybe corrupted." - " Post encryption checksum %u stored [%lu:%lu] key_version %u", - pageno, - space ? 
space->name : "N/A", - mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID), - checksum, checksum1, checksum2, key_version); - encrypted = false; - } - - return(encrypted); -} - -/***********************************************************************/ - -/** A copy of global key state */ -struct key_state_t { - key_state_t() : key_id(0), key_version(0), - rotate_key_age(srv_fil_crypt_rotate_key_age) {} - bool operator==(const key_state_t& other) const { - return key_version == other.key_version && - rotate_key_age == other.rotate_key_age; - } - uint key_id; - uint key_version; - uint rotate_key_age; -}; - -/*********************************************************************** -Copy global key state -@param[in,out] new_state key state -@param[in] crypt_data crypt data */ -static void -fil_crypt_get_key_state( - key_state_t* new_state, - fil_space_crypt_t* crypt_data) -{ - if (srv_encrypt_tables) { - new_state->key_version = crypt_data->key_get_latest_version(); - new_state->rotate_key_age = srv_fil_crypt_rotate_key_age; - - ut_a(new_state->key_version != ENCRYPTION_KEY_NOT_ENCRYPTED); - } else { - new_state->key_version = 0; - new_state->rotate_key_age = 0; - } -} - -/*********************************************************************** -Check if a key needs rotation given a key_state -@param[in] encrypt_mode Encryption mode -@param[in] key_version Current key version -@param[in] latest_key_version Latest key version -@param[in] rotate_key_age when to rotate -@return true if key needs rotation, false if not */ -static bool -fil_crypt_needs_rotation( - fil_encryption_t encrypt_mode, - uint key_version, - uint latest_key_version, - uint rotate_key_age) -{ - if (key_version == ENCRYPTION_KEY_VERSION_INVALID) { - return false; - } - - if (key_version == 0 && latest_key_version != 0) { - /* this is rotation unencrypted => encrypted - * ignore rotate_key_age */ - return true; - } - - if (latest_key_version == 0 && key_version != 0) { - if (encrypt_mode == 
FIL_ENCRYPTION_DEFAULT) { - /* this is rotation encrypted => unencrypted */ - return true; - } - return false; - } - - /* this is rotation encrypted => encrypted, - * only reencrypt if key is sufficiently old */ - if (key_version + rotate_key_age < latest_key_version) { - return true; - } - - return false; -} - -/** Read page 0 and possible crypt data from there. -@param[in,out] space Tablespace */ -static inline -void -fil_crypt_read_crypt_data(fil_space_t* space) -{ - if (space->crypt_data || space->size) { - /* The encryption metadata has already been read, or - the tablespace is not encrypted and the file has been - opened already. */ - return; - } - - mtr_t mtr; - mtr_start(&mtr); - ulint zip_size = fsp_flags_get_zip_size(space->flags); - ulint offset = fsp_header_get_crypt_offset(zip_size); - if (buf_block_t* block = buf_page_get(space->id, zip_size, 0, - RW_S_LATCH, &mtr)) { - mutex_enter(&fil_system->mutex); - if (!space->crypt_data) { - space->crypt_data = fil_space_read_crypt_data( - space->id, block->frame, offset); - } - mutex_exit(&fil_system->mutex); - } - - mtr_commit(&mtr); -} - -/*********************************************************************** -Start encrypting a space -@param[in,out] space Tablespace -@return true if a recheck is needed */ -static -bool -fil_crypt_start_encrypting_space( - fil_space_t* space) -{ - bool recheck = false; - - mutex_enter(&fil_crypt_threads_mutex); - - fil_space_crypt_t *crypt_data = space->crypt_data; - - /* If space is not encrypted and encryption is not enabled, then - do not continue encrypting the space. */ - if (!crypt_data && !srv_encrypt_tables) { - mutex_exit(&fil_crypt_threads_mutex); - return false; - } - - if (crypt_data != NULL || fil_crypt_start_converting) { - /* someone beat us to it */ - if (fil_crypt_start_converting) { - recheck = true; - } - - mutex_exit(&fil_crypt_threads_mutex); - return recheck; - } - - /* NOTE: we need to write and flush page 0 before publishing - * the crypt data. 
This so that after restart there is no - * risk of finding encrypted pages without having - * crypt data in page 0 */ - - /* 1 - create crypt data */ - crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); - - if (crypt_data == NULL) { - mutex_exit(&fil_crypt_threads_mutex); - return false; - } - - crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; - crypt_data->min_key_version = 0; // all pages are unencrypted - crypt_data->rotate_state.start_time = time(0); - crypt_data->rotate_state.starting = true; - crypt_data->rotate_state.active_threads = 1; - - mutex_enter(&crypt_data->mutex); - crypt_data = fil_space_set_crypt_data(space, crypt_data); - mutex_exit(&crypt_data->mutex); - - fil_crypt_start_converting = true; - mutex_exit(&fil_crypt_threads_mutex); - - do - { - mtr_t mtr; - mtr_start(&mtr); - - /* 2 - get page 0 */ - ulint zip_size = fsp_flags_get_zip_size(space->flags); - buf_block_t* block = buf_page_get_gen(space->id, zip_size, 0, - RW_X_LATCH, - NULL, - BUF_GET, - __FILE__, __LINE__, - &mtr); - - - /* 3 - write crypt data to page 0 */ - byte* frame = buf_block_get_frame(block); - crypt_data->type = CRYPT_SCHEME_1; - crypt_data->write_page0(frame, &mtr); - mtr_commit(&mtr); - - /* record lsn of update */ - lsn_t end_lsn = mtr.end_lsn; - - /* 4 - sync tablespace before publishing crypt data */ - - bool success = false; - ulint sum_pages = 0; - - do { - ulint n_pages = 0; - success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - sum_pages += n_pages; - } while (!success); - - /* 5 - publish crypt data */ - mutex_enter(&fil_crypt_threads_mutex); - mutex_enter(&crypt_data->mutex); - crypt_data->type = CRYPT_SCHEME_1; - ut_a(crypt_data->rotate_state.active_threads == 1); - crypt_data->rotate_state.active_threads = 0; - crypt_data->rotate_state.starting = false; - - fil_crypt_start_converting = false; - mutex_exit(&crypt_data->mutex); - mutex_exit(&fil_crypt_threads_mutex); - - 
return recheck; - } while (0); - - mutex_enter(&crypt_data->mutex); - ut_a(crypt_data->rotate_state.active_threads == 1); - crypt_data->rotate_state.active_threads = 0; - mutex_exit(&crypt_data->mutex); - - mutex_enter(&fil_crypt_threads_mutex); - fil_crypt_start_converting = false; - mutex_exit(&fil_crypt_threads_mutex); - - return recheck; -} - -/** State of a rotation thread */ -struct rotate_thread_t { - explicit rotate_thread_t(uint no) { - memset(this, 0, sizeof(* this)); - thread_no = no; - first = true; - estimated_max_iops = 20; - } - - uint thread_no; - bool first; /*!< is position before first space */ - fil_space_t* space; /*!< current space or NULL */ - ulint offset; /*!< current offset */ - ulint batch; /*!< #pages to rotate */ - uint min_key_version_found;/*!< min key version found but not rotated */ - lsn_t end_lsn; /*!< max lsn when rotating this space */ - - uint estimated_max_iops; /*!< estimation of max iops */ - uint allocated_iops; /*!< allocated iops */ - uint cnt_waited; /*!< #times waited during this slot */ - uint sum_waited_us; /*!< wait time during this slot */ - - fil_crypt_stat_t crypt_stat; // statistics - - btr_scrub_t scrub_data; /* thread local data used by btr_scrub-functions - * when iterating pages of tablespace */ - - /** @return whether this thread should terminate */ - bool should_shutdown() const { - switch (srv_shutdown_state) { - case SRV_SHUTDOWN_NONE: - return thread_no >= srv_n_fil_crypt_threads; - case SRV_SHUTDOWN_CLEANUP: - return true; - case SRV_SHUTDOWN_FLUSH_PHASE: - case SRV_SHUTDOWN_LAST_PHASE: - case SRV_SHUTDOWN_EXIT_THREADS: - break; - } - ut_ad(0); - return true; - } -}; - -/*********************************************************************** -Check if space needs rotation given a key_state -@param[in,out] state Key rotation state -@param[in,out] key_state Key state -@param[in,out] recheck needs recheck ? 
-@return true if space needs key rotation */ -static -bool -fil_crypt_space_needs_rotation( - rotate_thread_t* state, - key_state_t* key_state, - bool* recheck) -{ - fil_space_t* space = state->space; - - /* Make sure that tablespace is normal tablespace */ - if (space->purpose != FIL_TABLESPACE) { - return false; - } - - ut_ad(space->n_pending_ops > 0); - - fil_space_crypt_t *crypt_data = space->crypt_data; - - if (crypt_data == NULL) { - /** - * space has no crypt data - * start encrypting it... - */ - *recheck = fil_crypt_start_encrypting_space(space); - crypt_data = space->crypt_data; - - if (crypt_data == NULL) { - return false; - } - - crypt_data->key_get_latest_version(); - } - - /* If used key_id is not found from encryption plugin we can't - continue to rotate the tablespace */ - if (!crypt_data->is_key_found()) { - return false; - } - - mutex_enter(&crypt_data->mutex); - - do { - /* prevent threads from starting to rotate space */ - if (crypt_data->rotate_state.starting) { - /* recheck this space later */ - *recheck = true; - break; - } - - /* prevent threads from starting to rotate space */ - if (space->is_stopping()) { - break; - } - - if (crypt_data->rotate_state.flushing) { - break; - } - - /* No need to rotate space if encryption is disabled */ - if (crypt_data->not_encrypted()) { - break; - } - - if (crypt_data->key_id != key_state->key_id) { - key_state->key_id= crypt_data->key_id; - fil_crypt_get_key_state(key_state, crypt_data); - } - - bool need_key_rotation = fil_crypt_needs_rotation( - crypt_data->encryption, - crypt_data->min_key_version, - key_state->key_version, key_state->rotate_key_age); - - crypt_data->rotate_state.scrubbing.is_active = - btr_scrub_start_space(space->id, &state->scrub_data); - - time_t diff = time(0) - crypt_data->rotate_state.scrubbing. 
- last_scrub_completed; - - bool need_scrubbing = - (srv_background_scrub_data_uncompressed || - srv_background_scrub_data_compressed) && - crypt_data->rotate_state.scrubbing.is_active - && diff >= 0 - && ulint(diff) >= srv_background_scrub_data_interval; - - if (need_key_rotation == false && need_scrubbing == false) { - break; - } - - mutex_exit(&crypt_data->mutex); - - return true; - } while (0); - - mutex_exit(&crypt_data->mutex); - - - return false; -} - -/*********************************************************************** -Update global statistics with thread statistics -@param[in,out] state key rotation statistics */ -static void -fil_crypt_update_total_stat( - rotate_thread_t *state) -{ - mutex_enter(&crypt_stat_mutex); - crypt_stat.pages_read_from_cache += - state->crypt_stat.pages_read_from_cache; - crypt_stat.pages_read_from_disk += - state->crypt_stat.pages_read_from_disk; - crypt_stat.pages_modified += state->crypt_stat.pages_modified; - crypt_stat.pages_flushed += state->crypt_stat.pages_flushed; - // remote old estimate - crypt_stat.estimated_iops -= state->crypt_stat.estimated_iops; - // add new estimate - crypt_stat.estimated_iops += state->estimated_max_iops; - mutex_exit(&crypt_stat_mutex); - - // make new estimate "current" estimate - memset(&state->crypt_stat, 0, sizeof(state->crypt_stat)); - // record our old (current) estimate - state->crypt_stat.estimated_iops = state->estimated_max_iops; -} - -/*********************************************************************** -Allocate iops to thread from global setting, -used before starting to rotate a space. -@param[in,out] state Rotation state -@return true if allocation succeeded, false if failed */ -static -bool -fil_crypt_alloc_iops( - rotate_thread_t *state) -{ - ut_ad(state->allocated_iops == 0); - - /* We have not yet selected the space to rotate, thus - state might not contain space and we can't check - its status yet. 
*/ - - uint max_iops = state->estimated_max_iops; - mutex_enter(&fil_crypt_threads_mutex); - - if (n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops) { - /* this can happen when user decreases srv_fil_crypt_iops */ - mutex_exit(&fil_crypt_threads_mutex); - return false; - } - - uint alloc = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated; - - if (alloc > max_iops) { - alloc = max_iops; - } - - n_fil_crypt_iops_allocated += alloc; - mutex_exit(&fil_crypt_threads_mutex); - - state->allocated_iops = alloc; - - return alloc > 0; -} - -/*********************************************************************** -Reallocate iops to thread, -used when inside a space -@param[in,out] state Rotation state */ -static -void -fil_crypt_realloc_iops( - rotate_thread_t *state) -{ - ut_a(state->allocated_iops > 0); - - if (10 * state->cnt_waited > state->batch) { - /* if we waited more than 10% re-estimate max_iops */ - ulint avg_wait_time_us = - state->sum_waited_us / state->cnt_waited; - - if (avg_wait_time_us == 0) { - avg_wait_time_us = 1; // prevent division by zero - } - - DBUG_PRINT("ib_crypt", - ("thr_no: %u - update estimated_max_iops from %u to " - ULINTPF ".", - state->thread_no, - state->estimated_max_iops, - 1000000 / avg_wait_time_us)); - - state->estimated_max_iops = uint(1000000 / avg_wait_time_us); - state->cnt_waited = 0; - state->sum_waited_us = 0; - } else { - - DBUG_PRINT("ib_crypt", - ("thr_no: %u only waited %lu%% skip re-estimate.", - state->thread_no, - (100 * state->cnt_waited) / state->batch)); - } - - if (state->estimated_max_iops <= state->allocated_iops) { - /* return extra iops */ - uint extra = state->allocated_iops - state->estimated_max_iops; - - if (extra > 0) { - mutex_enter(&fil_crypt_threads_mutex); - if (n_fil_crypt_iops_allocated < extra) { - /* unknown bug! 
- * crash in debug - * keep n_fil_crypt_iops_allocated unchanged - * in release */ - ut_ad(0); - extra = 0; - } - n_fil_crypt_iops_allocated -= extra; - state->allocated_iops -= extra; - - if (state->allocated_iops == 0) { - /* no matter how slow io system seems to be - * never decrease allocated_iops to 0... */ - state->allocated_iops ++; - n_fil_crypt_iops_allocated ++; - } - - os_event_set(fil_crypt_threads_event); - mutex_exit(&fil_crypt_threads_mutex); - } - } else { - /* see if there are more to get */ - mutex_enter(&fil_crypt_threads_mutex); - if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) { - /* there are extra iops free */ - uint extra = srv_n_fil_crypt_iops - - n_fil_crypt_iops_allocated; - if (state->allocated_iops + extra > - state->estimated_max_iops) { - /* but don't alloc more than our max */ - extra = state->estimated_max_iops - - state->allocated_iops; - } - n_fil_crypt_iops_allocated += extra; - state->allocated_iops += extra; - - DBUG_PRINT("ib_crypt", - ("thr_no: %u increased iops from %u to %u.", - state->thread_no, - state->allocated_iops - extra, - state->allocated_iops)); - - } - mutex_exit(&fil_crypt_threads_mutex); - } - - fil_crypt_update_total_stat(state); -} - -/*********************************************************************** -Return allocated iops to global -@param[in,out] state Rotation state */ -static -void -fil_crypt_return_iops( - rotate_thread_t *state) -{ - if (state->allocated_iops > 0) { - uint iops = state->allocated_iops; - mutex_enter(&fil_crypt_threads_mutex); - if (n_fil_crypt_iops_allocated < iops) { - /* unknown bug! 
- * crash in debug - * keep n_fil_crypt_iops_allocated unchanged - * in release */ - ut_ad(0); - iops = 0; - } - - n_fil_crypt_iops_allocated -= iops; - state->allocated_iops = 0; - os_event_set(fil_crypt_threads_event); - mutex_exit(&fil_crypt_threads_mutex); - } - - fil_crypt_update_total_stat(state); -} - -/*********************************************************************** -Search for a space needing rotation -@param[in,out] key_state Key state -@param[in,out] state Rotation state -@param[in,out] recheck recheck ? */ -static -bool -fil_crypt_find_space_to_rotate( - key_state_t* key_state, - rotate_thread_t* state, - bool* recheck) -{ - /* we need iops to start rotating */ - while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) { - os_event_reset(fil_crypt_threads_event); - os_event_wait_time(fil_crypt_threads_event, 1000000); - } - - if (state->should_shutdown()) { - if (state->space) { - fil_space_release(state->space); - state->space = NULL; - } - return false; - } - - if (state->first) { - state->first = false; - if (state->space) { - fil_space_release(state->space); - } - state->space = NULL; - } - - /* If key rotation is enabled (default) we iterate all tablespaces. - If key rotation is not enabled we iterate only the tablespaces - added to keyrotation list. 
*/ - if (srv_fil_crypt_rotate_key_age) { - state->space = fil_space_next(state->space); - } else { - state->space = fil_space_keyrotate_next(state->space); - } - - while (!state->should_shutdown() && state->space) { - fil_crypt_read_crypt_data(state->space); - - if (fil_crypt_space_needs_rotation(state, key_state, recheck)) { - ut_ad(key_state->key_id); - /* init state->min_key_version_found before - * starting on a space */ - state->min_key_version_found = key_state->key_version; - return true; - } - - if (srv_fil_crypt_rotate_key_age) { - state->space = fil_space_next(state->space); - } else { - state->space = fil_space_keyrotate_next(state->space); - } - } - - /* if we didn't find any space return iops */ - fil_crypt_return_iops(state); - - return false; - -} - -/*********************************************************************** -Start rotating a space -@param[in] key_state Key state -@param[in,out] state Rotation state */ -static -void -fil_crypt_start_rotate_space( - const key_state_t* key_state, - rotate_thread_t* state) -{ - fil_space_crypt_t *crypt_data = state->space->crypt_data; - - ut_ad(crypt_data); - mutex_enter(&crypt_data->mutex); - ut_ad(key_state->key_id == crypt_data->key_id); - - if (crypt_data->rotate_state.active_threads == 0) { - /* only first thread needs to init */ - crypt_data->rotate_state.next_offset = 1; // skip page 0 - /* no need to rotate beyond current max - * if space extends, it will be encrypted with newer version */ - /* FIXME: max_offset could be removed and instead - space->size consulted.*/ - crypt_data->rotate_state.max_offset = state->space->size; - crypt_data->rotate_state.end_lsn = 0; - crypt_data->rotate_state.min_key_version_found = - key_state->key_version; - - crypt_data->rotate_state.start_time = time(0); - - if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED && - crypt_data->is_encrypted() && - key_state->key_version != 0) { - /* this is rotation unencrypted => encrypted */ - crypt_data->type = CRYPT_SCHEME_1; - } 
- } - - /* count active threads in space */ - crypt_data->rotate_state.active_threads++; - - /* Initialize thread local state */ - state->end_lsn = crypt_data->rotate_state.end_lsn; - state->min_key_version_found = - crypt_data->rotate_state.min_key_version_found; - - mutex_exit(&crypt_data->mutex); -} - -/*********************************************************************** -Search for batch of pages needing rotation -@param[in] key_state Key state -@param[in,out] state Rotation state -@return true if page needing key rotation found, false if not found */ -static -bool -fil_crypt_find_page_to_rotate( - const key_state_t* key_state, - rotate_thread_t* state) -{ - ulint batch = srv_alloc_time * state->allocated_iops; - fil_space_t* space = state->space; - - ut_ad(!space || space->n_pending_ops > 0); - - /* If space is marked to be dropped stop rotation. */ - if (!space || space->is_stopping()) { - return false; - } - - fil_space_crypt_t *crypt_data = space->crypt_data; - - mutex_enter(&crypt_data->mutex); - ut_ad(key_state->key_id == crypt_data->key_id); - - bool found = crypt_data->rotate_state.max_offset >= - crypt_data->rotate_state.next_offset; - - if (found) { - state->offset = crypt_data->rotate_state.next_offset; - ulint remaining = crypt_data->rotate_state.max_offset - - crypt_data->rotate_state.next_offset; - - if (batch <= remaining) { - state->batch = batch; - } else { - state->batch = remaining; - } - } - - crypt_data->rotate_state.next_offset += batch; - mutex_exit(&crypt_data->mutex); - return found; -} - -/*********************************************************************** -Check if a page is uninitialized (doesn't need to be rotated) -@param[in] frame Page to check -@param[in] zip_size zip_size or 0 -@return true if page is uninitialized, false if not. 
*/ -static inline -bool -fil_crypt_is_page_uninitialized( - const byte *frame, - uint zip_size) -{ - return (buf_page_is_zeroes(frame, zip_size)); -} - -#define fil_crypt_get_page_throttle(state,offset,mtr,sleeptime_ms) \ - fil_crypt_get_page_throttle_func(state, offset, mtr, \ - sleeptime_ms, __FILE__, __LINE__) - -/*********************************************************************** -Get a page and compute sleep time -@param[in,out] state Rotation state -@param[in] zip_size compressed size or 0 -@param[in] offset Page offset -@param[in,out] mtr Minitransaction -@param[out] sleeptime_ms Sleep time -@param[in] file File where called -@param[in] line Line where called -@return page or NULL*/ -static -buf_block_t* -fil_crypt_get_page_throttle_func( - rotate_thread_t* state, - ulint offset, - mtr_t* mtr, - ulint* sleeptime_ms, - const char* file, - ulint line) -{ - fil_space_t* space = state->space; - ulint zip_size = fsp_flags_get_zip_size(space->flags); - ut_ad(space->n_pending_ops > 0); - - buf_block_t* block = buf_page_try_get_func(space->id, offset, RW_X_LATCH, - true, - file, line, mtr); - if (block != NULL) { - /* page was in buffer pool */ - state->crypt_stat.pages_read_from_cache++; - return block; - } - - /* Before reading from tablespace we need to make sure that - tablespace exists and is not is just being dropped. */ - if (space->is_stopping()) { - return NULL; - } - - state->crypt_stat.pages_read_from_disk++; - - ullint start = ut_time_us(NULL); - block = buf_page_get_gen(space->id, zip_size, offset, - RW_X_LATCH, - NULL, BUF_GET_POSSIBLY_FREED, - file, line, mtr); - ullint end = ut_time_us(NULL); - - if (end < start) { - end = start; // safety... 
- } - - state->cnt_waited++; - state->sum_waited_us += (end - start); - - /* average page load */ - ulint add_sleeptime_ms = 0; - ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited; - ulint alloc_wait_us = 1000000 / state->allocated_iops; - - if (avg_wait_time_us < alloc_wait_us) { - /* we reading faster than we allocated */ - add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000; - } else { - /* if page load time is longer than we want, skip sleeping */ - } - - *sleeptime_ms += add_sleeptime_ms; - - return block; -} - - -/*********************************************************************** -Get block and allocation status - -note: innodb locks fil_space_latch and then block when allocating page -but locks block and then fil_space_latch when freeing page. - -@param[in,out] state Rotation state -@param[in] zip_size Compressed size or 0 -@param[in] offset Page offset -@param[in,out] mtr Minitransaction -@param[out] allocation_status Allocation status -@param[out] sleeptime_ms Sleep time -@return block or NULL -*/ -static -buf_block_t* -btr_scrub_get_block_and_allocation_status( - rotate_thread_t* state, - uint zip_size, - ulint offset, - mtr_t* mtr, - btr_scrub_page_allocation_status_t *allocation_status, - ulint* sleeptime_ms) -{ - mtr_t local_mtr; - buf_block_t *block = NULL; - fil_space_t* space = state->space; - - ut_ad(space->n_pending_ops > 0); - ut_ad(zip_size == fsp_flags_get_zip_size(space->flags)); - - mtr_start(&local_mtr); - - *allocation_status = fsp_page_is_free(space->id, offset, &local_mtr) ? 
- BTR_SCRUB_PAGE_FREE : - BTR_SCRUB_PAGE_ALLOCATED; - - if (*allocation_status == BTR_SCRUB_PAGE_FREE) { - /* this is easy case, we lock fil_space_latch first and - then block */ - block = fil_crypt_get_page_throttle(state, - offset, mtr, - sleeptime_ms); - mtr_commit(&local_mtr); - } else { - /* page is allocated according to xdes */ - - /* release fil_space_latch *before* fetching block */ - mtr_commit(&local_mtr); - - /* NOTE: when we have locked dict_index_get_lock(), - * it's safe to release fil_space_latch and then fetch block - * as dict_index_get_lock() is needed to make tree modifications - * such as free-ing a page - */ - - block = fil_crypt_get_page_throttle(state, - offset, mtr, - sleeptime_ms); - } - - return block; -} - - -/*********************************************************************** -Rotate one page -@param[in,out] key_state Key state -@param[in,out] state Rotation state */ -static -void -fil_crypt_rotate_page( - const key_state_t* key_state, - rotate_thread_t* state) -{ - fil_space_t*space = state->space; - ulint space_id = space->id; - ulint offset = state->offset; - const uint zip_size = fsp_flags_get_zip_size(space->flags); - ulint sleeptime_ms = 0; - fil_space_crypt_t *crypt_data = space->crypt_data; - - ut_ad(space->n_pending_ops > 0); - - /* In fil_crypt_thread where key rotation is done we have - acquired space and checked that this space is not yet - marked to be dropped. Similarly, in fil_crypt_find_page_to_rotate(). - Check here also to give DROP TABLE or similar a change. 
*/ - if (space->is_stopping()) { - return; - } - - if (space_id == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) { - /* don't encrypt this as it contains address to dblwr buffer */ - return; - } - - mtr_t mtr; - mtr_start(&mtr); - buf_block_t* block = fil_crypt_get_page_throttle(state, - offset, &mtr, - &sleeptime_ms); - - if (block) { - - bool modified = false; - int needs_scrubbing = BTR_SCRUB_SKIP_PAGE; - lsn_t block_lsn = block->page.newest_modification; - byte* frame = buf_block_get_frame(block); - uint kv = mach_read_from_4(frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - - /* check if tablespace is closing after reading page */ - if (!space->is_stopping()) { - - if (kv == 0 && - fil_crypt_is_page_uninitialized(frame, zip_size)) { - ; - } else if (fil_crypt_needs_rotation( - crypt_data->encryption, - kv, key_state->key_version, - key_state->rotate_key_age)) { - - modified = true; - - /* force rotation by dummy updating page */ - mlog_write_ulint(frame + - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - space_id, MLOG_4BYTES, &mtr); - - /* statistics */ - state->crypt_stat.pages_modified++; - } else { - if (crypt_data->is_encrypted()) { - if (kv < state->min_key_version_found) { - state->min_key_version_found = kv; - } - } - } - - needs_scrubbing = btr_page_needs_scrubbing( - &state->scrub_data, block, - BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN); - } - - mtr_commit(&mtr); - lsn_t end_lsn = mtr.end_lsn; - - if (needs_scrubbing == BTR_SCRUB_PAGE) { - mtr_start(&mtr); - /* - * refetch page and allocation status - */ - btr_scrub_page_allocation_status_t allocated; - block = btr_scrub_get_block_and_allocation_status( - state, zip_size, offset, &mtr, - &allocated, - &sleeptime_ms); - - if (block) { - - /* get required table/index and index-locks */ - needs_scrubbing = btr_scrub_recheck_page( - &state->scrub_data, block, allocated, &mtr); - - if (needs_scrubbing == BTR_SCRUB_PAGE) { - /* we need to refetch it once more now that we have - * index locked */ - block = 
btr_scrub_get_block_and_allocation_status( - state, zip_size, offset, &mtr, - &allocated, - &sleeptime_ms); - - needs_scrubbing = btr_scrub_page(&state->scrub_data, - block, allocated, - &mtr); - } - - /* NOTE: mtr is committed inside btr_scrub_recheck_page() - * and/or btr_scrub_page. This is to make sure that - * locks & pages are latched in corrected order, - * the mtr is in some circumstances restarted. - * (mtr_commit() + mtr_start()) - */ - } - } - - if (needs_scrubbing != BTR_SCRUB_PAGE) { - /* if page didn't need scrubbing it might be that cleanups - are needed. do those outside of any mtr to prevent deadlocks. - - the information what kinds of cleanups that are needed are - encoded inside the needs_scrubbing, but this is opaque to - this function (except the value BTR_SCRUB_PAGE) */ - btr_scrub_skip_page(&state->scrub_data, needs_scrubbing); - } - - if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) { - /* if we just detected that scrubbing was turned off - * update global state to reflect this */ - ut_ad(crypt_data); - mutex_enter(&crypt_data->mutex); - crypt_data->rotate_state.scrubbing.is_active = false; - mutex_exit(&crypt_data->mutex); - } - - if (modified) { - /* if we modified page, we take lsn from mtr */ - ut_a(end_lsn > state->end_lsn); - ut_a(end_lsn > block_lsn); - state->end_lsn = end_lsn; - } else { - /* if we did not modify page, check for max lsn */ - if (block_lsn > state->end_lsn) { - state->end_lsn = block_lsn; - } - } - } else { - /* If block read failed mtr memo and log should be empty. 
*/ - ut_ad(dyn_array_get_data_size(&mtr.memo) == 0); - ut_ad(dyn_array_get_data_size(&mtr.log) == 0); - mtr_commit(&mtr); - } - - if (sleeptime_ms) { - os_event_reset(fil_crypt_throttle_sleep_event); - os_event_wait_time(fil_crypt_throttle_sleep_event, - 1000 * sleeptime_ms); - } -} - -/*********************************************************************** -Rotate a batch of pages -@param[in,out] key_state Key state -@param[in,out] state Rotation state */ -static -void -fil_crypt_rotate_pages( - const key_state_t* key_state, - rotate_thread_t* state) -{ - ulint space = state->space->id; - ulint end = state->offset + state->batch; - - ut_ad(state->space->n_pending_ops > 0); - - for (; state->offset < end; state->offset++) { - - /* we can't rotate pages in dblwr buffer as - * it's not possible to read those due to lots of asserts - * in buffer pool. - * - * However since these are only (short-lived) copies of - * real pages, they will be updated anyway when the - * real page is updated - */ - if (space == TRX_SYS_SPACE && - buf_dblwr_page_inside(state->offset)) { - continue; - } - - fil_crypt_rotate_page(key_state, state); - } -} - -/*********************************************************************** -Flush rotated pages and then update page 0 - -@param[in,out] state rotation state */ -static -void -fil_crypt_flush_space( - rotate_thread_t* state) -{ - fil_space_t* space = state->space; - fil_space_crypt_t *crypt_data = space->crypt_data; - - ut_ad(space->n_pending_ops > 0); - - /* flush tablespace pages so that there are no pages left with old key */ - lsn_t end_lsn = crypt_data->rotate_state.end_lsn; - - if (end_lsn > 0 && !space->is_stopping()) { - bool success = false; - ulint n_pages = 0; - ulint sum_pages = 0; - ullint start = ut_time_us(NULL); - - do { - success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages); - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - sum_pages += n_pages; - } while (!success && !space->is_stopping()); - - ullint end = 
ut_time_us(NULL); - - if (sum_pages && end > start) { - state->cnt_waited += sum_pages; - state->sum_waited_us += (end - start); - - /* statistics */ - state->crypt_stat.pages_flushed += sum_pages; - } - } - - if (crypt_data->min_key_version == 0) { - crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; - } - - /* update page 0 */ - mtr_t mtr; - mtr_start(&mtr); - - const uint zip_size = fsp_flags_get_zip_size(state->space->flags); - - buf_block_t* block = buf_page_get_gen(space->id, zip_size, 0, - RW_X_LATCH, NULL, BUF_GET, - __FILE__, __LINE__, &mtr); - byte* frame = buf_block_get_frame(block); - - crypt_data->write_page0(frame, &mtr); - - mtr_commit(&mtr); -} - -/*********************************************************************** -Complete rotating a space -@param[in,out] key_state Key state -@param[in,out] state Rotation state */ -static -void -fil_crypt_complete_rotate_space( - const key_state_t* key_state, - rotate_thread_t* state) -{ - fil_space_crypt_t *crypt_data = state->space->crypt_data; - - ut_ad(crypt_data); - ut_ad(state->space->n_pending_ops > 0); - - /* Space might already be dropped */ - if (!state->space->is_stopping()) { - mutex_enter(&crypt_data->mutex); - - /** - * Update crypt data state with state from thread - */ - if (state->min_key_version_found < - crypt_data->rotate_state.min_key_version_found) { - crypt_data->rotate_state.min_key_version_found = - state->min_key_version_found; - } - - if (state->end_lsn > crypt_data->rotate_state.end_lsn) { - crypt_data->rotate_state.end_lsn = state->end_lsn; - } - - ut_a(crypt_data->rotate_state.active_threads > 0); - crypt_data->rotate_state.active_threads--; - bool last = crypt_data->rotate_state.active_threads == 0; - - /** - * check if space is fully done - * this as when threads shutdown, it could be that we "complete" - * iterating before we have scanned the full space. 
- */ - bool done = crypt_data->rotate_state.next_offset >= - crypt_data->rotate_state.max_offset; - - /** - * we should flush space if we're last thread AND - * the iteration is done - */ - bool should_flush = last && done; - - if (should_flush) { - /* we're the last active thread */ - crypt_data->rotate_state.flushing = true; - crypt_data->min_key_version = - crypt_data->rotate_state.min_key_version_found; - } - - /* inform scrubbing */ - crypt_data->rotate_state.scrubbing.is_active = false; - mutex_exit(&crypt_data->mutex); - - /* all threads must call btr_scrub_complete_space wo/ mutex held */ - if (btr_scrub_complete_space(&state->scrub_data) == true) { - if (should_flush) { - /* only last thread updates last_scrub_completed */ - ut_ad(crypt_data); - mutex_enter(&crypt_data->mutex); - crypt_data->rotate_state.scrubbing. - last_scrub_completed = time(0); - mutex_exit(&crypt_data->mutex); - } - } - - if (should_flush) { - fil_crypt_flush_space(state); - - mutex_enter(&crypt_data->mutex); - crypt_data->rotate_state.flushing = false; - mutex_exit(&crypt_data->mutex); - } - } else { - mutex_enter(&crypt_data->mutex); - ut_a(crypt_data->rotate_state.active_threads > 0); - crypt_data->rotate_state.active_threads--; - mutex_exit(&crypt_data->mutex); - } -} - -/*********************************************************************//** -A thread which monitors global key state and rotates tablespaces accordingly -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(fil_crypt_thread)( -/*=============================*/ - void* arg __attribute__((unused))) /*!< in: a dummy parameter required - * by os_thread_create */ -{ - UT_NOT_USED(arg); - - mutex_enter(&fil_crypt_threads_mutex); - uint thread_no = srv_n_fil_crypt_threads_started; - srv_n_fil_crypt_threads_started++; - os_event_set(fil_crypt_event); /* signal that we started */ - mutex_exit(&fil_crypt_threads_mutex); - - /* state of this thread */ - rotate_thread_t thr(thread_no); - - /* 
if we find a space that is starting, skip over it and recheck it later */ - bool recheck = false; - - while (!thr.should_shutdown()) { - - key_state_t new_state; - - time_t wait_start = time(0); - - while (!thr.should_shutdown()) { - - /* wait for key state changes - * i.e either new key version of change or - * new rotate_key_age */ - os_event_reset(fil_crypt_threads_event); - - if (os_event_wait_time(fil_crypt_threads_event, 1000000) == 0) { - break; - } - - if (recheck) { - /* check recheck here, after sleep, so - * that we don't busy loop while when one thread is starting - * a space*/ - break; - } - - time_t waited = time(0) - wait_start; - - /* Break if we have waited the background scrub - internal and background scrubbing is enabled */ - if (waited >= 0 - && ulint(waited) >= srv_background_scrub_data_check_interval - && (srv_background_scrub_data_uncompressed - || srv_background_scrub_data_compressed)) { - break; - } - } - - recheck = false; - thr.first = true; // restart from first tablespace - - /* iterate all spaces searching for those needing rotation */ - while (!thr.should_shutdown() && - fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) { - - /* we found a space to rotate */ - fil_crypt_start_rotate_space(&new_state, &thr); - - /* iterate all pages (cooperativly with other threads) */ - while (!thr.should_shutdown() && - fil_crypt_find_page_to_rotate(&new_state, &thr)) { - - if (!thr.space->is_stopping()) { - /* rotate a (set) of pages */ - fil_crypt_rotate_pages(&new_state, &thr); - } - - /* If space is marked as stopping, release - space and stop rotation. 
*/ - if (thr.space->is_stopping()) { - fil_crypt_complete_rotate_space( - &new_state, &thr); - fil_space_release(thr.space); - thr.space = NULL; - break; - } - - /* realloc iops */ - fil_crypt_realloc_iops(&thr); - } - - /* complete rotation */ - if (thr.space) { - fil_crypt_complete_rotate_space(&new_state, &thr); - } - - /* force key state refresh */ - new_state.key_id = 0; - - /* return iops */ - fil_crypt_return_iops(&thr); - } - } - - /* return iops if shutting down */ - fil_crypt_return_iops(&thr); - - /* release current space if shutting down */ - if (thr.space) { - fil_space_release(thr.space); - thr.space = NULL; - } - - mutex_enter(&fil_crypt_threads_mutex); - srv_n_fil_crypt_threads_started--; - os_event_set(fil_crypt_event); /* signal that we stopped */ - mutex_exit(&fil_crypt_threads_mutex); - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/********************************************************************* -Adjust thread count for key rotation -@param[in] enw_cnt Number of threads to be used */ -UNIV_INTERN -void -fil_crypt_set_thread_cnt( - const uint new_cnt) -{ - if (!fil_crypt_threads_inited) { - fil_crypt_threads_init(); - } - - mutex_enter(&fil_crypt_threads_mutex); - - if (new_cnt > srv_n_fil_crypt_threads) { - uint add = new_cnt - srv_n_fil_crypt_threads; - srv_n_fil_crypt_threads = new_cnt; - for (uint i = 0; i < add; i++) { - os_thread_id_t rotation_thread_id; - os_thread_create(fil_crypt_thread, NULL, &rotation_thread_id); - - ib_logf(IB_LOG_LEVEL_INFO, - "Creating #%d thread id %lu total threads %u.", - i+1, os_thread_pf(rotation_thread_id), new_cnt); - } - } else if (new_cnt < srv_n_fil_crypt_threads) { - srv_n_fil_crypt_threads = new_cnt; - os_event_set(fil_crypt_threads_event); - } - - mutex_exit(&fil_crypt_threads_mutex); - - while(srv_n_fil_crypt_threads_started != 
srv_n_fil_crypt_threads) { - os_event_reset(fil_crypt_event); - os_event_wait_time(fil_crypt_event, 1000000); - } -} - -/********************************************************************* -Adjust max key age -@param[in] val New max key age */ -UNIV_INTERN -void -fil_crypt_set_rotate_key_age( - uint val) -{ - srv_fil_crypt_rotate_key_age = val; - os_event_set(fil_crypt_threads_event); -} - -/********************************************************************* -Adjust rotation iops -@param[in] val New max roation iops */ -UNIV_INTERN -void -fil_crypt_set_rotation_iops( - uint val) -{ - srv_n_fil_crypt_iops = val; - os_event_set(fil_crypt_threads_event); -} - -/********************************************************************* -Adjust encrypt tables -@param[in] val New setting for innodb-encrypt-tables */ -UNIV_INTERN -void -fil_crypt_set_encrypt_tables( - uint val) -{ - srv_encrypt_tables = val; - os_event_set(fil_crypt_threads_event); -} - -/********************************************************************* -Init threads for key rotation */ -UNIV_INTERN -void -fil_crypt_threads_init() -{ - ut_ad(mutex_own(&fil_system->mutex)); - if (!fil_crypt_threads_inited) { - fil_crypt_event = os_event_create(); - fil_crypt_threads_event = os_event_create(); - mutex_create(fil_crypt_threads_mutex_key, - &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK); - - uint cnt = srv_n_fil_crypt_threads; - srv_n_fil_crypt_threads = 0; - fil_crypt_threads_inited = true; - fil_crypt_set_thread_cnt(cnt); - } -} - -/********************************************************************* -Clean up key rotation threads resources */ -UNIV_INTERN -void -fil_crypt_threads_cleanup() -{ - if (!fil_crypt_threads_inited) { - return; - } - ut_a(!srv_n_fil_crypt_threads_started); - os_event_free(fil_crypt_event); - fil_crypt_event = NULL; - os_event_free(fil_crypt_threads_event); - fil_crypt_threads_event = NULL; - mutex_free(&fil_crypt_threads_mutex); - fil_crypt_threads_inited = false; -} - 
-/********************************************************************* -Wait for crypt threads to stop accessing space -@param[in] space Tablespace */ -UNIV_INTERN -void -fil_space_crypt_close_tablespace( - const fil_space_t* space) -{ - if (!srv_encrypt_tables || !space->crypt_data) { - return; - } - - mutex_enter(&fil_crypt_threads_mutex); - - fil_space_crypt_t* crypt_data = space->crypt_data; - - time_t start = time(0); - time_t last = start; - - mutex_enter(&crypt_data->mutex); - mutex_exit(&fil_crypt_threads_mutex); - - uint cnt = crypt_data->rotate_state.active_threads; - bool flushing = crypt_data->rotate_state.flushing; - - while (cnt > 0 || flushing) { - mutex_exit(&crypt_data->mutex); - /* release dict mutex so that scrub threads can release their - * table references */ - dict_mutex_exit_for_mysql(); - - /* wakeup throttle (all) sleepers */ - os_event_set(fil_crypt_throttle_sleep_event); - - os_thread_sleep(20000); - dict_mutex_enter_for_mysql(); - mutex_enter(&crypt_data->mutex); - cnt = crypt_data->rotate_state.active_threads; - flushing = crypt_data->rotate_state.flushing; - - time_t now = time(0); - - if (now >= last + 30) { - ib_logf(IB_LOG_LEVEL_WARN, - "Waited %ld seconds to drop space: %s (" ULINTPF - ") active threads %u flushing=%d.", - now - start, space->name, space->id, cnt, flushing); - last = now; - } - } - - mutex_exit(&crypt_data->mutex); -} - -/********************************************************************* -Get crypt status for a space (used by information_schema) -@param[in] space Tablespace -@param[out] status Crypt status */ -UNIV_INTERN -void -fil_space_crypt_get_status( - const fil_space_t* space, - struct fil_space_crypt_status_t* status) -{ - memset(status, 0, sizeof(*status)); - - ut_ad(space->n_pending_ops > 0); - fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space)); - status->space = space->id; - - if (fil_space_crypt_t* crypt_data = space->crypt_data) { - mutex_enter(&crypt_data->mutex); - status->scheme = 
crypt_data->type; - status->keyserver_requests = crypt_data->keyserver_requests; - status->min_key_version = crypt_data->min_key_version; - status->key_id = crypt_data->key_id; - - if (crypt_data->rotate_state.active_threads > 0 || - crypt_data->rotate_state.flushing) { - status->rotating = true; - status->flushing = - crypt_data->rotate_state.flushing; - status->rotate_next_page_number = - crypt_data->rotate_state.next_offset; - status->rotate_max_page_number = - crypt_data->rotate_state.max_offset; - } - - mutex_exit(&crypt_data->mutex); - - if (srv_encrypt_tables || crypt_data->min_key_version) { - status->current_key_version = - fil_crypt_get_latest_key_version(crypt_data); - } - } -} - -/********************************************************************* -Return crypt statistics -@param[out] stat Crypt statistics */ -UNIV_INTERN -void -fil_crypt_total_stat( - fil_crypt_stat_t *stat) -{ - mutex_enter(&crypt_stat_mutex); - *stat = crypt_stat; - mutex_exit(&crypt_stat_mutex); -} - -/********************************************************************* -Get scrub status for a space (used by information_schema) - -@param[in] space Tablespace -@param[out] status Scrub status */ -UNIV_INTERN -void -fil_space_get_scrub_status( - const fil_space_t* space, - struct fil_space_scrub_status_t* status) -{ - memset(status, 0, sizeof(*status)); - - ut_ad(space->n_pending_ops > 0); - fil_space_crypt_t* crypt_data = space->crypt_data; - - status->space = space->id; - - if (crypt_data != NULL) { - status->compressed = fsp_flags_get_zip_size(space->flags) > 0; - mutex_enter(&crypt_data->mutex); - status->last_scrub_completed = - crypt_data->rotate_state.scrubbing.last_scrub_completed; - if (crypt_data->rotate_state.active_threads > 0 && - crypt_data->rotate_state.scrubbing.is_active) { - status->scrubbing = true; - status->current_scrub_started = - crypt_data->rotate_state.start_time; - status->current_scrub_active_threads = - crypt_data->rotate_state.active_threads; - 
status->current_scrub_page_number = - crypt_data->rotate_state.next_offset; - status->current_scrub_max_page_number = - crypt_data->rotate_state.max_offset; - } - - mutex_exit(&crypt_data->mutex); - } -} diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc deleted file mode 100644 index fdd09a6034e..00000000000 --- a/storage/xtradb/fil/fil0fil.cc +++ /dev/null @@ -1,7725 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file fil/fil0fil.cc -The tablespace memory cache - -Created 10/25/1995 Heikki Tuuri -*******************************************************/ - -#include "fil0fil.h" -#include "fil0pagecompress.h" -#include "fsp0pagecompress.h" -#include "fil0crypt.h" - -#include <debug_sync.h> -#include <my_dbug.h> - -#include "mem0mem.h" -#include "hash0hash.h" -#include "os0file.h" -#include "mach0data.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "log0recv.h" -#include "fsp0fsp.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "dict0dict.h" -#include "page0page.h" -#include "page0zip.h" -#include "trx0sys.h" -#include "row0mysql.h" -#include "os0file.h" -#ifndef UNIV_HOTBACKUP -# include "buf0lru.h" -# include "ibuf0ibuf.h" -# include "sync0sync.h" -# include "os0sync.h" -#else /* !UNIV_HOTBACKUP */ -# include "srv0srv.h" -static ulint srv_data_read, srv_data_written; -#endif /* !UNIV_HOTBACKUP */ - -#include "zlib.h" -#ifdef __linux__ -#include <linux/fs.h> -#include <sys/ioctl.h> -#include <fcntl.h> -#endif -#include "row0mysql.h" -#include "trx0purge.h" - -MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system; - - -/* - IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE - ============================================= - -The tablespace cache is responsible for providing fast read/write access to -tablespaces and logs of the database. File creation and deletion is done -in other modules which know more of the logic of the operation, however. - -A tablespace consists of a chain of files. 
The size of the files does not -have to be divisible by the database block size, because we may just leave -the last incomplete block unused. When a new file is appended to the -tablespace, the maximum size of the file is also specified. At the moment, -we think that it is best to extend the file to its maximum size already at -the creation of the file, because then we can avoid dynamically extending -the file when more space is needed for the tablespace. - -A block's position in the tablespace is specified with a 32-bit unsigned -integer. The files in the chain are thought to be catenated, and the block -corresponding to an address n is the nth block in the catenated file (where -the first block is named the 0th block, and the incomplete block fragments -at the end of files are not taken into account). A tablespace can be extended -by appending a new file at the end of the chain. - -Our tablespace concept is similar to the one of Oracle. - -To acquire more speed in disk transfers, a technique called disk striping is -sometimes used. This means that logical block addresses are divided in a -round-robin fashion across several disks. Windows NT supports disk striping, -so there we do not need to support it in the database. Disk striping is -implemented in hardware in RAID disks. We conclude that it is not necessary -to implement it in the database. Oracle 7 does not support disk striping, -either. - -Another trick used at some database sites is replacing tablespace files by -raw disks, that is, the whole physical disk drive, or a partition of it, is -opened as a single file, and it is accessed through byte offsets calculated -from the start of the disk or the partition. This is recommended in some -books on database tuning to achieve more speed in i/o. Using raw disk -certainly prevents the OS from fragmenting disk space, but it is not clear -if it really adds speed. 
We measured on the Pentium 100 MHz + NT + NTFS file
system + EIDE Conner disk only a negligible difference in speed when reading
from a file, versus reading from a raw disk.

To have fast access to a tablespace or a log file, we put the data structures
to a hash table. Each tablespace and log file is given a unique 32-bit
identifier.

Some operating systems do not support many open files at the same time,
though NT seems to tolerate at least 900 open files. Therefore, we put the
open files in an LRU-list. If we need to open another file, we may close the
file at the end of the LRU-list. When an i/o-operation is pending on a file,
the file cannot be closed. We take the file nodes with pending i/o-operations
out of the LRU-list and keep a count of pending operations. When an operation
completes, we decrement the count and return the file node to the LRU-list if
the count drops to zero. */

/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and mysqlbackup it is not the default
directory, and we must set the base file path explicitly */
UNIV_INTERN const char*	fil_path_to_mysql_datadir	= ".";

/** The number of fsyncs done to the log */
UNIV_INTERN ulint	fil_n_log_flushes			= 0;

/** Number of pending redo log flushes */
UNIV_INTERN ulint	fil_n_pending_log_flushes		= 0;
/** Number of pending tablespace flushes */
UNIV_INTERN ulint	fil_n_pending_tablespace_flushes	= 0;

/** Number of files currently open */
UNIV_INTERN ulint	fil_n_file_opened			= 0;

/** The null file address */
UNIV_INTERN fil_addr_t	fil_addr_null = {FIL_NULL, 0};

#ifdef UNIV_PFS_MUTEX
/* Key to register fil_system_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	fil_system_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#ifdef UNIV_PFS_RWLOCK
/* Key to register file space latch with performance schema */
UNIV_INTERN mysql_pfs_key_t	fil_space_latch_key;
#endif /* UNIV_PFS_RWLOCK */

/** The
tablespace memory cache. This variable is NULL before the module is -initialized. */ -UNIV_INTERN fil_system_t* fil_system = NULL; - -/** At this age or older a space/page will be rotated */ -UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age; -UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; - -/** Determine if (i) is a user tablespace id or not. */ -# define fil_is_user_tablespace_id(i) (i != 0 \ - && !srv_is_undo_tablespace(i)) - -/** Determine if user has explicitly disabled fsync(). */ -#ifndef __WIN__ -# define fil_buffering_disabled(s) \ - (((s)->purpose == FIL_TABLESPACE \ - && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\ - || ((s)->purpose == FIL_LOG \ - && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) - -#else /* __WIN__ */ -# define fil_buffering_disabled(s) (0) -#endif /* __WIN__ */ - -#ifdef UNIV_DEBUG -/** Try fil_validate() every this many times */ -# define FIL_VALIDATE_SKIP 17 - -/******************************************************************//** -Checks the consistency of the tablespace cache some of the time. -@return TRUE if ok or the check was skipped */ -static -ibool -fil_validate_skip(void) -/*===================*/ -{ - /** The fil_validate() call skip counter. Use a signed type - because of the race condition below. */ - static int fil_validate_count = FIL_VALIDATE_SKIP; - - /* There is a race condition below, but it does not matter, - because this call is only for heuristic purposes. We want to - reduce the call frequency of the costly fil_validate() check - in debug builds. */ - if (--fil_validate_count > 0) { - return(TRUE); - } - - fil_validate_count = FIL_VALIDATE_SKIP; - return(fil_validate()); -} -#endif /* UNIV_DEBUG */ - -/********************************************************************//** -Determines if a file node belongs to the least-recently-used list. -@return TRUE if the file belongs to fil_system->LRU mutex. 
*/ -UNIV_INLINE -ibool -fil_space_belongs_in_lru( -/*=====================*/ - const fil_space_t* space) /*!< in: file space */ -{ - return(space->purpose == FIL_TABLESPACE - && fil_is_user_tablespace_id(space->id)); -} - -/********************************************************************//** -NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! - -Prepares a file node for i/o. Opens the file if it is closed. Updates the -pending i/o's field in the node and the system appropriately. Takes the node -off the LRU list if it is in the LRU list. The caller must hold the fil_sys -mutex. -@return false if the file can't be opened, otherwise true */ -static -bool -fil_node_prepare_for_io( -/*====================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - fil_space_t* space); /*!< in: space */ -/********************************************************************//** -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. */ -static -void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks - the node as modified if - type == OS_FILE_WRITE */ -/** Free a space object from the tablespace memory cache. Close the files in -the chain but do not delete them. There must not be any pending i/o's or -flushes on the files. -The fil_system->mutex will be released. -@param[in] id tablespace ID -@param[in] x_latched whether the caller holds exclusive space->latch -@return whether the tablespace existed */ -static -bool -fil_space_free_and_mutex_exit(ulint id, bool x_latched); -/********************************************************************//** -Reads data from a space to a buffer. 
Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space.
This is a thin wrapper that forwards to fil_io() with OS_FILE_READ.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
dberr_t
fil_read(
/*=====*/
	bool	sync,		/*!< in: true if synchronous aio is desired */
	ulint	space_id,	/*!< in: space id */
	ulint	zip_size,	/*!< in: compressed page size in bytes;
				0 for uncompressed pages */
	ulint	block_offset,	/*!< in: offset in number of blocks */
	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
				this must be divisible by the OS block size */
	ulint	len,		/*!< in: how many bytes to read; this must not
				cross a file boundary; in aio this must be a
				block size multiple */
	void*	buf,		/*!< in/out: buffer where to store data read;
				in aio this must be appropriately aligned */
	void*	message,	/*!< in: message for aio handler if non-sync
				aio used, else ignored */
	ulint*	write_size)	/*!< in/out: actual write size, initialized
				after the first successful trim
				operation for this page; once
				initialized we do not trim again if
				the actual page size does not decrease */
{
	const dberr_t	err = fil_io(OS_FILE_READ, sync, space_id, zip_size,
				     block_offset, byte_offset, len, buf,
				     message, write_size);

	return(err);
}

/********************************************************************//**
Writes data to a space from a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space.
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do -i/o on a tablespace which does not exist */ -UNIV_INLINE -dberr_t -fil_write( -/*======*/ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in aio - this must be divisible by the OS block size */ - ulint len, /*!< in: how many bytes to write; this must - not cross a file boundary; in aio this must - be a block size multiple */ - void* buf, /*!< in: buffer from which to write; in aio - this must be appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size) /*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ -{ - ut_ad(!srv_read_only_mode); - - return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message, write_size)); -} - -/*******************************************************************//** -Returns the table space by a given id, NULL if not found. -It is unsafe to dereference the returned pointer. It is fine to check -for NULL. 
-@param[in] id Tablespace id -@return table space or NULL */ -fil_space_t* -fil_space_get_by_id( -/*================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - - ut_ad(mutex_own(&fil_system->mutex)); - - HASH_SEARCH(hash, fil_system->spaces, id, - fil_space_t*, space, - ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), - space->id == id); - - /* The system tablespace must always be found */ - ut_ad(space || id != 0 || srv_is_being_started); - return(space); -} - -/****************************************************************//** -Get space id from fil node */ -ulint -fil_node_get_space_id( -/*==================*/ - fil_node_t* node) /*!< in: Compressed node*/ -{ - ut_ad(node); - ut_ad(node->space); - - return (node->space->id); -} - -/*******************************************************************//** -Returns the table space by a given name, NULL if not found. */ -fil_space_t* -fil_space_get_by_name( -/*==================*/ - const char* name) /*!< in: space name */ -{ - fil_space_t* space; - ulint fold; - - ut_ad(mutex_own(&fil_system->mutex)); - - fold = ut_fold_string(name); - - HASH_SEARCH(name_hash, fil_system->name_hash, fold, - fil_space_t*, space, - ut_ad(space->magic_n == FIL_SPACE_MAGIC_N), - !strcmp(name, space->name)); - - return(space); -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Returns the version number of a tablespace, -1 if not found. 
-@return version number, -1 if the tablespace does not exist in the -memory cache */ -UNIV_INTERN -ib_int64_t -fil_space_get_version( -/*==================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ib_int64_t version = -1; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space) { - version = space->tablespace_version; - } - - mutex_exit(&fil_system->mutex); - - return(version); -} - -/*******************************************************************//** -Returns the latch of a file space. -@return latch protecting storage allocation */ -UNIV_INTERN -prio_rw_lock_t* -fil_space_get_latch( -/*================*/ - ulint id, /*!< in: space id */ - ulint* flags) /*!< out: tablespace flags */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - if (flags) { - *flags = space->flags; - } - - mutex_exit(&fil_system->mutex); - - return(&(space->latch)); -} - -/*******************************************************************//** -Returns the type of a file space. -@return ULINT_UNDEFINED, or FIL_TABLESPACE or FIL_LOG */ -UNIV_INTERN -ulint -fil_space_get_type( -/*===============*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ulint type = ULINT_UNDEFINED; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - mutex_exit(&fil_system->mutex); - - if (space) { - type = space->purpose; - } - - return(type); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Checks if all the file nodes in a space are flushed. The caller must hold -the fil_system mutex. 
-@return true if all are flushed */ -static -bool -fil_space_is_flushed( -/*=================*/ - fil_space_t* space) /*!< in: space */ -{ - fil_node_t* node; - - ut_ad(mutex_own(&fil_system->mutex)); - - node = UT_LIST_GET_FIRST(space->chain); - - while (node) { - if (node->modification_counter > node->flush_counter) { - - ut_ad(!fil_buffering_disabled(space)); - return(false); - } - - node = UT_LIST_GET_NEXT(chain, node); - } - - return(true); -} - -/*******************************************************************//** -Appends a new file to the chain of files of a space. File must be closed. -@return pointer to the file name, or NULL on error */ -UNIV_INTERN -char* -fil_node_create( -/*============*/ - const char* name, /*!< in: file name (file must be closed) */ - ulint size, /*!< in: file size in database blocks, rounded - downwards to an integer */ - ulint id, /*!< in: space id where to append */ - ibool is_raw) /*!< in: TRUE if a raw device or - a raw disk partition */ -{ - fil_node_t* node; - fil_space_t* space; - - ut_a(fil_system); - ut_a(name); - - mutex_enter(&fil_system->mutex); - - node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t))); - - node->name = mem_strdup(name); - - ut_a(!is_raw || srv_start_raw_disk_in_use); - - node->sync_event = os_event_create(); - node->is_raw_disk = is_raw; - node->size = size; - node->magic_n = FIL_NODE_MAGIC_N; - - space = fil_space_get_by_id(id); - - if (!space) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: Could not find tablespace %lu for\n" - "InnoDB: file ", (ulong) id); - ut_print_filename(stderr, name); - fputs(" in the tablespace memory cache.\n", stderr); - mem_free(node->name); - - mem_free(node); - - mutex_exit(&fil_system->mutex); - - return(NULL); - } - - space->size += size; - - node->space = space; - - UT_LIST_ADD_LAST(chain, space->chain, node); - - if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) { - - fil_system->max_assigned_id = id; - } - - 
mutex_exit(&fil_system->mutex); - - return(node->name); -} - -/********************************************************************//** -Opens a file of a node of a tablespace. The caller must own the fil_system -mutex. -@return false if the file can't be opened, otherwise true */ -static -bool -fil_node_open_file( -/*===============*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - fil_space_t* space) /*!< in: space */ -{ - os_offset_t size_bytes; - ibool ret; - ibool success; - byte* buf2; - byte* page; - - ut_ad(mutex_own(&(system->mutex))); - ut_a(node->n_pending == 0); - ut_a(node->open == FALSE); - - if (node->size == 0) { - /* It must be a single-table tablespace and we do not know the - size of the file yet. First we open the file in the normal - mode, no async I/O here, for simplicity. Then do some checks, - and close the file again. - NOTE that we could not use the simple file read function - os_file_read() in Windows to read from a file opened for - async I/O! */ - - node->handle = os_file_create_simple_no_error_handling( - innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success, 0); - - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot " - "open %s\n. 
InnoDB: Have you deleted .ibd " - "files under a running mysqld server?\n", - node->name); - - return(false); - } - - size_bytes = os_file_get_size(node->handle); - ut_a(size_bytes != (os_offset_t) -1); - - node->file_block_size = os_file_get_block_size( - node->handle, node->name); - space->file_block_size = node->file_block_size; - -#ifdef UNIV_HOTBACKUP - if (space->id == 0) { - node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); - os_file_close(node->handle); - goto add_size; - } -#endif /* UNIV_HOTBACKUP */ - ut_a(space->purpose != FIL_LOG); - ut_a(fil_is_user_tablespace_id(space->id)); - - if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "The size of the file %s is only " UINT64PF - " bytes, should be at least " ULINTPF, - node->name, size_bytes, - FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE); - os_file_close(node->handle); - return(false); - } - - /* Read the first page of the tablespace */ - - buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - /* Align the memory for file i/o if we might have O_DIRECT - set */ - page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - - success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE); - srv_stats.page0_read.add(1); - - const ulint space_id = fsp_header_get_space_id(page); - ulint flags = fsp_header_get_flags(page); - - /* Try to read crypt_data from page 0 if it is not yet - read. 
*/ - if (!node->space->crypt_data) { - const ulint offset = fsp_header_get_crypt_offset( - fsp_flags_get_zip_size(flags)); - node->space->crypt_data = fil_space_read_crypt_data(space_id, page, offset); - } - - ut_free(buf2); - os_file_close(node->handle); - - if (!fsp_flags_is_valid(flags)) { - ulint cflags = fsp_flags_convert_from_101(flags); - if (cflags == ULINT_UNDEFINED) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Expected tablespace flags 0x%x" - " but found 0x%x in the file %s", - int(space->flags), int(flags), - node->name); - return(false); - } - - flags = cflags; - } - - if (UNIV_UNLIKELY(space_id != space->id)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "tablespace id is " ULINTPF " in the data dictionary" - " but in file %s it is " ULINTPF "!\n", - space->id, node->name, space_id); - return(false); - } - - if (ulint zip_size = fsp_flags_get_zip_size(flags)) { - node->size = ulint(size_bytes / zip_size); - } else { - node->size = ulint(size_bytes / UNIV_PAGE_SIZE); - } - -#ifdef UNIV_HOTBACKUP -add_size: -#endif /* UNIV_HOTBACKUP */ - space->size += node->size; - } - - ulint atomic_writes = fsp_flags_get_atomic_writes(space->flags); - - /* printf("Opening file %s\n", node->name); */ - - /* Open the file for reading and writing, in Windows normally in the - unbuffered async I/O mode, though global variables may make - os_file_create() to fall back to the normal file I/O mode. 
*/ - - if (space->purpose == FIL_LOG) { - node->handle = os_file_create(innodb_file_log_key, - node->name, OS_FILE_OPEN, - OS_FILE_AIO, OS_LOG_FILE, - &ret, atomic_writes); - } else if (node->is_raw_disk) { - node->handle = os_file_create(innodb_file_data_key, - node->name, - OS_FILE_OPEN_RAW, - OS_FILE_AIO, OS_DATA_FILE, - &ret, atomic_writes); - } else { - node->handle = os_file_create(innodb_file_data_key, - node->name, OS_FILE_OPEN, - OS_FILE_AIO, OS_DATA_FILE, - &ret, atomic_writes); - } - - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size( - node->handle, node->name); - space->file_block_size = node->file_block_size; - } - - ut_a(ret); - - node->open = TRUE; - - system->n_open++; - fil_n_file_opened++; - - if (fil_space_belongs_in_lru(space)) { - - /* Put the node to the LRU list */ - UT_LIST_ADD_FIRST(LRU, system->LRU, node); - } - - return(true); -} - -/**********************************************************************//** -Closes a file. */ -static -void -fil_node_close_file( -/*================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system) /*!< in: tablespace memory cache */ -{ - ibool ret; - - ut_ad(node && system); - ut_ad(mutex_own(&(system->mutex))); - ut_a(node->open); - ut_a(node->n_pending == 0); - ut_a(node->n_pending_flushes == 0); - ut_a(!node->being_extended); -#ifndef UNIV_HOTBACKUP - ut_a(node->modification_counter == node->flush_counter - || srv_fast_shutdown == 2); -#endif /* !UNIV_HOTBACKUP */ - - ret = os_file_close(node->handle); - ut_a(ret); - - /* printf("Closing file %s\n", node->name); */ - - node->open = FALSE; - ut_a(system->n_open > 0); - system->n_open--; - fil_n_file_opened--; - - if (fil_space_belongs_in_lru(node->space)) { - - ut_a(UT_LIST_GET_LEN(system->LRU) > 0); - - /* The node is in the LRU list, remove it */ - UT_LIST_REMOVE(LRU, system->LRU, node); - } -} - -/********************************************************************//** -Tries to close a file in the 
LRU list. The caller must hold the fil_sys -mutex. -@return TRUE if success, FALSE if should retry later; since i/o's -generally complete in < 100 ms, and as InnoDB writes at most 128 pages -from the buffer pool in a batch, and then immediately flushes the -files, there is a good chance that the next time we find a suitable -node from the LRU list */ -static -ibool -fil_try_to_close_file_in_LRU( -/*=========================*/ - ibool print_info) /*!< in: if TRUE, prints information why it - cannot close a file */ -{ - fil_node_t* node; - - ut_ad(mutex_own(&fil_system->mutex)); - - if (print_info) { - fprintf(stderr, - "InnoDB: fil_sys open file LRU len %lu\n", - (ulong) UT_LIST_GET_LEN(fil_system->LRU)); - } - - for (node = UT_LIST_GET_LAST(fil_system->LRU); - node != NULL; - node = UT_LIST_GET_PREV(LRU, node)) { - - if (node->modification_counter == node->flush_counter - && node->n_pending_flushes == 0 - && !node->being_extended) { - - fil_node_close_file(node, fil_system); - - return(TRUE); - } - - if (!print_info) { - continue; - } - - if (node->n_pending_flushes > 0) { - fputs("InnoDB: cannot close file ", stderr); - ut_print_filename(stderr, node->name); - fprintf(stderr, ", because n_pending_flushes %lu\n", - (ulong) node->n_pending_flushes); - } - - if (node->modification_counter != node->flush_counter) { - fputs("InnoDB: cannot close file ", stderr); - ut_print_filename(stderr, node->name); - fprintf(stderr, - ", because mod_count %ld != fl_count %ld\n", - (long) node->modification_counter, - (long) node->flush_counter); - - } - - if (node->being_extended) { - fputs("InnoDB: cannot close file ", stderr); - ut_print_filename(stderr, node->name); - fprintf(stderr, ", because it is being extended\n"); - } - } - - return(FALSE); -} - -/** Flush any writes cached by the file system. 
-@param[in,out] space tablespace */ -static -void -fil_flush_low(fil_space_t* space) -{ - ut_ad(mutex_own(&fil_system->mutex)); - ut_ad(space); - ut_ad(!space->stop_new_ops); - - if (fil_buffering_disabled(space)) { - - /* No need to flush. User has explicitly disabled - buffering. */ - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - ut_ad(space->n_pending_flushes == 0); - -#ifdef UNIV_DEBUG - for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - ut_ad(node->modification_counter - == node->flush_counter); - ut_ad(node->n_pending_flushes == 0); - } -#endif /* UNIV_DEBUG */ - - return; - } - - /* Prevent dropping of the space while we are flushing */ - space->n_pending_flushes++; - - for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - ib_int64_t old_mod_counter = node->modification_counter; - - if (old_mod_counter <= node->flush_counter) { - continue; - } - - ut_a(node->open); - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes++; - } else { - fil_n_pending_log_flushes++; - fil_n_log_flushes++; - } -#ifdef __WIN__ - if (node->is_raw_disk) { - - goto skip_flush; - } -#endif /* __WIN__ */ -retry: - if (node->n_pending_flushes > 0) { - /* We want to avoid calling os_file_flush() on - the file twice at the same time, because we do - not know what bugs OS's may contain in file - i/o */ - - ib_int64_t sig_count = - os_event_reset(node->sync_event); - - mutex_exit(&fil_system->mutex); - - os_event_wait_low(node->sync_event, sig_count); - - mutex_enter(&fil_system->mutex); - - if (node->flush_counter >= old_mod_counter) { - - goto skip_flush; - } - - goto retry; - } - - ut_a(node->open); - node->n_pending_flushes++; - - mutex_exit(&fil_system->mutex); - - os_file_flush(node->handle); - - mutex_enter(&fil_system->mutex); - - os_event_set(node->sync_event); - - node->n_pending_flushes--; 
-skip_flush: - if (node->flush_counter < old_mod_counter) { - node->flush_counter = old_mod_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE( - unflushed_spaces, - fil_system->unflushed_spaces, - space); - } - } - - if (space->purpose == FIL_TABLESPACE) { - fil_n_pending_tablespace_flushes--; - } else { - fil_n_pending_log_flushes--; - } - } - - space->n_pending_flushes--; -} - -/** Try to extend a tablespace. -@param[in,out] space tablespace to be extended -@param[in,out] node last file of the tablespace -@param[in] size desired size in number of pages -@param[out] success whether the operation succeeded -@return whether the operation should be retried */ -static UNIV_COLD __attribute__((warn_unused_result, nonnull)) -bool -fil_space_extend_must_retry( - fil_space_t* space, - fil_node_t* node, - ulint size, - ibool* success) -{ - ut_ad(mutex_own(&fil_system->mutex)); - ut_ad(UT_LIST_GET_LAST(space->chain) == node); - ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE); - - *success = space->size >= size; - - if (*success) { - /* Space already big enough */ - return(false); - } - - if (node->being_extended) { - /* Another thread is currently extending the file. Wait - for it to finish. - It'd have been better to use event driven mechanism but - the entire module is peppered with polling stuff. */ - mutex_exit(&fil_system->mutex); - os_thread_sleep(100000); - return(true); - } - - node->being_extended = true; - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The tablespace data file, such as .ibd file, is missing */ - node->being_extended = false; - return(false); - } - - /* At this point it is safe to release fil_system mutex. No - other thread can rename, delete or close the file because - we have set the node->being_extended flag. 
*/ - mutex_exit(&fil_system->mutex); - - ulint start_page_no = space->size; - const ulint file_start_page_no = start_page_no - node->size; - - /* Determine correct file block size */ - if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size( - node->handle, node->name); - space->file_block_size = node->file_block_size; - } - - ulint page_size = fsp_flags_get_zip_size(space->flags); - if (!page_size) { - page_size = UNIV_PAGE_SIZE; - } - -#ifdef _WIN32 - const ulint io_completion_type = OS_FILE_READ; - /* Logically or physically extend the file with zero bytes, - depending on whether it is sparse. */ - - /* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...) - when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */ - { - FILE_END_OF_FILE_INFO feof; - /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. - fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. - Do not shrink short ROW_FORMAT=COMPRESSED files. */ - feof.EndOfFile.QuadPart = std::max( - os_offset_t(size - file_start_page_no) * page_size, - os_offset_t(FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - *success = SetFileInformationByHandle(node->handle, - FileEndOfFileInfo, - &feof, sizeof feof); - if (!*success) { - ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s" - " from " INT64PF - " to " INT64PF " bytes failed with %u", - node->name, - os_offset_t(node->size) * page_size, - feof.EndOfFile.QuadPart, GetLastError()); - } else { - start_page_no = size; - } - } -#else - /* We will logically extend the file with ftruncate() if - page_compression is enabled, because the file is expected to - be sparse in that case. Make sure that ftruncate() can deal - with large files. */ - const bool is_sparse = sizeof(off_t) >= 8 - && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags); - -# ifdef HAVE_POSIX_FALLOCATE - /* We must complete the I/O request after invoking - posix_fallocate() to avoid an assertion failure at shutdown. 
- Because no actual writes were dispatched, a read operation - will suffice. */ - const ulint io_completion_type = srv_use_posix_fallocate - || is_sparse ? OS_FILE_READ : OS_FILE_WRITE; - - if (srv_use_posix_fallocate && !is_sparse) { - const os_offset_t start_offset - = os_offset_t(start_page_no - file_start_page_no) - * page_size; - const ulint n_pages = size - start_page_no; - const os_offset_t len = os_offset_t(n_pages) * page_size; - - int err; - do { - err = posix_fallocate(node->handle, start_offset, len); - } while (err == EINTR - && srv_shutdown_state == SRV_SHUTDOWN_NONE); - - *success = !err; - if (!*success) { - ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s" - " from " INT64PF " to " INT64PF " bytes" - " failed with error %d", - node->name, start_offset, len + start_offset, - err); - } - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - *success = FALSE; - os_has_said_disk_full = TRUE;); - - if (*success) { - os_has_said_disk_full = FALSE; - start_page_no = size; - } - } else -# else - const ulint io_completion_type = is_sparse - ? OS_FILE_READ : OS_FILE_WRITE; -# endif - if (is_sparse) { - /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. - fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. - Do not shrink short ROW_FORMAT=COMPRESSED files. 
*/ - off_t s = std::max(off_t(size - file_start_page_no) - * off_t(page_size), - off_t(FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - *success = !ftruncate(node->handle, s); - if (!*success) { - ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s" - " from " INT64PF " to " INT64PF " bytes" - " failed with error %d", - node->name, - os_offset_t(start_page_no - file_start_page_no) - * page_size, os_offset_t(s), errno); - } else { - start_page_no = size; - } - } else { - /* Extend at most 64 pages at a time */ - ulint buf_size = ut_min(64, size - start_page_no) - * page_size; - byte* buf2 = static_cast<byte*>( - calloc(1, buf_size + page_size)); - *success = buf2 != NULL; - if (!buf2) { - ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF - " bytes to extend file", - buf_size + page_size); - } - byte* const buf = static_cast<byte*>( - ut_align(buf2, page_size)); - - while (*success && start_page_no < size) { - ulint n_pages - = ut_min(buf_size / page_size, - size - start_page_no); - - os_offset_t offset = static_cast<os_offset_t>( - start_page_no - file_start_page_no) - * page_size; - - *success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC, - node->name, node->handle, buf, - offset, page_size * n_pages, - page_size, node, NULL, - space->id, NULL, 0); - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - *success = FALSE; - os_has_said_disk_full = TRUE;); - - if (*success) { - os_has_said_disk_full = FALSE; - } - /* Let us measure the size of the file - to determine how much we were able to - extend it */ - os_offset_t fsize = os_file_get_size(node->handle); - ut_a(fsize != os_offset_t(-1)); - - start_page_no = ulint(fsize / page_size) - + file_start_page_no; - } - - free(buf2); - } -#endif - mutex_enter(&fil_system->mutex); - - ut_a(node->being_extended); - ut_a(start_page_no - file_start_page_no >= node->size); - - ulint file_size = start_page_no - file_start_page_no; - space->size += file_size - node->size; - node->size = file_size; - - fil_node_complete_io(node, 
fil_system, io_completion_type); - - node->being_extended = FALSE; - - if (space->id == 0) { - ulint pages_per_mb = (1024 * 1024) / page_size; - - /* Keep the last data file size info up to date, rounded to - full megabytes */ - - srv_data_file_sizes[srv_n_data_files - 1] - = (node->size / pages_per_mb) * pages_per_mb; - } - - fil_flush_low(space); - return(false); -} - -/*******************************************************************//** -Reserves the fil_system mutex and tries to make sure we can open at least one -file while holding it. This should be called before calling -fil_node_prepare_for_io(), because that function may need to open a file. */ -static -void -fil_mutex_enter_and_prepare_for_io( -/*===============================*/ - ulint space_id) /*!< in: space id */ -{ - fil_space_t* space; - ulint count = 0; - ulint count2 = 0; - -retry: - mutex_enter(&fil_system->mutex); - - if (space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files always open. */ - return; - } - - space = fil_space_get_by_id(space_id); - - if (space == NULL) { - return; - } - - if (space->stop_ios) { - ut_ad(space->id != 0); - /* We are going to do a rename file and want to stop new i/o's - for a while */ - - if (count2 > 20000) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, - " has i/o ops stopped for a long time %lu\n", - (ulong) count2); - } - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - - /* Wake the i/o-handler threads to make sure pending - i/o's are performed */ - os_aio_simulated_wake_handler_threads(); - - /* The sleep here is just to give IO helper threads a - bit of time to do some work. It is not required that - all IO related to the tablespace being renamed must - be flushed here as we do fil_flush() in - fil_rename_tablespace() as well. 
*/ - os_thread_sleep(20000); - -#endif /* UNIV_HOTBACKUP */ - - /* Flush tablespaces so that we can close modified - files in the LRU list */ - fil_flush_file_spaces(FIL_TABLESPACE); - - os_thread_sleep(20000); - - count2++; - - goto retry; - } - - fil_node_t* node = UT_LIST_GET_LAST(space->chain); - - ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain)); - - if (space->id == 0) { - /* We keep the system tablespace files always open; - this is important in preventing deadlocks in this module, as - a page read completion often performs another read from the - insert buffer. The insert buffer is in tablespace 0, and we - cannot end up waiting in this function. */ - } else if (!node || node->open) { - /* If the file is already open, no need to do - anything; if the space does not exist, we handle the - situation in the function which called this - function */ - } else { - /* Too many files are open, try to close some */ - while (fil_system->n_open >= fil_system->max_n_open) { - if (fil_try_to_close_file_in_LRU(count > 1)) { - /* No problem */ - } else if (count >= 2) { - ib_logf(IB_LOG_LEVEL_WARN, - "innodb_open_files=%lu is exceeded" - " (%lu files stay open)", - fil_system->max_n_open, - fil_system->n_open); - break; - } else { - mutex_exit(&fil_system->mutex); - - /* Wake the i/o-handler threads to - make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); - os_thread_sleep(20000); - - /* Flush tablespaces so that we can - close modified files in the LRU list */ - fil_flush_file_spaces(FIL_TABLESPACE); - - count++; - goto retry; - } - } - } - - if (ulint size = UNIV_UNLIKELY(space->recv_size)) { - ut_ad(node); - ibool success; - if (fil_space_extend_must_retry(space, node, size, &success)) { - goto retry; - } - - ut_ad(mutex_own(&fil_system->mutex)); - /* Crash recovery requires the file extension to succeed. */ - ut_a(success); - /* InnoDB data files cannot shrink. 
*/ - ut_a(space->size >= size); - - /* There could be multiple concurrent I/O requests for - this tablespace (multiple threads trying to extend - this tablespace). - - Also, fil_space_set_recv_size() may have been invoked - again during the file extension while fil_system->mutex - was not being held by us. - - Only if space->recv_size matches what we read originally, - reset the field. In this way, a subsequent I/O request - will handle any pending fil_space_set_recv_size(). */ - - if (size == space->recv_size) { - space->recv_size = 0; - } - } -} - -/** Prepare a data file object for freeing. -@param[in,out] space tablespace -@param[in,out] node data file */ -static -void -fil_node_free_part1(fil_space_t* space, fil_node_t* node) -{ - ut_ad(mutex_own(&fil_system->mutex)); - ut_a(node->magic_n == FIL_NODE_MAGIC_N); - ut_a(node->n_pending == 0); - ut_a(!node->being_extended); - - if (node->open) { - /* We fool the assertion in fil_node_close_file() to think - there are no unflushed modifications in the file */ - - node->modification_counter = node->flush_counter; - os_event_set(node->sync_event); - - if (fil_buffering_disabled(space)) { - - ut_ad(!space->is_in_unflushed_spaces); - ut_ad(fil_space_is_flushed(space)); - - } else if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE(unflushed_spaces, - fil_system->unflushed_spaces, - space); - } - - fil_node_close_file(node, fil_system); - } -} - -/** Free a data file object. 
-@param[in,out] space tablespace -@param[in] node data file */ -static -void -fil_node_free_part2(fil_space_t* space, fil_node_t* node) -{ - ut_ad(!node->open); - - space->size -= node->size; - - UT_LIST_REMOVE(chain, space->chain, node); - - os_event_free(node->sync_event); - mem_free(node->name); - mem_free(node); -} - -#ifdef UNIV_LOG_ARCHIVE -/****************************************************************//** -Drops files from the start of a file space, so that its size is cut by -the amount given. */ -UNIV_INTERN -void -fil_space_truncate_start( -/*=====================*/ - ulint id, /*!< in: space id */ - ulint trunc_len) /*!< in: truncate by this much; it is an error - if this does not equal to the combined size of - some initial files in the space */ -{ - fil_node_t* node; - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - while (trunc_len > 0) { - node = UT_LIST_GET_FIRST(space->chain); - - ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len); - - trunc_len -= node->size * UNIV_PAGE_SIZE; - - fil_node_free_part1(space, node); - fil_node_free_part2(space, node); - } - - mutex_exit(&fil_system->mutex); -} - -/****************************************************************//** -Check is there node in file space with given name. 
*/ -UNIV_INTERN -ibool -fil_space_contains_node( -/*====================*/ - ulint id, /*!< in: space id */ - char* node_name) /*!< in: node name */ -{ - fil_node_t* node; - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - for (node = UT_LIST_GET_FIRST(space->chain); node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - if (ut_strcmp(node->name, node_name) == 0) { - mutex_exit(&fil_system->mutex); - return(TRUE); - } - - } - - mutex_exit(&fil_system->mutex); - return(FALSE); -} - -#endif /* UNIV_LOG_ARCHIVE */ - -/*******************************************************************//** -Creates a space memory object and puts it to the 'fil system' hash table. -If there is an error, prints an error message to the .err log. -@param[in] name Space name -@param[in] id Space id -@param[in] flags Tablespace flags -@param[in] purpose FIL_TABLESPACE or FIL_LOG if log -@param[in] crypt_data Encryption information -@param[in] create_table True if this is create table -@param[in] mode Encryption mode -@return TRUE if success */ -UNIV_INTERN -bool -fil_space_create( - const char* name, - ulint id, - ulint flags, - ulint purpose, - fil_space_crypt_t* crypt_data, - bool create_table, - fil_encryption_t mode) -{ - fil_space_t* space; - - DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); - - ut_a(fil_system); - - /* Look for a matching tablespace and if found free it. 
*/ - do { - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_name(name); - - if (space != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Tablespace '%s' exists in the cache " - "with id %lu != %lu", - name, (ulong) space->id, (ulong) id); - - if (id == 0 || purpose != FIL_TABLESPACE) { - - mutex_exit(&fil_system->mutex); - - return(false); - } - - ib_logf(IB_LOG_LEVEL_WARN, - "Freeing existing tablespace '%s' entry " - "from the cache with id %lu", - name, (ulong) id); - - bool success = fil_space_free_and_mutex_exit( - space->id, false); - ut_a(success); - } - - } while (space != 0); - - space = fil_space_get_by_id(id); - - if (space != 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to add tablespace '%s' with id %lu " - "to the tablespace memory cache, but tablespace '%s' " - "with id %lu already exists in the cache!", - name, (ulong) id, space->name, (ulong) space->id); - - mutex_exit(&fil_system->mutex); - - return(false); - } - - space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space))); - - space->name = mem_strdup(name); - space->id = id; - - fil_system->tablespace_version++; - space->tablespace_version = fil_system->tablespace_version; - - if (purpose == FIL_TABLESPACE && !recv_recovery_on - && id > fil_system->max_assigned_id) { - - if (!fil_system->space_id_reuse_warned) { - fil_system->space_id_reuse_warned = TRUE; - if (!IS_XTRABACKUP()) { - ib_logf(IB_LOG_LEVEL_WARN, - "Allocated tablespace %lu, old maximum " - "was %lu", - (ulong)id, - (ulong)fil_system->max_assigned_id); - } - } - - fil_system->max_assigned_id = id; - } - - space->purpose = purpose; - space->flags = flags; - - space->magic_n = FIL_SPACE_MAGIC_N; - space->crypt_data = crypt_data; - - rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); - - HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); - - HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(name), space); - - UT_LIST_ADD_LAST(space_list, fil_system->space_list, space); - - /* 
Inform key rotation that there could be something - to do */ - if (purpose == FIL_TABLESPACE && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event && - (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || - srv_encrypt_tables)) { - /* Key rotation is not enabled, need to inform background - encryption threads. */ - UT_LIST_ADD_LAST(rotation_list, fil_system->rotation_list, space); - space->is_in_rotation_list = true; - mutex_exit(&fil_system->mutex); - mutex_enter(&fil_crypt_threads_mutex); - os_event_set(fil_crypt_threads_event); - mutex_exit(&fil_crypt_threads_mutex); - } else { - mutex_exit(&fil_system->mutex); - } - - return(true); -} - -/*******************************************************************//** -Assigns a new space id for a new single-table tablespace. This works simply by -incrementing the global counter. If 4 billion id's is not enough, we may need -to recycle id's. -@return TRUE if assigned, FALSE if not */ -UNIV_INTERN -ibool -fil_assign_new_space_id( -/*====================*/ - ulint* space_id) /*!< in/out: space id */ -{ - ulint id; - ibool success; - - mutex_enter(&fil_system->mutex); - - id = *space_id; - - if (id < fil_system->max_assigned_id) { - id = fil_system->max_assigned_id; - } - - id++; - - if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Warning: you are running out of new" - " single-table tablespace id's.\n" - "InnoDB: Current counter is %lu and it" - " must not exceed %lu!\n" - "InnoDB: To reset the counter to zero" - " you have to dump all your tables and\n" - "InnoDB: recreate the whole InnoDB installation.\n", - (ulong) id, - (ulong) SRV_LOG_SPACE_FIRST_ID); - } - - success = (id < SRV_LOG_SPACE_FIRST_ID); - - if (success) { - *space_id = fil_system->max_assigned_id = id; - } else { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: You have run out of single-table" - " tablespace id's!\n" - "InnoDB: Current counter is %lu.\n" 
- "InnoDB: To reset the counter to zero you" - " have to dump all your tables and\n" - "InnoDB: recreate the whole InnoDB installation.\n", - (ulong) id); - *space_id = ULINT_UNDEFINED; - } - - mutex_exit(&fil_system->mutex); - - return(success); -} - -/** Free a space object from the tablespace memory cache. Close the files in -the chain but do not delete them. There must not be any pending i/o's or -flushes on the files. -The fil_system->mutex will be released. -@param[in] id tablespace ID -@param[in] x_latched whether the caller holds exclusive space->latch -@return whether the tablespace existed */ -static -bool -fil_space_free_and_mutex_exit(ulint id, bool x_latched) -{ - fil_space_t* space; - fil_space_t* fnamespace; - - ut_ad(mutex_own(&fil_system->mutex)); - - space = fil_space_get_by_id(id); - - if (!space) { - ib_logf(IB_LOG_LEVEL_ERROR, - "trying to remove non-existing tablespace " ULINTPF, - id); - mutex_exit(&fil_system->mutex); - return(false); - } - - HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space); - - fnamespace = fil_space_get_by_name(space->name); - ut_a(fnamespace); - ut_a(space == fnamespace); - - HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(space->name), space); - - if (space->is_in_unflushed_spaces) { - - ut_ad(!fil_buffering_disabled(space)); - space->is_in_unflushed_spaces = false; - - UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces, - space); - } - - if (space->is_in_rotation_list) { - space->is_in_rotation_list = false; - ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0); - UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space); - } - - UT_LIST_REMOVE(space_list, fil_system->space_list, space); - - ut_a(space->magic_n == FIL_SPACE_MAGIC_N); - ut_a(0 == space->n_pending_flushes); - - for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - fil_node_free_part1(space, node); - } - - 
mutex_exit(&fil_system->mutex); - - /* Wait for fil_space_release_for_io(); after - fil_space_detach(), the tablespace cannot be found, so - fil_space_acquire_for_io() would return NULL */ - while (space->n_pending_ios) { - os_thread_sleep(100); - } - - for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain); - fil_node != NULL; - fil_node = UT_LIST_GET_FIRST(space->chain)) { - fil_node_free_part2(space, fil_node); - } - - ut_a(0 == UT_LIST_GET_LEN(space->chain)); - - if (x_latched) { - rw_lock_x_unlock(&space->latch); - } - - rw_lock_free(&(space->latch)); - - fil_space_destroy_crypt_data(&(space->crypt_data)); - - mem_free(space->name); - mem_free(space); - - return(TRUE); -} - -/*******************************************************************//** -Returns a pointer to the file_space_t that is in the memory cache -associated with a space id. -@return file_space_t pointer, NULL if space not found */ -fil_space_t* -fil_space_get( -/*==========*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - mutex_exit(&fil_system->mutex); - - return (space); -} - -/*******************************************************************//** -Returns a pointer to the file_space_t that is in the memory cache -associated with a space id. The caller must lock fil_system->mutex. -@return file_space_t pointer, NULL if space not found */ -UNIV_INLINE -fil_space_t* -fil_space_get_space( -/*================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - fil_node_t* node; - - ut_ad(fil_system); - - space = fil_space_get_by_id(id); - if (space == NULL) { - return(NULL); - } - - if (space->size == 0 && space->purpose == FIL_TABLESPACE) { - ut_a(id != 0); - - mutex_exit(&fil_system->mutex); - - /* It is possible that the space gets evicted at this point - before the fil_mutex_enter_and_prepare_for_io() acquires - the fil_system->mutex. 
Check for this after completing the - call to fil_mutex_enter_and_prepare_for_io(). */ - fil_mutex_enter_and_prepare_for_io(id); - - /* We are still holding the fil_system->mutex. Check if - the space is still in memory cache. */ - space = fil_space_get_by_id(id); - if (space == NULL) { - return(NULL); - } - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. Note that there is small - change that space is found from tablespace list but - we have not yet created node for it and as we hold - fil_system mutex here fil_node_create can't continue. */ - ut_a(UT_LIST_GET_LEN(space->chain) == 1 || UT_LIST_GET_LEN(space->chain) == 0); - - node = UT_LIST_GET_FIRST(space->chain); - - if (node) { - /* It must be a single-table tablespace and we have not opened - the file yet; the following calls will open it and update the - size fields */ - - if (!fil_node_prepare_for_io(node, fil_system, space)) { - /* The single-table tablespace can't be opened, - because the ibd file is missing. */ - return(NULL); - } - fil_node_complete_io(node, fil_system, OS_FILE_READ); - } - } - - return(space); -} - -/*******************************************************************//** -Returns the path from the first fil_node_t found for the space ID sent. -The caller is responsible for freeing the memory allocated here for the -value returned. -@return own: A copy of fil_node_t::path, NULL if space ID is zero -or not found. 
*/ -UNIV_INTERN -char* -fil_space_get_first_path( -/*=====================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - fil_node_t* node; - char* path; - - ut_ad(fil_system); - ut_a(id); - - fil_mutex_enter_and_prepare_for_io(id); - - space = fil_space_get_space(id); - - if (space == NULL) { - mutex_exit(&fil_system->mutex); - - return(NULL); - } - - ut_ad(mutex_own(&fil_system->mutex)); - - node = UT_LIST_GET_FIRST(space->chain); - - path = mem_strdup(node->name); - - mutex_exit(&fil_system->mutex); - - return(path); -} - -/** Set the recovered size of a tablespace in pages. -@param id tablespace ID -@param size recovered size in pages */ -UNIV_INTERN -void -fil_space_set_recv_size(ulint id, ulint size) -{ - mutex_enter(&fil_system->mutex); - ut_ad(size); - ut_ad(id < SRV_LOG_SPACE_FIRST_ID); - - if (fil_space_t* space = fil_space_get_space(id)) { - space->recv_size = size; - } - - mutex_exit(&fil_system->mutex); -} - -/*******************************************************************//** -Returns the size of the space in pages. The tablespace must be cached in the -memory cache. -@return space size, 0 if space not found */ -UNIV_INTERN -ulint -fil_space_get_size( -/*===============*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ulint size; - - ut_ad(fil_system); - mutex_enter(&fil_system->mutex); - - space = fil_space_get_space(id); - - size = space ? space->size : 0; - - mutex_exit(&fil_system->mutex); - - return(size); -} - -/*******************************************************************//** -Returns the flags of the space. The tablespace must be cached -in the memory cache. 
-@return flags, ULINT_UNDEFINED if space not found */ -UNIV_INTERN -ulint -fil_space_get_flags( -/*================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ulint flags; - - ut_ad(fil_system); - - if (!id) { - return(0); - } - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_space(id); - - if (space == NULL) { - mutex_exit(&fil_system->mutex); - - return(ULINT_UNDEFINED); - } - - flags = space->flags; - - mutex_exit(&fil_system->mutex); - - return(flags); -} - -/*******************************************************************//** -Returns the compressed page size of the space, or 0 if the space -is not compressed. The tablespace must be cached in the memory cache. -@return compressed page size, ULINT_UNDEFINED if space not found */ -UNIV_INTERN -ulint -fil_space_get_zip_size( -/*===================*/ - ulint id) /*!< in: space id */ -{ - ulint flags; - - flags = fil_space_get_flags(id); - - if (flags && flags != ULINT_UNDEFINED) { - - return(fsp_flags_get_zip_size(flags)); - } - - return(flags); -} - -/*******************************************************************//** -Checks if the pair space, page_no refers to an existing page in a tablespace -file space. The tablespace must be cached in the memory cache. -@return TRUE if the address is meaningful */ -UNIV_INTERN -ibool -fil_check_adress_in_tablespace( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint page_no)/*!< in: page number */ -{ - if (fil_space_get_size(id) > page_no) { - - return(TRUE); - } - - return(FALSE); -} - -/****************************************************************//** -Initializes the tablespace memory cache. 
*/ -UNIV_INTERN -void -fil_init( -/*=====*/ - ulint hash_size, /*!< in: hash table size */ - ulint max_n_open) /*!< in: max number of open files */ -{ - ut_a(fil_system == NULL); - - ut_a(hash_size > 0); - ut_a(max_n_open > 0); - - fil_system = static_cast<fil_system_t*>( - mem_zalloc(sizeof(fil_system_t))); - - mutex_create(fil_system_mutex_key, - &fil_system->mutex, SYNC_ANY_LATCH); - - fil_system->spaces = hash_create(hash_size); - fil_system->name_hash = hash_create(hash_size); - - fil_system->max_n_open = max_n_open; - - fil_space_crypt_init(); -} - -/*******************************************************************//** -Opens all log files and system tablespace data files. They stay open until the -database server shutdown. This should be called at a server startup after the -space objects for the log and the system tablespace have been created. The -purpose of this operation is to make sure we never run out of file descriptors -if we need to read from the insert buffer or to write to the log. */ -UNIV_INTERN -void -fil_open_log_and_system_tablespace_files(void) -/*==========================================*/ -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - for (space = UT_LIST_GET_FIRST(fil_system->space_list); - space != NULL; - space = UT_LIST_GET_NEXT(space_list, space)) { - - fil_node_t* node; - - if (fil_space_belongs_in_lru(space)) { - - continue; - } - - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - if (!node->open) { - if (!fil_node_open_file(node, fil_system, - space)) { - /* This func is called during server's - startup. If some file of log or system - tablespace is missing, the server - can't start successfully. So we should - assert for it. */ - ut_a(0); - } - } - - if (fil_system->max_n_open < 10 + fil_system->n_open) { - - fprintf(stderr, - "InnoDB: Warning: you must" - " raise the value of" - " innodb_open_files in\n" - "InnoDB: my.cnf! 
Remember that" - " InnoDB keeps all log files" - " and all system\n" - "InnoDB: tablespace files open" - " for the whole time mysqld is" - " running, and\n" - "InnoDB: needs to open also" - " some .ibd files if the" - " file-per-table storage\n" - "InnoDB: model is used." - " Current open files %lu," - " max allowed" - " open files %lu.\n", - (ulong) fil_system->n_open, - (ulong) fil_system->max_n_open); - } - } - } - - mutex_exit(&fil_system->mutex); -} - -/*******************************************************************//** -Closes all open files. There must not be any pending i/o's or not flushed -modifications in the files. */ -UNIV_INTERN -void -fil_close_all_files(void) -/*=====================*/ -{ - fil_space_t* space; - - // Must check both flags as it's possible for this to be called during - // server startup with srv_track_changed_pages == true but - // srv_redo_log_thread_started == false - if (srv_track_changed_pages && srv_redo_log_thread_started) - os_event_wait(srv_redo_log_tracked_event); - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space != NULL) { - fil_node_t* node; - fil_space_t* prev_space = space; - - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - if (node->open) { - fil_node_close_file(node, fil_system); - } - } - - space = UT_LIST_GET_NEXT(space_list, space); - - /* This is executed during shutdown. No other thread - can create or remove tablespaces while we are not - holding fil_system->mutex. */ - fil_space_free_and_mutex_exit(prev_space->id, false); - mutex_enter(&fil_system->mutex); - } - - mutex_exit(&fil_system->mutex); -} - -/*******************************************************************//** -Closes the redo log files. There must not be any pending i/o's or not -flushed modifications in the files. 
*/ -UNIV_INTERN -void -fil_close_log_files( -/*================*/ - bool free) /*!< in: whether to free the memory object */ -{ - fil_space_t* space; - - // Must check both flags as it's possible for this to be called during - // server startup with srv_track_changed_pages == true but - // srv_redo_log_thread_started == false - if (srv_track_changed_pages && srv_redo_log_thread_started) - os_event_wait(srv_redo_log_tracked_event); - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space != NULL) { - fil_node_t* node; - fil_space_t* prev_space = space; - - if (space->purpose != FIL_LOG) { - space = UT_LIST_GET_NEXT(space_list, space); - continue; - } - - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - if (node->open) { - fil_node_close_file(node, fil_system); - } - } - - space = UT_LIST_GET_NEXT(space_list, space); - - if (free) { - /* This is executed during startup. No other thread - can create or remove tablespaces while we are not - holding fil_system->mutex. */ - fil_space_free_and_mutex_exit(prev_space->id, false); - mutex_enter(&fil_system->mutex); - } - } - - mutex_exit(&fil_system->mutex); -} - -/*******************************************************************//** -Sets the max tablespace id counter if the given number is bigger than the -previous value. */ -UNIV_INTERN -void -fil_set_max_space_id_if_bigger( -/*===========================*/ - ulint max_id) /*!< in: maximum known id */ -{ - if (max_id >= SRV_LOG_SPACE_FIRST_ID) { - fprintf(stderr, - "InnoDB: Fatal error: max tablespace id" - " is too high, %lu\n", (ulong) max_id); - ut_error; - } - - mutex_enter(&fil_system->mutex); - - if (fil_system->max_assigned_id < max_id) { - - fil_system->max_assigned_id = max_id; - } - - mutex_exit(&fil_system->mutex); -} - -/** Write the flushed LSN to the page header of the first page in the -system tablespace. 
-@param[in] lsn flushed LSN -@return DB_SUCCESS or error number */ -dberr_t -fil_write_flushed_lsn( - lsn_t lsn) -{ - byte* buf1; - byte* buf; - dberr_t err; - - buf1 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE)); - - /* Acquire system tablespace */ - fil_space_t* space = fil_space_acquire(0); - - /* If tablespace is not encrypted, stamp flush_lsn to - first page of all system tablespace datafiles to avoid - unnecessary error messages on possible downgrade. */ - if (space->crypt_data->min_key_version == 0) { - fil_node_t* node; - ulint sum_of_sizes = 0; - - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - - err = fil_read(TRUE, 0, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - - if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - lsn); - - err = fil_write(TRUE, 0, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - - sum_of_sizes += node->size; - } - } - } else { - /* When system tablespace is encrypted stamp flush_lsn to - only the first page of the first datafile (rest of pages - are encrypted). */ - err = fil_read(TRUE, 0, 0, 0, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - - if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - lsn); - - err = fil_write(TRUE, 0, 0, 0, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - } - } - - fil_flush_file_spaces(FIL_TABLESPACE); - fil_space_release(space); - - ut_free(buf1); - - return(err); -} - -/** Check the consistency of the first data page of a tablespace -at database startup. 
-@param[in] page page frame -@param[in] space_id tablespace identifier -@param[in] flags tablespace flags -@retval NULL on success, or if innodb_force_recovery is set -@return pointer to an error message string */ -static MY_ATTRIBUTE((warn_unused_result)) -const char* -fil_check_first_page(const page_t* page, ulint space_id, ulint flags) -{ - if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { - return(NULL); - } - - if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { - fprintf(stderr, - "InnoDB: Error: Current page size %lu != " - " page size on page %lu\n", - UNIV_PAGE_SIZE, fsp_flags_get_page_size(flags)); - - return("innodb-page-size mismatch"); - } - - if (!space_id && !flags) { - ulint nonzero_bytes = UNIV_PAGE_SIZE; - const byte* b = page; - - while (!*b && --nonzero_bytes) { - b++; - } - - if (!nonzero_bytes) { - return("space header page consists of zero bytes"); - } - } - - if (buf_page_is_corrupted( - false, page, fsp_flags_get_zip_size(flags), NULL)) { - return("checksum mismatch"); - } - - if (page_get_space_id(page) == space_id - && page_get_page_no(page) == 0) { - return(NULL); - } - - return("inconsistent data in space header"); -} - -/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from -the first page of a first data file at database startup. 
-@param[in] data_file open data file -@param[in] one_read_only true if first datafile is already - read -@param[out] flags FSP_SPACE_FLAGS -@param[out] space_id tablepspace ID -@param[out] flushed_lsn flushed lsn value -@param[out] crypt_data encryption crypt data -@retval NULL on success, or if innodb_force_recovery is set -@return pointer to an error message string */ -UNIV_INTERN -const char* -fil_read_first_page( - pfs_os_file_t data_file, - ibool one_read_already, - ulint* flags, - ulint* space_id, - lsn_t* flushed_lsn, - fil_space_crypt_t** crypt_data) -{ - byte* buf; - byte* page; - const char* check_msg = NULL; - fil_space_crypt_t* cdata; - - if (IS_XTRABACKUP() && srv_backup_mode) { - /* Files smaller than page size may occur - in xtrabackup, when server creates new file - but has not yet written into it, or wrote only - partially. Checks size here, to avoid exit in os_file_read. - This file will be skipped by xtrabackup if it is too small. - */ - os_offset_t file_size; - file_size = os_file_get_size(data_file); - if (file_size < FIL_IBD_FILE_INITIAL_SIZE*UNIV_PAGE_SIZE) { - return "File size is less than minimum"; - } - } - - buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - - /* Align the memory for a possible read from a raw device */ - - page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - - os_file_read(data_file, page, 0, UNIV_PAGE_SIZE); - - srv_stats.page0_read.add(1); - - /* The FSP_HEADER on page 0 is only valid for the first file - in a tablespace. So if this is not the first datafile, leave - *flags and *space_id as they were read from the first file and - do not validate the first page. 
*/ - if (!one_read_already) { - *space_id = fsp_header_get_space_id(page); - *flags = fsp_header_get_flags(page); - - if (flushed_lsn) { - *flushed_lsn = mach_read_from_8(page + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - } - - if (!fsp_flags_is_valid(*flags)) { - ulint cflags = fsp_flags_convert_from_101(*flags); - if (cflags == ULINT_UNDEFINED) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Invalid flags 0x%x in tablespace %u", - unsigned(*flags), unsigned(*space_id)); - return "invalid tablespace flags"; - } else { - *flags = cflags; - } - } - - if (!(IS_XTRABACKUP() && srv_backup_mode)) { - check_msg = fil_check_first_page(page, *space_id, *flags); - } - - /* Possible encryption crypt data is also stored only to first page - of the first datafile. */ - - const ulint offset = fsp_header_get_crypt_offset( - fsp_flags_get_zip_size(*flags)); - - cdata = fil_space_read_crypt_data(*space_id, page, offset); - - if (crypt_data) { - *crypt_data = cdata; - } - - /* If file space is encrypted we need to have at least some - encryption service available where to get keys */ - if (cdata && cdata->should_encrypt()) { - - if (!encryption_key_id_exists(cdata->key_id)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace id " ULINTPF - " is encrypted but encryption service" - " or used key_id %u is not available. " - "Can't continue opening tablespace.", - *space_id, cdata->key_id); - - return ("table encrypted but encryption service not available."); - } - } - } - - ut_free(buf); - - if (check_msg) { - return(check_msg); - } - - return(NULL); -} - -/*================ SINGLE-TABLE TABLESPACES ==========================*/ - -/********************************************************//** -Creates the database directory for a table if it does not exist yet. 
*/ -static -void -fil_create_directory_for_tablename( -/*===============================*/ - const char* name) /*!< in: name in the standard - 'databasename/tablename' format */ -{ - const char* namend; - char* path; - ulint len; - - len = strlen(fil_path_to_mysql_datadir); - namend = strchr(name, '/'); - ut_a(namend); - path = static_cast<char*>(mem_alloc(len + (namend - name) + 2)); - - memcpy(path, fil_path_to_mysql_datadir, len); - path[len] = '/'; - memcpy(path + len + 1, name, namend - name); - path[len + (namend - name) + 1] = 0; - - srv_normalize_path_for_win(path); - - ut_a(os_file_create_directory(path, FALSE)); - mem_free(path); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Writes a log record about an .ibd file create/rename/delete. */ -static -void -fil_op_write_log( -/*=============*/ - ulint type, /*!< in: MLOG_FILE_CREATE, - MLOG_FILE_CREATE2, - MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id, /*!< in: space id */ - ulint log_flags, /*!< in: redo log flags (stored - in the page number field) */ - ulint flags, /*!< in: compressed page size - and file format - if type==MLOG_FILE_CREATE2, or 0 */ - const char* name, /*!< in: table name in the familiar - 'databasename/tablename' format, or - the file path in the case of - MLOG_FILE_DELETE */ - const char* new_name, /*!< in: if type is MLOG_FILE_RENAME, - the new table name in the - 'databasename/tablename' format */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - byte* log_ptr; - ulint len; - - log_ptr = mlog_open(mtr, 11 + 2 + 1); - ut_ad(fsp_flags_is_valid(flags)); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - log_ptr = mlog_write_initial_log_record_for_file_op( - type, space_id, log_flags, log_ptr, mtr); - if (type == MLOG_FILE_CREATE2) { - mach_write_to_4(log_ptr, flags); - log_ptr += 4; - } - /* Let us store the strings as null-terminated for 
easier readability - and handling */ - - len = strlen(name) + 1; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, (byte*) name, len); - - if (type == MLOG_FILE_RENAME) { - len = strlen(new_name) + 1; - log_ptr = mlog_open(mtr, 2 + len); - ut_a(log_ptr); - mach_write_to_2(log_ptr, len); - log_ptr += 2; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, (byte*) new_name, len); - } -} -#endif - -/*******************************************************************//** -Parses the body of a log record written about an .ibd file operation. That is, -the log record part after the standard (type, space id, page no) header of the -log record. - -If desired, also replays the delete or rename operation if the .ibd file -exists and the space id in it matches. Replays the create operation if a file -at that path does not exist yet. If the database directory for the file to be -created does not exist, then we create the directory, too. - -Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to -the datadir that we should use in replaying the file operations. - -InnoDB recovery does not replay these fully since it always sets the space id -to zero. But mysqlbackup does replay them. TODO: If remote tablespaces are -used, mysqlbackup will only create tables in the default directory since -MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path. 
- -@return end of log record, or NULL if the record was not completely -contained between ptr and end_ptr */ -UNIV_INTERN -byte* -fil_op_log_parse_or_replay( -/*=======================*/ - byte* ptr, /*!< in: buffer containing the log record body, - or an initial segment of it, if the record does - not fir completely between ptr and end_ptr */ - byte* end_ptr, /*!< in: buffer end */ - ulint type, /*!< in: the type of this log record */ - ulint space_id, /*!< in: the space id of the tablespace in - question, or 0 if the log record should - only be parsed but not replayed */ - ulint log_flags) /*!< in: redo log flags - (stored in the page number parameter) */ -{ - ulint name_len; - ulint new_name_len; - const char* name; - const char* new_name = NULL; - ulint flags = 0; - - if (type == MLOG_FILE_CREATE2) { - if (end_ptr < ptr + 4) { - - return(NULL); - } - - flags = mach_read_from_4(ptr); - ptr += 4; - } - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - name_len = mach_read_from_2(ptr); - - ptr += 2; - - if (end_ptr < ptr + name_len) { - - return(NULL); - } - - name = (const char*) ptr; - - ptr += name_len; - - if (type == MLOG_FILE_RENAME) { - if (end_ptr < ptr + 2) { - - return(NULL); - } - - new_name_len = mach_read_from_2(ptr); - - ptr += 2; - - if (end_ptr < ptr + new_name_len) { - - return(NULL); - } - - new_name = (const char*) ptr; - - ptr += new_name_len; - } - - /* We managed to parse a full log record body */ - /* - printf("Parsed log rec of type %lu space %lu\n" - "name %s\n", type, space_id, name); - - if (type == MLOG_FILE_RENAME) { - printf("new name %s\n", new_name); - } - */ - if (!space_id) { - return(ptr); - } else { - /* Only replay file ops during recovery. This is a - release-build assert to minimize any data loss risk by a - misapplied file operation. */ - ut_a(recv_recovery_is_on()); - } - - /* Let us try to perform the file operation, if sensible. 
Note that - mysqlbackup has at this stage already read in all space id info to the - fil0fil.cc data structures. - - NOTE that our algorithm is not guaranteed to work correctly if there - were renames of tables during the backup. See mysqlbackup code for more - on the problem. */ - - switch (type) { - case MLOG_FILE_DELETE: - if (fil_tablespace_exists_in_mem(space_id)) { - dberr_t err = fil_delete_tablespace( - space_id, BUF_REMOVE_FLUSH_NO_WRITE); - ut_a(err == DB_SUCCESS); - } - - break; - - case MLOG_FILE_RENAME: - /* In order to replay the rename, the following must hold: - * The new name is not already used. - * A tablespace is open in memory with the old name. - * The space ID for that tablepace matches this log entry. - This will prevent unintended renames during recovery. */ - - if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED - && space_id == fil_get_space_id_for_table(name)) { - /* Create the database directory for the new name, if - it does not exist yet */ - fil_create_directory_for_tablename(new_name); - - if (!fil_rename_tablespace(name, space_id, - new_name, NULL)) { - ut_error; - } - } - - break; - - case MLOG_FILE_CREATE: - case MLOG_FILE_CREATE2: - if (fil_tablespace_exists_in_mem(space_id)) { - /* Do nothing */ - } else if (fil_get_space_id_for_table(name) - != ULINT_UNDEFINED) { - /* Do nothing */ - } else if (log_flags & MLOG_FILE_FLAG_TEMP) { - /* Temporary table, do nothing */ - } else { - /* Create the database directory for name, if it does - not exist yet */ - fil_create_directory_for_tablename(name); - - if (fil_create_new_single_table_tablespace( - space_id, name, NULL, flags, - DICT_TF2_USE_TABLESPACE, - FIL_IBD_FILE_INITIAL_SIZE, - FIL_ENCRYPTION_DEFAULT, - FIL_DEFAULT_ENCRYPTION_KEY) != DB_SUCCESS) { - ut_error; - } - } - - break; - - default: - ut_error; - } - - return(ptr); -} - -/*******************************************************************//** -Allocates a file name for the EXPORT/IMPORT config file name. 
The -string must be freed by caller with mem_free(). -@return own: file name */ -static -char* -fil_make_cfg_name( -/*==============*/ - const char* filepath) /*!< in: .ibd file name */ -{ - char* cfg_name; - - /* Create a temporary file path by replacing the .ibd suffix - with .cfg. */ - - ut_ad(strlen(filepath) > 4); - - cfg_name = mem_strdup(filepath); - ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg"); - return(cfg_name); -} - -/*******************************************************************//** -Check for change buffer merges. -@return 0 if no merges else count + 1. */ -static -ulint -fil_ibuf_check_pending_ops( -/*=======================*/ - fil_space_t* space, /*!< in/out: Tablespace to check */ - ulint count) /*!< in: number of attempts so far */ -{ - ut_ad(mutex_own(&fil_system->mutex)); - - if (space != 0 && space->n_pending_ops != 0) { - - if (count > 5000) { - ib_logf(IB_LOG_LEVEL_WARN, - "Trying to close/delete tablespace " - "'%s' but there are %lu pending change " - "buffer merges on it.", - space->name, - (ulong) space->n_pending_ops); - } - - return(count + 1); - } - - return(0); -} - -/*******************************************************************//** -Check for pending IO. -@return 0 if no pending else count + 1. */ -static -ulint -fil_check_pending_io( -/*=================*/ - fil_space_t* space, /*!< in/out: Tablespace to check */ - fil_node_t** node, /*!< out: Node in space list */ - ulint count) /*!< in: number of attempts so far */ -{ - ut_ad(mutex_own(&fil_system->mutex)); - ut_a(space->n_pending_ops == 0); - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. 
*/ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - - *node = UT_LIST_GET_FIRST(space->chain); - - if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) { - - ut_a(!(*node)->being_extended); - - if (count > 1000) { - ib_logf(IB_LOG_LEVEL_WARN, - "Trying to close/delete tablespace '%s' " - "but there are %lu flushes " - " and %lu pending i/o's on it.", - space->name, - (ulong) space->n_pending_flushes, - (ulong) (*node)->n_pending); - } - - return(count + 1); - } - - return(0); -} - -/*******************************************************************//** -Check pending operations on a tablespace. -@return DB_SUCCESS or error failure. */ -static -dberr_t -fil_check_pending_operations( -/*=========================*/ - ulint id, /*!< in: space id */ - fil_space_t** space, /*!< out: tablespace instance in memory */ - char** path) /*!< out/own: tablespace path */ -{ - ulint count = 0; - - ut_a(id != TRX_SYS_SPACE); - ut_ad(space); - - *space = 0; - - mutex_enter(&fil_system->mutex); - fil_space_t* sp = fil_space_get_by_id(id); - - if (sp) { - sp->stop_new_ops = true; - /* space could be freed by other threads as soon - as n_pending_ops reaches 0, thus increment pending - ops here. */ - sp->n_pending_ops++; - } - - mutex_exit(&fil_system->mutex); - - /* Wait for crypt threads to stop accessing space */ - if (sp) { - fil_space_crypt_close_tablespace(sp); - /* We have "acquired" this space and must - free it now as below we compare n_pending_ops. */ - fil_space_release(sp); - } - - /* Check for pending change buffer merges. */ - - do { - mutex_enter(&fil_system->mutex); - - sp = fil_space_get_by_id(id); - - count = fil_ibuf_check_pending_ops(sp, count); - - mutex_exit(&fil_system->mutex); - - if (count > 0) { - os_thread_sleep(20000); - } - - } while (count > 0); - - /* Check for pending IO. 
*/ - - *path = 0; - - do { - mutex_enter(&fil_system->mutex); - - sp = fil_space_get_by_id(id); - - if (sp == NULL) { - mutex_exit(&fil_system->mutex); - return(DB_TABLESPACE_NOT_FOUND); - } - - fil_node_t* node; - - count = fil_check_pending_io(sp, &node, count); - - if (count == 0) { - *path = mem_strdup(node->name); - } - - mutex_exit(&fil_system->mutex); - - if (count > 0) { - os_thread_sleep(20000); - } - - } while (count > 0); - - ut_ad(sp); - - *space = sp; - return(DB_SUCCESS); -} - -/*******************************************************************//** -Closes a single-table tablespace. The tablespace must be cached in the -memory cache. Free all pages used by the tablespace. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_close_tablespace( -/*=================*/ - trx_t* trx, /*!< in/out: Transaction covering the close */ - ulint id) /*!< in: space id */ -{ - char* path = 0; - fil_space_t* space = 0; - - ut_a(id != TRX_SYS_SPACE); - - dberr_t err = fil_check_pending_operations(id, &space, &path); - - if (err != DB_SUCCESS) { - return(err); - } - - ut_a(space); - ut_a(path != 0); - - rw_lock_x_lock(&space->latch); - -#ifndef UNIV_HOTBACKUP - /* Invalidate in the buffer pool all pages belonging to the - tablespace. Since we have set space->stop_new_ops = TRUE, readahead - or ibuf merge can no longer read more pages of this tablespace to the - buffer pool. Thus we can clean the tablespace out of the buffer pool - completely and permanently. The flag stop_new_ops also prevents - fil_flush() from being applied to this tablespace. */ - - buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx); -#endif - mutex_enter(&fil_system->mutex); - - /* If the free is successful, the X lock will be released before - the space memory data structure is freed. 
*/ - - if (!fil_space_free_and_mutex_exit(id, TRUE)) { - rw_lock_x_unlock(&space->latch); - err = DB_TABLESPACE_NOT_FOUND; - } else { - err = DB_SUCCESS; - } - - /* If it is a delete then also delete any generated files, otherwise - when we drop the database the remove directory will fail. */ - - char* cfg_name = fil_make_cfg_name(path); - - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - - mem_free(path); - mem_free(cfg_name); - - return(err); -} - -/*******************************************************************//** -Deletes a single-table tablespace. The tablespace must be cached in the -memory cache. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_delete_tablespace( -/*==================*/ - ulint id, /*!< in: space id */ - buf_remove_t buf_remove) /*!< in: specify the action to take - on the tables pages in the buffer - pool */ -{ - char* path = 0; - fil_space_t* space = 0; - - ut_a(id != TRX_SYS_SPACE); - - dberr_t err = fil_check_pending_operations(id, &space, &path); - - if (err != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot delete tablespace %lu because it is not " - "found in the tablespace memory cache.", - (ulong) id); - - return(err); - } - - ut_a(space); - ut_a(path != 0); - - /* Important: We rely on the data dictionary mutex to ensure - that a race is not possible here. It should serialize the tablespace - drop/free. We acquire an X latch only to avoid a race condition - when accessing the tablespace instance via: - - fsp_get_available_space_in_free_extents(). - - There our main motivation is to reduce the contention on the - dictionary mutex. */ - - rw_lock_x_lock(&space->latch); - -#ifndef UNIV_HOTBACKUP - /* IMPORTANT: Because we have set space::stop_new_ops there - can't be any new ibuf merges, reads or flushes. We are here - because node::n_pending was zero above. 
However, it is still - possible to have pending read and write requests: - - A read request can happen because the reader thread has - gone through the ::stop_new_ops check in buf_page_init_for_read() - before the flag was set and has not yet incremented ::n_pending - when we checked it above. - - A write request can be issued any time because we don't check - the ::stop_new_ops flag when queueing a block for write. - - We deal with pending write requests in the following function - where we'd minimally evict all dirty pages belonging to this - space from the flush_list. Not that if a block is IO-fixed - we'll wait for IO to complete. - - To deal with potential read requests by checking the - ::stop_new_ops flag in fil_io() */ - - buf_LRU_flush_or_remove_pages(id, buf_remove, 0); - -#endif /* !UNIV_HOTBACKUP */ - - /* If it is a delete then also delete any generated files, otherwise - when we drop the database the remove directory will fail. */ - { - char* cfg_name = fil_make_cfg_name(path); - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - mem_free(cfg_name); - } - - /* Delete the link file pointing to the ibd file we are deleting. */ - if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) { - fil_delete_link_file(space->name); - } - - mutex_enter(&fil_system->mutex); - - /* Double check the sanity of pending ops after reacquiring - the fil_system::mutex. */ - if (fil_space_get_by_id(id)) { - ut_a(space->n_pending_ops == 0); - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - ut_a(node->n_pending == 0); - } - - if (!fil_space_free_and_mutex_exit(id, true)) { - err = DB_TABLESPACE_NOT_FOUND; - } - - if (err != DB_SUCCESS) { - rw_lock_x_unlock(&space->latch); - } else if (!os_file_delete(innodb_file_data_key, path) - && !os_file_delete_if_exists(innodb_file_data_key, path)) { - - /* Note: This is because we have removed the - tablespace instance from the cache. 
*/ - - err = DB_IO_ERROR; - } - - if (err == DB_SUCCESS && !IS_XTRABACKUP()) { -#ifndef UNIV_HOTBACKUP - /* Write a log record about the deletion of the .ibd - file, so that mysqlbackup can replay it in the - --apply-log phase. We use a dummy mtr and the familiar - log write mechanism. */ - mtr_t mtr; - - /* When replaying the operation in mysqlbackup, do not try - to write any log record */ - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr); - mtr_commit(&mtr); -#endif - err = DB_SUCCESS; - } - - mem_free(path); - - return(err); -} - -/*******************************************************************//** -Returns TRUE if a single-table tablespace is being deleted. -@return TRUE if being deleted */ -UNIV_INTERN -ibool -fil_tablespace_is_being_deleted( -/*============================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ibool is_being_deleted; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space != NULL); - - is_being_deleted = space->stop_new_ops; - - mutex_exit(&fil_system->mutex); - - return(is_being_deleted); -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Discards a single-table tablespace. The tablespace must be cached in the -memory cache. Discarding is like deleting a tablespace, but - - 1. We do not drop the table from the data dictionary; - - 2. We remove all insert buffer entries for the tablespace immediately; - in DROP TABLE they are only removed gradually in the background; - - 3. Free all the pages in use by the tablespace. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_discard_tablespace( -/*===================*/ - ulint id) /*!< in: space id */ -{ - dberr_t err; - - switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) { - case DB_SUCCESS: - break; - - case DB_IO_ERROR: - ib_logf(IB_LOG_LEVEL_WARN, - "While deleting tablespace %lu in DISCARD TABLESPACE." 
- " File rename/delete failed: %s", - (ulong) id, ut_strerr(err)); - break; - - case DB_TABLESPACE_NOT_FOUND: - ib_logf(IB_LOG_LEVEL_WARN, - "Cannot delete tablespace %lu in DISCARD " - "TABLESPACE. %s", - (ulong) id, ut_strerr(err)); - break; - - default: - ut_error; - } - - /* Remove all insert buffer entries for the tablespace */ - - ibuf_delete_for_discarded_space(id); - - return(err); -} -#endif /* !UNIV_HOTBACKUP */ - -/*******************************************************************//** -Renames the memory cache structures of a single-table tablespace. -@return TRUE if success */ -static -ibool -fil_rename_tablespace_in_mem( -/*=========================*/ - fil_space_t* space, /*!< in: tablespace memory object */ - fil_node_t* node, /*!< in: file node of that tablespace */ - const char* new_name, /*!< in: new name */ - const char* new_path) /*!< in: new file path */ -{ - fil_space_t* space2; - const char* old_name = space->name; - - ut_ad(mutex_own(&fil_system->mutex)); - - space2 = fil_space_get_by_name(old_name); - if (space != space2) { - fputs("InnoDB: Error: cannot find ", stderr); - ut_print_filename(stderr, old_name); - fputs(" in tablespace memory cache\n", stderr); - - return(FALSE); - } - - space2 = fil_space_get_by_name(new_name); - if (space2 != NULL) { - fputs("InnoDB: Error: ", stderr); - ut_print_filename(stderr, new_name); - fputs(" is already in tablespace memory cache\n", stderr); - - return(FALSE); - } - - HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(space->name), space); - mem_free(space->name); - mem_free(node->name); - - space->name = mem_strdup(new_name); - node->name = mem_strdup(new_path); - - HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash, - ut_fold_string(new_name), space); - return(TRUE); -} - -/*******************************************************************//** -Allocates a file name for a single-table tablespace. The string must be freed -by caller with mem_free(). 
-@return own: file name */ -UNIV_INTERN -char* -fil_make_ibd_name( -/*==============*/ - const char* name, /*!< in: table name or a dir path */ - bool is_full_path) /*!< in: TRUE if it is a dir path */ -{ - char* filename; - ulint namelen = strlen(name); - ulint dirlen = strlen(fil_path_to_mysql_datadir); - ulint pathlen = dirlen + namelen + sizeof "/.ibd"; - - filename = static_cast<char*>(mem_alloc(pathlen)); - - if (is_full_path) { - memcpy(filename, name, namelen); - memcpy(filename + namelen, ".ibd", sizeof ".ibd"); - } else { - ut_snprintf(filename, pathlen, "%s/%s.ibd", - fil_path_to_mysql_datadir, name); - - } - - srv_normalize_path_for_win(filename); - - return(filename); -} - -/*******************************************************************//** -Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link). -The string must be freed by caller with mem_free(). -@return own: file name */ -UNIV_INTERN -char* -fil_make_isl_name( -/*==============*/ - const char* name) /*!< in: table name */ -{ - char* filename; - ulint namelen = strlen(name); - ulint dirlen = strlen(fil_path_to_mysql_datadir); - ulint pathlen = dirlen + namelen + sizeof "/.isl"; - - filename = static_cast<char*>(mem_alloc(pathlen)); - - ut_snprintf(filename, pathlen, "%s/%s.isl", - fil_path_to_mysql_datadir, name); - - srv_normalize_path_for_win(filename); - - return(filename); -} - -/** Test if a tablespace file can be renamed to a new filepath by checking -if that the old filepath exists and the new filepath does not exist. 
-@param[in] space_id tablespace id -@param[in] old_path old filepath -@param[in] new_path new filepath -@param[in] is_discarded whether the tablespace is discarded -@return innodb error code */ -dberr_t -fil_rename_tablespace_check( - ulint space_id, - const char* old_path, - const char* new_path, - bool is_discarded) -{ - ulint exists = false; - os_file_type_t ftype; - - if (!is_discarded - && os_file_status(old_path, &exists, &ftype) - && !exists) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot rename '%s' to '%s' for space ID %lu" - " because the source file does not exist.", - old_path, new_path, space_id); - - return(DB_TABLESPACE_NOT_FOUND); - } - - exists = false; - if (!os_file_status(new_path, &exists, &ftype) || exists) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot rename '%s' to '%s' for space ID %lu" - " because the target file exists." - " Remove the target file and try again.", - old_path, new_path, space_id); - - return(DB_TABLESPACE_EXISTS); - } - - return(DB_SUCCESS); -} - -/*******************************************************************//** -Renames a single-table tablespace. The tablespace must be cached in the -tablespace memory cache. -@return TRUE if success */ -UNIV_INTERN -ibool -fil_rename_tablespace( -/*==================*/ - const char* old_name_in, /*!< in: old table name in the - standard databasename/tablename - format of InnoDB, or NULL if we - do the rename based on the space - id only */ - ulint id, /*!< in: space id */ - const char* new_name, /*!< in: new table name in the - standard databasename/tablename - format of InnoDB */ - const char* new_path_in) /*!< in: new full datafile path - if the tablespace is remotely - located, or NULL if it is located - in the normal data directory. 
*/ -{ - ibool success; - fil_space_t* space; - fil_node_t* node; - ulint count = 0; - char* new_path; - char* old_name; - char* old_path; - const char* not_given = "(name not specified)"; - - ut_a(id != 0); - -retry: - count++; - - if (!(count % 1000)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: problems renaming ", stderr); - ut_print_filename(stderr, - old_name_in ? old_name_in : not_given); - fputs(" to ", stderr); - ut_print_filename(stderr, new_name); - fprintf(stderr, ", %lu iterations\n", (ulong) count); - } - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; ); - - if (space == NULL) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot find space id %lu in the tablespace " - "memory cache, though the table '%s' in a " - "rename operation should have that id.", - (ulong) id, old_name_in ? old_name_in : not_given); - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - if (count > 25000) { - space->stop_ios = FALSE; - mutex_exit(&fil_system->mutex); - - return(FALSE); - } - - /* We temporarily close the .ibd file because we do not trust that - operating systems can rename an open file. For the closing we have to - wait until there are no pending i/o's or flushes on the file. */ - - space->stop_ios = TRUE; - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. 
*/ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - node = UT_LIST_GET_FIRST(space->chain); - - if (node->n_pending > 0 - || node->n_pending_flushes > 0 - || node->being_extended) { - /* There are pending i/o's or flushes or the file is - currently being extended, sleep for a while and - retry */ - - mutex_exit(&fil_system->mutex); - - os_thread_sleep(20000); - - goto retry; - - } else if (node->modification_counter > node->flush_counter) { - /* Flush the space */ - - mutex_exit(&fil_system->mutex); - - os_thread_sleep(20000); - - fil_flush(id); - - goto retry; - - } else if (node->open) { - /* Close the file */ - - fil_node_close_file(node, fil_system); - } - - /* Check that the old name in the space is right */ - - if (old_name_in) { - old_name = mem_strdup(old_name_in); - ut_a(strcmp(space->name, old_name) == 0); - } else { - old_name = mem_strdup(space->name); - } - old_path = mem_strdup(node->name); - - /* Rename the tablespace and the node in the memory cache */ - new_path = new_path_in ? 
mem_strdup(new_path_in) - : fil_make_ibd_name(new_name, false); - - success = fil_rename_tablespace_in_mem( - space, node, new_name, new_path); - - if (success) { - - DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", - goto skip_second_rename; ); - - success = os_file_rename( - innodb_file_data_key, old_path, new_path); - - DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", -skip_second_rename: - success = FALSE; ); - - if (!success) { - /* We have to revert the changes we made - to the tablespace memory cache */ - - ut_a(fil_rename_tablespace_in_mem( - space, node, old_name, old_path)); - } - } - - space->stop_ios = FALSE; - - mutex_exit(&fil_system->mutex); - -#ifndef UNIV_HOTBACKUP - if (success && !recv_recovery_on && !IS_XTRABACKUP()) { - mtr_t mtr; - - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name, - &mtr); - mtr_commit(&mtr); - } -#endif /* !UNIV_HOTBACKUP */ - - mem_free(new_path); - mem_free(old_path); - mem_free(old_name); - - return(success); -} - -/*******************************************************************//** -Creates a new InnoDB Symbolic Link (ISL) file. It is always created -under the 'datadir' of MySQL. The datadir is the directory of a -running mysqld program. We can refer to it by simply using the path '.'. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_link_file( -/*=================*/ - const char* tablename, /*!< in: tablename */ - const char* filepath) /*!< in: pathname of tablespace */ -{ - dberr_t err = DB_SUCCESS; - char* link_filepath; - char* prev_filepath = fil_read_link_file(tablename); - - ut_ad(!srv_read_only_mode); - - if (prev_filepath) { - /* Truncate will call this with an existing - link file which contains the same filepath. */ - if (0 == strcmp(prev_filepath, filepath)) { - mem_free(prev_filepath); - return(DB_SUCCESS); - } - mem_free(prev_filepath); - } - - link_filepath = fil_make_isl_name(tablename); - - /** Check if the file already exists. 
*/ - FILE* file = NULL; - ibool exists; - os_file_type_t ftype; - - bool success = os_file_status(link_filepath, &exists, &ftype); - - ulint error = 0; - if (success && !exists) { - file = fopen(link_filepath, "w"); - if (file == NULL) { - /* This call will print its own error message */ - error = os_file_get_last_error(true); - } - } else { - error = OS_FILE_ALREADY_EXISTS; - } - if (error != 0) { - - ut_print_timestamp(stderr); - fputs(" InnoDB: Cannot create file ", stderr); - ut_print_filename(stderr, link_filepath); - fputs(".\n", stderr); - - if (error == OS_FILE_ALREADY_EXISTS) { - fputs("InnoDB: The link file: ", stderr); - ut_print_filename(stderr, filepath); - fputs(" already exists.\n", stderr); - err = DB_TABLESPACE_EXISTS; - } else if (error == OS_FILE_DISK_FULL) { - err = DB_OUT_OF_FILE_SPACE; - } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { - err = DB_UNSUPPORTED; - } else { - err = DB_ERROR; - } - - /* file is not open, no need to close it. */ - mem_free(link_filepath); - return(err); - } - - ulint rbytes = fwrite(filepath, 1, strlen(filepath), file); - if (rbytes != strlen(filepath)) { - os_file_get_last_error(true); - ib_logf(IB_LOG_LEVEL_ERROR, - "cannot write link file " - "%s",filepath); - err = DB_ERROR; - } - - /* Close the file, we only need it at startup */ - fclose(file); - - mem_free(link_filepath); - - return(err); -} - -/*******************************************************************//** -Deletes an InnoDB Symbolic Link (ISL) file. */ -UNIV_INTERN -void -fil_delete_link_file( -/*=================*/ - const char* tablename) /*!< in: name of table */ -{ - char* link_filepath = fil_make_isl_name(tablename); - - os_file_delete_if_exists(innodb_file_data_key, link_filepath); - - mem_free(link_filepath); -} - -/*******************************************************************//** -Reads an InnoDB Symbolic Link (ISL) file. -It is always created under the 'datadir' of MySQL. The name is of the -form {databasename}/{tablename}. 
and the isl file is expected to be in a -'{databasename}' directory called '{tablename}.isl'. The caller must free -the memory of the null-terminated path returned if it is not null. -@return own: filepath found in link file, NULL if not found. */ -UNIV_INTERN -char* -fil_read_link_file( -/*===============*/ - const char* name) /*!< in: tablespace name */ -{ - char* filepath = NULL; - char* link_filepath; - FILE* file = NULL; - - /* The .isl file is in the 'normal' tablespace location. */ - link_filepath = fil_make_isl_name(name); - - file = fopen(link_filepath, "r+b"); - - mem_free(link_filepath); - - if (file) { - filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH)); - - os_file_read_string(file, filepath, OS_FILE_MAX_PATH); - fclose(file); - - if (strlen(filepath)) { - /* Trim whitespace from end of filepath */ - ulint lastch = strlen(filepath) - 1; - while (lastch > 4 && filepath[lastch] <= 0x20) { - filepath[lastch--] = 0x00; - } - srv_normalize_path_for_win(filepath); - } - } - - return(filepath); -} - -/*******************************************************************//** -Opens a handle to the file linked to in an InnoDB Symbolic Link file. -@return TRUE if remote linked tablespace file is found and opened. */ -UNIV_INTERN -ibool -fil_open_linked_file( -/*===============*/ - const char* tablename, /*!< in: database/tablename */ - char** remote_filepath,/*!< out: remote filepath */ - pfs_os_file_t* remote_file, /*!< out: remote file handle */ - ulint atomic_writes) /*!< in: atomic writes table option - value */ -{ - ibool success; - - *remote_filepath = fil_read_link_file(tablename); - if (*remote_filepath == NULL) { - return(FALSE); - } - - /* The filepath provided is different from what was - found in the link file. 
*/ - *remote_file = os_file_create_simple_no_error_handling( - innodb_file_data_key, *remote_filepath, - OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success, atomic_writes); - - if (!success) { - char* link_filepath = fil_make_isl_name(tablename); - - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "A link file was found named '%s' " - "but the linked tablespace '%s' " - "could not be opened.", - link_filepath, *remote_filepath); - - mem_free(link_filepath); - mem_free(*remote_filepath); - *remote_filepath = NULL; - } - - return(success); -} - -/*******************************************************************//** -Creates a new single-table tablespace to a database directory of MySQL. -Database directories are under the 'datadir' of MySQL. The datadir is the -directory of a running mysqld program. We can refer to it by simply the -path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp -dir of the mysqld server. - -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_new_single_table_tablespace( -/*===================================*/ - ulint space_id, /*!< in: space id */ - const char* tablename, /*!< in: the table name in the usual - databasename/tablename format - of InnoDB */ - const char* dir_path, /*!< in: NULL or a dir path */ - ulint flags, /*!< in: tablespace flags */ - ulint flags2, /*!< in: table flags2 */ - ulint size, /*!< in: the initial size of the - tablespace file in pages, - must be >= FIL_IBD_FILE_INITIAL_SIZE */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ -{ - pfs_os_file_t file; - - ibool ret; - dberr_t err; - byte* buf2; - byte* page; - char* path; - ibool success; - /* TRUE if a table is created with CREATE TEMPORARY TABLE */ - bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); - - - /* For XtraBackup recovery we force remote tablespaces to be local, - i.e. 
never execute the code path corresponding to has_data_dir == true. - We don't create .isl files either, because we rely on innobackupex to - copy them under a global lock, and use them to copy remote tablespaces - to their proper locations on --copy-back. - - See also MySQL bug #72022: dir_path is always NULL for remote - tablespaces when a MLOG_FILE_CREATE* log record is replayed (the remote - directory is not available from MLOG_FILE_CREATE*). */ - bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags) != 0 && !IS_XTRABACKUP(); - ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); - fil_space_crypt_t *crypt_data = NULL; - - ut_a(space_id > 0); - ut_ad(!srv_read_only_mode); - ut_a(space_id < SRV_LOG_SPACE_FIRST_ID); - ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); - ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK)); - - if (is_temp) { - /* Temporary table filepath */ - ut_ad(dir_path); - path = fil_make_ibd_name(dir_path, true); - } else if (has_data_dir) { - ut_ad(dir_path); - path = os_file_make_remote_pathname(dir_path, tablename, "ibd"); - - /* Since this tablespace file will be created in a - remote directory, let's create the subdirectories - in the path, if they are not there already. */ - success = os_file_create_subdirs_if_needed(path); - if (!success) { - err = DB_ERROR; - goto error_exit_3; - } - } else { - path = fil_make_ibd_name(tablename, false); - } - - file = os_file_create( - innodb_file_data_key, path, - OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, - OS_FILE_NORMAL, - OS_DATA_FILE, - &ret, - atomic_writes); - - if (ret == FALSE) { - /* The following call will print an error message */ - ulint error = os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create file '%s'\n", path); - - if (error == OS_FILE_ALREADY_EXISTS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "The file '%s' already exists though the " - "corresponding table did not exist " - "in the InnoDB data dictionary. 
" - "Have you moved InnoDB .ibd files " - "around without using the SQL commands " - "DISCARD TABLESPACE and IMPORT TABLESPACE, " - "or did mysqld crash in the middle of " - "CREATE TABLE? " - "You can resolve the problem by removing " - "the file '%s' under the 'datadir' of MySQL.", - path, path); - - err = DB_TABLESPACE_EXISTS; - goto error_exit_3; - } - - if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { - err = DB_UNSUPPORTED; - goto error_exit_3; - } - - if (error == OS_FILE_DISK_FULL) { - err = DB_OUT_OF_FILE_SPACE; - goto error_exit_3; - } - - err = DB_ERROR; - goto error_exit_3; - } - - { - /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes. - fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes. - Do not create too short ROW_FORMAT=COMPRESSED files. */ - const ulint zip_size = fsp_flags_get_zip_size(flags); - const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE; - const os_offset_t fsize = std::max( - os_offset_t(size) * page_size, - os_offset_t(FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - /* ROW_FORMAT=COMPRESSED files never use page_compression - (are never sparse). */ - ut_ad(!zip_size || !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)); - - ret = os_file_set_size(path, file, fsize, - FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)); - } - - if (!ret) { - err = DB_OUT_OF_FILE_SPACE; - goto error_exit_2; - } - - /* printf("Creating tablespace %s id %lu\n", path, space_id); */ - - /* We have to write the space id to the file immediately and flush the - file to disk. This is because in crash recovery we must be aware what - tablespaces exist and what are their space id's, so that we can apply - the log records to the right file. It may take quite a while until - buffer pool flush algorithms write anything to the file and flush it to - disk. If we would not write here anything, the file would be filled - with zeros from the call of os_file_set_size(), until a buffer pool - flush would write to it. 
*/ - - buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE)); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - - memset(page, '\0', UNIV_PAGE_SIZE); - - flags |= FSP_FLAGS_PAGE_SSIZE(); - fsp_header_init_fields(page, space_id, flags); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); - - if (const ulint zip_size = fsp_flags_get_zip_size(flags)) { - page_zip_des_t page_zip; - - page_zip_set_size(&page_zip, zip_size); - page_zip.data = page + UNIV_PAGE_SIZE; -#ifdef UNIV_DEBUG - page_zip.m_start = -#endif /* UNIV_DEBUG */ - page_zip.m_end = page_zip.m_nonempty = - page_zip.n_blobs = 0; - buf_flush_init_for_writing(page, &page_zip, 0); - ret = os_file_write(path, file, page_zip.data, 0, zip_size); - } else { - buf_flush_init_for_writing(page, NULL, 0); - ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE); - } - - ut_free(buf2); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Could not write the first page to tablespace " - "'%s'", path); - - err = DB_ERROR; - goto error_exit_2; - } - - ret = os_file_flush(file); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, - "File flush of tablespace '%s' failed", path); - err = DB_ERROR; - goto error_exit_2; - } - - if (has_data_dir) { - /* Now that the IBD file is created, make the ISL file. */ - err = fil_create_link_file(tablename, path); - if (err != DB_SUCCESS) { - goto error_exit_2; - } - } - - /* Create crypt data if the tablespace is either encrypted or user has - requested it to remain unencrypted. 
*/ - if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF || - srv_encrypt_tables) { - crypt_data = fil_space_create_crypt_data(mode, key_id); - } - - success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE, - crypt_data, true, mode); - - if (!success || !fil_node_create(path, size, space_id, FALSE)) { - err = DB_ERROR; - goto error_exit_1; - } - -#ifndef UNIV_HOTBACKUP - if (!IS_XTRABACKUP()) - { - mtr_t mtr; - ulint mlog_file_flag = 0; - - if (is_temp) { - mlog_file_flag |= MLOG_FILE_FLAG_TEMP; - } - - mtr_start(&mtr); - - fil_op_write_log(flags - ? MLOG_FILE_CREATE2 - : MLOG_FILE_CREATE, - space_id, mlog_file_flag, - flags & ~FSP_FLAGS_MEM_MASK, - tablename, NULL, &mtr); - - mtr_commit(&mtr); - } -#endif - err = DB_SUCCESS; - - /* Error code is set. Cleanup the various variables used. - These labels reflect the order in which variables are assigned or - actions are done. */ -error_exit_1: - if (has_data_dir && err != DB_SUCCESS) { - fil_delete_link_file(tablename); - } -error_exit_2: - os_file_close(file); - if (err != DB_SUCCESS) { - os_file_delete(innodb_file_data_key, path); - } -error_exit_3: - mem_free(path); - - return(err); -} - -#include "pars0pars.h" -#include "que0que.h" -#include "dict0priv.h" -static -void -fil_remove_invalid_table_from_data_dict(const char *name) -{ - trx_t* trx; - pars_info_t* info = NULL; - - trx = trx_allocate_for_mysql(); - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - - ut_ad(mutex_own(&dict_sys->mutex)); - - trx->op_info = "removing invalid table from data dictionary"; - - info = pars_info_create(); - - pars_info_add_str_literal(info, "table_name", name); - - que_eval_sql(info, - "PROCEDURE DROP_TABLE_PROC () IS\n" - "sys_foreign_id CHAR;\n" - "table_id CHAR;\n" - "index_id CHAR;\n" - "foreign_id CHAR;\n" - "found INT;\n" - - "DECLARE CURSOR cur_fk IS\n" - "SELECT ID FROM SYS_FOREIGN\n" - "WHERE FOR_NAME = :table_name\n" - "AND TO_BINARY(FOR_NAME)\n" - " = TO_BINARY(:table_name)\n" - "LOCK IN SHARE MODE;\n" 
- - "DECLARE CURSOR cur_idx IS\n" - "SELECT ID FROM SYS_INDEXES\n" - "WHERE TABLE_ID = table_id\n" - "LOCK IN SHARE MODE;\n" - - "BEGIN\n" - "SELECT ID INTO table_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = :table_name\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " RETURN;\n" - "END IF;\n" - "found := 1;\n" - "SELECT ID INTO sys_foreign_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = 'SYS_FOREIGN'\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - "END IF;\n" - "IF (:table_name = 'SYS_FOREIGN') THEN\n" - " found := 0;\n" - "END IF;\n" - "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n" - " found := 0;\n" - "END IF;\n" - "OPEN cur_fk;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur_fk INTO foreign_id;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FOREIGN_COLS\n" - " WHERE ID = foreign_id;\n" - " DELETE FROM SYS_FOREIGN\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur_fk;\n" - "found := 1;\n" - "OPEN cur_idx;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur_idx INTO index_id;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FIELDS\n" - " WHERE INDEX_ID = index_id;\n" - " DELETE FROM SYS_INDEXES\n" - " WHERE ID = index_id\n" - " AND TABLE_ID = table_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur_idx;\n" - "DELETE FROM SYS_COLUMNS\n" - "WHERE TABLE_ID = table_id;\n" - "DELETE FROM SYS_TABLES\n" - "WHERE NAME = :table_name;\n" - "END;\n" - , FALSE, trx); - - /* SYS_DATAFILES and SYS_TABLESPACES do not necessarily exist - on XtraBackup recovery. See comments around - dict_create_or_check_foreign_constraint_tables() in - innobase_start_or_create_for_mysql(). 
*/ - if (dict_table_get_low("SYS_DATAFILES") != NULL) { - info = pars_info_create(); - - pars_info_add_str_literal(info, "table_name", name); - - que_eval_sql(info, - "PROCEDURE DROP_TABLE_PROC () IS\n" - "space_id INT;\n" - - "BEGIN\n" - "SELECT SPACE INTO space_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = :table_name;\n" - "IF (SQL % NOTFOUND) THEN\n" - " RETURN;\n" - "END IF;\n" - "DELETE FROM SYS_TABLESPACES\n" - "WHERE SPACE = space_id;\n" - "DELETE FROM SYS_DATAFILES\n" - "WHERE SPACE = space_id;\n" - "END;\n" - , FALSE, trx); - } - - trx_commit_for_mysql(trx); - - trx_free_for_mysql(trx); -} - - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Report information about a bad tablespace. */ -static -void -fil_report_bad_tablespace( -/*======================*/ - const char* filepath, /*!< in: filepath */ - const char* check_msg, /*!< in: fil_check_first_page() */ - ulint found_id, /*!< in: found space ID */ - ulint found_flags, /*!< in: found flags */ - ulint expected_id, /*!< in: expected space id */ - ulint expected_flags) /*!< in: expected flags */ -{ - if (check_msg) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error %s in file '%s'," - "tablespace id=%lu, flags=%lu. " - "Please refer to " - REFMAN "innodb-troubleshooting-datadict.html " - "for how to resolve the issue.", - check_msg, filepath, - (ulong) expected_id, (ulong) expected_flags); - return; - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "In file '%s', tablespace id and flags are %lu and %lu, " - "but in the InnoDB data dictionary they are %lu and %lu. " - "Have you moved InnoDB .ibd files around without using the " - "commands DISCARD TABLESPACE and IMPORT TABLESPACE? " - "Please refer to " - REFMAN "innodb-troubleshooting-datadict.html " - "for how to resolve the issue.", - filepath, (ulong) found_id, (ulong) found_flags, - (ulong) expected_id, (ulong) expected_flags); -} - -/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations. 
-(Typically when upgrading from MariaDB 10.1.0..10.1.20.) -@param[in] space_id tablespace ID -@param[in] flags desired tablespace flags */ -UNIV_INTERN -void -fsp_flags_try_adjust(ulint space_id, ulint flags) -{ - ut_ad(!srv_read_only_mode); - ut_ad(fsp_flags_is_valid(flags)); - - mtr_t mtr; - mtr_start(&mtr); - if (buf_block_t* b = buf_page_get( - space_id, fsp_flags_get_zip_size(flags), 0, RW_X_LATCH, - &mtr)) { - ulint f = fsp_header_get_flags(b->frame); - /* Suppress the message if only the DATA_DIR flag to differs. */ - if ((f ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) { - ib_logf(IB_LOG_LEVEL_WARN, - "adjusting FSP_SPACE_FLAGS of tablespace " - ULINTPF " from 0x%x to 0x%x", - space_id, int(f), int(flags)); - } - if (f != flags) { - mlog_write_ulint(FSP_HEADER_OFFSET - + FSP_SPACE_FLAGS + b->frame, - flags, MLOG_4BYTES, &mtr); - } - } - - mtr_commit(&mtr); -} - -/********************************************************************//** -Tries to open a single-table tablespace and optionally checks that the -space id in it is correct. If this does not succeed, print an error message -to the .err log. This function is used to open a tablespace when we start -mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE. - -NOTE that we assume this operation is used either at the database startup -or under the protection of the dictionary mutex, so that two users cannot -race here. This operation does not leave the file associated with the -tablespace open, but closes it after we have looked at the space id in it. - -If the validate boolean is set, we read the first page of the file and -check that the space id in the file is what we expect. We assume that -this function runs much faster if no check is made, since accessing the -file inode probably is much faster (the OS caches them) than accessing -the first page of the file. This boolean may be initially FALSE, but if -a remote tablespace is found it will be changed to true. 
- -If the fix_dict boolean is set, then it is safe to use an internal SQL -statement to update the dictionary tables if they are incorrect. - -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_open_single_table_tablespace( -/*=============================*/ - bool validate, /*!< in: Do we validate tablespace? */ - bool fix_dict, /*!< in: Can we fix the dictionary? */ - ulint id, /*!< in: space id */ - ulint flags, /*!< in: expected FSP_SPACE_FLAGS */ - const char* tablename, /*!< in: table name in the - databasename/tablename format */ - const char* path_in) /*!< in: table */ -{ - dberr_t err = DB_SUCCESS; - bool dict_filepath_same_as_default = false; - bool link_file_found = false; - bool link_file_is_bad = false; - fsp_open_info def; - fsp_open_info dict; - fsp_open_info remote; - ulint tablespaces_found = 0; - ulint valid_tablespaces_found = 0; - ulint atomic_writes = 0; - fil_space_crypt_t* crypt_data = NULL; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex))); - - /* Table flags can be ULINT_UNDEFINED if - dict_tf_to_fsp_flags_failure is set. */ - if (flags == ULINT_UNDEFINED) { - return(DB_CORRUPTION); - } - - ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK)); - atomic_writes = fsp_flags_get_atomic_writes(flags); - - memset(&def, 0, sizeof(def)); - memset(&dict, 0, sizeof(dict)); - memset(&remote, 0, sizeof(remote)); - - /* Discover the correct filepath. We will always look for an ibd - in the default location. If it is remote, it should not be here. */ - def.filepath = fil_make_ibd_name(tablename, false); - - /* The path_in was read from SYS_DATAFILES. - We skip SYS_DATAFILES validation and remote tablespaces discovery for - XtraBackup, as all tablespaces are local for XtraBackup recovery. 
*/ - if (path_in && !IS_XTRABACKUP()) { - if (strcmp(def.filepath, path_in)) { - dict.filepath = mem_strdup(path_in); - /* possibility of multiple files. */ - validate = true; - } else { - dict_filepath_same_as_default = true; - } - } - - link_file_found = fil_open_linked_file( - tablename, &remote.filepath, &remote.file, atomic_writes); - remote.success = link_file_found; - if (remote.success) { - /* possibility of multiple files. */ - validate = true; - tablespaces_found++; - - /* A link file was found. MySQL does not allow a DATA - DIRECTORY to be be the same as the default filepath. */ - ut_a(strcmp(def.filepath, remote.filepath)); - - /* If there was a filepath found in SYS_DATAFILES, - we hope it was the same as this remote.filepath found - in the ISL file. */ - if (dict.filepath - && (0 == strcmp(dict.filepath, remote.filepath))) { - remote.success = FALSE; - os_file_close(remote.file); - mem_free(remote.filepath); - remote.filepath = NULL; - tablespaces_found--; - } - } - - /* Attempt to open the tablespace at other possible filepaths. */ - if (dict.filepath) { - dict.file = os_file_create_simple_no_error_handling( - innodb_file_data_key, dict.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &dict.success, atomic_writes); - if (dict.success) { - /* possibility of multiple files. */ - validate = true; - tablespaces_found++; - } - } - - /* Always look for a file at the default location. */ - ut_a(def.filepath); - def.file = os_file_create_simple_no_error_handling( - innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success, atomic_writes); - - if (def.success) { - tablespaces_found++; - } - - /* We have now checked all possible tablespace locations and - have a count of how many we found. If things are normal, we - only found 1. */ - if (!validate && tablespaces_found == 1) { - goto skip_validate; - } - - /* Read the first page of the datadir tablespace, if found. 
*/ - if (def.success) { - def.check_msg = fil_read_first_page( - def.file, false, &def.flags, &def.id, - NULL, &def.crypt_data); - - def.valid = !def.check_msg && def.id == id - && fsp_flags_match(flags, def.flags); - - if (def.valid) { - valid_tablespaces_found++; - } else { - /* Do not use this tablespace. */ - fil_report_bad_tablespace( - def.filepath, def.check_msg, def.id, - def.flags, id, flags); - } - } - - /* Read the first page of the remote tablespace */ - if (remote.success) { - remote.check_msg = fil_read_first_page( - remote.file, false, &remote.flags, &remote.id, - NULL, &remote.crypt_data); - - /* Validate this single-table-tablespace with SYS_TABLES. */ - remote.valid = !remote.check_msg && remote.id == id - && fsp_flags_match(flags, remote.flags); - - if (remote.valid) { - valid_tablespaces_found++; - } else { - /* Do not use this linked tablespace. */ - fil_report_bad_tablespace( - remote.filepath, remote.check_msg, remote.id, - remote.flags, id, flags); - link_file_is_bad = true; - } - } - - /* Read the first page of the datadir tablespace, if found. */ - if (dict.success) { - dict.check_msg = fil_read_first_page( - dict.file, false, &dict.flags, &dict.id, - NULL, &dict.crypt_data); - - /* Validate this single-table-tablespace with SYS_TABLES. */ - dict.valid = !dict.check_msg && dict.id == id - && fsp_flags_match(flags, dict.flags); - - if (dict.valid) { - valid_tablespaces_found++; - } else { - /* Do not use this tablespace. */ - fil_report_bad_tablespace( - dict.filepath, dict.check_msg, dict.id, - dict.flags, id, flags); - } - } - - /* Make sense of these three possible locations. - First, bail out if no tablespace files were found. */ - if (valid_tablespaces_found == 0) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IS_XTRABACKUP() ? IB_LOG_LEVEL_WARN : IB_LOG_LEVEL_ERROR, - "Could not find a valid tablespace file for '%s'. 
" - "See " REFMAN "innodb-troubleshooting-datadict.html " - "for how to resolve the issue.", - tablename); - - if (IS_XTRABACKUP() && fix_dict) { - ib_logf(IB_LOG_LEVEL_WARN, - "It will be removed from the data dictionary."); - - if (purge_sys) { - fil_remove_invalid_table_from_data_dict(tablename); - } - } - - err = DB_CORRUPTION; - - goto cleanup_and_exit; - } - - /* Do not open any tablespaces if more than one tablespace with - the correct space ID and flags were found. */ - if (tablespaces_found > 1) { - ib_logf(IB_LOG_LEVEL_ERROR, - "A tablespace for %s has been found in " - "multiple places;", tablename); - - if (def.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Default location; %s" - ", Space ID=" ULINTPF " , Flags=" ULINTPF " .", - def.filepath, - def.id, - def.flags); - } - - if (remote.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Remote location; %s" - ", Space ID=" ULINTPF " , Flags=" ULINTPF " .", - remote.filepath, - remote.id, - remote.flags); - } - - if (dict.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Dictionary location; %s" - ", Space ID=" ULINTPF " , Flags=" ULINTPF " .", - dict.filepath, - dict.id, - dict.flags); - } - - /* Force-recovery will allow some tablespaces to be - skipped by REDO if there was more than one file found. - Unlike during the REDO phase of recovery, we now know - if the tablespace is valid according to the dictionary, - which was not available then. So if we did not force - recovery and there is only one good tablespace, ignore - any bad tablespaces. */ - if (valid_tablespaces_found > 1 || srv_force_recovery > 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Will not open the tablespace for '%s'", - tablename); - - if (def.success != def.valid - || dict.success != dict.valid - || remote.success != remote.valid) { - err = DB_CORRUPTION; - } else { - err = DB_ERROR; - } - goto cleanup_and_exit; - } - - /* There is only one valid tablespace found and we did - not use srv_force_recovery during REDO. 
Use this one - tablespace and clean up invalid tablespace pointers */ - if (def.success && !def.valid) { - def.success = false; - os_file_close(def.file); - tablespaces_found--; - } - - if (dict.success && !dict.valid) { - dict.success = false; - os_file_close(dict.file); - /* Leave dict.filepath so that SYS_DATAFILES - can be corrected below. */ - tablespaces_found--; - } - if (remote.success && !remote.valid) { - remote.success = false; - os_file_close(remote.file); - mem_free(remote.filepath); - remote.filepath = NULL; - tablespaces_found--; - } - } - - /* At this point, there should be only one filepath. */ - ut_a(tablespaces_found == 1); - ut_a(valid_tablespaces_found == 1); - - /* Only fix the dictionary at startup when there is only one thread. - Calls to dict_load_table() can be done while holding other latches. */ - if (!fix_dict) { - goto skip_validate; - } - - /* We may need to change what is stored in SYS_DATAFILES or - SYS_TABLESPACES or adjust the link file. - Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does - not prevent opening and using the single_table_tablespace either - this time or the next, we do not check the return code or fail - to open the tablespace. But dict_update_filepath() will issue a - warning to the log. */ - if (dict.filepath) { - if (remote.success) { - dict_update_filepath(id, remote.filepath); - } else if (def.success) { - dict_update_filepath(id, def.filepath); - if (link_file_is_bad) { - fil_delete_link_file(tablename); - } - } else if (!link_file_found || link_file_is_bad) { - ut_ad(dict.success); - /* Fix the link file if we got our filepath - from the dictionary but a link file did not - exist or it did not point to a valid file. 
*/ - fil_delete_link_file(tablename); - fil_create_link_file(tablename, dict.filepath); - } - - } else if (remote.success && dict_filepath_same_as_default) { - dict_update_filepath(id, remote.filepath); - - } else if (remote.success && path_in == NULL) { - /* SYS_DATAFILES record for this space ID was not found. */ - dict_insert_tablespace_and_filepath( - id, tablename, remote.filepath, flags); - } - -skip_validate: - if (remote.success) - crypt_data = remote.crypt_data; - else if (dict.success) - crypt_data = dict.crypt_data; - else if (def.success) - crypt_data = def.crypt_data; - - if (err != DB_SUCCESS) { - ; // Don't load the tablespace into the cache - } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE, - crypt_data, false)) { - err = DB_ERROR; - } else { - /* We do not measure the size of the file, that is why - we pass the 0 below */ - - if (!fil_node_create(remote.success ? remote.filepath : - dict.success ? dict.filepath : - def.filepath, 0, id, FALSE)) { - err = DB_ERROR; - } - } - -cleanup_and_exit: - if (remote.success) { - os_file_close(remote.file); - } - if (remote.filepath) { - mem_free(remote.filepath); - } - if (remote.crypt_data && remote.crypt_data != crypt_data) { - if (err == DB_SUCCESS) { - fil_space_destroy_crypt_data(&remote.crypt_data); - } - } - if (dict.success) { - os_file_close(dict.file); - } - if (dict.filepath) { - mem_free(dict.filepath); - } - if (dict.crypt_data && dict.crypt_data != crypt_data) { - fil_space_destroy_crypt_data(&dict.crypt_data); - } - if (def.success) { - os_file_close(def.file); - } - if (def.crypt_data && def.crypt_data != crypt_data) { - if (err == DB_SUCCESS) { - fil_space_destroy_crypt_data(&def.crypt_data); - } - } - - mem_free(def.filepath); - - /* We need to check fsp flags when no errors has happened and - server was not started on read only mode and tablespace validation - was requested or flags contain other table options except - low order bits to FSP_FLAGS_POS_PAGE_SSIZE position. 
- Note that flag comparison is pessimistic. Adjust is required - only when flags contain buggy MariaDB 10.1.0 - - MariaDB 10.1.20 flags. */ - if (err == DB_SUCCESS - && !srv_read_only_mode - && (validate - || flags >= (1U << FSP_FLAGS_POS_PAGE_SSIZE))) { - fsp_flags_try_adjust(id, flags & ~FSP_FLAGS_MEM_MASK); - } - - return(err); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_HOTBACKUP -/*******************************************************************//** -Allocates a file name for an old version of a single-table tablespace. -The string must be freed by caller with mem_free()! -@return own: file name */ -static -char* -fil_make_ibbackup_old_name( -/*=======================*/ - const char* name) /*!< in: original file name */ -{ - static const char suffix[] = "_ibbackup_old_vers_"; - char* path; - ulint len = strlen(name); - - path = static_cast<char*>(mem_alloc(len + (15 + sizeof suffix))); - - memcpy(path, name, len); - memcpy(path + len, suffix, (sizeof suffix) - 1); - ut_sprintf_timestamp_without_extra_chars( - path + len + ((sizeof suffix) - 1)); - return(path); -} -#endif /* UNIV_HOTBACKUP */ - - -/*******************************************************************//** -Determine the space id of the given file descriptor by reading a few -pages from the beginning of the .ibd file. -@return true if space id was successfully identified, or false. */ -static -bool -fil_user_tablespace_find_space_id( -/*==============================*/ - fsp_open_info* fsp) /* in/out: contains file descriptor, which is - used as input. contains space_id, which is - the output */ -{ - bool st; - os_offset_t file_size; - - file_size = os_file_get_size(fsp->file); - - if (file_size == (os_offset_t) -1) { - ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s", - fsp->filepath); - return(false); - } - - /* Assuming a page size, read the space_id from each page and store it - in a map. Find out which space_id is agreed on by majority of the - pages. Choose that space_id. 
*/ - for (ulint page_size = UNIV_ZIP_SIZE_MIN; - page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) { - - /* map[space_id] = count of pages */ - std::map<ulint, ulint> verify; - - ulint page_count = 64; - ulint valid_pages = 0; - - /* Adjust the number of pages to analyze based on file size */ - while ((page_count * page_size) > file_size) { - --page_count; - } - - ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:" - "%lu", page_size, page_count); - - byte* buf = static_cast<byte*>(ut_malloc(2*page_size)); - byte* page = static_cast<byte*>(ut_align(buf, page_size)); - - for (ulint j = 0; j < page_count; ++j) { - - st = os_file_read(fsp->file, page, (j* page_size), page_size); - - if (!st) { - ib_logf(IB_LOG_LEVEL_INFO, - "READ FAIL: page_no:%lu", j); - continue; - } - - bool uncompressed_ok = false; - - /* For uncompressed pages, the page size must be equal - to UNIV_PAGE_SIZE. */ - if (page_size == UNIV_PAGE_SIZE) { - uncompressed_ok = !buf_page_is_corrupted( - false, page, 0, NULL); - } - - bool compressed_ok = false; - if (page_size <= UNIV_PAGE_SIZE_DEF) { - compressed_ok = !buf_page_is_corrupted( - false, page, page_size, NULL); - } - - if (uncompressed_ok || compressed_ok) { - - ulint space_id = mach_read_from_4(page - + FIL_PAGE_SPACE_ID); - - if (space_id > 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "VALID: space:%lu " - "page_no:%lu page_size:%lu", - space_id, j, page_size); - verify[space_id]++; - ++valid_pages; - } - } - } - - ut_free(buf); - - ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id " - "count:%lu", page_size, (ulint) verify.size()); - - const ulint pages_corrupted = 3; - for (ulint missed = 0; missed <= pages_corrupted; ++missed) { - - for (std::map<ulint, ulint>::iterator - m = verify.begin(); m != verify.end(); ++m ) { - - ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, " - "Number of pages matched: %lu/%lu " - "(%lu)", m->first, m->second, - valid_pages, page_size); - - if (m->second == (valid_pages - missed)) { - - 
ib_logf(IB_LOG_LEVEL_INFO, - "Chosen space:%lu\n", m->first); - - fsp->id = m->first; - return(true); - } - } - - } - } - - return(false); -} - -/*******************************************************************//** -Finds the given page_no of the given space id from the double write buffer, -and copies it to the corresponding .ibd file. -@return true if copy was successful, or false. */ -bool -fil_user_tablespace_restore_page( -/*==============================*/ - fsp_open_info* fsp, /* in: contains space id and .ibd - file information */ - ulint page_no) /* in: page_no to obtain from double - write buffer */ -{ - bool err; - ulint flags; - ulint zip_size; - ulint page_size; - ulint buflen; - byte* page; - - ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu", - page_no, fsp->id); - - // find if double write buffer has page_no of given space id - page = recv_sys->dblwr.find_page(fsp->id, page_no); - - if (!page) { - ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have " - "page_no=%lu of space: %lu", page_no, fsp->id); - err = false; - goto out; - } - - flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); - - if (!fsp_flags_is_valid(flags)) { - ulint cflags = fsp_flags_convert_from_101(flags); - if (cflags == ULINT_UNDEFINED) { - ib_logf(IB_LOG_LEVEL_WARN, - "Ignoring a doublewrite copy of page " - ULINTPF ":" ULINTPF - " due to invalid flags 0x%x", - fsp->id, page_no, int(flags)); - err = false; - goto out; - } - flags = cflags; - /* The flags on the page should be converted later. */ - } - - zip_size = fsp_flags_get_zip_size(flags); - page_size = fsp_flags_get_page_size(flags); - - ut_ad(page_no == page_get_page_no(page)); - - buflen = zip_size ? zip_size: page_size; - - ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s", - buflen, fsp->filepath); - - err = os_file_write(fsp->filepath, fsp->file, page, - (zip_size ? 
zip_size : page_size) * page_no, - buflen); - - os_file_flush(fsp->file); -out: - return(err); -} - -/********************************************************************//** -Opens an .ibd file and adds the associated single-table tablespace to the -InnoDB fil0fil.cc data structures. -Set fsp->success to TRUE if tablespace is valid, FALSE if not. */ -static -void -fil_validate_single_table_tablespace( -/*=================================*/ - const char* tablename, /*!< in: database/tablename */ - fsp_open_info* fsp) /*!< in/out: tablespace info */ -{ - bool restore_attempted = false; - -check_first_page: - fsp->success = TRUE; - if (const char* check_msg = fil_read_first_page( - fsp->file, false, &fsp->flags, &fsp->id, - NULL, &fsp->crypt_data)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "%s in tablespace %s (table %s)", - check_msg, fsp->filepath, tablename); - fsp->success = FALSE; - } - - if (!fsp->success) { - if (IS_XTRABACKUP()) { - /* Do not attempt restore from doublewrite buffer - in Xtrabackup, this does not work.*/ - return; - } - - if (!restore_attempted) { - if (!fil_user_tablespace_find_space_id(fsp)) { - return; - } - restore_attempted = true; - - if (fsp->id > 0 - && !fil_user_tablespace_restore_page(fsp, 0)) { - return; - } - goto check_first_page; - } - return; - } - - if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace is not sensible;" - " Table: %s Space ID: %lu Filepath: %s\n", - tablename, (ulong) fsp->id, fsp->filepath); - fsp->success = FALSE; - return; - } - - mutex_enter(&fil_system->mutex); - fil_space_t* space = fil_space_get_by_id(fsp->id); - mutex_exit(&fil_system->mutex); - if (space != NULL) { - char* prev_filepath = fil_space_get_first_path(fsp->id); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Attempted to open a previously opened tablespace. " - "Previous tablespace %s uses space ID: %lu at " - "filepath: %s. 
Cannot open tablespace %s which uses " - "space ID: %lu at filepath: %s", - space->name, (ulong) space->id, prev_filepath, - tablename, (ulong) fsp->id, fsp->filepath); - - mem_free(prev_filepath); - fsp->success = FALSE; - return; - } - - fsp->success = TRUE; -} - - -/********************************************************************//** -Opens an .ibd file and adds the associated single-table tablespace to the -InnoDB fil0fil.cc data structures. */ -static -void -fil_load_single_table_tablespace( -/*=============================*/ - const char* dbname, /*!< in: database name */ - const char* filename) /*!< in: file name (not a path), - including the .ibd or .isl extension */ -{ - char* tablename; - ulint tablename_len; - ulint dbname_len = strlen(dbname); - ulint filename_len = strlen(filename); - fsp_open_info def; - fsp_open_info remote; - os_offset_t size; - fil_space_t* space; - - fsp_open_info* fsp; - ulong minimum_size; - ibool file_space_create_success; - - memset(&def, 0, sizeof(def)); - memset(&remote, 0, sizeof(remote)); - - /* The caller assured that the extension is ".ibd" or ".isl". */ - ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4) - || 0 == memcmp(filename + filename_len - 4, ".isl", 4)); - - /* Build up the tablename in the standard form database/table. */ - tablename = static_cast<char*>( - mem_alloc(dbname_len + filename_len + 2)); - - /* When lower_case_table_names = 2 it is possible that the - dbname is in upper case ,but while storing it in fil_space_t - we must convert it into lower case */ - sprintf(tablename, "%s" , dbname); - tablename[dbname_len] = '\0'; - - if (lower_case_file_system) { - dict_casedn_str(tablename); - } - - sprintf(tablename+dbname_len,"/%s",filename); - tablename_len = strlen(tablename) - strlen(".ibd"); - tablename[tablename_len] = '\0'; - - /* There may be both .ibd and .isl file in the directory. - And it is possible that the .isl file refers to a different - .ibd file. 
If so, we open and compare them the first time - one of them is sent to this function. So if this table has - already been loaded, there is nothing to do.*/ - mutex_enter(&fil_system->mutex); - space = fil_space_get_by_name(tablename); - if (space) { - mem_free(tablename); - mutex_exit(&fil_system->mutex); - return; - } - mutex_exit(&fil_system->mutex); - - /* Build up the filepath of the .ibd tablespace in the datadir. - This must be freed independent of def.success. */ - def.filepath = fil_make_ibd_name(tablename, false); - -#ifdef __WIN__ -# ifndef UNIV_HOTBACKUP - /* If lower_case_table_names is 0 or 2, then MySQL allows database - directory names with upper case letters. On Windows, all table and - database names in InnoDB are internally always in lower case. Put the - file path to lower case, so that we are consistent with InnoDB's - internal data dictionary. */ - - dict_casedn_str(def.filepath); -# endif /* !UNIV_HOTBACKUP */ -#endif - - - /* Check for a link file which locates a remote tablespace. */ - remote.success = fil_open_linked_file( - tablename, &remote.filepath, &remote.file, FALSE); - - /* Read the first page of the remote tablespace */ - if (remote.success) { - fil_validate_single_table_tablespace(tablename, &remote); - if (!remote.success) { - os_file_close(remote.file); - mem_free(remote.filepath); - - if (srv_backup_mode && (remote.id == ULINT_UNDEFINED - || remote.id == 0)) { - - /* Ignore files that have uninitialized space - IDs on the backup stage. This means that a - tablespace has just been created and we will - replay the corresponding log records on - prepare. */ - goto func_exit_after_close; - } - } - } - - - /* Try to open the tablespace in the datadir. 
*/ - def.file = os_file_create_simple_no_error_handling( - innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &def.success, FALSE); - - /* Read the first page of the remote tablespace */ - if (def.success) { - fil_validate_single_table_tablespace(tablename, &def); - if (!def.success) { - os_file_close(def.file); - - if (IS_XTRABACKUP() && srv_backup_mode && (def.id == ULINT_UNDEFINED - || def.id == 0)) { - - /* Ignore files that have uninitialized space - IDs on the backup stage. This means that a - tablespace has just been created and we will - replay the corresponding log records on - prepare. */ - - goto func_exit_after_close; - } - } - } - - if (!def.success && !remote.success) { - - /* The following call prints an error message */ - os_file_get_last_error(true); - fprintf(stderr, - "InnoDB: Error: could not open single-table" - " tablespace file %s\n", def.filepath); - - if (!strncmp(filename, - tmp_file_prefix, tmp_file_prefix_length)) { - /* Ignore errors for #sql tablespaces. 
*/ - mem_free(tablename); - if (remote.filepath) { - mem_free(remote.filepath); - } - if (def.filepath) { - mem_free(def.filepath); - } - return; - } -no_good_file: - fprintf(stderr, - "InnoDB: We do not continue the crash recovery," - " because the table may become\n" - "InnoDB: corrupt if we cannot apply the log" - " records in the InnoDB log to it.\n" - "InnoDB: To fix the problem and start mysqld:\n" - "InnoDB: 1) If there is a permission problem" - " in the file and mysqld cannot\n" - "InnoDB: open the file, you should" - " modify the permissions.\n" - "InnoDB: 2) If the table is not needed, or you" - " can restore it from a backup,\n" - "InnoDB: then you can remove the .ibd file," - " and InnoDB will do a normal\n" - "InnoDB: crash recovery and ignore that table.\n" - "InnoDB: 3) If the file system or the" - " disk is broken, and you cannot remove\n" - "InnoDB: the .ibd file, you can set" - " innodb_force_recovery > 0 in my.cnf\n" - "InnoDB: and force InnoDB to continue crash" - " recovery here.\n"); -will_not_choose: - mem_free(tablename); - if (remote.filepath) { - mem_free(remote.filepath); - } - if (def.filepath) { - mem_free(def.filepath); - } - - if (srv_force_recovery > 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "innodb_force_recovery was set to %lu. " - "Continuing crash recovery even though we " - "cannot access the .ibd file of this table.", - srv_force_recovery); - return; - } - - abort(); - } - - if (def.success && remote.success) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespaces for %s have been found in two places;\n" - "Location 1: SpaceID: " ULINTPF " File: %s\n" - "Location 2: SpaceID: " ULINTPF " File: %s\n" - "You must delete one of them.", - tablename, def.id, - def.filepath, remote.id, - remote.filepath); - - def.success = FALSE; - os_file_close(def.file); - os_file_close(remote.file); - goto will_not_choose; - } - - /* At this point, only one tablespace is open */ - ut_a(def.success == !remote.success); - - fsp = def.success ? 
&def : &remote; - - /* Get and test the file size. */ - size = os_file_get_size(fsp->file); - - if (size == (os_offset_t) -1) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "could not measure the size of single-table " - "tablespace file %s", fsp->filepath); - - os_file_close(fsp->file); - goto no_good_file; - } - - /* Every .ibd file is created >= 4 pages in size. Smaller files - cannot be ok. */ - minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE; - if (size < minimum_size) { - ib_logf(IB_LOG_LEVEL_ERROR, - "The size of single-table tablespace file %s " - "is only " UINT64PF ", should be at least %lu!", - fsp->filepath, size, minimum_size); - os_file_close(fsp->file); - goto no_good_file; - } - -#ifdef UNIV_HOTBACKUP - if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) { - char* new_path; - - fprintf(stderr, - "InnoDB: Renaming tablespace %s of id %lu,\n" - "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n" - "InnoDB: because its size %" PRId64 " is too small" - " (< 4 pages 16 kB each),\n" - "InnoDB: or the space id in the file header" - " is not sensible.\n" - "InnoDB: This can happen in an mysqlbackup run," - " and is not dangerous.\n", - fsp->filepath, fsp->id, fsp->filepath, size); - os_file_close(fsp->file); - - new_path = fil_make_ibbackup_old_name(fsp->filepath); - - bool success = os_file_rename( - innodb_file_data_key, fsp->filepath, new_path); - - ut_a(success); - - mem_free(new_path); - - goto func_exit_after_close; - } - - /* A backup may contain the same space several times, if the space got - renamed at a sensitive time. Since it is enough to have one version of - the space, we rename the file if a space with the same space id - already exists in the tablespace memory cache. We rather rename the - file than delete it, because if there is a bug, we do not want to - destroy valuable data. 
*/ - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(fsp->id); - - if (space) { - char* new_path; - - fprintf(stderr, - "InnoDB: Renaming tablespace %s of id %lu,\n" - "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n" - "InnoDB: because space %s with the same id\n" - "InnoDB: was scanned earlier. This can happen" - " if you have renamed tables\n" - "InnoDB: during an mysqlbackup run.\n", - fsp->filepath, fsp->id, fsp->filepath, - space->name); - os_file_close(fsp->file); - - new_path = fil_make_ibbackup_old_name(fsp->filepath); - - mutex_exit(&fil_system->mutex); - - bool success = os_file_rename( - innodb_file_data_key, fsp->filepath, new_path); - - ut_a(success); - - mem_free(new_path); - - goto func_exit_after_close; - } - mutex_exit(&fil_system->mutex); -#endif /* UNIV_HOTBACKUP */ - - /* Adjust the memory-based flags that would normally be set by - dict_tf_to_fsp_flags(). In recovery, we have no data dictionary. */ - if (FSP_FLAGS_HAS_PAGE_COMPRESSION(fsp->flags)) { - fsp->flags |= page_zip_level - << FSP_FLAGS_MEM_COMPRESSION_LEVEL; - } - remote.flags |= 1U << FSP_FLAGS_MEM_DATA_DIR; - /* We will leave atomic_writes at ATOMIC_WRITES_DEFAULT. - That will be adjusted in fil_space_for_table_exists_in_mem(). */ - - file_space_create_success = fil_space_create( - tablename, fsp->id, fsp->flags, FIL_TABLESPACE, - fsp->crypt_data, false); - - if (!file_space_create_success) { - if (srv_force_recovery > 0) { - fprintf(stderr, - "InnoDB: innodb_force_recovery was set" - " to %lu. Continuing crash recovery\n" - "InnoDB: even though the tablespace" - " creation of this table failed.\n", - srv_force_recovery); - goto func_exit; - } - - /* Exit here with a core dump, stack, etc. */ - ut_a(file_space_create_success); - } - - /* We do not use the size information we have about the file, because - the rounding formula for extents and pages is somewhat complex; we - let fil_node_open() do that task. 
*/ - - if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) { - ut_error; - } - -func_exit: - /* We reuse file handles on the backup stage in XtraBackup to avoid - inconsistencies between the file name and the actual tablespace contents - if a DDL occurs between a fil_load_single_table_tablespaces() call and - the actual copy operation. */ - if (IS_XTRABACKUP() && srv_backup_mode && !srv_close_files) { - - fil_node_t* node; - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(fsp->id); - - if (space) { - node = UT_LIST_GET_LAST(space->chain); - - /* The handle will be closed by xtrabackup in - xtrabackup_copy_datafile(). We set node->open to TRUE to - make sure no one calls fil_node_open_file() - (i.e. attempts to reopen the tablespace by name) during - the backup stage. */ - - node->open = TRUE; - node->handle = fsp->file; - - /* The following is copied from fil_node_open_file() to - pass fil_system validaty checks. We cannot use - fil_node_open_file() directly, as that would re-open the - file by name and create another file handle. */ - - fil_system->n_open++; - fil_n_file_opened++; - - if (fil_space_belongs_in_lru(space)) { - - /* Put the node to the LRU list */ - UT_LIST_ADD_FIRST(LRU, fil_system->LRU, node); - } - } - - mutex_exit(&fil_system->mutex); - } - else { - os_file_close(fsp->file); - } - - -func_exit_after_close: - ut_ad(!mutex_own(&fil_system->mutex)); - - mem_free(tablename); - if (remote.success) { - mem_free(remote.filepath); - } - mem_free(def.filepath); -} - -/***********************************************************************//** -A fault-tolerant function that tries to read the next file name in the -directory. We retry 100 times if os_file_readdir_next_file() returns -1. The -idea is to read as much good data as we can and jump over bad data. 
-@return 0 if ok, -1 if error even after the retries, 1 if at the end -of the directory */ -UNIV_INTERN -int -fil_file_readdir_next_file( -/*=======================*/ - dberr_t* err, /*!< out: this is set to DB_ERROR if an error - was encountered, otherwise not changed */ - const char* dirname,/*!< in: directory name or path */ - os_file_dir_t dir, /*!< in: directory stream */ - os_file_stat_t* info) /*!< in/out: buffer where the - info is returned */ -{ - for (ulint i = 0; i < 100; i++) { - int ret = os_file_readdir_next_file(dirname, dir, info); - - if (ret != -1) { - - return(ret); - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "os_file_readdir_next_file() returned -1 in " - "directory %s, crash recovery may have failed " - "for some .ibd files!", dirname); - - *err = DB_ERROR; - } - - return(-1); -} - - -my_bool(*fil_check_if_skip_database_by_path)(const char* name); - -#define CHECK_TIME_EVERY_N_FILES 10 -/********************************************************************//** -At the server startup, if we need crash recovery, scans the database -directories under the MySQL datadir, looking for .ibd files. Those files are -single-table tablespaces. We need to know the space id in each of them so that -we know into which file we should look to check the contents of a page stored -in the doublewrite buffer, also to know where to apply log records where the -space id is != 0. 
-@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fil_load_single_table_tablespaces(ibool (*pred)(const char*, const char*)) -/*===================================*/ -{ - int ret; - char* dbpath = NULL; - ulint dbpath_len = 100; - ulint files_read = 0; - ulint files_read_at_last_check = 0; - ib_time_t prev_report_time = ut_time(); - os_file_dir_t dir; - os_file_dir_t dbdir; - os_file_stat_t dbinfo; - os_file_stat_t fileinfo; - dberr_t err = DB_SUCCESS; - - /* The datadir of MySQL is always the default directory of mysqld */ - - dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE); - - if (dir == NULL) { - - return(DB_ERROR); - } - - dbpath = static_cast<char*>(mem_alloc(dbpath_len)); - - /* Scan all directories under the datadir. They are the database - directories of MySQL. */ - - ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir, - &dbinfo); - while (ret == 0) { - ulint len; - /* printf("Looking at %s in datadir\n", dbinfo.name); */ - - if (dbinfo.type == OS_FILE_TYPE_FILE - || dbinfo.type == OS_FILE_TYPE_UNKNOWN) { - - goto next_datadir_item; - } - - /* We found a symlink or a directory; try opening it to see - if a symlink is a directory */ - - len = strlen(fil_path_to_mysql_datadir) - + strlen (dbinfo.name) + 2; - if (len > dbpath_len) { - dbpath_len = len; - - if (dbpath) { - mem_free(dbpath); - } - - dbpath = static_cast<char*>(mem_alloc(dbpath_len)); - } - ut_snprintf(dbpath, dbpath_len, - "%s/%s", fil_path_to_mysql_datadir, dbinfo.name); - srv_normalize_path_for_win(dbpath); - - if (IS_XTRABACKUP()) { - ut_a(fil_check_if_skip_database_by_path); - if (fil_check_if_skip_database_by_path(dbpath)) { - fprintf(stderr, "Skipping db: %s\n", dbpath); - dbdir = NULL; - } else { - /* We want wrong directory permissions to be a fatal - error for XtraBackup. 
*/ - dbdir = os_file_opendir(dbpath, TRUE); - } - } else { - dbdir = os_file_opendir(dbpath, FALSE); - } - - if (dbdir != NULL) { - - /* We found a database directory; loop through it, - looking for possible .ibd files in it */ - - ret = fil_file_readdir_next_file(&err, dbpath, dbdir, - &fileinfo); - while (ret == 0) { - - if (fileinfo.type == OS_FILE_TYPE_DIR) { - - goto next_file_item; - } - - /* We found a symlink or a file - - Ignore .isl files on XtraBackup - recovery, all tablespaces must be local. */ - if (strlen(fileinfo.name) > 4 - && (0 == strcmp(fileinfo.name - + strlen(fileinfo.name) - 4, - ".ibd") - || ((!IS_XTRABACKUP() || srv_backup_mode) - && 0 == strcmp(fileinfo.name - + strlen(fileinfo.name) - 4, - ".isl"))) - && (!pred || - pred(dbinfo.name, fileinfo.name))) { - /* The name ends in .ibd or .isl; - try opening the file */ - fil_load_single_table_tablespace( - dbinfo.name, fileinfo.name); - files_read++; - if (files_read - files_read_at_last_check > - CHECK_TIME_EVERY_N_FILES) { - ib_time_t cur_time= ut_time(); - files_read_at_last_check= files_read; - double time_elapsed= ut_difftime(cur_time, - prev_report_time); - if (time_elapsed > 15) { - ib_logf(IB_LOG_LEVEL_INFO, - "Processed %ld .ibd/.isl files", - files_read); - prev_report_time= cur_time; - } - } - } -next_file_item: - ret = fil_file_readdir_next_file(&err, - dbpath, dbdir, - &fileinfo); - } - - if (0 != os_file_closedir(dbdir)) { - fputs("InnoDB: Warning: could not" - " close database directory ", stderr); - ut_print_filename(stderr, dbpath); - putc('\n', stderr); - - err = DB_ERROR; - } - } - -next_datadir_item: - ret = fil_file_readdir_next_file(&err, - fil_path_to_mysql_datadir, - dir, &dbinfo); - } - - mem_free(dbpath); - - if (0 != os_file_closedir(dir)) { - fprintf(stderr, - "InnoDB: Error: could not close MySQL datadir\n"); - - return(DB_ERROR); - } - - return(err); -} - -/*******************************************************************//** -Returns TRUE if a single-table 
tablespace does not exist in the memory cache, -or is being deleted there. -@return TRUE if does not exist or is being deleted */ -UNIV_INTERN -ibool -fil_tablespace_deleted_or_being_deleted_in_mem( -/*===========================================*/ - ulint id, /*!< in: space id */ - ib_int64_t version)/*!< in: tablespace_version should be this; if - you pass -1 as the value of this, then this - parameter is ignored */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL || space->is_stopping()) { - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - if (version != ((ib_int64_t)-1) - && space->tablespace_version != version) { - mutex_exit(&fil_system->mutex); - - return(TRUE); - } - - mutex_exit(&fil_system->mutex); - - return(FALSE); -} - -/*******************************************************************//** -Returns TRUE if a single-table tablespace exists in the memory cache. -@return TRUE if exists */ -UNIV_INTERN -ibool -fil_tablespace_exists_in_mem( -/*=========================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - mutex_exit(&fil_system->mutex); - - return(space != NULL); -} - -/*******************************************************************//** -Report that a tablespace for a table was not found. */ -static -void -fil_report_missing_tablespace( -/*===========================*/ - const char* name, /*!< in: table name */ - ulint space_id) /*!< in: table's space id */ -{ - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name(index_name, sizeof(index_name), name, TRUE); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Table %s in the InnoDB data dictionary has tablespace id %lu, " - "but tablespace with that id or name does not exist. Have " - "you deleted or moved .ibd files? 
This may also be a table " - "created with CREATE TEMPORARY TABLE whose .ibd and .frm " - "files MySQL automatically removed, but the table still " - "exists in the InnoDB internal data dictionary.", - name, space_id); -} - -/** Check if a matching tablespace exists in the InnoDB tablespace memory -cache. Note that if we have not done a crash recovery at the database startup, -there may be many tablespaces which are not yet in the memory cache. -@return whether a matching tablespace exists in the memory cache */ -UNIV_INTERN -bool -fil_space_for_table_exists_in_mem( -/*==============================*/ - ulint id, /*!< in: space id */ - const char* name, /*!< in: table name used in - fil_space_create(). Either the - standard 'dbname/tablename' format - or table->dir_path_of_temp_table */ - bool print_error_if_does_not_exist, - /*!< in: print detailed error - information to the .err log if a - matching tablespace is not found from - memory */ - bool remove_from_data_dict_if_does_not_exist, - /*!< in: remove from the data dictionary - if tablespace does not exist */ - bool adjust_space, /*!< in: whether to adjust space id - when find table space mismatch */ - mem_heap_t* heap, /*!< in: heap memory */ - table_id_t table_id, /*!< in: table id */ - ulint table_flags) /*!< in: table flags */ -{ - fil_space_t* fnamespace; - fil_space_t* space; - - const ulint expected_flags = dict_tf_to_fsp_flags(table_flags); - - mutex_enter(&fil_system->mutex); - - /* Look if there is a space with the same id */ - - space = fil_space_get_by_id(id); - - /* Look if there is a space with the same name; the name is the - directory path from the datadir to the file */ - - fnamespace = fil_space_get_by_name(name); - bool valid = space && !((space->flags ^ expected_flags) - & ~FSP_FLAGS_MEM_MASK); - - if (!space) { - } else if (!valid || space == fnamespace) { - /* Found with the same file name, or got a flag mismatch. 
*/ - goto func_exit; - } else if (adjust_space - && row_is_mysql_tmp_table_name(space->name) - && !row_is_mysql_tmp_table_name(name)) { - /* Info from fnamespace comes from the ibd file - itself, it can be different from data obtained from - System tables since renaming files is not - transactional. We shall adjust the ibd file name - according to system table info. */ - mutex_exit(&fil_system->mutex); - - DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space", - DBUG_SUICIDE();); - - char* tmp_name = dict_mem_create_temporary_tablename( - heap, name, table_id); - - fil_rename_tablespace(fnamespace->name, fnamespace->id, - tmp_name, NULL); - - DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space", - DBUG_SUICIDE();); - - fil_rename_tablespace(space->name, id, name, NULL); - - DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space", - DBUG_SUICIDE();); - - mutex_enter(&fil_system->mutex); - fnamespace = fil_space_get_by_name(name); - ut_ad(space == fnamespace); - goto func_exit; - } - - if (!print_error_if_does_not_exist) { - valid = false; - goto func_exit; - } - - if (space == NULL) { - if (fnamespace == NULL) { - if (print_error_if_does_not_exist) { - fil_report_missing_tablespace(name, id); - if (IS_XTRABACKUP() && remove_from_data_dict_if_does_not_exist) { - ib_logf(IB_LOG_LEVEL_WARN, - "It will be removed from " - "the data dictionary."); - } - } - } else { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary has" - " tablespace id %lu,\n" - "InnoDB: but a tablespace with that id" - " does not exist. There is\n" - "InnoDB: a tablespace of name %s and id %lu," - " though. 
Have\n" - "InnoDB: you deleted or moved .ibd files?\n", - (ulong) id, fnamespace->name, - (ulong) fnamespace->id); - } -error_exit: - fputs("InnoDB: Please refer to\n" - "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n" - "InnoDB: for how to resolve the issue.\n", stderr); - valid = false; - goto func_exit; - } - - if (0 != strcmp(space->name, name)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: in InnoDB data dictionary has" - " tablespace id %lu,\n" - "InnoDB: but the tablespace with that id" - " has name %s.\n" - "InnoDB: Have you deleted or moved .ibd files?\n", - (ulong) id, space->name); - - if (fnamespace != NULL) { - fputs("InnoDB: There is a tablespace" - " with the right name\n" - "InnoDB: ", stderr); - ut_print_filename(stderr, fnamespace->name); - fprintf(stderr, ", but its id is %lu.\n", - (ulong) fnamespace->id); - } - - goto error_exit; - } - -func_exit: - if (valid) { - /* Adjust the flags that are in FSP_FLAGS_MEM_MASK. - FSP_SPACE_FLAGS will not be written back here. */ - space->flags = expected_flags; - } - mutex_exit(&fil_system->mutex); - - if (valid && !srv_read_only_mode) { - fsp_flags_try_adjust(id, expected_flags & ~FSP_FLAGS_MEM_MASK); - } - - return(valid); -} - -/*******************************************************************//** -Checks if a single-table tablespace for a given table name exists in the -tablespace memory cache. -@return space id, ULINT_UNDEFINED if not found */ -UNIV_INTERN -ulint -fil_get_space_id_for_table( -/*=======================*/ - const char* tablename) /*!< in: table name in the standard - 'databasename/tablename' format */ -{ - fil_space_t* fnamespace; - ulint id = ULINT_UNDEFINED; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - /* Look if there is a space with the same name. 
*/ - - fnamespace = fil_space_get_by_name(tablename); - - if (fnamespace) { - id = fnamespace->id; - } - - mutex_exit(&fil_system->mutex); - - return(id); -} - -/**********************************************************************//** -Tries to extend a data file so that it would accommodate the number of pages -given. The tablespace must be cached in the memory cache. If the space is big -enough already, does nothing. -@return TRUE if success */ -UNIV_INTERN -ibool -fil_extend_space_to_desired_size( -/*=============================*/ - ulint* actual_size, /*!< out: size of the space after extension; - if we ran out of disk space this may be lower - than the desired size */ - ulint space_id, /*!< in: space id */ - ulint size_after_extend)/*!< in: desired size in pages after the - extension; if the current space size is bigger - than this already, the function does nothing */ -{ - ut_ad(!srv_read_only_mode); - - for (;;) { - fil_mutex_enter_and_prepare_for_io(space_id); - - fil_space_t* space = fil_space_get_by_id(space_id); - ut_a(space); - ibool success; - - if (!fil_space_extend_must_retry( - space, UT_LIST_GET_LAST(space->chain), - size_after_extend, &success)) { - *actual_size = space->size; - mutex_exit(&fil_system->mutex); - return(success); - } - } -} - -#ifdef UNIV_HOTBACKUP -/********************************************************************//** -Extends all tablespaces to the size stored in the space header. During the -mysqlbackup --apply-log phase we extended the spaces on-demand so that log -records could be applied, but that may have left spaces still too small -compared to the size stored in the space header. 
*/ -UNIV_INTERN -void -fil_extend_tablespaces_to_stored_len(void) -/*======================================*/ -{ - fil_space_t* space; - byte* buf; - ulint actual_size; - ulint size_in_header; - dberr_t error; - ibool success; - - buf = mem_alloc(UNIV_PAGE_SIZE); - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - ut_a(space->purpose == FIL_TABLESPACE); - - mutex_exit(&fil_system->mutex); /* no need to protect with a - mutex, because this is a - single-threaded operation */ - error = fil_read(TRUE, space->id, - fsp_flags_get_zip_size(space->flags), - 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0); - ut_a(error == DB_SUCCESS); - - size_in_header = fsp_get_size_low(buf); - - success = fil_extend_space_to_desired_size( - &actual_size, space->id, size_in_header); - if (!success) { - fprintf(stderr, - "InnoDB: Error: could not extend the" - " tablespace of %s\n" - "InnoDB: to the size stored in header," - " %lu pages;\n" - "InnoDB: size after extension %lu pages\n" - "InnoDB: Check that you have free disk space" - " and retry!\n", - space->name, size_in_header, actual_size); - ut_a(success); - } - - mutex_enter(&fil_system->mutex); - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&fil_system->mutex); - - mem_free(buf); -} -#endif - -/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ - -/*******************************************************************//** -Tries to reserve free extents in a file space. 
-@return TRUE if succeed */ -UNIV_INTERN -ibool -fil_space_reserve_free_extents( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint n_free_now, /*!< in: number of free extents now */ - ulint n_to_reserve) /*!< in: how many one wants to reserve */ -{ - fil_space_t* space; - ibool success; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - if (space->n_reserved_extents + n_to_reserve > n_free_now) { - success = FALSE; - } else { - space->n_reserved_extents += n_to_reserve; - success = TRUE; - } - - mutex_exit(&fil_system->mutex); - - return(success); -} - -/*******************************************************************//** -Releases free extents in a file space. */ -UNIV_INTERN -void -fil_space_release_free_extents( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint n_reserved) /*!< in: how many one reserved */ -{ - fil_space_t* space; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - ut_a(space->n_reserved_extents >= n_reserved); - - space->n_reserved_extents -= n_reserved; - - mutex_exit(&fil_system->mutex); -} - -/*******************************************************************//** -Gets the number of reserved extents. If the database is silent, this number -should be zero. */ -UNIV_INTERN -ulint -fil_space_get_n_reserved_extents( -/*=============================*/ - ulint id) /*!< in: space id */ -{ - fil_space_t* space; - ulint n; - - ut_ad(fil_system); - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - ut_a(space); - - n = space->n_reserved_extents; - - mutex_exit(&fil_system->mutex); - - return(n); -} - -/*============================ FILE I/O ================================*/ - -/********************************************************************//** -NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! - -Prepares a file node for i/o. 
Opens the file if it is closed. Updates the -pending i/o's field in the node and the system appropriately. Takes the node -off the LRU list if it is in the LRU list. The caller must hold the fil_sys -mutex. -@return false if the file can't be opened, otherwise true */ -static -bool -fil_node_prepare_for_io( -/*====================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - fil_space_t* space) /*!< in: space */ -{ - ut_ad(node && system && space); - ut_ad(mutex_own(&(system->mutex))); - - if (system->n_open > system->max_n_open + 5) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: open files %lu" - " exceeds the limit %lu\n", - (ulong) system->n_open, - (ulong) system->max_n_open); - } - - if (node->open == FALSE) { - /* File is closed: open it */ - ut_a(node->n_pending == 0); - - if (!fil_node_open_file(node, system, space)) { - return(false); - } - } - - if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) { - /* The node is in the LRU list, remove it */ - - ut_a(UT_LIST_GET_LEN(system->LRU) > 0); - - UT_LIST_REMOVE(LRU, system->LRU, node); - } - - node->n_pending++; - - return(true); -} - -/********************************************************************//** -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. 
*/ -static -void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /*!< in: file node */ - fil_system_t* system, /*!< in: tablespace memory cache */ - ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks - the node as modified if - type == OS_FILE_WRITE */ -{ - ut_ad(node); - ut_ad(system); - ut_ad(mutex_own(&(system->mutex))); - - ut_a(node->n_pending > 0); - - node->n_pending--; - - if (type == OS_FILE_WRITE) { - ut_ad(!srv_read_only_mode); - system->modification_counter++; - node->modification_counter = system->modification_counter; - - if (fil_buffering_disabled(node->space)) { - - /* We don't need to keep track of unflushed - changes as user has explicitly disabled - buffering. */ - ut_ad(!node->space->is_in_unflushed_spaces); - node->flush_counter = node->modification_counter; - - } else if (!node->space->is_in_unflushed_spaces) { - - node->space->is_in_unflushed_spaces = true; - UT_LIST_ADD_FIRST(unflushed_spaces, - system->unflushed_spaces, - node->space); - } - } - - if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) { - - /* The node must be put back to the LRU list */ - UT_LIST_ADD_FIRST(LRU, system->LRU, node); - } -} - -/********************************************************************//** -Report information about an invalid page access. */ -static -void -fil_report_invalid_page_access( -/*===========================*/ - ulint block_offset, /*!< in: block offset */ - ulint space_id, /*!< in: space id */ - const char* space_name, /*!< in: space name */ - ulint byte_offset, /*!< in: byte offset */ - ulint len, /*!< in: I/O length */ - ulint type) /*!< in: I/O type */ -{ - ib_logf(IB_LOG_LEVEL_FATAL, - "Trying to access page number " ULINTPF - " in space " ULINTPF - " space name %s," - " which is outside the tablespace bounds." - " Byte offset " ULINTPF ", len " ULINTPF - " i/o type " ULINTPF ".%s", - block_offset, space_id, space_name, - byte_offset, len, type, - space_id == 0 && !srv_was_started - ? 
"Please check that the configuration matches" - " the InnoDB system tablespace location (ibdata files)" - : ""); -} - -/********************************************************************//** -Find correct node from file space -@return node */ -static -fil_node_t* -fil_space_get_node( - fil_space_t* space, /*!< in: file spage */ - ulint space_id, /*!< in: space id */ - ulint* block_offset, /*!< in/out: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len) /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ -{ - fil_node_t* node; - ut_ad(mutex_own(&fil_system->mutex)); - - node = UT_LIST_GET_FIRST(space->chain); - - for (;;) { - if (node == NULL) { - return(NULL); - } else if (fil_is_user_tablespace_id(space->id) - && node->size == 0) { - - /* We do not know the size of a single-table tablespace - before we open the file */ - break; - } else if (node->size > *block_offset) { - /* Found! */ - break; - } else { - (*block_offset) -= node->size; - node = UT_LIST_GET_NEXT(chain, node); - } - } - - return (node); -} - -/** Determine the block size of the data file. -@param[in] space tablespace -@param[in] offset page number -@return block size */ -UNIV_INTERN -ulint -fil_space_get_block_size(const fil_space_t* space, unsigned offset) -{ - ut_ad(space->n_pending_ios > 0); - - ulint block_size = 512; - - for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { - block_size = node->file_block_size; - if (node->size > offset) { - break; - } - offset -= node->size; - } - - /* Currently supporting block size up to 4K, - fall back to default if bigger requested. */ - if (block_size > 4096) { - block_size = 512; - } - - return block_size; -} - -/********************************************************************//** -Reads or writes data. 
This operation is asynchronous (aio). -@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do -i/o on a tablespace which does not exist */ -UNIV_INTERN -dberr_t -_fil_io( -/*===*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, - ORed to OS_FILE_LOG, if a log i/o - and ORed to OS_AIO_SIMULATED_WAKE_LATER - if simulated aio and we want to post a - batch of i/os; NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! */ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len, /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ - void* buf, /*!< in/out: buffer where to store read data - or from where to write; in aio this must be - appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size, /*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. 
*/ - trx_t* trx) -{ - ulint mode; - fil_space_t* space; - fil_node_t* node; - ibool ret=TRUE; - ulint is_log; - ulint wake_later; - os_offset_t offset; - bool ignore_nonexistent_pages; - - is_log = type & OS_FILE_LOG; - type = type & ~OS_FILE_LOG; - - wake_later = type & OS_AIO_SIMULATED_WAKE_LATER; - type = type & ~OS_AIO_SIMULATED_WAKE_LATER; - - ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES; - type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES; - - ut_ad(byte_offset < UNIV_PAGE_SIZE); - ut_ad(!zip_size || !byte_offset); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(buf); - ut_ad(len > 0); - ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT)); -#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX -# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX" -#endif -#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN -# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN" -#endif - ut_ad(fil_validate_skip()); -#ifndef UNIV_HOTBACKUP -# ifndef UNIV_LOG_DEBUG - /* ibuf bitmap pages must be read in the sync aio mode: */ - ut_ad(recv_no_ibuf_operations - || type == OS_FILE_WRITE - || !ibuf_bitmap_page(zip_size, block_offset) - || sync - || is_log); -# endif /* UNIV_LOG_DEBUG */ - if (sync) { - mode = OS_AIO_SYNC; - } else if (is_log) { - mode = OS_AIO_LOG; - } else if (type == OS_FILE_READ - && !recv_no_ibuf_operations - && ibuf_page(space_id, zip_size, block_offset, NULL)) { - mode = OS_AIO_IBUF; - } else { - mode = OS_AIO_NORMAL; - } -#else /* !UNIV_HOTBACKUP */ - ut_a(sync); - mode = OS_AIO_SYNC; -#endif /* !UNIV_HOTBACKUP */ - - if (type == OS_FILE_READ) { - srv_stats.data_read.add(len); - } else if (type == OS_FILE_WRITE) { - ut_ad(!srv_read_only_mode); - srv_stats.data_written.add(len); - if (fil_page_is_index_page((byte *)buf)) { - srv_stats.index_pages_written.inc(); - } else { - srv_stats.non_index_pages_written.inc(); - } - } - - /* Reserve the fil_system mutex and make sure that we can open at - least one file while 
holding it, if the file is not already open */ - - fil_mutex_enter_and_prepare_for_io(space_id); - - space = fil_space_get_by_id(space_id); - - /* If we are deleting a tablespace we don't allow async read operations - on that. However, we do allow write and sync read operations */ - if (space == 0 - || (type == OS_FILE_READ - && !sync - && space->stop_new_ops)) { - mutex_exit(&fil_system->mutex); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to do i/o to a tablespace which does " - "not exist. i/o type " ULINTPF - ", space id " ULINTPF " , " - "page no. " ULINTPF - ", i/o length " ULINTPF " bytes", - type, space_id, block_offset, - len); - - return(DB_TABLESPACE_DELETED); - } - - ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE); - - node = fil_space_get_node(space, space_id, &block_offset, byte_offset, len); - - if (!node) { - if (ignore_nonexistent_pages) { - mutex_exit(&fil_system->mutex); - return(DB_ERROR); - } - - fil_report_invalid_page_access( - block_offset, space_id, space->name, - byte_offset, len, type); - } - - /* Open file if closed */ - if (!fil_node_prepare_for_io(node, fil_system, space)) { - if (space->purpose == FIL_TABLESPACE - && fil_is_user_tablespace_id(space->id)) { - mutex_exit(&fil_system->mutex); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to do i/o to a tablespace which " - "exists without .ibd data file. " - "i/o type " ULINTPF ", space id " - ULINTPF ", page no " ULINTPF ", " - "i/o length " ULINTPF " bytes", - type, space_id, - block_offset, len); - - return(DB_TABLESPACE_DELETED); - } - - /* The tablespace is for log. Currently, we just assert here - to prevent handling errors along the way fil_io returns. - Also, if the log files are missing, it would be hard to - promise the server can continue running. */ - ut_a(0); - } - - /* Check that at least the start offset is within the bounds of a - single-table tablespace, including rollback tablespaces. 
*/ - if (UNIV_UNLIKELY(node->size <= block_offset) - && space->id != 0 && space->purpose == FIL_TABLESPACE) { - - fil_report_invalid_page_access( - block_offset, space_id, space->name, byte_offset, - len, type); - } - - /* Now we have made the changes in the data structures of fil_system */ - mutex_exit(&fil_system->mutex); - - /* Calculate the low 32 bits and the high 32 bits of the file offset */ - - if (!zip_size) { - offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT) - + byte_offset; - - ut_a(node->size - block_offset - >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1)) - / UNIV_PAGE_SIZE)); - } else { - ulint zip_size_shift; - switch (zip_size) { - case 1024: zip_size_shift = 10; break; - case 2048: zip_size_shift = 11; break; - case 4096: zip_size_shift = 12; break; - case 8192: zip_size_shift = 13; break; - case 16384: zip_size_shift = 14; break; - case 32768: zip_size_shift = 15; break; - case 65536: zip_size_shift = 16; break; - default: ut_error; - } - offset = ((os_offset_t) block_offset << zip_size_shift) - + byte_offset; - ut_a(node->size - block_offset - >= (len + (zip_size - 1)) / zip_size); - } - - /* Do aio */ - - ut_a(byte_offset % OS_MIN_LOG_BLOCK_SIZE == 0); - ut_a((len % OS_MIN_LOG_BLOCK_SIZE) == 0); - -#ifndef UNIV_HOTBACKUP - if (UNIV_UNLIKELY(space->is_corrupt && srv_pass_corrupt_table)) { - - /* should ignore i/o for the crashed space */ - if (srv_pass_corrupt_table == 1 || - type == OS_FILE_WRITE) { - - mutex_enter(&fil_system->mutex); - fil_node_complete_io(node, fil_system, type); - mutex_exit(&fil_system->mutex); - if (mode == OS_AIO_NORMAL) { - ut_a(space->purpose == FIL_TABLESPACE); - dberr_t err = buf_page_io_complete(static_cast<buf_page_t *> - (message)); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Write operation failed for tablespace %s (" - ULINTPF ") offset " ULINTPF " error=%d.", - space->name, space->id, byte_offset, err); - } - } - } - - if (srv_pass_corrupt_table == 1 && type == OS_FILE_READ) { - - 
return(DB_TABLESPACE_DELETED); - - } else if (type == OS_FILE_WRITE) { - - return(DB_SUCCESS); - } - } - - const char* name = node->name == NULL ? space->name : node->name; - - /* Queue the aio request */ - ret = os_aio(type, is_log, mode | wake_later, name, node->handle, buf, - offset, len, zip_size ? zip_size : UNIV_PAGE_SIZE, node, - message, space_id, trx, write_size); - -#else - /* In mysqlbackup do normal i/o, not aio */ - if (type == OS_FILE_READ) { - ret = os_file_read(node->handle, buf, offset, len); - } else { - ut_ad(!srv_read_only_mode); - ret = os_file_write(name, node->handle, buf, - offset, len); - } -#endif /* !UNIV_HOTBACKUP */ - - if (mode == OS_AIO_SYNC) { - /* The i/o operation is already completed when we return from - os_aio: */ - - mutex_enter(&fil_system->mutex); - - fil_node_complete_io(node, fil_system, type); - - mutex_exit(&fil_system->mutex); - - ut_ad(fil_validate_skip()); - } - - if (!ret) { - return(DB_OUT_OF_FILE_SPACE); - } - - return(DB_SUCCESS); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Waits for an aio operation to complete. This function is used to write the -handler for completed requests. The aio array of pending requests is divided -into segments (see os0file.cc for more info). The thread specifies which -segment it wants to wait for. 
*/ -UNIV_INTERN -void -fil_aio_wait( -/*=========*/ - ulint segment) /*!< in: the number of the segment in the aio - array to wait for */ -{ - ibool ret; - fil_node_t* fil_node; - void* message; - ulint type; - ulint space_id = 0; - - ut_ad(fil_validate_skip()); - - if (srv_use_native_aio) { - srv_set_io_thread_op_info(segment, "native aio handle"); -#ifdef WIN_ASYNC_IO - ret = os_aio_windows_handle( - segment, 0, &fil_node, &message, &type, &space_id); -#elif defined(LINUX_NATIVE_AIO) - ret = os_aio_linux_handle( - segment, &fil_node, &message, &type, &space_id); -#else - ut_error; - ret = 0; /* Eliminate compiler warning */ -#endif /* WIN_ASYNC_IO */ - } else { - srv_set_io_thread_op_info(segment, "simulated aio handle"); - - ret = os_aio_simulated_handle( - segment, &fil_node, &message, &type, &space_id); - } - - ut_a(ret); - if (fil_node == NULL) { - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); - return; - } - - srv_set_io_thread_op_info(segment, "complete io for fil node"); - - mutex_enter(&fil_system->mutex); - - fil_node_complete_io(fil_node, fil_system, type); - ulint purpose = fil_node->space->purpose; - space_id = fil_node->space->id; - - mutex_exit(&fil_system->mutex); - - ut_ad(fil_validate_skip()); - - /* Do the i/o handling */ - /* IMPORTANT: since i/o handling for reads will read also the insert - buffer in tablespace 0, you have to be very careful not to introduce - deadlocks in the i/o system. We keep tablespace 0 data files always - open, and use a special i/o thread to serve insert buffer requests. */ - - if (purpose == FIL_TABLESPACE) { - srv_set_io_thread_op_info(segment, "complete io for buf page"); - buf_page_t* bpage = static_cast<buf_page_t*>(message); - ulint offset = bpage->offset; - dberr_t err = buf_page_io_complete(bpage); - - if (err != DB_SUCCESS) { - ut_ad(type == OS_FILE_READ); - /* In crash recovery set log corruption on - and produce only an error to fail InnoDB startup. 
*/ - if (recv_recovery_is_on() && !srv_force_recovery) { - recv_sys->found_corrupt_log = true; - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "Read operation failed for tablespace %s" - " offset " ULINTPF " with error %s", - fil_node->name, - offset, - ut_strerr(err)); - } - } else { - srv_set_io_thread_op_info(segment, "complete io for log"); - log_io_complete(static_cast<log_group_t*>(message)); - } -} -#endif /* UNIV_HOTBACKUP */ - -/**********************************************************************//** -Flushes to disk possible writes cached by the OS. If the space does not exist -or is being dropped, does not do anything. */ -UNIV_INTERN -void -fil_flush( -/*======*/ - ulint space_id) /*!< in: file space id (this can be a group of - log files or a tablespace of the database) */ -{ - mutex_enter(&fil_system->mutex); - - if (fil_space_t* space = fil_space_get_by_id(space_id)) { - if (!space->stop_new_ops) { - - fil_flush_low(space); - } - } - - mutex_exit(&fil_system->mutex); -} - -/** Flush a tablespace. -@param[in,out] space tablespace to flush */ -UNIV_INTERN -void -fil_flush(fil_space_t* space) -{ - ut_ad(space->n_pending_ios > 0); - - if (!space->is_stopping()) { - mutex_enter(&fil_system->mutex); - if (!space->is_stopping()) { - fil_flush_low(space); - } - mutex_exit(&fil_system->mutex); - } -} - -/** Flush to disk the writes in file spaces of the given type -possibly cached by the OS. -@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */ -UNIV_INTERN -void -fil_flush_file_spaces(ulint purpose) -{ - fil_space_t* space; - ulint* space_ids; - ulint n_space_ids; - ulint i; - - mutex_enter(&fil_system->mutex); - - n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces); - if (n_space_ids == 0) { - - mutex_exit(&fil_system->mutex); - return; - } - - /* Assemble a list of space ids to flush. Previously, we - traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT() - on a space that was just removed from the list by fil_flush(). 
- Thus, the space could be dropped and the memory overwritten. */ - space_ids = static_cast<ulint*>( - mem_alloc(n_space_ids * sizeof *space_ids)); - - n_space_ids = 0; - - for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces); - space; - space = UT_LIST_GET_NEXT(unflushed_spaces, space)) { - - if (space->purpose == purpose && !space->is_stopping()) { - space_ids[n_space_ids++] = space->id; - } - } - - mutex_exit(&fil_system->mutex); - - /* Flush the spaces. It will not hurt to call fil_flush() on - a non-existing space id. */ - for (i = 0; i < n_space_ids; i++) { - - fil_flush(space_ids[i]); - } - - mem_free(space_ids); -} - -/** Functor to validate the space list. */ -struct Check { - void operator()(const fil_node_t* elem) - { - ut_a(elem->open || !elem->n_pending); - } -}; - -/******************************************************************//** -Checks the consistency of the tablespace cache. -@return TRUE if ok */ -UNIV_INTERN -ibool -fil_validate(void) -/*==============*/ -{ - fil_space_t* space; - fil_node_t* fil_node; - ulint n_open = 0; - ulint i; - - mutex_enter(&fil_system->mutex); - - /* Look for spaces in the hash table */ - - for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) { - - for (space = static_cast<fil_space_t*>( - HASH_GET_FIRST(fil_system->spaces, i)); - space != 0; - space = static_cast<fil_space_t*>( - HASH_GET_NEXT(hash, space))) { - - UT_LIST_VALIDATE( - chain, fil_node_t, space->chain, Check()); - - for (fil_node = UT_LIST_GET_FIRST(space->chain); - fil_node != 0; - fil_node = UT_LIST_GET_NEXT(chain, fil_node)) { - - if (fil_node->n_pending > 0) { - ut_a(fil_node->open); - } - - if (fil_node->open) { - n_open++; - } - } - } - } - - ut_a(fil_system->n_open == n_open); - - UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU); - - for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU); - fil_node != 0; - fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) { - - ut_a(fil_node->n_pending == 0); - ut_a(!fil_node->being_extended); - 
ut_a(fil_node->open); - ut_a(fil_space_belongs_in_lru(fil_node->space)); - } - - mutex_exit(&fil_system->mutex); - - return(TRUE); -} - -/********************************************************************//** -Returns TRUE if file address is undefined. -@return TRUE if undefined */ -UNIV_INTERN -ibool -fil_addr_is_null( -/*=============*/ - fil_addr_t addr) /*!< in: address */ -{ - return(addr.page == FIL_NULL); -} - -/********************************************************************//** -Get the predecessor of a file page. -@return FIL_PAGE_PREV */ -UNIV_INTERN -ulint -fil_page_get_prev( -/*==============*/ - const byte* page) /*!< in: file page */ -{ - return(mach_read_from_4(page + FIL_PAGE_PREV)); -} - -/********************************************************************//** -Get the successor of a file page. -@return FIL_PAGE_NEXT */ -UNIV_INTERN -ulint -fil_page_get_next( -/*==============*/ - const byte* page) /*!< in: file page */ -{ - return(mach_read_from_4(page + FIL_PAGE_NEXT)); -} - -/*********************************************************************//** -Sets the file page type. */ -UNIV_INTERN -void -fil_page_set_type( -/*==============*/ - byte* page, /*!< in/out: file page */ - ulint type) /*!< in: type */ -{ - ut_ad(page); - - mach_write_to_2(page + FIL_PAGE_TYPE, type); -} - -/*********************************************************************//** -Gets the file page type. -@return type; NOTE that if the type has not been written to page, the -return value not defined */ -UNIV_INTERN -ulint -fil_page_get_type( -/*==============*/ - const byte* page) /*!< in: file page */ -{ - ut_ad(page); - - return(mach_read_from_2(page + FIL_PAGE_TYPE)); -} - -/****************************************************************//** -Closes the tablespace memory cache. 
*/ -UNIV_INTERN -void -fil_close(void) -/*===========*/ -{ - fil_space_crypt_cleanup(); - - mutex_free(&fil_system->mutex); - - hash_table_free(fil_system->spaces); - - hash_table_free(fil_system->name_hash); - - ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0); - ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0); - ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0); - - mem_free(fil_system); - - fil_system = NULL; -} - -/********************************************************************//** -Initializes a buffer control block when the buf_pool is created. */ -static -void -fil_buf_block_init( -/*===============*/ - buf_block_t* block, /*!< in: pointer to control block */ - byte* frame) /*!< in: pointer to buffer frame */ -{ - UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE); - - block->frame = frame; - - block->page.io_fix = BUF_IO_NONE; - /* There are assertions that check for this. */ - block->page.buf_fix_count = 1; - block->page.state = BUF_BLOCK_READY_FOR_USE; - - page_zip_des_init(&block->page.zip); -} - -struct fil_iterator_t { - pfs_os_file_t file; /*!< File handle */ - const char* filepath; /*!< File path name */ - os_offset_t start; /*!< From where to start */ - os_offset_t end; /*!< Where to stop */ - os_offset_t file_size; /*!< File size in bytes */ - ulint page_size; /*!< Page size */ - ulint n_io_buffers; /*!< Number of pages to use - for IO */ - byte* io_buffer; /*!< Buffer to use for IO */ - fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */ - byte* crypt_io_buffer; /*!< IO buffer when encrypted */ -}; - -/********************************************************************//** -TODO: This can be made parallel trivially by chunking up the file and creating -a callback per thread. . Main benefit will be to use multiple CPUs for -checksums and compressed tables. We have to do compressed tables block by -block right now. Secondly we need to decompress/compress and copy too much -of data. These are CPU intensive. 
- -Iterate over all the pages in the tablespace. -@param iter - Tablespace iterator -@param block - block to use for IO -@param callback - Callback to inspect and update page contents -@retval DB_SUCCESS or error code */ -static -dberr_t -fil_iterate( -/*========*/ - const fil_iterator_t& iter, - buf_block_t* block, - PageCallback& callback) -{ - os_offset_t offset; - ulint page_no = 0; - ulint space_id = callback.get_space_id(); - ulint n_bytes = iter.n_io_buffers * iter.page_size; - - ut_ad(!srv_read_only_mode); - - /* TODO: For compressed tables we do a lot of useless - copying for non-index pages. Unfortunately, it is - required by buf_zip_decompress() */ - const bool row_compressed = callback.get_zip_size() > 0; - - for (offset = iter.start; offset < iter.end; offset += n_bytes) { - - byte* io_buffer = iter.io_buffer; - - block->frame = io_buffer; - - if (row_compressed) { - page_zip_des_init(&block->page.zip); - page_zip_set_size(&block->page.zip, iter.page_size); - block->page.zip.data = block->frame + UNIV_PAGE_SIZE; - ut_d(block->page.zip.m_external = true); - ut_ad(iter.page_size == callback.get_zip_size()); - - /* Zip IO is done in the compressed page buffer. */ - io_buffer = block->page.zip.data; - } - - /* We have to read the exact number of bytes. Otherwise the - InnoDB IO functions croak on failed reads. */ - - n_bytes = static_cast<ulint>( - ut_min(static_cast<os_offset_t>(n_bytes), - iter.end - offset)); - - ut_ad(n_bytes > 0); - ut_ad(!(n_bytes % iter.page_size)); - - const bool encrypted = iter.crypt_data != NULL - && iter.crypt_data->should_encrypt(); - /* Use additional crypt io buffer if tablespace is encrypted */ - byte* const readptr = encrypted - ? 
iter.crypt_io_buffer : io_buffer; - byte* const writeptr = readptr; - - if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes)) { - - ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed"); - - return(DB_IO_ERROR); - } - - bool updated = false; - os_offset_t page_off = offset; - ulint n_pages_read = (ulint) n_bytes / iter.page_size; - bool decrypted = false; - - for (ulint i = 0; i < n_pages_read; ++i) { - ulint size = iter.page_size; - dberr_t err = DB_SUCCESS; - byte* src = readptr + (i * size); - byte* dst = io_buffer + (i * size); - bool frame_changed = false; - - ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE); - - const bool page_compressed - = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED - || page_type == FIL_PAGE_PAGE_COMPRESSED; - - /* If tablespace is encrypted, we need to decrypt - the page. Note that tablespaces are not in - fil_system during import. */ - if (encrypted) { - decrypted = fil_space_decrypt( - iter.crypt_data, - dst, //dst - iter.page_size, - src, // src - &err); // src - - if (err != DB_SUCCESS) { - return(err); - } - - if (decrypted) { - updated = true; - } else { - if (!page_compressed && !row_compressed) { - block->frame = src; - frame_changed = true; - } else { - memcpy(dst, src, size); - } - } - } - - /* If the original page is page_compressed, we need - to decompress page before we can update it. */ - if (page_compressed) { - fil_decompress_page(NULL, dst, ulong(size), - NULL); - updated = true; - } - - buf_block_set_file_page(block, space_id, page_no++); - - if ((err = callback(page_off, block)) != DB_SUCCESS) { - - return(err); - - } else if (!updated) { - updated = buf_block_get_state(block) - == BUF_BLOCK_FILE_PAGE; - } - - buf_block_set_state(block, BUF_BLOCK_NOT_USED); - buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE); - - /* If tablespace is encrypted we use additional - temporary scratch area where pages are read - for decrypting readptr == crypt_io_buffer != io_buffer. 
- - Destination for decryption is a buffer pool block - block->frame == dst == io_buffer that is updated. - Pages that did not require decryption even when - tablespace is marked as encrypted are not copied - instead block->frame is set to src == readptr. - - For encryption we again use temporary scratch area - writeptr != io_buffer == dst - that is then written to the tablespace - - (1) For normal tables io_buffer == dst == writeptr - (2) For only page compressed tables - io_buffer == dst == writeptr - (3) For encrypted (and page compressed) - readptr != io_buffer == dst != writeptr - */ - - ut_ad(!encrypted && !page_compressed ? - src == dst && dst == writeptr + (i * size):1); - ut_ad(page_compressed && !encrypted ? - src == dst && dst == writeptr + (i * size):1); - ut_ad(encrypted ? - src != dst && dst != writeptr + (i * size):1); - - if (encrypted) { - memcpy(writeptr + (i * size), - row_compressed ? block->page.zip.data : - block->frame, size); - } - - if (frame_changed) { - block->frame = dst; - } - - src = io_buffer + (i * size); - - if (page_compressed) { - ulint len = 0; - - fil_compress_page( - NULL, - src, - NULL, - size, - 0,/* FIXME: compression level */ - 512,/* FIXME: use proper block size */ - encrypted, - &len); - - updated = true; - } - - /* If tablespace is encrypted, encrypt page before we - write it back. Note that we should not encrypt the - buffer that is in buffer pool. */ - /* NOTE: At this stage of IMPORT the - buffer pool is not being used at all! */ - if (decrypted && encrypted) { - byte *dest = writeptr + (i * size); - ulint space = mach_read_from_4( - src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET); - ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN); - - byte* tmp = fil_encrypt_buf( - iter.crypt_data, - space, - offset, - lsn, - src, - iter.page_size == UNIV_PAGE_SIZE ? 
0 : iter.page_size, - dest); - - if (tmp == src) { - /* TODO: remove unnecessary memcpy's */ - memcpy(dest, src, size); - } - - updated = true; - } - - page_off += iter.page_size; - block->frame += iter.page_size; - } - - /* A page was updated in the set, write back to disk. */ - if (updated - && !os_file_write( - iter.filepath, iter.file, writeptr, - offset, (ulint) n_bytes)) { - - ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed"); - - return(DB_IO_ERROR); - } - } - - return(DB_SUCCESS); -} - -/********************************************************************//** -Iterate over all the pages in the tablespace. -@param table - the table definiton in the server -@param n_io_buffers - number of blocks to read and write together -@param callback - functor that will do the page updates -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_tablespace_iterate( -/*===================*/ - dict_table_t* table, - ulint n_io_buffers, - PageCallback& callback) -{ - dberr_t err; - pfs_os_file_t file; - char* filepath; - - ut_a(n_io_buffers > 0); - ut_ad(!srv_read_only_mode); - - DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", - return(DB_CORRUPTION);); - - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - dict_get_and_save_data_dir_path(table, false); - ut_a(table->data_dir_path); - - filepath = os_file_make_remote_pathname( - table->data_dir_path, table->name, "ibd"); - } else { - filepath = fil_make_ibd_name(table->name, false); - } - - { - ibool success; - - file = os_file_create_simple_no_error_handling( - innodb_file_data_key, filepath, - OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); - - DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", - { - static bool once; - - if (!once || ut_rnd_interval(0, 10) == 5) { - once = true; - success = FALSE; - os_file_close(file); - } - }); - - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Trying to import a tablespace, but could not " 
- "open the tablespace file %s", filepath); - - mem_free(filepath); - - return(DB_TABLESPACE_NOT_FOUND); - - } else { - err = DB_SUCCESS; - } - } - - callback.set_file(filepath, file); - - os_offset_t file_size = os_file_get_size(file); - ut_a(file_size != (os_offset_t) -1); - - /* The block we will use for every physical page */ - buf_block_t block; - - memset(&block, 0x0, sizeof(block)); - - /* Allocate a page to read in the tablespace header, so that we - can determine the page size and zip_size (if it is compressed). - We allocate an extra page in case it is a compressed table. One - page is to ensure alignement. */ - - void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE); - byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE)); - - fil_buf_block_init(&block, page); - - /* Read the first page and determine the page and zip size. */ - - if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) { - - err = DB_IO_ERROR; - - } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) { - fil_iterator_t iter; - - iter.file = file; - iter.start = 0; - iter.end = file_size; - iter.filepath = filepath; - iter.file_size = file_size; - iter.n_io_buffers = n_io_buffers; - iter.page_size = callback.get_page_size(); - - /* In MariaDB/MySQL 5.6 tablespace does not exist - during import, therefore we can't use space directly - here. */ - ulint crypt_data_offset = fsp_header_get_crypt_offset( - callback.get_zip_size()); - - /* read (optional) crypt data */ - iter.crypt_data = fil_space_read_crypt_data( - 0, page, crypt_data_offset); - - /* Compressed pages can't be optimised for block IO for now. - We do the IMPORT page by page. 
*/ - - if (callback.get_zip_size() > 0) { - iter.n_io_buffers = 1; - ut_a(iter.page_size == callback.get_zip_size()); - } - - /** If tablespace is encrypted, it needs extra buffers */ - if (iter.crypt_data != NULL) { - /* decrease io buffers so that memory - * consumption doesnt double - * note: the +1 is to avoid n_io_buffers getting down to 0 */ - iter.n_io_buffers = (iter.n_io_buffers + 1) / 2; - } - - /** Add an extra page for compressed page scratch area. */ - - void* io_buffer = mem_alloc( - (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE); - - iter.io_buffer = static_cast<byte*>( - ut_align(io_buffer, UNIV_PAGE_SIZE)); - - void* crypt_io_buffer = NULL; - if (iter.crypt_data != NULL) { - crypt_io_buffer = mem_alloc( - (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE); - iter.crypt_io_buffer = static_cast<byte*>( - ut_align(crypt_io_buffer, UNIV_PAGE_SIZE)); - } - - err = fil_iterate(iter, &block, callback); - - mem_free(io_buffer); - - if (crypt_io_buffer != NULL) { - mem_free(crypt_io_buffer); - iter.crypt_io_buffer = NULL; - fil_space_destroy_crypt_data(&iter.crypt_data); - } - } - - if (err == DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk"); - - if (!os_file_flush(file)) { - ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!"); - err = DB_IO_ERROR; - } else { - ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!"); - } - } - - os_file_close(file); - - mem_free(page_ptr); - mem_free(filepath); - - return(err); -} - -/** -Set the tablespace compressed table size. -@return DB_SUCCESS if it is valie or DB_CORRUPTION if not */ -dberr_t -PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW -{ - m_zip_size = fsp_header_get_zip_size(page); - - if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) { - return(DB_CORRUPTION); - } - - return(DB_SUCCESS); -} - -/********************************************************************//** -Delete the tablespace file and any related files like .cfg. -This should not be called for temporary tables. 
*/ -UNIV_INTERN -void -fil_delete_file( -/*============*/ - const char* ibd_name) /*!< in: filepath of the ibd - tablespace */ -{ - /* Force a delete of any stale .ibd files that are lying around. */ - - ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name); - - os_file_delete_if_exists(innodb_file_data_key, ibd_name); - - char* cfg_name = fil_make_cfg_name(ibd_name); - - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - - mem_free(cfg_name); -} - -/************************************************************************* -Return local hash table informations. */ - -ulint -fil_system_hash_cells(void) -/*=======================*/ -{ - if (fil_system) { - return (fil_system->spaces->n_cells - + fil_system->name_hash->n_cells); - } else { - return 0; - } -} - -ulint -fil_system_hash_nodes(void) -/*=======================*/ -{ - if (fil_system) { - return (UT_LIST_GET_LEN(fil_system->space_list) - * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE)); - } else { - return 0; - } -} - -/** -Iterate over all the spaces in the space list and fetch the -tablespace names. It will return a copy of the name that must be -freed by the caller using: delete[]. -@return DB_SUCCESS if all OK. */ -UNIV_INTERN -dberr_t -fil_get_space_names( -/*================*/ - space_name_list_t& space_name_list) - /*!< in/out: List to append to */ -{ - fil_space_t* space; - dberr_t err = DB_SUCCESS; - - mutex_enter(&fil_system->mutex); - - for (space = UT_LIST_GET_FIRST(fil_system->space_list); - space != NULL; - space = UT_LIST_GET_NEXT(space_list, space)) { - - if (space->purpose == FIL_TABLESPACE) { - ulint len; - char* name; - - len = strlen(space->name); - name = new(std::nothrow) char[len + 1]; - - if (name == 0) { - /* Caller to free elements allocated so far. 
*/ - err = DB_OUT_OF_MEMORY; - break; - } - - memcpy(name, space->name, len); - name[len] = 0; - - space_name_list.push_back(name); - } - } - - mutex_exit(&fil_system->mutex); - - return(err); -} - -/** Generate redo log for swapping two .ibd files -@param[in] old_table old table -@param[in] new_table new table -@param[in] tmp_name temporary table name -@param[in,out] mtr mini-transaction -@return innodb error code */ -UNIV_INTERN -dberr_t -fil_mtr_rename_log( - const dict_table_t* old_table, - const dict_table_t* new_table, - const char* tmp_name, - mtr_t* mtr) -{ - dberr_t err = DB_SUCCESS; - char* old_path; - - /* If neither table is file-per-table, - there will be no renaming of files. */ - if (old_table->space == TRX_SYS_SPACE - && new_table->space == TRX_SYS_SPACE) { - return(DB_SUCCESS); - } - - if (DICT_TF_HAS_DATA_DIR(old_table->flags)) { - old_path = os_file_make_remote_pathname( - old_table->data_dir_path, old_table->name, "ibd"); - } else { - old_path = fil_make_ibd_name(old_table->name, false); - } - if (old_path == NULL) { - return(DB_OUT_OF_MEMORY); - } - - if (old_table->space != TRX_SYS_SPACE) { - char* tmp_path; - - if (DICT_TF_HAS_DATA_DIR(old_table->flags)) { - tmp_path = os_file_make_remote_pathname( - old_table->data_dir_path, tmp_name, "ibd"); - } - else { - tmp_path = fil_make_ibd_name(tmp_name, false); - } - - if (tmp_path == NULL) { - mem_free(old_path); - return(DB_OUT_OF_MEMORY); - } - - /* Temp filepath must not exist. */ - err = fil_rename_tablespace_check( - old_table->space, old_path, tmp_path, - dict_table_is_discarded(old_table)); - mem_free(tmp_path); - if (err != DB_SUCCESS) { - mem_free(old_path); - return(err); - } - - fil_op_write_log(MLOG_FILE_RENAME, old_table->space, - 0, 0, old_table->name, tmp_name, mtr); - } - - if (new_table->space != TRX_SYS_SPACE) { - - /* Destination filepath must not exist unless this ALTER - TABLE starts and ends with a file_per-table tablespace. 
*/ - if (old_table->space == TRX_SYS_SPACE) { - char* new_path = NULL; - - if (DICT_TF_HAS_DATA_DIR(new_table->flags)) { - new_path = os_file_make_remote_pathname( - new_table->data_dir_path, - new_table->name, "ibd"); - } - else { - new_path = fil_make_ibd_name( - new_table->name, false); - } - - if (new_path == NULL) { - mem_free(old_path); - return(DB_OUT_OF_MEMORY); - } - - err = fil_rename_tablespace_check( - new_table->space, new_path, old_path, - dict_table_is_discarded(new_table)); - mem_free(new_path); - if (err != DB_SUCCESS) { - mem_free(old_path); - return(err); - } - } - - fil_op_write_log(MLOG_FILE_RENAME, new_table->space, - 0, 0, new_table->name, old_table->name, mtr); - - } - - mem_free(old_path); - - return(err); -} - -/************************************************************************* -functions to access is_corrupt flag of fil_space_t*/ - -void -fil_space_set_corrupt( -/*==================*/ - ulint space_id) -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(space_id); - - if (space) { - space->is_corrupt = true; - } - - mutex_exit(&fil_system->mutex); -} - -/** Acquire a tablespace when it could be dropped concurrently. -Used by background threads that do not necessarily hold proper locks -for concurrency control. 
-@param[in] id tablespace ID -@param[in] silent whether to silently ignore missing tablespaces -@return the tablespace -@retval NULL if missing or being deleted or truncated */ -UNIV_INTERN -fil_space_t* -fil_space_acquire_low(ulint id, bool silent) -{ - fil_space_t* space; - - mutex_enter(&fil_system->mutex); - - space = fil_space_get_by_id(id); - - if (space == NULL) { - if (!silent) { - ib_logf(IB_LOG_LEVEL_WARN, "Trying to access missing" - " tablespace " ULINTPF ".", id); - } - } else if (space->is_stopping()) { - space = NULL; - } else { - space->n_pending_ops++; - } - - mutex_exit(&fil_system->mutex); - - return(space); -} - -/** Acquire a tablespace for reading or writing a block, -when it could be dropped concurrently. -@param[in] id tablespace ID -@return the tablespace -@retval NULL if missing */ -UNIV_INTERN -fil_space_t* -fil_space_acquire_for_io(ulint id) -{ - mutex_enter(&fil_system->mutex); - - fil_space_t* space = fil_space_get_by_id(id); - - if (space) { - space->n_pending_ios++; - } - - mutex_exit(&fil_system->mutex); - - return(space); -} - -/** Release a tablespace acquired with fil_space_acquire_for_io(). -@param[in,out] space tablespace to release */ -UNIV_INTERN -void -fil_space_release_for_io(fil_space_t* space) -{ - mutex_enter(&fil_system->mutex); - ut_ad(space->magic_n == FIL_SPACE_MAGIC_N); - ut_ad(space->n_pending_ios > 0); - space->n_pending_ios--; - mutex_exit(&fil_system->mutex); -} - -/** Release a tablespace acquired with fil_space_acquire(). -@param[in,out] space tablespace to release */ -UNIV_INTERN -void -fil_space_release(fil_space_t* space) -{ - mutex_enter(&fil_system->mutex); - ut_ad(space->magic_n == FIL_SPACE_MAGIC_N); - ut_ad(space->n_pending_ops > 0); - space->n_pending_ops--; - mutex_exit(&fil_system->mutex); -} - -/** Return the next fil_space_t. -Once started, the caller must keep calling this until it returns NULL. 
-fil_space_acquire() and fil_space_release() are invoked here which -blocks a concurrent operation from dropping the tablespace. -@param[in] prev_space Pointer to the previous fil_space_t. -If NULL, use the first fil_space_t on fil_system->space_list. -@return pointer to the next fil_space_t. -@retval NULL if this was the last*/ -UNIV_INTERN -fil_space_t* -fil_space_next(fil_space_t* prev_space) -{ - fil_space_t* space=prev_space; - - mutex_enter(&fil_system->mutex); - - if (prev_space == NULL) { - space = UT_LIST_GET_FIRST(fil_system->space_list); - - /* We can trust that space is not NULL because at least the - system tablespace is always present and loaded first. */ - space->n_pending_ops++; - } else { - ut_ad(space->n_pending_ops > 0); - - /* Move on to the next fil_space_t */ - space->n_pending_ops--; - space = UT_LIST_GET_NEXT(space_list, space); - - /* Skip spaces that are being created by - fil_ibd_create(), or dropped, or !tablespace. */ - while (space != NULL - && (UT_LIST_GET_LEN(space->chain) == 0 - || space->is_stopping() - || space->purpose != FIL_TABLESPACE)) { - space = UT_LIST_GET_NEXT(space_list, space); - } - - if (space != NULL) { - space->n_pending_ops++; - } - } - - mutex_exit(&fil_system->mutex); - - return(space); -} - -/** -Remove space from key rotation list if there are no more -pending operations. -@param[in] space Tablespace */ -static -void -fil_space_remove_from_keyrotation( - fil_space_t* space) -{ - ut_ad(mutex_own(&fil_system->mutex)); - ut_ad(space); - - if (space->n_pending_ops == 0 && space->is_in_rotation_list) { - space->is_in_rotation_list = false; - ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0); - UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space); - } -} - - -/** Return the next fil_space_t from key rotation list. -Once started, the caller must keep calling this until it returns NULL. 
-fil_space_acquire() and fil_space_release() are invoked here which -blocks a concurrent operation from dropping the tablespace. -@param[in] prev_space Pointer to the previous fil_space_t. -If NULL, use the first fil_space_t on fil_system->space_list. -@return pointer to the next fil_space_t. -@retval NULL if this was the last*/ -UNIV_INTERN -fil_space_t* -fil_space_keyrotate_next( - fil_space_t* prev_space) -{ - fil_space_t* space = prev_space; - fil_space_t* old = NULL; - - mutex_enter(&fil_system->mutex); - - if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) { - if (space) { - ut_ad(space->n_pending_ops > 0); - space->n_pending_ops--; - fil_space_remove_from_keyrotation(space); - } - mutex_exit(&fil_system->mutex); - return(NULL); - } - - if (prev_space == NULL) { - space = UT_LIST_GET_FIRST(fil_system->rotation_list); - - /* We can trust that space is not NULL because we - checked list length above */ - } else { - ut_ad(space->n_pending_ops > 0); - - /* Move on to the next fil_space_t */ - space->n_pending_ops--; - - old = space; - space = UT_LIST_GET_NEXT(rotation_list, space); - - fil_space_remove_from_keyrotation(old); - } - - /* Skip spaces that are being created by fil_ibd_create(), - or dropped. Note that rotation_list contains only - space->purpose == FIL_TABLESPACE. */ - while (space != NULL - && (UT_LIST_GET_LEN(space->chain) == 0 - || space->is_stopping())) { - - old = space; - space = UT_LIST_GET_NEXT(rotation_list, space); - fil_space_remove_from_keyrotation(old); - } - - if (space != NULL) { - space->n_pending_ops++; - } - - mutex_exit(&fil_system->mutex); - - return(space); -} diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc deleted file mode 100644 index 2b6ae95640f..00000000000 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ /dev/null @@ -1,745 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2017, MariaDB Corporation. 
All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file fil/fil0pagecompress.cc -Implementation for page compressed file spaces. - -Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com -Updated 14/02/2015 -***********************************************************************/ - -#include "fil0fil.h" -#include "fil0pagecompress.h" - -#include <debug_sync.h> -#include <my_dbug.h> - -#include "mem0mem.h" -#include "hash0hash.h" -#include "os0file.h" -#include "mach0data.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "log0recv.h" -#include "fsp0fsp.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "dict0dict.h" -#include "page0page.h" -#include "page0zip.h" -#include "trx0sys.h" -#include "row0mysql.h" -#include "ha_prototypes.h" // IB_LOG_ -#ifndef UNIV_HOTBACKUP -# include "buf0lru.h" -# include "ibuf0ibuf.h" -# include "sync0sync.h" -# include "os0sync.h" -#else /* !UNIV_HOTBACKUP */ -# include "srv0srv.h" -static ulint srv_data_read, srv_data_written; -#endif /* !UNIV_HOTBACKUP */ -#include "zlib.h" -#ifdef __linux__ -#include <linux/fs.h> -#include <sys/ioctl.h> -#include <fcntl.h> -#endif -#include "row0mysql.h" -#ifdef HAVE_LZ4 -#include 
"lz4.h" -#endif -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif -#ifdef HAVE_LZMA -#include "lzma.h" -#endif -#ifdef HAVE_BZIP2 -#include "bzlib.h" -#endif -#ifdef HAVE_SNAPPY -#include "snappy-c.h" -#endif - -/* Used for debugging */ -//#define UNIV_PAGECOMPRESS_DEBUG 1 - -/****************************************************************//** -For page compressed pages compress the page before actual write -operation. -@return compressed page to be written*/ -UNIV_INTERN -byte* -fil_compress_page( -/*==============*/ - fil_space_t* space, /*!< in,out: tablespace (NULL during IMPORT) */ - byte* buf, /*!< in: buffer from which to write; in aio - this must be appropriately aligned */ - byte* out_buf, /*!< out: compressed buffer */ - ulint len, /*!< in: length of input buffer.*/ - ulint level, /* in: compression level */ - ulint block_size, /*!< in: block size */ - bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len) /*!< out: actual length of compressed - page */ -{ - int err = Z_OK; - int comp_level = level; - ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; - ulint write_size = 0; - /* Cache to avoid change during function execution */ - ulint comp_method = innodb_compression_algorithm; - bool allocated = false; - - /* page_compression does not apply to tables or tablespaces - that use ROW_FORMAT=COMPRESSED */ - ut_ad(!space || !FSP_FLAGS_GET_ZIP_SSIZE(space->flags)); - - if (encrypted) { - header_len += FIL_PAGE_COMPRESSION_METHOD_SIZE; - } - - if (!out_buf) { - allocated = true; - ulint size = UNIV_PAGE_SIZE; - - /* Both snappy and lzo compression methods require that - output buffer used for compression is bigger than input - buffer. Increase the allocated buffer size accordingly. 
*/ -#if HAVE_SNAPPY - if (comp_method == PAGE_SNAPPY_ALGORITHM) { - size = snappy_max_compressed_length(size); - } -#endif -#if HAVE_LZO - if (comp_method == PAGE_LZO_ALGORITHM) { - size += LZO1X_1_15_MEM_COMPRESS; - } -#endif - - out_buf = static_cast<byte *>(ut_malloc(size)); - } - - ut_ad(buf); - ut_ad(out_buf); - ut_ad(len); - ut_ad(out_len); - - /* Let's not compress file space header or - extent descriptor */ - switch (fil_page_get_type(buf)) { - case 0: - case FIL_PAGE_TYPE_FSP_HDR: - case FIL_PAGE_TYPE_XDES: - case FIL_PAGE_PAGE_COMPRESSED: - *out_len = len; - goto err_exit; - } - - /* If no compression level was provided to this table, use system - default level */ - if (comp_level == 0) { - comp_level = page_zip_level; - } - - DBUG_PRINT("compress", - ("Preparing for space " ULINTPF " '%s' len " ULINTPF, - space ? space->id : 0, - space ? space->name : "(import)", - len)); - - write_size = UNIV_PAGE_SIZE - header_len; - - switch(comp_method) { -#ifdef HAVE_LZ4 - case PAGE_LZ4_ALGORITHM: - -#ifdef HAVE_LZ4_COMPRESS_DEFAULT - err = LZ4_compress_default((const char *)buf, - (char *)out_buf+header_len, len, write_size); -#else - err = LZ4_compress_limitedOutput((const char *)buf, - (char *)out_buf+header_len, len, write_size); -#endif /* HAVE_LZ4_COMPRESS_DEFAULT */ - write_size = err; - - if (err == 0) { - /* If error we leave the actual page as it was */ - -#ifndef UNIV_PAGECOMPRESS_DEBUG - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; -#endif - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); -#ifndef UNIV_PAGECOMPRESS_DEBUG - } -#endif - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - break; -#endif /* HAVE_LZ4 */ -#ifdef HAVE_LZO - case PAGE_LZO_ALGORITHM: - err = lzo1x_1_15_compress( - buf, len, out_buf+header_len, &write_size, 
out_buf+UNIV_PAGE_SIZE); - - if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - - break; -#endif /* HAVE_LZO */ -#ifdef HAVE_LZMA - case PAGE_LZMA_ALGORITHM: { - size_t out_pos=0; - - err = lzma_easy_buffer_encode( - comp_level, - LZMA_CHECK_NONE, - NULL, /* No custom allocator, use malloc/free */ - reinterpret_cast<uint8_t*>(buf), - len, - reinterpret_cast<uint8_t*>(out_buf + header_len), - &out_pos, - (size_t)write_size); - - if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, out_pos); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - - write_size = out_pos; - - break; - } -#endif /* HAVE_LZMA */ - -#ifdef HAVE_BZIP2 - case PAGE_BZIP2_ALGORITHM: { - - err = BZ2_bzBuffToBuffCompress( - (char *)(out_buf + header_len), - (unsigned int *)&write_size, - (char *)buf, - len, - 1, - 0, - 0); - - if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - break; - } -#endif /* HAVE_BZIP2 */ - 
-#ifdef HAVE_SNAPPY - case PAGE_SNAPPY_ALGORITHM: - { - snappy_status cstatus; - write_size = snappy_max_compressed_length(UNIV_PAGE_SIZE); - - cstatus = snappy_compress( - (const char *)buf, - (size_t)len, - (char *)(out_buf+header_len), - (size_t*)&write_size); - - if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) { - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " err %d write_size " ULINTPF ".", - space->id, space->name, len, - (int)cstatus, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - break; - } -#endif /* HAVE_SNAPPY */ - - case PAGE_ZLIB_ALGORITHM: - err = compress2(out_buf+header_len, (ulong*)&write_size, buf, - uLong(len), comp_level); - - if (err != Z_OK) { - /* If error we leave the actual page as it was */ - - if (space && !space->printed_compression_failure) { - space->printed_compression_failure = true; - ib_logf(IB_LOG_LEVEL_WARN, - "Compression failed for space " ULINTPF - " name %s len " ULINTPF - " rt %d write_size " ULINTPF ".", - space->id, space->name, len, - err, write_size); - } - - srv_stats.pages_page_compression_error.inc(); - *out_len = len; - goto err_exit; - } - break; - - case PAGE_UNCOMPRESSED: - *out_len = len; - return (buf); - break; - default: - ut_error; - break; - } - - /* Set up the page header */ - memcpy(out_buf, buf, FIL_PAGE_DATA); - /* Set up the checksum */ - mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); - - /* Set up the compression algorithm */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method); - - if (encrypted) { - /* Set up the correct page type */ - mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - mach_write_to_2(out_buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, comp_method); - } else { - /* Set up the 
correct page type */ - mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); - } - - /* Set up the actual payload lenght */ - mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); - -#ifdef UNIV_DEBUG - /* Verify */ - ut_ad(fil_page_is_compressed(out_buf) || fil_page_is_compressed_encrypted(out_buf)); - ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); - ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method || - mach_read_from_2(out_buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE) == (ulint)comp_method); - - /* Verify that page can be decompressed */ - { - byte *comp_page; - byte *uncomp_page; - - comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); - uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); - memcpy(comp_page, out_buf, UNIV_PAGE_SIZE); - - fil_decompress_page(uncomp_page, comp_page, ulong(len), NULL); - - if (buf_page_is_corrupted(false, uncomp_page, 0, space)) { - buf_page_print(uncomp_page, 0, 0); - } - - ut_free(comp_page); - ut_free(uncomp_page); - } -#endif /* UNIV_DEBUG */ - - write_size+=header_len; - - if (block_size <= 0) { - block_size = 512; - } - - ut_ad(write_size > 0 && block_size > 0); - - /* Actual write needs to be alligned on block size */ - if (write_size % block_size) { - size_t tmp = write_size; - write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size); - /* Clean up the end of buffer */ - memset(out_buf+tmp, 0, write_size - tmp); -#ifdef UNIV_DEBUG - ut_a(write_size > 0 && ((write_size % block_size) == 0)); - ut_a(write_size >= tmp); -#endif - } - - DBUG_PRINT("compress", - ("Succeeded for space " ULINTPF - " '%s' len " ULINTPF " out_len " ULINTPF, - space ? space->id : 0, - space ? 
space->name : "(import)", - len, write_size)); - - srv_stats.page_compression_saved.add((len - write_size)); - srv_stats.pages_page_compressed.inc(); - - /* If we do not persistently trim rest of page, we need to write it - all */ - if (!srv_use_trim) { - memset(out_buf+write_size,0,len-write_size); - write_size = len; - } - - *out_len = write_size; - - if (allocated) { - /* TODO: reduce number of memcpy's */ - memcpy(buf, out_buf, len); - } else { - return(out_buf); - } - -err_exit: - if (allocated) { - ut_free(out_buf); - } - - return (buf); - -} - -/****************************************************************//** -For page compressed pages decompress the page after actual read -operation. */ -UNIV_INTERN -void -fil_decompress_page( -/*================*/ - byte* page_buf, /*!< in: preallocated buffer or NULL */ - byte* buf, /*!< out: buffer from which to read; in aio - this must be appropriately aligned */ - ulong len, /*!< in: length of output buffer.*/ - ulint* write_size, /*!< in/out: Actual payload size of - the compressed data. */ - bool return_error) /*!< in: true if only an error should - be produced when decompression fails. - By default this parameter is false. 
*/ -{ - int err = 0; - ulint actual_size = 0; - ulint compression_alg = 0; - byte *in_buf; - ulint ptype; - ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; - - ut_ad(buf); - ut_ad(len); - - ptype = mach_read_from_2(buf+FIL_PAGE_TYPE); - - if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - header_len += FIL_PAGE_COMPRESSION_METHOD_SIZE; - } - - /* Do not try to uncompressed pages that are not compressed */ - if (ptype != FIL_PAGE_PAGE_COMPRESSED && - ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED && - ptype != FIL_PAGE_TYPE_COMPRESSED) { - return; - } - - // If no buffer was given, we need to allocate temporal buffer - if (page_buf == NULL) { - in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); - memset(in_buf, 0, UNIV_PAGE_SIZE); - } else { - in_buf = page_buf; - } - - /* Before actual decompress, make sure that page type is correct */ - - if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || - (ptype != FIL_PAGE_PAGE_COMPRESSED && - ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: We try to uncompress corrupted page" - " CRC " ULINTPF " type " ULINTPF " len " ULINTPF ".", - mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), - mach_read_from_2(buf+FIL_PAGE_TYPE), len); - - fflush(stderr); - if (return_error) { - goto error_return; - } - ut_error; - } - - /* Get compression algorithm */ - if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) { - compression_alg = mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE); - } else { - compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - } - - /* Get the actual size of compressed page */ - actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); - /* Check if payload size is corrupted */ - if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: We try to uncompress corrupted page" - " actual size " ULINTPF " compression %s.", - actual_size, 
fil_get_compression_alg_name(compression_alg)); - fflush(stderr); - if (return_error) { - goto error_return; - } - ut_error; - } - - /* Store actual payload size of the compressed data. This pointer - points to buffer pool. */ - if (write_size) { - *write_size = actual_size; - } - - DBUG_PRINT("compress", - ("Preparing for decompress for len " ULINTPF ".", - actual_size)); - - switch(compression_alg) { - case PAGE_ZLIB_ALGORITHM: - err= uncompress(in_buf, &len, buf+header_len, (unsigned long)actual_size); - - /* If uncompress fails it means that page is corrupted */ - if (err != Z_OK) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size " ULINTPF " len " ULINTPF ".", - err, actual_size, len); - - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; - } - break; - -#ifdef HAVE_LZ4 - case PAGE_LZ4_ALGORITHM: - err = LZ4_decompress_fast((const char *)buf+header_len, (char *)in_buf, len); - - if (err != (int)actual_size) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size " ULINTPF " len " ULINTPF ".", - err, actual_size, len); - - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; - } - break; -#endif /* HAVE_LZ4 */ -#ifdef HAVE_LZO - case PAGE_LZO_ALGORITHM: { - ulint olen = 0; - err = lzo1x_decompress((const unsigned char *)buf+header_len, - actual_size,(unsigned char *)in_buf, &olen, NULL); - - if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but uncompress failed with error %d " - " size " ULINTPF " len " ULINTPF ".", - err, actual_size, len); - - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; - } - break; - } -#endif /* HAVE_LZO */ -#ifdef HAVE_LZMA - case PAGE_LZMA_ALGORITHM: { - - lzma_ret ret; - size_t src_pos = 0; - size_t dst_pos = 0; - 
uint64_t memlimit = UINT64_MAX; - - ret = lzma_stream_buffer_decode( - &memlimit, - 0, - NULL, - buf+header_len, - &src_pos, - actual_size, - in_buf, - &dst_pos, - len); - - - if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %ld bytes" - " size " ULINTPF "len " ULINTPF ".", - dst_pos, actual_size, len); - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; - } - - break; - } -#endif /* HAVE_LZMA */ -#ifdef HAVE_BZIP2 - case PAGE_BZIP2_ALGORITHM: { - unsigned int dst_pos = UNIV_PAGE_SIZE; - - err = BZ2_bzBuffToBuffDecompress( - (char *)in_buf, - &dst_pos, - (char *)(buf+header_len), - actual_size, - 1, - 0); - - if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only %du bytes" - " size " ULINTPF " len " ULINTPF " err %d.", - dst_pos, actual_size, len, err); - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; - } - break; - } -#endif /* HAVE_BZIP2 */ -#ifdef HAVE_SNAPPY - case PAGE_SNAPPY_ALGORITHM: - { - snappy_status cstatus; - ulint olen = UNIV_PAGE_SIZE; - - cstatus = snappy_uncompress( - (const char *)(buf+header_len), - (size_t)actual_size, - (char *)in_buf, - (size_t*)&olen); - - if (cstatus != SNAPPY_OK || olen != UNIV_PAGE_SIZE) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but decompression read only " ULINTPF " bytes" - " size " ULINTPF " len " ULINTPF " err %d.", - olen, actual_size, len, (int)cstatus); - fflush(stderr); - - if (return_error) { - goto error_return; - } - ut_error; - } - - break; - } -#endif /* HAVE_SNAPPY */ - default: - ib_logf(IB_LOG_LEVEL_ERROR, - "Corruption: Page is marked as compressed" - " but compression algorithm %s" - " is not known." 
- ,fil_get_compression_alg_name(compression_alg)); - - fflush(stderr); - if (return_error) { - goto error_return; - } - ut_error; - break; - } - - srv_stats.pages_page_decompressed.inc(); - - /* Copy the uncompressed page to the buffer pool, not - really any other options. */ - memcpy(buf, in_buf, len); - -error_return: - if (page_buf != in_buf) { - ut_free(in_buf); - } -} diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc deleted file mode 100644 index df8c6ffe222..00000000000 --- a/storage/xtradb/fsp/fsp0fsp.cc +++ /dev/null @@ -1,4171 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file fsp/fsp0fsp.cc -File space management - -Created 11/29/1995 Heikki Tuuri -***********************************************************************/ - -#include "fsp0fsp.h" - -#ifdef UNIV_NONINL -#include "fsp0fsp.ic" -#endif - -#include "buf0buf.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#include "mtr0log.h" -#include "ut0byte.h" -#include "page0page.h" -#include "page0zip.h" -#ifdef UNIV_HOTBACKUP -# include "fut0lst.h" -#else /* UNIV_HOTBACKUP */ -# include "sync0sync.h" -# include "fut0fut.h" -# include "srv0srv.h" -# include "ibuf0ibuf.h" -# include "btr0btr.h" -# include "btr0sea.h" -# include "dict0boot.h" -# include "log0log.h" -#endif /* UNIV_HOTBACKUP */ -#include "dict0mem.h" -#include "srv0start.h" - - -#ifndef UNIV_HOTBACKUP -/** Flag to indicate if we have printed the tablespace full error. */ -static ibool fsp_tbs_full_error_printed = FALSE; - -/**********************************************************************//** -Returns an extent to the free list of a space. */ -static -void -fsp_free_extent( -/*============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset in the extent */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Frees an extent of a segment to the space free list. 
*/ -static -void -fseg_free_extent( -/*=============*/ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset in the extent */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Calculates the number of pages reserved by a segment, and how -many pages are currently used. -@return number of reserved pages */ -static -ulint -fseg_n_reserved_pages_low( -/*======================*/ - fseg_inode_t* header, /*!< in: segment inode */ - ulint* used, /*!< out: number of pages used (not - more than reserved) */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/********************************************************************//** -Marks a page used. The page must reside within the extents of the given -segment. */ -static MY_ATTRIBUTE((nonnull)) -void -fseg_mark_page_used( -/*================*/ - fseg_inode_t* seg_inode,/*!< in: segment inode */ - ulint page, /*!< in: page offset */ - xdes_t* descr, /*!< in: extent descriptor */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Returns the first extent descriptor for a segment. We think of the extent -lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL --> FSEG_FREE. -@return the first extent descriptor, or NULL if none */ -static -xdes_t* -fseg_get_first_extent( -/*==================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Puts new extents to the free list if -there are free extents above the free limit. 
If an extent happens -to contain an extent descriptor page, the extent is put to -the FSP_FREE_FRAG list with the page marked as used. */ -static -void -fsp_fill_free_list( -/*===============*/ - ibool init_space, /*!< in: TRUE if this is a single-table - tablespace and we are only initing - the tablespace's first extent - descriptor page and ibuf bitmap page; - then we do not allocate more extents */ - ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - UNIV_COLD; -/**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. -@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded -(init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -static -buf_block_t* -fseg_alloc_free_page_low( -/*=====================*/ - ulint space, /*!< in: space */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fseg_inode_t* seg_inode, /*!< in/out: segment inode */ - ulint hint, /*!< in: hint of which page would be - desirable */ - byte direction, /*!< in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction - in which the page should be initialized. - If init_mtr!=mtr, but the page is already - latched in mtr, do not initialize the page. 
*/ - MY_ATTRIBUTE((warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Reads the file space size stored in the header page. -@return tablespace size stored in the space header */ -UNIV_INTERN -ulint -fsp_get_size_low( -/*=============*/ - page_t* page) /*!< in: header page (page 0 in the tablespace) */ -{ - return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE)); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Gets a pointer to the space header and x-locks its page. -@return pointer to the space header, page x-locked */ -UNIV_INLINE -fsp_header_t* -fsp_get_space_header( -/*=================*/ - ulint id, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - fsp_header_t* header; - - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_ad(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN); - ut_ad(id || !zip_size); - - block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr); - - SRV_CORRUPT_TABLE_CHECK(block, return(0);); - - header = FSP_HEADER_OFFSET + buf_block_get_frame(block); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header)); - ut_ad(zip_size == fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + header))); - return(header); -} - -/**********************************************************************//** -Gets a descriptor bit of a page. -@return TRUE if free */ -UNIV_INLINE -ibool -xdes_mtr_get_bit( -/*=============*/ - const xdes_t* descr, /*!< in: descriptor */ - ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset, /*!< in: page offset within extent: - 0 ... 
FSP_EXTENT_SIZE - 1 */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - - return(xdes_get_bit(descr, bit, offset)); -} - -/**********************************************************************//** -Sets a descriptor bit of a page. */ -UNIV_INLINE -void -xdes_set_bit( -/*=========*/ - xdes_t* descr, /*!< in: descriptor */ - ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset, /*!< in: page offset within extent: - 0 ... FSP_EXTENT_SIZE - 1 */ - ibool val, /*!< in: bit value */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint index; - ulint byte_index; - ulint bit_index; - ulint descr_byte; - - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT)); - ut_ad(offset < FSP_EXTENT_SIZE); - - index = bit + XDES_BITS_PER_PAGE * offset; - - byte_index = index / 8; - bit_index = index % 8; - - descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index, - MLOG_1BYTE, mtr); - descr_byte = ut_bit_set_nth(descr_byte, bit_index, val); - - mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte, - MLOG_1BYTE, mtr); -} - -/**********************************************************************//** -Looks for a descriptor bit having the desired value. Starts from hint -and scans upward; at the end of the extent the search is wrapped to -the start of the extent. 
-@return bit index of the bit, ULINT_UNDEFINED if not found */ -UNIV_INLINE -ulint -xdes_find_bit( -/*==========*/ - xdes_t* descr, /*!< in: descriptor */ - ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ibool val, /*!< in: desired bit value */ - ulint hint, /*!< in: hint of which bit position would - be desirable */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - - ut_ad(descr && mtr); - ut_ad(val <= TRUE); - ut_ad(hint < FSP_EXTENT_SIZE); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - for (i = hint; i < FSP_EXTENT_SIZE; i++) { - if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) { - - return(i); - } - } - - for (i = 0; i < hint; i++) { - if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Returns the number of used pages in a descriptor. -@return number of pages used */ -UNIV_INLINE -ulint -xdes_get_n_used( -/*============*/ - const xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint count = 0; - - ut_ad(descr && mtr); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - for (ulint i = 0; i < FSP_EXTENT_SIZE; ++i) { - if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) { - count++; - } - } - - return(count); -} - -/**********************************************************************//** -Returns true if extent contains no used pages. -@return TRUE if totally free */ -UNIV_INLINE -ibool -xdes_is_free( -/*=========*/ - const xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - if (0 == xdes_get_n_used(descr, mtr)) { - - return(TRUE); - } - - return(FALSE); -} - -/**********************************************************************//** -Returns true if extent contains no free pages. 
-@return TRUE if full */ -UNIV_INLINE -ibool -xdes_is_full( -/*=========*/ - const xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) { - - return(TRUE); - } - - return(FALSE); -} - -/**********************************************************************//** -Sets the state of an xdes. */ -UNIV_INLINE -void -xdes_set_state( -/*===========*/ - xdes_t* descr, /*!< in/out: descriptor */ - ulint state, /*!< in: state to set */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(descr && mtr); - ut_ad(state >= XDES_FREE); - ut_ad(state <= XDES_FSEG); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - - mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr); -} - -/**********************************************************************//** -Gets the state of an xdes. -@return state */ -UNIV_INLINE -ulint -xdes_get_state( -/*===========*/ - const xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint state; - - ut_ad(descr && mtr); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - - state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr); - ut_ad(state - 1 < XDES_FSEG); - return(state); -} - -/**********************************************************************//** -Inits an extent descriptor to the free and clean state. */ -UNIV_INLINE -void -xdes_init( -/*======*/ - xdes_t* descr, /*!< in: descriptor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - - ut_ad(descr && mtr); - ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX)); - ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0); - - for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) { - mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr); - } - - xdes_set_state(descr, XDES_FREE, mtr); -} - -/********************************************************************//** -Gets pointer to a the extent descriptor of a page. 
The page where the extent
descriptor resides is x-locked. This function no longer extends the data
file: an offset at or beyond the free limit (or the space size) simply
yields NULL.
@return pointer to the extent descriptor, NULL if the page does not
exist in the space, if the offset is >= the free limit, or if the
descriptor page could not be fetched and SRV_CORRUPT_TABLE_CHECK chose to
bail out */
UNIV_INLINE MY_ATTRIBUTE((nonnull, warn_unused_result))
xdes_t*
xdes_get_descriptor_with_space_hdr(
/*===============================*/
	fsp_header_t*	sp_header,	/*!< in/out: space header, x-latched
					in mtr */
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset; must be below
					both the space size and the free
					limit for a descriptor to be
					returned */
	mtr_t*		mtr)		/*!< in/out: mini-transaction */
{
	ulint	limit;
	ulint	size;
	ulint	zip_size;
	ulint	descr_page_no;
	page_t*	descr_page;

	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
	ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
	/* Read free limit and space size */
	limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
	size  = mach_read_from_4(sp_header + FSP_SIZE);
	zip_size = fsp_flags_get_zip_size(
		mach_read_from_4(sp_header + FSP_SPACE_FLAGS));

	if ((offset >= size) || (offset >= limit)) {
		return(NULL);
	}

	descr_page_no = xdes_calc_descriptor_page(zip_size, offset);

	if (descr_page_no == 0) {
		/* It is on the space header page */

		descr_page = page_align(sp_header);
	} else {
		buf_block_t*	block;

		block = buf_page_get(space, zip_size, descr_page_no,
				     RW_X_LATCH, mtr);

		/* Same guard as fsp_get_space_header() and
		xdes_get_descriptor(): do not dereference a block that
		could not be fetched. */
		SRV_CORRUPT_TABLE_CHECK(block, return(NULL););

		buf_block_dbg_add_level(block, SYNC_FSP_PAGE);

		descr_page = buf_block_get_frame(block);
	}

	return(descr_page + XDES_ARR_OFFSET
	       + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset));
}

/********************************************************************//**
Gets a pointer to the extent descriptor of a page. The page where the extent
descriptor resides is x-locked. This function no longer extends the data
file.
-@return pointer to the extent descriptor, NULL if the page does not -exist in the space or if the offset exceeds the free limit */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -xdes_t* -xdes_get_descriptor( -/*================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint offset, /*!< in: page offset; if equal to the free limit, - we try to add new extents to the space free list */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - fsp_header_t* sp_header; - - block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr); - - SRV_CORRUPT_TABLE_CHECK(block, return(0);); - - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block); - return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset, - mtr)); -} - -/********************************************************************//** -Gets pointer to a the extent descriptor if the file address -of the descriptor list node is known. The page where the -extent descriptor resides is x-locked. -@return pointer to the extent descriptor */ -UNIV_INLINE -xdes_t* -xdes_lst_get_descriptor( -/*====================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fil_addr_t lst_node,/*!< in: file address of the list node - contained in the descriptor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - xdes_t* descr; - - ut_ad(mtr); - ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), - MTR_MEMO_X_LOCK)); - descr = fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr) - - XDES_FLST_NODE; - - return(descr); -} - -/********************************************************************//** -Returns page offset of the first page in extent described by a descriptor. 
-@return offset of the first page in extent */ -UNIV_INLINE -ulint -xdes_get_offset( -/*============*/ - const xdes_t* descr) /*!< in: extent descriptor */ -{ - ut_ad(descr); - - return(page_get_page_no(page_align(descr)) - + ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE) - * FSP_EXTENT_SIZE); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Inits a file page whose prior contents should be ignored. */ -static -void -fsp_init_file_page_low( -/*===================*/ - buf_block_t* block) /*!< in: pointer to a page */ -{ - page_t* page = buf_block_get_frame(block); - page_zip_des_t* page_zip= buf_block_get_page_zip(block); - -#ifndef UNIV_HOTBACKUP - block->check_index_page_at_flush = FALSE; -#endif /* !UNIV_HOTBACKUP */ - - if (page_zip) { - memset(page, 0, UNIV_PAGE_SIZE); - memset(page_zip->data, 0, page_zip_get_size(page_zip)); - mach_write_to_4(page + FIL_PAGE_OFFSET, - buf_block_get_page_no(block)); - mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - buf_block_get_space(block)); - memcpy(page_zip->data + FIL_PAGE_OFFSET, - page + FIL_PAGE_OFFSET, 4); - memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4); - return; - } - - memset(page, 0, UNIV_PAGE_SIZE); - mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block)); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - buf_block_get_space(block)); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************//** -Inits a file page whose prior contents should be ignored. 
*/ -static -void -fsp_init_file_page( -/*===============*/ - buf_block_t* block, /*!< in: pointer to a page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fsp_init_file_page_low(block); - - mlog_write_initial_log_record(buf_block_get_frame(block), - MLOG_INIT_FILE_PAGE, mtr); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses a redo log record of a file page init. -@return end of log record or NULL */ -UNIV_INTERN -byte* -fsp_parse_init_file_page( -/*=====================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ - buf_block_t* block) /*!< in: block or NULL */ -{ - ut_ad(ptr && end_ptr); - - if (block) { - fsp_init_file_page_low(block); - } - - return(ptr); -} - -/**********************************************************************//** -Initializes the fsp system. */ -UNIV_INTERN -void -fsp_init(void) -/*==========*/ -{ - /* FSP_EXTENT_SIZE must be a multiple of page & zip size */ - ut_a(0 == (UNIV_PAGE_SIZE % FSP_EXTENT_SIZE)); - ut_a(UNIV_PAGE_SIZE); - -#if UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX -# error "UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX != 0" -#endif -#if UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN -# error "UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN != 0" -#endif - - /* Does nothing at the moment */ -} - -/**********************************************************************//** -Writes the space id and flags to a tablespace header. The flags contain -row type, physical/compressed page size, and logical/uncompressed page -size of the tablespace. 
*/ -UNIV_INTERN -void -fsp_header_init_fields( -/*===================*/ - page_t* page, /*!< in/out: first page in the space */ - ulint space_id, /*!< in: space id */ - ulint flags) /*!< in: tablespace flags (FSP_SPACE_FLAGS) */ -{ - flags &= ~FSP_FLAGS_MEM_MASK; - ut_a(fsp_flags_is_valid(flags)); - - mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page, - space_id); - mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page, - flags); -} - -#ifndef UNIV_HOTBACKUP -/** Initialize a tablespace header. -@param[in] space_id space id -@param[in] size current size in blocks -@param[in,out] mtr mini-transaction */ -UNIV_INTERN -void -fsp_header_init(ulint space_id, ulint size, mtr_t* mtr) -{ - fsp_header_t* header; - buf_block_t* block; - page_t* page; - ulint flags; - ulint zip_size; - - ut_ad(mtr); - - mtr_x_lock(fil_space_get_latch(space_id, &flags), mtr); - - zip_size = fsp_flags_get_zip_size(flags); - block = buf_page_create(space_id, 0, zip_size, mtr); - buf_page_get(space_id, zip_size, 0, RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - /* The prior contents of the file page should be ignored */ - - fsp_init_file_page(block, mtr); - page = buf_block_get_frame(block); - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR, - MLOG_2BYTES, mtr); - - header = FSP_HEADER_OFFSET + page; - - mlog_write_ulint(header + FSP_SPACE_ID, space_id, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr); - - mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_SPACE_FLAGS, flags & ~FSP_FLAGS_MEM_MASK, - MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr); - - flst_init(header + FSP_FREE, mtr); - flst_init(header + FSP_FREE_FRAG, mtr); - flst_init(header + FSP_FULL_FRAG, mtr); - flst_init(header + FSP_SEG_INODES_FULL, mtr); - flst_init(header + FSP_SEG_INODES_FREE, mtr); - - 
mlog_write_ull(header + FSP_SEG_ID, 1, mtr); - - fsp_fill_free_list(space_id != TRX_SYS_SPACE, space_id, header, mtr); - - fil_space_t* space = fil_space_acquire(space_id); - ut_ad(space); - - if (space->crypt_data) { - space->crypt_data->write_page0(page, mtr); - } - - fil_space_release(space); -} - -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Reads the space id from the first page of a tablespace. -@return space id, ULINT UNDEFINED if error */ -UNIV_INTERN -ulint -fsp_header_get_space_id( -/*====================*/ - const page_t* page) /*!< in: first page of a tablespace */ -{ - ulint fsp_id; - ulint id; - - fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID); - - id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - DBUG_EXECUTE_IF("fsp_header_get_space_id_failure", - id = ULINT_UNDEFINED;); - - if (id != fsp_id) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Space id in fsp header %lu,but in the page header " - "%lu", fsp_id, id); - - return(ULINT_UNDEFINED); - } - - return(id); -} - -/**********************************************************************//** -Reads the space flags from the first page of a tablespace. -@return flags */ -UNIV_INTERN -ulint -fsp_header_get_flags( -/*=================*/ - const page_t* page) /*!< in: first page of a tablespace */ -{ - ut_ad(!page_offset(page)); - - return(mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page)); -} - -/**********************************************************************//** -Reads the compressed page size from the first page of a tablespace. 
-@return compressed page size in bytes, or 0 if uncompressed */ -UNIV_INTERN -ulint -fsp_header_get_zip_size( -/*====================*/ - const page_t* page) /*!< in: first page of a tablespace */ -{ - ulint flags = fsp_header_get_flags(page); - - return(fsp_flags_get_zip_size(flags)); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Increases the space size field of a space. */ -UNIV_INTERN -void -fsp_header_inc_size( -/*================*/ - ulint space, /*!< in: space id */ - ulint size_inc, /*!< in: size increment in pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fsp_header_t* header; - ulint size; - ulint flags; - - ut_ad(mtr); - - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); - - header = fsp_get_space_header(space, - fsp_flags_get_zip_size(flags), - mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, - mtr); -} - -/**********************************************************************//** -Gets the size of the system tablespace from the tablespace header. If -we do not have an auto-extending data file, this should be equal to -the size of the data files. If there is an auto-extending data file, -this can be smaller. -@return size in pages */ -UNIV_INTERN -ulint -fsp_header_get_tablespace_size(void) -/*================================*/ -{ - fsp_header_t* header; - ulint size; - mtr_t mtr; - - mtr_start(&mtr); - - mtr_x_lock(fil_space_get_latch(0, NULL), &mtr); - - header = fsp_get_space_header(0, 0, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - - mtr_commit(&mtr); - - return(size); -} - -/***********************************************************************//** -Tries to extend a single-table tablespace so that a page would fit in the -data file. 
-@return TRUE if success */ -static UNIV_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) -ibool -fsp_try_extend_data_file_with_pages( -/*================================*/ - ulint space, /*!< in: space */ - ulint page_no, /*!< in: page number */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ibool success; - ulint actual_size; - ulint size; - - ut_a(space != 0); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - ut_a(page_no >= size); - - success = fil_extend_space_to_desired_size(&actual_size, space, - page_no + 1); - /* actual_size now has the space size in pages; it may be less than - we wanted if we ran out of disk space */ - - mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr); - - return(success); -} - -/***********************************************************************//** -Tries to extend the last data file of a tablespace if it is auto-extending. -@return FALSE if not auto-extending */ -static UNIV_COLD MY_ATTRIBUTE((nonnull)) -ibool -fsp_try_extend_data_file( -/*=====================*/ - ulint* actual_increase,/*!< out: actual increase in pages, where - we measure the tablespace size from - what the header field says; it may be - the actual file size rounded down to - megabyte */ - ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint size; - ulint zip_size; - ulint new_size; - ulint old_size; - ulint size_increase; - ulint actual_size; - ibool success; - - *actual_increase = 0; - - if (space == 0 && !srv_auto_extend_last_data_file) { - - /* We print the error message only once to avoid - spamming the error log. Note that we don't need - to reset the flag to FALSE as dealing with this - error requires server restart. 
*/ - if (fsp_tbs_full_error_printed == FALSE) { - fprintf(stderr, - "InnoDB: Error: Data file(s) ran" - " out of space.\n" - "Please add another data file or" - " use \'autoextend\' for the last" - " data file.\n"); - fsp_tbs_full_error_printed = TRUE; - } - return(FALSE); - } - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(header + FSP_SPACE_FLAGS)); - - old_size = size; - - if (space == 0) { - if (!srv_last_file_size_max) { - size_increase = SRV_AUTO_EXTEND_INCREMENT; - } else { - if (srv_last_file_size_max - < srv_data_file_sizes[srv_n_data_files - 1]) { - - fprintf(stderr, - "InnoDB: Error: Last data file size" - " is %lu, max size allowed %lu\n", - (ulong) srv_data_file_sizes[ - srv_n_data_files - 1], - (ulong) srv_last_file_size_max); - } - - size_increase = srv_last_file_size_max - - srv_data_file_sizes[srv_n_data_files - 1]; - if (size_increase > SRV_AUTO_EXTEND_INCREMENT) { - size_increase = SRV_AUTO_EXTEND_INCREMENT; - } - } - } else { - /* We extend single-table tablespaces first one extent - at a time, but 4 at a time for bigger tablespaces. It is - not enough to extend always by one extent, because we need - to add at least one extent to FSP_FREE. - A single extent descriptor page will track many extents. - And the extent that uses its extent descriptor page is - put onto the FSP_FREE_FRAG list. Extents that do not - use their extent descriptor page are added to FSP_FREE. - The physical page size is used to determine how many - extents are tracked on one extent descriptor page. */ - ulint extent_size; /*!< one megabyte, in pages */ - ulint threshold; /*!< The size of the tablespace - (in number of pages) where we - start allocating more than one - extent at a time. 
*/ - - if (!zip_size) { - extent_size = FSP_EXTENT_SIZE; - } else { - extent_size = FSP_EXTENT_SIZE - * UNIV_PAGE_SIZE / zip_size; - } - - /* Threshold is set at 32mb except when the page - size is small enough that it must be done sooner. - For page size less than 4k, we may reach the - extent contains extent descriptor page before - 32 mb. */ - threshold = ut_min((32 * extent_size), - (zip_size ? zip_size : UNIV_PAGE_SIZE)); - - if (size < extent_size) { - /* Let us first extend the file to extent_size */ - success = fsp_try_extend_data_file_with_pages( - space, extent_size - 1, header, mtr); - if (!success) { - new_size = mtr_read_ulint(header + FSP_SIZE, - MLOG_4BYTES, mtr); - - *actual_increase = new_size - old_size; - - return(FALSE); - } - - size = extent_size; - } - - if (size < threshold) { - size_increase = extent_size; - } else { - /* Below in fsp_fill_free_list() we assume - that we add at most FSP_FREE_ADD extents at - a time */ - size_increase = FSP_FREE_ADD * extent_size; - } - } - - if (size_increase == 0) { - - return(TRUE); - } - - success = fil_extend_space_to_desired_size(&actual_size, space, - size + size_increase); - if (!success) { - - return(false); - } - - /* We ignore any fragments of a full megabyte when storing the size - to the space header */ - - if (!zip_size) { - new_size = ut_calc_align_down(actual_size, - (1024 * 1024) / UNIV_PAGE_SIZE); - } else { - new_size = ut_calc_align_down(actual_size, - (1024 * 1024) / zip_size); - } - mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr); - - *actual_increase = new_size - old_size; - - return(TRUE); -} - -/**********************************************************************//** -Puts new extents to the free list if there are free extents above the free -limit. If an extent happens to contain an extent descriptor page, the extent -is put to the FSP_FREE_FRAG list with the page marked as used. 
*/ -static -void -fsp_fill_free_list( -/*===============*/ - ibool init_space, /*!< in: TRUE if this is a single-table - tablespace and we are only initing - the tablespace's first extent - descriptor page and ibuf bitmap page; - then we do not allocate more extents */ - ulint space, /*!< in: space */ - fsp_header_t* header, /*!< in/out: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint limit; - ulint size; - ulint zip_size; - xdes_t* descr; - ulint count = 0; - ulint frag_n_used; - ulint actual_increase; - ulint i; - mtr_t ibuf_mtr; - - ut_ad(page_offset(header) == FSP_HEADER_OFFSET); - - /* Check if we can fill free list from above the free list limit */ - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr); - - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + header)); - ut_a(ut_is_2pow(zip_size)); - ut_a(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN); - - if (space == 0 && srv_auto_extend_last_data_file - && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { - - /* Try to increase the last data file size */ - fsp_try_extend_data_file(&actual_increase, space, header, mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - } - - if (space != 0 && !init_space - && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { - - /* Try to increase the .ibd file size */ - fsp_try_extend_data_file(&actual_increase, space, header, mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - } - - i = limit; - - while ((init_space && i < 1) - || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) { - - ibool init_xdes; - if (zip_size) { - init_xdes = ut_2pow_remainder(i, zip_size) == 0; - } else { - init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0; - } - - mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE, - MLOG_4BYTES, mtr); - - if (UNIV_UNLIKELY(init_xdes)) { - - buf_block_t* 
block; - - /* We are going to initialize a new descriptor page - and a new ibuf bitmap page: the prior contents of the - pages should be ignored. */ - - if (i > 0) { - block = buf_page_create( - space, i, zip_size, mtr); - buf_page_get(space, zip_size, i, - RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, - SYNC_FSP_PAGE); - - fsp_init_file_page(block, mtr); - mlog_write_ulint(buf_block_get_frame(block) - + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_XDES, - MLOG_2BYTES, mtr); - } - - /* Initialize the ibuf bitmap page in a separate - mini-transaction because it is low in the latching - order, and we must be able to release its latch - before returning from the fsp routine */ - - mtr_start(&ibuf_mtr); - - block = buf_page_create(space, - i + FSP_IBUF_BITMAP_OFFSET, - zip_size, &ibuf_mtr); - buf_page_get(space, zip_size, - i + FSP_IBUF_BITMAP_OFFSET, - RW_X_LATCH, &ibuf_mtr); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - fsp_init_file_page(block, &ibuf_mtr); - - ibuf_bitmap_page_init(block, &ibuf_mtr); - - mtr_commit(&ibuf_mtr); - } - - descr = xdes_get_descriptor_with_space_hdr(header, space, i, - mtr); - xdes_init(descr, mtr); - - if (UNIV_UNLIKELY(init_xdes)) { - - /* The first page in the extent is a descriptor page - and the second is an ibuf bitmap page: mark them - used */ - - xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr); - xdes_set_bit(descr, XDES_FREE_BIT, - FSP_IBUF_BITMAP_OFFSET, FALSE, mtr); - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - - flst_add_last(header + FSP_FREE_FRAG, - descr + XDES_FLST_NODE, mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used + 2, MLOG_4BYTES, mtr); - } else { - flst_add_last(header + FSP_FREE, - descr + XDES_FLST_NODE, mtr); - count++; - } - - i += FSP_EXTENT_SIZE; - } -} - -/**********************************************************************//** -Allocates a new free extent. 
-@return extent descriptor, NULL if cannot be allocated */ -static -xdes_t* -fsp_alloc_free_extent( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hint, /*!< in: hint of which extent would be desirable: any - page offset in the extent goes; the hint must not - be > FSP_FREE_LIMIT */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fsp_header_t* header; - fil_addr_t first; - xdes_t* descr; - - ut_ad(mtr); - - header = fsp_get_space_header(space, zip_size, mtr); - - descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr); - - if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) { - /* Ok, we can take this extent */ - } else { - /* Take the first extent in the free list */ - first = flst_get_first(header + FSP_FREE, mtr); - - if (fil_addr_is_null(first)) { - fsp_fill_free_list(FALSE, space, header, mtr); - - first = flst_get_first(header + FSP_FREE, mtr); - } - - if (fil_addr_is_null(first)) { - - return(NULL); /* No free extents left */ - } - - descr = xdes_lst_get_descriptor(space, zip_size, first, mtr); - } - - flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); - - return(descr); -} - -/**********************************************************************//** -Allocates a single free page from a space. 
*/ -static MY_ATTRIBUTE((nonnull)) -void -fsp_alloc_from_free_frag( -/*=====================*/ - fsp_header_t* header, /*!< in/out: tablespace header */ - xdes_t* descr, /*!< in/out: extent descriptor */ - ulint bit, /*!< in: slot to allocate in the extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint frag_n_used; - - ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG); - ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, bit, mtr)); - xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr); - - /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - frag_n_used++; - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, - mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FULL_FRAG, mtr); - - flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, - mtr); - } -} - -/**********************************************************************//** -Gets a buffer block for an allocated page. - -NOTE: If init_mtr != mtr, the block will only be initialized if it was -not previously x-latched. It is assumed that the block has been -x-latched only by mtr, and freed in mtr in that case. 
- -@return block, initialized if init_mtr==mtr -or rw_lock_x_lock_count(&block->lock) == 1 */ -static -buf_block_t* -fsp_page_create( -/*============*/ - ulint space, /*!< in: space id of the allocated page */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the allocated page */ - mtr_t* mtr, /*!< in: mini-transaction of the allocation */ - mtr_t* init_mtr) /*!< in: mini-transaction for initializing - the page */ -{ - buf_block_t* block - = buf_page_create(space, page_no, zip_size, init_mtr); -#ifdef UNIV_SYNC_DEBUG - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX) - == rw_lock_own(&block->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - /* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */ - rw_lock_x_lock(&block->lock); - mutex_enter(&block->mutex); - buf_block_buf_fix_inc(block, __FILE__, __LINE__); - mutex_exit(&block->mutex); - mtr_memo_push(init_mtr, block, MTR_MEMO_PAGE_X_FIX); - - if (init_mtr == mtr - || rw_lock_get_x_lock_count(&block->lock) == 1) { - - /* Initialize the page, unless it was already - X-latched in mtr. (In this case, we would want to - allocate another page that has not been freed in mtr.) */ - ut_ad(init_mtr == mtr - || !mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - - fsp_init_file_page(block, init_mtr); - } - - return(block); -} - -/**********************************************************************//** -Allocates a single free page from a space. The page is marked as used. 
-@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded -(init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -static MY_ATTRIBUTE((warn_unused_result)) -buf_block_t* -fsp_alloc_free_page( -/*================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hint, /*!< in: hint of which page would be desirable */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mini-transaction in which the - page should be initialized - (may be the same as mtr) */ -{ - fsp_header_t* header; - fil_addr_t first; - xdes_t* descr; - ulint free; - ulint page_no; - ulint space_size; - - header = fsp_get_space_header(space, zip_size, mtr); - - /* Get the hinted descriptor */ - descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr); - - if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) { - /* Ok, we can take this extent */ - } else { - /* Else take the first extent in free_frag list */ - first = flst_get_first(header + FSP_FREE_FRAG, mtr); - - if (fil_addr_is_null(first)) { - /* There are no partially full fragments: allocate - a free extent and add it to the FREE_FRAG list. NOTE - that the allocation may have as a side-effect that an - extent containing a descriptor page is added to the - FREE_FRAG list. But we will allocate our page from the - the free extent anyway. */ - - descr = fsp_alloc_free_extent(space, zip_size, - hint, mtr); - - if (descr == NULL) { - /* No free space left */ - - return(NULL); - } - - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - flst_add_last(header + FSP_FREE_FRAG, - descr + XDES_FLST_NODE, mtr); - } else { - descr = xdes_lst_get_descriptor(space, zip_size, - first, mtr); - } - - /* Reset the hint */ - hint = 0; - } - - /* Now we have in descr an extent with at least one free page. 
Look - for a free page in the extent. */ - - free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE, - hint % FSP_EXTENT_SIZE, mtr); - if (free == ULINT_UNDEFINED) { - - ut_print_buf(stderr, ((byte*) descr) - 500, 1000); - putc('\n', stderr); - - ut_error; - } - - page_no = xdes_get_offset(descr) + free; - - space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - if (space_size <= page_no) { - /* It must be that we are extending a single-table tablespace - whose size is still < 64 pages */ - - ut_a(space != 0); - if (page_no >= FSP_EXTENT_SIZE) { - fprintf(stderr, - "InnoDB: Error: trying to extend a" - " single-table tablespace %lu\n" - "InnoDB: by single page(s) though the" - " space size %lu. Page no %lu.\n", - (ulong) space, (ulong) space_size, - (ulong) page_no); - return(NULL); - } - if (!fsp_try_extend_data_file_with_pages(space, page_no, - header, mtr)) { - /* No disk space left */ - return(NULL); - } - } - - fsp_alloc_from_free_frag(header, descr, free, mtr); - return(fsp_page_create(space, zip_size, page_no, mtr, init_mtr)); -} - -/**********************************************************************//** -Frees a single page of a space. The page is marked as free and clean. 
*/ -static -void -fsp_free_page( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fsp_header_t* header; - xdes_t* descr; - ulint state; - ulint frag_n_used; - - ut_ad(mtr); - - /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */ - - header = fsp_get_space_header(space, zip_size, mtr); - - descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); - - state = xdes_get_state(descr, mtr); - - if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) { - fprintf(stderr, - "InnoDB: Error: File space extent descriptor" - " of page %lu has state %lu\n", - (ulong) page, - (ulong) state); - fputs("InnoDB: Dump of descriptor: ", stderr); - ut_print_buf(stderr, ((byte*) descr) - 50, 200); - putc('\n', stderr); - /* Crash in debug version, so that we get a core dump - of this corruption. */ - ut_ad(0); - - if (state == XDES_FREE) { - /* We put here some fault tolerance: if the page - is already free, return without doing anything! */ - - return; - } - - ut_error; - } - - if (xdes_mtr_get_bit(descr, XDES_FREE_BIT, - page % FSP_EXTENT_SIZE, mtr)) { - - fprintf(stderr, - "InnoDB: Error: File space extent descriptor" - " of page %lu says it is free\n" - "InnoDB: Dump of descriptor: ", (ulong) page); - ut_print_buf(stderr, ((byte*) descr) - 50, 200); - putc('\n', stderr); - /* Crash in debug version, so that we get a core dump - of this corruption. */ - ut_ad(0); - - /* We put here some fault tolerance: if the page - is already free, return without doing anything! 
*/ - - return; - } - - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - if (state == XDES_FULL_FRAG) { - /* The fragment was full: move it to another list */ - flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used + FSP_EXTENT_SIZE - 1, - MLOG_4BYTES, mtr); - } else { - ut_a(frag_n_used > 0); - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1, - MLOG_4BYTES, mtr); - } - - if (xdes_is_free(descr, mtr)) { - /* The extent has become free: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - fsp_free_extent(space, zip_size, page, mtr); - } - - mtr->n_freed_pages++; -} - -/**********************************************************************//** -Returns an extent to the free list of a space. */ -static -void -fsp_free_extent( -/*============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset in the extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fsp_header_t* header; - xdes_t* descr; - - ut_ad(mtr); - - header = fsp_get_space_header(space, zip_size, mtr); - - descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); - - if (xdes_get_state(descr, mtr) == XDES_FREE) { - - ut_print_buf(stderr, (byte*) descr - 500, 1000); - putc('\n', stderr); - - ut_error; - } - - xdes_init(descr, mtr); - - flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); -} - -/**********************************************************************//** -Returns the nth inode slot on an inode page. 
-@return segment inode */ -UNIV_INLINE -fseg_inode_t* -fsp_seg_inode_page_get_nth_inode( -/*=============================*/ - page_t* page, /*!< in: segment inode page */ - ulint i, /*!< in: inode index on page */ - ulint zip_size MY_ATTRIBUTE((unused)), - /*!< in: compressed page size, or 0 */ - mtr_t* mtr MY_ATTRIBUTE((unused))) - /*!< in/out: mini-transaction */ -{ - ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size)); - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); - - return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i); -} - -/**********************************************************************//** -Looks for a used segment inode on a segment inode page. -@return segment inode index, or ULINT_UNDEFINED if not found */ -static -ulint -fsp_seg_inode_page_find_used( -/*=========================*/ - page_t* page, /*!< in: segment inode page */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - fseg_inode_t* inode; - - for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) { - - inode = fsp_seg_inode_page_get_nth_inode( - page, i, zip_size, mtr); - - if (mach_read_from_8(inode + FSEG_ID)) { - /* This is used */ - - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Looks for an unused segment inode on a segment inode page. 
-@return segment inode index, or ULINT_UNDEFINED if not found */ -static -ulint -fsp_seg_inode_page_find_free( -/*=========================*/ - page_t* page, /*!< in: segment inode page */ - ulint i, /*!< in: search forward starting from this index */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - SRV_CORRUPT_TABLE_CHECK(page, return(ULINT_UNDEFINED);); - - for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) { - - fseg_inode_t* inode; - - inode = fsp_seg_inode_page_get_nth_inode( - page, i, zip_size, mtr); - - if (!mach_read_from_8(inode + FSEG_ID)) { - /* This is unused */ - return(i); - } - - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Allocates a new file segment inode page. -@return TRUE if could be allocated */ -static -ibool -fsp_alloc_seg_inode_page( -/*=====================*/ - fsp_header_t* space_header, /*!< in: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fseg_inode_t* inode; - buf_block_t* block; - page_t* page; - ulint space; - ulint zip_size; - - ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET); - - space = page_get_space_id(page_align(space_header)); - - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - - block = fsp_alloc_free_page(space, zip_size, 0, mtr, mtr); - - if (block == NULL) { - - return(FALSE); - } - - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); - - block->check_index_page_at_flush = FALSE; - - page = buf_block_get_frame(block); - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE, - MLOG_2BYTES, mtr); - - for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) { - - inode = fsp_seg_inode_page_get_nth_inode( - page, i, zip_size, mtr); - - mlog_write_ull(inode + FSEG_ID, 0, mtr); - } - - flst_add_last( - 
space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - - return(TRUE); -} - -/**********************************************************************//** -Allocates a new file segment inode. -@return segment inode, or NULL if not enough space */ -static -fseg_inode_t* -fsp_alloc_seg_inode( -/*================*/ - fsp_header_t* space_header, /*!< in: space header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint page_no; - buf_block_t* block; - page_t* page; - fseg_inode_t* inode; - ibool success; - ulint zip_size; - ulint n; - - ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET); - - if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) { - /* Allocate a new segment inode page */ - - success = fsp_alloc_seg_inode_page(space_header, mtr); - - if (!success) { - - return(NULL); - } - } - - page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page; - - zip_size = fsp_flags_get_zip_size( - mach_read_from_4(FSP_SPACE_FLAGS + space_header)); - block = buf_page_get(page_get_space_id(page_align(space_header)), - zip_size, page_no, RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_FSP_PAGE); - - page = buf_block_get_frame(block); - - SRV_CORRUPT_TABLE_CHECK(page, return(0);); - - n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr); - - ut_a(n != ULINT_UNDEFINED); - - inode = fsp_seg_inode_page_get_nth_inode(page, n, zip_size, mtr); - - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1, - zip_size, mtr)) { - /* There are no other unused headers left on the page: move it - to another list */ - - flst_remove(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - - flst_add_last(space_header + FSP_SEG_INODES_FULL, - page + FSEG_INODE_PAGE_NODE, mtr); - } - - ut_ad(!mach_read_from_8(inode + FSEG_ID) - || mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - return(inode); -} - -/**********************************************************************//** -Frees a file 
segment inode. */ -static -void -fsp_free_seg_inode( -/*===============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - page_t* page; - fsp_header_t* space_header; - - page = page_align(inode); - - space_header = fsp_get_space_header(space, zip_size, mtr); - - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) { - - /* Move the page to another list */ - - flst_remove(space_header + FSP_SEG_INODES_FULL, - page + FSEG_INODE_PAGE_NODE, mtr); - - flst_add_last(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - } - - mlog_write_ull(inode + FSEG_ID, 0, mtr); - mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr); - - if (ULINT_UNDEFINED - == fsp_seg_inode_page_find_used(page, zip_size, mtr)) { - - /* There are no other used headers left on the page: free it */ - - flst_remove(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - - fsp_free_page(space, zip_size, page_get_page_no(page), mtr); - } -} - -/**********************************************************************//** -Returns the file segment inode, page x-latched. 
-@return segment inode, page x-latched; NULL if the inode is free */ -static -fseg_inode_t* -fseg_inode_try_get( -/*===============*/ - fseg_header_t* header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fil_addr_t inode_addr; - fseg_inode_t* inode; - - inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO); - inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET); - ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE)); - - inode = fut_get_ptr(space, zip_size, inode_addr, RW_X_LATCH, mtr); - - SRV_CORRUPT_TABLE_CHECK(inode, return(0);); - - if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) { - - inode = NULL; - } else { - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - } - - return(inode); -} - -/**********************************************************************//** -Returns the file segment inode, page x-latched. -@return segment inode, page x-latched */ -static -fseg_inode_t* -fseg_inode_get( -/*===========*/ - fseg_header_t* header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fseg_inode_t* inode - = fseg_inode_try_get(header, space, zip_size, mtr); - SRV_CORRUPT_TABLE_CHECK(inode, ; /* do nothing */); - return(inode); -} - -/**********************************************************************//** -Gets the page number from the nth fragment page slot. 
-@return page number, FIL_NULL if not in use */ -UNIV_INLINE -ulint -fseg_get_nth_frag_page_no( -/*======================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint n, /*!< in: slot index */ - mtr_t* mtr MY_ATTRIBUTE((unused))) - /*!< in/out: mini-transaction */ -{ - ut_ad(inode && mtr); - ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - return(mach_read_from_4(inode + FSEG_FRAG_ARR - + n * FSEG_FRAG_SLOT_SIZE)); -} - -/**********************************************************************//** -Sets the page number in the nth fragment page slot. */ -UNIV_INLINE -void -fseg_set_nth_frag_page_no( -/*======================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint n, /*!< in: slot index */ - ulint page_no,/*!< in: page number to set */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(inode && mtr); - ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, - page_no, MLOG_4BYTES, mtr); -} - -/**********************************************************************//** -Finds a fragment page slot which is free. -@return slot index; ULINT_UNDEFINED if none found */ -static -ulint -fseg_find_free_frag_page_slot( -/*==========================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - ulint page_no; - - ut_ad(inode && mtr); - - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - page_no = fseg_get_nth_frag_page_no(inode, i, mtr); - - if (page_no == FIL_NULL) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Finds a fragment page slot which is used and last in the array. 
-@return slot index; ULINT_UNDEFINED if none found */ -static -ulint -fseg_find_last_used_frag_page_slot( -/*===============================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - ulint page_no; - - ut_ad(inode && mtr); - - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - page_no = fseg_get_nth_frag_page_no( - inode, FSEG_FRAG_ARR_N_SLOTS - i - 1, mtr); - - if (page_no != FIL_NULL) { - - return(FSEG_FRAG_ARR_N_SLOTS - i - 1); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Calculates reserved fragment page slots. -@return number of fragment pages */ -static -ulint -fseg_get_n_frag_pages( -/*==================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint i; - ulint count = 0; - - ut_ad(inode && mtr); - - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i, mtr)) { - count++; - } - } - - return(count); -} - -/**********************************************************************//** -Creates a new segment. 
-@return the block where the segment header is placed, x-latched, NULL -if could not create segment because of lack of space */ -UNIV_INTERN -buf_block_t* -fseg_create_general( -/*================*/ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /*!< in: byte offset of the created segment header - on the page */ - ibool has_done_reservation, /*!< in: TRUE if the caller has already - done the reservation for the pages with - fsp_reserve_free_extents (at least 2 extents: one for - the inode and the other for the segment) then there is - no need to do the check for this individual - operation */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint flags; - ulint zip_size; - fsp_header_t* space_header; - fseg_inode_t* inode; - ib_id_t seg_id; - buf_block_t* block = 0; /* remove warning */ - fseg_header_t* header = 0; /* remove warning */ - prio_rw_lock_t* latch; - ibool success; - ulint n_reserved; - ulint i; - - ut_ad(mtr); - ut_ad(byte_offset + FSEG_HEADER_SIZE - <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END); - - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - - if (page != 0) { - block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr); - header = byte_offset + buf_block_get_frame(block); - } - - mtr_x_lock(latch, mtr); - - if (rw_lock_get_x_lock_count(latch) == 1) { - /* This thread did not own the latch before this call: free - excess pages from the insert buffer free list */ - - if (space == IBUF_SPACE_ID) { - ibuf_free_excess_pages(); - } - } - - if (!has_done_reservation) { - success = fsp_reserve_free_extents(&n_reserved, space, 2, - FSP_NORMAL, mtr); - if (!success) { - return(NULL); - } - } - - space_header = fsp_get_space_header(space, zip_size, mtr); - - inode = 
fsp_alloc_seg_inode(space_header, mtr); - - if (inode == NULL) { - goto funct_exit; - } - - /* Read the next segment id from space header and increment the - value in space header */ - - seg_id = mach_read_from_8(space_header + FSP_SEG_ID); - - mlog_write_ull(space_header + FSP_SEG_ID, seg_id + 1, mtr); - - mlog_write_ull(inode + FSEG_ID, seg_id, mtr); - mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr); - - flst_init(inode + FSEG_FREE, mtr); - flst_init(inode + FSEG_NOT_FULL, mtr); - flst_init(inode + FSEG_FULL, mtr); - - mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE, - MLOG_4BYTES, mtr); - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr); - } - - if (page == 0) { - block = fseg_alloc_free_page_low(space, zip_size, - inode, 0, FSP_UP, mtr, mtr); - - if (block == NULL) { - - fsp_free_seg_inode(space, zip_size, inode, mtr); - - goto funct_exit; - } - - ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); - - header = byte_offset + buf_block_get_frame(block); - mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr); - } - - mlog_write_ulint(header + FSEG_HDR_OFFSET, - page_offset(inode), MLOG_2BYTES, mtr); - - mlog_write_ulint(header + FSEG_HDR_PAGE_NO, - page_get_page_no(page_align(inode)), - MLOG_4BYTES, mtr); - - mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr); - -funct_exit: - if (!has_done_reservation) { - - fil_space_release_free_extents(space, n_reserved); - } - - return(block); -} - -/**********************************************************************//** -Creates a new segment. 
-@return the block where the segment header is placed, x-latched, NULL -if could not create segment because of lack of space */ -UNIV_INTERN -buf_block_t* -fseg_create( -/*========*/ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /*!< in: byte offset of the created segment header - on the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - return(fseg_create_general(space, page, byte_offset, FALSE, mtr)); -} - -/**********************************************************************//** -Calculates the number of pages reserved by a segment, and how many pages are -currently used. -@return number of reserved pages */ -static -ulint -fseg_n_reserved_pages_low( -/*======================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint* used, /*!< out: number of pages used (not - more than reserved) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint ret; - - ut_ad(inode && used && mtr); - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); - - *used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr) - + fseg_get_n_frag_pages(inode, mtr); - - ret = fseg_get_n_frag_pages(inode, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr); - - return(ret); -} - -/**********************************************************************//** -Calculates the number of pages reserved by a segment, and how many pages are -currently used. 
-@return number of reserved pages */ -UNIV_INTERN -ulint -fseg_n_reserved_pages( -/*==================*/ - fseg_header_t* header, /*!< in: segment header */ - ulint* used, /*!< out: number of pages used (<= reserved) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint ret; - fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; - prio_rw_lock_t* latch; - - space = page_get_space_id(page_align(header)); - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - - mtr_x_lock(latch, mtr); - - inode = fseg_inode_get(header, space, zip_size, mtr); - - ret = fseg_n_reserved_pages_low(inode, used, mtr); - - return(ret); -} - -/*********************************************************************//** -Tries to fill the free list of a segment with consecutive free extents. -This happens if the segment is big enough to allow extents in the free list, -the free list is empty, and the extents can be allocated consecutively from -the hint onward. 
*/ -static -void -fseg_fill_free_list( -/*================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hint, /*!< in: hint which extent would be good as - the first extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - xdes_t* descr; - ulint i; - ib_id_t seg_id; - ulint reserved; - ulint used; - - ut_ad(inode && mtr); - ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - - reserved = fseg_n_reserved_pages_low(inode, &used, mtr); - - if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) { - - /* The segment is too small to allow extents in free list */ - - return; - } - - if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { - /* Free list is not empty */ - - return; - } - - for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) { - descr = xdes_get_descriptor(space, zip_size, hint, mtr); - - if ((descr == NULL) - || (XDES_FREE != xdes_get_state(descr, mtr))) { - - /* We cannot allocate the desired extent: stop */ - - return; - } - - descr = fsp_alloc_free_extent(space, zip_size, hint, mtr); - - xdes_set_state(descr, XDES_FSEG, mtr); - - seg_id = mach_read_from_8(inode + FSEG_ID); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - mlog_write_ull(descr + XDES_ID, seg_id, mtr); - - flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr); - hint += FSP_EXTENT_SIZE; - } -} - -/*********************************************************************//** -Allocates a free extent for the segment: looks first in the free list of the -segment, then tries to allocate from the space free list. NOTE that the extent -returned still resides in the segment free list, it is not yet taken off it! 
-@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded -(init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -static -xdes_t* -fseg_alloc_free_extent( -/*===================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - xdes_t* descr; - ib_id_t seg_id; - fil_addr_t first; - - ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { - /* Segment free list is not empty, allocate from it */ - - first = flst_get_first(inode + FSEG_FREE, mtr); - - descr = xdes_lst_get_descriptor(space, zip_size, first, mtr); - } else { - /* Segment free list was empty, allocate from space */ - descr = fsp_alloc_free_extent(space, zip_size, 0, mtr); - - if (descr == NULL) { - - return(NULL); - } - - seg_id = mach_read_from_8(inode + FSEG_ID); - - xdes_set_state(descr, XDES_FSEG, mtr); - mlog_write_ull(descr + XDES_ID, seg_id, mtr); - flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr); - - /* Try to fill the segment free list */ - fseg_fill_free_list(inode, space, zip_size, - xdes_get_offset(descr) + FSP_EXTENT_SIZE, - mtr); - } - - return(descr); -} - -/**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. 
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-static
-buf_block_t*
-fseg_alloc_free_page_low(
-/*=====================*/
-	ulint		space,	/*!< in: space */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	fseg_inode_t*	seg_inode, /*!< in/out: segment inode */
-	ulint		hint,	/*!< in: hint of which page would be
-				desirable */
-	byte		direction, /*!< in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
-				in which the page should be initialized.
-				If init_mtr!=mtr, but the page is already
-				latched in mtr, do not initialize the page. */
-{
-	fsp_header_t*	space_header;
-	ulint		space_size;
-	ib_id_t		seg_id;
-	ulint		used;
-	ulint		reserved;
-	xdes_t*		descr;		/*!< extent of the hinted page */
-	ulint		ret_page;	/*!< the allocated page offset, FIL_NULL
-					if could not be allocated */
-	xdes_t*		ret_descr;	/*!< the extent of the allocated page */
-	ibool		success;
-	ulint		n;
-
-	ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
-	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-	seg_id = mach_read_from_8(seg_inode + FSEG_ID);
-
-	ut_ad(seg_id);
-
-	reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr); /* 'used' is filled in through the out parameter */
-
-	space_header = fsp_get_space_header(space, zip_size, mtr);
-
-	descr = xdes_get_descriptor_with_space_hdr(space_header, space,
-						   hint, mtr);
-	if (descr == NULL) {
-		/* Hint outside space or too high above free limit: reset
-		hint */
-		/* The file space header page is always allocated, so the
-		descriptor lookup for page 0 below is expected to succeed. */
-		hint = 0;
-		descr = xdes_get_descriptor(space, zip_size, hint, mtr);
-	}
-
-	/* In the big if-else below we look for ret_page and ret_descr.
-	The branches are tried in order 1..7; branch 2 jumps back into
-	branch 1 (take_hinted_page), branch 1 jumps past the tablespace
-	size check (got_hinted_page), and branch 6 returns directly. */
-	/*-------------------------------------------------------------*/
-	if ((xdes_get_state(descr, mtr) == XDES_FSEG)
-	    && mach_read_from_8(descr + XDES_ID) == seg_id
-	    && (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
-				 hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-take_hinted_page:
-		/* 1. We can take the hinted page
-		=================================*/
-		ret_descr = descr;
-		ret_page = hint;
-		/* Skip the check for extending the tablespace. If the
-		page hint were not within the size of the tablespace,
-		we would have got (descr == NULL) above and reset the hint. */
-		goto got_hinted_page;
-		/*-----------------------------------------------------------*/
-	} else if (xdes_get_state(descr, mtr) == XDES_FREE
-		   && reserved - used < reserved / FSEG_FILLFACTOR
-		   && used >= FSEG_FRAG_LIMIT) {
-
-		/* 2. We allocate the free extent from space and can take
-		=========================================================
-		the hinted page
-		===============*/
-		ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
-
-		ut_a(ret_descr == descr);
-
-		xdes_set_state(ret_descr, XDES_FSEG, mtr);
-		mlog_write_ull(ret_descr + XDES_ID, seg_id, mtr);
-		flst_add_last(seg_inode + FSEG_FREE,
-			      ret_descr + XDES_FLST_NODE, mtr);
-
-		/* Try to fill the segment free list */
-		fseg_fill_free_list(seg_inode, space, zip_size,
-				    hint + FSP_EXTENT_SIZE, mtr);
-		goto take_hinted_page;
-		/*-----------------------------------------------------------*/
-	} else if ((direction != FSP_NO_DIR)
-		   && ((reserved - used) < reserved / FSEG_FILLFACTOR)
-		   && (used >= FSEG_FRAG_LIMIT)
-		   && (!!(ret_descr
-			  = fseg_alloc_free_extent(seg_inode,
-						   space, zip_size, mtr)))) {
-
-		/* 3. We take any free extent (which was already assigned above
-		===============================================================
-		in the if-condition to ret_descr) and take the lowest or
-		========================================================
-		highest page in it, depending on the direction
-		==============================================*/
-		ret_page = xdes_get_offset(ret_descr);
-
-		if (direction == FSP_DOWN) {
-			ret_page += FSP_EXTENT_SIZE - 1; /* last page of the extent */
-		}
-		/*-----------------------------------------------------------*/
-	} else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
-		   && mach_read_from_8(descr + XDES_ID) == seg_id
-		   && (!xdes_is_full(descr, mtr))) {
-
-		/* 4. We can take the page from the same extent as the
-		======================================================
-		hinted page (and the extent already belongs to the
-		==================================================
-		segment)
-		========*/
-		ret_descr = descr;
-		ret_page = xdes_get_offset(ret_descr)
-			+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
-					hint % FSP_EXTENT_SIZE, mtr);
-		/*-----------------------------------------------------------*/
-	} else if (reserved - used > 0) {
-		/* 5. We take any unused page from the segment
-		==============================================
-		The FSEG_NOT_FULL list is preferred over FSEG_FREE; having
-		reserved > used with both lists empty is treated as a
-		fatal inconsistency (ut_error). */
-		fil_addr_t	first;
-
-		if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
-			first = flst_get_first(seg_inode + FSEG_NOT_FULL,
-					       mtr);
-		} else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
-			first = flst_get_first(seg_inode + FSEG_FREE, mtr);
-		} else {
-			ut_error;
-			return(NULL);
-		}
-
-		ret_descr = xdes_lst_get_descriptor(space, zip_size,
-						    first, mtr);
-		ret_page = xdes_get_offset(ret_descr)
-			+ xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
-					0, mtr);
-		/*-----------------------------------------------------------*/
-	} else if (used < FSEG_FRAG_LIMIT) {
-		/* 6. We allocate an individual page from the space
-		===================================================*/
-		buf_block_t* block = fsp_alloc_free_page(
-			space, zip_size, hint, mtr, init_mtr);
-
-		if (block != NULL) {
-			/* Put the page in the fragment page array of the
-			segment */
-			n = fseg_find_free_frag_page_slot(seg_inode, mtr);
-			ut_a(n != ULINT_UNDEFINED);
-
-			fseg_set_nth_frag_page_no(
-				seg_inode, n, buf_block_get_page_no(block),
-				mtr);
-		}
-
-		/* fsp_alloc_free_page() invoked fsp_init_file_page()
-		already, so this branch returns the block directly (or NULL
-		if no page was obtained) instead of going through
-		fsp_page_create() at the end of the function. */
-		return(block);
-		/*-----------------------------------------------------------*/
-	} else {
-		/* 7. We allocate a new extent and take its first page
-		======================================================*/
-		ret_descr = fseg_alloc_free_extent(seg_inode,
-						   space, zip_size, mtr);
-
-		if (ret_descr == NULL) {
-			ret_page = FIL_NULL;
-		} else {
-			ret_page = xdes_get_offset(ret_descr);
-		}
-	}
-
-	if (ret_page == FIL_NULL) {
-		/* Page could not be allocated */
-
-		return(NULL);
-	}
-
-	if (space != 0) { /* the size check below is not done for space 0 */
-		space_size = fil_space_get_size(space);
-
-		if (space_size <= ret_page) {
-			/* It must be that we are extending a single-table
-			tablespace whose size is still < 64 pages */
-
-			if (ret_page >= FSP_EXTENT_SIZE) {
-				fprintf(stderr,
-					"InnoDB: Error (2): trying to extend"
-					" a single-table tablespace %lu\n"
-					"InnoDB: by single page(s) though"
-					" the space size %lu. Page no %lu.\n",
-					(ulong) space, (ulong) space_size,
-					(ulong) ret_page);
-				return(NULL);
-			}
-
-			success = fsp_try_extend_data_file_with_pages(
-				space, ret_page, space_header, mtr);
-			if (!success) {
-				/* No disk space left */
-				return(NULL);
-			}
-		}
-	}
-
-got_hinted_page:
-	/* ret_descr == NULL if the block was allocated from free_frag
-	(XDES_FREE_FRAG) */
-	if (ret_descr != NULL) {
-		/* At this point we know the extent and the page offset.
-		The extent is still in the appropriate list (FSEG_NOT_FULL
-		or FSEG_FREE), and the page is not yet marked as used. */
-
-		ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
-		      == ret_descr);
-
-		ut_ad(xdes_mtr_get_bit(
-				ret_descr, XDES_FREE_BIT,
-				ret_page % FSP_EXTENT_SIZE, mtr));
-
-		fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr);
-	}
-
-	return(fsp_page_create( /* zip size is decoded from the flags stored in the space header */
-		       space, fsp_flags_get_zip_size(
-			       mach_read_from_4(FSP_SPACE_FLAGS
-						+ space_header)),
-		       ret_page, mtr, init_mtr));
-}
-
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
-buf_block_t*
-fseg_alloc_free_page_general(
-/*=========================*/
-	fseg_header_t*	seg_header,/*!< in/out: segment header */
-	ulint		hint,	/*!< in: hint of which page would be
-				desirable */
-	byte		direction,/*!< in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	ibool		has_done_reservation, /*!< in: TRUE if the caller has
-				already done the reservation for the page
-				with fsp_reserve_free_extents, then there
-				is no need to do the check for this individual
-				page */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
-				in which the page should be initialized.
-				If init_mtr!=mtr, but the page is already
-				latched in mtr, do not initialize the page. */
-{
-	fseg_inode_t*	inode;
-	ulint		space;
-	ulint		flags;
-	ulint		zip_size;
-	prio_rw_lock_t*	latch;
-	buf_block_t*	block;
-	ulint		n_reserved;
-
-	space = page_get_space_id(page_align(seg_header));
-
-	latch = fil_space_get_latch(space, &flags);
-
-	zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(latch, mtr);
-
-	if (rw_lock_get_x_lock_count(latch) == 1) {
-		/* This thread did not own the latch before this call: free
-		excess pages from the insert buffer free list */
-
-		if (space == IBUF_SPACE_ID) {
-			ibuf_free_excess_pages();
-		}
-	}
-
-	inode = fseg_inode_get(seg_header, space, zip_size, mtr);
-
-	if (!has_done_reservation
-	    && !fsp_reserve_free_extents(&n_reserved, space, 2,
-					 FSP_NORMAL, mtr)) {
-		return(NULL); /* the 2-extent reservation failed */
-	}
-
-	block = fseg_alloc_free_page_low(space, zip_size,
-					 inode, hint, direction,
-					 mtr, init_mtr);
-	if (!has_done_reservation) {
-		fil_space_release_free_extents(space, n_reserved); /* release the reservation made above */
-	}
-
-	return(block);
-}
-
-/**********************************************************************//**
-Checks that we have at least 2 frag pages free in the first extent of a
-single-table tablespace, and they are also physically initialized to the data
-file. That is we have already extended the data file so that those pages are
-inside the data file. If not, this function extends the tablespace with
-pages. 
-@return TRUE if there were >= 2 free pages, or we were able to extend */
-static
-ibool
-fsp_reserve_free_pages(
-/*===================*/
-	ulint		space,		/*!< in: space id, must be != 0 */
-	fsp_header_t*	space_header,	/*!< in: header of that space,
-					x-latched */
-	ulint		size,		/*!< in: size of the tablespace in
-					pages, must be < FSP_EXTENT_SIZE */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
-{
-	ut_a(space != 0);
-	ut_a(size < FSP_EXTENT_SIZE);
-
-	/* Look at the descriptor of the extent that contains page 0 */
-	xdes_t*	first_descr = xdes_get_descriptor_with_space_hdr(
-		space_header, space, 0, mtr);
-	ulint	used_pages = xdes_get_n_used(first_descr, mtr);
-
-	ut_a(used_pages <= size);
-
-	if (size < used_pages + 2) {
-		/* Fewer than two unused pages lie inside the file:
-		ask for the file to be extended */
-		return(fsp_try_extend_data_file_with_pages(
-			       space, used_pages + 1, space_header, mtr));
-	}
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible. 
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
-	ulint*	n_reserved,/*!< out: number of extents actually reserved; if we
-			return TRUE and the tablespace size is < 64 pages,
-			then this can be 0, otherwise it is n_ext */
-	ulint	space,	/*!< in: space id */
-	ulint	n_ext,	/*!< in: number of extents to reserve */
-	ulint	alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
-	mtr_t*	mtr)	/*!< in/out: mini-transaction */
-{
-	fsp_header_t*	space_header;
-	prio_rw_lock_t*	latch;
-	ulint		n_free_list_ext;
-	ulint		free_limit;
-	ulint		size;
-	ulint		flags;
-	ulint		zip_size;
-	ulint		n_free;
-	ulint		n_free_up;
-	ulint		reserve;
-	ibool		success;
-	ulint		n_pages_added;
-
-	ut_ad(mtr);
-	*n_reserved = n_ext;
-
-	latch = fil_space_get_latch(space, &flags);
-	zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(latch, mtr);
-
-	space_header = fsp_get_space_header(space, zip_size, mtr);
-try_again:
-	/* Re-read the size on every pass: the file may have been
-	extended at try_to_extend below */
-	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
-
-	if (size < FSP_EXTENT_SIZE / 2) {
-		/* Use different rules for small single-table tablespaces */
-		*n_reserved = 0;
-		return(fsp_reserve_free_pages(space, space_header, size, mtr));
-	}
-
-	n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
-
-	free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
-				    MLOG_4BYTES, mtr);
-
-	/* Below we play safe when counting free extents above the free limit:
-	some of them will contain extent descriptor pages, and therefore
-	will not be free extents */
-
-	n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
-	if (n_free_up > 0) {
-		n_free_up--;
-		if (!zip_size) {
-			n_free_up -= n_free_up
-				/ (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
-		} else {
-			n_free_up -= n_free_up
-				/ (zip_size / FSP_EXTENT_SIZE);
-		}
-	}
-
-	n_free = n_free_list_ext + n_free_up;
-
-	if (alloc_type == FSP_NORMAL) {
-		/* We reserve 1 extent + 0.5 % of the space size to undo logs
-		and 1 extent + 0.5 % to cleaning operations; NOTE: this source
-		code is duplicated in the function below! */
-
-		reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
-		if (n_free <= reserve + n_ext) {
-
-			goto try_to_extend;
-		}
-	} else if (alloc_type == FSP_UNDO) {
-		/* We reserve 0.5 % of the space size to cleaning operations */
-
-		reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
-
-		if (n_free <= reserve + n_ext) {
-
-			goto try_to_extend;
-		}
-	} else {
-		ut_a(alloc_type == FSP_CLEANING);
-		reserve = 0;
-	}
-
-	success = fil_space_reserve_free_extents(space, n_free, n_ext);
-	*n_reserved = n_ext;
-
-	if (success) {
-		return(TRUE);
-	}
-try_to_extend:
-	/* Not enough free extents (or the reservation was refused):
-	try to grow the data file and, if any page was added, redo the
-	computation from the top. The running total of added pages that
-	used to be kept here was never read, so it is no longer tracked. */
-	success = fsp_try_extend_data_file(&n_pages_added, space,
-					   space_header, mtr);
-	if (success && n_pages_added > 0) {
-		goto try_again;
-	}
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents. 
-@return available space in kB */
-UNIV_INTERN
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
-	ulint	space)	/*!< in: space id */
-{
-	fsp_header_t*	space_header;
-	ulint		n_free_list_ext;
-	ulint		free_limit;
-	ulint		size;
-	ulint		flags;
-	ulint		zip_size;
-	ulint		n_free;
-	ulint		n_free_up;
-	ulint		reserve;
-	prio_rw_lock_t*	latch;
-	mtr_t		mtr;
-
-	/* The convoluted mutex acquire is to overcome latching order
-	issues: The problem is that the fil_mutex is at a lower level
-	than the tablespace latch and the buffer pool mutexes. We have to
-	first prevent any operations on the file system by acquiring the
-	dictionary mutex. Then acquire the tablespace latch to obey the
-	latching order and then release the dictionary mutex. That way we
-	ensure that the tablespace instance can't be freed while we are
-	examining its contents (see fil_space_free()).
-
-	However, there is one further complication, we release the fil_mutex
-	when we need to invalidate the pages in the buffer pool and we
-	reacquire the fil_mutex when deleting and freeing the tablespace
-	instance in fil0fil.cc. Here we need to account for that situation
-	too. */
-
-	mutex_enter(&dict_sys->mutex);
-
-	/* At this stage there is no guarantee that the tablespace even
-	exists in the cache. */
-
-	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
-
-		mutex_exit(&dict_sys->mutex);
-
-		return(ULLINT_UNDEFINED); /* no usable tablespace: caller gets the "undefined" marker */
-	}
-
-	mtr_start(&mtr);
-
-	latch = fil_space_get_latch(space, &flags);
-
-	/* This should ensure that the tablespace instance can't be freed
-	by another thread. However, the tablespace pages can still be freed
-	from the buffer pool. We need to check for that again. */
-
-	zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(latch, &mtr);
-
-	mutex_exit(&dict_sys->mutex);
-
-	/* At this point it is possible for the tablespace to be deleted and
-	its pages removed from the buffer pool. We need to check for that
-	situation. However, the tablespace instance can't be deleted because
-	our latching above should ensure that. */
-
-	if (fil_tablespace_is_being_deleted(space)) {
-
-		mtr_commit(&mtr);
-
-		return(ULLINT_UNDEFINED);
-	}
-
-	/* From here on even if the user has dropped the tablespace, the
-	pages _must_ still exist in the buffer pool and the tablespace
-	instance _must_ be in the file system hash table. */
-
-	space_header = fsp_get_space_header(space, zip_size, &mtr);
-
-	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
-	n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr);
-
-	free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
-				    MLOG_4BYTES, &mtr);
-	mtr_commit(&mtr); /* everything below works on the local copies read above */
-
-	if (size < FSP_EXTENT_SIZE) {
-		ut_a(space != 0);	/* This must be a single-table
-					tablespace */
-
-		return(0);		/* TODO: count free frag pages and
-					return a value based on that */
-	}
-
-	/* Below we play safe when counting free extents above the free limit:
-	some of them will contain extent descriptor pages, and therefore
-	will not be free extents */
-
-	n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
-	if (n_free_up > 0) {
-		n_free_up--;
-		if (!zip_size) {
-			n_free_up -= n_free_up
-				/ (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
-		} else {
-			n_free_up -= n_free_up
-				/ (zip_size / FSP_EXTENT_SIZE);
-		}
-	}
-
-	n_free = n_free_list_ext + n_free_up;
-
-	/* We reserve 1 extent + 0.5 % of the space size to undo logs
-	and 1 extent + 0.5 % to cleaning operations; NOTE: this source
-	code is duplicated in the function above! */
-
-	reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
-	if (reserve > n_free) {
-		return(0); /* the safety margin alone exceeds the free extents */
-	}
-
-	if (!zip_size) { /* kB = extents * pages per extent * kB per page */
-		return((ullint) (n_free - reserve)
-		       * FSP_EXTENT_SIZE
-		       * (UNIV_PAGE_SIZE / 1024));
-	} else {
-		return((ullint) (n_free - reserve)
-		       * FSP_EXTENT_SIZE
-		       * (zip_size / 1024));
-	}
-}
-
-/********************************************************************//**
-Marks a page used. 
The page must reside within the extents of the given
-segment. */
-static MY_ATTRIBUTE((nonnull))
-void
-fseg_mark_page_used(
-/*================*/
-	fseg_inode_t*	seg_inode,/*!< in: segment inode */
-	ulint		page,	/*!< in: page offset */
-	xdes_t*		descr,	/*!< in: extent descriptor */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ulint	not_full_n_used;
-
-	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-
-	/* The extent must be owned by this segment. The ids are 8 bytes
-	wide (they are read with mach_read_from_8() elsewhere in this
-	file), so compare the whole id and not just its first 4 bytes. */
-	ut_ad(mach_read_from_8(seg_inode + FSEG_ID)
-	      == mach_read_from_8(descr + XDES_ID));
-
-	if (xdes_is_free(descr, mtr)) {
-		/* We move the extent from the free list to the
-		NOT_FULL list */
-		flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE,
-			    mtr);
-		flst_add_last(seg_inode + FSEG_NOT_FULL,
-			      descr + XDES_FLST_NODE, mtr);
-	}
-
-	/* The page must still be marked free in the extent descriptor */
-	ut_ad(xdes_mtr_get_bit(
-			descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr));
-
-	/* We mark the page as used */
-	xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
-
-	/* Count the page in FSEG_NOT_FULL_N_USED */
-	not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-					 MLOG_4BYTES, mtr);
-	not_full_n_used++;
-	mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used,
-			 MLOG_4BYTES, mtr);
-	if (xdes_is_full(descr, mtr)) {
-		/* We move the extent from the NOT_FULL list to the
-		FULL list */
-		flst_remove(seg_inode + FSEG_NOT_FULL,
-			    descr + XDES_FLST_NODE, mtr);
-		flst_add_last(seg_inode + FSEG_FULL,
-			      descr + XDES_FLST_NODE, mtr);
-
-		/* The extent left the NOT_FULL list: take all of its
-		pages out of the counter again */
-		mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
-				 not_full_n_used - FSP_EXTENT_SIZE,
-				 MLOG_4BYTES, mtr);
-	}
-}
-
-/**********************************************************************//**
-Frees a single page of a segment. 
*/ -static -void -fseg_free_page_low( -/*===============*/ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: page offset */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - xdes_t* descr; - ulint not_full_n_used; - ulint state; - ib_id_t descr_id; - ib_id_t seg_id; - ulint i; - - ut_ad(seg_inode != NULL); - ut_ad(mtr != NULL); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); - - /* Drop search system page hash index if the page is found in - the pool and is hashed */ - - btr_search_drop_page_hash_when_freed(space, zip_size, page); - - descr = xdes_get_descriptor(space, zip_size, page, mtr); - - SRV_CORRUPT_TABLE_CHECK(descr, - { - /* The page may be corrupt. pass it. */ - return; - }); - - if (xdes_mtr_get_bit(descr, XDES_FREE_BIT, - page % FSP_EXTENT_SIZE, mtr)) { - fputs("InnoDB: Dump of the tablespace extent descriptor: ", - stderr); - ut_print_buf(stderr, descr, 40); - - fprintf(stderr, "\n" - "InnoDB: Serious error! 
InnoDB is trying to" - " free page %lu\n" - "InnoDB: though it is already marked as free" - " in the tablespace!\n" - "InnoDB: The tablespace free space info is corrupt.\n" - "InnoDB: You may need to dump your" - " InnoDB tables and recreate the whole\n" - "InnoDB: database!\n", (ulong) page); -crash: - fputs("InnoDB: Please refer to\n" - "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); - ut_error; - } - - state = xdes_get_state(descr, mtr); - - if (state != XDES_FSEG) { - /* The page is in the fragment pages of the segment */ - - for (i = 0;; i++) { - if (fseg_get_nth_frag_page_no(seg_inode, i, mtr) - == page) { - - fseg_set_nth_frag_page_no(seg_inode, i, - FIL_NULL, mtr); - break; - } - } - - fsp_free_page(space, zip_size, page, mtr); - - return; - } - - /* If we get here, the page is in some extent of the segment */ - - descr_id = mach_read_from_8(descr + XDES_ID); - seg_id = mach_read_from_8(seg_inode + FSEG_ID); -#if 0 - fprintf(stderr, - "InnoDB: InnoDB is freeing space %lu page %lu,\n" - "InnoDB: which belongs to descr seg %llu\n" - "InnoDB: segment %llu.\n", - (ulong) space, (ulong) page, - (ullint) descr_id, - (ullint) seg_id); -#endif /* 0 */ - if (UNIV_UNLIKELY(descr_id != seg_id)) { - fputs("InnoDB: Dump of the tablespace extent descriptor: ", - stderr); - ut_print_buf(stderr, descr, 40); - fputs("\nInnoDB: Dump of the segment inode: ", stderr); - ut_print_buf(stderr, seg_inode, 40); - putc('\n', stderr); - - fprintf(stderr, - "InnoDB: Serious error: InnoDB is trying to" - " free space %lu page %lu,\n" - "InnoDB: which does not belong to" - " segment %llu but belongs\n" - "InnoDB: to segment %llu.\n", - (ulong) space, (ulong) page, - (ullint) descr_id, - (ullint) seg_id); - goto crash; - } - - not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(seg_inode + FSEG_FULL, - 
descr + XDES_FLST_NODE, mtr); - flst_add_last(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used + FSP_EXTENT_SIZE - 1, - MLOG_4BYTES, mtr); - } else { - ut_a(not_full_n_used > 0); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used - 1, MLOG_4BYTES, mtr); - } - - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - - if (xdes_is_free(descr, mtr)) { - /* The extent has become free: free it to space */ - flst_remove(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - fsp_free_extent(space, zip_size, page, mtr); - } - - mtr->n_freed_pages++; -} - -/**********************************************************************//** -Frees a single page of a segment. */ -UNIV_INTERN -void -fseg_free_page( -/*===========*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page offset */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint flags; - ulint zip_size; - fseg_inode_t* seg_inode; - prio_rw_lock_t* latch; - - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - - mtr_x_lock(latch, mtr); - - seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr); - - fseg_free_page_low(seg_inode, space, zip_size, page, mtr); - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - buf_page_set_file_page_was_freed(space, page); -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ -} - -/**********************************************************************//** -Checks if a single page of a segment is free. 
-@return true if free */
-UNIV_INTERN
-bool
-fseg_page_is_free(
-/*==============*/
-	fseg_header_t*	seg_header,	/*!< in: segment header */
-	ulint		space,		/*!< in: space id */
-	ulint		page)		/*!< in: page offset */
-{
-	mtr_t		mtr;
-	ibool		is_free;
-	ulint		flags;
-	prio_rw_lock_t*	latch;
-	xdes_t*		descr;
-	ulint		zip_size;
-	fseg_inode_t*	seg_inode;
-
-	latch = fil_space_get_latch(space, &flags);
-	/* The flags returned with the latch are tablespace flags: decode
-	them with the fsp_flags accessor, as every other function in
-	this file does, rather than with the table-flags accessor. */
-	zip_size = fsp_flags_get_zip_size(flags);
-
-	/* The check runs in its own mini-transaction, which holds the
-	tablespace x-latch until mtr_commit() below */
-	mtr_start(&mtr);
-	mtr_x_lock(latch, &mtr);
-
-	seg_inode = fseg_inode_get(seg_header, space, zip_size, &mtr);
-
-	ut_a(seg_inode);
-	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-	ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
-	descr = xdes_get_descriptor(space, zip_size, page, &mtr);
-	ut_a(descr);
-
-	/* The answer is the XDES_FREE_BIT of the page within its extent */
-	is_free = xdes_mtr_get_bit(
-		descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
-
-	mtr_commit(&mtr);
-
-	return(is_free);
-}
-
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. 
*/ -static -void -fseg_free_extent( -/*=============*/ - fseg_inode_t* seg_inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page, /*!< in: a page in the extent */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint first_page_in_extent; - xdes_t* descr; - ulint not_full_n_used; - ulint descr_n_used; - ulint i; - - ut_ad(seg_inode != NULL); - ut_ad(mtr != NULL); - - descr = xdes_get_descriptor(space, zip_size, page, mtr); - - ut_a(xdes_get_state(descr, mtr) == XDES_FSEG); - ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8)); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - - first_page_in_extent = page - (page % FSP_EXTENT_SIZE); - - for (i = 0; i < FSP_EXTENT_SIZE; i++) { - if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) { - - /* Drop search system page hash index if the page is - found in the pool and is hashed */ - - btr_search_drop_page_hash_when_freed( - space, zip_size, first_page_in_extent + i); - } - } - - if (xdes_is_full(descr, mtr)) { - flst_remove(seg_inode + FSEG_FULL, - descr + XDES_FLST_NODE, mtr); - } else if (xdes_is_free(descr, mtr)) { - flst_remove(seg_inode + FSEG_FREE, - descr + XDES_FLST_NODE, mtr); - } else { - flst_remove(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - - not_full_n_used = mtr_read_ulint( - seg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr); - - descr_n_used = xdes_get_n_used(descr, mtr); - ut_a(not_full_n_used >= descr_n_used); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used - descr_n_used, - MLOG_4BYTES, mtr); - } - - fsp_free_extent(space, zip_size, page, mtr); - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - for (i = 0; i < FSP_EXTENT_SIZE; i++) { - - buf_page_set_file_page_was_freed(space, - first_page_in_extent + i); - } -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ -} - 
-/**********************************************************************//**
-Frees part of a segment. This function can be used to free a segment by
-repeatedly calling this function in different mini-transactions. Doing
-the freeing in a single mini-transaction might result in too big a
-mini-transaction. Each call frees at most one extent or one fragment page.
-@return TRUE if freeing completed */
-UNIV_INTERN
-ibool
-fseg_free_step(
-/*===========*/
-	fseg_header_t*	header,	/*!< in, own: segment header; NOTE: if the header
-				resides on the first page of the frag list
-				of the segment, this pointer becomes obsolete
-				after the last freeing step */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ulint		n;
-	ulint		page;
-	xdes_t*		descr;
-	fseg_inode_t*	inode;
-	ulint		space;
-	ulint		flags;
-	ulint		zip_size;
-	ulint		header_page;
-	prio_rw_lock_t*	latch;
-
-	space = page_get_space_id(page_align(header));
-	header_page = page_get_page_no(page_align(header));
-
-	latch = fil_space_get_latch(space, &flags);
-	zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(latch, mtr);
-
-	descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
-
-	SRV_CORRUPT_TABLE_CHECK(descr,
-	{
-		/* The page may be corrupt: skip it and report the
-		freeing as completed. */
-		return(TRUE);
-	});
-
-	/* Check that the header resides on a page which has not been
-	freed yet */
-
-	ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT,
-			      header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
-
-	inode = fseg_inode_try_get(header, space, zip_size, mtr);
-
-	if (UNIV_UNLIKELY(inode == NULL)) {
-		fprintf(stderr, "double free of inode from %u:%u\n",
-			(unsigned) space, (unsigned) header_page);
-		return(TRUE);
-	}
-
-	descr = fseg_get_first_extent(inode, space, zip_size, mtr);
-
-	if (descr != NULL) {
-		/* Free the extent held by the segment */
-		page = xdes_get_offset(descr);
-
-		fseg_free_extent(inode, space, zip_size, page, mtr);
-
-		return(FALSE); /* one extent freed; more steps are needed */
-	}
-
-	/* No extent is left in the segment (descr == NULL):
-	free a frag page */
-	n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
-	if (n == ULINT_UNDEFINED) {
-		/* Freeing completed: free the segment inode */
-		fsp_free_seg_inode(space, zip_size, inode, mtr);
-
-		return(TRUE);
-	}
-
-	fseg_free_page_low(inode, space, zip_size,
-			   fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
-
-	n = fseg_find_last_used_frag_page_slot(inode, mtr); /* look again: was that the last used slot? */
-
-	if (n == ULINT_UNDEFINED) {
-		/* Freeing completed: free the segment inode */
-		fsp_free_seg_inode(space, zip_size, inode, mtr);
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed. 
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
-ibool
-fseg_free_step_not_header(
-/*======================*/
-	fseg_header_t*	header,	/*!< in: segment header which must reside on
-				the first fragment page of the segment */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ulint		flags;
-	ulint		space = page_get_space_id(page_align(header));
-	prio_rw_lock_t*	latch = fil_space_get_latch(space, &flags);
-	ulint		zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(latch, mtr);
-
-	fseg_inode_t*	inode = fseg_inode_get(header, space, zip_size, mtr);
-
-	SRV_CORRUPT_TABLE_CHECK(inode,
-	{
-		/* ignore the corruption */
-		return(TRUE);
-	});
-
-	xdes_t*	first_extent = fseg_get_first_extent(inode, space,
-						     zip_size, mtr);
-
-	if (first_extent != NULL) {
-		/* The segment still holds an extent: free it in
-		this step */
-		fseg_free_extent(inode, space, zip_size,
-				 xdes_get_offset(first_extent), mtr);
-
-		return(FALSE);
-	}
-
-	/* Only fragment pages remain: look at the last used slot */
-	ulint	slot = fseg_find_last_used_frag_page_slot(inode, mtr);
-
-	if (slot == ULINT_UNDEFINED) {
-		ut_error;
-	}
-
-	ulint	frag_page_no = fseg_get_nth_frag_page_no(inode, slot, mtr);
-
-	if (frag_page_no != page_get_page_no(page_align(header))) {
-		/* Not the header page: free it and ask for another step */
-		fseg_free_page_low(inode, space, zip_size, frag_page_no, mtr);
-
-		return(FALSE);
-	}
-
-	/* The last used fragment page is the header page itself,
-	which this function leaves unfreed */
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE. 
-@return the first extent descriptor, or NULL if none */ -static -xdes_t* -fseg_get_first_extent( -/*==================*/ - fseg_inode_t* inode, /*!< in: segment inode */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fil_addr_t first; - xdes_t* descr; - - ut_ad(inode && mtr); - - ut_ad(space == page_get_space_id(page_align(inode))); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - first = fil_addr_null; - - if (flst_get_len(inode + FSEG_FULL, mtr) > 0) { - - first = flst_get_first(inode + FSEG_FULL, mtr); - - } else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) { - - first = flst_get_first(inode + FSEG_NOT_FULL, mtr); - - } else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { - - first = flst_get_first(inode + FSEG_FREE, mtr); - } - - if (first.page == FIL_NULL) { - - return(NULL); - } - descr = xdes_lst_get_descriptor(space, zip_size, first, mtr); - - return(descr); -} - -/*******************************************************************//** -Validates a segment. 
-@return TRUE if ok */ -static -ibool -fseg_validate_low( -/*==============*/ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr2) /*!< in/out: mini-transaction */ -{ - ulint space; - ib_id_t seg_id; - mtr_t mtr; - xdes_t* descr; - fil_addr_t node_addr; - ulint n_used = 0; - ulint n_used2 = 0; - - ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - space = page_get_space_id(page_align(inode)); - - seg_id = mach_read_from_8(inode + FSEG_ID); - n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr2); - flst_validate(inode + FSEG_FREE, mtr2); - flst_validate(inode + FSEG_NOT_FULL, mtr2); - flst_validate(inode + FSEG_FULL, mtr2); - - /* Validate FSEG_FREE list */ - node_addr = flst_get_first(inode + FSEG_FREE, mtr2); - - while (!fil_addr_is_null(node_addr)) { - ulint flags; - ulint zip_size; - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); - - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == 0); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(mach_read_from_8(descr + XDES_ID) == seg_id); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSEG_NOT_FULL list */ - - node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2); - - while (!fil_addr_is_null(node_addr)) { - ulint flags; - ulint zip_size; - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); - - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) > 0); - ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(mach_read_from_8(descr + XDES_ID) == seg_id); - - n_used2 += xdes_get_n_used(descr, &mtr); - - node_addr = 
flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSEG_FULL list */ - - node_addr = flst_get_first(inode + FSEG_FULL, mtr2); - - while (!fil_addr_is_null(node_addr)) { - ulint flags; - ulint zip_size; - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); - - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(mach_read_from_8(descr + XDES_ID) == seg_id); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - ut_a(n_used == n_used2); - - return(TRUE); -} - -#ifdef UNIV_DEBUG -/*******************************************************************//** -Validates a segment. -@return TRUE if ok */ -UNIV_INTERN -ibool -fseg_validate( -/*==========*/ - fseg_header_t* header, /*!< in: segment header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fseg_inode_t* inode; - ibool ret; - ulint space; - ulint flags; - ulint zip_size; - - space = page_get_space_id(page_align(header)); - - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); - zip_size = fsp_flags_get_zip_size(flags); - - inode = fseg_inode_get(header, space, zip_size, mtr); - - ret = fseg_validate_low(inode, mtr); - - return(ret); -} -#endif /* UNIV_DEBUG */ - -/*******************************************************************//** -Writes info of a segment. 
*/ -static -void -fseg_print_low( -/*===========*/ - fseg_inode_t* inode, /*!< in: segment inode */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint space; - ulint n_used; - ulint n_frag; - ulint n_free; - ulint n_not_full; - ulint n_full; - ulint reserved; - ulint used; - ulint page_no; - ib_id_t seg_id; - - ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX)); - space = page_get_space_id(page_align(inode)); - page_no = page_get_page_no(page_align(inode)); - - reserved = fseg_n_reserved_pages_low(inode, &used, mtr); - - seg_id = mach_read_from_8(inode + FSEG_ID); - - n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr); - n_frag = fseg_get_n_frag_pages(inode, mtr); - n_free = flst_get_len(inode + FSEG_FREE, mtr); - n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr); - n_full = flst_get_len(inode + FSEG_FULL, mtr); - - fprintf(stderr, - "SEGMENT id %llu space %lu; page %lu;" - " res %lu used %lu; full ext %lu\n" - "fragm pages %lu; free extents %lu;" - " not full extents %lu: pages %lu\n", - (ullint) seg_id, - (ulong) space, (ulong) page_no, - (ulong) reserved, (ulong) used, (ulong) n_full, - (ulong) n_frag, (ulong) n_free, (ulong) n_not_full, - (ulong) n_used); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); -} - -#ifdef UNIV_BTR_PRINT -/*******************************************************************//** -Writes info of a segment. 
*/ -UNIV_INTERN -void -fseg_print( -/*=======*/ - fseg_header_t* header, /*!< in: segment header */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - fseg_inode_t* inode; - ulint space; - ulint flags; - ulint zip_size; - - space = page_get_space_id(page_align(header)); - - mtr_x_lock(fil_space_get_latch(space, &flags), mtr); - zip_size = fsp_flags_get_zip_size(flags); - - inode = fseg_inode_get(header, space, zip_size, mtr); - - fseg_print_low(inode, mtr); -} -#endif /* UNIV_BTR_PRINT */ - -/*******************************************************************//** -Validates the file space system and its segments. -@return TRUE if ok */ -UNIV_INTERN -ibool -fsp_validate( -/*=========*/ - ulint space) /*!< in: space id */ -{ - fsp_header_t* header; - fseg_inode_t* seg_inode; - page_t* seg_inode_page; - prio_rw_lock_t* latch; - ulint size; - ulint flags; - ulint zip_size; - ulint free_limit; - ulint frag_n_used; - mtr_t mtr; - mtr_t mtr2; - xdes_t* descr; - fil_addr_t node_addr; - fil_addr_t next_node_addr; - ulint descr_count = 0; - ulint n_used = 0; - ulint n_used2 = 0; - ulint n_full_frag_pages; - ulint n; - ulint seg_inode_len_free; - ulint seg_inode_len_full; - - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - ut_a(ut_is_2pow(zip_size)); - ut_a(zip_size <= UNIV_ZIP_SIZE_MAX); - ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN); - - /* Start first a mini-transaction mtr2 to lock out all other threads - from the fsp system */ - mtr_start(&mtr2); - mtr_x_lock(latch, &mtr2); - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, - MLOG_4BYTES, &mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, &mtr); - - n_full_frag_pages = FSP_EXTENT_SIZE - * flst_get_len(header + FSP_FULL_FRAG, &mtr); - - if (UNIV_UNLIKELY(free_limit > size)) { 
- - ut_a(space != 0); - ut_a(size < FSP_EXTENT_SIZE); - } - - flst_validate(header + FSP_FREE, &mtr); - flst_validate(header + FSP_FREE_FRAG, &mtr); - flst_validate(header + FSP_FULL_FRAG, &mtr); - - mtr_commit(&mtr); - - /* Validate FSP_FREE list */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - node_addr = flst_get_first(header + FSP_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == 0); - ut_a(xdes_get_state(descr, &mtr) == XDES_FREE); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSP_FREE_FRAG list */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) > 0); - ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG); - - n_used += xdes_get_n_used(descr, &mtr); - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - - mtr_commit(&mtr); - } - - /* Validate FSP_FULL_FRAG list */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, zip_size, - node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == 
XDES_FULL_FRAG); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate segments */ - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr); - - seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - do { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0); - fseg_validate_low(seg_inode, &mtr); - - descr_count += flst_get_len(seg_inode + FSEG_FREE, - &mtr); - descr_count += flst_get_len(seg_inode + FSEG_FULL, - &mtr); - descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL, - &mtr); - - n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr); - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr); - - seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - - do { - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - if (mach_read_from_8(seg_inode + FSEG_ID)) { - fseg_validate_low(seg_inode, &mtr); - - descr_count += flst_get_len( - seg_inode + FSEG_FREE, &mtr); - descr_count += flst_get_len( 
- seg_inode + FSEG_FULL, &mtr); - descr_count += flst_get_len( - seg_inode + FSEG_NOT_FULL, &mtr); - n_used2 += fseg_get_n_frag_pages( - seg_inode, &mtr); - } - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - ut_a(descr_count * FSP_EXTENT_SIZE == free_limit); - if (!zip_size) { - ut_a(n_used + n_full_frag_pages - == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1)) - / UNIV_PAGE_SIZE) - + seg_inode_len_full + seg_inode_len_free); - } else { - ut_a(n_used + n_full_frag_pages - == n_used2 + 2 * ((free_limit + (zip_size - 1)) - / zip_size) - + seg_inode_len_full + seg_inode_len_free); - } - ut_a(frag_n_used == n_used); - - mtr_commit(&mtr2); - - return(TRUE); -} - -/*******************************************************************//** -Prints info of a file space. */ -UNIV_INTERN -void -fsp_print( -/*======*/ - ulint space) /*!< in: space id */ -{ - fsp_header_t* header; - fseg_inode_t* seg_inode; - page_t* seg_inode_page; - prio_rw_lock_t* latch; - ulint flags; - ulint zip_size; - ulint size; - ulint free_limit; - ulint frag_n_used; - fil_addr_t node_addr; - fil_addr_t next_node_addr; - ulint n_free; - ulint n_free_frag; - ulint n_full_frag; - ib_id_t seg_id; - ulint n; - ulint n_segs = 0; - mtr_t mtr; - mtr_t mtr2; - - latch = fil_space_get_latch(space, &flags); - zip_size = fsp_flags_get_zip_size(flags); - - /* Start first a mini-transaction mtr2 to lock out all other threads - from the fsp system */ - - mtr_start(&mtr2); - - mtr_x_lock(latch, &mtr2); - - mtr_start(&mtr); - - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - - free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, - &mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - &mtr); - n_free = flst_get_len(header + 
FSP_FREE, &mtr); - n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr); - n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr); - - seg_id = mach_read_from_8(header + FSP_SEG_ID); - - fprintf(stderr, - "FILE SPACE INFO: id %lu\n" - "size %lu, free limit %lu, free extents %lu\n" - "not full frag extents %lu: used pages %lu," - " full frag extents %lu\n" - "first seg id not used %llu\n", - (ulong) space, - (ulong) size, (ulong) free_limit, (ulong) n_free, - (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag, - (ullint) seg_id); - - mtr_commit(&mtr); - - /* Print segments */ - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - - do { - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0); - fseg_print_low(seg_inode, &mtr); - - n_segs++; - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - header = fsp_get_space_header(space, zip_size, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - n = 0; - - do { - - mtr_start(&mtr); - mtr_x_lock(latch, &mtr); - - seg_inode_page = fut_get_ptr( - space, zip_size, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, zip_size, &mtr); - if (mach_read_from_8(seg_inode + FSEG_ID)) { - - fseg_print_low(seg_inode, &mtr); - n_segs++; 
- } - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size)); - - node_addr = next_node_addr; - } - - mtr_commit(&mtr2); - - fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Compute offset after xdes where crypt data can be stored -@param[in] zip_size Compressed size or 0 -@return offset */ -ulint -fsp_header_get_crypt_offset( - const ulint zip_size) -{ - return (FSP_HEADER_OFFSET + (XDES_ARR_OFFSET + XDES_SIZE * - (zip_size ? zip_size : UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE)); -} - -/**********************************************************************//** -Checks if a single page is free. -@return true if free */ -UNIV_INTERN -bool -fsp_page_is_free_func( -/*==============*/ - ulint space, /*!< in: space id */ - ulint page_no, /*!< in: page offset */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - const char *file, - ulint line) -{ - ulint flags; - - ut_ad(mtr); - - mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr); - ulint zip_size = fsp_flags_get_zip_size(flags); - - xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr); - ut_a(descr); - - return xdes_mtr_get_bit( - descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr); -} diff --git a/storage/xtradb/fts/Makefile.query b/storage/xtradb/fts/Makefile.query deleted file mode 100644 index 12dcd833064..00000000000 --- a/storage/xtradb/fts/Makefile.query +++ /dev/null @@ -1,32 +0,0 @@ -LEX=flex -YACC=bison -PREFIX=fts - -all: fts0pars.cc fts0blex.cc fts0tlex.cc - -fts0par.cc: fts0pars.y -fts0blex.cc: fts0blex.l -fts0tlex.cc: fts0tlex.l - -.l.cc: - $(LEX) -P$(subst lex,,$*) -o $*.cc --header-file=../include/$*.h $< - -.y.cc: - $(YACC) -p $(PREFIX) -o $*.cc -d $< - mv $*.h ../include -LEX=flex -YACC=bison -PREFIX=fts - -all: fts0pars.cc fts0blex.cc 
fts0tlex.cc - -fts0par.cc: fts0pars.y -fts0blex.cc: fts0blex.l -fts0tlex.cc: fts0tlex.l - -.l.cc: - $(LEX) -P$(subst lex,,$*) -o $*.cc --header-file=../include/$*.h $< - -.y.cc: - $(YACC) -p $(PREFIX) -o $*.cc -d $< - mv $*.h ../include diff --git a/storage/xtradb/fts/fts0ast.cc b/storage/xtradb/fts/fts0ast.cc deleted file mode 100644 index 030b972440f..00000000000 --- a/storage/xtradb/fts/fts0ast.cc +++ /dev/null @@ -1,744 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file fts/fts0ast.cc -Full Text Search parser helper file. - -Created 2007/3/16 Sunny Bains. -***********************************************************************/ - -#include "mem0mem.h" -#include "fts0ast.h" -#include "fts0pars.h" -#include "fts0fts.h" - -/* The FTS ast visit pass. 
*/ -enum fts_ast_visit_pass_t { - FTS_PASS_FIRST, /*!< First visit pass, - process operators excluding - FTS_EXIST and FTS_IGNORE */ - FTS_PASS_EXIST, /*!< Exist visit pass, - process operator FTS_EXIST */ - FTS_PASS_IGNORE /*!< Ignore visit pass, - process operator FTS_IGNORE */ -}; - -/******************************************************************//** -Create an empty fts_ast_node_t. -@return Create a new node */ -static -fts_ast_node_t* -fts_ast_node_create(void) -/*=====================*/ -{ - fts_ast_node_t* node; - - node = (fts_ast_node_t*) ut_malloc(sizeof(*node)); - memset(node, 0x0, sizeof(*node)); - - return(node); -} - -/******************************************************************//** -Create a operator fts_ast_node_t. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_oper( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - fts_ast_oper_t oper) /*!< in: ast operator */ -{ - fts_ast_node_t* node = fts_ast_node_create(); - - node->type = FTS_AST_OPER; - node->oper = oper; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -This function takes ownership of the ptr and is responsible -for free'ing it -@return new node or a node list with tokenized words */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_term( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - const fts_ast_string_t* ptr) /*!< in: ast term string */ -{ - fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg); - ulint len = ptr->len; - ulint cur_pos = 0; - fts_ast_node_t* node = NULL; - fts_ast_node_t* node_list = NULL; - fts_ast_node_t* first_node = NULL; - - /* Scan the incoming string and filter out any "non-word" characters */ - while (cur_pos < len) { - fts_string_t str; - ulint offset; - ulint cur_len; - - cur_len = innobase_mysql_fts_get_token( - state->charset, - reinterpret_cast<const 
byte*>(ptr->str) + cur_pos, - reinterpret_cast<const byte*>(ptr->str) + len, - &str, &offset); - - if (cur_len == 0) { - break; - } - - cur_pos += cur_len; - - if (str.f_n_char > 0) { - /* If the subsequent term (after the first one)'s size - is less than fts_min_token_size or the term is greater - than fts_max_token_size, we shall ignore that. This is - to make consistent with MyISAM behavior */ - if ((first_node && (str.f_n_char < fts_min_token_size)) - || str.f_n_char > fts_max_token_size) { - continue; - } - - node = fts_ast_node_create(); - - node->type = FTS_AST_TERM; - - node->term.ptr = fts_ast_string_create( - str.f_str, str.f_len); - - fts_ast_state_add_node( - static_cast<fts_ast_state_t*>(arg), node); - - if (first_node) { - /* There is more than one word, create - a list to organize them */ - if (!node_list) { - node_list = fts_ast_create_node_list( - static_cast<fts_ast_state_t*>( - arg), - first_node); - } - - fts_ast_add_node(node_list, node); - } else { - first_node = node; - } - } - } - - return((node_list != NULL) ? node_list : first_node); -} - -/******************************************************************//** -This function takes ownership of the ptr and is responsible -for free'ing it. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_text( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - const fts_ast_string_t* ptr) /*!< in: ast text string */ -{ - ulint len = ptr->len; - fts_ast_node_t* node = NULL; - - /* Once we come here, the string must have at least 2 quotes "" - around the query string, which could be empty. Also the query - string may contain 0x00 in it, we don't treat it as null-terminated. */ - ut_ad(len >= 2); - ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"'); - - if (len == 2) { - /* If the query string contains nothing except quotes, - it's obviously an invalid query. 
*/ - return(NULL); - } - - node = fts_ast_node_create(); - - /*!< We ignore the actual quotes "" */ - len -= 2; - - node->type = FTS_AST_TEXT; - /*!< Skip copying the first quote */ - node->text.ptr = fts_ast_string_create( - reinterpret_cast<const byte*>(ptr->str + 1), len); - node->text.distance = ULINT_UNDEFINED; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -This function takes ownership of the expr and is responsible -for free'ing it. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_list( -/*=====================*/ - void* arg, /*!< in: ast state instance */ - fts_ast_node_t* expr) /*!< in: ast expr instance */ -{ - fts_ast_node_t* node = fts_ast_node_create(); - - node->type = FTS_AST_LIST; - node->list.head = node->list.tail = expr; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -Create a sub-expression list node. This function takes ownership of -expr and is responsible for deleting it. -@return new node */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_create_node_subexp_list( -/*============================*/ - void* arg, /*!< in: ast state instance */ - fts_ast_node_t* expr) /*!< in: ast expr instance */ -{ - fts_ast_node_t* node = fts_ast_node_create(); - - node->type = FTS_AST_SUBEXP_LIST; - node->list.head = node->list.tail = expr; - - fts_ast_state_add_node((fts_ast_state_t*) arg, node); - - return(node); -} - -/******************************************************************//** -Free an expr list node elements. 
*/ -static -void -fts_ast_free_list( -/*==============*/ - fts_ast_node_t* node) /*!< in: ast node to free */ -{ - ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); - - for (node = node->list.head; - node != NULL; - node = fts_ast_free_node(node)) { - - /*!< No op */ - } -} - -/********************************************************************//** -Free a fts_ast_node_t instance. -@return next node to free */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_free_node( -/*==============*/ - fts_ast_node_t* node) /*!< in: the node to free */ -{ - fts_ast_node_t* next_node; - - switch (node->type) { - case FTS_AST_TEXT: - if (node->text.ptr) { - fts_ast_string_free(node->text.ptr); - node->text.ptr = NULL; - } - break; - - case FTS_AST_TERM: - if (node->term.ptr) { - fts_ast_string_free(node->term.ptr); - node->term.ptr = NULL; - } - break; - - case FTS_AST_LIST: - case FTS_AST_SUBEXP_LIST: - fts_ast_free_list(node); - node->list.head = node->list.tail = NULL; - break; - - case FTS_AST_OPER: - break; - - default: - ut_error; - } - - /*!< Get next node before freeing the node itself */ - next_node = node->next; - - ut_free(node); - - return(next_node); -} - -/******************************************************************//** -This AST takes ownership of the expr and is responsible -for free'ing it. 
-@return in param "list" */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_add_node( -/*=============*/ - fts_ast_node_t* node, /*!< in: list instance */ - fts_ast_node_t* elem) /*!< in: node to add to list */ -{ - if (!elem) { - return(NULL); - } - - ut_a(!elem->next); - ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); - - if (!node->list.head) { - ut_a(!node->list.tail); - - node->list.head = node->list.tail = elem; - } else { - ut_a(node->list.tail); - - node->list.tail->next = elem; - node->list.tail = elem; - } - - return(node); -} - -/******************************************************************//** -For tracking node allocations, in case there is an error during -parsing. */ -UNIV_INTERN -void -fts_ast_state_add_node( -/*===================*/ - fts_ast_state_t*state, /*!< in: ast instance */ - fts_ast_node_t* node) /*!< in: node to add to ast */ -{ - if (!state->list.head) { - ut_a(!state->list.tail); - - state->list.head = state->list.tail = node; - } else { - state->list.tail->next_alloc = node; - state->list.tail = node; - } -} - -/******************************************************************//** -Set the wildcard attribute of a term. */ -UNIV_INTERN -void -fts_ast_term_set_wildcard( -/*======================*/ - fts_ast_node_t* node) /*!< in/out: set attribute of - a term node */ -{ - if (!node) { - return; - } - - /* If it's a node list, the wildcard should be set to the tail node*/ - if (node->type == FTS_AST_LIST) { - ut_ad(node->list.tail != NULL); - node = node->list.tail; - } - - ut_a(node->type == FTS_AST_TERM); - ut_a(!node->term.wildcard); - - node->term.wildcard = TRUE; -} - -/******************************************************************//** -Set the proximity attribute of a text node. 
*/ -UNIV_INTERN -void -fts_ast_term_set_distance( -/*======================*/ - fts_ast_node_t* node, /*!< in/out: text node */ - ulint distance) /*!< in: the text proximity - distance */ -{ - if (node == NULL) { - return; - } - - ut_a(node->type == FTS_AST_TEXT); - ut_a(node->text.distance == ULINT_UNDEFINED); - - node->text.distance = distance; -} - -/******************************************************************//** -Free node and expr allocations. */ -UNIV_INTERN -void -fts_ast_state_free( -/*===============*/ - fts_ast_state_t*state) /*!< in: ast state to free */ -{ - fts_ast_node_t* node = state->list.head; - - /* Free the nodes that were allocated during parsing. */ - while (node) { - fts_ast_node_t* next = node->next_alloc; - - if (node->type == FTS_AST_TEXT && node->text.ptr) { - fts_ast_string_free(node->text.ptr); - node->text.ptr = NULL; - } else if (node->type == FTS_AST_TERM && node->term.ptr) { - fts_ast_string_free(node->term.ptr); - node->term.ptr = NULL; - } - - ut_free(node); - node = next; - } - - state->root = state->list.head = state->list.tail = NULL; -} - -/******************************************************************//** -Print an ast node. 
*/ -UNIV_INTERN -void -fts_ast_node_print( -/*===============*/ - fts_ast_node_t* node) /*!< in: ast node to print */ -{ - switch (node->type) { - case FTS_AST_TEXT: - printf("TEXT: "); - fts_ast_string_print(node->text.ptr); - break; - - case FTS_AST_TERM: - printf("TERM: "); - fts_ast_string_print(node->term.ptr); - break; - - case FTS_AST_LIST: - printf("LIST: "); - node = node->list.head; - - while (node) { - fts_ast_node_print(node); - node = node->next; - } - break; - - case FTS_AST_SUBEXP_LIST: - printf("SUBEXP_LIST: "); - node = node->list.head; - - while (node) { - fts_ast_node_print(node); - node = node->next; - } - case FTS_AST_OPER: - printf("OPER: %d\n", node->oper); - break; - - default: - ut_error; - } -} - -/******************************************************************//** -Traverse the AST - in-order traversal, except for the FTX_EXIST and FTS_IGNORE -nodes, which will be ignored in the first pass of each level, and visited in a -second and third pass after all other nodes in the same level are visited. 
-@return DB_SUCCESS if all went well */ -UNIV_INTERN -dberr_t -fts_ast_visit( -/*==========*/ - fts_ast_oper_t oper, /*!< in: current operator */ - fts_ast_node_t* node, /*!< in: current root node */ - fts_ast_callback visitor, /*!< in: callback function */ - void* arg, /*!< in: arg for callback */ - bool* has_ignore) /*!< out: true, if the operator - was ignored during processing, - currently we ignore FTS_EXIST - and FTS_IGNORE operators */ -{ - dberr_t error = DB_SUCCESS; - fts_ast_node_t* oper_node = NULL; - fts_ast_node_t* start_node; - bool revisit = false; - bool will_be_ignored = false; - fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST; - - start_node = node->list.head; - - ut_a(node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST); - - if (oper == FTS_EXIST_SKIP) { - visit_pass = FTS_PASS_EXIST; - } else if (oper == FTS_IGNORE_SKIP) { - visit_pass = FTS_PASS_IGNORE; - } - - /* In the first pass of the tree, at the leaf level of the - tree, FTS_EXIST and FTS_IGNORE operation will be ignored. - It will be repeated at the level above the leaf level. - - The basic idea here is that when we encounter FTS_EXIST or - FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP - or FTS_IGNORE_SKIP, and term node & text node with the operators - is ignored in the first pass. We have two passes during the revisit: - We process nodes with FTS_EXIST_SKIP in the exist pass, and then - process nodes with FTS_IGNORE_SKIP in the ignore pass. - - The order should be restrictly followed, or we will get wrong results. - For example, we have a query 'a +b -c d +e -f'. - first pass: process 'a' and 'd' by union; - exist pass: process '+b' and '+e' by intersection; - ignore pass: process '-c' and '-f' by difference. 
*/ - - for (node = node->list.head; - node && (error == DB_SUCCESS); - node = node->next) { - - switch(node->type) { - case FTS_AST_LIST: - if (visit_pass != FTS_PASS_FIRST) { - break; - } - - error = fts_ast_visit(oper, node, visitor, - arg, &will_be_ignored); - - /* If will_be_ignored is set to true, then - we encountered and ignored a FTS_EXIST or FTS_IGNORE - operator. */ - if (will_be_ignored) { - revisit = true; - /* Remember oper for list in case '-abc&def', - ignored oper is from previous node of list.*/ - node->oper = oper; - } - - break; - - case FTS_AST_OPER: - oper = node->oper; - oper_node = node; - - /* Change the operator for revisit */ - if (oper == FTS_EXIST) { - oper_node->oper = FTS_EXIST_SKIP; - } else if (oper == FTS_IGNORE) { - oper_node->oper = FTS_IGNORE_SKIP; - } - - break; - - default: - if (node->visited) { - continue; - } - - ut_a(oper == FTS_NONE || !oper_node - || oper_node->oper == oper - || oper_node->oper == FTS_EXIST_SKIP - || oper_node->oper == FTS_IGNORE_SKIP); - - if (oper== FTS_EXIST || oper == FTS_IGNORE) { - *has_ignore = true; - continue; - } - - /* Process leaf node accroding to its pass.*/ - if (oper == FTS_EXIST_SKIP - && visit_pass == FTS_PASS_EXIST) { - error = visitor(FTS_EXIST, node, arg); - node->visited = true; - } else if (oper == FTS_IGNORE_SKIP - && visit_pass == FTS_PASS_IGNORE) { - error = visitor(FTS_IGNORE, node, arg); - node->visited = true; - } else if (visit_pass == FTS_PASS_FIRST) { - error = visitor(oper, node, arg); - node->visited = true; - } - } - } - - if (revisit) { - /* Exist pass processes the skipped FTS_EXIST operation. */ - for (node = start_node; - node && error == DB_SUCCESS; - node = node->next) { - - if (node->type == FTS_AST_LIST - && node->oper != FTS_IGNORE) { - error = fts_ast_visit(FTS_EXIST_SKIP, node, - visitor, arg, &will_be_ignored); - } - } - - /* Ignore pass processes the skipped FTS_IGNORE operation. 
*/ - for (node = start_node; - node && error == DB_SUCCESS; - node = node->next) { - - if (node->type == FTS_AST_LIST) { - error = fts_ast_visit(FTS_IGNORE_SKIP, node, - visitor, arg, &will_be_ignored); - } - } - } - - return(error); -} - -/** -Create an ast string object, with NUL-terminator, so the string -has one more byte than len -@param[in] str pointer to string -@param[in] len length of the string -@return ast string with NUL-terminator */ -UNIV_INTERN -fts_ast_string_t* -fts_ast_string_create( - const byte* str, - ulint len) -{ - fts_ast_string_t* ast_str; - - ut_ad(len > 0); - - ast_str = static_cast<fts_ast_string_t*> - (ut_malloc(sizeof(fts_ast_string_t))); - ast_str->str = static_cast<byte*>(ut_malloc(len + 1)); - - ast_str->len = len; - memcpy(ast_str->str, str, len); - ast_str->str[len] = '\0'; - - return(ast_str); -} - -/** -Free an ast string instance -@param[in,out] ast_str string to free */ -UNIV_INTERN -void -fts_ast_string_free( - fts_ast_string_t* ast_str) -{ - if (ast_str != NULL) { - ut_free(ast_str->str); - ut_free(ast_str); - } -} - -/** -Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul -@param[in] str string to translate -@param[in] base the base -@return translated number */ -UNIV_INTERN -ulint -fts_ast_string_to_ul( - const fts_ast_string_t* ast_str, - int base) -{ - return(strtoul(reinterpret_cast<const char*>(ast_str->str), - NULL, base)); -} - -/** -Print the ast string -@param[in] str string to print */ -UNIV_INTERN -void -fts_ast_string_print( - const fts_ast_string_t* ast_str) -{ - for (ulint i = 0; i < ast_str->len; ++i) { - printf("%c", ast_str->str[i]); - } - - printf("\n"); -} - -#ifdef UNIV_DEBUG -const char* -fts_ast_oper_name_get(fts_ast_oper_t oper) -{ - switch(oper) { - case FTS_NONE: - return("FTS_NONE"); - case FTS_IGNORE: - return("FTS_IGNORE"); - case FTS_EXIST: - return("FTS_EXIST"); - case FTS_NEGATE: - return("FTS_NEGATE"); - case FTS_INCR_RATING: - return("FTS_INCR_RATING"); - case 
FTS_DECR_RATING: - return("FTS_DECR_RATING"); - case FTS_DISTANCE: - return("FTS_DISTANCE"); - case FTS_IGNORE_SKIP: - return("FTS_IGNORE_SKIP"); - case FTS_EXIST_SKIP: - return("FTS_EXIST_SKIP"); - } - ut_ad(0); -} - -const char* -fts_ast_node_type_get(fts_ast_type_t type) -{ - switch (type) { - case FTS_AST_OPER: - return("FTS_AST_OPER"); - case FTS_AST_NUMB: - return("FTS_AST_NUMB"); - case FTS_AST_TERM: - return("FTS_AST_TERM"); - case FTS_AST_TEXT: - return("FTS_AST_TEXT"); - case FTS_AST_LIST: - return("FTS_AST_LIST"); - case FTS_AST_SUBEXP_LIST: - return("FTS_AST_SUBEXP_LIST"); - } - ut_ad(0); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/fts/fts0blex.cc b/storage/xtradb/fts/fts0blex.cc deleted file mode 100644 index 2d71934fa0e..00000000000 --- a/storage/xtradb/fts/fts0blex.cc +++ /dev/null @@ -1,1957 +0,0 @@ -#include "univ.i" -#line 2 "fts0blex.cc" - -#line 4 "fts0blex.cc" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. 
- */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. */ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* An opaque pointer. 
*/ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void* yyscan_t; -#endif - -/* For convenience, these vars (plus the bison vars far below) - are macros in the reentrant scanner. */ -#define yyin yyg->yyin_r -#define yyout yyg->yyout_r -#define yyextra yyg->yyextra_r -#define yyleng yyg->yyleng_r -#define yytext yyg->yytext_r -#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) -#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) -#define yy_flex_debug yyg->yy_flex_debug_r - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. - */ -#define BEGIN yyg->yy_start = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START ((yyg->yy_start - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE fts0brestart(yyin ,yyscanner ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else -#define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ -#endif - -/* The state buf must be large enough to hold one state per character in the main buffer. 
- */ -#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - -#define YY_LESS_LINENO(n) - -/* Return all but the first "n" matched characters back to the input stream. */ -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - *yy_cp = yyg->yy_hold_char; \ - YY_RESTORE_YY_MORE_OFFSET \ - yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. 
*/ - int yy_bs_column; /**< The column count. */ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via fts0brestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - * - * Returns the top of the stack, or NULL. - */ -#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ - ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ - : NULL) - -/* Same as previous macro, but useful when we know that the buffer stack is not - * NULL or when we need an lvalue. For internal use only. 
- */ -#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] - -void fts0brestart (FILE *input_file ,yyscan_t yyscanner ); -void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); -void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -void fts0bpop_buffer_state (yyscan_t yyscanner ); - -static void fts0bensure_buffer_stack (yyscan_t yyscanner ); -static void fts0b_load_buffer_state (yyscan_t yyscanner ); -static void fts0b_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner ); - -#define YY_FLUSH_BUFFER fts0b_flush_buffer(YY_CURRENT_BUFFER ,yyscanner) - -YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); - -void *fts0balloc (yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); -void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); -void fts0bfree (void * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) 
MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); - -#define yy_new_buffer fts0b_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! YY_CURRENT_BUFFER ){ \ - fts0bensure_buffer_stack (yyscanner); \ - YY_CURRENT_BUFFER_LVALUE = \ - fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! YY_CURRENT_BUFFER ){\ - fts0bensure_buffer_stack (yyscanner); \ - YY_CURRENT_BUFFER_LVALUE = \ - fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) - -/* Begin user sect3 */ - -#define fts0bwrap(n) 1 -#define YY_SKIP_YYWRAP - -typedef unsigned char YY_CHAR; - -typedef int yy_state_type; - -#define yytext_ptr yytext_r - -static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); -static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); -static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - yyg->yytext_ptr = yy_bp; \ - yyleng = static_cast<int>(yy_cp - yy_bp); \ - yyg->yy_hold_char = *yy_cp; \ - *yy_cp = '\0'; \ - yyg->yy_c_buf_p = yy_cp; - -#define YY_NUM_RULES 7 -#define YY_END_OF_BUFFER 8 -/* This struct is not used in this scanner, - but its presence is necessary. 
*/ -struct yy_trans_info - { - flex_int32_t yy_verify; - flex_int32_t yy_nxt; - }; -static yyconst flex_int16_t yy_accept[19] = - { 0, - 4, 4, 8, 4, 1, 6, 1, 7, 7, 2, - 3, 4, 1, 1, 0, 5, 3, 0 - } ; - -static yyconst flex_int32_t yy_ec[256] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 4, 1, 5, 1, 1, 6, 1, 1, 7, - 7, 7, 7, 1, 7, 1, 1, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 1, 1, 7, - 1, 7, 1, 7, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - } ; - -static yyconst flex_int32_t yy_meta[9] = - { 0, - 1, 2, 3, 4, 5, 5, 5, 1 - } ; - -static yyconst flex_int16_t yy_base[22] = - { 0, - 0, 0, 22, 0, 7, 23, 0, 14, 23, 23, - 7, 0, 0, 0, 5, 23, 0, 23, 11, 12, - 16 - } ; - -static yyconst flex_int16_t yy_def[22] = - { 0, - 18, 1, 18, 19, 19, 18, 20, 21, 18, 18, - 19, 19, 5, 20, 21, 18, 11, 0, 18, 18, - 18 - } ; - -static yyconst flex_int16_t yy_nxt[32] = - { 0, - 4, 5, 6, 7, 8, 9, 10, 11, 13, 16, - 14, 12, 12, 14, 17, 14, 15, 15, 16, 15, - 15, 18, 3, 18, 18, 18, 18, 18, 18, 18, - 18 - } ; - -static yyconst flex_int16_t yy_chk[32] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 5, 15, - 5, 19, 19, 20, 11, 20, 21, 21, 8, 21, - 21, 3, 18, 18, 18, 18, 18, 18, 18, 18, - 18 - } ; - -/* The intent behind this definition is that it'll catch - * any uses of REJECT which flex missed. 
- */ -#define REJECT reject_used_but_not_detected -#define yymore() yymore_used_but_not_detected -#define YY_MORE_ADJ 0 -#define YY_RESTORE_YY_MORE_OFFSET -#line 1 "fts0blex.l" -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ -/** - * @file fts/fts0blex.l - * FTS parser lexical analyzer - * - * Created 2007/5/9 Sunny Bains - */ -#line 27 "fts0blex.l" - -#include "fts0ast.h" -#include "fts0pars.h" - -/* Required for reentrant parser */ -#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner) - -#define YY_NO_INPUT 1 -#line 484 "fts0blex.cc" - -#define INITIAL 0 - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -/* Holds the entire state of the reentrant scanner. */ -struct yyguts_t -{ - - /* User-defined. Not touched by flex. */ - YY_EXTRA_TYPE yyextra_r; - - /* The rest are the same as the globals declared in the non-reentrant scanner. 
*/ - FILE *yyin_r, *yyout_r; - size_t yy_buffer_stack_top; /**< index of top of stack. */ - size_t yy_buffer_stack_max; /**< capacity of stack. */ - YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ - char yy_hold_char; - int yy_n_chars; - int yyleng_r; - char *yy_c_buf_p; - int yy_init; - int yy_start; - int yy_did_buffer_switch_on_eof; - int yy_start_stack_ptr; - int yy_start_stack_depth; - int *yy_start_stack; - yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; - - int yylineno_r; - int yy_flex_debug_r; - - char *yytext_r; - int yy_more_flag; - int yy_more_len; - -}; /* end struct yyguts_t */ - -static int yy_init_globals (yyscan_t yyscanner ); - -int fts0blex_init (yyscan_t* scanner); - -int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); - -/* Accessor methods to globals. - These are made visible to non-reentrant scanners for convenience. */ - -int fts0blex_destroy (yyscan_t yyscanner ); - -int fts0bget_debug (yyscan_t yyscanner ); - -void fts0bset_debug (int debug_flag ,yyscan_t yyscanner ); - -YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner ); - -void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); - -FILE *fts0bget_in (yyscan_t yyscanner ); - -void fts0bset_in (FILE * in_str ,yyscan_t yyscanner ); - -FILE *fts0bget_out (yyscan_t yyscanner ); - -void fts0bset_out (FILE * out_str ,yyscan_t yyscanner ); - -int fts0bget_leng (yyscan_t yyscanner ); - -char *fts0bget_text (yyscan_t yyscanner ); - -int fts0bget_lineno (yyscan_t yyscanner ); - -void fts0bset_lineno (int line_number ,yyscan_t yyscanner ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. 
- */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int fts0bwrap (yyscan_t yyscanner ); -#else -extern int fts0bwrap (yyscan_t yyscanner ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))); -#endif - -#ifndef YY_NO_INPUT - -#ifdef __cplusplus -static int yyinput (yyscan_t yyscanner ); -#else -static int input (yyscan_t yyscanner ); -#endif - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else -#define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ -#endif - -/* Copy whatever the last rule matched to the standard output. */ -#ifndef ECHO -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) -#endif - -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". 
- */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ - if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ - { \ - int c = '*'; \ - int n; \ - for ( n = 0; n < static_cast<int>(max_size) && \ - (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ - buf[n] = (char) c; \ - if ( c == '\n' ) \ - buf[n++] = (char) c; \ - if ( c == EOF && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - result = n; \ - } \ - else \ - { \ - errno=0; \ - while ( (result = static_cast<int>(fread(buf, 1, max_size, yyin))) \ - == 0 && ferror(yyin) ) \ - { \ - if( errno != EINTR) \ - { \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - break; \ - } \ - errno=0; \ - clearerr(yyin); \ - } \ - }\ -\ - -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) -#endif - -/* end tables serialization structures and prototypes */ - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int fts0blex (yyscan_t yyscanner); - -#define YY_DECL int fts0blex (yyscan_t yyscanner) -#endif /* !YY_DECL */ - -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -/* Code executed at the end of each rule. */ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - -#define YY_RULE_SETUP \ - YY_USER_ACTION - -/** The main scanner function which does all the work. 
- */ -YY_DECL -{ - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - -#line 43 "fts0blex.l" - - -#line 712 "fts0blex.cc" - - if ( !yyg->yy_init ) - { - yyg->yy_init = 1; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif - - if ( ! yyg->yy_start ) - yyg->yy_start = 1; /* first start state */ - - if ( ! yyin ) - yyin = stdin; - - if ( ! yyout ) - yyout = stdout; - - if ( ! YY_CURRENT_BUFFER ) { - fts0bensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - fts0b_load_buffer_state(yyscanner ); - } - - while ( 1 ) /* loops until end-of-file is reached */ - { - yy_cp = yyg->yy_c_buf_p; - - /* Support of yytext. */ - *yy_cp = yyg->yy_hold_char; - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. - */ - yy_bp = yy_cp; - - yy_current_state = yyg->yy_start; -yy_match: - do - { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 19 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - ++yy_cp; - } - while ( yy_current_state != 18 ); - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - -yy_find_action: - yy_act = yy_accept[yy_current_state]; - - YY_DO_BEFORE_ACTION; - -do_action: /* This label is used only to access EOF actions. 
*/ - - switch ( yy_act ) - { /* beginning of action switch */ - case 0: /* must back up */ - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = yyg->yy_hold_char; - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - goto yy_find_action; - -case 1: -YY_RULE_SETUP -#line 45 "fts0blex.l" -/* Ignore whitespace */ ; - YY_BREAK -case 2: -YY_RULE_SETUP -#line 47 "fts0blex.l" -{ - val->oper = fts0bget_text(yyscanner)[0]; - - return(val->oper); -} - YY_BREAK -case 3: -YY_RULE_SETUP -#line 53 "fts0blex.l" -{ - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); - - return(FTS_NUMB); -} - YY_BREAK -case 4: -YY_RULE_SETUP -#line 59 "fts0blex.l" -{ - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); - - return(FTS_TERM); -} - YY_BREAK -case 5: -YY_RULE_SETUP -#line 65 "fts0blex.l" -{ - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); - - return(FTS_TEXT); -} - YY_BREAK -case 6: -/* rule 6 can match eol */ -YY_RULE_SETUP -#line 71 "fts0blex.l" - - YY_BREAK -case 7: -YY_RULE_SETUP -#line 73 "fts0blex.l" -ECHO; - YY_BREAK -#line 843 "fts0blex.cc" -case YY_STATE_EOF(INITIAL): - yyterminate(); - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = yyg->yy_hold_char; - YY_RESTORE_YY_MORE_OFFSET - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * fts0blex(). If so, then we have to assure - * consistency between YY_CURRENT_BUFFER and our - * globals. 
Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. - */ - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); - - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. 
*/ - yy_cp = ++yyg->yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_END_OF_FILE: - { - yyg->yy_did_buffer_switch_on_eof = 0; - - if ( fts0bwrap(yyscanner ) ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. - */ - yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = - yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yyg->yy_c_buf_p = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ -} /* end of fts0blex */ - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ -static int yy_get_next_buffer (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - 
register char *source = yyg->yytext_ptr; - register int number_to_move, i; - int ret_val; - - if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. - */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. */ - number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; - - else - { - int num_to_read = static_cast<int>( - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1); - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER; - - int yy_c_buf_p_offset = - (int) (yyg->yy_c_buf_p - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = static_cast<int>(b->yy_buf_size * 2); - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - fts0brealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! 
b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = static_cast<int>( - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1); - - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. */ - YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - yyg->yy_n_chars, num_to_read); - - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - if ( yyg->yy_n_chars == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - fts0brestart(yyin ,yyscanner); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { - /* Extend the array by 50%, plus the number we really need. */ - yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0brealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner ); - if ( ! 
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); - } - - yyg->yy_n_chars += number_to_move; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - - yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; - - return ret_val; -} - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - -static yy_state_type yy_get_previous_state (yyscan_t yyscanner) -{ - register yy_state_type yy_current_state; - register char *yy_cp; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - yy_current_state = yyg->yy_start; - - for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) - { - register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 19 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - } - - return yy_current_state; -} - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ -static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) -{ - register int yy_is_jam; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. 
*/ - register char *yy_cp = yyg->yy_c_buf_p; - - register YY_CHAR yy_c = 1; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 19 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - yy_is_jam = (yy_current_state == 18); - - return yy_is_jam ? 0 : yy_current_state; -} - -#ifndef YY_NO_INPUT -#ifdef __cplusplus - static int yyinput (yyscan_t yyscanner) -#else - static int input (yyscan_t yyscanner) -#endif - -{ - int c; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - *yyg->yy_c_buf_p = yyg->yy_hold_char; - - if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - /* This was really a NUL. */ - *yyg->yy_c_buf_p = '\0'; - - else - { /* need more input */ - int offset = yyg->yy_c_buf_p - yyg->yytext_ptr; - ++yyg->yy_c_buf_p; - - switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - fts0brestart(yyin ,yyscanner); - - /*FALLTHROUGH*/ - - case EOB_ACT_END_OF_FILE: - { - if ( fts0bwrap(yyscanner ) ) - return EOF; - - if ( ! 
yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(yyscanner); -#else - return input(yyscanner); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = yyg->yytext_ptr + offset; - break; - } - } - } - - c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ - *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ - yyg->yy_hold_char = *++yyg->yy_c_buf_p; - - return c; -} -#endif /* ifndef YY_NO_INPUT */ - -/** Immediately switch to a different input stream. - * @param input_file A readable stream. - * @param yyscanner The scanner object. - * @note This function does not reset the start condition to @c INITIAL . - */ -void fts0brestart (FILE * input_file , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! YY_CURRENT_BUFFER ){ - fts0bensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - fts0b_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); - fts0b_load_buffer_state(yyscanner ); -} - -/** Switch to a different input buffer. - * @param new_buffer The new input buffer. - * @param yyscanner The scanner object. - */ -void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* TODO. We should be able to replace this entire function body - * with - * fts0bpop_buffer_state(); - * fts0bpush_buffer_state(new_buffer); - */ - fts0bensure_buffer_stack (yyscanner); - if ( YY_CURRENT_BUFFER == new_buffer ) - return; - - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. 
*/ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - YY_CURRENT_BUFFER_LVALUE = new_buffer; - fts0b_load_buffer_state(yyscanner ); - - /* We don't actually know whether we did this switch during - * EOF (fts0bwrap()) processing, but the only time this flag - * is looked at is after fts0bwrap() is called, so it's safe - * to go ahead and always set it. - */ - yyg->yy_did_buffer_switch_on_eof = 1; -} - -static void fts0b_load_buffer_state (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; - yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; - yyg->yy_hold_char = *yyg->yy_c_buf_p; -} - -/** Allocate and initialize an input buffer state. - * @param file A readable stream. - * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. - * @param yyscanner The scanner object. - * @return the allocated buffer state. - */ -YY_BUFFER_STATE fts0b_create_buffer (FILE * file, int size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) fts0balloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) fts0balloc(b->yy_buf_size + 2 ,yyscanner ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - fts0b_init_buffer(b,file ,yyscanner); - - return b; -} - -/** Destroy the buffer. - * @param b a buffer created with fts0b_create_buffer() - * @param yyscanner The scanner object. 
- */ -void fts0b_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! b ) - return; - - if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ - YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - fts0bfree((void *) b->yy_ch_buf ,yyscanner ); - - fts0bfree((void *) b ,yyscanner ); -} - -/* Initializes or reinitializes a buffer. - * This function is sometimes called more than once on the same buffer, - * such as during a fts0brestart() or at EOF. - */ -static void fts0b_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) - -{ - int oerrno = errno; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - fts0b_flush_buffer(b ,yyscanner); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - - /* If b is the current buffer, then fts0b_init_buffer was _probably_ - * called from fts0brestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } - - b->yy_is_interactive = 0; - - errno = oerrno; -} - -/** Discard all buffered characters. On the next scan, YY_INPUT will be called. - * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. - * @param yyscanner The scanner object. - */ -void fts0b_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. 
- */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == YY_CURRENT_BUFFER ) - fts0b_load_buffer_state(yyscanner ); -} - -/** Pushes the new state onto the stack. The new state becomes - * the current state. This function will allocate the stack - * if necessary. - * @param new_buffer The new state. - * @param yyscanner The scanner object. - */ -void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (new_buffer == NULL) - return; - - fts0bensure_buffer_stack(yyscanner); - - /* This block is copied from fts0b_switch_to_buffer. */ - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - /* Only push if top exists. Otherwise, replace top. */ - if (YY_CURRENT_BUFFER) - yyg->yy_buffer_stack_top++; - YY_CURRENT_BUFFER_LVALUE = new_buffer; - - /* copied from fts0b_switch_to_buffer. */ - fts0b_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; -} - -/** Removes and deletes the top of the stack, if present. - * The next element becomes the new top. - * @param yyscanner The scanner object. - */ -void fts0bpop_buffer_state (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (!YY_CURRENT_BUFFER) - return; - - fts0b_delete_buffer(YY_CURRENT_BUFFER ,yyscanner); - YY_CURRENT_BUFFER_LVALUE = NULL; - if (yyg->yy_buffer_stack_top > 0) - --yyg->yy_buffer_stack_top; - - if (YY_CURRENT_BUFFER) { - fts0b_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; - } -} - -/* Allocates the stack if it does not exist. - * Guarantees space for at least one push. 
- */ -static void fts0bensure_buffer_stack (yyscan_t yyscanner) -{ - int num_to_alloc; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (!yyg->yy_buffer_stack) { - - /* First allocation is just for 2 elements, since we don't know if this - * scanner will even need a stack. We use 2 instead of 1 to avoid an - * immediate realloc on the next call. - */ - num_to_alloc = 1; - yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0balloc - (num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in fts0bensure_buffer_stack()" ); - - memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - - yyg->yy_buffer_stack_max = num_to_alloc; - yyg->yy_buffer_stack_top = 0; - return; - } - - if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ - - /* Increase the buffer to prepare for a possible push. */ - int grow_size = 8 /* arbitrary grow size */; - - num_to_alloc = static_cast<int>(yyg->yy_buffer_stack_max + grow_size); - yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0brealloc - (yyg->yy_buffer_stack, - num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in fts0bensure_buffer_stack()" ); - - /* zero only the new slots.*/ - memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); - yyg->yy_buffer_stack_max = num_to_alloc; - } -} - -/** Setup the input buffer state to scan directly from a user-specified character buffer. - * @param base the character buffer - * @param size the size in bytes of the character buffer - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. 
- */ -YY_BUFFER_STATE fts0b_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - if ( size < 2 || - base[size-2] != YY_END_OF_BUFFER_CHAR || - base[size-1] != YY_END_OF_BUFFER_CHAR ) - /* They forgot to leave room for the EOB's. */ - return 0; - - b = (YY_BUFFER_STATE) fts0balloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in fts0b_scan_buffer()" ); - - b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ - b->yy_buf_pos = b->yy_ch_buf = base; - b->yy_is_our_buffer = 0; - b->yy_input_file = 0; - b->yy_n_chars = static_cast<int>(b->yy_buf_size); - b->yy_is_interactive = 0; - b->yy_at_bol = 1; - b->yy_fill_buffer = 0; - b->yy_buffer_status = YY_BUFFER_NEW; - - fts0b_switch_to_buffer(b ,yyscanner ); - - return b; -} - -/** Setup the input buffer state to scan a string. The next call to fts0blex() will - * scan from a @e copy of @a str. - * @param yystr a NUL-terminated string to scan - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - * @note If you want to scan bytes that may contain NUL values, then use - * fts0b_scan_bytes() instead. - */ -YY_BUFFER_STATE fts0b_scan_string (yyconst char * yystr , yyscan_t yyscanner) -{ - return fts0b_scan_bytes(yystr,static_cast<int>(strlen(yystr)), yyscanner); -} - -/** Setup the input buffer state to scan the given bytes. The next call to fts0blex() will - * scan from a @e copy of @a bytes. - * @param yybytes the byte buffer to scan - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - char *buf; - yy_size_t n; - int i; - - /* Get memory for full buffer, including space for trailing EOB's. 
*/ - n = _yybytes_len + 2; - buf = (char *) fts0balloc(n ,yyscanner ); - if ( ! buf ) - YY_FATAL_ERROR( "out of dynamic memory in fts0b_scan_bytes()" ); - - for ( i = 0; i < _yybytes_len; ++i ) - buf[i] = yybytes[i]; - - buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; - - b = fts0b_scan_buffer(buf,n ,yyscanner); - if ( ! b ) - YY_FATAL_ERROR( "bad buffer in fts0b_scan_bytes()" ); - - /* It's okay to grow etc. this buffer, and we should throw it - * away when we're done. - */ - b->yy_is_our_buffer = 1; - - return b; -} - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); -} - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - yytext[yyleng] = yyg->yy_hold_char; \ - yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ - yyg->yy_hold_char = *yyg->yy_c_buf_p; \ - *yyg->yy_c_buf_p = '\0'; \ - yyleng = yyless_macro_arg; \ - } \ - while ( 0 ) - -/* Accessor methods (get/set functions) to struct members. */ - -/** Get the user-defined data for this scanner. - * @param yyscanner The scanner object. - */ -YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyextra; -} - -/** Get the current line number. - * @param yyscanner The scanner object. - */ -int fts0bget_lineno (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! 
YY_CURRENT_BUFFER) - return 0; - - return yylineno; -} - -/** Get the current column number. - * @param yyscanner The scanner object. - */ -int fts0bget_column (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yycolumn; -} - -/** Get the input stream. - * @param yyscanner The scanner object. - */ -FILE *fts0bget_in (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyin; -} - -/** Get the output stream. - * @param yyscanner The scanner object. - */ -FILE *fts0bget_out (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyout; -} - -/** Get the length of the current token. - * @param yyscanner The scanner object. - */ -int fts0bget_leng (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyleng; -} - -/** Get the current token. - * @param yyscanner The scanner object. - */ - -char *fts0bget_text (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yytext; -} - -/** Set the user-defined data. This data is never touched by the scanner. - * @param user_defined The data to be associated with this scanner. - * @param yyscanner The scanner object. - */ -void fts0bset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyextra = user_defined ; -} - -/** Set the current line number. - * @param line_number - * @param yyscanner The scanner object. - */ -void fts0bset_lineno (int line_number , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* lineno is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "fts0bset_lineno called with no buffer" , yyscanner); - - yylineno = line_number; -} - -/** Set the current column. - * @param line_number - * @param yyscanner The scanner object. 
- */ -void fts0bset_column (int column_no , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* column is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "fts0bset_column called with no buffer" , yyscanner); - - yycolumn = column_no; -} - -/** Set the input stream. This does not discard the current - * input buffer. - * @param in_str A readable stream. - * @param yyscanner The scanner object. - * @see fts0b_switch_to_buffer - */ -void fts0bset_in (FILE * in_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyin = in_str ; -} - -void fts0bset_out (FILE * out_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyout = out_str ; -} - -int fts0bget_debug (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yy_flex_debug; -} - -void fts0bset_debug (int bdebug , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yy_flex_debug = bdebug ; -} - -/* Accessor methods for yylval and yylloc */ - -/* User-visible API */ - -/* fts0blex_init is special because it creates the scanner itself, so it is - * the ONLY reentrant function that doesn't take the scanner as the last argument. - * That's why we explicitly handle the declaration, instead of using our macros. - */ - -int fts0blex_init(yyscan_t* ptr_yy_globals) - -{ - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) fts0balloc ( sizeof( struct yyguts_t ), NULL ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - return yy_init_globals ( *ptr_yy_globals ); -} - -/* fts0blex_init_extra has the same functionality as fts0blex_init, but follows the - * convention of taking the scanner as the last argument. 
Note however, that - * this is a *pointer* to a scanner, as it will be allocated by this call (and - * is the reason, too, why this function also must handle its own declaration). - * The user defined value in the first argument will be available to fts0balloc in - * the yyextra field. - */ - -int fts0blex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) - -{ - struct yyguts_t dummy_yyguts; - - fts0bset_extra (yy_user_defined, &dummy_yyguts); - - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) fts0balloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in - yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - fts0bset_extra (yy_user_defined, *ptr_yy_globals); - - return yy_init_globals ( *ptr_yy_globals ); -} - -static int yy_init_globals (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* Initialization is the same as for the non-reentrant scanner. - * This function is called from fts0blex_destroy(), so don't allocate here. - */ - - yyg->yy_buffer_stack = 0; - yyg->yy_buffer_stack_top = 0; - yyg->yy_buffer_stack_max = 0; - yyg->yy_c_buf_p = (char *) 0; - yyg->yy_init = 0; - yyg->yy_start = 0; - - yyg->yy_start_stack_ptr = 0; - yyg->yy_start_stack_depth = 0; - yyg->yy_start_stack = NULL; - - /* Defined in main.c */ -#ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; -#else - yyin = (FILE *) 0; - yyout = (FILE *) 0; -#endif - - /* For future reference: Set errno on error, since we are called by - * fts0blex_init() - */ - return 0; -} - -/* fts0blex_destroy is for both reentrant and non-reentrant scanners. */ -int fts0blex_destroy (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* Pop the buffer stack, destroying each element. 
*/ - while(YY_CURRENT_BUFFER){ - fts0b_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); - YY_CURRENT_BUFFER_LVALUE = NULL; - fts0bpop_buffer_state(yyscanner); - } - - /* Destroy the stack itself. */ - fts0bfree(yyg->yy_buffer_stack ,yyscanner); - yyg->yy_buffer_stack = NULL; - - /* Destroy the start condition stack. */ - fts0bfree(yyg->yy_start_stack ,yyscanner ); - yyg->yy_start_stack = NULL; - - /* Reset the globals. This is important in a non-reentrant scanner so the next time - * fts0blex() is called, initialization will occur. */ - yy_init_globals( yyscanner); - - /* Destroy the main struct (reentrant only). */ - fts0bfree ( yyscanner , yyscanner ); - yyscanner = NULL; - return 0; -} - -/* - * Internal utility routines. - */ - -#ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; -} -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; -} -#endif - -void *fts0balloc (yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - return (void *) malloc( size ); -} - -void *fts0brealloc (void * ptr, yy_size_t size 
, yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - /* The cast to (char *) in the following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. - */ - return (void *) realloc( (char *) ptr, size ); -} - -void fts0bfree (void * ptr , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */ -} - -#define YYTABLES_NAME "yytables" - -#line 73 "fts0blex.l" - diff --git a/storage/xtradb/fts/fts0blex.l b/storage/xtradb/fts/fts0blex.l deleted file mode 100644 index ae6e8ffaa48..00000000000 --- a/storage/xtradb/fts/fts0blex.l +++ /dev/null @@ -1,73 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/** - * @file fts/fts0blex.l - * FTS parser lexical analyzer - * - * Created 2007/5/9 Sunny Bains - */ - -%{ - -#include "fts0ast.h" -#include "fts0pars.h" - -/* Required for reentrant parser */ -#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner) - -%} - -%option noinput -%option nounput -%option noyywrap -%option nostdinit -%option reentrant -%option never-interactive - -%% - -[\t ]+ /* Ignore whitespace */ ; - -[*()+\-<>~@] { - val->oper = fts0bget_text(yyscanner)[0]; - - return(val->oper); -} - -[0-9]+ { - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); - - return(FTS_NUMB); -} - -[^" \n*()+\-<>~@%]* { - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); - - return(FTS_TERM); -} - -\"[^\"\n]*\" { - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner)); - - return(FTS_TEXT); -} - -\n - -%% diff --git a/storage/xtradb/fts/fts0config.cc b/storage/xtradb/fts/fts0config.cc deleted file mode 100644 index 5b4ae5c39f7..00000000000 --- a/storage/xtradb/fts/fts0config.cc +++ /dev/null @@ -1,564 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file fts/fts0config.cc -Full Text Search configuration table. - -Created 2007/5/9 Sunny Bains -***********************************************************************/ - -#include "trx0roll.h" -#include "row0sel.h" - -#include "fts0priv.h" - -#ifndef UNIV_NONINL -#include "fts0types.ic" -#include "fts0vlc.ic" -#endif - -/******************************************************************//** -Callback function for fetching the config value. -@return always returns TRUE */ -static -ibool -fts_config_fetch_value( -/*===================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to - ib_vector_t */ -{ - sel_node_t* node = static_cast<sel_node_t*>(row); - fts_string_t* value = static_cast<fts_string_t*>(user_arg); - - dfield_t* dfield = que_node_get_val(node->select_list); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - void* data = dfield_get_data(dfield); - - ut_a(dtype_get_mtype(type) == DATA_VARCHAR); - - if (len != UNIV_SQL_NULL) { - ulint max_len = ut_min(value->f_len - 1, len); - - memcpy(value->f_str, data, max_len); - value->f_len = max_len; - value->f_str[value->f_len] = '\0'; - } - - return(TRUE); -} - -/******************************************************************//** -Get value from the config table. The caller must ensure that enough -space is allocated for value to hold the column contents. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_get_value( -/*=================*/ - trx_t* trx, /*!< transaction */ - fts_table_t* fts_table, /*!< in: the indexed - FTS table */ - const char* name, /*!< in: get config value for - this parameter name */ - fts_string_t* value) /*!< out: value read from - config table */ -{ - pars_info_t* info; - que_t* graph; - dberr_t error; - ulint name_len = strlen(name); - - info = pars_info_create(); - - *value->f_str = '\0'; - ut_a(value->f_len > 0); - - pars_info_bind_function(info, "my_func", fts_config_fetch_value, - value); - - /* The len field of value must be set to the max bytes that - it can hold. On a successful read, the len field will be set - to the actual number of bytes copied to value. */ - pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len); - - fts_table->suffix = "CONFIG"; - - graph = fts_parse_sql( - fts_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS SELECT value FROM \"%s\"" - " WHERE key = :name;\n" - "BEGIN\n" - "" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - trx->op_info = "getting FTS config value"; - - error = fts_eval_sql(trx, graph); - - mutex_enter(&dict_sys->mutex); - que_graph_free(graph); - mutex_exit(&dict_sys->mutex); - - return(error); -} - -/*********************************************************************//** -Create the config table name for retrieving index specific value. -@return index config parameter name */ -UNIV_INTERN -char* -fts_config_create_index_param_name( -/*===============================*/ - const char* param, /*!< in: base name of param */ - const dict_index_t* index) /*!< in: index for config */ -{ - ulint len; - char* name; - - /* The format of the config name is: name_<index_id>. */ - len = strlen(param); - - /* Caller is responsible for deleting name. 
*/ - name = static_cast<char*>(ut_malloc( - len + FTS_AUX_MIN_TABLE_ID_LENGTH + 2)); - strcpy(name, param); - name[len] = '_'; - - fts_write_object_id(index->id, name + len + 1, - DICT_TF2_FLAG_IS_SET(index->table, - DICT_TF2_FTS_AUX_HEX_NAME)); - - return(name); -} - -/******************************************************************//** -Get value specific to an FTS index from the config table. The caller -must ensure that enough space is allocated for value to hold the -column contents. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_get_index_value( -/*=======================*/ - trx_t* trx, /*!< transaction */ - dict_index_t* index, /*!< in: index */ - const char* param, /*!< in: get config value for - this parameter name */ - fts_string_t* value) /*!< out: value read from - config table */ -{ - char* name; - dberr_t error; - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, - index->table); - - /* We are responsible for free'ing name. */ - name = fts_config_create_index_param_name(param, index); - - error = fts_config_get_value(trx, &fts_table, name, value); - - ut_free(name); - - return(error); -} - -/******************************************************************//** -Set the value in the config table for name. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_set_value( -/*=================*/ - trx_t* trx, /*!< transaction */ - fts_table_t* fts_table, /*!< in: the indexed - FTS table */ - const char* name, /*!< in: get config value for - this parameter name */ - const fts_string_t* - value) /*!< in: value to update */ -{ - pars_info_t* info; - que_t* graph; - dberr_t error; - undo_no_t undo_no; - undo_no_t n_rows_updated; - ulint name_len = strlen(name); - - info = pars_info_create(); - - pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len); - pars_info_bind_varchar_literal(info, "value", - value->f_str, value->f_len); - - fts_table->suffix = "CONFIG"; - - graph = fts_parse_sql( - fts_table, info, - "BEGIN UPDATE \"%s\" SET value = :value WHERE key = :name;"); - - trx->op_info = "setting FTS config value"; - - undo_no = trx->undo_no; - - error = fts_eval_sql(trx, graph); - - fts_que_graph_free_check_lock(fts_table, NULL, graph); - - n_rows_updated = trx->undo_no - undo_no; - - /* Check if we need to do an insert. */ - if (n_rows_updated == 0) { - info = pars_info_create(); - - pars_info_bind_varchar_literal( - info, "name", (byte*) name, name_len); - - pars_info_bind_varchar_literal( - info, "value", value->f_str, value->f_len); - - graph = fts_parse_sql( - fts_table, info, - "BEGIN\n" - "INSERT INTO \"%s\" VALUES(:name, :value);"); - - trx->op_info = "inserting FTS config value"; - - error = fts_eval_sql(trx, graph); - - fts_que_graph_free_check_lock(fts_table, NULL, graph); - } - - return(error); -} - -/******************************************************************//** -Set the value specific to an FTS index in the config table. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_set_index_value( -/*=======================*/ - trx_t* trx, /*!< transaction */ - dict_index_t* index, /*!< in: index */ - const char* param, /*!< in: get config value for - this parameter name */ - fts_string_t* value) /*!< out: value read from - config table */ -{ - char* name; - dberr_t error; - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, - index->table); - - /* We are responsible for free'ing name. */ - name = fts_config_create_index_param_name(param, index); - - error = fts_config_set_value(trx, &fts_table, name, value); - - ut_free(name); - - return(error); -} - -/******************************************************************//** -Get an ulint value from the config table. -@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -fts_config_get_index_ulint( -/*=======================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - const char* name, /*!< in: param name */ - ulint* int_value) /*!< out: value */ -{ - dberr_t error; - fts_string_t value; - - /* We set the length of value to the max bytes it can hold. This - information is used by the callback that reads the value.*/ - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1)); - - error = fts_config_get_index_value(trx, index, name, &value); - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n", - ut_strerr(error), name); - } else { - *int_value = strtoul((char*) value.f_str, NULL, 10); - } - - ut_free(value.f_str); - - return(error); -} - -/******************************************************************//** -Set an ulint value in the config table. 
-@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -fts_config_set_index_ulint( -/*=======================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - const char* name, /*!< in: param name */ - ulint int_value) /*!< in: value */ -{ - dberr_t error; - fts_string_t value; - - /* We set the length of value to the max bytes it can hold. This - information is used by the callback that reads the value.*/ - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1)); - - // FIXME: Get rid of snprintf - ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN); - - value.f_len = ut_snprintf( - (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value); - - error = fts_config_set_index_value(trx, index, name, &value); - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n", - ut_strerr(error), name); - } - - ut_free(value.f_str); - - return(error); -} - -/******************************************************************//** -Get an ulint value from the config table. -@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -fts_config_get_ulint( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: the indexed - FTS table */ - const char* name, /*!< in: param name */ - ulint* int_value) /*!< out: value */ -{ - dberr_t error; - fts_string_t value; - - /* We set the length of value to the max bytes it can hold. 
This - information is used by the callback that reads the value.*/ - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1)); - - error = fts_config_get_value(trx, fts_table, name, &value); - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n", - ut_strerr(error), name); - } else { - *int_value = strtoul((char*) value.f_str, NULL, 10); - } - - ut_free(value.f_str); - - return(error); -} - -/******************************************************************//** -Set an ulint value in the config table. -@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -fts_config_set_ulint( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: the indexed - FTS table */ - const char* name, /*!< in: param name */ - ulint int_value) /*!< in: value */ -{ - dberr_t error; - fts_string_t value; - - /* We set the length of value to the max bytes it can hold. This - information is used by the callback that reads the value.*/ - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1)); - - // FIXME: Get rid of snprintf - ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN); - - value.f_len = snprintf( - (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value); - - error = fts_config_set_value(trx, fts_table, name, &value); - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n", - ut_strerr(error), name); - } - - ut_free(value.f_str); - - return(error); -} - -/******************************************************************//** -Increment the value in the config table for column name. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_increment_value( -/*=======================*/ - trx_t* trx, /*!< transaction */ - fts_table_t* fts_table, /*!< in: the indexed - FTS table */ - const char* name, /*!< in: increment config value - for this parameter name */ - ulint delta) /*!< in: increment by this - much */ -{ - dberr_t error; - fts_string_t value; - que_t* graph = NULL; - ulint name_len = strlen(name); - pars_info_t* info = pars_info_create(); - - /* We set the length of value to the max bytes it can hold. This - information is used by the callback that reads the value.*/ - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1)); - - *value.f_str = '\0'; - - pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len); - - pars_info_bind_function( - info, "my_func", fts_config_fetch_value, &value); - - fts_table->suffix = "CONFIG"; - - graph = fts_parse_sql( - fts_table, info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS SELECT value FROM \"%s\"" - " WHERE key = :name FOR UPDATE;\n" - "BEGIN\n" - "" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - trx->op_info = "read FTS config value"; - - error = fts_eval_sql(trx, graph); - - fts_que_graph_free_check_lock(fts_table, NULL, graph); - - if (UNIV_UNLIKELY(error == DB_SUCCESS)) { - ulint int_value; - - int_value = strtoul((char*) value.f_str, NULL, 10); - - int_value += delta; - - ut_a(FTS_MAX_CONFIG_VALUE_LEN > FTS_MAX_INT_LEN); - - // FIXME: Get rid of snprintf - value.f_len = snprintf( - (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value); - - fts_config_set_value(trx, fts_table, name, &value); - } - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - - ut_print_timestamp(stderr); - - fprintf(stderr, " InnoDB: Error: (%s) " - "while incrementing %s.\n", ut_strerr(error), name); - } - - ut_free(value.f_str); 
- - return(error); -} - -/******************************************************************//** -Increment the per index value in the config table for column name. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_increment_index_value( -/*=============================*/ - trx_t* trx, /*!< transaction */ - dict_index_t* index, /*!< in: FTS index */ - const char* param, /*!< in: increment config value - for this parameter name */ - ulint delta) /*!< in: increment by this - much */ -{ - char* name; - dberr_t error; - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, - index->table); - - /* We are responsible for free'ing name. */ - name = fts_config_create_index_param_name(param, index); - - error = fts_config_increment_value(trx, &fts_table, name, delta); - - ut_free(name); - - return(error); -} - diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc deleted file mode 100644 index e1a95bcd427..00000000000 --- a/storage/xtradb/fts/fts0fts.cc +++ /dev/null @@ -1,7711 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. All Rights reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file fts/fts0fts.cc -Full Text Search interface -***********************************************************************/ - -#include "trx0roll.h" -#include "row0mysql.h" -#include "row0upd.h" -#include "dict0types.h" -#include "row0sel.h" - -#include "fts0fts.h" -#include "fts0priv.h" -#include "fts0types.h" - -#include "fts0types.ic" -#include "fts0vlc.ic" -#include "dict0priv.h" -#include "dict0stats.h" -#include "btr0pcur.h" -#include <vector> - -#include "ha_prototypes.h" - -#define FTS_MAX_ID_LEN 32 - -/** Column name from the FTS config table */ -#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb" - -/** Check whether an aux table name is that of an obsolete table, -by looking for the obsolete table suffixes (DOC_ID, ADDED, STOPWORDS) -anywhere in the name */ -#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \ - (strstr((table_name), "DOC_ID") != NULL \ - || strstr((table_name), "ADDED") != NULL \ - || strstr((table_name), "STOPWORDS") != NULL) - -/** This is the maximum FTS cache for each table and would be -a configurable variable */ -UNIV_INTERN ulong fts_max_cache_size; - -/** Whether the total memory used for FTS cache is exhausted, and we will -need a sync to free some memory */ -UNIV_INTERN bool fts_need_sync = false; - -/** Variable specifying the total memory allocated for FTS cache */ -UNIV_INTERN ulong fts_max_total_cache_size; - -/** This is the FTS result cache limit for each query and would be -a configurable variable */ -UNIV_INTERN ulong fts_result_cache_limit; - -/** Variable specifying the maximum FTS token size */ -UNIV_INTERN ulong fts_max_token_size; - -/** Variable specifying the minimum FTS token size */ -UNIV_INTERN ulong fts_min_token_size; - - 
-// FIXME: testing -ib_time_t elapsed_time = 0; -ulint n_nodes = 0; - -/** Error condition reported by fts_utf8_decode() */ -const ulint UTF8_ERROR = 0xFFFFFFFF; - -#ifdef FTS_CACHE_SIZE_DEBUG -/** The cache size permissible lower limit (1 MB) */ -static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1; - -/** The cache size permissible upper limit (1 GB) */ -static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024; -#endif /* FTS_CACHE_SIZE_DEBUG */ - -/** Time to sleep after DEADLOCK error before retrying operation. */ -static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000; - -#ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t fts_cache_rw_lock_key; -UNIV_INTERN mysql_pfs_key_t fts_cache_init_rw_lock_key; -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t fts_delete_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_optimize_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_bg_threads_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_doc_id_mutex_key; -UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/** Variables to record innodb_fts_internal_tbl_name for the information -schema tables INNODB_FTS_INSERTED etc. */ -UNIV_INTERN char* fts_internal_tbl_name = NULL; -UNIV_INTERN char* fts_internal_tbl_name2 = NULL; - -/** InnoDB default stopword list: -There are different versions of stopwords, the stop words listed -below come from the "Google Stopword" list. Reference: -http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list. 
-The final version of the InnoDB default stopword list is still pending a -decision. The list is terminated by a NULL entry. -NOTE(review): "the" is listed twice below. */ -const char *fts_default_stopword[] = -{ - "a", - "about", - "an", - "are", - "as", - "at", - "be", - "by", - "com", - "de", - "en", - "for", - "from", - "how", - "i", - "in", - "is", - "it", - "la", - "of", - "on", - "or", - "that", - "the", - "this", - "to", - "was", - "what", - "when", - "where", - "who", - "will", - "with", - "und", - "the", - "www", - NULL -}; - -/** For storing table info when checking for orphaned tables. */ -struct fts_aux_table_t { - table_id_t id; /*!< Table id */ - table_id_t parent_id; /*!< Parent table id */ - table_id_t index_id; /*!< Table FT index id */ - char* name; /*!< Name of the table */ -}; - -/** SQL statements for creating the ancillary common FTS tables -(DELETED, DELETED_CACHE, BEING_DELETED, BEING_DELETED_CACHE and CONFIG), -each with a unique clustered index. */ -static const char* fts_create_common_tables_sql = { - "BEGIN\n" - "" - "CREATE TABLE \"%s_DELETED\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DELETED\"(doc_id);\n" - "" - "CREATE TABLE \"%s_DELETED_CACHE\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND " - "ON \"%s_DELETED_CACHE\"(doc_id);\n" - "" - "CREATE TABLE \"%s_BEING_DELETED\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND " - "ON \"%s_BEING_DELETED\"(doc_id);\n" - "" - "CREATE TABLE \"%s_BEING_DELETED_CACHE\" (\n" - " doc_id BIGINT UNSIGNED\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND " - "ON \"%s_BEING_DELETED_CACHE\"(doc_id);\n" - "" - "CREATE TABLE \"%s_CONFIG\" (\n" - " key CHAR(50),\n" - " value CHAR(200) NOT NULL\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_CONFIG\"(key);\n" -}; - -#ifdef FTS_DOC_STATS_DEBUG -/** Template for creating the FTS auxiliary index specific tables. 
This is -mainly designed for the statistics work in the future */ -static const char* fts_create_index_tables_sql = { - "BEGIN\n" - "" - "CREATE TABLE \"%s_DOC_ID\" (\n" - " doc_id BIGINT UNSIGNED,\n" - " word_count INTEGER UNSIGNED NOT NULL\n" - ") COMPACT;\n" - "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DOC_ID\"(doc_id);\n" -}; -#endif - -/** Template for creating the unique clustered index on -(word, first_doc_id) of an ancillary FTS word index table. */ -static const char* fts_create_index_sql = { - "BEGIN\n" - "" - "CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND " - "ON \"%s\"(word, first_doc_id);\n" -}; - -/** FTS auxiliary table suffixes that are common to all FT indexes. -The list is terminated by a NULL entry. */ -static const char* fts_common_tables[] = { - "BEING_DELETED", - "BEING_DELETED_CACHE", - "CONFIG", - "DELETED", - "DELETED_CACHE", - NULL -}; - -/** FTS auxiliary INDEX split intervals. The list is terminated by an -entry whose value is 0. */ -const fts_index_selector_t fts_index_selector[] = { - { 9, "INDEX_1" }, - { 65, "INDEX_2" }, - { 70, "INDEX_3" }, - { 75, "INDEX_4" }, - { 80, "INDEX_5" }, - { 85, "INDEX_6" }, - { 0 , NULL } -}; - -/** Default config values for FTS indexes on a table. */ -static const char* fts_config_table_insert_values_sql = - "BEGIN\n" - "\n" - "INSERT INTO \"%s\" VALUES('" - FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n" - "" - "INSERT INTO \"%s\" VALUES('" - FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n" - "" - "INSERT INTO \"%s\" VALUES ('" - FTS_SYNCED_DOC_ID "', '0');\n" - "" - "INSERT INTO \"%s\" VALUES ('" - FTS_TOTAL_DELETED_COUNT "', '0');\n" - "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */ - "INSERT INTO \"%s\" VALUES ('" - FTS_TABLE_STATE "', '0');\n"; - -/** Run SYNC on the table, i.e., write out data from the cache to the -FTS auxiliary INDEX table and clear the cache at the end. 
-@param[in,out] sync sync state -@param[in] unlock_cache whether to unlock the cache lock when writing a node -@param[in] wait whether to wait when a sync is in progress -@param[in] has_dict whether the dict operation lock is held -@return DB_SUCCESS if all OK */ -static -dberr_t -fts_sync( - fts_sync_t* sync, - bool unlock_cache, - bool wait, - bool has_dict); - -/****************************************************************//** -Release all resources held by the words rb tree e.g., the node ilist. */ -static -void -fts_words_free( -/*===========*/ - ib_rbt_t* words) /*!< in: rb tree of words */ - MY_ATTRIBUTE((nonnull)); -#ifdef FTS_CACHE_SIZE_DEBUG -/****************************************************************//** -Read the max cache size parameter from the config table. */ -static -void -fts_update_max_cache_size( -/*======================*/ - fts_sync_t* sync); /*!< in: sync state */ -#endif - -/*********************************************************************//** -This function fetches the document just inserted right before -we commit the transaction, tokenizes the inserted text data -and inserts it into the FTS auxiliary table and its cache. -@return TRUE if successful */ -static -ulint -fts_add_doc_by_id( -/*==============*/ - fts_trx_table_t*ftt, /*!< in: FTS trx table */ - doc_id_t doc_id, /*!< in: doc id */ - ib_vector_t* fts_indexes MY_ATTRIBUTE((unused))); - /*!< in: affected fts indexes */ -#ifdef FTS_DOC_STATS_DEBUG -/****************************************************************//** -Check whether a particular word (term) exists in the FTS index. 
-@return DB_SUCCESS if all went fine */ -static -dberr_t -fts_is_word_in_index( -/*=================*/ - trx_t* trx, /*!< in: FTS query state */ - que_t** graph, /*!< out: Query graph */ - fts_table_t* fts_table, /*!< in: table instance */ - const fts_string_t* word, /*!< in: the word to check */ - ibool* found) /*!< out: TRUE if exists */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* FTS_DOC_STATS_DEBUG */ - -/******************************************************************//** -Update the last document id. This function could create a new -transaction to update the last document id. -@return DB_SUCCESS if OK */ -static -dberr_t -fts_update_sync_doc_id( -/*===================*/ - const dict_table_t* table, /*!< in: table */ - const char* table_name, /*!< in: table name, or NULL */ - doc_id_t doc_id, /*!< in: last document id */ - trx_t* trx) /*!< in: update trx, or NULL */ - MY_ATTRIBUTE((nonnull(1))); - -/****************************************************************//** -This function loads the default InnoDB stopword list */ -static -void -fts_load_default_stopword( -/*======================*/ - fts_stopword_t* stopword_info) /*!< in: stopword info */ -{ - fts_string_t str; - mem_heap_t* heap; - ib_alloc_t* allocator; - ib_rbt_t* stop_words; - - allocator = stopword_info->heap; - heap = static_cast<mem_heap_t*>(allocator->arg); - - if (!stopword_info->cached_stopword) { - /* For default stopword, we always use fts_utf8_string_cmp() */ - stopword_info->cached_stopword = rbt_create( - sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp); - } - - stop_words = stopword_info->cached_stopword; - - str.f_n_char = 0; - - for (ulint i = 0; fts_default_stopword[i]; ++i) { - char* word; - fts_tokenizer_word_t new_word; - - /* We are going to duplicate the value below. 
*/ - word = const_cast<char*>(fts_default_stopword[i]); - - new_word.nodes = ib_vector_create( - allocator, sizeof(fts_node_t), 4); - - str.f_len = ut_strlen(word); - str.f_str = reinterpret_cast<byte*>(word); - - fts_utf8_string_dup(&new_word.text, &str, heap); - - rbt_insert(stop_words, &new_word, &new_word); - } - - stopword_info->status = STOPWORD_FROM_DEFAULT; -} - -/****************************************************************//** -Callback function to read a single stopword value. -@return Always return TRUE */ -static -ibool -fts_read_stopword( -/*==============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to ib_vector_t */ -{ - ib_alloc_t* allocator; - fts_stopword_t* stopword_info; - sel_node_t* sel_node; - que_node_t* exp; - ib_rbt_t* stop_words; - dfield_t* dfield; - fts_string_t str; - mem_heap_t* heap; - ib_rbt_bound_t parent; - - sel_node = static_cast<sel_node_t*>(row); - stopword_info = static_cast<fts_stopword_t*>(user_arg); - - stop_words = stopword_info->cached_stopword; - allocator = static_cast<ib_alloc_t*>(stopword_info->heap); - heap = static_cast<mem_heap_t*>(allocator->arg); - - exp = sel_node->select_list; - - /* We only need to read the first column */ - dfield = que_node_get_val(exp); - - str.f_n_char = 0; - str.f_str = static_cast<byte*>(dfield_get_data(dfield)); - str.f_len = dfield_get_len(dfield); - - /* Only create new node if it is a value not already existed */ - if (str.f_len != UNIV_SQL_NULL - && rbt_search(stop_words, &parent, &str) != 0) { - - fts_tokenizer_word_t new_word; - - new_word.nodes = ib_vector_create( - allocator, sizeof(fts_node_t), 4); - - new_word.text.f_str = static_cast<byte*>( - mem_heap_alloc(heap, str.f_len + 1)); - - memcpy(new_word.text.f_str, str.f_str, str.f_len); - - new_word.text.f_n_char = 0; - new_word.text.f_len = str.f_len; - new_word.text.f_str[str.f_len] = 0; - - rbt_insert(stop_words, &new_word, &new_word); - } - - return(TRUE); -} - 
-/******************************************************************//** -Load user defined stopword from designated user table -@return TRUE if load operation is successful */ -static -ibool -fts_load_user_stopword( -/*===================*/ - fts_t* fts, /*!< in: FTS struct */ - const char* stopword_table_name, /*!< in: Stopword table - name */ - fts_stopword_t* stopword_info) /*!< in: Stopword info */ -{ - pars_info_t* info; - que_t* graph; - dberr_t error = DB_SUCCESS; - ibool ret = TRUE; - trx_t* trx; - ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED; - - trx = trx_allocate_for_background(); - trx->op_info = "Load user stopword table into FTS cache"; - - if (!has_lock) { - mutex_enter(&dict_sys->mutex); - } - - /* Validate the user table existence and in the right - format */ - stopword_info->charset = fts_valid_stopword_table(stopword_table_name); - if (!stopword_info->charset) { - ret = FALSE; - goto cleanup; - } else if (!stopword_info->cached_stopword) { - /* Create the stopword RB tree with the stopword column - charset. 
All comparison will use this charset */ - stopword_info->cached_stopword = rbt_create_arg_cmp( - sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, - (void*)stopword_info->charset); - - } - - info = pars_info_create(); - - pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name); - - pars_info_bind_function(info, "my_func", fts_read_stopword, - stopword_info); - - graph = fts_parse_sql_no_dict_lock( - NULL, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT value " - " FROM $table_stopword;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - for (;;) { - error = fts_eval_sql(trx, graph); - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - stopword_info->status = STOPWORD_USER_TABLE; - break; - } else { - - fts_sql_rollback(trx); - - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading user stopword table. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error '%s' " - "while reading user stopword table.\n", - ut_strerr(error)); - ret = FALSE; - break; - } - } - } - - que_graph_free(graph); - -cleanup: - if (!has_lock) { - mutex_exit(&dict_sys->mutex); - } - - trx_free_for_background(trx); - return(ret); -} - -/******************************************************************//** -Initialize the index cache. 
*/ -static -void -fts_index_cache_init( -/*=================*/ - ib_alloc_t* allocator, /*!< in: the allocator to use */ - fts_index_cache_t* index_cache) /*!< in: index cache */ -{ - ulint i; - - ut_a(index_cache->words == NULL); - - index_cache->words = rbt_create_arg_cmp( - sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp, - (void*)index_cache->charset); - - ut_a(index_cache->doc_stats == NULL); - - index_cache->doc_stats = ib_vector_create( - allocator, sizeof(fts_doc_stats_t), 4); - - for (i = 0; fts_index_selector[i].value; ++i) { - ut_a(index_cache->ins_graph[i] == NULL); - ut_a(index_cache->sel_graph[i] == NULL); - } -} - -/*********************************************************************//** -Initialize FTS cache. */ -UNIV_INTERN -void -fts_cache_init( -/*===========*/ - fts_cache_t* cache) /*!< in: cache to initialize */ -{ - ulint i; - - /* Just to make sure */ - ut_a(cache->sync_heap->arg == NULL); - - cache->sync_heap->arg = mem_heap_create(1024); - - cache->total_size = 0; - - mutex_enter((ib_mutex_t*) &cache->deleted_lock); - cache->deleted_doc_ids = ib_vector_create( - cache->sync_heap, sizeof(fts_update_t), 4); - mutex_exit((ib_mutex_t*) &cache->deleted_lock); - - /* Reset the cache data for all the FTS indexes. */ - for (i = 0; i < ib_vector_size(cache->indexes); ++i) { - fts_index_cache_t* index_cache; - - index_cache = static_cast<fts_index_cache_t*>( - ib_vector_get(cache->indexes, i)); - - fts_index_cache_init(cache->sync_heap, index_cache); - } -} - -/****************************************************************//** -Create a FTS cache. 
*/ -UNIV_INTERN -fts_cache_t* -fts_cache_create( -/*=============*/ - dict_table_t* table) /*!< in: table owns the FTS cache */ -{ - mem_heap_t* heap; - fts_cache_t* cache; - - heap = static_cast<mem_heap_t*>(mem_heap_create(512)); - - cache = static_cast<fts_cache_t*>( - mem_heap_zalloc(heap, sizeof(*cache))); - - cache->cache_heap = heap; - - rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE); - - rw_lock_create( - fts_cache_init_rw_lock_key, &cache->init_lock, - SYNC_FTS_CACHE_INIT); - - mutex_create( - fts_delete_mutex_key, &cache->deleted_lock, SYNC_FTS_OPTIMIZE); - - mutex_create( - fts_optimize_mutex_key, &cache->optimize_lock, - SYNC_FTS_OPTIMIZE); - - mutex_create( - fts_doc_id_mutex_key, &cache->doc_id_lock, SYNC_FTS_OPTIMIZE); - - /* This is the heap used to create the cache itself. */ - cache->self_heap = ib_heap_allocator_create(heap); - - /* This is a transient heap, used for storing sync data. */ - cache->sync_heap = ib_heap_allocator_create(heap); - cache->sync_heap->arg = NULL; - - fts_need_sync = false; - - cache->sync = static_cast<fts_sync_t*>( - mem_heap_zalloc(heap, sizeof(fts_sync_t))); - - cache->sync->table = table; - cache->sync->event = os_event_create(); - - /* Create the index cache vector that will hold the inverted indexes. 
*/ - cache->indexes = ib_vector_create( - cache->self_heap, sizeof(fts_index_cache_t), 2); - - fts_cache_init(cache); - - cache->stopword_info.cached_stopword = NULL; - cache->stopword_info.charset = NULL; - - cache->stopword_info.heap = cache->self_heap; - - cache->stopword_info.status = STOPWORD_NOT_INIT; - - return(cache); -} - -/*******************************************************************//** -Add a newly create index into FTS cache */ -UNIV_INTERN -void -fts_add_index( -/*==========*/ - dict_index_t* index, /*!< FTS index to be added */ - dict_table_t* table) /*!< table */ -{ - fts_t* fts = table->fts; - fts_cache_t* cache; - fts_index_cache_t* index_cache; - - ut_ad(fts); - cache = table->fts->cache; - - rw_lock_x_lock(&cache->init_lock); - - ib_vector_push(fts->indexes, &index); - - index_cache = fts_find_index_cache(cache, index); - - if (!index_cache) { - /* Add new index cache structure */ - index_cache = fts_cache_index_cache_create(table, index); - } - - rw_lock_x_unlock(&cache->init_lock); -} - -/*******************************************************************//** -recalibrate get_doc structure after index_cache in cache->indexes changed */ -static -void -fts_reset_get_doc( -/*==============*/ - fts_cache_t* cache) /*!< in: FTS index cache */ -{ - fts_get_doc_t* get_doc; - ulint i; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX)); -#endif - ib_vector_reset(cache->get_docs); - - for (i = 0; i < ib_vector_size(cache->indexes); i++) { - fts_index_cache_t* ind_cache; - - ind_cache = static_cast<fts_index_cache_t*>( - ib_vector_get(cache->indexes, i)); - - get_doc = static_cast<fts_get_doc_t*>( - ib_vector_push(cache->get_docs, NULL)); - - memset(get_doc, 0x0, sizeof(*get_doc)); - - get_doc->index_cache = ind_cache; - } - - ut_ad(ib_vector_size(cache->get_docs) - == ib_vector_size(cache->indexes)); -} - -/*******************************************************************//** -Check an index is in the table->indexes 
list -@return TRUE if it exists */ -static -ibool -fts_in_dict_index( -/*==============*/ - dict_table_t* table, /*!< in: Table */ - dict_index_t* index_check) /*!< in: index to be checked */ -{ - dict_index_t* index; - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - if (index == index_check) { - return(TRUE); - } - } - - return(FALSE); -} - -/*******************************************************************//** -Check an index is in the fts->cache->indexes list -@return TRUE if it exists */ -static -ibool -fts_in_index_cache( -/*===============*/ - dict_table_t* table, /*!< in: Table */ - dict_index_t* index) /*!< in: index to be checked */ -{ - ulint i; - - for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) { - fts_index_cache_t* index_cache; - - index_cache = static_cast<fts_index_cache_t*>( - ib_vector_get(table->fts->cache->indexes, i)); - - if (index_cache->index == index) { - return(TRUE); - } - } - - return(FALSE); -} - -/*******************************************************************//** -Check indexes in the fts->indexes is also present in index cache and -table->indexes list -@return TRUE if all indexes match */ -UNIV_INTERN -ibool -fts_check_cached_index( -/*===================*/ - dict_table_t* table) /*!< in: Table where indexes are dropped */ -{ - ulint i; - - if (!table->fts || !table->fts->cache) { - return(TRUE); - } - - ut_a(ib_vector_size(table->fts->indexes) - == ib_vector_size(table->fts->cache->indexes)); - - for (i = 0; i < ib_vector_size(table->fts->indexes); i++) { - dict_index_t* index; - - index = static_cast<dict_index_t*>( - ib_vector_getp(table->fts->indexes, i)); - - if (!fts_in_index_cache(table, index)) { - return(FALSE); - } - - if (!fts_in_dict_index(table, index)) { - return(FALSE); - } - } - - return(TRUE); -} - -/*******************************************************************//** -Drop auxiliary tables related to an FTS index -@return 
DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fts_drop_index( -/*===========*/ - dict_table_t* table, /*!< in: Table where indexes are dropped */ - dict_index_t* index, /*!< in: Index to be dropped */ - trx_t* trx) /*!< in: Transaction for the drop */ -{ - ib_vector_t* indexes = table->fts->indexes; - dberr_t err = DB_SUCCESS; - - ut_a(indexes); - - if ((ib_vector_size(indexes) == 1 - && (index == static_cast<dict_index_t*>( - ib_vector_getp(table->fts->indexes, 0)))) - || ib_vector_is_empty(indexes)) { - doc_id_t current_doc_id; - doc_id_t first_doc_id; - - /* If we are dropping the only FTS index of the table, - remove it from optimize thread */ - fts_optimize_remove_table(table); - - DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS); - - /* If Doc ID column is not added internally by FTS index, - we can drop all FTS auxiliary tables. Otherwise, we will - need to keep some common table such as CONFIG table, so - as to keep track of incrementing Doc IDs */ - if (!DICT_TF2_FLAG_IS_SET( - table, DICT_TF2_FTS_HAS_DOC_ID)) { - - err = fts_drop_tables(trx, table); - - err = fts_drop_index_tables(trx, index); - - fts_free(table); - - return(err); - } - - current_doc_id = table->fts->cache->next_doc_id; - first_doc_id = table->fts->cache->first_doc_id; - fts_cache_clear(table->fts->cache); - fts_cache_destroy(table->fts->cache); - table->fts->cache = fts_cache_create(table); - table->fts->cache->next_doc_id = current_doc_id; - table->fts->cache->first_doc_id = first_doc_id; - } else { - fts_cache_t* cache = table->fts->cache; - fts_index_cache_t* index_cache; - - rw_lock_x_lock(&cache->init_lock); - - index_cache = fts_find_index_cache(cache, index); - - if (index_cache != NULL) { - if (index_cache->words) { - fts_words_free(index_cache->words); - rbt_free(index_cache->words); - } - - ib_vector_remove(cache->indexes, *(void**) index_cache); - } - - if (cache->get_docs) { - fts_reset_get_doc(cache); - } - - rw_lock_x_unlock(&cache->init_lock); - } - - err = 
fts_drop_index_tables(trx, index); - - ib_vector_remove(indexes, (const void*) index); - - return(err); -} - -/****************************************************************//** -Free the query graph but check whether dict_sys->mutex is already -held */ -UNIV_INTERN -void -fts_que_graph_free_check_lock( -/*==========================*/ - fts_table_t* fts_table, /*!< in: FTS table */ - const fts_index_cache_t*index_cache, /*!< in: FTS index cache */ - que_t* graph) /*!< in: query graph */ -{ - ibool has_dict = FALSE; - - if (fts_table && fts_table->table) { - ut_ad(fts_table->table->fts); - - has_dict = fts_table->table->fts->fts_status - & TABLE_DICT_LOCKED; - } else if (index_cache) { - ut_ad(index_cache->index->table->fts); - - has_dict = index_cache->index->table->fts->fts_status - & TABLE_DICT_LOCKED; - } - - if (!has_dict) { - mutex_enter(&dict_sys->mutex); - } - - ut_ad(mutex_own(&dict_sys->mutex)); - - que_graph_free(graph); - - if (!has_dict) { - mutex_exit(&dict_sys->mutex); - } -} - -/****************************************************************//** -Create an FTS index cache. */ -UNIV_INTERN -CHARSET_INFO* -fts_index_get_charset( -/*==================*/ - dict_index_t* index) /*!< in: FTS index */ -{ - CHARSET_INFO* charset = NULL; - dict_field_t* field; - ulint prtype; - - field = dict_index_get_nth_field(index, 0); - prtype = field->col->prtype; - - charset = innobase_get_fts_charset( - (int) (prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); - -#ifdef FTS_DEBUG - /* Set up charset info for this index. 
Please note all - field of the FTS index should have the same charset */ - for (i = 1; i < index->n_fields; i++) { - CHARSET_INFO* fld_charset; - - field = dict_index_get_nth_field(index, i); - prtype = field->col->prtype; - - fld_charset = innobase_get_fts_charset( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); - - /* All FTS columns should have the same charset */ - if (charset) { - ut_a(charset == fld_charset); - } else { - charset = fld_charset; - } - } -#endif - - return(charset); - -} -/****************************************************************//** -Create an FTS index cache. -@return Index Cache */ -UNIV_INTERN -fts_index_cache_t* -fts_cache_index_cache_create( -/*=========================*/ - dict_table_t* table, /*!< in: table with FTS index */ - dict_index_t* index) /*!< in: FTS index */ -{ - ulint n_bytes; - fts_index_cache_t* index_cache; - fts_cache_t* cache = table->fts->cache; - - ut_a(cache != NULL); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX)); -#endif - - /* Must not already exist in the cache vector. 
*/ - ut_a(fts_find_index_cache(cache, index) == NULL); - - index_cache = static_cast<fts_index_cache_t*>( - ib_vector_push(cache->indexes, NULL)); - - memset(index_cache, 0x0, sizeof(*index_cache)); - - index_cache->index = index; - - index_cache->charset = fts_index_get_charset(index); - - n_bytes = sizeof(que_t*) * sizeof(fts_index_selector); - - index_cache->ins_graph = static_cast<que_t**>( - mem_heap_zalloc(static_cast<mem_heap_t*>( - cache->self_heap->arg), n_bytes)); - - index_cache->sel_graph = static_cast<que_t**>( - mem_heap_zalloc(static_cast<mem_heap_t*>( - cache->self_heap->arg), n_bytes)); - - fts_index_cache_init(cache->sync_heap, index_cache); - - if (cache->get_docs) { - fts_reset_get_doc(cache); - } - - return(index_cache); -} - -/****************************************************************//** -Release all resources help by the words rb tree e.g., the node ilist. */ -static -void -fts_words_free( -/*===========*/ - ib_rbt_t* words) /*!< in: rb tree of words */ -{ - const ib_rbt_node_t* rbt_node; - - /* Free the resources held by a word. */ - for (rbt_node = rbt_first(words); - rbt_node != NULL; - rbt_node = rbt_first(words)) { - - ulint i; - fts_tokenizer_word_t* word; - - word = rbt_value(fts_tokenizer_word_t, rbt_node); - - /* Free the ilists of this word. */ - for (i = 0; i < ib_vector_size(word->nodes); ++i) { - - fts_node_t* fts_node = static_cast<fts_node_t*>( - ib_vector_get(word->nodes, i)); - - ut_free(fts_node->ilist); - fts_node->ilist = NULL; - } - - /* NOTE: We are responsible for free'ing the node */ - ut_free(rbt_remove_node(words, rbt_node)); - } -} - -/** Clear cache. 
-@param[in,out] cache fts cache */ -UNIV_INTERN -void -fts_cache_clear( - fts_cache_t* cache) -{ - ulint i; - - for (i = 0; i < ib_vector_size(cache->indexes); ++i) { - ulint j; - fts_index_cache_t* index_cache; - - index_cache = static_cast<fts_index_cache_t*>( - ib_vector_get(cache->indexes, i)); - - fts_words_free(index_cache->words); - - rbt_free(index_cache->words); - - index_cache->words = NULL; - - for (j = 0; fts_index_selector[j].value; ++j) { - - if (index_cache->ins_graph[j] != NULL) { - - fts_que_graph_free_check_lock( - NULL, index_cache, - index_cache->ins_graph[j]); - - index_cache->ins_graph[j] = NULL; - } - - if (index_cache->sel_graph[j] != NULL) { - - fts_que_graph_free_check_lock( - NULL, index_cache, - index_cache->sel_graph[j]); - - index_cache->sel_graph[j] = NULL; - } - } - - index_cache->doc_stats = NULL; - } - - mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg)); - cache->sync_heap->arg = NULL; - - cache->total_size = 0; - - mutex_enter((ib_mutex_t*) &cache->deleted_lock); - cache->deleted_doc_ids = NULL; - mutex_exit((ib_mutex_t*) &cache->deleted_lock); -} - -/*********************************************************************//** -Search the index specific cache for a particular FTS index. 
-@return the index cache else NULL */ -UNIV_INLINE -fts_index_cache_t* -fts_get_index_cache( -/*================*/ - fts_cache_t* cache, /*!< in: cache to search */ - const dict_index_t* index) /*!< in: index to search for */ -{ - ulint i; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX) - || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX)); -#endif - - for (i = 0; i < ib_vector_size(cache->indexes); ++i) { - fts_index_cache_t* index_cache; - - index_cache = static_cast<fts_index_cache_t*>( - ib_vector_get(cache->indexes, i)); - - if (index_cache->index == index) { - - return(index_cache); - } - } - - return(NULL); -} - -#ifdef FTS_DEBUG -/*********************************************************************//** -Search the index cache for a get_doc structure. -@return the fts_get_doc_t item else NULL */ -static -fts_get_doc_t* -fts_get_index_get_doc( -/*==================*/ - fts_cache_t* cache, /*!< in: cache to search */ - const dict_index_t* index) /*!< in: index to search for */ -{ - ulint i; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX)); -#endif - - for (i = 0; i < ib_vector_size(cache->get_docs); ++i) { - fts_get_doc_t* get_doc; - - get_doc = static_cast<fts_get_doc_t*>( - ib_vector_get(cache->get_docs, i)); - - if (get_doc->index_cache->index == index) { - - return(get_doc); - } - } - - return(NULL); -} -#endif - -/**********************************************************************//** -Free the FTS cache. 
*/ -UNIV_INTERN -void -fts_cache_destroy( -/*==============*/ - fts_cache_t* cache) /*!< in: cache*/ -{ - rw_lock_free(&cache->lock); - rw_lock_free(&cache->init_lock); - mutex_free(&cache->optimize_lock); - mutex_free(&cache->deleted_lock); - mutex_free(&cache->doc_id_lock); - os_event_free(cache->sync->event); - - if (cache->stopword_info.cached_stopword) { - rbt_free(cache->stopword_info.cached_stopword); - } - - if (cache->sync_heap->arg) { - mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg)); - } - - mem_heap_free(cache->cache_heap); -} - -/**********************************************************************//** -Find an existing word, or if not found, create one and return it. -@return specified word token */ -static -fts_tokenizer_word_t* -fts_tokenizer_word_get( -/*===================*/ - fts_cache_t* cache, /*!< in: cache */ - fts_index_cache_t* - index_cache, /*!< in: index cache */ - fts_string_t* text) /*!< in: node text */ -{ - fts_tokenizer_word_t* word; - ib_rbt_bound_t parent; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX)); -#endif - - /* If it is a stopword, do not index it */ - if (cache->stopword_info.cached_stopword != NULL - && rbt_search(cache->stopword_info.cached_stopword, - &parent, text) == 0) { - - return(NULL); - } - - /* Check if we found a match, if not then add word to tree. */ - if (rbt_search(index_cache->words, &parent, text) != 0) { - mem_heap_t* heap; - fts_tokenizer_word_t new_word; - - heap = static_cast<mem_heap_t*>(cache->sync_heap->arg); - - new_word.nodes = ib_vector_create( - cache->sync_heap, sizeof(fts_node_t), 4); - - fts_utf8_string_dup(&new_word.text, text, heap); - - parent.last = rbt_add_node( - index_cache->words, &parent, &new_word); - - /* Take into account the RB tree memory use and the vector. 
*/ - cache->total_size += sizeof(new_word) - + sizeof(ib_rbt_node_t) - + text->f_len - + (sizeof(fts_node_t) * 4) - + sizeof(*new_word.nodes); - - ut_ad(rbt_validate(index_cache->words)); - } - - word = rbt_value(fts_tokenizer_word_t, parent.last); - - return(word); -} - -/**********************************************************************//** -Add the given doc_id/word positions to the given node's ilist. */ -UNIV_INTERN -void -fts_cache_node_add_positions( -/*=========================*/ - fts_cache_t* cache, /*!< in: cache */ - fts_node_t* node, /*!< in: word node */ - doc_id_t doc_id, /*!< in: doc id */ - ib_vector_t* positions) /*!< in: fts_token_t::positions */ -{ - ulint i; - byte* ptr; - byte* ilist; - ulint enc_len; - ulint last_pos; - byte* ptr_start; - ulint doc_id_delta; - -#ifdef UNIV_SYNC_DEBUG - if (cache) { - ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX)); - } -#endif - ut_ad(doc_id >= node->last_doc_id); - - /* Calculate the space required to store the ilist. */ - doc_id_delta = (ulint)(doc_id - node->last_doc_id); - enc_len = fts_get_encoded_len(doc_id_delta); - - last_pos = 0; - for (i = 0; i < ib_vector_size(positions); i++) { - ulint pos = *(static_cast<ulint*>( - ib_vector_get(positions, i))); - - ut_ad(last_pos == 0 || pos > last_pos); - - enc_len += fts_get_encoded_len(pos - last_pos); - last_pos = pos; - } - - /* The 0x00 byte at the end of the token positions list. */ - enc_len++; - - if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) { - /* No need to allocate more space, we can fit in the new - data at the end of the old one. */ - ilist = NULL; - ptr = node->ilist + node->ilist_size; - } else { - ulint new_size = node->ilist_size + enc_len; - - /* Over-reserve space by a fixed size for small lengths and - by 20% for lengths >= 48 bytes. 
*/ - if (new_size < 16) { - new_size = 16; - } else if (new_size < 32) { - new_size = 32; - } else if (new_size < 48) { - new_size = 48; - } else { - new_size = (ulint)(1.2 * new_size); - } - - ilist = static_cast<byte*>(ut_malloc(new_size)); - ptr = ilist + node->ilist_size; - - node->ilist_size_alloc = new_size; - } - - ptr_start = ptr; - - /* Encode the new fragment. */ - ptr += fts_encode_int(doc_id_delta, ptr); - - last_pos = 0; - for (i = 0; i < ib_vector_size(positions); i++) { - ulint pos = *(static_cast<ulint*>( - ib_vector_get(positions, i))); - - ptr += fts_encode_int(pos - last_pos, ptr); - last_pos = pos; - } - - *ptr++ = 0; - - ut_a(enc_len == (ulint)(ptr - ptr_start)); - - if (ilist) { - /* Copy old ilist to the start of the new one and switch the - new one into place in the node. */ - if (node->ilist_size > 0) { - memcpy(ilist, node->ilist, node->ilist_size); - ut_free(node->ilist); - } - - node->ilist = ilist; - } - - node->ilist_size += enc_len; - - if (cache) { - cache->total_size += enc_len; - } - - if (node->first_doc_id == FTS_NULL_DOC_ID) { - node->first_doc_id = doc_id; - } - - node->last_doc_id = doc_id; - ++node->doc_count; -} - -/**********************************************************************//** -Add document to the cache. */ -static -void -fts_cache_add_doc( -/*==============*/ - fts_cache_t* cache, /*!< in: cache */ - fts_index_cache_t* - index_cache, /*!< in: index cache */ - doc_id_t doc_id, /*!< in: doc id to add */ - ib_rbt_t* tokens) /*!< in: document tokens */ -{ - const ib_rbt_node_t* node; - ulint n_words; - fts_doc_stats_t* doc_stats; - - if (!tokens) { - return; - } - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX)); -#endif - - n_words = rbt_size(tokens); - - for (node = rbt_first(tokens); node; node = rbt_first(tokens)) { - - fts_tokenizer_word_t* word; - fts_node_t* fts_node = NULL; - fts_token_t* token = rbt_value(fts_token_t, node); - - /* Find and/or add token to the cache. 
*/ - word = fts_tokenizer_word_get( - cache, index_cache, &token->text); - - if (!word) { - ut_free(rbt_remove_node(tokens, node)); - continue; - } - - if (ib_vector_size(word->nodes) > 0) { - fts_node = static_cast<fts_node_t*>( - ib_vector_last(word->nodes)); - } - - if (fts_node == NULL || fts_node->synced - || fts_node->ilist_size > FTS_ILIST_MAX_SIZE - || doc_id < fts_node->last_doc_id) { - - fts_node = static_cast<fts_node_t*>( - ib_vector_push(word->nodes, NULL)); - - memset(fts_node, 0x0, sizeof(*fts_node)); - - cache->total_size += sizeof(*fts_node); - } - - fts_cache_node_add_positions( - cache, fts_node, doc_id, token->positions); - - ut_free(rbt_remove_node(tokens, node)); - } - - ut_a(rbt_empty(tokens)); - - /* Add to doc ids processed so far. */ - doc_stats = static_cast<fts_doc_stats_t*>( - ib_vector_push(index_cache->doc_stats, NULL)); - - doc_stats->doc_id = doc_id; - doc_stats->word_count = n_words; - - /* Add the doc stats memory usage too. */ - cache->total_size += sizeof(*doc_stats); - - if (doc_id > cache->sync->max_doc_id) { - cache->sync->max_doc_id = doc_id; - } -} - -/****************************************************************//** -Drops a table. If the table can't be found we return a SUCCESS code. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_drop_table( -/*===========*/ - trx_t* trx, /*!< in: transaction */ - const char* table_name) /*!< in: table to drop */ -{ - dict_table_t* table; - dberr_t error = DB_SUCCESS; - - /* Check that the table exists in our data dictionary. 
- Similar to regular drop table case, we will open table with - DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */ - table = dict_table_open_on_name( - table_name, TRUE, FALSE, - static_cast<dict_err_ignore_t>( - DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT)); - - if (table != 0) { - - dict_table_close(table, TRUE, FALSE); - - /* Pass nonatomic=false (dont allow data dict unlock), - because the transaction may hold locks on SYS_* tables from - previous calls to fts_drop_table(). */ - error = row_drop_table_for_mysql(table_name, trx, true, false); - - if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to drop FTS index aux table %s: %s", - table_name, ut_strerr(error)); - } - } else { - error = DB_FAIL; - } - - return(error); -} - -/****************************************************************//** -Rename a single auxiliary table due to database name change. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_rename_one_aux_table( -/*=====================*/ - const char* new_name, /*!< in: new parent tbl name */ - const char* fts_table_old_name, /*!< in: old aux tbl name */ - trx_t* trx) /*!< in: transaction */ -{ - char fts_table_new_name[MAX_TABLE_NAME_LEN]; - ulint new_db_name_len = dict_get_db_name_len(new_name); - ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name); - ulint table_new_name_len = strlen(fts_table_old_name) - + new_db_name_len - old_db_name_len; - - /* Check if the new and old database names are the same, if so, - nothing to do */ - ut_ad((new_db_name_len != old_db_name_len) - || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0); - - /* Get the database name from "new_name", and table name - from the fts_table_old_name */ - strncpy(fts_table_new_name, new_name, new_db_name_len); - strncpy(fts_table_new_name + new_db_name_len, - strchr(fts_table_old_name, '/'), - table_new_name_len - new_db_name_len); - 
fts_table_new_name[table_new_name_len] = 0; - - return(row_rename_table_for_mysql( - fts_table_old_name, fts_table_new_name, trx, false)); -} - -/****************************************************************//** -Rename auxiliary tables for all fts index for a table. This(rename) -is due to database name change -@return DB_SUCCESS or error code */ - -dberr_t -fts_rename_aux_tables( -/*==================*/ - dict_table_t* table, /*!< in: user Table */ - const char* new_name, /*!< in: new table name */ - trx_t* trx) /*!< in: transaction */ -{ - ulint i; - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); - - /* Rename common auxiliary tables */ - for (i = 0; fts_common_tables[i] != NULL; ++i) { - char* old_table_name; - dberr_t err = DB_SUCCESS; - - fts_table.suffix = fts_common_tables[i]; - - old_table_name = fts_get_table_name(&fts_table); - - err = fts_rename_one_aux_table(new_name, old_table_name, trx); - - mem_free(old_table_name); - - if (err != DB_SUCCESS) { - return(err); - } - } - - fts_t* fts = table->fts; - - /* Rename index specific auxiliary tables */ - for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes); - ++i) { - dict_index_t* index; - - index = static_cast<dict_index_t*>( - ib_vector_getp(fts->indexes, i)); - - FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); - - for (ulint j = 0; fts_index_selector[j].value; ++j) { - dberr_t err; - char* old_table_name; - - fts_table.suffix = fts_get_suffix(j); - - old_table_name = fts_get_table_name(&fts_table); - - err = fts_rename_one_aux_table( - new_name, old_table_name, trx); - - DBUG_EXECUTE_IF("fts_rename_failure", - err = DB_DEADLOCK; - fts_sql_rollback(trx);); - - mem_free(old_table_name); - - if (err != DB_SUCCESS) { - return(err); - } - } - } - - return(DB_SUCCESS); -} - -/****************************************************************//** -Drops the common ancillary tables needed for supporting an FTS index -on the given table. 
row_mysql_lock_data_dictionary must have been called -before this. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_drop_common_tables( -/*===================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table) /*!< in: table with an FTS - index */ -{ - ulint i; - dberr_t error = DB_SUCCESS; - - for (i = 0; fts_common_tables[i] != NULL; ++i) { - dberr_t err; - char* table_name; - - fts_table->suffix = fts_common_tables[i]; - - table_name = fts_get_table_name(fts_table); - - err = fts_drop_table(trx, table_name); - - /* We only return the status of the last error. */ - if (err != DB_SUCCESS && err != DB_FAIL) { - error = err; - } - - mem_free(table_name); - } - - return(error); -} - -/****************************************************************//** -Since we do a horizontal split on the index table, we need to drop -all the split tables. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_drop_index_split_tables( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index) /*!< in: fts instance */ - -{ - ulint i; - fts_table_t fts_table; - dberr_t error = DB_SUCCESS; - - FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); - - for (i = 0; fts_index_selector[i].value; ++i) { - dberr_t err; - char* table_name; - - fts_table.suffix = fts_get_suffix(i); - - table_name = fts_get_table_name(&fts_table); - - err = fts_drop_table(trx, table_name); - - /* We only return the status of the last error. 
*/ - if (err != DB_SUCCESS && err != DB_FAIL) { - error = err; - } - - mem_free(table_name); - } - - return(error); -} - -/****************************************************************//** -Drops FTS auxiliary tables for an FTS index -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_drop_index_tables( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index) /*!< in: Index to drop */ -{ - dberr_t error = DB_SUCCESS; - -#ifdef FTS_DOC_STATS_DEBUG - fts_table_t fts_table; - static const char* index_tables[] = { - "DOC_ID", - NULL - }; -#endif /* FTS_DOC_STATS_DEBUG */ - - dberr_t err = fts_drop_index_split_tables(trx, index); - - /* We only return the status of the last error. */ - if (err != DB_SUCCESS) { - error = err; - } - -#ifdef FTS_DOC_STATS_DEBUG - FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index); - - for (ulint i = 0; index_tables[i] != NULL; ++i) { - char* table_name; - - fts_table.suffix = index_tables[i]; - - table_name = fts_get_table_name(&fts_table); - - err = fts_drop_table(trx, table_name); - - /* We only return the status of the last error. */ - if (err != DB_SUCCESS && err != DB_FAIL) { - error = err; - } - - mem_free(table_name); - } -#endif /* FTS_DOC_STATS_DEBUG */ - - return(error); -} - -/****************************************************************//** -Drops FTS ancillary tables needed for supporting an FTS index -on the given table. row_mysql_lock_data_dictionary must have been called -before this. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_drop_all_index_tables( -/*======================*/ - trx_t* trx, /*!< in: transaction */ - fts_t* fts) /*!< in: fts instance */ -{ - dberr_t error = DB_SUCCESS; - - for (ulint i = 0; - fts->indexes != 0 && i < ib_vector_size(fts->indexes); - ++i) { - - dberr_t err; - dict_index_t* index; - - index = static_cast<dict_index_t*>( - ib_vector_getp(fts->indexes, i)); - - err = fts_drop_index_tables(trx, index); - - if (err != DB_SUCCESS) { - error = err; - } - } - - return(error); -} - -/*********************************************************************//** -Drops the ancillary tables needed for supporting an FTS index on a -given table. row_mysql_lock_data_dictionary must have been called before -this. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_drop_tables( -/*============*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table has the FTS index */ -{ - dberr_t error; - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); - - /* TODO: This is not atomic and can cause problems during recovery. */ - - error = fts_drop_common_tables(trx, &fts_table); - - if (error == DB_SUCCESS) { - error = fts_drop_all_index_tables(trx, table->fts); - } - - return(error); -} - -/*********************************************************************//** -Prepare the SQL, so that all '%s' are replaced by the common prefix. 
-@return sql string, use mem_free() to free the memory */ -static -char* -fts_prepare_sql( -/*============*/ - fts_table_t* fts_table, /*!< in: table name info */ - const char* my_template) /*!< in: sql template */ -{ - char* sql; - char* name_prefix; - - name_prefix = fts_get_table_name_prefix(fts_table); - sql = ut_strreplace(my_template, "%s", name_prefix); - mem_free(name_prefix); - - return(sql); -} - -/*********************************************************************//** -Creates the common ancillary tables needed for supporting an FTS index -on the given table. row_mysql_lock_data_dictionary must have been called -before this. -@return DB_SUCCESS if succeed */ -UNIV_INTERN -dberr_t -fts_create_common_tables( -/*=====================*/ - trx_t* trx, /*!< in: transaction */ - const dict_table_t* table, /*!< in: table with FTS index */ - const char* name, /*!< in: table name normalized.*/ - bool skip_doc_id_index)/*!< in: Skip index on doc id */ -{ - char* sql; - dberr_t error; - que_t* graph; - fts_table_t fts_table; - mem_heap_t* heap = mem_heap_create(1024); - pars_info_t* info; - - FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); - - error = fts_drop_common_tables(trx, &fts_table); - - if (error != DB_SUCCESS) { - - goto func_exit; - } - - /* Create the FTS tables that are common to an FTS index. */ - sql = fts_prepare_sql(&fts_table, fts_create_common_tables_sql); - graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql); - mem_free(sql); - - error = fts_eval_sql(trx, graph); - - que_graph_free(graph); - - if (error != DB_SUCCESS) { - - goto func_exit; - } - - /* Write the default settings to the config table. 
*/ - fts_table.suffix = "CONFIG"; - graph = fts_parse_sql_no_dict_lock( - &fts_table, NULL, fts_config_table_insert_values_sql); - - error = fts_eval_sql(trx, graph); - - que_graph_free(graph); - - if (error != DB_SUCCESS || skip_doc_id_index) { - - goto func_exit; - } - - info = pars_info_create(); - - pars_info_bind_id(info, TRUE, "table_name", name); - pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME); - pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME); - - /* Create the FTS DOC_ID index on the hidden column. Currently this - is common for any FT index created on the table. */ - graph = fts_parse_sql_no_dict_lock( - NULL, - info, - mem_heap_printf( - heap, - "BEGIN\n" - "" - "CREATE UNIQUE INDEX $index_name ON $table_name(" - "$doc_id_col_name);\n")); - - error = fts_eval_sql(trx, graph); - que_graph_free(graph); - -func_exit: - if (error != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - row_drop_table_for_mysql(table->name, trx, FALSE, TRUE); - - trx->error_state = DB_SUCCESS; - } - - mem_heap_free(heap); - - return(error); -} - -/*************************************************************//** -Wrapper function of fts_create_index_tables_low(), create auxiliary -tables for an FTS index - -@see row_merge_create_fts_sort_index() -@return: DB_SUCCESS or error code */ -static -dict_table_t* -fts_create_one_index_table( -/*=======================*/ - trx_t* trx, /*!< in: transaction */ - const dict_index_t* - index, /*!< in: the index instance */ - fts_table_t* fts_table, /*!< in: fts_table structure */ - mem_heap_t* heap) /*!< in: heap */ -{ - dict_field_t* field; - dict_table_t* new_table = NULL; - char* table_name = fts_get_table_name(fts_table); - dberr_t error; - CHARSET_INFO* charset; - ulint flags2 = 0; - - ut_ad(index->type & DICT_FTS); - - if (srv_file_per_table) { - flags2 = DICT_TF2_USE_TABLESPACE; - } - - new_table = 
dict_mem_table_create(table_name, 0, 5, 1, flags2); - - field = dict_index_get_nth_field(index, 0); - charset = innobase_get_fts_charset( - (int)(field->col->prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(field->col->prtype)); - - dict_mem_table_add_col(new_table, heap, "word", - charset == &my_charset_latin1 - ? DATA_VARCHAR : DATA_VARMYSQL, - field->col->prtype, - FTS_MAX_WORD_LEN_IN_CHAR - * DATA_MBMAXLEN(field->col->mbminmaxlen)); - - dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, - sizeof(doc_id_t)); - - dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, - sizeof(doc_id_t)); - - dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT, - DATA_NOT_NULL | DATA_UNSIGNED, 4); - - dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB, - 4130048, 0); - - error = row_create_table_for_mysql(new_table, trx, false, FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); - - if (error != DB_SUCCESS) { - trx->error_state = error; - dict_mem_table_free(new_table); - new_table = NULL; - ib_logf(IB_LOG_LEVEL_WARN, - "Fail to create FTS index table %s", table_name); - } - - mem_free(table_name); - - return(new_table); -} - -/*************************************************************//** -Wrapper function of fts_create_index_tables_low(), create auxiliary -tables for an FTS index -@return: DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_create_index_tables_low( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - const dict_index_t* - index, /*!< in: the index instance */ - const char* table_name, /*!< in: the table name */ - table_id_t table_id) /*!< in: the table id */ - -{ - ulint i; - que_t* graph; - fts_table_t fts_table; - dberr_t error = DB_SUCCESS; - mem_heap_t* heap = mem_heap_create(1024); - - fts_table.type = FTS_INDEX_TABLE; - fts_table.index_id = index->id; - fts_table.table_id = table_id; - fts_table.parent = 
table_name; - fts_table.table = index->table; - -#ifdef FTS_DOC_STATS_DEBUG - char* sql; - - /* Create the FTS auxiliary tables that are specific - to an FTS index. */ - sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql); - - graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql); - mem_free(sql); - - error = fts_eval_sql(trx, graph); - que_graph_free(graph); -#endif /* FTS_DOC_STATS_DEBUG */ - - for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) { - dict_table_t* new_table; - - /* Create the FTS auxiliary tables that are specific - to an FTS index. We need to preserve the table_id %s - which fts_parse_sql_no_dict_lock() will fill in for us. */ - fts_table.suffix = fts_get_suffix(i); - - new_table = fts_create_one_index_table( - trx, index, &fts_table, heap); - - if (!new_table) { - error = DB_FAIL; - break; - } - - graph = fts_parse_sql_no_dict_lock( - &fts_table, NULL, fts_create_index_sql); - - error = fts_eval_sql(trx, graph); - que_graph_free(graph); - } - - if (error != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - row_drop_table_for_mysql(table_name, trx, FALSE, TRUE); - - trx->error_state = DB_SUCCESS; - } - - mem_heap_free(heap); - - return(error); -} - -/******************************************************************//** -Creates the column specific ancillary tables needed for supporting an -FTS index on the given table. row_mysql_lock_data_dictionary must have -been called before this. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_create_index_tables( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: the index instance */ -{ - dberr_t err; - dict_table_t* table; - - table = dict_table_get_low(index->table_name); - ut_a(table != NULL); - - err = fts_create_index_tables_low(trx, index, table->name, table->id); - - if (err == DB_SUCCESS) { - trx_commit(trx); - } - - return(err); -} -#if 0 -/******************************************************************//** -Return string representation of state. */ -static -const char* -fts_get_state_str( -/*==============*/ - /* out: string representation of state */ - fts_row_state state) /*!< in: state */ -{ - switch (state) { - case FTS_INSERT: - return("INSERT"); - - case FTS_MODIFY: - return("MODIFY"); - - case FTS_DELETE: - return("DELETE"); - - case FTS_NOTHING: - return("NOTHING"); - - case FTS_INVALID: - return("INVALID"); - - default: - return("UNKNOWN"); - } -} -#endif - -/******************************************************************//** -Calculate the new state of a row given the existing state and a new event. -@return new state of row */ -static -fts_row_state -fts_trx_row_get_new_state( -/*======================*/ - fts_row_state old_state, /*!< in: existing state of row */ - fts_row_state event) /*!< in: new event */ -{ - /* The rules for transforming states: - - I = inserted - M = modified - D = deleted - N = nothing - - M+D -> D: - - If the row existed before the transaction started and it is modified - during the transaction, followed by a deletion of the row, only the - deletion will be signaled. - - M+ -> M: - - If the row existed before the transaction started and it is modified - more than once during the transaction, only the last modification - will be signaled. 
- - IM*D -> N: - - If a new row is added during the transaction (and possibly modified - after its initial insertion) but it is deleted before the end of the - transaction, nothing will be signaled. - - IM* -> I: - - If a new row is added during the transaction and modified after its - initial insertion, only the addition will be signaled. - - M*DI -> M: - - If the row existed before the transaction started and it is deleted, - then re-inserted, only a modification will be signaled. Note that - this case is only possible if the table is using the row's primary - key for FTS row ids, since those can be re-inserted by the user, - which is not true for InnoDB generated row ids. - - It is easily seen that the above rules decompose such that we do not - need to store the row's entire history of events. Instead, we can - store just one state for the row and update that when new events - arrive. Then we can implement the above rules as a two-dimensional - look-up table, and get checking of invalid combinations "for free" - in the process. */ - - /* The lookup table for transforming states. old_state is the - Y-axis, event is the X-axis. */ - static const fts_row_state table[4][4] = { - /* I M D N */ - /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID }, - /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID }, - /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID }, - /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID } - }; - - fts_row_state result; - - ut_a(old_state < FTS_INVALID); - ut_a(event < FTS_INVALID); - - result = table[(int) old_state][(int) event]; - ut_a(result != FTS_INVALID); - - return(result); -} - -/******************************************************************//** -Create a savepoint instance. 
-@return savepoint instance */ -static -fts_savepoint_t* -fts_savepoint_create( -/*=================*/ - ib_vector_t* savepoints, /*!< out: InnoDB transaction */ - const char* name, /*!< in: savepoint name */ - mem_heap_t* heap) /*!< in: heap */ -{ - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_push(savepoints, NULL)); - - memset(savepoint, 0x0, sizeof(*savepoint)); - - if (name) { - savepoint->name = mem_heap_strdup(heap, name); - } - - savepoint->tables = rbt_create( - sizeof(fts_trx_table_t*), fts_trx_table_cmp); - - return(savepoint); -} - -/******************************************************************//** -Create an FTS trx. -@return FTS trx */ -static -fts_trx_t* -fts_trx_create( -/*===========*/ - trx_t* trx) /*!< in/out: InnoDB - transaction */ -{ - fts_trx_t* ftt; - ib_alloc_t* heap_alloc; - mem_heap_t* heap = mem_heap_create(1024); - trx_named_savept_t* savep; - - ut_a(trx->fts_trx == NULL); - - ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t))); - ftt->trx = trx; - ftt->heap = heap; - - heap_alloc = ib_heap_allocator_create(heap); - - ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create( - heap_alloc, sizeof(fts_savepoint_t), 4)); - - ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create( - heap_alloc, sizeof(fts_savepoint_t), 4)); - - /* Default instance has no name and no heap. */ - fts_savepoint_create(ftt->savepoints, NULL, NULL); - fts_savepoint_create(ftt->last_stmt, NULL, NULL); - - /* Copy savepoints that already set before. */ - for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - savep != NULL; - savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) { - - fts_savepoint_take(trx, ftt, savep->name); - } - - return(ftt); -} - -/******************************************************************//** -Create an FTS trx table. 
-@return FTS trx table */ -static -fts_trx_table_t* -fts_trx_table_create( -/*=================*/ - fts_trx_t* fts_trx, /*!< in: FTS trx */ - dict_table_t* table) /*!< in: table */ -{ - fts_trx_table_t* ftt; - - ftt = static_cast<fts_trx_table_t*>( - mem_heap_alloc(fts_trx->heap, sizeof(*ftt))); - - memset(ftt, 0x0, sizeof(*ftt)); - - ftt->table = table; - ftt->fts_trx = fts_trx; - - ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp); - - return(ftt); -} - -/******************************************************************//** -Clone an FTS trx table. -@return FTS trx table */ -static -fts_trx_table_t* -fts_trx_table_clone( -/*=================*/ - const fts_trx_table_t* ftt_src) /*!< in: FTS trx */ -{ - fts_trx_table_t* ftt; - - ftt = static_cast<fts_trx_table_t*>( - mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt))); - - memset(ftt, 0x0, sizeof(*ftt)); - - ftt->table = ftt_src->table; - ftt->fts_trx = ftt_src->fts_trx; - - ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp); - - /* Copy the rb tree values to the new savepoint. */ - rbt_merge_uniq(ftt->rows, ftt_src->rows); - - /* These are only added on commit. At this stage we only have - the updated row state. */ - ut_a(ftt_src->added_doc_ids == NULL); - - return(ftt); -} - -/******************************************************************//** -Initialize the FTS trx instance. 
-@return FTS trx instance */ -static -fts_trx_table_t* -fts_trx_init( -/*=========*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table, /*!< in: FTS table instance */ - ib_vector_t* savepoints) /*!< in: Savepoints */ -{ - fts_trx_table_t* ftt; - ib_rbt_bound_t parent; - ib_rbt_t* tables; - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints)); - - tables = savepoint->tables; - rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL); - - if (parent.result == 0) { - fts_trx_table_t** fttp; - - fttp = rbt_value(fts_trx_table_t*, parent.last); - ftt = *fttp; - } else { - ftt = fts_trx_table_create(trx->fts_trx, table); - rbt_add_node(tables, &parent, &ftt); - } - - ut_a(ftt->table == table); - - return(ftt); -} - -/******************************************************************//** -Notify the FTS system about an operation on an FTS-indexed table. */ -static -void -fts_trx_table_add_op( -/*=================*/ - fts_trx_table_t*ftt, /*!< in: FTS trx table */ - doc_id_t doc_id, /*!< in: doc id */ - fts_row_state state, /*!< in: state of the row */ - ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */ -{ - ib_rbt_t* rows; - ib_rbt_bound_t parent; - - rows = ftt->rows; - rbt_search(rows, &parent, &doc_id); - - /* Row id found, update state, and if new state is FTS_NOTHING, - we delete the row from our tree. */ - if (parent.result == 0) { - fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last); - - row->state = fts_trx_row_get_new_state(row->state, state); - - if (row->state == FTS_NOTHING) { - if (row->fts_indexes) { - ib_vector_free(row->fts_indexes); - } - - ut_free(rbt_remove_node(rows, parent.last)); - row = NULL; - } else if (row->fts_indexes != NULL) { - ib_vector_free(row->fts_indexes); - row->fts_indexes = fts_indexes; - } - - } else { /* Row-id not found, create a new one. 
*/ - fts_trx_row_t row; - - row.doc_id = doc_id; - row.state = state; - row.fts_indexes = fts_indexes; - - rbt_add_node(rows, &parent, &row); - } -} - -/******************************************************************//** -Notify the FTS system about an operation on an FTS-indexed table. */ -UNIV_INTERN -void -fts_trx_add_op( -/*===========*/ - trx_t* trx, /*!< in: InnoDB transaction */ - dict_table_t* table, /*!< in: table */ - doc_id_t doc_id, /*!< in: new doc id */ - fts_row_state state, /*!< in: state of the row */ - ib_vector_t* fts_indexes) /*!< in: FTS indexes affected - (NULL=all) */ -{ - fts_trx_table_t* tran_ftt; - fts_trx_table_t* stmt_ftt; - - if (!trx->fts_trx) { - trx->fts_trx = fts_trx_create(trx); - } - - tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints); - stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt); - - fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes); - fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes); -} - -/******************************************************************//** -Fetch callback that converts a textual document id to a binary value and -stores it in the given place. 
-@return always returns NULL */ -static -ibool -fts_fetch_store_doc_id( -/*===================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: doc_id_t* to store - doc_id in */ -{ - int n_parsed; - sel_node_t* node = static_cast<sel_node_t*>(row); - doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg); - dfield_t* dfield = que_node_get_val(node->select_list); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - char buf[32]; - - ut_a(dtype_get_mtype(type) == DATA_VARCHAR); - ut_a(len > 0 && len < sizeof(buf)); - - memcpy(buf, dfield_get_data(dfield), len); - buf[len] = '\0'; - - n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id); - ut_a(n_parsed == 1); - - return(FALSE); -} - -#ifdef FTS_CACHE_SIZE_DEBUG -/******************************************************************//** -Get the max cache size in bytes. If there is an error reading the -value we simply print an error message here and return the default -value to the caller. -@return max cache size in bytes */ -static -ulint -fts_get_max_cache_size( -/*===================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table) /*!< in: table instance */ -{ - dberr_t error; - fts_string_t value; - ulint cache_size_in_mb; - - /* Set to the default value. */ - cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB; - - /* We set the length of value to the max bytes it can hold. This - information is used by the callback that reads the value. */ - value.f_n_char = 0; - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = ut_malloc(value.f_len + 1); - - error = fts_config_get_value( - trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value); - - if (error == DB_SUCCESS) { - - value.f_str[value.f_len] = 0; - cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10); - - if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) { - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Warning: FTS max cache size " - " (%lu) out of range. 
Minimum value is " - "%luMB and the maximum values is %luMB, " - "setting cache size to upper limit\n", - cache_size_in_mb, - FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, - FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB); - - cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB; - - } else if (cache_size_in_mb - < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) { - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Warning: FTS max cache size " - " (%lu) out of range. Minimum value is " - "%luMB and the maximum values is %luMB, " - "setting cache size to lower limit\n", - cache_size_in_mb, - FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, - FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB); - - cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB; - } - } else { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: Error: (%lu) reading max cache " - "config value from config table\n", error); - } - - ut_free(value.f_str); - - return(cache_size_in_mb * 1024 * 1024); -} -#endif - -#ifdef FTS_DOC_STATS_DEBUG -/*********************************************************************//** -Get the total number of words in the FTS for a particular FTS index. -@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -fts_get_total_word_count( -/*=====================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: for this index */ - ulint* total) /* out: total words */ -{ - dberr_t error; - fts_string_t value; - - *total = 0; - - /* We set the length of value to the max bytes it can hold. This - information is used by the callback that reads the value. 
*/ - value.f_n_char = 0; - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1)); - - error = fts_config_get_index_value( - trx, index, FTS_TOTAL_WORD_COUNT, &value); - - if (error == DB_SUCCESS) { - - value.f_str[value.f_len] = 0; - *total = strtoul((char*) value.f_str, NULL, 10); - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) reading total words " - "value from config table\n", ut_strerr(error)); - } - - ut_free(value.f_str); - - return(error); -} -#endif /* FTS_DOC_STATS_DEBUG */ - -/*********************************************************************//** -Update the next and last Doc ID in the CONFIG table to be the input -"doc_id" value (+ 1). We would do so after each FTS index build or -table truncate */ -UNIV_INTERN -void -fts_update_next_doc_id( -/*===================*/ - trx_t* trx, /*!< in/out: transaction */ - const dict_table_t* table, /*!< in: table */ - const char* table_name, /*!< in: table name, or NULL */ - doc_id_t doc_id) /*!< in: DOC ID to set */ -{ - table->fts->cache->synced_doc_id = doc_id; - table->fts->cache->next_doc_id = doc_id + 1; - - table->fts->cache->first_doc_id = table->fts->cache->next_doc_id; - - fts_update_sync_doc_id( - table, table_name, table->fts->cache->synced_doc_id, trx); - -} - -/*********************************************************************//** -Get the next available document id. 
-@return DB_SUCCESS if OK */ -UNIV_INTERN -dberr_t -fts_get_next_doc_id( -/*================*/ - const dict_table_t* table, /*!< in: table */ - doc_id_t* doc_id) /*!< out: new document id */ -{ - fts_cache_t* cache = table->fts->cache; - - /* If the Doc ID system has not yet been initialized, we - will consult the CONFIG table and user table to re-establish - the initial value of the Doc ID */ - - if (cache->first_doc_id != 0 || !fts_init_doc_id(table)) { - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - *doc_id = FTS_NULL_DOC_ID; - return(DB_SUCCESS); - } - - /* Otherwise, simply increment the value in cache */ - mutex_enter(&cache->doc_id_lock); - *doc_id = ++cache->next_doc_id; - mutex_exit(&cache->doc_id_lock); - } else { - mutex_enter(&cache->doc_id_lock); - *doc_id = cache->next_doc_id; - mutex_exit(&cache->doc_id_lock); - } - - return(DB_SUCCESS); -} - -/*********************************************************************//** -This function fetch the Doc ID from CONFIG table, and compare with -the Doc ID supplied. And store the larger one to the CONFIG table. 
-@return DB_SUCCESS if OK */ -static MY_ATTRIBUTE((nonnull)) -dberr_t -fts_cmp_set_sync_doc_id( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */ - ibool read_only, /*!< in: TRUE if read the - synced_doc_id only */ - doc_id_t* doc_id) /*!< out: larger document id - after comparing "doc_id_cmp" - to the one stored in CONFIG - table */ -{ - trx_t* trx; - pars_info_t* info; - dberr_t error; - fts_table_t fts_table; - que_t* graph = NULL; - fts_cache_t* cache = table->fts->cache; -retry: - ut_a(table->fts->doc_col != ULINT_UNDEFINED); - - fts_table.suffix = "CONFIG"; - fts_table.table_id = table->id; - fts_table.type = FTS_COMMON_TABLE; - fts_table.table = table; - - fts_table.parent = table->name; - - trx = trx_allocate_for_background(); - - trx->op_info = "update the next FTS document id"; - - info = pars_info_create(); - - pars_info_bind_function( - info, "my_func", fts_fetch_store_doc_id, doc_id); - - graph = fts_parse_sql( - &fts_table, info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS SELECT value FROM \"%s\"" - " WHERE key = 'synced_doc_id' FOR UPDATE;\n" - "BEGIN\n" - "" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - *doc_id = 0; - - error = fts_eval_sql(trx, graph); - - fts_que_graph_free_check_lock(&fts_table, NULL, graph); - - // FIXME: We need to retry deadlock errors - if (error != DB_SUCCESS) { - goto func_exit; - } - - if (read_only) { - goto func_exit; - } - - if (doc_id_cmp == 0 && *doc_id) { - cache->synced_doc_id = *doc_id - 1; - } else { - cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id); - } - - mutex_enter(&cache->doc_id_lock); - /* For each sync operation, we will add next_doc_id by 1, - so to mark a sync operation */ - if (cache->next_doc_id < cache->synced_doc_id + 1) { - cache->next_doc_id = cache->synced_doc_id + 1; - } - 
mutex_exit(&cache->doc_id_lock); - - if (doc_id_cmp > *doc_id) { - error = fts_update_sync_doc_id( - table, table->name, cache->synced_doc_id, trx); - } - - *doc_id = cache->next_doc_id; - -func_exit: - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - } else { - *doc_id = 0; - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) " - "while getting next doc id.\n", ut_strerr(error)); - - fts_sql_rollback(trx); - - if (error == DB_DEADLOCK) { - os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT); - goto retry; - } - } - - trx_free_for_background(trx); - - return(error); -} - -/*********************************************************************//** -Update the last document id. This function could create a new -transaction to update the last document id. -@return DB_SUCCESS if OK */ -static -dberr_t -fts_update_sync_doc_id( -/*===================*/ - const dict_table_t* table, /*!< in: table */ - const char* table_name, /*!< in: table name, or NULL */ - doc_id_t doc_id, /*!< in: last document id */ - trx_t* trx) /*!< in: update trx, or NULL */ -{ - byte id[FTS_MAX_ID_LEN]; - pars_info_t* info; - fts_table_t fts_table; - ulint id_len; - que_t* graph = NULL; - dberr_t error; - ibool local_trx = FALSE; - fts_cache_t* cache = table->fts->cache; - - fts_table.suffix = "CONFIG"; - fts_table.table_id = table->id; - fts_table.type = FTS_COMMON_TABLE; - fts_table.table = table; - if (table_name) { - fts_table.parent = table_name; - } else { - fts_table.parent = table->name; - } - - if (!trx) { - trx = trx_allocate_for_background(); - - trx->op_info = "setting last FTS document id"; - local_trx = TRUE; - } - - info = pars_info_create(); - - id_len = ut_snprintf( - (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1); - - pars_info_bind_varchar_literal(info, "doc_id", id, id_len); - - graph = fts_parse_sql( - &fts_table, info, - "BEGIN " - "UPDATE \"%s\" SET value = :doc_id" - " WHERE key = 'synced_doc_id';"); - - error = fts_eval_sql(trx, graph); - - 
fts_que_graph_free_check_lock(&fts_table, NULL, graph); - - if (local_trx) { - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - cache->synced_doc_id = doc_id; - } else { - - ib_logf(IB_LOG_LEVEL_ERROR, - "(%s) while updating last doc id.", - ut_strerr(error)); - - fts_sql_rollback(trx); - } - trx_free_for_background(trx); - } - - return(error); -} - -/*********************************************************************//** -Create a new fts_doc_ids_t. -@return new fts_doc_ids_t */ -UNIV_INTERN -fts_doc_ids_t* -fts_doc_ids_create(void) -/*====================*/ -{ - fts_doc_ids_t* fts_doc_ids; - mem_heap_t* heap = mem_heap_create(512); - - fts_doc_ids = static_cast<fts_doc_ids_t*>( - mem_heap_alloc(heap, sizeof(*fts_doc_ids))); - - fts_doc_ids->self_heap = ib_heap_allocator_create(heap); - - fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create( - fts_doc_ids->self_heap, sizeof(fts_update_t), 32)); - - return(fts_doc_ids); -} - -/*********************************************************************//** -Free a fts_doc_ids_t. */ - -void -fts_doc_ids_free( -/*=============*/ - fts_doc_ids_t* fts_doc_ids) -{ - mem_heap_t* heap = static_cast<mem_heap_t*>( - fts_doc_ids->self_heap->arg); - - memset(fts_doc_ids, 0, sizeof(*fts_doc_ids)); - - mem_heap_free(heap); -} - -/*********************************************************************//** -Do commit-phase steps necessary for the insertion of a new row. 
*/ -void -fts_add( -/*====*/ - fts_trx_table_t*ftt, /*!< in: FTS trx table */ - fts_trx_row_t* row) /*!< in: row */ -{ - dict_table_t* table = ftt->table; - doc_id_t doc_id = row->doc_id; - - ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY); - - fts_add_doc_by_id(ftt, doc_id, row->fts_indexes); - - mutex_enter(&table->fts->cache->deleted_lock); - ++table->fts->cache->added; - mutex_exit(&table->fts->cache->deleted_lock); - - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) - && doc_id >= table->fts->cache->next_doc_id) { - table->fts->cache->next_doc_id = doc_id + 1; - } -} - -/*********************************************************************//** -Do commit-phase steps necessary for the deletion of a row. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_delete( -/*=======*/ - fts_trx_table_t*ftt, /*!< in: FTS trx table */ - fts_trx_row_t* row) /*!< in: row */ -{ - que_t* graph; - fts_table_t fts_table; - dberr_t error = DB_SUCCESS; - doc_id_t write_doc_id; - dict_table_t* table = ftt->table; - doc_id_t doc_id = row->doc_id; - trx_t* trx = ftt->fts_trx->trx; - pars_info_t* info = pars_info_create(); - fts_cache_t* cache = table->fts->cache; - - /* we do not index Documents whose Doc ID value is 0 */ - if (doc_id == FTS_NULL_DOC_ID) { - ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)); - return(error); - } - - ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY); - - FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &write_doc_id, doc_id); - fts_bind_doc_id(info, "doc_id", &write_doc_id); - - /* It is possible we update a record that has not yet been sync-ed - into cache from last crash (delete Doc will not initialize the - sync). 
Avoid any added counter accounting until the FTS cache - is re-established and sync-ed */ - if (table->fts->fts_status & ADDED_TABLE_SYNCED - && doc_id > cache->synced_doc_id) { - mutex_enter(&table->fts->cache->deleted_lock); - - /* The Doc ID could belong to those left in - ADDED table from last crash. So need to check - if it is less than first_doc_id when we initialize - the Doc ID system after reboot */ - if (doc_id >= table->fts->cache->first_doc_id - && table->fts->cache->added > 0) { - --table->fts->cache->added; - } - - mutex_exit(&table->fts->cache->deleted_lock); - - /* Only if the row was really deleted. */ - ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY); - } - - /* Note the deleted document for OPTIMIZE to purge. */ - if (error == DB_SUCCESS) { - - trx->op_info = "adding doc id to FTS DELETED"; - - info->graph_owns_us = TRUE; - - fts_table.suffix = "DELETED"; - - graph = fts_parse_sql( - &fts_table, - info, - "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);"); - - error = fts_eval_sql(trx, graph); - - fts_que_graph_free(graph); - } else { - pars_info_free(info); - } - - /* Increment the total deleted count, this is used to calculate the - number of documents indexed. */ - if (error == DB_SUCCESS) { - mutex_enter(&table->fts->cache->deleted_lock); - - ++table->fts->cache->deleted; - - mutex_exit(&table->fts->cache->deleted_lock); - } - - return(error); -} - -/*********************************************************************//** -Do commit-phase steps necessary for the modification of a row. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_modify( -/*=======*/ - fts_trx_table_t* ftt, /*!< in: FTS trx table */ - fts_trx_row_t* row) /*!< in: row */ -{ - dberr_t error; - - ut_a(row->state == FTS_MODIFY); - - error = fts_delete(ftt, row); - - if (error == DB_SUCCESS) { - fts_add(ftt, row); - } - - return(error); -} - -/*********************************************************************//** -Create a new document id. -@return DB_SUCCESS if all went well else error */ -UNIV_INTERN -dberr_t -fts_create_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: row is of this table. */ - dtuple_t* row, /* in/out: add doc id value to this - row. This is the current row that is - being inserted. */ - mem_heap_t* heap) /*!< in: heap */ -{ - doc_id_t doc_id; - dberr_t error = DB_SUCCESS; - - ut_a(table->fts->doc_col != ULINT_UNDEFINED); - - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) { - error = fts_get_next_doc_id(table, &doc_id); - } - return(error); - } - - error = fts_get_next_doc_id(table, &doc_id); - - if (error == DB_SUCCESS) { - dfield_t* dfield; - doc_id_t* write_doc_id; - - ut_a(doc_id > 0); - - dfield = dtuple_get_nth_field(row, table->fts->doc_col); - write_doc_id = static_cast<doc_id_t*>( - mem_heap_alloc(heap, sizeof(*write_doc_id))); - - ut_a(doc_id != FTS_NULL_DOC_ID); - ut_a(sizeof(doc_id) == dfield->type.len); - fts_write_doc_id((byte*) write_doc_id, doc_id); - - dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id)); - } - - return(error); -} - -/*********************************************************************//** -The given transaction is about to be committed; do whatever is necessary -from the FTS system's POV. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_commit_table( -/*=============*/ - fts_trx_table_t* ftt) /*!< in: FTS table to commit*/ -{ - const ib_rbt_node_t* node; - ib_rbt_t* rows; - dberr_t error = DB_SUCCESS; - fts_cache_t* cache = ftt->table->fts->cache; - trx_t* trx = trx_allocate_for_background(); - - rows = ftt->rows; - - ftt->fts_trx->trx = trx; - - if (cache->get_docs == NULL) { - rw_lock_x_lock(&cache->init_lock); - if (cache->get_docs == NULL) { - cache->get_docs = fts_get_docs_create(cache); - } - rw_lock_x_unlock(&cache->init_lock); - } - - for (node = rbt_first(rows); - node != NULL && error == DB_SUCCESS; - node = rbt_next(rows, node)) { - - fts_trx_row_t* row = rbt_value(fts_trx_row_t, node); - - switch (row->state) { - case FTS_INSERT: - fts_add(ftt, row); - break; - - case FTS_MODIFY: - error = fts_modify(ftt, row); - break; - - case FTS_DELETE: - error = fts_delete(ftt, row); - break; - - default: - ut_error; - } - } - - fts_sql_commit(trx); - - trx_free_for_background(trx); - - return(error); -} - -/*********************************************************************//** -The given transaction is about to be committed; do whatever is necessary -from the FTS system's POV. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_commit( -/*=======*/ - trx_t* trx) /*!< in: transaction */ -{ - const ib_rbt_node_t* node; - dberr_t error; - ib_rbt_t* tables; - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_last(trx->fts_trx->savepoints)); - tables = savepoint->tables; - - for (node = rbt_first(tables), error = DB_SUCCESS; - node != NULL && error == DB_SUCCESS; - node = rbt_next(tables, node)) { - - fts_trx_table_t** ftt; - - ftt = rbt_value(fts_trx_table_t*, node); - - error = fts_commit_table(*ftt); - } - - return(error); -} - -/*********************************************************************//** -Initialize a document. 
*/ -UNIV_INTERN -void -fts_doc_init( -/*=========*/ - fts_doc_t* doc) /*!< in: doc to initialize */ -{ - mem_heap_t* heap = mem_heap_create(32); - - memset(doc, 0, sizeof(*doc)); - - doc->self_heap = ib_heap_allocator_create(heap); -} - -/*********************************************************************//** -Free document. */ -UNIV_INTERN -void -fts_doc_free( -/*=========*/ - fts_doc_t* doc) /*!< in: document */ -{ - mem_heap_t* heap = static_cast<mem_heap_t*>(doc->self_heap->arg); - - if (doc->tokens) { - rbt_free(doc->tokens); - } - -#ifdef UNIV_DEBUG - memset(doc, 0, sizeof(*doc)); -#endif /* UNIV_DEBUG */ - - mem_heap_free(heap); -} - -/*********************************************************************//** -Callback function for fetch that stores a row id to the location pointed. -The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8. -@return always returns NULL */ -UNIV_INTERN -void* -fts_fetch_row_id( -/*=============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: data pointer */ -{ - sel_node_t* node = static_cast<sel_node_t*>(row); - - dfield_t* dfield = que_node_get_val(node->select_list); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - ut_a(dtype_get_mtype(type) == DATA_FIXBINARY); - ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE); - ut_a(len == 8); - - memcpy(user_arg, dfield_get_data(dfield), 8); - - return(NULL); -} - -/*********************************************************************//** -Callback function for fetch that stores the text of an FTS document, -converting each column to UTF-16. 
-@return always FALSE */ -UNIV_INTERN -ibool -fts_query_expansion_fetch_doc( -/*==========================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts_doc_t* */ -{ - que_node_t* exp; - sel_node_t* node = static_cast<sel_node_t*>(row); - fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg); - dfield_t* dfield; - ulint len; - ulint doc_len; - fts_doc_t doc; - CHARSET_INFO* doc_charset = NULL; - ulint field_no = 0; - - len = 0; - - fts_doc_init(&doc); - doc.found = TRUE; - - exp = node->select_list; - doc_len = 0; - - doc_charset = result_doc->charset; - - /* Copy each indexed column content into doc->text.f_str */ - while (exp) { - dfield = que_node_get_val(exp); - len = dfield_get_len(dfield); - - /* NULL column */ - if (len == UNIV_SQL_NULL) { - exp = que_node_get_next(exp); - continue; - } - - if (!doc_charset) { - ulint prtype = dfield->type.prtype; - doc_charset = innobase_get_fts_charset( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); - } - - doc.charset = doc_charset; - - if (dfield_is_ext(dfield)) { - /* We ignore columns that are stored externally, this - could result in too many words to search */ - exp = que_node_get_next(exp); - continue; - } else { - doc.text.f_n_char = 0; - - doc.text.f_str = static_cast<byte*>( - dfield_get_data(dfield)); - - doc.text.f_len = len; - } - - if (field_no == 0) { - fts_tokenize_document(&doc, result_doc); - } else { - fts_tokenize_document_next(&doc, doc_len, result_doc); - } - - exp = que_node_get_next(exp); - - doc_len += (exp) ? len + 1 : len; - - field_no++; - } - - ut_ad(doc_charset); - - if (!result_doc->charset) { - result_doc->charset = doc_charset; - } - - fts_doc_free(&doc); - - return(FALSE); -} - -/*********************************************************************//** -fetch and tokenize the document. 
*/
static
void
fts_fetch_doc_from_rec(
/*===================*/
	fts_get_doc_t*  get_doc,	/*!< in: FTS index's get_doc struct */
	dict_index_t*	clust_index,	/*!< in: cluster index */
	btr_pcur_t*	pcur,		/*!< in: cursor whose position
					has been stored */
	ulint*		offsets,	/*!< in: offsets */
	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
					documents */
{
	if (get_doc == NULL) {
		return;
	}

	dict_index_t*	fts_index = get_doc->index_cache->index;
	dict_table_t*	user_table = fts_index->table;
	const rec_t*	clust_rec = btr_pcur_get_rec(pcur);
	const ulint	n_fields = dict_index_get_n_fields(fts_index);
	ulint		n_tokenized = 0;	/* fields tokenized so far */
	ulint		offset = 0;		/* position bias for the
						next field's tokens */

	for (ulint field_no = 0; field_no < n_fields; ++field_no) {
		const dict_field_t*	ifield = dict_index_get_nth_field(
			fts_index, field_no);
		const dict_col_t*	col = dict_field_get_col(ifield);
		const ulint		clust_pos = dict_col_get_clust_pos(
			col, clust_index);

		/* Resolve the index charset once and remember it in the
		index cache. */
		if (get_doc->index_cache->charset == NULL) {
			ulint	prtype = ifield->col->prtype;

			get_doc->index_cache->charset =
				innobase_get_fts_charset(
					(int) (prtype & DATA_MYSQL_TYPE_MASK),
					(uint) dtype_get_charset_coll(prtype));
		}

		if (!rec_offs_nth_extern(offsets, clust_pos)) {
			/* Stored inline: point directly into the record. */
			doc->text.f_str = (byte*) rec_get_nth_field(
				clust_rec, offsets, clust_pos,
				&doc->text.f_len);
		} else {
			/* Stored externally: copy it into the doc's heap. */
			doc->text.f_str =
				btr_rec_copy_externally_stored_field(
					clust_rec, offsets,
					dict_table_zip_size(user_table),
					clust_pos, &doc->text.f_len,
					static_cast<mem_heap_t*>(
						doc->self_heap->arg),
					NULL);
		}

		doc->found = TRUE;
		doc->charset = get_doc->index_cache->charset;

		/* NULL or empty field: nothing to tokenize. */
		if (doc->text.f_len == 0
		    || doc->text.f_len == UNIV_SQL_NULL) {
			continue;
		}

		if (n_tokenized != 0) {
			fts_tokenize_document_next(doc, offset, NULL);
		} else {
			fts_tokenize_document(doc, NULL);
		}

		++n_tokenized;
		offset += doc->text.f_len + 1;
	}
}

/*********************************************************************//**
Look up the row with the given Doc ID through the FTS_DOC_ID index and,
if it exists and is not delete-marked, tokenize it once per entry of
cache->get_docs and add the resulting tokens to the FTS cache. A sync of
the table is requested when the cache has grown large enough (or
fts_need_sync is set) and no sync is already in progress.
@return always TRUE */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
					/*!< in: affected fts indexes */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*	dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t	temp_doc_id;
	dict_index_t*	clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* Scratch heap for the search tuples and record offsets; freed at
	the end of the function. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = dict_table_get_index_on_name(
		table, FTS_DOC_ID_INDEX_NAME);

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key carries the Doc ID in storage byte order. */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		ulint*		offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The FTS_DOC_ID index is the clustered index, so
			the record found is already the full row. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			/* Build a row reference from the secondary index
			record and position a second cursor on the
			clustered index. */
			dtuple_t*	clust_ref;
			ulint		n_fields;

			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index,
					  NULL, ULINT_UNDEFINED, &heap);

		/* Tokenize the row once per get_doc entry. The locals
		"table" and "get_doc" below shadow the outer ones. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t	doc;
			dict_table_t*	table;
			fts_get_doc_t*	get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Save the cursor position and commit the
				mini-transaction before the cache lock is
				taken; the position is restored below if
				more indexes remain. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL, NULL,
							  NULL, TRUE, TRUE);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Decide under the cache lock whether a sync
				is needed; the request itself is issued
				after the lock has been released. */
				bool	need_sync = false;
				if ((cache->total_size > fts_max_cache_size / 10
				     || fts_need_sync)
				    && !cache->sync->in_progress) {
					need_sync = true;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* Debug-build instrumentation hooks. */
				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true, false);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				/* Restart the mini-transaction; it is
				committed at func_exit. */
				mtr_start(&mtr);

				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}


/*********************************************************************//**
Callback function to read a single ulint column.
-return always returns TRUE */ -static -ibool -fts_read_ulint( -/*===========*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to ulint */ -{ - sel_node_t* sel_node = static_cast<sel_node_t*>(row); - ulint* value = static_cast<ulint*>(user_arg); - que_node_t* exp = sel_node->select_list; - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - - *value = static_cast<ulint>(mach_read_from_4( - static_cast<const byte*>(data))); - - return(TRUE); -} - -/*********************************************************************//** -Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists -@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */ -UNIV_INTERN -doc_id_t -fts_get_max_doc_id( -/*===============*/ - dict_table_t* table) /*!< in: user table */ -{ - dict_index_t* index; - dict_field_t* dfield MY_ATTRIBUTE((unused)) = NULL; - doc_id_t doc_id = 0; - mtr_t mtr; - btr_pcur_t pcur; - - index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME); - - if (!index) { - return(0); - } - - dfield = dict_index_get_nth_field(index, 0); - -#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. 
*/ - ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0); -#endif - - mtr_start(&mtr); - - /* fetch the largest indexes value */ - btr_pcur_open_at_index_side( - false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - if (!page_is_empty(btr_pcur_get_page(&pcur))) { - const rec_t* rec = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - mem_heap_t* heap = NULL; - ulint len; - const void* data; - - rec_offs_init(offsets_); - - do { - rec = btr_pcur_get_rec(&pcur); - - if (page_rec_is_user_rec(rec)) { - break; - } - } while (btr_pcur_move_to_prev(&pcur, &mtr)); - - if (!rec) { - goto func_exit; - } - - offsets = rec_get_offsets( - rec, index, offsets, ULINT_UNDEFINED, &heap); - - data = rec_get_nth_field(rec, offsets, 0, &len); - - doc_id = static_cast<doc_id_t>(fts_read_doc_id( - static_cast<const byte*>(data))); - } - -func_exit: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - return(doc_id); -} - -/*********************************************************************//** -Fetch document with the given document id. -@return DB_SUCCESS if OK else error */ -UNIV_INTERN -dberr_t -fts_doc_fetch_by_doc_id( -/*====================*/ - fts_get_doc_t* get_doc, /*!< in: state */ - doc_id_t doc_id, /*!< in: id of document to - fetch */ - dict_index_t* index_to_use, /*!< in: caller supplied FTS index, - or NULL */ - ulint option, /*!< in: search option, if it is - greater than doc_id or equal */ - fts_sql_callback - callback, /*!< in: callback to read */ - void* arg) /*!< in: callback arg */ -{ - pars_info_t* info; - dberr_t error; - const char* select_str; - doc_id_t write_doc_id; - dict_index_t* index; - trx_t* trx = trx_allocate_for_background(); - que_t* graph; - - trx->op_info = "fetching indexed FTS document"; - - /* The FTS index can be supplied by caller directly with - "index_to_use", otherwise, get it from "get_doc" */ - index = (index_to_use) ? 
index_to_use : get_doc->index_cache->index; - - if (get_doc && get_doc->get_document_graph) { - info = get_doc->get_document_graph->info; - } else { - info = pars_info_create(); - } - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &write_doc_id, doc_id); - fts_bind_doc_id(info, "doc_id", &write_doc_id); - pars_info_bind_function(info, "my_func", callback, arg); - - select_str = fts_get_select_columns_str(index, info, info->heap); - pars_info_bind_id(info, TRUE, "table_name", index->table_name); - - if (!get_doc || !get_doc->get_document_graph) { - if (option == FTS_FETCH_DOC_BY_ID_EQUAL) { - graph = fts_parse_sql( - NULL, - info, - mem_heap_printf(info->heap, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT %s FROM $table_name" - " WHERE %s = :doc_id;\n" - "BEGIN\n" - "" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c %% NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;", - select_str, FTS_DOC_ID_COL_NAME)); - } else { - ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE); - - /* This is used for crash recovery of table with - hidden DOC ID or FTS indexes. We will scan the table - to re-processing user table rows whose DOC ID or - FTS indexed documents have not been sync-ed to disc - during recent crash. - In the case that all fulltext indexes are dropped - for a table, we will keep the "hidden" FTS_DOC_ID - column, and this scan is to retreive the largest - DOC ID being used in the table to determine the - appropriate next DOC ID. 
- In the case of there exists fulltext index(es), this - operation will re-tokenize any docs that have not - been sync-ed to the disk, and re-prime the FTS - cached */ - graph = fts_parse_sql( - NULL, - info, - mem_heap_printf(info->heap, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT %s, %s FROM $table_name" - " WHERE %s > :doc_id;\n" - "BEGIN\n" - "" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c %% NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;", - FTS_DOC_ID_COL_NAME, - select_str, FTS_DOC_ID_COL_NAME)); - } - if (get_doc) { - get_doc->get_document_graph = graph; - } - } else { - graph = get_doc->get_document_graph; - } - - error = fts_eval_sql(trx, graph); - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - } else { - fts_sql_rollback(trx); - } - - trx_free_for_background(trx); - - if (!get_doc) { - fts_que_graph_free(graph); - } - - return(error); -} - -/*********************************************************************//** -Write out a single word's data as new entry/entries in the INDEX table. -@return DB_SUCCESS if all OK. */ -UNIV_INTERN -dberr_t -fts_write_node( -/*===========*/ - trx_t* trx, /*!< in: transaction */ - que_t** graph, /*!< in: query graph */ - fts_table_t* fts_table, /*!< in: aux table */ - fts_string_t* word, /*!< in: word in UTF-8 */ - fts_node_t* node) /*!< in: node columns */ -{ - pars_info_t* info; - dberr_t error; - ib_uint32_t doc_count; - ib_time_t start_time; - doc_id_t last_doc_id; - doc_id_t first_doc_id; - - if (*graph) { - info = (*graph)->info; - } else { - info = pars_info_create(); - } - - pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id); - fts_bind_doc_id(info, "first_doc_id", &first_doc_id); - - /* Convert to "storage" byte order. 
*/ - fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id); - fts_bind_doc_id(info, "last_doc_id", &last_doc_id); - - ut_a(node->last_doc_id >= node->first_doc_id); - - /* Convert to "storage" byte order. */ - mach_write_to_4((byte*) &doc_count, node->doc_count); - pars_info_bind_int4_literal( - info, "doc_count", (const ib_uint32_t*) &doc_count); - - /* Set copy_name to FALSE since it's a static. */ - pars_info_bind_literal( - info, "ilist", node->ilist, node->ilist_size, - DATA_BLOB, DATA_BINARY_TYPE); - - if (!*graph) { - *graph = fts_parse_sql( - fts_table, - info, - "BEGIN\n" - "INSERT INTO \"%s\" VALUES " - "(:token, :first_doc_id," - " :last_doc_id, :doc_count, :ilist);"); - } - - start_time = ut_time(); - error = fts_eval_sql(trx, *graph); - elapsed_time += ut_time() - start_time; - ++n_nodes; - - return(error); -} - -/*********************************************************************//** -Add rows to the DELETED_CACHE table. -@return DB_SUCCESS if all went well else error code*/ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_sync_add_deleted_cache( -/*=======================*/ - fts_sync_t* sync, /*!< in: sync state */ - ib_vector_t* doc_ids) /*!< in: doc ids to add */ -{ - ulint i; - pars_info_t* info; - que_t* graph; - fts_table_t fts_table; - doc_id_t dummy = 0; - dberr_t error = DB_SUCCESS; - ulint n_elems = ib_vector_size(doc_ids); - - ut_a(ib_vector_size(doc_ids) > 0); - - ib_vector_sort(doc_ids, fts_update_doc_id_cmp); - - info = pars_info_create(); - - fts_bind_doc_id(info, "doc_id", &dummy); - - FTS_INIT_FTS_TABLE( - &fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table); - - graph = fts_parse_sql( - &fts_table, - info, - "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);"); - - for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) { - fts_update_t* update; - doc_id_t write_doc_id; - - update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i)); - - /* Convert to "storage" byte order. 
*/ - fts_write_doc_id((byte*) &write_doc_id, update->doc_id); - fts_bind_doc_id(info, "doc_id", &write_doc_id); - - error = fts_eval_sql(sync->trx, graph); - } - - fts_que_graph_free(graph); - - return(error); -} - -/** Write the words and ilist to disk. -@param[in,out] trx transaction -@param[in] index_cache index cache -@param[in] unlock_cache whether unlock cache when write node -@return DB_SUCCESS if all went well else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_sync_write_words( - trx_t* trx, - fts_index_cache_t* index_cache, - bool unlock_cache) -{ - fts_table_t fts_table; - ulint n_nodes = 0; - ulint n_words = 0; - const ib_rbt_node_t* rbt_node; - dberr_t error = DB_SUCCESS; - ibool print_error = FALSE; - dict_table_t* table = index_cache->index->table; -#ifdef FTS_DOC_STATS_DEBUG - ulint n_new_words = 0; -#endif /* FTS_DOC_STATS_DEBUG */ - - FTS_INIT_INDEX_TABLE( - &fts_table, NULL, FTS_INDEX_TABLE, index_cache->index); - - n_words = rbt_size(index_cache->words); - - /* We iterate over the entire tree, even if there is an error, - since we want to free the memory used during caching. */ - for (rbt_node = rbt_first(index_cache->words); - rbt_node; - rbt_node = rbt_next(index_cache->words, rbt_node)) { - - ulint i; - ulint selected; - fts_tokenizer_word_t* word; - - word = rbt_value(fts_tokenizer_word_t, rbt_node); - - selected = fts_select_index( - index_cache->charset, word->text.f_str, - word->text.f_len); - - fts_table.suffix = fts_get_suffix(selected); - -#ifdef FTS_DOC_STATS_DEBUG - /* Check if the word exists in the FTS index and if not - then we need to increment the total word count stats. 
*/ - if (error == DB_SUCCESS && fts_enable_diag_print) { - ibool found = FALSE; - - error = fts_is_word_in_index( - trx, - &index_cache->sel_graph[selected], - &fts_table, - &word->text, &found); - - if (error == DB_SUCCESS && !found) { - - ++n_new_words; - } - } -#endif /* FTS_DOC_STATS_DEBUG */ - - /* We iterate over all the nodes even if there was an error */ - for (i = 0; i < ib_vector_size(word->nodes); ++i) { - - fts_node_t* fts_node = static_cast<fts_node_t*>( - ib_vector_get(word->nodes, i)); - - if (fts_node->synced) { - continue; - } else { - fts_node->synced = true; - } - - /*FIXME: we need to handle the error properly. */ - if (error == DB_SUCCESS) { - if (unlock_cache) { - rw_lock_x_unlock( - &table->fts->cache->lock); - } - - error = fts_write_node( - trx, - &index_cache->ins_graph[selected], - &fts_table, &word->text, fts_node); - - DEBUG_SYNC_C("fts_write_node"); - DBUG_EXECUTE_IF("fts_write_node_crash", - DBUG_SUICIDE();); - - DBUG_EXECUTE_IF("fts_instrument_sync_sleep", - os_thread_sleep(1000000); - ); - - if (unlock_cache) { - rw_lock_x_lock( - &table->fts->cache->lock); - } - } - } - - n_nodes += ib_vector_size(word->nodes); - - if (error != DB_SUCCESS && !print_error) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error (%s) writing " - "word node to FTS auxiliary index " - "table.\n", ut_strerr(error)); - - print_error = TRUE; - } - } - -#ifdef FTS_DOC_STATS_DEBUG - if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) { - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table); - - /* Increment the total number of words in the FTS index */ - error = fts_config_increment_index_value( - trx, index_cache->index, FTS_TOTAL_WORD_COUNT, - n_new_words); - } -#endif /* FTS_DOC_STATS_DEBUG */ - - if (fts_enable_diag_print) { - printf("Avg number of nodes: %lf\n", - (double) n_nodes / (double) (n_words > 1 ? 
n_words : 1)); - } - - return(error); -} - -#ifdef FTS_DOC_STATS_DEBUG -/*********************************************************************//** -Write a single documents statistics to disk. -@return DB_SUCCESS if all went well else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_sync_write_doc_stat( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: index */ - que_t** graph, /* out: query graph */ - const fts_doc_stats_t* doc_stat) /*!< in: doc stats to write */ -{ - pars_info_t* info; - doc_id_t doc_id; - dberr_t error = DB_SUCCESS; - ib_uint32_t word_count; - - if (*graph) { - info = (*graph)->info; - } else { - info = pars_info_create(); - } - - /* Convert to "storage" byte order. */ - mach_write_to_4((byte*) &word_count, doc_stat->word_count); - pars_info_bind_int4_literal( - info, "count", (const ib_uint32_t*) &word_count); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id); - fts_bind_doc_id(info, "doc_id", &doc_id); - - if (!*graph) { - fts_table_t fts_table; - - FTS_INIT_INDEX_TABLE( - &fts_table, "DOC_ID", FTS_INDEX_TABLE, index); - - *graph = fts_parse_sql( - &fts_table, - info, - "BEGIN INSERT INTO \"%s\" VALUES (:doc_id, :count);"); - } - - for (;;) { - error = fts_eval_sql(trx, *graph); - - if (error == DB_SUCCESS) { - - break; /* Exit the loop. */ - } else { - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout writing to FTS doc_id. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while writing to FTS doc_id.\n", - ut_strerr(error)); - - break; /* Exit the loop. */ - } - } - } - - return(error); -} - -/*********************************************************************//** -Write document statistics to disk. 
-@return DB_SUCCESS if all OK */ -static -ulint -fts_sync_write_doc_stats( -/*=====================*/ - trx_t* trx, /*!< in: transaction */ - const fts_index_cache_t*index_cache) /*!< in: index cache */ -{ - dberr_t error = DB_SUCCESS; - que_t* graph = NULL; - fts_doc_stats_t* doc_stat; - - if (ib_vector_is_empty(index_cache->doc_stats)) { - return(DB_SUCCESS); - } - - doc_stat = static_cast<ts_doc_stats_t*>( - ib_vector_pop(index_cache->doc_stats)); - - while (doc_stat) { - error = fts_sync_write_doc_stat( - trx, index_cache->index, &graph, doc_stat); - - if (error != DB_SUCCESS) { - break; - } - - if (ib_vector_is_empty(index_cache->doc_stats)) { - break; - } - - doc_stat = static_cast<ts_doc_stats_t*>( - ib_vector_pop(index_cache->doc_stats)); - } - - if (graph != NULL) { - fts_que_graph_free_check_lock(NULL, index_cache, graph); - } - - return(error); -} - -/*********************************************************************//** -Callback to check the existince of a word. -@return always return NULL */ -static -ibool -fts_lookup_word( -/*============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts_doc_t* */ -{ - - que_node_t* exp; - sel_node_t* node = static_cast<sel_node_t*>(row); - ibool* found = static_cast<ibool*>(user_arg); - - exp = node->select_list; - - while (exp) { - dfield_t* dfield = que_node_get_val(exp); - ulint len = dfield_get_len(dfield); - - if (len != UNIV_SQL_NULL && len != 0) { - *found = TRUE; - } - - exp = que_node_get_next(exp); - } - - return(FALSE); -} - -/*********************************************************************//** -Check whether a particular word (term) exists in the FTS index. 
-@return DB_SUCCESS if all went well else error code */ -static -dberr_t -fts_is_word_in_index( -/*=================*/ - trx_t* trx, /*!< in: FTS query state */ - que_t** graph, /* out: Query graph */ - fts_table_t* fts_table, /*!< in: table instance */ - const fts_string_t* - word, /*!< in: the word to check */ - ibool* found) /* out: TRUE if exists */ -{ - pars_info_t* info; - dberr_t error; - - trx->op_info = "looking up word in FTS index"; - - if (*graph) { - info = (*graph)->info; - } else { - info = pars_info_create(); - } - - pars_info_bind_function(info, "my_func", fts_lookup_word, found); - pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); - - if (*graph == NULL) { - *graph = fts_parse_sql( - fts_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT doc_count\n" - " FROM \"%s\"\n" - " WHERE word = :word " - " ORDER BY first_doc_id;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - } - - for (;;) { - error = fts_eval_sql(trx, *graph); - - if (error == DB_SUCCESS) { - - break; /* Exit the loop. */ - } else { - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading FTS index.\n", - ut_strerr(error)); - - break; /* Exit the loop. */ - } - } - } - - return(error); -} -#endif /* FTS_DOC_STATS_DEBUG */ - -/*********************************************************************//** -Begin Sync, create transaction, acquire locks, etc. 
*/ -static -void -fts_sync_begin( -/*===========*/ - fts_sync_t* sync) /*!< in: sync state */ -{ - fts_cache_t* cache = sync->table->fts->cache; - - n_nodes = 0; - elapsed_time = 0; - - sync->start_time = ut_time(); - - sync->trx = trx_allocate_for_background(); - - if (fts_enable_diag_print) { - ib_logf(IB_LOG_LEVEL_INFO, - "FTS SYNC for table %s, deleted count: %ld size: " - "%lu bytes", - sync->table->name, - ib_vector_size(cache->deleted_doc_ids), - cache->total_size); - } -} - -/*********************************************************************//** -Run SYNC on the table, i.e., write out data from the index specific -cache to the FTS aux INDEX table and FTS aux doc id stats table. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_sync_index( -/*===========*/ - fts_sync_t* sync, /*!< in: sync state */ - fts_index_cache_t* index_cache) /*!< in: index cache */ -{ - trx_t* trx = sync->trx; - dberr_t error = DB_SUCCESS; - - trx->op_info = "doing SYNC index"; - - if (fts_enable_diag_print) { - ib_logf(IB_LOG_LEVEL_INFO, - "SYNC words: %ld", rbt_size(index_cache->words)); - } - - ut_ad(rbt_validate(index_cache->words)); - - error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache); - -#ifdef FTS_DOC_STATS_DEBUG - /* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID" - is not used currently for ranking. We disable fts_sync_write_doc_stats() - for now */ - /* Write the per doc statistics that will be used for ranking. */ - if (error == DB_SUCCESS) { - - error = fts_sync_write_doc_stats(trx, index_cache); - } -#endif /* FTS_DOC_STATS_DEBUG */ - - return(error); -} - -/** Check if index cache has been synced completely -@param[in,out] index_cache index cache -@return true if index is synced, otherwise false. 
*/
static
bool
fts_sync_index_check(
	fts_index_cache_t*	index_cache)
{
	const ib_rbt_node_t*	rbt_node;

	/* Only the last node of each word is inspected. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node != NULL;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		fts_tokenizer_word_t*	word;
		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		fts_node_t*	fts_node;
		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));

		if (!fts_node->synced) {
			return(false);
		}
	}

	return(true);
}

/** Reset synced flag in index cache when rollback
@param[in,out]	index_cache	index cache */
static
void
fts_sync_index_reset(
	fts_index_cache_t*	index_cache)
{
	const ib_rbt_node_t*	rbt_node;

	/* Only the last node of each word has its flag cleared. */
	for (rbt_node = rbt_first(index_cache->words);
	     rbt_node != NULL;
	     rbt_node = rbt_next(index_cache->words, rbt_node)) {

		fts_tokenizer_word_t*	word;
		word = rbt_value(fts_tokenizer_word_t, rbt_node);

		fts_node_t*	fts_node;
		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));

		fts_node->synced = false;
	}
}

/** Commit the SYNC, change state of processed doc ids etc.
Releases the cache lock (x-mode) and frees sync->trx before returning,
whether the sync is committed or rolled back.
@param[in,out]	sync	sync state
@return DB_SUCCESS if all OK */
static  MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_sync_commit(
	fts_sync_t*	sync)
{
	dberr_t		error;
	trx_t*		trx = sync->trx;
	fts_cache_t*	cache = sync->table->fts->cache;
	doc_id_t	last_doc_id;

	trx->op_info = "doing SYNC commit";

	/* After each Sync, update the CONFIG table about the max doc id
	we just sync-ed to index table */
	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
					&last_doc_id);

	/* Get the list of deleted documents that are either in the
	cache or were headed there but were deleted before the add
	thread got to them. */

	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {

		error = fts_sync_add_deleted_cache(
			sync, cache->deleted_doc_ids);
	}

	/* We need to do this within the deleted lock since fts_delete() can
	attempt to add a deleted doc id to the cache deleted id array. */
	/* NOTE(review): the lock released below is cache->lock, not
	cache->deleted_lock; presumably the caller still holds it
	x-locked at this point - confirm against fts_sync(). */
	fts_cache_clear(cache);
	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
	fts_cache_init(cache);
	rw_lock_x_unlock(&cache->lock);

	if (error == DB_SUCCESS) {

		fts_sql_commit(trx);

	} else if (error != DB_SUCCESS) {

		fts_sql_rollback(trx);

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Error: (%s) during SYNC.\n",
			ut_strerr(error));
	}

	/* elapsed_time and n_nodes are the file-level counters that
	fts_write_node() accumulates. */
	if (fts_enable_diag_print && elapsed_time) {
		ib_logf(IB_LOG_LEVEL_INFO,
			"SYNC for table %s: SYNC time : %lu secs: "
			"elapsed %lf ins/sec",
			sync->table->name,
			(ulong) (ut_time() - sync->start_time),
			(double) n_nodes/ (double) elapsed_time);
	}

	/* Avoid assertion in trx_free(). */
	trx->dict_operation_lock_mode = 0;
	trx_free_for_background(trx);

	return(error);
}

/** Rollback a sync operation
Resets the synced flags, frees the cached insert/select query graphs of
every index cache, releases the cache lock (x-mode) and frees sync->trx.
@param[in,out]	sync	sync state */
static
void
fts_sync_rollback(
	fts_sync_t*	sync)
{
	trx_t*		trx = sync->trx;
	fts_cache_t*	cache = sync->table->fts->cache;

	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
		ulint			j;
		fts_index_cache_t*	index_cache;

		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));

		/* Reset synced flag so nodes will not be skipped
		in the next sync, see fts_sync_write_words(). */
		fts_sync_index_reset(index_cache);

		/* One graph slot per fts_index_selector entry. */
		for (j = 0; fts_index_selector[j].value; ++j) {

			if (index_cache->ins_graph[j] != NULL) {

				fts_que_graph_free_check_lock(
					NULL, index_cache,
					index_cache->ins_graph[j]);

				index_cache->ins_graph[j] = NULL;
			}

			if (index_cache->sel_graph[j] != NULL) {

				fts_que_graph_free_check_lock(
					NULL, index_cache,
					index_cache->sel_graph[j]);

				index_cache->sel_graph[j] = NULL;
			}
		}
	}

	rw_lock_x_unlock(&cache->lock);

	fts_sql_rollback(trx);

	/* Avoid assertion in trx_free(). */
	trx->dict_operation_lock_mode = 0;
	trx_free_for_background(trx);
}

/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether unlock cache lock when write node
@param[in]	wait		whether wait when a sync is in progress
@param[in]	has_dict	whether has dict operation lock
@return DB_SUCCESS if all OK, also when a sync is already in progress
and wait is false */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict)
{
	ulint		i;
	dberr_t		error = DB_SUCCESS;
	fts_cache_t*	cache = sync->table->fts->cache;

	rw_lock_x_lock(&cache->lock);

	/* Check if cache is being synced.
	Note: we release cache lock in fts_sync_write_words() to
	avoid long wait for the lock by other threads. */
	while (sync->in_progress) {
		rw_lock_x_unlock(&cache->lock);

		if (wait) {
			os_event_wait(sync->event);
		} else {
			return(DB_SUCCESS);
		}

		rw_lock_x_lock(&cache->lock);
	}

	sync->unlock_cache = unlock_cache;
	sync->in_progress = true;

	DEBUG_SYNC_C("fts_sync_begin");
	fts_sync_begin(sync);

	/* When sync in background, we hold dict operation lock
	to prevent DDL like DROP INDEX, etc. */
	if (has_dict) {
		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
	}

begin_sync:
	if (cache->total_size > fts_max_cache_size) {
		/* Avoid the case where the sync never finishes because
		inserts/updates keep coming: stop releasing the cache
		lock while writing nodes. */
		ut_ad(sync->unlock_cache);
		sync->unlock_cache = false;
	}

	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;

		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));

		if (index_cache->index->to_be_dropped) {
			continue;
		}

		error = fts_sync_index(sync, index_cache);

		if (error != DB_SUCCESS && !sync->interrupted) {

			goto end_sync;
		}
	}

	DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
			sync->interrupted = true;
			error = DB_INTERRUPTED;
			goto end_sync;
	);

	/* Make sure all the caches are synced. */
	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
		fts_index_cache_t*	index_cache;

		index_cache = static_cast<fts_index_cache_t*>(
			ib_vector_get(cache->indexes, i));

		if (index_cache->index->to_be_dropped
		    || fts_sync_index_check(index_cache)) {
			continue;
		}

		/* Some index still has unsynced nodes: run another pass. */
		goto begin_sync;
	}

end_sync:
	/* Both fts_sync_commit() and fts_sync_rollback() release the
	cache lock and free sync->trx. */
	if (error == DB_SUCCESS && !sync->interrupted) {
		error = fts_sync_commit(sync);
	} else {
		fts_sync_rollback(sync);
	}

	/* Re-acquire the cache lock to clear the in-progress state and
	signal sync->event. */
	rw_lock_x_lock(&cache->lock);
	sync->interrupted = false;
	sync->in_progress = false;
	os_event_set(sync->event);
	rw_lock_x_unlock(&cache->lock);

	/* We need to check whether an optimize is required, for that
	we make copies of the two variables that control the trigger. These
	variables can change behind our back and we don't want to hold the
	lock for longer than is needed. */
	mutex_enter(&cache->deleted_lock);

	cache->added = 0;
	cache->deleted = 0;

	mutex_exit(&cache->deleted_lock);

	return(error);
}

/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
Nothing is done (and DB_SUCCESS is returned) when the table is discarded
or has no FTS cache.
@param[in,out]	table		fts table
@param[in]	unlock_cache	whether unlock cache when write node
@param[in]	wait		whether wait for existing sync to finish
@param[in]	has_dict	whether has dict operation lock
@return DB_SUCCESS on success, error code on failure. */
UNIV_INTERN
dberr_t
fts_sync_table(
	dict_table_t*	table,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict)
{
	dberr_t	err = DB_SUCCESS;

	ut_ad(table->fts);

	if (!dict_table_is_discarded(table) && table->fts->cache) {
		err = fts_sync(table->fts->cache->sync,
			       unlock_cache, wait, has_dict);
	}

	return(err);
}

/********************************************************************
Process next token from document starting at the given position, i.e., add
the token's start position to the token's list of positions.
Tokens whose character count is outside
[fts_min_token_size, fts_max_token_size] are skipped.
@return number of characters handled in this call */
static
ulint
fts_process_token(
/*==============*/
	fts_doc_t*	doc,		/* in/out: document to
					tokenize */
	fts_doc_t*	result,		/* out: if provided, save
					result here */
	ulint		start_pos,	/*!< in: start position in text */
	ulint		add_pos)	/*!< in: add this position to all
					tokens from this tokenization */
{
	ulint		ret;
	fts_string_t	str;
	ulint		offset = 0;
	fts_doc_t*	result_doc;

	/* Determine where to save the result. */
	result_doc = (result) ? result : doc;

	/* The length of a string in characters is set here only. */
	ret = innobase_mysql_fts_get_token(
		doc->charset, doc->text.f_str + start_pos,
		doc->text.f_str + doc->text.f_len, &str, &offset);

	/* Ignore string whose character number is less than
	"fts_min_token_size" or more than "fts_max_token_size" */

	if (str.f_n_char >= fts_min_token_size
	    && str.f_n_char <= fts_max_token_size) {

		mem_heap_t*	heap;
		fts_string_t	t_str;
		fts_token_t*	token;
		ib_rbt_bound_t	parent;
		ulint		newlen;

		heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);

		t_str.f_n_char = str.f_n_char;

		/* Reserve room for the lower-cased copy plus the
		terminating NUL byte. */
		t_str.f_len = str.f_len * doc->charset->casedn_multiply + 1;

		t_str.f_str = static_cast<byte*>(
			mem_heap_alloc(heap, t_str.f_len));

		newlen = innobase_fts_casedn_str(
			doc->charset, (char*) str.f_str, str.f_len,
			(char*) t_str.f_str, t_str.f_len);

		t_str.f_len = newlen;
		t_str.f_str[newlen] = 0;

		/* Add the word to the document statistics. If the word
		hasn't been seen before we create a new entry for it. */
		if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
			fts_token_t	new_token;

			new_token.text.f_len = newlen;
			new_token.text.f_str = t_str.f_str;
			new_token.text.f_n_char = t_str.f_n_char;

			new_token.positions = ib_vector_create(
				result_doc->self_heap, sizeof(ulint), 32);

			ut_a(new_token.text.f_n_char >= fts_min_token_size);
			ut_a(new_token.text.f_n_char <= fts_max_token_size);

			parent.last = rbt_add_node(
				result_doc->tokens, &parent, &new_token);

			ut_ad(rbt_validate(result_doc->tokens));
		}

#ifdef	FTS_CHARSET_DEBUG
		/* NOTE(review): with FTS_CHARSET_DEBUG defined, start_pos
		and add_pos are added here and again below - confirm this
		is intended. */
		offset += start_pos + add_pos;
#endif /* FTS_CHARSET_DEBUG */

		/* Turn the offset into the token's position:
		start_pos + ret - str.f_len, biased by add_pos. */
		offset += start_pos + ret - str.f_len + add_pos;

		token = rbt_value(fts_token_t, parent.last);
		ib_vector_push(token->positions, &offset);
	}

	return(ret);
}

/******************************************************************//**
Tokenize a document.
*/ -UNIV_INTERN -void -fts_tokenize_document( -/*==================*/ - fts_doc_t* doc, /* in/out: document to - tokenize */ - fts_doc_t* result) /* out: if provided, save - the result token here */ -{ - ulint inc; - - ut_a(!doc->tokens); - ut_a(doc->charset); - - doc->tokens = rbt_create_arg_cmp( - sizeof(fts_token_t), innobase_fts_text_cmp, (void*) doc->charset); - - for (ulint i = 0; i < doc->text.f_len; i += inc) { - inc = fts_process_token(doc, result, i, 0); - ut_a(inc > 0); - } -} - -/******************************************************************//** -Continue to tokenize a document. */ -UNIV_INTERN -void -fts_tokenize_document_next( -/*=======================*/ - fts_doc_t* doc, /*!< in/out: document to - tokenize */ - ulint add_pos, /*!< in: add this position to all - tokens from this tokenization */ - fts_doc_t* result) /*!< out: if provided, save - the result token here */ -{ - ulint inc; - - ut_a(doc->tokens); - - for (ulint i = 0; i < doc->text.f_len; i += inc) { - inc = fts_process_token(doc, result, i, add_pos); - ut_a(inc > 0); - } -} - -/******************************************************************** -Create the vector of fts_get_doc_t instances. */ -UNIV_INTERN -ib_vector_t* -fts_get_docs_create( -/*================*/ - /* out: vector of - fts_get_doc_t instances */ - fts_cache_t* cache) /*!< in: fts cache */ -{ - ulint i; - ib_vector_t* get_docs; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX)); -#endif - /* We need one instance of fts_get_doc_t per index. */ - get_docs = ib_vector_create( - cache->self_heap, sizeof(fts_get_doc_t), 4); - - /* Create the get_doc instance, we need one of these - per FTS index. 
*/ - for (i = 0; i < ib_vector_size(cache->indexes); ++i) { - - dict_index_t** index; - fts_get_doc_t* get_doc; - - index = static_cast<dict_index_t**>( - ib_vector_get(cache->indexes, i)); - - get_doc = static_cast<fts_get_doc_t*>( - ib_vector_push(get_docs, NULL)); - - memset(get_doc, 0x0, sizeof(*get_doc)); - - get_doc->index_cache = fts_get_index_cache(cache, *index); - get_doc->cache = cache; - - /* Must find the index cache. */ - ut_a(get_doc->index_cache != NULL); - } - - return(get_docs); -} - -/******************************************************************** -Release any resources held by the fts_get_doc_t instances. */ -static -void -fts_get_docs_clear( -/*===============*/ - ib_vector_t* get_docs) /*!< in: Doc retrieval vector */ -{ - ulint i; - - /* Release the get doc graphs if any. */ - for (i = 0; i < ib_vector_size(get_docs); ++i) { - - fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>( - ib_vector_get(get_docs, i)); - - if (get_doc->get_document_graph != NULL) { - - ut_a(get_doc->index_cache); - - fts_que_graph_free(get_doc->get_document_graph); - get_doc->get_document_graph = NULL; - } - } -} - -/*********************************************************************//** -Get the initial Doc ID by consulting the CONFIG table -@return initial Doc ID */ -UNIV_INTERN -doc_id_t -fts_init_doc_id( -/*============*/ - const dict_table_t* table) /*!< in: table */ -{ - doc_id_t max_doc_id = 0; - - rw_lock_x_lock(&table->fts->cache->lock); - - /* Return if the table is already initialized for DOC ID */ - if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) { - rw_lock_x_unlock(&table->fts->cache->lock); - return(0); - } - - DEBUG_SYNC_C("fts_initialize_doc_id"); - - /* Then compare this value with the ID value stored in the CONFIG - table. The larger one will be our new initial Doc ID */ - fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id); - - /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of - creating index (and add doc id column. 
No need to recovery - documents */ - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { - fts_init_index((dict_table_t*) table, TRUE); - } - - table->fts->fts_status |= ADDED_TABLE_SYNCED; - - table->fts->cache->first_doc_id = max_doc_id; - - rw_lock_x_unlock(&table->fts->cache->lock); - - ut_ad(max_doc_id > 0); - - return(max_doc_id); -} - -#ifdef FTS_MULT_INDEX -/*********************************************************************//** -Check if the index is in the affected set. -@return TRUE if index is updated */ -static -ibool -fts_is_index_updated( -/*=================*/ - const ib_vector_t* fts_indexes, /*!< in: affected FTS indexes */ - const fts_get_doc_t* get_doc) /*!< in: info for reading - document */ -{ - ulint i; - dict_index_t* index = get_doc->index_cache->index; - - for (i = 0; i < ib_vector_size(fts_indexes); ++i) { - const dict_index_t* updated_fts_index; - - updated_fts_index = static_cast<const dict_index_t*>( - ib_vector_getp_const(fts_indexes, i)); - - ut_a(updated_fts_index != NULL); - - if (updated_fts_index == index) { - return(TRUE); - } - } - - return(FALSE); -} -#endif - -/*********************************************************************//** -Fetch COUNT(*) from specified table. 
-@return the number of rows in the table */ -UNIV_INTERN -ulint -fts_get_rows_count( -/*===============*/ - fts_table_t* fts_table) /*!< in: fts table to read */ -{ - trx_t* trx; - pars_info_t* info; - que_t* graph; - dberr_t error; - ulint count = 0; - - trx = trx_allocate_for_background(); - - trx->op_info = "fetching FT table rows count"; - - info = pars_info_create(); - - pars_info_bind_function(info, "my_func", fts_read_ulint, &count); - - graph = fts_parse_sql( - fts_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT COUNT(*) " - " FROM \"%s\";\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - for (;;) { - error = fts_eval_sql(trx, graph); - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - - break; /* Exit the loop. */ - } else { - fts_sql_rollback(trx); - - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS table. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading FTS table.\n", - ut_strerr(error)); - - break; /* Exit the loop. */ - } - } - } - - fts_que_graph_free(graph); - - trx_free_for_background(trx); - - return(count); -} - -#ifdef FTS_CACHE_SIZE_DEBUG -/*********************************************************************//** -Read the max cache size parameter from the config table. */ -static -void -fts_update_max_cache_size( -/*======================*/ - fts_sync_t* sync) /*!< in: sync state */ -{ - trx_t* trx; - fts_table_t fts_table; - - trx = trx_allocate_for_background(); - - FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table); - - /* The size returned is in bytes. 
*/ - sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table); - - fts_sql_commit(trx); - - trx_free_for_background(trx); -} -#endif /* FTS_CACHE_SIZE_DEBUG */ - -/*********************************************************************//** -Free the modified rows of a table. */ -UNIV_INLINE -void -fts_trx_table_rows_free( -/*====================*/ - ib_rbt_t* rows) /*!< in: rbt of rows to free */ -{ - const ib_rbt_node_t* node; - - for (node = rbt_first(rows); node; node = rbt_first(rows)) { - fts_trx_row_t* row; - - row = rbt_value(fts_trx_row_t, node); - - if (row->fts_indexes != NULL) { - /* This vector shouldn't be using the - heap allocator. */ - ut_a(row->fts_indexes->allocator->arg == NULL); - - ib_vector_free(row->fts_indexes); - row->fts_indexes = NULL; - } - - ut_free(rbt_remove_node(rows, node)); - } - - ut_a(rbt_empty(rows)); - rbt_free(rows); -} - -/*********************************************************************//** -Free an FTS savepoint instance. */ -UNIV_INLINE -void -fts_savepoint_free( -/*===============*/ - fts_savepoint_t* savepoint) /*!< in: savepoint instance */ -{ - const ib_rbt_node_t* node; - ib_rbt_t* tables = savepoint->tables; - - /* Nothing to free! */ - if (tables == NULL) { - return; - } - - for (node = rbt_first(tables); node; node = rbt_first(tables)) { - fts_trx_table_t* ftt; - fts_trx_table_t** fttp; - - fttp = rbt_value(fts_trx_table_t*, node); - ftt = *fttp; - - /* This can be NULL if a savepoint was released. */ - if (ftt->rows != NULL) { - fts_trx_table_rows_free(ftt->rows); - ftt->rows = NULL; - } - - /* This can be NULL if a savepoint was released. */ - if (ftt->added_doc_ids != NULL) { - fts_doc_ids_free(ftt->added_doc_ids); - ftt->added_doc_ids = NULL; - } - - /* The default savepoint name must be NULL. 
*/ - if (ftt->docs_added_graph) { - fts_que_graph_free(ftt->docs_added_graph); - } - - /* NOTE: We are responsible for free'ing the node */ - ut_free(rbt_remove_node(tables, node)); - } - - ut_a(rbt_empty(tables)); - rbt_free(tables); - savepoint->tables = NULL; -} - -/*********************************************************************//** -Free an FTS trx. */ -UNIV_INTERN -void -fts_trx_free( -/*=========*/ - fts_trx_t* fts_trx) /* in, own: FTS trx */ -{ - ulint i; - - for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) { - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_get(fts_trx->savepoints, i)); - - /* The default savepoint name must be NULL. */ - if (i == 0) { - ut_a(savepoint->name == NULL); - } - - fts_savepoint_free(savepoint); - } - - for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) { - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_get(fts_trx->last_stmt, i)); - - /* The default savepoint name must be NULL. */ - if (i == 0) { - ut_a(savepoint->name == NULL); - } - - fts_savepoint_free(savepoint); - } - - if (fts_trx->heap) { - mem_heap_free(fts_trx->heap); - } -} - -/*********************************************************************//** -Extract the doc id from the FTS hidden column. 
-@return doc id that was extracted from rec */ -UNIV_INTERN -doc_id_t -fts_get_doc_id_from_row( -/*====================*/ - dict_table_t* table, /*!< in: table */ - dtuple_t* row) /*!< in: row whose FTS doc id we - want to extract.*/ -{ - dfield_t* field; - doc_id_t doc_id = 0; - - ut_a(table->fts->doc_col != ULINT_UNDEFINED); - - field = dtuple_get_nth_field(row, table->fts->doc_col); - - ut_a(dfield_get_len(field) == sizeof(doc_id)); - ut_a(dfield_get_type(field)->mtype == DATA_INT); - - doc_id = fts_read_doc_id( - static_cast<const byte*>(dfield_get_data(field))); - - return(doc_id); -} - -/*********************************************************************//** -Extract the doc id from the FTS hidden column. -@return doc id that was extracted from rec */ -UNIV_INTERN -doc_id_t -fts_get_doc_id_from_rec( -/*====================*/ - dict_table_t* table, /*!< in: table */ - const rec_t* rec, /*!< in: rec */ - mem_heap_t* heap) /*!< in: heap */ -{ - ulint len; - const byte* data; - ulint col_no; - doc_id_t doc_id = 0; - dict_index_t* clust_index; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - mem_heap_t* my_heap = heap; - - ut_a(table->fts->doc_col != ULINT_UNDEFINED); - - clust_index = dict_table_get_first_index(table); - - rec_offs_init(offsets_); - - offsets = rec_get_offsets( - rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap); - - col_no = dict_col_get_clust_pos( - &table->cols[table->fts->doc_col], clust_index); - ut_ad(col_no != ULINT_UNDEFINED); - - data = rec_get_nth_field(rec, offsets, col_no, &len); - - ut_a(len == 8); - ut_ad(8 == sizeof(doc_id)); - doc_id = static_cast<doc_id_t>(mach_read_from_8(data)); - - if (my_heap && !heap) { - mem_heap_free(my_heap); - } - - return(doc_id); -} - -/*********************************************************************//** -Search the index specific cache for a particular FTS index. 
-@return the index specific cache else NULL */ -UNIV_INTERN -fts_index_cache_t* -fts_find_index_cache( -/*=================*/ - const fts_cache_t* cache, /*!< in: cache to search */ - const dict_index_t* index) /*!< in: index to search for */ -{ - /* We cast away the const because our internal function, takes - non-const cache arg and returns a non-const pointer. */ - return(static_cast<fts_index_cache_t*>( - fts_get_index_cache((fts_cache_t*) cache, index))); -} - -/*********************************************************************//** -Search cache for word. -@return the word node vector if found else NULL */ -UNIV_INTERN -const ib_vector_t* -fts_cache_find_word( -/*================*/ - const fts_index_cache_t*index_cache, /*!< in: cache to search */ - const fts_string_t* text) /*!< in: word to search for */ -{ - ib_rbt_bound_t parent; - const ib_vector_t* nodes = NULL; -#ifdef UNIV_SYNC_DEBUG - dict_table_t* table = index_cache->index->table; - fts_cache_t* cache = table->fts->cache; - - ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX)); -#endif - - /* Lookup the word in the rb tree */ - if (rbt_search(index_cache->words, &parent, text) == 0) { - const fts_tokenizer_word_t* word; - - word = rbt_value(fts_tokenizer_word_t, parent.last); - - nodes = word->nodes; - } - - return(nodes); -} - -/*********************************************************************//** -Check cache for deleted doc id. 
-@return TRUE if deleted */ -UNIV_INTERN -ibool -fts_cache_is_deleted_doc_id( -/*========================*/ - const fts_cache_t* cache, /*!< in: cache ito search */ - doc_id_t doc_id) /*!< in: doc id to search for */ -{ - ulint i; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(mutex_own(&cache->deleted_lock)); -#endif - - for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) { - const fts_update_t* update; - - update = static_cast<const fts_update_t*>( - ib_vector_get_const(cache->deleted_doc_ids, i)); - - if (doc_id == update->doc_id) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*********************************************************************//** -Append deleted doc ids to vector. */ -UNIV_INTERN -void -fts_cache_append_deleted_doc_ids( -/*=============================*/ - const fts_cache_t* cache, /*!< in: cache to use */ - ib_vector_t* vector) /*!< in: append to this vector */ -{ - ulint i; - - mutex_enter((ib_mutex_t*) &cache->deleted_lock); - - if (cache->deleted_doc_ids == NULL) { - mutex_exit((ib_mutex_t*) &cache->deleted_lock); - return; - } - - - for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) { - fts_update_t* update; - - update = static_cast<fts_update_t*>( - ib_vector_get(cache->deleted_doc_ids, i)); - - ib_vector_push(vector, &update->doc_id); - } - - mutex_exit((ib_mutex_t*) &cache->deleted_lock); -} - -/*********************************************************************//** -Wait for the background thread to start. We poll to detect change -of state, which is acceptable, since the wait should happen only -once during startup. 
-@return true if the thread started else FALSE (i.e timed out) */ -UNIV_INTERN -ibool -fts_wait_for_background_thread_to_start( -/*====================================*/ - dict_table_t* table, /*!< in: table to which the thread - is attached */ - ulint max_wait) /*!< in: time in microseconds, if - set to 0 then it disables - timeout checking */ -{ - ulint count = 0; - ibool done = FALSE; - - ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT); - - for (;;) { - fts_t* fts = table->fts; - - mutex_enter(&fts->bg_threads_mutex); - - if (fts->fts_status & BG_THREAD_READY) { - - done = TRUE; - } - - mutex_exit(&fts->bg_threads_mutex); - - if (!done) { - os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT); - - if (max_wait > 0) { - - max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT; - - /* We ignore the residual value. */ - if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) { - break; - } - } - - ++count; - } else { - break; - } - - if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error the background thread " - "for the FTS table %s refuses to start\n", - table->name); - - count = 0; - } - } - - return(done); -} - -/*********************************************************************//** -Add the FTS document id hidden column. */ -UNIV_INTERN -void -fts_add_doc_id_column( -/*==================*/ - dict_table_t* table, /*!< in/out: Table with FTS index */ - mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */ -{ - dict_mem_table_add_col( - table, heap, - FTS_DOC_ID_COL_NAME, - DATA_INT, - dtype_form_prtype( - DATA_NOT_NULL | DATA_UNSIGNED - | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0), - sizeof(doc_id_t)); - DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID); -} - -/*********************************************************************//** -Update the query graph with a new document id. 
-@return Doc ID used */ -UNIV_INTERN -doc_id_t -fts_update_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* ufield, /*!< out: update node */ - doc_id_t* next_doc_id) /*!< in/out: buffer for writing */ -{ - doc_id_t doc_id; - dberr_t error = DB_SUCCESS; - - if (*next_doc_id) { - doc_id = *next_doc_id; - } else { - /* Get the new document id that will be added. */ - error = fts_get_next_doc_id(table, &doc_id); - } - - if (error == DB_SUCCESS) { - dict_index_t* clust_index; - - ufield->exp = NULL; - - ufield->new_val.len = sizeof(doc_id); - - clust_index = dict_table_get_first_index(table); - - ufield->field_no = dict_col_get_clust_pos( - &table->cols[table->fts->doc_col], clust_index); - - /* It is possible we update record that has - not yet be sync-ed from last crash. */ - - /* Convert to storage byte order. */ - ut_a(doc_id != FTS_NULL_DOC_ID); - fts_write_doc_id((byte*) next_doc_id, doc_id); - - ufield->new_val.data = next_doc_id; - } - - return(doc_id); -} - -/*********************************************************************//** -Check if the table has an FTS index. This is the non-inline version -of dict_table_has_fts_index(). -@return TRUE if table has an FTS index */ -UNIV_INTERN -ibool -fts_dict_table_has_fts_index( -/*=========================*/ - dict_table_t* table) /*!< in: table */ -{ - return(dict_table_has_fts_index(table)); -} - -/*********************************************************************//** -Create an instance of fts_t. 
-@return instance of fts_t */ -UNIV_INTERN -fts_t* -fts_create( -/*=======*/ - dict_table_t* table) /*!< in/out: table with FTS indexes */ -{ - fts_t* fts; - ib_alloc_t* heap_alloc; - mem_heap_t* heap; - - ut_a(!table->fts); - - heap = mem_heap_create(512); - - fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts))); - - memset(fts, 0x0, sizeof(*fts)); - - fts->fts_heap = heap; - - fts->doc_col = ULINT_UNDEFINED; - - mutex_create( - fts_bg_threads_mutex_key, &fts->bg_threads_mutex, - SYNC_FTS_BG_THREADS); - - heap_alloc = ib_heap_allocator_create(heap); - fts->indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4); - dict_table_get_all_fts_indexes(table, fts->indexes); - - return(fts); -} - -/*********************************************************************//** -Free the FTS resources. */ -UNIV_INTERN -void -fts_free( -/*=====*/ - dict_table_t* table) /*!< in/out: table with FTS indexes */ -{ - fts_t* fts = table->fts; - - mutex_free(&fts->bg_threads_mutex); - - ut_ad(!fts->add_wq); - - if (fts->cache) { - fts_cache_clear(fts->cache); - fts_cache_destroy(fts->cache); - fts->cache = NULL; - } - - mem_heap_free(fts->fts_heap); - - table->fts = NULL; -} - -/*********************************************************************//** -Signal FTS threads to initiate shutdown. */ -UNIV_INTERN -void -fts_start_shutdown( -/*===============*/ - dict_table_t* table, /*!< in: table with FTS indexes */ - fts_t* fts) /*!< in: fts instance that needs - to be informed about shutdown */ -{ - mutex_enter(&fts->bg_threads_mutex); - - fts->fts_status |= BG_THREAD_STOP; - - mutex_exit(&fts->bg_threads_mutex); - -} - -/*********************************************************************//** -Wait for FTS threads to shutdown. 
*/ -UNIV_INTERN -void -fts_shutdown( -/*=========*/ - dict_table_t* table, /*!< in: table with FTS indexes */ - fts_t* fts) /*!< in: fts instance to shutdown */ -{ - mutex_enter(&fts->bg_threads_mutex); - - ut_a(fts->fts_status & BG_THREAD_STOP); - - dict_table_wait_for_bg_threads_to_exit(table, 20000); - - mutex_exit(&fts->bg_threads_mutex); -} - -/*********************************************************************//** -Take a FTS savepoint. */ -UNIV_INLINE -void -fts_savepoint_copy( -/*===============*/ - const fts_savepoint_t* src, /*!< in: source savepoint */ - fts_savepoint_t* dst) /*!< out: destination savepoint */ -{ - const ib_rbt_node_t* node; - const ib_rbt_t* tables; - - tables = src->tables; - - for (node = rbt_first(tables); node; node = rbt_next(tables, node)) { - - fts_trx_table_t* ftt_dst; - const fts_trx_table_t** ftt_src; - - ftt_src = rbt_value(const fts_trx_table_t*, node); - - ftt_dst = fts_trx_table_clone(*ftt_src); - - rbt_insert(dst->tables, &ftt_dst, &ftt_dst); - } -} - -/*********************************************************************//** -Take a FTS savepoint. */ -UNIV_INTERN -void -fts_savepoint_take( -/*===============*/ - trx_t* trx, /*!< in: transaction */ - fts_trx_t* fts_trx, /*!< in: fts transaction */ - const char* name) /*!< in: savepoint name */ -{ - mem_heap_t* heap; - fts_savepoint_t* savepoint; - fts_savepoint_t* last_savepoint; - - ut_a(name != NULL); - - heap = fts_trx->heap; - - /* The implied savepoint must exist. */ - ut_a(ib_vector_size(fts_trx->savepoints) > 0); - - last_savepoint = static_cast<fts_savepoint_t*>( - ib_vector_last(fts_trx->savepoints)); - savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap); - - if (last_savepoint->tables != NULL) { - fts_savepoint_copy(last_savepoint, savepoint); - } -} - -/*********************************************************************//** -Lookup a savepoint instance by name. 
-@return ULINT_UNDEFINED if not found */ -UNIV_INLINE -ulint -fts_savepoint_lookup( -/*==================*/ - ib_vector_t* savepoints, /*!< in: savepoints */ - const char* name) /*!< in: savepoint name */ -{ - ulint i; - - ut_a(ib_vector_size(savepoints) > 0); - - for (i = 1; i < ib_vector_size(savepoints); ++i) { - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_get(savepoints, i)); - - if (strcmp(name, savepoint->name) == 0) { - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/*********************************************************************//** -Release the savepoint data identified by name. All savepoints created -after the named savepoint are kept. -@return DB_SUCCESS or error code */ -UNIV_INTERN -void -fts_savepoint_release( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - const char* name) /*!< in: savepoint name */ -{ - ut_a(name != NULL); - - ib_vector_t* savepoints = trx->fts_trx->savepoints; - - ut_a(ib_vector_size(savepoints) > 0); - - ulint i = fts_savepoint_lookup(savepoints, name); - if (i != ULINT_UNDEFINED) { - ut_a(i >= 1); - - fts_savepoint_t* savepoint; - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_get(savepoints, i)); - - if (i == ib_vector_size(savepoints) - 1) { - /* If the savepoint is the last, we save its - tables to the previous savepoint. */ - fts_savepoint_t* prev_savepoint; - prev_savepoint = static_cast<fts_savepoint_t*>( - ib_vector_get(savepoints, i - 1)); - - ib_rbt_t* tables = savepoint->tables; - savepoint->tables = prev_savepoint->tables; - prev_savepoint->tables = tables; - } - - fts_savepoint_free(savepoint); - ib_vector_remove(savepoints, *(void**)savepoint); - - /* Make sure we don't delete the implied savepoint. */ - ut_a(ib_vector_size(savepoints) > 0); - } -} - -/**********************************************************************//** -Refresh last statement savepoint. 
*/ -UNIV_INTERN -void -fts_savepoint_laststmt_refresh( -/*===========================*/ - trx_t* trx) /*!< in: transaction */ -{ - - fts_trx_t* fts_trx; - fts_savepoint_t* savepoint; - - fts_trx = trx->fts_trx; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_pop(fts_trx->last_stmt)); - fts_savepoint_free(savepoint); - - ut_ad(ib_vector_is_empty(fts_trx->last_stmt)); - savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL); -} - -/******************************************************************** -Undo the Doc ID add/delete operations in last stmt */ -static -void -fts_undo_last_stmt( -/*===============*/ - fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */ - fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */ -{ - ib_rbt_t* s_rows; - ib_rbt_t* l_rows; - const ib_rbt_node_t* node; - - l_rows = l_ftt->rows; - s_rows = s_ftt->rows; - - for (node = rbt_first(l_rows); - node; - node = rbt_next(l_rows, node)) { - fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node); - ib_rbt_bound_t parent; - - rbt_search(s_rows, &parent, &(l_row->doc_id)); - - if (parent.result == 0) { - fts_trx_row_t* s_row = rbt_value( - fts_trx_row_t, parent.last); - - switch (l_row->state) { - case FTS_INSERT: - ut_free(rbt_remove_node(s_rows, parent.last)); - break; - - case FTS_DELETE: - if (s_row->state == FTS_NOTHING) { - s_row->state = FTS_INSERT; - } else if (s_row->state == FTS_DELETE) { - ut_free(rbt_remove_node( - s_rows, parent.last)); - } - break; - - /* FIXME: Check if FTS_MODIFY need to be addressed */ - case FTS_MODIFY: - case FTS_NOTHING: - break; - default: - ut_error; - } - } - } -} - -/**********************************************************************//** -Rollback to savepoint indentified by name. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -void -fts_savepoint_rollback_last_stmt( -/*=============================*/ - trx_t* trx) /*!< in: transaction */ -{ - ib_vector_t* savepoints; - fts_savepoint_t* savepoint; - fts_savepoint_t* last_stmt; - fts_trx_t* fts_trx; - ib_rbt_bound_t parent; - const ib_rbt_node_t* node; - ib_rbt_t* l_tables; - ib_rbt_t* s_tables; - - fts_trx = trx->fts_trx; - savepoints = fts_trx->savepoints; - - savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints)); - last_stmt = static_cast<fts_savepoint_t*>( - ib_vector_last(fts_trx->last_stmt)); - - l_tables = last_stmt->tables; - s_tables = savepoint->tables; - - for (node = rbt_first(l_tables); - node; - node = rbt_next(l_tables, node)) { - - fts_trx_table_t** l_ftt; - - l_ftt = rbt_value(fts_trx_table_t*, node); - - rbt_search_cmp( - s_tables, &parent, &(*l_ftt)->table->id, - fts_trx_table_id_cmp, NULL); - - if (parent.result == 0) { - fts_trx_table_t** s_ftt; - - s_ftt = rbt_value(fts_trx_table_t*, parent.last); - - fts_undo_last_stmt(*s_ftt, *l_ftt); - } - } -} - -/**********************************************************************//** -Rollback to savepoint indentified by name. -@return DB_SUCCESS or error code */ -UNIV_INTERN -void -fts_savepoint_rollback( -/*===================*/ - trx_t* trx, /*!< in: transaction */ - const char* name) /*!< in: savepoint name */ -{ - ulint i; - ib_vector_t* savepoints; - - ut_a(name != NULL); - - savepoints = trx->fts_trx->savepoints; - - /* We pop all savepoints from the the top of the stack up to - and including the instance that was found. 
*/ - i = fts_savepoint_lookup(savepoints, name); - - if (i != ULINT_UNDEFINED) { - fts_savepoint_t* savepoint; - - ut_a(i > 0); - - while (ib_vector_size(savepoints) > i) { - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_pop(savepoints)); - - if (savepoint->name != NULL) { - /* Since name was allocated on the heap, the - memory will be released when the transaction - completes. */ - savepoint->name = NULL; - - fts_savepoint_free(savepoint); - } - } - - /* Pop all a elements from the top of the stack that may - have been released. We have to be careful that we don't - delete the implied savepoint. */ - - for (savepoint = static_cast<fts_savepoint_t*>( - ib_vector_last(savepoints)); - ib_vector_size(savepoints) > 1 - && savepoint->name == NULL; - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_last(savepoints))) { - - ib_vector_pop(savepoints); - } - - /* Make sure we don't delete the implied savepoint. */ - ut_a(ib_vector_size(savepoints) > 0); - - /* Restore the savepoint. */ - fts_savepoint_take(trx, trx->fts_trx, name); - } -} - -/**********************************************************************//** -Check if a table is an FTS auxiliary table name. -@return TRUE if the name matches an auxiliary table name pattern */ -static -ibool -fts_is_aux_table_name( -/*==================*/ - fts_aux_table_t*table, /*!< out: table info */ - const char* name, /*!< in: table name */ - ulint len) /*!< in: length of table name */ -{ - const char* ptr; - char* end; - char my_name[MAX_FULL_NAME_LEN + 1]; - - ut_ad(len <= MAX_FULL_NAME_LEN); - ut_memcpy(my_name, name, len); - my_name[len] = 0; - end = my_name + len; - - ptr = static_cast<const char*>(memchr(my_name, '/', len)); - - if (ptr != NULL) { - /* We will start the match after the '/' */ - ++ptr; - len = end - ptr; - } - - /* All auxiliary tables are prefixed with "FTS_" and the name - length will be at the very least greater than 20 bytes. 
*/ - if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) { - ulint i; - - /* Skip the prefix. */ - ptr += 4; - len -= 4; - - /* Try and read the table id. */ - if (!fts_read_object_id(&table->parent_id, ptr)) { - return(FALSE); - } - - /* Skip the table id. */ - ptr = static_cast<const char*>(memchr(ptr, '_', len)); - - if (ptr == NULL) { - return(FALSE); - } - - /* Skip the underscore. */ - ++ptr; - ut_a(end > ptr); - len = end - ptr; - - /* First search the common table suffix array. */ - for (i = 0; fts_common_tables[i] != NULL; ++i) { - - if (strncmp(ptr, fts_common_tables[i], len) == 0) { - return(TRUE); - } - } - - /* Could be obsolete common tables. */ - if (strncmp(ptr, "ADDED", len) == 0 - || strncmp(ptr, "STOPWORDS", len) == 0) { - return(true); - } - - /* Try and read the index id. */ - if (!fts_read_object_id(&table->index_id, ptr)) { - return(FALSE); - } - - /* Skip the table id. */ - ptr = static_cast<const char*>(memchr(ptr, '_', len)); - - if (ptr == NULL) { - return(FALSE); - } - - /* Skip the underscore. */ - ++ptr; - ut_a(end > ptr); - len = end - ptr; - - /* Search the FT index specific array. */ - for (i = 0; fts_index_selector[i].value; ++i) { - - if (strncmp(ptr, fts_get_suffix(i), len) == 0) { - return(TRUE); - } - } - - /* Other FT index specific table(s). */ - if (strncmp(ptr, "DOC_ID", len) == 0) { - return(TRUE); - } - } - - return(FALSE); -} - -/**********************************************************************//** -Callback function to read a single table ID column. -@return Always return TRUE */ -static -ibool -fts_read_tables( -/*============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to ib_vector_t */ -{ - int i; - fts_aux_table_t*table; - mem_heap_t* heap; - ibool done = FALSE; - ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg); - sel_node_t* sel_node = static_cast<sel_node_t*>(row); - que_node_t* exp = sel_node->select_list; - - /* Must be a heap allocated vector. 
*/ - ut_a(tables->allocator->arg != NULL); - - /* We will use this heap for allocating strings. */ - heap = static_cast<mem_heap_t*>(tables->allocator->arg); - table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL)); - - memset(table, 0x0, sizeof(*table)); - - /* Iterate over the columns and read the values. */ - for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) { - - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - ut_a(len != UNIV_SQL_NULL); - - /* Note: The column numbers below must match the SELECT */ - switch (i) { - case 0: /* NAME */ - - if (!fts_is_aux_table_name( - table, static_cast<const char*>(data), len)) { - ib_vector_pop(tables); - done = TRUE; - break; - } - - table->name = static_cast<char*>( - mem_heap_alloc(heap, len + 1)); - memcpy(table->name, data, len); - table->name[len] = 0; - break; - - case 1: /* ID */ - ut_a(len == 8); - table->id = mach_read_from_8( - static_cast<const byte*>(data)); - break; - - default: - ut_error; - } - } - - return(TRUE); -} - -/******************************************************************//** -Callback that sets a hex formatted FTS table's flags2 in -SYS_TABLES. The flags is stored in MIX_LEN column. -@return FALSE if all OK */ -static -ibool -fts_set_hex_format( -/*===============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: bool set/unset flag */ -{ - sel_node_t* node = static_cast<sel_node_t*>(row); - dfield_t* dfield = que_node_get_val(node->select_list); - - ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT); - ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t)); - /* There should be at most one matching record. So the value - must be the default value. 
*/ - ut_ad(mach_read_from_4(static_cast<byte*>(user_arg)) - == ULINT32_UNDEFINED); - - ulint flags2 = mach_read_from_4( - static_cast<byte*>(dfield_get_data(dfield))); - - flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; - - mach_write_to_4(static_cast<byte*>(user_arg), flags2); - - return(FALSE); -} - -/*****************************************************************//** -Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES. -@return DB_SUCCESS or error code. */ -UNIV_INTERN -dberr_t -fts_update_hex_format_flag( -/*=======================*/ - trx_t* trx, /*!< in/out: transaction that - covers the update */ - table_id_t table_id, /*!< in: Table for which we want - to set the root table->flags2 */ - bool dict_locked) /*!< in: set to true if the - caller already owns the - dict_sys_t::mutex. */ -{ - pars_info_t* info; - ib_uint32_t flags2; - - static const char sql[] = - "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n" - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS\n" - " SELECT MIX_LEN " - " FROM SYS_TABLES " - " WHERE ID = :table_id FOR UPDATE;" - "\n" - "BEGIN\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "UPDATE SYS_TABLES" - " SET MIX_LEN = :flags2" - " WHERE ID = :table_id;\n" - "CLOSE c;\n" - "END;\n"; - - flags2 = ULINT32_UNDEFINED; - - info = pars_info_create(); - - pars_info_add_ull_literal(info, "table_id", table_id); - pars_info_bind_int4_literal(info, "flags2", &flags2); - - pars_info_bind_function( - info, "my_func", fts_set_hex_format, &flags2); - - if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - } - - dberr_t err = que_eval_sql(info, sql, !dict_locked, trx); - - ut_a(flags2 != ULINT32_UNDEFINED); - - return (err); -} - -/*********************************************************************//** -Rename an aux table to HEX format. 
It's called when "%016llu" is used -to format an object id in table name, which only happens in Windows. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_rename_one_aux_table_to_hex_format( -/*===================================*/ - trx_t* trx, /*!< in: transaction */ - const fts_aux_table_t* aux_table, /*!< in: table info */ - const dict_table_t* parent_table) /*!< in: parent table name */ -{ - const char* ptr; - fts_table_t fts_table; - char* new_name; - dberr_t error; - - ptr = strchr(aux_table->name, '/'); - ut_a(ptr != NULL); - ++ptr; - /* Skip "FTS_", table id and underscore */ - for (ulint i = 0; i < 2; ++i) { - ptr = strchr(ptr, '_'); - ut_a(ptr != NULL); - ++ptr; - } - - fts_table.suffix = NULL; - if (aux_table->index_id == 0) { - fts_table.type = FTS_COMMON_TABLE; - - for (ulint i = 0; fts_common_tables[i] != NULL; ++i) { - if (strcmp(ptr, fts_common_tables[i]) == 0) { - fts_table.suffix = fts_common_tables[i]; - break; - } - } - } else { - fts_table.type = FTS_INDEX_TABLE; - - /* Skip index id and underscore */ - ptr = strchr(ptr, '_'); - ut_a(ptr != NULL); - ++ptr; - - for (ulint i = 0; fts_index_selector[i].value; ++i) { - if (strcmp(ptr, fts_get_suffix(i)) == 0) { - fts_table.suffix = fts_get_suffix(i); - break; - } - } - } - - ut_a(fts_table.suffix != NULL); - - fts_table.parent = parent_table->name; - fts_table.table_id = aux_table->parent_id; - fts_table.index_id = aux_table->index_id; - fts_table.table = parent_table; - - new_name = fts_get_table_name(&fts_table); - ut_ad(strcmp(new_name, aux_table->name) != 0); - - if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - } - - error = row_rename_table_for_mysql(aux_table->name, new_name, trx, - FALSE); - - if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to rename aux table \'%s\' to " - "new format \'%s\'. 
", - aux_table->name, new_name); - } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Renamed aux table \'%s\' to \'%s\'.", - aux_table->name, new_name); - } - - mem_free(new_name); - - return (error); -} - -/**********************************************************************//** -Rename all aux tables of a parent table to HEX format. Also set aux tables' -flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME. -It's called when "%016llu" is used to format an object id in table name, -which only happens in Windows. -Note the ids in tables are correct but the names are old ambiguous ones. - -This function should make sure that either all the parent table and aux tables -are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_rename_aux_tables_to_hex_format_low( -/*====================================*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* parent_table, /*!< in: parent table */ - ib_vector_t* tables) /*!< in: aux tables to rename. 
*/ -{ - dberr_t error; - ulint count; - - ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME)); - ut_ad(!ib_vector_is_empty(tables)); - - error = fts_update_hex_format_flag(trx, parent_table->id, true); - - if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting parent table %s to hex format failed.", - parent_table->name); - - fts_sql_rollback(trx); - return (error); - } - - DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); - - for (count = 0; count < ib_vector_size(tables); ++count) { - dict_table_t* table; - fts_aux_table_t* aux_table; - - aux_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, count)); - - table = dict_table_open_on_id(aux_table->id, TRUE, - DICT_TABLE_OP_NORMAL); - - ut_ad(table != NULL); - ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME)); - - /* Set HEX_NAME flag here to make sure we can get correct - new table name in following function */ - DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); - error = fts_rename_one_aux_table_to_hex_format(trx, - aux_table, parent_table); - /* We will rollback the trx if the error != DB_SUCCESS, - so setting the flag here is the same with setting it in - row_rename_table_for_mysql */ - DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;); - - if (error != DB_SUCCESS) { - dict_table_close(table, TRUE, FALSE); - - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to rename one aux table %s " - "Will revert all successful rename " - "operations.", aux_table->name); - - fts_sql_rollback(trx); - break; - } - - error = fts_update_hex_format_flag(trx, aux_table->id, true); - dict_table_close(table, TRUE, FALSE); - - if (error != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting aux table %s to hex format failed.", - aux_table->name); - - fts_sql_rollback(trx); - break; - } - } - - if (error != DB_SUCCESS) { - ut_ad(count != ib_vector_size(tables)); - /* If rename fails, thr trx would be rolled back, we can't - use it any more, we'll start a new 
background trx to do - the reverting. */ - ut_a(trx->state == TRX_STATE_NOT_STARTED); - bool not_rename = false; - - /* Try to revert those succesful rename operations - in order to revert the ibd file rename. */ - for (ulint i = 0; i <= count; ++i) { - dict_table_t* table; - fts_aux_table_t* aux_table; - trx_t* trx_bg; - dberr_t err; - - aux_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, i)); - - table = dict_table_open_on_id(aux_table->id, TRUE, - DICT_TABLE_OP_NORMAL); - ut_ad(table != NULL); - - if (not_rename) { - DICT_TF2_FLAG_UNSET(table, - DICT_TF2_FTS_AUX_HEX_NAME); - } - - if (!DICT_TF2_FLAG_IS_SET(table, - DICT_TF2_FTS_AUX_HEX_NAME)) { - dict_table_close(table, TRUE, FALSE); - continue; - } - - trx_bg = trx_allocate_for_background(); - trx_bg->op_info = "Revert half done rename"; - trx_bg->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); - - DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME); - err = row_rename_table_for_mysql(table->name, - aux_table->name, - trx_bg, FALSE); - - trx_bg->dict_operation_lock_mode = 0; - dict_table_close(table, TRUE, FALSE); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert " - "table %s. Please revert manually.", - table->name); - fts_sql_rollback(trx_bg); - trx_free_for_background(trx_bg); - /* Continue to clear aux tables' flags2 */ - not_rename = true; - continue; - } - - fts_sql_commit(trx_bg); - trx_free_for_background(trx_bg); - } - - DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); - } - - return (error); -} - -/**********************************************************************//** -Convert an id, which is actually a decimal number but was regard as a HEX -from a string, to its real value. 
*/ -static -ib_id_t -fts_fake_hex_to_dec( -/*================*/ - ib_id_t id) /*!< in: number to convert */ -{ - ib_id_t dec_id = 0; - char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH]; - int ret MY_ATTRIBUTE((unused)); - - ret = sprintf(tmp_id, UINT64PFx, id); - ut_ad(ret == 16); -#ifdef _WIN32 - ret = sscanf(tmp_id, "%016llu", &dec_id); -#else - ret = sscanf(tmp_id, "%016" PRIu64, &dec_id); -#endif /* _WIN32 */ - ut_ad(ret == 1); - - return dec_id; -} - -/*********************************************************************//** -Compare two fts_aux_table_t parent_ids. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_check_aux_table_parent_id_cmp( -/*==============================*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ -{ - const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1); - const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2); - - return static_cast<int>(fa1->parent_id - fa2->parent_id); -} - -/** Mark all the fts index associated with the parent table as corrupted. -@param[in] trx transaction -@param[in, out] parent_table fts index associated with this parent table - will be marked as corrupted. */ -static -void -fts_parent_all_index_set_corrupt( - trx_t* trx, - dict_table_t* parent_table) -{ - fts_t* fts = parent_table->fts; - - if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - } - - for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) { - dict_index_t* index = static_cast<dict_index_t*>( - ib_vector_getp_const(fts->indexes, j)); - dict_set_corrupted(index, - trx, "DROP ORPHANED TABLE"); - } -} - -/** Mark the fts index which index id matches the id as corrupted. -@param[in] trx transaction -@param[in] id index id to search -@param[in, out] parent_table parent table to check with all - the index. 
*/ -static -void -fts_set_index_corrupt( - trx_t* trx, - index_id_t id, - dict_table_t* table) -{ - fts_t* fts = table->fts; - - if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - } - - for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) { - dict_index_t* index = static_cast<dict_index_t*>( - ib_vector_getp_const(fts->indexes, j)); - if (index->id == id) { - dict_set_corrupted(index, trx, - "DROP ORPHANED TABLE"); - break; - } - } -} - -/** Check the index for the aux table is corrupted. -@param[in] aux_table auxiliary table -@retval nonzero if index is corrupted, zero for valid index */ -static -ulint -fts_check_corrupt_index( - fts_aux_table_t* aux_table) -{ - dict_table_t* table; - dict_index_t* index; - table = dict_table_open_on_id( - aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL); - - if (table == NULL) { - return(0); - } - - for (index = UT_LIST_GET_FIRST(table->indexes); - index; - index = UT_LIST_GET_NEXT(indexes, index)) { - if (index->id == aux_table->index_id) { - ut_ad(index->type & DICT_FTS); - dict_table_close(table, true, false); - return(dict_index_is_corrupted(index)); - } - } - - dict_table_close(table, true, false); - return(0); -} - -/* Get parent table name if it's a fts aux table -@param[in] aux_table_name aux table name -@param[in] aux_table_len aux table length -@return parent table name, or NULL */ -char* -fts_get_parent_table_name( - const char* aux_table_name, - ulint aux_table_len) -{ - fts_aux_table_t aux_table; - char* parent_table_name = NULL; - - if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) { - dict_table_t* parent_table; - - parent_table = dict_table_open_on_id( - aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL); - - if (parent_table != NULL) { - parent_table_name = mem_strdupl( - parent_table->name, - strlen(parent_table->name)); - - dict_table_close(parent_table, TRUE, FALSE); - } - } - - return(parent_table_name); -} - -/** Check the 
validity of the parent table. -@param[in] aux_table auxiliary table -@return true if it is a valid table or false if it is not */ -static -bool -fts_valid_parent_table( - const fts_aux_table_t* aux_table) -{ - dict_table_t* parent_table; - bool valid = false; - - parent_table = dict_table_open_on_id( - aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL); - - if (parent_table != NULL && parent_table->fts != NULL) { - if (aux_table->index_id == 0) { - valid = true; - } else { - index_id_t id = aux_table->index_id; - dict_index_t* index; - - /* Search for the FT index in the table's list. */ - for (index = UT_LIST_GET_FIRST(parent_table->indexes); - index; - index = UT_LIST_GET_NEXT(indexes, index)) { - if (index->id == id) { - valid = true; - break; - } - - } - } - } - - if (parent_table) { - dict_table_close(parent_table, TRUE, FALSE); - } - - return(valid); -} - -/** Try to rename all aux tables of the specified parent table. -@param[in] aux_tables aux_tables to be renamed -@param[in] parent_table parent table of all aux - tables stored in tables. */ -static -void -fts_rename_aux_tables_to_hex_format( - ib_vector_t* aux_tables, - dict_table_t* parent_table) -{ - dberr_t err; - trx_t* trx_rename = trx_allocate_for_background(); - trx_rename->op_info = "Rename aux tables to hex format"; - trx_rename->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE); - - err = fts_rename_aux_tables_to_hex_format_low(trx_rename, - parent_table, aux_tables); - - trx_rename->dict_operation_lock_mode = 0; - - if (err != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_WARN, - "Rollback operations on all aux tables of table %s. " - "All the fts index associated with the table are " - "marked as corrupted. Please rebuild the " - "index again.", parent_table->name); - fts_sql_rollback(trx_rename); - - /* Corrupting the fts index related to parent table. 
*/ - trx_t* trx_corrupt; - trx_corrupt = trx_allocate_for_background(); - trx_corrupt->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE); - fts_parent_all_index_set_corrupt(trx_corrupt, parent_table); - trx_corrupt->dict_operation_lock_mode = 0; - fts_sql_commit(trx_corrupt); - trx_free_for_background(trx_corrupt); - } else { - fts_sql_commit(trx_rename); - } - - trx_free_for_background(trx_rename); - ib_vector_reset(aux_tables); -} - -/** Set the hex format flag for the parent table. -@param[in, out] parent_table parent table -@param[in] trx transaction */ -static -void -fts_set_parent_hex_format_flag( - dict_table_t* parent_table, - trx_t* trx) -{ - if (!DICT_TF2_FLAG_IS_SET(parent_table, - DICT_TF2_FTS_AUX_HEX_NAME)) { - DBUG_EXECUTE_IF("parent_table_flag_fail", - ib_logf(IB_LOG_LEVEL_FATAL, - "Setting parent table %s to hex format " - "failed. Please try to restart the server " - "again, if it doesn't work, the system " - "tables might be corrupted.", - parent_table->name); - return;); - - dberr_t err = fts_update_hex_format_flag( - trx, parent_table->id, true); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Setting parent table %s to hex format " - "failed. Please try to restart the server " - "again, if it doesn't work, the system " - "tables might be corrupted.", - parent_table->name); - } else { - DICT_TF2_FLAG_SET( - parent_table, DICT_TF2_FTS_AUX_HEX_NAME); - } - } -} - -/** Drop the obsolete auxilary table. -@param[in] tables tables to be dropped. 
*/
static
void
fts_drop_obsolete_aux_table_from_vector(
	ib_vector_t*	tables)
{
	const ulint	n_tables = ib_vector_size(tables);

	/* Each table is dropped in a background transaction of its own,
	so one failure does not affect the others. */
	for (ulint pos = 0; pos < n_tables; ++pos) {

		fts_aux_table_t*	obsolete
			= static_cast<fts_aux_table_t*>(
				ib_vector_get(tables, pos));

		trx_t*	trx_drop = trx_allocate_for_background();

		trx_drop->op_info = "Drop obsolete aux tables";
		trx_drop->dict_operation_lock_mode = RW_X_LATCH;
		trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);

		const dberr_t	drop_err = row_drop_table_for_mysql(
			obsolete->name, trx_drop, false, true);

		trx_drop->dict_operation_lock_mode = 0;

		if (drop_err == DB_SUCCESS) {
			ib_logf(IB_LOG_LEVEL_INFO,
				"Dropped obsolete aux table '%s'.",
				obsolete->name);

			fts_sql_commit(trx_drop);
		} else {
			/* We don't need to worry about the
			failure, since server would try to
			drop it on next restart, even if
			the table was broken. */
			ib_logf(IB_LOG_LEVEL_WARN,
				"Fail to drop obsolete aux table '%s', which "
				"is harmless. will try to drop it on next "
				"restart.", obsolete->name);

			fts_sql_rollback(trx_drop);
		}

		trx_free_for_background(trx_drop);
	}
}

/** Drop all the auxiliary table present in the vector.
-@param[in] trx transaction -@param[in] tables tables to be dropped */ -static -void -fts_drop_aux_table_from_vector( - trx_t* trx, - ib_vector_t* tables) -{ - for (ulint count = 0; count < ib_vector_size(tables); - ++count) { - fts_aux_table_t* aux_drop_table; - aux_drop_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, count)); - - /* Check for the validity of the parent table */ - if (!fts_valid_parent_table(aux_drop_table)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Parent table of FTS auxiliary table %s not " - "found.", aux_drop_table->name); - dberr_t err = fts_drop_table(trx, aux_drop_table->name); - if (err == DB_FAIL) { - char* path = fil_make_ibd_name( - aux_drop_table->name, false); - os_file_delete_if_exists(innodb_file_data_key, - path); - mem_free(path); - } - } - } -} - -/**********************************************************************//** -Check and drop all orphaned FTS auxiliary tables, those that don't have -a parent table or FTS index defined on them. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull)) -void -fts_check_and_drop_orphaned_tables( -/*===============================*/ - trx_t* trx, /*!< in: transaction */ - ib_vector_t* tables) /*!< in: tables to check */ -{ - mem_heap_t* heap; - ib_vector_t* aux_tables_to_rename; - ib_vector_t* invalid_aux_tables; - ib_vector_t* valid_aux_tables; - ib_vector_t* drop_aux_tables; - ib_vector_t* obsolete_aux_tables; - ib_alloc_t* heap_alloc; - - heap = mem_heap_create(1024); - heap_alloc = ib_heap_allocator_create(heap); - - /* We store all aux tables belonging to the same parent table here, - and rename all these tables in a batch mode. */ - aux_tables_to_rename = ib_vector_create(heap_alloc, - sizeof(fts_aux_table_t), 128); - - /* We store all fake auxiliary table and orphaned table here. */ - invalid_aux_tables = ib_vector_create(heap_alloc, - sizeof(fts_aux_table_t), 128); - - /* We store all valid aux tables. 
We use this to filter the - fake auxiliary table from invalid auxiliary tables. */ - valid_aux_tables = ib_vector_create(heap_alloc, - sizeof(fts_aux_table_t), 128); - - /* We store all auxiliary tables to be dropped. */ - drop_aux_tables = ib_vector_create(heap_alloc, - sizeof(fts_aux_table_t), 128); - - /* We store all obsolete auxiliary tables to be dropped. */ - obsolete_aux_tables = ib_vector_create(heap_alloc, - sizeof(fts_aux_table_t), 128); - - /* Sort by parent_id first, in case rename will fail */ - ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp); - - for (ulint i = 0; i < ib_vector_size(tables); ++i) { - dict_table_t* parent_table; - fts_aux_table_t* aux_table; - bool drop = false; - dict_table_t* table; - fts_aux_table_t* next_aux_table = NULL; - ib_id_t orig_parent_id = 0; - ib_id_t orig_index_id = 0; - bool rename = false; - - aux_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, i)); - - table = dict_table_open_on_id( - aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); - orig_parent_id = aux_table->parent_id; - orig_index_id = aux_table->index_id; - - if (table == NULL || strcmp(table->name, aux_table->name)) { - - bool fake_aux = false; - - if (table != NULL) { - dict_table_close(table, TRUE, FALSE); - } - - if (i + 1 < ib_vector_size(tables)) { - next_aux_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, i + 1)); - } - - /* To know whether aux table is fake fts or - orphan fts table. */ - for (ulint count = 0; - count < ib_vector_size(valid_aux_tables); - count++) { - fts_aux_table_t* valid_aux; - valid_aux = static_cast<fts_aux_table_t*>( - ib_vector_get(valid_aux_tables, count)); - if (strcmp(valid_aux->name, - aux_table->name) == 0) { - fake_aux = true; - break; - } - } - - /* All aux tables of parent table, whose id is - last_parent_id, have been checked, try to rename - them if necessary. 
*/ - if ((next_aux_table == NULL - || orig_parent_id != next_aux_table->parent_id) - && (!ib_vector_is_empty(aux_tables_to_rename))) { - - ulint parent_id = fts_fake_hex_to_dec( - aux_table->parent_id); - - parent_table = dict_table_open_on_id( - parent_id, TRUE, - DICT_TABLE_OP_NORMAL); - - fts_rename_aux_tables_to_hex_format( - aux_tables_to_rename, parent_table); - - dict_table_close(parent_table, TRUE, - FALSE); - } - - /* If the aux table is fake aux table. Skip it. */ - if (!fake_aux) { - ib_vector_push(invalid_aux_tables, aux_table); - } - - continue; - } else if (!DICT_TF2_FLAG_IS_SET(table, - DICT_TF2_FTS_AUX_HEX_NAME)) { - - aux_table->parent_id = fts_fake_hex_to_dec( - aux_table->parent_id); - - if (aux_table->index_id != 0) { - aux_table->index_id = fts_fake_hex_to_dec( - aux_table->index_id); - } - - ut_ad(aux_table->id > aux_table->parent_id); - - /* Check whether parent table id and index id - are stored as decimal format. */ - if (fts_valid_parent_table(aux_table)) { - - parent_table = dict_table_open_on_id( - aux_table->parent_id, true, - DICT_TABLE_OP_NORMAL); - - ut_ad(parent_table != NULL); - ut_ad(parent_table->fts != NULL); - - if (!DICT_TF2_FLAG_IS_SET( - parent_table, - DICT_TF2_FTS_AUX_HEX_NAME)) { - rename = true; - } - - dict_table_close(parent_table, TRUE, FALSE); - } - - if (!rename) { - /* Reassign the original value of - aux table if it is not in decimal format */ - aux_table->parent_id = orig_parent_id; - aux_table->index_id = orig_index_id; - } - } - - if (table != NULL) { - dict_table_close(table, true, false); - } - - if (!rename) { - /* Check the validity of the parent table. */ - if (!fts_valid_parent_table(aux_table)) { - drop = true; - } - } - - /* Filter out the fake aux table by comparing with the - current valid auxiliary table name . 
*/ - for (ulint count = 0; - count < ib_vector_size(invalid_aux_tables); count++) { - fts_aux_table_t* invalid_aux; - invalid_aux = static_cast<fts_aux_table_t*>( - ib_vector_get(invalid_aux_tables, count)); - if (strcmp(invalid_aux->name, aux_table->name) == 0) { - ib_vector_remove( - invalid_aux_tables, - *reinterpret_cast<void**>(invalid_aux)); - break; - } - } - - ib_vector_push(valid_aux_tables, aux_table); - - /* If the index associated with aux table is corrupted, - skip it. */ - if (fts_check_corrupt_index(aux_table) > 0) { - - if (i + 1 < ib_vector_size(tables)) { - next_aux_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, i + 1)); - } - - if (next_aux_table == NULL - || orig_parent_id != next_aux_table->parent_id) { - - parent_table = dict_table_open_on_id( - aux_table->parent_id, TRUE, - DICT_TABLE_OP_NORMAL); - - if (!ib_vector_is_empty(aux_tables_to_rename)) { - fts_rename_aux_tables_to_hex_format( - aux_tables_to_rename, parent_table); - - } else { - fts_set_parent_hex_format_flag( - parent_table, trx); - } - - dict_table_close(parent_table, TRUE, FALSE); - } - - continue; - } - - parent_table = dict_table_open_on_id( - aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL); - - if (drop) { - ib_vector_push(drop_aux_tables, aux_table); - } else { - if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) { - - /* Current table could be one of the three - obsolete tables, in this case, we should - always try to drop it but not rename it. - This could happen when we try to upgrade - from older server to later one, which doesn't - contain these obsolete tables. 
*/ - ib_vector_push(obsolete_aux_tables, aux_table); - continue; - } - } - - /* If the aux table is in decimal format, we should - rename it, so push it to aux_tables_to_rename */ - if (!drop && rename) { - ib_vector_push(aux_tables_to_rename, aux_table); - } - - if (i + 1 < ib_vector_size(tables)) { - next_aux_table = static_cast<fts_aux_table_t*>( - ib_vector_get(tables, i + 1)); - } - - if ((next_aux_table == NULL - || orig_parent_id != next_aux_table->parent_id) - && !ib_vector_is_empty(aux_tables_to_rename)) { - /* All aux tables of parent table, whose id is - last_parent_id, have been checked, try to rename - them if necessary. We had better use a new background - trx to rename rather than the original trx, in case - any failure would cause a complete rollback. */ - ut_ad(rename); - ut_ad(!DICT_TF2_FLAG_IS_SET( - parent_table, DICT_TF2_FTS_AUX_HEX_NAME)); - - fts_rename_aux_tables_to_hex_format( - aux_tables_to_rename,parent_table); - } - - /* The IDs are already in correct hex format. 
*/ - if (!drop && !rename) { - dict_table_t* table; - - table = dict_table_open_on_id( - aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); - if (table != NULL - && strcmp(table->name, aux_table->name)) { - dict_table_close(table, TRUE, FALSE); - table = NULL; - } - - if (table != NULL - && !DICT_TF2_FLAG_IS_SET( - table, - DICT_TF2_FTS_AUX_HEX_NAME)) { - - DBUG_EXECUTE_IF("aux_table_flag_fail", - ib_logf(IB_LOG_LEVEL_WARN, - "Setting aux table %s to hex " - "format failed.", table->name); - fts_set_index_corrupt( - trx, aux_table->index_id, - parent_table); - goto table_exit;); - - dberr_t err = fts_update_hex_format_flag( - trx, table->id, true); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting aux table %s to hex " - "format failed.", table->name); - - fts_set_index_corrupt( - trx, aux_table->index_id, - parent_table); - } else { - DICT_TF2_FLAG_SET(table, - DICT_TF2_FTS_AUX_HEX_NAME); - } - } -#ifndef DBUG_OFF -table_exit: -#endif /* !DBUG_OFF */ - - if (table != NULL) { - dict_table_close(table, TRUE, FALSE); - } - - ut_ad(parent_table != NULL); - - fts_set_parent_hex_format_flag( - parent_table, trx); - } - - if (parent_table != NULL) { - dict_table_close(parent_table, TRUE, FALSE); - } - } - - fts_drop_aux_table_from_vector(trx, invalid_aux_tables); - fts_drop_aux_table_from_vector(trx, drop_aux_tables); - fts_sql_commit(trx); - - fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables); - - /* Free the memory allocated at the beginning */ - if (heap != NULL) { - mem_heap_free(heap); - } -} - -/**********************************************************************//** -Drop all orphaned FTS auxiliary tables, those that don't have a parent -table or FTS index defined on them. 
*/
UNIV_INTERN
void
fts_drop_orphaned_tables(void)
/*==========================*/
{
	space_name_list_t	space_name_list;

	/* Note: We have to free the memory after we are done with the list. */
	dberr_t	error = fil_get_space_names(space_name_list);

	if (error == DB_OUT_OF_MEMORY) {
		ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
		ut_error;
	}

	mem_heap_t*	heap = mem_heap_create(1024);
	ib_alloc_t*	heap_alloc = ib_heap_allocator_create(heap);

	/* We store the table ids of all the FTS indexes that were found. */
	ib_vector_t*	tables = ib_vector_create(
		heap_alloc, sizeof(fts_aux_table_t), 128);

	/* Get the list of all known .ibd files and check for orphaned
	FTS auxiliary files in that list. We need to remove them because
	users can't map them back to table names and this will create
	unnecessary clutter. */

	space_name_list_t::iterator	it;

	for (it = space_name_list.begin();
	     it != space_name_list.end();
	     ++it) {

		/* Reserve a zeroed slot; it is given back when the name
		is not an aux table name. */
		fts_aux_table_t*	slot = static_cast<fts_aux_table_t*>(
			ib_vector_push(tables, NULL));

		memset(slot, 0x0, sizeof(*slot));

		if (!fts_is_aux_table_name(slot, *it, strlen(*it))) {
			ib_vector_pop(tables);
			continue;
		}

		ulint	name_len = strlen(*it);

		slot->id = fil_get_space_id_for_table(*it);

		/* We got this list from fil0fil.cc. The tablespace
		with this name must exist. */
		ut_a(slot->id != ULINT_UNDEFINED);

		slot->name = static_cast<char*>(
			mem_heap_dup(heap, *it, name_len + 1));

		slot->name[name_len] = 0;
	}

	trx_t*	trx = trx_allocate_for_background();
	trx->op_info = "dropping orphaned FTS tables";
	row_mysql_lock_data_dictionary(trx);

	pars_info_t*	info = pars_info_create();

	pars_info_bind_function(info, "my_func", fts_read_tables, tables);

	que_t*	graph = fts_parse_sql_no_dict_lock(
		NULL,
		info,
		"DECLARE FUNCTION my_func;\n"
		"DECLARE CURSOR c IS"
		" SELECT NAME, ID "
		" FROM SYS_TABLES;\n"
		"BEGIN\n"
		"\n"
		"OPEN c;\n"
		"WHILE 1 = 1 LOOP\n"
		" FETCH c INTO my_func();\n"
		" IF c % NOTFOUND THEN\n"
		" EXIT;\n"
		" END IF;\n"
		"END LOOP;\n"
		"CLOSE c;");

	/* Retry only on a lock wait timeout; success or any other error
	ends the loop. */
	bool	retry = true;

	while (retry) {
		error = fts_eval_sql(trx, graph);

		if (error == DB_SUCCESS) {
			fts_check_and_drop_orphaned_tables(trx, tables);
			retry = false;
			continue;
		}

		ib_vector_reset(tables);

		fts_sql_rollback(trx);

		ut_print_timestamp(stderr);

		if (error != DB_LOCK_WAIT_TIMEOUT) {
			ib_logf(IB_LOG_LEVEL_ERROR,
				"(%s) while reading SYS_TABLES.",
				ut_strerr(error));

			retry = false;
			continue;
		}

		ib_logf(IB_LOG_LEVEL_WARN,
			"lock wait timeout reading SYS_TABLES. "
			"Retrying!");

		trx->error_state = DB_SUCCESS;
	}

	que_graph_free(graph);

	row_mysql_unlock_data_dictionary(trx);

	trx_free_for_background(trx);

	if (heap != NULL) {
		mem_heap_free(heap);
	}

	/** Free the memory allocated to store the .ibd names. */
	for (it = space_name_list.begin();
	     it != space_name_list.end();
	     ++it) {

		delete[] *it;
	}
}

/**********************************************************************//**
Check whether user supplied stopword table is of the right format.
Caller is responsible to hold dictionary locks.
-@return the stopword column charset if qualifies */ -UNIV_INTERN -CHARSET_INFO* -fts_valid_stopword_table( -/*=====================*/ - const char* stopword_table_name) /*!< in: Stopword table - name */ -{ - dict_table_t* table; - dict_col_t* col = NULL; - - if (!stopword_table_name) { - return(NULL); - } - - table = dict_table_get_low(stopword_table_name); - - if (!table) { - fprintf(stderr, - "InnoDB: user stopword table %s does not exist.\n", - stopword_table_name); - - return(NULL); - } else { - const char* col_name; - - col_name = dict_table_get_col_name(table, 0); - - if (ut_strcmp(col_name, "value")) { - fprintf(stderr, - "InnoDB: invalid column name for stopword " - "table %s. Its first column must be named as " - "'value'.\n", stopword_table_name); - - return(NULL); - } - - col = dict_table_get_nth_col(table, 0); - - if (col->mtype != DATA_VARCHAR - && col->mtype != DATA_VARMYSQL) { - fprintf(stderr, - "InnoDB: invalid column type for stopword " - "table %s. Its first column must be of " - "varchar type\n", stopword_table_name); - - return(NULL); - } - } - - ut_ad(col); - - return(innobase_get_fts_charset( - static_cast<int>(col->prtype & DATA_MYSQL_TYPE_MASK), - static_cast<uint>(dtype_get_charset_coll(col->prtype)))); -} - -/**********************************************************************//** -This function loads the stopword into the FTS cache. It also -records/fetches stopword configuration to/from FTS configure -table, depending on whether we are creating or reloading the -FTS. 
-@return TRUE if load operation is successful */ -UNIV_INTERN -ibool -fts_load_stopword( -/*==============*/ - const dict_table_t* - table, /*!< in: Table with FTS */ - trx_t* trx, /*!< in: Transactions */ - const char* global_stopword_table, /*!< in: Global stopword table - name */ - const char* session_stopword_table, /*!< in: Session stopword table - name */ - ibool stopword_is_on, /*!< in: Whether stopword - option is turned on/off */ - ibool reload) /*!< in: Whether it is - for reloading FTS table */ -{ - fts_table_t fts_table; - fts_string_t str; - dberr_t error = DB_SUCCESS; - ulint use_stopword; - fts_cache_t* cache; - const char* stopword_to_use = NULL; - ibool new_trx = FALSE; - byte str_buffer[MAX_FULL_NAME_LEN + 1]; - - FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table); - - cache = table->fts->cache; - - if (!reload && !(cache->stopword_info.status - & STOPWORD_NOT_INIT)) { - return(TRUE); - } - - if (!trx) { - trx = trx_allocate_for_background(); - trx->op_info = "upload FTS stopword"; - new_trx = TRUE; - } - - /* First check whether stopword filtering is turned off */ - if (reload) { - error = fts_config_get_ulint( - trx, &fts_table, FTS_USE_STOPWORD, &use_stopword); - } else { - use_stopword = (ulint) stopword_is_on; - - error = fts_config_set_ulint( - trx, &fts_table, FTS_USE_STOPWORD, use_stopword); - } - - if (error != DB_SUCCESS) { - goto cleanup; - } - - /* If stopword is turned off, no need to continue to load the - stopword into cache, but still need to do initialization */ - if (!use_stopword) { - cache->stopword_info.status = STOPWORD_OFF; - goto cleanup; - } - - if (reload) { - /* Fetch the stopword table name from FTS config - table */ - str.f_n_char = 0; - str.f_str = str_buffer; - str.f_len = sizeof(str_buffer) - 1; - - error = fts_config_get_value( - trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str); - - if (error != DB_SUCCESS) { - goto cleanup; - } - - if (strlen((char*) str.f_str) > 0) { - stopword_to_use = (const 
char*) str.f_str; - } - } else { - stopword_to_use = (session_stopword_table) - ? session_stopword_table : global_stopword_table; - } - - if (stopword_to_use - && fts_load_user_stopword(table->fts, stopword_to_use, - &cache->stopword_info)) { - /* Save the stopword table name to the configure - table */ - if (!reload) { - str.f_n_char = 0; - str.f_str = (byte*) stopword_to_use; - str.f_len = ut_strlen(stopword_to_use); - - error = fts_config_set_value( - trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str); - } - } else { - /* Load system default stopword list */ - fts_load_default_stopword(&cache->stopword_info); - } - -cleanup: - if (new_trx) { - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - } else { - fts_sql_rollback(trx); - } - - trx_free_for_background(trx); - } - - if (!cache->stopword_info.cached_stopword) { - cache->stopword_info.cached_stopword = rbt_create( - sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp); - } - - return(error == DB_SUCCESS); -} - -/**********************************************************************//** -Callback function when we initialize the FTS at the start up -time. It recovers the maximum Doc IDs presented in the current table. 
-@return: always returns TRUE */ -static -ibool -fts_init_get_doc_id( -/*================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts cache */ -{ - doc_id_t doc_id = FTS_NULL_DOC_ID; - sel_node_t* node = static_cast<sel_node_t*>(row); - que_node_t* exp = node->select_list; - fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg); - - ut_ad(ib_vector_is_empty(cache->get_docs)); - - /* Copy each indexed column content into doc->text.f_str */ - if (exp) { - dfield_t* dfield = que_node_get_val(exp); - dtype_t* type = dfield_get_type(dfield); - void* data = dfield_get_data(dfield); - - ut_a(dtype_get_mtype(type) == DATA_INT); - - doc_id = static_cast<doc_id_t>(mach_read_from_8( - static_cast<const byte*>(data))); - - if (doc_id >= cache->next_doc_id) { - cache->next_doc_id = doc_id + 1; - } - } - - return(TRUE); -} - -/**********************************************************************//** -Callback function when we initialize the FTS at the start up -time. It recovers Doc IDs that have not sync-ed to the auxiliary -table, and require to bring them back into FTS index. 
-@return: always returns TRUE */ -static -ibool -fts_init_recover_doc( -/*=================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts cache */ -{ - - fts_doc_t doc; - ulint doc_len = 0; - ulint field_no = 0; - fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg); - doc_id_t doc_id = FTS_NULL_DOC_ID; - sel_node_t* node = static_cast<sel_node_t*>(row); - que_node_t* exp = node->select_list; - fts_cache_t* cache = get_doc->cache; - - fts_doc_init(&doc); - doc.found = TRUE; - - ut_ad(cache); - - /* Copy each indexed column content into doc->text.f_str */ - while (exp) { - dfield_t* dfield = que_node_get_val(exp); - ulint len = dfield_get_len(dfield); - - if (field_no == 0) { - dtype_t* type = dfield_get_type(dfield); - void* data = dfield_get_data(dfield); - - ut_a(dtype_get_mtype(type) == DATA_INT); - - doc_id = static_cast<doc_id_t>(mach_read_from_8( - static_cast<const byte*>(data))); - - field_no++; - exp = que_node_get_next(exp); - continue; - } - - if (len == UNIV_SQL_NULL) { - exp = que_node_get_next(exp); - continue; - } - - ut_ad(get_doc); - - if (!get_doc->index_cache->charset) { - ulint prtype = dfield->type.prtype; - - get_doc->index_cache->charset = - innobase_get_fts_charset( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype)); - } - - doc.charset = get_doc->index_cache->charset; - - if (dfield_is_ext(dfield)) { - dict_table_t* table = cache->sync->table; - ulint zip_size = dict_table_zip_size(table); - - doc.text.f_str = btr_copy_externally_stored_field( - &doc.text.f_len, - static_cast<byte*>(dfield_get_data(dfield)), - zip_size, len, - static_cast<mem_heap_t*>(doc.self_heap->arg), - NULL); - } else { - doc.text.f_str = static_cast<byte*>( - dfield_get_data(dfield)); - - doc.text.f_len = len; - } - - if (field_no == 1) { - fts_tokenize_document(&doc, NULL); - } else { - fts_tokenize_document_next(&doc, doc_len, NULL); - } - - exp = que_node_get_next(exp); - - doc_len += (exp) ? 
len + 1 : len; - - field_no++; - } - - fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens); - - fts_doc_free(&doc); - - cache->added++; - - if (doc_id >= cache->next_doc_id) { - cache->next_doc_id = doc_id + 1; - } - - return(TRUE); -} - -/**********************************************************************//** -This function brings FTS index in sync when FTS index is first -used. There are documents that have not yet sync-ed to auxiliary -tables from last server abnormally shutdown, we will need to bring -such document into FTS cache before any further operations -@return TRUE if all OK */ -UNIV_INTERN -ibool -fts_init_index( -/*===========*/ - dict_table_t* table, /*!< in: Table with FTS */ - ibool has_cache_lock) /*!< in: Whether we already have - cache lock */ -{ - dict_index_t* index; - doc_id_t start_doc; - fts_get_doc_t* get_doc = NULL; - fts_cache_t* cache = table->fts->cache; - bool need_init = false; - - ut_ad(!mutex_own(&dict_sys->mutex)); - - /* First check cache->get_docs is initialized */ - if (!has_cache_lock) { - rw_lock_x_lock(&cache->lock); - } - - rw_lock_x_lock(&cache->init_lock); - if (cache->get_docs == NULL) { - cache->get_docs = fts_get_docs_create(cache); - } - rw_lock_x_unlock(&cache->init_lock); - - if (table->fts->fts_status & ADDED_TABLE_SYNCED) { - goto func_exit; - } - - need_init = true; - - start_doc = cache->synced_doc_id; - - if (!start_doc) { - fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc); - cache->synced_doc_id = start_doc; - } - - /* No FTS index, this is the case when previous FTS index - dropped, and we re-initialize the Doc ID system for subsequent - insertion */ - if (ib_vector_is_empty(cache->get_docs)) { - index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME); - - ut_a(index); - - fts_doc_fetch_by_doc_id(NULL, start_doc, index, - FTS_FETCH_DOC_BY_ID_LARGE, - fts_init_get_doc_id, cache); - } else { - if (table->fts->cache->stopword_info.status - & STOPWORD_NOT_INIT) { - 
fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE); - } - - for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) { - get_doc = static_cast<fts_get_doc_t*>( - ib_vector_get(cache->get_docs, i)); - - index = get_doc->index_cache->index; - - fts_doc_fetch_by_doc_id(NULL, start_doc, index, - FTS_FETCH_DOC_BY_ID_LARGE, - fts_init_recover_doc, get_doc); - } - } - - table->fts->fts_status |= ADDED_TABLE_SYNCED; - - fts_get_docs_clear(cache->get_docs); - -func_exit: - if (!has_cache_lock) { - rw_lock_x_unlock(&cache->lock); - } - - if (need_init) { - mutex_enter(&dict_sys->mutex); - /* Register the table with the optimize thread. */ - fts_optimize_add_table(table); - mutex_exit(&dict_sys->mutex); - } - - return(TRUE); -} diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc deleted file mode 100644 index cb30122adcb..00000000000 --- a/storage/xtradb/fts/fts0opt.cc +++ /dev/null @@ -1,3246 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. All Rights reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file fts/fts0opt.cc -Full Text Search optimize thread - -Created 2007/03/27 Sunny Bains -Completed 2011/7/10 Sunny and Jimmy Yang - -***********************************************************************/ - -#include "fts0fts.h" -#include "row0sel.h" -#include "que0types.h" -#include "fts0priv.h" -#include "fts0types.h" -#include "ut0wqueue.h" -#include "srv0start.h" -#include "zlib.h" - -#ifndef UNIV_NONINL -#include "fts0types.ic" -#include "fts0vlc.ic" -#endif - -/** The FTS optimize thread's work queue. */ -static ib_wqueue_t* fts_optimize_wq; - -/** Time to wait for a message. */ -static const ulint FTS_QUEUE_WAIT_IN_USECS = 5000000; - -/** Default optimize interval in secs. */ -static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300; - -/** Server is shutting down, so does we exiting the optimize thread */ -static bool fts_opt_start_shutdown = false; - -/** Initial size of nodes in fts_word_t. */ -static const ulint FTS_WORD_NODES_INIT_SIZE = 64; - -/** Last time we did check whether system need a sync */ -static ib_time_t last_check_sync_time; - -#if 0 -/** Check each table in round robin to see whether they'd -need to be "optimized" */ -static ulint fts_optimize_sync_iterator = 0; -#endif - -/** State of a table within the optimization sub system. */ -enum fts_state_t { - FTS_STATE_LOADED, - FTS_STATE_RUNNING, - FTS_STATE_SUSPENDED, - FTS_STATE_DONE, - FTS_STATE_EMPTY -}; - -/** FTS optimize thread message types. 
*/ -enum fts_msg_type_t { - FTS_MSG_START, /*!< Start optimizing thread */ - - FTS_MSG_PAUSE, /*!< Pause optimizing thread */ - - FTS_MSG_STOP, /*!< Stop optimizing and exit thread */ - - FTS_MSG_ADD_TABLE, /*!< Add table to the optimize thread's - work queue */ - - FTS_MSG_OPTIMIZE_TABLE, /*!< Optimize a table */ - - FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize - threads work queue */ - FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */ -}; - -/** Compressed list of words that have been read from FTS INDEX -that needs to be optimized. */ -struct fts_zip_t { - lint status; /*!< Status of (un)/zip operation */ - - ulint n_words; /*!< Number of words compressed */ - - ulint block_sz; /*!< Size of a block in bytes */ - - ib_vector_t* blocks; /*!< Vector of compressed blocks */ - - ib_alloc_t* heap_alloc; /*!< Heap to use for allocations */ - - ulint pos; /*!< Offset into blocks */ - - ulint last_big_block; /*!< Offset of last block in the - blocks array that is of size - block_sz. Blocks beyond this offset - are of size FTS_MAX_WORD_LEN */ - - z_streamp zp; /*!< ZLib state */ - - /*!< The value of the last word read - from the FTS INDEX table. This is - used to discard duplicates */ - - fts_string_t word; /*!< UTF-8 string */ - - ulint max_words; /*!< maximum number of words to read - in one pase */ -}; - -/** Prepared statemets used during optimize */ -struct fts_optimize_graph_t { - /*!< Delete a word from FTS INDEX */ - que_t* delete_nodes_graph; - /*!< Insert a word into FTS INDEX */ - que_t* write_nodes_graph; - /*!< COMMIT a transaction */ - que_t* commit_graph; - /*!< Read the nodes from FTS_INDEX */ - que_t* read_nodes_graph; -}; - -/** Used by fts_optimize() to store state. 
*/ -struct fts_optimize_t { - trx_t* trx; /*!< The transaction used for all SQL */ - - ib_alloc_t* self_heap; /*!< Heap to use for allocations */ - - char* name_prefix; /*!< FTS table name prefix */ - - fts_table_t fts_index_table;/*!< Common table definition */ - - /*!< Common table definition */ - fts_table_t fts_common_table; - - dict_table_t* table; /*!< Table that has to be queried */ - - dict_index_t* index; /*!< The FTS index to be optimized */ - - fts_doc_ids_t* to_delete; /*!< doc ids to delete, we check against - this vector and purge the matching - entries during the optimizing - process. The vector entries are - sorted on doc id */ - - ulint del_pos; /*!< Offset within to_delete vector, - this is used to keep track of where - we are up to in the vector */ - - ibool done; /*!< TRUE when optimize finishes */ - - ib_vector_t* words; /*!< Word + Nodes read from FTS_INDEX, - it contains instances of fts_word_t */ - - fts_zip_t* zip; /*!< Words read from the FTS_INDEX */ - - fts_optimize_graph_t /*!< Prepared statements used during */ - graph; /*optimize */ - - ulint n_completed; /*!< Number of FTS indexes that have - been optimized */ - ibool del_list_regenerated; - /*!< BEING_DELETED list regenarated */ -}; - -/** Used by the optimize, to keep state during compacting nodes. */ -struct fts_encode_t { - doc_id_t src_last_doc_id;/*!< Last doc id read from src node */ - byte* src_ilist_ptr; /*!< Current ptr within src ilist */ -}; - -/** We use this information to determine when to start the optimize -cycle for a table. 
*/ -struct fts_slot_t { - dict_table_t* table; /*!< Table to optimize */ - - table_id_t table_id; /*!< Table id */ - - fts_state_t state; /*!< State of this slot */ - - ulint added; /*!< Number of doc ids added since the - last time this table was optimized */ - - ulint deleted; /*!< Number of doc ids deleted since the - last time this table was optimized */ - - ib_time_t last_run; /*!< Time last run completed */ - - ib_time_t completed; /*!< Optimize finish time */ - - ib_time_t interval_time; /*!< Minimum time to wait before - optimizing the table again. */ -}; - -/** A table remove message for the FTS optimize thread. */ -struct fts_msg_del_t { - dict_table_t* table; /*!< The table to remove */ - - os_event_t event; /*!< Event to synchronize acknowledgement - of receipt and processing of the - this message by the consumer */ -}; - -/** Stop the optimize thread. */ -struct fts_msg_optimize_t { - dict_table_t* table; /*!< Table to optimize */ -}; - -/** The FTS optimize message work queue message type. */ -struct fts_msg_t { - fts_msg_type_t type; /*!< Message type */ - - void* ptr; /*!< The message contents */ - - mem_heap_t* heap; /*!< The heap used to allocate this - message, the message consumer will - free the heap. */ -}; - -/** The number of words to read and optimize in a single pass. */ -UNIV_INTERN ulong fts_num_word_optimize; - -// FIXME -UNIV_INTERN char fts_enable_diag_print; - -/** ZLib compressed block size.*/ -static ulint FTS_ZIP_BLOCK_SIZE = 1024; - -/** The amount of time optimizing in a single pass, in milliseconds. */ -static ib_time_t fts_optimize_time_limit = 0; - -/** SQL Statement for changing state of rows to be deleted from FTS Index. 
*/ -static const char* fts_init_delete_sql = - "BEGIN\n" - "\n" - "INSERT INTO \"%s_BEING_DELETED\"\n" - "SELECT doc_id FROM \"%s_DELETED\";\n" - "\n" - "INSERT INTO \"%s_BEING_DELETED_CACHE\"\n" - "SELECT doc_id FROM \"%s_DELETED_CACHE\";\n"; - -static const char* fts_delete_doc_ids_sql = - "BEGIN\n" - "\n" - "DELETE FROM \"%s_DELETED\" WHERE doc_id = :doc_id1;\n" - "DELETE FROM \"%s_DELETED_CACHE\" WHERE doc_id = :doc_id2;\n"; - -static const char* fts_end_delete_sql = - "BEGIN\n" - "\n" - "DELETE FROM \"%s_BEING_DELETED\";\n" - "DELETE FROM \"%s_BEING_DELETED_CACHE\";\n"; - -/**********************************************************************//** -Initialize fts_zip_t. */ -static -void -fts_zip_initialize( -/*===============*/ - fts_zip_t* zip) /*!< out: zip instance to initialize */ -{ - zip->pos = 0; - zip->n_words = 0; - - zip->status = Z_OK; - - zip->last_big_block = 0; - - zip->word.f_len = 0; - *zip->word.f_str = 0; - - ib_vector_reset(zip->blocks); - - memset(zip->zp, 0, sizeof(*zip->zp)); -} - -/**********************************************************************//** -Create an instance of fts_zip_t. -@return a new instance of fts_zip_t */ -static -fts_zip_t* -fts_zip_create( -/*===========*/ - mem_heap_t* heap, /*!< in: heap */ - ulint block_sz, /*!< in: size of a zip block.*/ - ulint max_words) /*!< in: max words to read */ -{ - fts_zip_t* zip; - - zip = static_cast<fts_zip_t*>(mem_heap_zalloc(heap, sizeof(*zip))); - - zip->word.f_str = static_cast<byte*>( - mem_heap_zalloc(heap, FTS_MAX_WORD_LEN + 1)); - - zip->block_sz = block_sz; - - zip->heap_alloc = ib_heap_allocator_create(heap); - - zip->blocks = ib_vector_create(zip->heap_alloc, sizeof(void*), 128); - - zip->max_words = max_words; - - zip->zp = static_cast<z_stream*>( - mem_heap_zalloc(heap, sizeof(*zip->zp))); - - return(zip); -} - -/**********************************************************************//** -Initialize an instance of fts_zip_t. 
*/ -static -void -fts_zip_init( -/*=========*/ - - fts_zip_t* zip) /*!< in: zip instance to init */ -{ - memset(zip->zp, 0, sizeof(*zip->zp)); - - zip->word.f_len = 0; - *zip->word.f_str = '\0'; -} - -/**********************************************************************//** -Create a fts_optimizer_word_t instance. -@return new instance */ -UNIV_INTERN -fts_word_t* -fts_word_init( -/*==========*/ - fts_word_t* word, /*!< in: word to initialize */ - byte* utf8, /*!< in: UTF-8 string */ - ulint len) /*!< in: length of string in bytes */ -{ - mem_heap_t* heap = mem_heap_create(sizeof(fts_node_t)); - - memset(word, 0, sizeof(*word)); - - word->text.f_len = len; - word->text.f_str = static_cast<byte*>(mem_heap_alloc(heap, len + 1)); - - /* Need to copy the NUL character too. */ - memcpy(word->text.f_str, utf8, word->text.f_len); - word->text.f_str[word->text.f_len] = 0; - - word->heap_alloc = ib_heap_allocator_create(heap); - - word->nodes = ib_vector_create( - word->heap_alloc, sizeof(fts_node_t), FTS_WORD_NODES_INIT_SIZE); - - return(word); -} - -/**********************************************************************//** -Read the FTS INDEX row. 
-@return fts_node_t instance */ -static -fts_node_t* -fts_optimize_read_node( -/*===================*/ - fts_word_t* word, /*!< in: */ - que_node_t* exp) /*!< in: */ -{ - int i; - fts_node_t* node = static_cast<fts_node_t*>( - ib_vector_push(word->nodes, NULL)); - - /* Start from 1 since the first node has been read by the caller */ - for (i = 1; exp; exp = que_node_get_next(exp), ++i) { - - dfield_t* dfield = que_node_get_val(exp); - byte* data = static_cast<byte*>( - dfield_get_data(dfield)); - ulint len = dfield_get_len(dfield); - - ut_a(len != UNIV_SQL_NULL); - - /* Note: The column numbers below must match the SELECT */ - switch (i) { - case 1: /* DOC_COUNT */ - node->doc_count = mach_read_from_4(data); - break; - - case 2: /* FIRST_DOC_ID */ - node->first_doc_id = fts_read_doc_id(data); - break; - - case 3: /* LAST_DOC_ID */ - node->last_doc_id = fts_read_doc_id(data); - break; - - case 4: /* ILIST */ - node->ilist_size_alloc = node->ilist_size = len; - node->ilist = static_cast<byte*>(ut_malloc(len)); - memcpy(node->ilist, data, len); - break; - - default: - ut_error; - } - } - - /* Make sure all columns were read. */ - ut_a(i == 5); - - return(node); -} - -/**********************************************************************//** -Callback function to fetch the rows in an FTS INDEX record. 
-@return always returns non-NULL */ -UNIV_INTERN -ibool -fts_optimize_index_fetch_node( -/*==========================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to ib_vector_t */ -{ - fts_word_t* word; - sel_node_t* sel_node = static_cast<sel_node_t*>(row); - fts_fetch_t* fetch = static_cast<fts_fetch_t*>(user_arg); - ib_vector_t* words = static_cast<ib_vector_t*>(fetch->read_arg); - que_node_t* exp = sel_node->select_list; - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - ulint dfield_len = dfield_get_len(dfield); - fts_node_t* node; - bool is_word_init = false; - - ut_a(dfield_len <= FTS_MAX_WORD_LEN); - - if (ib_vector_size(words) == 0) { - - word = static_cast<fts_word_t*>(ib_vector_push(words, NULL)); - fts_word_init(word, (byte*) data, dfield_len); - is_word_init = true; - } - - word = static_cast<fts_word_t*>(ib_vector_last(words)); - - if (dfield_len != word->text.f_len - || memcmp(word->text.f_str, data, dfield_len)) { - - word = static_cast<fts_word_t*>(ib_vector_push(words, NULL)); - fts_word_init(word, (byte*) data, dfield_len); - is_word_init = true; - } - - node = fts_optimize_read_node(word, que_node_get_next(exp)); - - fetch->total_memory += node->ilist_size; - if (is_word_init) { - fetch->total_memory += sizeof(fts_word_t) - + sizeof(ib_alloc_t) + sizeof(ib_vector_t) + dfield_len - + sizeof(fts_node_t) * FTS_WORD_NODES_INIT_SIZE; - } else if (ib_vector_size(words) > FTS_WORD_NODES_INIT_SIZE) { - fetch->total_memory += sizeof(fts_node_t); - } - - if (fetch->total_memory >= fts_result_cache_limit) { - return(FALSE); - } - - return(TRUE); -} - -/**********************************************************************//** -Read the rows from the FTS inde. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_index_fetch_nodes( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - que_t** graph, /*!< in: prepared statement */ - fts_table_t* fts_table, /*!< in: table of the FTS INDEX */ - const fts_string_t* - word, /*!< in: the word to fetch */ - fts_fetch_t* fetch) /*!< in: fetch callback.*/ -{ - pars_info_t* info; - dberr_t error; - - trx->op_info = "fetching FTS index nodes"; - - if (*graph) { - info = (*graph)->info; - } else { - info = pars_info_create(); - } - - pars_info_bind_function(info, "my_func", fetch->read_record, fetch); - pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); - - if (!*graph) { - ulint selected; - - ut_a(fts_table->type == FTS_INDEX_TABLE); - - selected = fts_select_index(fts_table->charset, - word->f_str, word->f_len); - - fts_table->suffix = fts_get_suffix(selected); - - *graph = fts_parse_sql( - fts_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT word, doc_count, first_doc_id, last_doc_id, " - "ilist\n" - " FROM \"%s\"\n" - " WHERE word LIKE :word\n" - " ORDER BY first_doc_id;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - } - - for(;;) { - error = fts_eval_sql(trx, *graph); - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - - break; /* Exit the loop. */ - } else { - fts_sql_rollback(trx); - - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading FTS index.\n", - ut_strerr(error)); - - break; /* Exit the loop. 
*/ - } - } - } - - return(error); -} - -/**********************************************************************//** -Read a word */ -static -byte* -fts_zip_read_word( -/*==============*/ - fts_zip_t* zip, /*!< in: Zip state + data */ - fts_string_t* word) /*!< out: uncompressed word */ -{ - short len = 0; - void* null = NULL; - byte* ptr = word->f_str; - int flush = Z_NO_FLUSH; - - /* Either there was an error or we are at the Z_STREAM_END. */ - if (zip->status != Z_OK) { - return(NULL); - } - - zip->zp->next_out = reinterpret_cast<byte*>(&len); - zip->zp->avail_out = sizeof(len); - - while (zip->status == Z_OK && zip->zp->avail_out > 0) { - - /* Finished decompressing block. */ - if (zip->zp->avail_in == 0) { - - /* Free the block that's been decompressed. */ - if (zip->pos > 0) { - ulint prev = zip->pos - 1; - - ut_a(zip->pos < ib_vector_size(zip->blocks)); - - ut_free(ib_vector_getp(zip->blocks, prev)); - ib_vector_set(zip->blocks, prev, &null); - } - - /* Any more blocks to decompress. */ - if (zip->pos < ib_vector_size(zip->blocks)) { - - zip->zp->next_in = static_cast<byte*>( - ib_vector_getp( - zip->blocks, zip->pos)); - - if (zip->pos > zip->last_big_block) { - zip->zp->avail_in = - FTS_MAX_WORD_LEN; - } else { - zip->zp->avail_in = static_cast<uInt>(zip->block_sz); - } - - ++zip->pos; - } else { - flush = Z_FINISH; - } - } - - switch (zip->status = inflate(zip->zp, flush)) { - case Z_OK: - if (zip->zp->avail_out == 0 && len > 0) { - - ut_a(len <= FTS_MAX_WORD_LEN); - ptr[len] = 0; - - zip->zp->next_out = ptr; - zip->zp->avail_out = len; - - word->f_len = len; - len = 0; - } - break; - - case Z_BUF_ERROR: /* No progress possible. */ - case Z_STREAM_END: - inflateEnd(zip->zp); - break; - - case Z_STREAM_ERROR: - default: - ut_error; - } - } - - /* All blocks must be freed at end of inflate. 
*/ - if (zip->status != Z_OK) { - for (ulint i = 0; i < ib_vector_size(zip->blocks); ++i) { - if (ib_vector_getp(zip->blocks, i)) { - ut_free(ib_vector_getp(zip->blocks, i)); - ib_vector_set(zip->blocks, i, &null); - } - } - } - - if (ptr != NULL) { - ut_ad(word->f_len == strlen((char*) ptr)); - } - - return(zip->status == Z_OK || zip->status == Z_STREAM_END ? ptr : NULL); -} - -/**********************************************************************//** -Callback function to fetch and compress the word in an FTS -INDEX record. -@return FALSE on EOF */ -static -ibool -fts_fetch_index_words( -/*==================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to ib_vector_t */ -{ - sel_node_t* sel_node = static_cast<sel_node_t*>(row); - fts_zip_t* zip = static_cast<fts_zip_t*>(user_arg); - que_node_t* exp = sel_node->select_list; - dfield_t* dfield = que_node_get_val(exp); - short len = static_cast<short>(dfield_get_len(dfield)); - void* data = dfield_get_data(dfield); - - /* Skip the duplicate words. */ - if (zip->word.f_len == static_cast<ulint>(len) - && !memcmp(zip->word.f_str, data, len)) { - - return(TRUE); - } - - ut_a(len <= FTS_MAX_WORD_LEN); - - memcpy(zip->word.f_str, data, len); - zip->word.f_len = len; - - ut_a(zip->zp->avail_in == 0); - ut_a(zip->zp->next_in == NULL); - - /* The string is prefixed by len. */ - zip->zp->next_in = reinterpret_cast<byte*>(&len); - zip->zp->avail_in = sizeof(len); - - /* Compress the word, create output blocks as necessary. */ - while (zip->zp->avail_in > 0) { - - /* No space left in output buffer, create a new one. 
*/ - if (zip->zp->avail_out == 0) { - byte* block; - - block = static_cast<byte*>(ut_malloc(zip->block_sz)); - ib_vector_push(zip->blocks, &block); - - zip->zp->next_out = block; - zip->zp->avail_out = static_cast<uInt>(zip->block_sz); - } - - switch (zip->status = deflate(zip->zp, Z_NO_FLUSH)) { - case Z_OK: - if (zip->zp->avail_in == 0) { - zip->zp->next_in = static_cast<byte*>(data); - zip->zp->avail_in = len; - ut_a(len <= FTS_MAX_WORD_LEN); - len = 0; - } - break; - - case Z_STREAM_END: - case Z_BUF_ERROR: - case Z_STREAM_ERROR: - default: - ut_error; - break; - } - } - - /* All data should have been compressed. */ - ut_a(zip->zp->avail_in == 0); - zip->zp->next_in = NULL; - - ++zip->n_words; - - return(zip->n_words >= zip->max_words ? FALSE : TRUE); -} - -/**********************************************************************//** -Finish Zip deflate. */ -static -void -fts_zip_deflate_end( -/*================*/ - fts_zip_t* zip) /*!< in: instance that should be closed*/ -{ - ut_a(zip->zp->avail_in == 0); - ut_a(zip->zp->next_in == NULL); - - zip->status = deflate(zip->zp, Z_FINISH); - - ut_a(ib_vector_size(zip->blocks) > 0); - zip->last_big_block = ib_vector_size(zip->blocks) - 1; - - /* Allocate smaller block(s), since this is trailing data. */ - while (zip->status == Z_OK) { - byte* block; - - ut_a(zip->zp->avail_out == 0); - - block = static_cast<byte*>(ut_malloc(FTS_MAX_WORD_LEN + 1)); - ib_vector_push(zip->blocks, &block); - - zip->zp->next_out = block; - zip->zp->avail_out = FTS_MAX_WORD_LEN; - - zip->status = deflate(zip->zp, Z_FINISH); - } - - ut_a(zip->status == Z_STREAM_END); - - zip->status = deflateEnd(zip->zp); - ut_a(zip->status == Z_OK); - - /* Reset the ZLib data structure. */ - memset(zip->zp, 0, sizeof(*zip->zp)); -} - -/**********************************************************************//** -Read the words from the FTS INDEX. 
-@return DB_SUCCESS if all OK, DB_TABLE_NOT_FOUND if no more indexes - to search else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_index_fetch_words( -/*==================*/ - fts_optimize_t* optim, /*!< in: optimize scratch pad */ - const fts_string_t* word, /*!< in: get words greater than this - word */ - ulint n_words)/*!< in: max words to read */ -{ - pars_info_t* info; - que_t* graph; - ulint selected; - fts_zip_t* zip = NULL; - dberr_t error = DB_SUCCESS; - mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg); - ibool inited = FALSE; - - optim->trx->op_info = "fetching FTS index words"; - - if (optim->zip == NULL) { - optim->zip = fts_zip_create(heap, FTS_ZIP_BLOCK_SIZE, n_words); - } else { - fts_zip_initialize(optim->zip); - } - - for (selected = fts_select_index( - optim->fts_index_table.charset, word->f_str, word->f_len); - fts_index_selector[selected].value; - selected++) { - - optim->fts_index_table.suffix = fts_get_suffix(selected); - - /* We've search all indexes. 
*/ - if (optim->fts_index_table.suffix == NULL) { - return(DB_TABLE_NOT_FOUND); - } - - info = pars_info_create(); - - pars_info_bind_function( - info, "my_func", fts_fetch_index_words, optim->zip); - - pars_info_bind_varchar_literal( - info, "word", word->f_str, word->f_len); - - graph = fts_parse_sql( - &optim->fts_index_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT word\n" - " FROM \"%s\"\n" - " WHERE word > :word\n" - " ORDER BY word;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - zip = optim->zip; - - for(;;) { - int err; - - if (!inited && ((err = deflateInit(zip->zp, 9)) - != Z_OK)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: ZLib deflateInit() " - "failed: %d\n", err); - - error = DB_ERROR; - break; - } else { - inited = TRUE; - error = fts_eval_sql(optim->trx, graph); - } - - if (error == DB_SUCCESS) { - //FIXME fts_sql_commit(optim->trx); - break; - } else { - //FIXME fts_sql_rollback(optim->trx); - - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: " - "Warning: lock wait " - "timeout reading document. " - "Retrying!\n"); - - /* We need to reset the ZLib state. */ - inited = FALSE; - deflateEnd(zip->zp); - fts_zip_init(zip); - - optim->trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: (%s) " - "while reading document.\n", - ut_strerr(error)); - - break; /* Exit the loop. */ - } - } - } - - fts_que_graph_free(graph); - - /* Check if max word to fetch is exceeded */ - if (optim->zip->n_words >= n_words) { - break; - } - } - - if (error == DB_SUCCESS && zip->status == Z_OK && zip->n_words > 0) { - - /* All data should have been read. 
*/ - ut_a(zip->zp->avail_in == 0); - - fts_zip_deflate_end(zip); - } else { - deflateEnd(zip->zp); - } - - return(error); -} - -/**********************************************************************//** -Callback function to fetch the doc id from the record. -@return always returns TRUE */ -static -ibool -fts_fetch_doc_ids( -/*==============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to ib_vector_t */ -{ - que_node_t* exp; - int i = 0; - sel_node_t* sel_node = static_cast<sel_node_t*>(row); - fts_doc_ids_t* fts_doc_ids = static_cast<fts_doc_ids_t*>(user_arg); - fts_update_t* update = static_cast<fts_update_t*>( - ib_vector_push(fts_doc_ids->doc_ids, NULL)); - - for (exp = sel_node->select_list; - exp; - exp = que_node_get_next(exp), ++i) { - - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - ut_a(len != UNIV_SQL_NULL); - - /* Note: The column numbers below must match the SELECT. */ - switch (i) { - case 0: /* DOC_ID */ - update->fts_indexes = NULL; - update->doc_id = fts_read_doc_id( - static_cast<byte*>(data)); - break; - - default: - ut_error; - } - } - - return(TRUE); -} - -/**********************************************************************//** -Read the rows from a FTS common auxiliary table. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_table_fetch_doc_ids( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: table */ - fts_doc_ids_t* doc_ids) /*!< in: For collecting doc ids */ -{ - dberr_t error; - que_t* graph; - pars_info_t* info = pars_info_create(); - ibool alloc_bk_trx = FALSE; - - ut_a(fts_table->suffix != NULL); - ut_a(fts_table->type == FTS_COMMON_TABLE); - - if (!trx) { - trx = trx_allocate_for_background(); - alloc_bk_trx = TRUE; - } - - trx->op_info = "fetching FTS doc ids"; - - pars_info_bind_function(info, "my_func", fts_fetch_doc_ids, doc_ids); - - graph = fts_parse_sql( - fts_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT doc_id FROM \"%s\";\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - error = fts_eval_sql(trx, graph); - - mutex_enter(&dict_sys->mutex); - que_graph_free(graph); - mutex_exit(&dict_sys->mutex); - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - - ib_vector_sort(doc_ids->doc_ids, fts_update_doc_id_cmp); - } else { - fts_sql_rollback(trx); - } - - if (alloc_bk_trx) { - trx_free_for_background(trx); - } - - return(error); -} - -/**********************************************************************//** -Do a binary search for a doc id in the array -@return +ve index if found -ve index where it should be inserted - if not found */ -UNIV_INTERN -int -fts_bsearch( -/*========*/ - fts_update_t* array, /*!< in: array to sort */ - int lower, /*!< in: the array lower bound */ - int upper, /*!< in: the array upper bound */ - doc_id_t doc_id) /*!< in: the doc id to search for */ -{ - int orig_size = upper; - - if (upper == 0) { - /* Nothing to search */ - return(-1); - } else { - while (lower < upper) { - int i = (lower + upper) >> 1; - - if (doc_id > array[i].doc_id) { - lower = i + 1; - } else if 
(doc_id < array[i].doc_id) { - upper = i - 1; - } else { - return(i); /* Found. */ - } - } - } - - if (lower == upper && lower < orig_size) { - if (doc_id == array[lower].doc_id) { - return(lower); - } else if (lower == 0) { - return(-1); - } - } - - /* Not found. */ - return( (lower == 0) ? -1 : -lower); -} - -/**********************************************************************//** -Search in the to delete array whether any of the doc ids within -the [first, last] range are to be deleted -@return +ve index if found -ve index where it should be inserted - if not found */ -static -int -fts_optimize_lookup( -/*================*/ - ib_vector_t* doc_ids, /*!< in: array to search */ - ulint lower, /*!< in: lower limit of array */ - doc_id_t first_doc_id, /*!< in: doc id to lookup */ - doc_id_t last_doc_id) /*!< in: doc id to lookup */ -{ - int pos; - int upper = static_cast<int>(ib_vector_size(doc_ids)); - fts_update_t* array = (fts_update_t*) doc_ids->data; - - pos = fts_bsearch(array, static_cast<int>(lower), upper, first_doc_id); - - ut_a(abs(pos) <= upper + 1); - - if (pos < 0) { - - int i = abs(pos); - - /* If i is 1, it could be first_doc_id is less than - either the first or second array item, do a - double check */ - if (i == 1 && array[0].doc_id <= last_doc_id - && first_doc_id < array[0].doc_id) { - pos = 0; - } else if (i < upper && array[i].doc_id <= last_doc_id) { - - /* Check if the "next" doc id is within the - first & last doc id of the node. 
*/ - pos = i; - } - } - - return(pos); -} - -/**********************************************************************//** -Encode the word pos list into the node -@return DB_SUCCESS or error code*/ -static MY_ATTRIBUTE((nonnull)) -dberr_t -fts_optimize_encode_node( -/*=====================*/ - fts_node_t* node, /*!< in: node to fill*/ - doc_id_t doc_id, /*!< in: doc id to encode */ - fts_encode_t* enc) /*!< in: encoding state.*/ -{ - byte* dst; - ulint enc_len; - ulint pos_enc_len; - doc_id_t doc_id_delta; - dberr_t error = DB_SUCCESS; - byte* src = enc->src_ilist_ptr; - - if (node->first_doc_id == 0) { - ut_a(node->last_doc_id == 0); - - node->first_doc_id = doc_id; - } - - /* Calculate the space required to store the ilist. */ - ut_ad(doc_id > node->last_doc_id); - doc_id_delta = doc_id - node->last_doc_id; - enc_len = fts_get_encoded_len(static_cast<ulint>(doc_id_delta)); - - /* Calculate the size of the encoded pos array. */ - while (*src) { - fts_decode_vlc(&src); - } - - /* Skip the 0x00 byte at the end of the word positions list. */ - ++src; - - /* Number of encoded pos bytes to copy. */ - pos_enc_len = src - enc->src_ilist_ptr; - - /* Total number of bytes required for copy. */ - enc_len += pos_enc_len; - - /* Check we have enough space in the destination buffer for - copying the document word list. */ - if (!node->ilist) { - ulint new_size; - - ut_a(node->ilist_size == 0); - - new_size = enc_len > FTS_ILIST_MAX_SIZE - ? enc_len : FTS_ILIST_MAX_SIZE; - - node->ilist = static_cast<byte*>(ut_malloc(new_size)); - node->ilist_size_alloc = new_size; - - } else if ((node->ilist_size + enc_len) > node->ilist_size_alloc) { - ulint new_size = node->ilist_size + enc_len; - byte* ilist = static_cast<byte*>(ut_malloc(new_size)); - - memcpy(ilist, node->ilist, node->ilist_size); - - ut_free(node->ilist); - - node->ilist = ilist; - node->ilist_size_alloc = new_size; - } - - src = enc->src_ilist_ptr; - dst = node->ilist + node->ilist_size; - - /* Encode the doc id. 
Cast to ulint, the delta should be small and - therefore no loss of precision. */ - dst += fts_encode_int((ulint) doc_id_delta, dst); - - /* Copy the encoded pos array. */ - memcpy(dst, src, pos_enc_len); - - node->last_doc_id = doc_id; - - /* Data copied upto here. */ - node->ilist_size += enc_len; - enc->src_ilist_ptr += pos_enc_len; - - ut_a(node->ilist_size <= node->ilist_size_alloc); - - return(error); -} - -/**********************************************************************//** -Optimize the data contained in a node. -@return DB_SUCCESS or error code*/ -static MY_ATTRIBUTE((nonnull)) -dberr_t -fts_optimize_node( -/*==============*/ - ib_vector_t* del_vec, /*!< in: vector of doc ids to delete*/ - int* del_pos, /*!< in: offset into above vector */ - fts_node_t* dst_node, /*!< in: node to fill*/ - fts_node_t* src_node, /*!< in: source node for data*/ - fts_encode_t* enc) /*!< in: encoding state */ -{ - ulint copied; - dberr_t error = DB_SUCCESS; - doc_id_t doc_id = enc->src_last_doc_id; - - if (!enc->src_ilist_ptr) { - enc->src_ilist_ptr = src_node->ilist; - } - - copied = enc->src_ilist_ptr - src_node->ilist; - - /* While there is data in the source node and space to copy - into in the destination node. */ - while (copied < src_node->ilist_size - && dst_node->ilist_size < FTS_ILIST_MAX_SIZE) { - - doc_id_t delta; - doc_id_t del_doc_id = FTS_NULL_DOC_ID; - - delta = fts_decode_vlc(&enc->src_ilist_ptr); - -test_again: - /* Check whether the doc id is in the delete list, if - so then we skip the entries but we need to track the - delta for decoding the entries following this document's - entries. 
*/ - if (*del_pos >= 0 && *del_pos < (int) ib_vector_size(del_vec)) { - fts_update_t* update; - - update = (fts_update_t*) ib_vector_get( - del_vec, *del_pos); - - del_doc_id = update->doc_id; - } - - if (enc->src_ilist_ptr == src_node->ilist && doc_id == 0) { - ut_a(delta == src_node->first_doc_id); - } - - doc_id += delta; - - if (del_doc_id > 0 && doc_id == del_doc_id) { - - ++*del_pos; - - /* Skip the entries for this document. */ - while (*enc->src_ilist_ptr) { - fts_decode_vlc(&enc->src_ilist_ptr); - } - - /* Skip the end of word position marker. */ - ++enc->src_ilist_ptr; - - } else { - - /* DOC ID already becomes larger than - del_doc_id, check the next del_doc_id */ - if (del_doc_id > 0 && doc_id > del_doc_id) { - del_doc_id = 0; - ++*del_pos; - delta = 0; - goto test_again; - } - - /* Decode and copy the word positions into - the dest node. */ - fts_optimize_encode_node(dst_node, doc_id, enc); - - ++dst_node->doc_count; - - ut_a(dst_node->last_doc_id == doc_id); - } - - /* Bytes copied so for from source. */ - copied = enc->src_ilist_ptr - src_node->ilist; - } - - if (copied >= src_node->ilist_size) { - ut_a(doc_id == src_node->last_doc_id); - } - - enc->src_last_doc_id = doc_id; - - return(error); -} - -/**********************************************************************//** -Determine the starting pos within the deleted doc id vector for a word. -@return delete position */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -int -fts_optimize_deleted_pos( -/*=====================*/ - fts_optimize_t* optim, /*!< in: optimize state data */ - fts_word_t* word) /*!< in: the word data to check */ -{ - int del_pos; - ib_vector_t* del_vec = optim->to_delete->doc_ids; - - /* Get the first and last dict ids for the word, we will use - these values to determine which doc ids need to be removed - when we coalesce the nodes. 
This way we can reduce the numer - of elements that need to be searched in the deleted doc ids - vector and secondly we can remove the doc ids during the - coalescing phase. */ - if (ib_vector_size(del_vec) > 0) { - fts_node_t* node; - doc_id_t last_id; - doc_id_t first_id; - ulint size = ib_vector_size(word->nodes); - - node = (fts_node_t*) ib_vector_get(word->nodes, 0); - first_id = node->first_doc_id; - - node = (fts_node_t*) ib_vector_get(word->nodes, size - 1); - last_id = node->last_doc_id; - - ut_a(first_id <= last_id); - - del_pos = fts_optimize_lookup( - del_vec, optim->del_pos, first_id, last_id); - } else { - - del_pos = -1; /* Note that there is nothing to delete. */ - } - - return(del_pos); -} - -#define FTS_DEBUG_PRINT -/**********************************************************************//** -Compact the nodes for a word, we also remove any doc ids during the -compaction pass. -@return DB_SUCCESS or error code.*/ -static -ib_vector_t* -fts_optimize_word( -/*==============*/ - fts_optimize_t* optim, /*!< in: optimize state data */ - fts_word_t* word) /*!< in: the word to optimize */ -{ - fts_encode_t enc; - ib_vector_t* nodes; - ulint i = 0; - int del_pos; - fts_node_t* dst_node = NULL; - ib_vector_t* del_vec = optim->to_delete->doc_ids; - ulint size = ib_vector_size(word->nodes); - - del_pos = fts_optimize_deleted_pos(optim, word); - nodes = ib_vector_create(word->heap_alloc, sizeof(*dst_node), 128); - - enc.src_last_doc_id = 0; - enc.src_ilist_ptr = NULL; - - if (fts_enable_diag_print) { - word->text.f_str[word->text.f_len] = 0; - fprintf(stderr, "FTS_OPTIMIZE: optimize \"%s\"\n", - word->text.f_str); - } - - while (i < size) { - ulint copied; - fts_node_t* src_node; - - src_node = (fts_node_t*) ib_vector_get(word->nodes, i); - - if (dst_node == NULL - || dst_node->last_doc_id > src_node->first_doc_id) { - - dst_node = static_cast<fts_node_t*>( - ib_vector_push(nodes, NULL)); - memset(dst_node, 0, sizeof(*dst_node)); - } - - /* Copy from the src 
to the dst node. */ - fts_optimize_node(del_vec, &del_pos, dst_node, src_node, &enc); - - ut_a(enc.src_ilist_ptr != NULL); - - /* Determine the numer of bytes copied to dst_node. */ - copied = enc.src_ilist_ptr - src_node->ilist; - - /* Can't copy more than whats in the vlc array. */ - ut_a(copied <= src_node->ilist_size); - - /* We are done with this node release the resources. */ - if (copied == src_node->ilist_size) { - - enc.src_last_doc_id = 0; - enc.src_ilist_ptr = NULL; - - ut_free(src_node->ilist); - - src_node->ilist = NULL; - src_node->ilist_size = src_node->ilist_size_alloc = 0; - - src_node = NULL; - - ++i; /* Get next source node to OPTIMIZE. */ - } - - if (dst_node->ilist_size >= FTS_ILIST_MAX_SIZE || i >= size) { - - dst_node = NULL; - } - } - - /* All dst nodes created should have been added to the vector. */ - ut_a(dst_node == NULL); - - /* Return the OPTIMIZED nodes. */ - return(nodes); -} - -/**********************************************************************//** -Update the FTS index table. This is a delete followed by an insert. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_write_word( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: table of FTS index */ - fts_string_t* word, /*!< in: word data to write */ - ib_vector_t* nodes) /*!< in: the nodes to write */ -{ - ulint i; - pars_info_t* info; - que_t* graph; - ulint selected; - dberr_t error = DB_SUCCESS; - char* table_name = fts_get_table_name(fts_table); - - info = pars_info_create(); - - ut_ad(fts_table->charset); - - if (fts_enable_diag_print) { - fprintf(stderr, "FTS_OPTIMIZE: processed \"%s\"\n", - word->f_str); - } - - pars_info_bind_varchar_literal( - info, "word", word->f_str, word->f_len); - - selected = fts_select_index(fts_table->charset, - word->f_str, word->f_len); - - fts_table->suffix = fts_get_suffix(selected); - - graph = fts_parse_sql( - fts_table, - info, - "BEGIN DELETE FROM \"%s\" WHERE word = :word;"); - - error = fts_eval_sql(trx, graph); - - if (error != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) during optimize, " - "when deleting a word from the FTS index.\n", - ut_strerr(error)); - } - - fts_que_graph_free(graph); - graph = NULL; - - mem_free(table_name); - - /* Even if the operation needs to be rolled back and redone, - we iterate over the nodes in order to free the ilist. 
*/ - for (i = 0; i < ib_vector_size(nodes); ++i) { - - fts_node_t* node = (fts_node_t*) ib_vector_get(nodes, i); - - if (error == DB_SUCCESS) { - error = fts_write_node( - trx, &graph, fts_table, word, node); - - if (error != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%s) " - "during optimize, while adding a " - "word to the FTS index.\n", - ut_strerr(error)); - } - } - - ut_free(node->ilist); - node->ilist = NULL; - node->ilist_size = node->ilist_size_alloc = 0; - } - - if (graph != NULL) { - fts_que_graph_free(graph); - } - - return(error); -} - -/**********************************************************************//** -Free fts_optimizer_word_t instanace.*/ -UNIV_INTERN -void -fts_word_free( -/*==========*/ - fts_word_t* word) /*!< in: instance to free.*/ -{ - mem_heap_t* heap = static_cast<mem_heap_t*>(word->heap_alloc->arg); - -#ifdef UNIV_DEBUG - memset(word, 0, sizeof(*word)); -#endif /* UNIV_DEBUG */ - - mem_heap_free(heap); -} - -/**********************************************************************//** -Optimize the word ilist and rewrite data to the FTS index. -@return status one of RESTART, EXIT, ERROR */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_compact( -/*=================*/ - fts_optimize_t* optim, /*!< in: optimize state data */ - dict_index_t* index, /*!< in: current FTS being optimized */ - ib_time_t start_time) /*!< in: optimize start time */ -{ - ulint i; - dberr_t error = DB_SUCCESS; - ulint size = ib_vector_size(optim->words); - - for (i = 0; i < size && error == DB_SUCCESS && !optim->done; ++i) { - fts_word_t* word; - ib_vector_t* nodes; - trx_t* trx = optim->trx; - - word = (fts_word_t*) ib_vector_get(optim->words, i); - - /* nodes is allocated from the word heap and will be destroyed - when the word is freed. We however have to be careful about - the ilist, that needs to be freed explicitly. */ - nodes = fts_optimize_word(optim, word); - - /* Update the data on disk. 
*/ - error = fts_optimize_write_word( - trx, &optim->fts_index_table, &word->text, nodes); - - if (error == DB_SUCCESS) { - /* Write the last word optimized to the config table, - we use this value for restarting optimize. */ - error = fts_config_set_index_value( - optim->trx, index, - FTS_LAST_OPTIMIZED_WORD, &word->text); - } - - /* Free the word that was optimized. */ - fts_word_free(word); - - if (fts_optimize_time_limit > 0 - && (ut_time() - start_time) > fts_optimize_time_limit) { - - optim->done = TRUE; - } - } - - return(error); -} - -/**********************************************************************//** -Create an instance of fts_optimize_t. Also create a new -background transaction.*/ -static -fts_optimize_t* -fts_optimize_create( -/*================*/ - dict_table_t* table) /*!< in: table with FTS indexes */ -{ - fts_optimize_t* optim; - mem_heap_t* heap = mem_heap_create(128); - - optim = (fts_optimize_t*) mem_heap_zalloc(heap, sizeof(*optim)); - - optim->self_heap = ib_heap_allocator_create(heap); - - optim->to_delete = fts_doc_ids_create(); - - optim->words = ib_vector_create( - optim->self_heap, sizeof(fts_word_t), 256); - - optim->table = table; - - optim->trx = trx_allocate_for_background(); - - optim->fts_common_table.parent = table->name; - optim->fts_common_table.table_id = table->id; - optim->fts_common_table.type = FTS_COMMON_TABLE; - optim->fts_common_table.table = table; - - optim->fts_index_table.parent = table->name; - optim->fts_index_table.table_id = table->id; - optim->fts_index_table.type = FTS_INDEX_TABLE; - optim->fts_index_table.table = table; - - /* The common prefix for all this parent table's aux tables. */ - optim->name_prefix = fts_get_table_name_prefix( - &optim->fts_common_table); - - return(optim); -} - -#ifdef FTS_OPTIMIZE_DEBUG -/**********************************************************************//** -Get optimize start time of an FTS index. 
-@return DB_SUCCESS if all OK else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_get_index_start_time( -/*==============================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - ib_time_t* start_time) /*!< out: time in secs */ -{ - return(fts_config_get_index_ulint( - trx, index, FTS_OPTIMIZE_START_TIME, - (ulint*) start_time)); -} - -/**********************************************************************//** -Set the optimize start time of an FTS index. -@return DB_SUCCESS if all OK else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_set_index_start_time( -/*==============================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - ib_time_t start_time) /*!< in: start time */ -{ - return(fts_config_set_index_ulint( - trx, index, FTS_OPTIMIZE_START_TIME, - (ulint) start_time)); -} - -/**********************************************************************//** -Get optimize end time of an FTS index. -@return DB_SUCCESS if all OK else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_get_index_end_time( -/*============================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - ib_time_t* end_time) /*!< out: time in secs */ -{ - return(fts_config_get_index_ulint( - trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time)); -} - -/**********************************************************************//** -Set the optimize end time of an FTS index. 
-@return DB_SUCCESS if all OK else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_set_index_end_time( -/*============================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - ib_time_t end_time) /*!< in: end time */ -{ - return(fts_config_set_index_ulint( - trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time)); -} -#endif - -/**********************************************************************//** -Free the optimize prepared statements.*/ -static -void -fts_optimize_graph_free( -/*====================*/ - fts_optimize_graph_t* graph) /*!< in/out: The graph instances - to free */ -{ - if (graph->commit_graph) { - que_graph_free(graph->commit_graph); - graph->commit_graph = NULL; - } - - if (graph->write_nodes_graph) { - que_graph_free(graph->write_nodes_graph); - graph->write_nodes_graph = NULL; - } - - if (graph->delete_nodes_graph) { - que_graph_free(graph->delete_nodes_graph); - graph->delete_nodes_graph = NULL; - } - - if (graph->read_nodes_graph) { - que_graph_free(graph->read_nodes_graph); - graph->read_nodes_graph = NULL; - } -} - -/**********************************************************************//** -Free all optimize resources. */ -static -void -fts_optimize_free( -/*==============*/ - fts_optimize_t* optim) /*!< in: table with on FTS index */ -{ - mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg); - - trx_free_for_background(optim->trx); - - fts_doc_ids_free(optim->to_delete); - fts_optimize_graph_free(&optim->graph); - - mem_free(optim->name_prefix); - - /* This will free the heap from which optim itself was allocated. */ - mem_heap_free(heap); -} - -/**********************************************************************//** -Get the max time optimize should run in millisecs. -@return max optimize time limit in millisecs. 
*/ -static -ib_time_t -fts_optimize_get_time_limit( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table) /*!< in: aux table */ -{ - ib_time_t time_limit = 0; - - fts_config_get_ulint( - trx, fts_table, - FTS_OPTIMIZE_LIMIT_IN_SECS, (ulint*) &time_limit); - - return(time_limit * 1000); -} - - -/**********************************************************************//** -Run OPTIMIZE on the given table. Note: this can take a very long time -(hours). */ -static -void -fts_optimize_words( -/*===============*/ - fts_optimize_t* optim, /*!< in: optimize instance */ - dict_index_t* index, /*!< in: current FTS being optimized */ - fts_string_t* word) /*!< in: the starting word to optimize */ -{ - fts_fetch_t fetch; - ib_time_t start_time; - que_t* graph = NULL; - CHARSET_INFO* charset = optim->fts_index_table.charset; - - ut_a(!optim->done); - - /* Get the time limit from the config table. */ - fts_optimize_time_limit = fts_optimize_get_time_limit( - optim->trx, &optim->fts_common_table); - - start_time = ut_time(); - - /* Setup the callback to use for fetching the word ilist etc. */ - fetch.read_arg = optim->words; - fetch.read_record = fts_optimize_index_fetch_node; - - fprintf(stderr, "%.*s\n", (int) word->f_len, word->f_str); - - while(!optim->done) { - dberr_t error; - trx_t* trx = optim->trx; - ulint selected; - - ut_a(ib_vector_size(optim->words) == 0); - - selected = fts_select_index(charset, word->f_str, word->f_len); - - /* Read the index records to optimize. */ - fetch.total_memory = 0; - error = fts_index_fetch_nodes( - trx, &graph, &optim->fts_index_table, word, - &fetch); - ut_ad(fetch.total_memory < fts_result_cache_limit); - - if (error == DB_SUCCESS) { - /* There must be some nodes to read. */ - ut_a(ib_vector_size(optim->words) > 0); - - /* Optimize the nodes that were read and write - back to DB. 
*/ - error = fts_optimize_compact(optim, index, start_time); - - if (error == DB_SUCCESS) { - fts_sql_commit(optim->trx); - } else { - fts_sql_rollback(optim->trx); - } - } - - ib_vector_reset(optim->words); - - if (error == DB_SUCCESS) { - if (!optim->done) { - if (!fts_zip_read_word(optim->zip, word)) { - optim->done = TRUE; - } else if (selected - != fts_select_index( - charset, word->f_str, - word->f_len) - && graph) { - fts_que_graph_free(graph); - graph = NULL; - } - } - } else if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, "InnoDB: Warning: lock wait timeout " - "during optimize. Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else if (error == DB_DEADLOCK) { - fprintf(stderr, "InnoDB: Warning: deadlock " - "during optimize. Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - optim->done = TRUE; /* Exit the loop. */ - } - } - - if (graph != NULL) { - fts_que_graph_free(graph); - } -} - -/**********************************************************************//** -Select the FTS index to search. -@return TRUE if last index */ -static -ibool -fts_optimize_set_next_word( -/*=======================*/ - CHARSET_INFO* charset, /*!< in: charset */ - fts_string_t* word) /*!< in: current last word */ -{ - ulint selected; - ibool last = FALSE; - - selected = fts_select_next_index(charset, word->f_str, word->f_len); - - /* If this was the last index then reset to start. */ - if (fts_index_selector[selected].value == 0) { - /* Reset the last optimized word to '' if no - more words could be read from the FTS index. */ - word->f_len = 0; - *word->f_str = 0; - - last = TRUE; - } else { - ulint value = fts_index_selector[selected].value; - - ut_a(value <= 0xff); - - /* Set to the first character of the next slot. */ - word->f_len = 1; - *word->f_str = (byte) value; - } - - return(last); -} - -/**********************************************************************//** -Optimize is complete. 
Set the completion time, and reset the optimize -start string for this FTS index to "". -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_index_completed( -/*=========================*/ - fts_optimize_t* optim, /*!< in: optimize instance */ - dict_index_t* index) /*!< in: table with one FTS index */ -{ - fts_string_t word; - dberr_t error; - byte buf[sizeof(ulint)]; -#ifdef FTS_OPTIMIZE_DEBUG - ib_time_t end_time = ut_time(); - - error = fts_optimize_set_index_end_time(optim->trx, index, end_time); -#endif - - /* If we've reached the end of the index then set the start - word to the empty string. */ - - word.f_len = 0; - word.f_str = buf; - *word.f_str = '\0'; - - error = fts_config_set_index_value( - optim->trx, index, FTS_LAST_OPTIMIZED_WORD, &word); - - if (error != DB_SUCCESS) { - - fprintf(stderr, "InnoDB: Error: (%s) while " - "updating last optimized word!\n", ut_strerr(error)); - } - - return(error); -} - - -/**********************************************************************//** -Read the list of words from the FTS auxiliary index that will be -optimized in this pass. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_index_read_words( -/*==========================*/ - fts_optimize_t* optim, /*!< in: optimize instance */ - dict_index_t* index, /*!< in: table with one FTS index */ - fts_string_t* word) /*!< in: buffer to use */ -{ - dberr_t error = DB_SUCCESS; - - if (optim->del_list_regenerated) { - word->f_len = 0; - } else { - - /* Get the last word that was optimized from - the config table. */ - error = fts_config_get_index_value( - optim->trx, index, FTS_LAST_OPTIMIZED_WORD, word); - } - - /* If record not found then we start from the top. 
*/ - if (error == DB_RECORD_NOT_FOUND) { - word->f_len = 0; - error = DB_SUCCESS; - } - - while (error == DB_SUCCESS) { - - error = fts_index_fetch_words( - optim, word, fts_num_word_optimize); - - if (error == DB_SUCCESS) { - - /* If the search returned an empty set - try the next index in the horizontal split. */ - if (optim->zip->n_words > 0) { - break; - } else { - - fts_optimize_set_next_word( - optim->fts_index_table.charset, - word); - - if (word->f_len == 0) { - break; - } - } - } - } - - return(error); -} - -/**********************************************************************//** -Run OPTIMIZE on the given FTS index. Note: this can take a very long -time (hours). -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_index( -/*===============*/ - fts_optimize_t* optim, /*!< in: optimize instance */ - dict_index_t* index) /*!< in: table with one FTS index */ -{ - fts_string_t word; - dberr_t error; - byte str[FTS_MAX_WORD_LEN + 1]; - - /* Set the current index that we have to optimize. */ - optim->fts_index_table.index_id = index->id; - optim->fts_index_table.charset = fts_index_get_charset(index); - - optim->done = FALSE; /* Optimize until !done */ - - /* We need to read the last word optimized so that we start from - the next word. */ - word.f_str = str; - - /* We set the length of word to the size of str since we - need to pass the max len info to the fts_get_config_value() function. */ - word.f_len = sizeof(str) - 1; - - memset(word.f_str, 0x0, word.f_len); - - /* Read the words that will be optimized in this pass. 
*/ - error = fts_optimize_index_read_words(optim, index, &word); - - if (error == DB_SUCCESS) { - int zip_error; - - ut_a(optim->zip->pos == 0); - ut_a(optim->zip->zp->total_in == 0); - ut_a(optim->zip->zp->total_out == 0); - - zip_error = inflateInit(optim->zip->zp); - ut_a(zip_error == Z_OK); - - word.f_len = 0; - word.f_str = str; - - /* Read the first word to optimize from the Zip buffer. */ - if (!fts_zip_read_word(optim->zip, &word)) { - - optim->done = TRUE; - } else { - fts_optimize_words(optim, index, &word); - } - - /* If we couldn't read any records then optimize is - complete. Increment the number of indexes that have - been optimized and set FTS index optimize state to - completed. */ - if (error == DB_SUCCESS && optim->zip->n_words == 0) { - - error = fts_optimize_index_completed(optim, index); - - if (error == DB_SUCCESS) { - ++optim->n_completed; - } - } - } - - return(error); -} - -/**********************************************************************//** -Delete the document ids in the delete, and delete cache tables. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_purge_deleted_doc_ids( -/*===============================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - ulint i; - pars_info_t* info; - que_t* graph; - fts_update_t* update; - char* sql_str; - doc_id_t write_doc_id; - dberr_t error = DB_SUCCESS; - - info = pars_info_create(); - - ut_a(ib_vector_size(optim->to_delete->doc_ids) > 0); - - update = static_cast<fts_update_t*>( - ib_vector_get(optim->to_delete->doc_ids, 0)); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &write_doc_id, update->doc_id); - - /* This is required for the SQL parser to work. It must be able - to find the following variables. So we do it twice. 
*/ - fts_bind_doc_id(info, "doc_id1", &write_doc_id); - fts_bind_doc_id(info, "doc_id2", &write_doc_id); - - /* Since we only replace the table_id and don't construct the full - name, we do substitution ourselves. Remember to free sql_str. */ - sql_str = ut_strreplace( - fts_delete_doc_ids_sql, "%s", optim->name_prefix); - - graph = fts_parse_sql(NULL, info, sql_str); - - mem_free(sql_str); - - /* Delete the doc ids that were copied at the start. */ - for (i = 0; i < ib_vector_size(optim->to_delete->doc_ids); ++i) { - - update = static_cast<fts_update_t*>(ib_vector_get( - optim->to_delete->doc_ids, i)); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &write_doc_id, update->doc_id); - - fts_bind_doc_id(info, "doc_id1", &write_doc_id); - - fts_bind_doc_id(info, "doc_id2", &write_doc_id); - - error = fts_eval_sql(optim->trx, graph); - - // FIXME: Check whether delete actually succeeded! - if (error != DB_SUCCESS) { - - fts_sql_rollback(optim->trx); - break; - } - } - - fts_que_graph_free(graph); - - return(error); -} - -/**********************************************************************//** -Delete the document ids in the pending delete, and delete tables. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_purge_deleted_doc_id_snapshot( -/*=======================================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - dberr_t error; - que_t* graph; - char* sql_str; - - /* Since we only replace the table_id and don't construct - the full name, we do the '%s' substitution ourselves. */ - sql_str = ut_strreplace(fts_end_delete_sql, "%s", optim->name_prefix); - - /* Delete the doc ids that were copied to delete pending state at - the start of optimize. 
*/ - graph = fts_parse_sql(NULL, NULL, sql_str); - - mem_free(sql_str); - - error = fts_eval_sql(optim->trx, graph); - fts_que_graph_free(graph); - - return(error); -} - -/**********************************************************************//** -Copy the deleted doc ids that will be purged during this optimize run -to the being deleted FTS auxiliary tables. The transaction is committed -upon successfull copy and rolled back on DB_DUPLICATE_KEY error. -@return DB_SUCCESS if all OK */ -static -ulint -fts_optimize_being_deleted_count( -/*=============================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - fts_table_t fts_table; - - FTS_INIT_FTS_TABLE(&fts_table, "BEING_DELETED", FTS_COMMON_TABLE, - optim->table); - - return(fts_get_rows_count(&fts_table)); -} - -/*********************************************************************//** -Copy the deleted doc ids that will be purged during this optimize run -to the being deleted FTS auxiliary tables. The transaction is committed -upon successfull copy and rolled back on DB_DUPLICATE_KEY error. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_create_deleted_doc_id_snapshot( -/*========================================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - dberr_t error; - que_t* graph; - char* sql_str; - - /* Since we only replace the table_id and don't construct the - full name, we do the substitution ourselves. */ - sql_str = ut_strreplace(fts_init_delete_sql, "%s", optim->name_prefix); - - /* Move doc_ids that are to be deleted to state being deleted. 
*/ - graph = fts_parse_sql(NULL, NULL, sql_str); - - mem_free(sql_str); - - error = fts_eval_sql(optim->trx, graph); - - fts_que_graph_free(graph); - - if (error != DB_SUCCESS) { - fts_sql_rollback(optim->trx); - } else { - fts_sql_commit(optim->trx); - } - - optim->del_list_regenerated = TRUE; - - return(error); -} - -/*********************************************************************//** -Read in the document ids that are to be purged during optimize. The -transaction is committed upon successfully read. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_read_deleted_doc_id_snapshot( -/*======================================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - dberr_t error; - - optim->fts_common_table.suffix = "BEING_DELETED"; - - /* Read the doc_ids to delete. */ - error = fts_table_fetch_doc_ids( - optim->trx, &optim->fts_common_table, optim->to_delete); - - if (error == DB_SUCCESS) { - - optim->fts_common_table.suffix = "BEING_DELETED_CACHE"; - - /* Read additional doc_ids to delete. */ - error = fts_table_fetch_doc_ids( - optim->trx, &optim->fts_common_table, optim->to_delete); - } - - if (error != DB_SUCCESS) { - - fts_doc_ids_free(optim->to_delete); - optim->to_delete = NULL; - } - - return(error); -} - -/*********************************************************************//** -Optimze all the FTS indexes, skipping those that have already been -optimized, since the FTS auxiliary indexes are not guaranteed to be -of the same cardinality. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_indexes( -/*=================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - ulint i; - dberr_t error = DB_SUCCESS; - fts_t* fts = optim->table->fts; - - /* Optimize the FTS indexes. 
*/ - for (i = 0; i < ib_vector_size(fts->indexes); ++i) { - dict_index_t* index; - -#ifdef FTS_OPTIMIZE_DEBUG - ib_time_t end_time; - ib_time_t start_time; - - /* Get the start and end optimize times for this index. */ - error = fts_optimize_get_index_start_time( - optim->trx, index, &start_time); - - if (error != DB_SUCCESS) { - break; - } - - error = fts_optimize_get_index_end_time( - optim->trx, index, &end_time); - - if (error != DB_SUCCESS) { - break; - } - - /* Start time will be 0 only for the first time or after - completing the optimization of all FTS indexes. */ - if (start_time == 0) { - start_time = ut_time(); - - error = fts_optimize_set_index_start_time( - optim->trx, index, start_time); - } - - /* Check if this index needs to be optimized or not. */ - if (ut_difftime(end_time, start_time) < 0) { - error = fts_optimize_index(optim, index); - - if (error != DB_SUCCESS) { - break; - } - } else { - ++optim->n_completed; - } -#endif - index = static_cast<dict_index_t*>( - ib_vector_getp(fts->indexes, i)); - error = fts_optimize_index(optim, index); - } - - if (error == DB_SUCCESS) { - fts_sql_commit(optim->trx); - } else { - fts_sql_rollback(optim->trx); - } - - return(error); -} - -/*********************************************************************//** -Cleanup the snapshot tables and the master deleted table. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_purge_snapshot( -/*========================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - dberr_t error; - - /* Delete the doc ids from the master deleted tables, that were - in the snapshot that was taken at the start of optimize. */ - error = fts_optimize_purge_deleted_doc_ids(optim); - - if (error == DB_SUCCESS) { - /* Destroy the deleted doc id snapshot. 
*/ - error = fts_optimize_purge_deleted_doc_id_snapshot(optim); - } - - if (error == DB_SUCCESS) { - fts_sql_commit(optim->trx); - } else { - fts_sql_rollback(optim->trx); - } - - return(error); -} - -/*********************************************************************//** -Reset the start time to 0 so that a new optimize can be started. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_optimize_reset_start_time( -/*==========================*/ - fts_optimize_t* optim) /*!< in: optimize instance */ -{ - dberr_t error = DB_SUCCESS; -#ifdef FTS_OPTIMIZE_DEBUG - fts_t* fts = optim->table->fts; - - /* Optimization should have been completed for all indexes. */ - ut_a(optim->n_completed == ib_vector_size(fts->indexes)); - - for (uint i = 0; i < ib_vector_size(fts->indexes); ++i) { - dict_index_t* index; - - ib_time_t start_time = 0; - - /* Reset the start time to 0 for this index. */ - error = fts_optimize_set_index_start_time( - optim->trx, index, start_time); - - index = static_cast<dict_index_t*>( - ib_vector_getp(fts->indexes, i)); - } -#endif - - if (error == DB_SUCCESS) { - fts_sql_commit(optim->trx); - } else { - fts_sql_rollback(optim->trx); - } - - return(error); -} - -/*********************************************************************//** -Run OPTIMIZE on the given table by a background thread. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull)) -dberr_t -fts_optimize_table_bk( -/*==================*/ - fts_slot_t* slot) /*!< in: table to optimiza */ -{ - dberr_t error; - dict_table_t* table = slot->table; - fts_t* fts = table->fts; - - /* Avoid optimizing tables that were optimized recently. 
*/ - if (slot->last_run > 0 - && (ut_time() - slot->last_run) < slot->interval_time) { - - return(DB_SUCCESS); - - } else if (fts && fts->cache - && fts->cache->deleted >= FTS_OPTIMIZE_THRESHOLD) { - - error = fts_optimize_table(table); - - if (error == DB_SUCCESS) { - slot->state = FTS_STATE_DONE; - slot->last_run = 0; - slot->completed = ut_time(); - } - } else { - error = DB_SUCCESS; - } - - /* Note time this run completed. */ - slot->last_run = ut_time(); - - return(error); -} -/*********************************************************************//** -Run OPTIMIZE on the given table. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -fts_optimize_table( -/*===============*/ - dict_table_t* table) /*!< in: table to optimiza */ -{ - dberr_t error = DB_SUCCESS; - fts_optimize_t* optim = NULL; - fts_t* fts = table->fts; - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: FTS start optimize %s\n", table->name); - - optim = fts_optimize_create(table); - - // FIXME: Call this only at the start of optimize, currently we - // rely on DB_DUPLICATE_KEY to handle corrupting the snapshot. - - /* Check whether there are still records in BEING_DELETED table */ - if (fts_optimize_being_deleted_count(optim) == 0) { - /* Take a snapshot of the deleted document ids, they are copied - to the BEING_ tables. */ - error = fts_optimize_create_deleted_doc_id_snapshot(optim); - } - - /* A duplicate error is OK, since we don't erase the - doc ids from the being deleted state until all FTS - indexes have been optimized. */ - if (error == DB_DUPLICATE_KEY) { - error = DB_SUCCESS; - } - - if (error == DB_SUCCESS) { - - /* These document ids will be filtered out during the - index optimization phase. They are in the snapshot that we - took above, at the start of the optimize. */ - error = fts_optimize_read_deleted_doc_id_snapshot(optim); - - if (error == DB_SUCCESS) { - - /* Commit the read of being deleted - doc ids transaction. 
*/ - fts_sql_commit(optim->trx); - - /* We would do optimization only if there - are deleted records to be cleaned up */ - if (ib_vector_size(optim->to_delete->doc_ids) > 0) { - error = fts_optimize_indexes(optim); - } - - } else { - ut_a(optim->to_delete == NULL); - } - - /* Only after all indexes have been optimized can we - delete the (snapshot) doc ids in the pending delete, - and master deleted tables. */ - if (error == DB_SUCCESS - && optim->n_completed == ib_vector_size(fts->indexes)) { - - if (fts_enable_diag_print) { - fprintf(stderr, "FTS_OPTIMIZE: Completed " - "Optimize, cleanup DELETED " - "table\n"); - } - - if (ib_vector_size(optim->to_delete->doc_ids) > 0) { - - /* Purge the doc ids that were in the - snapshot from the snapshot tables and - the master deleted table. */ - error = fts_optimize_purge_snapshot(optim); - } - - if (error == DB_SUCCESS) { - /* Reset the start time of all the FTS indexes - so that optimize can be restarted. */ - error = fts_optimize_reset_start_time(optim); - } - } - } - - fts_optimize_free(optim); - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: FTS end optimize %s\n", table->name); - - return(error); -} - -/********************************************************************//** -Add the table to add to the OPTIMIZER's list. -@return new message instance */ -static -fts_msg_t* -fts_optimize_create_msg( -/*====================*/ - fts_msg_type_t type, /*!< in: type of message */ - void* ptr) /*!< in: message payload */ -{ - mem_heap_t* heap; - fts_msg_t* msg; - - heap = mem_heap_create(sizeof(*msg) + sizeof(ib_list_node_t) + 16); - msg = static_cast<fts_msg_t*>(mem_heap_alloc(heap, sizeof(*msg))); - - msg->ptr = ptr; - msg->type = type; - msg->heap = heap; - - return(msg); -} - -/**********************************************************************//** -Add the table to add to the OPTIMIZER's list. 
*/ -UNIV_INTERN -void -fts_optimize_add_table( -/*===================*/ - dict_table_t* table) /*!< in: table to add */ -{ - fts_msg_t* msg; - - if (!fts_optimize_wq) { - return; - } - - /* Make sure table with FTS index cannot be evicted */ - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } - - msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table); - - ib_wqueue_add(fts_optimize_wq, msg, msg->heap); -} - -/**********************************************************************//** -Optimize a table. */ -UNIV_INTERN -void -fts_optimize_do_table( -/*==================*/ - dict_table_t* table) /*!< in: table to optimize */ -{ - fts_msg_t* msg; - - /* Optimizer thread could be shutdown */ - if (!fts_optimize_wq) { - return; - } - - msg = fts_optimize_create_msg(FTS_MSG_OPTIMIZE_TABLE, table); - - ib_wqueue_add(fts_optimize_wq, msg, msg->heap); -} - -/**********************************************************************//** -Remove the table from the OPTIMIZER's list. We do wait for -acknowledgement from the consumer of the message. */ -UNIV_INTERN -void -fts_optimize_remove_table( -/*======================*/ - dict_table_t* table) /*!< in: table to remove */ -{ - fts_msg_t* msg; - os_event_t event; - fts_msg_del_t* remove; - - /* if the optimize system not yet initialized, return */ - if (!fts_optimize_wq) { - return; - } - - /* FTS optimizer thread is already exited */ - if (fts_opt_start_shutdown) { - ib_logf(IB_LOG_LEVEL_INFO, - "Try to remove table %s after FTS optimize" - " thread exiting.", table->name); - return; - } - - msg = fts_optimize_create_msg(FTS_MSG_DEL_TABLE, NULL); - - /* We will wait on this event until signalled by the consumer. 
*/ - event = os_event_create(); - - remove = static_cast<fts_msg_del_t*>( - mem_heap_alloc(msg->heap, sizeof(*remove))); - - remove->table = table; - remove->event = event; - msg->ptr = remove; - - ib_wqueue_add(fts_optimize_wq, msg, msg->heap); - - os_event_wait(event); - - os_event_free(event); -} - -/** Send sync fts cache for the table. -@param[in] table table to sync */ -UNIV_INTERN -void -fts_optimize_request_sync_table( - dict_table_t* table) -{ - fts_msg_t* msg; - table_id_t* table_id; - - /* if the optimize system not yet initialized, return */ - if (!fts_optimize_wq) { - return; - } - - /* FTS optimizer thread is already exited */ - if (fts_opt_start_shutdown) { - ib_logf(IB_LOG_LEVEL_INFO, - "Try to sync table %s after FTS optimize" - " thread exiting.", table->name); - return; - } - - msg = fts_optimize_create_msg(FTS_MSG_SYNC_TABLE, NULL); - - table_id = static_cast<table_id_t*>( - mem_heap_alloc(msg->heap, sizeof(table_id_t))); - *table_id = table->id; - msg->ptr = table_id; - - ib_wqueue_add(fts_optimize_wq, msg, msg->heap); -} - -/**********************************************************************//** -Find the slot for a particular table. -@return slot if found else NULL. */ -static -fts_slot_t* -fts_optimize_find_slot( -/*===================*/ - ib_vector_t* tables, /*!< in: vector of tables */ - const dict_table_t* table) /*!< in: table to add */ -{ - ulint i; - - for (i = 0; i < ib_vector_size(tables); ++i) { - fts_slot_t* slot; - - slot = static_cast<fts_slot_t*>(ib_vector_get(tables, i)); - - if (slot->table->id == table->id) { - return(slot); - } - } - - return(NULL); -} - -/**********************************************************************//** -Start optimizing table. 
*/ -static -void -fts_optimize_start_table( -/*=====================*/ - ib_vector_t* tables, /*!< in/out: vector of tables */ - dict_table_t* table) /*!< in: table to optimize */ -{ - fts_slot_t* slot; - - slot = fts_optimize_find_slot(tables, table); - - if (slot == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: table %s not registered " - "with the optimize thread.\n", table->name); - } else { - slot->last_run = 0; - slot->completed = 0; - } -} - -/**********************************************************************//** -Add the table to the vector if it doesn't already exist. */ -static -ibool -fts_optimize_new_table( -/*===================*/ - ib_vector_t* tables, /*!< in/out: vector of tables */ - dict_table_t* table) /*!< in: table to add */ -{ - ulint i; - fts_slot_t* slot; - ulint empty_slot = ULINT_UNDEFINED; - - /* Search for duplicates, also find a free slot if one exists. */ - for (i = 0; i < ib_vector_size(tables); ++i) { - - slot = static_cast<fts_slot_t*>( - ib_vector_get(tables, i)); - - if (slot->state == FTS_STATE_EMPTY) { - empty_slot = i; - } else if (slot->table->id == table->id) { - /* Already exists in our optimize queue. */ - ut_ad(slot->table_id = table->id); - return(FALSE); - } - } - - /* Reuse old slot. */ - if (empty_slot != ULINT_UNDEFINED) { - - slot = static_cast<fts_slot_t*>( - ib_vector_get(tables, empty_slot)); - - ut_a(slot->state == FTS_STATE_EMPTY); - - } else { /* Create a new slot. */ - - slot = static_cast<fts_slot_t*>(ib_vector_push(tables, NULL)); - } - - memset(slot, 0x0, sizeof(*slot)); - - slot->table = table; - slot->table_id = table->id; - slot->state = FTS_STATE_LOADED; - slot->interval_time = FTS_OPTIMIZE_INTERVAL_IN_SECS; - - return(TRUE); -} - -/**********************************************************************//** -Remove the table from the vector if it exists. 
*/ -static -ibool -fts_optimize_del_table( -/*===================*/ - ib_vector_t* tables, /*!< in/out: vector of tables */ - fts_msg_del_t* msg) /*!< in: table to delete */ -{ - ulint i; - dict_table_t* table = msg->table; - - for (i = 0; i < ib_vector_size(tables); ++i) { - fts_slot_t* slot; - - slot = static_cast<fts_slot_t*>(ib_vector_get(tables, i)); - - /* FIXME: Should we assert on this ? */ - if (slot->state != FTS_STATE_EMPTY - && slot->table->id == table->id) { - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: FTS Optimize Removing " - "table %s\n", table->name); - - slot->table = NULL; - slot->state = FTS_STATE_EMPTY; - - return(TRUE); - } - } - - return(FALSE); -} - -/**********************************************************************//** -Calculate how many of the registered tables need to be optimized. -@return no. of tables to optimize */ -static -ulint -fts_optimize_how_many( -/*==================*/ - const ib_vector_t* tables) /*!< in: registered tables - vector*/ -{ - ulint i; - ib_time_t delta; - ulint n_tables = 0; - ib_time_t current_time; - - current_time = ut_time(); - - for (i = 0; i < ib_vector_size(tables); ++i) { - const fts_slot_t* slot; - - slot = static_cast<const fts_slot_t*>( - ib_vector_get_const(tables, i)); - - switch (slot->state) { - case FTS_STATE_DONE: - case FTS_STATE_LOADED: - ut_a(slot->completed <= current_time); - - delta = current_time - slot->completed; - - /* Skip slots that have been optimized recently. */ - if (delta >= slot->interval_time) { - ++n_tables; - } - break; - - case FTS_STATE_RUNNING: - ut_a(slot->last_run <= current_time); - - delta = current_time - slot->last_run; - - if (delta > slot->interval_time) { - ++n_tables; - } - break; - - /* Slots in a state other than the above - are ignored. 
*/ - case FTS_STATE_EMPTY: - case FTS_STATE_SUSPENDED: - break; - } - - } - - return(n_tables); -} - -/**********************************************************************//** -Check if the total memory used by all FTS table exceeds the maximum limit. -@return true if a sync is needed, false otherwise */ -static -bool -fts_is_sync_needed( -/*===============*/ - const ib_vector_t* tables) /*!< in: registered tables - vector*/ -{ - ulint total_memory = 0; - double time_diff = difftime(ut_time(), last_check_sync_time); - - if (fts_need_sync || time_diff < 5) { - return(false); - } - - last_check_sync_time = ut_time(); - - for (ulint i = 0; i < ib_vector_size(tables); ++i) { - const fts_slot_t* slot; - - slot = static_cast<const fts_slot_t*>( - ib_vector_get_const(tables, i)); - - if (slot->state != FTS_STATE_EMPTY && slot->table - && slot->table->fts) { - total_memory += slot->table->fts->cache->total_size; - } - - if (total_memory > fts_max_total_cache_size) { - return(true); - } - } - - return(false); -} - -#if 0 -/*********************************************************************//** -Check whether a table needs to be optimized. 
*/ -static -void -fts_optimize_need_sync( -/*===================*/ - ib_vector_t* tables) /*!< in: list of tables */ -{ - dict_table_t* table = NULL; - fts_slot_t* slot; - ulint num_table = ib_vector_size(tables); - - if (!num_table) { - return; - } - - if (fts_optimize_sync_iterator >= num_table) { - fts_optimize_sync_iterator = 0; - } - - slot = ib_vector_get(tables, fts_optimize_sync_iterator); - table = slot->table; - - if (!table) { - return; - } - - ut_ad(table->fts); - - if (table->fts->cache) { - ulint deleted = table->fts->cache->deleted; - - if (table->fts->cache->added - >= fts_optimize_add_threshold) { - fts_sync_table(table); - } else if (deleted >= fts_optimize_delete_threshold) { - fts_optimize_do_table(table); - - mutex_enter(&table->fts->cache->deleted_lock); - table->fts->cache->deleted -= deleted; - mutex_exit(&table->fts->cache->deleted_lock); - } - } - - fts_optimize_sync_iterator++; - - return; -} -#endif - -/** Sync fts cache of a table -@param[in] table_id table id */ -void -fts_optimize_sync_table( - table_id_t table_id) -{ - dict_table_t* table = NULL; - - /* Prevent DROP INDEX etc. from running when we are syncing - cache in background. */ - if (!rw_lock_s_lock_nowait(&dict_operation_lock, __FILE__, __LINE__)) { - /* Exit when fail to get dict operation lock. */ - return; - } - - table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL); - - if (table) { - if (dict_table_has_fts_index(table) && table->fts->cache) { - fts_sync_table(table, true, false, true); - } - - dict_table_close(table, FALSE, FALSE); - } - - rw_lock_s_unlock(&dict_operation_lock); -} - -/**********************************************************************//** -Optimize all FTS tables. 
-@return Dummy return */ -UNIV_INTERN -os_thread_ret_t -fts_optimize_thread( -/*================*/ - void* arg) /*!< in: work queue*/ -{ - mem_heap_t* heap; - ib_vector_t* tables; - ib_alloc_t* heap_alloc; - ulint current = 0; - ibool done = FALSE; - ulint n_tables = 0; - os_event_t exit_event = 0; - ulint n_optimize = 0; - ib_wqueue_t* wq = (ib_wqueue_t*) arg; - - ut_ad(!srv_read_only_mode); - my_thread_init(); - - heap = mem_heap_create(sizeof(dict_table_t*) * 64); - heap_alloc = ib_heap_allocator_create(heap); - - tables = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4); - - while(!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) { - - /* If there is no message in the queue and we have tables - to optimize then optimize the tables. */ - - if (!done - && ib_wqueue_is_empty(wq) - && n_tables > 0 - && n_optimize > 0) { - - fts_slot_t* slot; - - ut_a(ib_vector_size(tables) > 0); - - slot = static_cast<fts_slot_t*>( - ib_vector_get(tables, current)); - - /* Handle the case of empty slots. */ - if (slot->state != FTS_STATE_EMPTY) { - - slot->state = FTS_STATE_RUNNING; - - fts_optimize_table_bk(slot); - } - - ++current; - - /* Wrap around the counter. */ - if (current >= ib_vector_size(tables)) { - n_optimize = fts_optimize_how_many(tables); - - current = 0; - } - - } else if (n_optimize == 0 || !ib_wqueue_is_empty(wq)) { - fts_msg_t* msg; - - msg = static_cast<fts_msg_t*>( - ib_wqueue_timedwait(wq, - FTS_QUEUE_WAIT_IN_USECS)); - - /* Timeout ? 
*/ - if (msg == NULL) { - if (fts_is_sync_needed(tables)) { - fts_need_sync = true; - } - - continue; - } - - switch (msg->type) { - case FTS_MSG_START: - break; - - case FTS_MSG_PAUSE: - break; - - case FTS_MSG_STOP: - done = TRUE; - exit_event = (os_event_t) msg->ptr; - break; - - case FTS_MSG_ADD_TABLE: - ut_a(!done); - if (fts_optimize_new_table( - tables, - static_cast<dict_table_t*>( - msg->ptr))) { - ++n_tables; - } - break; - - case FTS_MSG_OPTIMIZE_TABLE: - if (!done) { - fts_optimize_start_table( - tables, - static_cast<dict_table_t*>( - msg->ptr)); - } - break; - - case FTS_MSG_DEL_TABLE: - if (fts_optimize_del_table( - tables, static_cast<fts_msg_del_t*>( - msg->ptr))) { - --n_tables; - } - - /* Signal the producer that we have - removed the table. */ - os_event_set( - ((fts_msg_del_t*) msg->ptr)->event); - break; - - case FTS_MSG_SYNC_TABLE: - fts_optimize_sync_table( - *static_cast<table_id_t*>(msg->ptr)); - break; - - default: - ut_error; - } - - mem_heap_free(msg->heap); - - if (!done) { - n_optimize = fts_optimize_how_many(tables); - } else { - n_optimize = 0; - } - } - } - - /* Server is being shutdown, sync the data from FTS cache to disk - if needed */ - if (n_tables > 0) { - ulint i; - - for (i = 0; i < ib_vector_size(tables); i++) { - fts_slot_t* slot; - - slot = static_cast<fts_slot_t*>( - ib_vector_get(tables, i)); - - if (slot->state != FTS_STATE_EMPTY) { - fts_optimize_sync_table(slot->table_id); - } - } - } - - ib_vector_free(tables); - - ib_logf(IB_LOG_LEVEL_INFO, "FTS optimize thread exiting."); - - os_event_set(exit_event); - my_thread_end(); - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/**********************************************************************//** -Startup the optimize thread and create the work queue. 
*/ -UNIV_INTERN -void -fts_optimize_init(void) -/*===================*/ -{ - ut_ad(!srv_read_only_mode); - - /* For now we only support one optimize thread. */ - ut_a(fts_optimize_wq == NULL); - - fts_optimize_wq = ib_wqueue_create(); - ut_a(fts_optimize_wq != NULL); - last_check_sync_time = ut_time(); - - os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL); -} - -/**********************************************************************//** -Check whether the work queue is initialized. -@return TRUE if optimze queue is initialized. */ -UNIV_INTERN -ibool -fts_optimize_is_init(void) -/*======================*/ -{ - return(fts_optimize_wq != NULL); -} - -/**********************************************************************//** -Signal the optimize thread to prepare for shutdown. */ -UNIV_INTERN -void -fts_optimize_start_shutdown(void) -/*=============================*/ -{ - ut_ad(!srv_read_only_mode); - - fts_msg_t* msg; - os_event_t event; - - /* If there is an ongoing activity on dictionary, such as - srv_master_evict_from_table_cache(), wait for it */ - dict_mutex_enter_for_mysql(); - - /* Tells FTS optimizer system that we are exiting from - optimizer thread, message send their after will not be - processed */ - fts_opt_start_shutdown = true; - dict_mutex_exit_for_mysql(); - - /* We tell the OPTIMIZE thread to switch to state done, we - can't delete the work queue here because the add thread needs - deregister the FTS tables. */ - event = os_event_create(); - - msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL); - msg->ptr = event; - - ib_wqueue_add(fts_optimize_wq, msg, msg->heap); - - os_event_wait(event); - os_event_free(event); - - ib_wqueue_free(fts_optimize_wq); - -} - -/**********************************************************************//** -Reset the work queue. 
*/ -UNIV_INTERN -void -fts_optimize_end(void) -/*==================*/ -{ - ut_ad(!srv_read_only_mode); - - // FIXME: Potential race condition here: We should wait for - // the optimize thread to confirm shutdown. - fts_optimize_wq = NULL; -} diff --git a/storage/xtradb/fts/fts0pars.cc b/storage/xtradb/fts/fts0pars.cc deleted file mode 100644 index 7f0ba4e0c1b..00000000000 --- a/storage/xtradb/fts/fts0pars.cc +++ /dev/null @@ -1,2010 +0,0 @@ -/* A Bison parser, made by GNU Bison 2.5. */ - -/* Bison implementation for Yacc-like parsers in C - - Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. 
*/ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "2.5" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 1 - -/* Push parsers. */ -#define YYPUSH 0 - -/* Pull parsers. */ -#define YYPULL 1 - -/* Using locations. */ -#define YYLSP_NEEDED 0 - -/* Substitute the variable and function names. */ -#define yyparse ftsparse -#define yylex ftslex -#define yyerror ftserror -#define yylval ftslval -#define yychar ftschar -#define yydebug ftsdebug -#define yynerrs ftsnerrs - - -/* Copy the first part of user declarations. 
*/ - -/* Line 268 of yacc.c */ -#line 26 "fts0pars.y" - - -#include "mem0mem.h" -#include "fts0ast.h" -#include "fts0blex.h" -#include "fts0tlex.h" -#include "fts0pars.h" - -extern int fts_lexer(YYSTYPE*, fts_lexer_t*); -extern int fts_blexer(YYSTYPE*, yyscan_t); -extern int fts_tlexer(YYSTYPE*, yyscan_t); - -typedef int (*fts_scan)(); - -extern int ftserror(const char* p); - -/* Required for reentrant parser */ -#define ftslex fts_lexer - -#define YYERROR_VERBOSE - -/* For passing an argument to yyparse() */ -#define YYPARSE_PARAM state -#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer - -#define YYTOKENFREE(token) fts_ast_string_free((token)) - -typedef int (*fts_scanner_alt)(YYSTYPE* val, yyscan_t yyscanner); -typedef int (*fts_scanner)(); - -struct fts_lexer_t { - fts_scanner scanner; - void* yyscanner; -}; - - - -/* Line 268 of yacc.c */ -#line 115 "fts0pars.cc" - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 0 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* Enabling the token table. */ -#ifndef YYTOKEN_TABLE -# define YYTOKEN_TABLE 0 -#endif - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - FTS_OPER = 258, - FTS_TEXT = 259, - FTS_TERM = 260, - FTS_NUMB = 261 - }; -#endif - - - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -{ - -/* Line 293 of yacc.c */ -#line 61 "fts0pars.y" - - int oper; - fts_ast_string_t* token; - fts_ast_node_t* node; - - - -/* Line 293 of yacc.c */ -#line 165 "fts0pars.cc" -} YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -#endif - - -/* Copy the second part of user declarations. 
*/ - - -/* Line 343 of yacc.c */ -#line 177 "fts0pars.cc" - -#ifdef short -# undef short -#endif - -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#elif (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -typedef signed char yytype_int8; -#else -typedef short int yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if defined YYENABLE_NLS && YYENABLE_NLS -# if ENABLE_NLS -# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ -# define YY_(msgid) dgettext ("bison-runtime", msgid) -# endif -# endif -# ifndef YY_ -# define YY_(msgid) msgid -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(e) ((void) (e)) -#else -# define YYUSE(e) /* empty */ -#endif - -/* Identity function, used to suppress warnings about constant conditions. */ -#ifndef lint -# define YYID(n) (n) -#else -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static int -YYID (int yyi) -#else -static int -YYID (yyi) - int yyi; -#endif -{ - return yyi; -} -#endif - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. 
*/ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef EXIT_SUCCESS -# define EXIT_SUCCESS 0 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. */ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined EXIT_SUCCESS \ - && ! ((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef EXIT_SUCCESS -# define EXIT_SUCCESS 0 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! 
defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void free (void *); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss_alloc; - YYSTYPE yyvs_alloc; -}; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. */ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -# define YYCOPY_NEEDED 1 - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ - Stack = &yyptr->Stack_alloc; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (YYID (0)) - -#endif - -#if defined YYCOPY_NEEDED && YYCOPY_NEEDED -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. 
*/ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (YYID (0)) -# endif -# endif -#endif /* !YYCOPY_NEEDED */ - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 3 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 52 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 16 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 8 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 24 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 33 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 261 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 12, 13, 14, 7, 2, 8, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 10, 2, 11, 2, 15, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 9, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. */ -static const yytype_uint8 yyprhs[] = -{ - 0, 0, 3, 5, 6, 9, 12, 16, 21, 23, - 25, 28, 32, 36, 39, 44, 47, 49, 51, 53, - 55, 57, 59, 61, 64 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. */ -static const yytype_int8 yyrhs[] = -{ - 17, 0, -1, 18, -1, -1, 18, 20, -1, 18, - 19, -1, 12, 18, 13, -1, 21, 12, 18, 13, - -1, 22, -1, 23, -1, 22, 14, -1, 23, 15, - 6, -1, 21, 22, 14, -1, 21, 22, -1, 21, - 23, 15, 6, -1, 21, 23, -1, 8, -1, 7, - -1, 9, -1, 10, -1, 11, -1, 5, -1, 6, - -1, 14, 22, -1, 4, -1 -}; - -/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ -static const yytype_uint8 yyrline[] = -{ - 0, 79, 79, 85, 89, 99, 111, 119, 129, 133, - 137, 141, 146, 152, 157, 164, 170, 174, 178, 182, - 186, 191, 196, 202, 207 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "FTS_OPER", "FTS_TEXT", "FTS_TERM", - "FTS_NUMB", "'+'", "'-'", "'~'", "'<'", "'>'", "'('", "')'", "'*'", - "'@'", "$accept", "query", "expr_lst", "sub_expr", "expr", "prefix", - "term", "text", 0 -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. */ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 43, 45, 126, - 60, 62, 40, 41, 42, 64 -}; -# endif - -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 16, 17, 18, 18, 18, 19, 19, 20, 20, - 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, - 21, 22, 22, 22, 23 -}; - -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 1, 0, 2, 2, 3, 4, 1, 1, - 2, 3, 3, 2, 4, 2, 1, 1, 1, 1, - 1, 1, 1, 2, 1 -}; - -/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM. - Performed when YYTABLE doesn't specify something else to do. Zero - means the default is an error. */ -static const yytype_uint8 yydefact[] = -{ - 3, 0, 2, 1, 24, 21, 22, 17, 16, 18, - 19, 20, 3, 0, 5, 4, 0, 8, 9, 0, - 23, 3, 13, 15, 10, 0, 6, 0, 12, 0, - 11, 7, 14 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = -{ - -1, 1, 2, 14, 15, 16, 17, 18 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -5 -static const yytype_int8 yypact[] = -{ - -5, 38, 18, -5, -5, -5, -5, -5, -5, -5, - -5, -5, -5, 31, -5, -5, 29, 30, 32, -4, - -5, -5, 34, 35, -5, 40, -5, 7, -5, 43, - -5, -5, -5 -}; - -/* YYPGOTO[NTERM-NUM]. */ -static const yytype_int8 yypgoto[] = -{ - -5, -5, 19, -5, -5, -5, 26, 36 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If YYTABLE_NINF, syntax error. 
*/ -#define YYTABLE_NINF -1 -static const yytype_uint8 yytable[] = -{ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 26, - 13, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 31, 13, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 19, 13, 4, 5, 6, 5, 6, 3, 20, - 27, 21, 22, 13, 24, 13, 30, 25, 28, 32, - 29, 0, 23 -}; - -#define yypact_value_is_default(yystate) \ - ((yystate) == (-5)) - -#define yytable_value_is_error(yytable_value) \ - YYID (0) - -static const yytype_int8 yycheck[] = -{ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 4, 5, 6, 7, 8, 9, 10, 11, 12, - 13, 14, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 12, 14, 4, 5, 6, 5, 6, 0, 13, - 21, 12, 16, 14, 14, 14, 6, 15, 14, 6, - 15, -1, 16 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ -static const yytype_uint8 yystos[] = -{ - 0, 17, 18, 0, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 14, 19, 20, 21, 22, 23, 18, - 22, 12, 22, 23, 14, 15, 13, 18, 14, 15, - 6, 13, 6 -}; - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. However, - YYFAIL appears to be in use. Nevertheless, it is formally deprecated - in Bison 2.4.2's NEWS entry, where a plan to phase it out is - discussed. */ - -#define YYFAIL goto yyerrlab -#if defined YYFAIL - /* This is here to suppress warnings from the GCC cpp's - -Wunused-macros. Normally we don't worry about that warning, but - some users do, and we want to make it easy for users to remove - YYFAIL uses, which will produce warnings from Bison 2.5. 
*/ -#endif - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (YYID (0)) - - -#define YYTERROR 1 -#define YYERRCODE 256 - -#define YYERRCLEANUP \ -do \ - switch (yylastchar) \ - { \ - case FTS_NUMB: \ - case FTS_TEXT: \ - case FTS_TERM: \ - YYTOKENFREE(yylval.token); \ - break; \ - default: \ - break; \ - } \ -while (YYID (0)) - -/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. - If N is 0, then set CURRENT to the empty location which ends - the previous symbol: RHS[0] (always defined). */ - -#define YYRHSLOC(Rhs, K) ((Rhs)[K]) -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (YYID (N)) \ - { \ - (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC (Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC (Rhs, 0).last_column; \ - } \ - while (YYID (0)) -#endif - - -/* This macro is provided for backward compatibility. */ - -#ifndef YY_LOCATION_PRINT -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -#endif - - -/* YYLEX -- calling `yylex' with the right arguments. */ - -#ifdef YYLEX_PARAM -# define YYLEX yylex (&yylval, YYLEX_PARAM) -#else -# define YYLEX yylex (&yylval) -#endif - -/* Enable debugging if requested. 
*/ -#if YYDEBUG - -# ifndef YYFPRINTF -# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (YYID (0)) - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (YYID (0)) - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_value_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (!yyvaluep) - return; -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# else - YYUSE (yyoutput); -# endif - switch (yytype) - { - default: - break; - } -} - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (yytype < YYNTOKENS) - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - yy_symbol_value_print (yyoutput, yytype, yyvaluep); - YYFPRINTF (yyoutput, ")"); -} - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). 
| -`------------------------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) -#else -static void -yy_stack_print (yybottom, yytop) - yytype_int16 *yybottom; - yytype_int16 *yytop; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (; yybottom <= yytop; yybottom++) - { - int yybot = *yybottom; - YYFPRINTF (stderr, " %d", yybot); - } - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (YYID (0)) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. | -`------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_reduce_print (YYSTYPE *yyvsp, int yyrule) -#else -static void -yy_reduce_print (yyvsp, yyrule) - YYSTYPE *yyvsp; - int yyrule; -#endif -{ - int yynrhs = yyr2[yyrule]; - int yyi; - unsigned long int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. */ - for (yyi = 0; yyi < yynrhs; yyi++) - { - YYFPRINTF (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], - &(yyvsp[(yyi + 1) - (yynrhs)]) - ); - YYFPRINTF (stderr, "\n"); - } -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyvsp, Rule); \ -} while (YYID (0)) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. 
*/ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static YYSIZE_T -yystrlen (const char *yystr) -#else -static YYSIZE_T -yystrlen (yystr) - const char *yystr; -#endif -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static char * -yystpcpy (char *yydest, const char *yysrc) -#else -static char * -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -#endif -{ - char *yyd = yydest; - const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. 
*/ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message - about the unexpected token YYTOKEN for the state stack whose top is - YYSSP. - - Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is - not large enough to hold the message. In that case, also set - *YYMSG_ALLOC to the required number of bytes. Return 2 if the - required number of bytes is too large to store. */ -static int -yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, - yytype_int16 *yyssp, int yytoken) -{ - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]); - YYSIZE_T yysize = yysize0; - YYSIZE_T yysize1; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - /* Internationalized format string. */ - const char *yyformat = 0; - /* Arguments of yyformat. */ - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - /* Number of reported tokens (one for the "unexpected", one per - "expected"). */ - int yycount = 0; - - /* There are many possibilities here to consider: - - Assume YYFAIL is not used. It's too flawed to consider. See - <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html> - for details. YYERROR is fine as it does not invoke this - function. - - If this state is a consistent state with a default action, then - the only way this function was invoked is if the default action - is an error action. In that case, don't check for expected - tokens because there are none. 
- - The only way there can be no lookahead present (in yychar) is if - this state is a consistent state with a default action. Thus, - detecting the absence of a lookahead is sufficient to determine - that there is no unexpected or expected token to report. In that - case, just report a simple "syntax error". - - Don't assume there isn't a lookahead just because this state is a - consistent state with a default action. There might have been a - previous inconsistent state, consistent state with a non-default - action, or user semantic action that manipulated yychar. - - Of course, the expected token list depends on states to have - correct lookahead information, and it depends on the parser not - to perform extra reductions after fetching a lookahead from the - scanner and before detecting a syntax error. Thus, state merging - (from LALR or IELR) and default reductions corrupt the expected - token list. However, the list is correct for canonical LR with - one exception: it will still contain any token that will not be - accepted due to an error action in a later state. - */ - if (yytoken != YYEMPTY) - { - int yyn = yypact[*yyssp]; - yyarg[yycount++] = yytname[yytoken]; - if (!yypact_value_is_default (yyn)) - { - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. In other words, skip the first -YYN actions for - this state because they are default actions. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yyx; - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR - && !yytable_value_is_error (yytable[yyx + yyn])) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - break; - } - yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); - if (! 
(yysize <= yysize1 - && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) - return 2; - yysize = yysize1; - } - } - } - - switch (yycount) - { -# define YYCASE_(N, S) \ - case N: \ - yyformat = S; \ - break - YYCASE_(0, YY_("syntax error")); - YYCASE_(1, YY_("syntax error, unexpected %s")); - YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); - YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); - YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); - YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); -# undef YYCASE_ - } - - yysize1 = yysize + yystrlen (yyformat); - if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) - return 2; - yysize = yysize1; - - if (*yymsg_alloc < yysize) - { - *yymsg_alloc = 2 * yysize; - if (! (yysize <= *yymsg_alloc - && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) - *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; - return 1; - } - - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - { - char *yyp = *yymsg; - int yyi = 0; - while ((*yyp = *yyformat) != '\0') - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyformat += 2; - } - else - { - yyp++; - yyformat++; - } - } - return 0; -} -#endif /* YYERROR_VERBOSE */ - -/*-----------------------------------------------. -| Release the memory associated to this symbol. 
| -`-----------------------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) -#else -static void -yydestruct (yymsg, yytype, yyvaluep) - const char *yymsg; - int yytype; - YYSTYPE *yyvaluep; -#endif -{ - YYUSE (yyvaluep); - - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); - - switch (yytype) - { - - default: - break; - } -} - - -/* Prevent warnings from -Wmissing-prototypes. */ -#ifdef YYPARSE_PARAM -#if defined __STDC__ || defined __cplusplus -int yyparse (void *YYPARSE_PARAM); -#else -int yyparse (); -#endif -#else /* ! YYPARSE_PARAM */ -#if defined __STDC__ || defined __cplusplus -int yyparse (void); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ - - -/*----------. -| yyparse. | -`----------*/ - -#ifdef YYPARSE_PARAM -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void *YYPARSE_PARAM) -#else -int -yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -#endif -#else /* ! YYPARSE_PARAM */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void) -#else -int -yyparse () - -#endif -#endif -{ -/* The lookahead symbol. */ -int yychar; -/* The backup of yychar when there is an error and we're in yyerrlab. */ -int yylastchar; - -/* The semantic value of the lookahead symbol. */ -YYSTYPE yylval; - - /* Number of syntax errors so far. */ - int yynerrs; - - int yystate; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - - /* The stacks and their tools: - `yyss': related to states. - `yyvs': related to semantic values. - - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. 
*/ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs; - YYSTYPE *yyvsp; - - YYSIZE_T yystacksize; - - int yyn; - int yyresult; - /* Lookahead token as an internal (translated) token number. */ - int yytoken; - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; - -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) - - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; - - yytoken = 0; - yyss = yyssa; - yyvs = yyvsa; - yystacksize = YYINITDEPTH; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - yyssp = yyss; - yyvsp = yyvs; - - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. */ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. 
*/ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss_alloc, yyss); - YYSTACK_RELOCATE (yyvs_alloc, yyvs); -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - if (yystate == YYFINAL) - YYACCEPT; - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - lookahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to lookahead token. */ - yyn = yypact[yystate]; - if (yypact_value_is_default (yyn)) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. 
*/ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yytable_value_is_error (yyn)) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - /* Shift the lookahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - - /* Discard the shifted token. */ - yychar = YYEMPTY; - - yystate = yyn; - *++yyvsp = yylval; - - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. 
*/ - yyval = yyvsp[1-yylen]; - - - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 2: - -/* Line 1806 of yacc.c */ -#line 79 "fts0pars.y" - { - (yyval.node) = (yyvsp[(1) - (1)].node); - ((fts_ast_state_t*) state)->root = (yyval.node); - } - break; - - case 3: - -/* Line 1806 of yacc.c */ -#line 85 "fts0pars.y" - { - (yyval.node) = NULL; - } - break; - - case 4: - -/* Line 1806 of yacc.c */ -#line 89 "fts0pars.y" - { - (yyval.node) = (yyvsp[(1) - (2)].node); - - if (!(yyval.node)) { - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(2) - (2)].node)); - } else { - fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); - } - } - break; - - case 5: - -/* Line 1806 of yacc.c */ -#line 99 "fts0pars.y" - { - (yyval.node) = (yyvsp[(1) - (2)].node); - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); - - if (!(yyval.node)) { - (yyval.node) = (yyvsp[(2) - (2)].node); - } else { - fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); - } - } - break; - - case 6: - -/* Line 1806 of yacc.c */ -#line 111 "fts0pars.y" - { - (yyval.node) = (yyvsp[(2) - (3)].node); - - if ((yyval.node)) { - (yyval.node) = fts_ast_create_node_subexp_list(state, (yyval.node)); - } - } - break; - - case 7: - -/* Line 1806 of yacc.c */ -#line 119 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); - - if ((yyvsp[(3) - (4)].node)) { - fts_ast_add_node((yyval.node), - fts_ast_create_node_subexp_list(state, (yyvsp[(3) - (4)].node))); - } - } - break; - - case 8: - -/* Line 1806 of yacc.c */ -#line 129 "fts0pars.y" - { - (yyval.node) = (yyvsp[(1) - (1)].node); - } - break; - - case 9: - -/* Line 1806 of yacc.c */ -#line 133 "fts0pars.y" - { - (yyval.node) = (yyvsp[(1) - (1)].node); - } - break; - - case 10: - -/* Line 1806 of yacc.c */ -#line 137 "fts0pars.y" - { - fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node)); - } - break; - - case 11: - -/* Line 1806 of yacc.c */ -#line 141 "fts0pars.y" - { - 
fts_ast_term_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10)); - fts_ast_string_free((yyvsp[(3) - (3)].token)); - } - break; - - case 12: - -/* Line 1806 of yacc.c */ -#line 146 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node)); - fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node)); - fts_ast_term_set_wildcard((yyvsp[(2) - (3)].node)); - } - break; - - case 13: - -/* Line 1806 of yacc.c */ -#line 152 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); - fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); - } - break; - - case 14: - -/* Line 1806 of yacc.c */ -#line 157 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); - fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node)); - fts_ast_term_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10)); - fts_ast_string_free((yyvsp[(4) - (4)].token)); - } - break; - - case 15: - -/* Line 1806 of yacc.c */ -#line 164 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); - fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); - } - break; - - case 16: - -/* Line 1806 of yacc.c */ -#line 170 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE); - } - break; - - case 17: - -/* Line 1806 of yacc.c */ -#line 174 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST); - } - break; - - case 18: - -/* Line 1806 of yacc.c */ -#line 178 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE); - } - break; - - case 19: - -/* Line 1806 of yacc.c */ -#line 182 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING); - } - break; - - case 20: - -/* Line 1806 of yacc.c */ -#line 186 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING); - } - break; - - case 21: - -/* 
Line 1806 of yacc.c */ -#line 191 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); - fts_ast_string_free((yyvsp[(1) - (1)].token)); - } - break; - - case 22: - -/* Line 1806 of yacc.c */ -#line 196 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); - fts_ast_string_free((yyvsp[(1) - (1)].token)); - } - break; - - case 23: - -/* Line 1806 of yacc.c */ -#line 202 "fts0pars.y" - { - (yyval.node) = (yyvsp[(2) - (2)].node); - } - break; - - case 24: - -/* Line 1806 of yacc.c */ -#line 207 "fts0pars.y" - { - (yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token)); - fts_ast_string_free((yyvsp[(1) - (1)].token)); - } - break; - - - -/* Line 1806 of yacc.c */ -#line 1663 "fts0pars.cc" - default: break; - } - /* User semantic actions sometimes alter yychar, and that requires - that yytoken be updated with the new translation. We take the - approach of translating immediately before every use of yytoken. - One alternative is translating here after every semantic action, - but that translation would be missed if the semantic action invokes - YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or - if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an - incorrect destructor might then be invoked immediately. In the - case of YYERROR or YYBACKUP, subsequent parser actions might lead - to an incorrect destructor call or verbose syntax error message - before the lookahead is translated. */ - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); - - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - - /* Now `shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. 
*/ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* Backup yychar, in case we would change it. */ - yylastchar = yychar; - /* Make sure we have latest lookahead translation. See comments at - user semantic actions for why this is necessary. */ - yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); - - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (YY_("syntax error")); -#else -# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ - yyssp, yytoken) - { - char const *yymsgp = YY_("syntax error"); - int yysyntax_error_status; - yysyntax_error_status = YYSYNTAX_ERROR; - if (yysyntax_error_status == 0) - yymsgp = yymsg; - else if (yysyntax_error_status == 1) - { - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); - if (!yymsg) - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - yysyntax_error_status = 2; - } - else - { - yysyntax_error_status = YYSYNTAX_ERROR; - yymsgp = yymsg; - } - } - yyerror (yymsgp); - if (yysyntax_error_status == 2) - goto yyexhaustedlab; - } -# undef YYSYNTAX_ERROR -#endif - } - - - - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse lookahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - { - /* Since we don't need the token, we have to free it first. 
*/ - YYERRCLEANUP; - YYABORT; - } - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse lookahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. */ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - /* Do not reclaim the symbols of the rule which action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (!yypact_value_is_default (yyn)) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - { - /* Since we don't need the error token, we have to free it first. */ - YYERRCLEANUP; - YYABORT; - } - - - yydestruct ("Error: popping", - yystos[yystate], yyvsp); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - *++yyvsp = yylval; - - - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. 
| -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#if !defined(yyoverflow) || YYERROR_VERBOSE -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (YY_("memory exhausted")); - yyresult = 2; - /* Fall through. */ -#endif - -yyreturn: - if (yychar != YYEMPTY) - { - /* Make sure we have latest lookahead translation. See comments at - user semantic actions for why this is necessary. */ - yytoken = YYTRANSLATE (yychar); - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval); - } - /* Do not reclaim the symbols of the rule which action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - /* Make sure YYID is used. 
*/ - return YYID (yyresult); -} - - - -/* Line 2067 of yacc.c */ -#line 212 "fts0pars.y" - - -/******************************************************************** -*/ -int -ftserror( -/*=====*/ - const char* p) -{ - my_printf_error(ER_PARSE_ERROR, "%s", MYF(0), p); - return(0); -} - -/******************************************************************** -Create a fts_lexer_t instance.*/ - -fts_lexer_t* -fts_lexer_create( -/*=============*/ - ibool boolean_mode, - const byte* query, - ulint query_len) -{ - fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>( - ut_malloc(sizeof(fts_lexer_t))); - - if (boolean_mode) { - fts0blex_init(&fts_lexer->yyscanner); - fts0b_scan_bytes( - reinterpret_cast<const char*>(query), - static_cast<int>(query_len), - fts_lexer->yyscanner); - fts_lexer->scanner = reinterpret_cast<fts_scan>(fts_blexer); - /* FIXME: Debugging */ - /* fts0bset_debug(1 , fts_lexer->yyscanner); */ - } else { - fts0tlex_init(&fts_lexer->yyscanner); - fts0t_scan_bytes( - reinterpret_cast<const char*>(query), - static_cast<int>(query_len), - fts_lexer->yyscanner); - fts_lexer->scanner = reinterpret_cast<fts_scan>(fts_tlexer); - } - - return(fts_lexer); -} - -/******************************************************************** -Free an fts_lexer_t instance.*/ -void - -fts_lexer_free( -/*===========*/ - fts_lexer_t* fts_lexer) -{ - if (fts_lexer->scanner == (fts_scan) fts_blexer) { - fts0blex_destroy(fts_lexer->yyscanner); - } else { - fts0tlex_destroy(fts_lexer->yyscanner); - } - - ut_free(fts_lexer); -} - -/******************************************************************** -Call the appropaiate scanner.*/ - -int -fts_lexer( -/*======*/ - YYSTYPE* val, - fts_lexer_t* fts_lexer) -{ - fts_scanner_alt func_ptr; - - func_ptr = (fts_scanner_alt) fts_lexer->scanner; - - return(func_ptr(val, fts_lexer->yyscanner)); -} - -/******************************************************************** -Parse the query.*/ -int -fts_parse( -/*======*/ - fts_ast_state_t* state) -{ - 
return(ftsparse(state)); -} - diff --git a/storage/xtradb/fts/fts0pars.y b/storage/xtradb/fts/fts0pars.y deleted file mode 100644 index e48036e82fe..00000000000 --- a/storage/xtradb/fts/fts0pars.y +++ /dev/null @@ -1,294 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/** - * @file fts/fts0pars.y - * FTS parser: input file for the GNU Bison parser generator - * - * Created 2007/5/9 Sunny Bains - */ - -%{ - -#include "mem0mem.h" -#include "fts0ast.h" -#include "fts0blex.h" -#include "fts0tlex.h" -#include "fts0pars.h" - -extern int fts_lexer(YYSTYPE*, fts_lexer_t*); -extern int fts_blexer(YYSTYPE*, yyscan_t); -extern int fts_tlexer(YYSTYPE*, yyscan_t); - -typedef int (*fts_scan)(); - -extern int ftserror(const char* p); - -/* Required for reentrant parser */ -#define ftslex fts_lexer - -#define YYERROR_VERBOSE - -/* For passing an argument to yyparse() */ -#define YYPARSE_PARAM state -#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer - -typedef int (*fts_scanner_alt)(YYSTYPE* val, yyscan_t yyscanner); -typedef int (*fts_scanner)(); - -struct fts_lexer_struct { - fts_scanner scanner; - void* yyscanner; -}; - -%} - -%union { - int 
oper; - fts_ast_string_t* token; - fts_ast_node_t* node; -}; - -/* Enable re-entrant parser */ -%pure_parser - -%token<oper> FTS_OPER -%token<token> FTS_TEXT FTS_TERM FTS_NUMB - -%type<node> prefix term text expr sub_expr expr_lst query - -%nonassoc '+' '-' '~' '<' '>' - -%% - -query : expr_lst { - $$ = $1; - ((fts_ast_state_t*) state)->root = $$; - } - ; - -expr_lst: /* Empty */ { - $$ = NULL; - } - - | expr_lst expr { - $$ = $1; - - if (!$$) { - $$ = fts_ast_create_node_list(state, $2); - } else { - fts_ast_add_node($$, $2); - } - } - - | expr_lst sub_expr { - $$ = $1; - $$ = fts_ast_create_node_list(state, $1); - - if (!$$) { - $$ = $2; - } else { - fts_ast_add_node($$, $2); - } - } - ; - -sub_expr: '(' expr_lst ')' { - $$ = $2; - - if ($$) { - $$ = fts_ast_create_node_subexp_list(state, $$); - } - } - - | prefix '(' expr_lst ')' { - $$ = fts_ast_create_node_list(state, $1); - - if ($3) { - fts_ast_add_node($$, - fts_ast_create_node_subexp_list(state, $3)); - } - } - ; - -expr : term { - $$ = $1; - } - - | text { - $$ = $1; - } - - | term '*' { - fts_ast_term_set_wildcard($1); - } - - | text '@' FTS_NUMB { - fts_ast_term_set_distance($1, fts_ast_string_to_ul($3, 10)); - fts_ast_string_free($3); - } - - | prefix term '*' { - $$ = fts_ast_create_node_list(state, $1); - fts_ast_add_node($$, $2); - fts_ast_term_set_wildcard($2); - } - - | prefix term { - $$ = fts_ast_create_node_list(state, $1); - fts_ast_add_node($$, $2); - } - - | prefix text '@' FTS_NUMB { - $$ = fts_ast_create_node_list(state, $1); - fts_ast_add_node($$, $2); - fts_ast_term_set_distance($2, fts_ast_string_to_ul($4, 10)); - fts_ast_string_free($4); - } - - | prefix text { - $$ = fts_ast_create_node_list(state, $1); - fts_ast_add_node($$, $2); - } - ; - -prefix : '-' { - $$ = fts_ast_create_node_oper(state, FTS_IGNORE); - } - - | '+' { - $$ = fts_ast_create_node_oper(state, FTS_EXIST); - } - - | '~' { - $$ = fts_ast_create_node_oper(state, FTS_NEGATE); - } - - | '<' { - $$ = 
fts_ast_create_node_oper(state, FTS_DECR_RATING); - } - - | '>' { - $$ = fts_ast_create_node_oper(state, FTS_INCR_RATING); - } - ; - -term : FTS_TERM { - $$ = fts_ast_create_node_term(state, $1); - fts_ast_string_free($1); - } - - | FTS_NUMB { - $$ = fts_ast_create_node_term(state, $1); - fts_ast_string_free($1); - } - - /* Ignore leading '*' */ - | '*' term { - $$ = $2; - } - ; - -text : FTS_TEXT { - $$ = fts_ast_create_node_text(state, $1); - fts_ast_string_free($1); - } - ; -%% - -/******************************************************************** -*/ -int -ftserror( -/*=====*/ - const char* p) -{ - fprintf(stderr, "%s\n", p); - return(0); -} - -/******************************************************************** -Create a fts_lexer_t instance.*/ - -fts_lexer_t* -fts_lexer_create( -/*=============*/ - ibool boolean_mode, - const byte* query, - ulint query_len) -{ - fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>( - ut_malloc(sizeof(fts_lexer_t))); - - if (boolean_mode) { - fts0blex_init(&fts_lexer->yyscanner); - fts0b_scan_bytes((char*) query, query_len, fts_lexer->yyscanner); - fts_lexer->scanner = (fts_scan) fts_blexer; - /* FIXME: Debugging */ - /* fts0bset_debug(1 , fts_lexer->yyscanner); */ - } else { - fts0tlex_init(&fts_lexer->yyscanner); - fts0t_scan_bytes((char*) query, query_len, fts_lexer->yyscanner); - fts_lexer->scanner = (fts_scan) fts_tlexer; - } - - return(fts_lexer); -} - -/******************************************************************** -Free an fts_lexer_t instance.*/ -void - -fts_lexer_free( -/*===========*/ - fts_lexer_t* fts_lexer) -{ - if (fts_lexer->scanner == (fts_scan) fts_blexer) { - fts0blex_destroy(fts_lexer->yyscanner); - } else { - fts0tlex_destroy(fts_lexer->yyscanner); - } - - ut_free(fts_lexer); -} - -/******************************************************************** -Call the appropaiate scanner.*/ - -int -fts_lexer( -/*======*/ - YYSTYPE* val, - fts_lexer_t* fts_lexer) -{ - fts_scanner_alt func_ptr; - - func_ptr = 
(fts_scanner_alt) fts_lexer->scanner; - - return(func_ptr(val, fts_lexer->yyscanner)); -} - -/******************************************************************** -Parse the query.*/ -int -fts_parse( -/*======*/ - fts_ast_state_t* state) -{ - return(ftsparse(state)); -} diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc deleted file mode 100644 index f24973e26fb..00000000000 --- a/storage/xtradb/fts/fts0que.cc +++ /dev/null @@ -1,4491 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file fts/fts0que.cc -Full Text Search functionality. 
- -Created 2007/03/27 Sunny Bains -Completed 2011/7/10 Sunny and Jimmy Yang -*******************************************************/ - -#include "dict0dict.h" /* dict_table_get_n_rows() */ -#include "ut0rbt.h" -#include "row0sel.h" -#include "fts0fts.h" -#include "fts0priv.h" -#include "fts0ast.h" -#include "fts0pars.h" -#include "fts0types.h" -#include "ha_prototypes.h" -#include <ctype.h> - -#ifndef UNIV_NONINL -#include "fts0types.ic" -#include "fts0vlc.ic" -#endif - -#include <vector> - -#define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)]) - -#define RANK_DOWNGRADE (-1.0F) -#define RANK_UPGRADE (1.0F) - -/* Maximum number of words supported in a phrase or proximity search. */ -#define MAX_PROXIMITY_ITEM 128 - -/* Memory used by rbt itself for create and node add */ -#define SIZEOF_RBT_CREATE sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2 -#define SIZEOF_RBT_NODE_ADD sizeof(ib_rbt_node_t) - -/*Initial byte length for 'words' in fts_ranking_t */ -#define RANKING_WORDS_INIT_LEN 4 - -// FIXME: Need to have a generic iterator that traverses the ilist. - -typedef std::vector<fts_string_t> word_vector_t; - -struct fts_word_freq_t; - -/** State of an FTS query. 
*/ -struct fts_query_t { - mem_heap_t* heap; /*!< Heap to use for allocations */ - - trx_t* trx; /*!< The query transaction */ - - dict_index_t* index; /*!< The FTS index to search */ - /*!< FTS auxiliary common table def */ - fts_table_t fts_common_table; - - fts_table_t fts_index_table;/*!< FTS auxiliary index table def */ - - ulint total_size; /*!< total memory size used by query */ - - fts_doc_ids_t* deleted; /*!< Deleted doc ids that need to be - filtered from the output */ - - fts_ast_node_t* root; /*!< Abstract syntax tree */ - - fts_ast_node_t* cur_node; /*!< Current tree node */ - - ib_rbt_t* word_map; /*!< Matched word map for - searching by word*/ - - word_vector_t* word_vector; /*!< Matched word vector for - searching by index */ - - ib_rbt_t* doc_ids; /*!< The current set of matching - doc ids, elements are of - type fts_ranking_t */ - - ib_rbt_t* intersection; /*!< The doc ids that were found in - doc_ids, this tree will become - the new doc_ids, elements are of type - fts_ranking_t */ - - /*!< Prepared statement to read the - nodes from the FTS INDEX */ - que_t* read_nodes_graph; - - fts_ast_oper_t oper; /*!< Current boolean mode operator */ - - /*!< TRUE if we want to collect the - word positions within the document */ - ibool collect_positions; - - ulint flags; /*!< Specify the full text search type, - such as boolean search, phrase - search, proximity search etc. */ - - ulint distance; /*!< The proximity distance of a - phrase search. 
*/ - - /*!< These doc ids are used as a - boundary condition when searching the - FTS index rows */ - - doc_id_t lower_doc_id; /*!< Lowest doc id in doc_ids */ - - doc_id_t upper_doc_id; /*!< Highest doc id in doc_ids */ - - bool boolean_mode; /*!< TRUE if boolean mode query */ - - ib_vector_t* matched; /*!< Array of matching documents - (fts_match_t) to search for a phrase */ - - ib_vector_t** match_array; /*!< Used for proximity search, contains - position info for each matched word - in the word list */ - - ib_uint64_t total_docs; /*!< The total number of documents */ - - ulint total_words; /*!< The total number of words */ - - dberr_t error; /*!< Error code if any, that is - encountered during query processing */ - - ib_rbt_t* word_freqs; /*!< RB tree of word frequencies per - document, its elements are of type - fts_word_freq_t */ - - bool multi_exist; /*!< multiple FTS_EXIST oper */ -}; - -/** For phrase matching, first we collect the documents and the positions -then we match. */ -struct fts_match_t { - doc_id_t doc_id; /*!< Document id */ - - ulint start; /*!< Start the phrase match from - this offset within the positions - vector. */ - - ib_vector_t* positions; /*!< Offsets of a word in a - document */ -}; - -/** For matching tokens in a phrase search. We use this data structure in -the callback that determines whether a document should be accepted or -rejected for a phrase search. */ -struct fts_select_t { - doc_id_t doc_id; /*!< The document id to match */ - - ulint min_pos; /*!< For found to be TRUE at least - one position must be greater than - min_pos. */ - - ibool found; /*!< TRUE if found */ - - fts_word_freq_t* - word_freq; /*!< Word frequency instance of the - current word being looked up in - the FTS index */ -}; - -typedef std::vector<ulint> pos_vector_t; - -/** structure defines a set of ranges for original documents, each of which -has a minimum position and maximum position. 
Text in such range should -contain all words in the proximity search. We will need to count the -words in such range to make sure it is less than the specified distance -of the proximity search */ -struct fts_proximity_t { - ulint n_pos; /*!< number of position set, defines - a range (min to max) containing all - matching words */ - pos_vector_t min_pos; /*!< the minimum position (in bytes) - of the range */ - pos_vector_t max_pos; /*!< the maximum position (in bytes) - of the range */ -}; - -/** The match positions and tokesn to match */ -struct fts_phrase_t { - ibool found; /*!< Match result */ - - const fts_match_t* - match; /*!< Positions within text */ - - const ib_vector_t* - tokens; /*!< Tokens to match */ - - ulint distance; /*!< For matching on proximity - distance. Can be 0 for exact match */ - CHARSET_INFO* charset; /*!< Phrase match charset */ - mem_heap_t* heap; /*!< Heap for word processing */ - ulint zip_size; /*!< row zip size */ - fts_proximity_t*proximity_pos; /*!< position info for proximity - search verification. Records the min - and max position of words matched */ -}; - -/** For storing the frequncy of a word/term in a document */ -struct fts_doc_freq_t { - doc_id_t doc_id; /*!< Document id */ - ulint freq; /*!< Frequency of a word in a document */ -}; - -/** To determine the word frequency per document. */ -struct fts_word_freq_t { - fts_string_t word; /*!< Word for which we need the freq, - it's allocated on the query heap */ - - ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document - word frequencies. The elements are - of type fts_doc_freq_t */ - ib_uint64_t doc_count; /*!< Total number of documents that - contain this word */ - double idf; /*!< Inverse document frequency */ -}; - -/******************************************************************** -Callback function to fetch the rows in an FTS INDEX record. 
-@return always TRUE */ -static -ibool -fts_query_index_fetch_nodes( -/*========================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg); /*!< in: pointer to ib_vector_t */ - -/******************************************************************** -Read and filter nodes. -@return fts_node_t instance */ -static -dberr_t -fts_query_filter_doc_ids( -/*=====================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* word, /*!< in: the current word */ - fts_word_freq_t* word_freq, /*!< in/out: word frequency */ - const fts_node_t* node, /*!< in: current FTS node */ - void* data, /*!< in: doc id ilist */ - ulint len, /*!< in: doc id ilist size */ - ibool calc_doc_count);/*!< in: whether to remember doc - count */ - -#if 0 -/*****************************************************************//*** -Find a doc_id in a word's ilist. -@return TRUE if found. */ -static -ibool -fts_query_find_doc_id( -/*==================*/ - fts_select_t* select, /*!< in/out: search the doc id selected, - update the frequency if found. */ - void* data, /*!< in: doc id ilist */ - ulint len); /*!< in: doc id ilist size */ -#endif - -/*************************************************************//** -This function implements a simple "blind" query expansion search: -words in documents found in the first search pass will be used as -search arguments to search the document again, thus "expand" -the search result set. -@return DB_SUCCESS if success, otherwise the error code */ -static -dberr_t -fts_expand_query( -/*=============*/ - dict_index_t* index, /*!< in: FTS index to search */ - fts_query_t* query) /*!< in: query result, to be freed - by the client */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -This function finds documents that contain all words in a -phrase or proximity search. And if proximity search, verify -the words are close enough to each other, as in specified distance. 
-This function is called for phrase and proximity search. -@return TRUE if documents are found, FALSE if otherwise */ -static -ibool -fts_phrase_or_proximity_search( -/*===========================*/ - fts_query_t* query, /*!< in/out: query instance - query->doc_ids might be instantiated - with qualified doc IDs */ - ib_vector_t* tokens); /*!< in: Tokens contain words */ -/*************************************************************//** -This function checks whether words in result documents are close to -each other (within proximity range as specified by "distance"). -If "distance" is MAX_ULINT, then it will find all combinations of -positions of matching words and store min and max positions -in the "qualified_pos" for later verification. -@return true if words are close to each other, false if otherwise */ -static -bool -fts_proximity_get_positions( -/*========================*/ - fts_match_t** match, /*!< in: query instance */ - ulint num_match, /*!< in: number of matching - items */ - ulint distance, /*!< in: distance value - for proximity search */ - fts_proximity_t* qualified_pos); /*!< out: the position info - records ranges containing - all matching words. */ -#if 0 -/******************************************************************** -Get the total number of words in a documents. */ -static -ulint -fts_query_terms_in_document( -/*========================*/ - /*!< out: DB_SUCCESS if all go well - else error code */ - fts_query_t* query, /*!< in: FTS query state */ - doc_id_t doc_id, /*!< in: the word to check */ - ulint* total); /*!< out: total words in document */ -#endif - -/******************************************************************** -Compare two fts_doc_freq_t doc_ids. 
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_freq_doc_id_cmp( -/*================*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ -{ - const fts_doc_freq_t* fq1 = (const fts_doc_freq_t*) p1; - const fts_doc_freq_t* fq2 = (const fts_doc_freq_t*) p2; - - return((int) (fq1->doc_id - fq2->doc_id)); -} - -#if 0 -/*******************************************************************//** -Print the table used for calculating LCS. */ -static -void -fts_print_lcs_table( -/*================*/ - const ulint* table, /*!< in: array to print */ - ulint n_rows, /*!< in: total no. of rows */ - ulint n_cols) /*!< in: total no. of cols */ -{ - ulint i; - - for (i = 0; i < n_rows; ++i) { - ulint j; - - printf("\n"); - - for (j = 0; j < n_cols; ++j) { - - printf("%2lu ", FTS_ELEM(table, n_cols, i, j)); - } - } -} - -/******************************************************************** -Find the longest common subsequence between the query string and -the document. */ -static -ulint -fts_query_lcs( -/*==========*/ - /*!< out: LCS (length) between - two ilists */ - const ulint* p1, /*!< in: word positions of query */ - ulint len_p1, /*!< in: no. of elements in p1 */ - const ulint* p2, /*!< in: word positions within document */ - ulint len_p2) /*!< in: no. of elements in p2 */ -{ - int i; - ulint len = 0; - ulint r = len_p1; - ulint c = len_p2; - ulint size = (r + 1) * (c + 1) * sizeof(ulint); - ulint* table = (ulint*) ut_malloc(size); - - /* Traverse the table backwards, from the last row to the first and - also from the last column to the first. We compute the smaller - common subsequeces first, then use the caluclated values to determine - the longest common subsequence. The result will be in TABLE[0][0]. 
*/ - for (i = r; i >= 0; --i) { - int j; - - for (j = c; j >= 0; --j) { - - if (p1[i] == (ulint) -1 || p2[j] == (ulint) -1) { - - FTS_ELEM(table, c, i, j) = 0; - - } else if (p1[i] == p2[j]) { - - FTS_ELEM(table, c, i, j) = FTS_ELEM( - table, c, i + 1, j + 1) + 1; - - } else { - - ulint value; - - value = ut_max( - FTS_ELEM(table, c, i + 1, j), - FTS_ELEM(table, c, i, j + 1)); - - FTS_ELEM(table, c, i, j) = value; - } - } - } - - len = FTS_ELEM(table, c, 0, 0); - - fts_print_lcs_table(table, r, c); - printf("\nLen=%lu\n", len); - - ut_free(table); - - return(len); -} -#endif - -/*******************************************************************//** -Compare two fts_ranking_t instance on their rank value and doc ids in -descending order on the rank and ascending order on doc id. -@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */ -static -int -fts_query_compare_rank( -/*===================*/ - const void* p1, /*!< in: pointer to elem */ - const void* p2) /*!< in: pointer to elem */ -{ - const fts_ranking_t* r1 = (const fts_ranking_t*) p1; - const fts_ranking_t* r2 = (const fts_ranking_t*) p2; - - if (r2->rank < r1->rank) { - return(-1); - } else if (r2->rank == r1->rank) { - - if (r1->doc_id < r2->doc_id) { - return(1); - } else if (r1->doc_id > r2->doc_id) { - return(1); - } - - return(0); - } - - return(1); -} - -#ifdef FTS_UTF8_DEBUG -/*******************************************************************//** -Convert string to lowercase. 
-@return lower case string, callers responsibility to delete using -ut_free() */ -static -byte* -fts_tolower( -/*========*/ - const byte* src, /*!< in: src string */ - ulint len) /*!< in: src string length */ -{ - fts_string_t str; - byte* lc_str = ut_malloc(len + 1); - - str.f_len = len; - str.f_str = lc_str; - - memcpy(str.f_str, src, len); - - /* Make sure the last byte is NUL terminated */ - str.f_str[len] = '\0'; - - fts_utf8_tolower(&str); - - return(lc_str); -} - -/*******************************************************************//** -Do a case insensitive search. Doesn't check for NUL byte end marker -only relies on len. Convert str2 to lower case before comparing. -@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */ -static -int -fts_utf8_strcmp( -/*============*/ - const fts_string_t* - str1, /*!< in: should be lower case*/ - - fts_string_t* str2) /*!< in: any case. We will use the length - of this string during compare as it - should be the min of the two strings */ -{ - byte b = str2->f_str[str2->f_len]; - - ut_a(str2->f_len <= str1->f_len); - - /* We need to write a NUL byte at the end of the string because the - string is converted to lowercase by a MySQL function which doesn't - care about the length. */ - str2->f_str[str2->f_len] = 0; - - fts_utf8_tolower(str2); - - /* Restore the value we replaced above. */ - str2->f_str[str2->f_len] = b; - - return(memcmp(str1->f_str, str2->f_str, str2->f_len)); -} -#endif - -/*******************************************************************//** -Create words in ranking */ -static -void -fts_ranking_words_create( -/*=====================*/ - fts_query_t* query, /*!< in: query instance */ - fts_ranking_t* ranking) /*!< in: ranking instance */ -{ - ranking->words = static_cast<byte*>( - mem_heap_zalloc(query->heap, RANKING_WORDS_INIT_LEN)); - ranking->words_len = RANKING_WORDS_INIT_LEN; -} - -/* -The optimization here is using a char array(bitmap) to replace words rb tree -in fts_ranking_t. 
- -It can save lots of memory except in some cases of QUERY EXPANSION. - -'word_map' is used as a word dictionary, in which the key is a word, the value -is a number. In 'fts_ranking_words_add', we first check if the word is in 'word_map'. -if not, we add it into 'word_map', and give it a position(actually a number). -then we set the corresponding bit to '1' at the position in the char array 'words'. - -'word_vector' is a useful backup of 'word_map', and we can get a word by its position, -more quickly than searching by value in 'word_map'. we use 'word_vector' -in 'fts_query_calculate_ranking' and 'fts_expand_query'. In the two functions, we need -to scan the bitmap 'words', and get a word when a bit is '1', then we get word_freq -by the word. -*/ - -/*******************************************************************//** -Add a word into ranking */ -static -void -fts_ranking_words_add( -/*==================*/ - fts_query_t* query, /*!< in: query instance */ - fts_ranking_t* ranking, /*!< in: ranking instance */ - const fts_string_t* word) /*!< in: term/word to add */ -{ - ulint pos; - ulint byte_offset; - ulint bit_offset; - ib_rbt_bound_t parent; - - /* Note: we suppose the word map and vector are append-only. */ - ut_ad(query->word_vector->size() == rbt_size(query->word_map)); - - /* We use ib_rbt to simulate a map, f_n_char means position. */ - if (rbt_search(query->word_map, &parent, word) == 0) { - fts_string_t* result_word; - - result_word = rbt_value(fts_string_t, parent.last); - pos = result_word->f_n_char; - ut_ad(pos < rbt_size(query->word_map)); - } else { - /* Add the word to map. 
*/ - fts_string_t new_word; - - pos = rbt_size(query->word_map); - - new_word.f_str = static_cast<byte*>(mem_heap_alloc(query->heap, - word->f_len + 1)); - memcpy(new_word.f_str, word->f_str, word->f_len); - new_word.f_str[word->f_len] = 0; - new_word.f_len = word->f_len; - new_word.f_n_char = pos; - - rbt_add_node(query->word_map, &parent, &new_word); - ut_ad(rbt_validate(query->word_map)); - query->word_vector->push_back(new_word); - } - - /* Check words len */ - byte_offset = pos / CHAR_BIT; - if (byte_offset >= ranking->words_len) { - byte* words = ranking->words; - ulint words_len = ranking->words_len; - - while (byte_offset >= words_len) { - words_len *= 2; - } - - ranking->words = static_cast<byte*>( - mem_heap_zalloc(query->heap, words_len)); - ut_memcpy(ranking->words, words, ranking->words_len); - ranking->words_len = words_len; - } - - /* Set ranking words */ - ut_ad(byte_offset < ranking->words_len); - bit_offset = pos % CHAR_BIT; - ranking->words[byte_offset] |= 1 << bit_offset; -} - -/*******************************************************************//** -Get a word from a ranking -@return true if it's successful */ -static -bool -fts_ranking_words_get_next( -/*=======================*/ - const fts_query_t* query, /*!< in: query instance */ - fts_ranking_t* ranking,/*!< in: ranking instance */ - ulint* pos, /*!< in/out: word start pos */ - fts_string_t* word) /*!< in/out: term/word to add */ -{ - bool ret = false; - ulint max_pos = ranking->words_len * CHAR_BIT; - - /* Search for next word */ - while (*pos < max_pos) { - ulint byte_offset = *pos / CHAR_BIT; - ulint bit_offset = *pos % CHAR_BIT; - - if (ranking->words[byte_offset] & (1 << bit_offset)) { - ret = true; - break; - } - - *pos += 1; - }; - - /* Get next word from word vector */ - if (ret) { - ut_ad(*pos < query->word_vector->size()); - *word = query->word_vector->at((size_t)*pos); - *pos += 1; - } - - return ret; -} - 
-/*******************************************************************//** -Add a word if it doesn't exist, to the term freq RB tree. We store -a pointer to the word that is passed in as the argument. -@return pointer to word */ -static -fts_word_freq_t* -fts_query_add_word_freq( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* word) /*!< in: term/word to add */ -{ - ib_rbt_bound_t parent; - - /* Lookup the word in our rb tree and add if it doesn't exist. */ - if (rbt_search(query->word_freqs, &parent, word) != 0) { - fts_word_freq_t word_freq; - - memset(&word_freq, 0, sizeof(word_freq)); - - word_freq.word.f_str = static_cast<byte*>( - mem_heap_alloc(query->heap, word->f_len + 1)); - memcpy(word_freq.word.f_str, word->f_str, word->f_len); - word_freq.word.f_str[word->f_len] = 0; - word_freq.word.f_len = word->f_len; - - word_freq.doc_count = 0; - - word_freq.doc_freqs = rbt_create( - sizeof(fts_doc_freq_t), fts_freq_doc_id_cmp); - - parent.last = rbt_add_node( - query->word_freqs, &parent, &word_freq); - - query->total_size += word->f_len - + SIZEOF_RBT_CREATE - + SIZEOF_RBT_NODE_ADD - + sizeof(fts_word_freq_t); - } - - return(rbt_value(fts_word_freq_t, parent.last)); -} - -/*******************************************************************//** -Add a doc id if it doesn't exist, to the doc freq RB tree. -@return pointer to word */ -static -fts_doc_freq_t* -fts_query_add_doc_freq( -/*===================*/ - fts_query_t* query, /*!< in: query instance */ - ib_rbt_t* doc_freqs, /*!< in: rb tree of fts_doc_freq_t */ - doc_id_t doc_id) /*!< in: doc id to add */ -{ - ib_rbt_bound_t parent; - - /* Lookup the doc id in our rb tree and add if it doesn't exist. 
*/ - if (rbt_search(doc_freqs, &parent, &doc_id) != 0) { - fts_doc_freq_t doc_freq; - - memset(&doc_freq, 0, sizeof(doc_freq)); - - doc_freq.freq = 0; - doc_freq.doc_id = doc_id; - - parent.last = rbt_add_node(doc_freqs, &parent, &doc_freq); - - query->total_size += SIZEOF_RBT_NODE_ADD - + sizeof(fts_doc_freq_t); - } - - return(rbt_value(fts_doc_freq_t, parent.last)); -} - -/*******************************************************************//** -Add the doc id to the query set only if it's not in the -deleted array. */ -static -void -fts_query_union_doc_id( -/*===================*/ - fts_query_t* query, /*!< in: query instance */ - doc_id_t doc_id, /*!< in: the doc id to add */ - fts_rank_t rank) /*!< in: if non-zero, it is the - rank associated with the doc_id */ -{ - ib_rbt_bound_t parent; - ulint size = ib_vector_size(query->deleted->doc_ids); - fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; - - /* Check if the doc id is deleted and it's not already in our set. */ - if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0 - && rbt_search(query->doc_ids, &parent, &doc_id) != 0) { - - fts_ranking_t ranking; - - ranking.rank = rank; - ranking.doc_id = doc_id; - fts_ranking_words_create(query, &ranking); - - rbt_add_node(query->doc_ids, &parent, &ranking); - - query->total_size += SIZEOF_RBT_NODE_ADD - + sizeof(fts_ranking_t) + RANKING_WORDS_INIT_LEN; - } -} - -/*******************************************************************//** -Remove the doc id from the query set only if it's not in the -deleted set. */ -static -void -fts_query_remove_doc_id( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - doc_id_t doc_id) /*!< in: the doc id to add */ -{ - ib_rbt_bound_t parent; - ulint size = ib_vector_size(query->deleted->doc_ids); - fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; - - /* Check if the doc id is deleted and it's in our set. 
*/ - if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0 - && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { - ut_free(rbt_remove_node(query->doc_ids, parent.last)); - - ut_ad(query->total_size >= - SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t)); - query->total_size -= SIZEOF_RBT_NODE_ADD - + sizeof(fts_ranking_t); - } -} - -/*******************************************************************//** -Find the doc id in the query set but not in the deleted set, artificialy -downgrade or upgrade its ranking by a value and make/initialize its ranking -under or above its normal range 0 to 1. This is used for Boolean Search -operator such as Negation operator, which makes word's contribution to the -row's relevance to be negative */ -static -void -fts_query_change_ranking( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - doc_id_t doc_id, /*!< in: the doc id to add */ - ibool downgrade) /*!< in: Whether to downgrade ranking */ -{ - ib_rbt_bound_t parent; - ulint size = ib_vector_size(query->deleted->doc_ids); - fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; - - /* Check if the doc id is deleted and it's in our set. */ - if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0 - && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { - - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, parent.last); - - ranking->rank += downgrade ? RANK_DOWNGRADE : RANK_UPGRADE; - - /* Allow at most 2 adjustment by RANK_DOWNGRADE (-0.5) - and RANK_UPGRADE (0.5) */ - if (ranking->rank >= 1.0F) { - ranking->rank = 1.0F; - } else if (ranking->rank <= -1.0F) { - ranking->rank = -1.0F; - } - } -} - -/*******************************************************************//** -Check the doc id in the query set only if it's not in the -deleted array. The doc ids that were found are stored in -another rb tree (fts_query_t::intersect). 
*/ -static -void -fts_query_intersect_doc_id( -/*=======================*/ - fts_query_t* query, /*!< in: query instance */ - doc_id_t doc_id, /*!< in: the doc id to add */ - fts_rank_t rank) /*!< in: if non-zero, it is the - rank associated with the doc_id */ -{ - ib_rbt_bound_t parent; - ulint size = ib_vector_size(query->deleted->doc_ids); - fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; - fts_ranking_t* ranking= NULL; - - /* There are three types of intersect: - 1. '+a': doc_ids is empty, add doc into intersect if it matches 'a'. - 2. 'a +b': docs match 'a' is in doc_ids, add doc into intersect - if it matches 'b'. if the doc is also in doc_ids, then change the - doc's rank, and add 'a' in doc's words. - 3. '+a +b': docs matching '+a' is in doc_ids, add doc into intsersect - if it matches 'b' and it's in doc_ids.(multi_exist = true). */ - - /* Check if the doc id is deleted and it's in our set */ - if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0) { - fts_ranking_t new_ranking; - - if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) { - if (query->multi_exist) { - return; - } else { - new_ranking.words = NULL; - } - } else { - ranking = rbt_value(fts_ranking_t, parent.last); - - /* We've just checked the doc id before */ - if (ranking->words == NULL) { - ut_ad(rbt_search(query->intersection, &parent, - ranking) == 0); - return; - } - - /* Merge rank */ - rank += ranking->rank; - if (rank >= 1.0F) { - rank = 1.0F; - } else if (rank <= -1.0F) { - rank = -1.0F; - } - - /* Take words */ - new_ranking.words = ranking->words; - new_ranking.words_len = ranking->words_len; - } - - new_ranking.rank = rank; - new_ranking.doc_id = doc_id; - - if (rbt_search(query->intersection, &parent, - &new_ranking) != 0) { - if (new_ranking.words == NULL) { - fts_ranking_words_create(query, &new_ranking); - - query->total_size += RANKING_WORDS_INIT_LEN; - } else { - /* Note that the intersection has taken - ownership of the ranking data. 
*/ - ranking->words = NULL; - } - - rbt_add_node(query->intersection, - &parent, &new_ranking); - - query->total_size += SIZEOF_RBT_NODE_ADD - + sizeof(fts_ranking_t); - } - } -} - -/*******************************************************************//** -Free the document ranking rb tree. */ -static -void -fts_query_free_doc_ids( -/*===================*/ - fts_query_t* query, /*!< in: query instance */ - ib_rbt_t* doc_ids) /*!< in: rb tree to free */ -{ - const ib_rbt_node_t* node; - - for (node = rbt_first(doc_ids); node; node = rbt_first(doc_ids)) { - - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, node); - - if (ranking->words) { - ranking->words = NULL; - } - - ut_free(rbt_remove_node(doc_ids, node)); - - ut_ad(query->total_size >= - SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t)); - query->total_size -= SIZEOF_RBT_NODE_ADD - + sizeof(fts_ranking_t); - } - - rbt_free(doc_ids); - - ut_ad(query->total_size >= SIZEOF_RBT_CREATE); - query->total_size -= SIZEOF_RBT_CREATE; -} - -/** -Free the query intersection -@param[in] query query instance */ -static -void -fts_query_free_intersection( - fts_query_t* query) -{ - fts_query_free_doc_ids(query, query->intersection); - query->intersection = NULL; -} - -/*******************************************************************//** -Add the word to the documents "list" of matching words from -the query. We make a copy of the word from the query heap. */ -static -void -fts_query_add_word_to_document( -/*===========================*/ - fts_query_t* query, /*!< in: query to update */ - doc_id_t doc_id, /*!< in: the document to update */ - const fts_string_t* word) /*!< in: the token to add */ -{ - ib_rbt_bound_t parent; - fts_ranking_t* ranking = NULL; - - if (query->flags == FTS_OPT_RANKING) { - return; - } - - /* First we search the intersection RB tree as it could have - taken ownership of the words rb tree instance. 
*/ - if (query->intersection - && rbt_search(query->intersection, &parent, &doc_id) == 0) { - - ranking = rbt_value(fts_ranking_t, parent.last); - } - - if (ranking == NULL - && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { - - ranking = rbt_value(fts_ranking_t, parent.last); - } - - if (ranking != NULL) { - fts_ranking_words_add(query, ranking, word); - } -} - -/*******************************************************************//** -Check the node ilist. */ -static -void -fts_query_check_node( -/*=================*/ - fts_query_t* query, /*!< in: query to update */ - const fts_string_t* token, /*!< in: the token to search */ - const fts_node_t* node) /*!< in: node to check */ -{ - /* Skip nodes whose doc ids are out range. */ - if (query->oper == FTS_EXIST - && ((query->upper_doc_id > 0 - && node->first_doc_id > query->upper_doc_id) - || (query->lower_doc_id > 0 - && node->last_doc_id < query->lower_doc_id))) { - - /* Ignore */ - - } else { - int ret; - ib_rbt_bound_t parent; - ulint ilist_size = node->ilist_size; - fts_word_freq_t*word_freqs; - - /* The word must exist. */ - ret = rbt_search(query->word_freqs, &parent, token); - ut_a(ret == 0); - - word_freqs = rbt_value(fts_word_freq_t, parent.last); - - query->error = fts_query_filter_doc_ids( - query, token, word_freqs, node, - node->ilist, ilist_size, TRUE); - } -} - -/*****************************************************************//** -Search index cache for word with wildcard match. -@return number of words matched */ -static -ulint -fts_cache_find_wildcard( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_index_cache_t*index_cache, /*!< in: cache to search */ - const fts_string_t* token) /*!< in: token to search */ -{ - ib_rbt_bound_t parent; - const ib_vector_t* nodes = NULL; - fts_string_t srch_text; - byte term[FTS_MAX_WORD_LEN + 1]; - ulint num_word = 0; - - srch_text.f_len = (token->f_str[token->f_len - 1] == '%') - ? 
token->f_len - 1 - : token->f_len; - - strncpy((char*) term, (char*) token->f_str, srch_text.f_len); - term[srch_text.f_len] = '\0'; - srch_text.f_str = term; - - /* Lookup the word in the rb tree */ - if (rbt_search_cmp(index_cache->words, &parent, &srch_text, NULL, - innobase_fts_text_cmp_prefix) == 0) { - const fts_tokenizer_word_t* word; - ulint i; - const ib_rbt_node_t* cur_node; - ibool forward = FALSE; - - word = rbt_value(fts_tokenizer_word_t, parent.last); - cur_node = parent.last; - - while (innobase_fts_text_cmp_prefix( - index_cache->charset, &srch_text, &word->text) == 0) { - - nodes = word->nodes; - - for (i = 0; nodes && i < ib_vector_size(nodes); ++i) { - int ret; - const fts_node_t* node; - ib_rbt_bound_t freq_parent; - fts_word_freq_t* word_freqs; - - node = static_cast<const fts_node_t*>( - ib_vector_get_const(nodes, i)); - - ret = rbt_search(query->word_freqs, - &freq_parent, - &srch_text); - - ut_a(ret == 0); - - word_freqs = rbt_value( - fts_word_freq_t, - freq_parent.last); - - query->error = fts_query_filter_doc_ids( - query, &srch_text, - word_freqs, node, - node->ilist, node->ilist_size, TRUE); - - if (query->error != DB_SUCCESS) { - return(0); - } - } - - num_word++; - - if (!forward) { - cur_node = rbt_prev( - index_cache->words, cur_node); - } else { -cont_search: - cur_node = rbt_next( - index_cache->words, cur_node); - } - - if (!cur_node) { - break; - } - - word = rbt_value(fts_tokenizer_word_t, cur_node); - } - - if (!forward) { - forward = TRUE; - cur_node = parent.last; - goto cont_search; - } - } - - return(num_word); -} - -/*****************************************************************//** -Set difference. 
-@return DB_SUCCESS if all go well */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_difference( -/*=================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* token) /*!< in: token to search */ -{ - ulint n_doc_ids= 0; - trx_t* trx = query->trx; - dict_table_t* table = query->index->table; - - ut_a(query->oper == FTS_IGNORE); - -#ifdef FTS_INTERNAL_DIAG_PRINT - fprintf(stderr, "DIFFERENCE: Searching: '%.*s'\n", - (int) token->f_len, token->f_str); -#endif - - if (query->doc_ids) { - n_doc_ids = rbt_size(query->doc_ids); - } - - /* There is nothing we can substract from an empty set. */ - if (query->doc_ids && !rbt_empty(query->doc_ids)) { - ulint i; - fts_fetch_t fetch; - const ib_vector_t* nodes; - const fts_index_cache_t*index_cache; - que_t* graph = NULL; - fts_cache_t* cache = table->fts->cache; - dberr_t error; - - rw_lock_x_lock(&cache->lock); - - index_cache = fts_find_index_cache(cache, query->index); - - /* Must find the index cache */ - ut_a(index_cache != NULL); - - /* Search the cache for a matching word first. */ - if (query->cur_node->term.wildcard - && query->flags != FTS_PROXIMITY - && query->flags != FTS_PHRASE) { - fts_cache_find_wildcard(query, index_cache, token); - } else { - nodes = fts_cache_find_word(index_cache, token); - - for (i = 0; nodes && i < ib_vector_size(nodes) - && query->error == DB_SUCCESS; ++i) { - const fts_node_t* node; - - node = static_cast<const fts_node_t*>( - ib_vector_get_const(nodes, i)); - - fts_query_check_node(query, token, node); - } - } - - rw_lock_x_unlock(&cache->lock); - - /* error is passed by 'query->error' */ - if (query->error != DB_SUCCESS) { - ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); - return(query->error); - } - - /* Setup the callback args for filtering and - consolidating the ilist. 
*/ - fetch.read_arg = query; - fetch.read_record = fts_query_index_fetch_nodes; - - error = fts_index_fetch_nodes( - trx, &graph, &query->fts_index_table, token, &fetch); - - /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ - ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); - if (error != DB_SUCCESS) { - query->error = error; - } - - fts_que_graph_free(graph); - } - - /* The size can't increase. */ - ut_a(rbt_size(query->doc_ids) <= n_doc_ids); - - return(query->error); -} - -/*****************************************************************//** -Intersect the token doc ids with the current set. -@return DB_SUCCESS if all go well */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_intersect( -/*================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* token) /*!< in: the token to search */ -{ - trx_t* trx = query->trx; - dict_table_t* table = query->index->table; - - ut_a(query->oper == FTS_EXIST); - -#ifdef FTS_INTERNAL_DIAG_PRINT - fprintf(stderr, "INTERSECT: Searching: '%.*s'\n", - (int) token->f_len, token->f_str); -#endif - - /* If the words set is not empty and multi exist is true, - we know the intersection set is empty in advance. */ - if (!(rbt_empty(query->doc_ids) && query->multi_exist)) { - ulint n_doc_ids = 0; - ulint i; - fts_fetch_t fetch; - const ib_vector_t* nodes; - const fts_index_cache_t*index_cache; - que_t* graph = NULL; - fts_cache_t* cache = table->fts->cache; - dberr_t error; - - ut_a(!query->intersection); - - n_doc_ids = rbt_size(query->doc_ids); - - /* Create the rb tree that will hold the doc ids of - the intersection. */ - query->intersection = rbt_create( - sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); - - query->total_size += SIZEOF_RBT_CREATE; - - /* This is to avoid decompressing the ilist if the - node's ilist doc ids are out of range. 
*/ - if (!rbt_empty(query->doc_ids) && query->multi_exist) { - const ib_rbt_node_t* node; - doc_id_t* doc_id; - - node = rbt_first(query->doc_ids); - doc_id = rbt_value(doc_id_t, node); - query->lower_doc_id = *doc_id; - - node = rbt_last(query->doc_ids); - doc_id = rbt_value(doc_id_t, node); - query->upper_doc_id = *doc_id; - - } else { - query->lower_doc_id = 0; - query->upper_doc_id = 0; - } - - /* Search the cache for a matching word first. */ - - rw_lock_x_lock(&cache->lock); - - /* Search for the index specific cache. */ - index_cache = fts_find_index_cache(cache, query->index); - - /* Must find the index cache. */ - ut_a(index_cache != NULL); - - if (query->cur_node->term.wildcard) { - /* Wildcard search the index cache */ - fts_cache_find_wildcard(query, index_cache, token); - } else { - nodes = fts_cache_find_word(index_cache, token); - - for (i = 0; nodes && i < ib_vector_size(nodes) - && query->error == DB_SUCCESS; ++i) { - const fts_node_t* node; - - node = static_cast<const fts_node_t*>( - ib_vector_get_const(nodes, i)); - - fts_query_check_node(query, token, node); - } - } - - rw_lock_x_unlock(&cache->lock); - - /* error is passed by 'query->error' */ - if (query->error != DB_SUCCESS) { - ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); - fts_query_free_intersection(query); - return(query->error); - } - - /* Setup the callback args for filtering and - consolidating the ilist. */ - fetch.read_arg = query; - fetch.read_record = fts_query_index_fetch_nodes; - - error = fts_index_fetch_nodes( - trx, &graph, &query->fts_index_table, token, &fetch); - - /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ - ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); - if (error != DB_SUCCESS) { - query->error = error; - } - - fts_que_graph_free(graph); - - if (query->error == DB_SUCCESS) { - /* Make the intesection (rb tree) the current doc id - set and free the old set. 
*/ - fts_query_free_doc_ids(query, query->doc_ids); - query->doc_ids = query->intersection; - query->intersection = NULL; - - ut_a(!query->multi_exist || (query->multi_exist - && rbt_size(query->doc_ids) <= n_doc_ids)); - } else if (query->intersection != NULL) { - fts_query_free_intersection(query); - } - } - - return(query->error); -} - -/*****************************************************************//** -Query index cache. -@return DB_SUCCESS if all go well */ -static -dberr_t -fts_query_cache( -/*============*/ - fts_query_t* query, /*!< in/out: query instance */ - const fts_string_t* token) /*!< in: token to search */ -{ - const fts_index_cache_t*index_cache; - dict_table_t* table = query->index->table; - fts_cache_t* cache = table->fts->cache; - - /* Search the cache for a matching word first. */ - rw_lock_x_lock(&cache->lock); - - /* Search for the index specific cache. */ - index_cache = fts_find_index_cache(cache, query->index); - - /* Must find the index cache. */ - ut_a(index_cache != NULL); - - if (query->cur_node->term.wildcard - && query->flags != FTS_PROXIMITY - && query->flags != FTS_PHRASE) { - /* Wildcard search the index cache */ - fts_cache_find_wildcard(query, index_cache, token); - } else { - const ib_vector_t* nodes; - ulint i; - - nodes = fts_cache_find_word(index_cache, token); - - for (i = 0; nodes && i < ib_vector_size(nodes) - && query->error == DB_SUCCESS; ++i) { - const fts_node_t* node; - - node = static_cast<const fts_node_t*>( - ib_vector_get_const(nodes, i)); - - fts_query_check_node(query, token, node); - } - } - - rw_lock_x_unlock(&cache->lock); - - return(query->error); -} - -/*****************************************************************//** -Set union. 
-@return DB_SUCCESS if all go well */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_union( -/*============*/ - fts_query_t* query, /*!< in: query instance */ - fts_string_t* token) /*!< in: token to search */ -{ - fts_fetch_t fetch; - ulint n_doc_ids = 0; - trx_t* trx = query->trx; - que_t* graph = NULL; - dberr_t error; - - ut_a(query->oper == FTS_NONE || query->oper == FTS_DECR_RATING || - query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING); - -#ifdef FTS_INTERNAL_DIAG_PRINT - fprintf(stderr, "UNION: Searching: '%.*s'\n", - (int) token->f_len, token->f_str); -#endif - - if (query->doc_ids) { - n_doc_ids = rbt_size(query->doc_ids); - } - - if (token->f_len == 0) { - return(query->error); - } - - /* Single '%' would confuse parser in pars_like_rebind(). In addition, - our wildcard search only supports prefix search */ - ut_ad(*token->f_str != '%'); - - fts_query_cache(query, token); - - /* Setup the callback args for filtering and - consolidating the ilist. */ - fetch.read_arg = query; - fetch.read_record = fts_query_index_fetch_nodes; - - /* Read the nodes from disk. */ - error = fts_index_fetch_nodes( - trx, &graph, &query->fts_index_table, token, &fetch); - - /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ - ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); - if (error != DB_SUCCESS) { - query->error = error; - } - - fts_que_graph_free(graph); - - if (query->error == DB_SUCCESS) { - - /* The size can't decrease. */ - ut_a(rbt_size(query->doc_ids) >= n_doc_ids); - - /* Calulate the number of doc ids that were added to - the current doc id set. */ - if (query->doc_ids) { - n_doc_ids = rbt_size(query->doc_ids) - n_doc_ids; - } - } - - return(query->error); -} - -/*****************************************************************//** -Depending upon the current query operator process the doc id. 
-return DB_SUCCESS if all go well -or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */ -static -dberr_t -fts_query_process_doc_id( -/*=====================*/ - fts_query_t* query, /*!< in: query instance */ - doc_id_t doc_id, /*!< in: doc id to process */ - fts_rank_t rank) /*!< in: if non-zero, it is the - rank associated with the doc_id */ -{ - /* Nothing is processed when the query flags equal FTS_OPT_RANKING. */ - if (query->flags == FTS_OPT_RANKING) { - return(DB_SUCCESS); - } - /* Dispatch on the current operator. */ - switch (query->oper) { - case FTS_NONE: - fts_query_union_doc_id(query, doc_id, rank); - break; - - case FTS_EXIST: - fts_query_intersect_doc_id(query, doc_id, rank); - break; - - case FTS_IGNORE: - fts_query_remove_doc_id(query, doc_id); - break; - - case FTS_NEGATE: - fts_query_change_ranking(query, doc_id, TRUE); - break; - - case FTS_DECR_RATING: - fts_query_union_doc_id(query, doc_id, rank); - fts_query_change_ranking(query, doc_id, TRUE); - break; - - case FTS_INCR_RATING: - fts_query_union_doc_id(query, doc_id, rank); - fts_query_change_ranking(query, doc_id, FALSE); - break; - - default: - ut_error; - } - /* Report when query->total_size has grown beyond - fts_result_cache_limit. */ - if (query->total_size > fts_result_cache_limit) { - return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); - } else { - return(DB_SUCCESS); - } -} - -/*****************************************************************//** -Merge two result sets. -@return DB_SUCCESS, or the error returned by fts_query_process_doc_id() */ -static -dberr_t -fts_merge_doc_ids( -/*==============*/ - fts_query_t* query, /*!< in,out: query instance */ - const ib_rbt_t* doc_ids) /*!< in: result set to merge */ -{ - const ib_rbt_node_t* node; - - DBUG_ENTER("fts_merge_doc_ids"); - - ut_a(!query->intersection); - - /* To process FTS_EXIST operation (intersection), we need - to create a new result set for fts_query_intersect(). */ - if (query->oper == FTS_EXIST) { - - query->intersection = rbt_create( - sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); - - query->total_size += SIZEOF_RBT_CREATE; - } - - /* Merge the elements to the result set.
*/ - for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) { - fts_ranking_t* ranking; - ulint pos = 0; - fts_string_t word; - - ranking = rbt_value(fts_ranking_t, node); - - query->error = fts_query_process_doc_id( - query, ranking->doc_id, ranking->rank); - /* On error, release the intersection set created above - (if any) before returning. */ - if (query->error != DB_SUCCESS) { - if (query->intersection != NULL) - { - ut_a(query->oper == FTS_EXIST); - fts_query_free_intersection(query); - } - DBUG_RETURN(query->error); - } - - /* Merge words. Don't need to take operator into account. */ - ut_a(ranking->words); - while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { - fts_query_add_word_to_document(query, ranking->doc_id, - &word); - } - } - - /* If it is an intersection operation, reset query->doc_ids - to query->intersection and free the old result list. */ - if (query->oper == FTS_EXIST && query->intersection != NULL) { - fts_query_free_doc_ids(query, query->doc_ids); - query->doc_ids = query->intersection; - query->intersection = NULL; - } - - DBUG_RETURN(DB_SUCCESS); -} - -/*****************************************************************//** -Skip characters that are neither punctuation nor whitespace. Move ptr to -the next word boundary. -@return pointer to first punctuation or whitespace character, or end */ -UNIV_INLINE -byte* -fts_query_skip_word( -/*================*/ - byte* ptr, /*!< in: start of scan */ - const byte* end) /*!< in: pointer to end of string */ -{ - /* TODO: Does this have to be UTF-8 too ? */ - while (ptr < end && !(ispunct(*ptr) || isspace(*ptr))) { - ++ptr; - } - - return(ptr); -} - -/*****************************************************************//** -Check whether the remaining terms in the phrase match the text.
-@return TRUE if matched else FALSE */ -static -ibool -fts_query_match_phrase_terms( -/*=========================*/ - fts_phrase_t* phrase, /*!< in: phrase to match */ - byte** start, /*!< in/out: text to search, we can't - make this const because we need to - first convert the string to - lowercase; set to the position - where scanning stopped */ - const byte* end, /*!< in: pointer to the end of - the string to search */ - mem_heap_t* heap) /*!< in: heap */ -{ - ulint i; - byte* ptr = *start; - const ib_vector_t* tokens = phrase->tokens; - ulint distance = phrase->distance; - - /* We check only from the second term onwards, since the first - must have matched otherwise we wouldn't be here. */ - for (i = 1; ptr < end && i < ib_vector_size(tokens); /* No op */) { - fts_string_t match; - fts_string_t cmp_str; - const fts_string_t* token; - int result; - ulint ret; - ulint offset; - - ret = innobase_mysql_fts_get_token( - phrase->charset, ptr, (byte*) end, - &match, &offset); - - if (match.f_len > 0) { - /* Get next token to match. */ - token = static_cast<const fts_string_t*>( - ib_vector_get_const(tokens, i)); - - fts_utf8_string_dup(&cmp_str, &match, heap); - - result = innobase_fts_text_case_cmp( - phrase->charset, token, &cmp_str); - - /* Skip the rest of the tokens if this one doesn't - match and the proximity distance is exceeded. */ - if (result - && (distance == ULINT_UNDEFINED - || distance == 0)) { - - break; - } - - /* This token matched, move to the next token. */ - if (result == 0) { - /* Advance the text to search by the length - of the last token. */ - ptr += ret; - - /* Advance to the next token. */ - ++i; - } else { - - ut_a(distance != ULINT_UNDEFINED); - - ptr = fts_query_skip_word(ptr, end); - } - - /* Distance can be 0 for exact matches. */ - if (distance != ULINT_UNDEFINED && distance > 0) { - --distance; - } - } else { - ptr += ret; - } - } - - *start = ptr; - - /* Can't be greater than the number of elements.
*/ - ut_a(i <= ib_vector_size(tokens)); - - /* This is the case for multiple words. */ - if (i == ib_vector_size(tokens)) { - phrase->found = TRUE; - } - - return(phrase->found); -} - -/*****************************************************************//** -Callback function to count the number of words in position ranges, -and see whether the word count is in specified "phrase->distance" -@return true if some position range contains at least one word and -no more than "phrase->distance" words */ -static -bool -fts_proximity_is_word_in_range( -/*===========================*/ - const fts_phrase_t* - phrase, /*!< in: phrase with the search info */ - byte* start, /*!< in: text to search */ - ulint total_len) /*!< in: length of text */ -{ - fts_proximity_t* proximity_pos = phrase->proximity_pos; - - ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size()); - ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size()); - - /* Search each matched position pair (with min and max positions) - and count the number of words in the range */ - for (ulint i = 0; i < proximity_pos->n_pos; i++) { - ulint cur_pos = proximity_pos->min_pos[i]; - ulint n_word = 0; - - ut_ad(proximity_pos->max_pos[i] <= total_len); - - /* Walk through words in the range and count them */ - while (cur_pos <= proximity_pos->max_pos[i]) { - ulint len; - fts_string_t str; - ulint offset = 0; - - len = innobase_mysql_fts_get_token( - phrase->charset, - start + cur_pos, - start + total_len, &str, &offset); - /* Stop when the tokenizer consumed nothing. */ - if (len == 0) { - break; - } - - /* Advances position with "len" bytes */ - cur_pos += len; - - /* Record the number of words */ - if (str.f_n_char > 0) { - n_word++; - } - /* No need to keep counting once the limit is exceeded. */ - if (n_word > phrase->distance) { - break; - } - } - - /* Check that at least one word was counted and that the - count does not exceed the specified "distance" */ - if (n_word && n_word <= phrase->distance) { - return(true); - } - } - - return(false); -} - -/*****************************************************************//** -Callback function to fetch and search the document.
-@return TRUE if matched else FALSE */ -static -ibool -fts_query_match_phrase( -/*===================*/ - fts_phrase_t* phrase, /*!< in: phrase to match */ - byte* start, /*!< in: text to search, we can't make - this const because we need to first - convert the string to lowercase */ - ulint cur_len, /*!< in: length of text */ - ulint prev_len, /*!< in: total length for searched - doc fields*/ - mem_heap_t* heap) /*!< in: heap */ -{ - ulint i; - const fts_string_t* first; - const byte* end = start + cur_len; - const ib_vector_t* tokens = phrase->tokens; - const ib_vector_t* positions = phrase->match->positions; - - ut_a(!phrase->found); - ut_a(phrase->match->doc_id > 0); - ut_a(ib_vector_size(tokens) > 0); - ut_a(ib_vector_size(positions) > 0); - - first = static_cast<const fts_string_t*>( - ib_vector_get_const(tokens, 0)); - - ut_a(phrase->match->start < ib_vector_size(positions)); - - for (i = phrase->match->start; i < ib_vector_size(positions); ++i) { - ulint pos; - fts_string_t match; - fts_string_t cmp_str; - byte* ptr = start; - ulint ret; - ulint offset; - - pos = *(ulint*) ib_vector_get_const(positions, i); - /* ULINT_UNDEFINED ends the scan of the position list. */ - if (pos == ULINT_UNDEFINED) { - break; - } - /* Positions below prev_len are skipped for this text. */ - if (pos < prev_len) { - continue; - } - - /* Document positions are calculated from the beginning - of the first field, need to save the length for each - searched field to adjust the doc position when search - phrases. */ - pos -= prev_len; - ptr = match.f_str = start + pos; - - /* Within limits ? */ - if (ptr >= end) { - break; - } - - ret = innobase_mysql_fts_get_token( - phrase->charset, start + pos, (byte*) end, - &match, &offset); - - if (match.f_len == 0) { - break; - } - - fts_utf8_string_dup(&cmp_str, &match, heap); - - if (innobase_fts_text_case_cmp( - phrase->charset, first, &cmp_str) == 0) { - - /* This is the case for the single word - in the phrase. */ - if (ib_vector_size(phrase->tokens) == 1) { - phrase->found = TRUE; - break; - } - - ptr += ret; - - /* Match the remaining terms in the phrase.
*/ - if (fts_query_match_phrase_terms(phrase, &ptr, - end, heap)) { - break; - } - } - } - - return(phrase->found); -} - -/*****************************************************************//** -Callback function to fetch and search the document. -@return whether the phrase is found */ -static -ibool -fts_query_fetch_document( -/*=====================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts_phrase_t* */ -{ - - que_node_t* exp; - sel_node_t* node = static_cast<sel_node_t*>(row); - fts_phrase_t* phrase = static_cast<fts_phrase_t*>(user_arg); - ulint prev_len = 0; - ulint total_len = 0; - byte* document_text = NULL; - - exp = node->select_list; - - phrase->found = FALSE; - - /* For proximity search, we will need to get the whole document - from all fields, so first count the total length of the document - from all the fields */ - if (phrase->proximity_pos) { - while (exp) { - ulint field_len; - dfield_t* dfield = que_node_get_val(exp); - byte* data = static_cast<byte*>( - dfield_get_data(dfield)); - - if (dfield_is_ext(dfield)) { - ulint local_len = dfield_get_len(dfield); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - field_len = mach_read_from_4( - data + local_len + BTR_EXTERN_LEN + 4); - } else { - field_len = dfield_get_len(dfield); - } - /* One extra byte is reserved per non-NULL field. */ - if (field_len != UNIV_SQL_NULL) { - total_len += field_len + 1; - } - - exp = que_node_get_next(exp); - } - - document_text = static_cast<byte*>(mem_heap_zalloc( - phrase->heap, total_len)); - - if (!document_text) { - return(FALSE); - } - } - - exp = node->select_list; - - while (exp) { - dfield_t* dfield = que_node_get_val(exp); - byte* data = static_cast<byte*>( - dfield_get_data(dfield)); - ulint cur_len; - - if (dfield_is_ext(dfield)) { - data = btr_copy_externally_stored_field( - &cur_len, data, phrase->zip_size, - dfield_get_len(dfield), phrase->heap, - NULL); - } else { - cur_len = dfield_get_len(dfield); - } - - if (cur_len != UNIV_SQL_NULL && cur_len != 0) { - if (phrase->proximity_pos) { -
ut_ad(prev_len + cur_len <= total_len); - memcpy(document_text + prev_len, data, cur_len); - } else { - /* For phrase search */ - phrase->found = - fts_query_match_phrase( - phrase, - static_cast<byte*>(data), - cur_len, prev_len, - phrase->heap); - } - - /* Document positions are calculated from the beginning - of the first field, need to save the length for each - searched field to adjust the doc position when search - phrases. */ - prev_len += cur_len + 1; - } - - if (phrase->found) { - break; - } - - exp = que_node_get_next(exp); - } - /* For proximity search the assembled text is checked here. */ - if (phrase->proximity_pos) { - ut_ad(prev_len <= total_len); - - phrase->found = fts_proximity_is_word_in_range( - phrase, document_text, total_len); - } - - return(phrase->found); -} - -#if 0 -/******************************************************************** -Callback function to check whether a record was found or not. -@return always FALSE */ -static -ibool -fts_query_select( -/*=============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts_select_t* */ -{ - int i; - que_node_t* exp; - sel_node_t* node = row; - fts_select_t* select = user_arg; - - ut_a(select->word_freq); - ut_a(select->word_freq->doc_freqs); - - exp = node->select_list; - - for (i = 0; exp && !select->found; ++i) { - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - switch (i) { - case 0: /* DOC_COUNT */ - if (len != UNIV_SQL_NULL && len != 0) { - - select->word_freq->doc_count += - mach_read_from_4(data); - } - break; - - case 1: /* ILIST */ - if (len != UNIV_SQL_NULL && len != 0) { - - fts_query_find_doc_id(select, data, len); - } - break; - - default: - ut_error; - } - - exp = que_node_get_next(exp); - } - - return(FALSE); -} - -/******************************************************************** -Read the rows from the FTS index, that match word and where the -doc id is between first and last doc id.
-@return DB_SUCCESS if all go well else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_find_term( -/*================*/ - fts_query_t* query, /*!< in: FTS query state */ - que_t** graph, /*!< in/out: prepared statement; - parsed here when *graph is NULL */ - const fts_string_t* word, /*!< in: the word to fetch */ - doc_id_t doc_id, /*!< in: doc id to match */ - ulint* min_pos,/*!< in/out: pos found must be - greater than this minimum value. */ - ibool* found) /*!< out: TRUE if found else FALSE */ -{ - pars_info_t* info; - dberr_t error; - fts_select_t select; - doc_id_t match_doc_id; - trx_t* trx = query->trx; - - trx->op_info = "fetching FTS index matching nodes"; - /* Reuse the bound info of an already parsed graph, if any. */ - if (*graph) { - info = (*graph)->info; - } else { - info = pars_info_create(); - } - - select.found = FALSE; - select.doc_id = doc_id; - select.min_pos = *min_pos; - select.word_freq = fts_query_add_word_freq(query, word->f_str); - - pars_info_bind_function(info, "my_func", fts_query_select, &select); - pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &match_doc_id, doc_id); - - fts_bind_doc_id(info, "min_doc_id", &match_doc_id); - - fts_bind_doc_id(info, "max_doc_id", &match_doc_id); - - if (!*graph) { - ulint selected; - - selected = fts_select_index(*word->f_str); - - query->fts_index_table.suffix = fts_get_suffix(selected); - - *graph = fts_parse_sql( - &query->fts_index_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT doc_count, ilist\n" - " FROM \"%s\"\n" - " WHERE word LIKE :word AND " - " first_doc_id <= :min_doc_id AND " - " last_doc_id >= :max_doc_id\n" - " ORDER BY first_doc_id;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - } - /* Evaluate the statement, retrying on lock wait timeout. */ - for(;;) { - error = fts_eval_sql(trx, *graph); - - if (error == DB_SUCCESS) { - - break; /* Exit the loop.
*/ - } else { - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: %lu " - "while reading FTS index.\n", error); - - break; /* Exit the loop. */ - } - } - } - - /* Value to return */ - *found = select.found; - /* min_pos is only updated when a match was found. */ - if (*found) { - *min_pos = select.min_pos; - } - - return(error); -} - -/******************************************************************** -Callback aggregator for int columns. Adds the 4-byte value of every -non-NULL, non-empty column in the select list to *total. */ -static -ibool -fts_query_sum( -/*==========*/ - /*!< out: always returns TRUE */ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: ulint* */ -{ - - que_node_t* exp; - sel_node_t* node = row; - ulint* total = user_arg; - - exp = node->select_list; - - while (exp) { - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - ulint len = dfield_get_len(dfield); - - if (len != UNIV_SQL_NULL && len != 0) { - *total += mach_read_from_4(data); - } - - exp = que_node_get_next(exp); - } - - return(TRUE); -} - -/******************************************************************** -Calculate the total documents that contain a particular word (term).
-@return DB_SUCCESS if all go well else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_total_docs_containing_term( -/*=================================*/ - fts_query_t* query, /*!< in: FTS query state */ - const fts_string_t* word, /*!< in: the word to check */ - ulint* total) /*!< out: documents containing word */ -{ - pars_info_t* info; - dberr_t error; - que_t* graph; - ulint selected; - trx_t* trx = query->trx; - - trx->op_info = "fetching FTS index document count"; - - *total = 0; - - info = pars_info_create(); - /* fts_query_sum() accumulates the fetched doc_count values - into *total. */ - pars_info_bind_function(info, "my_func", fts_query_sum, total); - pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); - - selected = fts_select_index(*word->f_str); - - query->fts_index_table.suffix = fts_get_suffix(selected); - /* NOTE(review): the table name placeholder below is not quoted, - unlike in fts_query_find_term(); confirm this is intended. */ - graph = fts_parse_sql( - &query->fts_index_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT doc_count\n" - " FROM %s\n" - " WHERE word = :word " - " ORDER BY first_doc_id;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - /* Evaluate the statement, retrying on lock wait timeout. */ - for(;;) { - error = fts_eval_sql(trx, graph); - - if (error == DB_SUCCESS) { - - break; /* Exit the loop. */ - } else { - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS index. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: %lu " - "while reading FTS index.\n", error); - - break; /* Exit the loop. */ - } - } - } - - fts_que_graph_free(graph); - - return(error); -} - -/******************************************************************** -Get the total number of words in a documents.
-@return DB_SUCCESS if all go well else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_terms_in_document( -/*========================*/ - fts_query_t* query, /*!< in: FTS query state */ - doc_id_t doc_id, /*!< in: the doc id to check */ - ulint* total) /*!< out: total words in document */ -{ - pars_info_t* info; - dberr_t error; - que_t* graph; - doc_id_t read_doc_id; - trx_t* trx = query->trx; - - trx->op_info = "fetching FTS document term count"; - - *total = 0; - - info = pars_info_create(); - /* fts_query_sum() accumulates the fetched count values - into *total. */ - pars_info_bind_function(info, "my_func", fts_query_sum, total); - - /* Convert to "storage" byte order. */ - fts_write_doc_id((byte*) &read_doc_id, doc_id); - fts_bind_doc_id(info, "doc_id", &read_doc_id); - - query->fts_index_table.suffix = "DOC_ID"; - /* NOTE(review): unlike the cursor declarations in - fts_query_find_term() and fts_query_total_docs_containing_term(), - the SELECT below is not terminated with ';' before BEGIN; - confirm whether the parser accepts this. */ - graph = fts_parse_sql( - &query->fts_index_table, - info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT count\n" - " FROM \"%s\"\n" - " WHERE doc_id = :doc_id " - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - /* Evaluate the statement, retrying on lock wait timeout. */ - for(;;) { - error = fts_eval_sql(trx, graph); - - if (error == DB_SUCCESS) { - - break; /* Exit the loop. */ - } else { - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: lock wait " - "timeout reading FTS doc id table. " - "Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: %lu " - "while reading FTS doc id table.\n", - error); - - break; /* Exit the loop. */ - } - } - } - - fts_que_graph_free(graph); - - return(error); -} -#endif - -/*****************************************************************//** -Retrieve the document and match the phrase tokens.
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_match_document( -/*=====================*/ - ib_vector_t* tokens, /*!< in: phrase tokens */ - fts_get_doc_t* get_doc, /*!< in: table and prepared statements */ - fts_match_t* match, /*!< in: doc id and positions */ - ulint distance, /*!< in: proximity distance */ - ibool* found) /*!< out: TRUE if phrase found */ -{ - dberr_t error; - fts_phrase_t phrase; - - memset(&phrase, 0x0, sizeof(phrase)); - - phrase.match = match; /* Positions to match */ - phrase.tokens = tokens; /* Tokens to match */ - phrase.distance = distance; - phrase.charset = get_doc->index_cache->charset; - phrase.zip_size = dict_table_zip_size( - get_doc->index_cache->index->table); - phrase.heap = mem_heap_create(512); - - *found = phrase.found = FALSE; - /* fts_query_fetch_document() is the per-row callback; it reports - its result through phrase.found. */ - error = fts_doc_fetch_by_doc_id( - get_doc, match->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL, - fts_query_fetch_document, &phrase); - - if (error != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: Error: (%s) matching document.\n", - ut_strerr(error)); - } else { - *found = phrase.found; - } - - mem_heap_free(phrase.heap); - - return(error); -} - -/*****************************************************************//** -This function fetches the original document and counts the -words in between matching words to see whether they are within the -specified distance -@return true if the fetch succeeded and the words were found within -the specified distance */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -fts_query_is_in_proximity_range( -/*============================*/ - const fts_query_t* query, /*!< in: query instance */ - fts_match_t** match, /*!< in: matches; match[0]->doc_id - identifies the document to fetch */ - fts_proximity_t* qualified_pos) /*!< in: position info for - qualified ranges */ -{ - fts_get_doc_t get_doc; - fts_cache_t* cache = query->index->table->fts->cache; - dberr_t err; - fts_phrase_t phrase; - - memset(&get_doc, 0x0, sizeof(get_doc)); - memset(&phrase, 0x0, sizeof(phrase)); - -
rw_lock_x_lock(&cache->lock); - get_doc.index_cache = fts_find_index_cache(cache, query->index); - rw_lock_x_unlock(&cache->lock); - ut_a(get_doc.index_cache != NULL); - - phrase.distance = query->distance; - phrase.charset = get_doc.index_cache->charset; - phrase.zip_size = dict_table_zip_size( - get_doc.index_cache->index->table); - phrase.heap = mem_heap_create(512); - phrase.proximity_pos = qualified_pos; - phrase.found = FALSE; - - err = fts_doc_fetch_by_doc_id( - &get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL, - fts_query_fetch_document, &phrase); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error: (%s) in verification phase of proximity " - "search", ut_strerr(err)); - } - - /* Free the prepared statement. */ - if (get_doc.get_document_graph) { - fts_que_graph_free(get_doc.get_document_graph); - get_doc.get_document_graph = NULL; - } - - mem_heap_free(phrase.heap); - - return(err == DB_SUCCESS && phrase.found); -} - -/*****************************************************************//** -Iterate over the matched document ids and search for the -actual phrase in the text. -@return DB_SUCCESS if all OK */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_search_phrase( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - ib_vector_t* orig_tokens, /*!< in: tokens to search, - with any stopwords in the - original phrase */ - ib_vector_t* tokens) /*!< in: tokens that do - not include stopwords and - can be used to calculate - ranking */ -{ - ulint i; - fts_get_doc_t get_doc; - ulint n_matched; - fts_cache_t* cache = query->index->table->fts->cache; - - n_matched = ib_vector_size(query->matched); - - /* Setup the doc retrieval infrastructure.
*/ - memset(&get_doc, 0x0, sizeof(get_doc)); - - rw_lock_x_lock(&cache->lock); - - get_doc.index_cache = fts_find_index_cache(cache, query->index); - - /* Must find the index cache */ - ut_a(get_doc.index_cache != NULL); - - rw_lock_x_unlock(&cache->lock); - -#ifdef FTS_INTERNAL_DIAG_PRINT - ut_print_timestamp(stderr); - fprintf(stderr, " Start phrase search\n"); -#endif - - /* Read the document from disk and do the actual - match, matching documents will be added to the current - doc id set. */ - for (i = 0; i < n_matched && query->error == DB_SUCCESS; ++i) { - fts_match_t* match; - ibool found = FALSE; - - match = static_cast<fts_match_t*>( - ib_vector_get(query->matched, i)); - - /* Skip the document ids that were filtered out by - an earlier pass. */ - if (match->doc_id != 0) { - - query->error = fts_query_match_document( - orig_tokens, &get_doc, - match, query->distance, &found); - - if (query->error == DB_SUCCESS && found) { - ulint z; - - query->error = fts_query_process_doc_id(query, - match->doc_id, 0); - if (query->error != DB_SUCCESS) { - goto func_exit; - } - /* Record every non-stopword token against - the matching document. */ - for (z = 0; z < ib_vector_size(tokens); z++) { - fts_string_t* token; - token = static_cast<fts_string_t*>( - ib_vector_get(tokens, z)); - fts_query_add_word_to_document( - query, match->doc_id, token); - } - } - } - } - -func_exit: - /* Free the prepared statement. */ - if (get_doc.get_document_graph) { - fts_que_graph_free(get_doc.get_document_graph); - get_doc.get_document_graph = NULL; - } - - return(query->error); -} - -/*****************************************************************//** -Text/Phrase search.
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_phrase_search( -/*====================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* phrase) /*!< in: token to search */ -{ - ib_vector_t* tokens; - ib_vector_t* orig_tokens; - mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t)); - ulint len = phrase->f_len; - ulint cur_pos = 0; - ib_alloc_t* heap_alloc; - ulint num_token; - CHARSET_INFO* charset; - - charset = query->fts_index_table.charset; - - heap_alloc = ib_heap_allocator_create(heap); - - tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); - orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4); - /* A positive, defined distance selects proximity search; - otherwise this is a phrase search. */ - if (query->distance != ULINT_UNDEFINED && query->distance > 0) { - query->flags = FTS_PROXIMITY; - } else { - query->flags = FTS_PHRASE; - } - - /* Split the phrase into tokens. */ - while (cur_pos < len) { - fts_cache_t* cache = query->index->table->fts->cache; - ib_rbt_bound_t parent; - ulint offset; - ulint cur_len; - fts_string_t result_str; - - cur_len = innobase_mysql_fts_get_token( - charset, - reinterpret_cast<const byte*>(phrase->f_str) + cur_pos, - reinterpret_cast<const byte*>(phrase->f_str) + len, - &result_str, &offset); - - if (cur_len == 0) { - break; - } - - cur_pos += cur_len; - - if (result_str.f_n_char == 0) { - continue; - } - - fts_string_t* token = static_cast<fts_string_t*>( - ib_vector_push(tokens, NULL)); - - token->f_str = static_cast<byte*>( - mem_heap_alloc(heap, result_str.f_len + 1)); - ut_memcpy(token->f_str, result_str.f_str, result_str.f_len); - - token->f_len = result_str.f_len; - token->f_str[token->f_len] = 0; - - if (cache->stopword_info.cached_stopword - && rbt_search(cache->stopword_info.cached_stopword, - &parent, token) != 0 - && result_str.f_n_char >= fts_min_token_size - && result_str.f_n_char <= fts_max_token_size) { - /* Add the word to the RB tree so that we can - calculate its frequency within a
document. */ - fts_query_add_word_freq(query, token); - } else { - ib_vector_pop(tokens); - } - - /* we will start to store all words including stopwords - in the "orig_tokens" vector, but skip any leading words - that are stopwords */ - if (!ib_vector_is_empty(tokens)) { - fts_string_t* orig_token = static_cast<fts_string_t*>( - ib_vector_push(orig_tokens, NULL)); - - orig_token->f_str = token->f_str; - orig_token->f_len = token->f_len; - } - } - - num_token = ib_vector_size(tokens); - if (num_token > MAX_PROXIMITY_ITEM) { - query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE; - goto func_exit; - } - - ut_ad(ib_vector_size(orig_tokens) >= num_token); - - /* Ignore empty strings. */ - if (num_token > 0) { - fts_string_t* token; - fts_fetch_t fetch; - trx_t* trx = query->trx; - fts_ast_oper_t oper = query->oper; - que_t* graph = NULL; - ulint i; - dberr_t error; - - /* Create the vector for storing matching document ids - and the positions of the first token of the phrase. */ - if (!query->matched) { - ib_alloc_t* heap_alloc; - - heap_alloc = ib_heap_allocator_create(heap); - - if (!(query->flags & FTS_PROXIMITY) - && !(query->flags & FTS_PHRASE)) { - query->matched = ib_vector_create( - heap_alloc, sizeof(fts_match_t), - 64); - } else { - ut_a(num_token <= MAX_PROXIMITY_ITEM); - query->match_array = - (ib_vector_t**) mem_heap_alloc( - heap, - num_token * - sizeof(query->matched)); - - for (i = 0; i < num_token; i++) { - query->match_array[i] = - ib_vector_create( - heap_alloc, sizeof(fts_match_t), - 64); - } - - query->matched = query->match_array[0]; - } - } - - /* Setup the callback args for filtering and consolidating - the ilist. */ - fetch.read_arg = query; - fetch.read_record = fts_query_index_fetch_nodes; - - for (i = 0; i < num_token; i++) { - /* Search for the first word from the phrase.
*/ - token = static_cast<fts_string_t*>( - ib_vector_get(tokens, i)); - - if (query->flags & FTS_PROXIMITY - || query->flags & FTS_PHRASE) { - query->matched = query->match_array[i]; - } - - error = fts_index_fetch_nodes( - trx, &graph, &query->fts_index_table, - token, &fetch); - - /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */ - ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS)); - if (error != DB_SUCCESS) { - query->error = error; - } - - fts_que_graph_free(graph); - graph = NULL; - - fts_query_cache(query, token); - - if (!(query->flags & FTS_PHRASE) - && !(query->flags & FTS_PROXIMITY)) { - break; - } - - /* If any of the tokens can't be found, - no need to continue matching */ - if (ib_vector_is_empty(query->match_array[i]) - || query->error != DB_SUCCESS) { - goto func_exit; - } - } - - /* Just a single word, no need to fetch the original - documents to do phrase matching */ - if (ib_vector_size(orig_tokens) == 1 - && !ib_vector_is_empty(query->match_array[0])) { - fts_match_t* match; - ulint n_matched; - - n_matched = ib_vector_size(query->match_array[0]); - - for (i = 0; i < n_matched; i++) { - match = static_cast<fts_match_t*>( - ib_vector_get( - query->match_array[0], i)); - - query->error = fts_query_process_doc_id( - query, match->doc_id, 0); - if (query->error != DB_SUCCESS) { - goto func_exit; - } - - fts_query_add_word_to_document( - query, match->doc_id, token); - } - query->oper = oper; - goto func_exit; - } - - /* If we are doing proximity search, verify the distance - between all words, and check they are in specified distance. */ - if (query->flags & FTS_PROXIMITY) { - fts_phrase_or_proximity_search(query, tokens); - } else { - ibool matched; - - /* Phrase Search case: - We filter out the doc ids that don't contain - all the tokens in the phrase. It's cheaper to - search the ilist than bringing the documents in - and then doing a search through the text.
Isolated - testing shows this also helps in mitigating disruption - of the buffer cache. */ - matched = fts_phrase_or_proximity_search(query, tokens); - query->matched = query->match_array[0]; - - /* Read the actual text in and search for the phrase. */ - if (matched) { - ut_ad(query->error == DB_SUCCESS); - query->error = fts_query_search_phrase( - query, orig_tokens, tokens); - } - } - - /* Restore original operation. */ - query->oper = oper; - - if (query->error != DB_SUCCESS) { - goto func_exit; - } - } - -func_exit: - mem_heap_free(heap); - - /* Don't need it anymore. */ - query->matched = NULL; - - return(query->error); -} - -/*****************************************************************//** -Find the word and evaluate. -@return DB_SUCCESS if all go well */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_query_execute( -/*==============*/ - fts_query_t* query, /*!< in: query instance */ - fts_string_t* token) /*!< in: token to search */ -{ - /* Dispatch on the current operator: union, intersect - or difference. */ - switch (query->oper) { - case FTS_NONE: - case FTS_NEGATE: - case FTS_INCR_RATING: - case FTS_DECR_RATING: - query->error = fts_query_union(query, token); - break; - - case FTS_EXIST: - query->error = fts_query_intersect(query, token); - break; - - case FTS_IGNORE: - query->error = fts_query_difference(query, token); - break; - - default: - ut_error; - } - - return(query->error); -} - -/*****************************************************************//** -Create a wildcard string. It's the responsibility of the caller to -free the byte* pointer. It's allocated using ut_malloc().
-@return ptr to allocated memory */ -static -byte* -fts_query_get_token( -/*================*/ - fts_ast_node_t* node, /*!< in: the current sub tree */ - fts_string_t* token) /*!< out: token to create */ -{ - ulint str_len; - byte* new_ptr = NULL; /* stays NULL unless a wildcard copy is made */ - - str_len = node->term.ptr->len; - - ut_a(node->type == FTS_AST_TERM); - - token->f_len = str_len; - token->f_str = node->term.ptr->str; - /* For a wildcard term, build a copy of the term with '%' - appended and a terminating 0 byte. */ - if (node->term.wildcard) { - - token->f_str = static_cast<byte*>(ut_malloc(str_len + 2)); - token->f_len = str_len + 1; - - memcpy(token->f_str, node->term.ptr->str, str_len); - - token->f_str[str_len] = '%'; - token->f_str[token->f_len] = 0; - - new_ptr = token->f_str; - } - - return(new_ptr); -} - -/*****************************************************************//** -Visit every node of the AST. -@return query->error after processing the node */ -static -dberr_t -fts_query_visitor( -/*==============*/ - fts_ast_oper_t oper, /*!< in: current operator */ - fts_ast_node_t* node, /*!< in: The root of the current subtree*/ - void* arg) /*!< in: callback arg*/ -{ - byte* ptr; - fts_string_t token; - fts_query_t* query = static_cast<fts_query_t*>(arg); - - ut_a(node); - DBUG_ENTER("fts_query_visitor"); - DBUG_PRINT("fts", ("nodetype: %s", fts_ast_node_type_get(node->type))); - - token.f_n_char = 0; - query->oper = oper; - query->cur_node = node; - - switch (node->type) { - case FTS_AST_TEXT: - token.f_str = node->text.ptr->str; - token.f_len = node->text.ptr->len; - - if (query->oper == FTS_EXIST) { - ut_ad(query->intersection == NULL); - query->intersection = rbt_create( - sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); - - query->total_size += SIZEOF_RBT_CREATE; - } - - /* Set the current proximity distance. */ - query->distance = node->text.distance; - - /* Force collection of doc ids and the positions.
*/ - query->collect_positions = TRUE; - - query->error = fts_query_phrase_search(query, &token); - - query->collect_positions = FALSE; - /* For an intersection, the set built above replaces the - current result set. */ - if (query->oper == FTS_EXIST) { - fts_query_free_doc_ids(query, query->doc_ids); - query->doc_ids = query->intersection; - query->intersection = NULL; - } - - break; - - case FTS_AST_TERM: - token.f_str = node->term.ptr->str; - token.f_len = node->term.ptr->len; - - /* Add the word to our RB tree that will be used to - calculate this terms per document frequency. */ - fts_query_add_word_freq(query, &token); - - ptr = fts_query_get_token(node, &token); - query->error = fts_query_execute(query, &token); - /* Free the wildcard copy, if one was allocated. */ - if (ptr) { - ut_free(ptr); - } - break; - - case FTS_AST_SUBEXP_LIST: - query->error = fts_ast_visit_sub_exp(node, fts_query_visitor, arg); - break; - - default: - ut_error; - } - - if (query->oper == FTS_EXIST) { - query->multi_exist = true; - } - - DBUG_RETURN(query->error); -} - -/*****************************************************************//** -Process (nested) sub-expression, create a new result set to store the -sub-expression result by processing nodes under current sub-expression -list. Merge the sub-expression result with that of parent expression list. -@return DB_SUCCESS if all well */ -UNIV_INTERN -dberr_t -fts_ast_visit_sub_exp( -/*==================*/ - fts_ast_node_t* node, /*!< in,out: current root node */ - fts_ast_callback visitor, /*!< in: callback function */ - void* arg) /*!< in,out: arg for callback */ -{ - fts_ast_oper_t cur_oper; - fts_query_t* query = static_cast<fts_query_t*>(arg); - ib_rbt_t* parent_doc_ids; - ib_rbt_t* subexpr_doc_ids; - dberr_t error = DB_SUCCESS; - bool will_be_ignored = false; - bool multi_exist; - - DBUG_ENTER("fts_ast_visit_sub_exp"); - - ut_a(node->type == FTS_AST_SUBEXP_LIST); - - cur_oper = query->oper; - - /* Save current result set */ - parent_doc_ids = query->doc_ids; - - /* Create new result set to store the sub-expression result.
We - will merge this result set with the parent after processing. */ - query->doc_ids = rbt_create(sizeof(fts_ranking_t), - fts_ranking_doc_id_cmp); - - query->total_size += SIZEOF_RBT_CREATE; - - multi_exist = query->multi_exist; - query->multi_exist = false; - /* Process nodes in current sub-expression and store its - result set in query->doc_ids we created above. */ - error = fts_ast_visit(FTS_NONE, node, visitor, - arg, &will_be_ignored); - - /* Reinstate parent node state */ - query->multi_exist = multi_exist; - query->oper = cur_oper; - - /* Merge the sub-expression result with the parent result set. */ - subexpr_doc_ids = query->doc_ids; - query->doc_ids = parent_doc_ids; - if (error == DB_SUCCESS) { - error = fts_merge_doc_ids(query, subexpr_doc_ids); - } - - /* Free current result set. Result already merged into parent. */ - fts_query_free_doc_ids(query, subexpr_doc_ids); - - DBUG_RETURN(error); -} - -#if 0 -/*****************************************************************//*** -Check if the doc id exists in the ilist. -@return TRUE if doc id found */ -static -ulint -fts_query_find_doc_id( -/*==================*/ - fts_select_t* select, /*!< in/out: contains the doc id to - find, we update the word freq if - document found */ - void* data, /*!< in: doc id ilist */ - ulint len) /*!< in: doc id ilist size */ -{ - byte* ptr = data; - doc_id_t doc_id = 0; - ulint decoded = 0; - - /* Decode the ilist and search for selected doc_id. We also - calculate the frequency of the word in the document if found. */ - while (decoded < len && !select->found) { - ulint freq = 0; - ulint min_pos = 0; - ulint last_pos = 0; - ulint pos = fts_decode_vlc(&ptr); - - /* Add the delta. */ - doc_id += pos; - - while (*ptr) { - ++freq; - last_pos += fts_decode_vlc(&ptr); - - /* Only if min_pos is not set and the current - term exists in a position greater than the - min_pos of the previous term.
*/ - if (min_pos == 0 && last_pos > select->min_pos) { - min_pos = last_pos; - } - } - - /* Skip the end of word position marker. */ - ++ptr; - - /* Bytes decoded so far. */ - decoded = ptr - (byte*) data; - - /* A word may exist in the document but we only consider a - match if it exists in a position that is greater than the - position of the previous term. */ - if (doc_id == select->doc_id && min_pos > 0) { - fts_doc_freq_t* doc_freq; - - /* Add the doc id to the doc freq rb tree, if - the doc id doesn't exist it will be created. */ - doc_freq = fts_query_add_doc_freq( - select->word_freq->doc_freqs, doc_id); - - /* Avoid duplicating the frequency tally */ - if (doc_freq->freq == 0) { - doc_freq->freq = freq; - } - - select->found = TRUE; - select->min_pos = min_pos; - } - } - - return(select->found); -} -#endif - -/*****************************************************************//** -Read and filter nodes. -@return DB_SUCCESS if all go well, -or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */ -static -dberr_t -fts_query_filter_doc_ids( -/*=====================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* word, /*!< in: the current word */ - fts_word_freq_t* word_freq, /*!< in/out: word frequency */ - const fts_node_t* node, /*!< in: current FTS node */ - void* data, /*!< in: doc id ilist */ - ulint len, /*!< in: doc id ilist size */ - ibool calc_doc_count) /*!< in: whether to remember doc count */ -{ - byte* ptr = static_cast<byte*>(data); - doc_id_t doc_id = 0; - ulint decoded = 0; - ib_rbt_t* doc_freqs = word_freq->doc_freqs; - - /* Decode the ilist and add the doc ids to the query doc_id set. */ - while (decoded < len) { - ulint freq = 0; - fts_doc_freq_t* doc_freq; - fts_match_t* match = NULL; - ulint last_pos = 0; - ulint pos = fts_decode_vlc(&ptr); - - /* Some sanity checks. */ - if (doc_id == 0) { - ut_a(pos == node->first_doc_id); - } - - /* Add the delta. 
*/ - doc_id += pos; - - if (calc_doc_count) { - word_freq->doc_count++; - } - - /* We simply collect the matching instances here. */ - if (query->collect_positions) { - ib_alloc_t* heap_alloc; - - /* Create a new fts_match_t instance. */ - match = static_cast<fts_match_t*>( - ib_vector_push(query->matched, NULL)); - - match->start = 0; - match->doc_id = doc_id; - heap_alloc = ib_vector_allocator(query->matched); - - /* Allocate from the same heap as the - parent container. */ - match->positions = ib_vector_create( - heap_alloc, sizeof(ulint), 64); - - query->total_size += sizeof(fts_match_t) - + sizeof(ib_vector_t) - + sizeof(ulint) * 64; - } - - /* Unpack the positions within the document. */ - while (*ptr) { - last_pos += fts_decode_vlc(&ptr); - - /* Collect the matching word positions, for phrase - matching later. */ - if (query->collect_positions) { - ib_vector_push(match->positions, &last_pos); - } - - ++freq; - } - - /* End of list marker. */ - last_pos = (ulint) -1; - - if (query->collect_positions) { - ut_a(match != NULL); - ib_vector_push(match->positions, &last_pos); - } - - /* Add the doc id to the doc freq rb tree, if the doc id - doesn't exist it will be created. */ - doc_freq = fts_query_add_doc_freq(query, doc_freqs, doc_id); - - /* Avoid duplicating frequency tally. */ - if (doc_freq->freq == 0) { - doc_freq->freq = freq; - } - - /* Skip the end of word position marker. */ - ++ptr; - - /* Bytes decoded so far */ - decoded = ptr - (byte*) data; - - /* We simply collect the matching documents and the - positions here and match later. */ - if (!query->collect_positions) { - /* We ignore error here and will check it later */ - fts_query_process_doc_id(query, doc_id, 0); - - /* Add the word to the document's matched RB tree. */ - fts_query_add_word_to_document(query, doc_id, word); - } - } - - /* Some sanity checks. 
*/ - ut_a(doc_id == node->last_doc_id); - - if (query->total_size > fts_result_cache_limit) { - return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT); - } else { - return(DB_SUCCESS); - } -} - -/*****************************************************************//** -Read the FTS INDEX row. -@return DB_SUCCESS if all go well. */ -static -dberr_t -fts_query_read_node( -/*================*/ - fts_query_t* query, /*!< in: query instance */ - const fts_string_t* word, /*!< in: current word */ - que_node_t* exp) /*!< in: query graph node */ -{ - int i; - int ret; - fts_node_t node; - ib_rbt_bound_t parent; - fts_word_freq_t* word_freq; - ibool skip = FALSE; - fts_string_t term; - byte buf[FTS_MAX_WORD_LEN + 1]; - dberr_t error = DB_SUCCESS; - - ut_a(query->cur_node->type == FTS_AST_TERM || - query->cur_node->type == FTS_AST_TEXT); - - memset(&node, 0, sizeof(node)); - term.f_str = buf; - - /* Need to consider the wildcard search case, the word frequency - is created on the search string not the actual word. So we need - to assign the frequency on search string behalf. */ - if (query->cur_node->type == FTS_AST_TERM - && query->cur_node->term.wildcard) { - term.f_len = query->cur_node->term.ptr->len; - ut_ad(FTS_MAX_WORD_LEN >= term.f_len); - memcpy(term.f_str, query->cur_node->term.ptr->str, term.f_len); - } else { - term.f_len = word->f_len; - ut_ad(FTS_MAX_WORD_LEN >= word->f_len); - memcpy(term.f_str, word->f_str, word->f_len); - } - - /* Lookup the word in our rb tree, it must exist. */ - ret = rbt_search(query->word_freqs, &parent, &term); - - ut_a(ret == 0); - - word_freq = rbt_value(fts_word_freq_t, parent.last); - - /* Start from 1 since the first column has been read by the caller. - Also, we rely on the order of the columns projected, to filter - out ilists that are out of range and we always want to read - the doc_count irrespective of the suitablility of the row. 
*/ - - for (i = 1; exp && !skip; exp = que_node_get_next(exp), ++i) { - - dfield_t* dfield = que_node_get_val(exp); - byte* data = static_cast<byte*>( - dfield_get_data(dfield)); - ulint len = dfield_get_len(dfield); - - ut_a(len != UNIV_SQL_NULL); - - /* Note: The column numbers below must match the SELECT. */ - - switch (i) { - case 1: /* DOC_COUNT */ - word_freq->doc_count += mach_read_from_4(data); - break; - - case 2: /* FIRST_DOC_ID */ - node.first_doc_id = fts_read_doc_id(data); - - /* Skip nodes whose doc ids are out range. */ - if (query->oper == FTS_EXIST - && query->upper_doc_id > 0 - && node.first_doc_id > query->upper_doc_id) { - skip = TRUE; - } - break; - - case 3: /* LAST_DOC_ID */ - node.last_doc_id = fts_read_doc_id(data); - - /* Skip nodes whose doc ids are out range. */ - if (query->oper == FTS_EXIST - && query->lower_doc_id > 0 - && node.last_doc_id < query->lower_doc_id) { - skip = TRUE; - } - break; - - case 4: /* ILIST */ - - error = fts_query_filter_doc_ids( - query, &word_freq->word, word_freq, - &node, data, len, FALSE); - - break; - - default: - ut_error; - } - } - - if (!skip) { - /* Make sure all columns were read. */ - - ut_a(i == 5); - } - - return error; -} - -/*****************************************************************//** -Callback function to fetch the rows in an FTS INDEX record. 
-@return always returns TRUE */ -static -ibool -fts_query_index_fetch_nodes( -/*========================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: pointer to fts_fetch_t */ -{ - fts_string_t key; - sel_node_t* sel_node = static_cast<sel_node_t*>(row); - fts_fetch_t* fetch = static_cast<fts_fetch_t*>(user_arg); - fts_query_t* query = static_cast<fts_query_t*>(fetch->read_arg); - que_node_t* exp = sel_node->select_list; - dfield_t* dfield = que_node_get_val(exp); - void* data = dfield_get_data(dfield); - ulint dfield_len = dfield_get_len(dfield); - - key.f_str = static_cast<byte*>(data); - key.f_len = dfield_len; - - ut_a(dfield_len <= FTS_MAX_WORD_LEN); - - /* Note: we pass error out by 'query->error' */ - query->error = fts_query_read_node(query, &key, que_node_get_next(exp)); - - if (query->error != DB_SUCCESS) { - ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); - return(FALSE); - } else { - return(TRUE); - } -} - -/*****************************************************************//** -Calculate the inverse document frequency (IDF) for all the terms. */ -static -void -fts_query_calculate_idf( -/*====================*/ - fts_query_t* query) /*!< in: Query state */ -{ - const ib_rbt_node_t* node; - ib_uint64_t total_docs = query->total_docs; - - /* We need to free any instances of fts_doc_freq_t that we - may have allocated. */ - for (node = rbt_first(query->word_freqs); - node; - node = rbt_next(query->word_freqs, node)) { - - fts_word_freq_t* word_freq; - - word_freq = rbt_value(fts_word_freq_t, node); - - if (word_freq->doc_count > 0) { - if (total_docs == word_freq->doc_count) { - /* QP assume ranking > 0 if we find - a match. Since Log10(1) = 0, we cannot - make IDF a zero value if do find a - word in all documents. 
So let's make - it an arbitrary very small number */ - word_freq->idf = log10(1.0001); - } else { - word_freq->idf = log10( - total_docs - / (double) word_freq->doc_count); - } - } - - if (fts_enable_diag_print) { - fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF - " %6.5lf\n", - word_freq->word.f_str, - query->total_docs, word_freq->doc_count, - word_freq->idf); - } - } -} - -/*****************************************************************//** -Calculate the ranking of the document. */ -static -void -fts_query_calculate_ranking( -/*========================*/ - const fts_query_t* query, /*!< in: query state */ - fts_ranking_t* ranking) /*!< in: Document to rank */ -{ - ulint pos = 0; - fts_string_t word; - - /* At this stage, ranking->rank should not exceed the 1.0 - bound */ - ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0); - ut_ad(rbt_size(query->word_map) == query->word_vector->size()); - - while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { - int ret; - ib_rbt_bound_t parent; - double weight; - fts_doc_freq_t* doc_freq; - fts_word_freq_t* word_freq; - - ret = rbt_search(query->word_freqs, &parent, &word); - - /* It must exist. */ - ut_a(ret == 0); - - word_freq = rbt_value(fts_word_freq_t, parent.last); - - ret = rbt_search( - word_freq->doc_freqs, &parent, &ranking->doc_id); - - /* It must exist. */ - ut_a(ret == 0); - - doc_freq = rbt_value(fts_doc_freq_t, parent.last); - - weight = (double) doc_freq->freq * word_freq->idf; - - ranking->rank += (fts_rank_t) (weight * word_freq->idf); - } -} - -/*****************************************************************//** -Add ranking to the result set. */ -static -void -fts_query_add_ranking( -/*==================*/ - fts_query_t* query, /*!< in: query state */ - ib_rbt_t* ranking_tree, /*!< in: ranking tree */ - const fts_ranking_t* new_ranking) /*!< in: ranking of a document */ -{ - ib_rbt_bound_t parent; - - /* Lookup the ranking in our rb tree and add if it doesn't exist. 
*/ - if (rbt_search(ranking_tree, &parent, new_ranking) == 0) { - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, parent.last); - - ranking->rank += new_ranking->rank; - - ut_a(ranking->words == NULL); - } else { - rbt_add_node(ranking_tree, &parent, new_ranking); - - query->total_size += SIZEOF_RBT_NODE_ADD - + sizeof(fts_ranking_t); - } -} - -/*****************************************************************//** -Retrieve the FTS Relevance Ranking result for doc with doc_id -@return the relevance ranking value, 0 if no ranking value -present. */ -float -fts_retrieve_ranking( -/*=================*/ - fts_result_t* result, /*!< in: FTS result structure */ - doc_id_t doc_id) /*!< in: doc_id of the item to retrieve */ -{ - ib_rbt_bound_t parent; - fts_ranking_t new_ranking; - - DBUG_ENTER("fts_retrieve_ranking"); - - if (!result || !result->rankings_by_id) { - DBUG_RETURN(0); - } - - new_ranking.doc_id = doc_id; - - /* Lookup the ranking in our rb tree */ - if (rbt_search(result->rankings_by_id, &parent, &new_ranking) == 0) { - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, parent.last); - - DBUG_RETURN(ranking->rank); - } - - DBUG_RETURN(0); -} - -/*****************************************************************//** -Create the result and copy the data to it. 
*/ -static -fts_result_t* -fts_query_prepare_result( -/*=====================*/ - fts_query_t* query, /*!< in: Query state */ - fts_result_t* result) /*!< in: result this can contain - data from a previous search on - another FTS index */ -{ - const ib_rbt_node_t* node; - bool result_is_null = false; - - DBUG_ENTER("fts_query_prepare_result"); - - if (result == NULL) { - result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result))); - - memset(result, 0x0, sizeof(*result)); - - result->rankings_by_id = rbt_create( - sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); - - query->total_size += sizeof(fts_result_t) + SIZEOF_RBT_CREATE; - result_is_null = true; - } - - if (query->flags == FTS_OPT_RANKING) { - fts_word_freq_t* word_freq; - ulint size = ib_vector_size(query->deleted->doc_ids); - fts_update_t* array = - (fts_update_t*) query->deleted->doc_ids->data; - - node = rbt_first(query->word_freqs); - ut_ad(node); - word_freq = rbt_value(fts_word_freq_t, node); - - for (node = rbt_first(word_freq->doc_freqs); - node; - node = rbt_next(word_freq->doc_freqs, node)) { - fts_doc_freq_t* doc_freq; - fts_ranking_t ranking; - - doc_freq = rbt_value(fts_doc_freq_t, node); - - /* Don't put deleted docs into result */ - if (fts_bsearch(array, 0, static_cast<int>(size), - doc_freq->doc_id) >= 0) { - /* one less matching doc count */ - --word_freq->doc_count; - continue; - } - - ranking.doc_id = doc_freq->doc_id; - ranking.rank = static_cast<fts_rank_t>(doc_freq->freq); - ranking.words = NULL; - - fts_query_add_ranking(query, result->rankings_by_id, - &ranking); - - if (query->total_size > fts_result_cache_limit) { - query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; - fts_query_free_result(result); - DBUG_RETURN(NULL); - } - } - - /* Calculate IDF only after we exclude the deleted items */ - fts_query_calculate_idf(query); - - node = rbt_first(query->word_freqs); - word_freq = rbt_value(fts_word_freq_t, node); - - /* Calculate the ranking for each doc */ - for (node = 
rbt_first(result->rankings_by_id); - node != NULL; - node = rbt_next(result->rankings_by_id, node)) { - - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, node); - - ranking->rank = static_cast<fts_rank_t>( - ranking->rank * word_freq->idf * word_freq->idf); - } - - DBUG_RETURN(result); - } - - ut_a(rbt_size(query->doc_ids) > 0); - - for (node = rbt_first(query->doc_ids); - node; - node = rbt_next(query->doc_ids, node)) { - - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, node); - fts_query_calculate_ranking(query, ranking); - - // FIXME: I think we may requre this information to improve the - // ranking of doc ids which have more word matches from - // different FTS indexes. - - /* We don't need these anymore free the resources. */ - ranking->words = NULL; - - if (!result_is_null) { - fts_query_add_ranking(query, result->rankings_by_id, ranking); - - if (query->total_size > fts_result_cache_limit) { - query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; - fts_query_free_result(result); - DBUG_RETURN(NULL); - } - } - } - - if (result_is_null) { - /* Use doc_ids directly */ - rbt_free(result->rankings_by_id); - result->rankings_by_id = query->doc_ids; - query->doc_ids = NULL; - } - - DBUG_RETURN(result); -} - -/*****************************************************************//** -Get the result of the query. Calculate the similarity coefficient. */ -static -fts_result_t* -fts_query_get_result( -/*=================*/ - fts_query_t* query, /*!< in: query instance */ - fts_result_t* result) /*!< in: result */ -{ - DBUG_ENTER("fts_query_get_result"); - - if (rbt_size(query->doc_ids) > 0 || query->flags == FTS_OPT_RANKING) { - /* Copy the doc ids to the result. */ - result = fts_query_prepare_result(query, result); - } else { - /* Create an empty result instance. 
*/ - result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result))); - memset(result, 0, sizeof(*result)); - } - - DBUG_RETURN(result); -} - -/*****************************************************************//** -FTS Query free resources and reset. */ -static -void -fts_query_free( -/*===========*/ - fts_query_t* query) /*!< in: query instance to free*/ -{ - - if (query->read_nodes_graph) { - fts_que_graph_free(query->read_nodes_graph); - } - - if (query->root) { - fts_ast_free_node(query->root); - } - - if (query->deleted) { - fts_doc_ids_free(query->deleted); - } - - if (query->doc_ids) { - fts_query_free_doc_ids(query, query->doc_ids); - } - - if (query->word_freqs) { - const ib_rbt_node_t* node; - - /* We need to free any instances of fts_doc_freq_t that we - may have allocated. */ - for (node = rbt_first(query->word_freqs); - node; - node = rbt_next(query->word_freqs, node)) { - - fts_word_freq_t* word_freq; - - word_freq = rbt_value(fts_word_freq_t, node); - - /* We need to cast away the const. */ - rbt_free(word_freq->doc_freqs); - } - - rbt_free(query->word_freqs); - } - - ut_a(!query->intersection); - - if (query->word_map) { - rbt_free(query->word_map); - } - - if (query->word_vector) { - delete query->word_vector; - } - - if (query->heap) { - mem_heap_free(query->heap); - } - - memset(query, 0, sizeof(*query)); -} - -/*****************************************************************//** -Parse the query using flex/bison. */ -static -fts_ast_node_t* -fts_query_parse( -/*============*/ - fts_query_t* query, /*!< in: query instance */ - byte* query_str, /*!< in: query string */ - ulint query_len) /*!< in: query string length */ -{ - int error; - fts_ast_state_t state; - bool mode = query->boolean_mode; - DBUG_ENTER("fts_query_parse"); - - memset(&state, 0x0, sizeof(state)); - - /* Setup the scanner to use, this depends on the mode flag. 
*/ - state.lexer = fts_lexer_create(mode, query_str, query_len); - state.charset = query->fts_index_table.charset; - error = fts_parse(&state); - fts_lexer_free(state.lexer); - state.lexer = NULL; - - /* Error during parsing ? */ - if (error) { - /* Free the nodes that were allocated during parsing. */ - fts_ast_state_free(&state); - } else { - query->root = state.root; - } - - DBUG_RETURN(state.root); -} - -/*******************************************************************//** -FTS Query optimization -Set FTS_OPT_RANKING if it is a simple term query */ -static -void -fts_query_can_optimize( -/*===================*/ - fts_query_t* query, /*!< in/out: query instance */ - uint flags) /*!< In: FTS search mode */ -{ - fts_ast_node_t* node = query->root; - - if (flags & FTS_EXPAND) { - return; - } - - /* Check if it has only a term without oper */ - ut_ad(node->type == FTS_AST_LIST); - node = node->list.head; - if (node != NULL && node->type == FTS_AST_TERM && node->next == NULL) { - query->flags = FTS_OPT_RANKING; - } -} - -/*******************************************************************//** -Pre-process the query string -1) make it lower case -2) in boolean mode, if there is '-' or '+' that is immediately proceeded -and followed by valid word, make it a space -@return the processed string */ -static -byte* -fts_query_str_preprocess( -/*=====================*/ - const byte* query_str, /*!< in: FTS query */ - ulint query_len, /*!< in: FTS query string len */ - ulint *result_len, /*!< out: result string length */ - CHARSET_INFO* charset, /*!< in: string charset */ - bool boolean_mode) /*!< in: is boolean mode */ -{ - ulint cur_pos = 0; - ulint str_len; - byte* str_ptr; - bool in_phrase = false; - - /* Convert the query string to lower case before parsing. We own - the ut_malloc'ed result and so remember to free it before return. 
*/ - - str_len = query_len * charset->casedn_multiply + 1; - str_ptr = static_cast<byte*>(ut_malloc(str_len)); - - *result_len = innobase_fts_casedn_str( - charset, const_cast<char*>(reinterpret_cast<const char*>( - query_str)), query_len, - reinterpret_cast<char*>(str_ptr), str_len); - - ut_ad(*result_len < str_len); - - str_ptr[*result_len] = 0; - - /* If it is boolean mode, no need to check for '-/+' */ - if (!boolean_mode) { - return(str_ptr); - } - - /* Otherwise, we travese the string to find any '-/+' that are - immediately proceeded and followed by valid search word. - NOTE: we should not do so for CJK languages, this should - be taken care of in our CJK implementation */ - while (cur_pos < *result_len) { - fts_string_t str; - ulint offset; - ulint cur_len; - - cur_len = innobase_mysql_fts_get_token( - charset, str_ptr + cur_pos, str_ptr + *result_len, - &str, &offset); - - if (cur_len == 0 || str.f_str == NULL) { - /* No valid word found */ - break; - } - - /* Check if we are in a phrase, if so, no need to do - replacement of '-/+'. */ - for (byte* ptr = str_ptr + cur_pos; ptr < str.f_str; ptr++) { - if ((char) (*ptr) == '"' ) { - in_phrase = !in_phrase; - } - } - - /* Find those are not leading '-/+' and also not in a phrase */ - if (cur_pos > 0 && str.f_str - str_ptr - cur_pos == 1 - && !in_phrase) { - char* last_op = reinterpret_cast<char*>( - str_ptr + cur_pos); - - if (*last_op == '-' || *last_op == '+') { - *last_op = ' '; - } - } - - cur_pos += cur_len; - } - - return(str_ptr); -} - -/*******************************************************************//** -FTS Query entry point. 
-@return DB_SUCCESS if successful otherwise error code */ -UNIV_INTERN -dberr_t -fts_query( -/*======*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: The FTS index to search */ - uint flags, /*!< in: FTS search mode */ - const byte* query_str, /*!< in: FTS query */ - ulint query_len, /*!< in: FTS query string len - in bytes */ - fts_result_t** result) /*!< in/out: result doc ids */ -{ - fts_query_t query; - dberr_t error = DB_SUCCESS; - byte* lc_query_str; - ulint result_len; - bool boolean_mode; - trx_t* query_trx; - CHARSET_INFO* charset; - ulint start_time_ms; - bool will_be_ignored = false; - - boolean_mode = flags & FTS_BOOL; - - *result = NULL; - memset(&query, 0x0, sizeof(query)); - query_trx = trx_allocate_for_background(); - query_trx->op_info = "FTS query"; - - start_time_ms = ut_time_ms(); - - query.trx = query_trx; - query.index = index; - query.boolean_mode = boolean_mode; - query.deleted = fts_doc_ids_create(); - query.cur_node = NULL; - - query.fts_common_table.type = FTS_COMMON_TABLE; - query.fts_common_table.table_id = index->table->id; - query.fts_common_table.parent = index->table->name; - query.fts_common_table.table = index->table; - - charset = fts_index_get_charset(index); - - query.fts_index_table.type = FTS_INDEX_TABLE; - query.fts_index_table.index_id = index->id; - query.fts_index_table.table_id = index->table->id; - query.fts_index_table.parent = index->table->name; - query.fts_index_table.charset = charset; - query.fts_index_table.table = index->table; - - query.word_map = rbt_create_arg_cmp( - sizeof(fts_string_t), innobase_fts_text_cmp, (void*) charset); - query.word_vector = new word_vector_t; - query.error = DB_SUCCESS; - - /* Setup the RB tree that will be used to collect per term - statistics. 
*/ - query.word_freqs = rbt_create_arg_cmp( - sizeof(fts_word_freq_t), innobase_fts_text_cmp, (void*) charset); - - query.total_size += SIZEOF_RBT_CREATE; - - query.total_docs = dict_table_get_n_rows(index->table); - -#ifdef FTS_DOC_STATS_DEBUG - if (ft_enable_diag_print) { - error = fts_get_total_word_count( - trx, query.index, &query.total_words); - - if (error != DB_SUCCESS) { - goto func_exit; - } - - fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n", - query.total_docs, query.total_words); - } -#endif /* FTS_DOC_STATS_DEBUG */ - - query.fts_common_table.suffix = "DELETED"; - - /* Read the deleted doc_ids, we need these for filtering. */ - error = fts_table_fetch_doc_ids( - NULL, &query.fts_common_table, query.deleted); - - if (error != DB_SUCCESS) { - goto func_exit; - } - - query.fts_common_table.suffix = "DELETED_CACHE"; - - error = fts_table_fetch_doc_ids( - NULL, &query.fts_common_table, query.deleted); - - if (error != DB_SUCCESS) { - goto func_exit; - } - - /* Get the deleted doc ids that are in the cache. */ - fts_cache_append_deleted_doc_ids( - index->table->fts->cache, query.deleted->doc_ids); - DEBUG_SYNC_C("fts_deleted_doc_ids_append"); - - /* Sort the vector so that we can do a binary search over the ids. */ - ib_vector_sort(query.deleted->doc_ids, fts_update_doc_id_cmp); - -#if 0 - /* Convert the query string to lower case before parsing. We own - the ut_malloc'ed result and so remember to free it before return. 
*/ - - lc_query_str_len = query_len * charset->casedn_multiply + 1; - lc_query_str = static_cast<byte*>(ut_malloc(lc_query_str_len)); - - result_len = innobase_fts_casedn_str( - charset, (char*) query_str, query_len, - (char*) lc_query_str, lc_query_str_len); - - ut_ad(result_len < lc_query_str_len); - - lc_query_str[result_len] = 0; - -#endif - - lc_query_str = fts_query_str_preprocess( - query_str, query_len, &result_len, charset, boolean_mode); - - query.heap = mem_heap_create(128); - - /* Create the rb tree for the doc id (current) set. */ - query.doc_ids = rbt_create( - sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); - - query.total_size += SIZEOF_RBT_CREATE; - - /* Parse the input query string. */ - if (fts_query_parse(&query, lc_query_str, result_len)) { - fts_ast_node_t* ast = query.root; - - /* Optimize query to check if it's a single term */ - fts_query_can_optimize(&query, flags); - - DBUG_EXECUTE_IF("fts_instrument_result_cache_limit", - fts_result_cache_limit = 2048; - ); - - /* Traverse the Abstract Syntax Tree (AST) and execute - the query. */ - query.error = fts_ast_visit( - FTS_NONE, ast, fts_query_visitor, - &query, &will_be_ignored); - - /* If query expansion is requested, extend the search - with first search pass result */ - if (query.error == DB_SUCCESS && (flags & FTS_EXPAND)) { - query.error = fts_expand_query(index, &query); - } - - /* Calculate the inverse document frequency of the terms. */ - if (query.error == DB_SUCCESS - && query.flags != FTS_OPT_RANKING) { - fts_query_calculate_idf(&query); - } - - /* Copy the result from the query state, so that we can - return it to the caller. 
*/ - if (query.error == DB_SUCCESS) { - *result = fts_query_get_result(&query, *result); - } - - error = query.error; - } else { - /* still return an empty result set */ - *result = static_cast<fts_result_t*>( - ut_malloc(sizeof(**result))); - memset(*result, 0, sizeof(**result)); - } - - ut_free(lc_query_str); - - if (fts_enable_diag_print && (*result)) { - ulint diff_time = ut_time_ms() - start_time_ms; - fprintf(stderr, "FTS Search Processing time: %ld secs:" - " %ld millisec: row(s) %d \n", - diff_time / 1000, diff_time % 1000, - (*result)->rankings_by_id - ? (int) rbt_size((*result)->rankings_by_id) - : -1); - - /* Log memory consumption & result size */ - ib_logf(IB_LOG_LEVEL_INFO, - "Full Search Memory: " - "%lu (bytes), Row: %lu .", - query.total_size, - (*result)->rankings_by_id - ? rbt_size((*result)->rankings_by_id) - : 0); - } - -func_exit: - fts_query_free(&query); - - trx_free_for_background(query_trx); - - return(error); -} - -/*****************************************************************//** -FTS Query free result, returned by fts_query(). */ - -void -fts_query_free_result( -/*==================*/ - fts_result_t* result) /*!< in: result instance to free.*/ -{ - if (result) { - if (result->rankings_by_id != NULL) { - rbt_free(result->rankings_by_id); - result->rankings_by_id = NULL; - } - if (result->rankings_by_rank != NULL) { - rbt_free(result->rankings_by_rank); - result->rankings_by_rank = NULL; - } - - ut_free(result); - result = NULL; - } -} - -/*****************************************************************//** -FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. 
*/ - -void -fts_query_sort_result_on_rank( -/*==========================*/ - fts_result_t* result) /*!< out: result instance to sort.*/ -{ - const ib_rbt_node_t* node; - ib_rbt_t* ranked; - - ut_a(result->rankings_by_id != NULL); - if (result->rankings_by_rank) { - rbt_free(result->rankings_by_rank); - } - - ranked = rbt_create(sizeof(fts_ranking_t), fts_query_compare_rank); - - /* We need to free any instances of fts_doc_freq_t that we - may have allocated. */ - for (node = rbt_first(result->rankings_by_id); - node; - node = rbt_next(result->rankings_by_id, node)) { - - fts_ranking_t* ranking; - - ranking = rbt_value(fts_ranking_t, node); - - ut_a(ranking->words == NULL); - - rbt_insert(ranked, ranking, ranking); - } - - /* Reset the current node too. */ - result->current = NULL; - result->rankings_by_rank = ranked; -} - -#ifdef UNIV_DEBUG -/*******************************************************************//** -A debug function to print result doc_id set. */ -static -void -fts_print_doc_id( -/*=============*/ - fts_query_t* query) /*!< in : tree that stores doc_ids.*/ -{ - const ib_rbt_node_t* node; - - /* Iterate each member of the doc_id set */ - for (node = rbt_first(query->doc_ids); - node; - node = rbt_next(query->doc_ids, node)) { - fts_ranking_t* ranking; - ranking = rbt_value(fts_ranking_t, node); - - ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, doc_id: %ld \n", - (ulint) ranking->doc_id); - - ulint pos = 0; - fts_string_t word; - - while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { - ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, value: %s \n", word.f_str); - } - } -} -#endif - -/*************************************************************//** -This function implements a simple "blind" query expansion search: -words in documents found in the first search pass will be used as -search arguments to search the document again, thus "expand" -the search result set. 
-@return DB_SUCCESS if success, otherwise the error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -fts_expand_query( -/*=============*/ - dict_index_t* index, /*!< in: FTS index to search */ - fts_query_t* query) /*!< in: FTS query instance */ -{ - const ib_rbt_node_t* node; - const ib_rbt_node_t* token_node; - fts_doc_t result_doc; - dberr_t error = DB_SUCCESS; - const fts_index_cache_t*index_cache; - - /* If no doc is found in first search pass, return */ - if (!rbt_size(query->doc_ids)) { - return(error); - } - - /* Init "result_doc", to hold words from the first search pass */ - fts_doc_init(&result_doc); - - rw_lock_x_lock(&index->table->fts->cache->lock); - index_cache = fts_find_index_cache(index->table->fts->cache, index); - rw_lock_x_unlock(&index->table->fts->cache->lock); - - ut_a(index_cache); - - result_doc.tokens = rbt_create_arg_cmp( - sizeof(fts_token_t), innobase_fts_text_cmp, - (void *)index_cache->charset); - - result_doc.charset = index_cache->charset; - - query->total_size += SIZEOF_RBT_CREATE; -#ifdef UNIV_DEBUG - fts_print_doc_id(query); -#endif - - for (node = rbt_first(query->doc_ids); - node; - node = rbt_next(query->doc_ids, node)) { - - fts_ranking_t* ranking; - ulint pos; - fts_string_t word; - ulint prev_token_size; - ulint estimate_size; - - prev_token_size = rbt_size(result_doc.tokens); - - ranking = rbt_value(fts_ranking_t, node); - - /* Fetch the documents with the doc_id from the - result of first seach pass. Since we do not - store document-to-word mapping, we need to - fetch the original document and parse them. 
- Future optimization could be done here if we - support some forms of document-to-word mapping */ - fts_doc_fetch_by_doc_id(NULL, ranking->doc_id, index, - FTS_FETCH_DOC_BY_ID_EQUAL, - fts_query_expansion_fetch_doc, - &result_doc); - - /* Remove words that have already been searched in the - first pass */ - pos = 0; - while (fts_ranking_words_get_next(query, ranking, &pos, - &word)) { - ibool ret; - - ret = rbt_delete(result_doc.tokens, &word); - - /* The word must exist in the doc we found */ - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, "Did not " - "find word %s in doc %ld for query " - "expansion search.\n", word.f_str, - (ulint) ranking->doc_id); - } - } - - /* Estimate memory used, see fts_process_token and fts_token_t. - We ignore token size here. */ - estimate_size = (rbt_size(result_doc.tokens) - prev_token_size) - * (SIZEOF_RBT_NODE_ADD + sizeof(fts_token_t) - + sizeof(ib_vector_t) + sizeof(ulint) * 32); - query->total_size += estimate_size; - - if (query->total_size > fts_result_cache_limit) { - error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; - goto func_exit; - } - } - - /* Search the table the second time with expanded search list */ - for (token_node = rbt_first(result_doc.tokens); - token_node; - token_node = rbt_next(result_doc.tokens, token_node)) { - fts_token_t* mytoken; - mytoken = rbt_value(fts_token_t, token_node); - - ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0); - fts_query_add_word_freq(query, &mytoken->text); - error = fts_query_union(query, &mytoken->text); - - if (error != DB_SUCCESS) { - break; - } - } - -func_exit: - fts_doc_free(&result_doc); - - return(error); -} -/*************************************************************//** -This function finds documents that contain all words in a -phrase or proximity search. And if proximity search, verify -the words are close enough to each other, as in specified distance. -This function is called for phrase and proximity search. 
-@return TRUE if documents are found, FALSE if otherwise */ -static -ibool -fts_phrase_or_proximity_search( -/*===========================*/ - fts_query_t* query, /*!< in/out: query instance. - query->doc_ids might be instantiated - with qualified doc IDs */ - ib_vector_t* tokens) /*!< in: Tokens contain words */ -{ - ulint n_matched; - ulint i; - ibool matched = FALSE; - ulint num_token = ib_vector_size(tokens); - fts_match_t* match[MAX_PROXIMITY_ITEM]; - ibool end_list = FALSE; - - /* Number of matched documents for the first token */ - n_matched = ib_vector_size(query->match_array[0]); - - /* We have a set of match list for each word, we shall - walk through the list and find common documents that - contain all the matching words. */ - for (i = 0; i < n_matched; i++) { - ulint j; - ulint k = 0; - fts_proximity_t qualified_pos; - - match[0] = static_cast<fts_match_t*>( - ib_vector_get(query->match_array[0], i)); - - /* For remaining match list for the token(word), we - try to see if there is a document with the same - doc id */ - for (j = 1; j < num_token; j++) { - match[j] = static_cast<fts_match_t*>( - ib_vector_get(query->match_array[j], k)); - - while (match[j]->doc_id < match[0]->doc_id - && k < ib_vector_size(query->match_array[j])) { - match[j] = static_cast<fts_match_t*>( - ib_vector_get( - query->match_array[j], k)); - k++; - } - - if (match[j]->doc_id > match[0]->doc_id) { - /* no match */ - if (query->flags & FTS_PHRASE) { - match[0]->doc_id = 0; - } - break; - } - - if (k == ib_vector_size(query->match_array[j])) { - end_list = TRUE; - - if (match[j]->doc_id != match[0]->doc_id) { - /* no match */ - if (query->flags & FTS_PHRASE) { - ulint s; - - match[0]->doc_id = 0; - - for (s = i + 1; s < n_matched; - s++) { - match[0] = static_cast< - fts_match_t*>( - ib_vector_get( - query->match_array[0], - s)); - match[0]->doc_id = 0; - } - } - - goto func_exit; - } - } - - /* FIXME: A better solution will be a counter array - remember each run's last position. 
So we don't - reset it here very time */ - k = 0; - } - - if (j != num_token) { - continue; - } - - /* For this matching doc, we need to further - verify whether the words in the doc are close - to each other, and within the distance specified - in the proximity search */ - if (query->flags & FTS_PHRASE) { - matched = TRUE; - } else if (fts_proximity_get_positions( - match, num_token, ULINT_MAX, &qualified_pos)) { - - /* Fetch the original documents and count the - words in between matching words to see that is in - specified distance */ - if (fts_query_is_in_proximity_range( - query, match, &qualified_pos)) { - /* If so, mark we find a matching doc */ - query->error = fts_query_process_doc_id( - query, match[0]->doc_id, 0); - if (query->error != DB_SUCCESS) { - matched = FALSE; - goto func_exit; - } - - matched = TRUE; - for (ulint z = 0; z < num_token; z++) { - fts_string_t* token; - token = static_cast<fts_string_t*>( - ib_vector_get(tokens, z)); - fts_query_add_word_to_document( - query, match[0]->doc_id, token); - } - } - } - - if (end_list) { - break; - } - } - -func_exit: - return(matched); -} - -/*************************************************************//** -This function checks whether words in result documents are close to -each other (within proximity range as specified by "distance"). -If "distance" is MAX_ULINT, then it will find all combinations of -positions of matching words and store min and max positions -in the "qualified_pos" for later verification. -@return true if words are close to each other, false if otherwise */ -static -bool -fts_proximity_get_positions( -/*========================*/ - fts_match_t** match, /*!< in: query instance */ - ulint num_match, /*!< in: number of matching - items */ - ulint distance, /*!< in: distance value - for proximity search */ - fts_proximity_t* qualified_pos) /*!< out: the position info - records ranges containing - all matching words. 
*/ -{ - ulint i; - ulint idx[MAX_PROXIMITY_ITEM]; - ulint num_pos[MAX_PROXIMITY_ITEM]; - ulint min_idx; - - qualified_pos->n_pos = 0; - - ut_a(num_match <= MAX_PROXIMITY_ITEM); - - /* Each word could appear multiple times in a doc. So - we need to walk through each word's position list, and find - closest distance between different words to see if - they are in the proximity distance. */ - - /* Assume each word's position list is sorted, we - will just do a walk through to all words' lists - similar to a the merge phase of a merge sort */ - for (i = 0; i < num_match; i++) { - /* idx is the current position we are checking - for a particular word */ - idx[i] = 0; - - /* Number of positions for this word */ - num_pos[i] = ib_vector_size(match[i]->positions); - } - - /* Start with the first word */ - min_idx = 0; - - while (idx[min_idx] < num_pos[min_idx]) { - ulint position[MAX_PROXIMITY_ITEM]; - ulint min_pos = ULINT_MAX; - ulint max_pos = 0; - - /* Check positions in each word position list, and - record the max/min position */ - for (i = 0; i < num_match; i++) { - position[i] = *(ulint*) ib_vector_get_const( - match[i]->positions, idx[i]); - - if (position[i] == ULINT_UNDEFINED) { - break; - } - - if (position[i] < min_pos) { - min_pos = position[i]; - min_idx = i; - } - - if (position[i] > max_pos) { - max_pos = position[i]; - } - } - - /* If max and min position are within range, we - find a good match */ - if (max_pos - min_pos <= distance - && (i >= num_match || position[i] != ULINT_UNDEFINED)) { - /* The charset has variable character - length encoding, record the min_pos and - max_pos, we will need to verify the actual - number of characters */ - qualified_pos->min_pos.push_back(min_pos); - qualified_pos->max_pos.push_back(max_pos); - qualified_pos->n_pos++; - } - - /* Otherwise, move to the next position is the - list for the word with the smallest position */ - idx[min_idx]++; - } - - return(qualified_pos->n_pos != 0); -} diff --git 
a/storage/xtradb/fts/fts0sql.cc b/storage/xtradb/fts/fts0sql.cc deleted file mode 100644 index cb8eff3cacc..00000000000 --- a/storage/xtradb/fts/fts0sql.cc +++ /dev/null @@ -1,363 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file fts/fts0sql.cc -Full Text Search functionality. - -Created 2007-03-27 Sunny Bains -*******************************************************/ - -#include "que0que.h" -#include "trx0roll.h" -#include "pars0pars.h" -#include "dict0dict.h" -#include "fts0types.h" -#include "fts0priv.h" - -#ifndef UNIV_NONINL -#include "fts0types.ic" -#include "fts0vlc.ic" -#endif - -/** SQL statements for creating the ancillary FTS tables. %s must be replaced -with the indexed table's id. */ - -/** Preamble to all SQL statements. */ -static const char* fts_sql_begin= - "PROCEDURE P() IS\n"; - -/** Postamble to non-committing SQL statements. */ -static const char* fts_sql_end= - "\n" - "END;\n"; - -/******************************************************************//** -Get the table id. 
-@return number of bytes written */ -UNIV_INTERN -int -fts_get_table_id( -/*=============*/ - const fts_table_t* - fts_table, /*!< in: FTS Auxiliary table */ - char* table_id) /*!< out: table id, must be at least - FTS_AUX_MIN_TABLE_ID_LENGTH bytes - long */ -{ - int len; - bool hex_name = DICT_TF2_FLAG_IS_SET(fts_table->table, - DICT_TF2_FTS_AUX_HEX_NAME); - - ut_a(fts_table->table != NULL); - - switch (fts_table->type) { - case FTS_COMMON_TABLE: - len = fts_write_object_id(fts_table->table_id, table_id, - hex_name); - break; - - case FTS_INDEX_TABLE: - - len = fts_write_object_id(fts_table->table_id, table_id, - hex_name); - - table_id[len] = '_'; - ++len; - table_id += len; - - len += fts_write_object_id(fts_table->index_id, table_id, - hex_name); - break; - - default: - ut_error; - } - - ut_a(len >= 16); - ut_a(len < FTS_AUX_MIN_TABLE_ID_LENGTH); - - return(len); -} - -/******************************************************************//** -Construct the prefix name of an FTS table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN -char* -fts_get_table_name_prefix( -/*======================*/ - const fts_table_t* - fts_table) /*!< in: Auxiliary table type */ -{ - int len; - const char* slash; - char* prefix_name; - int dbname_len = 0; - int prefix_name_len; - char table_id[FTS_AUX_MIN_TABLE_ID_LENGTH]; - - slash = static_cast<const char*>( - memchr(fts_table->parent, '/', strlen(fts_table->parent))); - - if (slash) { - /* Print up to and including the separator. 
*/ - dbname_len = static_cast<int>(slash - fts_table->parent) + 1; - } - - len = fts_get_table_id(fts_table, table_id); - - prefix_name_len = dbname_len + 4 + len + 1; - - prefix_name = static_cast<char*>(mem_alloc(prefix_name_len)); - - len = sprintf(prefix_name, "%.*sFTS_%s", - dbname_len, fts_table->parent, table_id); - - ut_a(len > 0); - ut_a(len == prefix_name_len - 1); - - return(prefix_name); -} - -/******************************************************************//** -Construct the name of an ancillary FTS table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN -char* -fts_get_table_name( -/*===============*/ - const fts_table_t* fts_table) - /*!< in: Auxiliary table type */ -{ - int len; - char* name; - int name_len; - char* prefix_name; - - prefix_name = fts_get_table_name_prefix(fts_table); - - name_len = static_cast<int>( - strlen(prefix_name) + 1 + strlen(fts_table->suffix) + 1); - - name = static_cast<char*>(mem_alloc(name_len)); - - len = sprintf(name, "%s_%s", prefix_name, fts_table->suffix); - - ut_a(len > 0); - ut_a(len == name_len - 1); - - mem_free(prefix_name); - - return(name); -} - -/******************************************************************//** -Parse an SQL string. %s is replaced with the table's id. 
-@return query graph */ -UNIV_INTERN -que_t* -fts_parse_sql( -/*==========*/ - fts_table_t* fts_table, /*!< in: FTS auxiliarry table info */ - pars_info_t* info, /*!< in: info struct, or NULL */ - const char* sql) /*!< in: SQL string to evaluate */ -{ - char* str; - que_t* graph; - char* str_tmp; - ibool dict_locked; - - if (fts_table != NULL) { - char* table_name; - - table_name = fts_get_table_name(fts_table); - str_tmp = ut_strreplace(sql, "%s", table_name); - mem_free(table_name); - } else { - ulint sql_len = strlen(sql) + 1; - - str_tmp = static_cast<char*>(mem_alloc(sql_len)); - strcpy(str_tmp, sql); - } - - str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end); - mem_free(str_tmp); - - dict_locked = (fts_table && fts_table->table->fts - && (fts_table->table->fts->fts_status - & TABLE_DICT_LOCKED)); - - if (!dict_locked) { - ut_ad(!mutex_own(&(dict_sys->mutex))); - - /* The InnoDB SQL parser is not re-entrant. */ - mutex_enter(&dict_sys->mutex); - } - - graph = pars_sql(info, str); - ut_a(graph); - - if (!dict_locked) { - mutex_exit(&dict_sys->mutex); - } - - mem_free(str); - - return(graph); -} - -/******************************************************************//** -Parse an SQL string. %s is replaced with the table's id. 
-@return query graph */ -UNIV_INTERN -que_t* -fts_parse_sql_no_dict_lock( -/*=======================*/ - fts_table_t* fts_table, /*!< in: FTS aux table info */ - pars_info_t* info, /*!< in: info struct, or NULL */ - const char* sql) /*!< in: SQL string to evaluate */ -{ - char* str; - que_t* graph; - char* str_tmp = NULL; - -#ifdef UNIV_DEBUG - ut_ad(mutex_own(&dict_sys->mutex)); -#endif - - if (fts_table != NULL) { - char* table_name; - - table_name = fts_get_table_name(fts_table); - str_tmp = ut_strreplace(sql, "%s", table_name); - mem_free(table_name); - } - - if (str_tmp != NULL) { - str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end); - mem_free(str_tmp); - } else { - str = ut_str3cat(fts_sql_begin, sql, fts_sql_end); - } - - //fprintf(stderr, "%s\n", str); - - graph = pars_sql(info, str); - ut_a(graph); - - mem_free(str); - - return(graph); -} - -/******************************************************************//** -Evaluate an SQL query graph. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_eval_sql( -/*=========*/ - trx_t* trx, /*!< in: transaction */ - que_t* graph) /*!< in: Query graph to evaluate */ -{ - que_thr_t* thr; - - graph->trx = trx; - graph->fork_type = QUE_FORK_MYSQL_INTERFACE; - - ut_a(thr = que_fork_start_command(graph)); - - que_run_threads(thr); - - return(trx->error_state); -} - -/******************************************************************//** -Construct the column specification part of the SQL string for selecting the -indexed FTS columns for the given table. Adds the necessary bound -ids to the given 'info' and returns the SQL string. 
Examples: - -One indexed column named "text": - - "$sel0", - info/ids: sel0 -> "text" - -Two indexed columns named "subject" and "content": - - "$sel0, $sel1", - info/ids: sel0 -> "subject", sel1 -> "content", -@return heap-allocated WHERE string */ -UNIV_INTERN -const char* -fts_get_select_columns_str( -/*=======================*/ - dict_index_t* index, /*!< in: index */ - pars_info_t* info, /*!< in/out: parser info */ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ulint i; - const char* str = ""; - - for (i = 0; i < index->n_user_defined_cols; i++) { - char* sel_str; - - dict_field_t* field = dict_index_get_nth_field(index, i); - - sel_str = mem_heap_printf(heap, "sel%lu", (ulong) i); - - /* Set copy_name to TRUE since it's dynamic. */ - pars_info_bind_id(info, TRUE, sel_str, field->name); - - str = mem_heap_printf( - heap, "%s%s$%s", str, (*str) ? ", " : "", sel_str); - } - - return(str); -} - -/******************************************************************//** -Commit a transaction. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_sql_commit( -/*===========*/ - trx_t* trx) /*!< in: transaction */ -{ - dberr_t error; - - error = trx_commit_for_mysql(trx); - - /* Commit should always succeed */ - ut_a(error == DB_SUCCESS); - - return(DB_SUCCESS); -} - -/******************************************************************//** -Rollback a transaction. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_sql_rollback( -/*=============*/ - trx_t* trx) /*!< in: transaction */ -{ - return(trx_rollback_to_savepoint(trx, NULL)); -} diff --git a/storage/xtradb/fts/fts0tlex.cc b/storage/xtradb/fts/fts0tlex.cc deleted file mode 100644 index d4d9b4c48d1..00000000000 --- a/storage/xtradb/fts/fts0tlex.cc +++ /dev/null @@ -1,1952 +0,0 @@ -#include "univ.i" -#line 2 "fts0tlex.cc" - -#line 4 "fts0tlex.cc" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. 
*/ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. */ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* An opaque pointer. */ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void* yyscan_t; -#endif - -/* For convenience, these vars (plus the bison vars far below) - are macros in the reentrant scanner. */ -#define yyin yyg->yyin_r -#define yyout yyg->yyout_r -#define yyextra yyg->yyextra_r -#define yyleng yyg->yyleng_r -#define yytext yyg->yytext_r -#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) -#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) -#define yy_flex_debug yyg->yy_flex_debug_r - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. 
- */ -#define BEGIN yyg->yy_start = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START ((yyg->yy_start - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE fts0trestart(yyin ,yyscanner ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else -#define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ -#endif - -/* The state buf must be large enough to hold one state per character in the main buffer. - */ -#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - - #define YY_LESS_LINENO(n) - -/* Return all but the first "n" matched characters back to the input stream. */ -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. 
*/ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - *yy_cp = yyg->yy_hold_char; \ - YY_RESTORE_YY_MORE_OFFSET \ - yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. */ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. 
We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via fts0trestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - * - * Returns the top of the stack, or NULL. - */ -#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ - ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ - : NULL) - -/* Same as previous macro, but useful when we know that the buffer stack is not - * NULL or when we need an lvalue. For internal use only. - */ -#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] - -void fts0trestart (FILE *input_file ,yyscan_t yyscanner ); -void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); -void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -void fts0tpop_buffer_state (yyscan_t yyscanner ); - -static void fts0tensure_buffer_stack (yyscan_t yyscanner ); -static void fts0t_load_buffer_state (yyscan_t yyscanner ); -static void fts0t_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner ); - -#define YY_FLUSH_BUFFER fts0t_flush_buffer(YY_CURRENT_BUFFER ,yyscanner) - -YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); - -void *fts0talloc (yy_size_t , yyscan_t yyscanner 
MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); -void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); -void fts0tfree (void * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); - -#define yy_new_buffer fts0t_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! YY_CURRENT_BUFFER ){ \ - fts0tensure_buffer_stack (yyscanner); \ - YY_CURRENT_BUFFER_LVALUE = \ - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! 
YY_CURRENT_BUFFER ){\ - fts0tensure_buffer_stack (yyscanner); \ - YY_CURRENT_BUFFER_LVALUE = \ - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) - -/* Begin user sect3 */ - -#define fts0twrap(n) 1 -#define YY_SKIP_YYWRAP - -typedef unsigned char YY_CHAR; - -typedef int yy_state_type; - -#define yytext_ptr yytext_r - -static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); -static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); -static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) ); - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - yyg->yytext_ptr = yy_bp; \ - yyleng = static_cast<int>(yy_cp - yy_bp); \ - yyg->yy_hold_char = *yy_cp; \ - *yy_cp = '\0'; \ - yyg->yy_c_buf_p = yy_cp; - -#define YY_NUM_RULES 7 -#define YY_END_OF_BUFFER 8 -/* This struct is not used in this scanner, - but its presence is necessary. 
*/ -struct yy_trans_info - { - flex_int32_t yy_verify; - flex_int32_t yy_nxt; - }; -static yyconst flex_int16_t yy_accept[17] = - { 0, - 4, 4, 8, 4, 1, 6, 1, 5, 5, 2, - 4, 1, 1, 0, 3, 0 - } ; - -static yyconst flex_int32_t yy_ec[256] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 4, 1, 5, 1, 1, 6, 1, 1, 1, - 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - } ; - -static yyconst flex_int32_t yy_meta[8] = - { 0, - 1, 2, 3, 4, 5, 5, 1 - } ; - -static yyconst flex_int16_t yy_base[20] = - { 0, - 0, 0, 18, 0, 6, 21, 0, 9, 21, 0, - 0, 0, 0, 4, 21, 21, 10, 11, 15 - } ; - -static yyconst flex_int16_t yy_def[20] = - { 0, - 16, 1, 16, 17, 17, 16, 18, 19, 16, 17, - 17, 5, 18, 19, 16, 0, 16, 16, 16 - } ; - -static yyconst flex_int16_t yy_nxt[29] = - { 0, - 4, 5, 6, 7, 8, 9, 10, 12, 15, 13, - 11, 11, 13, 15, 13, 14, 14, 16, 14, 14, - 3, 16, 16, 16, 16, 16, 16, 16 - } ; - -static yyconst flex_int16_t yy_chk[29] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 5, 14, 5, - 17, 17, 18, 8, 18, 19, 19, 3, 19, 19, - 16, 16, 16, 16, 16, 16, 16, 16 - } ; - -/* The intent behind this definition is that it'll catch - * any uses of REJECT which flex missed. 
- */ -#define REJECT reject_used_but_not_detected -#define yymore() yymore_used_but_not_detected -#define YY_MORE_ADJ 0 -#define YY_RESTORE_YY_MORE_OFFSET -#line 1 "fts0tlex.l" -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ -/** - * @file fts/fts0tlex.l - * FTS parser lexical analyzer - * - * Created 2007/5/9 Sunny Bains - */ -#line 27 "fts0tlex.l" - -#include "fts0ast.h" -#include "fts0pars.h" - -/* Required for reentrant parser */ -#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner) - -#define YY_NO_INPUT 1 -#line 480 "fts0tlex.cc" - -#define INITIAL 0 - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -/* Holds the entire state of the reentrant scanner. */ -struct yyguts_t - { - - /* User-defined. Not touched by flex. */ - YY_EXTRA_TYPE yyextra_r; - - /* The rest are the same as the globals declared in the non-reentrant scanner. 
*/ - FILE *yyin_r, *yyout_r; - size_t yy_buffer_stack_top; /**< index of top of stack. */ - size_t yy_buffer_stack_max; /**< capacity of stack. */ - YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ - char yy_hold_char; - int yy_n_chars; - int yyleng_r; - char *yy_c_buf_p; - int yy_init; - int yy_start; - int yy_did_buffer_switch_on_eof; - int yy_start_stack_ptr; - int yy_start_stack_depth; - int *yy_start_stack; - yy_state_type yy_last_accepting_state; - char* yy_last_accepting_cpos; - - int yylineno_r; - int yy_flex_debug_r; - - char *yytext_r; - int yy_more_flag; - int yy_more_len; - - }; /* end struct yyguts_t */ - -static int yy_init_globals (yyscan_t yyscanner ); - -int fts0tlex_init (yyscan_t* scanner); - -int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); - -/* Accessor methods to globals. - These are made visible to non-reentrant scanners for convenience. */ - -int fts0tlex_destroy (yyscan_t yyscanner ); - -int fts0tget_debug (yyscan_t yyscanner ); - -void fts0tset_debug (int debug_flag ,yyscan_t yyscanner ); - -YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner ); - -void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); - -FILE *fts0tget_in (yyscan_t yyscanner ); - -void fts0tset_in (FILE * in_str ,yyscan_t yyscanner ); - -FILE *fts0tget_out (yyscan_t yyscanner ); - -void fts0tset_out (FILE * out_str ,yyscan_t yyscanner ); - -int fts0tget_leng (yyscan_t yyscanner ); - -char *fts0tget_text (yyscan_t yyscanner ); - -int fts0tget_lineno (yyscan_t yyscanner ); - -void fts0tset_lineno (int line_number ,yyscan_t yyscanner ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. 
- */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int fts0twrap (yyscan_t yyscanner ); -#else -extern int fts0twrap (yyscan_t yyscanner ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))); -#endif - -#ifndef YY_NO_INPUT - -#ifdef __cplusplus -static int yyinput (yyscan_t yyscanner ); -#else -static int input (yyscan_t yyscanner ); -#endif - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else -#define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ -#endif - -/* Copy whatever the last rule matched to the standard output. */ -#ifndef ECHO -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) -#endif - -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". 
- */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ - if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ - { \ - int c = '*'; \ - int n; \ - for ( n = 0; n < static_cast<int>(max_size) && \ - (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ - buf[n] = (char) c; \ - if ( c == '\n' ) \ - buf[n++] = (char) c; \ - if ( c == EOF && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - result = n; \ - } \ - else \ - { \ - errno=0; \ - while ( (result = static_cast<int>(fread(buf, 1, max_size, yyin)))==0 \ - && ferror(yyin)) \ - { \ - if( errno != EINTR) \ - { \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - break; \ - } \ - errno=0; \ - clearerr(yyin); \ - } \ - }\ -\ - -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) -#endif - -/* end tables serialization structures and prototypes */ - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int fts0tlex (yyscan_t yyscanner); - -#define YY_DECL int fts0tlex (yyscan_t yyscanner) -#endif /* !YY_DECL */ - -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -/* Code executed at the end of each rule. */ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - -#define YY_RULE_SETUP \ - YY_USER_ACTION - -/** The main scanner function which does all the work. 
- */ -YY_DECL -{ - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - -#line 44 "fts0tlex.l" - - -#line 707 "fts0tlex.cc" - - if ( !yyg->yy_init ) - { - yyg->yy_init = 1; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif - - if ( ! yyg->yy_start ) - yyg->yy_start = 1; /* first start state */ - - if ( ! yyin ) - yyin = stdin; - - if ( ! yyout ) - yyout = stdout; - - if ( ! YY_CURRENT_BUFFER ) { - fts0tensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - fts0t_load_buffer_state(yyscanner ); - } - - while ( 1 ) /* loops until end-of-file is reached */ - { - yy_cp = yyg->yy_c_buf_p; - - /* Support of yytext. */ - *yy_cp = yyg->yy_hold_char; - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. - */ - yy_bp = yy_cp; - - yy_current_state = yyg->yy_start; -yy_match: - do - { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 17 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - ++yy_cp; - } - while ( yy_current_state != 16 ); - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - -yy_find_action: - yy_act = yy_accept[yy_current_state]; - - YY_DO_BEFORE_ACTION; - -do_action: /* This label is used only to access EOF actions. 
*/ - - switch ( yy_act ) - { /* beginning of action switch */ - case 0: /* must back up */ - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = yyg->yy_hold_char; - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - goto yy_find_action; - -case 1: -YY_RULE_SETUP -#line 46 "fts0tlex.l" -/* Ignore whitespace */ ; - YY_BREAK -case 2: -YY_RULE_SETUP -#line 48 "fts0tlex.l" -{ - val->oper = fts0tget_text(yyscanner)[0]; - - return(val->oper); -} - YY_BREAK -case 3: -YY_RULE_SETUP -#line 54 "fts0tlex.l" -{ - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); - - return(FTS_TEXT); -} - YY_BREAK -case 4: -YY_RULE_SETUP -#line 60 "fts0tlex.l" -{ - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); - - return(FTS_TERM); -} - YY_BREAK -case 5: -YY_RULE_SETUP -#line 65 "fts0tlex.l" -; - YY_BREAK -case 6: -/* rule 6 can match eol */ -YY_RULE_SETUP -#line 66 "fts0tlex.l" - - YY_BREAK -case 7: -YY_RULE_SETUP -#line 68 "fts0tlex.l" -ECHO; - YY_BREAK -#line 834 "fts0tlex.cc" -case YY_STATE_EOF(INITIAL): - yyterminate(); - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = yyg->yy_hold_char; - YY_RESTORE_YY_MORE_OFFSET - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * fts0tlex(). If so, then we have to assure - * consistency between YY_CURRENT_BUFFER and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. 
- */ - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); - - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. */ - yy_cp = ++yyg->yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { - yy_cp = yyg->yy_last_accepting_cpos; - yy_current_state = yyg->yy_last_accepting_state; - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_END_OF_FILE: - { - yyg->yy_did_buffer_switch_on_eof = 0; - - if ( fts0twrap(yyscanner ) ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. 
- */ - yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = - yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yyg->yy_c_buf_p = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ -} /* end of fts0tlex */ - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ -static int yy_get_next_buffer (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - register char *source = yyg->yytext_ptr; - register int number_to_move, i; - int ret_val; - - if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. 
- */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. */ - number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; - - else - { - int num_to_read =static_cast<int>( - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1); - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER; - - int yy_c_buf_p_offset = - (int) (yyg->yy_c_buf_p - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = static_cast<int>(b->yy_buf_size * 2); - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - fts0trealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = static_cast<int>( - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1); - - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. 
*/ - YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - yyg->yy_n_chars, num_to_read); - - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - if ( yyg->yy_n_chars == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - fts0trestart(yyin ,yyscanner); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { - /* Extend the array by 50%, plus the number we really need. */ - yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0trealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner ); - if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); - } - - yyg->yy_n_chars += number_to_move; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - - yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; - - return ret_val; -} - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - - static yy_state_type yy_get_previous_state (yyscan_t yyscanner) -{ - register yy_state_type yy_current_state; - register char *yy_cp; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - yy_current_state = yyg->yy_start; - - for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) - { - register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 17 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - } - - return yy_current_state; -} - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ - static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) -{ - register int yy_is_jam; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ - register char *yy_cp = yyg->yy_c_buf_p; - - register YY_CHAR yy_c = 1; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 17 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - yy_is_jam = (yy_current_state == 16); - - return yy_is_jam ? 0 : yy_current_state; -} - -#ifndef YY_NO_INPUT -#ifdef __cplusplus - static int yyinput (yyscan_t yyscanner) -#else - static int input (yyscan_t yyscanner) -#endif - -{ - int c; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - *yyg->yy_c_buf_p = yyg->yy_hold_char; - - if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. 
- */ - if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - /* This was really a NUL. */ - *yyg->yy_c_buf_p = '\0'; - - else - { /* need more input */ - int offset = yyg->yy_c_buf_p - yyg->yytext_ptr; - ++yyg->yy_c_buf_p; - - switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - fts0trestart(yyin ,yyscanner); - - /*FALLTHROUGH*/ - - case EOB_ACT_END_OF_FILE: - { - if ( fts0twrap(yyscanner ) ) - return EOF; - - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(yyscanner); -#else - return input(yyscanner); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = yyg->yytext_ptr + offset; - break; - } - } - } - - c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ - *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ - yyg->yy_hold_char = *++yyg->yy_c_buf_p; - - return c; -} -#endif /* ifndef YY_NO_INPUT */ - -/** Immediately switch to a different input stream. - * @param input_file A readable stream. - * @param yyscanner The scanner object. - * @note This function does not reset the start condition to @c INITIAL . - */ - void fts0trestart (FILE * input_file , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! YY_CURRENT_BUFFER ){ - fts0tensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - fts0t_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); - fts0t_load_buffer_state(yyscanner ); -} - -/** Switch to a different input buffer. - * @param new_buffer The new input buffer. - * @param yyscanner The scanner object. 
- */ - void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* TODO. We should be able to replace this entire function body - * with - * fts0tpop_buffer_state(); - * fts0tpush_buffer_state(new_buffer); - */ - fts0tensure_buffer_stack (yyscanner); - if ( YY_CURRENT_BUFFER == new_buffer ) - return; - - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - YY_CURRENT_BUFFER_LVALUE = new_buffer; - fts0t_load_buffer_state(yyscanner ); - - /* We don't actually know whether we did this switch during - * EOF (fts0twrap()) processing, but the only time this flag - * is looked at is after fts0twrap() is called, so it's safe - * to go ahead and always set it. - */ - yyg->yy_did_buffer_switch_on_eof = 1; -} - -static void fts0t_load_buffer_state (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; - yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; - yyg->yy_hold_char = *yyg->yy_c_buf_p; -} - -/** Allocate and initialize an input buffer state. - * @param file A readable stream. - * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. - * @param yyscanner The scanner object. - * @return the allocated buffer state. - */ - YY_BUFFER_STATE fts0t_create_buffer (FILE * file, int size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) fts0talloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! 
b ) - YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) fts0talloc(b->yy_buf_size + 2 ,yyscanner ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - fts0t_init_buffer(b,file ,yyscanner); - - return b; -} - -/** Destroy the buffer. - * @param b a buffer created with fts0t_create_buffer() - * @param yyscanner The scanner object. - */ - void fts0t_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! b ) - return; - - if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ - YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - fts0tfree((void *) b->yy_ch_buf ,yyscanner ); - - fts0tfree((void *) b ,yyscanner ); -} - -/* Initializes or reinitializes a buffer. - * This function is sometimes called more than once on the same buffer, - * such as during a fts0trestart() or at EOF. - */ - static void fts0t_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) - -{ - int oerrno = errno; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - fts0t_flush_buffer(b ,yyscanner); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - - /* If b is the current buffer, then fts0t_init_buffer was _probably_ - * called from fts0trestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } - - b->yy_is_interactive = 0; - - errno = oerrno; -} - -/** Discard all buffered characters. On the next scan, YY_INPUT will be called. - * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. - * @param yyscanner The scanner object. 
- */ - void fts0t_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == YY_CURRENT_BUFFER ) - fts0t_load_buffer_state(yyscanner ); -} - -/** Pushes the new state onto the stack. The new state becomes - * the current state. This function will allocate the stack - * if necessary. - * @param new_buffer The new state. - * @param yyscanner The scanner object. - */ -void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (new_buffer == NULL) - return; - - fts0tensure_buffer_stack(yyscanner); - - /* This block is copied from fts0t_switch_to_buffer. */ - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - /* Only push if top exists. Otherwise, replace top. */ - if (YY_CURRENT_BUFFER) - yyg->yy_buffer_stack_top++; - YY_CURRENT_BUFFER_LVALUE = new_buffer; - - /* copied from fts0t_switch_to_buffer. */ - fts0t_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; -} - -/** Removes and deletes the top of the stack, if present. - * The next element becomes the new top. - * @param yyscanner The scanner object. 
- */ -void fts0tpop_buffer_state (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (!YY_CURRENT_BUFFER) - return; - - fts0t_delete_buffer(YY_CURRENT_BUFFER ,yyscanner); - YY_CURRENT_BUFFER_LVALUE = NULL; - if (yyg->yy_buffer_stack_top > 0) - --yyg->yy_buffer_stack_top; - - if (YY_CURRENT_BUFFER) { - fts0t_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; - } -} - -/* Allocates the stack if it does not exist. - * Guarantees space for at least one push. - */ -static void fts0tensure_buffer_stack (yyscan_t yyscanner) -{ - int num_to_alloc; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (!yyg->yy_buffer_stack) { - - /* First allocation is just for 2 elements, since we don't know if this - * scanner will even need a stack. We use 2 instead of 1 to avoid an - * immediate realloc on the next call. - */ - num_to_alloc = 1; - yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0talloc - (num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in fts0tensure_buffer_stack()" ); - - memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - - yyg->yy_buffer_stack_max = num_to_alloc; - yyg->yy_buffer_stack_top = 0; - return; - } - - if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ - - /* Increase the buffer to prepare for a possible push. */ - int grow_size = 8 /* arbitrary grow size */; - - num_to_alloc = static_cast<int>(yyg->yy_buffer_stack_max + grow_size); - yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0trealloc - (yyg->yy_buffer_stack, - num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! 
yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in fts0tensure_buffer_stack()" ); - - /* zero only the new slots.*/ - memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); - yyg->yy_buffer_stack_max = num_to_alloc; - } -} - -/** Setup the input buffer state to scan directly from a user-specified character buffer. - * @param base the character buffer - * @param size the size in bytes of the character buffer - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE fts0t_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - if ( size < 2 || - base[size-2] != YY_END_OF_BUFFER_CHAR || - base[size-1] != YY_END_OF_BUFFER_CHAR ) - /* They forgot to leave room for the EOB's. */ - return 0; - - b = (YY_BUFFER_STATE) fts0talloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in fts0t_scan_buffer()" ); - - b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ - b->yy_buf_pos = b->yy_ch_buf = base; - b->yy_is_our_buffer = 0; - b->yy_input_file = 0; - b->yy_n_chars = static_cast<int>(b->yy_buf_size); - b->yy_is_interactive = 0; - b->yy_at_bol = 1; - b->yy_fill_buffer = 0; - b->yy_buffer_status = YY_BUFFER_NEW; - - fts0t_switch_to_buffer(b ,yyscanner ); - - return b; -} - -/** Setup the input buffer state to scan a string. The next call to fts0tlex() will - * scan from a @e copy of @a str. - * @param yystr a NUL-terminated string to scan - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - * @note If you want to scan bytes that may contain NUL values, then use - * fts0t_scan_bytes() instead. - */ -YY_BUFFER_STATE fts0t_scan_string (yyconst char * yystr , yyscan_t yyscanner) -{ - - return fts0t_scan_bytes(yystr,static_cast<int>(strlen(yystr)) ,yyscanner); -} - -/** Setup the input buffer state to scan the given bytes. 
The next call to fts0tlex() will - * scan from a @e copy of @a bytes. - * @param yybytes the byte buffer to scan - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - char *buf; - yy_size_t n; - int i; - - /* Get memory for full buffer, including space for trailing EOB's. */ - n = _yybytes_len + 2; - buf = (char *) fts0talloc(n ,yyscanner ); - if ( ! buf ) - YY_FATAL_ERROR( "out of dynamic memory in fts0t_scan_bytes()" ); - - for ( i = 0; i < _yybytes_len; ++i ) - buf[i] = yybytes[i]; - - buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; - - b = fts0t_scan_buffer(buf,n ,yyscanner); - if ( ! b ) - YY_FATAL_ERROR( "bad buffer in fts0t_scan_bytes()" ); - - /* It's okay to grow etc. this buffer, and we should throw it - * away when we're done. - */ - b->yy_is_our_buffer = 1; - - return b; -} - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); -} - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. 
*/ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - yytext[yyleng] = yyg->yy_hold_char; \ - yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ - yyg->yy_hold_char = *yyg->yy_c_buf_p; \ - *yyg->yy_c_buf_p = '\0'; \ - yyleng = yyless_macro_arg; \ - } \ - while ( 0 ) - -/* Accessor methods (get/set functions) to struct members. */ - -/** Get the user-defined data for this scanner. - * @param yyscanner The scanner object. - */ -YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyextra; -} - -/** Get the current line number. - * @param yyscanner The scanner object. - */ -int fts0tget_lineno (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yylineno; -} - -/** Get the current column number. - * @param yyscanner The scanner object. - */ -int fts0tget_column (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yycolumn; -} - -/** Get the input stream. - * @param yyscanner The scanner object. - */ -FILE *fts0tget_in (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyin; -} - -/** Get the output stream. - * @param yyscanner The scanner object. - */ -FILE *fts0tget_out (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyout; -} - -/** Get the length of the current token. - * @param yyscanner The scanner object. - */ -int fts0tget_leng (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyleng; -} - -/** Get the current token. - * @param yyscanner The scanner object. - */ - -char *fts0tget_text (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yytext; -} - -/** Set the user-defined data. This data is never touched by the scanner. 
- * @param user_defined The data to be associated with this scanner. - * @param yyscanner The scanner object. - */ -void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyextra = user_defined ; -} - -/** Set the current line number. - * @param line_number - * @param yyscanner The scanner object. - */ -void fts0tset_lineno (int line_number , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* lineno is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner); - - yylineno = line_number; -} - -/** Set the current column. - * @param line_number - * @param yyscanner The scanner object. - */ -void fts0tset_column (int column_no , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* column is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner); - - yycolumn = column_no; -} - -/** Set the input stream. This does not discard the current - * input buffer. - * @param in_str A readable stream. - * @param yyscanner The scanner object. 
- * @see fts0t_switch_to_buffer - */ -void fts0tset_in (FILE * in_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyin = in_str ; -} - -void fts0tset_out (FILE * out_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyout = out_str ; -} - -int fts0tget_debug (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yy_flex_debug; -} - -void fts0tset_debug (int bdebug , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yy_flex_debug = bdebug ; -} - -/* Accessor methods for yylval and yylloc */ - -/* User-visible API */ - -/* fts0tlex_init is special because it creates the scanner itself, so it is - * the ONLY reentrant function that doesn't take the scanner as the last argument. - * That's why we explicitly handle the declaration, instead of using our macros. - */ - -int fts0tlex_init(yyscan_t* ptr_yy_globals) - -{ - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), NULL ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - return yy_init_globals ( *ptr_yy_globals ); -} - -/* fts0tlex_init_extra has the same functionality as fts0tlex_init, but follows the - * convention of taking the scanner as the last argument. Note however, that - * this is a *pointer* to a scanner, as it will be allocated by this call (and - * is the reason, too, why this function also must handle its own declaration). - * The user defined value in the first argument will be available to fts0talloc in - * the yyextra field. 
- */ - -int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) - -{ - struct yyguts_t dummy_yyguts; - - fts0tset_extra (yy_user_defined, &dummy_yyguts); - - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in - yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - fts0tset_extra (yy_user_defined, *ptr_yy_globals); - - return yy_init_globals ( *ptr_yy_globals ); -} - -static int yy_init_globals (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* Initialization is the same as for the non-reentrant scanner. - * This function is called from fts0tlex_destroy(), so don't allocate here. - */ - - yyg->yy_buffer_stack = 0; - yyg->yy_buffer_stack_top = 0; - yyg->yy_buffer_stack_max = 0; - yyg->yy_c_buf_p = (char *) 0; - yyg->yy_init = 0; - yyg->yy_start = 0; - - yyg->yy_start_stack_ptr = 0; - yyg->yy_start_stack_depth = 0; - yyg->yy_start_stack = NULL; - -/* Defined in main.c */ -#ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; -#else - yyin = (FILE *) 0; - yyout = (FILE *) 0; -#endif - - /* For future reference: Set errno on error, since we are called by - * fts0tlex_init() - */ - return 0; -} - -/* fts0tlex_destroy is for both reentrant and non-reentrant scanners. */ -int fts0tlex_destroy (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* Pop the buffer stack, destroying each element. */ - while(YY_CURRENT_BUFFER){ - fts0t_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); - YY_CURRENT_BUFFER_LVALUE = NULL; - fts0tpop_buffer_state(yyscanner); - } - - /* Destroy the stack itself. */ - fts0tfree(yyg->yy_buffer_stack ,yyscanner); - yyg->yy_buffer_stack = NULL; - - /* Destroy the start condition stack. 
*/ - fts0tfree(yyg->yy_start_stack ,yyscanner ); - yyg->yy_start_stack = NULL; - - /* Reset the globals. This is important in a non-reentrant scanner so the next time - * fts0tlex() is called, initialization will occur. */ - yy_init_globals( yyscanner); - - /* Destroy the main struct (reentrant only). */ - fts0tfree ( yyscanner , yyscanner ); - yyscanner = NULL; - return 0; -} - -/* - * Internal utility routines. - */ - -#ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; -} -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; -} -#endif - -void *fts0talloc (yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - return (void *) malloc( size ); -} - -void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - /* The cast to (char *) in the 
following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. - */ - return (void *) realloc( (char *) ptr, size ); -} - -void fts0tfree (void * ptr , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused))) -{ - free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */ -} - -#define YYTABLES_NAME "yytables" - -#line 68 "fts0tlex.l" - - - diff --git a/storage/xtradb/fts/fts0tlex.l b/storage/xtradb/fts/fts0tlex.l deleted file mode 100644 index 4f55a83afe5..00000000000 --- a/storage/xtradb/fts/fts0tlex.l +++ /dev/null @@ -1,68 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/** - * @file fts/fts0tlex.l - * FTS parser lexical analyzer - * - * Created 2007/5/9 Sunny Bains - */ - -%{ - -#include "fts0ast.h" -#include "fts0pars.h" - -/* Required for reentrant parser */ -#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner) - -%} - -%option noinput -%option nounput -%option noyywrap -%option nostdinit -%option reentrant -%option never-interactive - - -%% - -[\t ]+ /* Ignore whitespace */ ; - -[*] { - val->oper = fts0tget_text(yyscanner)[0]; - - return(val->oper); -} - -\"[^\"\n]*\" { - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); - - return(FTS_TEXT); -} - -[^" \n\%]* { - val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner)); - - return(FTS_TERM); -} -. ; -\n - -%% diff --git a/storage/xtradb/fts/make_parser.sh b/storage/xtradb/fts/make_parser.sh deleted file mode 100755 index 52b63eff674..00000000000 --- a/storage/xtradb/fts/make_parser.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free Software -# Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA


TMPF=t.$$

# Regenerate the parser/scanner sources. Stop if that fails, so that stale or
# missing .cc files are not post-processed and then overwritten.
make -f Makefile.query || exit 1

# annotate_scanner FILE
#
# Post-process one flex-generated scanner in place:
#   * put '#include "univ.i"' on the first line, unless it is already there;
#   * tag the yyscanner parameter of the helper functions listed in the sed
#     script with MY_ATTRIBUTE((unused)).
# Lines that already carry the attribute are skipped, so running this script
# again on a file that make did not regenerate no longer stacks up duplicate
# attributes or duplicate include lines.
#
# This is to avoid compiler warning about unused parameters.
# FIXME: gcc extension "MY_ATTRIBUTE" causing compilation errors on windows
# platform. Quote them out for now.
annotate_scanner()
{
	src=$1

	first_line=`head -n 1 "$src"`

	if [ "$first_line" = '#include "univ.i"' ]; then
		# Header already present: start from an empty temp file.
		: > "$TMPF"
	else
		echo '#include "univ.i"' > "$TMPF"
	fi

	sed -e '
/MY_ATTRIBUTE((unused))/!{
s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
}
' < "$src" >> "$TMPF" || exit 1

	mv "$TMPF" "$src"
}

annotate_scanner fts0blex.cc
annotate_scanner fts0tlex.cc
diff --git a/storage/xtradb/fut/fut0fut.cc b/storage/xtradb/fut/fut0fut.cc deleted file mode 100644 index 9bb1c512182..00000000000 --- a/storage/xtradb/fut/fut0fut.cc +++ /dev/null @@ -1,31 +0,0 @@ -/***************************************************************************** - -Copyright (c) 
1995, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file fut/fut0fut.cc -File-based utilities - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - -#include "fut0fut.h" - -#ifdef UNIV_NONINL -#include "fut0fut.ic" -#endif - diff --git a/storage/xtradb/fut/fut0lst.cc b/storage/xtradb/fut/fut0lst.cc deleted file mode 100644 index dd3fa1238d9..00000000000 --- a/storage/xtradb/fut/fut0lst.cc +++ /dev/null @@ -1,432 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file fut/fut0lst.cc -File-based list utilities - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "fut0lst.h" - -#ifdef UNIV_NONINL -#include "fut0lst.ic" -#endif - -#include "buf0buf.h" -#include "page0page.h" - -/********************************************************************//** -Adds a node to an empty list. */ -static -void -flst_add_to_empty( -/*==============*/ - flst_base_node_t* base, /*!< in: pointer to base node of - empty list */ - flst_node_t* node, /*!< in: node to add */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ulint space; - fil_addr_t node_addr; - ulint len; - - ut_ad(mtr && base && node); - ut_ad(base != node); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); - ut_a(len == 0); - - buf_ptr_get_fsp_addr(node, &space, &node_addr); - - /* Update first and last fields of base node */ - flst_write_addr(base + FLST_FIRST, node_addr, mtr); - flst_write_addr(base + FLST_LAST, node_addr, mtr); - - /* Set prev and next fields of node to add */ - flst_write_addr(node + FLST_PREV, fil_addr_null, mtr); - flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr); - - /* Update len of base node */ - mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr); -} - -/********************************************************************//** -Adds a node as the last node in a list. 
*/
UNIV_INTERN
void
flst_add_last(
/*==========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	self_addr;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	const ulint		list_len = flst_get_len(base, mtr);
	const fil_addr_t	tail_addr = flst_get_last(base, mtr);

	buf_ptr_get_fsp_addr(node, &space_id, &self_addr);

	if (list_len == 0) {
		/* Nothing to append after: take the empty-list path. */
		flst_add_to_empty(base, node, mtr);
		return;
	}

	/* Locate the current tail. When it lives on the same page as the
	new node, its pointer is derived from the page frame directly;
	otherwise it is fetched through fut_get_ptr(). */
	flst_node_t*	tail;

	if (tail_addr.page != self_addr.page) {
		const ulint	zip_size = fil_space_get_zip_size(space_id);

		tail = fut_get_ptr(space_id, zip_size, tail_addr,
				   RW_X_LATCH, mtr);
	} else {
		tail = page_align(node) + tail_addr.boffset;
	}

	flst_insert_after(base, tail, node, mtr);
}

/********************************************************************//**
Adds a node as the first node in a list.
*/
UNIV_INTERN
void
flst_add_first(
/*===========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	self_addr;

	ut_ad(mtr && base && node);
	ut_ad(base != node);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));

	const ulint		list_len = flst_get_len(base, mtr);
	const fil_addr_t	head_addr = flst_get_first(base, mtr);

	buf_ptr_get_fsp_addr(node, &space_id, &self_addr);

	if (list_len == 0) {
		/* Nothing to insert before: take the empty-list path. */
		flst_add_to_empty(base, node, mtr);
		return;
	}

	/* Locate the current head. When it lives on the same page as the
	new node, its pointer is derived from the page frame directly;
	otherwise it is fetched through fut_get_ptr(). */
	flst_node_t*	head;

	if (head_addr.page != self_addr.page) {
		const ulint	zip_size = fil_space_get_zip_size(space_id);

		head = fut_get_ptr(space_id, zip_size, head_addr,
				   RW_X_LATCH, mtr);
	} else {
		head = page_align(node) + head_addr.boffset;
	}

	flst_insert_before(base, node, head, mtr);
}

/********************************************************************//**
Inserts a node after another in a list.
*/
UNIV_INTERN
void
flst_insert_after(
/*==============*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node1,	/*!< in: node to insert after */
	flst_node_t*		node2,	/*!< in: node to add */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	prev_addr;	/* address of node1 */
	fil_addr_t	new_addr;	/* address of node2 */

	ut_ad(mtr && node1 && node2 && base);
	ut_ad(base != node1);
	ut_ad(base != node2);
	ut_ad(node2 != node1);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));

	buf_ptr_get_fsp_addr(node1, &space_id, &prev_addr);
	buf_ptr_get_fsp_addr(node2, &space_id, &new_addr);

	/* Whatever followed node1 will follow node2 from now on. */
	const fil_addr_t	next_addr = flst_get_next_addr(node1, mtr);

	/* Wire node2 between node1 and its old successor. */
	flst_write_addr(node2 + FLST_PREV, prev_addr, mtr);
	flst_write_addr(node2 + FLST_NEXT, next_addr, mtr);

	if (fil_addr_is_null(next_addr)) {
		/* node1 was the tail: node2 is the new last node. */
		flst_write_addr(base + FLST_LAST, new_addr, mtr);
	} else {
		/* Point the old successor back at node2. */
		const ulint	zip_size = fil_space_get_zip_size(space_id);
		flst_node_t*	successor = fut_get_ptr(space_id, zip_size,
							next_addr, RW_X_LATCH,
							mtr);

		flst_write_addr(successor + FLST_PREV, new_addr, mtr);
	}

	/* node1 now points forward to node2. */
	flst_write_addr(node1 + FLST_NEXT, new_addr, mtr);

	/* One more element in the list. */
	const ulint	old_len = flst_get_len(base, mtr);

	mlog_write_ulint(base + FLST_LEN, old_len + 1, MLOG_4BYTES, mtr);
}

/********************************************************************//**
Inserts a node before another in a list.
*/
UNIV_INTERN
void
flst_insert_before(
/*===============*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node2,	/*!< in: node to insert */
	flst_node_t*		node3,	/*!< in: node to insert before */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	new_addr;	/* address of node2 */
	fil_addr_t	next_addr;	/* address of node3 */

	ut_ad(mtr && node2 && node3 && base);
	ut_ad(base != node2);
	ut_ad(base != node3);
	ut_ad(node2 != node3);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));

	buf_ptr_get_fsp_addr(node2, &space_id, &new_addr);
	buf_ptr_get_fsp_addr(node3, &space_id, &next_addr);

	/* Whatever preceded node3 will precede node2 from now on. */
	const fil_addr_t	prev_addr = flst_get_prev_addr(node3, mtr);

	/* Wire node2 between node3's old predecessor and node3. */
	flst_write_addr(node2 + FLST_PREV, prev_addr, mtr);
	flst_write_addr(node2 + FLST_NEXT, next_addr, mtr);

	if (fil_addr_is_null(prev_addr)) {
		/* node3 was the head: node2 is the new first node. */
		flst_write_addr(base + FLST_FIRST, new_addr, mtr);
	} else {
		/* Point the old predecessor forward at node2. */
		const ulint	zip_size = fil_space_get_zip_size(space_id);
		flst_node_t*	predecessor = fut_get_ptr(space_id, zip_size,
							  prev_addr,
							  RW_X_LATCH, mtr);

		flst_write_addr(predecessor + FLST_NEXT, new_addr, mtr);
	}

	/* node3 now points back to node2. */
	flst_write_addr(node3 + FLST_PREV, new_addr, mtr);

	/* One more element in the list. */
	const ulint	old_len = flst_get_len(base, mtr);

	mlog_write_ulint(base + FLST_LEN, old_len + 1, MLOG_4BYTES, mtr);
}

/********************************************************************//**
Removes a node.
*/
UNIV_INTERN
void
flst_remove(
/*========*/
	flst_base_node_t*	base,	/*!< in: pointer to base node of list */
	flst_node_t*		node2,	/*!< in: node to remove */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	ulint		space_id;
	fil_addr_t	self_addr;

	ut_ad(mtr && node2 && base);
	ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));

	buf_ptr_get_fsp_addr(node2, &space_id, &self_addr);

	const ulint		zip_size = fil_space_get_zip_size(space_id);
	const fil_addr_t	prev_addr = flst_get_prev_addr(node2, mtr);
	const fil_addr_t	next_addr = flst_get_next_addr(node2, mtr);

	if (fil_addr_is_null(prev_addr)) {
		/* node2 was first in list: the base node now starts at
		node2's successor. */
		flst_write_addr(base + FLST_FIRST, next_addr, mtr);
	} else {
		/* Make the predecessor skip over node2. A neighbour on the
		same page is reached through the page frame directly. */
		flst_node_t*	prev_node;

		if (prev_addr.page != self_addr.page) {
			prev_node = fut_get_ptr(space_id, zip_size,
						prev_addr, RW_X_LATCH, mtr);
		} else {
			prev_node = page_align(node2) + prev_addr.boffset;
		}

		ut_ad(prev_node != node2);

		flst_write_addr(prev_node + FLST_NEXT, next_addr, mtr);
	}

	if (fil_addr_is_null(next_addr)) {
		/* node2 was last in list: the base node now ends at
		node2's predecessor. */
		flst_write_addr(base + FLST_LAST, prev_addr, mtr);
	} else {
		/* Make the successor point back past node2. */
		flst_node_t*	next_node;

		if (next_addr.page != self_addr.page) {
			next_node = fut_get_ptr(space_id, zip_size,
						next_addr, RW_X_LATCH, mtr);
		} else {
			next_node = page_align(node2) + next_addr.boffset;
		}

		ut_ad(node2 != next_node);

		flst_write_addr(next_node + FLST_PREV, prev_addr, mtr);
	}

	/* One element fewer in the list. */
	const ulint	old_len = flst_get_len(base, mtr);
	ut_ad(old_len > 0);

	mlog_write_ulint(base + FLST_LEN, old_len - 1, MLOG_4BYTES, mtr);
}

/********************************************************************//**
Validates a file-based list.
-@return TRUE if ok */ -UNIV_INTERN -ibool -flst_validate( -/*==========*/ - const flst_base_node_t* base, /*!< in: pointer to base node of list */ - mtr_t* mtr1) /*!< in: mtr */ -{ - ulint space; - ulint zip_size; - const flst_node_t* node; - fil_addr_t node_addr; - fil_addr_t base_addr; - ulint len; - ulint i; - mtr_t mtr2; - - ut_ad(base); - ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX)); - - /* We use two mini-transaction handles: the first is used to - lock the base node, and prevent other threads from modifying the - list. The second is used to traverse the list. We cannot run the - second mtr without committing it at times, because if the list - is long, then the x-locked pages could fill the buffer resulting - in a deadlock. */ - - /* Find out the space id */ - buf_ptr_get_fsp_addr(base, &space, &base_addr); - zip_size = fil_space_get_zip_size(space); - - len = flst_get_len(base, mtr1); - node_addr = flst_get_first(base, mtr1); - - for (i = 0; i < len; i++) { - mtr_start(&mtr2); - - node = fut_get_ptr(space, zip_size, - node_addr, RW_X_LATCH, &mtr2); - node_addr = flst_get_next_addr(node, &mtr2); - - mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer - becoming full */ - } - - ut_a(fil_addr_is_null(node_addr)); - - node_addr = flst_get_last(base, mtr1); - - for (i = 0; i < len; i++) { - mtr_start(&mtr2); - - node = fut_get_ptr(space, zip_size, - node_addr, RW_X_LATCH, &mtr2); - node_addr = flst_get_prev_addr(node, &mtr2); - - mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer - becoming full */ - } - - ut_a(fil_addr_is_null(node_addr)); - - return(TRUE); -} - -/********************************************************************//** -Prints info of a file-based list. 
*/ -UNIV_INTERN -void -flst_print( -/*=======*/ - const flst_base_node_t* base, /*!< in: pointer to base node of list */ - mtr_t* mtr) /*!< in: mtr */ -{ - const buf_frame_t* frame; - ulint len; - - ut_ad(base && mtr); - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - frame = page_align((byte*) base); - - len = flst_get_len(base, mtr); - - fprintf(stderr, - "FILE-BASED LIST:\n" - "Base node in space %lu page %lu byte offset %lu; len %lu\n", - (ulong) page_get_space_id(frame), - (ulong) page_get_page_no(frame), - (ulong) page_offset(base), (ulong) len); -} diff --git a/storage/xtradb/ha/ha0ha.cc b/storage/xtradb/ha/ha0ha.cc deleted file mode 100644 index 3674260f173..00000000000 --- a/storage/xtradb/ha/ha0ha.cc +++ /dev/null @@ -1,528 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file ha/ha0ha.cc -The hash table with external chains - -Created 8/22/1994 Heikki Tuuri -*************************************************************************/ - -#include "ha0ha.h" -#ifdef UNIV_NONINL -#include "ha0ha.ic" -#endif - -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_DEBUG -# include "buf0buf.h" -#endif /* UNIV_DEBUG */ -# include "btr0sea.h" -#include "page0page.h" - -/*************************************************************//** -Creates a hash table with at least n array cells. The actual number -of cells is chosen to be a prime number slightly bigger than n. -@return own: created table */ -UNIV_INTERN -hash_table_t* -ha_create_func( -/*===========*/ - ulint n, /*!< in: number of array cells */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level, /*!< in: level of the mutexes or rw_locks - in the latching order: this is used in the - debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_sync_obj, /*!< in: number of mutexes or rw_locks - to protect the hash table: must be a - power of 2, or 0 */ - ulint type) /*!< in: type of datastructure for which - the memory heap is going to be used e.g.: - MEM_HEAP_FOR_BTR_SEARCH or - MEM_HEAP_FOR_PAGE_HASH */ -{ - hash_table_t* table; - ulint i; - - ut_a(type == MEM_HEAP_FOR_BTR_SEARCH - || type == MEM_HEAP_FOR_PAGE_HASH); - - ut_ad(ut_is_2pow(n_sync_obj)); - table = hash_create(n); - - /* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail, - but in practise it never should in this case, hence the asserts. 
*/ - - if (n_sync_obj == 0) { - table->heap = mem_heap_create_typed( - ut_min(4096, MEM_MAX_ALLOC_IN_BUF), type); - ut_a(table->heap); - - return(table); - } - - if (type == MEM_HEAP_FOR_PAGE_HASH) { - /* We create a hash table protected by rw_locks for - buf_pool->page_hash. */ - hash_create_sync_obj(table, HASH_TABLE_SYNC_RW_LOCK, - n_sync_obj, sync_level); - } else { - hash_create_sync_obj(table, HASH_TABLE_SYNC_MUTEX, - n_sync_obj, sync_level); - } - - table->heaps = static_cast<mem_heap_t**>( - mem_alloc(n_sync_obj * sizeof(void*))); - - for (i = 0; i < n_sync_obj; i++) { - table->heaps[i] = mem_heap_create_typed(4096, type); - ut_a(table->heaps[i]); - } - - return(table); -} - -#ifdef UNIV_SYNC_DEBUG -/*************************************************************//** -Verifies that the specified hash table is a part of adaptive hash index and -that its corresponding latch is X-latched by the current thread. */ -static -bool -ha_assert_btr_x_locked( -/*===================*/ - const hash_table_t* table) /*!<in: hash table to check */ -{ - ulint i; - - ut_ad(table->adaptive); - - for (i = 0; i < btr_search_index_num; i++) { - if (btr_search_sys->hash_tables[i] == table) { - break; - } - } - - ut_ad(i < btr_search_index_num); - ut_ad(rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_EX)); - - return(true); -} -#endif /* UNIV_SYNC_DEBUG */ - -/*************************************************************//** -Empties a hash table and frees the memory heaps. */ -UNIV_INTERN -void -ha_clear( -/*=====*/ - hash_table_t* table) /*!< in, own: hash table */ -{ - ulint i; - ulint n; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!table->adaptive || ha_assert_btr_x_locked(table)); -#endif /* UNIV_SYNC_DEBUG */ - - /* Free the memory heaps. 
*/ - n = table->n_sync_obj; - - for (i = 0; i < n; i++) { - mem_heap_free(table->heaps[i]); - } - - if (table->heaps) { - mem_free(table->heaps); - } - - switch (table->type) { - case HASH_TABLE_SYNC_MUTEX: - for (ulint i = 0; i < table->n_sync_obj; i++) - mutex_free(table->sync_obj.mutexes + i); - mem_free(table->sync_obj.mutexes); - table->sync_obj.mutexes = NULL; - break; - - case HASH_TABLE_SYNC_RW_LOCK: - for (ulint i = 0; i < table->n_sync_obj; i++) - rw_lock_free(table->sync_obj.rw_locks + i); - mem_free(table->sync_obj.rw_locks); - table->sync_obj.rw_locks = NULL; - break; - - case HASH_TABLE_SYNC_NONE: - /* do nothing */ - break; - } - - table->n_sync_obj = 0; - table->type = HASH_TABLE_SYNC_NONE; - - - /* Clear the hash table. */ - n = hash_get_n_cells(table); - - for (i = 0; i < n; i++) { - hash_get_nth_cell(table, i)->node = NULL; - } -} - -/*************************************************************//** -Inserts an entry into a hash table. If an entry with the same fold number -is found, its node is updated to point to the new data, and no new node -is inserted. If btr_search_enabled is set to FALSE, we will only allow -updating existing nodes, but no new node is allowed to be added. -@return TRUE if succeed, FALSE if no more memory could be allocated */ -UNIV_INTERN -ibool -ha_insert_for_fold_func( -/*====================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: folded value of data; if a node with - the same fold value already exists, it is - updated to point to the same data, and no new - node is created! 
*/ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - buf_block_t* block, /*!< in: buffer block containing the data */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* data) /*!< in: data, must not be NULL */ -{ - hash_cell_t* cell; - ha_node_t* node; - ha_node_t* prev_node; - ulint hash; - - ut_ad(data); - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(block->frame == page_align(data)); -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - hash_assert_can_modify(table, fold); - ut_ad(btr_search_enabled); - - hash = hash_calc_hash(fold, table); - - cell = hash_get_nth_cell(table, hash); - - prev_node = static_cast<ha_node_t*>(cell->node); - - while (prev_node != NULL) { - if (prev_node->fold == fold) { -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (table->adaptive) { - buf_block_t* prev_block = prev_node->block; - ut_a(prev_block->frame - == page_align(prev_node->data)); - ut_a(prev_block->n_pointers > 0); - prev_block->n_pointers--; - block->n_pointers++; - } - - prev_node->block = block; -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - prev_node->data = data; - - return(TRUE); - } - - prev_node = prev_node->next; - } - - /* We have to allocate a new chain node */ - - node = static_cast<ha_node_t*>( - mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t))); - - if (node == NULL) { - /* It was a btr search type memory heap and at the moment - no more memory could be allocated: return */ - - ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH); - - return(FALSE); - } - - ha_node_set_data(node, block, data); - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (table->adaptive) { - block->n_pointers++; - } -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - - node->fold = fold; - - node->next = NULL; - - prev_node = static_cast<ha_node_t*>(cell->node); - - if (prev_node == NULL) { - - cell->node = node; - - return(TRUE); - } - - while (prev_node->next != NULL) { - - prev_node = 
prev_node->next; - } - - prev_node->next = node; - - return(TRUE); -} - -/***********************************************************//** -Deletes a hash node. */ -UNIV_INTERN -void -ha_delete_hash_node( -/*================*/ - hash_table_t* table, /*!< in: hash table */ - ha_node_t* del_node) /*!< in: node to be deleted */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); -#ifdef UNIV_SYNC_DEBUG - ut_ad(ha_assert_btr_x_locked(table)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(btr_search_enabled); -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (table->adaptive) { - ut_a(del_node->block->frame = page_align(del_node->data)); - ut_a(del_node->block->n_pointers > 0); - del_node->block->n_pointers--; - } -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - - HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node); -} - -/*********************************************************//** -Looks for an element when we know the pointer to the data, and updates -the pointer to data, if found. 
-@return TRUE if found */ -UNIV_INTERN -ibool -ha_search_and_update_if_found_func( -/*===============================*/ - hash_table_t* table, /*!< in/out: hash table */ - ulint fold, /*!< in: folded value of the searched data */ - const rec_t* data, /*!< in: pointer to the data */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - buf_block_t* new_block,/*!< in: block containing new_data */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* new_data)/*!< in: new pointer to the data */ -{ - ha_node_t* node; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - hash_assert_can_modify(table, fold); -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(new_block->frame == page_align(new_data)); -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG - ut_ad(ha_assert_btr_x_locked(table)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!btr_search_enabled) { - return(FALSE); - } - - node = ha_search_with_data(table, fold, data); - - if (node) { -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (table->adaptive) { - ut_a(node->block->n_pointers > 0); - node->block->n_pointers--; - new_block->n_pointers++; - } - - node->block = new_block; -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - node->data = new_data; - - return(TRUE); - } - - return(FALSE); -} - -/*****************************************************************//** -Removes from the chain determined by fold all nodes whose data pointer -points to the page given. 
*/ -UNIV_INTERN -void -ha_remove_all_nodes_to_page( -/*========================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: fold value */ - const page_t* page) /*!< in: buffer page */ -{ - ha_node_t* node; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - hash_assert_can_modify(table, fold); - ut_ad(btr_search_enabled); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (page_align(ha_node_get_data(node)) == page) { - - /* Remove the hash node */ - - ha_delete_hash_node(table, node); - - /* Start again from the first node in the chain - because the deletion may compact the heap of - nodes and move other nodes! */ - - node = ha_chain_get_first(table, fold); - } else { - node = ha_chain_get_next(node); - } - } -#ifdef UNIV_DEBUG - /* Check that all nodes really got deleted */ - - node = ha_chain_get_first(table, fold); - - while (node) { - ut_a(page_align(ha_node_get_data(node)) != page); - - node = ha_chain_get_next(node); - } -#endif -} - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/*************************************************************//** -Validates a given range of the cells in hash table. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -ha_validate( -/*========*/ - hash_table_t* table, /*!< in: hash table */ - ulint start_index, /*!< in: start index */ - ulint end_index) /*!< in: end index */ -{ - ibool ok = TRUE; - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_a(start_index <= end_index); - ut_a(start_index < hash_get_n_cells(table)); - ut_a(end_index < hash_get_n_cells(table)); - - for (i = start_index; i <= end_index; i++) { - ha_node_t* node; - hash_cell_t* cell; - - cell = hash_get_nth_cell(table, i); - - for (node = static_cast<ha_node_t*>(cell->node); - node != 0; - node = node->next) { - - if (hash_calc_hash(node->fold, table) != i) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Error: hash table node" - " fold value %lu does not\n" - "InnoDB: match the cell number %lu.\n", - (ulong) node->fold, (ulong) i); - - ok = FALSE; - } - } - } - - return(ok); -} -#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ - -/*************************************************************//** -Prints info of a hash table. 
*/ -UNIV_INTERN -void -ha_print_info( -/*==========*/ - FILE* file, /*!< in: file where to print */ - hash_table_t* table) /*!< in: hash table */ -{ -#ifdef UNIV_DEBUG -/* Some of the code here is disabled for performance reasons in production -builds, see http://bugs.mysql.com/36941 */ -#define PRINT_USED_CELLS -#endif /* UNIV_DEBUG */ - -#ifdef PRINT_USED_CELLS - hash_cell_t* cell; - ulint cells = 0; - ulint i; -#endif /* PRINT_USED_CELLS */ - ulint n_bufs; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); -#ifdef PRINT_USED_CELLS - for (i = 0; i < hash_get_n_cells(table); i++) { - - cell = hash_get_nth_cell(table, i); - - if (cell->node) { - - cells++; - } - } -#endif /* PRINT_USED_CELLS */ - - fprintf(file, "Hash table size %lu", - (ulong) hash_get_n_cells(table)); - -#ifdef PRINT_USED_CELLS - fprintf(file, ", used cells %lu", (ulong) cells); -#endif /* PRINT_USED_CELLS */ - - if (table->heaps == NULL && table->heap != NULL) { - - /* This calculation is intended for the adaptive hash - index: how many buffer frames we have reserved? */ - - n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1; - - if (table->heap->free_block) { - n_bufs++; - } - - fprintf(file, ", node heap has %lu buffer(s)\n", - (ulong) n_bufs); - } -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/ha/ha0storage.cc b/storage/xtradb/ha/ha0storage.cc deleted file mode 100644 index 6820591f316..00000000000 --- a/storage/xtradb/ha/ha0storage.cc +++ /dev/null @@ -1,184 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file ha/ha0storage.cc -Hash storage. -Provides a data structure that stores chunks of data in -its own storage, avoiding duplicates. - -Created September 22, 2007 Vasil Dimov -*******************************************************/ - -#include "univ.i" -#include "ha0storage.h" -#include "hash0hash.h" -#include "mem0mem.h" -#include "ut0rnd.h" - -#ifdef UNIV_NONINL -#include "ha0storage.ic" -#endif - -/*******************************************************************//** -Retrieves a data from a storage. If it is present, a pointer to the -stored copy of data is returned, otherwise NULL is returned. 
*/
static
const void*
ha_storage_get(
/*===========*/
	ha_storage_t*	storage,	/*!< in: hash storage */
	const void*	data,		/*!< in: data to check for */
	ulint		data_len)	/*!< in: data length */
{
	ha_storage_node_t*	node;

	/* Compute the fold once here so that the HASH_SEARCH macro does
	not call ut_fold_binary() repeatedly. */
	const ulint	fold = ut_fold_binary(
		static_cast<const byte*>(data), data_len);

/* A node matches when both the length and the bytes are equal. */
#define IS_FOUND	\
	node->data_len == data_len && memcmp(node->data, data, data_len) == 0

	HASH_SEARCH(
		next,			/* node->"next" */
		storage->hash,		/* the hash table */
		fold,			/* key */
		ha_storage_node_t*,	/* type of node->next */
		node,			/* auxiliary variable */
		,			/* assertion */
		IS_FOUND);		/* search criteria */

	if (node != NULL) {

		return(node->data);
	}

	/* not stored */
	return(NULL);
}

/*******************************************************************//**
Copies data into the storage and returns a pointer to the copy. If the
same data chunk is already present, then pointer to it is returned.
Data chunks are considered to be equal if len1 == len2 and
memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
data_len bytes need to be allocated) and the size of storage is going to
become more than "memlim" then "data" is not added and NULL is returned.
To disable this behavior "memlim" can be set to 0, which stands for
"no limit".
*/ -UNIV_INTERN -const void* -ha_storage_put_memlim( -/*==================*/ - ha_storage_t* storage, /*!< in/out: hash storage */ - const void* data, /*!< in: data to store */ - ulint data_len, /*!< in: data length */ - ulint memlim) /*!< in: memory limit to obey */ -{ - void* raw; - ha_storage_node_t* node; - const void* data_copy; - ulint fold; - - /* check if data chunk is already present */ - data_copy = ha_storage_get(storage, data, data_len); - if (data_copy != NULL) { - - return(data_copy); - } - - /* not present */ - - /* check if we are allowed to allocate data_len bytes */ - if (memlim > 0 - && ha_storage_get_size(storage) + data_len > memlim) { - - return(NULL); - } - - /* we put the auxiliary node struct and the data itself in one - continuous block */ - raw = mem_heap_alloc(storage->heap, - sizeof(ha_storage_node_t) + data_len); - - node = (ha_storage_node_t*) raw; - data_copy = (byte*) raw + sizeof(*node); - - memcpy((byte*) raw + sizeof(*node), data, data_len); - - node->data_len = data_len; - node->data = data_copy; - - /* avoid repetitive calls to ut_fold_binary() in the HASH_INSERT - macro */ - fold = ut_fold_binary(static_cast<const byte*>(data), data_len); - - HASH_INSERT( - ha_storage_node_t, /* type used in the hash chain */ - next, /* node->"next" */ - storage->hash, /* the hash table */ - fold, /* key */ - node); /* add this data to the hash */ - - /* the output should not be changed because it will spoil the - hash table */ - return(data_copy); -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -void -test_ha_storage() -{ - ha_storage_t* storage; - char buf[1024]; - int i; - const void* stored[256]; - const void* p; - - storage = ha_storage_create(0, 0); - - for (i = 0; i < 256; i++) { - - memset(buf, i, sizeof(buf)); - stored[i] = ha_storage_put(storage, buf, sizeof(buf)); - } - - //ha_storage_empty(&storage); - - for (i = 255; i >= 0; i--) { - - memset(buf, i, sizeof(buf)); - p = ha_storage_put(storage, buf, sizeof(buf)); - - if (p != stored[i]) { - 
- fprintf(stderr, "ha_storage_put() returned %p " - "instead of %p, i=%d\n", p, stored[i], i); - return; - } - } - - fprintf(stderr, "all ok\n"); - - ha_storage_free(storage); -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ diff --git a/storage/xtradb/ha/hash0hash.cc b/storage/xtradb/ha/hash0hash.cc deleted file mode 100644 index 6f5b98e5e98..00000000000 --- a/storage/xtradb/ha/hash0hash.cc +++ /dev/null @@ -1,403 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file ha/hash0hash.cc -The simple hash table utility - -Created 5/20/1997 Heikki Tuuri -*******************************************************/ - -#include "hash0hash.h" -#ifdef UNIV_NONINL -#include "hash0hash.ic" -#endif - -#include "mem0mem.h" - -#ifndef UNIV_HOTBACKUP - -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t hash_table_mutex_key; -# endif /* UNIV_PFS_MUTEX */ - -# ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t hash_table_rw_lock_key; -# endif /* UNIV_PFS_RWLOCK */ -/************************************************************//** -Reserves the mutex for a fold value in a hash table. 
*/ -UNIV_INTERN -void -hash_mutex_enter( -/*=============*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - mutex_enter(hash_get_mutex(table, fold)); -} - -/************************************************************//** -Releases the mutex for a fold value in a hash table. */ -UNIV_INTERN -void -hash_mutex_exit( -/*============*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - mutex_exit(hash_get_mutex(table, fold)); -} - -/************************************************************//** -Reserves all the mutexes of a hash table, in an ascending order. */ -UNIV_INTERN -void -hash_mutex_enter_all( -/*=================*/ - hash_table_t* table) /*!< in: hash table */ -{ - ulint i; - - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - for (i = 0; i < table->n_sync_obj; i++) { - - mutex_enter(table->sync_obj.mutexes + i); - } -} - -/************************************************************//** -Releases all the mutexes of a hash table. */ -UNIV_INTERN -void -hash_mutex_exit_all( -/*================*/ - hash_table_t* table) /*!< in: hash table */ -{ - ulint i; - - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - for (i = 0; i < table->n_sync_obj; i++) { - - mutex_exit(table->sync_obj.mutexes + i); - } -} - -/************************************************************//** -Releases all but the passed in mutex of a hash table. 
*/ -UNIV_INTERN -void -hash_mutex_exit_all_but( -/*====================*/ - hash_table_t* table, /*!< in: hash table */ - ib_prio_mutex_t* keep_mutex) /*!< in: mutex to keep */ -{ - ulint i; - - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - for (i = 0; i < table->n_sync_obj; i++) { - - ib_prio_mutex_t* mutex = table->sync_obj.mutexes + i; - if (UNIV_LIKELY(keep_mutex != mutex)) { - mutex_exit(mutex); - } - } - - ut_ad(mutex_own(keep_mutex)); -} - -/************************************************************//** -s-lock a lock for a fold value in a hash table. */ -UNIV_INTERN -void -hash_lock_s( -/*========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - - prio_rw_lock_t* lock = hash_get_lock(table, fold); - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - ut_ad(lock); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(lock); -} - -/************************************************************//** -x-lock a lock for a fold value in a hash table. */ -UNIV_INTERN -void -hash_lock_x( -/*========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - - prio_rw_lock_t* lock = hash_get_lock(table, fold); - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - ut_ad(lock); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_x_lock(lock); -} - -/************************************************************//** -unlock an s-lock for a fold value in a hash table. 
*/ -UNIV_INTERN -void -hash_unlock_s( -/*==========*/ - - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - - prio_rw_lock_t* lock = hash_get_lock(table, fold); - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - ut_ad(lock); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_unlock(lock); -} - -/************************************************************//** -unlock x-lock for a fold value in a hash table. */ -UNIV_INTERN -void -hash_unlock_x( -/*==========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - prio_rw_lock_t* lock = hash_get_lock(table, fold); - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - ut_ad(lock); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_x_unlock(lock); -} - -/************************************************************//** -Reserves all the locks of a hash table, in an ascending order. */ -UNIV_INTERN -void -hash_lock_x_all( -/*============*/ - hash_table_t* table) /*!< in: hash table */ -{ - ulint i; - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - for (i = 0; i < table->n_sync_obj; i++) { - - prio_rw_lock_t* lock = table->sync_obj.rw_locks + i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_x_lock(lock); - } -} - -/************************************************************//** -Releases all the locks of a hash table, in an ascending order. 
*/ -UNIV_INTERN -void -hash_unlock_x_all( -/*==============*/ - hash_table_t* table) /*!< in: hash table */ -{ - ulint i; - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - for (i = 0; i < table->n_sync_obj; i++) { - - prio_rw_lock_t* lock = table->sync_obj.rw_locks + i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_x_unlock(lock); - } -} - -/************************************************************//** -Releases all but passed in lock of a hash table, */ -UNIV_INTERN -void -hash_unlock_x_all_but( -/*==================*/ - hash_table_t* table, /*!< in: hash table */ - prio_rw_lock_t* keep_lock) /*!< in: lock to keep */ -{ - ulint i; - - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - for (i = 0; i < table->n_sync_obj; i++) { - - prio_rw_lock_t* lock = table->sync_obj.rw_locks + i; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (UNIV_LIKELY(keep_lock != lock)) { - rw_lock_x_unlock(lock); - } - } -} - -#endif /* !UNIV_HOTBACKUP */ - -/*************************************************************//** -Creates a hash table with >= n array cells. The actual number of cells is -chosen to be a prime number slightly bigger than n. -@return own: created table */ -UNIV_INTERN -hash_table_t* -hash_create( -/*========*/ - ulint n) /*!< in: number of array cells */ -{ - hash_cell_t* array; - ulint prime; - hash_table_t* table; - - prime = ut_find_prime(n); - - table = static_cast<hash_table_t*>(mem_alloc(sizeof(hash_table_t))); - - array = static_cast<hash_cell_t*>( - ut_malloc(sizeof(hash_cell_t) * prime)); - - /* The default type of hash_table is HASH_TABLE_SYNC_NONE i.e.: - the caller is responsible for access control to the table. 
*/ - table->type = HASH_TABLE_SYNC_NONE; - table->array = array; - table->n_cells = prime; -#ifndef UNIV_HOTBACKUP -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - table->adaptive = FALSE; -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - table->n_sync_obj = 0; - table->sync_obj.mutexes = NULL; - table->heaps = NULL; -#endif /* !UNIV_HOTBACKUP */ - table->heap = NULL; - ut_d(table->magic_n = HASH_TABLE_MAGIC_N); - - /* Initialize the cell array */ - hash_table_clear(table); - - return(table); -} - -/*************************************************************//** -Frees a hash table. */ -UNIV_INTERN -void -hash_table_free( -/*============*/ - hash_table_t* table) /*!< in, own: hash table */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - - ut_free(table->array); - mem_free(table); -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Creates a sync object array to protect a hash table. -::sync_obj can be mutexes or rw_locks depening on the type of -hash table. 
*/ -UNIV_INTERN -void -hash_create_sync_obj_func( -/*======================*/ - hash_table_t* table, /*!< in: hash table */ - enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX - or HASH_TABLE_SYNC_RW_LOCK */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level,/*!< in: latching order level - of the mutexes: used in the - debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_sync_obj)/*!< in: number of sync objects, - must be a power of 2 */ -{ - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_a(n_sync_obj > 0); - ut_a(ut_is_2pow(n_sync_obj)); - - table->type = type; - - switch (type) { - case HASH_TABLE_SYNC_MUTEX: - table->sync_obj.mutexes = static_cast<ib_prio_mutex_t*>( - mem_alloc(n_sync_obj * sizeof(ib_prio_mutex_t))); - - for (i = 0; i < n_sync_obj; i++) { - mutex_create(hash_table_mutex_key, - table->sync_obj.mutexes + i, sync_level); - } - - break; - - case HASH_TABLE_SYNC_RW_LOCK: - table->sync_obj.rw_locks = static_cast<prio_rw_lock_t*>( - mem_alloc(n_sync_obj * sizeof(prio_rw_lock_t))); - - for (i = 0; i < n_sync_obj; i++) { - rw_lock_create(hash_table_rw_lock_key, - table->sync_obj.rw_locks + i, sync_level); - } - - break; - - case HASH_TABLE_SYNC_NONE: - ut_error; - } - - table->n_sync_obj = n_sync_obj; -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/ha_innodb.def b/storage/xtradb/ha_innodb.def deleted file mode 100644 index e0faa62deb1..00000000000 --- a/storage/xtradb/ha_innodb.def +++ /dev/null @@ -1,4 +0,0 @@ -EXPORTS - _mysql_plugin_interface_version_ - _mysql_sizeof_struct_st_plugin_ - _mysql_plugin_declarations_ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc deleted file mode 100644 index 6097f87b43d..00000000000 --- a/storage/xtradb/handler/ha_innodb.cc +++ /dev/null @@ -1,22318 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved. 
-Copyright (c) 2013, 2017, MariaDB Corporation. -Copyright (c) 2008, 2009 Google Inc. -Copyright (c) 2009, Percona Inc. -Copyright (c) 2012, Facebook Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -#define MYSQL_SERVER - -#include <sql_table.h> // explain_filename, nz2, EXPLAIN_PARTITIONS_AS_COMMENT, - // EXPLAIN_FILENAME_MAX_EXTRA_LENGTH - -#include <sql_acl.h> // PROCESS_ACL -#include <debug_sync.h> // DEBUG_SYNC -#include <my_base.h> // HA_OPTION_* -#include <mysys_err.h> -#include <innodb_priv.h> -#include <table_cache.h> -#include <my_check_opt.h> - -#ifdef _WIN32 -#include <io.h> -#endif - -#include <my_systemd.h> - -/** @file ha_innodb.cc */ - -/* Include necessary InnoDB headers */ -#include "univ.i" -#include "buf0dump.h" -#include "buf0lru.h" -#include "buf0flu.h" -#include "buf0dblwr.h" -#include "btr0sea.h" -#include "btr0defragment.h" -#include "os0file.h" -#include "os0thread.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "trx0roll.h" -#include "trx0trx.h" -#include "trx0sys.h" -#include "rem0types.h" -#include "row0ins.h" -#include "row0mysql.h" -#include "row0sel.h" -#include "row0upd.h" -#include "log0log.h" -#include "log0online.h" -#include "lock0lock.h" -#include "dict0crea.h" -#include "btr0cur.h" -#include "btr0btr.h" -#include "fsp0fsp.h" -#include "sync0sync.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#include "trx0xa.h" -#include "row0merge.h" -#include "dict0boot.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" -#include "ha_prototypes.h" -#include "ut0mem.h" -#include "ut0timer.h" -#include "ibuf0ibuf.h" -#include "dict0dict.h" -#include "srv0mon.h" -#include "api0api.h" -#include "api0misc.h" -#include "pars0pars.h" -#include "fts0fts.h" -#include "fts0types.h" -#include "row0import.h" -#include "row0quiesce.h" -#include "row0mysql.h" -#ifdef UNIV_DEBUG -#include "trx0purge.h" -#endif /* UNIV_DEBUG */ -#include 
"fts0priv.h" -#include "page0zip.h" -#include "fil0pagecompress.h" - -#define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) - -#ifdef MYSQL_DYNAMIC_PLUGIN -#define tc_size 400 -#define tdc_size 400 -#endif - -#include "ha_innodb.h" -#include "i_s.h" -#include "xtradb_i_s.h" - -#include <string> -#include <sstream> - -#include <mysql/plugin.h> -#include <mysql/service_wsrep.h> - -# ifndef MYSQL_PLUGIN_IMPORT -# define MYSQL_PLUGIN_IMPORT /* nothing */ -# endif /* MYSQL_PLUGIN_IMPORT */ - -#ifdef WITH_WSREP -#include "dict0priv.h" -#include "../storage/innobase/include/ut0byte.h" -#include <mysql/service_md5.h> - -class binlog_trx_data; -extern handlerton *binlog_hton; - -extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_wsrep_rollback; -extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_wsrep_rollback; -extern MYSQL_PLUGIN_IMPORT wsrep_aborting_thd_t wsrep_aborting_thd; - -static inline wsrep_ws_handle_t* -wsrep_ws_handle(THD* thd, const trx_t* trx) { - return wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), - (wsrep_trx_id_t)trx->id); -} - -extern TC_LOG* tc_log; -extern void wsrep_cleanup_transaction(THD *thd); -static int -wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, - my_bool signal); -static void -wsrep_fake_trx_id(handlerton* hton, THD *thd); -static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid); -static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid); -#endif /* WITH_WSREP */ - -/** to protect innobase_open_files */ -static mysql_mutex_t innobase_share_mutex; -/** to force correct commit order in binlog */ -static ulong commit_threads = 0; -static mysql_cond_t commit_cond; -static mysql_mutex_t commit_cond_m; -static mysql_mutex_t pending_checkpoint_mutex; -static bool innodb_inited = 0; - -#define INSIDE_HA_INNOBASE_CC - -#define EQ_CURRENT_THD(thd) ((thd) == current_thd) - -static struct handlerton* innodb_hton_ptr; - -static const long AUTOINC_OLD_STYLE_LOCKING = 0; -static const 
long AUTOINC_NEW_STYLE_LOCKING = 1; -static const long AUTOINC_NO_LOCKING = 2; - -static long innobase_mirrored_log_groups; -static long innobase_log_buffer_size; -static long innobase_additional_mem_pool_size; -static long innobase_file_io_threads; -static long innobase_open_files; -static long innobase_autoinc_lock_mode; -static ulong innobase_commit_concurrency = 0; -static ulong innobase_read_io_threads; -static ulong innobase_write_io_threads; -static long innobase_buffer_pool_instances = 1; - -static ulong innobase_log_block_size; - -static long long innobase_buffer_pool_size, innobase_log_file_size; -/** Deprecated option that has no effect. */ -static my_bool innodb_buffer_pool_populate; - -/** Percentage of the buffer pool to reserve for 'old' blocks. -Connected to buf_LRU_old_ratio. */ -static uint innobase_old_blocks_pct; - -/** Maximum on-disk size of change buffer in terms of percentage -of the buffer pool. */ -static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE; - -/* The default values for the following char* start-up parameters -are determined in innobase_init below: */ - -static char* innobase_data_home_dir = NULL; -static char* innobase_data_file_path = NULL; -static char* innobase_file_format_name = NULL; -static char* innobase_change_buffering = NULL; -static char* innobase_enable_monitor_counter = NULL; -static char* innobase_disable_monitor_counter = NULL; -static char* innobase_reset_monitor_counter = NULL; -static char* innobase_reset_all_monitor_counter = NULL; - -/* The highest file format being used in the database. The value can be -set by user, however, it will be adjusted to the newer file format if -a table of such format is created/opened. 
*/ -static char* innobase_file_format_max = NULL; - -/** Default value of innodb_file_format */ -static const char* innodb_file_format_default = "Barracuda"; -/** Default value of innodb_file_format_max */ -static const char* innodb_file_format_max_default = "Antelope"; - -static char* innobase_file_flush_method = NULL; - -/* This variable can be set in the server configure file, specifying -stopword table to be used */ -static char* innobase_server_stopword_table = NULL; - -/* Below we have boolean-valued start-up parameters, and their default -values */ - -static ulong innobase_fast_shutdown = 1; -static my_bool innobase_file_format_check = TRUE; -#ifdef UNIV_LOG_ARCHIVE -static my_bool innobase_log_archive = FALSE; -static char* innobase_log_arch_dir = NULL; -#endif /* UNIV_LOG_ARCHIVE */ -static my_bool innobase_use_atomic_writes = FALSE; -static my_bool innobase_use_fallocate = TRUE; -static my_bool innobase_use_doublewrite = TRUE; -static my_bool innobase_use_checksums = TRUE; -static my_bool innobase_locks_unsafe_for_binlog = FALSE; -static my_bool innobase_rollback_on_timeout = FALSE; -static my_bool innobase_create_status_file = FALSE; -static my_bool innobase_stats_on_metadata = TRUE; -static my_bool innobase_large_prefix = FALSE; -static my_bool innodb_optimize_fulltext_only = FALSE; - -static char* internal_innobase_data_file_path = NULL; - -static char* innodb_version_str = (char*) INNODB_VERSION_STR; - -extern uint srv_fil_crypt_rotate_key_age; -extern uint srv_n_fil_crypt_iops; - -extern my_bool srv_immediate_scrub_data_uncompressed; -extern my_bool srv_background_scrub_data_uncompressed; -extern my_bool srv_background_scrub_data_compressed; -extern uint srv_background_scrub_data_interval; -extern uint srv_background_scrub_data_check_interval; -#ifdef UNIV_DEBUG -extern my_bool srv_scrub_force_testing; -#endif - -/** Possible values for system variable "innodb_stats_method". 
The values -are defined the same as its corresponding MyISAM system variable -"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */ -static const char* innodb_stats_method_names[] = { - "nulls_equal", - "nulls_unequal", - "nulls_ignored", - NullS -}; - -/** Used to define an enumerate type of the system variable innodb_stats_method. -This is the same as "myisam_stats_method_typelib" */ -static TYPELIB innodb_stats_method_typelib = { - array_elements(innodb_stats_method_names) - 1, - "innodb_stats_method_typelib", - innodb_stats_method_names, - NULL -}; - -/** Possible values for system variables "innodb_checksum_algorithm" and -"innodb_log_checksum_algorithm". */ -UNIV_INTERN -const char* innodb_checksum_algorithm_names[] = { - "CRC32", - "STRICT_CRC32", - "INNODB", - "STRICT_INNODB", - "NONE", - "STRICT_NONE", - NullS -}; - -/** Used to define an enumerate type of the system variables -innodb_checksum_algorithm and innodb_log_checksum_algorithm. */ -UNIV_INTERN -TYPELIB innodb_checksum_algorithm_typelib = { - array_elements(innodb_checksum_algorithm_names) - 1, - "innodb_checksum_algorithm_typelib", - innodb_checksum_algorithm_names, - NULL -}; - -/** Possible values for system variable "innodb_cleaner_lsn_age_factor". */ -static const char* innodb_cleaner_lsn_age_factor_names[] = { - "LEGACY", - "HIGH_CHECKPOINT", - NullS -}; - -/** Enumeration for innodb_cleaner_lsn_age_factor. */ -static TYPELIB innodb_cleaner_lsn_age_factor_typelib = { - array_elements(innodb_cleaner_lsn_age_factor_names) - 1, - "innodb_cleaner_lsn_age_factor_typelib", - innodb_cleaner_lsn_age_factor_names, - NULL -}; - -/** Possible values for system variable "innodb_foreground_preflush". */ -static const char* innodb_foreground_preflush_names[] = { - "SYNC_PREFLUSH", - "EXPONENTIAL_BACKOFF", - NullS -}; - -/* Enumeration for innodb_foreground_preflush. 
*/ -static TYPELIB innodb_foreground_preflush_typelib = { - array_elements(innodb_foreground_preflush_names) - 1, - "innodb_foreground_preflush_typelib", - innodb_foreground_preflush_names, - NULL -}; - -/** Possible values for system variable "innodb_empty_free_list_algorithm". */ -static const char* innodb_empty_free_list_algorithm_names[] = { - "LEGACY", - "BACKOFF", - NullS -}; - -/** Enumeration for innodb_empty_free_list_algorithm. */ -static TYPELIB innodb_empty_free_list_algorithm_typelib = { - array_elements(innodb_empty_free_list_algorithm_names) - 1, - "innodb_empty_free_list_algorithm_typelib", - innodb_empty_free_list_algorithm_names, - NULL -}; - -/** Possible values of the parameter innodb_lock_schedule_algorithm */ -static const char* innodb_lock_schedule_algorithm_names[] = { - "fcfs", - "vats", - NullS -}; - -/** Used to define an enumerate type of the system variable -innodb_lock_schedule_algorithm. */ -static TYPELIB innodb_lock_schedule_algorithm_typelib = { - array_elements(innodb_lock_schedule_algorithm_names) - 1, - "innodb_lock_schedule_algorithm_typelib", - innodb_lock_schedule_algorithm_names, - NULL -}; - - -/* The following counter is used to convey information to InnoDB -about server activity: in case of normal DML ops it is not -sensible to call srv_active_wake_master_thread after each -operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */ - -#define INNOBASE_WAKE_INTERVAL 32 -static ulong innobase_active_counter = 0; - -static hash_table_t* innobase_open_tables; - -/** Allowed values of innodb_change_buffering */ -static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = { - "none", /* IBUF_USE_NONE */ - "inserts", /* IBUF_USE_INSERT */ - "deletes", /* IBUF_USE_DELETE_MARK */ - "changes", /* IBUF_USE_INSERT_DELETE_MARK */ - "purges", /* IBUF_USE_DELETE */ - "all" /* IBUF_USE_ALL */ -}; - -/* Call back function array defined by MySQL and used to -retrieve FTS results. 
*/ -const struct _ft_vft ft_vft_result = {NULL, - innobase_fts_find_ranking, - innobase_fts_close_ranking, - innobase_fts_retrieve_ranking, - NULL}; - -const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version, - innobase_fts_flags, - innobase_fts_retrieve_docid, - innobase_fts_count_matches}; - -#ifdef HAVE_PSI_INTERFACE -/* Keys to register pthread mutexes/cond in the current file with -performance schema */ -static mysql_pfs_key_t innobase_share_mutex_key; -static mysql_pfs_key_t commit_cond_mutex_key; -static mysql_pfs_key_t commit_cond_key; -static mysql_pfs_key_t pending_checkpoint_mutex_key; - -static PSI_mutex_info all_pthread_mutexes[] = { - {&commit_cond_mutex_key, "commit_cond_mutex", 0}, - {&innobase_share_mutex_key, "innobase_share_mutex", 0}, - {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0} -}; - -static PSI_cond_info all_innodb_conds[] = { - {&commit_cond_key, "commit_cond", 0} -}; - -# ifdef UNIV_PFS_MUTEX -/* all_innodb_mutexes array contains mutexes that are -performance schema instrumented if "UNIV_PFS_MUTEX" -is defined */ -static PSI_mutex_info all_innodb_mutexes[] = { - {&autoinc_mutex_key, "autoinc_mutex", 0}, -# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK - {&buffer_block_mutex_key, "buffer_block_mutex", 0}, -# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ - {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0}, - {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0}, - {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0}, - {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0}, - {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0}, - {&buf_pool_flush_state_mutex_key, "buf_pool_flush_state_mutex", 0}, - {&cache_last_read_mutex_key, "cache_last_read_mutex", 0}, - {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0}, - {&dict_sys_mutex_key, "dict_sys_mutex", 0}, - {&file_format_max_mutex_key, "file_format_max_mutex", 0}, - {&fil_system_mutex_key, "fil_system_mutex", 0}, - 
{&flush_list_mutex_key, "flush_list_mutex", 0}, - {&fts_bg_threads_mutex_key, "fts_bg_threads_mutex", 0}, - {&fts_delete_mutex_key, "fts_delete_mutex", 0}, - {&fts_optimize_mutex_key, "fts_optimize_mutex", 0}, - {&fts_doc_id_mutex_key, "fts_doc_id_mutex", 0}, - {&fts_pll_tokenize_mutex_key, "fts_pll_tokenize_mutex", 0}, - {&log_flush_order_mutex_key, "log_flush_order_mutex", 0}, - {&hash_table_mutex_key, "hash_table_mutex", 0}, - {&ibuf_bitmap_mutex_key, "ibuf_bitmap_mutex", 0}, - {&ibuf_mutex_key, "ibuf_mutex", 0}, - {&ibuf_pessimistic_insert_mutex_key, - "ibuf_pessimistic_insert_mutex", 0}, -# ifndef HAVE_ATOMIC_BUILTINS - {&server_mutex_key, "server_mutex", 0}, -# endif /* !HAVE_ATOMIC_BUILTINS */ - {&log_bmp_sys_mutex_key, "log_bmp_sys_mutex", 0}, - {&log_sys_mutex_key, "log_sys_mutex", 0}, -# ifdef UNIV_MEM_DEBUG - {&mem_hash_mutex_key, "mem_hash_mutex", 0}, -# endif /* UNIV_MEM_DEBUG */ - {&mem_pool_mutex_key, "mem_pool_mutex", 0}, - {&mutex_list_mutex_key, "mutex_list_mutex", 0}, - {&page_zip_stat_per_index_mutex_key, "page_zip_stat_per_index_mutex", 0}, - {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0}, - {&recv_sys_mutex_key, "recv_sys_mutex", 0}, - {&recv_writer_mutex_key, "recv_writer_mutex", 0}, - {&rseg_mutex_key, "rseg_mutex", 0}, -# ifdef UNIV_SYNC_DEBUG - {&rw_lock_debug_mutex_key, "rw_lock_debug_mutex", 0}, -# endif /* UNIV_SYNC_DEBUG */ - {&rw_lock_list_mutex_key, "rw_lock_list_mutex", 0}, - {&rw_lock_mutex_key, "rw_lock_mutex", 0}, - {&srv_dict_tmpfile_mutex_key, "srv_dict_tmpfile_mutex", 0}, - {&srv_innodb_monitor_mutex_key, "srv_innodb_monitor_mutex", 0}, - {&srv_misc_tmpfile_mutex_key, "srv_misc_tmpfile_mutex", 0}, - {&srv_monitor_file_mutex_key, "srv_monitor_file_mutex", 0}, -# ifdef UNIV_SYNC_DEBUG - {&sync_thread_mutex_key, "sync_thread_mutex", 0}, -# endif /* UNIV_SYNC_DEBUG */ - {&buf_dblwr_mutex_key, "buf_dblwr_mutex", 0}, - {&trx_undo_mutex_key, "trx_undo_mutex", 0}, - {&srv_sys_mutex_key, "srv_sys_mutex", 0}, - {&lock_sys_mutex_key, 
"lock_mutex", 0}, - {&lock_sys_wait_mutex_key, "lock_wait_mutex", 0}, - {&trx_mutex_key, "trx_mutex", 0}, - {&srv_sys_tasks_mutex_key, "srv_threads_mutex", 0}, - /* mutex with os_fast_mutex_ interfaces */ -# ifndef PFS_SKIP_EVENT_MUTEX - {&event_os_mutex_key, "event_os_mutex", 0}, -# endif /* PFS_SKIP_EVENT_MUTEX */ - {&os_mutex_key, "os_mutex", 0}, -#ifndef HAVE_ATOMIC_BUILTINS - {&srv_conc_mutex_key, "srv_conc_mutex", 0}, -#endif /* !HAVE_ATOMIC_BUILTINS */ -#ifndef HAVE_ATOMIC_BUILTINS_64 - {&monitor_mutex_key, "monitor_mutex", 0}, -#endif /* !HAVE_ATOMIC_BUILTINS_64 */ - {&ut_list_mutex_key, "ut_list_mutex", 0}, - {&trx_sys_mutex_key, "trx_sys_mutex", 0}, - {&zip_pad_mutex_key, "zip_pad_mutex", 0}, -}; -# endif /* UNIV_PFS_MUTEX */ - -# ifdef UNIV_PFS_RWLOCK -/* all_innodb_rwlocks array contains rwlocks that are -performance schema instrumented if "UNIV_PFS_RWLOCK" -is defined */ -static PSI_rwlock_info all_innodb_rwlocks[] = { -# ifdef UNIV_LOG_ARCHIVE - {&archive_lock_key, "archive_lock", 0}, -# endif /* UNIV_LOG_ARCHIVE */ - {&btr_search_latch_key, "btr_search_latch", 0}, -# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK - {&buf_block_lock_key, "buf_block_lock", 0}, -# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */ -# ifdef UNIV_SYNC_DEBUG - {&buf_block_debug_latch_key, "buf_block_debug_latch", 0}, -# endif /* UNIV_SYNC_DEBUG */ - {&dict_operation_lock_key, "dict_operation_lock", 0}, - {&fil_space_latch_key, "fil_space_latch", 0}, - {&checkpoint_lock_key, "checkpoint_lock", 0}, - {&fts_cache_rw_lock_key, "fts_cache_rw_lock", 0}, - {&fts_cache_init_rw_lock_key, "fts_cache_init_rw_lock", 0}, - {&trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0}, - {&trx_purge_latch_key, "trx_purge_latch", 0}, - {&index_tree_rw_lock_key, "index_tree_rw_lock", 0}, - {&index_online_log_key, "index_online_log", 0}, - {&dict_table_stats_key, "dict_table_stats", 0}, - {&hash_table_rw_lock_key, "hash_table_locks", 0} -}; -# endif /* UNIV_PFS_RWLOCK */ - -# ifdef UNIV_PFS_THREAD -/* all_innodb_threads 
array contains threads that are -performance schema instrumented if "UNIV_PFS_THREAD" -is defined */ -static PSI_thread_info all_innodb_threads[] = { - {&trx_rollback_clean_thread_key, "trx_rollback_clean_thread", 0}, - {&io_handler_thread_key, "io_handler_thread", 0}, - {&srv_lock_timeout_thread_key, "srv_lock_timeout_thread", 0}, - {&srv_error_monitor_thread_key, "srv_error_monitor_thread", 0}, - {&srv_monitor_thread_key, "srv_monitor_thread", 0}, - {&srv_master_thread_key, "srv_master_thread", 0}, - {&srv_purge_thread_key, "srv_purge_thread", 0}, - {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0}, - {&buf_lru_manager_thread_key, "lru_manager_thread", 0}, - {&recv_writer_thread_key, "recv_writer_thread", 0}, - {&srv_log_tracking_thread_key, "srv_redo_log_follow_thread", 0} -}; -# endif /* UNIV_PFS_THREAD */ - -# ifdef UNIV_PFS_IO -/* all_innodb_files array contains the type of files that are -performance schema instrumented if "UNIV_PFS_IO" is defined */ -static PSI_file_info all_innodb_files[] = { - {&innodb_file_data_key, "innodb_data_file", 0}, - {&innodb_file_log_key, "innodb_log_file", 0}, - {&innodb_file_temp_key, "innodb_temp_file", 0}, - {&innodb_file_bmp_key, "innodb_bmp_file", 0} -}; -# endif /* UNIV_PFS_IO */ -#endif /* HAVE_PSI_INTERFACE */ - -/** Always normalize table name to lower case on Windows */ -#ifdef __WIN__ -#define normalize_table_name(norm_name, name) \ - normalize_table_name_low(norm_name, name, TRUE) -#else -#define normalize_table_name(norm_name, name) \ - normalize_table_name_low(norm_name, name, FALSE) -#endif /* __WIN__ */ - -/** Set up InnoDB API callback function array */ -ib_cb_t innodb_api_cb[] = { - (ib_cb_t) ib_cursor_open_table, - (ib_cb_t) ib_cursor_read_row, - (ib_cb_t) ib_cursor_insert_row, - (ib_cb_t) ib_cursor_delete_row, - (ib_cb_t) ib_cursor_update_row, - (ib_cb_t) ib_cursor_moveto, - (ib_cb_t) ib_cursor_first, - (ib_cb_t) ib_cursor_next, - (ib_cb_t) ib_cursor_last, - (ib_cb_t) ib_cursor_set_match_mode, - 
(ib_cb_t) ib_sec_search_tuple_create, - (ib_cb_t) ib_clust_read_tuple_create, - (ib_cb_t) ib_tuple_delete, - (ib_cb_t) ib_tuple_copy, - (ib_cb_t) ib_tuple_read_u8, - (ib_cb_t) ib_tuple_write_u8, - (ib_cb_t) ib_tuple_read_u16, - (ib_cb_t) ib_tuple_write_u16, - (ib_cb_t) ib_tuple_read_u32, - (ib_cb_t) ib_tuple_write_u32, - (ib_cb_t) ib_tuple_read_u64, - (ib_cb_t) ib_tuple_write_u64, - (ib_cb_t) ib_tuple_read_i8, - (ib_cb_t) ib_tuple_write_i8, - (ib_cb_t) ib_tuple_read_i16, - (ib_cb_t) ib_tuple_write_i16, - (ib_cb_t) ib_tuple_read_i32, - (ib_cb_t) ib_tuple_write_i32, - (ib_cb_t) ib_tuple_read_i64, - (ib_cb_t) ib_tuple_write_i64, - (ib_cb_t) ib_tuple_get_n_cols, - (ib_cb_t) ib_col_set_value, - (ib_cb_t) ib_col_get_value, - (ib_cb_t) ib_col_get_meta, - (ib_cb_t) ib_trx_begin, - (ib_cb_t) ib_trx_commit, - (ib_cb_t) ib_trx_rollback, - (ib_cb_t) ib_trx_start, - (ib_cb_t) ib_trx_release, - (ib_cb_t) ib_trx_state, - (ib_cb_t) ib_cursor_lock, - (ib_cb_t) ib_cursor_close, - (ib_cb_t) ib_cursor_new_trx, - (ib_cb_t) ib_cursor_reset, - (ib_cb_t) ib_open_table_by_name, - (ib_cb_t) ib_col_get_name, - (ib_cb_t) ib_table_truncate, - (ib_cb_t) ib_cursor_open_index_using_name, - (ib_cb_t) ib_close_thd, - (ib_cb_t) ib_cfg_get_cfg, - (ib_cb_t) ib_cursor_set_memcached_sync, - (ib_cb_t) ib_cursor_set_cluster_access, - (ib_cb_t) ib_cursor_commit_trx, - (ib_cb_t) ib_cfg_trx_level, - (ib_cb_t) ib_tuple_get_n_user_cols, - (ib_cb_t) ib_cursor_set_lock_mode, - (ib_cb_t) ib_cursor_clear_trx, - (ib_cb_t) ib_get_idx_field_name, - (ib_cb_t) ib_trx_get_start_time, - (ib_cb_t) ib_cfg_bk_commit_interval, - (ib_cb_t) ib_cursor_stmt_begin, - (ib_cb_t) ib_trx_read_only -}; - -static void innodb_remember_check_sysvar_funcs(); -mysql_var_check_func check_sysvar_enum; - -static MYSQL_THDVAR_UINT(default_encryption_key_id, PLUGIN_VAR_RQCMDARG, - "Default encryption key id used for table encryption.", - NULL, NULL, - FIL_DEFAULT_ENCRYPTION_KEY, 1, UINT_MAX32, 0); - -/** - Structure for CREATE TABLE options 
(table options). - It needs to be called ha_table_option_struct. - - The option values can be specified in the CREATE TABLE at the end: - CREATE TABLE ( ... ) *here* -*/ - -ha_create_table_option innodb_table_option_list[]= -{ - /* With this option user can enable page compression feature for the - table */ - HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0), - /* With this option user can set zip compression level for page - compression for this table*/ - HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1), - /* With this option user can enable atomic writes feature for this table */ - HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), - /* With this option the user can enable encryption for the table */ - HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0), - /* With this option the user defines the key identifier using for the encryption */ - HA_TOPTION_SYSVAR("ENCRYPTION_KEY_ID", encryption_key_id, default_encryption_key_id), - - HA_TOPTION_END -}; - -/** - Test a file path whether it is same as mysql data directory path. - - @param path null terminated character string - - @return - @retval TRUE The path is different from mysql data directory. - @retval FALSE The path is same as mysql data directory. 
-*/ -static bool is_mysql_datadir_path(const char *path) -{ - if (path == NULL) - return false; - - char mysql_data_dir[FN_REFLEN], path_dir[FN_REFLEN]; - convert_dirname(path_dir, path, NullS); - convert_dirname(mysql_data_dir, mysql_unpacked_real_data_home, NullS); - size_t mysql_data_home_len= dirname_length(mysql_data_dir); - size_t path_len = dirname_length(path_dir); - - if (path_len < mysql_data_home_len) - return true; - - if (!lower_case_file_system) - return(memcmp(mysql_data_dir, path_dir, mysql_data_home_len)); - - return(files_charset_info->coll->strnncoll(files_charset_info, - (uchar *) path_dir, path_len, - (uchar *) mysql_data_dir, - mysql_data_home_len, - TRUE)); -} - - -static int mysql_tmpfile_path(const char *path, const char *prefix) -{ - DBUG_ASSERT(path != NULL); - DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN); - - char filename[FN_REFLEN]; - File fd = create_temp_file(filename, path, prefix, -#ifdef __WIN__ - O_BINARY | O_TRUNC | O_SEQUENTIAL | - O_SHORT_LIVED | -#endif /* __WIN__ */ - O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY, - MYF(MY_WME)); - if (fd >= 0) { -#ifndef __WIN__ - /* - This can be removed once the following bug is fixed: - Bug #28903 create_temp_file() doesn't honor O_TEMPORARY option - (file not removed) (Unix) - */ - unlink(filename); -#endif /* !__WIN__ */ - } - - return fd; -} - -/*************************************************************//** -Check whether valid argument given to innodb_ft_*_stopword_table. -This function is registered as a callback with MySQL. -@return 0 for valid stopword table */ -static -int -innodb_stopword_table_validate( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value); /*!< in: incoming string */ - -/** Validate passed-in "value" is a valid directory name. 
-This function is registered as a callback with MySQL. -@param[in,out] thd thread handle -@param[in] var pointer to system variable -@param[out] save immediate result for update -@param[in] value incoming string -@return 0 for valid name */ -static -int -innodb_tmpdir_validate( - THD* thd, - struct st_mysql_sys_var* var, - void* save, - struct st_mysql_value* value) -{ - - char* alter_tmp_dir; - char* innodb_tmp_dir; - char buff[OS_FILE_MAX_PATH]; - int len = sizeof(buff); - char tmp_abs_path[FN_REFLEN + 2]; - - ut_ad(save != NULL); - ut_ad(value != NULL); - - if (check_global_access(thd, FILE_ACL)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "InnoDB: FILE Permissions required"); - *static_cast<const char**>(save) = NULL; - return(1); - } - - alter_tmp_dir = (char*) value->val_str(value, buff, &len); - - if (!alter_tmp_dir) { - *static_cast<const char**>(save) = alter_tmp_dir; - return(0); - } - - if (strlen(alter_tmp_dir) > FN_REFLEN) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Path length should not exceed %d bytes", FN_REFLEN); - *static_cast<const char**>(save) = NULL; - return(1); - } - - my_realpath(tmp_abs_path, alter_tmp_dir, 0); - size_t tmp_abs_len = strlen(tmp_abs_path); - - if (my_access(tmp_abs_path, F_OK)) { - - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "InnoDB: Path doesn't exist."); - *static_cast<const char**>(save) = NULL; - return(1); - } else if (my_access(tmp_abs_path, R_OK | W_OK)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "InnoDB: Server doesn't have permission in " - "the given location."); - *static_cast<const char**>(save) = NULL; - return(1); - } - - MY_STAT stat_info_dir; - - if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) { - if ((stat_info_dir.st_mode & S_IFDIR) != S_IFDIR) { - - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - 
ER_WRONG_ARGUMENTS, - "Given path is not a directory. "); - *static_cast<const char**>(save) = NULL; - return(1); - } - } - - if (!is_mysql_datadir_path(tmp_abs_path)) { - - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "InnoDB: Path Location should not be same as " - "mysql data directory location."); - *static_cast<const char**>(save) = NULL; - return(1); - } - - innodb_tmp_dir = static_cast<char*>( - thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1)); - *static_cast<const char**>(save) = innodb_tmp_dir; - return(0); -} - -/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default -system clustered index when there is no primary key. */ -const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX"; -/************************************************************//** -Synchronously read and parse the redo log up to the last -checkpoint to write the changed page bitmap. -@return 0 to indicate success. Current implementation cannot fail. */ -static -my_bool -innobase_flush_changed_page_bitmaps() __attribute__((unused)); -/*==================================*/ -/************************************************************//** -Delete all the bitmap files for data less than the specified LSN. -If called with lsn == 0 (i.e. set by RESET request) or -IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise -continue it. -@return 0 to indicate success, 1 for failure. */ -static -my_bool -innobase_purge_changed_page_bitmaps( -/*================================*/ - ulonglong lsn) __attribute__((unused)); /*!< in: LSN to purge files up to */ - -/** Empty free list algorithm. -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. 
-In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@param[in] algorithm desired algorithm from srv_empty_free_list_t -@return true if it's possible to enable backoff. */ -static inline -bool -innodb_empty_free_list_algorithm_allowed( - srv_empty_free_list_t algorithm) -{ - long long buf_pool_pages = srv_buf_pool_size / srv_page_size - / srv_buf_pool_instances; - - return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) - || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); -} - -/** Get the list of foreign keys referencing a specified table -table. -@param thd The thread handle -@param path Path to the table -@param f_key_list[out] The list of foreign keys - -@return error code or zero for success */ -static -int -innobase_get_parent_fk_list( - THD* thd, - const char* path, - List<FOREIGN_KEY_INFO>* f_key_list) __attribute__((unused)); - -/******************************************************************//** -Maps a MySQL trx isolation level code to the InnoDB isolation level code -@return InnoDB isolation level */ -static inline -ulint -innobase_map_isolation_level( -/*=========================*/ - enum_tx_isolation iso); /*!< in: MySQL isolation level code */ - -/* Enable / disable checkpoints */ -static int innobase_checkpoint_state(handlerton *hton, bool disable) -{ - if (disable) - (void) log_disable_checkpoint(); - else - log_enable_checkpoint(); - return 0; -} - -/*************************************************************//** -Check for a valid value of innobase_compression_algorithm. -@return 0 for valid innodb_compression_algorithm. 
*/ -static -int -innodb_compression_algorithm_validate( -/*==================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value); /*!< in: incoming string */ - -static -int -innodb_encrypt_tables_validate( -/*==================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value); /*!< in: incoming string */ - -static const char innobase_hton_name[]= "InnoDB"; - -static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG, - "Enable InnoDB support for the XA two-phase commit", - /* check_func */ NULL, /* update_func */ NULL, - /* default */ TRUE); - -static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, - "Enable InnoDB locking in LOCK TABLES", - /* check_func */ NULL, /* update_func */ NULL, - /* default */ TRUE); - -static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG, - "Use strict mode when evaluating create options.", - NULL, NULL, TRUE); - -static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG, - "Create FTS index with stopword.", - NULL, NULL, - /* default */ TRUE); - -static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, - "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.", - NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0); - -static MYSQL_THDVAR_STR(ft_user_stopword_table, - PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC, - "User supplied stopword table name, effective in the session level.", - innodb_stopword_table_validate, NULL, NULL); - -static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG, - "Controls the durability/speed trade-off for commits." 
- " Set to 0 (write and flush redo log to disk only once per second)," - " 1 (flush to disk at each commit)," - " 2 (write to log at commit but flush to disk only once per second)" - " or 3 (flush to disk at prepare and at commit, slower and usually redundant)." - " 1 and 3 guarantees that after a crash, committed transactions will" - " not be lost and will be consistent with the binlog and other transactional" - " engines. 2 can get inconsistent and lose transactions if there is a" - " power failure or kernel crash but not if mysqld crashes. 0 has no" - " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.", - NULL, NULL, 1, 0, 3, 0); - -static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG, - "In the transaction after enabled, UPDATE, INSERT and DELETE only move the cursor to the records " - "and do nothing other operations (no changes, no ibuf, no undo, no transaction log) in the transaction. " - "This is to cause replication prefetch IO. ATTENTION: the transaction started after enabled is affected.", - NULL, NULL, FALSE); - -static MYSQL_THDVAR_STR(tmpdir, - PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC, - "Directory for temporary non-tablespace files.", - innodb_tmpdir_validate, NULL, NULL); - -static ibool innodb_have_lzo=IF_LZO(1, 0); -static ibool innodb_have_lz4=IF_LZ4(1, 0); -static ibool innodb_have_lzma=IF_LZMA(1, 0); -static ibool innodb_have_bzip2=IF_BZIP2(1, 0); -static ibool innodb_have_snappy=IF_SNAPPY(1, 0); - -static SHOW_VAR innodb_status_variables[]= { - {"available_undo_logs", - (char*) &export_vars.innodb_available_undo_logs, SHOW_LONG}, - {"background_log_sync", - (char*) &export_vars.innodb_background_log_sync, SHOW_LONG}, - {"buffer_pool_bytes_data", - (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG}, - {"buffer_pool_bytes_dirty", - (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG}, - {"buffer_pool_dump_status", - (char*) &export_vars.innodb_buffer_pool_dump_status, SHOW_CHAR}, - 
{"buffer_pool_load_status", - (char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR}, - {"buffer_pool_pages_data", - (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG}, - {"buffer_pool_pages_dirty", - (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG}, - {"buffer_pool_pages_flushed", - (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG}, - {"buffer_pool_pages_free", - (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG}, -#ifdef UNIV_DEBUG - {"buffer_pool_pages_latched", - (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG}, -#endif /* UNIV_DEBUG */ - {"buffer_pool_pages_LRU_flushed", - (char*) &export_vars.innodb_buffer_pool_pages_LRU_flushed, SHOW_LONG}, - {"buffer_pool_pages_made_not_young", - (char*) &export_vars.innodb_buffer_pool_pages_made_not_young, SHOW_LONG}, - {"buffer_pool_pages_made_young", - (char*) &export_vars.innodb_buffer_pool_pages_made_young, SHOW_LONG}, - {"buffer_pool_pages_misc", - (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG}, - {"buffer_pool_pages_old", - (char*) &export_vars.innodb_buffer_pool_pages_old, SHOW_LONG}, - {"buffer_pool_pages_total", - (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG}, - {"buffer_pool_read_ahead", - (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG}, - {"buffer_pool_read_ahead_evicted", - (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG}, - {"buffer_pool_read_ahead_rnd", - (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG}, - {"buffer_pool_read_requests", - (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG}, - {"buffer_pool_reads", - (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG}, - {"buffer_pool_wait_free", - (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG}, - {"buffer_pool_write_requests", - (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG}, - {"checkpoint_age", - (char*) 
&export_vars.innodb_checkpoint_age, SHOW_LONG}, - {"checkpoint_max_age", - (char*) &export_vars.innodb_checkpoint_max_age, SHOW_LONG}, - {"data_fsyncs", - (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG}, - {"data_pending_fsyncs", - (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG}, - {"data_pending_reads", - (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG}, - {"data_pending_writes", - (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG}, - {"data_read", - (char*) &export_vars.innodb_data_read, SHOW_LONG}, - {"data_reads", - (char*) &export_vars.innodb_data_reads, SHOW_LONG}, - {"data_writes", - (char*) &export_vars.innodb_data_writes, SHOW_LONG}, - {"data_written", - (char*) &export_vars.innodb_data_written, SHOW_LONG}, - {"dblwr_pages_written", - (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG}, - {"dblwr_writes", - (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG}, - {"deadlocks", - (char*) &export_vars.innodb_deadlocks, SHOW_LONG}, - {"have_atomic_builtins", - (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL}, - {"history_list_length", - (char*) &export_vars.innodb_history_list_length, SHOW_LONG}, - {"ibuf_discarded_delete_marks", - (char*) &export_vars.innodb_ibuf_discarded_delete_marks, SHOW_LONG}, - {"ibuf_discarded_deletes", - (char*) &export_vars.innodb_ibuf_discarded_deletes, SHOW_LONG}, - {"ibuf_discarded_inserts", - (char*) &export_vars.innodb_ibuf_discarded_inserts, SHOW_LONG}, - {"ibuf_free_list", - (char*) &export_vars.innodb_ibuf_free_list, SHOW_LONG}, - {"ibuf_merged_delete_marks", - (char*) &export_vars.innodb_ibuf_merged_delete_marks, SHOW_LONG}, - {"ibuf_merged_deletes", - (char*) &export_vars.innodb_ibuf_merged_deletes, SHOW_LONG}, - {"ibuf_merged_inserts", - (char*) &export_vars.innodb_ibuf_merged_inserts, SHOW_LONG}, - {"ibuf_merges", - (char*) &export_vars.innodb_ibuf_merges, SHOW_LONG}, - {"ibuf_segment_size", - (char*) &export_vars.innodb_ibuf_segment_size, SHOW_LONG}, - {"ibuf_size", 
- (char*) &export_vars.innodb_ibuf_size, SHOW_LONG}, - {"log_waits", - (char*) &export_vars.innodb_log_waits, SHOW_LONG}, - {"log_write_requests", - (char*) &export_vars.innodb_log_write_requests, SHOW_LONG}, - {"log_writes", - (char*) &export_vars.innodb_log_writes, SHOW_LONG}, - {"lsn_current", - (char*) &export_vars.innodb_lsn_current, SHOW_LONGLONG}, - {"lsn_flushed", - (char*) &export_vars.innodb_lsn_flushed, SHOW_LONGLONG}, - {"lsn_last_checkpoint", - (char*) &export_vars.innodb_lsn_last_checkpoint, SHOW_LONGLONG}, - {"master_thread_active_loops", - (char*) &export_vars.innodb_master_thread_active_loops, SHOW_LONG}, - {"master_thread_idle_loops", - (char*) &export_vars.innodb_master_thread_idle_loops, SHOW_LONG}, - {"max_trx_id", - (char*) &export_vars.innodb_max_trx_id, SHOW_LONGLONG}, - {"mem_adaptive_hash", - (char*) &export_vars.innodb_mem_adaptive_hash, SHOW_LONG}, - {"mem_dictionary", - (char*) &export_vars.innodb_mem_dictionary, SHOW_LONG}, - {"mem_total", - (char*) &export_vars.innodb_mem_total, SHOW_LONG}, - {"mutex_os_waits", - (char*) &export_vars.innodb_mutex_os_waits, SHOW_LONGLONG}, - {"mutex_spin_rounds", - (char*) &export_vars.innodb_mutex_spin_rounds, SHOW_LONGLONG}, - {"mutex_spin_waits", - (char*) &export_vars.innodb_mutex_spin_waits, SHOW_LONGLONG}, - {"oldest_view_low_limit_trx_id", - (char*) &export_vars.innodb_oldest_view_low_limit_trx_id, SHOW_LONGLONG}, - {"os_log_fsyncs", - (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG}, - {"os_log_pending_fsyncs", - (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG}, - {"os_log_pending_writes", - (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG}, - {"os_log_written", - (char*) &export_vars.innodb_os_log_written, SHOW_LONGLONG}, - {"page_size", - (char*) &export_vars.innodb_page_size, SHOW_LONG}, - {"pages_created", - (char*) &export_vars.innodb_pages_created, SHOW_LONG}, - {"pages_read", - (char*) &export_vars.innodb_pages_read, SHOW_LONG}, - {"pages0_read", - (char*) 
&export_vars.innodb_page0_read, SHOW_LONG}, - {"pages_written", - (char*) &export_vars.innodb_pages_written, SHOW_LONG}, - {"purge_trx_id", - (char*) &export_vars.innodb_purge_trx_id, SHOW_LONGLONG}, -#ifdef UNIV_DEBUG - {"purge_trx_id_age", - (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG}, -#endif /* UNIV_DEBUG */ - {"purge_undo_no", - (char*) &export_vars.innodb_purge_undo_no, SHOW_LONGLONG}, -#ifdef UNIV_DEBUG - {"purge_view_trx_id_age", - (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG}, -#endif /* UNIV_DEBUG */ - {"read_views_memory", - (char*) &export_vars.innodb_read_views_memory, SHOW_LONG}, - {"row_lock_current_waits", - (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG}, - {"row_lock_time", - (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG}, - {"row_lock_time_avg", - (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG}, - {"row_lock_time_max", - (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG}, - {"row_lock_waits", - (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG}, - {"rows_deleted", - (char*) &export_vars.innodb_rows_deleted, SHOW_LONG}, - {"rows_inserted", - (char*) &export_vars.innodb_rows_inserted, SHOW_LONG}, - {"rows_read", - (char*) &export_vars.innodb_rows_read, SHOW_LONG}, - {"rows_updated", - (char*) &export_vars.innodb_rows_updated, SHOW_LONG}, - {"system_rows_deleted", - (char*) &export_vars.innodb_system_rows_deleted, SHOW_LONG}, - {"system_rows_inserted", - (char*) &export_vars.innodb_system_rows_inserted, SHOW_LONG}, - {"system_rows_read", - (char*) &export_vars.innodb_system_rows_read, SHOW_LONG}, - {"system_rows_updated", - (char*) &export_vars.innodb_system_rows_updated, SHOW_LONG}, - {"s_lock_os_waits", - (char*) &export_vars.innodb_s_lock_os_waits, SHOW_LONGLONG}, - {"s_lock_spin_rounds", - (char*) &export_vars.innodb_s_lock_spin_rounds, SHOW_LONGLONG}, - {"s_lock_spin_waits", - (char*) &export_vars.innodb_s_lock_spin_waits, SHOW_LONGLONG}, - {"truncated_status_writes", - 
(char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG}, - {"x_lock_os_waits", - (char*) &export_vars.innodb_x_lock_os_waits, SHOW_LONGLONG}, - {"x_lock_spin_rounds", - (char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG}, - {"x_lock_spin_waits", - (char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG}, - - /* Status variables for page compression */ - {"page_compression_saved", - (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG}, - {"page_compression_trim_sect512", - (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG}, - {"page_compression_trim_sect1024", - (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG}, - {"page_compression_trim_sect2048", - (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG}, - {"page_compression_trim_sect4096", - (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, - {"page_compression_trim_sect8192", - (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG}, - {"page_compression_trim_sect16384", - (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG}, - {"page_compression_trim_sect32768", - (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG}, - {"num_index_pages_written", - (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, - {"num_non_index_pages_written", - (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG}, - {"num_pages_page_compressed", - (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, - {"num_page_compressed_trim_op", - (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG}, - {"num_page_compressed_trim_op_saved", - (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, - {"num_pages_page_decompressed", - (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, - {"num_pages_page_compression_error", - (char*) 
&export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG}, - {"num_pages_encrypted", - (char*) &export_vars.innodb_pages_encrypted, SHOW_LONGLONG}, - {"num_pages_decrypted", - (char*) &export_vars.innodb_pages_decrypted, SHOW_LONGLONG}, - {"have_lz4", - (char*) &innodb_have_lz4, SHOW_BOOL}, - {"have_lzo", - (char*) &innodb_have_lzo, SHOW_BOOL}, - {"have_lzma", - (char*) &innodb_have_lzma, SHOW_BOOL}, - {"have_bzip2", - (char*) &innodb_have_bzip2, SHOW_BOOL}, - {"have_snappy", - (char*) &innodb_have_snappy, SHOW_BOOL}, - - /* Defragment */ - {"defragment_compression_failures", - (char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG}, - {"defragment_failures", - (char*) &export_vars.innodb_defragment_failures, SHOW_LONG}, - {"defragment_count", - (char*) &export_vars.innodb_defragment_count, SHOW_LONG}, - - /* Online alter table status variables */ - {"onlineddl_rowlog_rows", - (char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG}, - {"onlineddl_rowlog_pct_used", - (char*) &export_vars.innodb_onlineddl_rowlog_pct_used, SHOW_LONG}, - {"onlineddl_pct_progress", - (char*) &export_vars.innodb_onlineddl_pct_progress, SHOW_LONG}, - - /* Times secondary index lookup triggered cluster lookup and - times prefix optimization avoided triggering cluster lookup */ - {"secondary_index_triggered_cluster_reads", - (char*) &export_vars.innodb_sec_rec_cluster_reads, SHOW_LONG}, - {"secondary_index_triggered_cluster_reads_avoided", - (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG}, - - /* Encryption */ - {"encryption_rotation_pages_read_from_cache", - (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache, - SHOW_LONG}, - {"encryption_rotation_pages_read_from_disk", - (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk, - SHOW_LONG}, - {"encryption_rotation_pages_modified", - (char*) &export_vars.innodb_encryption_rotation_pages_modified, - SHOW_LONG}, - {"encryption_rotation_pages_flushed", - (char*) 
&export_vars.innodb_encryption_rotation_pages_flushed, - SHOW_LONG}, - {"encryption_rotation_estimated_iops", - (char*) &export_vars.innodb_encryption_rotation_estimated_iops, - SHOW_LONG}, - {"encryption_key_rotation_list_length", - (char*)&export_vars.innodb_key_rotation_list_length, - SHOW_LONGLONG}, - - /* Scrubing feature */ - {"scrub_background_page_reorganizations", - (char*) &export_vars.innodb_scrub_page_reorganizations, - SHOW_LONG}, - {"scrub_background_page_splits", - (char*) &export_vars.innodb_scrub_page_splits, - SHOW_LONG}, - {"scrub_background_page_split_failures_underflow", - (char*) &export_vars.innodb_scrub_page_split_failures_underflow, - SHOW_LONG}, - {"scrub_background_page_split_failures_out_of_filespace", - (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace, - SHOW_LONG}, - {"scrub_background_page_split_failures_missing_index", - (char*) &export_vars.innodb_scrub_page_split_failures_missing_index, - SHOW_LONG}, - {"scrub_background_page_split_failures_unknown", - (char*) &export_vars.innodb_scrub_page_split_failures_unknown, - SHOW_LONG}, - {"encryption_num_key_requests", - (char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG}, - - {NullS, NullS, SHOW_LONG} -}; - -/************************************************************************//** -Handling the shared INNOBASE_SHARE structure that is needed to provide table -locking. Register the table name if it doesn't exist in the hash table. */ -static -INNOBASE_SHARE* -get_share( -/*======*/ - const char* table_name); /*!< in: table to lookup */ - -/************************************************************************//** -Free the shared object that was registered with get_share(). */ -static -void -free_share( -/*=======*/ - INNOBASE_SHARE* share); /*!< in/own: share to free */ - -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. 
-@return 0 or error number */ -static -int -innobase_close_connection( -/*======================*/ - handlerton* hton, /*!< in/out: Innodb handlerton */ - THD* thd); /*!< in: MySQL thread handle for - which to close the connection */ - -static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all); -static void innobase_checkpoint_request(handlerton *hton, void *cookie); - -/*****************************************************************//** -Cancel any pending lock request associated with the current THD. */ -static -void -innobase_kill_connection( -/*======================*/ - handlerton* hton, /*!< in: innobase handlerton */ - THD* thd, /*!< in: handle to the MySQL thread being killed */ - thd_kill_levels); - -/*****************************************************************//** -Commits a transaction in an InnoDB database or marks an SQL statement -ended. -@return 0 */ -static -int -innobase_commit( -/*============*/ - handlerton* hton, /*!< in/out: Innodb handlerton */ - THD* thd, /*!< in: MySQL thread handle of the - user for whom the transaction should - be committed */ - bool commit_trx); /*!< in: true - commit transaction - false - the current SQL statement - ended */ - -/*****************************************************************//** -Rolls back a transaction to a savepoint. -@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the -given name */ -static -int -innobase_rollback( -/*==============*/ - handlerton* hton, /*!< in/out: Innodb handlerton */ - THD* thd, /*!< in: handle to the MySQL thread - of the user whose transaction should - be rolled back */ - bool rollback_trx); /*!< in: TRUE - rollback entire - transaction FALSE - rollback the current - statement only */ - -/*****************************************************************//** -Rolls back a transaction to a savepoint. 
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the -given name */ -static -int -innobase_rollback_to_savepoint( -/*===========================*/ - handlerton* hton, /*!< in/out: InnoDB handlerton */ - THD* thd, /*!< in: handle to the MySQL thread of - the user whose XA transaction should - be rolled back to savepoint */ - void* savepoint); /*!< in: savepoint data */ - -/*****************************************************************//** -Check whether innodb state allows to safely release MDL locks after -rollback to savepoint. -@return true if it is safe, false if its not safe. */ -static -bool -innobase_rollback_to_savepoint_can_release_mdl( -/*===========================================*/ - handlerton* hton, /*!< in/out: InnoDB handlerton */ - THD* thd); /*!< in: handle to the MySQL thread of - the user whose XA transaction should - be rolled back to savepoint */ - -/*****************************************************************//** -Sets a transaction savepoint. -@return always 0, that is, always succeeds */ -static -int -innobase_savepoint( -/*===============*/ - handlerton* hton, /*!< in/out: InnoDB handlerton */ - THD* thd, /*!< in: handle to the MySQL thread of - the user's XA transaction for which - we need to take a savepoint */ - void* savepoint); /*!< in: savepoint data */ - -/*****************************************************************//** -Release transaction savepoint name. -@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the -given name */ -static -int -innobase_release_savepoint( -/*=======================*/ - handlerton* hton, /*!< in/out: handlerton for Innodb */ - THD* thd, /*!< in: handle to the MySQL thread - of the user whose transaction's - savepoint should be released */ - void* savepoint); /*!< in: savepoint data */ - -/************************************************************************//** -Function for constructing an InnoDB table handler instance. 
*/ -static -handler* -innobase_create_handler( -/*====================*/ - handlerton* hton, /*!< in/out: handlerton for Innodb */ - TABLE_SHARE* table, - MEM_ROOT* mem_root); - -/** @brief Initialize the default value of innodb_commit_concurrency. - -Once InnoDB is running, the innodb_commit_concurrency must not change -from zero to nonzero. (Bug #42101) - -The initial default value is 0, and without this extra initialization, -SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter -to 0, even if it was initially set to nonzero at the command line -or configuration file. */ -static -void -innobase_commit_concurrency_init_default(); -/*=======================================*/ - -/** @brief Initialize the default and max value of innodb_undo_logs. - -Once InnoDB is running, the default value and the max value of -innodb_undo_logs must be equal to the available undo logs, -given by srv_available_undo_logs. */ -static -void -innobase_undo_logs_init_default_max(); -/*==================================*/ - -/************************************************************//** -Validate the file format name and return its corresponding id. -@return valid file format id */ -static -uint -innobase_file_format_name_lookup( -/*=============================*/ - const char* format_name); /*!< in: pointer to file format - name */ -/************************************************************//** -Validate the file format check config parameters, as a side effect it -sets the srv_max_file_format_at_startup variable. -@return the format_id if valid config value, otherwise, return -1 */ -static -int -innobase_file_format_validate_and_set( -/*==================================*/ - const char* format_max); /*!< in: parameter value */ - -/*******************************************************************//** -This function is used to prepare an X/Open XA distributed transaction. 
-@return 0 or error number */ -static -int -innobase_xa_prepare( -/*================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - THD* thd, /*!< in: handle to the MySQL thread of - the user whose XA transaction should - be prepared */ - bool all); /*!< in: true - prepare transaction - false - the current SQL statement - ended */ -/*******************************************************************//** -This function is used to recover X/Open XA distributed transactions. -@return number of prepared transactions stored in xid_list */ -static -int -innobase_xa_recover( -/*================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - XID* xid_list, /*!< in/out: prepared transactions */ - uint len); /*!< in: number of slots in xid_list */ -/*******************************************************************//** -This function is used to commit one X/Open XA distributed transaction -which is in the prepared state -@return 0 or error number */ -static -int -innobase_commit_by_xid( -/*===================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - XID* xid); /*!< in: X/Open XA transaction - identification */ -/*******************************************************************//** -This function is used to rollback one X/Open XA distributed transaction -which is in the prepared state -@return 0 or error number */ -static -int -innobase_rollback_by_xid( -/*=====================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - XID* xid); /*!< in: X/Open XA transaction - identification */ -/*******************************************************************//** -Create a consistent view for a cursor based on current transaction -which is created if the corresponding MySQL thread still lacks one. -This consistent view is then used inside of MySQL when accessing records -using a cursor. 
-@return pointer to cursor view or NULL */ -static -void* -innobase_create_cursor_view( -/*========================*/ - handlerton* hton, /*!< in: innobase hton */ - THD* thd); /*!< in: user thread handle */ -/*******************************************************************//** -Set the given consistent cursor view to a transaction which is created -if the corresponding MySQL thread still lacks one. If the given -consistent cursor view is NULL global read view of a transaction is -restored to a transaction read view. */ -static -void -innobase_set_cursor_view( -/*=====================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - THD* thd, /*!< in: user thread handle */ - void* curview); /*!< in: Consistent cursor view to - be set */ -/*******************************************************************//** -Close the given consistent cursor view of a transaction and restore -global read view to a transaction read view. Transaction is created if the -corresponding MySQL thread still lacks one. */ -static -void -innobase_close_cursor_view( -/*=======================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - THD* thd, /*!< in: user thread handle */ - void* curview); /*!< in: Consistent read view to be - closed */ -/*****************************************************************//** -Removes all tables in the named database inside InnoDB. */ -static -void -innobase_drop_database( -/*===================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - char* path); /*!< in: database path; inside InnoDB - the name of the last directory in - the path is used as the database name: - for example, in 'mysql/data/test' the - database name is 'test' */ -/** Shut down the InnoDB storage engine. -@return 0 */ -static -int -innobase_end(handlerton*, ha_panic_function); - -#if NOT_USED -/*****************************************************************//** -Stores the current binlog coordinates in the trx system header. 
*/ -static -int -innobase_store_binlog_info( -/*=======================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - THD* thd); /*!< in: MySQL thread handle */ -#endif - -/*****************************************************************//** -Creates an InnoDB transaction struct for the thd if it does not yet have one. -Starts a new InnoDB transaction if a transaction is not yet started. And -assigns a new snapshot for a consistent read if the transaction does not yet -have one. -@return 0 */ -static -int -innobase_start_trx_and_assign_read_view( -/*====================================*/ - handlerton* hton, /* in: Innodb handlerton */ - THD* thd); /* in: MySQL thread handle of the - user for whom the transaction should - be committed */ -/****************************************************************//** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes -the logs, and the name of this function should be innobase_checkpoint. -@return TRUE if error */ -static -bool -innobase_flush_logs( -/*================*/ - handlerton* hton); /*!< in: InnoDB handlerton */ - -/************************************************************************//** -Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the -InnoDB Monitor to the client. -@return 0 on success */ -static -int -innodb_show_status( -/*===============*/ - handlerton* hton, /*!< in: the innodb handlerton */ - THD* thd, /*!< in: the MySQL query thread of - the caller */ - stat_print_fn* stat_print); -/************************************************************************//** -Return 0 on success and non-zero on failure. Note: the bool return type -seems to be abused here, should be an int. 
*/ -static -bool -innobase_show_status( -/*=================*/ - handlerton* hton, /*!< in: the innodb handlerton */ - THD* thd, /*!< in: the MySQL query thread of - the caller */ - stat_print_fn* stat_print, - enum ha_stat_type stat_type); - -/*****************************************************************//** -Commits a transaction in an InnoDB database. */ -static -void -innobase_commit_low( -/*================*/ - trx_t* trx); /*!< in: transaction handle */ - -/****************************************************************//** -Parse and enable InnoDB monitor counters during server startup. -User can enable monitor counters/groups by specifying -"loose-innodb_monitor_enable = monitor_name1;monitor_name2..." -in server configuration file or at the command line. */ -static -void -innodb_enable_monitor_at_startup( -/*=============================*/ - char* str); /*!< in: monitor counter enable list */ - -/********************************************************************* -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. An example: -test/mytable. On Windows normalization puts both the database name and the -table name always to lower case if "set_lower_case" is set to TRUE. */ -void -normalize_table_name_low( -/*=====================*/ - char* norm_name, /* out: normalized name as a - null-terminated string */ - const char* name, /* in: table name string */ - ibool set_lower_case); /* in: TRUE if we want to set - name to lower case */ - -#ifdef NOT_USED -/*************************************************************//** -Removes old archived transaction log files. 
-@return true on error */ -static bool innobase_purge_archive_logs( - handlerton *hton, /*!< in: InnoDB handlerton */ - time_t before_date, /*!< in: all files modified - before timestamp should be removed */ - const char* to_filename) /*!< in: this and earler files - should be removed */ -{ - ulint err= DB_ERROR; - if (before_date > 0) { - err= purge_archived_logs(before_date, 0); - } else if (to_filename) { - if (is_prefix(to_filename, IB_ARCHIVED_LOGS_PREFIX)) { - unsigned long long log_file_lsn = strtoll(to_filename - + IB_ARCHIVED_LOGS_PREFIX_LEN, - NULL, 10); - if (log_file_lsn > 0 && log_file_lsn < ULLONG_MAX) { - err= purge_archived_logs(0, log_file_lsn); - } - } - } - return (err != DB_SUCCESS); -} -#endif - - -/*************************************************************//** -Check for a valid value of innobase_commit_concurrency. -@return 0 for valid innodb_commit_concurrency */ -static -int -innobase_commit_concurrency_validate( -/*=================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - long long intbuf; - ulong commit_concurrency; - - DBUG_ENTER("innobase_commit_concurrency_validate"); - - if (value->val_int(value, &intbuf)) { - /* The value is NULL. That is invalid. */ - DBUG_RETURN(1); - } - - *reinterpret_cast<ulong*>(save) = commit_concurrency - = static_cast<ulong>(intbuf); - - /* Allow the value to be updated, as long as it remains zero - or nonzero. */ - DBUG_RETURN(!(!commit_concurrency == !innobase_commit_concurrency)); -} - -/*******************************************************************//** -Function for constructing an InnoDB table handler instance. 
*/ -static -handler* -innobase_create_handler( -/*====================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - TABLE_SHARE* table, - MEM_ROOT* mem_root) -{ - return(new (mem_root) ha_innobase(hton, table)); -} - -/* General functions */ - -/*************************************************************//** -Check that a page_size is correct for InnoDB. If correct, set the -associated page_size_shift which is the power of 2 for this page size. -@return an associated page_size_shift if valid, 0 if invalid. */ -inline -int -innodb_page_size_validate( -/*======================*/ - ulong page_size) /*!< in: Page Size to evaluate */ -{ - ulong n; - - DBUG_ENTER("innodb_page_size_validate"); - - for (n = UNIV_PAGE_SIZE_SHIFT_MIN; - n <= UNIV_PAGE_SIZE_SHIFT_MAX; - n++) { - if (page_size == (ulong) (1 << n)) { - DBUG_RETURN(n); - } - } - - DBUG_RETURN(0); -} - -/******************************************************************//** -Returns true if the thread is the replication thread on the slave -server. Used in srv_conc_enter_innodb() to determine if the thread -should be allowed to enter InnoDB - the replication thread is treated -differently than other threads. Also used in -srv_conc_force_exit_innodb(). -@return true if thd is the replication thread */ -UNIV_INTERN -ibool -thd_is_replication_slave_thread( -/*============================*/ - THD* thd) /*!< in: thread handle */ -{ - return thd && ((ibool) thd_slave_thread(thd)); -} - -/******************************************************************//** -Gets information on the durability property requested by thread. -Used when writing either a prepare or commit record to the log -buffer. @return the durability property. 
*/ -UNIV_INTERN -enum durability_properties -thd_requested_durability( -/*=====================*/ - const THD* thd) /*!< in: thread handle */ -{ - return(thd_get_durability_property(thd)); -} - -/******************************************************************//** -Returns true if transaction should be flagged as read-only. -@return true if the thd is marked as read-only */ -UNIV_INTERN -ibool -thd_trx_is_read_only( -/*=================*/ - THD* thd) /*!< in: thread handle */ -{ - return(thd != 0 && thd_tx_is_read_only(thd)); -} - -/******************************************************************//** -Check if the transaction is an auto-commit transaction. TRUE also -implies that it is a SELECT (read-only) transaction. -@return true if the transaction is an auto commit read-only transaction. */ -UNIV_INTERN -ibool -thd_trx_is_auto_commit( -/*===================*/ - THD* thd) /*!< in: thread handle, can be NULL */ -{ - return(thd != NULL - && !thd_test_options( - thd, - OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) - && thd_is_select(thd)); -} - -/******************************************************************//** -Save some CPU by testing the value of srv_thread_concurrency in inline -functions. 
*/ -static inline -void -innobase_srv_conc_enter_innodb( -/*===========================*/ - trx_t* trx) /*!< in: transaction handle */ -{ -#ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return; -#endif /* WITH_WSREP */ - if (srv_thread_concurrency) { - if (trx->n_tickets_to_enter_innodb > 0) { - - /* If trx has 'free tickets' to enter the engine left, - then use one such ticket */ - - --trx->n_tickets_to_enter_innodb; - - } else if (trx->mysql_thd != NULL - && thd_is_replication_slave_thread(trx->mysql_thd)) { - - UT_WAIT_FOR( - srv_conc_get_active_threads() - < srv_thread_concurrency, - srv_replication_delay * 1000); - - } else { - srv_conc_enter_innodb(trx); - } - } -} - -/******************************************************************//** -Note that the thread wants to leave InnoDB only if it doesn't have -any spare tickets. */ -static inline -void -innobase_srv_conc_exit_innodb( -/*==========================*/ - trx_t* trx) /*!< in: transaction handle */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ -#ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_BF(trx->mysql_thd, FALSE)) return; -#endif /* WITH_WSREP */ - - /* This is to avoid making an unnecessary function call. */ - if (trx->declared_to_be_inside_innodb - && trx->n_tickets_to_enter_innodb == 0) { - - srv_conc_force_exit_innodb(trx); - } -} - -/******************************************************************//** -Force a thread to leave InnoDB even if it has spare tickets. */ -static inline -void -innobase_srv_conc_force_exit_innodb( -/*================================*/ - trx_t* trx) /*!< in: transaction handle */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ - - /* This is to avoid making an unnecessary function call. 
*/ - if (trx->declared_to_be_inside_innodb) { - srv_conc_force_exit_innodb(trx); - } -} - -/******************************************************************//** -Returns the NUL terminated value of glob_hostname. -@return pointer to glob_hostname. */ -UNIV_INTERN -const char* -server_get_hostname() -/*=================*/ -{ - return(glob_hostname); -} - -/******************************************************************//** -Returns true if the transaction this thread is processing has edited -non-transactional tables. Used by the deadlock detector when deciding -which transaction to rollback in case of a deadlock - we try to avoid -rolling back transactions that have edited non-transactional tables. -@return true if non-transactional tables have been edited */ -UNIV_INTERN -ibool -thd_has_edited_nontrans_tables( -/*===========================*/ - THD* thd) /*!< in: thread handle */ -{ - return((ibool) thd_non_transactional_update(thd)); -} - -/* Return high resolution timestamp for the start of the current query */ -UNIV_INTERN -unsigned long long -thd_query_start_micro( - const THD* thd) /*!< in: thread handle */ -{ - return thd_start_utime(thd); -} - -/******************************************************************//** -Returns true if the thread is executing a SELECT statement. -@return true if thd is executing SELECT */ -UNIV_INTERN -ibool -thd_is_select( -/*==========*/ - const THD* thd) /*!< in: thread handle */ -{ - return(thd_sql_command(thd) == SQLCOM_SELECT); -} - -/******************************************************************//** -Returns true if the thread supports XA, -global value of innodb_supports_xa if thd is NULL. -@return true if thd has XA support */ -UNIV_INTERN -ibool -thd_supports_xa( -/*============*/ - THD* thd) /*!< in: thread handle, or NULL to query - the global innodb_supports_xa */ -{ - /* THDVAR cannot be used in xtrabackup, - plugin variables for innodb are not loaded. */ - return (thd || !IS_XTRABACKUP())? 
THDVAR(thd, support_xa): FALSE; -} - -/** Get the value of innodb_tmpdir. -@param[in] thd thread handle, or NULL to query - the global innodb_tmpdir. -@retval NULL if innodb_tmpdir="" */ -UNIV_INTERN -const char* -thd_innodb_tmpdir( - THD* thd) -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(false)); -#endif /* UNIV_SYNC_DEBUG */ - - const char* tmp_dir = THDVAR(thd, tmpdir); - if (tmp_dir != NULL && *tmp_dir == '\0') { - tmp_dir = NULL; - } - - return(tmp_dir); -} -/******************************************************************//** -Check the status of fake changes mode (innodb_fake_changes) -@return true if fake change mode is enabled. */ -UNIV_INTERN -ibool -thd_fake_changes( -/*=============*/ - THD* thd) /*!< in: thread handle, or NULL to query - the global innodb_supports_xa */ -{ - /* THDVAR cannot be used in xtrabackup, - plugin variables for innodb are not loaded */ - return (thd || !IS_XTRABACKUP())? THDVAR((THD*) thd, fake_changes) : FALSE ; -} - -/******************************************************************//** -Returns the lock wait timeout for the current connection. -@return the lock wait timeout, in seconds */ -UNIV_INTERN -ulong -thd_lock_wait_timeout( -/*==================*/ - THD* thd) /*!< in: thread handle, or NULL to query - the global innodb_lock_wait_timeout */ -{ - /* According to <mysql/plugin.h>, passing thd == NULL - returns the global value of the session variable. */ - return(THDVAR(thd, lock_wait_timeout)); -} - -/******************************************************************//** -Set the time waited for the lock for the current query. 
*/ -UNIV_INTERN -void -thd_set_lock_wait_time( -/*===================*/ - THD* thd, /*!< in/out: thread handle */ - ulint value) /*!< in: time waited for the lock */ -{ - if (thd) { - thd_storage_lock_wait(thd, value); - } -} - -/******************************************************************//** -*/ -UNIV_INTERN -ulong -thd_flush_log_at_trx_commit( -/*================================*/ - void* thd) -{ - /* THDVAR cannot be used in xtrabackup, - plugin variables for innodb are not loaded, - this makes xtrabackup crash when trying to use them. */ - return (thd || !IS_XTRABACKUP())? THDVAR((THD*)thd, flush_log_at_trx_commit) : FALSE; -} - -/********************************************************************//** -Obtain the InnoDB transaction of a MySQL thread. -@return reference to transaction pointer */ -MY_ATTRIBUTE((warn_unused_result, nonnull)) -static inline -trx_t*& -thd_to_trx( -/*=======*/ - THD* thd) /*!< in: MySQL thread */ -{ - return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); -} - -#ifdef WITH_WSREP -ulonglong -thd_to_trx_id( -/*=======*/ - THD* thd) /*!< in: MySQL thread */ -{ - return(thd_to_trx(thd)->id); -} -#endif /* WITH_WSREP */ - -my_bool -ha_innobase::is_fake_change_enabled(THD* thd) -{ - trx_t* trx = thd_to_trx(thd); - return(trx && UNIV_UNLIKELY(trx->fake_changes)); -} - -/********************************************************************//** -Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth -time calls srv_active_wake_master_thread. This function should be used -when a single database operation may introduce a small need for -server utility activity, like checkpointing. 
*/ -static inline -void -innobase_active_small(void) -/*=======================*/ -{ - innobase_active_counter++; - - if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) { - srv_active_wake_master_thread(); - } -} - -/********************************************************************//** -Converts an InnoDB error code to a MySQL error code and also tells to MySQL -about a possible transaction rollback inside InnoDB caused by a lock wait -timeout or a deadlock. -@return MySQL error code */ -static -int -convert_error_code_to_mysql( -/*========================*/ - dberr_t error, /*!< in: InnoDB error code */ - ulint flags, /*!< in: InnoDB table flags, or 0 */ - THD* thd) /*!< in: user thread handle or NULL */ -{ - switch (error) { - case DB_SUCCESS: - return(0); - - case DB_INTERRUPTED: - return(HA_ERR_ABORTED_BY_USER); - - case DB_FOREIGN_EXCEED_MAX_CASCADE: - ut_ad(thd); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_ROW_IS_REFERENCED, - "InnoDB: Cannot delete/update " - "rows with cascading foreign key " - "constraints that exceed max " - "depth of %d. Please " - "drop extra constraints and try " - "again", DICT_FK_MAX_RECURSIVE_LOAD); - - /* fall through */ - - case DB_ERROR: - default: - return(-1); /* unspecified error */ - - case DB_DUPLICATE_KEY: - /* Be cautious with returning this error, since - mysql could re-enter the storage layer to get - duplicated key info, the operation requires a - valid table handle and/or transaction information, - which might not always be available in the error - handling stage. 
*/ - return(HA_ERR_FOUND_DUPP_KEY); - - case DB_READ_ONLY: - return(HA_ERR_TABLE_READONLY); - - case DB_FOREIGN_DUPLICATE_KEY: - return(HA_ERR_FOREIGN_DUPLICATE_KEY); - - case DB_MISSING_HISTORY: - return(HA_ERR_TABLE_DEF_CHANGED); - - case DB_RECORD_NOT_FOUND: - return(HA_ERR_NO_ACTIVE_RECORD); - - case DB_SEARCH_ABORTED_BY_USER: - return(HA_ERR_ABORTED_BY_USER); - - case DB_DEADLOCK: - /* Since we rolled back the whole transaction, we must - tell it also to MySQL so that MySQL knows to empty the - cached binlog for this transaction */ - - if (thd) { - thd_mark_transaction_to_rollback(thd, TRUE); - } - - return(HA_ERR_LOCK_DEADLOCK); - - case DB_LOCK_WAIT_TIMEOUT: - /* Starting from 5.0.13, we let MySQL just roll back the - latest SQL statement in a lock wait timeout. Previously, we - rolled back the whole transaction. */ - - if (thd) { - thd_mark_transaction_to_rollback( - thd, (bool) row_rollback_on_timeout); - } - - return(HA_ERR_LOCK_WAIT_TIMEOUT); - - case DB_NO_REFERENCED_ROW: - return(HA_ERR_NO_REFERENCED_ROW); - - case DB_ROW_IS_REFERENCED: - return(HA_ERR_ROW_IS_REFERENCED); - - case DB_CANNOT_ADD_CONSTRAINT: - case DB_CHILD_NO_INDEX: - case DB_PARENT_NO_INDEX: - return(HA_ERR_CANNOT_ADD_FOREIGN); - - case DB_CANNOT_DROP_CONSTRAINT: - - return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit - misleading, a new MySQL error - code should be introduced */ - - case DB_CORRUPTION: - return(HA_ERR_CRASHED); - - case DB_OUT_OF_FILE_SPACE: - return(HA_ERR_RECORD_FILE_FULL); - - case DB_TEMP_FILE_WRITE_FAILURE: - my_error(ER_GET_ERRMSG, MYF(0), - DB_TEMP_FILE_WRITE_FAILURE, - ut_strerr(DB_TEMP_FILE_WRITE_FAILURE), - "InnoDB"); - return(HA_ERR_INTERNAL_ERROR); - - case DB_TABLE_IN_FK_CHECK: - return(HA_ERR_TABLE_IN_FK_CHECK); - - case DB_TABLE_IS_BEING_USED: - return(HA_ERR_WRONG_COMMAND); - - case DB_TABLESPACE_DELETED: - case DB_TABLE_NOT_FOUND: - return(HA_ERR_NO_SUCH_TABLE); - - case DB_DECRYPTION_FAILED: - return(HA_ERR_DECRYPTION_FAILED); - - case 
DB_TABLESPACE_NOT_FOUND: - return(HA_ERR_NO_SUCH_TABLE); - - case DB_TOO_BIG_RECORD: { - /* If prefix is true then a 768-byte prefix is stored - locally for BLOB fields. Refer to dict_table_get_format() */ - bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A); - my_printf_error(ER_TOO_BIG_ROWSIZE, - "Row size too large (> %lu). Changing some columns " - "to TEXT or BLOB %smay help. In current row " - "format, BLOB prefix of %d bytes is stored inline.", - MYF(0), - page_get_free_space_of_empty(flags & - DICT_TF_COMPACT) / 2, - prefix ? "or using ROW_FORMAT=DYNAMIC " - "or ROW_FORMAT=COMPRESSED ": "", - prefix ? DICT_MAX_FIXED_COL_LEN : 0); - return(HA_ERR_TO_BIG_ROW); - } - - - case DB_TOO_BIG_FOR_REDO: - my_printf_error(ER_TOO_BIG_ROWSIZE, "%s" , MYF(0), - "The size of BLOB/TEXT data inserted" - " in one transaction is greater than" - " 10% of redo log size. Increase the" - " redo log size using innodb_log_file_size."); - return(HA_ERR_TO_BIG_ROW); - - case DB_TOO_BIG_INDEX_COL: - my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), - DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)); - return(HA_ERR_INDEX_COL_TOO_LONG); - - case DB_NO_SAVEPOINT: - return(HA_ERR_NO_SAVEPOINT); - - case DB_LOCK_TABLE_FULL: - /* Since we rolled back the whole transaction, we must - tell it also to MySQL so that MySQL knows to empty the - cached binlog for this transaction */ - - if (thd) { - thd_mark_transaction_to_rollback(thd, TRUE); - } - - return(HA_ERR_LOCK_TABLE_FULL); - - case DB_FTS_INVALID_DOCID: - return(HA_FTS_INVALID_DOCID); - case DB_FTS_EXCEED_RESULT_CACHE_LIMIT: - return(HA_ERR_OUT_OF_MEM); - case DB_TOO_MANY_CONCURRENT_TRXS: - return(HA_ERR_TOO_MANY_CONCURRENT_TRXS); - case DB_UNSUPPORTED: - return(HA_ERR_UNSUPPORTED); - case DB_INDEX_CORRUPT: - return(HA_ERR_INDEX_CORRUPT); - case DB_UNDO_RECORD_TOO_BIG: - return(HA_ERR_UNDO_REC_TOO_BIG); - case DB_OUT_OF_MEMORY: - return(HA_ERR_OUT_OF_MEM); - case DB_TABLESPACE_EXISTS: - return(HA_ERR_TABLESPACE_EXISTS); - case 
DB_IDENTIFIER_TOO_LONG: - return(HA_ERR_INTERNAL_ERROR); - case DB_FTS_TOO_MANY_WORDS_IN_PHRASE: - return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE); - } -} - -/*************************************************************//** -Prints info of a THD object (== user session thread) to the given file. */ -UNIV_INTERN -void -innobase_mysql_print_thd( -/*=====================*/ - FILE* f, /*!< in: output stream */ - THD* thd, /*!< in: MySQL THD object */ - uint max_query_len) /*!< in: max query length to print, or 0 to - use the default max length */ -{ - char buffer[1024]; - - fputs(thd_get_error_context_description((THD*) thd, - buffer, sizeof buffer, - max_query_len), f); - putc('\n', f); -} - -/******************************************************************//** -Get the error message format string. -@return the format string or 0 if not found. */ -UNIV_INTERN -const char* -innobase_get_err_msg( -/*=================*/ - int error_code) /*!< in: MySQL error code */ -{ - return(my_get_err_msg(error_code)); -} - -/******************************************************************//** -Get the variable length bounds of the given character set. */ -UNIV_INTERN -void -innobase_get_cset_width( -/*====================*/ - ulint cset, /*!< in: MySQL charset-collation code */ - ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */ - ulint* mbmaxlen) /*!< out: maximum length of a char (in bytes) */ -{ - CHARSET_INFO* cs; - ut_ad(cset <= MAX_CHAR_COLL_NUM); - ut_ad(mbminlen); - ut_ad(mbmaxlen); - - cs = all_charsets[cset]; - if (cs) { - *mbminlen = cs->mbminlen; - *mbmaxlen = cs->mbmaxlen; - ut_ad(*mbminlen < DATA_MBMAX); - ut_ad(*mbmaxlen < DATA_MBMAX); - } else { - THD* thd = current_thd; - - if (thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE) { - - /* Fix bug#46256: allow tables to be dropped if the - collation is not found, but issue a warning. 
*/ - if ((global_system_variables.log_warnings) - && (cset != 0)){ - - sql_print_warning( - "Unknown collation #%lu.", cset); - } - } else { - - ut_a(cset == 0); - } - - *mbminlen = *mbmaxlen = 0; - } -} - -/******************************************************************//** -Converts an identifier to a table name. */ -UNIV_INTERN -void -innobase_convert_from_table_id( -/*===========================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len) /*!< in: length of 'to', in bytes */ -{ - uint errors; - - strconvert(cs, from, strlen(from), &my_charset_filename, to, (uint) len, &errors); -} - -/********************************************************************** -Check if the length of the identifier exceeds the maximum allowed. -return true when length of identifier is too long. */ -UNIV_INTERN -my_bool -innobase_check_identifier_length( -/*=============================*/ - const char* id) /* in: FK identifier to check excluding the - database portion. */ -{ - int well_formed_error = 0; - CHARSET_INFO *cs = system_charset_info; - DBUG_ENTER("innobase_check_identifier_length"); - - size_t len = cs->cset->well_formed_len( - cs, id, id + strlen(id), - NAME_CHAR_LEN, &well_formed_error); - - if (well_formed_error || len == NAME_CHAR_LEN) { - my_error(ER_TOO_LONG_IDENT, MYF(0), id); - DBUG_RETURN(true); - } - DBUG_RETURN(false); -} - -/******************************************************************//** -Converts an identifier to UTF-8. 
*/ -UNIV_INTERN -void -innobase_convert_from_id( -/*=====================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len) /*!< in: length of 'to', in bytes */ -{ - uint errors; - - strconvert(cs, from, strlen(from), system_charset_info, to, (uint) len, &errors); -} - -/******************************************************************//** -Compares NUL-terminated UTF-8 strings case insensitively. -@return 0 if a=b, <0 if a<b, >1 if a>b */ -UNIV_INTERN -int -innobase_strcasecmp( -/*================*/ - const char* a, /*!< in: first string to compare */ - const char* b) /*!< in: second string to compare */ -{ - if (!a) { - if (!b) { - return(0); - } else { - return(-1); - } - } else if (!b) { - return(1); - } - - return(my_strcasecmp(system_charset_info, a, b)); -} - -/******************************************************************//** -Compares NUL-terminated UTF-8 strings case insensitively. The -second string contains wildcards. -@return 0 if a match is found, 1 if not */ -UNIV_INTERN -int -innobase_wildcasecmp( -/*=================*/ - const char* a, /*!< in: string to compare */ - const char* b) /*!< in: wildcard string to compare */ -{ - return(wild_case_compare(system_charset_info, a, b)); -} - -/******************************************************************//** -Strip dir name from a full path name and return only the file name -@return file name or "null" if no file name */ -UNIV_INTERN -const char* -innobase_basename( -/*==============*/ - const char* path_name) /*!< in: full path name */ -{ - const char* name = base_name(path_name); - - return((name) ? name : "null"); -} - -/******************************************************************//** -Makes all characters in a NUL-terminated UTF-8 string lower case. 
*/ -UNIV_INTERN -void -innobase_casedn_str( -/*================*/ - char* a) /*!< in/out: string to put in lower case */ -{ - my_casedn_str(system_charset_info, a); -} - -/**********************************************************************//** -Determines the connection character set. -@return connection character set */ -UNIV_INTERN -struct charset_info_st* -innobase_get_charset( -/*=================*/ - THD* mysql_thd) /*!< in: MySQL thread handle */ -{ - return(thd_charset(mysql_thd)); -} - -/**********************************************************************//** -Determines the current SQL statement. -@return SQL statement string */ -UNIV_INTERN -const char* -innobase_get_stmt( -/*==============*/ - THD* thd, /*!< in: MySQL thread handle */ - size_t* length) /*!< out: length of the SQL statement */ -{ - if (const LEX_STRING *stmt = thd_query_string(thd)) { - *length = stmt->length; - return stmt->str; - } - return NULL; -} - -/**********************************************************************//** -Get the current setting of the table_def_size global parameter. We do -a dirty read because for one there is no synchronization object and -secondly there is little harm in doing so even if we get a torn read. -@return value of table_def_size */ -UNIV_INTERN -ulint -innobase_get_table_cache_size(void) -/*===============================*/ -{ - return(tdc_size); -} - -/**********************************************************************//** -Get the current setting of the lower_case_table_names global parameter from -mysqld.cc. We do a dirty read because for one there is no synchronization -object and secondly there is little harm in doing so even if we get a torn -read. -@return value of lower_case_table_names */ -UNIV_INTERN -ulint -innobase_get_lower_case_table_names(void) -/*=====================================*/ -{ - return(lower_case_table_names); -} - -/** Create a temporary file in the location specified by the parameter -path. 
If the path is null, then it will be created in tmpdir. -@param[in] path location for creating temporary file -@return temporary file descriptor, or < 0 on error */ -UNIV_INTERN -int -innobase_mysql_tmpfile( - const char* path) -{ -#ifdef WITH_INNODB_DISALLOW_WRITES - os_event_wait(srv_allow_writes_event); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - int fd2 = -1; - File fd; - - DBUG_EXECUTE_IF( - "innobase_tmpfile_creation_failure", - return(-1); - ); - - if (path == NULL) { - fd = mysql_tmpfile("ib"); - } else { - fd = mysql_tmpfile_path(path, "ib"); - } - - if (fd >= 0) { - /* Copy the file descriptor, so that the additional resources - allocated by create_temp_file() can be freed by invoking - my_close(). - - Because the file descriptor returned by this function - will be passed to fdopen(), it will be closed by invoking - fclose(), which in turn will invoke close() instead of - my_close(). */ - -#ifdef _WIN32 - /* Note that on Windows, the integer returned by mysql_tmpfile - has no relation to C runtime file descriptor. Here, we need - to call my_get_osfhandle to get the HANDLE and then convert it - to C runtime filedescriptor. */ - { - HANDLE hFile = my_get_osfhandle(fd); - HANDLE hDup; - BOOL bOK = DuplicateHandle( - GetCurrentProcess(), - hFile, GetCurrentProcess(), - &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS); - if (bOK) { - fd2 = _open_osfhandle((intptr_t) hDup, 0); - } else { - my_osmaperr(GetLastError()); - fd2 = -1; - } - } -#else - fd2 = dup(fd); -#endif - if (fd2 < 0) { - DBUG_PRINT("error",("Got error %d on dup",fd2)); - my_errno=errno; - my_error(EE_OUT_OF_FILERESOURCES, - MYF(ME_BELL+ME_WAITTANG), - "ib*", my_errno); - } - my_close(fd, MYF(MY_WME)); - } - return(fd2); -} - -/*********************************************************************//** -Wrapper around MySQL's copy_and_convert function. 
-@return number of bytes copied to 'to' */ -UNIV_INTERN -ulint -innobase_convert_string( -/*====================*/ - void* to, /*!< out: converted string */ - ulint to_length, /*!< in: number of bytes reserved - for the converted string */ - CHARSET_INFO* to_cs, /*!< in: character set to convert to */ - const void* from, /*!< in: string to convert */ - ulint from_length, /*!< in: number of bytes to convert */ - CHARSET_INFO* from_cs, /*!< in: character set to convert - from */ - uint* errors) /*!< out: number of errors encountered - during the conversion */ -{ - return(copy_and_convert( - (char*) to, (uint32) to_length, to_cs, - (const char*) from, (uint32) from_length, from_cs, - errors)); -} - -/*******************************************************************//** -Formats the raw data in "data" (in InnoDB on-disk format) that is of -type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes -the result to "buf". The result is converted to "system_charset_info". -Not more than "buf_size" bytes are written to "buf". -The result is always NUL-terminated (provided buf_size > 0) and the -number of bytes that were written to "buf" is returned (including the -terminating NUL). 
-@return number of bytes that were written */ -UNIV_INTERN -ulint -innobase_raw_format( -/*================*/ - const char* data, /*!< in: raw data */ - ulint data_len, /*!< in: raw data length - in bytes */ - ulint charset_coll, /*!< in: charset collation */ - char* buf, /*!< out: output buffer */ - ulint buf_size) /*!< in: output buffer size - in bytes */ -{ - /* XXX we use a hard limit instead of allocating - but_size bytes from the heap */ - CHARSET_INFO* data_cs; - char buf_tmp[8192]; - ulint buf_tmp_used; - uint num_errors; - - data_cs = all_charsets[charset_coll]; - - buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp), - system_charset_info, - data, data_len, data_cs, - &num_errors); - - return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size)); -} - -/*********************************************************************//** -Compute the next autoinc value. - -For MySQL replication the autoincrement values can be partitioned among -the nodes. The offset is the start or origin of the autoincrement value -for a particular node. For n nodes the increment will be n and the offset -will be in the interval [1, n]. The formula tries to allocate the next -value for a particular node. - -Note: This function is also called with increment set to the number of -values we want to reserve for multi-value inserts e.g., - - INSERT INTO T VALUES(), (), (); - -innobase_next_autoinc() will be called with increment set to 3 where -autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for -the multi-value INSERT above. 
-@return the next value */ -UNIV_INTERN -ulonglong -innobase_next_autoinc( -/*==================*/ - ulonglong current, /*!< in: Current value */ - ulonglong need, /*!< in: count of values needed */ - ulonglong step, /*!< in: AUTOINC increment step */ - ulonglong offset, /*!< in: AUTOINC offset */ - ulonglong max_value) /*!< in: max value for type */ -{ - ulonglong next_value; - ulonglong block = need * step; - - /* Should never be 0. */ - ut_a(need > 0); - ut_a(block > 0); - ut_a(max_value > 0); - - /* - Allow auto_increment to go over max_value up to max ulonglong. - This allows us to detect that all values are exhausted. - If we don't do this, we will return max_value several times - and get duplicate key errors instead of auto increment value - out of range. - */ - max_value= (~(ulonglong) 0); - - /* According to MySQL documentation, if the offset is greater than - the step then the offset is ignored. */ - if (offset > block) { - offset = 0; - } - - /* Check for overflow. Current can be > max_value if the value is - in reality a negative value.The visual studio compilers converts - large double values automatically into unsigned long long datatype - maximum value */ - - if (block >= max_value - || offset > max_value - || current >= max_value - || max_value - offset <= offset) { - - next_value = max_value; - } else { - ut_a(max_value > current); - - ulonglong free = max_value - current; - - if (free < offset || free - offset <= block) { - next_value = max_value; - } else { - next_value = 0; - } - } - - if (next_value == 0) { - ulonglong next; - - if (current >= offset) { - next = (current - offset) / step; - } else { - next = 0; - block -= step; - } - - ut_a(max_value > next); - next_value = next * step; - /* Check for multiplication overflow. 
*/ - ut_a(next_value >= next); - ut_a(max_value > next_value); - - /* Check for overflow */ - if (max_value - next_value >= block) { - - next_value += block; - - if (max_value - next_value >= offset) { - next_value += offset; - } else { - next_value = max_value; - } - } else { - next_value = max_value; - } - } - - ut_a(next_value != 0); - ut_a(next_value <= max_value); - - return(next_value); -} - -/*********************************************************************//** -Initializes some fields in an InnoDB transaction object. */ -static -void -innobase_trx_init( -/*==============*/ - THD* thd, /*!< in: user thread handle */ - trx_t* trx) /*!< in/out: InnoDB transaction handle */ -{ - DBUG_ENTER("innobase_trx_init"); - DBUG_ASSERT(thd == trx->mysql_thd); - - trx->check_foreigns = !thd_test_options( - thd, OPTION_NO_FOREIGN_KEY_CHECKS); - - trx->check_unique_secondary = !thd_test_options( - thd, OPTION_RELAXED_UNIQUE_CHECKS); - - /* Transaction on start caches the fake_changes state and uses it for - complete transaction lifetime. - There are some APIs that doesn't need an active transaction object - but transaction object are just use as a cache object/data carrier. - Before using transaction object for such APIs refresh the state of - fake_changes. */ - if (trx->state == TRX_STATE_NOT_STARTED) { - trx->fake_changes = thd_fake_changes(thd); - } - -#ifdef EXTENDED_SLOWLOG - if (thd_log_slow_verbosity(thd) & (1ULL << SLOG_V_INNODB)) { - trx->take_stats = TRUE; - } else { - trx->take_stats = FALSE; - } -#else - trx->take_stats = FALSE; -#endif - - DBUG_VOID_RETURN; -} - -/*********************************************************************//** -Allocates an InnoDB transaction for a MySQL handler object for DML. 
-@return InnoDB transaction handle */ -UNIV_INTERN -trx_t* -innobase_trx_allocate( -/*==================*/ - THD* thd) /*!< in: user thread handle */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_trx_allocate"); - DBUG_ASSERT(thd != NULL); - DBUG_ASSERT(EQ_CURRENT_THD(thd)); - - trx = trx_allocate_for_mysql(); - - trx->mysql_thd = thd; - - innobase_trx_init(thd, trx); - - DBUG_RETURN(trx); -} - -/*********************************************************************//** -Gets the InnoDB transaction handle for a MySQL handler object, creates -an InnoDB transaction struct if the corresponding MySQL thread struct still -lacks one. -@return InnoDB transaction handle */ -static inline -trx_t* -check_trx_exists( -/*=============*/ - THD* thd) /*!< in: user thread handle */ -{ - trx_t*& trx = thd_to_trx(thd); - - if (trx == NULL) { - trx = innobase_trx_allocate(thd); - thd_set_ha_data(thd, innodb_hton_ptr, trx); - } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) { - mem_analyze_corruption(trx); - ut_error; - } - - innobase_trx_init(thd, trx); - - return(trx); -} - -/************************************************************************* -Gets current trx. */ -trx_t* -innobase_get_trx() -{ - THD *thd=current_thd; - if (likely(thd != 0)) { - trx_t*& trx = thd_to_trx(thd); - return(trx); - } else { - return(NULL); - } -} - -ibool -innobase_get_slow_log() -{ -#ifdef EXTENDED_SLOWLOG - return((ibool) thd_opt_slow_log()); -#else - return(FALSE); -#endif -} - -/*********************************************************************//** -Note that a transaction has been registered with MySQL. -@return true if transaction is registered with MySQL 2PC coordinator */ -static inline -bool -trx_is_registered_for_2pc( -/*=========================*/ - const trx_t* trx) /* in: transaction */ -{ - return(trx->is_registered == 1); -} - -/*********************************************************************//** -Note that innobase_commit_ordered() was run. 
*/ -static inline -void -trx_set_active_commit_ordered( -/*==============================*/ - trx_t* trx) /* in: transaction */ -{ - ut_a(trx_is_registered_for_2pc(trx)); - trx->active_commit_ordered = 1; -} - -/*********************************************************************//** -Note that a transaction has been registered with MySQL 2PC coordinator. */ -static inline -void -trx_register_for_2pc( -/*==================*/ - trx_t* trx) /* in: transaction */ -{ - trx->is_registered = 1; - ut_ad(trx->active_commit_ordered == 0); -} - -/*********************************************************************//** -Note that a transaction has been deregistered. */ -static inline -void -trx_deregister_from_2pc( -/*====================*/ - trx_t* trx) /* in: transaction */ -{ - trx->is_registered = 0; - trx->active_commit_ordered = 0; -} - -/*********************************************************************//** -Check whether a transaction has active_commit_ordered set */ -static inline -bool -trx_is_active_commit_ordered( -/*=========================*/ - const trx_t* trx) /* in: transaction */ -{ - return(trx->active_commit_ordered == 1); -} - -/*********************************************************************//** -Check if transaction is started. -@reutrn true if transaction is in state started */ -static -bool -trx_is_started( -/*===========*/ - trx_t* trx) /* in: transaction */ -{ - return(trx->state != TRX_STATE_NOT_STARTED); -} - -/****************************************************************//** -Update log_checksum_algorithm_ptr with a pointer to the function corresponding -to a given checksum algorithm. 
*/ - -void -innodb_log_checksum_func_update( -/*============================*/ - ulint algorithm) /*!< in: algorithm */ -{ - switch (algorithm) { - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - case SRV_CHECKSUM_ALGORITHM_INNODB: - log_checksum_algorithm_ptr=log_block_calc_checksum_innodb; - break; - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - case SRV_CHECKSUM_ALGORITHM_CRC32: - log_checksum_algorithm_ptr=log_block_calc_checksum_crc32; - break; - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - case SRV_CHECKSUM_ALGORITHM_NONE: - log_checksum_algorithm_ptr=log_block_calc_checksum_none; - break; - default: - ut_a(0); - } -} - -/****************************************************************//** -On update hook for the innodb_log_checksum_algorithm variable. */ -static -void -innodb_log_checksum_algorithm_update( -/*=================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - srv_checksum_algorithm_t algorithm; - - algorithm = (srv_checksum_algorithm_t) - (*static_cast<const ulong*>(save)); - - /* Make sure we are the only log user */ - mutex_enter(&log_sys->mutex); - - innodb_log_checksum_func_update(algorithm); - - srv_log_checksum_algorithm = algorithm; - - mutex_exit(&log_sys->mutex); -} - -/*********************************************************************//** -Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object. -Those flags are stored in .frm file and end up in the MySQL table object, -but are frequently used inside InnoDB so we keep their copies into the -InnoDB table object. 
*/
UNIV_INTERN
void
innobase_copy_frm_flags_from_create_info(
/*=====================================*/
	dict_table_t*		innodb_table,	/*!< in/out: InnoDB table */
	const HA_CREATE_INFO*	create_info)	/*!< in: create info */
{
	/* Temp tables do not use persistent stats: start from that
	setting and only consult the create options for other tables. */
	ibool	ps_on = FALSE;
	ibool	ps_off = TRUE;

	if (!dict_table_is_temporary(innodb_table)) {
		ps_on = create_info->table_options
			& HA_OPTION_STATS_PERSISTENT;
		ps_off = create_info->table_options
			& HA_OPTION_NO_STATS_PERSISTENT;
	}

	dict_stats_set_persistent(innodb_table, ps_on, ps_off);

	const bool	recalc_on
		= (create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON);
	const bool	recalc_off
		= (create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);

	dict_stats_auto_recalc_set(innodb_table, recalc_on, recalc_off);

	innodb_table->stats_sample_pages = create_info->stats_sample_pages;
}

/*********************************************************************//**
Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
Those flags are stored in .frm file and end up in the MySQL table object,
but are frequently used inside InnoDB so we keep their copies into the
InnoDB table object.
*/ -UNIV_INTERN -void -innobase_copy_frm_flags_from_table_share( -/*=====================================*/ - dict_table_t* innodb_table, /*!< in/out: InnoDB table */ - const TABLE_SHARE* table_share) /*!< in: table share */ -{ - ibool ps_on; - ibool ps_off; - - if (dict_table_is_temporary(innodb_table)) { - /* Temp tables do not use persistent stats */ - ps_on = FALSE; - ps_off = TRUE; - } else { - ps_on = table_share->db_create_options - & HA_OPTION_STATS_PERSISTENT; - ps_off = table_share->db_create_options - & HA_OPTION_NO_STATS_PERSISTENT; - } - - dict_stats_set_persistent(innodb_table, ps_on, ps_off); - - dict_stats_auto_recalc_set( - innodb_table, - table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON, - table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF); - - innodb_table->stats_sample_pages = table_share->stats_sample_pages; -} - -/*********************************************************************//** -Construct ha_innobase handler. */ -UNIV_INTERN -ha_innobase::ha_innobase( -/*=====================*/ - handlerton* hton, - TABLE_SHARE* table_arg) - :handler(hton, table_arg), - int_table_flags(HA_REC_NOT_IN_SEQ | - HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS | - HA_CAN_INDEX_BLOBS | HA_CONCURRENT_OPTIMIZE | - HA_CAN_SQL_HANDLER | - HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | - HA_PRIMARY_KEY_IN_READ_INDEX | - HA_BINLOG_ROW_CAPABLE | - HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ | - HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT | - (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0 ) | - HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT), - start_of_scan(0), - num_write_row(0), - ha_partition_stats(NULL) -{} - -/*********************************************************************//** -Destruct ha_innobase handler. 
*/ -UNIV_INTERN -ha_innobase::~ha_innobase() -/*======================*/ -{ -} - -/*********************************************************************//** -Updates the user_thd field in a handle and also allocates a new InnoDB -transaction handle if needed, and updates the transaction fields in the -prebuilt struct. */ -UNIV_INTERN inline -void -ha_innobase::update_thd( -/*====================*/ - THD* thd) /*!< in: thd to use the handle */ -{ - trx_t* trx; - - DBUG_ENTER("ha_innobase::update_thd"); - DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p", - user_thd, thd)); - - /* The table should have been opened in ha_innobase::open(). */ - DBUG_ASSERT(prebuilt->table->n_ref_count > 0); - - trx = check_trx_exists(thd); - - if (prebuilt->trx != trx) { - - row_update_prebuilt_trx(prebuilt, trx); - } - - user_thd = thd; - DBUG_VOID_RETURN; -} - -/*********************************************************************//** -Updates the user_thd field in a handle and also allocates a new InnoDB -transaction handle if needed, and updates the transaction fields in the -prebuilt struct. */ -UNIV_INTERN -void -ha_innobase::update_thd() -/*=====================*/ -{ - THD* thd = ha_thd(); - - ut_ad(EQ_CURRENT_THD(thd)); - update_thd(thd); -} - -/*********************************************************************//** -Registers an InnoDB transaction with the MySQL 2PC coordinator, so that -the MySQL XA code knows to call the InnoDB prepare and commit, or rollback -for the transaction. This MUST be called for every transaction for which -the user may call commit or rollback. Calling this several times to register -the same transaction is allowed, too. This function also registers the -current SQL statement. 
*/
-static inline
-void
-innobase_register_trx(
-/*==================*/
-	handlerton*	hton,	/* in: Innobase handlerton */
-	THD*		thd,	/* in: MySQL thd (connection) object */
-	trx_t*		trx)	/* in: transaction to register */
-{
-	/* This registration is done on every call. */
-	trans_register_ha(thd, FALSE, hton);
-
-	/* The second registration is done only while the trx is not yet
-	marked as registered for 2PC, and only when the thd is not in
-	autocommit mode or is inside BEGIN. */
-	if (!trx_is_registered_for_2pc(trx)) {
-		if (thd_test_options(thd,
-				     OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-			trans_register_ha(thd, TRUE, hton);
-		}
-	}
-
-	trx_register_for_2pc(trx);
-}
-
-/*	BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
-	------------------------------------------------------------
-
-1) The use of the query cache for TBL is disabled when there is an
-uncommitted change to TBL.
-
-2) When a change to TBL commits, InnoDB stores the current value of
-its global trx id counter, let us denote it by INV_TRX_ID, to the table object
-in the InnoDB data dictionary, and does only allow such transactions whose
-id <= INV_TRX_ID to use the query cache.
-
-3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
-modification because of an ON DELETE CASCADE, we invalidate the MySQL query
-cache of TBL immediately.
-
-How this is implemented inside InnoDB:
-
-1) Since every modification always sets an IX type table lock on the InnoDB
-table, it is easy to check if there can be uncommitted modifications for a
-table: just check if there are locks in the lock list of the table.
-
-2) When a transaction inside InnoDB commits, it reads the global trx id
-counter and stores the value INV_TRX_ID to the tables on which it had a lock.
-
-3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
-InnoDB calls an invalidate method for the MySQL query cache for that table.
-
-How this is implemented inside sql_cache.cc:
-
-1) The query cache for an InnoDB table TBL is invalidated immediately at an
-INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
-invalidation to the transaction commit.
- -2) To store or retrieve a value from the query cache of an InnoDB table TBL, -any query must first ask InnoDB's permission. We must pass the thd as a -parameter because InnoDB will look at the trx id, if any, associated with -that thd. Also the full_name which is used as key to search for the table -object. The full_name is a string containing the normalized path to the -table in the canonical format. - -3) Use of the query cache for InnoDB tables is now allowed also when -AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer -put restrictions on the use of the query cache. -*/ - -/******************************************************************//** -The MySQL query cache uses this to check from InnoDB if the query cache at -the moment is allowed to operate on an InnoDB table. The SQL query must -be a non-locking SELECT. - -The query cache is allowed to operate on certain query only if this function -returns TRUE for all tables in the query. - -If thd is not in the autocommit state, this function also starts a new -transaction for thd if there is no active trx yet, and assigns a consistent -read view to it if there is no read view yet. - -Why a deadlock of threads is not possible: the query cache calls this function -at the start of a SELECT processing. Then the calling thread cannot be -holding any InnoDB semaphores. The calling thread is holding the -query cache mutex, and this function will reserve the InnoDB trx_sys->mutex. -Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above -the InnoDB trx_sys->mutex. 
-@return TRUE if permitted, FALSE if not; note that the value FALSE -does not mean we should invalidate the query cache: invalidation is -called explicitly */ -static -my_bool -innobase_query_caching_of_table_permitted( -/*======================================*/ - THD* thd, /*!< in: thd of the user who is trying to - store a result to the query cache or - retrieve it */ - const char* full_name, /*!< in: normalized path to the table */ - uint full_name_len, /*!< in: length of the normalized path - to the table */ - ulonglong *unused) /*!< unused for this engine */ -{ - ibool is_autocommit; - trx_t* trx; - char norm_name[1000]; - - ut_a(full_name_len < 999); - - trx = check_trx_exists(thd); - - if (trx->isolation_level == TRX_ISO_SERIALIZABLE) { - /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every - plain SELECT if AUTOCOMMIT is not on. */ - - return((my_bool)FALSE); - } - - if (UNIV_UNLIKELY(trx->has_search_latch)) { - sql_print_error("The calling thread is holding the adaptive " - "search, latch though calling " - "innobase_query_caching_of_table_permitted."); - trx_print(stderr, trx, 1024); - } - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - is_autocommit = TRUE; - } else { - is_autocommit = FALSE; - - } - - if (is_autocommit && trx->n_mysql_tables_in_use == 0) { - /* We are going to retrieve the query result from the query - cache. This cannot be a store operation to the query cache - because then MySQL would have locks on tables already. - - TODO: if the user has used LOCK TABLES to lock the table, - then we open a transaction in the call of row_.. below. - That trx can stay open until UNLOCK TABLES. The same problem - exists even if we do not use the query cache. MySQL should be - modified so that it ALWAYS calls some cleanup function when - the processing of a query ends! 
- - We can imagine we instantaneously serialize this consistent - read trx to the current trx id counter. If trx2 would have - changed the tables of a query result stored in the cache, and - trx2 would have already committed, making the result obsolete, - then trx2 would have already invalidated the cache. Thus we - can trust the result in the cache is ok for this query. */ - - return((my_bool)TRUE); - } - - /* Normalize the table name to InnoDB format */ - normalize_table_name(norm_name, full_name); - - innobase_register_trx(innodb_hton_ptr, thd, trx); - - if (row_search_check_if_query_cache_permitted(trx, norm_name)) { - - /* printf("Query cache for %s permitted\n", norm_name); */ - - return((my_bool)TRUE); - } - - /* printf("Query cache for %s NOT permitted\n", norm_name); */ - - return((my_bool)FALSE); -} - -/*****************************************************************//** -Invalidates the MySQL query cache for the table. */ -UNIV_INTERN -void -innobase_invalidate_query_cache( -/*============================*/ - trx_t* trx, /*!< in: transaction which - modifies the table */ - const char* full_name, /*!< in: concatenation of - database name, null char NUL, - table name, null char NUL; - NOTE that in Windows this is - always in LOWER CASE! */ - ulint full_name_len) /*!< in: full name length where - also the null chars count */ -{ - /* Note that the sync0sync.h rank of the query cache mutex is just - above the InnoDB trx_sys_t->lock. The caller of this function must - not have latches of a lower rank. */ - -#ifdef HAVE_QUERY_CACHE - char qcache_key_name[2 * (NAME_LEN + 1)]; - size_t tabname_len; - size_t dbname_len; - - /* Construct the key("db-name\0table$name\0") for the query cache using - the path name("db@002dname\0table@0024name\0") of the table in its - canonical form. 
*/ - dbname_len = filename_to_tablename(full_name, qcache_key_name, - sizeof(qcache_key_name)); - tabname_len = filename_to_tablename(full_name + strlen(full_name) + 1, - qcache_key_name + dbname_len + 1, - sizeof(qcache_key_name) - - dbname_len - 1); - - /* Argument TRUE below means we are using transactions */ - mysql_query_cache_invalidate4(trx->mysql_thd, - qcache_key_name, - (dbname_len + tabname_len + 2), - TRUE); -#endif -} - -/*****************************************************************//** -Convert an SQL identifier to the MySQL system_charset_info (UTF-8) -and quote it if needed. -@return pointer to the end of buf */ -static -char* -innobase_convert_identifier( -/*========================*/ - char* buf, /*!< out: buffer for converted identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* id, /*!< in: identifier to convert */ - ulint idlen, /*!< in: length of id, in bytes */ - THD* thd, /*!< in: MySQL connection thread, or NULL */ - ibool file_id)/*!< in: TRUE=id is a table or database name; - FALSE=id is an UTF-8 string */ -{ - char nz2[MAX_TABLE_NAME_LEN + 1]; - const char* s = id; - int q; - - if (file_id) { - - char nz[MAX_TABLE_NAME_LEN + 1]; - - /* Decode the table name. The MySQL function expects - a NUL-terminated string. The input and output strings - buffers must not be shared. */ - ut_a(idlen <= MAX_TABLE_NAME_LEN); - memcpy(nz, id, idlen); - nz[idlen] = 0; - - s = nz2; - idlen = explain_filename(thd, nz, nz2, sizeof nz2, - EXPLAIN_PARTITIONS_AS_COMMENT); - goto no_quote; - } - - /* See if the identifier needs to be quoted. */ - if (UNIV_UNLIKELY(!thd)) { - q = '"'; - } else { - q = get_quote_char_for_identifier(thd, s, (int) idlen); - } - - if (q == EOF) { -no_quote: - if (UNIV_UNLIKELY(idlen > buflen)) { - idlen = buflen; - } - memcpy(buf, s, idlen); - return(buf + idlen); - } - - /* Quote the identifier. 
*/ - if (buflen < 2) { - return(buf); - } - - *buf++ = q; - buflen--; - - for (; idlen; idlen--) { - int c = *s++; - if (UNIV_UNLIKELY(c == q)) { - if (UNIV_UNLIKELY(buflen < 3)) { - break; - } - - *buf++ = c; - *buf++ = c; - buflen -= 2; - } else { - if (UNIV_UNLIKELY(buflen < 2)) { - break; - } - - *buf++ = c; - buflen--; - } - } - - *buf++ = q; - return(buf); -} - -/*****************************************************************//** -Convert a table or index name to the MySQL system_charset_info (UTF-8) -and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN -char* -innobase_convert_name( -/*==================*/ - char* buf, /*!< out: buffer for converted identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* id, /*!< in: identifier to convert */ - ulint idlen, /*!< in: length of id, in bytes */ - THD* thd, /*!< in: MySQL connection thread, or NULL */ - ibool table_id)/*!< in: TRUE=id is a table or database name; - FALSE=id is an index name */ -{ - char* s = buf; - const char* bufend = buf + buflen; - - if (table_id) { - const char* slash = (const char*) memchr(id, '/', idlen); - if (!slash) { - - goto no_db_name; - } - - /* Print the database name and table name separately. 
*/ - s = innobase_convert_identifier(s, bufend - s, id, slash - id, - thd, TRUE); - if (UNIV_LIKELY(s < bufend)) { - *s++ = '.'; - s = innobase_convert_identifier(s, bufend - s, - slash + 1, idlen - - (slash - id) - 1, - thd, TRUE); - } - } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) { - /* Temporary index name (smart ALTER TABLE) */ - const char temp_index_suffix[]= "--temporary--"; - - s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1, - thd, FALSE); - if (s - buf + (sizeof temp_index_suffix - 1) < buflen) { - memcpy(s, temp_index_suffix, - sizeof temp_index_suffix - 1); - s += sizeof temp_index_suffix - 1; - } - } else { -no_db_name: - s = innobase_convert_identifier(buf, buflen, id, idlen, - thd, table_id); - } - - return(s); -} - -/*****************************************************************//** -A wrapper function of innobase_convert_name(), convert a table or -index name to the MySQL system_charset_info (UTF-8) and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN -void -innobase_format_name( -/*==================*/ - char* buf, /*!< out: buffer for converted identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* name, /*!< in: index or table name to format */ - ibool is_index_name) /*!< in: index name */ -{ - const char* bufend; - - bufend = innobase_convert_name(buf, buflen, name, strlen(name), - NULL, !is_index_name); - - ut_ad((ulint) (bufend - buf) < buflen); - - buf[bufend - buf] = '\0'; -} - -/**********************************************************************//** -Determines if the currently running transaction has been interrupted. 
-@return TRUE if interrupted */
-UNIV_INTERN
-ibool
-trx_is_interrupted(
-/*===============*/
-	const trx_t*	trx)	/*!< in: transaction, may be NULL */
-{
-	/* Without a transaction or an attached thd there is nothing that
-	could have been interrupted. */
-	if (!trx || !trx->mysql_thd) {
-		return(FALSE);
-	}
-
-	return(thd_kill_level((THD*) trx->mysql_thd) ? TRUE : FALSE);
-}
-
-/**********************************************************************//**
-Determines if the currently running transaction is in strict mode, i.e.
-whether the strict_mode variable of the thd attached to it is set.
-@return TRUE if strict */
-UNIV_INTERN
-ibool
-trx_is_strict(
-/*==========*/
-	trx_t*	trx)	/*!< in: transaction, may be NULL */
-{
-	if (!trx || !trx->mysql_thd) {
-		return(FALSE);
-	}
-
-	return(THDVAR(trx->mysql_thd, strict_mode) ? TRUE : FALSE);
-}
-
-/**************************************************************//**
-Resets some fields of a prebuilt struct. The template is used in fast
-retrieval of just those column values MySQL needs in its processing. */
-inline
-void
-ha_innobase::reset_template(void)
-/*=============================*/
-{
-	ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
-	ut_ad(prebuilt->magic_n2 == prebuilt->magic_n);
-
-	/* Force table to be freed in close_thread_table(). This has to
-	look at in_fts_query before the flag is cleared below. */
-	DBUG_EXECUTE_IF("free_table_in_fts_query",
-		if (prebuilt->in_fts_query) {
-			table->m_needs_reopen = true;
-		}
-	);
-
-	prebuilt->in_fts_query = 0;
-	prebuilt->read_just_key = 0;
-	prebuilt->keep_other_fields_on_keyread = 0;
-
-	if (!prebuilt->idx_cond) {
-		/* No index condition pushdown state to reset. */
-		return;
-	}
-
-	/* Reset index condition pushdown state. */
-	prebuilt->idx_cond = NULL;
-	prebuilt->idx_cond_n_cols = 0;
-	/* Invalidate prebuilt->mysql_template
-	in ha_innobase::write_row(). */
-	prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
-}
-
-/*****************************************************************//**
-Call this when you have opened a new table handle in HANDLER, before you
-call index_read_idx() etc. Actually, we can let the cursor stay open even
-over a transaction commit! Then you should call this before every operation,
-fetch next etc. This function inits the necessary things even after a
-transaction commit.
*/
-UNIV_INTERN
-void
-ha_innobase::init_table_handle_for_HANDLER(void)
-/*============================================*/
-{
-	/* If current thd does not yet have a trx struct, create one.
-	If the current handle does not yet have a prebuilt struct, create
-	one. Update the trx pointers in the prebuilt struct. Normally
-	this operation is done in external_lock. */
-
-	update_thd(ha_thd());
-
-	/* Read only after update_thd(), which may have switched the
-	prebuilt struct to another transaction. */
-	trx_t* const	trx = prebuilt->trx;
-
-	/* Initialize the prebuilt struct much like it would be inited in
-	external_lock */
-
-	trx_search_latch_release_if_reserved(trx);
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* If the transaction is not started yet, start it */
-	trx_start_if_not_started_xa(trx);
-
-	/* Assign a read view if the transaction does not have it yet */
-	trx_assign_read_view(trx);
-
-	innobase_register_trx(ht, user_thd, trx);
-
-	/* We did the necessary inits in this function, no need to repeat them
-	in row_search_for_mysql */
-	prebuilt->sql_stat_start = FALSE;
-
-	/* We let HANDLER always to do the reads as consistent reads, even
-	if the trx isolation level would have been specified as SERIALIZABLE */
-	prebuilt->stored_select_lock_type = LOCK_NONE;
-	prebuilt->select_lock_type = LOCK_NONE;
-
-	/* Always fetch all columns in the index record */
-	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
-
-	/* We want always to fetch all columns in the whole row? Or do
-	we???? */
-
-	prebuilt->used_in_HANDLER = TRUE;
-	reset_template();
-}
-
-/****************************************************************//**
-File extensions of an InnoDB single-table tablespace; the list is
-terminated by NullS. */
-static const char* ha_innobase_exts[] = {
-	".ibd", ".isl", NullS
-};
-
-/*********************************************************************//**
-Opens an InnoDB database.
-@return 0 on success, error code on failure */ -static -int -innobase_init( -/*==========*/ - void *p) /*!< in: InnoDB handlerton */ -{ - static char current_dir[3]; /*!< Set if using current lib */ - int err; - bool ret; - char *default_path; - uint format_id; - ulong num_pll_degree; - - DBUG_ENTER("innobase_init"); - handlerton *innobase_hton= (handlerton*) p; - innodb_hton_ptr = innobase_hton; - - innobase_hton->state = SHOW_OPTION_YES; - innobase_hton->db_type= DB_TYPE_INNODB; - innobase_hton->savepoint_offset = sizeof(trx_named_savept_t); - innobase_hton->close_connection = innobase_close_connection; - innobase_hton->savepoint_set = innobase_savepoint; - innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint; - innobase_hton->savepoint_rollback_can_release_mdl = - innobase_rollback_to_savepoint_can_release_mdl; - innobase_hton->savepoint_release = innobase_release_savepoint; - innobase_hton->commit_ordered=innobase_commit_ordered; - innobase_hton->commit = innobase_commit; - innobase_hton->rollback = innobase_rollback; - innobase_hton->prepare = innobase_xa_prepare; - innobase_hton->recover = innobase_xa_recover; - innobase_hton->commit_by_xid = innobase_commit_by_xid; - innobase_hton->rollback_by_xid = innobase_rollback_by_xid; - innobase_hton->commit_checkpoint_request=innobase_checkpoint_request; - innobase_hton->checkpoint_state= innobase_checkpoint_state; - innobase_hton->create_cursor_read_view = innobase_create_cursor_view; - innobase_hton->set_cursor_read_view = innobase_set_cursor_view; - innobase_hton->close_cursor_read_view = innobase_close_cursor_view; - innobase_hton->create = innobase_create_handler; - innobase_hton->drop_database = innobase_drop_database; - innobase_hton->panic = innobase_end; - - innobase_hton->start_consistent_snapshot = - innobase_start_trx_and_assign_read_view; - - /*innobase_hton->store_binlog_info = - innobase_store_binlog_info;*/ - - innobase_hton->flush_logs = innobase_flush_logs; - 
innobase_hton->show_status = innobase_show_status; - innobase_hton->flags = HTON_SUPPORTS_EXTENDED_KEYS | - HTON_SUPPORTS_FOREIGN_KEYS; - - innobase_hton->kill_query = innobase_kill_connection; - - if (srv_file_per_table) - innobase_hton->tablefile_extensions = ha_innobase_exts; - - innobase_hton->table_options = innodb_table_option_list; -#ifdef WITH_WSREP - innobase_hton->abort_transaction=wsrep_abort_transaction; - innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint; - innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint; - innobase_hton->fake_trx_id=wsrep_fake_trx_id; -#endif /* WITH_WSREP */ - - innodb_remember_check_sysvar_funcs(); - - ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); - -#ifndef DBUG_OFF - static const char test_filename[] = "-@"; - char test_tablename[sizeof test_filename - + sizeof(srv_mysql50_table_name_prefix) - 1]; - if ((sizeof(test_tablename)) - 1 - != filename_to_tablename(test_filename, - test_tablename, - sizeof(test_tablename), true) - || strncmp(test_tablename, - srv_mysql50_table_name_prefix, - sizeof(srv_mysql50_table_name_prefix) - 1) - || strcmp(test_tablename - + sizeof(srv_mysql50_table_name_prefix) - 1, - test_filename)) { - - sql_print_error("tablename encoding has been changed"); - - goto error; - } -#endif /* DBUG_OFF */ - - srv_log_block_size = 0; - if (innobase_log_block_size != (1 << 9)) { /*!=512*/ - uint n_shift; - - fprintf(stderr, - "InnoDB: Warning: innodb_log_block_size has been " - "changed from default value 512. 
(###EXPERIMENTAL### " - "operation)\n"); - for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX; - n_shift++) { - if (innobase_log_block_size == ((ulong)1 << n_shift)) { - srv_log_block_size = (1 << n_shift); - fprintf(stderr, - "InnoDB: The log block size is set to " - ULINTPF ".\n",srv_log_block_size); - break; - } - } - } else { - srv_log_block_size = 512; - } - - /* The buffer pool needs to be able to accommodate enough many - pages, even for larger pages */ - if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF - && innobase_buffer_pool_size < (24 * 1024 * 1024)) { - ib_logf(IB_LOG_LEVEL_INFO, - "innodb_page_size= " ULINTPF " requires " - "innodb_buffer_pool_size > 24M current %lld. ", - UNIV_PAGE_SIZE, - innobase_buffer_pool_size); - goto error; - } - - ut_ad (srv_log_block_size >= OS_MIN_LOG_BLOCK_SIZE); - - if (!srv_log_block_size) { - fprintf(stderr, - "InnoDB: Error: %lu is not a valid value for " - "innodb_log_block_size.\n" - "InnoDB: Error: A valid value for " - "innodb_log_block_size is\n" - "InnoDB: Error: a power of 2 from 512 to 16384.\n", - innobase_log_block_size); - goto error; - } - - /* Check that values don't overflow on 32-bit systems. */ - if (sizeof(ulint) == 4) { - if (innobase_buffer_pool_size > UINT_MAX32) { - sql_print_error( - "innobase_buffer_pool_size can't be over 4GB" - " on 32-bit systems"); - - goto error; - } - } - -#ifndef HAVE_LZ4 - if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblz4 is not installed. \n", - innodb_compression_algorithm); - goto error; - } -#endif - -#ifndef HAVE_LZO - if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblzo is not installed. 
\n", - innodb_compression_algorithm); - goto error; - } -#endif - -#ifndef HAVE_LZMA - if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblzma is not installed. \n", - innodb_compression_algorithm); - goto error; - } -#endif - -#ifndef HAVE_BZIP2 - if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: libbz2 is not installed. \n", - innodb_compression_algorithm); - goto error; - } -#endif - -#ifndef HAVE_SNAPPY - if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: libsnappy is not installed. \n", - innodb_compression_algorithm); - goto error; - } -#endif - - if ((srv_encrypt_tables || srv_encrypt_log) - && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { - sql_print_error("InnoDB: cannot enable encryption, " - "encryption plugin is not available"); - goto error; - } - - os_innodb_umask = (ulint) my_umask; - - /* First calculate the default path for innodb_data_home_dir etc., - in case the user has not given any value. - - Note that when using the embedded server, the datadirectory is not - necessarily the current directory of this program. */ - - if (mysqld_embedded) { - default_path = mysql_real_data_home; - fil_path_to_mysql_datadir = mysql_real_data_home; - } else { - /* It's better to use current lib, to keep paths short */ - current_dir[0] = FN_CURLIB; - current_dir[1] = FN_LIBCHAR; - current_dir[2] = 0; - default_path = current_dir; - } - - ut_a(default_path); - - /* Set InnoDB initialization parameters according to the values - read from MySQL .cnf file */ - - /*--------------- Data files -------------------------*/ - - /* The default dir for data files is the datadir of MySQL */ - - srv_data_home = (innobase_data_home_dir ? 
innobase_data_home_dir : - default_path); - - - /* Set default InnoDB data file size to 12 MB and let it be - auto-extending. Thus users can use InnoDB in >= 4.0 without having - to specify any startup options. */ - - if (!innobase_data_file_path) { - innobase_data_file_path = (char*) "ibdata1:12M:autoextend"; - } - - /* Since InnoDB edits the argument in the next call, we make another - copy of it: */ - - internal_innobase_data_file_path = my_strdup(innobase_data_file_path, - MYF(MY_FAE)); - - ret = (bool) srv_parse_data_file_paths_and_sizes( - internal_innobase_data_file_path); - if (ret == FALSE) { - sql_print_error( - "InnoDB: syntax error in innodb_data_file_path" - " or size specified is less than 1 megabyte"); -mem_free_and_error: - srv_free_paths_and_sizes(); - my_free(internal_innobase_data_file_path); - goto error; - } - - /* -------------- All log files ---------------------------*/ - - /* The default dir for log files is the datadir of MySQL */ - - if (!srv_log_group_home_dir) { - srv_log_group_home_dir = default_path; - } - -#ifdef UNIV_LOG_ARCHIVE - if (!innobase_log_arch_dir) { - innobase_log_arch_dir = srv_log_group_home_dir; - } - srv_arch_dir = innobase_log_arch_dir; -#endif /* UNIG_LOG_ARCHIVE */ - - srv_normalize_path_for_win(srv_log_group_home_dir); - - if (strchr(srv_log_group_home_dir, ';')) { - sql_print_error("syntax error in innodb_log_group_home_dir"); - goto mem_free_and_error; - } - - if (innobase_mirrored_log_groups == 1) { - sql_print_warning( - "innodb_mirrored_log_groups is an unimplemented " - "feature and the variable will be completely " - "removed in a future version."); - } - - if (innobase_mirrored_log_groups > 1) { - sql_print_error( - "innodb_mirrored_log_groups is an unimplemented feature and " - "the variable will be completely removed in a future version. 
" - "Using values other than 1 is not supported."); - goto mem_free_and_error; - } - - if (innobase_mirrored_log_groups == 0) { - /* To throw a deprecation warning message when the option is - passed, the default was changed to '0' (as a workaround). Since - the only value accepted for this option is '1', reset it to 1 */ - innobase_mirrored_log_groups = 1; - } - - /* Validate the file format by animal name */ - if (innobase_file_format_name != NULL) { - - format_id = innobase_file_format_name_lookup( - innobase_file_format_name); - - if (format_id > UNIV_FORMAT_MAX) { - - sql_print_error("InnoDB: wrong innodb_file_format."); - - goto mem_free_and_error; - } - } else { - /* Set it to the default file format id. Though this - should never happen. */ - format_id = 0; - } - - srv_file_format = format_id; - - /* Given the type of innobase_file_format_name we have little - choice but to cast away the constness from the returned name. - innobase_file_format_name is used in the MySQL set variable - interface and so can't be const. */ - - innobase_file_format_name = - (char*) trx_sys_file_format_id_to_name(format_id); - - /* Check innobase_file_format_check variable */ - if (!innobase_file_format_check) { - - /* Set the value to disable checking. */ - srv_max_file_format_at_startup = UNIV_FORMAT_MAX + 1; - - } else { - - /* Set the value to the lowest supported format. */ - srv_max_file_format_at_startup = UNIV_FORMAT_MIN; - } - - /* Did the user specify a format name that we support? 
- As a side effect it will update the variable - srv_max_file_format_at_startup */ - if (innobase_file_format_validate_and_set( - innobase_file_format_max) < 0) { - - sql_print_error("InnoDB: invalid " - "innodb_file_format_max value: " - "should be any value up to %s or its " - "equivalent numeric id", - trx_sys_file_format_id_to_name( - UNIV_FORMAT_MAX)); - - goto mem_free_and_error; - } - - if (innobase_change_buffering) { - ulint use; - - for (use = 0; - use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { - if (!innobase_strcasecmp( - innobase_change_buffering, - innobase_change_buffering_values[use])) { - ibuf_use = (ibuf_use_t) use; - goto innobase_change_buffering_inited_ok; - } - } - - sql_print_error("InnoDB: invalid value " - "innodb_change_buffering=%s", - innobase_change_buffering); - goto mem_free_and_error; - } - -innobase_change_buffering_inited_ok: - ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values)); - innobase_change_buffering = (char*) - innobase_change_buffering_values[ibuf_use]; - - /* Check that interdependent parameters have sane values. */ - if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) { - sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" - " cannot be set higher than" - " innodb_max_dirty_pages_pct.\n" - "InnoDB: Setting" - " innodb_max_dirty_pages_pct_lwm to %lf\n", - srv_max_buf_pool_modified_pct); - - srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; - } - - if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) { - - if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) { - /* Avoid overflow. */ - srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT; - } else { - /* The user has not set the value. We should - set it based on innodb_io_capacity. 
*/ - srv_max_io_capacity = static_cast<ulong>( - ut_max(2 * srv_io_capacity, 2000)); - } - - } else if (srv_max_io_capacity < srv_io_capacity) { - sql_print_warning("InnoDB: innodb_io_capacity" - " cannot be set higher than" - " innodb_io_capacity_max.\n" - "InnoDB: Setting" - " innodb_io_capacity to %lu\n", - srv_max_io_capacity); - - srv_io_capacity = srv_max_io_capacity; - } - - if (!is_filename_allowed(srv_buf_dump_filename, - strlen(srv_buf_dump_filename), FALSE)) { - sql_print_error("InnoDB: innodb_buffer_pool_filename" - " cannot have colon (:) in the file name."); - goto mem_free_and_error; - } - - /* --------------------------------------------------*/ - - srv_file_flush_method_str = innobase_file_flush_method; - - srv_log_file_size = (ib_uint64_t) innobase_log_file_size; - -#ifdef UNIV_LOG_ARCHIVE - srv_log_archive_on = (ulint) innobase_log_archive; -#endif /* UNIV_LOG_ARCHIVE */ - - /* Check that the value of system variable innodb_page_size was - set correctly. Its value was put into srv_page_size. 
If valid, - return the associated srv_page_size_shift.*/ - srv_page_size_shift = innodb_page_size_validate(srv_page_size); - if (!srv_page_size_shift) { - sql_print_error("InnoDB: Invalid page size=%lu.\n", - srv_page_size); - goto mem_free_and_error; - } - - if (UNIV_PAGE_SIZE_DEF != srv_page_size) { - ib_logf(IB_LOG_LEVEL_INFO, - " innodb-page-size has been changed" - " from the default value %d to " ULINTPF " .", - UNIV_PAGE_SIZE_DEF, srv_page_size); - } - - srv_log_buffer_size = (ulint) innobase_log_buffer_size; - - if (innobase_buffer_pool_instances == 0) { - innobase_buffer_pool_instances = 8; - -#if defined(__WIN__) && !defined(_WIN64) - if (innobase_buffer_pool_size > 1331 * 1024 * 1024) { - innobase_buffer_pool_instances - = ut_min(MAX_BUFFER_POOLS, - (long) (innobase_buffer_pool_size - / (128 * 1024 * 1024))); - } -#endif /* defined(__WIN__) && !defined(_WIN64) */ - } - srv_buf_pool_size = (ulint) innobase_buffer_pool_size; - srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances; - - srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size; - - if (innobase_additional_mem_pool_size - != 8*1024*1024L /* the default */ ) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Using " - "innodb_additional_mem_pool_size is DEPRECATED. " - "This option may be removed in future releases, " - "together with the option innodb_use_sys_malloc " - "and with the InnoDB's internal memory " - "allocator.\n"); - } - - if (!srv_use_sys_malloc ) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Setting " - "innodb_use_sys_malloc to FALSE is DEPRECATED. " - "This option may be removed in future releases, " - "together with the InnoDB's internal memory " - "allocator.\n"); - } - - if (innodb_buffer_pool_populate) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Setting " - "innodb_buffer_pool_populate is DEPRECATED" - " and has no effect. 
" - "This option will be removed in MariaDB 10.2.3.\n"); - } - - srv_n_file_io_threads = (ulint) innobase_file_io_threads; - srv_n_read_io_threads = (ulint) innobase_read_io_threads; - srv_n_write_io_threads = (ulint) innobase_write_io_threads; - - srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; - - if (!innobase_use_checksums) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Setting " - "innodb_checksums to OFF is DEPRECATED. " - "This option may be removed in future releases. " - "You should set innodb_checksum_algorithm=NONE " - "instead.\n"); - srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE; - } - - innodb_log_checksum_func_update(srv_log_checksum_algorithm); - -#ifdef HAVE_LARGE_PAGES - if ((os_use_large_pages = (ibool) my_use_large_pages)) { - os_large_page_size = (ulint) opt_large_page_size; - } -#endif - - row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout; - - srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog; - if (innobase_locks_unsafe_for_binlog) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: Using " - "innodb_locks_unsafe_for_binlog is DEPRECATED. " - "This option may be removed in future releases. " - "Please use READ COMMITTED transaction isolation " - "level instead, see " REFMAN "set-transaction.html.\n"); - } - - if (innobase_open_files < 10) { - innobase_open_files = 300; - if (srv_file_per_table && tc_size > 300) { - innobase_open_files = tc_size; - } - } - - if (innobase_open_files > (long) open_files_limit) { - fprintf(stderr, - "innodb_open_files should not be greater" - " than the open_files_limit.\n"); - if (innobase_open_files > (long) tc_size) { - innobase_open_files = tc_size; - } - } - - srv_max_n_open_files = (ulint) innobase_open_files; - srv_innodb_status = (ibool) innobase_create_status_file; - - srv_print_verbose_log = mysqld_embedded ? 
0 : 1; - - /* Round up fts_sort_pll_degree to nearest power of 2 number */ - for (num_pll_degree = 1; - num_pll_degree < fts_sort_pll_degree; - num_pll_degree <<= 1) { - - /* No op */ - } - - fts_sort_pll_degree = num_pll_degree; - - /* Store the default charset-collation number of this MySQL - installation */ - - data_mysql_default_charset_coll = (ulint) default_charset_info->number; - - ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL == - my_charset_latin1.number); - ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number); - - /* Store the latin1_swedish_ci character ordering table to InnoDB. For - non-latin1_swedish_ci charsets we use the MySQL comparison functions, - and consequently we do not need to know the ordering internally in - InnoDB. */ - - srv_latin1_ordering = my_charset_latin1.sort_order; - - innobase_commit_concurrency_init_default(); - -#ifdef HAVE_POSIX_FALLOCATE - srv_use_posix_fallocate = (ibool) innobase_use_fallocate; -#endif - /* Do not enable backoff algorithm for small buffer pool. */ - if (!innodb_empty_free_list_algorithm_allowed( - static_cast<srv_empty_free_list_t>( - srv_empty_free_list_algorithm))) { - sql_print_information( - "InnoDB: innodb_empty_free_list_algorithm " - "has been changed to legacy " - "because of small buffer pool size. " - "In order to use backoff, " - "increase buffer pool at least up to 20MB.\n"); - srv_empty_free_list_algorithm - = SRV_EMPTY_FREE_LIST_LEGACY; - } - - srv_use_atomic_writes = (ibool) innobase_use_atomic_writes; - if (innobase_use_atomic_writes) { - ib_logf(IB_LOG_LEVEL_INFO, "using atomic writes."); - - /* Force doublewrite buffer off, atomic writes replace it. 
*/ - if (srv_use_doublewrite_buf) { - ib_logf(IB_LOG_LEVEL_INFO, "switching off doublewrite " - "buffer because of atomic writes."); - innobase_use_doublewrite = FALSE; - srv_use_doublewrite_buf = FALSE; - } - - /* Force O_DIRECT on Unixes (on Windows writes are always - unbuffered)*/ -#ifndef _WIN32 - if(!innobase_file_flush_method || - !strstr(innobase_file_flush_method, "O_DIRECT")) { - innobase_file_flush_method = - srv_file_flush_method_str = (char*)"O_DIRECT"; - ib_logf(IB_LOG_LEVEL_INFO, - "using O_DIRECT due to atomic writes."); - } -#endif -#ifdef HAVE_POSIX_FALLOCATE - /* Due to a bug in directFS, using atomics needs - posix_fallocate() to extend the file, because pwrite() past the - end of the file won't work */ - srv_use_posix_fallocate = TRUE; -#endif - } - -#ifdef HAVE_PSI_INTERFACE - /* Register keys with MySQL performance schema */ - int count; - - count = array_elements(all_pthread_mutexes); - mysql_mutex_register("innodb", all_pthread_mutexes, count); - -# ifdef UNIV_PFS_MUTEX - count = array_elements(all_innodb_mutexes); - mysql_mutex_register("innodb", all_innodb_mutexes, count); -# endif /* UNIV_PFS_MUTEX */ - -# ifdef UNIV_PFS_RWLOCK - count = array_elements(all_innodb_rwlocks); - mysql_rwlock_register("innodb", all_innodb_rwlocks, count); -# endif /* UNIV_PFS_MUTEX */ - -# ifdef UNIV_PFS_THREAD - count = array_elements(all_innodb_threads); - mysql_thread_register("innodb", all_innodb_threads, count); -# endif /* UNIV_PFS_THREAD */ - -# ifdef UNIV_PFS_IO - count = array_elements(all_innodb_files); - mysql_file_register("innodb", all_innodb_files, count); -# endif /* UNIV_PFS_IO */ - - count = array_elements(all_innodb_conds); - mysql_cond_register("innodb", all_innodb_conds, count); -#endif /* HAVE_PSI_INTERFACE */ - - /* Since we in this module access directly the fields of a trx - struct, and due to different headers and flags it might happen that - ib_mutex_t has a different size in this module and in InnoDB - modules, we check at run time 
that the size is the same in - these compilation modules. */ - - err = innobase_start_or_create_for_mysql(); - - if (err != DB_SUCCESS) { - goto mem_free_and_error; - } - - /* Adjust the innodb_undo_logs config object */ - innobase_undo_logs_init_default_max(); - - innobase_old_blocks_pct = static_cast<uint>( - buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE)); - - ibuf_max_size_update(innobase_change_buffer_max_size); - - innobase_open_tables = hash_create(200); - mysql_mutex_init(innobase_share_mutex_key, - &innobase_share_mutex, - MY_MUTEX_INIT_FAST); - mysql_mutex_init(commit_cond_mutex_key, - &commit_cond_m, MY_MUTEX_INIT_FAST); - mysql_cond_init(commit_cond_key, &commit_cond, NULL); - mysql_mutex_init(pending_checkpoint_mutex_key, - &pending_checkpoint_mutex, - MY_MUTEX_INIT_FAST); - innodb_inited= 1; -#ifdef MYSQL_DYNAMIC_PLUGIN - if (innobase_hton != p) { - innobase_hton = reinterpret_cast<handlerton*>(p); - *innobase_hton = *innodb_hton_ptr; - } -#endif /* MYSQL_DYNAMIC_PLUGIN */ - - /* Get the current high water mark format. */ - innobase_file_format_max = (char*) trx_sys_file_format_max_get(); - - /* Currently, monitor counter information are not persistent. */ - memset(monitor_set_tbl, 0, sizeof monitor_set_tbl); - - memset(innodb_counter_value, 0, sizeof innodb_counter_value); - - /* Do this as late as possible so server is fully starts up, - since we might get some initial stats if user choose to turn - on some counters from start up */ - if (innobase_enable_monitor_counter) { - innodb_enable_monitor_at_startup( - innobase_enable_monitor_counter); - } - - /* Turn on monitor counters that are default on */ - srv_mon_default_on(); - - DBUG_RETURN(FALSE); -error: - DBUG_RETURN(TRUE); -} - -/** Shut down the InnoDB storage engine. 
-@return 0 */ -static -int -innobase_end(handlerton*, ha_panic_function) -{ - DBUG_ENTER("innobase_end"); - - if (innodb_inited) { - - THD *thd= current_thd; - if (thd) { // may be UNINSTALL PLUGIN statement - trx_t* trx = thd_to_trx(thd); - if (trx) { - trx_free_for_mysql(trx); - } - } - - srv_fast_shutdown = (ulint) innobase_fast_shutdown; - - innodb_inited = 0; - hash_table_free(innobase_open_tables); - innobase_open_tables = NULL; - innodb_shutdown(); - srv_free_paths_and_sizes(); - my_free(internal_innobase_data_file_path); - mysql_mutex_destroy(&innobase_share_mutex); - mysql_mutex_destroy(&commit_cond_m); - mysql_cond_destroy(&commit_cond); - mysql_mutex_destroy(&pending_checkpoint_mutex); - } - - DBUG_RETURN(0); -} - -/****************************************************************//** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes -the logs, and the name of this function should be innobase_checkpoint. -@return TRUE if error */ -static -bool -innobase_flush_logs( -/*================*/ - handlerton* hton) /*!< in/out: InnoDB handlerton */ -{ - bool result = 0; - - DBUG_ENTER("innobase_flush_logs"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - if (!srv_read_only_mode) { - log_buffer_flush_to_disk(); - } - - DBUG_RETURN(result); -} - -/************************************************************//** -Synchronously read and parse the redo log up to the last -checkpoint to write the changed page bitmap. -@return 0 to indicate success. Current implementation cannot fail. */ -my_bool -innobase_flush_changed_page_bitmaps() -/*=================================*/ -{ - if (srv_track_changed_pages) { - os_event_reset(srv_checkpoint_completed_event); - log_online_follow_redo_log(); - } - return FALSE; -} - -/************************************************************//** -Delete all the bitmap files for data less than the specified LSN. -If called with lsn == IB_ULONGLONG_MAX (i.e. 
set by RESET request), -restart the bitmap file sequence, otherwise continue it. -@return 0 to indicate success, 1 for failure. */ -static -my_bool -innobase_purge_changed_page_bitmaps( -/*================================*/ - ulonglong lsn) /*!< in: LSN to purge files up to */ -{ - return (my_bool)log_online_purge_changed_page_bitmaps(lsn); -} - -/*****************************************************************//** -Commits a transaction in an InnoDB database. */ -static -void -innobase_commit_low( -/*================*/ - trx_t* trx) /*!< in: transaction handle */ -{ -#ifdef WITH_WSREP - THD* thd = (THD*)trx->mysql_thd; - const char* tmp = 0; - if (thd && wsrep_on(thd)) { -#ifdef WSREP_PROC_INFO - char info[64]; - info[sizeof(info) - 1] = '\0'; - snprintf(info, sizeof(info) - 1, - "innobase_commit_low():trx_commit_for_mysql(%lld)", - (long long) wsrep_thd_trx_seqno(thd)); - tmp = thd_proc_info(thd, info); - -#else - tmp = thd_proc_info(thd, "innobase_commit_low()"); -#endif /* WSREP_PROC_INFO */ - } -#endif /* WITH_WSREP */ - if (trx_is_started(trx)) { - - trx_commit_for_mysql(trx); - } -#ifdef WITH_WSREP - if (wsrep_on(thd)) { thd_proc_info(thd, tmp); } -#endif /* WITH_WSREP */ -} - -#if NOT_USED -/*****************************************************************//** -Stores the current binlog coordinates in the trx system header. 
*/ -static -int -innobase_store_binlog_info( -/*=======================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - THD* thd) /*!< in: MySQL thread handle */ - -{ - const char* file_name; - unsigned long long pos; - mtr_t mtr; - - DBUG_ENTER("innobase_store_binlog_info"); - - thd_binlog_pos(thd, &file_name, &pos); - - mtr_start(&mtr); - - trx_sys_update_mysql_binlog_offset(file_name, pos, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - - mtr_commit(&mtr); - - innobase_flush_logs(hton); - - DBUG_RETURN(0); -} -#endif - -/*****************************************************************//** -Creates an InnoDB transaction struct for the thd if it does not yet have one. -Starts a new InnoDB transaction if a transaction is not yet started. And -assigns a new snapshot for a consistent read if the transaction does not yet -have one. -@return 0 */ -static -int -innobase_start_trx_and_assign_read_view( -/*====================================*/ - handlerton* hton, /*!< in: Innodb handlerton */ - THD* thd) /*!< in: MySQL thread handle of the user for - whom the transaction should be committed */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_start_trx_and_assign_read_view"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - /* Create a new trx struct for thd, if it does not yet have one */ - - trx = check_trx_exists(thd); - - /* This is just to play safe: release a possible FIFO ticket and - search latch. Since we can potentially reserve the trx_sys->mutex, - we have to release the search system latch first to obey the latching - order. */ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* If the transaction is not started yet, start it */ - - trx_start_if_not_started_xa(trx); - - /* Assign a read view if the transaction does not have it yet. - Do this only if transaction is using REPEATABLE READ isolation - level. 
*/ - trx->isolation_level = innobase_map_isolation_level( - thd_get_trx_isolation(thd)); - - if (trx->isolation_level == TRX_ISO_REPEATABLE_READ) { - trx_assign_read_view(trx); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: WITH CONSISTENT SNAPSHOT " - "was ignored because this phrase " - "can only be used with " - "REPEATABLE READ isolation level."); - } - - /* Set the MySQL flag to mark that there is an active transaction */ - - innobase_register_trx(hton, current_thd, trx); - - DBUG_RETURN(0); -} - -static -void -innobase_commit_ordered_2( -/*============*/ - trx_t* trx, /*!< in: Innodb transaction */ - THD* thd) /*!< in: MySQL thread handle */ -{ - DBUG_ENTER("innobase_commit_ordered_2"); - - /* We need current binlog position for mysqlbackup to work. */ -retry: - if (innobase_commit_concurrency > 0) { - mysql_mutex_lock(&commit_cond_m); - commit_threads++; - - if (commit_threads > innobase_commit_concurrency) { - commit_threads--; - mysql_cond_wait(&commit_cond, - &commit_cond_m); - mysql_mutex_unlock(&commit_cond_m); - goto retry; - } - else { - mysql_mutex_unlock(&commit_cond_m); - } - } - - /* The following call read the binary log position of - the transaction being committed. - - Binary logging of other engines is not relevant to - InnoDB as all InnoDB requires is that committing - InnoDB transactions appear in the same order in the - MySQL binary log as they appear in InnoDB logs, which - is guaranteed by the server. - - If the binary log is not enabled, or the transaction - is not written to the binary log, the file name will - be a NULL pointer. */ - unsigned long long pos; - thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos); - trx->mysql_log_offset= static_cast<ib_int64_t>(pos); - /* Don't do write + flush right now. For group commit - to work we want to do the flush later. 
*/ - trx->flush_log_later = TRUE; - innobase_commit_low(trx); - trx->flush_log_later = FALSE; - - if (innobase_commit_concurrency > 0) { - mysql_mutex_lock(&commit_cond_m); - commit_threads--; - mysql_cond_signal(&commit_cond); - mysql_mutex_unlock(&commit_cond_m); - } - - /* Now do a write + flush of logs. */ - DBUG_VOID_RETURN; -} - -/*****************************************************************//** -Perform the first, fast part of InnoDB commit. - -Doing it in this call ensures that we get the same commit order here -as in binlog and any other participating transactional storage engines. - -Note that we want to do as little as really needed here, as we run -under a global mutex. The expensive fsync() is done later, in -innobase_commit(), without a lock so group commit can take place. - -Note also that this method can be called from a different thread than -the one handling the rest of the transaction. */ -static -void -innobase_commit_ordered( -/*============*/ - handlerton *hton, /*!< in: Innodb handlerton */ - THD* thd, /*!< in: MySQL thread handle of the user for whom - the transaction should be committed */ - bool all) /*!< in: TRUE - commit transaction - FALSE - the current SQL statement ended */ -{ - trx_t* trx; - DBUG_ENTER("innobase_commit_ordered"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - trx = check_trx_exists(thd); - - /* Since we will reserve the kernel mutex, we must not be holding the - search system latch, or we will disobey the latching order. But we - already released it in innobase_xa_prepare() (if not before), so just - have an assert here.*/ - ut_ad(!trx->has_search_latch); - - if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { - /* We cannot throw error here; instead we will catch this error - again in innobase_commit() and report it from there. */ - DBUG_VOID_RETURN; - } - - /* commit_ordered is only called when committing the whole transaction - (or an SQL statement when autocommit is on). 
*/ - DBUG_ASSERT(all || - (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))); - - innobase_commit_ordered_2(trx, thd); - - trx_set_active_commit_ordered(trx); - - DBUG_VOID_RETURN; -} - -/*****************************************************************//** -Commits a transaction in an InnoDB database or marks an SQL statement -ended. -@return 0 */ -static -int -innobase_commit( -/*============*/ - handlerton* hton, /*!< in: Innodb handlerton */ - THD* thd, /*!< in: MySQL thread handle of the - user for whom the transaction should - be committed */ - bool commit_trx) /*!< in: true - commit transaction - false - the current SQL statement - ended */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_commit"); - DBUG_ASSERT(hton == innodb_hton_ptr); - DBUG_PRINT("trans", ("ending transaction")); - - trx = check_trx_exists(thd); - - /* Since we will reserve the trx_sys->mutex, we have to release - the search system latch first to obey the latching order. */ - - /* No-op in XtraDB */ - trx_search_latch_release_if_reserved(trx); - - /* If fake-changes mode = ON then allow - SELECT (they are read-only) and - CREATE ... SELECT * from table (Well this doesn't open up DDL for InnoDB - as ha_innobase::create will return appropriate error if fake-change = ON - but if create is trying to use other SE and SELECT is executing on - InnoDB table then we allow SELECT to proceed. - Ideally, statement like this should be marked CREATE_SELECT like - INSERT_SELECT but unfortunately it doesn't). 
*/ - if (UNIV_UNLIKELY(trx->fake_changes - && (thd_sql_command(thd) != SQLCOM_SELECT - && thd_sql_command(thd) != SQLCOM_CREATE_TABLE) - && (commit_trx || (!thd_test_options(thd, - OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))))) { - - /* rollback implicitly */ - innobase_rollback(hton, thd, commit_trx); - - /* because debug assertion code complains, if something left */ - thd->get_stmt_da()->reset_diagnostics_area(); - - DBUG_RETURN(HA_ERR_WRONG_COMMAND); - } - /* Transaction is deregistered only in a commit or a rollback. If - it is deregistered we know there cannot be resources to be freed - and we could return immediately. For the time being, we play safe - and do the cleanup though there should be nothing to clean up. */ - - if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { - - sql_print_error("Transaction not registered for MySQL 2PC, " - "but transaction is active"); - } - - if (commit_trx - || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - DBUG_EXECUTE_IF("crash_innodb_before_commit", - DBUG_SUICIDE();); - - /* Run the fast part of commit if we did not already. */ - if (!trx_is_active_commit_ordered(trx)) { - innobase_commit_ordered_2(trx, thd); - } - - /* We were instructed to commit the whole transaction, or - this is an SQL statement end and autocommit is on */ - - /* At this point commit order is fixed and transaction is - visible to others. So we can wakeup other commits waiting for - this one, to allow then to group commit with us. 
*/ - thd_wakeup_subsequent_commits(thd, 0); - - trx_commit_complete_for_mysql(trx); - trx_deregister_from_2pc(trx); - } else { - /* We just mark the SQL statement ended and do not do a - transaction commit */ - - /* If we had reserved the auto-inc lock for some - table in this SQL statement we release it now */ - - lock_unlock_table_autoinc(trx); - - /* Store the current undo_no of the transaction so that we - know where to roll back if we have to roll back the next - SQL statement */ - - trx_mark_sql_stat_end(trx); - } - - trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */ - - /* This is a statement level variable. */ - trx->fts_next_doc_id = 0; - - innobase_srv_conc_force_exit_innodb(trx); - - DBUG_RETURN(0); -} - -/*****************************************************************//** -Rolls back a transaction or the latest SQL statement. -@return 0 or error number */ -static -int -innobase_rollback( -/*==============*/ - handlerton* hton, /*!< in: Innodb handlerton */ - THD* thd, /*!< in: handle to the MySQL thread - of the user whose transaction should - be rolled back */ - bool rollback_trx) /*!< in: TRUE - rollback entire - transaction FALSE - rollback the current - statement only */ -{ - dberr_t error; - trx_t* trx; - - DBUG_ENTER("innobase_rollback"); - DBUG_ASSERT(hton == innodb_hton_ptr); - DBUG_PRINT("trans", ("aborting transaction")); - - trx = check_trx_exists(thd); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */ - - /* If we had reserved the auto-inc lock for some table (if - we come here to roll back the latest SQL statement) we - release it now before a possibly lengthy rollback */ - - lock_unlock_table_autoinc(trx); - - /* This is a statement level variable. */ - trx->fts_next_doc_id = 0; - - if (rollback_trx - || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - error = trx_rollback_for_mysql(trx); - trx_deregister_from_2pc(trx); - } else { - error = trx_rollback_last_sql_stat_for_mysql(trx); - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - -/*****************************************************************//** -Rolls back a transaction -@return 0 or error number */ -static -int -innobase_rollback_trx( -/*==================*/ - trx_t* trx) /*!< in: transaction */ -{ - dberr_t error = DB_SUCCESS; - - DBUG_ENTER("innobase_rollback_trx"); - DBUG_PRINT("trans", ("aborting transaction")); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* If we had reserved the auto-inc lock for some table (if - we come here to roll back the latest SQL statement) we - release it now before a possibly lengthy rollback */ - - lock_unlock_table_autoinc(trx); - - if (!trx->read_only) { - error = trx_rollback_for_mysql(trx); - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - - -struct pending_checkpoint { - struct pending_checkpoint *next; - handlerton *hton; - void *cookie; - ib_uint64_t lsn; -}; -static struct pending_checkpoint *pending_checkpoint_list; -static struct pending_checkpoint *pending_checkpoint_list_end; - -/*****************************************************************//** -Handle a commit checkpoint request from server layer. -We put the request in a queue, so that we can notify upper layer about -checkpoint complete when we have flushed the redo log. -If we have already flushed all relevant redo log, we notify immediately.*/ -static -void -innobase_checkpoint_request( - handlerton *hton, - void *cookie) -{ - ib_uint64_t lsn; - ib_uint64_t flush_lsn; - struct pending_checkpoint * entry; - - /* Do the allocation outside of lock to reduce contention. The normal - case is that not everything is flushed, so we will need to enqueue. */ - entry = static_cast<struct pending_checkpoint *> - (my_malloc(sizeof(*entry), MYF(MY_WME))); - if (!entry) { - sql_print_error("Failed to allocate %u bytes." - " Commit checkpoint will be skipped.", - static_cast<unsigned>(sizeof(*entry))); - return; - } - - entry->next = NULL; - entry->hton = hton; - entry->cookie = cookie; - - mysql_mutex_lock(&pending_checkpoint_mutex); - lsn = log_get_lsn(); - flush_lsn = log_get_flush_lsn(); - if (lsn > flush_lsn) { - /* Put the request in queue. - When the log gets flushed past the lsn, we will remove the - entry from the queue and notify the upper layer. 
*/ - entry->lsn = lsn; - if (pending_checkpoint_list_end) { - pending_checkpoint_list_end->next = entry; - /* There is no need to order the entries in the list - by lsn. The upper layer can accept notifications in - any order, and short delays in notifications do not - significantly impact performance. */ - } else { - pending_checkpoint_list = entry; - } - pending_checkpoint_list_end = entry; - entry = NULL; - } - mysql_mutex_unlock(&pending_checkpoint_mutex); - - if (entry) { - /* We are already flushed. Notify the checkpoint immediately. */ - commit_checkpoint_notify_ha(entry->hton, entry->cookie); - my_free(entry); - } -} - -/*****************************************************************//** -Log code calls this whenever log has been written and/or flushed up -to a new position. We use this to notify upper layer of a new commit -checkpoint when necessary.*/ -UNIV_INTERN -void -innobase_mysql_log_notify( -/*===============*/ - ib_uint64_t write_lsn, /*!< in: LSN written to log file */ - ib_uint64_t flush_lsn) /*!< in: LSN flushed to disk */ -{ - struct pending_checkpoint * pending; - struct pending_checkpoint * entry; - struct pending_checkpoint * last_ready; - - /* It is safe to do a quick check for NULL first without lock. - Even if we should race, we will at most skip one checkpoint and - take the next one, which is harmless. */ - if (!pending_checkpoint_list) - return; - - mysql_mutex_lock(&pending_checkpoint_mutex); - pending = pending_checkpoint_list; - if (!pending) - { - mysql_mutex_unlock(&pending_checkpoint_mutex); - return; - } - - last_ready = NULL; - for (entry = pending; entry != NULL; entry = entry -> next) - { - /* Notify checkpoints up until the first entry that has not - been fully flushed to the redo log. Since we do not maintain - the list ordered, in principle there could be more entries - later than were also flushed. But there is no harm in - delaying notifications for those a bit. 
And in practise, the - list is unlikely to have more than one element anyway, as we - flush the redo log at least once every second. */ - if (entry->lsn > flush_lsn) - break; - last_ready = entry; - } - - if (last_ready) - { - /* We found some pending checkpoints that are now flushed to - disk. So remove them from the list. */ - pending_checkpoint_list = entry; - if (!entry) - pending_checkpoint_list_end = NULL; - } - - mysql_mutex_unlock(&pending_checkpoint_mutex); - - if (!last_ready) - return; - - /* Now that we have released the lock, notify upper layer about all - commit checkpoints that have now completed. */ - for (;;) { - entry = pending; - pending = pending->next; - - commit_checkpoint_notify_ha(entry->hton, entry->cookie); - - my_free(entry); - if (entry == last_ready) - break; - } -} - -/*****************************************************************//** -Rolls back a transaction to a savepoint. -@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the -given name */ -static -int -innobase_rollback_to_savepoint( -/*===========================*/ - handlerton* hton, /*!< in: Innodb handlerton */ - THD* thd, /*!< in: handle to the MySQL thread - of the user whose transaction should - be rolled back to savepoint */ - void* savepoint) /*!< in: savepoint data */ -{ - ib_int64_t mysql_binlog_cache_pos; - dberr_t error; - trx_t* trx; - char name[64]; - - DBUG_ENTER("innobase_rollback_to_savepoint"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - trx = check_trx_exists(thd); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* TODO: use provided savepoint data area to store savepoint data */ - - longlong2str((ulint) savepoint, name, 36); - - error = trx_rollback_to_savepoint_for_mysql( - trx, name, &mysql_binlog_cache_pos); - - if (error == DB_SUCCESS && trx->fts_trx != NULL) { - fts_savepoint_rollback(trx, name); - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - -/*****************************************************************//** -Check whether innodb state allows to safely release MDL locks after -rollback to savepoint. -When binlog is on, MDL locks acquired after savepoint unit are not -released if there are any locks held in InnoDB. -@return true if it is safe, false if its not safe. */ -static -bool -innobase_rollback_to_savepoint_can_release_mdl( -/*===========================================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - THD* thd) /*!< in: handle to the MySQL thread - of the user whose transaction should - be rolled back to savepoint */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - trx = check_trx_exists(thd); - ut_ad(trx); - - /* If transaction has not acquired any locks then it is safe - to release MDL after rollback to savepoint */ - if (!(UT_LIST_GET_LEN(trx->lock.trx_locks))) { - DBUG_RETURN(true); - } - - DBUG_RETURN(false); -} - -/*****************************************************************//** -Release transaction savepoint name. 
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the -given name */ -static -int -innobase_release_savepoint( -/*=======================*/ - handlerton* hton, /*!< in: handlerton for Innodb */ - THD* thd, /*!< in: handle to the MySQL thread - of the user whose transaction's - savepoint should be released */ - void* savepoint) /*!< in: savepoint data */ -{ - dberr_t error; - trx_t* trx; - char name[64]; - - DBUG_ENTER("innobase_release_savepoint"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - trx = check_trx_exists(thd); - - if (trx->state == TRX_STATE_NOT_STARTED) { - trx_start_if_not_started(trx); - } - - /* TODO: use provided savepoint data area to store savepoint data */ - - longlong2str((ulint) savepoint, name, 36); - - error = trx_release_savepoint_for_mysql(trx, name); - - if (error == DB_SUCCESS && trx->fts_trx != NULL) { - fts_savepoint_release(trx, name); - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - -/*****************************************************************//** -Sets a transaction savepoint. -@return always 0, that is, always succeeds */ -static -int -innobase_savepoint( -/*===============*/ - handlerton* hton, /*!< in: handle to the Innodb handlerton */ - THD* thd, /*!< in: handle to the MySQL thread */ - void* savepoint) /*!< in: savepoint data */ -{ - dberr_t error; - trx_t* trx; - - DBUG_ENTER("innobase_savepoint"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - /* In the autocommit mode there is no sense to set a savepoint - (unless we are in sub-statement), so SQL layer ensures that - this method is never called in such situation. */ - - trx = check_trx_exists(thd); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* Cannot happen outside of transaction */ - DBUG_ASSERT(trx_is_registered_for_2pc(trx)); - - /* TODO: use provided savepoint data area to store savepoint data */ - char name[64]; - longlong2str((ulint) savepoint,name,36); - - error = trx_savepoint_for_mysql(trx, name, (ib_int64_t)0); - - if (error == DB_SUCCESS && trx->fts_trx != NULL) { - fts_savepoint_take(trx, trx->fts_trx, name); - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. -@return 0 or error number */ -static -int -innobase_close_connection( -/*======================*/ - handlerton* hton, /*!< in: innobase handlerton */ - THD* thd) /*!< in: handle to the MySQL thread of the user - whose resources should be free'd */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_close_connection"); - DBUG_ASSERT(hton == innodb_hton_ptr); - trx = thd_to_trx(thd); - - ut_a(trx); - - if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { - - sql_print_error("Transaction not registered for MySQL 2PC, " - "but transaction is active"); - } - - if (trx_is_started(trx) && global_system_variables.log_warnings) { - - sql_print_warning( - "MySQL is closing a connection that has an active " - "InnoDB transaction. " TRX_ID_FMT " row modifications " - "will roll back.", - trx->undo_no); - } - - innobase_rollback_trx(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(0); -} - -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. 
-@return 0 or error number */ -UNIV_INTERN -int -innobase_close_thd( -/*===============*/ - THD* thd) /*!< in: handle to the MySQL thread of the user - whose resources should be free'd */ -{ - trx_t* trx = thd_to_trx(thd); - - if (!trx) { - return(0); - } - - return(innobase_close_connection(innodb_hton_ptr, thd)); -} - -/*************************************************************************//** -** InnoDB database tables -*****************************************************************************/ - -/****************************************************************//** -Get the record format from the data dictionary. -@return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT, -ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */ -UNIV_INTERN -enum row_type -ha_innobase::get_row_type() const -/*=============================*/ -{ - if (prebuilt && prebuilt->table) { - const ulint flags = prebuilt->table->flags; - - switch (dict_tf_get_rec_format(flags)) { - case REC_FORMAT_REDUNDANT: - return(ROW_TYPE_REDUNDANT); - case REC_FORMAT_COMPACT: - return(ROW_TYPE_COMPACT); - case REC_FORMAT_COMPRESSED: - return(ROW_TYPE_COMPRESSED); - case REC_FORMAT_DYNAMIC: - return(ROW_TYPE_DYNAMIC); - } - } - ut_ad(0); - return(ROW_TYPE_NOT_USED); -} - -/*****************************************************************//** -Cancel any pending lock request associated with the current THD. */ -static -void -innobase_kill_connection( -/*======================*/ - handlerton* hton, /*!< in: innobase handlerton */ - THD* thd, /*!< in: handle to the MySQL thread being killed */ - thd_kill_levels) -{ - trx_t* trx; - - DBUG_ENTER("innobase_kill_connection"); - DBUG_ASSERT(hton == innodb_hton_ptr); - -#ifdef WITH_WSREP - wsrep_thd_LOCK(thd); - if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT) { - /* if victim has been signaled by BF thread and/or aborting - is already progressing, following query aborting is not necessary - any more. 
- Also, BF thread should own trx mutex for the victim, which would - conflict with trx_mutex_enter() below - */ - wsrep_thd_UNLOCK(thd); - DBUG_VOID_RETURN; - } - wsrep_thd_UNLOCK(thd); -#endif /* WITH_WSREP */ - trx = thd_to_trx(thd); - - if (trx && trx->lock.wait_lock) { - /* In wsrep BF we have already took lock_sys and trx - mutex either on wsrep_abort_transaction() or - before wsrep_kill_victim(). In replication we - could own lock_sys mutex taken in - lock_deadlock_check_and_resolve().*/ - - WSREP_DEBUG("Killing victim trx %p BF %d trx BF %d trx_id " TRX_ID_FMT " ABORT %d thd %p" - " current_thd %p BF %d wait_lock_modes: %s\n", - trx, wsrep_thd_is_BF(trx->mysql_thd, FALSE), - wsrep_thd_is_BF(thd, FALSE), - trx->id, trx->abort_type, - trx->mysql_thd, - current_thd, - wsrep_thd_is_BF(current_thd, FALSE), - lock_get_info(trx->lock.wait_lock).c_str()); - - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { - ut_ad(!lock_mutex_own()); - lock_mutex_enter(); - } - - if (trx->abort_type != TRX_WSREP_ABORT) { - trx_mutex_enter(trx); - } - - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(trx)); - - if (trx->lock.wait_lock) { - lock_cancel_waiting_and_release(trx->lock.wait_lock); - } - - if (trx->abort_type != TRX_WSREP_ABORT) { - trx_mutex_exit(trx); - } - - if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - trx->abort_type == TRX_SERVER_ABORT) { - lock_mutex_exit(); - } - } - - DBUG_VOID_RETURN; -} - - - -/****************************************************************//** -Get the table flags to use for the statement. -@return table flags */ -UNIV_INTERN -handler::Table_flags -ha_innobase::table_flags() const -/*============================*/ -{ - /* Need to use tx_isolation here since table flags is (also) - called before prebuilt is inited. 
*/ - ulong const tx_isolation = thd_tx_isolation(ha_thd()); - - if (tx_isolation <= ISO_READ_COMMITTED) { - return(int_table_flags); - } - - return(int_table_flags | HA_BINLOG_STMT_CAPABLE); -} - -/****************************************************************//** -Returns the table type (storage engine name). -@return table type */ -UNIV_INTERN -const char* -ha_innobase::table_type() const -/*===========================*/ -{ - return(innobase_hton_name); -} - -/****************************************************************//** -Returns the index type. */ -UNIV_INTERN -const char* -ha_innobase::index_type( -/*====================*/ - uint keynr) /*!< : index number */ -{ - dict_index_t* index = innobase_get_index(keynr); - - if (index && index->type & DICT_FTS) { - return("FULLTEXT"); - } else { - return("BTREE"); - } -} - -/****************************************************************//** -Returns the operations supported for indexes. -@return flags of supported operations */ -UNIV_INTERN -ulong -ha_innobase::index_flags( -/*=====================*/ - uint key, - uint, - bool) const -{ - return((table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) - ? 0 - : (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER - | HA_READ_RANGE | HA_KEYREAD_ONLY - | (key == table_share->primary_key ? HA_CLUSTERED_INDEX : 0) - | HA_DO_INDEX_COND_PUSHDOWN)); -} - -/****************************************************************//** -Returns the maximum number of keys. -@return MAX_KEY */ -UNIV_INTERN -uint -ha_innobase::max_supported_keys() const -/*===================================*/ -{ - return(MAX_KEY); -} - -/****************************************************************//** -Returns the maximum key length. 
-@return maximum supported key length, in bytes */ -UNIV_INTERN -uint -ha_innobase::max_supported_key_length() const -/*=========================================*/ -{ - /* An InnoDB page must store >= 2 keys; a secondary key record - must also contain the primary key value. Therefore, if both - the primary key and the secondary key are at this maximum length, - it must be less than 1/4th of the free space on a page including - record overhead. - - MySQL imposes its own limit to this number; MAX_KEY_LENGTH = 3072. - - For page sizes = 16k, InnoDB historically reported 3500 bytes here, - But the MySQL limit of 3072 was always used through the handler - interface. - - Note: Handle 16k and 32k pages the same here since the limits - are higher than imposed by MySQL. */ - - switch (UNIV_PAGE_SIZE) { - case 4096: - return(768); - case 8192: - return(1536); - default: -#ifdef WITH_WSREP - return(3500); -#else - return(3500); -#endif - } -} - -/****************************************************************//** -Returns the key map of keys that are usable for scanning. -@return key_map_full */ -UNIV_INTERN -const key_map* -ha_innobase::keys_to_use_for_scanning() -/*===================================*/ -{ - return(&key_map_full); -} - -/****************************************************************//** -Determines if table caching is supported. -@return HA_CACHE_TBL_ASKTRANSACT */ -UNIV_INTERN -uint8 -ha_innobase::table_cache_type() -/*===========================*/ -{ - return(HA_CACHE_TBL_ASKTRANSACT); -} - -/****************************************************************//** -Determines if the primary key is clustered index. -@return true */ -UNIV_INTERN -bool -ha_innobase::primary_key_is_clustered() -/*===================================*/ -{ - return(true); -} - -/*****************************************************************//** -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. 
Example: test/mytable. -On Windows normalization puts both the database name and the -table name always to lower case if "set_lower_case" is set to TRUE. */ -void -normalize_table_name_low( -/*=====================*/ - char* norm_name, /*!< out: normalized name as a - null-terminated string */ - const char* name, /*!< in: table name string */ - ibool set_lower_case) /*!< in: TRUE if we want to set name - to lower case */ -{ - char* name_ptr; - ulint name_len; - char* db_ptr; - ulint db_len; - char* ptr; - ulint norm_len; - - /* Scan name from the end */ - - ptr = strend(name) - 1; - - /* seek to the last path separator */ - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - - name_ptr = ptr + 1; - name_len = strlen(name_ptr); - - /* skip any number of path separators */ - while (ptr >= name && (*ptr == '\\' || *ptr == '/')) { - ptr--; - } - - DBUG_ASSERT(ptr >= name); - - /* seek to the last but one path separator or one char before - the beginning of name */ - db_len = 0; - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - db_len++; - } - - db_ptr = ptr + 1; - - norm_len = db_len + name_len + sizeof "/"; - ut_a(norm_len < FN_REFLEN - 1); - - memcpy(norm_name, db_ptr, db_len); - - norm_name[db_len] = '/'; - - /* Copy the name and null-byte. */ - memcpy(norm_name + db_len + 1, name_ptr, name_len + 1); - - if (set_lower_case) { - innobase_casedn_str(norm_name); - } -} - -#if !defined(DBUG_OFF) -/********************************************************************* -Test normalize_table_name_low(). 
*/ -static -void -test_normalize_table_name_low() -/*===========================*/ -{ - char norm_name[FN_REFLEN]; - const char* test_data[][2] = { - /* input, expected result */ - {"./mysqltest/t1", "mysqltest/t1"}, - {"./test/#sql-842b_2", "test/#sql-842b_2"}, - {"./test/#sql-85a3_10", "test/#sql-85a3_10"}, - {"./test/#sql2-842b-2", "test/#sql2-842b-2"}, - {"./test/bug29807", "test/bug29807"}, - {"./test/foo", "test/foo"}, - {"./test/innodb_bug52663", "test/innodb_bug52663"}, - {"./test/t", "test/t"}, - {"./test/t1", "test/t1"}, - {"./test/t10", "test/t10"}, - {"/a/b/db/table", "db/table"}, - {"/a/b/db///////table", "db/table"}, - {"/a/b////db///////table", "db/table"}, - {"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"}, - {"db/table", "db/table"}, - {"ddd/t", "ddd/t"}, - {"d/ttt", "d/ttt"}, - {"d/t", "d/t"}, - {".\\mysqltest\\t1", "mysqltest/t1"}, - {".\\test\\#sql-842b_2", "test/#sql-842b_2"}, - {".\\test\\#sql-85a3_10", "test/#sql-85a3_10"}, - {".\\test\\#sql2-842b-2", "test/#sql2-842b-2"}, - {".\\test\\bug29807", "test/bug29807"}, - {".\\test\\foo", "test/foo"}, - {".\\test\\innodb_bug52663", "test/innodb_bug52663"}, - {".\\test\\t", "test/t"}, - {".\\test\\t1", "test/t1"}, - {".\\test\\t10", "test/t10"}, - {"C:\\a\\b\\db\\table", "db/table"}, - {"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"}, - {"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"}, - {"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"}, - {"db\\table", "db/table"}, - {"ddd\\t", "ddd/t"}, - {"d\\ttt", "d/ttt"}, - {"d\\t", "d/t"}, - }; - - for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) { - printf("test_normalize_table_name_low(): " - "testing \"%s\", expected \"%s\"... 
", - test_data[i][0], test_data[i][1]); - - normalize_table_name_low(norm_name, test_data[i][0], FALSE); - - if (strcmp(norm_name, test_data[i][1]) == 0) { - printf("ok\n"); - } else { - printf("got \"%s\"\n", norm_name); - ut_error; - } - } -} - -/********************************************************************* -Test ut_format_name(). */ -static -void -test_ut_format_name() -/*=================*/ -{ - char buf[NAME_LEN * 3]; - - struct { - const char* name; - ibool is_table; - ulint buf_size; - const char* expected; - } test_data[] = { - {"test/t1", TRUE, sizeof(buf), "\"test\".\"t1\""}, - {"test/t1", TRUE, 12, "\"test\".\"t1\""}, - {"test/t1", TRUE, 11, "\"test\".\"t1"}, - {"test/t1", TRUE, 10, "\"test\".\"t"}, - {"test/t1", TRUE, 9, "\"test\".\""}, - {"test/t1", TRUE, 8, "\"test\"."}, - {"test/t1", TRUE, 7, "\"test\""}, - {"test/t1", TRUE, 6, "\"test"}, - {"test/t1", TRUE, 5, "\"tes"}, - {"test/t1", TRUE, 4, "\"te"}, - {"test/t1", TRUE, 3, "\"t"}, - {"test/t1", TRUE, 2, "\""}, - {"test/t1", TRUE, 1, ""}, - {"test/t1", TRUE, 0, "BUF_NOT_CHANGED"}, - {"table", TRUE, sizeof(buf), "\"table\""}, - {"ta'le", TRUE, sizeof(buf), "\"ta'le\""}, - {"ta\"le", TRUE, sizeof(buf), "\"ta\"\"le\""}, - {"ta`le", TRUE, sizeof(buf), "\"ta`le\""}, - {"index", FALSE, sizeof(buf), "\"index\""}, - {"ind/ex", FALSE, sizeof(buf), "\"ind/ex\""}, - }; - - for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) { - - memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1); - - char* ret; - - ret = ut_format_name(test_data[i].name, - test_data[i].is_table, - buf, - test_data[i].buf_size); - - ut_a(ret == buf); - - if (strcmp(buf, test_data[i].expected) == 0) { - fprintf(stderr, - "ut_format_name(%s, %s, buf, %lu), " - "expected %s, OK\n", - test_data[i].name, - test_data[i].is_table ? 
"TRUE" : "FALSE", - test_data[i].buf_size, - test_data[i].expected); - } else { - fprintf(stderr, - "ut_format_name(%s, %s, buf, %lu), " - "expected %s, ERROR: got %s\n", - test_data[i].name, - test_data[i].is_table ? "TRUE" : "FALSE", - test_data[i].buf_size, - test_data[i].expected, - buf); - ut_error; - } - } -} -#endif /* !DBUG_OFF */ - -/********************************************************************//** -Get the upper limit of the MySQL integral and floating-point type. -@return maximum allowed value for the field */ -UNIV_INTERN -ulonglong -innobase_get_int_col_max_value( -/*===========================*/ - const Field* field) /*!< in: MySQL field */ -{ - ulonglong max_value = 0; - - switch (field->key_type()) { - /* TINY */ - case HA_KEYTYPE_BINARY: - max_value = 0xFFULL; - break; - case HA_KEYTYPE_INT8: - max_value = 0x7FULL; - break; - /* SHORT */ - case HA_KEYTYPE_USHORT_INT: - max_value = 0xFFFFULL; - break; - case HA_KEYTYPE_SHORT_INT: - max_value = 0x7FFFULL; - break; - /* MEDIUM */ - case HA_KEYTYPE_UINT24: - max_value = 0xFFFFFFULL; - break; - case HA_KEYTYPE_INT24: - max_value = 0x7FFFFFULL; - break; - /* LONG */ - case HA_KEYTYPE_ULONG_INT: - max_value = 0xFFFFFFFFULL; - break; - case HA_KEYTYPE_LONG_INT: - max_value = 0x7FFFFFFFULL; - break; - /* BIG */ - case HA_KEYTYPE_ULONGLONG: - max_value = 0xFFFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_LONGLONG: - max_value = 0x7FFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_FLOAT: - /* We use the maximum as per IEEE754-2008 standard, 2^24 */ - max_value = 0x1000000ULL; - break; - case HA_KEYTYPE_DOUBLE: - /* We use the maximum as per IEEE754-2008 standard, 2^53 */ - max_value = 0x20000000000000ULL; - break; - default: - ut_error; - } - - return(max_value); -} - -/*******************************************************************//** -This function checks whether the index column information -is consistent between KEY info from mysql and that from innodb index. 
-@return TRUE if all column types match. */ -static -ibool -innobase_match_index_columns( -/*=========================*/ - const KEY* key_info, /*!< in: Index info - from mysql */ - const dict_index_t* index_info) /*!< in: Index info - from Innodb */ -{ - const KEY_PART_INFO* key_part; - const KEY_PART_INFO* key_end; - const dict_field_t* innodb_idx_fld; - const dict_field_t* innodb_idx_fld_end; - - DBUG_ENTER("innobase_match_index_columns"); - - /* Check whether user defined index column count matches */ - if (key_info->user_defined_key_parts != - index_info->n_user_defined_cols) { - DBUG_RETURN(FALSE); - } - - key_part = key_info->key_part; - key_end = key_part + key_info->user_defined_key_parts; - innodb_idx_fld = index_info->fields; - innodb_idx_fld_end = index_info->fields + index_info->n_fields; - - /* Check each index column's datatype. We do not check - column name because there exists case that index - column name got modified in mysql but such change does not - propagate to InnoDB. - One hidden assumption here is that the index column sequences - are matched up between those in mysql and Innodb. */ - for (; key_part != key_end; ++key_part) { - ulint col_type; - ibool is_unsigned; - ulint mtype = innodb_idx_fld->col->mtype; - - /* Need to translate to InnoDB column type before - comparison. */ - col_type = get_innobase_type_from_mysql_type(&is_unsigned, - key_part->field); - - /* Ignore Innodb specific system columns. */ - while (mtype == DATA_SYS) { - innodb_idx_fld++; - - if (innodb_idx_fld >= innodb_idx_fld_end) { - DBUG_RETURN(FALSE); - } - - mtype = innodb_idx_fld->col->mtype; - } - - if (col_type != mtype) { - /* Column Type mismatches */ - DBUG_RETURN(FALSE); - } - - innodb_idx_fld++; - } - - DBUG_RETURN(TRUE); -} - -/*******************************************************************//** -This function builds a translation table in INNOBASE_SHARE -structure for fast index location with mysql array number from its -table->key_info structure. 
This also provides the necessary translation -between the key order in mysql key_info and Innodb ib_table->indexes if -they are not fully matched with each other. -Note we do not have any mutex protecting the translation table -building based on the assumption that there is no concurrent -index creation/drop and DMLs that requires index lookup. All table -handle will be closed before the index creation/drop. -@return TRUE if index translation table built successfully */ -UNIV_INTERN -ibool -innobase_build_index_translation( -/*=============================*/ - const TABLE* table, /*!< in: table in MySQL data - dictionary */ - dict_table_t* ib_table,/*!< in: table in Innodb data - dictionary */ - INNOBASE_SHARE* share) /*!< in/out: share structure - where index translation table - will be constructed in. */ -{ - ulint mysql_num_index; - ulint ib_num_index; - dict_index_t** index_mapping; - ibool ret = TRUE; - - DBUG_ENTER("innobase_build_index_translation"); - - mutex_enter(&dict_sys->mutex); - - mysql_num_index = table->s->keys; - ib_num_index = UT_LIST_GET_LEN(ib_table->indexes); - - index_mapping = share->idx_trans_tbl.index_mapping; - - /* If there exists inconsistency between MySQL and InnoDB dictionary - (metadata) information, the number of index defined in MySQL - could exceed that in InnoDB, do not build index translation - table in such case */ - if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) { - ret = FALSE; - goto func_exit; - } - - /* If index entry count is non-zero, nothing has - changed since last update, directly return TRUE */ - if (share->idx_trans_tbl.index_count) { - /* Index entry count should still match mysql_num_index */ - ut_a(share->idx_trans_tbl.index_count == mysql_num_index); - goto func_exit; - } - - /* The number of index increased, rebuild the mapping table */ - if (mysql_num_index > share->idx_trans_tbl.array_size) { - index_mapping = (dict_index_t**) my_realloc(index_mapping, - mysql_num_index * - sizeof(*index_mapping), - 
MYF(MY_ALLOW_ZERO_PTR)); - - if (!index_mapping) { - /* Report an error if index_mapping continues to be - NULL and mysql_num_index is a non-zero value */ - sql_print_error("InnoDB: fail to allocate memory for " - "index translation table. Number of " - "Index:%lu, array size:%lu", - mysql_num_index, - share->idx_trans_tbl.array_size); - ret = FALSE; - goto func_exit; - } - - share->idx_trans_tbl.array_size = mysql_num_index; - } - - /* For each index in the mysql key_info array, fetch its - corresponding InnoDB index pointer into index_mapping - array. */ - for (ulint count = 0; count < mysql_num_index; count++) { - - /* Fetch index pointers into index_mapping according to mysql - index sequence */ - index_mapping[count] = dict_table_get_index_on_name( - ib_table, table->key_info[count].name); - - if (!index_mapping[count]) { - sql_print_error("Cannot find index %s in InnoDB " - "index dictionary.", - table->key_info[count].name); - ret = FALSE; - goto func_exit; - } - - /* Double check fetched index has the same - column info as those in mysql key_info. */ - if (!innobase_match_index_columns(&table->key_info[count], - index_mapping[count])) { - sql_print_error("Found index %s whose column info " - "does not match that of MySQL.", - table->key_info[count].name); - ret = FALSE; - goto func_exit; - } - } - - /* Successfully built the translation table */ - share->idx_trans_tbl.index_count = mysql_num_index; - -func_exit: - if (!ret) { - /* Build translation table failed. */ - my_free(index_mapping); - - share->idx_trans_tbl.array_size = 0; - share->idx_trans_tbl.index_count = 0; - index_mapping = NULL; - } - - share->idx_trans_tbl.index_mapping = index_mapping; - - mutex_exit(&dict_sys->mutex); - - DBUG_RETURN(ret); -} - -/*******************************************************************//** -This function uses index translation table to quickly locate the -requested index structure. 
-Note we do not have mutex protection for the index translatoin table -access, it is based on the assumption that there is no concurrent -translation table rebuild (fter create/drop index) and DMLs that -require index lookup. -@return dict_index_t structure for requested index. NULL if -fail to locate the index structure. */ -static -dict_index_t* -innobase_index_lookup( -/*==================*/ - INNOBASE_SHARE* share, /*!< in: share structure for index - translation table. */ - uint keynr) /*!< in: index number for the requested - index */ -{ - if (!share->idx_trans_tbl.index_mapping - || keynr >= share->idx_trans_tbl.index_count) { - return(NULL); - } - - return(share->idx_trans_tbl.index_mapping[keynr]); -} - -/************************************************************************ -Set the autoinc column max value. This should only be called once from -ha_innobase::open(). Therefore there's no need for a covering lock. */ -UNIV_INTERN -void -ha_innobase::innobase_initialize_autoinc() -/*======================================*/ -{ - ulonglong auto_inc; - const Field* field = table->found_next_number_field; - - if (field != NULL) { - auto_inc = innobase_get_int_col_max_value(field); - } else { - /* We have no idea what's been passed in to us as the - autoinc column. We set it to the 0, effectively disabling - updates to the table. */ - auto_inc = 0; - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Unable to determine the AUTOINC " - "column name\n"); - } - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - /* If the recovery level is set so high that writes - are disabled we force the AUTOINC counter to 0 - value effectively disabling writes to the table. - Secondly, we avoid reading the table in case the read - results in failure due to a corrupted table/index. - - We will not return an error to the client, so that the - tables can be dumped with minimal hassle. 
If an error - were returned in this case, the first attempt to read - the table would fail and subsequent SELECTs would succeed. */ - auto_inc = 0; - } else if (field == NULL) { - /* This is a far more serious error, best to avoid - opening the table and return failure. */ - my_error(ER_AUTOINC_READ_FAILED, MYF(0)); - } else { - dict_index_t* index; - const char* col_name; - ib_uint64_t read_auto_inc; - ulint err; - - update_thd(ha_thd()); - - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - - col_name = field->field_name.str; - index = innobase_get_index(table->s->next_number_index); - - /* Execute SELECT MAX(col_name) FROM TABLE; */ - err = row_search_max_autoinc(index, col_name, &read_auto_inc); - - switch (err) { - case DB_SUCCESS: { - ulonglong col_max_value; - - col_max_value = innobase_get_int_col_max_value(field); - - /* At the this stage we do not know the increment - nor the offset, so use a default increment of 1. */ - - auto_inc = innobase_next_autoinc( - read_auto_inc, 1, 1, 0, col_max_value); - - break; - } - case DB_RECORD_NOT_FOUND: - char buf[MAX_FULL_NAME_LEN]; - ut_format_name(index->table->name, TRUE, buf, sizeof(buf)); - - ib_logf(IB_LOG_LEVEL_ERROR, - "MySQL and InnoDB data " - "dictionaries are out of sync." - " Unable to find the AUTOINC column " - " %s in the InnoDB table %s." - " We set the next AUTOINC column " - "value to 0" - " in effect disabling the AUTOINC " - "next value generation." - " You can either set the next " - "AUTOINC value explicitly using ALTER TABLE " - " or fix the data dictionary by " - "recreating the table.", - col_name, buf); - - /* This will disable the AUTOINC generation. */ - auto_inc = 0; - - /* We want the open to succeed, so that the user can - take corrective action. ie. reads should succeed but - updates should fail. */ - err = DB_SUCCESS; - break; - default: - /* row_search_max_autoinc() should only return - one of DB_SUCCESS or DB_RECORD_NOT_FOUND. 
*/ - ut_error; - } - } - - dict_table_autoinc_initialize(prebuilt->table, auto_inc); -} - -/*****************************************************************//** -Creates and opens a handle to a table which already exists in an InnoDB -database. -@return 1 if error, 0 if success */ -UNIV_INTERN -int -ha_innobase::open( -/*==============*/ - const char* name, /*!< in: table name */ - int mode, /*!< in: not used */ - uint test_if_locked) /*!< in: not used */ -{ - dict_table_t* ib_table; - char norm_name[FN_REFLEN]; - THD* thd; - char* is_part = NULL; - ibool par_case_name_set = FALSE; - char par_case_name[FN_REFLEN]; - dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE; - - DBUG_ENTER("ha_innobase::open"); - - UT_NOT_USED(mode); - UT_NOT_USED(test_if_locked); - - thd = ha_thd(); - - normalize_table_name(norm_name, name); - - user_thd = NULL; - - if (!(share=get_share(name))) { - - DBUG_RETURN(1); - } - - if (UNIV_UNLIKELY(share->ib_table && share->ib_table->corrupted && - srv_pass_corrupt_table <= 1)) { - free_share(share); - - DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); - } - - /* Will be allocated if it is needed in ::update_row() */ - upd_buf = NULL; - upd_buf_size = 0; - - /* We look for pattern #P# to see if the table is partitioned - MySQL table. */ -#ifdef __WIN__ - is_part = strstr(norm_name, "#p#"); -#else - is_part = strstr(norm_name, "#P#"); -#endif /* __WIN__ */ - - /* Check whether FOREIGN_KEY_CHECKS is set to 0. If so, the table - can be opened even if some FK indexes are missing. 
If not, the table - can't be opened in the same situation */ - if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { - ignore_err = DICT_ERR_IGNORE_FK_NOKEY; - } - - /* Get pointer to a table object in InnoDB dictionary cache */ - ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE, ignore_err); - - if (ib_table - && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && table->s->stored_fields != dict_table_get_n_user_cols(ib_table)) - || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID) - && (table->s->fields - != dict_table_get_n_user_cols(ib_table) - 1)))) { - ib_logf(IB_LOG_LEVEL_WARN, - "table %s contains " ULINTPF " user defined columns " - "in InnoDB, but %u columns in MySQL. Please " - "check INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " - REFMAN "innodb-troubleshooting.html " - "for how to resolve it", - norm_name, dict_table_get_n_user_cols(ib_table), - table->s->fields); - - /* Mark this table as corrupted, so the drop table - or force recovery can still use it, but not others. */ - ib_table->corrupted = true; - ib_table->file_unreadable = true; - dict_table_close(ib_table, FALSE, FALSE); - ib_table = NULL; - is_part = NULL; - } - - if (UNIV_UNLIKELY(ib_table && ib_table->corrupted && - srv_pass_corrupt_table <= 1)) { - free_share(share); - my_free(upd_buf); - upd_buf = NULL; - upd_buf_size = 0; - - DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); - } - - share->ib_table = ib_table; - - if (NULL == ib_table) { - if (is_part) { - /* MySQL partition engine hard codes the file name - separator as "#P#". The text case is fixed even if - lower_case_table_names is set to 1 or 2. This is true - for sub-partition names as well. InnoDB always - normalises file names to lower case on Windows, this - can potentially cause problems when copying/moving - tables between platforms. - - 1) If boot against an installation from Windows - platform, then its partition table name could - be in lower case in system tables. 
So we will - need to check lower case name when load table. - - 2) If we boot an installation from other case - sensitive platform in Windows, we might need to - check the existence of table name without lower - case in the system table. */ - if (innobase_get_lower_case_table_names() == 1) { - - if (!par_case_name_set) { -#ifndef __WIN__ - /* Check for the table using lower - case name, including the partition - separator "P" */ - strcpy(par_case_name, norm_name); - innobase_casedn_str(par_case_name); -#else - /* On Windows platfrom, check - whether there exists table name in - system table whose name is - not being normalized to lower case */ - normalize_table_name_low( - par_case_name, name, FALSE); -#endif - par_case_name_set = TRUE; - } - - ib_table = dict_table_open_on_name( - par_case_name, FALSE, TRUE, - ignore_err); - } - - if (ib_table) { -#ifndef __WIN__ - sql_print_warning("Partition table %s opened " - "after converting to lower " - "case. The table may have " - "been moved from a case " - "in-sensitive file system. " - "Please recreate table in " - "the current file system\n", - norm_name); -#else - sql_print_warning("Partition table %s opened " - "after skipping the step to " - "lower case the table name. " - "The table may have been " - "moved from a case sensitive " - "file system. Please " - "recreate table in the " - "current file system\n", - norm_name); -#endif - /* We allow use of table if it is found. - this is consistent to current behavior - to innodb_plugin */ - share->ib_table = ib_table; - goto table_opened; - } - } - - if (is_part) { - sql_print_error("Failed to open table %s.\n", - norm_name); - } - - ib_logf(IB_LOG_LEVEL_WARN, - "Cannot open table %s from the internal data " - "dictionary of InnoDB though the .frm file " - "for the table exists. 
See " - REFMAN "innodb-troubleshooting.html for how " - "you can resolve the problem.", norm_name); - - free_share(share); - my_errno = ENOENT; - - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); - } - -table_opened: - - innobase_copy_frm_flags_from_table_share(ib_table, table->s); - - /* No point to init any statistics if tablespace is still encrypted. */ - if (ib_table->is_readable()) { - dict_stats_init(ib_table); - } else { - ib_table->stat_initialized = 1; - } - - MONITOR_INC(MONITOR_TABLE_OPEN); - - bool no_tablespace = false; - bool encrypted = false; - FilSpace space; - - if (dict_table_is_discarded(ib_table)) { - - ib_senderrf(thd, - IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - - /* Allow an open because a proper DISCARD should have set - all the flags and index root page numbers to FIL_NULL that - should prevent any DML from running but it should allow DDL - operations. */ - - no_tablespace = false; - - } else if (!ib_table->is_readable()) { - space = fil_space_acquire_silent(ib_table->space); - - if (space()) { - if (space()->crypt_data && space()->crypt_data->is_encrypted()) { - /* This means that tablespace was found but we could not - decrypt encrypted page. */ - no_tablespace = true; - encrypted = true; - } else { - no_tablespace = true; - } - } else { - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, - ER_TABLESPACE_MISSING, norm_name); - - /* This means we have no idea what happened to the tablespace - file, best to play it safe. */ - - no_tablespace = true; - } - } - - if (!thd_tablespace_op(thd) && no_tablespace) { - free_share(share); - my_errno = ENOENT; - int ret_err = HA_ERR_NO_SUCH_TABLE; - - /* If table has no talespace but it has crypt data, check - is tablespace made unaccessible because encryption service - or used key_id is not available. 
*/ - if (encrypted) { - bool warning_pushed = false; - char buf[MAX_FULL_NAME_LEN]; - ut_format_name(ib_table->name, TRUE, buf, sizeof(buf)); - - if (!encryption_key_id_exists(space()->crypt_data->key_id)) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_DECRYPTION_FAILED, - "Table %s in file %s is encrypted but encryption service or" - " used key_id %u is not available. " - " Can't continue reading table.", - buf, space()->chain.start->name, - space()->crypt_data->key_id); - ret_err = HA_ERR_DECRYPTION_FAILED; - warning_pushed = true; - } - - /* If table is marked as encrypted then we push - warning if it has not been already done as used - key_id might be found but it is incorrect. */ - if (!warning_pushed) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_DECRYPTION_FAILED, - "Table %s in file %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - buf, space()->chain.start->name); - ret_err = HA_ERR_DECRYPTION_FAILED; - } - } - - dict_table_close(ib_table, FALSE, FALSE); - - DBUG_RETURN(ret_err); - } - - prebuilt = row_create_prebuilt(ib_table, table->s->stored_rec_length); - - prebuilt->default_rec = table->s->default_values; - ut_ad(prebuilt->default_rec); - - /* Looks like MySQL-3.23 sometimes has primary key number != 0 */ - primary_key = table->s->primary_key; - key_used_on_scan = primary_key; - - if (!innobase_build_index_translation(table, ib_table, share)) { - sql_print_error("Build InnoDB index translation table for" - " Table %s failed", name); - } - - /* Allocate a buffer for a 'row reference'. A row reference is - a string of bytes of length ref_length which uniquely specifies - a row in our table. Note that MySQL may also compare two row - references for equality by doing a simple memcmp on the strings - of length ref_length! 
*/ - - if (!row_table_got_default_clust_index(ib_table)) { - - prebuilt->clust_index_was_generated = FALSE; - - if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) { - ib_table->dict_frm_mismatch = DICT_FRM_NO_PK; - - /* This mismatch could cause further problems - if not attended, bring this to the user's attention - by printing a warning in addition to log a message - in the errorlog */ - - ib_push_frm_error(thd, ib_table, table, 0, true); - - /* If primary_key >= MAX_KEY, its (primary_key) - value could be out of bound if continue to index - into key_info[] array. Find InnoDB primary index, - and assign its key_length to ref_length. - In addition, since MySQL indexes are sorted starting - with primary index, unique index etc., initialize - ref_length to the first index key length in - case we fail to find InnoDB cluster index. - - Please note, this will not resolve the primary - index mismatch problem, other side effects are - possible if users continue to use the table. - However, we allow this table to be opened so - that user can adopt necessary measures for the - mismatch while still being accessible to the table - date. */ - if (!table->key_info) { - ut_ad(!table->s->keys); - ref_length = 0; - } else { - ref_length = table->key_info[0].key_length; - } - - /* Find corresponding cluster index - key length in MySQL's key_info[] array */ - for (uint i = 0; i < table->s->keys; i++) { - dict_index_t* index; - index = innobase_get_index(i); - if (dict_index_is_clust(index)) { - ref_length = - table->key_info[i].key_length; - } - } - } else { - /* MySQL allocates the buffer for ref. - key_info->key_length includes space for all key - columns + one byte for each column that may be - NULL. ref_length must be as exact as possible to - save space, because all row reference buffers are - allocated based on ref_length. 
*/ - - ref_length = table->key_info[primary_key].key_length; - } - } else { - if (primary_key != MAX_KEY) { - - ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS; - - /* This mismatch could cause further problems - if not attended, bring this to the user attention - by printing a warning in addition to log a message - in the errorlog */ - ib_push_frm_error(thd, ib_table, table, 0, true); - } - - prebuilt->clust_index_was_generated = TRUE; - - ref_length = DATA_ROW_ID_LEN; - - /* If we automatically created the clustered index, then - MySQL does not know about it, and MySQL must NOT be aware - of the index used on scan, to make it avoid checking if we - update the column of the index. That is why we assert below - that key_used_on_scan is the undefined value MAX_KEY. - The column is the row id in the automatical generation case, - and it will never be updated anyway. */ - - if (key_used_on_scan != MAX_KEY) { - sql_print_warning( - "Table %s key_used_on_scan is %lu even " - "though there is no primary key inside " - "InnoDB.", name, (ulong) key_used_on_scan); - } - } - - /* Index block size in InnoDB: used by MySQL in query optimization */ - stats.block_size = UNIV_PAGE_SIZE; - - /* Init table lock structure */ - thr_lock_data_init(&share->lock,&lock,(void*) 0); - - if (prebuilt->table) { - /* We update the highest file format in the system table - space, if this table has higher file format setting. */ - - trx_sys_file_format_max_upgrade( - (const char**) &innobase_file_format_max, - dict_table_get_format(prebuilt->table)); - } - - /* Only if the table has an AUTOINC column. */ - if (prebuilt->table != NULL - && prebuilt->table->is_readable() - && table->found_next_number_field != NULL) { - dict_table_autoinc_lock(prebuilt->table); - - /* Since a table can already be "open" in InnoDB's internal - data dictionary, we only init the autoinc counter once, the - first time the table is loaded. We can safely reuse the - autoinc value from a previous MySQL open. 
*/ - if (dict_table_autoinc_read(prebuilt->table) == 0) { - - innobase_initialize_autoinc(); - } - - dict_table_autoinc_unlock(prebuilt->table); - } - - info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); - - DBUG_RETURN(0); -} - -UNIV_INTERN -handler* -ha_innobase::clone( -/*===============*/ - const char* name, /*!< in: table name */ - MEM_ROOT* mem_root) /*!< in: memory context */ -{ - ha_innobase* new_handler; - - DBUG_ENTER("ha_innobase::clone"); - - new_handler = static_cast<ha_innobase*>(handler::clone(name, - mem_root)); - if (new_handler) { - DBUG_ASSERT(new_handler->prebuilt != NULL); - - new_handler->prebuilt->select_lock_type - = prebuilt->select_lock_type; - } - - DBUG_RETURN(new_handler); -} - -UNIV_INTERN -uint -ha_innobase::max_supported_key_part_length() const -/*==============================================*/ -{ - /* A table format specific index column length check will be performed - at ha_innobase::add_index() and row_create_index_for_mysql() */ - return(innobase_large_prefix - ? REC_VERSION_56_MAX_INDEX_COL_LEN - : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1); -} - -/******************************************************************//** -Closes a handle to an InnoDB table. -@return 0 */ -UNIV_INTERN -int -ha_innobase::close() -/*================*/ -{ - THD* thd; - - DBUG_ENTER("ha_innobase::close"); - - thd = ha_thd(); - - row_prebuilt_free(prebuilt, FALSE); - - if (upd_buf != NULL) { - ut_ad(upd_buf_size != 0); - my_free(upd_buf); - upd_buf = NULL; - upd_buf_size = 0; - } - - free_share(share); - - MONITOR_INC(MONITOR_TABLE_CLOSE); - - /* Tell InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - DBUG_RETURN(0); -} - -/* The following accessor functions should really be inside MySQL code! */ - -/**************************************************************//** -Gets field offset for a field in a table. 
@return offset */
static inline
uint
get_field_offset(
/*=============*/
	const TABLE*	table,	/*!< in: MySQL table object */
	const Field*	field)	/*!< in: MySQL field object */
{
	/* Byte distance from the start of table->record[0] to the field's
	data pointer. */
	return((uint) (field->ptr - table->record[0]));
}

#ifdef WITH_WSREP
/*************************************************************//**
Replaces the contents of str with the output of the column charset's
strnxfrm() when mysql_type is one of the string/BLOB/BIT types listed in
the switch below; every other type is left untouched.
@return str_length, except that for the string types with
wsrep_protocol_version >= 3 the length reported by strnxfrm() is returned */
UNIV_INTERN
int
wsrep_innobase_mysql_sort(
/*===============*/
					/* out: str contains sort string */
	int		mysql_type,	/* in: MySQL type */
	uint		charset_number,	/* in: number of the charset */
	unsigned char*	str,		/* in: data field */
	unsigned int	str_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned int	buf_length)	/* in: total str buffer length */

{
	CHARSET_INFO*		charset;
	enum_field_types	mysql_tp;
	int			ret_length = str_length;

	DBUG_ASSERT(str_length != UNIV_SQL_NULL);

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
	{
		/* Scratch copy of the input: str itself is overwritten by
		the strnxfrm() calls below. */
		uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
		uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;

		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				sql_print_error("InnoDB needs charset %lu for doing "
						"a comparison, but MySQL cannot "
						"find that charset.",
						(ulong) charset_number);
				ut_a(0);
			}
		}

		ut_a(str_length <= tmp_length);
		memcpy(tmp_str, str, str_length);

		/* NOTE(review): assuming the usual strnxfrm() argument order
		(cs, dst, dstlen, nweights, src, srclen, flags) - confirm.
		Under that reading this first call transforms tmp_str into
		str; its return value is reused as the source length of the
		protocol < 3 call below. */
		tmp_length = charset->coll->strnxfrm(charset, str, str_length,
						     str_length, tmp_str,
						     tmp_length, 0);
		DBUG_ASSERT(tmp_length <= str_length);
		if (wsrep_protocol_version < 3) {
			tmp_length = charset->coll->strnxfrm(
				charset, str, str_length,
				str_length, tmp_str, tmp_length, 0);
			DBUG_ASSERT(tmp_length <= str_length);
		} else {
			/* strnxfrm will expand the destination string,
			   protocols < 3 truncated the sorted string
			   protocols >= 3 gets full sorted string
			*/
			tmp_length = charset->coll->strnxfrm(
				charset, str, buf_length,
				str_length, tmp_str, str_length, 0);
			DBUG_ASSERT(tmp_length <= buf_length);
			ret_length = tmp_length;
		}

		break;
	}
	/* All remaining types are passed through unchanged. */
	case MYSQL_TYPE_DECIMAL :
	case MYSQL_TYPE_TINY :
	case MYSQL_TYPE_SHORT :
	case MYSQL_TYPE_LONG :
	case MYSQL_TYPE_FLOAT :
	case MYSQL_TYPE_DOUBLE :
	case MYSQL_TYPE_NULL :
	case MYSQL_TYPE_TIMESTAMP :
	case MYSQL_TYPE_LONGLONG :
	case MYSQL_TYPE_INT24 :
	case MYSQL_TYPE_DATE :
	case MYSQL_TYPE_TIME :
	case MYSQL_TYPE_DATETIME :
	case MYSQL_TYPE_YEAR :
	case MYSQL_TYPE_NEWDATE :
	case MYSQL_TYPE_NEWDECIMAL :
	case MYSQL_TYPE_ENUM :
	case MYSQL_TYPE_SET :
	case MYSQL_TYPE_GEOMETRY :
		break;
	default:
		break;
	}

	return ret_length;
}
#endif /* WITH_WSREP */

/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them.
NOTE that the prototype
of this function is in rem0cmp.cc in InnoDB source code! If you change this
function, remember to update the prototype there!
@return 1, 0, -1, if a is greater, equal, less than b, respectively */
UNIV_INTERN
int
innobase_mysql_cmp(
/*===============*/
	int		mysql_type,	/*!< in: MySQL type */
	uint		charset_number,	/*!< in: number of the charset */
	const unsigned char* a,		/*!< in: data field */
	unsigned int	a_length,	/*!< in: data field length,
					not UNIV_SQL_NULL */
	const unsigned char* b,		/*!< in: data field */
	unsigned int	b_length)	/*!< in: data field length,
					not UNIV_SQL_NULL */
{
	CHARSET_INFO*		charset;
	enum_field_types	mysql_tp;
	int			ret;

	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				sql_print_error("InnoDB needs charset %lu for doing "
						"a comparison, but MySQL cannot "
						"find that charset.",
						(ulong) charset_number);
				ut_a(0);
			}
		}

		/* Starting from 4.1.3, we use strnncollsp() in comparisons of
		non-latin1_swedish_ci strings. NOTE that the collation order
		changes then: 'b\0\0...' is ordered BEFORE 'b ...'. Users
		having indexes on such data need to rebuild their tables! */

		ret = charset->coll->strnncollsp(
			charset, a, a_length, b, b_length, 0);

		/* Normalise the collation result to exactly -1, 0 or 1. */
		if (ret < 0) {
			return(-1);
		} else if (ret > 0) {
			return(1);
		} else {
			return(0);
		}
	default:
		/* Only the string-like types above may be compared here. */
		ut_error;
	}

	return(0);
}


/*************************************************************//**
Looks up the CHARSET_INFO for a charset number. Only the string/BLOB/BIT
types listed in the switch are accepted; any other type hits ut_error.
@return the charset struct */
UNIV_INTERN
CHARSET_INFO*
innobase_get_fts_charset(
/*=====================*/
	int		mysql_type,	/*!< in: MySQL type */
	uint		charset_number)	/*!< in: number of the charset */
{
	enum_field_types	mysql_tp;
	CHARSET_INFO*		charset;

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING:
	case MYSQL_TYPE_VAR_STRING:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
	case MYSQL_TYPE_VARCHAR:
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				sql_print_error("InnoDB needs charset %lu for doing "
						"a comparison, but MySQL cannot "
						"find that charset.",
						(ulong) charset_number);
				ut_a(0);
			}
		}
		break;
	default:
		ut_error;
	}

	return(charset);
}

/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -UNIV_INTERN -int -innobase_mysql_cmp_prefix( -/*======================*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length) /*!< in: data field length, - not UNIV_SQL_NULL */ -{ - CHARSET_INFO* charset; - int result; - - charset = innobase_get_fts_charset(mysql_type, charset_number); - - result = ha_compare_text(charset, (uchar*) a, a_length, - (uchar*) b, b_length, 1, 0); - - return(result); -} -/******************************************************************//** -compare two character string according to their charset. */ -UNIV_INTERN -int -innobase_fts_text_cmp( -/*==================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2) /*!< in: node */ -{ - const CHARSET_INFO* charset = (const CHARSET_INFO*) cs; - const fts_string_t* s1 = (const fts_string_t*) p1; - const fts_string_t* s2 = (const fts_string_t*) p2; - - return(ha_compare_text( - charset, s1->f_str, static_cast<uint>(s1->f_len), - s2->f_str, static_cast<uint>(s2->f_len), 0, 0)); -} -/******************************************************************//** -compare two character string case insensitively according to their charset. 
*/ -UNIV_INTERN -int -innobase_fts_text_case_cmp( -/*=======================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2) /*!< in: node */ -{ - const CHARSET_INFO* charset = (const CHARSET_INFO*) cs; - const fts_string_t* s1 = (const fts_string_t*) p1; - const fts_string_t* s2 = (const fts_string_t*) p2; - ulint newlen; - - my_casedn_str(charset, (char*) s2->f_str); - - newlen = strlen((const char*) s2->f_str); - - return(ha_compare_text( - charset, s1->f_str, static_cast<uint>(s1->f_len), - s2->f_str, static_cast<uint>(newlen), 0, 0)); -} -/******************************************************************//** -Get the first character's code position for FTS index partition. */ -UNIV_INTERN -ulint -innobase_strnxfrm( -/*==============*/ - const CHARSET_INFO* - cs, /*!< in: Character set */ - const uchar* str, /*!< in: string */ - const ulint len) /*!< in: string length */ -{ - uchar mystr[2]; - ulint value; - - if (!str || len == 0) { - return(0); - } - - my_strnxfrm(cs, (uchar*) mystr, 2, str, len); - - value = mach_read_from_2(mystr); - - if (value > 255) { - value = value / 256; - } - - return(value); -} - -/******************************************************************//** -compare two character string according to their charset. */ -UNIV_INTERN -int -innobase_fts_text_cmp_prefix( -/*=========================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: prefix key */ - const void* p2) /*!< in: value to compare */ -{ - const CHARSET_INFO* charset = (const CHARSET_INFO*) cs; - const fts_string_t* s1 = (const fts_string_t*) p1; - const fts_string_t* s2 = (const fts_string_t*) p2; - int result; - - result = ha_compare_text( - charset, s2->f_str, static_cast<uint>(s2->f_len), - s1->f_str, static_cast<uint>(s1->f_len), 1, 0); - - /* We switched s1, s2 position in ha_compare_text. 
So we need - to negate the result */ - return(-result); -} - -/******************************************************************//** -Makes all characters in a string lower case. */ -UNIV_INTERN -size_t -innobase_fts_casedn_str( -/*====================*/ - CHARSET_INFO* cs, /*!< in: Character set */ - char* src, /*!< in: string to put in lower case */ - size_t src_len,/*!< in: input string length */ - char* dst, /*!< in: buffer for result string */ - size_t dst_len)/*!< in: buffer size */ -{ - if (cs->casedn_multiply == 1) { - memcpy(dst, src, src_len); - dst[src_len] = 0; - my_casedn_str(cs, dst); - - return(strlen(dst)); - } else { - return(cs->cset->casedn(cs, src, src_len, dst, dst_len)); - } -} - -#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_') - -#define misc_word_char(X) 0 - -/*************************************************************//** -Get the next token from the given string and store it in *token. -It is mostly copied from MyISAM's doc parsing function ft_simple_get_word() -@return length of string processed */ -UNIV_INTERN -ulint -innobase_mysql_fts_get_token( -/*=========================*/ - CHARSET_INFO* cs, /*!< in: Character set */ - const byte* start, /*!< in: start of text */ - const byte* end, /*!< in: one character past end of - text */ - fts_string_t* token, /*!< out: token's text */ - ulint* offset) /*!< out: offset to token, - measured as characters from - 'start' */ -{ - int mbl; - const uchar* doc = start; - - ut_a(cs); - - token->f_n_char = token->f_len = 0; - token->f_str = NULL; - - for (;;) { - - if (doc >= end) { - return(doc - start); - } - - int ctype; - - mbl = cs->cset->ctype( - cs, &ctype, doc, (const uchar*) end); - - if (true_word_char(ctype, *doc)) { - break; - } - - doc += mbl > 0 ? mbl : (mbl < 0 ? 
-mbl : 1); - } - - ulint mwc = 0; - ulint length = 0; - - token->f_str = const_cast<byte*>(doc); - - while (doc < end) { - - int ctype; - - mbl = cs->cset->ctype( - cs, &ctype, (uchar*) doc, (uchar*) end); - if (true_word_char(ctype, *doc)) { - mwc = 0; - } else if (!misc_word_char(*doc) || mwc) { - break; - } else { - ++mwc; - } - - ++length; - - doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1); - } - - token->f_len = (uint) (doc - token->f_str) - mwc; - token->f_n_char = length; - - return(doc - start); -} - -/**************************************************************//** -Converts a MySQL type to an InnoDB type. Note that this function returns -the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 -VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. -@return DATA_BINARY, DATA_VARCHAR, ... */ -UNIV_INTERN -ulint -get_innobase_type_from_mysql_type( -/*==============================*/ - ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an - 'unsigned type'; - at least ENUM and SET, - and unsigned integer - types are 'unsigned types' */ - const void* f) /*!< in: MySQL Field */ -{ - const class Field* field = reinterpret_cast<const class Field*>(f); - - /* The following asserts try to check that the MySQL type code fits in - 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to - the type */ - - DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256); - - if (field->flags & UNSIGNED_FLAG) { - - *unsigned_flag = DATA_UNSIGNED; - } else { - *unsigned_flag = 0; - } - - if (field->real_type() == MYSQL_TYPE_ENUM - || field->real_type() == MYSQL_TYPE_SET) { - - /* MySQL has field->type() a string type for these, but the - data is actually internally stored as an unsigned integer - code! 
*/ - - *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned - flag set to zero, even though - internally this is an unsigned - integer type */ - return(DATA_INT); - } - - switch (field->type()) { - /* NOTE that we only allow string types in DATA_MYSQL and - DATA_VARMYSQL */ - case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */ - case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */ - if (field->binary()) { - return(DATA_BINARY); - } else if (field->charset() == &my_charset_latin1) { - return(DATA_VARCHAR); - } else { - return(DATA_VARMYSQL); - } - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_STRING: - if (field->binary()) { - return(DATA_FIXBINARY); - } else if (field->charset() == &my_charset_latin1) { - return(DATA_CHAR); - } else { - return(DATA_MYSQL); - } - case MYSQL_TYPE_NEWDECIMAL: - return(DATA_FIXBINARY); - case MYSQL_TYPE_LONG: - case MYSQL_TYPE_LONGLONG: - case MYSQL_TYPE_TINY: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_DATE: - case MYSQL_TYPE_YEAR: - case MYSQL_TYPE_NEWDATE: - return(DATA_INT); - case MYSQL_TYPE_TIME: - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_TIMESTAMP: - if (field->key_type() == HA_KEYTYPE_BINARY) - return(DATA_FIXBINARY); - else - return(DATA_INT); - case MYSQL_TYPE_FLOAT: - return(DATA_FLOAT); - case MYSQL_TYPE_DOUBLE: - return(DATA_DOUBLE); - case MYSQL_TYPE_DECIMAL: - return(DATA_DECIMAL); - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_LONG_BLOB: - return(DATA_BLOB); - case MYSQL_TYPE_NULL: - /* MySQL currently accepts "NULL" datatype, but will - reject such datatype in the next release. We will cope - with it and not trigger assertion failure in 5.1 */ - break; - default: - ut_error; - } - - return(0); -} - -/*******************************************************************//** -Writes an unsigned integer value < 64k to 2 bytes, in the little-endian -storage format. 
*/ -static inline -void -innobase_write_to_2_little_endian( -/*==============================*/ - byte* buf, /*!< in: where to store */ - ulint val) /*!< in: value to write, must be < 64k */ -{ - ut_a(val < 256 * 256); - - buf[0] = (byte)(val & 0xFF); - buf[1] = (byte)(val / 256); -} - -/*******************************************************************//** -Reads an unsigned integer value < 64k from 2 bytes, in the little-endian -storage format. -@return value */ -static inline -uint -innobase_read_from_2_little_endian( -/*===============================*/ - const uchar* buf) /*!< in: from where to read */ -{ - return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1])))); -} - -#ifdef WITH_WSREP -/*******************************************************************//** -Stores a key value for a row to a buffer. -@return key value length as stored in buff */ -UNIV_INTERN -uint -wsrep_store_key_val_for_row( -/*===============================*/ - THD* thd, - TABLE* table, - uint keynr, /*!< in: key number */ - char* buff, /*!< in/out: buffer for the key value (in MySQL - format) */ - uint buff_len,/*!< in: buffer length */ - const uchar* record, - row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */ - ibool* key_is_null)/*!< out: full key was null */ -{ - KEY* key_info = table->key_info + keynr; - KEY_PART_INFO* key_part = key_info->key_part; - KEY_PART_INFO* end = key_part + key_info->user_defined_key_parts; - char* buff_start = buff; - enum_field_types mysql_type; - Field* field; - uint buff_space = buff_len; - - DBUG_ENTER("wsrep_store_key_val_for_row"); - - memset(buff, 0, buff_len); - *key_is_null = TRUE; - - for (; key_part != end; key_part++) { - - uchar sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'}; - ibool part_is_null = FALSE; - - if (key_part->null_bit) { - if (buff_space > 0) { - if (record[key_part->null_offset] - & key_part->null_bit) { - *buff = 1; - part_is_null = TRUE; - } else { - *buff = 0; - } - buff++; - buff_space--; - } else { - 
fprintf (stderr, "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - } - } - if (!part_is_null) *key_is_null = FALSE; - - field = key_part->field; - mysql_type = field->type(); - - if (mysql_type == MYSQL_TYPE_VARCHAR) { - /* >= 5.0.3 true VARCHAR */ - ulint lenlen; - ulint len; - const byte* data; - ulint key_len; - ulint true_len; - const CHARSET_INFO* cs; - int error=0; - - key_len = key_part->length; - - if (part_is_null) { - true_len = key_len + 2; - if (true_len > buff_space) { - fprintf (stderr, - "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - true_len = buff_space; - } - buff += true_len; - buff_space -= true_len; - continue; - } - cs = field->charset(); - - lenlen = (ulint) - (((Field_varstring*)field)->length_bytes); - - data = row_mysql_read_true_varchar(&len, - (byte*) (record - + (ulint)get_field_offset(table, field)), - lenlen); - - true_len = len; - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char *) data, - (const char *) data + len, - (uint) (key_len / - cs->mbmaxlen), - &error); - } - - /* In a column prefix index, we may need to truncate - the stored value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - memcpy(sorted, data, true_len); - true_len = wsrep_innobase_mysql_sort( - mysql_type, cs->number, sorted, true_len, - REC_VERSION_56_MAX_INDEX_COL_LEN); - - if (wsrep_protocol_version > 1) { - /* Note that we always reserve the maximum possible - length of the true VARCHAR in the key value, though - only len first bytes after the 2 length bytes contain - actual data. The rest of the space was reset to zero - in the bzero() call above. 
*/ - if (true_len > buff_space) { - fprintf (stderr, - "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - true_len = buff_space; - } - memcpy(buff, sorted, true_len); - buff += true_len; - buff_space -= true_len; - } else { - buff += key_len; - } - } else if (mysql_type == MYSQL_TYPE_TINY_BLOB - || mysql_type == MYSQL_TYPE_MEDIUM_BLOB - || mysql_type == MYSQL_TYPE_BLOB - || mysql_type == MYSQL_TYPE_LONG_BLOB - /* MYSQL_TYPE_GEOMETRY data is treated - as BLOB data in innodb. */ - || mysql_type == MYSQL_TYPE_GEOMETRY) { - - const CHARSET_INFO* cs; - ulint key_len; - ulint true_len; - int error=0; - ulint blob_len; - const byte* blob_data; - - ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); - - key_len = key_part->length; - - if (part_is_null) { - true_len = key_len + 2; - if (true_len > buff_space) { - fprintf (stderr, - "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - true_len = buff_space; - } - buff += true_len; - buff_space -= true_len; - - continue; - } - - cs = field->charset(); - - blob_data = row_mysql_read_blob_ref(&blob_len, - (byte*) (record - + (ulint) get_field_offset(table, field)), - (ulint) field->pack_length()); - - true_len = blob_len; - - ut_a(get_field_offset(table, field) - == key_part->offset); - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (blob_len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char *) blob_data, - (const char *) blob_data - + blob_len, - (uint) (key_len / - cs->mbmaxlen), - &error); - } - - /* All indexes on BLOB and TEXT are column prefix - indexes, and we may need to truncate the data to be - stored in the key value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - memcpy(sorted, blob_data, true_len); - true_len = wsrep_innobase_mysql_sort( - mysql_type, cs->number, sorted, true_len, - REC_VERSION_56_MAX_INDEX_COL_LEN); - - - /* Note that we always reserve the maximum possible - length of the BLOB prefix in 
the key value. */ - if (wsrep_protocol_version > 1) { - if (true_len > buff_space) { - fprintf (stderr, - "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - true_len = buff_space; - } - buff += true_len; - buff_space -= true_len; - } else { - buff += key_len; - } - memcpy(buff, sorted, true_len); - } else { - /* Here we handle all other data types except the - true VARCHAR, BLOB and TEXT. Note that the column - value we store may be also in a column prefix - index. */ - - const CHARSET_INFO* cs = NULL; - ulint true_len; - ulint key_len; - const uchar* src_start; - int error=0; - enum_field_types real_type; - - key_len = key_part->length; - - if (part_is_null) { - true_len = key_len; - if (true_len > buff_space) { - fprintf (stderr, - "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - true_len = buff_space; - } - buff += true_len; - buff_space -= true_len; - - continue; - } - - src_start = record + key_part->offset; - real_type = field->real_type(); - true_len = key_len; - - /* Character set for the field is defined only - to fields whose type is string and real field - type is not enum or set. For these fields check - if character set is multi byte. 
*/ - - if (real_type != MYSQL_TYPE_ENUM - && real_type != MYSQL_TYPE_SET - && ( mysql_type == MYSQL_TYPE_VAR_STRING - || mysql_type == MYSQL_TYPE_STRING)) { - - cs = field->charset(); - - /* For multi byte character sets we need to - calculate the true length of the key */ - - if (key_len > 0 && cs->mbmaxlen > 1) { - - true_len = (ulint) - cs->cset->well_formed_len(cs, - (const char *)src_start, - (const char *)src_start - + key_len, - (uint) (key_len / - cs->mbmaxlen), - &error); - } - memcpy(sorted, src_start, true_len); - true_len = wsrep_innobase_mysql_sort( - mysql_type, cs->number, sorted, true_len, - REC_VERSION_56_MAX_INDEX_COL_LEN); - - if (true_len > buff_space) { - fprintf (stderr, - "WSREP: key truncated: %s\n", - wsrep_thd_query(thd)); - true_len = buff_space; - } - memcpy(buff, sorted, true_len); - } else { - memcpy(buff, src_start, true_len); - } - buff += true_len; - buff_space -= true_len; - } - } - - ut_a(buff <= buff_start + buff_len); - - DBUG_RETURN((uint)(buff - buff_start)); -} -#endif /* WITH_WSREP */ - -/*******************************************************************//** -Stores a key value for a row to a buffer. -@return key value length as stored in buff */ -UNIV_INTERN -uint -ha_innobase::store_key_val_for_row( -/*===============================*/ - uint keynr, /*!< in: key number */ - char* buff, /*!< in/out: buffer for the key value (in MySQL - format) */ - uint buff_len,/*!< in: buffer length */ - const uchar* record)/*!< in: row in MySQL format */ -{ - KEY* key_info = table->key_info + keynr; - KEY_PART_INFO* key_part = key_info->key_part; - KEY_PART_INFO* end = - key_part + key_info->user_defined_key_parts; - char* buff_start = buff; - enum_field_types mysql_type; - Field* field; - ibool is_null; - - DBUG_ENTER("store_key_val_for_row"); - - /* The format for storing a key field in MySQL is the following: - - 1. If the column can be NULL, then in the first byte we put 1 if the - field value is NULL, 0 otherwise. - - 2. 
If the column is of a BLOB type (it must be a column prefix field - in this case), then we put the length of the data in the field to the - next 2 bytes, in the little-endian format. If the field is SQL NULL, - then these 2 bytes are set to 0. Note that the length of data in the - field is <= column prefix length. - - 3. In a column prefix field, prefix_len next bytes are reserved for - data. In a normal field the max field length next bytes are reserved - for data. For a VARCHAR(n) the max field length is n. If the stored - value is the SQL NULL then these data bytes are set to 0. - - 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that - in the MySQL row format, the length is stored in 1 or 2 bytes, - depending on the maximum allowed length. But in the MySQL key value - format, the length always takes 2 bytes. - - We have to zero-fill the buffer so that MySQL is able to use a - simple memcmp to compare two key values to determine if they are - equal. MySQL does this to compare contents of two 'ref' values. 
*/ - - memset(buff, 0, buff_len); - - for (; key_part != end; key_part++) { - is_null = FALSE; - - if (key_part->null_bit) { - if (record[key_part->null_offset] - & key_part->null_bit) { - *buff = 1; - is_null = TRUE; - } else { - *buff = 0; - } - buff++; - } - - field = key_part->field; - mysql_type = field->type(); - - if (mysql_type == MYSQL_TYPE_VARCHAR) { - /* >= 5.0.3 true VARCHAR */ - ulint lenlen; - ulint len; - const byte* data; - ulint key_len; - ulint true_len; - const CHARSET_INFO* cs; - int error=0; - - key_len = key_part->length; - - if (is_null) { - buff += key_len + 2; - - continue; - } - cs = field->charset(); - - lenlen = (ulint) - (((Field_varstring*) field)->length_bytes); - - data = row_mysql_read_true_varchar(&len, - (byte*) (record - + (ulint) get_field_offset(table, field)), - lenlen); - - true_len = len; - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char*) data, - (const char*) data + len, - (uint) (key_len / cs->mbmaxlen), - &error); - } - - /* In a column prefix index, we may need to truncate - the stored value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - /* The length in a key value is always stored in 2 - bytes */ - - row_mysql_store_true_var_len((byte*) buff, true_len, 2); - buff += 2; - - memcpy(buff, data, true_len); - - /* Note that we always reserve the maximum possible - length of the true VARCHAR in the key value, though - only len first bytes after the 2 length bytes contain - actual data. The rest of the space was reset to zero - in the memset() call above. */ - - buff += key_len; - - } else if (mysql_type == MYSQL_TYPE_TINY_BLOB - || mysql_type == MYSQL_TYPE_MEDIUM_BLOB - || mysql_type == MYSQL_TYPE_BLOB - || mysql_type == MYSQL_TYPE_LONG_BLOB - /* MYSQL_TYPE_GEOMETRY data is treated - as BLOB data in innodb. 
*/ - || mysql_type == MYSQL_TYPE_GEOMETRY) { - - const CHARSET_INFO* cs; - ulint key_len; - ulint true_len; - int error=0; - ulint blob_len; - const byte* blob_data; - - ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); - - key_len = key_part->length; - - if (is_null) { - buff += key_len + 2; - - continue; - } - - cs = field->charset(); - - blob_data = row_mysql_read_blob_ref(&blob_len, - (byte*) (record - + (ulint) get_field_offset(table, field)), - (ulint) field->pack_length()); - - true_len = blob_len; - - ut_a(get_field_offset(table, field) - == key_part->offset); - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (blob_len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char*) blob_data, - (const char*) blob_data - + blob_len, - (uint) (key_len / cs->mbmaxlen), - &error); - } - - /* All indexes on BLOB and TEXT are column prefix - indexes, and we may need to truncate the data to be - stored in the key value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - /* MySQL reserves 2 bytes for the length and the - storage of the number is little-endian */ - - innobase_write_to_2_little_endian( - (byte*) buff, true_len); - buff += 2; - - memcpy(buff, blob_data, true_len); - - /* Note that we always reserve the maximum possible - length of the BLOB prefix in the key value. */ - - buff += key_len; - } else { - /* Here we handle all other data types except the - true VARCHAR, BLOB and TEXT. Note that the column - value we store may be also in a column prefix - index. 
*/ - - const CHARSET_INFO* cs = NULL; - ulint true_len; - ulint key_len; - const uchar* src_start; - int error=0; - enum_field_types real_type; - - key_len = key_part->length; - - if (is_null) { - buff += key_len; - - continue; - } - - src_start = record + key_part->offset; - real_type = field->real_type(); - true_len = key_len; - - /* Character set for the field is defined only - to fields whose type is string and real field - type is not enum or set. For these fields check - if character set is multi byte. */ - - if (real_type != MYSQL_TYPE_ENUM - && real_type != MYSQL_TYPE_SET - && ( mysql_type == MYSQL_TYPE_VAR_STRING - || mysql_type == MYSQL_TYPE_STRING)) { - - cs = field->charset(); - - /* For multi byte character sets we need to - calculate the true length of the key */ - - if (key_len > 0 && cs->mbmaxlen > 1) { - - true_len = (ulint) - cs->cset->well_formed_len(cs, - (const char*) src_start, - (const char*) src_start - + key_len, - (uint) (key_len - / cs->mbmaxlen), - &error); - } - } - - memcpy(buff, src_start, true_len); - buff += true_len; - - /* Pad the unused space with spaces. */ - - if (true_len < key_len) { - ulint pad_len = key_len - true_len; - ut_a(cs != NULL); - ut_a(!(pad_len % cs->mbminlen)); - - cs->cset->fill(cs, buff, pad_len, - 0x20 /* space */); - buff += pad_len; - } - } - } - - ut_a(buff <= buff_start + buff_len); - - DBUG_RETURN((uint)(buff - buff_start)); -} - -/**************************************************************//** -Determines if a field is needed in a prebuilt struct 'template'. 
-@return field to use, or NULL if the field is not needed */ -static -const Field* -build_template_needs_field( -/*=======================*/ - ibool index_contains, /*!< in: - dict_index_contains_col_or_prefix( - index, i) */ - ibool read_just_key, /*!< in: TRUE when MySQL calls - ha_innobase::extra with the - argument HA_EXTRA_KEYREAD; it is enough - to read just columns defined in - the index (i.e., no read of the - clustered index record necessary) */ - ibool fetch_all_in_key, - /*!< in: true=fetch all fields in - the index */ - ibool fetch_primary_key_cols, - /*!< in: true=fetch the - primary key columns */ - dict_index_t* index, /*!< in: InnoDB index to use */ - const TABLE* table, /*!< in: MySQL table object */ - ulint i, /*!< in: field index in InnoDB table */ - ulint sql_idx) /*!< in: field index in SQL table */ -{ - const Field* field = table->field[sql_idx]; - - ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i)); - - if (!index_contains) { - if (read_just_key) { - /* If this is a 'key read', we do not need - columns that are not in the key */ - - return(NULL); - } - } else if (fetch_all_in_key) { - /* This field is needed in the query */ - - return(field); - } - - if (bitmap_is_set(table->read_set, static_cast<uint>(sql_idx)) - || bitmap_is_set(table->write_set, static_cast<uint>(sql_idx))) { - /* This field is needed in the query */ - - return(field); - } - - if (fetch_primary_key_cols - && dict_table_col_in_clustered_key(index->table, i)) { - /* This field is needed in the query */ - - return(field); - } - - /* This field is not needed in the query, skip it */ - - return(NULL); -} - -/**************************************************************//** -Determines if a field is needed in a prebuilt struct 'template'. 
-@return whether the field is needed for index condition pushdown */ -inline -bool -build_template_needs_field_in_icp( -/*==============================*/ - const dict_index_t* index, /*!< in: InnoDB index */ - const row_prebuilt_t* prebuilt,/*!< in: row fetch template */ - bool contains,/*!< in: whether the index contains - column i */ - ulint i) /*!< in: column number */ -{ - ut_ad(contains == dict_index_contains_col_or_prefix(index, i)); - - return(index == prebuilt->index - ? contains - : dict_index_contains_col_or_prefix(prebuilt->index, i)); -} - -/**************************************************************//** -Adds a field to a prebuilt struct 'template'. -@return the field template */ -static -mysql_row_templ_t* -build_template_field( -/*=================*/ - row_prebuilt_t* prebuilt, /*!< in/out: template */ - dict_index_t* clust_index, /*!< in: InnoDB clustered index */ - dict_index_t* index, /*!< in: InnoDB index to use */ - TABLE* table, /*!< in: MySQL table object */ - const Field* field, /*!< in: field in MySQL table */ - ulint i) /*!< in: field index in InnoDB table */ -{ - mysql_row_templ_t* templ; - const dict_col_t* col; - - //ut_ad(field == table->field[i]); - ut_ad(clust_index->table == index->table); - - col = dict_table_get_nth_col(index->table, i); - - templ = prebuilt->mysql_template + prebuilt->n_template++; - UNIV_MEM_INVALID(templ, sizeof *templ); - templ->col_no = i; - templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index); - - /* If clustered index record field is not found, lets print out - field names and all the rest to understand why field is not found. 
*/ - if (templ->clust_rec_field_no == ULINT_UNDEFINED) { - const char* tb_col_name = dict_table_get_col_name(clust_index->table, i); - dict_field_t* field=NULL; - size_t size = 0; - - for(ulint j=0; j < clust_index->n_user_defined_cols; j++) { - dict_field_t* ifield = &(clust_index->fields[j]); - if (ifield && !memcmp(tb_col_name, ifield->name, - strlen(tb_col_name))) { - field = ifield; - break; - } - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Looking for field %lu name %s from table %s", - i, - (tb_col_name ? tb_col_name : "NULL"), - clust_index->table->name); - - - for(ulint j=0; j < clust_index->n_user_defined_cols; j++) { - dict_field_t* ifield = &(clust_index->fields[j]); - ib_logf(IB_LOG_LEVEL_INFO, - "InnoDB Table %s field %lu name %s", - clust_index->table->name, - j, - (ifield ? ifield->name : "NULL")); - } - - for(ulint j=0; j < table->s->stored_fields; j++) { - ib_logf(IB_LOG_LEVEL_INFO, - "MySQL table %s field %lu name %s", - table->s->table_name.str, - j, - table->field[j]->field_name.str); - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "Clustered record field for column %lu" - " not found table n_user_defined %d" - " index n_user_defined %d" - " InnoDB table %s field name %s" - " MySQL table %s field name %s n_fields %d" - " query %s", - i, - clust_index->n_user_defined_cols, - clust_index->table->n_cols - DATA_N_SYS_COLS, - clust_index->table->name, - (field ? field->name : "NULL"), - table->s->table_name.str, - (tb_col_name ? 
tb_col_name : "NULL"), - table->s->stored_fields, - innobase_get_stmt(current_thd, &size)); - - ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); - } - templ->rec_field_is_prefix = FALSE; - - if (dict_index_is_clust(index)) { - templ->rec_field_is_prefix = false; - templ->rec_field_no = templ->clust_rec_field_no; - templ->rec_prefix_field_no = ULINT_UNDEFINED; - } else { - /* If we're in a secondary index, keep track of the original - index position even if this is just a prefix index; we will use - this later to avoid a cluster index lookup in some cases.*/ - - templ->rec_field_no = dict_index_get_nth_col_pos(index, i, - &templ->rec_prefix_field_no); - templ->rec_field_is_prefix - = (templ->rec_field_no == ULINT_UNDEFINED) - && (templ->rec_prefix_field_no != ULINT_UNDEFINED); -#ifdef UNIV_DEBUG - if (templ->rec_prefix_field_no != ULINT_UNDEFINED) - { - const dict_field_t* field = dict_index_get_nth_field( - index, - templ->rec_prefix_field_no); - ut_ad(templ->rec_field_is_prefix - == (field->prefix_len != 0)); - } else { - ut_ad(!templ->rec_field_is_prefix); - } -#endif - } - - if (field->real_maybe_null()) { - templ->mysql_null_byte_offset = - field->null_offset(); - - templ->mysql_null_bit_mask = (ulint) field->null_bit; - } else { - templ->mysql_null_bit_mask = 0; - } - - templ->mysql_col_offset = (ulint) get_field_offset(table, field); - - templ->mysql_col_len = (ulint) field->pack_length(); - templ->type = col->mtype; - templ->mysql_type = (ulint) field->type(); - - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - templ->mysql_length_bytes = (ulint) - (((Field_varstring*) field)->length_bytes); - } - - templ->charset = dtype_get_charset_coll(col->prtype); - templ->mbminlen = dict_col_get_mbminlen(col); - templ->mbmaxlen = dict_col_get_mbmaxlen(col); - templ->is_unsigned = col->prtype & DATA_UNSIGNED; - - if (!dict_index_is_clust(index) - && templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; - - if 
(templ->rec_prefix_field_no != ULINT_UNDEFINED) { - dict_field_t* field = dict_index_get_nth_field( - index, - templ->rec_prefix_field_no); - templ->rec_field_is_prefix = (field->prefix_len != 0); - } - } - - if (prebuilt->mysql_prefix_len < templ->mysql_col_offset - + templ->mysql_col_len) { - prebuilt->mysql_prefix_len = templ->mysql_col_offset - + templ->mysql_col_len; - } - - if (templ->type == DATA_BLOB) { - prebuilt->templ_contains_blob = TRUE; - } - - return(templ); -} - -/**************************************************************//** -Builds a 'template' to the prebuilt struct. The template is used in fast -retrieval of just those column values MySQL needs in its processing. */ -UNIV_INTERN -void -ha_innobase::build_template( -/*========================*/ - bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW, - false=ROW_MYSQL_REC_FIELDS */ -{ - dict_index_t* index; - dict_index_t* clust_index; - ulint n_stored_fields; - ibool fetch_all_in_key = FALSE; - ibool fetch_primary_key_cols = FALSE; - ulint i, sql_idx; - - if (prebuilt->select_lock_type == LOCK_X) { - /* We always retrieve the whole clustered index record if we - use exclusive row level locks, for example, if the read is - done in an UPDATE statement. */ - - whole_row = true; - } else if (!whole_row) { - if (prebuilt->hint_need_to_fetch_extra_cols - == ROW_RETRIEVE_ALL_COLS) { - - /* We know we must at least fetch all columns in the - key, or all columns in the table */ - - if (prebuilt->read_just_key) { - /* MySQL has instructed us that it is enough - to fetch the columns in the key; looks like - MySQL can set this flag also when there is - only a prefix of the column in the key: in - that case we retrieve the whole column from - the clustered index */ - - fetch_all_in_key = TRUE; - } else { - whole_row = true; - } - } else if (prebuilt->hint_need_to_fetch_extra_cols - == ROW_RETRIEVE_PRIMARY_KEY) { - /* We must at least fetch all primary key cols. 
Note - that if the clustered index was internally generated - by InnoDB on the row id (no primary key was - defined), then row_search_for_mysql() will always - retrieve the row id to a special buffer in the - prebuilt struct. */ - - fetch_primary_key_cols = TRUE; - } - } - - clust_index = dict_table_get_first_index(prebuilt->table); - - index = whole_row ? clust_index : prebuilt->index; - - prebuilt->need_to_access_clustered = (index == clust_index); - - /* Either prebuilt->index should be a secondary index, or it - should be the clustered index. */ - ut_ad(dict_index_is_clust(index) == (index == clust_index)); - - /* Below we check column by column if we need to access - the clustered index. */ - - n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */ - - if (!prebuilt->mysql_template) { - prebuilt->mysql_template = (mysql_row_templ_t*) - mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t)); - } - - prebuilt->template_type = whole_row - ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS; - prebuilt->null_bitmap_len = table->s->null_bytes; - - /* Prepare to build prebuilt->mysql_template[]. */ - prebuilt->templ_contains_blob = FALSE; - prebuilt->mysql_prefix_len = 0; - prebuilt->n_template = 0; - prebuilt->idx_cond_n_cols = 0; - - /* Note that in InnoDB, i is the column number in the table. - MySQL calls columns 'fields'. */ - - if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) { - /* Push down an index condition or an end_range check. */ - for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { - - while (!table->field[sql_idx]->stored_in_db) { - sql_idx++; - } - - const ibool index_contains - = dict_index_contains_col_or_prefix(index, i); - - /* Test if an end_range or an index condition - refers to the field. Note that "index" and - "index_contains" may refer to the clustered index. - Index condition pushdown is relative to prebuilt->index - (the index that is being looked up first). 
*/ - - /* When join_read_always_key() invokes this - code via handler::ha_index_init() and - ha_innobase::index_init(), end_range is not - yet initialized. Because of that, we must - always check for index_contains, instead of - the subset - field->part_of_key.is_set(active_index) - which would be acceptable if end_range==NULL. */ - if (build_template_needs_field_in_icp( - index, prebuilt, index_contains, i)) { - /* Needed in ICP */ - const Field* field; - mysql_row_templ_t* templ; - - if (whole_row) { - field = table->field[sql_idx]; - } else { - field = build_template_needs_field( - index_contains, - prebuilt->read_just_key, - fetch_all_in_key, - fetch_primary_key_cols, - index, table, i, sql_idx); - if (!field) { - continue; - } - } - - templ = build_template_field( - prebuilt, clust_index, index, - table, field, i); - prebuilt->idx_cond_n_cols++; - ut_ad(prebuilt->idx_cond_n_cols - == prebuilt->n_template); - - if (index == prebuilt->index) { - templ->icp_rec_field_no - = templ->rec_field_no; - } else { - templ->icp_rec_field_no - = dict_index_get_nth_col_pos( - prebuilt->index, i, - NULL); - } - - if (dict_index_is_clust(prebuilt->index)) { - ut_ad(templ->icp_rec_field_no - != ULINT_UNDEFINED); - /* If the primary key includes - a column prefix, use it in - index condition pushdown, - because the condition is - evaluated before fetching any - off-page (externally stored) - columns. */ - if (templ->icp_rec_field_no - < prebuilt->index->n_uniq) { - /* This is a key column; - all set. */ - continue; - } - } else if (templ->icp_rec_field_no - != ULINT_UNDEFINED) { - continue; - } - - /* This is a column prefix index. - The column prefix can be used in - an end_range comparison. */ - - templ->icp_rec_field_no - = dict_index_get_nth_col_or_prefix_pos( - prebuilt->index, i, TRUE, NULL); - ut_ad(templ->icp_rec_field_no - != ULINT_UNDEFINED); - - /* Index condition pushdown can be used on - all columns of a secondary index, and on - the PRIMARY KEY columns. 
On the clustered - index, it must never be used on other than - PRIMARY KEY columns, because those columns - may be stored off-page, and we will not - fetch externally stored columns before - checking the index condition. */ - /* TODO: test the above with an assertion - like this. Note that index conditions are - currently pushed down as part of the - "optimizer phase" while end_range is done - as part of the execution phase. Therefore, - we were unable to use an accurate condition - for end_range in the "if" condition above, - and the following assertion would fail. - ut_ad(!dict_index_is_clust(prebuilt->index) - || templ->rec_field_no - < prebuilt->index->n_uniq); - */ - } - } - - ut_ad(prebuilt->idx_cond_n_cols > 0); - ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template); - - /* Include the fields that are not needed in index condition - pushdown. */ - for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { - - while (!table->field[sql_idx]->stored_in_db) { - sql_idx++; - } - - const ibool index_contains - = dict_index_contains_col_or_prefix(index, i); - - if (!build_template_needs_field_in_icp( - index, prebuilt, index_contains, i)) { - /* Not needed in ICP */ - const Field* field; - - if (whole_row) { - field = table->field[sql_idx]; - } else { - field = build_template_needs_field( - index_contains, - prebuilt->read_just_key, - fetch_all_in_key, - fetch_primary_key_cols, - index, table, i, sql_idx); - if (!field) { - continue; - } - } - - build_template_field(prebuilt, - clust_index, index, - table, field, i); - } - } - - prebuilt->idx_cond = this; - } else { - /* No index condition pushdown */ - prebuilt->idx_cond = NULL; - - for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { - const Field* field; - - while (!table->field[sql_idx]->stored_in_db) { - sql_idx++; - } - - if (whole_row) { - field = table->field[sql_idx]; - } else { - field = build_template_needs_field( - dict_index_contains_col_or_prefix( - index, i), - 
prebuilt->read_just_key, - fetch_all_in_key, - fetch_primary_key_cols, - index, table, i, sql_idx); - if (!field) { - continue; - } - } - - build_template_field(prebuilt, clust_index, index, - table, field, i); - } - } - - if (index != clust_index && prebuilt->need_to_access_clustered) { - /* Change rec_field_no's to correspond to the clustered index - record */ - for (i = 0; i < prebuilt->n_template; i++) { - - mysql_row_templ_t* templ - = &prebuilt->mysql_template[i]; - - templ->rec_field_no = templ->clust_rec_field_no; - } - } -} - -/********************************************************************//** -This special handling is really to overcome the limitations of MySQL's -binlogging. We need to eliminate the non-determinism that will arise in -INSERT ... SELECT type of statements, since MySQL binlog only stores the -min value of the autoinc interval. Once that is fixed we can get rid of -the special lock handling. -@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -ha_innobase::innobase_lock_autoinc(void) -/*====================================*/ -{ - DBUG_ENTER("ha_innobase::innobase_lock_autoinc"); - dberr_t error = DB_SUCCESS; - - ut_ad(!srv_read_only_mode); - - switch (innobase_autoinc_lock_mode) { - case AUTOINC_NO_LOCKING: - /* Acquire only the AUTOINC mutex. */ - dict_table_autoinc_lock(prebuilt->table); - break; - - case AUTOINC_NEW_STYLE_LOCKING: - /* For simple (single/multi) row INSERTs/REPLACEs and RBR - events, we fallback to the old style only if another - transaction has already acquired the AUTOINC lock on - behalf of a LOAD FILE or INSERT ... SELECT etc. type of - statement. */ - if (thd_sql_command(user_thd) == SQLCOM_INSERT - || thd_sql_command(user_thd) == SQLCOM_REPLACE - || thd_sql_command(user_thd) == SQLCOM_END // RBR event - ) { - dict_table_t* ib_table = prebuilt->table; - - /* Acquire the AUTOINC mutex. 
*/ - dict_table_autoinc_lock(ib_table); - - /* We need to check that another transaction isn't - already holding the AUTOINC lock on the table. */ - if (ib_table->n_waiting_or_granted_auto_inc_locks) { - /* Release the mutex to avoid deadlocks and - fall back to old style locking. */ - dict_table_autoinc_unlock(ib_table); - } else { - /* Do not fall back to old style locking. */ - break; - } - } - /* Use old style locking. */ - /* fall through */ - case AUTOINC_OLD_STYLE_LOCKING: - DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used", - ut_ad(0);); - error = row_lock_table_autoinc_for_mysql(prebuilt); - - if (error == DB_SUCCESS) { - - /* Acquire the AUTOINC mutex. */ - dict_table_autoinc_lock(prebuilt->table); - } - break; - - default: - ut_error; - } - - DBUG_RETURN(error); -} - -/********************************************************************//** -Reset the autoinc value in the table. -@return DB_SUCCESS if all went well else error code */ -UNIV_INTERN -dberr_t -ha_innobase::innobase_reset_autoinc( -/*================================*/ - ulonglong autoinc) /*!< in: value to store */ -{ - dberr_t error; - - error = innobase_lock_autoinc(); - - if (error == DB_SUCCESS) { - - dict_table_autoinc_initialize(prebuilt->table, autoinc); - - dict_table_autoinc_unlock(prebuilt->table); - } - - return(error); -} - -/********************************************************************//** -Store the autoinc value in the table. The autoinc value is only set if -it's greater than the existing autoinc value in the table. 
-@return DB_SUCCESS if all went well else error code */ -UNIV_INTERN -dberr_t -ha_innobase::innobase_set_max_autoinc( -/*==================================*/ - ulonglong auto_inc) /*!< in: value to store */ -{ - dberr_t error; - - error = innobase_lock_autoinc(); - - if (error == DB_SUCCESS) { - - dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc); - - dict_table_autoinc_unlock(prebuilt->table); - } - - return(error); -} - -/********************************************************************//** -Stores a row in an InnoDB database, to the table specified in this -handle. -@return error code */ -UNIV_INTERN -int -ha_innobase::write_row( -/*===================*/ - uchar* record) /*!< in: a row in MySQL format */ -{ - dberr_t error; - int error_result= 0; - ibool auto_inc_used= FALSE; -#ifdef WITH_WSREP - ibool auto_inc_inserted= FALSE; /* if NULL was inserted */ -#endif - ulint sql_command; - trx_t* trx = thd_to_trx(user_thd); - - DBUG_ENTER("ha_innobase::write_row"); - - if (high_level_read_only) { - ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (prebuilt->trx != trx) { - sql_print_error("The transaction object for the table handle " - "is at %p, but for the current thread it is at " - "%p", - (const void*) prebuilt->trx, (const void*) trx); - - fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr); - ut_print_buf(stderr, ((const byte*) prebuilt) - 100, 200); - fputs("\n" - "InnoDB: Dump of 200 bytes around ha_data: ", - stderr); - ut_print_buf(stderr, ((const byte*) trx) - 100, 200); - putc('\n', stderr); - ut_error; - } else if (!trx_is_started(trx)) { - ++trx->will_lock; - } - - ha_statistic_increment(&SSV::ha_write_count); - - sql_command = thd_sql_command(user_thd); - - if ((sql_command == SQLCOM_ALTER_TABLE - || sql_command == SQLCOM_OPTIMIZE - || sql_command == SQLCOM_CREATE_INDEX -#ifdef WITH_WSREP - || (wsrep_on(user_thd) && wsrep_load_data_splitting && - sql_command == 
SQLCOM_LOAD && - !thd_test_options( - user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) -#endif /* WITH_WSREP */ - || sql_command == SQLCOM_DROP_INDEX) - && num_write_row >= 10000) { -#ifdef WITH_WSREP - if (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) { - WSREP_DEBUG("forced trx split for LOAD: %s", - wsrep_thd_query(user_thd)); - } -#endif /* WITH_WSREP */ - /* ALTER TABLE is COMMITted at every 10000 copied rows. - The IX table lock for the original table has to be re-issued. - As this method will be called on a temporary table where the - contents of the original table is being copied to, it is - a bit tricky to determine the source table. The cursor - position in the source table need not be adjusted after the - intermediate COMMIT, since writes by other transactions are - being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */ - - dict_table_t* src_table; - enum lock_mode mode; - - num_write_row = 0; - - /* Commit the transaction. This will release the table - locks, so they have to be acquired again. */ - - /* Altering an InnoDB table */ - /* Get the source table. 
*/ - src_table = lock_get_src_table( - prebuilt->trx, prebuilt->table, &mode); - if (!src_table) { -no_commit: - /* Unknown situation: do not commit */ - /* - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ALTER TABLE is holding lock" - " on %lu tables!\n", - prebuilt->trx->mysql_n_tables_locked); - */ - ; - } else if (src_table == prebuilt->table) { -#ifdef WITH_WSREP - if (wsrep_on(user_thd) && - wsrep_load_data_splitting && - sql_command == SQLCOM_LOAD && - !thd_test_options(user_thd, - OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - { - switch (wsrep_run_wsrep_commit(user_thd, 1)) - { - case WSREP_TRX_OK: - break; - case WSREP_TRX_SIZE_EXCEEDED: - case WSREP_TRX_CERT_FAIL: - case WSREP_TRX_ERROR: - DBUG_RETURN(1); - } - - if (binlog_hton->commit(binlog_hton, user_thd, 1)) - DBUG_RETURN(1); - wsrep_post_commit(user_thd, TRUE); - } -#endif /* WITH_WSREP */ - /* Source table is not in InnoDB format: - no need to re-acquire locks on it. */ - - /* Altering to InnoDB format */ - innobase_commit(ht, user_thd, 1); - /* Note that this transaction is still active. */ - trx_register_for_2pc(prebuilt->trx); - /* We will need an IX lock on the destination table. */ - prebuilt->sql_stat_start = TRUE; - } else { -#ifdef WITH_WSREP - if (wsrep_on(user_thd) && - wsrep_load_data_splitting && - sql_command == SQLCOM_LOAD && - !thd_test_options(user_thd, - OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) - { - switch (wsrep_run_wsrep_commit(user_thd, 1)) - { - case WSREP_TRX_OK: - break; - case WSREP_TRX_SIZE_EXCEEDED: - case WSREP_TRX_CERT_FAIL: - case WSREP_TRX_ERROR: - DBUG_RETURN(1); - } - - if (binlog_hton->commit(binlog_hton, user_thd, 1)) - DBUG_RETURN(1); - wsrep_post_commit(user_thd, TRUE); - } -#endif /* WITH_WSREP */ - /* Ensure that there are no other table locks than - LOCK_IX and LOCK_AUTO_INC on the destination table. */ - - if (!lock_is_table_exclusive(prebuilt->table, - prebuilt->trx)) { - goto no_commit; - } - - /* Commit the transaction. 
This will release the table - locks, so they have to be acquired again. */ - innobase_commit(ht, user_thd, 1); - /* Note that this transaction is still active. */ - trx_register_for_2pc(prebuilt->trx); - /* Re-acquire the table lock on the source table. */ - row_lock_table_for_mysql(prebuilt, src_table, mode); - /* We will need an IX lock on the destination table. */ - prebuilt->sql_stat_start = TRUE; - } - } - - num_write_row++; - - /* This is the case where the table has an auto-increment column */ - if (table->next_number_field && record == table->record[0]) { - - /* Reset the error code before calling - innobase_get_auto_increment(). */ - prebuilt->autoinc_error = DB_SUCCESS; - -#ifdef WITH_WSREP - auto_inc_inserted= (table->next_number_field->val_int() == 0); -#endif - - if ((error_result = update_auto_increment())) { - /* We don't want to mask autoinc overflow errors. */ - - /* Handle the case where the AUTOINC sub-system - failed during initialization. */ - if (prebuilt->autoinc_error == DB_UNSUPPORTED) { - error_result = ER_AUTOINC_READ_FAILED; - /* Set the error message to report too. */ - my_error(ER_AUTOINC_READ_FAILED, MYF(0)); - goto func_exit; - } else if (prebuilt->autoinc_error != DB_SUCCESS) { - error = prebuilt->autoinc_error; - goto report_error; - } - - /* MySQL errors are passed straight back. except for - ER_AUTOINC_READ_FAILED. This can only happen - for values out of range. 
- */ - goto func_exit; - } - - auto_inc_used = TRUE; - } - - if (prebuilt->mysql_template == NULL - || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { - - /* Build the template used in converting quickly between - the two database formats */ - - build_template(true); - } - - innobase_srv_conc_enter_innodb(prebuilt->trx); - - error = row_insert_for_mysql((byte*) record, prebuilt); - DEBUG_SYNC(user_thd, "ib_after_row_insert"); - - /* Handle duplicate key errors */ - if (auto_inc_used) { - ulonglong auto_inc; - ulonglong col_max_value; - - /* Note the number of rows processed for this statement, used - by get_auto_increment() to determine the number of AUTO-INC - values to reserve. This is only useful for a mult-value INSERT - and is a statement level counter.*/ - if (trx->n_autoinc_rows > 0) { - --trx->n_autoinc_rows; - } - - /* We need the upper limit of the col type to check for - whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); - - /* Get the value that MySQL attempted to store in the table.*/ - auto_inc = table->next_number_field->val_int(); - - switch (error) { - case DB_DUPLICATE_KEY: - - /* A REPLACE command and LOAD DATA INFILE REPLACE - handle a duplicate key error themselves, but we - must update the autoinc counter if we are performing - those statements. 
*/ - - switch (sql_command) { - case SQLCOM_LOAD: - if (trx->duplicates) { - - goto set_max_autoinc; - } - break; - - case SQLCOM_REPLACE: - case SQLCOM_INSERT_SELECT: - case SQLCOM_REPLACE_SELECT: - goto set_max_autoinc; - -#ifdef WITH_WSREP - /* workaround for LP bug #355000, retrying the insert */ - case SQLCOM_INSERT: - - WSREP_DEBUG("DUPKEY error for autoinc\n" - "THD %ld, value %llu, off %llu inc %llu", - thd_get_thread_id(current_thd), - auto_inc, - prebuilt->autoinc_offset, - prebuilt->autoinc_increment); - - if (wsrep_on(current_thd) && - auto_inc_inserted && - wsrep_drupal_282555_workaround && - wsrep_thd_retry_counter(current_thd) == 0 && - !thd_test_options(current_thd, - OPTION_NOT_AUTOCOMMIT | - OPTION_BEGIN)) { - WSREP_DEBUG( - "retrying insert: %s", - (*wsrep_thd_query(current_thd)) ? - wsrep_thd_query(current_thd) : - (char *)"void"); - error= DB_SUCCESS; - wsrep_thd_set_conflict_state( - current_thd, MUST_ABORT); - innobase_srv_conc_exit_innodb(prebuilt->trx); - /* jump straight to func exit over - * later wsrep hooks */ - goto func_exit; - } - break; -#endif /* WITH_WSREP */ - - default: - break; - } - - break; - - case DB_SUCCESS: - /* If the actual value inserted is greater than - the upper limit of the interval, then we try and - update the table upper limit. Note: last_value - will be 0 if get_auto_increment() was not called.*/ - - if (auto_inc >= prebuilt->autoinc_last_value) { -set_max_autoinc: - /* This should filter out the negative - values set explicitly by the user. 
*/ - if (auto_inc <= col_max_value) { - ut_a(prebuilt->autoinc_increment > 0); - - ulonglong offset; - ulonglong increment; - dberr_t err; - - offset = prebuilt->autoinc_offset; - increment = prebuilt->autoinc_increment; - - auto_inc = innobase_next_autoinc( - auto_inc, - 1, increment, offset, - col_max_value); - - err = innobase_set_max_autoinc( - auto_inc); - - if (err != DB_SUCCESS) { - error = err; - } - } - } - break; - default: - break; - } - } - - innobase_srv_conc_exit_innodb(prebuilt->trx); - -report_error: - if (error == DB_TABLESPACE_DELETED) { - ib_senderrf( - trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - } - - error_result = convert_error_code_to_mysql(error, - prebuilt->table->flags, - user_thd); - -#ifdef WITH_WSREP - if (!error_result && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_consistency_check(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { - if (wsrep_append_keys(user_thd, false, record, NULL)) - { - DBUG_PRINT("wsrep", ("row key failed")); - error_result = HA_ERR_INTERNAL_ERROR; - goto wsrep_error; - } - } -wsrep_error: -#endif /* WITH_WSREP */ - - if (error_result == HA_FTS_INVALID_DOCID) { - my_error(HA_FTS_INVALID_DOCID, MYF(0)); - } - -func_exit: - innobase_active_small(); - - DBUG_RETURN(error_result); -} - -/**********************************************************************//** -Checks which fields have changed in a row and stores information -of them to an update vector. 
-@return DB_SUCCESS or error code */ -static -dberr_t -calc_row_difference( -/*================*/ - upd_t* uvect, /*!< in/out: update vector */ - uchar* old_row, /*!< in: old row in MySQL format */ - uchar* new_row, /*!< in: new row in MySQL format */ - TABLE* table, /*!< in: table in MySQL data - dictionary */ - uchar* upd_buff, /*!< in: buffer to use */ - ulint buff_len, /*!< in: buffer length */ - row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */ - THD* thd) /*!< in: user thread */ -{ - uchar* original_upd_buff = upd_buff; - Field* field; - enum_field_types field_mysql_type; - uint n_fields; - ulint o_len; - ulint n_len; - ulint col_pack_len; - const byte* new_mysql_row_col; - const byte* o_ptr; - const byte* n_ptr; - byte* buf; - upd_field_t* ufield; - ulint col_type; - ulint n_changed = 0; - dfield_t dfield; - dict_index_t* clust_index; - uint sql_idx, innodb_idx= 0; - ibool changes_fts_column = FALSE; - ibool changes_fts_doc_col = FALSE; - trx_t* trx = thd_to_trx(thd); - doc_id_t doc_id = FTS_NULL_DOC_ID; - - ut_ad(!srv_read_only_mode); - - n_fields = table->s->fields; - clust_index = dict_table_get_first_index(prebuilt->table); - - /* We use upd_buff to convert changed fields */ - buf = (byte*) upd_buff; - - for (sql_idx = 0; sql_idx < n_fields; sql_idx++) { - field = table->field[sql_idx]; - if (!field->stored_in_db) - continue; - - o_ptr = (const byte*) old_row + get_field_offset(table, field); - n_ptr = (const byte*) new_row + get_field_offset(table, field); - - /* Use new_mysql_row_col and col_pack_len save the values */ - - new_mysql_row_col = n_ptr; - col_pack_len = field->pack_length(); - - o_len = col_pack_len; - n_len = col_pack_len; - - /* We use o_ptr and n_ptr to dig up the actual data for - comparison. 
*/ - - field_mysql_type = field->type(); - - col_type = prebuilt->table->cols[innodb_idx].mtype; - - switch (col_type) { - - case DATA_BLOB: - /* Do not compress blob column while comparing*/ - o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len); - n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len); - - break; - - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_VARMYSQL: - if (field_mysql_type == MYSQL_TYPE_VARCHAR) { - /* This is a >= 5.0.3 type true VARCHAR where - the real payload data length is stored in - 1 or 2 bytes */ - - o_ptr = row_mysql_read_true_varchar( - &o_len, o_ptr, - (ulint) - (((Field_varstring*) field)->length_bytes)); - - n_ptr = row_mysql_read_true_varchar( - &n_len, n_ptr, - (ulint) - (((Field_varstring*) field)->length_bytes)); - } - - break; - default: - ; - } - - if (field_mysql_type == MYSQL_TYPE_LONGLONG - && prebuilt->table->fts - && innobase_strcasecmp( - field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) { - doc_id = (doc_id_t) mach_read_from_n_little_endian( - n_ptr, 8); - if (doc_id == 0) { - return(DB_FTS_INVALID_DOCID); - } - } - - - if (field->real_maybe_null()) { - if (field->is_null_in_record(old_row)) { - o_len = UNIV_SQL_NULL; - } - - if (field->is_null_in_record(new_row)) { - n_len = UNIV_SQL_NULL; - } - } - - if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL - && 0 != memcmp(o_ptr, n_ptr, o_len))) { - /* The field has changed */ - - ufield = uvect->fields + n_changed; - UNIV_MEM_INVALID(ufield, sizeof *ufield); - - /* Let us use a dummy dfield to make the conversion - from the MySQL column format to the InnoDB format */ - - if (n_len != UNIV_SQL_NULL) { - dict_col_copy_type(prebuilt->table->cols + innodb_idx, - dfield_get_type(&dfield)); - - buf = row_mysql_store_col_in_innobase_format( - &dfield, - (byte*) buf, - TRUE, - new_mysql_row_col, - col_pack_len, - dict_table_is_comp(prebuilt->table)); - dfield_copy(&ufield->new_val, &dfield); - } else { - dfield_set_null(&ufield->new_val); - } - - ufield->exp = 
NULL; - ufield->orig_len = 0; - ufield->field_no = dict_col_get_clust_pos( - &prebuilt->table->cols[innodb_idx], clust_index); - n_changed++; - - /* If an FTS indexed column was changed by this - UPDATE then we need to inform the FTS sub-system. - - NOTE: Currently we re-index all FTS indexed columns - even if only a subset of the FTS indexed columns - have been updated. That is the reason we are - checking only once here. Later we will need to - note which columns have been updated and do - selective processing. */ - if (prebuilt->table->fts != NULL) { - ulint offset; - dict_table_t* innodb_table; - - innodb_table = prebuilt->table; - - if (!changes_fts_column) { - offset = row_upd_changes_fts_column( - innodb_table, ufield); - - if (offset != ULINT_UNDEFINED) { - changes_fts_column = TRUE; - } - } - - if (!changes_fts_doc_col) { - changes_fts_doc_col = - row_upd_changes_doc_id( - innodb_table, ufield); - } - } - } - if (field->stored_in_db) - innodb_idx++; - } - - /* If the update changes a column with an FTS index on it, we - then add an update column node with a new document id to the - other changes. We piggy back our changes on the normal UPDATE - to reduce processing and IO overhead. */ - if (!prebuilt->table->fts) { - trx->fts_next_doc_id = 0; - } else if (changes_fts_column || changes_fts_doc_col) { - dict_table_t* innodb_table = prebuilt->table; - - ufield = uvect->fields + n_changed; - - if (!DICT_TF2_FLAG_IS_SET( - innodb_table, DICT_TF2_FTS_HAS_DOC_ID)) { - - /* If Doc ID is managed by user, and if any - FTS indexed column has been updated, its corresponding - Doc ID must also be updated. 
Otherwise, return - error */ - if (changes_fts_column && !changes_fts_doc_col) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: A new Doc ID" - " must be supplied while updating" - " FTS indexed columns.\n"); - return(DB_FTS_INVALID_DOCID); - } - - /* Doc ID must monotonically increase */ - ut_ad(innodb_table->fts->cache); - if (doc_id < prebuilt->table->fts->cache->next_doc_id) { - fprintf(stderr, - "InnoDB: FTS Doc ID must be larger than" - " " IB_ID_FMT " for table", - innodb_table->fts->cache->next_doc_id - - 1); - ut_print_name(stderr, trx, - TRUE, innodb_table->name); - putc('\n', stderr); - - return(DB_FTS_INVALID_DOCID); - } else if ((doc_id - - prebuilt->table->fts->cache->next_doc_id) - >= FTS_DOC_ID_MAX_STEP) { - fprintf(stderr, - "InnoDB: Doc ID " UINT64PF " is too" - " big. Its difference with largest" - " Doc ID used " UINT64PF " cannot" - " exceed or equal to %d\n", - doc_id, - prebuilt->table->fts->cache->next_doc_id - 1, - FTS_DOC_ID_MAX_STEP); - } - - - trx->fts_next_doc_id = doc_id; - } else { - /* If the Doc ID is a hidden column, it can't be - changed by user */ - ut_ad(!changes_fts_doc_col); - - /* Doc ID column is hidden, a new Doc ID will be - generated by following fts_update_doc_id() call */ - trx->fts_next_doc_id = 0; - } - - fts_update_doc_id( - innodb_table, ufield, &trx->fts_next_doc_id); - - ++n_changed; - } else { - /* We have a Doc ID column, but none of FTS indexed - columns are touched, nor the Doc ID column, so set - fts_next_doc_id to UINT64_UNDEFINED, which means do not - update the Doc ID column */ - trx->fts_next_doc_id = UINT64_UNDEFINED; - } - - uvect->n_fields = n_changed; - uvect->info_bits = 0; - - ut_a(buf <= (byte*) original_upd_buff + buff_len); - - return(DB_SUCCESS); -} - -#ifdef WITH_WSREP -static -int -wsrep_calc_row_hash( -/*================*/ - byte* digest, /*!< in/out: md5 sum */ - const uchar* row, /*!< in: row in MySQL format */ - TABLE* table, /*!< in: table in MySQL data - dictionary */ - 
row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */ - THD* thd) /*!< in: user thread */ -{ - Field* field; - enum_field_types field_mysql_type; - uint n_fields; - ulint len; - const byte* ptr; - ulint col_type; - uint i; - - void *ctx = alloca(my_md5_context_size()); - my_md5_init(ctx); - - n_fields = table->s->fields; - - for (i = 0; i < n_fields; i++) { - byte null_byte=0; - byte true_byte=1; - - field = table->field[i]; - - ptr = (const byte*) row + get_field_offset(table, field); - len = field->pack_length(); - - field_mysql_type = field->type(); - - col_type = prebuilt->table->cols[i].mtype; - - switch (col_type) { - - case DATA_BLOB: - ptr = row_mysql_read_blob_ref(&len, ptr, len); - break; - - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_VARMYSQL: - if (field_mysql_type == MYSQL_TYPE_VARCHAR) { - /* This is a >= 5.0.3 type true VARCHAR where - the real payload data length is stored in - 1 or 2 bytes */ - - ptr = row_mysql_read_true_varchar( - &len, ptr, - (ulint) - (((Field_varstring*)field)->length_bytes)); - - } - - break; - default: - ; - } - /* - if (field->null_ptr && - field_in_record_is_null(table, field, (char*) row)) { - */ - - if (field->is_null_in_record(row)) { - my_md5_input(ctx, &null_byte, 1); - } else { - my_md5_input(ctx, &true_byte, 1); - my_md5_input(ctx, ptr, len); - } - } - - my_md5_result(ctx, digest); - - return(0); -} -#endif /* WITH_WSREP */ -/**********************************************************************//** -Updates a row given as a parameter to a new value. Note that we are given -whole rows, not just the fields which are updated: this incurs some -overhead for CPU when we check which fields are actually updated. -TODO: currently InnoDB does not prevent the 'Halloween problem': -in a searched update a single row can get updated several times -if its index columns are updated! 
-@return error number or 0 */ -UNIV_INTERN -int -ha_innobase::update_row( -/*====================*/ - const uchar* old_row, /*!< in: old row in MySQL format */ - const uchar* new_row) /*!< in: new row in MySQL format */ -{ - upd_t* uvect; - dberr_t error; - trx_t* trx = thd_to_trx(user_thd); - - DBUG_ENTER("ha_innobase::update_row"); - - ut_a(prebuilt->trx == trx); - - if (high_level_read_only) { - ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (!trx_is_started(trx)) { - ++trx->will_lock; - } - - if (upd_buf == NULL) { - ut_ad(upd_buf_size == 0); - - /* Create a buffer for packing the fields of a record. Why - table->stored_rec_length did not work here? Obviously, because char - fields when packed actually became 1 byte longer, when we also - stored the string length as the first byte. */ - - upd_buf_size = table->s->stored_rec_length + table->s->max_key_length - + MAX_REF_PARTS * 3; - upd_buf = (uchar*) my_malloc(upd_buf_size, MYF(MY_WME)); - if (upd_buf == NULL) { - upd_buf_size = 0; - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - } - - ha_statistic_increment(&SSV::ha_update_count); - - if (prebuilt->upd_node) { - uvect = prebuilt->upd_node->update; - } else { - uvect = row_get_prebuilt_update_vector(prebuilt); - } - - /* Build an update vector from the modified fields in the rows - (uses upd_buf of the handle) */ - - error = calc_row_difference(uvect, (uchar*) old_row, new_row, table, - upd_buf, upd_buf_size, prebuilt, user_thd); - - if (error != DB_SUCCESS) { - goto func_exit; - } - - /* This is not a delete */ - prebuilt->upd_node->is_delete = FALSE; - - ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - - innobase_srv_conc_enter_innodb(trx); - - error = row_update_for_mysql((byte*) old_row, prebuilt); - - /* We need to do some special AUTOINC handling for the following case: - - INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ... 
- - We need to use the AUTOINC counter that was actually used by - MySQL in the UPDATE statement, which can be different from the - value used in the INSERT statement.*/ - - if (error == DB_SUCCESS - && table->next_number_field - && new_row == table->record[0] - && thd_sql_command(user_thd) == SQLCOM_INSERT - && trx->duplicates) { - - ulonglong auto_inc; - ulonglong col_max_value; - - auto_inc = table->next_number_field->val_int(); - - /* We need the upper limit of the col type to check for - whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); - - if (auto_inc <= col_max_value && auto_inc != 0) { - - ulonglong offset; - ulonglong increment; - - offset = prebuilt->autoinc_offset; - increment = prebuilt->autoinc_increment; - - auto_inc = innobase_next_autoinc( - auto_inc, 1, increment, offset, col_max_value); - - error = innobase_set_max_autoinc(auto_inc); - } - } - - innobase_srv_conc_exit_innodb(trx); - -func_exit: - int err = convert_error_code_to_mysql(error, - prebuilt->table->flags, user_thd); - - /* If success and no columns were updated. */ - if (err == 0 && uvect->n_fields == 0) { - - /* This is the same as success, but instructs - MySQL that the row is not really updated and it - should not increase the count of updated rows. 
- This is fix for http://bugs.mysql.com/29157 */ - err = HA_ERR_RECORD_IS_THE_SAME; - } else if (err == HA_FTS_INVALID_DOCID) { - my_error(HA_FTS_INVALID_DOCID, MYF(0)); - } - - /* Tell InnoDB server that there might be work for - utility threads: */ - - innobase_active_small(); - -#ifdef WITH_WSREP - if (error == DB_SUCCESS && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { - DBUG_PRINT("wsrep", ("update row key")); - - if (wsrep_append_keys(user_thd, false, old_row, new_row)) { - WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED"); - DBUG_PRINT("wsrep", ("row key failed")); - err = HA_ERR_INTERNAL_ERROR; - goto wsrep_error; - } - } -wsrep_error: -#endif /* WITH_WSREP */ - - DBUG_RETURN(err); -} - -/**********************************************************************//** -Deletes a row given as the parameter. -@return error number or 0 */ -UNIV_INTERN -int -ha_innobase::delete_row( -/*====================*/ - const uchar* record) /*!< in: a row in MySQL format */ -{ - dberr_t error; - trx_t* trx = thd_to_trx(user_thd); - - DBUG_ENTER("ha_innobase::delete_row"); - - ut_a(prebuilt->trx == trx); - - if (high_level_read_only) { - ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (!trx_is_started(trx)) { - ++trx->will_lock; - } - - ha_statistic_increment(&SSV::ha_delete_count); - - if (!prebuilt->upd_node) { - row_get_prebuilt_update_vector(prebuilt); - } - - /* This is a delete */ - - prebuilt->upd_node->is_delete = TRUE; - - innobase_srv_conc_enter_innodb(trx); - - error = row_update_for_mysql((byte*) record, prebuilt); - - innobase_srv_conc_exit_innodb(trx); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - innobase_active_small(); - -#ifdef WITH_WSREP - if (error == DB_SUCCESS && - wsrep_thd_exec_mode(user_thd) == LOCAL_STATE && - wsrep_on(user_thd) && - !wsrep_thd_ignore_table(user_thd)) - { - if 
(wsrep_append_keys(user_thd, false, record, NULL)) { - DBUG_PRINT("wsrep", ("delete fail")); - error = (dberr_t) HA_ERR_INTERNAL_ERROR; - goto wsrep_error; - } - } -wsrep_error: -#endif /* WITH_WSREP */ - - DBUG_RETURN(convert_error_code_to_mysql( - error, prebuilt->table->flags, user_thd)); -} - -/**********************************************************************//** -Removes a new lock set on a row, if it was not read optimistically. This can -be called after a row has been read in the processing of an UPDATE or a DELETE -query, if the option innodb_locks_unsafe_for_binlog is set. */ -UNIV_INTERN -void -ha_innobase::unlock_row(void) -/*=========================*/ -{ - DBUG_ENTER("ha_innobase::unlock_row"); - - /* Consistent read does not take any locks, thus there is - nothing to unlock. */ - - if (prebuilt->select_lock_type == LOCK_NONE) { - DBUG_VOID_RETURN; - } - - /* Ideally, this assert must be in the beginning of the function. - But there are some calls to this function from the SQL layer when the - transaction is in state TRX_STATE_NOT_STARTED. The check on - prebuilt->select_lock_type above gets around this issue. */ - ut_ad(trx_state_eq(prebuilt->trx, TRX_STATE_ACTIVE)); - - switch (prebuilt->row_read_type) { - case ROW_READ_WITH_LOCKS: - if (!srv_locks_unsafe_for_binlog - && prebuilt->trx->isolation_level - > TRX_ISO_READ_COMMITTED) { - break; - } - /* fall through */ - case ROW_READ_TRY_SEMI_CONSISTENT: - row_unlock_for_mysql(prebuilt, FALSE); - break; - case ROW_READ_DID_SEMI_CONSISTENT: - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - break; - } - - DBUG_VOID_RETURN; -} - -/* See handler.h and row0mysql.h for docs on this function. */ -UNIV_INTERN -bool -ha_innobase::was_semi_consistent_read(void) -/*=======================================*/ -{ - return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT); -} - -/* See handler.h and row0mysql.h for docs on this function. 
*/ -UNIV_INTERN -void -ha_innobase::try_semi_consistent_read(bool yes) -/*===========================================*/ -{ - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - /* Row read type is set to semi consistent read if this was - requested by the MySQL and either innodb_locks_unsafe_for_binlog - option is used or this session is using READ COMMITTED isolation - level. */ - - if (yes - && (srv_locks_unsafe_for_binlog - || prebuilt->trx->isolation_level <= TRX_ISO_READ_COMMITTED)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } else { - prebuilt->row_read_type = ROW_READ_WITH_LOCKS; - } -} - -/******************************************************************//** -Initializes a handle to use an index. -@return 0 or error number */ -UNIV_INTERN -int -ha_innobase::index_init( -/*====================*/ - uint keynr, /*!< in: key (index) number */ - bool sorted) /*!< in: 1 if result MUST be sorted according to index */ -{ - DBUG_ENTER("index_init"); - - DBUG_RETURN(change_active_index(keynr)); -} - -/******************************************************************//** -Currently does nothing. -@return 0 */ -UNIV_INTERN -int -ha_innobase::index_end(void) -/*========================*/ -{ - int error = 0; - DBUG_ENTER("index_end"); - active_index = MAX_KEY; - in_range_check_pushed_down = FALSE; - ds_mrr.dsmrr_close(); - DBUG_RETURN(error); -} - -/*********************************************************************//** -Converts a search mode flag understood by MySQL to a flag understood -by InnoDB. 
*/ -static inline -ulint -convert_search_mode_to_innobase( -/*============================*/ - enum ha_rkey_function find_flag) -{ - switch (find_flag) { - case HA_READ_KEY_EXACT: - /* this does not require the index to be UNIQUE */ - return(PAGE_CUR_GE); - case HA_READ_KEY_OR_NEXT: - return(PAGE_CUR_GE); - case HA_READ_KEY_OR_PREV: - return(PAGE_CUR_LE); - case HA_READ_AFTER_KEY: - return(PAGE_CUR_G); - case HA_READ_BEFORE_KEY: - return(PAGE_CUR_L); - case HA_READ_PREFIX: - return(PAGE_CUR_GE); - case HA_READ_PREFIX_LAST: - return(PAGE_CUR_LE); - case HA_READ_PREFIX_LAST_OR_PREV: - return(PAGE_CUR_LE); - /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always - pass a complete-field prefix of a key value as the search - tuple. I.e., it is not allowed that the last field would - just contain n first bytes of the full field value. - MySQL uses a 'padding' trick to convert LIKE 'abc%' - type queries so that it can use as a search tuple - a complete-field-prefix of a key value. Thus, the InnoDB - search mode PAGE_CUR_LE_OR_EXTENDS is never used. - TODO: when/if MySQL starts to use also partial-field - prefixes, we have to deal with stripping of spaces - and comparison of non-latin1 char type fields in - innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to - work correctly. */ - case HA_READ_MBR_CONTAIN: - case HA_READ_MBR_INTERSECT: - case HA_READ_MBR_WITHIN: - case HA_READ_MBR_DISJOINT: - case HA_READ_MBR_EQUAL: - return(PAGE_CUR_UNSUPP); - /* do not use "default:" in order to produce a gcc warning: - enumeration value '...' not handled in switch - (if -Wswitch or -Wall is used) */ - } - - my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality"); - - return(PAGE_CUR_UNSUPP); -} - -/* - BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED - --------------------------------------------------- -The following does not cover all the details, but explains how we determine -the start of a new SQL statement, and what is associated with it. 
- -For each table in the database the MySQL interpreter may have several -table handle instances in use, also in a single SQL query. For each table -handle instance there is an InnoDB 'prebuilt' struct which contains most -of the InnoDB data associated with this table handle instance. - - A) if the user has not explicitly set any MySQL table level locks: - - 1) MySQL calls ::external_lock to set an 'intention' table level lock on -the table of the handle instance. There we set -prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set -true if we are taking this table handle instance to use in a new SQL -statement issued by the user. We also increment trx->n_mysql_tables_in_use. - - 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search -instructions to prebuilt->template of the table handle instance in -::index_read. The template is used to save CPU time in large joins. - - 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we -allocate a new consistent read view for the trx if it does not yet have one, -or in the case of a locking read, set an InnoDB 'intention' table level -lock on the table. - - 4) We do the SELECT. MySQL may repeatedly call ::index_read for the -same table handle instance, if it is a join. - - 5) When the SELECT ends, MySQL removes its intention table level locks -in ::external_lock. When trx->n_mysql_tables_in_use drops to zero, - (a) we execute a COMMIT there if the autocommit is on, - (b) we also release possible 'SQL statement level resources' InnoDB may -have for this SQL statement. The MySQL interpreter does NOT execute -autocommit for pure read transactions, though it should. That is why the -table handler in that case has to execute the COMMIT in ::external_lock. - - B) If the user has explicitly set MySQL table level locks, then MySQL -does NOT call ::external_lock at the start of the statement. 
To determine -when we are at the start of a new SQL statement we at the start of -::index_read also compare the query id to the latest query id where the -table handle instance was used. If it has changed, we know we are at the -start of a new SQL statement. Since the query id can theoretically -overwrap, we use this test only as a secondary way of determining the -start of a new SQL statement. */ - - -/**********************************************************************//** -Positions an index cursor to the index specified in the handle. Fetches the -row if any. -@return 0, HA_ERR_KEY_NOT_FOUND, or error number */ -UNIV_INTERN -int -ha_innobase::index_read( -/*====================*/ - uchar* buf, /*!< in/out: buffer for the returned - row */ - const uchar* key_ptr, /*!< in: key value; if this is NULL - we position the cursor at the - start or end of index; this can - also contain an InnoDB row id, in - which case key_len is the InnoDB - row id length; the key value can - also be a prefix of a full key value, - and the last column can be a prefix - of a full column */ - uint key_len,/*!< in: key value length */ - enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */ -{ - ulint mode; - dict_index_t* index; - ulint match_mode = 0; - int error; - dberr_t ret; - - DBUG_ENTER("index_read"); - DEBUG_SYNC_C("ha_innobase_index_read_begin"); - - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT); - - ha_statistic_increment(&SSV::ha_read_key_count); - - index = prebuilt->index; - - if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) { - prebuilt->index_usable = FALSE; - DBUG_RETURN(HA_ERR_CRASHED); - } - - if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - DBUG_RETURN(dict_index_is_corrupted(index) - ? 
HA_ERR_INDEX_CORRUPT - : HA_ERR_TABLE_DEF_CHANGED); - } - - if (index->type & DICT_FTS) { - DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); - } - - /* Note that if the index for which the search template is built is not - necessarily prebuilt->index, but can also be the clustered index */ - - if (prebuilt->sql_stat_start) { - build_template(false); - } - - if (key_ptr) { - /* Convert the search key value to InnoDB format into - prebuilt->search_tuple */ - - row_sel_convert_mysql_key_to_innobase( - prebuilt->search_tuple, - prebuilt->srch_key_val1, - prebuilt->srch_key_val_len, - index, - (byte*) key_ptr, - (ulint) key_len, - prebuilt->trx); - DBUG_ASSERT(prebuilt->search_tuple->n_fields > 0); - } else { - /* We position the cursor to the last or the first entry - in the index */ - - dtuple_set_n_fields(prebuilt->search_tuple, 0); - } - - mode = convert_search_mode_to_innobase(find_flag); - - match_mode = 0; - - if (find_flag == HA_READ_KEY_EXACT) { - - match_mode = ROW_SEL_EXACT; - - } else if (find_flag == HA_READ_PREFIX - || find_flag == HA_READ_PREFIX_LAST) { - - match_mode = ROW_SEL_EXACT_PREFIX; - } - - last_match_mode = (uint) match_mode; - - if (mode != PAGE_CUR_UNSUPP) { - - innobase_srv_conc_enter_innodb(prebuilt->trx); - - ret = row_search_for_mysql((byte*) buf, mode, prebuilt, - match_mode, 0); - - innobase_srv_conc_exit_innodb(prebuilt->trx); - } else { - - ret = DB_UNSUPPORTED; - } - - switch (ret) { - case DB_SUCCESS: - error = 0; - table->status = 0; - if (prebuilt->table->is_system_db) { - srv_stats.n_system_rows_read.add( - (size_t) prebuilt->trx->id, 1); - } else { - srv_stats.n_rows_read.add( - (size_t) prebuilt->trx->id, 1); - } - break; - case DB_RECORD_NOT_FOUND: - error = HA_ERR_KEY_NOT_FOUND; - table->status = STATUS_NOT_FOUND; - break; - case DB_END_OF_INDEX: - error = HA_ERR_KEY_NOT_FOUND; - table->status = STATUS_NOT_FOUND; - break; - case DB_TABLESPACE_DELETED: - - ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - 
ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - - table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; - break; - case DB_TABLESPACE_NOT_FOUND: - - ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_MISSING, MYF(0), - table->s->table_name.str); - - table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; - break; - default: - error = convert_error_code_to_mysql( - ret, prebuilt->table->flags, user_thd); - - table->status = STATUS_NOT_FOUND; - break; - } - - DBUG_RETURN(error); -} - -/*******************************************************************//** -The following functions works like index_read, but it find the last -row with the current key value or prefix. -@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */ -UNIV_INTERN -int -ha_innobase::index_read_last( -/*=========================*/ - uchar* buf, /*!< out: fetched row */ - const uchar* key_ptr,/*!< in: key value, or a prefix of a full - key value */ - uint key_len)/*!< in: length of the key val or prefix - in bytes */ -{ - return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST)); -} - -/********************************************************************//** -Get the index for a handle. Does not change active index. -@return NULL or index instance. */ -UNIV_INTERN -dict_index_t* -ha_innobase::innobase_get_index( -/*============================*/ - uint keynr) /*!< in: use this index; MAX_KEY means always - clustered index, even if it was internally - generated by InnoDB */ -{ - KEY* key = 0; - dict_index_t* index = 0; - - DBUG_ENTER("innobase_get_index"); - - if (keynr != MAX_KEY && table->s->keys > 0) { - key = table->key_info + keynr; - - index = innobase_index_lookup(share, keynr); - - if (index) { - - if (!key || ut_strcmp(index->name, key->name) != 0) { - fprintf(stderr, "InnoDB: [Error] Index for key no %u" - " mysql name %s , InnoDB name %s for table %s\n", - keynr, key ? 
key->name : "NULL", - index->name, - prebuilt->table->name); - - for(ulint i=0; i < table->s->keys; i++) { - index = innobase_index_lookup(share, i); - key = table->key_info + keynr; - - if (index) { - - fprintf(stderr, "InnoDB: [Note] Index for key no %u" - " mysql name %s , InnoDB name %s for table %s\n", - keynr, key ? key->name : "NULL", - index->name, - prebuilt->table->name); - } - } - } - - ut_a(ut_strcmp(index->name, key->name) == 0); - } else { - /* Can't find index with keynr in the translation - table. Only print message if the index translation - table exists */ - if (share->idx_trans_tbl.index_mapping) { - sql_print_warning("InnoDB could not find " - "index %s key no %u for " - "table %s through its " - "index translation table", - key ? key->name : "NULL", - keynr, - prebuilt->table->name); - } - - index = dict_table_get_index_on_name(prebuilt->table, - key->name); - } - } else { - index = dict_table_get_first_index(prebuilt->table); - } - - if (!index) { - sql_print_error( - "Innodb could not find key n:o %u with name %s " - "from dict cache for table %s", - keynr, key ? key->name : "NULL", - prebuilt->table->name); - } - - DBUG_RETURN(index); -} - -/********************************************************************//** -Changes the active index of a handle. 
-@return 0 or error code */ -UNIV_INTERN -int -ha_innobase::change_active_index( -/*=============================*/ - uint keynr) /*!< in: use this index; MAX_KEY means always clustered - index, even if it was internally generated by - InnoDB */ -{ - DBUG_ENTER("change_active_index"); - - ut_ad(user_thd == ha_thd()); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - - active_index = keynr; - - prebuilt->index = innobase_get_index(keynr); - - if (UNIV_UNLIKELY(!prebuilt->index)) { - sql_print_warning("InnoDB: change_active_index(%u) failed", - keynr); - prebuilt->index_usable = FALSE; - DBUG_RETURN(1); - } - - prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx, - prebuilt->index); - - if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - if (dict_index_is_corrupted(prebuilt->index)) { - char index_name[MAX_FULL_NAME_LEN + 1]; - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - index_name, sizeof index_name, - prebuilt->index->name, TRUE); - - innobase_format_name( - table_name, sizeof table_name, - prebuilt->index->table->name, FALSE); - - push_warning_printf( - user_thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_INDEX_CORRUPT, - "InnoDB: Index %s for table %s is" - " marked as corrupted", - index_name, table_name); - DBUG_RETURN(HA_ERR_INDEX_CORRUPT); - } else { - push_warning_printf( - user_thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_TABLE_DEF_CHANGED, - "InnoDB: insufficient history for index %u", - keynr); - } - - /* The caller seems to ignore this. Thus, we must check - this again in row_search_for_mysql(). 
*/ - DBUG_RETURN(convert_error_code_to_mysql(DB_MISSING_HISTORY, - 0, NULL)); - } - - ut_a(prebuilt->search_tuple != 0); - - dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields); - - dict_index_copy_types(prebuilt->search_tuple, prebuilt->index, - prebuilt->index->n_fields); - - /* MySQL changes the active index for a handle also during some - queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX() - and then calculates the sum. Previously we played safe and used - the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary - copying. Starting from MySQL-4.1 we use a more efficient flag here. */ - - build_template(false); - - DBUG_RETURN(0); -} - -/**********************************************************************//** -Positions an index cursor to the index specified in keynr. Fetches the -row if any. -??? This is only used to read whole keys ??? -@return error number or 0 */ -UNIV_INTERN -int -ha_innobase::index_read_idx( -/*========================*/ - uchar* buf, /*!< in/out: buffer for the returned - row */ - uint keynr, /*!< in: use this index */ - const uchar* key, /*!< in: key value; if this is NULL - we position the cursor at the - start or end of index */ - uint key_len, /*!< in: key value length */ - enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */ -{ - if (change_active_index(keynr)) { - - return(1); - } - - return(index_read(buf, key, key_len, find_flag)); -} - -/***********************************************************************//** -Reads the next or previous row from a cursor, which must have previously been -positioned using index_read. 
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::general_fetch(
-/*=======================*/
-	uchar*	buf,		/*!< in/out: buffer for next row in MySQL
-				format */
-	uint	direction,	/*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
-	uint	match_mode)	/*!< in: 0, ROW_SEL_EXACT, or
-				ROW_SEL_EXACT_PREFIX */
-{
-	dberr_t	ret;
-	int	error;
-
-	DBUG_ENTER("general_fetch");
-
-	/* If the transaction is not started, do not continue; return
-	HA_ERR_END_OF_FILE instead.
-	NOTE(review): the literal 1 compared against trx->state is
-	presumably the "active" transaction state; confirm against the
-	trx state enum and prefer the named constant. */
-	if(!(prebuilt->sql_stat_start || (prebuilt->trx && prebuilt->trx->state == 1))) {
-		DBUG_RETURN(HA_ERR_END_OF_FILE);
-	}
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	if (prebuilt->table->is_readable()) { /* readable: continue with the fetch below */
-	} else {
-		if (prebuilt->table->corrupted) {
-			DBUG_RETURN(HA_ERR_CRASHED);
-		} else {
-			FilSpace space(prebuilt->table->space, true);
-
-			if (space()) { /* space object is non-null, yet the table is unreadable */
-				DBUG_RETURN(HA_ERR_DECRYPTION_FAILED);
-			} else {
-				DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
-			}
-		}
-	}
-
-	innobase_srv_conc_enter_innodb(prebuilt->trx);
-
-	ret = row_search_for_mysql(
-		(byte*) buf, 0, prebuilt, match_mode, direction);
-
-	innobase_srv_conc_exit_innodb(prebuilt->trx);
-
-	/* Translate the InnoDB result into a handler error code and keep
-	table->status in step with it. */
-	switch (ret) {
-	case DB_SUCCESS:
-		error = 0;
-		table->status = 0;
-		srv_stats.n_rows_read.add((size_t) prebuilt->trx->id, 1);
-		break;
-	case DB_RECORD_NOT_FOUND:
-		error = HA_ERR_END_OF_FILE;
-		table->status = STATUS_NOT_FOUND;
-		break;
-	case DB_END_OF_INDEX:
-		error = HA_ERR_END_OF_FILE;
-		table->status = STATUS_NOT_FOUND;
-		break;
-	case DB_TABLESPACE_DELETED:
-
-		ib_senderrf(
-			prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-			ER_TABLESPACE_DISCARDED,
-			table->s->table_name.str);
-
-		table->status = STATUS_NOT_FOUND;
-		error = HA_ERR_NO_SUCH_TABLE;
-		break;
-	case DB_TABLESPACE_NOT_FOUND:
-
-		ib_senderrf(
-			prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-			ER_TABLESPACE_MISSING,
-			table->s->table_name.str);
-
-		table->status = STATUS_NOT_FOUND;
-		error = HA_ERR_NO_SUCH_TABLE;
-		break;
-	default:
-		error = convert_error_code_to_mysql(
-			ret, prebuilt->table->flags, user_thd);
-
-		table->status = STATUS_NOT_FOUND;
-		break;
-	}
-
-	DBUG_RETURN(error);
-}
-
-/***********************************************************************//**
-Reads the next row from a cursor, which must have previously been
-positioned using index_read.
-Thin wrapper: general_fetch() with ROW_SEL_NEXT and match mode 0.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_next(
-/*====================*/
-	uchar*		buf)	/*!< in/out: buffer for next row in MySQL
-				format */
-{
-	return(general_fetch(buf, ROW_SEL_NEXT, 0));
-}
-
-/*******************************************************************//**
-Reads the next row matching to the key value given as the parameter.
-The key and keylen arguments are not read by this function; the match
-mode saved in last_match_mode is passed to general_fetch() instead.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_next_same(
-/*=========================*/
-	uchar*		buf,	/*!< in/out: buffer for the row */
-	const uchar*	key,	/*!< in: key value */
-	uint		keylen)	/*!< in: key value length */
-{
-	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
-}
-
-/***********************************************************************//**
-Reads the previous row from a cursor, which must have previously been
-positioned using index_read.
-Thin wrapper: general_fetch() with ROW_SEL_PREV and match mode 0.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_prev(
-/*====================*/
-	uchar*		buf)	/*!< in/out: buffer for previous row in MySQL format */
-{
-	return(general_fetch(buf, ROW_SEL_PREV, 0));
-}
-
-/********************************************************************//**
-Positions a cursor on the first record in an index and reads the
-corresponding row to buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
-int
-ha_innobase::index_first(
-/*=====================*/
-	uchar*	buf)	/*!< in/out: buffer for the row */
-{
-	DBUG_ENTER("index_first");
-
-	/* A NULL key combined with HA_READ_AFTER_KEY is how index_read()
-	is asked for the first record. */
-	const int	rc = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
-
-	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
-	DBUG_RETURN(rc == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : rc);
-}
-
-/********************************************************************//**
-Moves the cursor to the last record of the active index and copies the
-row into buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
-int
-ha_innobase::index_last(
-/*====================*/
-	uchar*	buf)	/*!< in/out: buffer for the row */
-{
-	DBUG_ENTER("index_last");
-
-	/* A NULL key combined with HA_READ_BEFORE_KEY is how index_read()
-	is asked for the last record. */
-	const int	rc = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
-
-	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
-	DBUG_RETURN(rc == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : rc);
-}
-
-/****************************************************************//**
-Prepares the handle for a table scan or for reads by position.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_init(
-/*==================*/
-	bool	scan)	/*!< in: TRUE if table/index scan FALSE otherwise */
-{
-	/* Pick the index to read through: MAX_KEY when InnoDB generated
-	the clustered index itself, otherwise the primary key. */
-	const uint	keynr = prebuilt->clust_index_was_generated
-		? MAX_KEY
-		: primary_key;
-
-	const int	err = change_active_index(keynr);
-
-	/* Semi-consistent read is not used for random row reads (by
-	position), so it is switched off whenever this is not a scan. */
-	if (!scan) {
-		try_semi_consistent_read(0);
-	}
-
-	/* Makes the next rnd_next() call start from the first record. */
-	start_of_scan = 1;
-
-	return(err);
-}
-
-/*****************************************************************//**
-Ends a table scan.
-@return 0 or error number */ -UNIV_INTERN -int -ha_innobase::rnd_end(void) -/*======================*/ -{ - return(index_end()); -} - -/*****************************************************************//** -Reads the next row in a table scan (also used to read the FIRST row -in a table scan). -@return 0, HA_ERR_END_OF_FILE, or error number */ -UNIV_INTERN -int -ha_innobase::rnd_next( -/*==================*/ - uchar* buf) /*!< in/out: returns the row in this buffer, - in MySQL format */ -{ - int error; - - DBUG_ENTER("rnd_next"); - - if (start_of_scan) { - error = index_first(buf); - - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - - start_of_scan = 0; - } else { - error = general_fetch(buf, ROW_SEL_NEXT, 0); - } - - DBUG_RETURN(error); -} - -/**********************************************************************//** -Fetches a row from the table based on a row reference. -@return 0, HA_ERR_KEY_NOT_FOUND, or error code */ -UNIV_INTERN -int -ha_innobase::rnd_pos( -/*=================*/ - uchar* buf, /*!< in/out: buffer for the row */ - uchar* pos) /*!< in: primary key value of the row in the - MySQL format, or the row id if the clustered - index was internally generated by InnoDB; the - length of data in pos has to be ref_length */ -{ - int error; - DBUG_ENTER("rnd_pos"); - DBUG_DUMP("key", pos, ref_length); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - /* Note that we assume the length of the row reference is fixed - for the table, and it is == ref_length */ - - error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT); - - if (error) { - DBUG_PRINT("error", ("Got error: %d", error)); - } - - DBUG_RETURN(error); -} - -/**********************************************************************//** -Initialize FT index scan -@return 0 or error number */ -UNIV_INTERN -int -ha_innobase::ft_init() -/*==================*/ -{ - DBUG_ENTER("ft_init"); - - trx_t* trx = check_trx_exists(ha_thd()); - - /* FTS queries are not treated as 
autocommit non-locking selects. - This is because the FTS implementation can acquire locks behind - the scenes. This has not been verified but it is safer to treat - them as regular read only transactions for now. */ - - if (!trx_is_started(trx)) { - ++trx->will_lock; - } - - DBUG_RETURN(rnd_init(false)); -} - -/**********************************************************************//** -Initialize FT index scan -@return FT_INFO structure if successful or NULL */ -UNIV_INTERN -FT_INFO* -ha_innobase::ft_init_ext( -/*=====================*/ - uint flags, /* in: */ - uint keynr, /* in: */ - String* key) /* in: */ -{ - trx_t* trx; - dict_table_t* ft_table; - dberr_t error; - byte* query = (byte*) key->ptr(); - ulint query_len = key->length(); - const CHARSET_INFO* char_set = key->charset(); - NEW_FT_INFO* fts_hdl = NULL; - dict_index_t* index; - fts_result_t* result; - char buf_tmp[8192]; - ulint buf_tmp_used; - uint num_errors; - - if (fts_enable_diag_print) { - fprintf(stderr, "keynr=%u, '%.*s'\n", - keynr, (int) key->length(), (byte*) key->ptr()); - - if (flags & FT_BOOL) { - fprintf(stderr, "BOOL search\n"); - } else { - fprintf(stderr, "NL search\n"); - } - } - - /* FIXME: utf32 and utf16 are not compatible with some - string function used. So to convert them to uft8 before - proceed. */ - if (strcmp(char_set->csname, "utf32") == 0 - || strcmp(char_set->csname, "utf16") == 0) { - buf_tmp_used = innobase_convert_string( - buf_tmp, sizeof(buf_tmp) - 1, - &my_charset_utf8_general_ci, - query, query_len, (CHARSET_INFO*) char_set, - &num_errors); - - query = (byte*) buf_tmp; - query_len = buf_tmp_used; - query[query_len] = 0; - } - - trx = prebuilt->trx; - - /* FTS queries are not treated as autocommit non-locking selects. - This is because the FTS implementation can acquire locks behind - the scenes. This has not been verified but it is safer to treat - them as regular read only transactions for now. 
*/ - - if (!trx_is_started(trx)) { - ++trx->will_lock; - } - - ft_table = prebuilt->table; - - /* Table does not have an FTS index */ - if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) { - my_error(ER_TABLE_HAS_NO_FT, MYF(0)); - return(NULL); - } - - /* If tablespace is discarded, we should return here */ - if (dict_table_is_discarded(ft_table)) { - my_error(ER_NO_SUCH_TABLE, MYF(0), table->s->db.str, - table->s->table_name.str); - return(NULL); - } - - if (keynr == NO_SUCH_KEY) { - /* FIXME: Investigate the NO_SUCH_KEY usage */ - index = (dict_index_t*) ib_vector_getp(ft_table->fts->indexes, 0); - } else { - index = innobase_get_index(keynr); - } - - if (!index || index->type != DICT_FTS) { - my_error(ER_TABLE_HAS_NO_FT, MYF(0)); - return(NULL); - } - - if (!(ft_table->fts->fts_status & ADDED_TABLE_SYNCED)) { - fts_init_index(ft_table, FALSE); - - ft_table->fts->fts_status |= ADDED_TABLE_SYNCED; - } - - error = fts_query(trx, index, flags, query, query_len, &result); - - if (error != DB_SUCCESS) { - my_error(convert_error_code_to_mysql(error, 0, NULL), - MYF(0)); - return(NULL); - } - - /* Allocate FTS handler, and instantiate it before return */ - fts_hdl = static_cast<NEW_FT_INFO*>(my_malloc(sizeof(NEW_FT_INFO), - MYF(0))); - - fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result); - fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result); - fts_hdl->ft_prebuilt = prebuilt; - fts_hdl->ft_result = result; - - /* FIXME: Re-evluate the condition when Bug 14469540 - is resolved */ - prebuilt->in_fts_query = true; - - return((FT_INFO*) fts_hdl); -} - -/*****************************************************************//** -Set up search tuple for a query through FTS_DOC_ID_INDEX on -supplied Doc ID. 
This is used by MySQL to retrieve the documents
-once the search result (Doc IDs) is available */
-static
-void
-innobase_fts_create_doc_id_key(
-/*===========================*/
-	dtuple_t*	tuple,		/* in/out: prebuilt->search_tuple */
-	const dict_index_t*
-			index,		/* in: index (FTS_DOC_ID_INDEX) */
-	doc_id_t*	doc_id)		/* in/out: doc id to search, value
-					could be changed to storage format
-					used for search. */
-{
-	doc_id_t	temp_doc_id;
-	dfield_t*	dfield = dtuple_get_nth_field(tuple, 0);
-
-	ut_a(dict_index_get_n_unique(index) == 1);
-
-	dtuple_set_n_fields(tuple, index->n_fields);
-	dict_index_copy_types(tuple, index, index->n_fields);
-
-#ifdef UNIV_DEBUG
-	/* The unique Doc ID field should be an eight-bytes integer */
-	dict_field_t*	field = dict_index_get_nth_field(index, 0);
-	ut_a(field->col->mtype == DATA_INT);
-	ut_ad(sizeof(*doc_id) == field->fixed_len);
-	ut_ad(innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME) == 0);
-#endif /* UNIV_DEBUG */
-
-	/* Convert to storage byte order. The caller's *doc_id is
-	overwritten with the converted bytes and the tuple field points
-	at the caller's variable, which must therefore outlive the
-	search. */
-	mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
-	*doc_id = temp_doc_id;
-	dfield_set_data(dfield, doc_id, sizeof(*doc_id));
-
-	/* Only the first field (the Doc ID) takes part in comparisons;
-	the remaining fields are set to NULL. */
-	dtuple_set_n_fields_cmp(tuple, 1);
-
-	for (ulint i = 1; i < index->n_fields; i++) {
-		dfield = dtuple_get_nth_field(tuple, i);
-		dfield_set_null(dfield);
-	}
-}
-
-/**********************************************************************//**
-Fetch next result from the FT result set
-@return error code */
-UNIV_INTERN
-int
-ha_innobase::ft_read(
-/*=================*/
-	uchar*		buf)		/*!< in/out: buf contain result row */
-{
-	fts_result_t*	result;
-	int		error;
-	row_prebuilt_t*	ft_prebuilt;
-
-	ft_prebuilt = ((NEW_FT_INFO*) ft_handler)->ft_prebuilt;
-
-	ut_a(ft_prebuilt == prebuilt);
-
-	result = ((NEW_FT_INFO*) ft_handler)->ft_result;
-
-	if (result->current == NULL) {
-		/* This is the case where the FTS query did not
-		contain any matching documents, or where the ranked
-		result has not been built yet. */
-		if (result->rankings_by_id != NULL) {
-			/* Now that we have the complete result, we
-			need to sort the document ids on their rank
-			calculation. */
-
-			fts_query_sort_result_on_rank(result);
-
-			result->current = const_cast<ib_rbt_node_t*>(
-				rbt_first(result->rankings_by_rank));
-		} else {
-			ut_a(result->current == NULL);
-		}
-	} else {
-		result->current = const_cast<ib_rbt_node_t*>(
-			rbt_next(result->rankings_by_rank, result->current));
-	}
-
-next_record:
-
-	if (result->current != NULL) {
-		dict_index_t*	index;
-		dtuple_t*	tuple = prebuilt->search_tuple;
-		doc_id_t	search_doc_id;
-
-		/* If we only need information from result we can return
-		without fetching the table row */
-		if (ft_prebuilt->read_just_key) {
-			table->status= 0;
-			return(0);
-		}
-
-		index = dict_table_get_index_on_name(
-			prebuilt->table, FTS_DOC_ID_INDEX_NAME);
-
-		/* Must find the index */
-		ut_a(index);
-
-		/* Switch to the FTS doc id index */
-		prebuilt->index = index;
-
-		fts_ranking_t*	ranking = rbt_value(
-			fts_ranking_t, result->current);
-
-		search_doc_id = ranking->doc_id;
-
-		/* We pass a pointer of search_doc_id because it will be
-		converted to storage byte order used in the search
-		tuple. */
-		innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
-
-		innobase_srv_conc_enter_innodb(prebuilt->trx);
-
-		dberr_t ret = row_search_for_mysql(
-			(byte*) buf, PAGE_CUR_GE, prebuilt, ROW_SEL_EXACT, 0);
-
-		innobase_srv_conc_exit_innodb(prebuilt->trx);
-
-		switch (ret) {
-		case DB_SUCCESS:
-			error = 0;
-			table->status = 0;
-			break;
-		case DB_RECORD_NOT_FOUND:
-			/* No row for this Doc ID: skip to the next
-			ranked document. */
-			result->current = const_cast<ib_rbt_node_t*>(
-				rbt_next(result->rankings_by_rank,
-					 result->current));
-
-			if (!result->current) {
-				/* exhaust the result set, should return
-				HA_ERR_END_OF_FILE just like
-				ha_innobase::general_fetch() and/or
-				ha_innobase::index_first() etc. */
-				error = HA_ERR_END_OF_FILE;
-				table->status = STATUS_NOT_FOUND;
-			} else {
-				goto next_record;
-			}
-			break;
-		case DB_END_OF_INDEX:
-			error = HA_ERR_END_OF_FILE;
-			table->status = STATUS_NOT_FOUND;
-			break;
-		case DB_TABLESPACE_DELETED:
-
-			ib_senderrf(
-				prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-				ER_TABLESPACE_DISCARDED,
-				table->s->table_name.str);
-
-			table->status = STATUS_NOT_FOUND;
-			error = HA_ERR_NO_SUCH_TABLE;
-			break;
-		case DB_TABLESPACE_NOT_FOUND:
-
-			ib_senderrf(
-				prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-				ER_TABLESPACE_MISSING,
-				table->s->table_name.str);
-
-			table->status = STATUS_NOT_FOUND;
-			error = HA_ERR_NO_SUCH_TABLE;
-			break;
-		default:
-			error = convert_error_code_to_mysql(
-				ret, 0, user_thd);
-
-			table->status = STATUS_NOT_FOUND;
-			break;
-		}
-
-		return(error);
-	}
-
-	return(HA_ERR_END_OF_FILE);
-}
-
-/*************************************************************************
-Ends an FT index scan by ending the underlying table scan.
-NOTE(review): the unconditional fprintf() to stderr below looks like
-leftover debug output; confirm whether it should be removed or guarded.
-*/
-
-void
-ha_innobase::ft_end()
-{
-	fprintf(stderr, "ft_end()\n");
-
-	rnd_end();
-}
-#ifdef WITH_WSREP
-extern dict_index_t*
-wsrep_dict_foreign_find_index(
-	dict_table_t*	table,
-	const char**	col_names,
-	const char**	columns,
-	ulint		n_cols,
-	dict_index_t*	types_idx,
-	ibool		check_charsets,
-	ulint		check_null);
-
-
-extern dberr_t
-wsrep_append_foreign_key(
-/*===========================*/
-	trx_t*		trx,		/*!< in: trx */
-	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
-	const rec_t*	rec,		/*!<in: clustered index record */
-	dict_index_t*	index,		/*!<in: clustered index */
-	ibool		referenced,	/*!<in: is check for referenced table */
-	ibool		shared)		/*!<in: is shared access */
-{
-	ut_a(trx);
-	THD*  thd = (THD*)trx->mysql_thd;
-	ulint rcode = DB_SUCCESS;
-	char  cache_key[513] = {'\0'};
-	int   cache_key_len;
-	bool const copy = true;
-
-	/* Nothing to append unless wsrep is on for this connection and
-	the thread runs in LOCAL_STATE. */
-	if (!wsrep_on(trx->mysql_thd) ||
-	    wsrep_thd_exec_mode(thd) != LOCAL_STATE)
-		return DB_SUCCESS;
-
-	if (!thd || !foreign ||
-	    (!foreign->referenced_table && !foreign->foreign_table))
-	{
-		WSREP_INFO("FK: %s missing in: %s",
-			(!thd)      ?  "thread"     :
-			((!foreign) ?  "constraint" :
-			((!foreign->referenced_table) ?
-			     "referenced table" : "foreign table")),
-			   (thd && wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-
-	/* The table on the side selected by 'referenced' is not attached
-	to the constraint yet: look it up, and its matching index, while
-	holding the dictionary mutex. */
-	if ( !((referenced) ?
-		foreign->referenced_table : foreign->foreign_table))
-	{
-		WSREP_DEBUG("pulling %s table into cache",
-			    (referenced) ? "referenced" : "foreign");
-		mutex_enter(&(dict_sys->mutex));
-		if (referenced)
-		{
-			foreign->referenced_table =
-				dict_table_get_low(
-					foreign->referenced_table_name_lookup);
-			if (foreign->referenced_table)
-			{
-				foreign->referenced_index =
-					wsrep_dict_foreign_find_index(
-						foreign->referenced_table, NULL,
-						foreign->referenced_col_names,
-						foreign->n_fields,
-						foreign->foreign_index,
-						TRUE, FALSE);
-			}
-		}
-		else
-		{
-	  		foreign->foreign_table =
-				dict_table_get_low(
-					foreign->foreign_table_name_lookup);
-			if (foreign->foreign_table)
-			{
-				foreign->foreign_index =
-					wsrep_dict_foreign_find_index(
-						foreign->foreign_table, NULL,
-						foreign->foreign_col_names,
-						foreign->n_fields,
-						foreign->referenced_index,
-						TRUE, FALSE);
-			}
-		}
-		mutex_exit(&(dict_sys->mutex));
-	}
-
-	if ( !((referenced) ?
-		foreign->referenced_table : foreign->foreign_table))
-	{
-		WSREP_WARN("FK: %s missing in query: %s",
-			   (!foreign->referenced_table) ?
-			   "referenced table" : "foreign table",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-	byte  key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
-	ulint len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
-
-	/* Walk the index list of the selected table until the target
-	index is found; i counts the indexes passed on the way that are
-	not named innobase_index_reserve_name, and becomes the first
-	byte of the key. */
-	dict_index_t *idx_target = (referenced) ?
-		foreign->referenced_index : index;
-	dict_index_t *idx = (referenced) ?
-		UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
-		UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
-	int i = 0;
-	while (idx != NULL && idx != idx_target) {
-		if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
-			i++;
-		}
-		idx = UT_LIST_GET_NEXT(indexes, idx);
-	}
-	ut_a(idx);
-	key[0] = (char)i;
-
-	rcode = wsrep_rec_get_foreign_key(
-		&key[1], &len, rec, index, idx,
-		wsrep_protocol_version > 1);
-	if (rcode != DB_SUCCESS) {
-		WSREP_ERROR(
-			"FK key set failed: %lu (%lu %lu), index: %s %s, %s",
-			rcode, referenced, shared,
-			(index && index->name)       ? index->name :
-				"void index",
-			(index && index->table_name) ? index->table_name :
-				"void table",
-			wsrep_thd_query(thd));
-		return DB_ERROR;
-	}
-	strncpy(cache_key,
-		(wsrep_protocol_version > 1) ?
-		((referenced) ?
-			foreign->referenced_table->name :
-			foreign->foreign_table->name) :
-		foreign->foreign_table->name, sizeof(cache_key) - 1);
-	cache_key_len = strlen(cache_key);
-#ifdef WSREP_DEBUG_PRINT
-	ulint j;
-	fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
-		cache_key, (shared) ? "shared" : "exclusive", len+1);
-	for (j=0; j<len+1; j++) {
-		fprintf(stderr, " %hhX, ", key[j]);
-	}
-	fprintf(stderr, "\n");
-#endif
-	/* Replace the '/' in the copied table name with a NUL. The
-	length was measured before this split, and cache_key_len + 1
-	bytes are passed on below. */
-	char *p = strchr(cache_key, '/');
-	if (p) {
-		*p = '\0';
-	} else {
-		/* NOTE(review): only the table on the side selected by
-		'referenced' was checked above, so the other table
-		pointer dereferenced here may still be NULL; confirm. */
-		WSREP_WARN("unexpected foreign key table %s %s",
-			   foreign->referenced_table->name,
-			   foreign->foreign_table->name);
-	}
-
-	wsrep_buf_t wkey_part[3];
-        wsrep_key_t wkey = {wkey_part, 3};
-	if (!wsrep_prepare_key(
-		(const uchar*)cache_key,
-		cache_key_len +  1,
-		(const uchar*)key, len+1,
-		wkey_part,
-		(size_t*)&wkey.key_parts_num)) {
-		WSREP_WARN("key prepare failed for cascaded FK: %s",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-	wsrep_t *wsrep= get_wsrep();
-	rcode = (int)wsrep->append_key(
-		wsrep,
-		wsrep_ws_handle(thd, trx),
-		&wkey,
-		1,
-		shared ? WSREP_KEY_SHARED : WSREP_KEY_EXCLUSIVE,
-		copy);
-	if (rcode) {
-		DBUG_PRINT("wsrep", ("row key failed: %lu", rcode));
-		WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu",
-			    (wsrep_thd_query(thd)) ?
-			    wsrep_thd_query(thd) : "void", rcode);
-		return DB_ERROR;
-	}
-
-	return DB_SUCCESS;
-}
-
-static int
-wsrep_append_key(
-/*==================*/
-	THD		*thd,
-	trx_t 		*trx,
-	TABLE_SHARE 	*table_share,
-	TABLE 		*table,
-	const char*	key,
-	uint16_t        key_len,
-	bool            shared
-) /* Builds a wsrep key from the table cache key plus the given key
-  bytes and appends it to the write set of trx; returns 0, or
-  HA_ERR_INTERNAL_ERROR if preparing or appending the key fails. */
-{
-	DBUG_ENTER("wsrep_append_key");
-	bool const copy = true;
-#ifdef WSREP_DEBUG_PRINT
-	fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s\n Query: %s ",
-		(shared) ? "Shared" : "Exclusive",
-		thd_get_thread_id(thd), (long long)trx->id, key_len,
-		table_share->table_name.str, wsrep_thd_query(thd));
-	for (int i=0; i<key_len; i++) {
-		fprintf(stderr, "%hhX, ", key[i]);
-	}
-	fprintf(stderr, "\n");
-#endif
-	wsrep_buf_t wkey_part[3];
-        wsrep_key_t wkey = {wkey_part, 3};
-	if (!wsrep_prepare_key(
-			(const uchar*)table_share->table_cache_key.str,
-			table_share->table_cache_key.length,
-			(const uchar*)key, key_len,
-			wkey_part,
-			(size_t*)&wkey.key_parts_num)) {
-		WSREP_WARN("key prepare failed for: %s",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
-	}
-
-	wsrep_t *wsrep= get_wsrep();
-	int rcode = (int)wsrep->append_key(
-			       wsrep,
-			       wsrep_ws_handle(thd, trx),
-			       &wkey,
-			       1,
-			       shared ? WSREP_KEY_SHARED : WSREP_KEY_EXCLUSIVE,
-			       copy);
-	if (rcode) {
-		DBUG_PRINT("wsrep", ("row key failed: %d", rcode));
-		WSREP_WARN("Appending row key failed: %s, %d",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void", rcode);
-		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
-	}
-	DBUG_RETURN(0);
-}
-
-static bool
-referenced_by_foreign_key2(dict_table_t* table,
-			   dict_index_t* index) { /* true if some constraint in table->referenced_set uses index as its referenced index */
-	ut_ad(table != NULL);
-	ut_ad(index != NULL);
-
-	const dict_foreign_set* fks = &table->referenced_set;
-	for (dict_foreign_set::const_iterator it = fks->begin();
-             it != fks->end();
-             ++it)
-	{
-                dict_foreign_t* foreign = *it;
-                if (foreign->referenced_index != index) {
-                        continue;
-                }
-                ut_ad(table == foreign->referenced_table);
-                return true;
-        }
-        return false;
-}
-
-int
-ha_innobase::wsrep_append_keys(
-/*==================*/
-	THD 		*thd,
-	bool		shared,
-	const uchar*	record0,	/* in: row in MySQL format */
-	const uchar*	record1)	/* in: row in MySQL format */
-{
-	int rcode;
-	DBUG_ENTER("wsrep_append_keys");
-
-	bool key_appended = false;
-	trx_t *trx = thd_to_trx(thd);
-
-	/* Rows of temporary tables are skipped: no keys are appended. */
-	if (table_share && table_share->tmp_table  != NO_TMP_TABLE) {
-		WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
-			    thd_get_thread_id(thd),
-			    table_share->tmp_table,
-			    (wsrep_thd_query(thd)) ?
-			    wsrep_thd_query(thd) : "void");
-		DBUG_RETURN(0);
-	}
-
-	if (wsrep_protocol_version == 0) {
-		/* Protocol 0: only key number 0 of record0 is appended,
-		without an index-number prefix byte. */
-		uint	len;
-		char 	keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
-		char 	*key 		= &keyval[0];
-		ibool    is_null;
-
-		len = wsrep_store_key_val_for_row(
-			thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
-			record0, prebuilt, &is_null);
-
-		if (!is_null) {
-			rcode = wsrep_append_key(
-				thd, trx, table_share, table, keyval,
-				len, shared);
-			if (rcode) DBUG_RETURN(rcode);
-		}
-		else
-		{
-			WSREP_DEBUG("NULL key skipped (proto 0): %s",
-				    wsrep_thd_query(thd));
-		}
-	} else {
-		ut_a(table->s->keys <= 256);
-		uint i;
-		bool hasPK= false;
-
-		/* NOTE(review): despite its name, hasPK becomes true for
-		any key flagged HA_NOSAME, not only for a primary key. */
-		for (i=0; i<table->s->keys; ++i) {
-			KEY*  key_info	= table->key_info + i;
-			if (key_info->flags & HA_NOSAME) {
-				hasPK = true;
-			}
-		}
-
-		for (i=0; i<table->s->keys; ++i) {
-			uint	len;
-			char 	keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
-			char 	keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
-			char* 	key0 		= &keyval0[1];
-			char* 	key1 		= &keyval1[1];
-			KEY*	key_info	= table->key_info + i;
-			ibool    is_null;
-
-			dict_index_t* idx  = innobase_get_index(i);
-			dict_table_t* tab  = (idx) ? idx->table : NULL;
-
-			/* Byte 0 of each key value is the index number;
-			the key data itself starts at byte 1. */
-			keyval0[0] = (char)i;
-			keyval1[0] = (char)i;
-
-			if (!tab) {
-				WSREP_WARN("MySQL-InnoDB key mismatch %s %s",
-					   table->s->table_name.str,
-					   key_info->name);
-			}
-			/* !hasPK == table with no PK, must append all non-unique keys */
-			if (!hasPK || key_info->flags & HA_NOSAME ||
-			    ((tab &&
-			      referenced_by_foreign_key2(tab, idx)) ||
-			     (!tab && referenced_by_foreign_key()))) {
-
-				len = wsrep_store_key_val_for_row(
-					thd, table, i, key0,
-					WSREP_MAX_SUPPORTED_KEY_LENGTH,
-					record0, prebuilt, &is_null);
-				if (!is_null) {
-					rcode = wsrep_append_key(
-						thd, trx, table_share, table,
-						keyval0, len+1, shared);
-					if (rcode) DBUG_RETURN(rcode);
-
-					if (key_info->flags & HA_NOSAME || shared)
-			  			key_appended = true;
-				}
-				else
-				{
-					WSREP_DEBUG("NULL key skipped: %s",
-						    wsrep_thd_query(thd));
-				}
-				if (record1) {
-					/* The key of record1 is appended
-					only when it differs from the key
-					of record0 over the first len
-					bytes (len as computed for
-					record1). */
-					len = wsrep_store_key_val_for_row(
-						thd, table, i, key1,
-						WSREP_MAX_SUPPORTED_KEY_LENGTH,
-						record1, prebuilt, &is_null);
-					if (!is_null && memcmp(key0, key1, len)) {
-						rcode = wsrep_append_key(
-							thd, trx, table_share,
-							table,
-							keyval1, len+1, shared);
-						if (rcode) DBUG_RETURN(rcode);
-					}
-				}
-			}
-		}
-	}
-
-	/* if no PK, calculate hash of full row, to be the key value */
-	if (!key_appended && wsrep_certify_nonPK) {
-		uchar digest[16];
-		int rcode; /* shadows the function-level rcode declared above */
-
-		wsrep_calc_row_hash(digest, record0, table, prebuilt, thd);
-		if ((rcode = wsrep_append_key(thd, trx, table_share, table,
-					      (const char*) digest, 16,
-					      shared))) {
-			DBUG_RETURN(rcode);
-		}
-
-		if (record1) {
-			wsrep_calc_row_hash(
-				digest, record1, table, prebuilt, thd);
-			if ((rcode = wsrep_append_key(thd, trx, table_share,
-						      table,
-						      (const char*) digest,
-						      16, shared))) {
-				DBUG_RETURN(rcode);
-			}
-		}
-		DBUG_RETURN(0);
-	}
-
-	DBUG_RETURN(0);
-}
-#endif /* WITH_WSREP */
-
-/*********************************************************************//**
-Stores a reference to the current row to 'ref' field of the handle.
Note
-that in the case where we have generated the clustered index for the
-table, the function parameter is illogical: we MUST ASSUME that 'record'
-is the current 'position' of the handle, because if row ref is actually
-the row id internally generated in InnoDB, then 'record' does not contain
-it. We just guess that the row id must be for the record where the handle
-was positioned the last time. */
-UNIV_INTERN
-void
-ha_innobase::position(
-/*==================*/
-	const uchar*	record)	/*!< in: row in MySQL format */
-{
-	uint		len;
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	if (prebuilt->clust_index_was_generated) {
-		/* No primary key was defined for the table and we
-		generated the clustered index from row id: the
-		row reference will be the row id, not any key value
-		that MySQL knows of */
-
-		len = DATA_ROW_ID_LEN;
-
-		memcpy(ref, prebuilt->row_id, len);
-	} else {
-		len = store_key_val_for_row(primary_key, (char*) ref,
-					    ref_length, record);
-	}
-
-	/* We assume that the 'ref' value len is always fixed for the same
-	table. A mismatch is only logged; the function has no way to
-	report it to the caller. */
-
-	if (len != ref_length) {
-		sql_print_error("Stored ref len is %lu, but table ref len is "
-				"%lu", (ulong) len, (ulong) ref_length);
-	}
-}
-
-/*****************************************************************//**
-Check whether there exist a column named as "FTS_DOC_ID", which is
-reserved for InnoDB FTS Doc ID.
-The name is matched case-insensitively first; a column that matches only
-in that way, or that has the wrong type, nullability or length, is
-rejected with a warning and ER_WRONG_COLUMN_NAME. Only the first column
-whose name matches is examined. When no column matches, *doc_id_col is
-left unmodified.
-@return true if there exist a "FTS_DOC_ID" column */
-static
-bool
-create_table_check_doc_id_col(
-/*==========================*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	const TABLE*	form,		/*!< in: information on table
-					columns and indexes */
-	ulint*		doc_id_col)	/*!< out: Doc ID column number if
-					there exist a FTS_DOC_ID column,
-					ULINT_UNDEFINED if column is of the
-					wrong type/name/size */
-{
-	for (ulint i = 0; i < form->s->fields; i++) {
-		const Field*	field;
-		ulint		col_type;
-		ulint		col_len;
-		ulint		unsigned_type;
-
-		field = form->field[i];
-
-		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
-							     field);
-
-		col_len = field->pack_length();
-
-		if (innobase_strcasecmp(field->field_name.str,
-					FTS_DOC_ID_COL_NAME) == 0) {
-
-			/* Note the name is case sensitive due to
-			our internal query parser */
-			if (col_type == DATA_INT
-			    && !field->real_maybe_null()
-			    && col_len == sizeof(doc_id_t)
-			    && (strcmp(field->field_name.str,
-				      FTS_DOC_ID_COL_NAME) == 0)) {
-				*doc_id_col = i;
-			} else {
-				push_warning_printf(
-					trx->mysql_thd,
-					Sql_condition::WARN_LEVEL_WARN,
-					ER_ILLEGAL_HA_CREATE_OPTION,
-					"InnoDB: FTS_DOC_ID column must be "
-					"of BIGINT NOT NULL type, and named "
-					"in all capitalized characters");
-				my_error(ER_WRONG_COLUMN_NAME, MYF(0),
-					 field->field_name.str);
-				*doc_id_col = ULINT_UNDEFINED;
-			}
-
-			return(true);
-		}
-	}
-
-	return(false);
-}
-
-/*****************************************************************//**
-Creates a table definition to an InnoDB database.
*/ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -int -create_table_def( -/*=============*/ - trx_t* trx, /*!< in: InnoDB transaction handle */ - const TABLE* form, /*!< in: information on table - columns and indexes */ - const char* table_name, /*!< in: table name */ - const char* temp_path, /*!< in: if this is a table explicitly - created by the user with the - TEMPORARY keyword, then this - parameter is the dir path where the - table should be placed if we create - an .ibd file for it (no .ibd extension - in the path, though). Otherwise this - is a zero length-string */ - const char* remote_path, /*!< in: Remote path or zero length-string */ - ulint flags, /*!< in: table flags */ - ulint flags2, /*!< in: table flags2 */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ -{ - THD* thd = trx->mysql_thd; - dict_table_t* table; - ulint n_cols, s_cols; - dberr_t err; - ulint col_type; - ulint col_len; - ulint nulls_allowed; - ulint unsigned_type; - ulint binary_type; - ulint long_true_varchar; - ulint charset_no; - ulint i; - ulint doc_id_col = 0; - ibool has_doc_id_col = FALSE; - mem_heap_t* heap; - - DBUG_ENTER("create_table_def"); - DBUG_PRINT("enter", ("table_name: %s", table_name)); - - DBUG_ASSERT(thd != NULL); - - /* MySQL does the name length check. 
But we do additional check - on the name length here */ - const size_t table_name_len = strlen(table_name); - if (table_name_len > MAX_FULL_NAME_LEN) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_TABLE_NAME, - "InnoDB: Table Name or Database Name is too long"); - - DBUG_RETURN(ER_TABLE_NAME); - } - - if (table_name[table_name_len - 1] == '/') { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_TABLE_NAME, - "InnoDB: Table name is empty"); - - DBUG_RETURN(ER_WRONG_TABLE_NAME); - } - - n_cols = form->s->fields; - s_cols = form->s->stored_fields; - - /* Check whether there already exists a FTS_DOC_ID column */ - if (create_table_check_doc_id_col(trx, form, &doc_id_col)){ - - /* Raise error if the Doc ID column is of wrong type or name */ - if (doc_id_col == ULINT_UNDEFINED) { - trx_commit_for_mysql(trx); - - err = DB_ERROR; - goto error_ret; - } else { - has_doc_id_col = TRUE; - } - } - - /* We pass 0 as the space id, and determine at a lower level the space - id where to store the table */ - - if (flags2 & DICT_TF2_FTS) { - /* Adjust for the FTS hidden field */ - if (!has_doc_id_col) { - table = dict_mem_table_create(table_name, 0, s_cols + 1, - flags, flags2); - - /* Set the hidden doc_id column. 
*/ - table->fts->doc_col = s_cols; - } else { - table = dict_mem_table_create(table_name, 0, s_cols, - flags, flags2); - table->fts->doc_col = doc_id_col; - } - } else { - table = dict_mem_table_create(table_name, 0, s_cols, - flags, flags2); - } - - if (flags2 & DICT_TF2_TEMPORARY) { - ut_a(strlen(temp_path)); - table->dir_path_of_temp_table = - mem_heap_strdup(table->heap, temp_path); - } - - if (DICT_TF_HAS_DATA_DIR(flags)) { - ut_a(strlen(remote_path)); - table->data_dir_path = mem_heap_strdup(table->heap, remote_path); - } else { - table->data_dir_path = NULL; - } - heap = mem_heap_create(1000); - - for (i = 0; i < n_cols; i++) { - Field* field = form->field[i]; - if (!field->stored_in_db) - continue; - - col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); - - if (!col_type) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_CREATE_TABLE, - "Error creating table '%s' with " - "column '%s'. Please check its " - "column type and try to re-create " - "the table with an appropriate " - "column type.", - table->name, field->field_name.str); - goto err_col; - } - - nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL; - binary_type = field->binary() ? DATA_BINARY_TYPE : 0; - - charset_no = 0; - - if (dtype_is_string_type(col_type)) { - - charset_no = (ulint) field->charset()->number; - - if (UNIV_UNLIKELY(charset_no > MAX_CHAR_COLL_NUM)) { - /* in data0type.h we assume that the - number fits in one byte in prtype */ - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_CREATE_TABLE, - "In InnoDB, charset-collation codes" - " must be below 256." 
- " Unsupported code %lu.", - (ulong) charset_no); - mem_heap_free(heap); - DBUG_RETURN(ER_CANT_CREATE_TABLE); - } - } - - /* we assume in dtype_form_prtype() that this fits in - two bytes */ - ut_a(static_cast<uint>(field->type()) <= MAX_CHAR_COLL_NUM); - col_len = field->pack_length(); - - /* The MySQL pack length contains 1 or 2 bytes length field - for a true VARCHAR. Let us subtract that, so that the InnoDB - column length in the InnoDB data dictionary is the real - maximum byte length of the actual data. */ - - long_true_varchar = 0; - - if (field->type() == MYSQL_TYPE_VARCHAR) { - col_len -= ((Field_varstring*) field)->length_bytes; - - if (((Field_varstring*) field)->length_bytes == 2) { - long_true_varchar = DATA_LONG_TRUE_VARCHAR; - } - } - - /* First check whether the column to be added has a - system reserved name. */ - if (dict_col_name_is_reserved(field->field_name.str)){ - my_error(ER_WRONG_COLUMN_NAME, MYF(0), - field->field_name.str); -err_col: - dict_mem_table_free(table); - mem_heap_free(heap); - trx_commit_for_mysql(trx); - - err = DB_ERROR; - goto error_ret; - } - - dict_mem_table_add_col(table, heap, - field->field_name.str, - col_type, - dtype_form_prtype( - (ulint) field->type() - | nulls_allowed | unsigned_type - | binary_type | long_true_varchar, - charset_no), - col_len); - } - - /* Add the FTS doc_id hidden column. */ - if (flags2 & DICT_TF2_FTS && !has_doc_id_col) { - fts_add_doc_id_column(table, heap); - } - - err = row_create_table_for_mysql(table, trx, false, mode, key_id); - - mem_heap_free(heap); - - DBUG_EXECUTE_IF("ib_create_err_tablespace_exist", - err = DB_TABLESPACE_EXISTS;); - - if (err == DB_DUPLICATE_KEY || err == DB_TABLESPACE_EXISTS) { - char display_name[FN_REFLEN]; - char* buf_end = innobase_convert_identifier( - display_name, sizeof(display_name) - 1, - table_name, strlen(table_name), - thd, TRUE); - - *buf_end = '\0'; - - my_error(err == DB_DUPLICATE_KEY - ? 
ER_TABLE_EXISTS_ERROR - : ER_TABLESPACE_EXISTS, MYF(0), display_name); - } - - if (err == DB_SUCCESS && (flags2 & DICT_TF2_FTS)) { - fts_optimize_add_table(table); - } - -error_ret: - DBUG_RETURN(convert_error_code_to_mysql(err, flags, thd)); -} - -/*****************************************************************//** -Creates an index in an InnoDB database. */ -static -int -create_index( -/*=========*/ - trx_t* trx, /*!< in: InnoDB transaction handle */ - const TABLE* form, /*!< in: information on table - columns and indexes */ - ulint flags, /*!< in: InnoDB table flags */ - const char* table_name, /*!< in: table name */ - uint key_num) /*!< in: index number */ -{ - dict_index_t* index; - int error; - const KEY* key; - ulint ind_type; - ulint* field_lengths; - - DBUG_ENTER("create_index"); - - key = form->key_info + key_num; - - /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */ - ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0); - - if (key->flags & HA_FULLTEXT) { - index = dict_mem_index_create(table_name, key->name, 0, - DICT_FTS, - key->user_defined_key_parts); - - for (ulint i = 0; i < key->user_defined_key_parts; i++) { - KEY_PART_INFO* key_part = key->key_part + i; - dict_mem_index_add_field( - index, key_part->field->field_name.str, 0); - } - - DBUG_RETURN(convert_error_code_to_mysql( - row_create_index_for_mysql( - index, trx, NULL), - flags, NULL)); - - } - - ind_type = 0; - - if (key_num == form->s->primary_key) { - ind_type |= DICT_CLUSTERED; - } - - if (key->flags & HA_NOSAME) { - ind_type |= DICT_UNIQUE; - } - - field_lengths = (ulint*) my_malloc( - key->user_defined_key_parts * sizeof * - field_lengths, MYF(MY_FAE)); - - /* We pass 0 as the space id, and determine at a lower level the space - id where to store the table */ - - index = dict_mem_index_create(table_name, key->name, 0, - ind_type, key->user_defined_key_parts); - - for (ulint i = 0; i < key->user_defined_key_parts; i++) { - KEY_PART_INFO* 
key_part = key->key_part + i; - ulint prefix_len; - ulint col_type; - ulint is_unsigned; - - - /* (The flag HA_PART_KEY_SEG denotes in MySQL a - column prefix field in an index: we only store a - specified number of first bytes of the column to - the index field.) The flag does not seem to be - properly set by MySQL. Let us fall back on testing - the length of the key part versus the column. */ - - Field* field = NULL; - - for (ulint j = 0; j < form->s->fields; j++) { - - field = form->field[j]; - - if (0 == innobase_strcasecmp( - field->field_name.str, - key_part->field->field_name.str)) { - /* Found the corresponding column */ - - goto found; - } - } - - ut_error; -found: - col_type = get_innobase_type_from_mysql_type( - &is_unsigned, key_part->field); - - if (DATA_BLOB == col_type - || (key_part->length < field->pack_length() - && field->type() != MYSQL_TYPE_VARCHAR) - || (field->type() == MYSQL_TYPE_VARCHAR - && key_part->length < field->pack_length() - - ((Field_varstring*) field)->length_bytes)) { - - switch (col_type) { - default: - prefix_len = key_part->length; - break; - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - case DATA_DECIMAL: - sql_print_error( - "MySQL is trying to create a column " - "prefix index field, on an " - "inappropriate data type. Table " - "name %s, column name %s.", - table_name, - key_part->field->field_name.str); - - prefix_len = 0; - } - } else { - prefix_len = 0; - } - - field_lengths[i] = key_part->length; - - dict_mem_index_add_field( - index, key_part->field->field_name.str, prefix_len); - } - - ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS)); - - /* Even though we've defined max_supported_key_part_length, we - still do our own checking using field_lengths to be absolutely - sure we don't create too long indexes. 
*/ - - error = convert_error_code_to_mysql( - row_create_index_for_mysql(index, trx, field_lengths), - flags, NULL); - - my_free(field_lengths); - - DBUG_RETURN(error); -} - -/*****************************************************************//** -Creates an index to an InnoDB table when the user has defined no -primary index. */ -static -int -create_clustered_index_when_no_primary( -/*===================================*/ - trx_t* trx, /*!< in: InnoDB transaction handle */ - ulint flags, /*!< in: InnoDB table flags */ - const char* table_name) /*!< in: table name */ -{ - dict_index_t* index; - dberr_t error; - - /* We pass 0 as the space id, and determine at a lower level the space - id where to store the table */ - index = dict_mem_index_create(table_name, - innobase_index_reserve_name, - 0, DICT_CLUSTERED, 0); - - error = row_create_index_for_mysql(index, trx, NULL); - - return(convert_error_code_to_mysql(error, flags, NULL)); -} - -/*****************************************************************//** -Return a display name for the row format -@return row format name */ -UNIV_INTERN -const char* -get_row_format_name( -/*================*/ - enum row_type row_format) /*!< in: Row Format */ -{ - switch (row_format) { - case ROW_TYPE_COMPACT: - return("COMPACT"); - case ROW_TYPE_COMPRESSED: - return("COMPRESSED"); - case ROW_TYPE_DYNAMIC: - return("DYNAMIC"); - case ROW_TYPE_REDUNDANT: - return("REDUNDANT"); - case ROW_TYPE_DEFAULT: - return("DEFAULT"); - case ROW_TYPE_FIXED: - return("FIXED"); - case ROW_TYPE_PAGE: - case ROW_TYPE_NOT_USED: - default: - break; - } - return("NOT USED"); -} - -/** If file-per-table is missing, issue warning and set ret false */ -#define CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace)\ - if (!use_tablespace) { \ - push_warning_printf( \ - thd, Sql_condition::WARN_LEVEL_WARN, \ - ER_ILLEGAL_HA_CREATE_OPTION, \ - "InnoDB: ROW_FORMAT=%s requires" \ - " innodb_file_per_table.", \ - get_row_format_name(row_format)); \ - ret = 
"ROW_FORMAT"; \ - } - -/** If file-format is Antelope, issue warning and set ret false */ -#define CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE \ - if (srv_file_format < UNIV_FORMAT_B) { \ - push_warning_printf( \ - thd, Sql_condition::WARN_LEVEL_WARN, \ - ER_ILLEGAL_HA_CREATE_OPTION, \ - "InnoDB: ROW_FORMAT=%s requires" \ - " innodb_file_format > Antelope.", \ - get_row_format_name(row_format)); \ - ret = "ROW_FORMAT"; \ - } - - -/*****************************************************************//** -Validates the create options. We may build on this function -in future. For now, it checks two specifiers: -KEY_BLOCK_SIZE and ROW_FORMAT -If innodb_strict_mode is not set then this function is a no-op -@return NULL if valid, string if not. */ -UNIV_INTERN -const char* -create_options_are_invalid( -/*=======================*/ - THD* thd, /*!< in: connection thread. */ - TABLE* form, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info, /*!< in: create info. */ - bool use_tablespace) /*!< in: srv_file_per_table */ -{ - ibool kbs_specified = FALSE; - const char* ret = NULL; - enum row_type row_format = form->s->row_type; - - ut_ad(thd != NULL); - - /* If innodb_strict_mode is not set don't do any validation. */ - if (!(THDVAR(thd, strict_mode))) { - return(NULL); - } - - ut_ad(form != NULL); - ut_ad(create_info != NULL); - - /* First check if a non-zero KEY_BLOCK_SIZE was specified. */ - if (create_info->key_block_size) { - kbs_specified = TRUE; - switch (create_info->key_block_size) { - ulint kbs_max; - case 1: - case 2: - case 4: - case 8: - case 16: - /* Valid KEY_BLOCK_SIZE, check its dependencies. 
*/ - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE requires" - " innodb_file_per_table."); - ret = "KEY_BLOCK_SIZE"; - } - if (srv_file_format < UNIV_FORMAT_B) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE requires" - " innodb_file_format > Antelope."); - ret = "KEY_BLOCK_SIZE"; - } - - /* The maximum KEY_BLOCK_SIZE (KBS) is 16. But if - UNIV_PAGE_SIZE is smaller than 16k, the maximum - KBS is also smaller. */ - kbs_max = ut_min( - 1 << (UNIV_PAGE_SSIZE_MAX - 1), - 1 << (PAGE_ZIP_SSIZE_MAX - 1)); - if (create_info->key_block_size > kbs_max) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE=%ld" - " cannot be larger than %ld.", - create_info->key_block_size, - kbs_max); - ret = "KEY_BLOCK_SIZE"; - } - break; - default: - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: invalid KEY_BLOCK_SIZE = %lu." - " Valid values are [1, 2, 4, 8, 16]", - create_info->key_block_size); - ret = "KEY_BLOCK_SIZE"; - break; - } - } - - /* Check for a valid Innodb ROW_FORMAT specifier and - other incompatibilities. 
*/ - switch (row_format) { - case ROW_TYPE_COMPRESSED: - CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace); - CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; - break; - case ROW_TYPE_DYNAMIC: - CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace); - CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; - /* ROW_FORMAT=DYNAMIC also shuns KEY_BLOCK_SIZE */ - /* fall through */ - case ROW_TYPE_COMPACT: - case ROW_TYPE_REDUNDANT: - if (kbs_specified) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: cannot specify ROW_FORMAT = %s" - " with KEY_BLOCK_SIZE.", - get_row_format_name(row_format)); - ret = "KEY_BLOCK_SIZE"; - } - break; - case ROW_TYPE_DEFAULT: - break; - case ROW_TYPE_FIXED: - case ROW_TYPE_PAGE: - case ROW_TYPE_NOT_USED: - default: - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, \ - "InnoDB: invalid ROW_FORMAT specifier."); - ret = "ROW_TYPE"; - break; - } - - /* Use DATA DIRECTORY only with file-per-table. */ - if (create_info->data_file_name && !use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY requires" - " innodb_file_per_table."); - ret = "DATA DIRECTORY"; - } - - /* Do not use DATA DIRECTORY with TEMPORARY TABLE. 
*/ - if (create_info->data_file_name - && create_info->options & HA_LEX_CREATE_TMP_TABLE) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY cannot be used" - " for TEMPORARY tables."); - ret = "DATA DIRECTORY"; - } - - /* Do not allow INDEX_DIRECTORY */ - if (create_info->index_file_name) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: INDEX DIRECTORY is not supported"); - ret = "INDEX DIRECTORY"; - } - - if ((kbs_specified || row_format == ROW_TYPE_COMPRESSED) - && UNIV_PAGE_SIZE > (1<<14)) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: Cannot create a COMPRESSED table" - " when innodb_page_size > 16k."); - - if (kbs_specified) { - ret = "KEY_BLOCK_SIZE"; - } else { - ret = "ROW_TYPE"; - } - } - - return(ret); -} - -/*****************************************************************//** -Update create_info. Used in SHOW CREATE TABLE et al. */ -UNIV_INTERN -void -ha_innobase::update_create_info( -/*============================*/ - HA_CREATE_INFO* create_info) /*!< in/out: create info */ -{ - if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { - ha_innobase::info(HA_STATUS_AUTO); - create_info->auto_increment_value = stats.auto_increment_value; - } - - /* Update the DATA DIRECTORY name from SYS_DATAFILES. 
*/ - dict_get_and_save_data_dir_path(prebuilt->table, false); - - if (prebuilt->table->data_dir_path) { - create_info->data_file_name = prebuilt->table->data_dir_path; - } -} - -/*****************************************************************//** -Initialize the table FTS stopword list -@return TRUE if success */ -UNIV_INTERN -ibool -innobase_fts_load_stopword( -/*=======================*/ - dict_table_t* table, /*!< in: Table has the FTS */ - trx_t* trx, /*!< in: transaction */ - THD* thd) /*!< in: current thread */ -{ - return(fts_load_stopword(table, trx, - innobase_server_stopword_table, - THDVAR(thd, ft_user_stopword_table), - THDVAR(thd, ft_enable_stopword), FALSE)); -} - -/*****************************************************************//** -Parses the table name into normal name and either temp path or remote path -if needed. -@return 0 if successful, otherwise, error number */ -UNIV_INTERN -int -ha_innobase::parse_table_name( -/*==========================*/ - const char* name, /*!< in/out: table name provided*/ - HA_CREATE_INFO* create_info, /*!< in: more information of the - created table, contains also the - create statement string */ - ulint flags, /*!< in: flags*/ - ulint flags2, /*!< in: flags2*/ - char* norm_name, /*!< out: normalized table name */ - char* temp_path, /*!< out: absolute path of table */ - char* remote_path) /*!< out: remote path of table */ -{ - THD* thd = ha_thd(); - bool use_tablespace = flags2 & DICT_TF2_USE_TABLESPACE; - DBUG_ENTER("ha_innobase::parse_table_name"); - -#ifdef __WIN__ - /* Names passed in from server are in two formats: - 1. <database_name>/<table_name>: for normal table creation - 2. full path: for temp table creation, or DATA DIRECTORY. - - When srv_file_per_table is on and mysqld_embedded is off, - check for full path pattern, i.e. - X:\dir\..., X is a driver letter, or - \\dir1\dir2\..., UNC path - returns error if it is in full path format, but not creating a temp. - table. 
Currently InnoDB does not support symbolic link on Windows. */ - - if (use_tablespace - && !mysqld_embedded - && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) { - - if ((name[1] == ':') - || (name[0] == '\\' && name[1] == '\\')) { - sql_print_error("Cannot create table %s\n", name); - DBUG_RETURN(HA_ERR_GENERIC); - } - } -#endif - - normalize_table_name(norm_name, name); - temp_path[0] = '\0'; - remote_path[0] = '\0'; - - /* A full path is used for TEMPORARY TABLE and DATA DIRECTORY. - In the case of; - CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ; - We ignore the DATA DIRECTORY. */ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - strncpy(temp_path, name, FN_REFLEN - 1); - } - - if (create_info->data_file_name) { - bool ignore = false; - - /* Use DATA DIRECTORY only with file-per-table. */ - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY requires" - " innodb_file_per_table."); - ignore = true; - } - - /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: DATA DIRECTORY cannot be" - " used for TEMPORARY tables."); - ignore = true; - } - - if (ignore) { - my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING, - "DATA DIRECTORY"); - } else { - strncpy(remote_path, create_info->data_file_name, - FN_REFLEN - 1); - } - } - - if (create_info->index_file_name) { - my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING, - "INDEX DIRECTORY"); - } - - DBUG_RETURN(0); -} - -/*****************************************************************//** -Determines InnoDB table flags. 
-@retval true if successful, false if error */ -UNIV_INTERN -bool -innobase_table_flags( -/*=================*/ - const TABLE* form, /*!< in: table */ - const HA_CREATE_INFO* create_info, /*!< in: information - on table columns and indexes */ - THD* thd, /*!< in: connection */ - bool use_tablespace, /*!< in: whether to create - outside system tablespace */ - ulint* flags, /*!< out: DICT_TF flags */ - ulint* flags2) /*!< out: DICT_TF2 flags */ -{ - DBUG_ENTER("innobase_table_flags"); - - const char* fts_doc_id_index_bad = NULL; - bool zip_allowed = true; - ulint zip_ssize = 0; - enum row_type row_format; - rec_format_t innodb_row_format = REC_FORMAT_COMPACT; - bool use_data_dir; - ha_table_option_struct *options= form->s->option_struct; - - /* Cache the value of innodb_file_format, in case it is - modified by another thread while the table is being created. */ - const ulint file_format_allowed = srv_file_format; - - /* Cache the value of innobase_compression_level, in case it is - modified by another thread while the table is being created. */ - const ulint default_compression_level = page_zip_level; - - *flags = 0; - *flags2 = 0; - - /* Check if there are any FTS indexes defined on this table. */ - for (uint i = 0; i < form->s->keys; i++) { - const KEY* key = &form->key_info[i]; - - if (key->flags & HA_FULLTEXT) { - *flags2 |= DICT_TF2_FTS; - - /* We don't support FTS indexes in temporary - tables. 
*/ - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - - my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0)); - DBUG_RETURN(false); - } - - if (key->flags & HA_USES_PARSER) { - my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0)); - DBUG_RETURN(false); - } - - if (fts_doc_id_index_bad) { - goto index_bad; - } - } - - if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) { - continue; - } - - /* Do a pre-check on FTS DOC ID index */ - if (!(key->flags & HA_NOSAME) - || strcmp(key->name, FTS_DOC_ID_INDEX_NAME) - || strcmp(key->key_part[0].field->field_name.str, - FTS_DOC_ID_COL_NAME)) { - fts_doc_id_index_bad = key->name; - } - - if (fts_doc_id_index_bad && (*flags2 & DICT_TF2_FTS)) { -index_bad: - my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0), - fts_doc_id_index_bad); - DBUG_RETURN(false); - } - } - - row_format = form->s->row_type; - - if (create_info->key_block_size) { - /* The requested compressed page size (key_block_size) - is given in kilobytes. If it is a valid number, store - that value as the number of log2 shifts from 512 in - zip_ssize. Zero means it is not compressed. */ - ulint zssize; /* Zip Shift Size */ - ulint kbsize; /* Key Block Size */ - for (zssize = kbsize = 1; - zssize <= ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX); - zssize++, kbsize <<= 1) { - if (kbsize == create_info->key_block_size) { - zip_ssize = zssize; - break; - } - } - - /* Make sure compressed row format is allowed. 
*/ - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE requires" - " innodb_file_per_table."); - zip_allowed = FALSE; - } - - if (file_format_allowed < UNIV_FORMAT_B) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: KEY_BLOCK_SIZE requires" - " innodb_file_format > Antelope."); - zip_allowed = FALSE; - } - - if (!zip_allowed - || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu.", - create_info->key_block_size); - } - } - - if (zip_ssize && zip_allowed) { - /* if ROW_FORMAT is set to default, - automatically change it to COMPRESSED.*/ - if (row_format == ROW_TYPE_DEFAULT) { - row_format = ROW_TYPE_COMPRESSED; - } else if (row_format != ROW_TYPE_COMPRESSED) { - /* ROW_FORMAT other than COMPRESSED - ignores KEY_BLOCK_SIZE. It does not - make sense to reject conflicting - KEY_BLOCK_SIZE and ROW_FORMAT, because - such combinations can be obtained - with ALTER TABLE anyway. */ - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ignoring KEY_BLOCK_SIZE=%lu" - " unless ROW_FORMAT=COMPRESSED.", - create_info->key_block_size); - zip_allowed = FALSE; - } - } else { - /* zip_ssize == 0 means no KEY_BLOCK_SIZE.*/ - if (row_format == ROW_TYPE_COMPRESSED && zip_allowed) { - /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE - implies half the maximum KEY_BLOCK_SIZE(*1k) or - UNIV_PAGE_SIZE, whichever is less. */ - zip_ssize = ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX) - 1; - } - } - - /* Validate the row format. 
Correct it if necessary */ - switch (row_format) { - case ROW_TYPE_REDUNDANT: - innodb_row_format = REC_FORMAT_REDUNDANT; - break; - - case ROW_TYPE_COMPRESSED: - case ROW_TYPE_DYNAMIC: - if (!use_tablespace) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s requires" - " innodb_file_per_table.", - get_row_format_name(row_format)); - } else if (file_format_allowed == UNIV_FORMAT_A) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: ROW_FORMAT=%s requires" - " innodb_file_format > Antelope.", - get_row_format_name(row_format)); - } else { - switch(row_format) { - case ROW_TYPE_COMPRESSED: - innodb_row_format = REC_FORMAT_COMPRESSED; - break; - case ROW_TYPE_DYNAMIC: - innodb_row_format = REC_FORMAT_DYNAMIC; - break; - default: - /* Not possible, avoid compiler warning */ - break; - } - break; /* Correct row_format */ - } - zip_allowed = FALSE; - /* Set ROW_FORMAT = COMPACT */ - /* fall through */ - case ROW_TYPE_NOT_USED: - case ROW_TYPE_FIXED: - case ROW_TYPE_PAGE: - default: - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: assuming ROW_FORMAT=COMPACT."); - /* fall through */ - case ROW_TYPE_DEFAULT: - /* If we fell through, set row format to Compact. */ - row_format = ROW_TYPE_COMPACT; - case ROW_TYPE_COMPACT: - break; - } - - /* Don't support compressed table when page size > 16k. */ - if (zip_allowed && zip_ssize && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_ILLEGAL_HA_CREATE_OPTION, - "InnoDB: Cannot create a COMPRESSED table" - " when innodb_page_size > 16k." 
- " Assuming ROW_FORMAT=COMPACT."); - zip_allowed = FALSE; - } - - /* Set the table flags */ - if (!zip_allowed) { - zip_ssize = 0; - } - - use_data_dir = use_tablespace - && ((create_info->data_file_name != NULL) - && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - - /* Set up table dictionary flags */ - dict_tf_set(flags, - innodb_row_format, - zip_ssize, - use_data_dir, - options->page_compressed, - options->page_compression_level == 0 ? - default_compression_level : options->page_compression_level, - options->atomic_writes); - - if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { - *flags2 |= DICT_TF2_TEMPORARY; - } - - if (use_tablespace) { - *flags2 |= DICT_TF2_USE_TABLESPACE; - } - - /* Set the flags2 when create table or alter tables */ - *flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; - DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - *flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); - - DBUG_RETURN(true); -} - -/*****************************************************************//** -Check engine specific table options not handled by SQL-parser. 
-@return NULL if valid, string if not */ -UNIV_INTERN -const char* -ha_innobase::check_table_options( - THD *thd, /*!< in: thread handle */ - TABLE* table, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info, /*!< in: more information of the - created table, contains also the - create statement string */ - const bool use_tablespace, /*!< in: use file par table */ - const ulint file_format) -{ - enum row_type row_format = table->s->row_type; - ha_table_option_struct *options= table->s->option_struct; - atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; - fil_encryption_t encrypt = (fil_encryption_t)options->encryption; - - if (encrypt != FIL_ENCRYPTION_DEFAULT && !use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTED requires innodb_file_per_table"); - return "ENCRYPTED"; - } - - if (encrypt == FIL_ENCRYPTION_OFF && srv_encrypt_tables == 2) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTED=OFF cannot be used when innodb_encrypt_tables=FORCE"); - return "ENCRYPTED"; - } - - /* Check page compression requirements */ - if (options->page_compressed) { - - if (row_format == ROW_TYPE_COMPRESSED) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED table can't have" - " ROW_TYPE=COMPRESSED"); - return "PAGE_COMPRESSED"; - } - - if (row_format == ROW_TYPE_REDUNDANT) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED table can't have" - " ROW_TYPE=REDUNDANT"); - return "PAGE_COMPRESSED"; - } - - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED requires" - " innodb_file_per_table."); - return "PAGE_COMPRESSED"; - } - - if (file_format < UNIV_FORMAT_B) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - 
HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED requires" - " innodb_file_format > Antelope."); - return "PAGE_COMPRESSED"; - } - - if (create_info->key_block_size) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED table can't have" - " key_block_size"); - return "PAGE_COMPRESSED"; - } - } - - /* Check page compression level requirements, some of them are - already checked above */ - if (options->page_compression_level != 0) { - if (options->page_compressed == false) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSION_LEVEL requires" - " PAGE_COMPRESSED"); - return "PAGE_COMPRESSION_LEVEL"; - } - - if (options->page_compression_level < 1 || options->page_compression_level > 9) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." - " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]", - options->page_compression_level); - return "PAGE_COMPRESSION_LEVEL"; - } - } - - /* If encryption is set up make sure that used key_id is found */ - if (encrypt == FIL_ENCRYPTION_ON || - (encrypt == FIL_ENCRYPTION_DEFAULT && srv_encrypt_tables)) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id - ); - return "ENCRYPTION_KEY_ID"; - - } - } - - /* Ignore nondefault key_id if encryption is set off */ - if (encrypt == FIL_ENCRYPTION_OFF && - options->encryption_key_id != THDVAR(thd, default_encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: Ignored ENCRYPTION_KEY_ID %u when encryption is disabled", - (uint)options->encryption_key_id - ); - options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; - } - - /* If default 
encryption is used make sure that used kay is found - from key file. */ - if (encrypt == FIL_ENCRYPTION_DEFAULT && - !srv_encrypt_tables && - options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) { - if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ENCRYPTION_KEY_ID %u not available", - (uint)options->encryption_key_id - ); - return "ENCRYPTION_KEY_ID"; - - } - } - - /* Check atomic writes requirements */ - if (awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { - if (!use_tablespace) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: ATOMIC_WRITES requires" - " innodb_file_per_table."); - return "ATOMIC_WRITES"; - } - } - - return 0; -} - -/*****************************************************************//** -Creates a new table to an InnoDB database. -@return error number */ -UNIV_INTERN -int -ha_innobase::create( -/*================*/ - const char* name, /*!< in: table name */ - TABLE* form, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info) /*!< in: more information of the - created table, contains also the - create statement string */ -{ - int error; - trx_t* parent_trx; - trx_t* trx; - int primary_key_no; - uint i; - char norm_name[FN_REFLEN]; /* {database}/{tablename} */ - char temp_path[FN_REFLEN]; /* absolute path of temp frm */ - char remote_path[FN_REFLEN]; /* absolute path of table */ - THD* thd = ha_thd(); - ib_int64_t auto_inc_value; - - /* Cache the global variable "srv_file_per_table" to a local - variable before using it. Note that "srv_file_per_table" - is not under dict_sys mutex protection, and could be changed - while creating the table. So we read the current value here - and make all further decisions based on this. 
*/ - bool use_tablespace = srv_file_per_table; - const ulint file_format = srv_file_format; - - /* Zip Shift Size - log2 - 9 of compressed page size, - zero for uncompressed */ - ulint flags; - ulint flags2; - dict_table_t* innobase_table = NULL; - - const char* stmt; - size_t stmt_len; - /* Cache table options */ - ha_table_option_struct *options= form->s->option_struct; - fil_encryption_t encrypt = (fil_encryption_t)options->encryption; - uint key_id = (uint)options->encryption_key_id; - - DBUG_ENTER("ha_innobase::create"); - - DBUG_ASSERT(thd != NULL); - DBUG_ASSERT(create_info != NULL); - - if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) { - DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS); - } else if (high_level_read_only) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - /* Create the table definition in InnoDB */ - - /* Validate table options not handled by the SQL-parser */ - if(check_table_options(thd, form, create_info, use_tablespace, - file_format)) { - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - - /* Validate create options if innodb_strict_mode is set. */ - if (create_options_are_invalid( - thd, form, create_info, use_tablespace)) { - DBUG_RETURN(HA_WRONG_CREATE_OPTION); - } - - if (!innobase_table_flags(form, create_info, - thd, use_tablespace, - &flags, &flags2)) { - DBUG_RETURN(-1); - } - - error = parse_table_name(name, create_info, flags, flags2, - norm_name, temp_path, remote_path); - if (error) { - DBUG_RETURN(error); - } - - /* Look for a primary key */ - primary_key_no = (form->s->primary_key != MAX_KEY ? - (int) form->s->primary_key : - -1); - - /* Our function innobase_get_mysql_key_number_for_index assumes - the primary key is always number 0, if it exists */ - ut_a(primary_key_no == -1 || primary_key_no == 0); - - /* Check for name conflicts (with reserved name) for - any user indices to be created. 
*/ - if (innobase_index_name_is_reserved(thd, form->key_info, - form->s->keys)) { - DBUG_RETURN(-1); - } - - if (row_is_magic_monitor_table(norm_name)) { - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_COMMAND, - "Using the table name %s to enable " - "diagnostic output is deprecated " - "and may be removed in future releases. " - "Use INFORMATION_SCHEMA or " - "PERFORMANCE_SCHEMA tables or " - "SET GLOBAL innodb_status_output=ON.", - dict_remove_db_name(norm_name)); - - /* Limit innodb monitor access to users with PROCESS privilege. - See http://bugs.mysql.com/32710 why we chose PROCESS. */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(HA_ERR_GENERIC); - } - } - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - trx = innobase_trx_allocate(thd); - - if (UNIV_UNLIKELY(trx->fake_changes)) { - innobase_commit_low(trx); - trx_free_for_mysql(trx); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); - } - - /* Latch the InnoDB data dictionary exclusively so that no deadlocks - or lock waits can happen in it during a table create operation. - Drop table etc. do this latching in row0mysql.cc. 
*/ - - row_mysql_lock_data_dictionary(trx); - - error = create_table_def(trx, form, norm_name, temp_path, - remote_path, flags, flags2, encrypt, key_id); - if (error) { - goto cleanup; - } - - /* Create the keys */ - - if (form->s->keys == 0 || primary_key_no == -1) { - /* Create an index which is used as the clustered index; - order the rows by their row id which is internally generated - by InnoDB */ - - error = create_clustered_index_when_no_primary( - trx, flags, norm_name); - if (error) { - goto cleanup; - } - } - - if (primary_key_no != -1) { - /* In InnoDB the clustered index must always be created - first */ - if ((error = create_index(trx, form, flags, norm_name, - (uint) primary_key_no))) { - goto cleanup; - } - } - - /* Create the ancillary tables that are common to all FTS indexes on - this table. */ - if (flags2 & DICT_TF2_FTS) { - enum fts_doc_id_index_enum ret; - - innobase_table = dict_table_open_on_name( - norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); - - ut_a(innobase_table); - - /* Check whether there already exists FTS_DOC_ID_INDEX */ - ret = innobase_fts_check_doc_id_index_in_def( - form->s->keys, form->key_info); - - switch (ret) { - case FTS_INCORRECT_DOC_ID_INDEX: - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_NAME_FOR_INDEX, - " InnoDB: Index name %s is reserved" - " for the unique index on" - " FTS_DOC_ID column for FTS" - " Document ID indexing" - " on table %s. 
Please check" - " the index definition to" - " make sure it is of correct" - " type\n", - FTS_DOC_ID_INDEX_NAME, - innobase_table->name); - - if (innobase_table->fts) { - fts_free(innobase_table); - } - - dict_table_close(innobase_table, TRUE, FALSE); - my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), - FTS_DOC_ID_INDEX_NAME); - error = -1; - goto cleanup; - case FTS_EXIST_DOC_ID_INDEX: - case FTS_NOT_EXIST_DOC_ID_INDEX: - break; - } - - dberr_t err = fts_create_common_tables( - trx, innobase_table, norm_name, - (ret == FTS_EXIST_DOC_ID_INDEX)); - - error = convert_error_code_to_mysql(err, 0, NULL); - - dict_table_close(innobase_table, TRUE, FALSE); - - if (error) { - goto cleanup; - } - } - - for (i = 0; i < form->s->keys; i++) { - - if (i != static_cast<uint>(primary_key_no)) { - - if ((error = create_index(trx, form, flags, - norm_name, i))) { - goto cleanup; - } - } - } - - /* Cache all the FTS indexes on this table in the FTS specific - structure. They are used for FTS indexed column update handling. */ - if (flags2 & DICT_TF2_FTS) { - fts_t* fts = innobase_table->fts; - - ut_a(fts != NULL); - - dict_table_get_all_fts_indexes(innobase_table, fts->indexes); - } - - stmt = innobase_get_stmt(thd, &stmt_len); - - if (stmt) { - dberr_t err = row_table_add_foreign_constraints( - trx, stmt, stmt_len, norm_name, - create_info->options & HA_LEX_CREATE_TMP_TABLE); - - switch (err) { - - case DB_PARENT_NO_INDEX: - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_CANNOT_ADD_FOREIGN, - "Create table '%s' with foreign key constraint" - " failed. There is no index in the referenced" - " table where the referenced columns appear" - " as the first columns.\n", norm_name); - break; - - case DB_CHILD_NO_INDEX: - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_CANNOT_ADD_FOREIGN, - "Create table '%s' with foreign key constraint" - " failed. 
There is no index in the referencing" - " table where referencing columns appear" - " as the first columns.\n", norm_name); - break; - default: - break; - } - - error = convert_error_code_to_mysql(err, flags, NULL); - - if (error) { - goto cleanup; - } - } - - innobase_commit_low(trx); - - row_mysql_unlock_data_dictionary(trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - innobase_table = dict_table_open_on_name( - norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); - - DBUG_ASSERT(innobase_table != 0); - - innobase_copy_frm_flags_from_create_info(innobase_table, create_info); - - dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE); - - if (innobase_table) { - /* We update the highest file format in the system table - space, if this table has higher file format setting. */ - - trx_sys_file_format_max_upgrade( - (const char**) &innobase_file_format_max, - dict_table_get_format(innobase_table)); - } - - /* Load server stopword into FTS cache */ - if (flags2 & DICT_TF2_FTS) { - if (!innobase_fts_load_stopword(innobase_table, NULL, thd)) { - dict_table_close(innobase_table, FALSE, FALSE); - srv_active_wake_master_thread(); - trx_free_for_mysql(trx); - DBUG_RETURN(-1); - } - } - - /* Note: We can't call update_thd() as prebuilt will not be - setup at this stage and so we use thd. */ - - /* We need to copy the AUTOINC value from the old table if - this is an ALTER|OPTIMIZE TABLE or CREATE INDEX because CREATE INDEX - does a table copy too. If query was one of : - - CREATE TABLE ...AUTO_INCREMENT = x; or - ALTER TABLE...AUTO_INCREMENT = x; or - OPTIMIZE TABLE t; or - CREATE INDEX x on t(...); - - Find out a table definition from the dictionary and get - the current value of the auto increment field. Set a new - value to the auto increment field if the value is greater - than the maximum value in the column. 
*/ - - if (((create_info->used_fields & HA_CREATE_USED_AUTO) - || thd_sql_command(thd) == SQLCOM_ALTER_TABLE - || thd_sql_command(thd) == SQLCOM_OPTIMIZE - || thd_sql_command(thd) == SQLCOM_CREATE_INDEX) - && create_info->auto_increment_value > 0) { - - auto_inc_value = create_info->auto_increment_value; - - dict_table_autoinc_lock(innobase_table); - dict_table_autoinc_initialize(innobase_table, auto_inc_value); - dict_table_autoinc_unlock(innobase_table); - } - - dict_table_close(innobase_table, FALSE, FALSE); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_free_for_mysql(trx); - - DBUG_RETURN(0); - -cleanup: - trx_rollback_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); -} - -/*****************************************************************//** -Discards or imports an InnoDB tablespace. -@return 0 == success, -1 == error */ -UNIV_INTERN -int -ha_innobase::discard_or_import_tablespace( -/*======================================*/ - my_bool discard) /*!< in: TRUE if discard, else import */ -{ - dberr_t err; - dict_table_t* dict_table; - - DBUG_ENTER("ha_innobase::discard_or_import_tablespace"); - - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - if (high_level_read_only) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - if (UNIV_UNLIKELY(prebuilt->trx->fake_changes)) { - DBUG_RETURN(HA_ERR_WRONG_COMMAND); - } - - dict_table = prebuilt->table; - - if (dict_table->space == TRX_SYS_SPACE) { - - ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_IN_SYSTEM_TABLESPACE, - table->s->table_name.str); - - DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE); - } - - trx_start_if_not_started(prebuilt->trx); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads. 
*/ - trx_search_latch_release_if_reserved(prebuilt->trx); - - /* Obtain an exclusive lock on the table. */ - err = row_mysql_lock_table( - prebuilt->trx, dict_table, LOCK_X, - discard ? "setting table lock for DISCARD TABLESPACE" - : "setting table lock for IMPORT TABLESPACE"); - - if (err != DB_SUCCESS) { - /* unable to lock the table: do nothing */ - } else if (discard) { - - /* Discarding an already discarded tablespace should be an - idempotent operation. Also, if the .ibd file is missing the - user may want to set the DISCARD flag in order to IMPORT - a new tablespace. */ - - if (!dict_table->is_readable()) { - ib_senderrf( - prebuilt->trx->mysql_thd, - IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING, - table->s->table_name.str); - } - - err = row_discard_tablespace_for_mysql( - dict_table->name, prebuilt->trx); - - } else if (dict_table->is_readable()) { - /* Commit the transaction in order to - release the table lock. */ - trx_commit_for_mysql(prebuilt->trx); - - ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_EXISTS, table->s->table_name.str); - - DBUG_RETURN(HA_ERR_TABLE_EXIST); - } else { - err = row_import_for_mysql(dict_table, prebuilt); - - if (err == DB_SUCCESS) { - - if (table->found_next_number_field) { - dict_table_autoinc_lock(dict_table); - innobase_initialize_autoinc(); - dict_table_autoinc_unlock(dict_table); - } - - info(HA_STATUS_TIME - | HA_STATUS_CONST - | HA_STATUS_VARIABLE - | HA_STATUS_AUTO); - - fil_crypt_set_encrypt_tables(srv_encrypt_tables); - } - } - - /* Commit the transaction in order to release the table lock. */ - trx_commit_for_mysql(prebuilt->trx); - - if (err == DB_SUCCESS && !discard - && dict_stats_is_persistent_enabled(dict_table)) { - dberr_t ret; - - /* Adjust the persistent statistics. 
*/ - ret = dict_stats_update(dict_table, - DICT_STATS_RECALC_PERSISTENT); - - if (ret != DB_SUCCESS) { - push_warning_printf( - ha_thd(), - Sql_condition::WARN_LEVEL_WARN, - ER_ALTER_INFO, - "Error updating stats for table '%s'" - " after table rebuild: %s", - dict_table->name, ut_strerr(ret)); - } - } - - DBUG_RETURN(convert_error_code_to_mysql(err, dict_table->flags, NULL)); -} - -/*****************************************************************//** -Deletes all rows of an InnoDB table. -@return error number */ -UNIV_INTERN -int -ha_innobase::truncate() -/*===================*/ -{ - dberr_t err; - int error; - - DBUG_ENTER("ha_innobase::truncate"); - - if (high_level_read_only) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created, and update prebuilt->trx */ - - update_thd(ha_thd()); - - DBUG_ASSERT(share->ib_table == prebuilt->table); - - if (UNIV_UNLIKELY(prebuilt->trx->fake_changes)) { - DBUG_RETURN(HA_ERR_WRONG_COMMAND); - } - - if (!trx_is_started(prebuilt->trx)) { - ++prebuilt->trx->will_lock; - } - /* Truncate the table in InnoDB */ - - err = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx); - - switch (err) { - - case DB_TABLESPACE_DELETED: - case DB_TABLESPACE_NOT_FOUND: - ib_senderrf( - prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - (err == DB_TABLESPACE_DELETED ? - ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING), - table->s->table_name.str); - table->status = STATUS_NOT_FOUND; - error = HA_ERR_NO_SUCH_TABLE; - break; - - default: - error = convert_error_code_to_mysql( - err, prebuilt->table->flags, - prebuilt->trx->mysql_thd); - table->status = STATUS_NOT_FOUND; - break; - } - DBUG_RETURN(error); -} - -/*****************************************************************//** -Drops a table from an InnoDB database. Before calling this function, -MySQL calls innobase_commit to commit the transaction of the current user. 
-Then the current user cannot have locks set on the table. Drop table -operation inside InnoDB will remove all locks any user has on the table -inside InnoDB. -@return error number */ -UNIV_INTERN -int -ha_innobase::delete_table( -/*======================*/ - const char* name) /*!< in: table name */ -{ - ulint name_len; - dberr_t err; - trx_t* parent_trx; - trx_t* trx; - THD* thd = ha_thd(); - char norm_name[FN_REFLEN]; - - DBUG_ENTER("ha_innobase::delete_table"); - - DBUG_EXECUTE_IF( - "test_normalize_table_name_low", - test_normalize_table_name_low(); - ); - DBUG_EXECUTE_IF( - "test_ut_format_name", - test_ut_format_name(); - ); - - /* Strangely, MySQL passes the table name without the '.frm' - extension, in contrast to ::create */ - normalize_table_name(norm_name, name); - - if (srv_read_only_mode - || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (row_is_magic_monitor_table(norm_name) - && check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(HA_ERR_GENERIC); - } - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - trx = innobase_trx_allocate(thd); - - if (UNIV_UNLIKELY(trx->fake_changes)) { - innobase_commit_low(trx); - trx_free_for_mysql(trx); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); - } - - name_len = strlen(name); - - ut_a(name_len < 1000); - - /* Either the transaction is already flagged as a locking transaction - or it hasn't been started yet. */ - - ut_a(!trx_is_started(trx) || trx->will_lock > 0); - - /* We are doing a DDL operation. 
*/ - ++trx->will_lock; - trx->ddl = true; - - /* Drop the table in InnoDB */ - err = row_drop_table_for_mysql( - norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB, - FALSE); - - - if (err == DB_TABLE_NOT_FOUND - && innobase_get_lower_case_table_names() == 1) { - char* is_part = NULL; -#ifdef __WIN__ - is_part = strstr(norm_name, "#p#"); -#else - is_part = strstr(norm_name, "#P#"); -#endif /* __WIN__ */ - - if (is_part) { - char par_case_name[FN_REFLEN]; - -#ifndef __WIN__ - /* Check for the table using lower - case name, including the partition - separator "P" */ - strcpy(par_case_name, norm_name); - innobase_casedn_str(par_case_name); -#else - /* On Windows platfrom, check - whether there exists table name in - system table whose name is - not being normalized to lower case */ - normalize_table_name_low( - par_case_name, name, FALSE); -#endif - err = row_drop_table_for_mysql( - par_case_name, trx, - thd_sql_command(thd) == SQLCOM_DROP_DB, - FALSE); - } - } - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - innobase_commit_low(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL)); -} - -/*****************************************************************//** -Defragment table. 
-@return error number */ -UNIV_INTERN -int -ha_innobase::defragment_table( -/*==========================*/ - const char* name, /*!< in: table name */ - const char* index_name, /*!< in: index name */ - bool async) /*!< in: whether to wait until finish */ -{ - char norm_name[FN_REFLEN]; - dict_table_t* table = NULL; - dict_index_t* index = NULL; - ibool one_index = (index_name != 0); - int ret = 0; - dberr_t err = DB_SUCCESS; - - if (!srv_defragment) { - return ER_FEATURE_DISABLED; - } - - normalize_table_name(norm_name, name); - - table = dict_table_open_on_name(norm_name, FALSE, - FALSE, DICT_ERR_IGNORE_NONE); - - for (index = dict_table_get_first_index(table); index; - index = dict_table_get_next_index(index)) { - - if (dict_index_is_corrupted(index)) { - continue; - } - - if (index->page == FIL_NULL) { - /* Do not defragment auxiliary tables related - to FULLTEXT INDEX. */ - ut_ad(index->type & DICT_FTS); - continue; - } - - if (one_index && strcasecmp(index_name, index->name) != 0) { - continue; - } - - if (btr_defragment_find_index(index)) { - // We borrow this error code. When the same index is - // already in the defragmentation queue, issue another - // defragmentation only introduces overhead. We return - // an error here to let the user know this is not - // necessary. Note that this will fail a query that's - // trying to defragment a full table if one of the - // indicies in that table is already in defragmentation. - // We choose this behavior so user is aware of this - // rather than silently defragment other indicies of - // that table. - ret = ER_SP_ALREADY_EXISTS; - break; - } - - os_event_t event = btr_defragment_add_index(index, async, &err); - - if (err != DB_SUCCESS) { - push_warning_printf( - current_thd, - Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_TABLE, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue checking table.", - index->table->name); - - ret = convert_error_code_to_mysql(err, 0, current_thd); - break; - } - - if (!async && event) { - while(os_event_wait_time(event, 1000000)) { - if (thd_killed(current_thd)) { - btr_defragment_remove_index(index); - ret = ER_QUERY_INTERRUPTED; - break; - } - } - os_event_free(event); - } - - if (ret) { - break; - } - - if (one_index) { - one_index = FALSE; - break; - } - } - - dict_table_close(table, FALSE, FALSE); - - if (ret == 0 && one_index) { - ret = ER_NO_SUCH_INDEX; - } - - return ret; -} - -/*****************************************************************//** -Removes all tables in the named database inside InnoDB. */ -static -void -innobase_drop_database( -/*===================*/ - handlerton* hton, /*!< in: handlerton of Innodb */ - char* path) /*!< in: database path; inside InnoDB the name - of the last directory in the path is used as - the database name: for example, in - 'mysql/data/test' the database name is 'test' */ -{ - ulint len = 0; - trx_t* trx; - char* ptr; - char* namebuf; - THD* thd = current_thd; - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - DBUG_ASSERT(hton == innodb_hton_ptr); - - if (srv_read_only_mode) { - return; - } - - /* In the Windows plugin, thd = current_thd is always NULL */ - if (thd) { - trx_t* parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT - query, release possible adaptive hash latch to avoid - deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - } - - ptr = strend(path) - 2; - - while (ptr >= path && *ptr != '\\' && *ptr != '/') { - ptr--; - len++; - } - - ptr++; - namebuf = (char*) my_malloc((uint) len + 2, MYF(0)); - - memcpy(namebuf, ptr, len); - namebuf[len] = '/'; - namebuf[len + 1] = '\0'; -#ifdef __WIN__ - innobase_casedn_str(namebuf); -#endif - trx = innobase_trx_allocate(thd); - - if (UNIV_UNLIKELY(trx->fake_changes)) { - 
my_free(namebuf); - innobase_commit_low(trx); - trx_free_for_mysql(trx); - return; /* ignore */ - } - - /* Either the transaction is already flagged as a locking transaction - or it hasn't been started yet. */ - - ut_a(!trx_is_started(trx) || trx->will_lock > 0); - - /* We are doing a DDL operation. */ - ++trx->will_lock; - - row_drop_database_for_mysql(namebuf, trx); - - my_free(namebuf); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - innobase_commit_low(trx); - trx_free_for_mysql(trx); -} - -/*********************************************************************//** -Renames an InnoDB table. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -innobase_rename_table( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - const char* from, /*!< in: old name of the table */ - const char* to) /*!< in: new name of the table */ -{ - dberr_t error; - char norm_to[FN_REFLEN]; - char norm_from[FN_REFLEN]; - - DBUG_ENTER("innobase_rename_table"); - DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); - - ut_ad(!srv_read_only_mode); - - normalize_table_name(norm_to, to); - normalize_table_name(norm_from, from); - - DEBUG_SYNC_C("innodb_rename_table_ready"); - - trx_start_if_not_started(trx); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations. */ - - row_mysql_lock_data_dictionary(trx); - - /* Transaction must be flagged as a locking transaction or it hasn't - been started yet. 
*/ - - ut_a(trx->will_lock > 0); - - error = row_rename_table_for_mysql( - norm_from, norm_to, trx, TRUE); - - if (error != DB_SUCCESS) { - if (error == DB_TABLE_NOT_FOUND - && innobase_get_lower_case_table_names() == 1) { - char* is_part = NULL; -#ifdef __WIN__ - is_part = strstr(norm_from, "#p#"); -#else - is_part = strstr(norm_from, "#P#"); -#endif /* __WIN__ */ - - if (is_part) { - char par_case_name[FN_REFLEN]; -#ifndef __WIN__ - /* Check for the table using lower - case name, including the partition - separator "P" */ - strcpy(par_case_name, norm_from); - innobase_casedn_str(par_case_name); -#else - /* On Windows platfrom, check - whether there exists table name in - system table whose name is - not being normalized to lower case */ - normalize_table_name_low( - par_case_name, from, FALSE); -#endif - trx_start_if_not_started(trx); - error = row_rename_table_for_mysql( - par_case_name, norm_to, trx, TRUE); - } - } - - if (error == DB_SUCCESS) { -#ifndef __WIN__ - sql_print_warning("Rename partition table %s " - "succeeds after converting to lower " - "case. The table may have " - "been moved from a case " - "in-sensitive file system.\n", - norm_from); -#else - sql_print_warning("Rename partition table %s " - "succeeds after skipping the step to " - "lower case the table name. " - "The table may have been " - "moved from a case sensitive " - "file system.\n", - norm_from); -#endif /* __WIN__ */ - } - } - - row_mysql_unlock_data_dictionary(trx); - - /* Flush the log to reduce probability that the .frm - files and the InnoDB data dictionary get out-of-sync - if the user runs with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - DBUG_RETURN(error); -} - -/*********************************************************************//** -Renames an InnoDB table. 
-@return 0 or error code */ -UNIV_INTERN -int -ha_innobase::rename_table( -/*======================*/ - const char* from, /*!< in: old name of the table */ - const char* to) /*!< in: new name of the table */ -{ - trx_t* trx; - dberr_t error; - trx_t* parent_trx; - THD* thd = ha_thd(); - - DBUG_ENTER("ha_innobase::rename_table"); - - if (high_level_read_only) { - ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - trx = innobase_trx_allocate(thd); - if (UNIV_UNLIKELY(trx->fake_changes)) { - innobase_commit_low(trx); - trx_free_for_mysql(trx); - DBUG_RETURN(HA_ERR_WRONG_COMMAND); - } - - /* We are doing a DDL operation. */ - ++trx->will_lock; - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - - error = innobase_rename_table(trx, from, to); - - DEBUG_SYNC(thd, "after_innobase_rename_table"); - - innobase_commit_low(trx); - trx_free_for_mysql(trx); - - if (error == DB_SUCCESS) { - char norm_from[MAX_FULL_NAME_LEN]; - char norm_to[MAX_FULL_NAME_LEN]; - char errstr[512]; - dberr_t ret; - - normalize_table_name(norm_from, from); - normalize_table_name(norm_to, to); - - ret = dict_stats_rename_table(norm_from, norm_to, - errstr, sizeof(errstr)); - - if (ret != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: %s\n", errstr); - - push_warning(thd, Sql_condition::WARN_LEVEL_WARN, - ER_LOCK_WAIT_TIMEOUT, errstr); - } - } - - /* Add a special case to handle the Duplicated Key error - and return DB_ERROR instead. - This is to avoid a possible SIGSEGV error from mysql error - handling code. 
Currently, mysql handles the Duplicated Key - error by re-entering the storage layer and getting dup key - info by calling get_dup_key(). This operation requires a valid - table handle ('row_prebuilt_t' structure) which could no - longer be available in the error handling stage. The suggested - solution is to report a 'table exists' error message (since - the dup key error here is due to an existing table whose name - is the one we are trying to rename to) and return the generic - error code. */ - if (error == DB_DUPLICATE_KEY) { - my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to); - - error = DB_ERROR; - } - - DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL)); -} - -/*********************************************************************//** -Estimates the number of index records in a range. -@return estimated number of rows */ -UNIV_INTERN -ha_rows -ha_innobase::records_in_range( -/*==========================*/ - uint keynr, /*!< in: index number */ - key_range *min_key, /*!< in: start key value of the - range, may also be 0 */ - key_range *max_key) /*!< in: range end key val, may - also be 0 */ -{ - KEY* key; - dict_index_t* index; - dtuple_t* range_start; - dtuple_t* range_end; - ib_int64_t n_rows; - ulint mode1; - ulint mode2; - mem_heap_t* heap; - - DBUG_ENTER("records_in_range"); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - prebuilt->trx->op_info = (char*)"estimating records in index range"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - active_index = keynr; - - key = table->key_info + active_index; - - index = innobase_get_index(keynr); - - /* There exists possibility of not being able to find requested - index due to inconsistency between MySQL and InoDB dictionary info. 
- Necessary message should have been printed in innobase_get_index() */ - if (dict_table_is_discarded(prebuilt->table)) { - n_rows = HA_POS_ERROR; - goto func_exit; - } - if (UNIV_UNLIKELY(!index)) { - n_rows = HA_POS_ERROR; - goto func_exit; - } - if (dict_index_is_corrupted(index)) { - n_rows = HA_ERR_INDEX_CORRUPT; - goto func_exit; - } - if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) { - n_rows = HA_ERR_TABLE_DEF_CHANGED; - goto func_exit; - } - - heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t) - + sizeof(dtuple_t))); - - range_start= dtuple_create(heap, key->ext_key_parts); - dict_index_copy_types(range_start, index, key->ext_key_parts); - - range_end= dtuple_create(heap, key->ext_key_parts); - dict_index_copy_types(range_end, index, key->ext_key_parts); - - row_sel_convert_mysql_key_to_innobase( - range_start, - prebuilt->srch_key_val1, - prebuilt->srch_key_val_len, - index, - (byte*) (min_key ? min_key->key : - (const uchar*) 0), - (ulint) (min_key ? min_key->length : 0), - prebuilt->trx); - DBUG_ASSERT(min_key - ? range_start->n_fields > 0 - : range_start->n_fields == 0); - - row_sel_convert_mysql_key_to_innobase( - range_end, - prebuilt->srch_key_val2, - prebuilt->srch_key_val_len, - index, - (byte*) (max_key ? max_key->key : - (const uchar*) 0), - (ulint) (max_key ? max_key->length : 0), - prebuilt->trx); - DBUG_ASSERT(max_key - ? range_end->n_fields > 0 - : range_end->n_fields == 0); - - mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag : - HA_READ_KEY_EXACT); - mode2 = convert_search_mode_to_innobase(max_key ? 
max_key->flag : - HA_READ_KEY_EXACT); - - if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) { - - n_rows = btr_estimate_n_rows_in_range(index, range_start, - mode1, range_end, - mode2, prebuilt->trx); - } else { - - n_rows = HA_POS_ERROR; - } - - mem_heap_free(heap); - -func_exit: - - prebuilt->trx->op_info = (char*)""; - - /* The MySQL optimizer seems to believe an estimate of 0 rows is - always accurate and may return the result 'Empty set' based on that. - The accuracy is not guaranteed, and even if it were, for a locking - read we should anyway perform the search to set the next-key lock. - Add 1 to the value to make sure MySQL does not make the assumption! */ - - if (n_rows == 0) { - n_rows = 1; - } - - DBUG_RETURN((ha_rows) n_rows); -} - -/*********************************************************************//** -Gives an UPPER BOUND to the number of rows in a table. This is used in -filesort.cc. -@return upper bound of rows */ -UNIV_INTERN -ha_rows -ha_innobase::estimate_rows_upper_bound() -/*====================================*/ -{ - const dict_index_t* index; - ulonglong estimate; - ulonglong local_data_file_length; - ulint stat_n_leaf_pages; - - DBUG_ENTER("estimate_rows_upper_bound"); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. 
*/ - - update_thd(ha_thd()); - - prebuilt->trx->op_info = "calculating upper bound for table rows"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - index = dict_table_get_first_index(prebuilt->table); - - stat_n_leaf_pages = index->stat_n_leaf_pages; - - ut_a(stat_n_leaf_pages > 0); - - local_data_file_length = - ((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE; - - /* Calculate a minimum length for a clustered index record and from - that an upper bound for the number of rows. Since we only calculate - new statistics in row0mysql.cc when a table has grown by a threshold - factor, we must add a safety factor 2 in front of the formula below. */ - - estimate = 2 * local_data_file_length - / dict_index_calc_min_rec_len(index); - - prebuilt->trx->op_info = ""; - - /* Set num_rows less than MERGEBUFF to simulate the case where we do - not have enough space to merge the externally sorted file blocks. */ - DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF", - estimate = 2; - DBUG_SET("-d,set_num_rows_lt_MERGEBUFF"); - ); - - DBUG_RETURN((ha_rows) estimate); -} - -/*********************************************************************//** -How many seeks it will take to read through the table. This is to be -comparable to the number returned by records_in_range so that we can -decide if we should scan the table or use keys. -@return estimated time measured in disk seeks */ -UNIV_INTERN -double -ha_innobase::scan_time() -/*====================*/ -{ - /* Since MySQL seems to favor table scans too much over index - searches, we pretend that a sequential read takes the same time - as a random disk read, that is, we do not divide the following - by 10, which would be physically realistic. */ - - /* The locking below is disabled for performance reasons. 
Without - it we could end up returning uninitialized value to the caller, - which in the worst case could make some query plan go bogus or - issue a Valgrind warning. */ -#if 0 - /* avoid potential lock order violation with dict_table_stats_lock() - below */ - update_thd(ha_thd()); - trx_search_latch_release_if_reserved(prebuilt->trx); -#endif - - ulint stat_clustered_index_size; - -#if 0 - dict_table_stats_lock(prebuilt->table, RW_S_LATCH); -#endif - - ut_a(prebuilt->table->stat_initialized); - - stat_clustered_index_size = prebuilt->table->stat_clustered_index_size; - -#if 0 - dict_table_stats_unlock(prebuilt->table, RW_S_LATCH); -#endif - - return((double) stat_clustered_index_size); -} - -/******************************************************************//** -Calculate the time it takes to read a set of ranges through an index -This enables us to optimise reads for clustered indexes. -@return estimated time measured in disk seeks */ -UNIV_INTERN -double -ha_innobase::read_time( -/*===================*/ - uint index, /*!< in: key number */ - uint ranges, /*!< in: how many ranges */ - ha_rows rows) /*!< in: estimated number of rows in the ranges */ -{ - ha_rows total_rows; - double time_for_scan; - - if (index != table->s->primary_key) { - /* Not clustered */ - return(handler::read_time(index, ranges, rows)); - } - - /* Assume that the read time is proportional to the scan time for all - rows + at most one seek per range. */ - - time_for_scan = scan_time(); - - if ((total_rows = estimate_rows_upper_bound()) < rows) { - - return(time_for_scan); - } - - return(ranges + (double) rows / (double) total_rows * time_for_scan); -} - -/******************************************************************//** -Return the size of the InnoDB memory buffer. 
*/ -UNIV_INTERN -longlong -ha_innobase::get_memory_buffer_size() const -/*=======================================*/ -{ - return(innobase_buffer_pool_size); -} - -/*********************************************************************//** -Calculates the key number used inside MySQL for an Innobase index. We will -first check the "index translation table" for a match of the index to get -the index number. If there does not exist an "index translation table", -or not able to find the index in the translation table, then we will fall back -to the traditional way of looping through dict_index_t list to find a -match. In this case, we have to take into account if we generated a -default clustered index for the table -@return the key number used inside MySQL */ -static -int -innobase_get_mysql_key_number_for_index( -/*====================================*/ - INNOBASE_SHARE* share, /*!< in: share structure for index - translation table. */ - const TABLE* table, /*!< in: table in MySQL data - dictionary */ - dict_table_t* ib_table,/*!< in: table in Innodb data - dictionary */ - const dict_index_t* index) /*!< in: index */ -{ - const dict_index_t* ind; - unsigned int i; - - ut_a(index); - - /* If index does not belong to the table object of share structure - (ib_table comes from the share structure) search the index->table - object instead */ - if (index->table != ib_table) { - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); - } - - /* If index translation table exists, we will first check - the index through index translation table for a match. 
*/ - if (share->idx_trans_tbl.index_mapping) { - for (i = 0; i < share->idx_trans_tbl.index_count; i++) { - if (share->idx_trans_tbl.index_mapping[i] == index) { - return(i); - } - } - - /* Print an error message if we cannot find the index - in the "index translation table". */ - if (*index->name != TEMP_INDEX_PREFIX) { - sql_print_error("Cannot find index %s in InnoDB index " - "translation table.", index->name); - } - } - - /* If we do not have an "index translation table", or not able - to find the index in the translation table, we'll directly find - matching index with information from mysql TABLE structure and - InnoDB dict_index_t list */ - for (i = 0; i < table->s->keys; i++) { - ind = dict_table_get_index_on_name( - ib_table, table->key_info[i].name); - - if (index == ind) { - return(i); - } - } - - /* Loop through each index of the table and lock them */ - for (ind = dict_table_get_first_index(ib_table); - ind != NULL; - ind = dict_table_get_next_index(ind)) { - if (index == ind) { - /* Temp index is internal to InnoDB, that is - not present in the MySQL index list, so no - need to print such mismatch warning. */ - if (*(index->name) != TEMP_INDEX_PREFIX) { - sql_print_warning( - "Find index %s in InnoDB index list " - "but not its MySQL index number " - "It could be an InnoDB internal index.", - index->name); - } - return(-1); - } - } - - ut_error; - - return(-1); -} - -/*********************************************************************//** -Calculate Record Per Key value. 
Need to exclude the NULL value if
innodb_stats_method is set to "nulls_ignored"
@return estimated record per key value */
static
ha_rows
innodb_rec_per_key(
/*===============*/
	dict_index_t*	index,		/*!< in: dict_index_t structure */
	ulint		i,		/*!< in: the column we are
					calculating rec per key */
	ha_rows		records)	/*!< in: estimated total records */
{
	ut_a(index->table->stat_initialized);

	ut_ad(i < dict_index_get_n_unique(index));

	/* Number of distinct key values recorded for the first i + 1
	columns of the index. */
	const ib_uint64_t	n_diff = index->stat_n_diff_key_vals[i];

	if (n_diff == 0) {
		/* No distinct values recorded: every record may share the
		same key. */
		return(records);
	}

	if (srv_innodb_stats_method != SRV_STATS_NULLS_IGNORED) {
		DEBUG_SYNC_C("after_checking_for_0");
		return((ha_rows) (records / n_diff));
	}

	/* "nulls_ignored": derive the number of NULL values from the
	number of non-NULL ones. In theory the non-NULL count never exceeds
	the number of records, but as both are statistics they may disagree
	slightly, so the difference is clamped at zero. */
	const ib_uint64_t	n_non_null
		= index->stat_n_non_null_key_vals[i];
	const ib_uint64_t	n_null
		= (records < n_non_null) ? 0 : records - n_non_null;

	if (n_diff <= n_null) {
		/* At least as many NULL values as distinct values: the
		table consists mostly of NULL values. */
		return(1);
	}

	/* Exclude the rows with NULL values from both the record count and
	the distinct-value count. */
	return((ha_rows) ((records - n_null) / (n_diff - n_null)));
}

/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
-@return HA_ERR_* error code or 0 */ -UNIV_INTERN -int -ha_innobase::info_low( -/*==================*/ - uint flag, /*!< in: what information is requested */ - bool is_analyze) /*!< in: true when called for ANALYZE TABLE (see ha_innobase::analyze()); together with HA_STATUS_TIME it requests a recalculation of the statistics. ha_innobase::info() passes false. */ -{ - dict_table_t* ib_table; - ha_rows rec_per_key; - ib_uint64_t n_rows; - os_file_stat_t stat_info; - - DBUG_ENTER("info"); - - /* If we are forcing recovery at a high level, we will suppress - statistics calculation on tables, because that may crash the - server if an index is badly corrupted. NOTE(review): no test of srv_force_recovery follows this comment; the only tests visible in this function are the one that zeroes stats.delete_length and the one that jumps to func_exit before the HA_STATUS_ERRKEY handling. Confirm whether this comment is stale. */ - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(ha_thd()); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - prebuilt->trx->op_info = (char*)"returning various info to MySQL"; - - trx_search_latch_release_if_reserved(prebuilt->trx); - - ib_table = prebuilt->table; - DBUG_ASSERT(ib_table->n_ref_count > 0); - - if (flag & HA_STATUS_TIME) { - if (is_analyze || innobase_stats_on_metadata) { - - dict_stats_upd_option_t opt; - dberr_t ret; - - prebuilt->trx->op_info = "updating table statistics"; - - if (dict_stats_is_persistent_enabled(ib_table)) { - - if (is_analyze) { - - /* If this table is already queued for - background analyze, remove it from the - queue as we are about to do the same */ - if (!srv_read_only_mode) { - - dict_mutex_enter_for_mysql(); - dict_stats_recalc_pool_del( - ib_table); - dict_mutex_exit_for_mysql(); - } - - opt = DICT_STATS_RECALC_PERSISTENT; - } else { - /* This is e.g. 'SHOW INDEXES', fetch - the persistent stats from disk.
*/ - opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; - } - } else { - opt = DICT_STATS_RECALC_TRANSIENT; - } - - ut_ad(!mutex_own(&dict_sys->mutex)); - ret = dict_stats_update(ib_table, opt); - - if (ret != DB_SUCCESS) { - prebuilt->trx->op_info = ""; - DBUG_RETURN(HA_ERR_GENERIC); - } - - prebuilt->trx->op_info = - "returning various info to MySQL"; - } - - } - - if (flag & HA_STATUS_VARIABLE) { - - ulint page_size; - ulint stat_clustered_index_size; - ulint stat_sum_of_other_index_sizes; - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_lock(ib_table, RW_S_LATCH); - } - - ut_a(ib_table->stat_initialized); - - n_rows = ib_table->stat_n_rows; - - stat_clustered_index_size - = ib_table->stat_clustered_index_size; - - stat_sum_of_other_index_sizes - = ib_table->stat_sum_of_other_index_sizes; - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_unlock(ib_table, RW_S_LATCH); - } - - /* - The MySQL optimizer seems to assume in a left join that n_rows - is an accurate estimate if it is zero. Of course, it is not, - since we do not have any locks on the rows yet at this phase. - Since SHOW TABLE STATUS seems to call this function with the - HA_STATUS_TIME flag set, while the left join optimizer does not - set that flag, we add one to a zero value if the flag is not - set. That way SHOW TABLE STATUS will show the best estimate, - while the optimizer never sees the table empty. */ - - if (n_rows == 0 && !(flag & HA_STATUS_TIME)) { - n_rows++; - } - - /* Fix bug#40386: Not flushing query cache after truncate. - n_rows can not be 0 unless the table is empty, set to 1 - instead. The original problem of bug#29507 is actually - fixed in the server code. */ - if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) { - - n_rows = 1; - - /* We need to reset the prebuilt value too, otherwise - checks for values greater than the last value written - to the table will fail and the autoinc counter will - not be updated.
This will force write_row() into - attempting an update of the table's AUTOINC counter. */ - - prebuilt->autoinc_last_value = 0; - } - - page_size = dict_table_zip_size(ib_table); - if (page_size == 0) { - page_size = UNIV_PAGE_SIZE; - } - - stats.records = (ha_rows) n_rows; - stats.deleted = 0; - stats.data_file_length - = ((ulonglong) stat_clustered_index_size) - * page_size; - stats.index_file_length - = ((ulonglong) stat_sum_of_other_index_sizes) - * page_size; - - /* Since fsp_get_available_space_in_free_extents() is - acquiring latches inside InnoDB, we do not call it if we - are asked by MySQL to avoid locking. Another reason to - avoid the call is that it uses quite a lot of CPU. - See Bug#38185. */ - if (flag & HA_STATUS_NO_LOCK - || !(flag & HA_STATUS_VARIABLE_EXTRA)) { - /* We do not update delete_length if no - locking is requested so the "old" value can - remain. delete_length is initialized to 0 in - the ha_statistics' constructor. Also we only - need delete_length to be set when - HA_STATUS_VARIABLE_EXTRA is set */ - } else if (UNIV_UNLIKELY - (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) { - /* Avoid accessing the tablespace if - srv_force_recovery is set to a high value. */ - stats.delete_length = 0; - } else { - ullint avail_space; - - avail_space = fsp_get_available_space_in_free_extents( - ib_table->space); - - if (avail_space == ULLINT_UNDEFINED) { - THD* thd; - - thd = ha_thd(); - - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_CANT_GET_STAT, - "InnoDB: Trying to get the free " - "space for table %s but its " - "tablespace has been discarded or " - "the .ibd file is missing. Setting " - "the free space to zero.
" - "(errno: %M)", - ib_table->name, errno); - - stats.delete_length = 0; - } else { - stats.delete_length = avail_space * 1024; - } - } - - stats.check_time = 0; - stats.mrr_length_per_rec = ref_length + sizeof(void*); - - if (stats.records == 0) { - stats.mean_rec_length = 0; - } else { - stats.mean_rec_length = (ulong) - (stats.data_file_length / stats.records); - } - } - - if (flag & HA_STATUS_CONST) { - ulong i; - char path[FN_REFLEN]; - /* Verify the number of index in InnoDB and MySQL - matches up. If prebuilt->clust_index_was_generated - holds, InnoDB defines GEN_CLUST_INDEX internally */ - ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes) - - prebuilt->clust_index_was_generated; - if (table->s->keys < num_innodb_index) { - /* If there are too many indexes defined - inside InnoDB, ignore those that are being - created, because MySQL will only consider - the fully built indexes here. */ - - for (const dict_index_t* index - = UT_LIST_GET_FIRST(ib_table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - - /* First, online index creation is - completed inside InnoDB, and then - MySQL attempts to upgrade the - meta-data lock so that it can rebuild - the .frm file. If we get here in that - time frame, dict_index_is_online_ddl() - would not hold and the index would - still not be included in TABLE_SHARE.
*/ - if (*index->name == TEMP_INDEX_PREFIX) { - num_innodb_index--; - } - } - - if (table->s->keys < num_innodb_index - && innobase_fts_check_doc_id_index( - ib_table, NULL, NULL) - == FTS_EXIST_DOC_ID_INDEX) { - num_innodb_index--; - } - } - - if (table->s->keys != num_innodb_index) { - ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; - ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); - } - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_lock(ib_table, RW_S_LATCH); - } - - ut_a(ib_table->stat_initialized); - - for (i = 0; i < table->s->keys; i++) { - ulong j; - rec_per_key = 1; - /* We could get index quickly through internal - index mapping with the index translation table. - The identity of index (match up index name with - that of table->key_info[i]) is already verified in - innobase_get_index(). */ - dict_index_t* index = innobase_get_index(i); - - if (index == NULL) { - ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS; - ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true); - break; - } - - for (j = 0; j < table->key_info[i].ext_key_parts; j++) { - - if (table->key_info[i].flags & HA_FULLTEXT) { - /* The whole concept has no validity - for FTS indexes. */ - table->key_info[i].rec_per_key[j] = 1; - continue; - } - - if (j + 1 > index->n_uniq) { - sql_print_error( - "Index %s of %s has %lu columns" - " unique inside InnoDB, but " - "MySQL is asking statistics for" - " %lu columns. Have you mixed " - "up .frm files from different " - "installations?
" - "See " REFMAN - "innodb-troubleshooting.html\n", - index->name, - ib_table->name, - (unsigned long) - index->n_uniq, j + 1); - break; - } - - DBUG_EXECUTE_IF("ib_ha_innodb_stat_not_initialized", - index->table->stat_initialized = FALSE;); - - if (!ib_table->stat_initialized || - (index->table != ib_table || - !index->table->stat_initialized)) { - fprintf(stderr, - "InnoDB: Warning: Index %s points to table %s" - " and ib_table %s statistics is initialized %d " - " but index table %s initialized %d " - " mysql table is %s. Have you mixed " - "up .frm files from different " - "installations? " - "See " REFMAN - "innodb-troubleshooting.html\n", - index->name, - index->table->name, - ib_table->name, - ib_table->stat_initialized, - index->table->name, - index->table->stat_initialized, - table->s->table_name.str - ); - - /* This is better than - assert on below function */ - dict_stats_init(index->table); - } - - rec_per_key = innodb_rec_per_key( - index, j, stats.records); - - /* Since MySQL seems to favor table scans - too much over index searches, we pretend - index selectivity is 2 times better than - our estimate: */ - - rec_per_key = rec_per_key / 2; - - if (rec_per_key == 0) { - rec_per_key = 1; - } - - table->key_info[i].rec_per_key[j] = - rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 : - (ulong) rec_per_key; - } - - } - - if (!(flag & HA_STATUS_NO_LOCK)) { - dict_table_stats_unlock(ib_table, RW_S_LATCH); - } - - my_snprintf(path, sizeof(path), "%s/%s%s", - mysql_data_home, - table->s->normalized_path.str, - reg_ext); - - unpack_filename(path,path); - - /* Note that we do not know the access time of the table, - nor the CHECK TABLE time, nor the UPDATE or INSERT time.
*/ - - if (os_file_get_status(path, &stat_info, false) == DB_SUCCESS) { - stats.create_time = (ulong) stat_info.ctime; - } - } - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - /* At this force-recovery level the HA_STATUS_ERRKEY and HA_STATUS_AUTO handling below is skipped. */ - goto func_exit; - } - - if (flag & HA_STATUS_ERRKEY) { - const dict_index_t* err_index; - - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - - err_index = trx_get_error_info(prebuilt->trx); - - if (err_index) { - errkey = innobase_get_mysql_key_number_for_index( - share, table, ib_table, err_index); - } else { - errkey = (unsigned int) ( - (prebuilt->trx->error_key_num - == ULINT_UNDEFINED) - ? ~0 - : prebuilt->trx->error_key_num); - } - } - - if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) { - stats.auto_increment_value = innobase_peek_autoinc(); - } - -func_exit: - prebuilt->trx->op_info = (char*)""; - - DBUG_RETURN(0); -} - -/*********************************************************************//** -Returns statistics information of the table to the MySQL interpreter, -in various fields of the handle object. -@return HA_ERR_* error code or 0 */ -UNIV_INTERN -int -ha_innobase::info( -/*==============*/ - uint flag) /*!< in: what information is requested */ -{ - return(this->info_low(flag, false /* not ANALYZE */)); -} - -/**********************************************************************//** -Updates index cardinalities of the table, based on random dives into -each index tree. This does NOT calculate exact statistics on the table.
-@return HA_ADMIN_* error code or HA_ADMIN_OK */ -UNIV_INTERN -int -ha_innobase::analyze( -/*=================*/ - THD* thd, /*!< in: connection thread handle */ - HA_CHECK_OPT* check_opt) /*!< in: currently ignored */ -{ - int ret; - - DBUG_ASSERT(share->ib_table == prebuilt->table); - - /* Simply call this->info_low() with all the flags - and request recalculation of the statistics */ - ret = this->info_low( - HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE, - true /* this is ANALYZE */); - - DBUG_ASSERT(share->ib_table == prebuilt->table); - - if (ret != 0) { - return(HA_ADMIN_FAILED); - } - - return(HA_ADMIN_OK); -} - -/**********************************************************************//** -This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds -the table in MySQL. */ -UNIV_INTERN -int -ha_innobase::optimize( -/*==================*/ - THD* thd, /*!< in: connection thread handle */ - HA_CHECK_OPT* check_opt) /*!< in: currently ignored */ -{ - /*FTS-FIXME: Since MySQL doesn't support engine-specific commands, - we have to hijack some existing command in order to be able to test - the new admin commands added in InnoDB's FTS support. For now, we - use MySQL's OPTIMIZE command, normally mapped to ALTER TABLE in - InnoDB (so it recreates the table anew), and map it to OPTIMIZE. - - This works OK otherwise, but MySQL locks the entire table during - calls to OPTIMIZE, which is undesirable. 
*/ - - if (srv_defragment) { - int err; - - err = defragment_table(prebuilt->table->name, NULL, false); - - if (err == 0) { - return (HA_ADMIN_OK); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - err, - "InnoDB: Cannot defragment table %s: returned error code %d\n", - prebuilt->table->name, err); - - if(err == ER_SP_ALREADY_EXISTS) { - return (HA_ADMIN_OK); - } else { - return (HA_ADMIN_TRY_ALTER); - } - } - } - - if (innodb_optimize_fulltext_only) { - if (prebuilt->table->fts && prebuilt->table->fts->cache - && !dict_table_is_discarded(prebuilt->table)) { - fts_sync_table(prebuilt->table, false, true, false); - fts_optimize_table(prebuilt->table); - } - return(HA_ADMIN_OK); - } else { - - return(HA_ADMIN_TRY_ALTER); - } -} - -/*******************************************************************//** -Tries to check that an InnoDB table is not corrupted. If corruption is -noticed, prints to stderr information about it. In case of corruption -may also assert a failure and crash the server. 
-@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */ -UNIV_INTERN -int -ha_innobase::check( -/*===============*/ - THD* thd, /*!< in: user thread handle */ - HA_CHECK_OPT* check_opt) /*!< in: check options */ -{ - dict_index_t* index; - ulint n_rows; - ulint n_rows_in_table = ULINT_UNDEFINED; - bool is_ok = true; - ulint old_isolation_level; - ibool table_corrupted; - - DBUG_ENTER("ha_innobase::check"); - DBUG_ASSERT(thd == ha_thd()); - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - ut_a(prebuilt->trx == thd_to_trx(thd)); - - if (prebuilt->mysql_template == NULL) { - /* Build the template; we will use a dummy template - in index scans done in checking */ - - build_template(true); - } - - if (dict_table_is_discarded(prebuilt->table)) { - - ib_senderrf( - thd, - IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - - DBUG_RETURN(HA_ADMIN_CORRUPT); - - } else if (!prebuilt->table->is_readable() && - fil_space_get(prebuilt->table->space) == NULL) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_MISSING, - table->s->table_name.str); - - DBUG_RETURN(HA_ADMIN_CORRUPT); - } - - if (prebuilt->table->corrupted) { - char index_name[MAX_FULL_NAME_LEN + 1]; - /* If some previous operation has marked the table as - corrupted in memory, and has not propagated such to - clustered index, we will do so here */ - index = dict_table_get_first_index(prebuilt->table); - - if (!dict_index_is_corrupted(index)) { - row_mysql_lock_data_dictionary(prebuilt->trx); - dict_set_corrupted(index, prebuilt->trx, "CHECK TABLE"); - row_mysql_unlock_data_dictionary(prebuilt->trx); - } - - innobase_format_name(index_name, sizeof index_name, - index->name, TRUE); - - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_INDEX_CORRUPT, - "InnoDB: Index %s is marked as" - " corrupted", index_name); - - /* Now that the table is already marked as corrupted, - there is no need to check any index of this table */ - prebuilt->trx->op_info = ""; - -
DBUG_RETURN(HA_ADMIN_CORRUPT); - } - - prebuilt->trx->op_info = "checking table"; - - old_isolation_level = prebuilt->trx->isolation_level; - - /* We must run the index record counts at an isolation level - >= READ COMMITTED, because a dirty read can see a wrong number - of records in some index; to play safe, we use always - REPEATABLE READ here */ - - prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; - - /* Check whether the table is already marked as corrupted - before running the check table */ - table_corrupted = prebuilt->table->corrupted; - - /* Reset table->corrupted bit so that check table can proceed to - do additional check */ - prebuilt->table->corrupted = FALSE; - - for (index = dict_table_get_first_index(prebuilt->table); - index != NULL; - index = dict_table_get_next_index(index)) { - char index_name[MAX_FULL_NAME_LEN + 1]; - - /* If this is an index being created or dropped, skip */ - if (*index->name == TEMP_INDEX_PREFIX) { - continue; - } - - if (!(check_opt->flags & T_QUICK)) { - /* Enlarge the fatal lock wait timeout during - CHECK TABLE. */ - os_increment_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, - SRV_SEMAPHORE_WAIT_EXTENSION); - - dberr_t err = btr_validate_index(index, prebuilt->trx); - - /* Restore the fatal lock wait timeout after - CHECK TABLE. */ - os_decrement_counter_by_amount( - server_mutex, - srv_fatal_semaphore_wait_threshold, - SRV_SEMAPHORE_WAIT_EXTENSION); - - if (err != DB_SUCCESS) { - is_ok = false; - - innobase_format_name( - index_name, sizeof index_name, - index->name, TRUE); - - if (err == DB_DECRYPTION_FAILED) { - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_TABLE, - "Table %s is encrypted but encryption service or" - " used key_id is not available.
" - " Can't continue checking table.", - index->table->name); - } else { - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_NOT_KEYFILE, - "InnoDB: The B-tree of" - " index %s is corrupted.", - index_name); - } - - continue; - } - } - - /* Instead of invoking change_active_index(), set up - a dummy template for non-locking reads, disabling - access to the clustered index. */ - prebuilt->index = index; - - prebuilt->index_usable = row_merge_is_index_usable( - prebuilt->trx, prebuilt->index); - - DBUG_EXECUTE_IF( - "dict_set_index_corrupted", - if (!dict_index_is_clust(index)) { - prebuilt->index_usable = FALSE; - row_mysql_lock_data_dictionary(prebuilt->trx); - dict_set_corrupted(index, prebuilt->trx, "dict_set_index_corrupted"); - row_mysql_unlock_data_dictionary(prebuilt->trx); - }); - - if (UNIV_UNLIKELY(!prebuilt->index_usable)) { - innobase_format_name( - index_name, sizeof index_name, - prebuilt->index->name, TRUE); - - if (dict_index_is_corrupted(prebuilt->index)) { - push_warning_printf( - user_thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_INDEX_CORRUPT, - "InnoDB: Index %s is marked as" - " corrupted", - index_name); - is_ok = false; - } else { - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_TABLE_DEF_CHANGED, - "InnoDB: Insufficient history for" - " index %s", - index_name); - } - continue; - } - - prebuilt->sql_stat_start = TRUE; - prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE; - prebuilt->n_template = 0; - prebuilt->need_to_access_clustered = FALSE; - - dtuple_set_n_fields(prebuilt->search_tuple, 0); - - prebuilt->select_lock_type = LOCK_NONE; - - bool check_result - = row_check_index_for_mysql(prebuilt, index, &n_rows); - DBUG_EXECUTE_IF( - "dict_set_index_corrupted", - if (!(index->type & DICT_CLUSTERED)) { - check_result = false; - }); - - if (!check_result) { - innobase_format_name( - index_name, sizeof index_name, - index->name, TRUE); - - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, -
ER_NOT_KEYFILE, - "InnoDB: The B-tree of" - " index %s is corrupted.", - index_name); - is_ok = false; - dict_set_corrupted( - index, prebuilt->trx, "CHECK TABLE-check index"); - } - - if (thd_kill_level(user_thd)) { - break; - } - -#if 0 - fprintf(stderr, "%lu entries in index %s\n", n_rows, - index->name); -#endif - - if (index == dict_table_get_first_index(prebuilt->table)) { - n_rows_in_table = n_rows; - } else if (!(index->type & DICT_FTS) - && (n_rows != n_rows_in_table)) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_NOT_KEYFILE, - "InnoDB: Index '%-.200s' contains %lu" - " entries, should be %lu.", - index->name, - (ulong) n_rows, - (ulong) n_rows_in_table); - is_ok = false; - dict_set_corrupted( - index, prebuilt->trx, - "CHECK TABLE; Wrong count"); - } - } - - if (table_corrupted) { - /* If some previous operation has marked the table as - corrupted in memory, and has not propagated such to - clustered index, we will do so here */ - index = dict_table_get_first_index(prebuilt->table); - - if (!dict_index_is_corrupted(index)) { - dict_set_corrupted( - index, prebuilt->trx, "CHECK TABLE"); - } - prebuilt->table->corrupted = TRUE; - } - - /* Restore the original isolation level */ - prebuilt->trx->isolation_level = old_isolation_level; - - /* We validate the whole adaptive hash index for all tables - at every CHECK TABLE only when QUICK flag is not present. */ - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) { - push_warning(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NOT_KEYFILE, - "InnoDB: The adaptive hash index is corrupted."); - is_ok = false; - } -#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ - - prebuilt->trx->op_info = ""; - if (thd_kill_level(user_thd)) { - my_error(ER_QUERY_INTERRUPTED, MYF(0)); - } - - if (UNIV_UNLIKELY(prebuilt->table && prebuilt->table->corrupted)) { - DBUG_RETURN(HA_ADMIN_CORRUPT); - } - - DBUG_RETURN(is_ok ?
HA_ADMIN_OK : HA_ADMIN_CORRUPT); -} - -/*************************************************************//** -Adds information about free space in the InnoDB tablespace to a table comment -which is printed out when a user calls SHOW TABLE STATUS. Adds also info on -foreign keys. -NOTE(review): the value returned by fsp_get_available_space_in_free_extents() is appended as is, without the ULLINT_UNDEFINED test that ha_innobase::info_low() applies to the same call; confirm that this is intended. -@return table comment + InnoDB free space + info on foreign keys in a buffer allocated with my_malloc(); the original comment pointer is returned instead when the comment is longer than 64000 - 3 bytes or when the allocation fails */ -UNIV_INTERN -char* -ha_innobase::update_table_comment( -/*==============================*/ - const char* comment)/*!< in: table comment defined by user */ -{ - uint length = (uint) strlen(comment); - char* str=0; - long flen; - std::string fk_str; - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - if (length > 64000 - 3) { - return((char*) comment); /* string too long */ - } - - update_thd(ha_thd()); - - prebuilt->trx->op_info = (char*)"returning table comment"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - -#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \ - ( std::ostringstream() << std::dec << x ) ).str() - - fk_str.append("InnoDB free: "); - fk_str.append(SSTR(fsp_get_available_space_in_free_extents( - prebuilt->table->space))); - - fk_str.append(dict_print_info_on_foreign_keys( - FALSE, prebuilt->trx, - prebuilt->table)); - - flen = fk_str.length(); - - if (flen < 0) { - flen = 0; - } else if (length + flen + 3 > 64000) { - flen = 64000 - 3 - length; - } - - /* allocate buffer for the full string; the 3 extra bytes hold the "; " separator and the terminating NUL */ - - str = (char*) my_malloc(length + flen + 3, MYF(0)); - - if (str) { - char* pos = str + length; - if (length) { - memcpy(str, comment, length); - *pos++ = ';'; - *pos++ = ' '; - } - - memcpy(pos, fk_str.c_str(), flen); - pos[flen] = 0; - } - - prebuilt->trx->op_info = (char*)""; - - return(str ?
str : (char*) comment); -} - -/*******************************************************************//** -Gets the foreign key create info for a table stored in InnoDB. -@return own: character string in the form which can be inserted to the -CREATE TABLE statement, MUST be freed with -ha_innobase::free_foreign_key_create_info; NULL if the buffer could not be allocated */ -UNIV_INTERN -char* -ha_innobase::get_foreign_key_create_info(void) -/*==========================================*/ -{ - char* fk_str = 0; - - ut_a(prebuilt != NULL); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(ha_thd()); - - prebuilt->trx->op_info = (char*)"getting info on foreign keys"; - - /* In case MySQL calls this in the middle of a SELECT query, - release possible adaptive hash latch to avoid - deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - /* Collect the foreign key definitions into a string */ - std::string str = dict_print_info_on_foreign_keys( - TRUE, prebuilt->trx, - prebuilt->table); - - prebuilt->trx->op_info = (char*)""; - - /* Allocate buffer for the string */ - fk_str = (char*) my_malloc(str.length() + 1, MYF(0)); - - if (fk_str) { - memcpy(fk_str, str.c_str(), str.length()); - fk_str[str.length()]='\0'; - } - - return(fk_str); -} - - -/***********************************************************************//** -Maps an InnoDB foreign key constraint to an equivalent MySQL foreign key info.
-@return pointer to foreign key info; the structure is a copy made with thd_memdup() */ -static -FOREIGN_KEY_INFO* -get_foreign_key_info( -/*=================*/ - THD* thd, /*!< in: user thread handle */ - dict_foreign_t* foreign) /*!< in: foreign key constraint */ -{ - FOREIGN_KEY_INFO f_key_info; - FOREIGN_KEY_INFO* pf_key_info; - uint i = 0; - ulint len; - char tmp_buff[NAME_LEN+1]; - char name_buff[NAME_LEN+1]; - const char* ptr; - LEX_STRING* referenced_key_name; - LEX_STRING* name = NULL; - - ptr = dict_remove_db_name(foreign->id); - f_key_info.foreign_id = thd_make_lex_string(thd, 0, ptr, - (uint) strlen(ptr), 1); - - /* Name format: database name, '/', table name, '\0' */ - - /* Referenced (parent) database name */ - len = dict_get_db_name_len(foreign->referenced_table_name); - ut_a(len < sizeof(tmp_buff)); - ut_memcpy(tmp_buff, foreign->referenced_table_name, len); - tmp_buff[len] = 0; - - len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff)); - f_key_info.referenced_db = thd_make_lex_string( - thd, 0, name_buff, static_cast<unsigned int>(len), 1); - - /* Referenced (parent) table name */ - ptr = dict_remove_db_name(foreign->referenced_table_name); - len = filename_to_tablename(ptr, name_buff, sizeof(name_buff)); - f_key_info.referenced_table = thd_make_lex_string( - thd, 0, name_buff, static_cast<unsigned int>(len), 1); - - /* Dependent (child) database name */ - len = dict_get_db_name_len(foreign->foreign_table_name); - ut_a(len < sizeof(tmp_buff)); - ut_memcpy(tmp_buff, foreign->foreign_table_name, len); - tmp_buff[len] = 0; - - len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff)); - f_key_info.foreign_db = thd_make_lex_string( - thd, 0, name_buff, static_cast<unsigned int>(len), 1); - - /* Dependent (child) table name */ - ptr = dict_remove_db_name(foreign->foreign_table_name); - len = filename_to_tablename(ptr, name_buff, sizeof(name_buff)); - f_key_info.foreign_table = thd_make_lex_string( - thd, 0, name_buff, static_cast<unsigned int>(len), 1); - /* Copy the column name pairs. The loop body runs before its condition is tested, so foreign->n_fields is assumed to be at least 1 here. */ - do { - ptr =
foreign->foreign_col_names[i]; - name = thd_make_lex_string(thd, name, ptr, - (uint) strlen(ptr), 1); - f_key_info.foreign_fields.push_back(name); - ptr = foreign->referenced_col_names[i]; - name = thd_make_lex_string(thd, name, ptr, - (uint) strlen(ptr), 1); - f_key_info.referenced_fields.push_back(name); - } while (++i < foreign->n_fields); - /* ON DELETE action; RESTRICT when none of the three flags below is set */ - if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) { - len = 7; - ptr = "CASCADE"; - } else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) { - len = 8; - ptr = "SET NULL"; - } else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - len = 9; - ptr = "NO ACTION"; - } else { - len = 8; - ptr = "RESTRICT"; - } - - f_key_info.delete_method = thd_make_lex_string( - thd, f_key_info.delete_method, ptr, - static_cast<unsigned int>(len), 1); - /* ON UPDATE action; RESTRICT when none of the three flags below is set */ - if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { - len = 7; - ptr = "CASCADE"; - } else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - len = 8; - ptr = "SET NULL"; - } else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - len = 9; - ptr = "NO ACTION"; - } else { - len = 8; - ptr = "RESTRICT"; - } - - f_key_info.update_method = thd_make_lex_string( - thd, f_key_info.update_method, ptr, - static_cast<unsigned int>(len), 1); - - if (foreign->referenced_index && foreign->referenced_index->name) { - referenced_key_name = thd_make_lex_string(thd, - f_key_info.referenced_key_name, - foreign->referenced_index->name, - (uint) strlen(foreign->referenced_index->name), - 1); - } else { - referenced_key_name = NULL; - } - - f_key_info.referenced_key_name = referenced_key_name; - - pf_key_info = (FOREIGN_KEY_INFO*) thd_memdup(thd, &f_key_info, - sizeof(FOREIGN_KEY_INFO)); - - return(pf_key_info); -} - -/** Get the list of foreign keys referencing a specified table.
-@param thd The thread handle -@param path Path to the table -@param f_key_list[out] The list of foreign keys */ -static -void -fill_foreign_key_list(THD* thd, - const dict_table_t* table, - List<FOREIGN_KEY_INFO>* f_key_list) -{ - ut_ad(mutex_own(&dict_sys->mutex)); - - for (dict_foreign_set::const_iterator it - = table->referenced_set.begin(); - it != table->referenced_set.end(); ++it) { - - dict_foreign_t* foreign = *it; - - FOREIGN_KEY_INFO* pf_key_info - = get_foreign_key_info(thd, foreign); - if (pf_key_info) { - f_key_list->push_back(pf_key_info); - } - } -} - -/** Get the list of foreign keys referencing a specified table -table. -@param thd The thread handle -@param path Path to the table -@param f_key_list[out] The list of foreign keys - -@return error code or zero for success */ -static -int -innobase_get_parent_fk_list( - THD* thd, - const char* path, - List<FOREIGN_KEY_INFO>* f_key_list) -{ - ut_a(strlen(path) <= FN_REFLEN); - char norm_name[FN_REFLEN + 1]; - normalize_table_name(norm_name, path); - - trx_t* parent_trx = check_trx_exists(thd); - parent_trx->op_info = "getting list of referencing foreign keys"; - trx_search_latch_release_if_reserved(parent_trx); - - mutex_enter(&dict_sys->mutex); - - dict_table_t* table - = dict_table_open_on_name(norm_name, TRUE, FALSE, - static_cast<dict_err_ignore_t>( - DICT_ERR_IGNORE_INDEX_ROOT - | DICT_ERR_IGNORE_CORRUPT)); - if (!table) { - mutex_exit(&dict_sys->mutex); - return(HA_ERR_NO_SUCH_TABLE); - } - - fill_foreign_key_list(thd, table, f_key_list); - - dict_table_close(table, TRUE, FALSE); - - mutex_exit(&dict_sys->mutex); - parent_trx->op_info = ""; - return(0); -} - -/*******************************************************************//** -Gets the list of foreign keys in this table. 
-@return always 0, that is, always succeeds */ -UNIV_INTERN -int -ha_innobase::get_foreign_key_list( -/*==============================*/ - THD* thd, /*!< in: user thread handle */ - List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */ -{ - FOREIGN_KEY_INFO* pf_key_info; - dict_foreign_t* foreign; - - ut_a(prebuilt != NULL); - update_thd(ha_thd()); - - prebuilt->trx->op_info = "getting list of foreign keys"; - - trx_search_latch_release_if_reserved(prebuilt->trx); - - mutex_enter(&(dict_sys->mutex)); - - for (dict_foreign_set::iterator it - = prebuilt->table->foreign_set.begin(); - it != prebuilt->table->foreign_set.end(); - ++it) { - - foreign = *it; - - pf_key_info = get_foreign_key_info(thd, foreign); - if (pf_key_info) { - f_key_list->push_back(pf_key_info); - } - } - - mutex_exit(&(dict_sys->mutex)); - - prebuilt->trx->op_info = ""; - - return(0); -} - -/*******************************************************************//** -Gets the set of foreign keys where this table is the referenced table. -@return always 0, that is, always succeeds */ -UNIV_INTERN -int -ha_innobase::get_parent_foreign_key_list( -/*=====================================*/ - THD* thd, /*!< in: user thread handle */ - List<FOREIGN_KEY_INFO>* f_key_list) /*!< out: foreign key list */ -{ - ut_a(prebuilt != NULL); - update_thd(ha_thd()); - - prebuilt->trx->op_info = "getting list of referencing foreign keys"; - - trx_search_latch_release_if_reserved(prebuilt->trx); - - mutex_enter(&(dict_sys->mutex)); - fill_foreign_key_list(thd, prebuilt->table, f_key_list); - mutex_exit(&(dict_sys->mutex)); - - prebuilt->trx->op_info = ""; - - return(0); -} - -/*****************************************************************//** -Checks if ALTER TABLE may change the storage engine of the table. -Changing storage engines is not allowed for tables for which there -are foreign key constraints (parent or child tables). 
-@return TRUE if can switch engines */ -UNIV_INTERN -bool -ha_innobase::can_switch_engines(void) -/*=================================*/ -{ - bool can_switch; - - DBUG_ENTER("ha_innobase::can_switch_engines"); - update_thd(); - - prebuilt->trx->op_info = - "determining if there are foreign key constraints"; - row_mysql_freeze_data_dictionary(prebuilt->trx); - - can_switch = prebuilt->table->referenced_set.empty() - && prebuilt->table->foreign_set.empty(); - - row_mysql_unfreeze_data_dictionary(prebuilt->trx); - prebuilt->trx->op_info = ""; - - DBUG_RETURN(can_switch); -} - -/*******************************************************************//** -Checks if a table is referenced by a foreign key. The MySQL manual states that -a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a -delete is then allowed internally to resolve a duplicate key conflict in -REPLACE, not an update. -@return > 0 if referenced by a FOREIGN KEY */ -UNIV_INTERN -uint -ha_innobase::referenced_by_foreign_key(void) -/*========================================*/ -{ - if (dict_table_is_referenced_by_foreign_key(prebuilt->table)) { - - return(1); - } - - return(0); -} - -/*******************************************************************//** -Frees the foreign key create info for a table stored in InnoDB, if it is -non-NULL. */ -UNIV_INTERN -void -ha_innobase::free_foreign_key_create_info( -/*======================================*/ - char* str) /*!< in, own: create info string to free */ -{ - if (str) { - my_free(str); - } -} - -/*******************************************************************//** -Tells something additional to the handler about how to do things. 
-@return 0 or error number */ -UNIV_INTERN -int -ha_innobase::extra( -/*===============*/ - enum ha_extra_function operation) - /*!< in: HA_EXTRA_FLUSH or some other flag */ -{ - check_trx_exists(ha_thd()); - - /* Warning: since it is not sure that MySQL calls external_lock - before calling this function, the trx field in prebuilt can be - obsolete! */ - - switch (operation) { - case HA_EXTRA_FLUSH: - if (prebuilt->blob_heap) { - row_mysql_prebuilt_free_blob_heap(prebuilt); - } - - break; - case HA_EXTRA_RESET_STATE: - reset_template(); - thd_to_trx(ha_thd())->duplicates = 0; - break; - case HA_EXTRA_NO_KEYREAD: - prebuilt->read_just_key = 0; - break; - case HA_EXTRA_KEYREAD: - prebuilt->read_just_key = 1; - break; - case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - prebuilt->keep_other_fields_on_keyread = 1; - break; - - /* IMPORTANT: prebuilt->trx can be obsolete in - this method, because it is not sure that MySQL - calls external_lock before this method with the - parameters below. We must not invoke update_thd() - either, because the calling threads may change. - CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */ - case HA_EXTRA_INSERT_WITH_UPDATE: - thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE; - break; - case HA_EXTRA_NO_IGNORE_DUP_KEY: - thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE; - break; - case HA_EXTRA_WRITE_CAN_REPLACE: - thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE; - break; - case HA_EXTRA_WRITE_CANNOT_REPLACE: - thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE; - break; - default:/* Do nothing */ - ; - } - - return(0); -} - -/******************************************************************//** -*/ -UNIV_INTERN -int -ha_innobase::reset() -/*================*/ -{ - if (prebuilt->blob_heap) { - row_mysql_prebuilt_free_blob_heap(prebuilt); - } - - reset_template(); - ds_mrr.dsmrr_close(); - - /* TODO: This should really be reset in reset_template() but for now - it's safer to do it explicitly here. 
*/ - - /* This is a statement level counter. */ - prebuilt->autoinc_last_value = 0; - - return(0); -} - -/******************************************************************//** -MySQL calls this function at the start of each SQL statement inside LOCK -TABLES. Inside LOCK TABLES the ::external_lock method does not work to -mark SQL statement borders. Note also a special case: if a temporary table -is created inside LOCK TABLES, MySQL has not called external_lock() at all -on that table. -MySQL-5.0 also calls this before each statement in an execution of a stored -procedure. To make the execution more deterministic for binlogging, MySQL-5.0 -locks all tables involved in a stored procedure with full explicit table -locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the -procedure. -@return 0 or error code */ -UNIV_INTERN -int -ha_innobase::start_stmt( -/*====================*/ - THD* thd, /*!< in: handle to the user thread */ - thr_lock_type lock_type) -{ - trx_t* trx; - DBUG_ENTER("ha_innobase::start_stmt"); - - update_thd(thd); - - trx = prebuilt->trx; - - /* Here we release the search latch and the InnoDB thread FIFO ticket - if they were reserved. They should have been released already at the - end of the previous statement, but because inside LOCK TABLES the - lock count method does not work to mark the end of a SELECT statement, - that may not be the case. We MUST release the search latch before an - INSERT, for example. */ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* Reset the AUTOINC statement level counter for multi-row INSERTs. 
*/ - trx->n_autoinc_rows = 0; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(); - - if (dict_table_is_temporary(prebuilt->table) - && prebuilt->mysql_has_locked - && prebuilt->select_lock_type == LOCK_NONE) { - dberr_t error; - - switch (thd_sql_command(thd)) { - case SQLCOM_INSERT: - case SQLCOM_UPDATE: - case SQLCOM_DELETE: - init_table_handle_for_HANDLER(); - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - error = row_lock_table_for_mysql(prebuilt, NULL, 1); - - if (error != DB_SUCCESS) { - int st = convert_error_code_to_mysql( - error, 0, thd); - DBUG_RETURN(st); - } - break; - } - } - - if (!prebuilt->mysql_has_locked) { - /* This handle is for a temporary table created inside - this same LOCK TABLES; since MySQL does NOT call external_lock - in this case, we must use x-row locks inside InnoDB to be - prepared for an update of a row */ - - prebuilt->select_lock_type = LOCK_X; - - } else if (trx->isolation_level != TRX_ISO_SERIALIZABLE - && thd_sql_command(thd) == SQLCOM_SELECT - && lock_type == TL_READ) { - - /* For other than temporary tables, we obtain - no lock for consistent read (plain SELECT). */ - - prebuilt->select_lock_type = LOCK_NONE; - } else { - /* Not a consistent read: restore the - select_lock_type value. The value of - stored_select_lock_type was decided in: - 1) ::store_lock(), - 2) ::external_lock(), - 3) ::init_table_handle_for_HANDLER(), and - 4) ::transactional_table_lock(). 
*/ - - ut_a(prebuilt->stored_select_lock_type != LOCK_NONE_UNSET); - prebuilt->select_lock_type = prebuilt->stored_select_lock_type; - } - - *trx->detailed_error = 0; - - innobase_register_trx(ht, thd, trx); - - if (!trx_is_started(trx)) { - ++trx->will_lock; - } - - DBUG_RETURN(0); -} - -/******************************************************************//** -Maps a MySQL trx isolation level code to the InnoDB isolation level code -@return InnoDB isolation level */ -static inline -ulint -innobase_map_isolation_level( -/*=========================*/ - enum_tx_isolation iso) /*!< in: MySQL isolation level code */ -{ - switch (iso) { - case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ); - case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED); - case ISO_SERIALIZABLE: return(TRX_ISO_SERIALIZABLE); - case ISO_READ_UNCOMMITTED: return(TRX_ISO_READ_UNCOMMITTED); - } - - ut_error; - - return(0); -} - -/******************************************************************//** -As MySQL will execute an external lock for every new table it uses when it -starts to process an SQL statement (an exception is when MySQL calls -start_stmt for the handle) we can use this function to store the pointer to -the THD in the handle. We will also use this function to communicate -to InnoDB that a new SQL statement has started and that we must store a -savepoint to our transaction handle, so that we are able to roll back -the SQL statement in case of an error. -@return 0 */ -UNIV_INTERN -int -ha_innobase::external_lock( -/*=======================*/ - THD* thd, /*!< in: handle to the user thread */ - int lock_type) /*!< in: lock type */ -{ - trx_t* trx; - - DBUG_ENTER("ha_innobase::external_lock"); - DBUG_PRINT("enter",("lock_type: %d", lock_type)); - - update_thd(thd); - - /* Statement based binlogging does not work in isolation level - READ UNCOMMITTED and READ COMMITTED since the necessary - locks cannot be taken. 
In this case, we print an - informative error message and return with an error. - Note: decide_logging_format would give the same error message, - except it cannot give the extra details. */ - - if (lock_type == F_WRLCK - && !(table_flags() & HA_BINLOG_STMT_CAPABLE) - && thd_binlog_format(thd) == BINLOG_FORMAT_STMT - && thd_binlog_filter_ok(thd) - && thd_sqlcom_can_generate_row_events(thd)) { - bool skip = 0; - /* used by test case */ - DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;); - if (!skip) { -#ifdef WITH_WSREP - if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE) - { -#endif /* WITH_WSREP */ - my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0), - " InnoDB is limited to row-logging when " - "transaction isolation level is " - "READ COMMITTED or READ UNCOMMITTED."); - DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE); -#ifdef WITH_WSREP - } -#endif /* WITH_WSREP */ - } - } - - /* Check for UPDATEs in read-only mode. */ - if (srv_read_only_mode - && (thd_sql_command(thd) == SQLCOM_UPDATE - || thd_sql_command(thd) == SQLCOM_INSERT - || thd_sql_command(thd) == SQLCOM_REPLACE - || thd_sql_command(thd) == SQLCOM_DROP_TABLE - || thd_sql_command(thd) == SQLCOM_ALTER_TABLE - || thd_sql_command(thd) == SQLCOM_OPTIMIZE - || (thd_sql_command(thd) == SQLCOM_CREATE_TABLE - && lock_type == F_WRLCK) - || thd_sql_command(thd) == SQLCOM_CREATE_INDEX - || thd_sql_command(thd) == SQLCOM_DROP_INDEX - || thd_sql_command(thd) == SQLCOM_DELETE)) { - - if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) - { - ib_senderrf(thd, IB_LOG_LEVEL_WARN, - ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else { - ib_senderrf(thd, IB_LOG_LEVEL_WARN, - ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - } - - trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - - reset_template(); - - switch (prebuilt->table->quiesce) { - case QUIESCE_START: - /* Check for FLUSH TABLE t WITH READ LOCK; */ - if 
(!srv_read_only_mode - && thd_sql_command(thd) == SQLCOM_FLUSH - && lock_type == F_RDLCK) { - - row_quiesce_table_start(prebuilt->table, trx); - - /* Use the transaction instance to track UNLOCK - TABLES. It can be done via START TRANSACTION; too - implicitly. */ - - ++trx->flush_tables; - } - break; - - case QUIESCE_COMPLETE: - /* Check for UNLOCK TABLES; implicit or explicit - or trx interruption. */ - if (trx->flush_tables > 0 - && (lock_type == F_UNLCK || trx_is_interrupted(trx))) { - - row_quiesce_table_complete(prebuilt->table, trx); - - ut_a(trx->flush_tables > 0); - --trx->flush_tables; - } - - break; - - case QUIESCE_NONE: - break; - } - - if (lock_type == F_WRLCK) { - - /* If this is a SELECT, then it is in UPDATE TABLE ... - or SELECT ... FOR UPDATE */ - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - } - - if (lock_type != F_UNLCK) { - /* MySQL is setting a new table lock */ - - *trx->detailed_error = 0; - - innobase_register_trx(ht, thd, trx); - - if (trx->isolation_level == TRX_ISO_SERIALIZABLE - && prebuilt->select_lock_type == LOCK_NONE - && thd_test_options( - thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - /* To get serializable execution, we let InnoDB - conceptually add 'LOCK IN SHARE MODE' to all SELECTs - which otherwise would have been consistent reads. An - exception is consistent reads in the AUTOCOMMIT=1 mode: - we know that they are read-only transactions, and they - can be serialized also if performed as consistent - reads. */ - - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } - - /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK - TABLES if AUTOCOMMIT=1. It does not make much sense to acquire - an InnoDB table lock if it is released immediately at the end - of LOCK TABLES, and InnoDB's table locks in that case cause - VERY easily deadlocks. - - We do not set InnoDB table locks if user has not explicitly - requested a table lock. 
Note that thd_in_lock_tables(thd) - can hold in some cases, e.g., at the start of a stored - procedure call (SQLCOM_CALL). */ - - if (prebuilt->select_lock_type != LOCK_NONE) { - - if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES - && THDVAR(thd, table_locks) - && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT) - && thd_in_lock_tables(thd)) { - - dberr_t error = row_lock_table_for_mysql( - prebuilt, NULL, 0); - - if (error != DB_SUCCESS) { - DBUG_RETURN( - convert_error_code_to_mysql( - error, 0, thd)); - } - } - - trx->mysql_n_tables_locked++; - } - - trx->n_mysql_tables_in_use++; - prebuilt->mysql_has_locked = TRUE; - - if (!trx_is_started(trx) - && (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->stored_select_lock_type != LOCK_NONE)) { - - ++trx->will_lock; - } - - DBUG_RETURN(0); - } - - /* MySQL is releasing a table lock */ - - trx->n_mysql_tables_in_use--; - prebuilt->mysql_has_locked = FALSE; - - /* Release a possible FIFO ticket and search latch. Since we - may reserve the trx_sys->mutex, we have to release the search - system latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* If the MySQL lock count drops to zero we know that the current SQL - statement has ended */ - - if (trx->n_mysql_tables_in_use == 0) { -#ifdef EXTENDED_SLOWLOG - if (UNIV_UNLIKELY(trx->take_stats)) { - increment_thd_innodb_stats(thd, - (unsigned long long) trx->id, - trx->io_reads, - trx->io_read, - trx->io_reads_wait_timer, - trx->lock_que_wait_timer, - trx->innodb_que_wait_timer, - trx->distinct_page_access); - - trx->io_reads = 0; - trx->io_read = 0; - trx->io_reads_wait_timer = 0; - trx->lock_que_wait_timer = 0; - trx->innodb_que_wait_timer = 0; - trx->distinct_page_access = 0; - if (trx->distinct_page_access_hash) - memset(trx->distinct_page_access_hash, 0, - DPAH_SIZE); - } -#endif - - trx->mysql_n_tables_locked = 0; - prebuilt->used_in_HANDLER = FALSE; - - if (!thd_test_options( - thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - if (trx_is_started(trx)) { - innobase_commit(ht, thd, TRUE); - } - - } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && trx->global_read_view) { - - /* At low transaction isolation levels we let - each consistent read set its own snapshot */ - - read_view_close_for_mysql(trx); - } - } - - if (!trx_is_started(trx) - && (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->stored_select_lock_type != LOCK_NONE)) { - - ++trx->will_lock; - } - - DBUG_RETURN(0); -} - -/******************************************************************//** -With this function MySQL request a transactional lock to a table when -user issued query LOCK TABLES..WHERE ENGINE = InnoDB. 
-@return error code */ -UNIV_INTERN -int -ha_innobase::transactional_table_lock( -/*==================================*/ - THD* thd, /*!< in: handle to the user thread */ - int lock_type) /*!< in: lock type */ -{ - trx_t* trx; - - DBUG_ENTER("ha_innobase::transactional_table_lock"); - DBUG_PRINT("enter",("lock_type: %d", lock_type)); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(thd); - - DBUG_ASSERT(share->ib_table == prebuilt->table); - - if (!thd_tablespace_op(thd)) { - - if (dict_table_is_discarded(prebuilt->table)) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_DISCARDED, - table->s->table_name.str); - - } else if (!prebuilt->table->is_readable()) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, - ER_TABLESPACE_MISSING, - table->s->table_name.str); - } - - DBUG_RETURN(HA_ERR_CRASHED); - } - - trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - - reset_template(); - - if (lock_type == F_WRLCK) { - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - } else if (lock_type == F_RDLCK) { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "MySQL is trying to set transactional table lock " - "with corrupted lock type to table %s, lock type " - "%d does not exist.", - table->s->table_name.str, lock_type); - - DBUG_RETURN(HA_ERR_CRASHED); - } - - /* MySQL is setting a new transactional table lock */ - - innobase_register_trx(ht, thd, trx); - - if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) { - dberr_t error; - - error = row_lock_table_for_mysql(prebuilt, NULL, 0); - - if (error != DB_SUCCESS) { - DBUG_RETURN( - convert_error_code_to_mysql( - error, prebuilt->table->flags, thd)); - } - - if (thd_test_options( - thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - /* Store 
the current undo_no of the transaction - so that we know where to roll back if we have - to roll back the next SQL statement */ - - trx_mark_sql_stat_end(trx); - } - } - - DBUG_RETURN(0); -} - -/************************************************************************//** -Here we export InnoDB status variables to MySQL. */ -static -void -innodb_export_status() -/*==================*/ -{ - if (innodb_inited) { - srv_export_innodb_status(); - } -} - -/************************************************************************//** -Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the -InnoDB Monitor to the client. -@return 0 on success */ -static -int -innodb_show_status( -/*===============*/ - handlerton* hton, /*!< in: the innodb handlerton */ - THD* thd, /*!< in: the MySQL query thread of the caller */ - stat_print_fn* stat_print) -{ - trx_t* trx; - static const char truncated_msg[] = "... truncated...\n"; - const long MAX_STATUS_SIZE = 1048576; - ulint trx_list_start = ULINT_UNDEFINED; - ulint trx_list_end = ULINT_UNDEFINED; - bool ret_val; - - DBUG_ENTER("innodb_show_status"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - /* We don't create the temp files or associated - mutexes in read-only-mode */ - - if (srv_read_only_mode) { - DBUG_RETURN(0); - } - - trx = check_trx_exists(thd); - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE - bytes of text. 
*/ - - char* str; - ssize_t flen, usable_len; - - mutex_enter(&srv_monitor_file_mutex); - rewind(srv_monitor_file); - - srv_printf_innodb_monitor(srv_monitor_file, FALSE, - &trx_list_start, &trx_list_end); - - os_file_set_eof(srv_monitor_file); - - if ((flen = ftell(srv_monitor_file)) < 0) { - flen = 0; - } - - if (flen > MAX_STATUS_SIZE) { - usable_len = MAX_STATUS_SIZE; - srv_truncated_status_writes++; - } else { - usable_len = flen; - } - - /* allocate buffer for the string, and - read the contents of the temporary file */ - - if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) { - mutex_exit(&srv_monitor_file_mutex); - DBUG_RETURN(1); - } - - rewind(srv_monitor_file); - - if (flen < MAX_STATUS_SIZE) { - /* Display the entire output. */ - flen = fread(str, 1, flen, srv_monitor_file); - } else if (trx_list_end < (ulint) flen - && trx_list_start < trx_list_end - && trx_list_start + (flen - trx_list_end) - < MAX_STATUS_SIZE - sizeof truncated_msg - 1) { - - /* Omit the beginning of the list of active transactions. */ - ssize_t len = fread(str, 1, trx_list_start, srv_monitor_file); - - memcpy(str + len, truncated_msg, sizeof truncated_msg - 1); - len += sizeof truncated_msg - 1; - usable_len = (MAX_STATUS_SIZE - 1) - len; - fseek(srv_monitor_file, - static_cast<long>(flen - usable_len), SEEK_SET); - len += fread(str + len, 1, usable_len, srv_monitor_file); - flen = len; - } else { - /* Omit the end of the output. */ - flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file); - } - - mutex_exit(&srv_monitor_file_mutex); - - ret_val= stat_print( - thd, innobase_hton_name, - static_cast<uint>(strlen(innobase_hton_name)), - STRING_WITH_LEN(""), str, static_cast<uint>(flen)); - - my_free(str); - - DBUG_RETURN(ret_val); -} - -/************************************************************************//** -Implements the SHOW MUTEX STATUS command. -@return 0 on success. 
*/ -static -int -innodb_mutex_show_status( -/*=====================*/ - handlerton* hton, /*!< in: the innodb handlerton */ - THD* thd, /*!< in: the MySQL query thread of the - caller */ - stat_print_fn* stat_print) /*!< in: function for printing - statistics */ -{ - char buf1[IO_SIZE]; - char buf2[IO_SIZE]; - ib_mutex_t* mutex; - rw_lock_t* lock; - ulint block_mutex_oswait_count = 0; - ulint block_lock_oswait_count = 0; - ib_mutex_t* block_mutex = NULL; - rw_lock_t* block_lock = NULL; -#ifdef UNIV_DEBUG - ulint rw_lock_count= 0; - ulint rw_lock_count_spin_loop= 0; - ulint rw_lock_count_spin_rounds= 0; - ulint rw_lock_count_os_wait= 0; - ulint rw_lock_count_os_yield= 0; - ulonglong rw_lock_wait_time= 0; -#endif /* UNIV_DEBUG */ - uint buf1len; - uint buf2len; - uint hton_name_len; - - hton_name_len = (uint) strlen(innobase_hton_name); - - DBUG_ENTER("innodb_mutex_show_status"); - DBUG_ASSERT(hton == innodb_hton_ptr); - - mutex_enter(&mutex_list_mutex); - - for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; - mutex = UT_LIST_GET_NEXT(list, mutex)) { - if (mutex->count_os_wait == 0) { - continue; - } - - if (buf_pool_is_block_mutex(mutex)) { - block_mutex = mutex; - block_mutex_oswait_count += mutex->count_os_wait; - continue; - } - - buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s", - mutex->cmutex_name); - buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu", - (ulong) mutex->count_os_wait); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&mutex_list_mutex); - DBUG_RETURN(1); - } - } - - if (block_mutex) { - buf1len = (uint) my_snprintf(buf1, sizeof buf1, - "combined %s", - block_mutex->cmutex_name); - buf2len = (uint) my_snprintf(buf2, sizeof buf2, - "os_waits=%lu", - (ulong) block_mutex_oswait_count); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&mutex_list_mutex); - DBUG_RETURN(1); - } - } - - 
mutex_exit(&mutex_list_mutex); - - mutex_enter(&rw_lock_list_mutex); - - for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL; - lock = UT_LIST_GET_NEXT(list, lock)) { - if (lock->count_os_wait == 0) { - continue; - } - - if (buf_pool_is_block_lock(lock)) { - block_lock = lock; - block_lock_oswait_count += lock->count_os_wait; - continue; - } - - buf1len = (uint) my_snprintf( - buf1, sizeof buf1, "%s", - lock->lock_name); - buf2len = (uint) my_snprintf( - buf2, sizeof buf2, "os_waits=%lu", - static_cast<ulong>(lock->count_os_wait)); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&rw_lock_list_mutex); - DBUG_RETURN(1); - } - } - - if (block_lock) { - buf1len = (uint) my_snprintf(buf1, sizeof buf1, - "combined %s", - block_lock->lock_name); - buf2len = (uint) my_snprintf(buf2, sizeof buf2, - "os_waits=%lu", - (ulong) block_lock_oswait_count); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit(&rw_lock_list_mutex); - DBUG_RETURN(1); - } - } - - mutex_exit(&rw_lock_list_mutex); - -#ifdef UNIV_DEBUG - buf2len = static_cast<uint>(my_snprintf(buf2, sizeof buf2, - "count=%lu, spin_waits=%lu, spin_rounds=%lu, " - "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", - (ulong) rw_lock_count, - (ulong) rw_lock_count_spin_loop, - (ulong) rw_lock_count_spin_rounds, - (ulong) rw_lock_count_os_wait, - (ulong) rw_lock_count_os_yield, - (ulong) (rw_lock_wait_time / 1000))); - - if (stat_print(thd, innobase_hton_name, hton_name_len, - STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) { - DBUG_RETURN(1); - } -#endif /* UNIV_DEBUG */ - - /* Success */ - DBUG_RETURN(0); -} - -/************************************************************************//** -Return 0 on success and non-zero on failure. Note: the bool return type -seems to be abused here, should be an int. 
*/ -static -bool -innobase_show_status( -/*=================*/ - handlerton* hton, /*!< in: the innodb handlerton */ - THD* thd, /*!< in: the MySQL query thread - of the caller */ - stat_print_fn* stat_print, - enum ha_stat_type stat_type) -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - - switch (stat_type) { - case HA_ENGINE_STATUS: - /* Non-zero return value means there was an error. */ - return(innodb_show_status(hton, thd, stat_print) != 0); - - case HA_ENGINE_MUTEX: - /* Non-zero return value means there was an error. */ - return(innodb_mutex_show_status(hton, thd, stat_print) != 0); - - case HA_ENGINE_LOGS: - /* Not handled */ - break; - } - - /* Success */ - return(false); -} - -/************************************************************************//** -Handling the shared INNOBASE_SHARE structure that is needed to provide table -locking. Register the table name if it doesn't exist in the hash table. */ -static -INNOBASE_SHARE* -get_share( -/*======*/ - const char* table_name) -{ - INNOBASE_SHARE* share; - - mysql_mutex_lock(&innobase_share_mutex); - - ulint fold = ut_fold_string(table_name); - - HASH_SEARCH(table_name_hash, innobase_open_tables, fold, - INNOBASE_SHARE*, share, - ut_ad(share->use_count > 0), - !strcmp(share->table_name, table_name)); - - if (!share) { - - uint length = (uint) strlen(table_name); - - /* TODO: invoke HASH_MIGRATE if innobase_open_tables - grows too big */ - - share = (INNOBASE_SHARE*) my_malloc(sizeof(*share)+length+1, - MYF(MY_FAE | MY_ZEROFILL)); - - share->table_name = (char*) memcpy(share + 1, - table_name, length + 1); - - HASH_INSERT(INNOBASE_SHARE, table_name_hash, - innobase_open_tables, fold, share); - - thr_lock_init(&share->lock); - - /* Index translation table initialization */ - share->idx_trans_tbl.index_mapping = NULL; - share->idx_trans_tbl.index_count = 0; - share->idx_trans_tbl.array_size = 0; - } - - share->use_count++; - mysql_mutex_unlock(&innobase_share_mutex); - - return(share); -} - 
-/************************************************************************//** -Free the shared object that was registered with get_share(). */ -static -void -free_share( -/*=======*/ - INNOBASE_SHARE* share) /*!< in/own: table share to free */ -{ - mysql_mutex_lock(&innobase_share_mutex); - -#ifdef UNIV_DEBUG - INNOBASE_SHARE* share2; - ulint fold = ut_fold_string(share->table_name); - - HASH_SEARCH(table_name_hash, innobase_open_tables, fold, - INNOBASE_SHARE*, share2, - ut_ad(share->use_count > 0), - !strcmp(share->table_name, share2->table_name)); - - ut_a(share2 == share); -#endif /* UNIV_DEBUG */ - - if (!--share->use_count) { - ulint fold = ut_fold_string(share->table_name); - - HASH_DELETE(INNOBASE_SHARE, table_name_hash, - innobase_open_tables, fold, share); - thr_lock_delete(&share->lock); - - /* Free any memory from index translation table */ - my_free(share->idx_trans_tbl.index_mapping); - - my_free(share); - - /* TODO: invoke HASH_MIGRATE if innobase_open_tables - shrinks too much */ - } - - mysql_mutex_unlock(&innobase_share_mutex); -} - -/*****************************************************************//** -Converts a MySQL table lock stored in the 'lock' field of the handle to -a proper type before storing pointer to the lock into an array of pointers. -MySQL also calls this if it wants to reset some table locks to a not-locked -state during the processing of an SQL query. An example is that during a -SELECT the read lock is released early on the 'const' tables where we only -fetch one row. MySQL does not call this when it releases all locks at the -end of an SQL statement. 
-@return pointer to the next element in the 'to' array */ -UNIV_INTERN -THR_LOCK_DATA** -ha_innobase::store_lock( -/*====================*/ - THD* thd, /*!< in: user thread handle */ - THR_LOCK_DATA** to, /*!< in: pointer to an array - of pointers to lock structs; - pointer to the 'lock' field - of current handle is stored - next to this array */ - enum thr_lock_type lock_type) /*!< in: lock type to store in - 'lock'; this may also be - TL_IGNORE */ -{ - trx_t* trx; - - /* Note that trx in this function is NOT necessarily prebuilt->trx - because we call update_thd() later, in ::external_lock()! Failure to - understand this caused a serious memory corruption bug in 5.1.11. */ - - trx = check_trx_exists(thd); - - /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE! - Be careful to ignore TL_IGNORE if we are going to do something with - only 'real' locks! */ - - /* If no MySQL table is in use, we need to set the isolation level - of the transaction. */ - - if (lock_type != TL_IGNORE - && trx->n_mysql_tables_in_use == 0) { - trx->isolation_level = innobase_map_isolation_level( - (enum_tx_isolation) thd_tx_isolation(thd)); - - if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && trx->global_read_view) { - - /* At low transaction isolation levels we let - each consistent read set its own snapshot */ - - read_view_close_for_mysql(trx); - } - } - - DBUG_ASSERT(EQ_CURRENT_THD(thd)); - const bool in_lock_tables = thd_in_lock_tables(thd); - const uint sql_command = thd_sql_command(thd); - - if (srv_read_only_mode - && (sql_command == SQLCOM_UPDATE - || sql_command == SQLCOM_INSERT - || sql_command == SQLCOM_REPLACE - || sql_command == SQLCOM_DROP_TABLE - || sql_command == SQLCOM_ALTER_TABLE - || sql_command == SQLCOM_OPTIMIZE - || (sql_command == SQLCOM_CREATE_TABLE - && (lock_type >= TL_WRITE_CONCURRENT_INSERT - && lock_type <= TL_WRITE)) - || sql_command == SQLCOM_CREATE_INDEX - || sql_command == SQLCOM_DROP_INDEX - || sql_command == SQLCOM_DELETE)) { - - 
ib_senderrf(trx->mysql_thd, - IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - - } else if (sql_command == SQLCOM_FLUSH - && lock_type == TL_READ_NO_INSERT) { - - /* Check for FLUSH TABLES ... WITH READ LOCK */ - - /* Note: This call can fail, but there is no way to return - the error to the caller. We simply ignore it for now here - and push the error code to the caller where the error is - detected in the function. */ - - dberr_t err = row_quiesce_set_state( - prebuilt->table, QUIESCE_START, trx); - - ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED); - - if (trx->isolation_level == TRX_ISO_SERIALIZABLE) { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } else { - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; - } - - /* Check for DROP TABLE */ - } else if (sql_command == SQLCOM_DROP_TABLE) { - - /* MySQL calls this function in DROP TABLE though this table - handle may belong to another thd that is running a query. Let - us in that case skip any changes to the prebuilt struct. */ - - /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */ - } else if ((lock_type == TL_READ && in_lock_tables) - || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) - || lock_type == TL_READ_WITH_SHARED_LOCKS - || lock_type == TL_READ_NO_INSERT - || (lock_type != TL_IGNORE - && sql_command != SQLCOM_SELECT)) { - - /* The OR cases above are in this order: - 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we - are processing a stored procedure or function, or - 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or - 3) this is a SELECT ... IN SHARE MODE, or - 4) we are doing a complex SQL statement like - INSERT INTO ... SELECT ... and the logical logging (MySQL - binlog) requires the use of a locking read, or - MySQL is doing LOCK TABLES ... READ. 
- 5) we let InnoDB do locking reads for all SQL statements that - are not simple SELECTs; note that select_lock_type in this - case may get strengthened in ::external_lock() to LOCK_X. - Note that we MUST use a locking read in all data modifying - SQL statements, because otherwise the execution would not be - serializable, and also the results from the update could be - unexpected if an obsolete consistent read view would be - used. */ - - /* Use consistent read for checksum table */ - - if (sql_command == SQLCOM_CHECKSUM - || sql_command == SQLCOM_CHECKSUM - || ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && trx->isolation_level != TRX_ISO_SERIALIZABLE - && (lock_type == TL_READ - || lock_type == TL_READ_NO_INSERT) - && (sql_command == SQLCOM_INSERT_SELECT - || sql_command == SQLCOM_REPLACE_SELECT - || sql_command == SQLCOM_UPDATE - || sql_command == SQLCOM_CREATE_TABLE))) { - - /* If we either have innobase_locks_unsafe_for_binlog - option set or this session is using READ COMMITTED - isolation level and isolation level of the transaction - is not set to serializable and MySQL is doing - INSERT INTO...SELECT or REPLACE INTO...SELECT - or UPDATE ... = (SELECT ...) or CREATE ... - SELECT... without FOR UPDATE or IN SHARE - MODE in select, then we use consistent read - for select. */ - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; - } else { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } - - } else if (lock_type != TL_IGNORE) { - - /* We set possible LOCK_X value in external_lock, not yet - here even if this would be SELECT ... 
FOR UPDATE */ - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; - } - - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) { - - /* Starting from 5.0.7, we weaken also the table locks - set at the start of a MySQL stored procedure call, just like - we weaken the locks set at the start of an SQL statement. - MySQL does set in_lock_tables TRUE there, but in reality - we do not need table locks to make the execution of a - single transaction stored procedure call deterministic - (if it does not use a consistent read). */ - - if (lock_type == TL_READ - && sql_command == SQLCOM_LOCK_TABLES) { - /* We come here if MySQL is processing LOCK TABLES - ... READ LOCAL. MyISAM under that table lock type - reads the table as it was at the time the lock was - granted (new inserts are allowed, but not seen by the - reader). To get a similar effect on an InnoDB table, - we must use LOCK TABLES ... READ. We convert the lock - type here, so that for InnoDB, READ LOCAL is - equivalent to READ. This will change the InnoDB - behavior in mysqldump, so that dumps of InnoDB tables - are consistent with dumps of MyISAM tables. */ - - lock_type = TL_READ_NO_INSERT; - } - - /* If we are not doing a LOCK TABLE, DISCARD/IMPORT - TABLESPACE or TRUNCATE TABLE then allow multiple - writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ - < TL_WRITE_CONCURRENT_INSERT. - - We especially allow multiple writers if MySQL is at the - start of a stored procedure call (SQLCOM_CALL) or a - stored function call (MySQL does have in_lock_tables - TRUE there). */ - - if ((lock_type >= TL_WRITE_CONCURRENT_INSERT - && lock_type <= TL_WRITE) - && !(in_lock_tables - && sql_command == SQLCOM_LOCK_TABLES) - && !thd_tablespace_op(thd) - && sql_command != SQLCOM_TRUNCATE - && sql_command != SQLCOM_OPTIMIZE - && sql_command != SQLCOM_CREATE_TABLE) { - - lock_type = TL_WRITE_ALLOW_WRITE; - } - - /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ... 
- MySQL would use the lock TL_READ_NO_INSERT on t2, and that - would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts - to t2. Convert the lock to a normal read lock to allow - concurrent inserts to t2. - - We especially allow concurrent inserts if MySQL is at the - start of a stored procedure call (SQLCOM_CALL) - (MySQL does have thd_in_lock_tables() TRUE there). */ - - if (lock_type == TL_READ_NO_INSERT - && sql_command != SQLCOM_LOCK_TABLES) { - - lock_type = TL_READ; - } - - lock.type = lock_type; - } - - *to++= &lock; - - if (!trx_is_started(trx) - && (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->stored_select_lock_type != LOCK_NONE)) { - - ++trx->will_lock; - } - - return(to); -} - -/*********************************************************************//** -Read the next autoinc value. Acquire the relevant locks before reading -the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked -on return and all relevant locks acquired. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -ha_innobase::innobase_get_autoinc( -/*==============================*/ - ulonglong* value) /*!< out: autoinc value */ -{ - *value = 0; - - prebuilt->autoinc_error = innobase_lock_autoinc(); - - if (prebuilt->autoinc_error == DB_SUCCESS) { - - /* Determine the first value of the interval */ - *value = dict_table_autoinc_read(prebuilt->table); - - /* It should have been initialized during open. */ - if (*value == 0) { - prebuilt->autoinc_error = DB_UNSUPPORTED; - dict_table_autoinc_unlock(prebuilt->table); - } - } - - return(prebuilt->autoinc_error); -} - -/*******************************************************************//** -This function reads the global auto-inc counter. It doesn't use the -AUTOINC lock even if the lock mode is set to TRADITIONAL. 
-@return the autoinc value */ -UNIV_INTERN -ulonglong -ha_innobase::innobase_peek_autoinc(void) -/*====================================*/ -{ - ulonglong auto_inc; - dict_table_t* innodb_table; - - ut_a(prebuilt != NULL); - ut_a(prebuilt->table != NULL); - - innodb_table = prebuilt->table; - - dict_table_autoinc_lock(innodb_table); - - auto_inc = dict_table_autoinc_read(innodb_table); - - if (auto_inc == 0) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: AUTOINC next value generation " - "is disabled for '%s'\n", innodb_table->name); - } - - dict_table_autoinc_unlock(innodb_table); - - return(auto_inc); -} - -/*********************************************************************//** -Returns the value of the auto-inc counter in *first_value and ~0 on failure. */ -UNIV_INTERN -void -ha_innobase::get_auto_increment( -/*============================*/ - ulonglong offset, /*!< in: table autoinc offset */ - ulonglong increment, /*!< in: table autoinc - increment */ - ulonglong nb_desired_values, /*!< in: number of values - reqd */ - ulonglong* first_value, /*!< out: the autoinc value */ - ulonglong* nb_reserved_values) /*!< out: count of reserved - values */ -{ - trx_t* trx; - dberr_t error; - ulonglong autoinc = 0; - - /* Prepare prebuilt->trx in the table handle */ - update_thd(ha_thd()); - - error = innobase_get_autoinc(&autoinc); - - if (error != DB_SUCCESS) { - *first_value = (~(ulonglong) 0); - return; - } - - /* This is a hack, since nb_desired_values seems to be accurate only - for the first call to get_auto_increment() for multi-row INSERT and - meaningless for other statements e.g, LOAD etc. Subsequent calls to - this method for the same statement results in different values which - don't make sense. Therefore we store the value the first time we are - called and count down from that as rows are written (see write_row()). 
- */ - - trx = prebuilt->trx; - - /* Note: We can't rely on *first_value since some MySQL engines, - in particular the partition engine, don't initialize it to 0 when - invoking this method. So we are not sure if it's guaranteed to - be 0 or not. */ - - /* We need the upper limit of the col type to check for - whether we update the table autoinc counter or not. */ - ulonglong col_max_value = innobase_get_int_col_max_value( - table->next_number_field); - - /** The following logic is needed to avoid duplicate key error - for autoincrement column. - - (1) InnoDB gives the current autoincrement value with respect - to increment and offset value. - - (2) Basically it does compute_next_insert_id() logic inside InnoDB - to avoid the current auto increment value changed by handler layer. - - (3) It is restricted only for insert operations. */ - - if (increment > 1 && thd_sql_command(user_thd) != SQLCOM_ALTER_TABLE - && autoinc < col_max_value) { - - ulonglong prev_auto_inc = autoinc; - - autoinc = ((autoinc - 1) + increment - offset)/ increment; - - autoinc = autoinc * increment + offset; - - /* If autoinc exceeds the col_max_value then reset - to old autoinc value. Because in case of non-strict - sql mode, boundary value is not considered as error. */ - - if (autoinc >= col_max_value) { - autoinc = prev_auto_inc; - } - - ut_ad(autoinc > 0); - } - - /* Called for the first time ? */ - if (trx->n_autoinc_rows == 0) { - - trx->n_autoinc_rows = (ulint) nb_desired_values; - - /* It's possible for nb_desired_values to be 0: - e.g., INSERT INTO T1(C) SELECT C FROM T2; */ - if (nb_desired_values == 0) { - - trx->n_autoinc_rows = 1; - } - - set_if_bigger(*first_value, autoinc); - /* Not in the middle of a mult-row INSERT. */ - } else if (prebuilt->autoinc_last_value == 0) { - set_if_bigger(*first_value, autoinc); - } - - if (*first_value > col_max_value) - { - /* Out of range number. 
Let handler::update_auto_increment() - take care of this */ - prebuilt->autoinc_last_value = 0; - dict_table_autoinc_unlock(prebuilt->table); - *nb_reserved_values = 0; - return; - } - *nb_reserved_values = trx->n_autoinc_rows; - - /* With old style AUTOINC locking we only update the table's - AUTOINC counter after attempting to insert the row. */ - if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) { - ulonglong current; - ulonglong next_value; - - current = *first_value; - - if (prebuilt->autoinc_increment != increment) { - - WSREP_DEBUG("autoinc decrease: %llu -> %llu\n" - "THD: %ld, current: %llu, autoinc: %llu", - prebuilt->autoinc_increment, - increment, - thd_get_thread_id(ha_thd()), - current, autoinc); - if (!wsrep_on(ha_thd())) - { - current = autoinc - prebuilt->autoinc_increment; - } - - current = innobase_next_autoinc( - current, 1, increment, offset, col_max_value); - - dict_table_autoinc_initialize(prebuilt->table, current); - - *first_value = current; - } - - /* Compute the last value in the interval */ - next_value = innobase_next_autoinc( - current, *nb_reserved_values, increment, offset, - col_max_value); - - prebuilt->autoinc_last_value = next_value; - - if (prebuilt->autoinc_last_value < *first_value) { - *first_value = (~(ulonglong) 0); - } else { - /* Update the table autoinc variable */ - dict_table_autoinc_update_if_greater( - prebuilt->table, prebuilt->autoinc_last_value); - } - } else { - /* This will force write_row() into attempting an update - of the table's AUTOINC counter. */ - prebuilt->autoinc_last_value = 0; - } - - /* The increment to be used to increase the AUTOINC value, we use - this in write_row() and update_row() to increase the autoinc counter - for columns that are filled by the user. We need the offset and - the increment. 
*/ - prebuilt->autoinc_offset = offset; - prebuilt->autoinc_increment = increment; - - dict_table_autoinc_unlock(prebuilt->table); -} - -/*******************************************************************//** -Reset the auto-increment counter to the given value, i.e. the next row -inserted will get the given value. This is called e.g. after TRUNCATE -is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is -returned by storage engines that don't support this operation. -@return 0 or error code */ -UNIV_INTERN -int -ha_innobase::reset_auto_increment( -/*==============================*/ - ulonglong value) /*!< in: new value for table autoinc */ -{ - DBUG_ENTER("ha_innobase::reset_auto_increment"); - - dberr_t error; - - update_thd(ha_thd()); - - error = row_lock_table_autoinc_for_mysql(prebuilt); - - if (error != DB_SUCCESS) { - DBUG_RETURN(convert_error_code_to_mysql( - error, prebuilt->table->flags, user_thd)); - } - - /* The next value can never be 0. */ - if (value == 0) { - value = 1; - } - - innobase_reset_autoinc(value); - - DBUG_RETURN(0); -} - -/*******************************************************************//** -See comment in handler.cc */ -UNIV_INTERN -bool -ha_innobase::get_error_message( -/*===========================*/ - int error, - String* buf) -{ - trx_t* trx = check_trx_exists(ha_thd()); - - if (error == HA_ERR_DECRYPTION_FAILED) { - const char *msg = "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match."; - buf->copy(msg, (uint)strlen(msg), system_charset_info); - } else { - buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error), - system_charset_info); - } - - return(FALSE); -} - -/*******************************************************************//** - Retrieves the names of the table and the key for which there was a - duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY. 
- - If any of the names is not available, then this method will return - false and will not change any of child_table_name or child_key_name. - - @param child_table_name[out] Table name - @param child_table_name_len[in] Table name buffer size - @param child_key_name[out] Key name - @param child_key_name_len[in] Key name buffer size - - @retval true table and key names were available - and were written into the corresponding - out parameters. - @retval false table and key names were not available, - the out parameters were not touched. -*/ -bool -ha_innobase::get_foreign_dup_key( -/*=============================*/ - char* child_table_name, - uint child_table_name_len, - char* child_key_name, - uint child_key_name_len) -{ - const dict_index_t* err_index; - - ut_a(prebuilt->trx != NULL); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - - err_index = trx_get_error_info(prebuilt->trx); - - if (err_index == NULL) { - return(false); - } - /* else */ - - /* copy table name (and convert from filename-safe encoding to - system_charset_info) */ - char* p; - p = strchr(err_index->table->name, '/'); - /* strip ".../" prefix if any */ - if (p != NULL) { - p++; - } else { - p = err_index->table->name; - } - uint len; - len = filename_to_tablename(p, child_table_name, child_table_name_len); - child_table_name[len] = '\0'; - - /* copy index name */ - ut_snprintf(child_key_name, child_key_name_len, "%s", err_index->name); - - return(true); -} - -/*******************************************************************//** -Compares two 'refs'. A 'ref' is the (internal) primary key value of the row. -If there is no explicitly declared non-null unique key or a primary key, then -InnoDB internally uses the row id as the primary key. 
-@return < 0 if ref1 < ref2, 0 if equal, else > 0 */ -UNIV_INTERN -int -ha_innobase::cmp_ref( -/*=================*/ - const uchar* ref1, /*!< in: an (internal) primary key value in the - MySQL key value format */ - const uchar* ref2) /*!< in: an (internal) primary key value in the - MySQL key value format */ -{ - enum_field_types mysql_type; - Field* field; - KEY_PART_INFO* key_part; - KEY_PART_INFO* key_part_end; - uint len1; - uint len2; - int result; - - if (prebuilt->clust_index_was_generated) { - /* The 'ref' is an InnoDB row id */ - - return(memcmp(ref1, ref2, DATA_ROW_ID_LEN)); - } - - /* Do a type-aware comparison of primary key fields. PK fields - are always NOT NULL, so no checks for NULL are performed. */ - - key_part = table->key_info[table->s->primary_key].key_part; - - key_part_end = key_part - + table->key_info[table->s->primary_key].user_defined_key_parts; - - for (; key_part != key_part_end; ++key_part) { - field = key_part->field; - mysql_type = field->type(); - - if (mysql_type == MYSQL_TYPE_TINY_BLOB - || mysql_type == MYSQL_TYPE_MEDIUM_BLOB - || mysql_type == MYSQL_TYPE_BLOB - || mysql_type == MYSQL_TYPE_LONG_BLOB) { - - /* In the MySQL key value format, a column prefix of - a BLOB is preceded by a 2-byte length field */ - - len1 = innobase_read_from_2_little_endian(ref1); - len2 = innobase_read_from_2_little_endian(ref2); - - result = ((Field_blob*) field)->cmp( - ref1 + 2, len1, ref2 + 2, len2); - } else { - result = field->key_cmp(ref1, ref2); - } - - if (result) { - - return(result); - } - - ref1 += key_part->store_length; - ref2 += key_part->store_length; - } - - return(0); -} - -/*******************************************************************//** -Ask InnoDB if a query to a table can be cached. 
-@return TRUE if query caching of the table is permitted */ -UNIV_INTERN -my_bool -ha_innobase::register_query_cache_table( -/*====================================*/ - THD* thd, /*!< in: user thread handle */ - const char* table_key, /*!< in: normalized path to the - table */ - uint key_length, /*!< in: length of the normalized - path to the table */ - qc_engine_callback* - call_back, /*!< out: pointer to function for - checking if query caching - is permitted */ - ulonglong *engine_data) /*!< in/out: data to call_back */ -{ - *call_back = innobase_query_caching_of_table_permitted; - *engine_data = 0; - return(innobase_query_caching_of_table_permitted(thd, table_key, - key_length, - engine_data)); -} - -/*******************************************************************//** -Get the bin log name. */ -UNIV_INTERN -const char* -ha_innobase::get_mysql_bin_log_name() -/*=================================*/ -{ - return(trx_sys_mysql_bin_log_name); -} - -/*******************************************************************//** -Get the bin log offset (or file position). */ -UNIV_INTERN -ulonglong -ha_innobase::get_mysql_bin_log_pos() -/*================================*/ -{ - /* trx... is ib_int64_t, which is a typedef for a 64-bit integer - (__int64 or longlong) so it's ok to cast it to ulonglong. */ - - return(trx_sys_mysql_bin_log_pos); -} - -/******************************************************************//** -This function is used to find the storage length in bytes of the first n -characters for prefix indexes using a multibyte character set. The function -finds charset information and returns length of prefix_len characters in the -index field in bytes. 
-@return number of bytes occupied by the first n characters */ -UNIV_INTERN -ulint -innobase_get_at_most_n_mbchars( -/*===========================*/ - ulint charset_id, /*!< in: character set id */ - ulint prefix_len, /*!< in: prefix length in bytes of the index - (this has to be divided by mbmaxlen to get the - number of CHARACTERS n in the prefix) */ - ulint data_len, /*!< in: length of the string in bytes */ - const char* str) /*!< in: character string */ -{ - ulint char_length; /*!< character length in bytes */ - ulint n_chars; /*!< number of characters in prefix */ - CHARSET_INFO* charset; /*!< charset used in the field */ - - charset = get_charset((uint) charset_id, MYF(MY_WME)); - - ut_ad(charset); - ut_ad(charset->mbmaxlen); - - /* Calculate how many characters at most the prefix index contains */ - - n_chars = prefix_len / charset->mbmaxlen; - - /* If the charset is multi-byte, then we must find the length of the - first at most n chars in the string. If the string contains less - characters than n, then we return the length to the end of the last - character. */ - - if (charset->mbmaxlen > 1) { - /* my_charpos() returns the byte length of the first n_chars - characters, or a value bigger than the length of str, if - there were not enough full characters in str. - - Why does the code below work: - Suppose that we are looking for n UTF-8 characters. - - 1) If the string is long enough, then the prefix contains at - least n complete UTF-8 characters + maybe some extra - characters + an incomplete UTF-8 character. No problem in - this case. The function returns the pointer to the - end of the nth character. - - 2) If the string is not long enough, then the string contains - the complete value of a column, that is, only complete UTF-8 - characters, and we can store in the column prefix index the - whole string. 
*/ - - char_length = my_charpos(charset, str, - str + data_len, (int) n_chars); - if (char_length > data_len) { - char_length = data_len; - } - } else { - if (data_len < prefix_len) { - char_length = data_len; - } else { - char_length = prefix_len; - } - } - - return(char_length); -} - -/*******************************************************************//** -This function is used to prepare an X/Open XA distributed transaction. -@return 0 or error number */ -static -int -innobase_xa_prepare( -/*================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - THD* thd, /*!< in: handle to the MySQL thread of - the user whose XA transaction should - be prepared */ - bool prepare_trx) /*!< in: true - prepare transaction - false - the current SQL statement - ended */ -{ - int error = 0; - trx_t* trx = check_trx_exists(thd); - - DBUG_ASSERT(hton == innodb_hton_ptr); - - /* we use support_xa value as it was seen at transaction start - time, not the current session variable value. Any possible changes - to the session variable take effect only in the next transaction */ - if (!trx->support_xa) { -#ifdef WITH_WSREP - thd_get_xid(thd, (MYSQL_XID*) &trx->xid); -#endif // WITH_WSREP - - return(0); - } - - if (UNIV_UNLIKELY(trx->fake_changes)) { - - if (prepare_trx - || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT - | OPTION_BEGIN))) { - - thd->get_stmt_da()->reset_diagnostics_area(); - return(HA_ERR_WRONG_COMMAND); - } - return(0); - } - - thd_get_xid(thd, (MYSQL_XID*) &trx->xid); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the trx_sys->mutex, we have to release the search system - latch first to obey the latching order. 
*/ - - trx_search_latch_release_if_reserved(trx); - - innobase_srv_conc_force_exit_innodb(trx); - - if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) { - - sql_print_error("Transaction not registered for MySQL 2PC, " - "but transaction is active"); - } - - if (prepare_trx - || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - /* We were instructed to prepare the whole transaction, or - this is an SQL statement end and autocommit is on */ - - ut_ad(trx_is_registered_for_2pc(trx)); - - trx_prepare_for_mysql(trx); - - DBUG_EXECUTE_IF("crash_innodb_after_prepare", - DBUG_SUICIDE();); - - error = 0; - } else { - /* We just mark the SQL statement ended and do not do a - transaction prepare */ - - /* If we had reserved the auto-inc lock for some - table in this SQL statement we release it now */ - - lock_unlock_table_autoinc(trx); - - /* Store the current undo_no of the transaction so that we - know where to roll back if we have to roll back the next - SQL statement */ - - trx_mark_sql_stat_end(trx); - } - - if (thd_sql_command(thd) != SQLCOM_XA_PREPARE - && (prepare_trx - || !thd_test_options( - thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - /* For mysqlbackup to work the order of transactions in binlog - and InnoDB must be the same. Consider the situation - - thread1> prepare; write to binlog; ... - <context switch> - thread2> prepare; write to binlog; commit - thread1> ... commit - - The server guarantees that writes to the binary log - and commits are in the same order, so we do not have - to handle this case. */ - } - - return(error); -} - -/*******************************************************************//** -This function is used to recover X/Open XA distributed transactions. 
-@return number of prepared transactions stored in xid_list */ -static -int -innobase_xa_recover( -/*================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - XID* xid_list,/*!< in/out: prepared transactions */ - uint len) /*!< in: number of slots in xid_list */ -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - - if (len == 0 || xid_list == NULL) { - - return(0); - } - - return(trx_recover_for_mysql(xid_list, len)); -} - -/*******************************************************************//** -This function is used to commit one X/Open XA distributed transaction -which is in the prepared state -@return 0 or error number */ -static -int -innobase_commit_by_xid( -/*===================*/ - handlerton* hton, - XID* xid) /*!< in: X/Open XA transaction identification */ -{ - trx_t* trx; - - DBUG_ASSERT(hton == innodb_hton_ptr); - - trx = trx_get_trx_by_xid(xid); - - if (trx) { - innobase_commit_low(trx); - trx_free_for_background(trx); - return(XA_OK); - } else { - return(XAER_NOTA); - } -} - -/*******************************************************************//** -This function is used to rollback one X/Open XA distributed transaction -which is in the prepared state -@return 0 or error number */ -static -int -innobase_rollback_by_xid( -/*=====================*/ - handlerton* hton, /*!< in: InnoDB handlerton */ - XID* xid) /*!< in: X/Open XA transaction - identification */ -{ - trx_t* trx; - - DBUG_ASSERT(hton == innodb_hton_ptr); - - trx = trx_get_trx_by_xid(xid); - - if (trx) { - int ret = innobase_rollback_trx(trx); - trx_free_for_background(trx); - return(ret); - } else { - return(XAER_NOTA); - } -} - -/*******************************************************************//** -Create a consistent view for a cursor based on current transaction -which is created if the corresponding MySQL thread still lacks one. -This consistent view is then used inside of MySQL when accessing records -using a cursor. 
-@return pointer to cursor view or NULL */ -static -void* -innobase_create_cursor_view( -/*========================*/ - handlerton* hton, /*!< in: innobase hton */ - THD* thd) /*!< in: user thread handle */ -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - - return(read_cursor_view_create_for_mysql(check_trx_exists(thd))); -} - -/*******************************************************************//** -Close the given consistent cursor view of a transaction and restore -global read view to a transaction read view. Transaction is created if the -corresponding MySQL thread still lacks one. */ -static -void -innobase_close_cursor_view( -/*=======================*/ - handlerton* hton, /*!< in: innobase hton */ - THD* thd, /*!< in: user thread handle */ - void* curview)/*!< in: Consistent read view to be closed */ -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - - read_cursor_view_close_for_mysql(check_trx_exists(thd), - (cursor_view_t*) curview); -} - -/*******************************************************************//** -Set the given consistent cursor view to a transaction which is created -if the corresponding MySQL thread still lacks one. If the given -consistent cursor view is NULL global read view of a transaction is -restored to a transaction read view. 
*/ -static -void -innobase_set_cursor_view( -/*=====================*/ - handlerton* hton, /*!< in: innobase hton */ - THD* thd, /*!< in: user thread handle */ - void* curview)/*!< in: Consistent cursor view to be set */ -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - - read_cursor_set_for_mysql(check_trx_exists(thd), - (cursor_view_t*) curview); -} - -/*******************************************************************//** -*/ -UNIV_INTERN -bool -ha_innobase::check_if_incompatible_data( -/*====================================*/ - HA_CREATE_INFO* info, - uint table_changes) -{ - ha_table_option_struct *param_old, *param_new; - - /* Cache engine specific options */ - param_new = info->option_struct; - param_old = table->s->option_struct; - - innobase_copy_frm_flags_from_create_info(prebuilt->table, info); - - if (table_changes != IS_EQUAL_YES) { - - return(COMPATIBLE_DATA_NO); - } - - /* Check that auto_increment value was not changed */ - if ((info->used_fields & HA_CREATE_USED_AUTO) && - info->auto_increment_value != 0) { - - return(COMPATIBLE_DATA_NO); - } - - /* Check that row format didn't change */ - if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) - && info->row_type != get_row_type()) { - - return(COMPATIBLE_DATA_NO); - } - - /* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */ - if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) { - return(COMPATIBLE_DATA_NO); - } - - /* Changes on engine specific table options requests a rebuild of the table. */ - if (param_new->page_compressed != param_old->page_compressed || - param_new->page_compression_level != param_old->page_compression_level || - param_new->atomic_writes != param_old->atomic_writes) { - return(COMPATIBLE_DATA_NO); - } - - return(COMPATIBLE_DATA_YES); -} - -/****************************************************************//** -Update the system variable innodb_io_capacity_max using the "saved" -value. This function is registered as a callback with MySQL. 
*/ -static -void -innodb_io_capacity_max_update( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - ulong in_val = *static_cast<const ulong*>(save); - if (in_val < srv_io_capacity) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Setting innodb_io_capacity_max %lu" - " lower than innodb_io_capacity %lu.", - in_val, srv_io_capacity); - - srv_io_capacity = in_val; - - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Setting innodb_io_capacity to %lu", - srv_io_capacity); - } - - srv_max_io_capacity = in_val; -} - -/****************************************************************//** -Update the system variable innodb_io_capacity using the "saved" -value. This function is registered as a callback with MySQL. */ -static -void -innodb_io_capacity_update( -/*======================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - ulong in_val = *static_cast<const ulong*>(save); - if (in_val > srv_max_io_capacity) { - - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Setting innodb_io_capacity to %lu" - " higher than innodb_io_capacity_max %lu", - in_val, srv_max_io_capacity); - - srv_max_io_capacity = in_val * 2; - - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Setting innodb_max_io_capacity to %lu", - srv_max_io_capacity); - } - - srv_io_capacity = in_val; -} - -/****************************************************************//** -Update the system variable innodb_log_arch_expire_sec using -the "saved" value. 
This function is registered as a callback with MySQL. */ -static -void -innodb_log_archive_expire_update( -/*==============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr, /*!< out: unused */ - const void* save) /*!< in: immediate result - from check function */ -{ - srv_log_arch_expire_sec = *(ulint*) save; -} - -static -void -innodb_log_archive_update( -/*======================*/ - THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save) -{ - if (srv_read_only_mode) - return; - - my_bool in_val = *static_cast<const my_bool*>(save); - - if (in_val) { - /* turn archiving on */ - innobase_log_archive = srv_log_archive_on = 1; - log_archive_archivelog(); - } else { - /* turn archivng off */ - innobase_log_archive = srv_log_archive_on = 0; - log_archive_noarchivelog(); - } -} - -/****************************************************************//** -Update the system variable innodb_max_dirty_pages_pct using the "saved" -value. This function is registered as a callback with MySQL. 
*/
static
void
innodb_max_dirty_pages_pct_update(
/*==============================*/
	THD*				thd,	/*!< in: thread handle */
	struct st_mysql_sys_var*	var,	/*!< in: pointer to
						system variable */
	void*				var_ptr,/*!< out: where the
						formal string goes */
	const void*			save)	/*!< in: immediate result
						from check function */
{
	const double	new_pct = *static_cast<const double*>(save);
	const bool	below_low_water_mark =
		new_pct < srv_max_dirty_pages_pct_lwm;

	if (below_low_water_mark) {
		/* The low water mark follows the new value downwards;
		tell the user about both facts. */
		push_warning_printf(
			thd, Sql_condition::WARN_LEVEL_WARN,
			ER_WRONG_ARGUMENTS,
			"innodb_max_dirty_pages_pct cannot be"
			" set lower than"
			" innodb_max_dirty_pages_pct_lwm.");

		push_warning_printf(
			thd, Sql_condition::WARN_LEVEL_WARN,
			ER_WRONG_ARGUMENTS,
			"Lowering"
			" innodb_max_dirty_page_pct_lwm to %lf",
			new_pct);

		srv_max_dirty_pages_pct_lwm = new_pct;
	}

	srv_max_buf_pool_modified_pct = new_pct;
}

/****************************************************************//**
Update the system variable innodb_max_dirty_pages_pct_lwm using the
"saved" value. This function is registered as a callback with MySQL.
*/ -static -void -innodb_max_dirty_pages_pct_lwm_update( -/*==================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - double in_val = *static_cast<const double*>(save); - if (in_val > srv_max_buf_pool_modified_pct) { - in_val = srv_max_buf_pool_modified_pct; - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_max_dirty_pages_pct_lwm" - " cannot be set higher than" - " innodb_max_dirty_pages_pct."); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Setting innodb_max_dirty_page_pct_lwm" - " to %lf", - in_val); - } - - srv_max_dirty_pages_pct_lwm = in_val; -} - -UNIV_INTERN -void -ha_innobase::set_partition_owner_stats(ha_statistics *stats) -{ - ha_partition_stats= stats; -} - -/************************************************************//** -Validate the file format name and return its corresponding id. -@return valid file format id */ -static -uint -innobase_file_format_name_lookup( -/*=============================*/ - const char* format_name) /*!< in: pointer to file format name */ -{ - char* endp; - uint format_id; - - ut_a(format_name != NULL); - - /* The format name can contain the format id itself instead of - the name and we check for that. */ - format_id = (uint) strtoul(format_name, &endp, 10); - - /* Check for valid parse. 
*/ - if (*endp == '\0' && *format_name != '\0') { - - if (format_id <= UNIV_FORMAT_MAX) { - - return(format_id); - } - } else { - - for (format_id = 0; format_id <= UNIV_FORMAT_MAX; - format_id++) { - const char* name; - - name = trx_sys_file_format_id_to_name(format_id); - - if (!innobase_strcasecmp(format_name, name)) { - - return(format_id); - } - } - } - - return(UNIV_FORMAT_MAX + 1); -} - -/************************************************************//** -Validate the file format check config parameters, as a side effect it -sets the srv_max_file_format_at_startup variable. -@return the format_id if valid config value, otherwise, return -1 */ -static -int -innobase_file_format_validate_and_set( -/*==================================*/ - const char* format_max) /*!< in: parameter value */ -{ - uint format_id; - - format_id = innobase_file_format_name_lookup(format_max); - - if (format_id < UNIV_FORMAT_MAX + 1) { - srv_max_file_format_at_startup = format_id; - - return((int) format_id); - } else { - return(-1); - } -} - -/*************************************************************//** -Check if it is a valid file format. This function is registered as -a callback with MySQL. 
-@return 0 for valid file format */ -static -int -innodb_file_format_name_validate( -/*=============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* file_format_input; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - - ut_a(save != NULL); - ut_a(value != NULL); - - file_format_input = value->val_str(value, buff, &len); - - if (file_format_input != NULL) { - uint format_id; - - format_id = innobase_file_format_name_lookup( - file_format_input); - - if (format_id <= UNIV_FORMAT_MAX) { - - /* Save a pointer to the name in the - 'file_format_name_map' constant array. */ - *static_cast<const char**>(save) = - trx_sys_file_format_id_to_name(format_id); - - return(0); - } - } - - *static_cast<const char**>(save) = NULL; - return(1); -} - -/****************************************************************//** -Update the system variable innodb_file_format using the "saved" -value. This function is registered as a callback with MySQL. 
*/ -static -void -innodb_file_format_name_update( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr, /*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - const char* format_name; - - ut_a(var_ptr != NULL); - ut_a(save != NULL); - - format_name = *static_cast<const char*const*>(save); - - if (format_name) { - uint format_id; - - format_id = innobase_file_format_name_lookup(format_name); - - if (format_id <= UNIV_FORMAT_MAX) { - srv_file_format = format_id; - } - } - - *static_cast<const char**>(var_ptr) - = trx_sys_file_format_id_to_name(srv_file_format); -} - -/*************************************************************//** -Check if valid argument to innodb_file_format_max. This function -is registered as a callback with MySQL. -@return 0 for valid file format */ -static -int -innodb_file_format_max_validate( -/*============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* file_format_input; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - int format_id; - - ut_a(save != NULL); - ut_a(value != NULL); - - file_format_input = value->val_str(value, buff, &len); - - if (file_format_input != NULL) { - - format_id = innobase_file_format_validate_and_set( - file_format_input); - - if (format_id >= 0) { - /* Save a pointer to the name in the - 'file_format_name_map' constant array. 
*/ - *static_cast<const char**>(save) = - trx_sys_file_format_id_to_name( - (uint) format_id); - - return(0); - - } else { - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "InnoDB: invalid innodb_file_format_max " - "value; can be any format up to %s " - "or equivalent id of %d", - trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX), - UNIV_FORMAT_MAX); - } - } - - *static_cast<const char**>(save) = NULL; - return(1); -} - -/****************************************************************//** -Update the system variable innodb_file_format_max using the "saved" -value. This function is registered as a callback with MySQL. */ -static -void -innodb_file_format_max_update( -/*==========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - const char* format_name_in; - const char** format_name_out; - uint format_id; - - ut_a(save != NULL); - ut_a(var_ptr != NULL); - - format_name_in = *static_cast<const char*const*>(save); - - if (!format_name_in) { - - return; - } - - format_id = innobase_file_format_name_lookup(format_name_in); - - if (format_id > UNIV_FORMAT_MAX) { - /* DEFAULT is "on", which is invalid at runtime. */ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Ignoring SET innodb_file_format=%s", - format_name_in); - return; - } - - format_name_out = static_cast<const char**>(var_ptr); - - /* Update the max format id in the system tablespace. 
*/ - if (trx_sys_file_format_max_set(format_id, format_name_out)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " [Info] InnoDB: the file format in the system " - "tablespace is now set to %s.\n", *format_name_out); - } -} - -/*************************************************************//** -Check whether valid argument given to innobase_*_stopword_table. -This function is registered as a callback with MySQL. -@return 0 for valid stopword table */ -static -int -innodb_stopword_table_validate( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* stopword_table_name; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - trx_t* trx; - int ret = 1; - - ut_a(save != NULL); - ut_a(value != NULL); - - stopword_table_name = value->val_str(value, buff, &len); - - trx = check_trx_exists(thd); - - row_mysql_lock_data_dictionary(trx); - - /* Validate the stopword table's (if supplied) existence and - of the right format */ - if (!stopword_table_name - || fts_valid_stopword_table(stopword_table_name)) { - *static_cast<const char**>(save) = stopword_table_name; - ret = 0; - } - - row_mysql_unlock_data_dictionary(trx); - - return(ret); -} - -/*************************************************************//** -Check whether valid argument given to "innodb_fts_internal_tbl_name" -This function is registered as a callback with MySQL. 
-@return 0 for valid stopword table */ -static -int -innodb_internal_table_validate( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* table_name; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - int ret = 1; - dict_table_t* user_table; - - ut_a(save != NULL); - ut_a(value != NULL); - - table_name = value->val_str(value, buff, &len); - - if (!table_name) { - *static_cast<const char**>(save) = NULL; - return(0); - } - - user_table = dict_table_open_on_name( - table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE); - - if (user_table) { - if (dict_table_has_fts_index(user_table)) { - *static_cast<const char**>(save) = table_name; - ret = 0; - } - - dict_table_close(user_table, FALSE, TRUE); - - DBUG_EXECUTE_IF("innodb_evict_autoinc_table", - mutex_enter(&dict_sys->mutex); - dict_table_remove_from_cache_low(user_table, TRUE); - mutex_exit(&dict_sys->mutex); - ); - } - - return(ret); -} - -/****************************************************************//** -Update global variable "fts_internal_tbl_name" with the "saved" -stopword table name value. This function is registered as a callback -with MySQL. 
*/ -static -void -innodb_internal_table_update( -/*=========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - const char* table_name; - char* old; - - ut_a(save != NULL); - ut_a(var_ptr != NULL); - - table_name = *static_cast<const char*const*>(save); - old = *(char**) var_ptr; - - if (table_name) { - *(char**) var_ptr = my_strdup(table_name, MYF(0)); - } else { - *(char**) var_ptr = NULL; - } - - if (old) { - my_free(old); - } - - fts_internal_tbl_name2 = *(char**) var_ptr; - if (fts_internal_tbl_name2 == NULL) { - fts_internal_tbl_name = const_cast<char*>("default"); - } else { - fts_internal_tbl_name = fts_internal_tbl_name2; - } -} - -/****************************************************************//** -Update the system variable innodb_adaptive_hash_index using the "saved" -value. This function is registered as a callback with MySQL. */ -static -void -innodb_adaptive_hash_index_update( -/*==============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - if (*(my_bool*) save) { - btr_search_enable(); - } else { - btr_search_disable(); - } -} - -/****************************************************************//** -Update the system variable innodb_cmp_per_index using the "saved" -value. This function is registered as a callback with MySQL. 
*/ -static -void -innodb_cmp_per_index_update( -/*========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - /* Reset the stats whenever we enable the table - INFORMATION_SCHEMA.innodb_cmp_per_index. */ - if (!srv_cmp_per_index_enabled && *(my_bool*) save) { - page_zip_reset_stat_per_index(); - } - - srv_cmp_per_index_enabled = !!(*(my_bool*) save); -} - -/****************************************************************//** -Update the system variable innodb_old_blocks_pct using the "saved" -value. This function is registered as a callback with MySQL. */ -static -void -innodb_old_blocks_pct_update( -/*=========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - innobase_old_blocks_pct = static_cast<uint>( - buf_LRU_old_ratio_update( - *static_cast<const uint*>(save), TRUE)); -} - -/****************************************************************//** -Update the system variable innodb_old_blocks_pct using the "saved" -value. This function is registered as a callback with MySQL. 
*/ -static -void -innodb_change_buffer_max_size_update( -/*=================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - innobase_change_buffer_max_size = - (*static_cast<const uint*>(save)); - ibuf_max_size_update(innobase_change_buffer_max_size); -} - -#ifdef UNIV_DEBUG -ulong srv_fil_make_page_dirty_debug = 0; -ulong srv_saved_page_number_debug = 0; - -/****************************************************************//** -Save an InnoDB page number. */ -static -void -innodb_save_page_no( -/*================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - srv_saved_page_number_debug = *static_cast<const ulong*>(save); - - ib_logf(IB_LOG_LEVEL_INFO, - "Saving InnoDB page number: %lu", - srv_saved_page_number_debug); -} - -/****************************************************************//** -Make the first page of given user tablespace dirty. 
*/ -static -void -innodb_make_page_dirty( -/*===================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - mtr_t mtr; - ulong space_id = *static_cast<const ulong*>(save); - - mtr_start(&mtr); - - buf_block_t* block = buf_page_get( - space_id, 0, srv_saved_page_number_debug, RW_X_LATCH, &mtr); - - if (block) { - byte* page = block->frame; - ib_logf(IB_LOG_LEVEL_INFO, - "Dirtying page:%lu of space:%lu", - page_get_page_no(page), - page_get_space_id(page)); - mlog_write_ulint(page + FIL_PAGE_TYPE, - fil_page_get_type(page), - MLOG_2BYTES, &mtr); - } - mtr_commit(&mtr); -} -#endif // UNIV_DEBUG - -/*************************************************************//** -Find the corresponding ibuf_use_t value that indexes into -innobase_change_buffering_values[] array for the input -change buffering option name. -@return corresponding IBUF_USE_* value for the input variable -name, or IBUF_USE_COUNT if not able to find a match */ -static -ibuf_use_t -innodb_find_change_buffering_value( -/*===============================*/ - const char* input_name) /*!< in: input change buffering - option name */ -{ - ulint use; - - for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values); - use++) { - /* found a match */ - if (!innobase_strcasecmp( - input_name, innobase_change_buffering_values[use])) { - return((ibuf_use_t) use); - } - } - - /* Did not find any match */ - return(IBUF_USE_COUNT); -} - -/*************************************************************//** -Check if it is a valid value of innodb_change_buffering. This function is -registered as a callback with MySQL. 
-@return 0 for valid innodb_change_buffering */ -static -int -innodb_change_buffering_validate( -/*=============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* change_buffering_input; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - - ut_a(save != NULL); - ut_a(value != NULL); - - change_buffering_input = value->val_str(value, buff, &len); - - if (change_buffering_input != NULL) { - ibuf_use_t use; - - use = innodb_find_change_buffering_value( - change_buffering_input); - - if (use != IBUF_USE_COUNT) { - /* Find a matching change_buffering option value. */ - *static_cast<const char**>(save) = - innobase_change_buffering_values[use]; - - return(0); - } - } - - /* No corresponding change buffering option for user supplied - "change_buffering_input" */ - return(1); -} - -/****************************************************************//** -Update the system variable innodb_change_buffering using the "saved" -value. This function is registered as a callback with MySQL. */ -static -void -innodb_change_buffering_update( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - ibuf_use_t use; - - ut_a(var_ptr != NULL); - ut_a(save != NULL); - - use = innodb_find_change_buffering_value( - *static_cast<const char*const*>(save)); - - ut_a(use < IBUF_USE_COUNT); - - ibuf_use = use; - *static_cast<const char**>(var_ptr) = - *static_cast<const char*const*>(save); -} - -/*************************************************************//** -Just emit a warning that the usage of the variable is deprecated. 
-@return 0 */ -static -void -innodb_stats_sample_pages_update( -/*=============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ -#define STATS_SAMPLE_PAGES_DEPRECATED_MSG \ - "Using innodb_stats_sample_pages is deprecated and " \ - "the variable may be removed in future releases. " \ - "Please use innodb_stats_transient_sample_pages " \ - "instead." - - push_warning(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: %s\n", - STATS_SAMPLE_PAGES_DEPRECATED_MSG); - - srv_stats_transient_sample_pages = - *static_cast<const unsigned long long*>(save); -} - -/****************************************************************//** -Update the monitor counter according to the "set_option", turn -on/off or reset specified monitor counter. */ -static -void -innodb_monitor_set_option( -/*======================*/ - const monitor_info_t* monitor_info,/*!< in: monitor info for the monitor - to set */ - mon_option_t set_option) /*!< in: Turn on/off reset the - counter */ -{ - monitor_id_t monitor_id = monitor_info->monitor_id; - - /* If module type is MONITOR_GROUP_MODULE, it cannot be - turned on/off individually. It should never use this - function to set options */ - ut_a(!(monitor_info->monitor_type & MONITOR_GROUP_MODULE)); - - switch (set_option) { - case MONITOR_TURN_ON: - MONITOR_ON(monitor_id); - MONITOR_INIT(monitor_id); - MONITOR_SET_START(monitor_id); - - /* If the monitor to be turned on uses - exisitng monitor counter (status variable), - make special processing to remember existing - counter value. 
*/ - if (monitor_info->monitor_type - & MONITOR_EXISTING) { - srv_mon_process_existing_counter( - monitor_id, MONITOR_TURN_ON); - } - break; - - case MONITOR_TURN_OFF: - if (monitor_info->monitor_type & MONITOR_EXISTING) { - srv_mon_process_existing_counter( - monitor_id, MONITOR_TURN_OFF); - } - - MONITOR_OFF(monitor_id); - MONITOR_SET_OFF(monitor_id); - break; - - case MONITOR_RESET_VALUE: - srv_mon_reset(monitor_id); - break; - - case MONITOR_RESET_ALL_VALUE: - srv_mon_reset_all(monitor_id); - break; - - default: - ut_error; - } -} - -/****************************************************************//** -Find matching InnoDB monitor counters and update their status -according to the "set_option", turn on/off or reset specified -monitor counter. */ -static -void -innodb_monitor_update_wildcard( -/*===========================*/ - const char* name, /*!< in: monitor name to match */ - mon_option_t set_option) /*!< in: the set option, whether - to turn on/off or reset the counter */ -{ - ut_a(name); - - for (ulint use = 0; use < NUM_MONITOR; use++) { - ulint type; - monitor_id_t monitor_id = static_cast<monitor_id_t>(use); - monitor_info_t* monitor_info; - - if (!innobase_wildcasecmp( - srv_mon_get_name(monitor_id), name)) { - monitor_info = srv_mon_get_info(monitor_id); - - type = monitor_info->monitor_type; - - /* If the monitor counter is of MONITOR_MODULE - type, skip it. Except for those also marked with - MONITOR_GROUP_MODULE flag, which can be turned - on only as a module. */ - if (!(type & MONITOR_MODULE) - && !(type & MONITOR_GROUP_MODULE)) { - innodb_monitor_set_option(monitor_info, - set_option); - } - - /* Need to special handle counters marked with - MONITOR_GROUP_MODULE, turn on the whole module if - any one of it comes here. 
Currently, only - "module_buf_page" is marked with MONITOR_GROUP_MODULE */ - if (type & MONITOR_GROUP_MODULE) { - if ((monitor_id >= MONITOR_MODULE_BUF_PAGE) - && (monitor_id < MONITOR_MODULE_OS)) { - if (set_option == MONITOR_TURN_ON - && MONITOR_IS_ON( - MONITOR_MODULE_BUF_PAGE)) { - continue; - } - - srv_mon_set_module_control( - MONITOR_MODULE_BUF_PAGE, - set_option); - } else { - /* If new monitor is added with - MONITOR_GROUP_MODULE, it needs - to be added here. */ - ut_ad(0); - } - } - } - } -} - -/*************************************************************//** -Given a configuration variable name, find corresponding monitor counter -and return its monitor ID if found. -@return monitor ID if found, MONITOR_NO_MATCH if there is no match */ -static -ulint -innodb_monitor_id_by_name_get( -/*==========================*/ - const char* name) /*!< in: monitor counter namer */ -{ - ut_a(name); - - /* Search for wild character '%' in the name, if - found, we treat it as a wildcard match. We do not search for - single character wildcard '_' since our monitor names already contain - such character. To avoid confusion, we request user must include - at least one '%' character to activate the wildcard search. */ - if (strchr(name, '%')) { - return(MONITOR_WILDCARD_MATCH); - } - - /* Not wildcard match, check for an exact match */ - for (ulint i = 0; i < NUM_MONITOR; i++) { - if (!innobase_strcasecmp( - name, srv_mon_get_name(static_cast<monitor_id_t>(i)))) { - return(i); - } - } - - return(MONITOR_NO_MATCH); -} -/*************************************************************//** -Validate that the passed in monitor name matches at least one -monitor counter name with wildcard compare. 
-@return TRUE if at least one monitor name matches */ -static -ibool -innodb_monitor_validate_wildcard_name( -/*==================================*/ - const char* name) /*!< in: monitor counter namer */ -{ - for (ulint i = 0; i < NUM_MONITOR; i++) { - if (!innobase_wildcasecmp( - srv_mon_get_name(static_cast<monitor_id_t>(i)), name)) { - return(TRUE); - } - } - - return(FALSE); -} -/*************************************************************//** -Validate the passed in monitor name, find and save the -corresponding monitor name in the function parameter "save". -@return 0 if monitor name is valid */ -static -int -innodb_monitor_valid_byname( -/*========================*/ - void* save, /*!< out: immediate result - for update function */ - const char* name) /*!< in: incoming monitor name */ -{ - ulint use; - monitor_info_t* monitor_info; - - if (!name) { - return(1); - } - - use = innodb_monitor_id_by_name_get(name); - - /* No monitor name matches, nor it is wildcard match */ - if (use == MONITOR_NO_MATCH) { - return(1); - } - - if (use < NUM_MONITOR) { - monitor_info = srv_mon_get_info((monitor_id_t) use); - - /* If the monitor counter is marked with - MONITOR_GROUP_MODULE flag, then this counter - cannot be turned on/off individually, instead - it shall be turned on/off as a group using - its module name */ - if ((monitor_info->monitor_type & MONITOR_GROUP_MODULE) - && (!(monitor_info->monitor_type & MONITOR_MODULE))) { - sql_print_warning( - "Monitor counter '%s' cannot" - " be turned on/off individually." 
- " Please use its module name" - " to turn on/off the counters" - " in the module as a group.\n", - name); - - return(1); - } - - } else { - ut_a(use == MONITOR_WILDCARD_MATCH); - - /* For wildcard match, if there is not a single monitor - counter name that matches, treat it as an invalid - value for the system configuration variables */ - if (!innodb_monitor_validate_wildcard_name(name)) { - return(1); - } - } - - /* Save the configure name for innodb_monitor_update() */ - *static_cast<const char**>(save) = name; - - return(0); -} -/*************************************************************//** -Validate passed-in "value" is a valid monitor counter name. -This function is registered as a callback with MySQL. -@return 0 for valid name */ -static -int -innodb_monitor_validate( -/*====================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* name; - char* monitor_name; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - int ret; - - ut_a(save != NULL); - ut_a(value != NULL); - - name = value->val_str(value, buff, &len); - - /* monitor_name could point to memory from MySQL - or buff[]. Always dup the name to memory allocated - by InnoDB, so we can access it in another callback - function innodb_monitor_update() and free it appropriately */ - if (name) { - monitor_name = my_strdup(name, MYF(0)); - } else { - return(1); - } - - ret = innodb_monitor_valid_byname(save, monitor_name); - - if (ret) { - /* Validation failed */ - my_free(monitor_name); - } else { - /* monitor_name will be freed in separate callback function - innodb_monitor_update(). 
Assert "save" point to - the "monitor_name" variable */ - ut_ad(*static_cast<char**>(save) == monitor_name); - } - - return(ret); -} - -/****************************************************************//** -Update the system variable innodb_enable(disable/reset/reset_all)_monitor -according to the "set_option" and turn on/off or reset specified monitor -counter. */ -static -void -innodb_monitor_update( -/*==================*/ - THD* thd, /*!< in: thread handle */ - void* var_ptr, /*!< out: where the - formal string goes */ - const void* save, /*!< in: immediate result - from check function */ - mon_option_t set_option, /*!< in: the set option, - whether to turn on/off or - reset the counter */ - ibool free_mem) /*!< in: whether we will - need to free the memory */ -{ - monitor_info_t* monitor_info; - ulint monitor_id; - ulint err_monitor = 0; - const char* name; - - ut_a(save != NULL); - - name = *static_cast<const char*const*>(save); - - if (!name) { - monitor_id = MONITOR_DEFAULT_START; - } else { - monitor_id = innodb_monitor_id_by_name_get(name); - - /* Double check we have a valid monitor ID */ - if (monitor_id == MONITOR_NO_MATCH) { - return; - } - } - - if (monitor_id == MONITOR_DEFAULT_START) { - /* If user set the variable to "default", we will - print a message and make this set operation a "noop". - The check is being made here is because "set default" - does not go through validation function */ - if (thd) { - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_DEFAULT, - "Default value is not defined for " - "this set option. Please specify " - "correct counter or module name."); - } else { - sql_print_error( - "Default value is not defined for " - "this set option. 
Please specify " - "correct counter or module name.\n"); - } - - if (var_ptr) { - *(const char**) var_ptr = NULL; - } - } else if (monitor_id == MONITOR_WILDCARD_MATCH) { - innodb_monitor_update_wildcard(name, set_option); - } else { - monitor_info = srv_mon_get_info( - static_cast<monitor_id_t>(monitor_id)); - - ut_a(monitor_info); - - /* If monitor is already truned on, someone could already - collect monitor data, exit and ask user to turn off the - monitor before turn it on again. */ - if (set_option == MONITOR_TURN_ON - && MONITOR_IS_ON(monitor_id)) { - err_monitor = monitor_id; - goto exit; - } - - if (var_ptr) { - *(const char**) var_ptr = monitor_info->monitor_name; - } - - /* Depending on the monitor name is for a module or - a counter, process counters in the whole module or - individual counter. */ - if (monitor_info->monitor_type & MONITOR_MODULE) { - srv_mon_set_module_control( - static_cast<monitor_id_t>(monitor_id), - set_option); - } else { - innodb_monitor_set_option(monitor_info, set_option); - } - } -exit: - /* Only if we are trying to turn on a monitor that already - been turned on, we will set err_monitor. Print related - information */ - if (err_monitor) { - sql_print_warning("Monitor %s is already enabled.", - srv_mon_get_name((monitor_id_t) err_monitor)); - } - - if (free_mem && name) { - my_free((void*) name); - } - - return; -} - -#ifdef __WIN__ -/*************************************************************//** -Validate if passed-in "value" is a valid value for -innodb_buffer_pool_filename. On Windows, file names with colon (:) -are not allowed. 
- -@return 0 for valid name */ -static -int -innodb_srv_buf_dump_filename_validate( -/*==================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* buf_name; - char buff[OS_FILE_MAX_PATH]; - int len= sizeof(buff); - - ut_a(save != NULL); - ut_a(value != NULL); - - buf_name = value->val_str(value, buff, &len); - - if (buf_name) { - if (is_filename_allowed(buf_name, len, FALSE)){ - *static_cast<const char**>(save) = buf_name; - return(0); - } else { - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "InnoDB: innodb_buffer_pool_filename " - "cannot have colon (:) in the file name."); - - } - } - - return(1); -} -#else /* __WIN__ */ -# define innodb_srv_buf_dump_filename_validate NULL -#endif /* __WIN__ */ - -#ifdef UNIV_DEBUG -static char* srv_buffer_pool_evict; - -/****************************************************************//** -Evict all uncompressed pages of compressed tables from the buffer pool. -Keep the compressed pages in the buffer pool. 
-@return whether all uncompressed pages were evicted */ -static MY_ATTRIBUTE((warn_unused_result)) -bool -innodb_buffer_pool_evict_uncompressed(void) -/*=======================================*/ -{ - bool all_evicted = true; - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool = &buf_pool_ptr[i]; - - mutex_enter(&buf_pool->LRU_list_mutex); - - for (buf_block_t* block = UT_LIST_GET_LAST( - buf_pool->unzip_LRU); - block != NULL; ) { - buf_block_t* prev_block = UT_LIST_GET_PREV( - unzip_LRU, block); - ut_ad(buf_block_get_state(block) - == BUF_BLOCK_FILE_PAGE); - ut_ad(block->in_unzip_LRU_list); - ut_ad(block->page.in_LRU_list); - - mutex_enter(&block->mutex); - all_evicted = buf_LRU_free_page(&block->page, false); - mutex_exit(&block->mutex); - - if (all_evicted) { - - mutex_enter(&buf_pool->LRU_list_mutex); - block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); - } else { - - block = prev_block; - } - } - - mutex_exit(&buf_pool->LRU_list_mutex); - } - - return(all_evicted); -} - -/****************************************************************//** -Called on SET GLOBAL innodb_buffer_pool_evict=... -Handles some values specially, to evict pages from the buffer pool. -SET GLOBAL innodb_buffer_pool_evict='uncompressed' -evicts all uncompressed page frames of compressed tablespaces. */ -static -void -innodb_buffer_pool_evict_update( -/*============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var*var, /*!< in: pointer to system variable */ - void* var_ptr,/*!< out: ignored */ - const void* save) /*!< in: immediate result - from check function */ -{ - if (const char* op = *static_cast<const char*const*>(save)) { - if (!strcmp(op, "uncompressed")) { - for (uint tries = 0; tries < 10000; tries++) { - if (innodb_buffer_pool_evict_uncompressed()) { - return; - } - - os_thread_sleep(10000); - } - - /* We failed to evict all uncompressed pages. 
*/ - ut_ad(0); - } - } -} -#endif /* UNIV_DEBUG */ - -/****************************************************************//** -Update the system variable innodb_monitor_enable and enable -specified monitor counter. -This function is registered as a callback with MySQL. */ -static -void -innodb_enable_monitor_update( -/*=========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_ON, TRUE); -} - -/****************************************************************//** -Update the system variable innodb_monitor_disable and turn -off specified monitor counter. */ -static -void -innodb_disable_monitor_update( -/*==========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_OFF, TRUE); -} - -/****************************************************************//** -Update the system variable innodb_monitor_reset and reset -specified monitor counter(s). -This function is registered as a callback with MySQL. 
*/ -static -void -innodb_reset_monitor_update( -/*========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_VALUE, TRUE); -} - -/****************************************************************//** -Update the system variable innodb_monitor_reset_all and reset -all value related monitor counter. -This function is registered as a callback with MySQL. */ -static -void -innodb_reset_all_monitor_update( -/*============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_ALL_VALUE, - TRUE); -} - -static -void -innodb_defragment_frequency_update( -/*===============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - srv_defragment_frequency = (*static_cast<const uint*>(save)); - srv_defragment_interval = ut_microseconds_to_timer( - 1000000.0 / srv_defragment_frequency); -} - -/****************************************************************//** -Parse and enable InnoDB monitor counters during server startup. -User can list the monitor counters/groups to be enable by specifying -"loose-innodb_monitor_enable=monitor_name1;monitor_name2..." -in server configuration file or at the command line. The string -separate could be ";", "," or empty space. 
*/ -static -void -innodb_enable_monitor_at_startup( -/*=============================*/ - char* str) /*!< in/out: monitor counter enable list */ -{ - static const char* sep = " ;,"; - char* last; - - ut_a(str); - - /* Walk through the string, and separate each monitor counter - and/or counter group name, and calling innodb_monitor_update() - if successfully updated. Please note that the "str" would be - changed by strtok_r() as it walks through it. */ - for (char* option = strtok_r(str, sep, &last); - option; - option = strtok_r(NULL, sep, &last)) { - ulint ret; - char* option_name; - - ret = innodb_monitor_valid_byname(&option_name, option); - - /* The name is validated if ret == 0 */ - if (!ret) { - innodb_monitor_update(NULL, NULL, &option, - MONITOR_TURN_ON, FALSE); - } else { - sql_print_warning("Invalid monitor counter" - " name: '%s'", option); - } - } -} - -#ifdef UNIV_LINUX - -/****************************************************************//** -Update the innodb_sched_priority_cleaner variable and set the thread -priorities accordingly. 
*/ -static -void -innodb_sched_priority_cleaner_update( -/*=================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - if (srv_read_only_mode) { - return; - } - - ulint priority = *static_cast<const ulint *>(save); - ulint actual_priority; - ulint nice = 0; - - /* Set the priority for the LRU manager thread */ - ut_ad(buf_lru_manager_is_active); - nice = os_thread_get_priority(srv_lru_manager_tid); - actual_priority = os_thread_set_priority(srv_lru_manager_tid, - priority); - - if (UNIV_UNLIKELY(actual_priority != priority)) { - - if (actual_priority+nice != priority) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Failed to set the LRU manager thread " - "priority to %lu, " - "the nice is %lu and used priority is %lu", priority, - nice, actual_priority); - } - } else { - - srv_sched_priority_cleaner = priority; - } - - /* Set the priority for the page cleaner thread */ - - ut_ad(buf_page_cleaner_is_active); - nice = os_thread_get_priority(srv_cleaner_tid); - actual_priority = os_thread_set_priority(srv_cleaner_tid, priority); - if (UNIV_UNLIKELY(actual_priority != priority)) { - if (actual_priority+nice != priority) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Failed to set the page cleaner thread " - "priority to %lu, " - "the nice is %lu and used priority is %lu", priority, - nice, actual_priority); - } - } -} - -#if defined(UNIV_DEBUG) || (UNIV_PERF_DEBUG) - -/****************************************************************//** -Update the innodb_sched_priority_purge variable and set the thread -priorities accordingly. 
*/
static
void
innodb_sched_priority_purge_update(
/*===============================*/
	THD*				thd,	/*!< in: session that receives
						any warning */
	struct st_mysql_sys_var*	var,	/*!< in: system variable
						descriptor (not used) */
	void*				var_ptr,/*!< out: variable storage
						(not used) */
	const void*			save)	/*!< in: requested priority
						from the check function */
{
	const ulint	wanted = *static_cast<const ulint *>(save);

	if (srv_read_only_mode) {
		return;
	}

	/* Apply the priority to each purge thread in turn. The first
	thread that cannot be set aborts the whole update and leaves
	the variable unchanged. */
	for (ulint i = 0; i < srv_n_purge_threads; i++) {
		const ulint	nice
			= os_thread_get_priority(srv_purge_tids[i]);
		const ulint	applied
			= os_thread_set_priority(srv_purge_tids[i], wanted);

		if (applied == wanted || applied + nice == wanted) {
			continue;
		}

		push_warning_printf(thd,
				    Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "Failed to set the purge "
				    "thread priority to %lu, the "
				    "nice is %lu the current priority is %lu, "
				    "aborting priority update",
				    wanted, nice, applied);
		return;
	}

	srv_sched_priority_purge = wanted;
}

/****************************************************************//**
Update the innodb_sched_priority_io variable and set the thread
priorities accordingly.
*/
static
void
innodb_sched_priority_io_update(
/*============================*/
	THD*				thd,	/*!< in: session that receives
						any warning */
	struct st_mysql_sys_var*	var,	/*!< in: system variable
						descriptor (not used) */
	void*				var_ptr,/*!< out: variable storage
						(not used) */
	const void*			save)	/*!< in: requested priority
						from the check function */
{
	const ulint	wanted = *static_cast<const ulint *>(save);

	/* Apply the priority to each I/O thread in turn. The first
	thread that cannot be set aborts the whole update and leaves
	the variable unchanged. */
	for (ulint i = 0; i < srv_n_file_io_threads; i++) {
		const ulint	nice = os_thread_get_priority(srv_io_tids[i]);
		const ulint	applied = os_thread_set_priority(
			srv_io_tids[i], wanted);

		if (applied == wanted || applied + nice == wanted) {
			continue;
		}

		push_warning_printf(thd,
				    Sql_condition::WARN_LEVEL_WARN,
				    ER_WRONG_ARGUMENTS,
				    "Failed to set the I/O "
				    "thread priority to %lu, the "
				    "nice is %lu the current priority is %lu, "
				    "aborting priority update",
				    wanted, nice, applied);
		return;
	}

	srv_sched_priority_io = wanted;
}

/****************************************************************//**
Update the innodb_sched_priority_master variable and set the thread
priorities accordingly.
*/ -static -void -innodb_sched_priority_master_update( -/*================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - ulint priority = *static_cast<const lint *>(save); - ulint actual_priority; - ulint nice; - - if (srv_read_only_mode) { - return; - } - - nice = os_thread_get_priority(srv_master_tid); - actual_priority = os_thread_set_priority(srv_master_tid, priority); - if (UNIV_UNLIKELY(actual_priority != priority)) { - if (actual_priority+nice != priority) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Failed to set the master thread " - "priority to %lu, " - "the nice is %lu and the current priority is %lu", priority, - nice, actual_priority); - } - } else { - - srv_sched_priority_master = priority; - } -} - -#endif /* defined(UNIV_DEBUG) || (UNIV_PERF_DEBUG) */ - -#endif /* UNIV_LINUX */ - -#ifdef UNIV_DEBUG -/*************************************************************//** -Check if it is a valid value of innodb_track_changed_pages. -Changed pages tracking is not working correctly without initialization -procedure on server startup. The function allows to temporary -disable tracking, but only if the feature was enabled on startup. -This function is registered as a callback with MySQL. -@return 0 for valid innodb_track_changed_pages */ -static -int -innodb_track_changed_pages_validate( - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming bool */ -{ - long long intbuf = 0; - - if (value->val_int(value, &intbuf)) { - /* The value is NULL. That is invalid. 
*/ - return 1; - } - - if (srv_redo_log_thread_started) { - *reinterpret_cast<ulong*>(save) - = static_cast<ulong>(intbuf); - return 0; - } - - if (intbuf == srv_track_changed_pages) - return 0; - - return 1; -} -#endif - -/****************************************************************//** -Callback function for accessing the InnoDB variables from MySQL: -SHOW VARIABLES. */ -static -int -show_innodb_vars( -/*=============*/ - THD* thd, - SHOW_VAR* var, - char* buff) -{ - innodb_export_status(); - var->type = SHOW_ARRAY; - var->value = (char*) &innodb_status_variables; - - return(0); -} - -/****************************************************************//** -This function checks each index name for a table against reserved -system default primary index name 'GEN_CLUST_INDEX'. If a name -matches, this function pushes an warning message to the client, -and returns true. -@return true if the index name matches the reserved name */ -UNIV_INTERN -bool -innobase_index_name_is_reserved( -/*============================*/ - THD* thd, /*!< in/out: MySQL connection */ - const KEY* key_info, /*!< in: Indexes to be created */ - ulint num_of_keys) /*!< in: Number of indexes to - be created. */ -{ - const KEY* key; - uint key_num; /* index number */ - - for (key_num = 0; key_num < num_of_keys; key_num++) { - key = &key_info[key_num]; - - if (innobase_strcasecmp(key->name, - innobase_index_reserve_name) == 0) { - /* Push warning to mysql */ - push_warning_printf(thd, - Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_NAME_FOR_INDEX, - "Cannot Create Index with name " - "'%s'. 
The name is reserved " - "for the system default primary " - "index.", - innobase_index_reserve_name); - - my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), - innobase_index_reserve_name); - - return(true); - } - } - - return(false); -} - -/*********************************************************************** -Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id -@return the relevance ranking value */ -UNIV_INTERN -float -innobase_fts_retrieve_ranking( -/*============================*/ - FT_INFO * fts_hdl) /*!< in: FTS handler */ -{ - row_prebuilt_t* ft_prebuilt; - fts_result_t* result; - - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; - - ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt; - - if (ft_prebuilt->read_just_key) { - fts_ranking_t* ranking = - rbt_value(fts_ranking_t, result->current); - return(ranking->rank); - } - - /* Retrieve the ranking value for doc_id with value of - prebuilt->fts_doc_id */ - return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id)); -} - -/*********************************************************************** -Free the memory for the FTS handler */ -UNIV_INTERN -void -innobase_fts_close_ranking( -/*=======================*/ - FT_INFO * fts_hdl) -{ - fts_result_t* result; - - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; - - fts_query_free_result(result); - - my_free((uchar*) fts_hdl); - - return; -} - -/*********************************************************************** -Find and Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id -@return the relevance ranking value */ -UNIV_INTERN -float -innobase_fts_find_ranking( -/*======================*/ - FT_INFO* fts_hdl, /*!< in: FTS handler */ - uchar* record, /*!< in: Unused */ - uint len) /*!< in: Unused */ -{ - row_prebuilt_t* ft_prebuilt; - fts_result_t* result; - - ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt; - result = ((NEW_FT_INFO*) fts_hdl)->ft_result; - - /* Retrieve the ranking value for 
doc_id with value of - prebuilt->fts_doc_id */ - return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id)); -} - -#ifdef UNIV_DEBUG -static my_bool innodb_purge_run_now = TRUE; -static my_bool innodb_purge_stop_now = TRUE; -static my_bool innodb_log_checkpoint_now = TRUE; -static my_bool innodb_buf_flush_list_now = TRUE; -static my_bool innodb_track_redo_log_now = TRUE; - -/****************************************************************//** -Set the purge state to RUN. If purge is disabled then it -is a no-op. This function is registered as a callback with MySQL. */ -static -void -purge_run_now_set( -/*==============*/ - THD* thd /*!< in: thread handle */ - MY_ATTRIBUTE((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - MY_ATTRIBUTE((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - MY_ATTRIBUTE((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) { - trx_purge_run(); - } -} - -/****************************************************************//** -Set the purge state to STOP. If purge is disabled then it -is a no-op. This function is registered as a callback with MySQL. */ -static -void -purge_stop_now_set( -/*===============*/ - THD* thd /*!< in: thread handle */ - MY_ATTRIBUTE((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - MY_ATTRIBUTE((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - MY_ATTRIBUTE((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) { - trx_purge_stop(); - } -} - -/****************************************************************//** -Force innodb to checkpoint. 
*/ -static -void -checkpoint_now_set( -/*===============*/ - THD* thd /*!< in: thread handle */ - MY_ATTRIBUTE((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - MY_ATTRIBUTE((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - MY_ATTRIBUTE((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save) { - while (log_sys->last_checkpoint_lsn < log_sys->lsn) { - log_make_checkpoint_at(LSN_MAX, TRUE); - fil_flush_file_spaces(FIL_LOG); - } - - dberr_t err = fil_write_flushed_lsn(log_sys->lsn); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Failed to write flush lsn to the " - "system tablespace at checkpoint err=%s", - ut_strerr(err)); - } - } -} - -/****************************************************************//** -Force a dirty pages flush now. */ -static -void -buf_flush_list_now_set( -/*===================*/ - THD* thd /*!< in: thread handle */ - MY_ATTRIBUTE((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - MY_ATTRIBUTE((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - MY_ATTRIBUTE((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save) { - buf_flush_list(ULINT_MAX, LSN_MAX, NULL); - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - } -} - -/****************************************************************//** -Force log tracker to track the log synchronously. 
*/ -static -void -track_redo_log_now_set( -/*===================*/ - THD* thd /*!< in: thread handle */ - MY_ATTRIBUTE((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - MY_ATTRIBUTE((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - MY_ATTRIBUTE((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save && srv_track_changed_pages) { - - log_online_follow_redo_log(); - } -} - -#endif /* UNIV_DEBUG */ - -/*********************************************************************** -@return version of the extended FTS API */ -uint -innobase_fts_get_version() -/*======================*/ -{ - /* Currently this doesn't make much sense as returning - HA_CAN_FULLTEXT_EXT automatically mean this version is supported. - This supposed to ease future extensions. */ - return(2); -} - -/*********************************************************************** -@return Which part of the extended FTS API is supported */ -ulonglong -innobase_fts_flags() -/*================*/ -{ - return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT); -} - - -/*********************************************************************** -Find and Retrieve the FTS doc_id for the current result row -@return the document ID */ -ulonglong -innobase_fts_retrieve_docid( -/*========================*/ - FT_INFO_EXT * fts_hdl) /*!< in: FTS handler */ -{ - row_prebuilt_t* ft_prebuilt; - fts_result_t* result; - - ft_prebuilt = ((NEW_FT_INFO *)fts_hdl)->ft_prebuilt; - result = ((NEW_FT_INFO *)fts_hdl)->ft_result; - - if (ft_prebuilt->read_just_key) { - fts_ranking_t* ranking = - rbt_value(fts_ranking_t, result->current); - return(ranking->doc_id); - } - - return(ft_prebuilt->fts_doc_id); -} - - -/*********************************************************************** -Find and retrieve the size of the current result -@return number of matching rows */ -ulonglong -innobase_fts_count_matches( -/*=======================*/ - 
FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */ -{ - NEW_FT_INFO* handle = (NEW_FT_INFO *) fts_hdl; - - if (handle->ft_result->rankings_by_id != 0) { - return rbt_size(handle->ft_result->rankings_by_id); - } else { - return(0); - } -} - -/* These variables are never read by InnoDB or changed. They are a kind of -dummies that are needed by the MySQL infrastructure to call -buffer_pool_dump_now(), buffer_pool_load_now() and buffer_pool_load_abort() -by the user by doing: - SET GLOBAL innodb_buffer_pool_dump_now=ON; - SET GLOBAL innodb_buffer_pool_load_now=ON; - SET GLOBAL innodb_buffer_pool_load_abort=ON; -Their values are read by MySQL and displayed to the user when the variables -are queried, e.g.: - SELECT @@innodb_buffer_pool_dump_now; - SELECT @@innodb_buffer_pool_load_now; - SELECT @@innodb_buffer_pool_load_abort; */ -static my_bool innodb_buffer_pool_dump_now = FALSE; -static my_bool innodb_buffer_pool_load_now = FALSE; -static my_bool innodb_buffer_pool_load_abort = FALSE; - -/****************************************************************//** -Trigger a dump of the buffer pool if innodb_buffer_pool_dump_now is set -to ON. This function is registered as a callback with MySQL. */ -static -void -buffer_pool_dump_now( -/*=================*/ - THD* thd /*!< in: thread handle */ - MY_ATTRIBUTE((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - MY_ATTRIBUTE((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - MY_ATTRIBUTE((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save && !srv_read_only_mode) { - buf_dump_start(); - } -} - -/****************************************************************//** -Trigger a load of the buffer pool if innodb_buffer_pool_load_now is set -to ON. This function is registered as a callback with MySQL. 
*/ -static -void -buffer_pool_load_now( -/*=================*/ - THD* thd /*!< in: thread handle */ - __attribute__((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - __attribute__((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - __attribute__((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save) { - buf_load_start(); - } -} - -/****************************************************************//** -Abort a load of the buffer pool if innodb_buffer_pool_load_abort -is set to ON. This function is registered as a callback with MySQL. */ -static -void -buffer_pool_load_abort( -/*===================*/ - THD* thd /*!< in: thread handle */ - __attribute__((unused)), - struct st_mysql_sys_var* var /*!< in: pointer to system - variable */ - __attribute__((unused)), - void* var_ptr /*!< out: where the formal - string goes */ - __attribute__((unused)), - const void* save) /*!< in: immediate result from - check function */ -{ - if (*(my_bool*) save) { - buf_load_abort(); - } -} - -/** Update innodb_status_output or innodb_status_output_locks, -which control InnoDB "status monitor" output to the error log. -@param[in] thd thread handle -@param[in] var system variable -@param[out] var_ptr current value -@param[in] save to-be-assigned value */ -static -void -innodb_status_output_update( -/*========================*/ - THD* thd __attribute__((unused)), - struct st_mysql_sys_var* var __attribute__((unused)), - void* var_ptr __attribute__((unused)), - const void* save __attribute__((unused))) -{ - *static_cast<my_bool*>(var_ptr) = *static_cast<const my_bool*>(save); - /* Wakeup server monitor thread. 
*/ - os_event_set(srv_monitor_event); -} - -/****************************************************************** -Update the system variable innodb_encryption_threads */ -static -void -innodb_encryption_threads_update( -/*=============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - fil_crypt_set_thread_cnt(*static_cast<const uint*>(save)); -} - -/****************************************************************** -Update the system variable innodb_encryption_rotate_key_age */ -static -void -innodb_encryption_rotate_key_age_update( -/*====================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save)); -} - -/****************************************************************** -Update the system variable innodb_encryption_rotation_iops */ -static -void -innodb_encryption_rotation_iops_update( -/*===================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - fil_crypt_set_rotation_iops(*static_cast<const uint*>(save)); -} - -/****************************************************************** -Update the system variable innodb_encrypt_tables*/ -static -void -innodb_encrypt_tables_update( -/*=========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - 
formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save)); -} - -static SHOW_VAR innodb_status_variables_export[]= { - {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, - {NullS, NullS, SHOW_LONG} -}; - -static struct st_mysql_storage_engine innobase_storage_engine= -{ MYSQL_HANDLERTON_INTERFACE_VERSION }; - -#ifdef WITH_WSREP -void -wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno) -{ - WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be " - "caused by:\n\t" - "1) unsupported configuration options combination, please check documentation.\n\t" - "2) a bug in the code.\n\t" - "3) a database corruption.\n Node consistency compromized, " - "need to abort. Restart the node to resync with cluster.", - (long long)bf_seqno, (long long)victim_seqno); - abort(); -} -/*******************************************************************//** -This function is used to kill one transaction in BF. */ -UNIV_INTERN -int -wsrep_innobase_kill_one_trx( - void * const bf_thd_ptr, - const trx_t * const bf_trx, - trx_t *victim_trx, - ibool signal) -{ - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(victim_trx)); - ut_ad(bf_thd_ptr); - ut_ad(victim_trx); - - DBUG_ENTER("wsrep_innobase_kill_one_trx"); - THD *bf_thd = bf_thd_ptr ? (THD*) bf_thd_ptr : NULL; - THD *thd = (THD *) victim_trx->mysql_thd; - int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0; - - if (!thd) { - DBUG_PRINT("wsrep", ("no thd for conflicting lock")); - WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id); - DBUG_RETURN(1); - } - - if (!bf_thd) { - DBUG_PRINT("wsrep", ("no BF thd for conflicting lock")); - WSREP_WARN("no BF THD for trx: " TRX_ID_FMT, - bf_trx ? 
bf_trx->id : 0); - DBUG_RETURN(1); - } - - WSREP_LOG_CONFLICT(bf_thd, thd, TRUE); - - WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: " - TRX_ID_FMT, - signal, (long long)bf_seqno, - thd_get_thread_id(thd), - victim_trx->id); - - WSREP_DEBUG("Aborting query: %s", - (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void"); - - wsrep_thd_LOCK(thd); - DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock", - { - const char act[]= - "now " - "wait_for signal.wsrep_after_BF_victim_lock"; - DBUG_ASSERT(!debug_sync_set_action(bf_thd, - STRING_WITH_LEN(act))); - };); - - - if (wsrep_thd_query_state(thd) == QUERY_EXITING) { - WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT, - victim_trx->id); - wsrep_thd_UNLOCK(thd); - DBUG_RETURN(0); - } - - if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) { - WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT ", state: %d", - victim_trx->id, - wsrep_thd_get_conflict_state(thd)); - } - - switch (wsrep_thd_get_conflict_state(thd)) { - case NO_CONFLICT: - wsrep_thd_set_conflict_state(thd, MUST_ABORT); - break; - case MUST_ABORT: - WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state", - victim_trx->id); - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - DBUG_RETURN(0); - break; - case ABORTED: - case ABORTING: // fall through - default: - WSREP_DEBUG("victim " TRX_ID_FMT " in state %d", - victim_trx->id, wsrep_thd_get_conflict_state(thd)); - wsrep_thd_UNLOCK(thd); - DBUG_RETURN(0); - break; - } - - switch (wsrep_thd_query_state(thd)) { - case QUERY_COMMITTING: - enum wsrep_status rcode; - - WSREP_DEBUG("kill query for: %ld", - thd_get_thread_id(thd)); - WSREP_DEBUG("kill trx QUERY_COMMITTING for " TRX_ID_FMT, - victim_trx->id); - - if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); - } else { - wsrep_t *wsrep= get_wsrep(); - rcode = wsrep->abort_pre_commit( - wsrep, bf_seqno, - (wsrep_trx_id_t)victim_trx->id - ); - - switch (rcode) { - case WSREP_WARNING: - 
WSREP_DEBUG("cancel commit warning: " - TRX_ID_FMT, - victim_trx->id); - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - DBUG_RETURN(1); - break; - case WSREP_OK: - break; - default: - WSREP_ERROR( - "cancel commit bad exit: %d " - TRX_ID_FMT, - rcode, - victim_trx->id); - /* unable to interrupt, must abort */ - /* note: kill_mysql() will block, if we cannot. - * kill the lock holder first. - */ - abort(); - break; - } - } - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - break; - case QUERY_EXEC: - /* it is possible that victim trx is itself waiting for some - * other lock. We need to cancel this waiting - */ - WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT, - victim_trx->id); - - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; - if (victim_trx->lock.wait_lock) { - WSREP_DEBUG("victim has wait flag: %ld", - thd_get_thread_id(thd)); - lock_t* wait_lock = victim_trx->lock.wait_lock; - if (wait_lock) { - WSREP_DEBUG("canceling wait lock"); - victim_trx->lock.was_chosen_as_deadlock_victim= TRUE; - lock_cancel_waiting_and_release(wait_lock); - } - - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - } else { - /* abort currently executing query */ - DBUG_PRINT("wsrep",("sending KILL_QUERY to: %ld", - thd_get_thread_id(thd))); - WSREP_DEBUG("kill query for: %ld", - thd_get_thread_id(thd)); - /* Note that innobase_kill_connection will take lock_mutex - and trx_mutex */ - wsrep_thd_UNLOCK(thd); - wsrep_thd_awake(thd, signal); - - /* for BF thd, we need to prevent him from committing */ - if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); - } - } - break; - case QUERY_IDLE: - { - WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id); - - if (wsrep_thd_exec_mode(thd) == REPL_RECV) { - WSREP_DEBUG("kill BF IDLE, seqno: %lld", - (long long)wsrep_thd_trx_seqno(thd)); - wsrep_thd_UNLOCK(thd); - wsrep_abort_slave_trx(bf_seqno, - wsrep_thd_trx_seqno(thd)); - DBUG_RETURN(0); - } - /* This 
will lock thd from proceeding after net_read() */ - wsrep_thd_set_conflict_state(thd, ABORTING); - - wsrep_lock_rollback(); - - if (wsrep_aborting_thd_contains(thd)) { - WSREP_WARN("duplicate thd aborter %lu", - thd_get_thread_id(thd)); - } else { - wsrep_aborting_thd_enqueue(thd); - DBUG_PRINT("wsrep",("enqueuing trx abort for %lu", - thd_get_thread_id(thd))); - WSREP_DEBUG("enqueuing trx abort for (%lu)", - thd_get_thread_id(thd)); - } - - DBUG_PRINT("wsrep",("signalling wsrep rollbacker")); - WSREP_DEBUG("signaling aborter"); - wsrep_unlock_rollback(); - wsrep_thd_UNLOCK(thd); - - break; - } - default: - WSREP_WARN("bad wsrep query state: %d", - wsrep_thd_query_state(thd)); - wsrep_thd_UNLOCK(thd); - break; - } - - DBUG_RETURN(0); -} - -static int -wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd, - my_bool signal) -{ - DBUG_ENTER("wsrep_innobase_abort_thd"); - trx_t* victim_trx = thd_to_trx(victim_thd); - trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL; - WSREP_DEBUG("abort transaction: BF: %s victim: %s", - wsrep_thd_query(bf_thd), - wsrep_thd_query(victim_thd)); - - if (victim_trx) { - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - victim_trx->abort_type = TRX_WSREP_ABORT; - int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx, - victim_trx, signal); - trx_mutex_exit(victim_trx); - lock_mutex_exit(); - victim_trx->abort_type = TRX_SERVER_ABORT; - wsrep_srv_conc_cancel_wait(victim_trx); - DBUG_RETURN(rcode); - } else { - WSREP_DEBUG("victim does not have transaction"); - wsrep_thd_LOCK(victim_thd); - wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT); - wsrep_thd_UNLOCK(victim_thd); - wsrep_thd_awake(victim_thd, signal); - } - - DBUG_RETURN(-1); -} - -static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid) -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - if (wsrep_is_wsrep_xid(xid)) { - mtr_t mtr; - mtr_start(&mtr); - trx_sysf_t* sys_header = trx_sysf_get(&mtr); - trx_sys_update_wsrep_checkpoint(xid, 
sys_header, &mtr); - mtr_commit(&mtr); - innobase_flush_logs(hton); - return 0; - } else { - return 1; - } -} - -static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid) -{ - DBUG_ASSERT(hton == innodb_hton_ptr); - trx_sys_read_wsrep_checkpoint(xid); - return 0; -} - - -static void -wsrep_fake_trx_id( -/*==================*/ - handlerton *hton, - THD *thd) /*!< in: user thread handle */ -{ - mutex_enter(&trx_sys->mutex); - trx_id_t trx_id = trx_sys_get_new_trx_id(); - mutex_exit(&trx_sys->mutex); - - wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), trx_id); -} - -#endif /* WITH_WSREP */ - - -/*************************************************************//** -Empty free list algorithm. This function is registered as -a callback with MySQL. -@return 0 for valid algorithm */ -static -int -innodb_srv_empty_free_list_algorithm_validate( -/*===========================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - const char* algorithm_name; - char buff[STRING_BUFFER_USUAL_SIZE]; - int len = sizeof(buff); - ulint algo; - srv_empty_free_list_t algorithm; - - algorithm_name = value->val_str(value, buff, &len); - - if (!algorithm_name) { - return(1); - } - - for (algo = 0; algo < array_elements( - innodb_empty_free_list_algorithm_names - ) - 1; - algo++) { - if (!innobase_strcasecmp( - algorithm_name, - innodb_empty_free_list_algorithm_names[algo])) - break; - } - - if (algo == array_elements( innodb_empty_free_list_algorithm_names) - 1) - return(1); - - algorithm = static_cast<srv_empty_free_list_t>(algo); - if (!innodb_empty_free_list_algorithm_allowed(algorithm)) { - sql_print_warning( - "InnoDB: innodb_empty_free_list_algorithm " - "= 'backoff' requires at least" - " 20MB buffer pool instances.\n"); - return(1); - } - - *reinterpret_cast<ulong*>(save) = 
static_cast<ulong>(algorithm); - return(0); -} - -/* plugin options */ - -static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm, - PLUGIN_VAR_RQCMDARG, - "The algorithm InnoDB uses for page checksumming. Possible values are " - "CRC32 (hardware accelerated if the CPU supports it) " - "write crc32, allow any of the other checksums to match when reading; " - "STRICT_CRC32 " - "write crc32, do not allow other algorithms to match when reading; " - "INNODB " - "write a software calculated checksum, allow any other checksums " - "to match when reading; " - "STRICT_INNODB " - "write a software calculated checksum, do not allow other algorithms " - "to match when reading; " - "NONE " - "write a constant magic number, do not do any checksum verification " - "when reading (same as innodb_checksums=OFF); " - "STRICT_NONE " - "write a constant magic number, do not allow values other than that " - "magic number when reading; " - "Files updated when this option is set to crc32 or strict_crc32 will " - "not be readable by MySQL versions older than 5.6.3", - NULL, NULL, SRV_CHECKSUM_ALGORITHM_CRC32, - &innodb_checksum_algorithm_typelib); - - -static MYSQL_SYSVAR_ENUM(log_checksum_algorithm, srv_log_checksum_algorithm, - PLUGIN_VAR_RQCMDARG, - "The algorithm InnoDB uses for log block checksums. 
Possible values are " - "CRC32 (hardware accelerated if the CPU supports it) " - "write crc32, allow any of the other checksums to match when reading; " - "STRICT_CRC32 " - "write crc32, do not allow other algorithms to match when reading; " - "INNODB " - "write a software calculated checksum, allow any other checksums " - "to match when reading; " - "STRICT_INNODB " - "write a software calculated checksum, do not allow other algorithms " - "to match when reading; " - "NONE " - "write a constant magic number, do not do any checksum verification " - "when reading (same as innodb_checksums=OFF); " - "STRICT_NONE " - "write a constant magic number, do not allow values other than that " - "magic number when reading; " - "Logs created when this option is set to crc32/strict_crc32/none/strict_none " - "will not be readable by any MySQL version or Percona Server versions that do" - "not support this feature", - NULL, innodb_log_checksum_algorithm_update, SRV_CHECKSUM_ALGORITHM_INNODB, - &innodb_checksum_algorithm_typelib); - - -static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting " - "this to OFF", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!", - NULL, NULL, (1 << 9)/*512*/, OS_MIN_LOG_BLOCK_SIZE, - (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0); - -static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, - PLUGIN_VAR_READONLY, - "The common part for InnoDB table spaces.", - NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable InnoDB doublewrite buffer (enabled by default). 
" - "Disable with --skip-innodb-doublewrite.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(stats_include_delete_marked, - srv_stats_include_delete_marked, - PLUGIN_VAR_OPCMDARG, - "Scan delete marked records for persistent stat", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Prevent partial page writes, via atomic writes (beta). " - "The option is used to prevent partial writes in case of a crash/poweroff, " - "as faster alternative to doublewrite buffer. " - "Currently this option works only " - "on Linux only with FusionIO device, and directFS filesystem.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Preallocate files fast, using operating system functionality. On POSIX systems, posix_fallocate system call is used.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity, - PLUGIN_VAR_RQCMDARG, - "Number of IOPs the server can do. 
Tunes the background IO rate", - NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0); - -static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity, - PLUGIN_VAR_RQCMDARG, - "Limit to which innodb_io_capacity can be inflated.", - NULL, innodb_io_capacity_max_update, - SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100, - SRV_MAX_IO_CAPACITY_LIMIT, 0); - -static MYSQL_SYSVAR_ULONG(idle_flush_pct, - srv_idle_flush_pct, - PLUGIN_VAR_RQCMDARG, - "Up to what percentage of dirty pages should be flushed when innodb " - "finds it has spare resources to do so.", - NULL, NULL, 100, 0, 100, 0); - -#ifdef UNIV_DEBUG -static MYSQL_SYSVAR_BOOL(purge_run_now, innodb_purge_run_now, - PLUGIN_VAR_OPCMDARG, - "Set purge state to RUN", - NULL, purge_run_now_set, FALSE); - -static MYSQL_SYSVAR_BOOL(purge_stop_now, innodb_purge_stop_now, - PLUGIN_VAR_OPCMDARG, - "Set purge state to STOP", - NULL, purge_stop_now_set, FALSE); - -static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now, - PLUGIN_VAR_OPCMDARG, - "Force checkpoint now", - NULL, checkpoint_now_set, FALSE); - -static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now, - PLUGIN_VAR_OPCMDARG, - "Force dirty page flush now", - NULL, buf_flush_list_now_set, FALSE); - -static MYSQL_SYSVAR_BOOL(track_redo_log_now, - innodb_track_redo_log_now, - PLUGIN_VAR_OPCMDARG, - "Force log tracker to catch up with checkpoint now", - NULL, track_redo_log_now_set, FALSE); - -#endif /* UNIV_DEBUG */ - -static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, - PLUGIN_VAR_OPCMDARG, - "Number of UNDO log pages to purge in one batch from the history list.", - NULL, NULL, - 300, /* Default setting */ - 1, /* Minimum value */ - 5000, 0); /* Maximum value */ - -static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Purge threads can be from 1 to 32. 
Default is 1.", - NULL, NULL, - 4, /* Default setting */ - 1, /* Minimum value */ - SRV_MAX_N_PURGE_THREADS, 0); /* Maximum value */ - -static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Size of the mutex/lock wait array.", - NULL, NULL, - 1, /* Default setting */ - 1, /* Minimum value */ - 1024, 0); /* Maximum value */ - -static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown, - PLUGIN_VAR_OPCMDARG, - "Speeds up the shutdown process of the InnoDB storage engine. Possible " - "values are 0, 1 (faster) or 2 (fastest - crash-like).", - NULL, NULL, 1, 0, 2, 0); - -static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table, - PLUGIN_VAR_NOCMDARG, - "Stores each InnoDB table to an .ibd file in the database dir.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name, - PLUGIN_VAR_RQCMDARG, - "File format to use for new tables in .ibd files.", - innodb_file_format_name_validate, - innodb_file_format_name_update, innodb_file_format_default); - -/* "innobase_file_format_check" decides whether we would continue -booting the server if the file format stamped on the system -table space exceeds the maximum file format supported -by the server. Can be set during server startup at command -line or configure file, and a read only variable after -server startup */ -static MYSQL_SYSVAR_BOOL(file_format_check, innobase_file_format_check, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Whether to perform system file format check.", - NULL, NULL, TRUE); - -/* If a new file format is introduced, the file format -name needs to be updated accordingly. Please refer to -file_format_name_map[] defined in trx0sys.cc for the next -file format name. 
*/ -static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max, - PLUGIN_VAR_OPCMDARG, - "The highest file format in the tablespace.", - innodb_file_format_max_validate, - innodb_file_format_max_update, innodb_file_format_max_default); - -static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC, - "The user supplied stopword table name.", - innodb_stopword_table_validate, - NULL, - NULL); - -static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout, - PLUGIN_VAR_OPCMDARG, - "Write and flush logs every (n) second.", - NULL, NULL, 1, 0, 2700, 0); - -/* Changed to the THDVAR */ -//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, -// PLUGIN_VAR_OPCMDARG, -// "Set to 0 (write and flush once per second)," -// " 1 (write and flush at each commit)" -// " or 2 (write at commit, flush once per second).", -// NULL, NULL, 1, 0, 2, 0); - -static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit, - PLUGIN_VAR_NOCMDARG, - "Use global innodb_flush_log_at_trx_commit value. (default: ON).", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "With which method to flush data.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix, - PLUGIN_VAR_NOCMDARG, - "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Force InnoDB to load metadata of corrupted table.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. This option may be removed in future releases. " - "Please use READ COMMITTED transaction isolation level instead. 
" - "Force InnoDB to not use next-key locking, to use only row-level locking.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks, - PLUGIN_VAR_OPCMDARG, - "Whether to show records locked in SHOW INNODB STATUS.", - NULL, NULL, 0, 0, 1, 0); - -static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held, - PLUGIN_VAR_RQCMDARG, - "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.", - NULL, NULL, 10, 0, 1000, 0); - -#ifdef UNIV_LOG_ARCHIVE -static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Where full logs should be archived.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive, - PLUGIN_VAR_OPCMDARG, - "Set to 1 if you want to have logs archived.", - NULL, innodb_log_archive_update, FALSE); -#endif /* UNIV_LOG_ARCHIVE */ - -static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Path to InnoDB log files.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_ULONG(log_arch_expire_sec, - srv_log_arch_expire_sec, PLUGIN_VAR_OPCMDARG, - "Expiration time for archived innodb transaction logs.", - NULL, innodb_log_archive_expire_update, 0, 0, ~0UL, 0); - -static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct, - PLUGIN_VAR_RQCMDARG, - "Percentage of dirty pages allowed in bufferpool.", - NULL, innodb_max_dirty_pages_pct_update, 75.0, 0.001, 99.999, 0); - -static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm, - srv_max_dirty_pages_pct_lwm, - PLUGIN_VAR_RQCMDARG, - "Percentage of dirty pages at which flushing kicks in.", - NULL, innodb_max_dirty_pages_pct_lwm_update, 0.001, 0.000, 99.999, 0); - -static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm, - srv_adaptive_flushing_lwm, - PLUGIN_VAR_RQCMDARG, - "Percentage of log capacity below which no adaptive flushing happens.", - NULL, NULL, 10.0, 0.0, 70.0, 0); - -static 
MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing, - PLUGIN_VAR_NOCMDARG, - "Attempt flushing dirty pages to avoid IO bursts at checkpoints.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_ULONG(flushing_avg_loops, - srv_flushing_avg_loops, - PLUGIN_VAR_RQCMDARG, - "Number of iterations over which the background flushing is averaged.", - NULL, NULL, 30, 1, 1000, 0); - -static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag, - PLUGIN_VAR_RQCMDARG, - "Desired maximum length of the purge queue (0 = no limit)", - NULL, NULL, 0, 0, ~0UL, 0); - -static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay, - PLUGIN_VAR_RQCMDARG, - "Maximum delay of user threads in micro-seconds", - NULL, NULL, - 0L, /* Default seting */ - 0L, /* Minimum value */ - 10000000UL, 0); /* Maximum value */ - -static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR, - "Enable SHOW ENGINE INNODB STATUS output in the innodb_status.<pid> file", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata, - PLUGIN_VAR_OPCMDARG, - "Enable statistics gathering for metadata commands such as " - "SHOW TABLE STATUS for tables that use transient statistics (off by default)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages, - PLUGIN_VAR_RQCMDARG, - "Deprecated, use innodb_stats_transient_sample_pages instead", - NULL, innodb_stats_sample_pages_update, 8, 1, ~0ULL, 0); - -static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages, - srv_stats_transient_sample_pages, - PLUGIN_VAR_RQCMDARG, - "The number of leaf index pages to sample when calculating transient " - "statistics (if persistent 
statistics are not used, default 8)", - NULL, NULL, 8, 1, ~0ULL, 0); - -static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent, - PLUGIN_VAR_OPCMDARG, - "InnoDB persistent statistics enabled for all tables unless overridden " - "at table level", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc, - PLUGIN_VAR_OPCMDARG, - "InnoDB automatic recalculation of persistent statistics enabled for all " - "tables unless overridden at table level (automatic recalculation is only " - "done when InnoDB decides that the table has changed too much and needs a " - "new statistics)", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages, - srv_stats_persistent_sample_pages, - PLUGIN_VAR_RQCMDARG, - "The number of leaf index pages to sample when calculating persistent " - "statistics (by ANALYZE, default 20)", - NULL, NULL, 20, 1, ~0ULL, 0); - -static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter, - PLUGIN_VAR_RQCMDARG, - "The number of rows modified before we calculate new statistics (default 0 = current limits)", - NULL, NULL, 0, 0, ~0ULL, 0); - -static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional, - PLUGIN_VAR_RQCMDARG, - "Enable traditional statistic calculation based on number of configured pages (default true)", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled, - PLUGIN_VAR_OPCMDARG, - "Enable InnoDB adaptive hash index (enabled by default). " - "Disable with --skip-innodb-adaptive-hash-index.", - NULL, innodb_adaptive_hash_index_update, TRUE); - -/* btr_search_index_num is constrained to machine word size for historical -reasons. This limitation can be easily removed later. 
*/ -static MYSQL_SYSVAR_ULINT(adaptive_hash_index_partitions, btr_search_index_num, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of InnoDB adaptive hash index partitions (default 1: disable " - "partitioning)", - NULL, NULL, 1, 1, sizeof(ulint) * 8, 0); - -static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay, - PLUGIN_VAR_RQCMDARG, - "Replication thread delay (ms) on the slave server if " - "innodb_thread_concurrency is reached (0 by default)", - NULL, NULL, 0, 0, ~0UL, 0); - -static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages, - PLUGIN_VAR_OPCMDARG, - "Enables/disables the logging of entire compressed page images." - " InnoDB logs the compressed pages to prevent corruption if" - " the zlib compression algorithm changes." - " When turned OFF, InnoDB will assume that the zlib" - " compression algorithm doesn't change.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. This option may be removed in future releases, " - "together with the option innodb_use_sys_malloc and with the InnoDB's " - "internal memory allocator. " - "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", - NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024); - -static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, - PLUGIN_VAR_RQCMDARG, - "Data file autoextend increment in megabytes", - NULL, NULL, 64L, 1L, 1000L, 0); - -static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); - -static MYSQL_SYSVAR_BOOL(buffer_pool_populate, innodb_buffer_pool_populate, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Deprecated. 
This option has no effect and " - "will be removed in MariaDB 10.2.3.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ENUM(foreground_preflush, srv_foreground_preflush, - PLUGIN_VAR_OPCMDARG, - "The algorithm InnoDB uses for the query threads at sync preflush. " - "Possible values are " - "SYNC_PREFLUSH: perform a sync preflush as Oracle MySQL; " - "EXPONENTIAL_BACKOFF: (default) wait for the page cleaner flush.", - NULL, NULL, SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF, - &innodb_foreground_preflush_typelib); - -#ifdef UNIV_LINUX - -static MYSQL_SYSVAR_ULONG(sched_priority_cleaner, srv_sched_priority_cleaner, - PLUGIN_VAR_RQCMDARG, - "Nice value for the cleaner and LRU manager thread scheduling", - NULL, innodb_sched_priority_cleaner_update, 19, 0, 39, 0); - -#endif /* UNIV_LINUX */ - -#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG -static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Number of rw_locks protecting buffer pool page_hash. 
Rounded up to the next power of 2", - NULL, NULL, 16, 1, MAX_PAGE_HASH_LOCKS, 0); - -static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Number of pages reserved in doublewrite buffer for batch flushing", - NULL, NULL, 120, 1, 127, 0); - -#ifdef UNIV_LINUX - -static MYSQL_SYSVAR_ULONG(sched_priority_purge, srv_sched_priority_purge, - PLUGIN_VAR_RQCMDARG, - "Nice value for the purge thread scheduling", - NULL, innodb_sched_priority_purge_update, 19, 0, 39, 0); - -static MYSQL_SYSVAR_ULONG(sched_priority_io, srv_sched_priority_io, - PLUGIN_VAR_RQCMDARG, - "Nice value for the I/O handler thread scheduling", - NULL, innodb_sched_priority_io_update, 19, 0, 39, 0); - -static MYSQL_SYSVAR_ULONG(sched_priority_master, srv_sched_priority_master, - PLUGIN_VAR_RQCMDARG, - "Nice value for the master thread scheduling", - NULL, innodb_sched_priority_master_update, 19, 0, 39, 0); - -static MYSQL_SYSVAR_BOOL(priority_purge, srv_purge_thread_priority, - PLUGIN_VAR_OPCMDARG, - "Make purge coordinator and worker threads acquire shared resources with " - "priority", NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(priority_io, srv_io_thread_priority, - PLUGIN_VAR_OPCMDARG, - "Make I/O threads acquire shared resources with priority", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(priority_cleaner, srv_cleaner_thread_priority, - PLUGIN_VAR_OPCMDARG, - "Make buffer pool cleaner and LRU manager threads acquire shared resources " - "with priority", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(priority_master, srv_master_thread_priority, - PLUGIN_VAR_OPCMDARG, - "Make buffer pool cleaner thread acquire shared resources with priority", - NULL, NULL, FALSE); - -#endif /* UNIV_LINUX */ - -static MYSQL_SYSVAR_ULINT(cleaner_max_lru_time, srv_cleaner_max_lru_time, - PLUGIN_VAR_RQCMDARG, - "The maximum time limit for a single LRU tail flush iteration by the page " - "cleaner thread in miliseconds", - NULL, NULL, 
1000, 0, ~0UL, 0); - -static MYSQL_SYSVAR_ULINT(cleaner_max_flush_time, srv_cleaner_max_flush_time, - PLUGIN_VAR_RQCMDARG, - "The maximum time limit for a single flush list flush iteration by the page " - "cleaner thread in miliseconds", - NULL, NULL, 1000, 0, ~0UL, 0); - -static MYSQL_SYSVAR_ULINT(cleaner_flush_chunk_size, - srv_cleaner_flush_chunk_size, - PLUGIN_VAR_RQCMDARG, - "Divide page cleaner flush list flush batches into chunks of this size", - NULL, NULL, 100, 1, ~0UL, 0); - -static MYSQL_SYSVAR_ULINT(cleaner_lru_chunk_size, - srv_cleaner_lru_chunk_size, - PLUGIN_VAR_RQCMDARG, - "Divide page cleaner LRU list flush batches into chunks of this size", - NULL, NULL, 100, 1, ~0UL, 0); - -static MYSQL_SYSVAR_ULINT(cleaner_free_list_lwm, srv_cleaner_free_list_lwm, - PLUGIN_VAR_RQCMDARG, - "Page cleaner will keep on flushing the same buffer pool instance if its " - "free list length is below this percentage of innodb_lru_scan_depth", - NULL, NULL, 10, 0, 100, 0); - -static MYSQL_SYSVAR_BOOL(cleaner_eviction_factor, srv_cleaner_eviction_factor, - PLUGIN_VAR_OPCMDARG, - "Make page cleaner LRU flushes use evicted instead of flushed page counts " - "for its heuristics", - NULL, NULL, FALSE); - -#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ - -static MYSQL_SYSVAR_ENUM(cleaner_lsn_age_factor, - srv_cleaner_lsn_age_factor, - PLUGIN_VAR_OPCMDARG, - "The formula for LSN age factor for page cleaner adaptive flushing. " - "LEGACY: Original Oracle MySQL 5.6 formula. " - "HIGH_CHECKPOINT: (the default) Percona Server 5.6 formula.", - NULL, NULL, SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT, - &innodb_cleaner_lsn_age_factor_typelib); - -static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm, - srv_empty_free_list_algorithm, - PLUGIN_VAR_OPCMDARG, - "The algorithm to use for empty free list handling. 
Allowed values: " - "LEGACY: Original Oracle MySQL 5.6 handling with single page flushes; " - "BACKOFF: (default) Wait until cleaner produces a free page.", - innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF, - &innodb_empty_free_list_algorithm_typelib); - -static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm, - PLUGIN_VAR_RQCMDARG, - "The algorithm Innodb uses for deciding which locks to grant next when" - " a lock is released. Possible values are" - " FCFS" - " grant the locks in First-Come-First-Served order;" - " VATS" - " use the Variance-Aware-Transaction-Scheduling algorithm, which" - " uses an Eldest-Transaction-First heuristic.", - NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, - &innodb_lock_schedule_algorithm_typelib); - -static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of buffer pool instances, set to higher value on high-end machines to increase scalability", - NULL, NULL, 0L, 0L, MAX_BUFFER_POOLS, 1L); - -static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, - "Filename to/from which to dump/load the InnoDB buffer pool", - innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT); - -static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now, - PLUGIN_VAR_RQCMDARG, - "Trigger an immediate dump of the buffer pool into a file named @@innodb_buffer_pool_filename", - NULL, buffer_pool_dump_now, FALSE); - -static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown, - PLUGIN_VAR_RQCMDARG, - "Dump the buffer pool into a file named @@innodb_buffer_pool_filename", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct, - PLUGIN_VAR_RQCMDARG, - "Dump only the hottest N% of each buffer pool, defaults to 25", - NULL, NULL, 25, 1, 100, 0); - -#ifdef UNIV_DEBUG 
-static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict, - PLUGIN_VAR_RQCMDARG, - "Evict pages from the buffer pool", - NULL, innodb_buffer_pool_evict_update, ""); -#endif /* UNIV_DEBUG */ - -static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now, - PLUGIN_VAR_RQCMDARG, - "Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename", - NULL, buffer_pool_load_now, FALSE); - -static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort, - PLUGIN_VAR_RQCMDARG, - "Abort a currently running load of the buffer pool", - NULL, buffer_pool_load_abort, FALSE); - -/* there is no point in changing this during runtime, thus readonly */ -static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Load the buffer pool from a file named @@innodb_buffer_pool_filename", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(defragment, srv_defragment, - PLUGIN_VAR_RQCMDARG, - "Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing " - "defragmentation will be paused. And new defragmentation command will fail." - "Paused defragmentation commands will resume when this variable is set to " - "true again.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages, - PLUGIN_VAR_RQCMDARG, - "Number of pages considered at once when merging multiple pages to " - "defragment", - NULL, NULL, 7, 2, 32, 0); - -static MYSQL_SYSVAR_UINT(defragment_stats_accuracy, - srv_defragment_stats_accuracy, - PLUGIN_VAR_RQCMDARG, - "How many defragment stats changes there are before the stats " - "are written to persistent storage. 
Set to 0 meaning disable " - "defragment stats tracking.", - NULL, NULL, 0, 0, ~0U, 0); - -static MYSQL_SYSVAR_UINT(defragment_fill_factor_n_recs, - srv_defragment_fill_factor_n_recs, - PLUGIN_VAR_RQCMDARG, - "How many records of space defragmentation should leave on the page. " - "This variable, together with innodb_defragment_fill_factor, is introduced " - "so defragmentation won't pack the page too full and cause page split on " - "the next insert on every page. The variable indicating more defragmentation" - " gain is the one effective.", - NULL, NULL, 20, 1, 100, 0); - -static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor, - PLUGIN_VAR_RQCMDARG, - "A number between [0.7, 1] that tells defragmentation how full it should " - "fill a page. Default is 0.9. Number below 0.7 won't make much sense." - "This variable, together with innodb_defragment_fill_factor_n_recs, is " - "introduced so defragmentation won't pack the page too full and cause " - "page split on the next insert on every page. The variable indicating more " - "defragmentation gain is the one effective.", - NULL, NULL, 0.9, 0.7, 1, 0); - -static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency, - PLUGIN_VAR_RQCMDARG, - "Do not defragment a single index more than this number of time per second." - "This controls the number of time defragmentation thread can request X_LOCK " - "on an index. Defragmentation thread will check whether " - "1/defragment_frequency (s) has passed since it worked on this index last " - "time, and put the index back to the queue if not enough time has passed. 
" - "The actual frequency can only be lower than this given number.", - NULL, innodb_defragment_frequency_update, - SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0); - - -static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth, - PLUGIN_VAR_RQCMDARG, - "How deep to scan LRU to keep it clean", - NULL, NULL, 1024, 100, ~0UL, 0); - -static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors, - PLUGIN_VAR_OPCMDARG, - "Set to 0 (don't flush neighbors from buffer pool)," - " 1 (flush contiguous neighbors from buffer pool)" - " or 2 (flush neighbors from buffer pool)," - " when flushing a block", - NULL, NULL, 1, 0, 2, 0); - -static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency, - PLUGIN_VAR_RQCMDARG, - "Helps in performance tuning in heavily concurrent environments.", - innobase_commit_concurrency_validate, NULL, 0, 0, 1000, 0); - -static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter, - PLUGIN_VAR_RQCMDARG, - "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket", - NULL, NULL, 5000L, 1L, ~0UL, 0); - -#ifdef EXTENDED_FOR_KILLIDLE -#define kill_idle_help_text "If non-zero value, the idle session with transaction which is idle over the value in seconds is killed by InnoDB." -#else -#define kill_idle_help_text "No effect for this build." 
-#endif -static MYSQL_SYSVAR_LONGLONG(kill_idle_transaction, srv_kill_idle_transaction, - PLUGIN_VAR_RQCMDARG, kill_idle_help_text, NULL, NULL, 0, 0, LONG_MAX, 0); - -static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR, - "Number of file I/O threads in InnoDB.", - NULL, NULL, 4, 4, 64, 0); - -static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print, - PLUGIN_VAR_OPCMDARG, - "Whether to enable additional FTS diagnostic printout ", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache, - PLUGIN_VAR_OPCMDARG, - "Whether to disable OS system file cache for sort I/O", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name2, - PLUGIN_VAR_NOCMDARG, - "FTS internal auxiliary table to be checked", - innodb_internal_table_validate, - innodb_internal_table_update, NULL); - -static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "InnoDB Fulltext search cache size in bytes", - NULL, NULL, 8000000, 1600000, 80000000, 0); - -static MYSQL_SYSVAR_ULONG(ft_total_cache_size, fts_max_total_cache_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Total memory allocated for InnoDB Fulltext Search cache", - NULL, NULL, 640000000, 32000000, 1600000000, 0); - -static MYSQL_SYSVAR_ULONG(ft_result_cache_limit, fts_result_cache_limit, - PLUGIN_VAR_RQCMDARG, - "InnoDB Fulltext search query result cache limit in bytes", - NULL, NULL, 2000000000L, 1000000L, 4294967295UL, 0); - -static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "InnoDB Fulltext search minimum token size in characters", - NULL, NULL, 3, 0, 16, 0); - -static MYSQL_SYSVAR_ULONG(ft_max_token_size, fts_max_token_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "InnoDB Fulltext search maximum token size in characters", - NULL, NULL, 
FTS_MAX_WORD_LEN_IN_CHAR, 10, FTS_MAX_WORD_LEN_IN_CHAR, 0); - - -static MYSQL_SYSVAR_ULONG(ft_num_word_optimize, fts_num_word_optimize, - PLUGIN_VAR_OPCMDARG, - "InnoDB Fulltext search number of words to optimize for each optimize table call ", - NULL, NULL, 2000, 1000, 10000, 0); - -static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number", - NULL, NULL, 2, 1, 16, 0); - -static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Memory buffer size for index creation", - NULL, NULL, 1048576, 65536, 64<<20, 0); - -static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size, - PLUGIN_VAR_RQCMDARG, - "Maximum modification log file size for online index creation", - NULL, NULL, 128<<20, 65536, ~0ULL, 0); - -static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only, - PLUGIN_VAR_NOCMDARG, - "Only optimize the Fulltext index of the table", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(read_io_threads, innobase_read_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of background read I/O threads in InnoDB.", - NULL, NULL, 4, 1, 64, 0); - -static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of background write I/O threads in InnoDB.", - NULL, NULL, 4, 1, 64, 0); - -static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Helps to save your data in case the disk image of the database becomes corrupt.", - NULL, NULL, 0, 0, 6, 0); - -static MYSQL_SYSVAR_ULONG(page_size, srv_page_size, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Page size to use for all InnoDB tablespaces.", - NULL, NULL, UNIV_PAGE_SIZE_DEF, - UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0); - -static 
MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The size of the buffer which InnoDB uses to write log to the log files on disk.", - NULL, NULL, 16*1024*1024L, 256*1024L, LONG_MAX, 1024); - -static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Size of each log file in a log group.", - NULL, NULL, 48*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L); - -static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of log files in the log group. InnoDB writes to the files in a circular fashion.", - NULL, NULL, 2, 2, SRV_N_LOG_FILES_MAX, 0); - -/* Note that the default and minimum values are set to 0 to -detect if the option is passed and print deprecation message */ -static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.", - NULL, NULL, 0, 0, 10, 0); - -static MYSQL_SYSVAR_UINT(old_blocks_pct, innobase_old_blocks_pct, - PLUGIN_VAR_RQCMDARG, - "Percentage of the buffer pool to reserve for 'old' blocks.", - NULL, innodb_old_blocks_pct_update, 100 * 3 / 8, 5, 95, 0); - -static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms, - PLUGIN_VAR_RQCMDARG, - "Move blocks to the 'new' end of the buffer pool if the first access" - " was at least this many milliseconds ago." 
- " The timeout is disabled if 0.", - NULL, NULL, 1000, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_LONG(open_files, innobase_open_files, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "How many files at the maximum InnoDB keeps open at the same time.", - NULL, NULL, 0L, 0L, LONG_MAX, 0); - -static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds, - PLUGIN_VAR_RQCMDARG, - "Count of spin-loop rounds in InnoDB mutexes (30 by default)", - NULL, NULL, 30L, 0L, ~0UL, 0); - -static MYSQL_SYSVAR_ULONG(spin_wait_delay, srv_spin_wait_delay, - PLUGIN_VAR_OPCMDARG, - "Maximum delay between polling for a spin lock (6 by default)", - NULL, NULL, 6L, 0L, ~0UL, 0); - -static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency, - PLUGIN_VAR_RQCMDARG, - "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.", - NULL, NULL, 0, 0, 1000, 0); - -#ifdef HAVE_ATOMIC_BUILTINS -static MYSQL_SYSVAR_ULONG( - adaptive_max_sleep_delay, srv_adaptive_max_sleep_delay, - PLUGIN_VAR_RQCMDARG, - "The upper limit of the sleep delay in usec. Value of 0 disables it.", - NULL, NULL, - 150000, /* Default setting */ - 0, /* Minimum value */ - 1000000, 0); /* Maximum value */ -#endif /* HAVE_ATOMIC_BUILTINS */ - -static MYSQL_SYSVAR_BOOL(prefix_index_cluster_optimization, - srv_prefix_index_cluster_optimization, - PLUGIN_VAR_OPCMDARG, - "Enable prefix optimization to sometimes avoid cluster index lookups.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay, - PLUGIN_VAR_RQCMDARG, - "Time of innodb thread sleeping before joining InnoDB queue (usec). 
" - "Value 0 disable a sleep", - NULL, NULL, - 10000L, - 0L, - 1000000L, 0); - -static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Path to individual files and their sizes.", - NULL, NULL, NULL); - -static MYSQL_SYSVAR_STR(undo_directory, srv_undo_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Directory where undo tablespace files live, this path can be absolute.", - NULL, NULL, "."); - -static MYSQL_SYSVAR_ULONG(undo_tablespaces, srv_undo_tablespaces, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of undo tablespaces to use. ", - NULL, NULL, - 0L, /* Default seting */ - 0L, /* Minimum value */ - 126L, 0); /* Maximum value */ - -static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs, - PLUGIN_VAR_OPCMDARG, - "Number of undo logs to use.", - NULL, NULL, - TRX_SYS_N_RSEGS, /* Default setting */ - 1, /* Minimum value */ - TRX_SYS_N_RSEGS, 0); /* Maximum value */ - -/* Alias for innodb_undo_logs, this config variable is deprecated. 
*/ -static MYSQL_SYSVAR_ULONG(rollback_segments, srv_undo_logs, - PLUGIN_VAR_OPCMDARG, - "Number of undo logs to use (deprecated).", - NULL, NULL, - TRX_SYS_N_RSEGS, /* Default setting */ - 1, /* Minimum value */ - TRX_SYS_N_RSEGS, 0); /* Maximum value */ - -static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The AUTOINC lock modes supported by InnoDB: " - "0 => Old style AUTOINC locking (for backward" - " compatibility) " - "1 => New style AUTOINC locking " - "2 => No AUTOINC locking (unsafe for SBR)", - NULL, NULL, - AUTOINC_NEW_STYLE_LOCKING, /* Default setting */ - AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ - AUTOINC_NO_LOCKING, 0); /* Maximum value */ - -static MYSQL_SYSVAR_STR(version, innodb_version_str, - PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY, - "Percona-InnoDB-plugin version", NULL, NULL, INNODB_VERSION_STR); - -static MYSQL_SYSVAR_BOOL(use_sys_malloc, srv_use_sys_malloc, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "DEPRECATED. This option may be removed in future releases, " - "together with the InnoDB's internal memory allocator. 
" - "Use OS memory allocator instead of InnoDB's internal memory allocator", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Use native AIO if supported on this platform.", - NULL, NULL, TRUE); - -#ifdef HAVE_LIBNUMA -static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Use NUMA interleave memory policy to allocate InnoDB buffer pool.", - NULL, NULL, FALSE); -#endif // HAVE_LIBNUMA - -static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable binlog for applications direct access InnoDB through InnoDB APIs", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(api_enable_mdl, ib_mdl_enabled, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable MDL for applications direct access InnoDB through InnoDB APIs", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(api_disable_rowlock, ib_disable_row_lock, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Disable row lock when direct access InnoDB through InnoDB APIs", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(api_trx_level, ib_trx_level_setting, - PLUGIN_VAR_OPCMDARG, - "InnoDB API transaction isolation level", - NULL, NULL, - 0, /* Default setting */ - 0, /* Minimum value */ - 3, 0); /* Maximum value */ - -static MYSQL_SYSVAR_ULONG(api_bk_commit_interval, ib_bk_commit_interval, - PLUGIN_VAR_OPCMDARG, - "Background commit interval in seconds", - NULL, NULL, - 5, /* Default setting */ - 1, /* Minimum value */ - 1024 * 1024 * 1024, 0); /* Maximum value */ - -static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering, - PLUGIN_VAR_RQCMDARG, - "Buffer changes to reduce random access: " - "OFF, ON, inserting, deleting, changing, or purging.", - innodb_change_buffering_validate, - innodb_change_buffering_update, "all"); - -static MYSQL_SYSVAR_UINT(change_buffer_max_size, - innobase_change_buffer_max_size, - 
PLUGIN_VAR_RQCMDARG, - "Maximum on-disk size of change buffer in terms of percentage" - " of the buffer pool.", - NULL, innodb_change_buffer_max_size_update, - CHANGE_BUFFER_DEFAULT_SIZE, 0, 50, 0); - -static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, - PLUGIN_VAR_RQCMDARG, - "Specifies how InnoDB index statistics collection code should " - "treat NULLs", - NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib); - -#ifdef UNIV_DEBUG -/* Make this variable dynamic for debug builds to -provide a testcase sync facility */ -#define track_changed_pages_flags PLUGIN_VAR_NOCMDARG -#define track_changed_pages_check innodb_track_changed_pages_validate -#else -#define track_changed_pages_flags PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY -#define track_changed_pages_check NULL -#endif -static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages, - track_changed_pages_flags, - "Track the redo log for changed pages and output a changed page bitmap", - track_changed_pages_check, - NULL, FALSE); - -static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size, - PLUGIN_VAR_RQCMDARG, - "The maximum size of changed page bitmap files", - NULL, NULL, 100*1024*1024ULL, 4096ULL, ULONGLONG_MAX, 0); - -static MYSQL_SYSVAR_ULONGLONG(max_changed_pages, srv_max_changed_pages, - PLUGIN_VAR_RQCMDARG, - "The maximum number of rows for " - "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES table, " - "0 - unlimited", - NULL, NULL, 1000000, 0, ~0ULL, 0); - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug, - PLUGIN_VAR_RQCMDARG, - "Debug flags for InnoDB change buffering (0=none, 2=crash at merge)", - NULL, NULL, 0, 0, 2, 0); - -static MYSQL_SYSVAR_BOOL(disable_background_merge, - srv_ibuf_disable_background_merge, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG, - "Disable change buffering merges by the master thread", - NULL, NULL, FALSE); -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - -static 
MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency, - PLUGIN_VAR_RQCMDARG, - "A number between [0, 100] that tells how oftern buffer pool dump status " - "in percentages should be printed. E.g. 10 means that buffer pool dump " - "status is printed when every 10% of number of buffer pool pages are " - "dumped. Default is 0 (only start and end status is printed).", - NULL, NULL, 0, 0, 100, 0); - -#ifdef WITH_INNODB_DISALLOW_WRITES -/******************************************************* - * innobase_disallow_writes variable definition * - *******************************************************/ - -/* Must always init to FALSE. */ -static my_bool innobase_disallow_writes = FALSE; - -/************************************************************************** -An "update" method for innobase_disallow_writes variable. */ -static -void -innobase_disallow_writes_update( -/*============================*/ - THD* thd, /* in: thread handle */ - st_mysql_sys_var* var, /* in: pointer to system - variable */ - void* var_ptr, /* out: pointer to dynamic - variable */ - const void* save) /* in: temporary storage */ -{ - *(my_bool*)var_ptr = *(my_bool*)save; - ut_a(srv_allow_writes_event); - if (*(my_bool*)var_ptr) - os_event_reset(srv_allow_writes_event); - else - os_event_set(srv_allow_writes_event); -} - -static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes, - PLUGIN_VAR_NOCMDOPT, - "Tell InnoDB to stop any writes to disk", - NULL, innobase_disallow_writes_update, FALSE); -#endif /* WITH_INNODB_DISALLOW_WRITES */ -static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead, - PLUGIN_VAR_NOCMDARG, - "Whether to use read ahead for random access within an extent.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold, - PLUGIN_VAR_RQCMDARG, - "Number of pages that must be accessed sequentially for InnoDB to " - "trigger a readahead.", - NULL, NULL, 56, 0, 64, 0); - -static 
MYSQL_SYSVAR_STR(monitor_enable, innobase_enable_monitor_counter, - PLUGIN_VAR_RQCMDARG, - "Turn on a monitor counter", - innodb_monitor_validate, - innodb_enable_monitor_update, NULL); - -static MYSQL_SYSVAR_STR(monitor_disable, innobase_disable_monitor_counter, - PLUGIN_VAR_RQCMDARG, - "Turn off a monitor counter", - innodb_monitor_validate, - innodb_disable_monitor_update, NULL); - -static MYSQL_SYSVAR_STR(monitor_reset, innobase_reset_monitor_counter, - PLUGIN_VAR_RQCMDARG, - "Reset a monitor counter", - innodb_monitor_validate, - innodb_reset_monitor_update, NULL); - -static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter, - PLUGIN_VAR_RQCMDARG, - "Reset all values for a monitor counter", - innodb_monitor_validate, - innodb_reset_all_monitor_update, NULL); - -static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor, - PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.", - NULL, innodb_status_output_update, FALSE); - -static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor, - PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log." - " Requires innodb_status_output=ON.", - NULL, innodb_status_output_update, FALSE); - -static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks, - PLUGIN_VAR_OPCMDARG, - "Print all deadlocks to MySQL error log (off by default)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct, - zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG, - "If the compression failure rate of a table is greater than this number" - " more padding is added to the pages to reduce the failures. 
A value of" - " zero implies no padding", - NULL, NULL, 5, 0, 100, 0); - -static MYSQL_SYSVAR_ULONG(compression_pad_pct_max, - zip_pad_max, PLUGIN_VAR_OPCMDARG, - "Percentage of empty space on a data page that can be reserved" - " to make the page compressible.", - NULL, NULL, 50, 0, 75, 0); - -static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Start InnoDB in read only mode (off by default)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled, - PLUGIN_VAR_OPCMDARG, - "Enable INFORMATION_SCHEMA.innodb_cmp_per_index, " - "may have negative impact on performance (off by default)", - NULL, innodb_cmp_per_index_update, FALSE); - -#ifdef UNIV_DEBUG -static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT, - "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()", - NULL, NULL, 0, 0, 1024, 0); - -static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug, - btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG, - "Artificially limit the number of records per B-tree page (0=unlimited).", - NULL, NULL, 0, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, - srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT, - "Pause actual purging any delete-marked records, but merely update the purge view. 
" - "It is to create artificially the situation the purge view have been updated " - "but the each purges were not done yet.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_UINT(data_file_size_debug, - srv_sys_space_size_debug, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "InnoDB system tablespace size to be set in recovery.", - NULL, NULL, 0, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_ULONG(fil_make_page_dirty_debug, - srv_fil_make_page_dirty_debug, PLUGIN_VAR_OPCMDARG, - "Make the first page of the given tablespace dirty.", - NULL, innodb_make_page_dirty, 0, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_ULONG(saved_page_number_debug, - srv_saved_page_number_debug, PLUGIN_VAR_OPCMDARG, - "An InnoDB page number.", - NULL, innodb_save_page_no, 0, 0, UINT_MAX32, 0); - -#endif /* UNIV_DEBUG */ - -static MYSQL_SYSVAR_UINT(simulate_comp_failures, srv_simulate_comp_failures, - PLUGIN_VAR_NOCMDARG, - "Simulate compression failures.", - NULL, NULL, 0, 0, 99, 0); - -static MYSQL_SYSVAR_BOOL(force_primary_key, - srv_force_primary_key, - PLUGIN_VAR_OPCMDARG, - "Do not allow to create table without primary key (off by default)", - NULL, NULL, FALSE); - -const char *corrupt_table_action_names[]= -{ - "assert", /* 0 */ - "warn", /* 1 */ - "salvage", /* 2 */ - NullS -}; -TYPELIB corrupt_table_action_typelib= -{ - array_elements(corrupt_table_action_names) - 1, "corrupt_table_action_typelib", - corrupt_table_action_names, NULL -}; -static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table, - PLUGIN_VAR_RQCMDARG, - "Warn corruptions of user tables as 'corrupt table' instead of not crashing itself, " - "when used with file_per_table. 
" - "All file io for the datafile after detected as corrupt are disabled, " - "except for the deletion.", - NULL, NULL, 0, &corrupt_table_action_typelib); - -static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks, - PLUGIN_VAR_NOCMDARG, - "###EXPERIMENTAL### if enabled, transactions will get S row locks instead " - "of X locks for fake changes. If disabled, fake change transactions will " - "not take any locks at all.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Print stacktrace on long semaphore wait (off by default supported only on linux)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, - PLUGIN_VAR_RQCMDARG, - "Compression level used for zlib compression. 0 is no compression" - ", 1 is fastest, 9 is best compression and default is 6.", - NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); - -static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, - PLUGIN_VAR_OPCMDARG, - "Use trim. Default FALSE.", - NULL, NULL, FALSE); - -static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 }; -static TYPELIB page_compression_algorithms_typelib= -{ - array_elements(page_compression_algorithms) - 1, 0, - page_compression_algorithms, 0 -}; -static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm, - PLUGIN_VAR_OPCMDARG, - "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, or bzip2", - innodb_compression_algorithm_validate, NULL, - /* We use here the largest number of supported compression method to - enable all those methods that are available. Availability of compression - method is verified on innodb_compression_algorithm_validate function. 
*/ - PAGE_ZLIB_ALGORITHM, - &page_compression_algorithms_typelib); - -static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of multi-threaded flush threads", - NULL, NULL, - MTFLUSH_DEFAULT_WORKER, /* Default setting */ - 1, /* Minimum setting */ - MTFLUSH_MAX_WORKER, /* Max setting */ - 0); - -static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Use multi-threaded flush. Default FALSE.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Maximum number of seconds that semaphore times out in InnoDB.", - NULL, NULL, - DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT, /* Default setting */ - 1, /* Minimum setting */ - UINT_MAX32, /* Maximum setting */ - 0); - -static const char* srv_encrypt_tables_names[] = { "OFF", "ON", "FORCE", 0 }; -static TYPELIB srv_encrypt_tables_typelib = { - array_elements(srv_encrypt_tables_names)-1, 0, srv_encrypt_tables_names, - NULL -}; -static MYSQL_SYSVAR_ENUM(encrypt_tables, srv_encrypt_tables, - PLUGIN_VAR_OPCMDARG, - "Enable encryption for tables. " - "Don't forget to enable --innodb-encrypt-log too", - innodb_encrypt_tables_validate, - innodb_encrypt_tables_update, - 0, - &srv_encrypt_tables_typelib); - -static MYSQL_SYSVAR_UINT(encryption_threads, srv_n_fil_crypt_threads, - PLUGIN_VAR_RQCMDARG, - "Number of threads performing background key rotation and " - "scrubbing", - NULL, - innodb_encryption_threads_update, - srv_n_fil_crypt_threads, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_UINT(encryption_rotate_key_age, - srv_fil_crypt_rotate_key_age, - PLUGIN_VAR_RQCMDARG, - "Key rotation - re-encrypt in background " - "all pages that were encrypted with a key that " - "many (or more) versions behind. 
Value 0 indicates " - "that key rotation is disabled.", - NULL, - innodb_encryption_rotate_key_age_update, - 1, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_UINT(encryption_rotation_iops, srv_n_fil_crypt_iops, - PLUGIN_VAR_RQCMDARG, - "Use this many iops for background key rotation", - NULL, - innodb_encryption_rotation_iops_update, - srv_n_fil_crypt_iops, 0, UINT_MAX32, 0); - -static MYSQL_SYSVAR_BOOL(scrub_log, srv_scrub_log, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Enable background redo log (ib_logfile0, ib_logfile1...) scrubbing", - 0, 0, 0); - -static MYSQL_SYSVAR_ULONGLONG(scrub_log_speed, innodb_scrub_log_speed, - PLUGIN_VAR_OPCMDARG, - "Background redo log scrubbing speed in bytes/sec", - NULL, NULL, - 256, /* 256 bytes/sec, corresponds to 2000 ms scrub_log_interval */ - 1, /* min */ - 50000, 0); /* 50Kbyte/sec, corresponds to 10 ms scrub_log_interval */ - -static MYSQL_SYSVAR_BOOL(encrypt_log, srv_encrypt_log, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Enable redo log encryption", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(immediate_scrub_data_uncompressed, - srv_immediate_scrub_data_uncompressed, - 0, - "Enable scrubbing of data", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(background_scrub_data_uncompressed, - srv_background_scrub_data_uncompressed, - 0, - "Enable scrubbing of uncompressed data by " - "background threads (same as encryption_threads)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(background_scrub_data_compressed, - srv_background_scrub_data_compressed, - 0, - "Enable scrubbing of compressed data by " - "background threads (same as encryption_threads)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_UINT(background_scrub_data_check_interval, - srv_background_scrub_data_check_interval, - 0, - "check if spaces needs scrubbing every " - "innodb_background_scrub_data_check_interval " - "seconds", - NULL, NULL, - srv_background_scrub_data_check_interval, - 1, - UINT_MAX32, 0); - -static 
MYSQL_SYSVAR_UINT(background_scrub_data_interval, - srv_background_scrub_data_interval, - 0, - "scrub spaces that were last scrubbed longer than " - " innodb_background_scrub_data_interval seconds ago", - NULL, NULL, - srv_background_scrub_data_interval, - 1, - UINT_MAX32, 0); - -#ifdef UNIV_DEBUG -static MYSQL_SYSVAR_BOOL(debug_force_scrubbing, - srv_scrub_force_testing, - 0, - "Perform extra scrubbing to increase test exposure", - NULL, NULL, FALSE); -#endif /* UNIV_DEBUG */ - -static MYSQL_SYSVAR_BOOL(instrument_semaphores, srv_instrument_semaphores, - PLUGIN_VAR_OPCMDARG, - "Enable semaphore request instrumentation. This could have some effect on performance but allows better" - " information on long semaphore wait problems. (Default: not enabled)", - 0, 0, FALSE); - -static struct st_mysql_sys_var* innobase_system_variables[]= { - MYSQL_SYSVAR(log_block_size), - MYSQL_SYSVAR(additional_mem_pool_size), - MYSQL_SYSVAR(api_trx_level), - MYSQL_SYSVAR(api_bk_commit_interval), - MYSQL_SYSVAR(autoextend_increment), - MYSQL_SYSVAR(buffer_pool_size), - MYSQL_SYSVAR(buffer_pool_populate), - MYSQL_SYSVAR(buffer_pool_instances), - MYSQL_SYSVAR(buffer_pool_filename), - MYSQL_SYSVAR(buffer_pool_dump_now), - MYSQL_SYSVAR(buffer_pool_dump_at_shutdown), - MYSQL_SYSVAR(buffer_pool_dump_pct), -#ifdef UNIV_DEBUG - MYSQL_SYSVAR(buffer_pool_evict), -#endif /* UNIV_DEBUG */ - MYSQL_SYSVAR(buffer_pool_load_now), - MYSQL_SYSVAR(buffer_pool_load_abort), - MYSQL_SYSVAR(buffer_pool_load_at_startup), - MYSQL_SYSVAR(defragment), - MYSQL_SYSVAR(defragment_n_pages), - MYSQL_SYSVAR(defragment_stats_accuracy), - MYSQL_SYSVAR(defragment_fill_factor), - MYSQL_SYSVAR(defragment_fill_factor_n_recs), - MYSQL_SYSVAR(defragment_frequency), - MYSQL_SYSVAR(lru_scan_depth), - MYSQL_SYSVAR(flush_neighbors), - MYSQL_SYSVAR(checksum_algorithm), - MYSQL_SYSVAR(log_checksum_algorithm), - MYSQL_SYSVAR(checksums), - MYSQL_SYSVAR(commit_concurrency), - MYSQL_SYSVAR(concurrency_tickets), - 
MYSQL_SYSVAR(compression_level), - MYSQL_SYSVAR(kill_idle_transaction), - MYSQL_SYSVAR(data_file_path), - MYSQL_SYSVAR(data_home_dir), - MYSQL_SYSVAR(doublewrite), - MYSQL_SYSVAR(stats_include_delete_marked), - MYSQL_SYSVAR(api_enable_binlog), - MYSQL_SYSVAR(api_enable_mdl), - MYSQL_SYSVAR(api_disable_rowlock), - MYSQL_SYSVAR(use_atomic_writes), - MYSQL_SYSVAR(use_fallocate), - MYSQL_SYSVAR(fast_shutdown), - MYSQL_SYSVAR(file_io_threads), - MYSQL_SYSVAR(read_io_threads), - MYSQL_SYSVAR(write_io_threads), - MYSQL_SYSVAR(file_per_table), - MYSQL_SYSVAR(file_format), - MYSQL_SYSVAR(file_format_check), - MYSQL_SYSVAR(file_format_max), - MYSQL_SYSVAR(flush_log_at_timeout), - MYSQL_SYSVAR(flush_log_at_trx_commit), - MYSQL_SYSVAR(use_global_flush_log_at_trx_commit), - MYSQL_SYSVAR(flush_method), - MYSQL_SYSVAR(force_recovery), - MYSQL_SYSVAR(ft_cache_size), - MYSQL_SYSVAR(ft_total_cache_size), - MYSQL_SYSVAR(ft_result_cache_limit), - MYSQL_SYSVAR(ft_enable_stopword), - MYSQL_SYSVAR(ft_max_token_size), - MYSQL_SYSVAR(ft_min_token_size), - MYSQL_SYSVAR(ft_num_word_optimize), - MYSQL_SYSVAR(ft_sort_pll_degree), - MYSQL_SYSVAR(large_prefix), - MYSQL_SYSVAR(force_load_corrupted), - MYSQL_SYSVAR(lock_schedule_algorithm), - MYSQL_SYSVAR(locks_unsafe_for_binlog), - MYSQL_SYSVAR(lock_wait_timeout), -#ifdef UNIV_LOG_ARCHIVE - MYSQL_SYSVAR(log_arch_dir), - MYSQL_SYSVAR(log_archive), - MYSQL_SYSVAR(log_arch_expire_sec), -#endif /* UNIV_LOG_ARCHIVE */ - MYSQL_SYSVAR(page_size), - MYSQL_SYSVAR(log_buffer_size), - MYSQL_SYSVAR(log_file_size), - MYSQL_SYSVAR(log_files_in_group), - MYSQL_SYSVAR(log_group_home_dir), - MYSQL_SYSVAR(log_compressed_pages), - MYSQL_SYSVAR(max_dirty_pages_pct), - MYSQL_SYSVAR(max_dirty_pages_pct_lwm), - MYSQL_SYSVAR(adaptive_flushing_lwm), - MYSQL_SYSVAR(adaptive_flushing), - MYSQL_SYSVAR(flushing_avg_loops), - MYSQL_SYSVAR(max_purge_lag), - MYSQL_SYSVAR(max_purge_lag_delay), - MYSQL_SYSVAR(mirrored_log_groups), - MYSQL_SYSVAR(old_blocks_pct), - 
MYSQL_SYSVAR(old_blocks_time), - MYSQL_SYSVAR(open_files), - MYSQL_SYSVAR(optimize_fulltext_only), - MYSQL_SYSVAR(rollback_on_timeout), - MYSQL_SYSVAR(ft_aux_table), - MYSQL_SYSVAR(ft_enable_diag_print), - MYSQL_SYSVAR(ft_server_stopword_table), - MYSQL_SYSVAR(ft_user_stopword_table), - MYSQL_SYSVAR(disable_sort_file_cache), - MYSQL_SYSVAR(stats_on_metadata), - MYSQL_SYSVAR(stats_sample_pages), - MYSQL_SYSVAR(stats_transient_sample_pages), - MYSQL_SYSVAR(stats_persistent), - MYSQL_SYSVAR(stats_persistent_sample_pages), - MYSQL_SYSVAR(stats_auto_recalc), - MYSQL_SYSVAR(stats_modified_counter), - MYSQL_SYSVAR(stats_traditional), - MYSQL_SYSVAR(adaptive_hash_index), - MYSQL_SYSVAR(adaptive_hash_index_partitions), - MYSQL_SYSVAR(stats_method), - MYSQL_SYSVAR(replication_delay), - MYSQL_SYSVAR(status_file), - MYSQL_SYSVAR(strict_mode), - MYSQL_SYSVAR(support_xa), - MYSQL_SYSVAR(sort_buffer_size), - MYSQL_SYSVAR(online_alter_log_max_size), - MYSQL_SYSVAR(sync_spin_loops), - MYSQL_SYSVAR(spin_wait_delay), - MYSQL_SYSVAR(table_locks), - MYSQL_SYSVAR(thread_concurrency), -#ifdef HAVE_ATOMIC_BUILTINS - MYSQL_SYSVAR(adaptive_max_sleep_delay), -#endif /* HAVE_ATOMIC_BUILTINS */ - MYSQL_SYSVAR(prefix_index_cluster_optimization), - MYSQL_SYSVAR(thread_sleep_delay), - MYSQL_SYSVAR(autoinc_lock_mode), - MYSQL_SYSVAR(show_verbose_locks), - MYSQL_SYSVAR(show_locks_held), - MYSQL_SYSVAR(version), - MYSQL_SYSVAR(use_sys_malloc), - MYSQL_SYSVAR(use_native_aio), -#ifdef HAVE_LIBNUMA - MYSQL_SYSVAR(numa_interleave), -#endif // HAVE_LIBNUMA - MYSQL_SYSVAR(change_buffering), - MYSQL_SYSVAR(change_buffer_max_size), - MYSQL_SYSVAR(track_changed_pages), - MYSQL_SYSVAR(max_bitmap_file_size), - MYSQL_SYSVAR(max_changed_pages), -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - MYSQL_SYSVAR(change_buffering_debug), - MYSQL_SYSVAR(disable_background_merge), -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -#ifdef WITH_INNODB_DISALLOW_WRITES - MYSQL_SYSVAR(disallow_writes), -#endif /* 
WITH_INNODB_DISALLOW_WRITES */ - MYSQL_SYSVAR(random_read_ahead), - MYSQL_SYSVAR(read_ahead_threshold), - MYSQL_SYSVAR(read_only), - MYSQL_SYSVAR(io_capacity), - MYSQL_SYSVAR(io_capacity_max), - MYSQL_SYSVAR(idle_flush_pct), - MYSQL_SYSVAR(monitor_enable), - MYSQL_SYSVAR(monitor_disable), - MYSQL_SYSVAR(monitor_reset), - MYSQL_SYSVAR(monitor_reset_all), - MYSQL_SYSVAR(purge_threads), - MYSQL_SYSVAR(purge_batch_size), -#ifdef UNIV_DEBUG - MYSQL_SYSVAR(purge_run_now), - MYSQL_SYSVAR(purge_stop_now), - MYSQL_SYSVAR(log_checkpoint_now), - MYSQL_SYSVAR(buf_flush_list_now), - MYSQL_SYSVAR(track_redo_log_now), -#endif /* UNIV_DEBUG */ -#ifdef UNIV_LINUX - MYSQL_SYSVAR(sched_priority_cleaner), -#endif -#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG - MYSQL_SYSVAR(page_hash_locks), - MYSQL_SYSVAR(doublewrite_batch_size), -#ifdef UNIV_LINUX - MYSQL_SYSVAR(sched_priority_purge), - MYSQL_SYSVAR(sched_priority_io), - MYSQL_SYSVAR(sched_priority_master), - MYSQL_SYSVAR(priority_purge), - MYSQL_SYSVAR(priority_io), - MYSQL_SYSVAR(priority_cleaner), - MYSQL_SYSVAR(priority_master), -#endif /* UNIV_LINUX */ - MYSQL_SYSVAR(cleaner_max_lru_time), - MYSQL_SYSVAR(cleaner_max_flush_time), - MYSQL_SYSVAR(cleaner_flush_chunk_size), - MYSQL_SYSVAR(cleaner_lru_chunk_size), - MYSQL_SYSVAR(cleaner_free_list_lwm), - MYSQL_SYSVAR(cleaner_eviction_factor), -#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ - MYSQL_SYSVAR(status_output), - MYSQL_SYSVAR(status_output_locks), - MYSQL_SYSVAR(cleaner_lsn_age_factor), - MYSQL_SYSVAR(foreground_preflush), - MYSQL_SYSVAR(empty_free_list_algorithm), - MYSQL_SYSVAR(print_all_deadlocks), - MYSQL_SYSVAR(cmp_per_index_enabled), - MYSQL_SYSVAR(undo_logs), - MYSQL_SYSVAR(rollback_segments), - MYSQL_SYSVAR(undo_directory), - MYSQL_SYSVAR(undo_tablespaces), - MYSQL_SYSVAR(sync_array_size), - MYSQL_SYSVAR(compression_failure_threshold_pct), - MYSQL_SYSVAR(compression_pad_pct_max), -#ifdef UNIV_DEBUG - MYSQL_SYSVAR(trx_rseg_n_slots_debug), - 
MYSQL_SYSVAR(limit_optimistic_insert_debug), - MYSQL_SYSVAR(trx_purge_view_update_only_debug), - MYSQL_SYSVAR(data_file_size_debug), - MYSQL_SYSVAR(fil_make_page_dirty_debug), - MYSQL_SYSVAR(saved_page_number_debug), -#endif /* UNIV_DEBUG */ - MYSQL_SYSVAR(simulate_comp_failures), - MYSQL_SYSVAR(corrupt_table_action), - MYSQL_SYSVAR(fake_changes), - MYSQL_SYSVAR(locking_fake_changes), - MYSQL_SYSVAR(tmpdir), - MYSQL_SYSVAR(use_stacktrace), - MYSQL_SYSVAR(force_primary_key), - MYSQL_SYSVAR(fatal_semaphore_wait_threshold), - /* Table page compression feature */ - MYSQL_SYSVAR(use_trim), - MYSQL_SYSVAR(compression_algorithm), - MYSQL_SYSVAR(mtflush_threads), - MYSQL_SYSVAR(use_mtflush), - /* Encryption feature */ - MYSQL_SYSVAR(encrypt_tables), - MYSQL_SYSVAR(encryption_threads), - MYSQL_SYSVAR(encryption_rotate_key_age), - MYSQL_SYSVAR(encryption_rotation_iops), - MYSQL_SYSVAR(scrub_log), - MYSQL_SYSVAR(scrub_log_speed), - MYSQL_SYSVAR(encrypt_log), - MYSQL_SYSVAR(default_encryption_key_id), - /* Scrubing feature */ - MYSQL_SYSVAR(immediate_scrub_data_uncompressed), - MYSQL_SYSVAR(background_scrub_data_uncompressed), - MYSQL_SYSVAR(background_scrub_data_compressed), - MYSQL_SYSVAR(background_scrub_data_interval), - MYSQL_SYSVAR(background_scrub_data_check_interval), -#ifdef UNIV_DEBUG - MYSQL_SYSVAR(debug_force_scrubbing), -#endif - MYSQL_SYSVAR(instrument_semaphores), - MYSQL_SYSVAR(buf_dump_status_frequency), - NULL -}; - -maria_declare_plugin(xtradb) -{ /* InnoDB */ - MYSQL_STORAGE_ENGINE_PLUGIN, - &innobase_storage_engine, - innobase_hton_name, - plugin_author, - "Percona-XtraDB, Supports transactions, row-level locking, foreign keys and encryption for tables", - PLUGIN_LICENSE_GPL, - innobase_init, /* Plugin Init */ - NULL, /* Plugin Deinit */ - INNODB_VERSION_SHORT, - innodb_status_variables_export,/* status variables */ - innobase_system_variables, /* system variables */ - INNODB_VERSION_STR, /* string version */ - MariaDB_PLUGIN_MATURITY_STABLE /* maturity */ 
-}, -i_s_xtradb_read_view, -i_s_xtradb_internal_hash_tables, -i_s_xtradb_rseg, -i_s_innodb_trx, -i_s_innodb_locks, -i_s_innodb_lock_waits, -i_s_innodb_cmp, -i_s_innodb_cmp_reset, -i_s_innodb_cmpmem, -i_s_innodb_cmpmem_reset, -i_s_innodb_cmp_per_index, -i_s_innodb_cmp_per_index_reset, -i_s_innodb_buffer_page, -i_s_innodb_buffer_page_lru, -i_s_innodb_buffer_stats, -i_s_innodb_metrics, -i_s_innodb_ft_default_stopword, -i_s_innodb_ft_deleted, -i_s_innodb_ft_being_deleted, -i_s_innodb_ft_config, -i_s_innodb_ft_index_cache, -i_s_innodb_ft_index_table, -i_s_innodb_sys_tables, -i_s_innodb_sys_tablestats, -i_s_innodb_sys_indexes, -i_s_innodb_sys_columns, -i_s_innodb_sys_fields, -i_s_innodb_sys_foreign, -i_s_innodb_sys_foreign_cols, -i_s_innodb_sys_tablespaces, -i_s_innodb_sys_datafiles, -i_s_innodb_changed_pages, -i_s_innodb_mutexes, -i_s_innodb_sys_semaphore_waits, -i_s_innodb_tablespaces_encryption, -i_s_innodb_tablespaces_scrubbing, -i_s_innodb_changed_page_bitmaps -maria_declare_plugin_end; - -/** @brief Initialize the default value of innodb_commit_concurrency. - -Once InnoDB is running, the innodb_commit_concurrency must not change -from zero to nonzero. (Bug #42101) - -The initial default value is 0, and without this extra initialization, -SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter -to 0, even if it was initially set to nonzero at the command line -or configuration file. */ -static -void -innobase_commit_concurrency_init_default() -/*======================================*/ -{ - MYSQL_SYSVAR_NAME(commit_concurrency).def_val - = innobase_commit_concurrency; -} - -/** @brief Initialize the default and max value of innodb_undo_logs. - -Once InnoDB is running, the default value and the max value of -innodb_undo_logs must be equal to the available undo logs, -given by srv_available_undo_logs. 
*/ -static -void -innobase_undo_logs_init_default_max() -/*=================================*/ -{ - MYSQL_SYSVAR_NAME(undo_logs).max_val - = MYSQL_SYSVAR_NAME(undo_logs).def_val - = static_cast<unsigned long>(srv_available_undo_logs); -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -struct innobase_convert_name_test_t { - char* buf; - ulint buflen; - const char* id; - ulint idlen; - void* thd; - ibool file_id; - - const char* expected; -}; - -void -test_innobase_convert_name() -{ - char buf[1024]; - ulint i; - - innobase_convert_name_test_t test_input[] = { - {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""}, - {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""}, - {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""}, - {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""}, - {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""}, - - {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""}, - {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""}, - {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""}, - {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""}, - - {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"cd\""}, - {buf, 17, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"cd\""}, - {buf, 16, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"c\""}, - {buf, 15, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\"\"\""}, - {buf, 14, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\""}, - {buf, 13, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#ab\""}, - {buf, 12, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#a\""}, - {buf, 11, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50#\""}, - {buf, 10, "ab\"cd", 5, NULL, TRUE, - "\"#mysql50\""}, - - {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""}, - {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""}, - {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""}, - {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""}, - {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."}, 
- {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."}, - {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""}, - {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""}, - {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""}, - /* XXX probably "" is a better result in this case - {buf, 1, "ab/cd", 5, NULL, TRUE, "."}, - */ - {buf, 0, "ab/cd", 5, NULL, TRUE, ""}, - }; - - for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) { - - char* end; - ibool ok = TRUE; - size_t res_len; - - fprintf(stderr, "TESTING %lu, %s, %lu, %s\n", - test_input[i].buflen, - test_input[i].id, - test_input[i].idlen, - test_input[i].expected); - - end = innobase_convert_name( - test_input[i].buf, - test_input[i].buflen, - test_input[i].id, - test_input[i].idlen, - test_input[i].thd, - test_input[i].file_id); - - res_len = (size_t) (end - test_input[i].buf); - - if (res_len != strlen(test_input[i].expected)) { - - fprintf(stderr, "unexpected len of the result: %u, " - "expected: %u\n", (unsigned) res_len, - (unsigned) strlen(test_input[i].expected)); - ok = FALSE; - } - - if (memcmp(test_input[i].buf, - test_input[i].expected, - strlen(test_input[i].expected)) != 0 - || !ok) { - - fprintf(stderr, "unexpected result: %.*s, " - "expected: %s\n", (int) res_len, - test_input[i].buf, - test_input[i].expected); - ok = FALSE; - } - - if (ok) { - fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len, - buf); - } else { - fprintf(stderr, "FAILED\n\n"); - return; - } - } -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ - -/** - * Index Condition Pushdown interface implementation - */ - -/*************************************************************//** -InnoDB index push-down condition check -@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ -UNIV_INTERN -enum icp_result -innobase_index_cond( -/*================*/ - void* file) /*!< in/out: pointer to ha_innobase */ -{ - return handler_index_cond_check(file); -} - -/** Attempt to push down an index condition. 
-* @param[in] keyno MySQL key number -* @param[in] idx_cond Index condition to be checked -* @return Part of idx_cond which the handler will not evaluate -*/ -UNIV_INTERN -class Item* -ha_innobase::idx_cond_push( - uint keyno, - class Item* idx_cond) -{ - DBUG_ENTER("ha_innobase::idx_cond_push"); - DBUG_ASSERT(keyno != MAX_KEY); - DBUG_ASSERT(idx_cond != NULL); - - pushed_idx_cond = idx_cond; - pushed_idx_cond_keyno = keyno; - in_range_check_pushed_down = TRUE; - /* We will evaluate the condition entirely */ - DBUG_RETURN(NULL); -} - -/******************************************************************//** -Use this when the args are passed to the format string from -errmsg-utf8.txt directly as is. - -Push a warning message to the client, it is a wrapper around: - -void push_warning_printf( - THD *thd, Sql_condition::enum_warning_level level, - uint code, const char *format, ...); -*/ -UNIV_INTERN -void -ib_senderrf( -/*========*/ - THD* thd, /*!< in/out: session */ - ib_log_level_t level, /*!< in: warning level */ - ib_uint32_t code, /*!< MySQL error code */ - ...) /*!< Args */ -{ - va_list args; - const char* format = innobase_get_err_msg(code); - - /* If the caller wants to push a message to the client then - the caller must pass a valid session handle. */ - - ut_a(thd != 0); - - /* The error code must exist in the errmsg-utf8.txt file. */ - ut_a(format != 0); - - va_start(args, code); - - myf l=0; - - switch(level) { - case IB_LOG_LEVEL_INFO: - l = ME_JUST_INFO; - break; - case IB_LOG_LEVEL_WARN: - l = ME_JUST_WARNING; - break; - case IB_LOG_LEVEL_ERROR: - case IB_LOG_LEVEL_FATAL: - l = 0; - break; - default: - l = 0; - break; - } - - my_printv_error(code, format, MYF(l), args); - - va_end(args); - - if (level == IB_LOG_LEVEL_FATAL) { - ut_error; - } -} - -/******************************************************************//** -Use this when the args are first converted to a formatted string and then -passed to the format string from errmsg-utf8.txt. 
The error message format -must be: "Some string ... %s". - -Push a warning message to the client, it is a wrapper around: - -void push_warning_printf( - THD *thd, Sql_condition::enum_warning_level level, - uint code, const char *format, ...); -*/ -UNIV_INTERN -void -ib_errf( -/*====*/ - THD* thd, /*!< in/out: session */ - ib_log_level_t level, /*!< in: warning level */ - ib_uint32_t code, /*!< MySQL error code */ - const char* format, /*!< printf format */ - ...) /*!< Args */ -{ - char* str; - va_list args; - - /* If the caller wants to push a message to the client then - the caller must pass a valid session handle. */ - - ut_a(thd != 0); - ut_a(format != 0); - - va_start(args, format); - -#ifdef __WIN__ - int size = _vscprintf(format, args) + 1; - str = static_cast<char*>(malloc(size)); - str[size - 1] = 0x0; - vsnprintf(str, size, format, args); -#elif HAVE_VASPRINTF - int ret; - ret = vasprintf(&str, format, args); - ut_a(ret != -1); -#else - /* Use a fixed length string. */ - str = static_cast<char*>(malloc(BUFSIZ)); - my_vsnprintf(str, BUFSIZ, format, args); -#endif /* __WIN__ */ - - ib_senderrf(thd, level, code, str); - - va_end(args); - free(str); -} - -/******************************************************************//** -Write a message to the MySQL log, prefixed with "InnoDB: " */ -UNIV_INTERN -void -ib_logf( -/*====*/ - ib_log_level_t level, /*!< in: warning level */ - const char* format, /*!< printf format */ - ...) /*!< Args */ -{ - char* str; - va_list args; - - va_start(args, format); - -#ifdef __WIN__ - int size = _vscprintf(format, args) + 1; - str = static_cast<char*>(malloc(size)); - str[size - 1] = 0x0; - vsnprintf(str, size, format, args); -#elif HAVE_VASPRINTF - int ret; - ret = vasprintf(&str, format, args); - ut_a(ret != -1); -#else - /* Use a fixed length string. 
*/ - str = static_cast<char*>(malloc(BUFSIZ)); - my_vsnprintf(str, BUFSIZ, format, args); -#endif /* __WIN__ */ - if (!IS_XTRABACKUP()) { - switch (level) { - case IB_LOG_LEVEL_INFO: - sql_print_information("InnoDB: %s", str); - break; - case IB_LOG_LEVEL_WARN: - sql_print_warning("InnoDB: %s", str); - break; - case IB_LOG_LEVEL_ERROR: - sql_print_error("InnoDB: %s", str); - sd_notifyf(0, "STATUS=InnoDB: Error: %s", str); - break; - case IB_LOG_LEVEL_FATAL: - sql_print_error("InnoDB: %s", str); - sd_notifyf(0, "STATUS=InnoDB: Fatal: %s", str); - break; - } - } - else { - /* Don't use server logger for XtraBackup, just print to stderr. */ - fprintf(stderr, "InnoDB: %s\n", str); - } - - va_end(args); - free(str); - - if (level == IB_LOG_LEVEL_FATAL) { - ut_error; - } -} - -/********************************************************************** -Converts an identifier from my_charset_filename to UTF-8 charset. -@return result string length, as returned by strconvert() */ -uint -innobase_convert_to_filename_charset( -/*=================================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len) /* in: length of 'to', in bytes */ -{ - uint errors; - CHARSET_INFO* cs_to = &my_charset_filename; - CHARSET_INFO* cs_from = system_charset_info; - - return(strconvert(cs_from, from, strlen(from), cs_to, to, - static_cast<uint>(len), &errors)); -} - -/********************************************************************** -Converts an identifier from my_charset_filename to UTF-8 charset. 
-@return result string length, as returned by strconvert() */ -uint -innobase_convert_to_system_charset( -/*===============================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len, /* in: length of 'to', in bytes */ - uint* errors) /* out: error return */ -{ - CHARSET_INFO* cs1 = &my_charset_filename; - CHARSET_INFO* cs2 = system_charset_info; - - return(strconvert(cs1, from, strlen(from), cs2, to, - static_cast<uint>(len), errors)); -} - - -/**************************************************************************** - * DS-MRR implementation - ***************************************************************************/ - -/** - * Multi Range Read interface, DS-MRR calls - */ - -int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, - HANDLER_BUFFER *buf) -{ - return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf); -} - -int ha_innobase::multi_range_read_next(range_id_t *range_info) -{ - return ds_mrr.dsmrr_next(range_info); -} - -ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, - uint n_ranges, uint *bufsz, - uint *flags, - Cost_estimate *cost) -{ - /* See comments in ha_myisam::multi_range_read_info_const */ - ds_mrr.init(this, table); - - if (prebuilt->select_lock_type != LOCK_NONE) - *flags |= HA_MRR_USE_DEFAULT_IMPL; - - ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges, - bufsz, flags, cost); - return res; -} - -ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint key_parts, uint *bufsz, - uint *flags, Cost_estimate *cost) -{ - ds_mrr.init(this, table); - ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz, - flags, cost); - return res; -} - -int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size) -{ - return ds_mrr.dsmrr_explain_info(mrr_mode, str, size); -} 
- -/* - A helper function used only in index_cond_func_innodb -*/ - -bool ha_innobase::is_thd_killed() -{ - return thd_kill_level(user_thd); -} - -/********************************************************************** -Issue a warning that the row is too big. */ -UNIV_INTERN -void -ib_warn_row_too_big(const dict_table_t* table) -{ - /* If prefix is true then a 768-byte prefix is stored - locally for BLOB fields. Refer to dict_table_get_format() */ - const bool prefix = (dict_tf_get_format(table->flags) - == UNIV_FORMAT_A); - - const ulint free_space = page_get_free_space_of_empty( - table->flags & DICT_TF_COMPACT) / 2; - - THD* thd = current_thd; - - if (thd == NULL) { - return; - } - - push_warning_printf( - thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW, - "Row size too large (> %lu). Changing some columns to TEXT" - " or BLOB %smay help. In current row format, BLOB prefix of" - " %d bytes is stored inline.", free_space - , prefix ? "or using ROW_FORMAT=DYNAMIC or" - " ROW_FORMAT=COMPRESSED ": "" - , prefix ? DICT_MAX_FIXED_COL_LEN : 0); -} - -/*************************************************************//** -Check for a valid value of innobase_compression_algorithm. -@return 0 for valid innodb_compression_algorithm. 
*/ -static -int -innodb_compression_algorithm_validate( -/*==================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - long compression_algorithm; - DBUG_ENTER("innobase_compression_algorithm_validate"); - - if (check_sysvar_enum(thd, var, save, value)) { - DBUG_RETURN(1); - } - - compression_algorithm = *reinterpret_cast<ulong*>(save); - (void)compression_algorithm; - -#ifndef HAVE_LZ4 - if (compression_algorithm == PAGE_LZ4_ALGORITHM) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblz4 is not installed. \n", - compression_algorithm); - DBUG_RETURN(1); - } -#endif - -#ifndef HAVE_LZO - if (compression_algorithm == PAGE_LZO_ALGORITHM) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblzo is not installed. \n", - compression_algorithm); - DBUG_RETURN(1); - } -#endif - -#ifndef HAVE_LZMA - if (compression_algorithm == PAGE_LZMA_ALGORITHM) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblzma is not installed. \n", - compression_algorithm); - DBUG_RETURN(1); - } -#endif - -#ifndef HAVE_BZIP2 - if (compression_algorithm == PAGE_BZIP2_ALGORITHM) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: libbz2 is not installed. 
\n", - compression_algorithm); - DBUG_RETURN(1); - } -#endif - -#ifndef HAVE_SNAPPY - if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: libsnappy is not installed. \n", - compression_algorithm); - DBUG_RETURN(1); - } -#endif - DBUG_RETURN(0); -} - -static -int -innodb_encrypt_tables_validate( -/*=================================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to system - variable */ - void* save, /*!< out: immediate result - for update function */ - struct st_mysql_value* value) /*!< in: incoming string */ -{ - if (check_sysvar_enum(thd, var, save, value)) { - return 1; - } - - ulong encrypt_tables = *(ulong*)save; - - if (encrypt_tables - && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: cannot enable encryption, " - "encryption plugin is not available"); - return 1; - } - - if (!srv_fil_crypt_rotate_key_age) { - const char *msg = (encrypt_tables ? "enable" : "disable"); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_UNSUPPORTED, - "InnoDB: cannot %s encryption, " - "innodb_encryption_rotate_key_age=0" - " i.e. key rotation disabled", msg); - return 1; - } - - return 0; -} - -static void innodb_remember_check_sysvar_funcs() -{ - /* remember build-in sysvar check functions */ - ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM); - check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check; -} - -/********************************************************************//** -Helper function to push warnings from InnoDB internals to SQL-layer. 
*/ -UNIV_INTERN -void -ib_push_warning( - trx_t* trx, /*!< in: trx */ - ulint error, /*!< in: error code to push as warning */ - const char *format,/*!< in: warning message */ - ...) -{ - if (trx && trx->mysql_thd) { - THD *thd = (THD *)trx->mysql_thd; - va_list args; - char *buf; -#define MAX_BUF_SIZE 4*1024 - - va_start(args, format); - buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME)); - vsprintf(buf,format, args); - - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - convert_error_code_to_mysql((dberr_t)error, 0, thd), - buf); - my_free(buf); - va_end(args); - } -} - -/********************************************************************//** -Helper function to push warnings from InnoDB internals to SQL-layer. */ -UNIV_INTERN -void -ib_push_warning( - void* ithd, /*!< in: thd */ - ulint error, /*!< in: error code to push as warning */ - const char *format,/*!< in: warning message */ - ...) -{ - va_list args; - THD *thd = (THD *)ithd; - char *buf; -#define MAX_BUF_SIZE 4*1024 - - if (ithd == NULL) { - thd = current_thd; - } - - if (thd) { - va_start(args, format); - buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME)); - vsprintf(buf,format, args); - - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - convert_error_code_to_mysql((dberr_t)error, 0, thd), - buf); - my_free(buf); - va_end(args); - } -} - -/********************************************************************//** -Helper function to push frm mismatch error to error log and -if needed to sql-layer. */ -UNIV_INTERN -void -ib_push_frm_error( -/*==============*/ - THD* thd, /*!< in: MySQL thd */ - dict_table_t* ib_table, /*!< in: InnoDB table */ - TABLE* table, /*!< in: MySQL table */ - ulint n_keys, /*!< in: InnoDB #keys */ - bool push_warning) /*!< in: print warning ? */ -{ - switch (ib_table->dict_frm_mismatch) { - case DICT_FRM_NO_PK: - sql_print_error("Table %s has a primary key in " - "InnoDB data dictionary, but not " - "in MySQL!" 
- " Have you mixed up " - ".frm files from different " - "installations? See " - REFMAN - "innodb-troubleshooting.html\n", - ib_table->name); - - if (push_warning) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has a " - "primary key in InnoDB data " - "dictionary, but not in " - "MySQL!", ib_table->name); - } - break; - case DICT_NO_PK_FRM_HAS: - sql_print_error( - "Table %s has no primary key in InnoDB data " - "dictionary, but has one in MySQL! If you " - "created the table with a MySQL version < " - "3.23.54 and did not define a primary key, " - "but defined a unique key with all non-NULL " - "columns, then MySQL internally treats that " - "key as the primary key. You can fix this " - "error by dump + DROP + CREATE + reimport " - "of the table.", ib_table->name); - - if (push_warning) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s has no " - "primary key in InnoDB data " - "dictionary, but has one in " - "MySQL!", - ib_table->name); - } - break; - - case DICT_FRM_INCONSISTENT_KEYS: - sql_print_error("InnoDB: Table %s contains %lu " - "indexes inside InnoDB, which " - "is different from the number of " - "indexes %u defined in the MySQL " - " Have you mixed up " - ".frm files from different " - "installations? 
See " - REFMAN - "innodb-troubleshooting.html\n", - ib_table->name, n_keys, - table->s->keys); - - if (push_warning) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_NO_SUCH_INDEX, - "InnoDB: Table %s contains %lu " - "indexes inside InnoDB, which " - "is different from the number of " - "indexes %u defined in the MySQL ", - ib_table->name, n_keys, - table->s->keys); - } - break; - - case DICT_FRM_CONSISTENT: - default: - sql_print_error("InnoDB: Table %s is consistent " - "on InnoDB data dictionary and MySQL " - " FRM file.", - ib_table->name); - ut_error; - break; - } -} diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h deleted file mode 100644 index 3bb67532954..00000000000 --- a/storage/xtradb/handler/ha_innodb.h +++ /dev/null @@ -1,746 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/* - This file is based on ha_berkeley.h of MySQL distribution - - This file defines the Innodb handler: the interface between MySQL and - Innodb -*/ - -#include "dict0stats.h" - -/* Structure defines translation table between mysql index and innodb -index structures */ -struct innodb_idx_translate_t { - ulint index_count; /*!< number of valid index entries - in the index_mapping array */ - ulint array_size; /*!< array size of index_mapping */ - dict_index_t** index_mapping; /*!< index pointer array directly - maps to index in Innodb from MySQL - array index */ -}; - - -/** InnoDB table share */ -typedef struct st_innobase_share { - THR_LOCK lock; /*!< MySQL lock protecting - this structure */ - const char* table_name; /*!< InnoDB table name */ - uint use_count; /*!< reference count, - incremented in get_share() - and decremented in - free_share() */ - void* table_name_hash;/*!< hash table chain node */ - innodb_idx_translate_t idx_trans_tbl; /*!< index translation - table between MySQL and - Innodb */ - dict_table_t* ib_table; -} INNOBASE_SHARE; - - -/** Prebuilt structures in an InnoDB table handle used within MySQL */ -struct row_prebuilt_t; - -/** Engine specific table options are defined using this struct */ -struct ha_table_option_struct -{ - bool page_compressed; /*!< Table is using page compression - if this option is true. */ - ulonglong page_compression_level; /*!< Table page compression level - 0-9. */ - uint atomic_writes; /*!< Use atomic writes for this - table if this options is ON or - in DEFAULT if - srv_use_atomic_writes=1. 
- Atomic writes are not used if - value OFF.*/ - uint encryption; /*!< DEFAULT, ON, OFF */ - ulonglong encryption_key_id; /*!< encryption key id */ -}; - -/** The class defining a handle to an Innodb table */ -class ha_innobase: public handler -{ - row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used - to save CPU time with prebuilt data - structures*/ - THD* user_thd; /*!< the thread handle of the user - currently using the handle; this is - set in external_lock function */ - THR_LOCK_DATA lock; - INNOBASE_SHARE* share; /*!< information for MySQL - table locking */ - - uchar* upd_buf; /*!< buffer used in updates */ - ulint upd_buf_size; /*!< the size of upd_buf in bytes */ - Table_flags int_table_flags; - uint primary_key; - ulong start_of_scan; /*!< this is set to 1 when we are - starting a table scan but have not - yet fetched any row, else 0 */ - uint last_match_mode;/* match mode of the latest search: - ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX, - or undefined */ - uint num_write_row; /*!< number of write_row() calls */ - - ha_statistics* ha_partition_stats; /*!< stats of the partition owner - handler (if there is one) */ - uint store_key_val_for_row(uint keynr, char* buff, uint buff_len, - const uchar* record); - inline void update_thd(THD* thd); - void update_thd(); - int change_active_index(uint keynr); - int general_fetch(uchar* buf, uint direction, uint match_mode); - dberr_t innobase_lock_autoinc(); - ulonglong innobase_peek_autoinc(); - dberr_t innobase_set_max_autoinc(ulonglong auto_inc); - dberr_t innobase_reset_autoinc(ulonglong auto_inc); - dberr_t innobase_get_autoinc(ulonglong* value); - void innobase_initialize_autoinc(); - dict_index_t* innobase_get_index(uint keynr); - -#ifdef WITH_WSREP - int wsrep_append_keys(THD *thd, bool shared, - const uchar* record0, const uchar* record1); -#endif - /* Init values for the class: */ - public: - ha_innobase(handlerton *hton, TABLE_SHARE *table_arg); - ~ha_innobase(); - /* - Get the row type from the 
storage engine. If this method returns - ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. - */ - enum row_type get_row_type() const; - - const char* table_type() const; - const char* index_type(uint key_number); - Table_flags table_flags() const; - ulong index_flags(uint idx, uint part, bool all_parts) const; - uint max_supported_keys() const; - uint max_supported_key_length() const; - uint max_supported_key_part_length() const; - const key_map* keys_to_use_for_scanning(); - - int open(const char *name, int mode, uint test_if_locked); - handler* clone(const char *name, MEM_ROOT *mem_root); - int close(void); - double scan_time(); - double read_time(uint index, uint ranges, ha_rows rows); - longlong get_memory_buffer_size() const; - my_bool is_fake_change_enabled(THD *thd); - - int write_row(uchar * buf); - int update_row(const uchar * old_data, const uchar * new_data); - int delete_row(const uchar * buf); - bool was_semi_consistent_read(); - void try_semi_consistent_read(bool yes); - void unlock_row(); - - int index_init(uint index, bool sorted); - int index_end(); - int index_read(uchar * buf, const uchar * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(uchar * buf, uint index, const uchar * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_last(uchar * buf, const uchar * key, uint key_len); - int index_next(uchar * buf); - int index_next_same(uchar * buf, const uchar *key, uint keylen); - int index_prev(uchar * buf); - int index_first(uchar * buf); - int index_last(uchar * buf); - - bool has_gap_locks() const { return true; } - - int rnd_init(bool scan); - int rnd_end(); - int rnd_next(uchar *buf); - int rnd_pos(uchar * buf, uchar *pos); - - int ft_init(); - void ft_end(); - FT_INFO *ft_init_ext(uint flags, uint inx, String* key); - int ft_read(uchar* buf); - - void position(const uchar *record); - int info(uint); - int analyze(THD* thd,HA_CHECK_OPT* check_opt); - int optimize(THD* 
thd,HA_CHECK_OPT* check_opt); - int discard_or_import_tablespace(my_bool discard); - int extra(enum ha_extra_function operation); - int reset(); - int external_lock(THD *thd, int lock_type); - int transactional_table_lock(THD *thd, int lock_type); - int start_stmt(THD *thd, thr_lock_type lock_type); - void position(uchar *record); - ha_rows records_in_range(uint inx, key_range *min_key, key_range - *max_key); - ha_rows estimate_rows_upper_bound(); - - void update_create_info(HA_CREATE_INFO* create_info); - int parse_table_name(const char*name, - HA_CREATE_INFO* create_info, - ulint flags, - ulint flags2, - char* norm_name, - char* temp_path, - char* remote_path); - const char* check_table_options(THD *thd, TABLE* table, - HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); - int create(const char *name, register TABLE *form, - HA_CREATE_INFO *create_info); - int truncate(); - int delete_table(const char *name); - int rename_table(const char* from, const char* to); - int defragment_table(const char* name, const char* index_name, - bool async); - int check(THD* thd, HA_CHECK_OPT* check_opt); - char* update_table_comment(const char* comment); - char* get_foreign_key_create_info(); - int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list); - int get_parent_foreign_key_list(THD *thd, - List<FOREIGN_KEY_INFO> *f_key_list); - bool can_switch_engines(); - uint referenced_by_foreign_key(); - void free_foreign_key_create_info(char* str); - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - void init_table_handle_for_HANDLER(); - virtual void get_auto_increment(ulonglong offset, ulonglong increment, - ulonglong nb_desired_values, - ulonglong *first_value, - ulonglong *nb_reserved_values); - int reset_auto_increment(ulonglong value); - - virtual bool get_error_message(int error, String *buf); - virtual bool get_foreign_dup_key(char*, uint, char*, uint); - uint8 table_cache_type(); - /* - ask 
handler about permission to cache table during query registration - */ - my_bool register_query_cache_table(THD *thd, const char *table_key, - uint key_length, - qc_engine_callback *call_back, - ulonglong *engine_data); - static const char *get_mysql_bin_log_name(); - static ulonglong get_mysql_bin_log_pos(); - bool primary_key_is_clustered(); - int cmp_ref(const uchar *ref1, const uchar *ref2); - /** On-line ALTER TABLE interface @see handler0alter.cc @{ */ - - /** Check if InnoDB supports a particular alter table in-place - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - - @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported - @retval HA_ALTER_INPLACE_NO_LOCK Supported - @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE - Supported, but requires lock - during main phase and exclusive - lock during prepare phase. - @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE - Supported, prepare phase - requires exclusive lock. - */ - enum_alter_inplace_result check_if_supported_inplace_alter( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info); - /** Allows InnoDB to update internal structures with concurrent - writes blocked (provided that check_if_supported_inplace_alter() - did not return HA_ALTER_INPLACE_NO_LOCK). - This will be invoked before inplace_alter_table(). - - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - - @retval true Failure - @retval false Success - */ - bool prepare_inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info); - - /** Alter the table structure in-place with operations - specified using HA_ALTER_FLAGS and Alter_inplace_information. 
- The level of concurrency allowed during this operation depends - on the return value from check_if_supported_inplace_alter(). - - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - - @retval true Failure - @retval false Success - */ - bool inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info); - - /** Commit or rollback the changes made during - prepare_inplace_alter_table() and inplace_alter_table() inside - the storage engine. Note that the allowed level of concurrency - during this operation will be the same as for - inplace_alter_table() and thus might be higher than during - prepare_inplace_alter_table(). (E.g concurrent writes were - blocked during prepare, but might not be during commit). - @param altered_table TABLE object for new version of table. - @param ha_alter_info Structure describing changes to be done - by ALTER TABLE and holding data used during in-place alter. - @param commit true => Commit, false => Rollback. - @retval true Failure - @retval false Success - */ - bool commit_inplace_alter_table( - TABLE* altered_table, - Alter_inplace_info* ha_alter_info, - bool commit); - /** @} */ - void set_partition_owner_stats(ha_statistics *stats); - bool check_if_incompatible_data(HA_CREATE_INFO *info, - uint table_changes); - - bool check_if_supported_virtual_columns(void) { return TRUE; } - -private: - /** Builds a 'template' to the prebuilt struct. - - The template is used in fast retrieval of just those column - values MySQL needs in its processing. - @param whole_row true if access is needed to a whole row, - false if accessing individual fields is enough */ - void build_template(bool whole_row); - /** Resets a query execution 'template'. 
- @see build_template() */ - inline void reset_template(); - - int info_low(uint, bool); - -public: - /** @name Multi Range Read interface @{ */ - /** Initialize multi range read @see DsMrr_impl::dsmrr_init - * @param seq - * @param seq_init_param - * @param n_ranges - * @param mode - * @param buf - */ - int multi_range_read_init(RANGE_SEQ_IF* seq, - void* seq_init_param, - uint n_ranges, uint mode, - HANDLER_BUFFER* buf); - /** Process next multi range read @see DsMrr_impl::dsmrr_next - * @param range_info - */ - int multi_range_read_next(range_id_t *range_info); - /** Initialize multi range read and get information. - * @see ha_myisam::multi_range_read_info_const - * @see DsMrr_impl::dsmrr_info_const - * @param keyno - * @param seq - * @param seq_init_param - * @param n_ranges - * @param bufsz - * @param flags - * @param cost - */ - ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF* seq, - void* seq_init_param, - uint n_ranges, uint* bufsz, - uint* flags, Cost_estimate* cost); - /** Initialize multi range read and get information. - * @see DsMrr_impl::dsmrr_info - * @param keyno - * @param seq - * @param seq_init_param - * @param n_ranges - * @param bufsz - * @param flags - * @param cost - */ - ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint key_parts, uint* bufsz, uint* flags, - Cost_estimate* cost); - int multi_range_read_explain_info(uint mrr_mode, - char *str, size_t size); - - /** Attempt to push down an index condition. 
- * @param[in] keyno MySQL key number - * @param[in] idx_cond Index condition to be checked - * @return idx_cond if pushed; NULL if not pushed - */ - class Item* idx_cond_push(uint keyno, class Item* idx_cond); - - /* An helper function for index_cond_func_innodb: */ - bool is_thd_killed(); - -private: - /** The multi range read session object */ - DsMrr_impl ds_mrr; - /* @} */ -}; - -/* Some accessor functions which the InnoDB plugin needs, but which -can not be added to mysql/plugin.h as part of the public interface; -the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ - -#ifndef INNODB_COMPATIBILITY_HOOKS -#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS -#endif - -extern "C" { - -struct charset_info_st *thd_charset(MYSQL_THD thd); -LEX_STRING *thd_query_string(MYSQL_THD thd); - -/** - Check if a user thread is a replication slave thread - @param thd user thread - @retval 0 the user thread is not a replication slave thread - @retval 1 the user thread is a replication slave thread -*/ -int thd_slave_thread(const MYSQL_THD thd); - -/** - Check if a user thread is running a non-transactional update - @param thd user thread - @retval 0 the user thread is not running a non-transactional update - @retval 1 the user thread is running a non-transactional update -*/ -int thd_non_transactional_update(const MYSQL_THD thd); - -/** - Get high resolution timestamp for the current query start time. - The timestamp is not anchored to any specific point in time, - but can be used for comparison. - - @retval timestamp in microseconds precision -*/ -unsigned long long thd_start_utime(const MYSQL_THD thd); - -/** - Get the user thread's binary logging format - @param thd user thread - @return Value to be used as index into the binlog_format_names array -*/ -int thd_binlog_format(const MYSQL_THD thd); - -/** - Mark transaction to rollback and mark error as fatal to a sub-statement. 
- @param thd Thread handle - @param all TRUE <=> rollback main transaction. -*/ -void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); - -/** - Check if binary logging is filtered for thread's current db. - @param thd Thread handle - @retval 1 the query is not filtered, 0 otherwise. -*/ -bool thd_binlog_filter_ok(const MYSQL_THD thd); - -/** - Check if the query may generate row changes which - may end up in the binary. - @param thd Thread handle - @return 1 the query may generate row changes, 0 otherwise. -*/ -bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd); - -/** - Gets information on the durability property requested by - a thread. - @param thd Thread handle - @return a durability property. -*/ -enum durability_properties thd_get_durability_property(const MYSQL_THD thd); - -/** Is strict sql_mode set. -@param thd Thread object -@return True if sql_mode has strict mode (all or trans), false otherwise. -*/ -bool thd_is_strict_mode(const MYSQL_THD thd) -MY_ATTRIBUTE((nonnull)); -} /* extern "C" */ - -/** Get the file name and position of the MySQL binlog corresponding to the - * current commit. 
- */ -extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file); - -struct trx_t; -#ifdef WITH_WSREP -#include <wsrep_mysqld.h> -//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2); - -extern "C" bool wsrep_thd_is_wsrep_on(THD *thd); - - -extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode); -extern "C" void wsrep_thd_set_query_state( - THD *thd, enum wsrep_query_state state); - -extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id); - -extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd); -extern "C" time_t wsrep_thd_query_start(THD *thd); -extern "C" query_id_t wsrep_thd_query_id(THD *thd); -extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd); -extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id); -#endif - -extern const struct _ft_vft ft_vft_result; - -/* Structure Returned by ha_innobase::ft_init_ext() */ -typedef struct new_ft_info -{ - struct _ft_vft *please; - struct _ft_vft_ext *could_you; - row_prebuilt_t* ft_prebuilt; - fts_result_t* ft_result; -} NEW_FT_INFO; - -/*********************************************************************//** -Allocates an InnoDB transaction for a MySQL handler object. -@return InnoDB transaction handle */ -trx_t* -innobase_trx_allocate( -/*==================*/ - MYSQL_THD thd); /*!< in: user thread handle */ - -/*********************************************************************//** -This function checks each index name for a table against reserved -system default primary index name 'GEN_CLUST_INDEX'. If a name -matches, this function pushes an warning message to the client, -and returns true. -@return true if the index name matches the reserved name */ -UNIV_INTERN -bool -innobase_index_name_is_reserved( -/*============================*/ - THD* thd, /*!< in/out: MySQL connection */ - const KEY* key_info, /*!< in: Indexes to be created */ - ulint num_of_keys) /*!< in: Number of indexes to - be created. 
*/ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); - -/*****************************************************************//** -#ifdef WITH_WSREP -extern "C" int wsrep_trx_is_aborting(void *thd_ptr); -#endif -Determines InnoDB table flags. -@retval true if successful, false if error */ -UNIV_INTERN -bool -innobase_table_flags( -/*=================*/ - const TABLE* form, /*!< in: table */ - const HA_CREATE_INFO* create_info, /*!< in: information - on table columns and indexes */ - THD* thd, /*!< in: connection */ - bool use_tablespace, /*!< in: whether to create - outside system tablespace */ - ulint* flags, /*!< out: DICT_TF flags */ - ulint* flags2) /*!< out: DICT_TF2 flags */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*****************************************************************//** -Validates the create options. We may build on this function -in future. For now, it checks two specifiers: -KEY_BLOCK_SIZE and ROW_FORMAT -If innodb_strict_mode is not set then this function is a no-op -@return NULL if valid, string if not. */ -UNIV_INTERN -const char* -create_options_are_invalid( -/*=======================*/ - THD* thd, /*!< in: connection thread. */ - TABLE* form, /*!< in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info, /*!< in: create info. 
*/ - bool use_tablespace) /*!< in: srv_file_per_table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id -@return the relevance ranking value */ -UNIV_INTERN -float -innobase_fts_retrieve_ranking( -/*==========================*/ - FT_INFO* fts_hdl); /*!< in: FTS handler */ - -/*********************************************************************//** -Find and Retrieve the FTS Relevance Ranking result for doc with doc_id -of prebuilt->fts_doc_id -@return the relevance ranking value */ -UNIV_INTERN -float -innobase_fts_find_ranking( -/*======================*/ - FT_INFO* fts_hdl, /*!< in: FTS handler */ - uchar* record, /*!< in: Unused */ - uint len); /*!< in: Unused */ -/*********************************************************************//** -Free the memory for the FTS handler */ -UNIV_INTERN -void -innobase_fts_close_ranking( -/*=======================*/ - FT_INFO* fts_hdl) /*!< in: FTS handler */ - MY_ATTRIBUTE((nonnull)); -/*****************************************************************//** -Initialize the table FTS stopword list -@return TRUE if success */ -UNIV_INTERN -ibool -innobase_fts_load_stopword( -/*=======================*/ - dict_table_t* table, /*!< in: Table has the FTS */ - trx_t* trx, /*!< in: transaction */ - THD* thd) /*!< in: current thread */ - MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)); - -/** Some defines for innobase_fts_check_doc_id_index() return value */ -enum fts_doc_id_index_enum { - FTS_INCORRECT_DOC_ID_INDEX, - FTS_EXIST_DOC_ID_INDEX, - FTS_NOT_EXIST_DOC_ID_INDEX -}; - -/*******************************************************************//** -Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME -on the Doc ID column. 
-@return the status of the FTS_DOC_ID index */ -UNIV_INTERN -enum fts_doc_id_index_enum -innobase_fts_check_doc_id_index( -/*============================*/ - const dict_table_t* table, /*!< in: table definition */ - const TABLE* altered_table, /*!< in: MySQL table - that is being altered */ - ulint* fts_doc_col_no) /*!< out: The column number for - Doc ID */ - MY_ATTRIBUTE((warn_unused_result)); - -/*******************************************************************//** -Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME -on the Doc ID column in MySQL create index definition. -@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index, -FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */ -UNIV_INTERN -enum fts_doc_id_index_enum -innobase_fts_check_doc_id_index_in_def( -/*===================================*/ - ulint n_key, /*!< in: Number of keys */ - const KEY* key_info) /*!< in: Key definitions */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************** -@return version of the extended FTS API */ -uint -innobase_fts_get_version(); - -/*********************************************************************** -@return Which part of the extended FTS API is supported */ -ulonglong -innobase_fts_flags(); - -/*********************************************************************** -Find and Retrieve the FTS doc_id for the current result row -@return the document ID */ -ulonglong -innobase_fts_retrieve_docid( -/*============================*/ - FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */ - -/*********************************************************************** -Find and retrieve the size of the current result -@return number of matching rows */ -ulonglong -innobase_fts_count_matches( -/*============================*/ - FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */ - -/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default -system clustered index 
when there is no primary key. */ -extern const char innobase_index_reserve_name[]; - -/*********************************************************************//** -Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object. -Those flags are stored in .frm file and end up in the MySQL table object, -but are frequently used inside InnoDB so we keep their copies into the -InnoDB table object. */ -UNIV_INTERN -void -innobase_copy_frm_flags_from_create_info( -/*=====================================*/ - dict_table_t* innodb_table, /*!< in/out: InnoDB table */ - const HA_CREATE_INFO* create_info); /*!< in: create info */ - -/*********************************************************************//** -Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object. -Those flags are stored in .frm file and end up in the MySQL table object, -but are frequently used inside InnoDB so we keep their copies into the -InnoDB table object. */ -UNIV_INTERN -void -innobase_copy_frm_flags_from_table_share( -/*=====================================*/ - dict_table_t* innodb_table, /*!< in/out: InnoDB table */ - const TABLE_SHARE* table_share); /*!< in: table share */ - -/*******************************************************************//** -This function builds a translation table in INNOBASE_SHARE -structure for fast index location with mysql array number from its -table->key_info structure. This also provides the necessary translation -between the key order in mysql key_info and Innodb ib_table->indexes if -they are not fully matched with each other. -Note we do not have any mutex protecting the translation table -building based on the assumption that there is no concurrent -index creation/drop and DMLs that requires index lookup. All table -handle will be closed before the index creation/drop. 
-@return TRUE if index translation table built successfully */ -UNIV_INTERN -ibool -innobase_build_index_translation( -/*=============================*/ - const TABLE* table, /*!< in: table in MySQL data - dictionary */ - dict_table_t* ib_table, /*!< in: table in Innodb data - dictionary */ - INNOBASE_SHARE* share); /*!< in/out: share structure - where index translation table - will be constructed in. */ - -/********************************************************************//** -Helper function to push frm mismatch error to error log and -if needed to sql-layer. */ -UNIV_INTERN -void -ib_push_frm_error( -/*==============*/ - THD* thd, /*!< in: MySQL thd */ - dict_table_t* ib_table, /*!< in: InnoDB table */ - TABLE* table, /*!< in: MySQL table */ - ulint n_keys, /*!< in: InnoDB #keys */ - bool push_warning); /*!< in: print warning ? */ diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc deleted file mode 100644 index 0e7cc9a655b..00000000000 --- a/storage/xtradb/handler/handler0alter.cc +++ /dev/null @@ -1,6431 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file handler/handler0alter.cc -Smart ALTER TABLE -*******************************************************/ - -#include <my_global.h> -#include <unireg.h> -#include <mysqld_error.h> -#include <log.h> -#include <debug_sync.h> -#include <innodb_priv.h> -#include <sql_alter.h> -#include <sql_class.h> -#include <sql_table.h> - -#include "dict0crea.h" -#include "dict0dict.h" -#include "dict0priv.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" -#include "log0log.h" -#include "rem0types.h" -#include "row0log.h" -#include "row0merge.h" -#include "srv0srv.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "ha_prototypes.h" -#include "handler0alter.h" -#include "srv0mon.h" -#include "fts0priv.h" -#include "pars0pars.h" -#include "row0sel.h" -#include "ha_innodb.h" -#ifdef WITH_WSREP -//#include "wsrep_api.h" -#include <sql_acl.h> // PROCESS_ACL -#endif - -/** Operations for creating secondary indexes (no rebuild needed) */ -static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE - = Alter_inplace_info::ADD_INDEX - | Alter_inplace_info::ADD_UNIQUE_INDEX; - -/** Operations for rebuilding a table in place */ -static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_REBUILD - = Alter_inplace_info::ADD_PK_INDEX - | Alter_inplace_info::DROP_PK_INDEX - | Alter_inplace_info::CHANGE_CREATE_OPTION - /* CHANGE_CREATE_OPTION needs to check innobase_need_rebuild() */ - | Alter_inplace_info::ALTER_COLUMN_NULLABLE - | Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE - | Alter_inplace_info::ALTER_COLUMN_ORDER - | Alter_inplace_info::DROP_COLUMN - | Alter_inplace_info::ADD_COLUMN - | Alter_inplace_info::RECREATE_TABLE 
- /* - | Alter_inplace_info::ALTER_COLUMN_TYPE - | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH - */ - ; - -/** Operations that require changes to data */ -static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_DATA - = INNOBASE_ONLINE_CREATE | INNOBASE_ALTER_REBUILD; - -/** Operations for altering a table that InnoDB does not care about */ -static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE - = Alter_inplace_info::ALTER_COLUMN_DEFAULT - | Alter_inplace_info::ALTER_PARTITIONED - | Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT - | Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE - | Alter_inplace_info::ALTER_RENAME; - -/** Operations on foreign key definitions (changing the schema only) */ -static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_FOREIGN_OPERATIONS - = Alter_inplace_info::DROP_FOREIGN_KEY - | Alter_inplace_info::ADD_FOREIGN_KEY; - -/** Operations that InnoDB cares about and can perform without rebuild */ -static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD - = INNOBASE_ONLINE_CREATE - | INNOBASE_FOREIGN_OPERATIONS - | Alter_inplace_info::DROP_INDEX - | Alter_inplace_info::DROP_UNIQUE_INDEX - | Alter_inplace_info::ALTER_COLUMN_NAME; - -/* Report an InnoDB error to the client by invoking my_error(). 
*/ -static UNIV_COLD MY_ATTRIBUTE((nonnull)) -void -my_error_innodb( -/*============*/ - dberr_t error, /*!< in: InnoDB error code */ - const char* table, /*!< in: table name */ - ulint flags) /*!< in: table flags */ -{ - switch (error) { - case DB_MISSING_HISTORY: - my_error(ER_TABLE_DEF_CHANGED, MYF(0)); - break; - case DB_RECORD_NOT_FOUND: - my_error(ER_KEY_NOT_FOUND, MYF(0), table); - break; - case DB_DEADLOCK: - my_error(ER_LOCK_DEADLOCK, MYF(0)); - break; - case DB_LOCK_WAIT_TIMEOUT: - my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0)); - break; - case DB_INTERRUPTED: - my_error(ER_QUERY_INTERRUPTED, MYF(0)); - break; - case DB_OUT_OF_MEMORY: - my_error(ER_OUT_OF_RESOURCES, MYF(0)); - break; - case DB_OUT_OF_FILE_SPACE: - my_error(ER_RECORD_FILE_FULL, MYF(0), table); - break; - case DB_TEMP_FILE_WRITE_FAILURE: - my_error(ER_GET_ERRMSG, MYF(0), - DB_TEMP_FILE_WRITE_FAILURE, - ut_strerr(DB_TEMP_FILE_WRITE_FAILURE), - "InnoDB"); - break; - case DB_TOO_BIG_INDEX_COL: - my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), - DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)); - break; - case DB_TOO_MANY_CONCURRENT_TRXS: - my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0)); - break; - case DB_LOCK_TABLE_FULL: - my_error(ER_LOCK_TABLE_FULL, MYF(0)); - break; - case DB_UNDO_RECORD_TOO_BIG: - my_error(ER_UNDO_RECORD_TOO_BIG, MYF(0)); - break; - case DB_CORRUPTION: - my_error(ER_NOT_KEYFILE, MYF(0), table); - break; - case DB_TOO_BIG_RECORD: - my_error(ER_TOO_BIG_ROWSIZE, MYF(0), - page_get_free_space_of_empty( - flags & DICT_TF_COMPACT) / 2); - break; - case DB_INVALID_NULL: - /* TODO: report the row, as we do for DB_DUPLICATE_KEY */ - my_error(ER_INVALID_USE_OF_NULL, MYF(0)); - break; - case DB_TABLESPACE_EXISTS: - my_error(ER_TABLESPACE_EXISTS, MYF(0), table); - break; - -#ifdef UNIV_DEBUG - case DB_SUCCESS: - case DB_DUPLICATE_KEY: - case DB_ONLINE_LOG_TOO_BIG: - /* These codes should not be passed here. 
*/ - ut_error; -#endif /* UNIV_DEBUG */ - default: - my_error(ER_GET_ERRNO, MYF(0), error, "InnoDB"); - break; - } -} - -/** Determine if fulltext indexes exist in a given table. -@param table MySQL table -@return whether fulltext indexes exist on the table */ -static -bool -innobase_fulltext_exist( -/*====================*/ - const TABLE* table) -{ - for (uint i = 0; i < table->s->keys; i++) { - if (table->key_info[i].flags & HA_FULLTEXT) { - return(true); - } - } - - return(false); -} - -/*******************************************************************//** -Determine if ALTER TABLE needs to rebuild the table. -@param ha_alter_info the DDL operation -@param altered_table MySQL original table -@return whether it is necessary to rebuild the table */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_need_rebuild( -/*==================*/ - const Alter_inplace_info* ha_alter_info, - const TABLE* altered_table) -{ - Alter_inplace_info::HA_ALTER_FLAGS alter_inplace_flags = - ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE); - - if (alter_inplace_flags - == Alter_inplace_info::CHANGE_CREATE_OPTION - && !(ha_alter_info->create_info->used_fields - & (HA_CREATE_USED_ROW_FORMAT - | HA_CREATE_USED_KEY_BLOCK_SIZE))) { - /* Any other CHANGE_CREATE_OPTION than changing - ROW_FORMAT or KEY_BLOCK_SIZE is ignored. */ - return(false); - } - - /* If alter table changes column name and adds a new - index, we need to check is this new index created - to new column name. This is because column name - changes are done normally after creating indexes. 
*/ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME) && - ((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_INDEX) || - (ha_alter_info->handler_flags - & Alter_inplace_info::ADD_FOREIGN_KEY))) { - for (ulint i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY* key = &ha_alter_info->key_info_buffer[ - ha_alter_info->index_add_buffer[i]]; - - for (ulint j = 0; j < key->user_defined_key_parts; j++) { - const KEY_PART_INFO* key_part = &(key->key_part[j]); - const Field* field = altered_table->field[key_part->fieldnr]; - - /* Field used on added index is renamed on - this same alter table. We need table - rebuild. */ - if (field && field->flags & FIELD_IS_RENAMED) { - return (true); - } - } - } - } - - return(!!(ha_alter_info->handler_flags & INNOBASE_ALTER_REBUILD)); -} - -/** Check if InnoDB supports a particular alter table in-place -@param altered_table TABLE object for new version of table. -@param ha_alter_info Structure describing changes to be done -by ALTER TABLE and holding data used during in-place alter. - -@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported -@retval HA_ALTER_INPLACE_NO_LOCK Supported -@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires -lock during main phase and exclusive lock during prepare phase. 
-@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase -requires exclusive lock (any transactions that have accessed the table -must commit or roll back first, and no transactions can access the table -while prepare_inplace_alter_table() is executing) -*/ -UNIV_INTERN -enum_alter_inplace_result -ha_innobase::check_if_supported_inplace_alter( -/*==========================================*/ - TABLE* altered_table, - Alter_inplace_info* ha_alter_info) -{ - DBUG_ENTER("check_if_supported_inplace_alter"); - - if (high_level_read_only) { - ha_alter_info->unsupported_reason = - innobase_get_err_msg(ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } else if (srv_created_new_raw || srv_force_recovery) { - - ha_alter_info->unsupported_reason = - innobase_get_err_msg(ER_READ_ONLY_MODE); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - if (altered_table->s->stored_fields > REC_MAX_N_USER_FIELDS) { - /* Deny the inplace ALTER TABLE. MySQL will try to - re-create the table and ha_innobase::create() will - return an error too. This is how we effectively - deny adding too many columns to a table. 
*/ - ha_alter_info->unsupported_reason = - innobase_get_err_msg(ER_TOO_MANY_FIELDS); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - update_thd(); - trx_search_latch_release_if_reserved(prebuilt->trx); - - /* Change on engine specific table options require rebuild of the - table */ - if (ha_alter_info->handler_flags - & Alter_inplace_info::CHANGE_CREATE_OPTION) { - ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct; - ha_table_option_struct *old_options= table->s->option_struct; - - if (new_options->page_compressed != old_options->page_compressed || - new_options->page_compression_level != old_options->page_compression_level || - new_options->atomic_writes != old_options->atomic_writes) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - if (new_options->encryption != old_options->encryption || - new_options->encryption_key_id != old_options->encryption_key_id) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - - if (ha_alter_info->handler_flags - & ~(INNOBASE_INPLACE_IGNORE - | INNOBASE_ALTER_NOREBUILD - | INNOBASE_ALTER_REBUILD)) { - - if (ha_alter_info->handler_flags - & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH - | Alter_inplace_info::ALTER_COLUMN_TYPE)) - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* Only support online add foreign key constraint when - check_foreigns is turned off */ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_FOREIGN_KEY) - && prebuilt->trx->check_foreigns) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - if 
(!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) { - DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK); - } - - /* Only support NULL -> NOT NULL change if strict table sql_mode - is set. Fall back to COPY for conversion if not strict tables. - In-Place will fail with an error when trying to convert - NULL to a NOT NULL value. */ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE) - && !thd_is_strict_mode(user_thd)) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* InnoDB cannot IGNORE when creating unique indexes. IGNORE - should silently delete some duplicate rows. Our inplace_alter - code will not delete anything from existing indexes. */ - if (ha_alter_info->ignore - && (ha_alter_info->handler_flags - & (Alter_inplace_info::ADD_PK_INDEX - | Alter_inplace_info::ADD_UNIQUE_INDEX))) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* DROP PRIMARY KEY is only allowed in combination with ADD - PRIMARY KEY. */ - if ((ha_alter_info->handler_flags - & (Alter_inplace_info::ADD_PK_INDEX - | Alter_inplace_info::DROP_PK_INDEX)) - == Alter_inplace_info::DROP_PK_INDEX) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* If a column change from NOT NULL to NULL, - and there's a implict pk on this column. the - table should be rebuild. 
The change should - only go through the "Copy" method.*/ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NULLABLE)) { - uint primary_key = altered_table->s->primary_key; - - /* See if MYSQL table has no pk but we do.*/ - if (UNIV_UNLIKELY(primary_key >= MAX_KEY) - && !row_table_got_default_clust_index(prebuilt->table)) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_PRIMARY_CANT_HAVE_NULL); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - - /* - InnoDB in different MariaDB versions was generating different mtype - codes for certain types. In some cases the signed/unsigned bit was - generated differently too. - - Online ALTER would change the mtype/unsigned_flag (to what the - current code generates) without changing the underlying data - represenation, and it might result in data corruption. - - Don't do online ALTER if mtype/unsigned_flag are wrong. - */ - for (ulint i = 0, icol= 0; i < table->s->fields; i++) { - const Field* field = table->field[i]; - const dict_col_t* col = dict_table_get_nth_col(prebuilt->table, icol); - ulint unsigned_flag; - if (!field->stored_in_db()) - continue; - icol++; - - if (col->mtype != get_innobase_type_from_mysql_type(&unsigned_flag, field)) { - - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - if ((col->prtype & DATA_UNSIGNED) != unsigned_flag) { - - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - - /* If we have column that has changed from NULL -> NOT NULL - and column default has changed we need to do additional - check. 
*/ - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE) && - (ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_DEFAULT)) { - Alter_info *alter_info = ha_alter_info->alter_info; - List_iterator<Create_field> def_it(alter_info->create_list); - Create_field *def; - while ((def=def_it++)) { - - /* If this is first column definition whose SQL type - is TIMESTAMP and it is defined as NOT NULL and - it has either constant default or function default - we must use "Copy" method. */ - if (def->is_timestamp_type()) { - if ((def->flags & NOT_NULL_FLAG) != 0 && // NOT NULL - (def->default_value != NULL || // constant default ? - def->unireg_check != Field::NONE)) { // function default - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - break; - } - } - } - - ulint n_indexes = UT_LIST_GET_LEN((prebuilt->table)->indexes); - - /* If InnoDB dictionary and MySQL frm file are not consistent - use "Copy" method. */ - if (prebuilt->table->dict_frm_mismatch) { - - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_NO_SUCH_INDEX); - ib_push_frm_error(user_thd, prebuilt->table, altered_table, - n_indexes, true); - - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - /* We should be able to do the operation in-place. - See if we can do it online (LOCK=NONE). */ - bool online = true; - - List_iterator_fast<Create_field> cf_it( - ha_alter_info->alter_info->create_list); - - /* Fix the key parts. 
*/ - for (KEY* new_key = ha_alter_info->key_info_buffer; - new_key < ha_alter_info->key_info_buffer - + ha_alter_info->key_count; - new_key++) { - for (KEY_PART_INFO* key_part = new_key->key_part; - key_part < new_key->key_part + new_key->user_defined_key_parts; - key_part++) { - const Create_field* new_field; - - DBUG_ASSERT(key_part->fieldnr - < altered_table->s->fields); - - cf_it.rewind(); - for (uint fieldnr = 0; (new_field = cf_it++); - fieldnr++) { - if (fieldnr == key_part->fieldnr) { - break; - } - } - - DBUG_ASSERT(new_field); - - key_part->field = altered_table->field[ - key_part->fieldnr]; - /* In some special cases InnoDB emits "false" - duplicate key errors with NULL key values. Let - us play safe and ensure that we can correctly - print key values even in such cases .*/ - key_part->null_offset = key_part->field->null_offset(); - key_part->null_bit = key_part->field->null_bit; - - if (new_field->field) { - /* This is an existing column. */ - continue; - } - - /* This is an added column. */ - DBUG_ASSERT(ha_alter_info->handler_flags - & Alter_inplace_info::ADD_COLUMN); - - /* We cannot replace a hidden FTS_DOC_ID - with a user-visible FTS_DOC_ID. */ - if (prebuilt->table->fts - && innobase_fulltext_exist(altered_table) - && !my_strcasecmp( - system_charset_info, - key_part->field->field_name.str, - FTS_DOC_ID_COL_NAME)) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - - DBUG_ASSERT((MTYP_TYPENR(key_part->field->unireg_check) - == Field::NEXT_NUMBER) - == !!(key_part->field->flags - & AUTO_INCREMENT_FLAG)); - - if (key_part->field->flags & AUTO_INCREMENT_FLAG) { - /* We cannot assign an AUTO_INCREMENT - column values during online ALTER. 
*/ - DBUG_ASSERT(key_part->field == altered_table - -> found_next_number_field); - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC); - online = false; - } - } - } - - DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col - <= table->s->stored_fields); - DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col - < dict_table_get_n_user_cols(prebuilt->table)); - - if (prebuilt->table->fts - && innobase_fulltext_exist(altered_table)) { - /* FULLTEXT indexes are supposed to remain. */ - /* Disallow DROP INDEX FTS_DOC_ID_INDEX */ - - for (uint i = 0; i < ha_alter_info->index_drop_count; i++) { - if (!my_strcasecmp( - system_charset_info, - ha_alter_info->index_drop_buffer[i]->name, - FTS_DOC_ID_INDEX_NAME)) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - - /* InnoDB can have a hidden FTS_DOC_ID_INDEX on a - visible FTS_DOC_ID column as well. Prevent dropping or - renaming the FTS_DOC_ID. */ - - for (Field** fp = table->field; *fp; fp++) { - if (!((*fp)->flags - & (FIELD_IS_RENAMED | FIELD_IS_DROPPED))) { - continue; - } - - if (!my_strcasecmp( - system_charset_info, - (*fp)->field_name.str, - FTS_DOC_ID_COL_NAME)) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - } - } - - prebuilt->trx->will_lock++; - - if (!online) { - /* We already determined that only a non-locking - operation is possible. */ - } else if (((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_PK_INDEX) - || innobase_need_rebuild(ha_alter_info, table)) - && (innobase_fulltext_exist(altered_table))) { - /* Refuse to rebuild the table online, if - fulltext indexes are to survive the rebuild. 
*/ - online = false; - /* If the table already contains fulltext indexes, - refuse to rebuild the table natively altogether. */ - if (prebuilt->table->fts) { - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_INNODB_FT_LIMIT); - DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); - } - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS); - } else if ((ha_alter_info->handler_flags - & Alter_inplace_info::ADD_INDEX)) { - /* Building a full-text index requires a lock. - We could do without a lock if the table already contains - an FTS_DOC_ID column, but in that case we would have - to apply the modification log to the full-text indexes. */ - - for (uint i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY* key = - &ha_alter_info->key_info_buffer[ - ha_alter_info->index_add_buffer[i]]; - if (key->flags & HA_FULLTEXT) { - DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK - & ~(HA_FULLTEXT - | HA_PACK_KEY - | HA_GENERATED_KEY - | HA_BINARY_PACK_KEY))); - ha_alter_info->unsupported_reason = innobase_get_err_msg( - ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS); - online = false; - break; - } - } - } - - DBUG_RETURN(online - ? 
HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE - : HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE); -} - -/*************************************************************//** -Initialize the dict_foreign_t structure with supplied info -@return true if added, false if duplicate foreign->id */ -static MY_ATTRIBUTE((nonnull(1,3,5,7))) -bool -innobase_init_foreign( -/*==================*/ - dict_foreign_t* foreign, /*!< in/out: structure to - initialize */ - char* constraint_name, /*!< in/out: constraint name if - exists */ - dict_table_t* table, /*!< in: foreign table */ - dict_index_t* index, /*!< in: foreign key index */ - const char** column_names, /*!< in: foreign key column - names */ - ulint num_field, /*!< in: number of columns */ - const char* referenced_table_name, /*!< in: referenced table - name */ - dict_table_t* referenced_table, /*!< in: referenced table */ - dict_index_t* referenced_index, /*!< in: referenced index */ - const char** referenced_column_names,/*!< in: referenced column - names */ - ulint referenced_num_field) /*!< in: number of referenced - columns */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); - - if (constraint_name) { - ulint db_len; - - /* Catenate 'databasename/' to the constraint name specified - by the user: we conceive the constraint as belonging to the - same MySQL 'database' as the table itself. We store the name - to foreign->id. 
*/ - - db_len = dict_get_db_name_len(table->name); - - foreign->id = static_cast<char*>(mem_heap_alloc( - foreign->heap, db_len + strlen(constraint_name) + 2)); - - ut_memcpy(foreign->id, table->name, db_len); - foreign->id[db_len] = '/'; - strcpy(foreign->id + db_len + 1, constraint_name); - - /* Check if any existing foreign key has the same id, - this is needed only if user supplies the constraint name */ - - if (table->foreign_set.find(foreign) - != table->foreign_set.end()) { - return(false); - } - } - - foreign->foreign_table = table; - foreign->foreign_table_name = mem_heap_strdup( - foreign->heap, table->name); - dict_mem_foreign_table_name_lookup_set(foreign, TRUE); - - foreign->foreign_index = index; - foreign->n_fields = (unsigned int) num_field; - - foreign->foreign_col_names = static_cast<const char**>( - mem_heap_alloc(foreign->heap, num_field * sizeof(void*))); - - for (ulint i = 0; i < foreign->n_fields; i++) { - foreign->foreign_col_names[i] = mem_heap_strdup( - foreign->heap, column_names[i]); - } - - foreign->referenced_index = referenced_index; - foreign->referenced_table = referenced_table; - - foreign->referenced_table_name = mem_heap_strdup( - foreign->heap, referenced_table_name); - dict_mem_referenced_table_name_lookup_set(foreign, TRUE); - - foreign->referenced_col_names = static_cast<const char**>( - mem_heap_alloc(foreign->heap, - referenced_num_field * sizeof(void*))); - - for (ulint i = 0; i < foreign->n_fields; i++) { - foreign->referenced_col_names[i] - = mem_heap_strdup(foreign->heap, - referenced_column_names[i]); - } - - return(true); -} - -/*************************************************************//** -Check whether the foreign key options is legit -@return true if it is */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_check_fk_option( -/*=====================*/ - const dict_foreign_t* foreign) /*!< in: foreign key */ -{ - if (!foreign->foreign_index) { - return(true); - } - - if (foreign->type & 
(DICT_FOREIGN_ON_UPDATE_SET_NULL - | DICT_FOREIGN_ON_DELETE_SET_NULL)) { - - for (ulint j = 0; j < foreign->n_fields; j++) { - if ((dict_index_get_nth_col( - foreign->foreign_index, j)->prtype) - & DATA_NOT_NULL) { - - /* It is not sensible to define - SET NULL if the column is not - allowed to be NULL! */ - return(false); - } - } - } - - return(true); -} - -/*************************************************************//** -Set foreign key options -@return true if successfully set */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_set_foreign_key_option( -/*============================*/ - dict_foreign_t* foreign, /*!< in:InnoDB Foreign key */ - Foreign_key* fk_key) /*!< in: Foreign key info from - MySQL */ -{ - ut_ad(!foreign->type); - - switch (fk_key->delete_opt) { - case Foreign_key::FK_OPTION_NO_ACTION: - case Foreign_key::FK_OPTION_RESTRICT: - case Foreign_key::FK_OPTION_DEFAULT: - foreign->type = DICT_FOREIGN_ON_DELETE_NO_ACTION; - break; - case Foreign_key::FK_OPTION_CASCADE: - foreign->type = DICT_FOREIGN_ON_DELETE_CASCADE; - break; - case Foreign_key::FK_OPTION_SET_NULL: - foreign->type = DICT_FOREIGN_ON_DELETE_SET_NULL; - break; - } - - switch (fk_key->update_opt) { - case Foreign_key::FK_OPTION_NO_ACTION: - case Foreign_key::FK_OPTION_RESTRICT: - case Foreign_key::FK_OPTION_DEFAULT: - foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION; - break; - case Foreign_key::FK_OPTION_CASCADE: - foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE; - break; - case Foreign_key::FK_OPTION_SET_NULL: - foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL; - break; - } - - return(innobase_check_fk_option(foreign)); -} - -/*******************************************************************//** -Check if a foreign key constraint can make use of an index -that is being created. 
-@return useable index, or NULL if none found */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -const KEY* -innobase_find_equiv_index( -/*======================*/ - const char*const* col_names, - /*!< in: column names */ - uint n_cols, /*!< in: number of columns */ - const KEY* keys, /*!< in: index information */ - const uint* add, /*!< in: indexes being created */ - uint n_add) /*!< in: number of indexes to create */ -{ - for (uint i = 0; i < n_add; i++) { - const KEY* key = &keys[add[i]]; - - if (key->user_defined_key_parts < n_cols) { -no_match: - continue; - } - - for (uint j = 0; j < n_cols; j++) { - const KEY_PART_INFO& key_part = key->key_part[j]; - uint32 col_len - = key_part.field->pack_length(); - - /* The MySQL pack length contains 1 or 2 bytes - length field for a true VARCHAR. */ - - if (key_part.field->type() == MYSQL_TYPE_VARCHAR) { - col_len -= static_cast<const Field_varstring*>( - key_part.field)->length_bytes; - } - - if (key_part.length < col_len) { - - /* Column prefix indexes cannot be - used for FOREIGN KEY constraints. 
*/ - goto no_match; - } - - if (innobase_strcasecmp(col_names[j], - key_part.field->field_name.str)) { - /* Name mismatch */ - goto no_match; - } - } - - return(key); - } - - return(NULL); -} - -/*************************************************************//** -Find an index whose first fields are the columns in the array -in the same order and is not marked for deletion -@return matching index, NULL if not found */ -static MY_ATTRIBUTE((nonnull(1,2,6), warn_unused_result)) -dict_index_t* -innobase_find_fk_index( -/*===================*/ - Alter_inplace_info* ha_alter_info, - /*!< in: alter table info */ - dict_table_t* table, /*!< in: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - dict_index_t** drop_index, - /*!< in: indexes to be dropped */ - ulint n_drop_index, - /*!< in: size of drop_index[] */ - const char** columns,/*!< in: array of column names */ - ulint n_cols) /*!< in: number of columns */ -{ - dict_index_t* index; - - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (!(index->type & DICT_FTS) - && dict_foreign_qualify_index( - table, col_names, columns, n_cols, - index, NULL, true, 0, NULL, NULL, NULL)) { - for (ulint i = 0; i < n_drop_index; i++) { - if (index == drop_index[i]) { - /* Skip to-be-dropped indexes. 
*/ - goto next_rec; - } - } - - return(index); - } - -next_rec: - index = dict_table_get_next_index(index); - } - - return(NULL); -} - -/*************************************************************//** -Create InnoDB foreign key structure from MySQL alter_info -@retval true if successful -@retval false on error (will call my_error()) */ -static MY_ATTRIBUTE((nonnull(1,2,3,7,8), warn_unused_result)) -bool -innobase_get_foreign_key_info( -/*==========================*/ - Alter_inplace_info* - ha_alter_info, /*!< in: alter table info */ - const TABLE_SHARE* - table_share, /*!< in: the TABLE_SHARE */ - dict_table_t* table, /*!< in: table */ - const char** col_names, /*!< in: column names, or NULL - to use table->col_names */ - dict_index_t** drop_index, /*!< in: indexes to be dropped */ - ulint n_drop_index, /*!< in: size of drop_index[] */ - dict_foreign_t**add_fk, /*!< out: foreign constraint added */ - ulint* n_add_fk, /*!< out: number of foreign - constraints added */ - const trx_t* trx) /*!< in: user transaction */ -{ - Key* key; - Foreign_key* fk_key; - dict_table_t* referenced_table = NULL; - char* referenced_table_name = NULL; - ulint num_fk = 0; - Alter_info* alter_info = ha_alter_info->alter_info; - - *n_add_fk = 0; - - List_iterator<Key> key_iterator(alter_info->key_list); - - while ((key=key_iterator++)) { - if (key->type != Key::FOREIGN_KEY) { - continue; - } - - const char* column_names[MAX_NUM_FK_COLUMNS]; - dict_index_t* index = NULL; - const char* referenced_column_names[MAX_NUM_FK_COLUMNS]; - dict_index_t* referenced_index = NULL; - ulint num_col = 0; - ulint referenced_num_col = 0; - bool correct_option; - char* db_namep = NULL; - char* tbl_namep = NULL; - ulint db_name_len = 0; - ulint tbl_name_len = 0; - char db_name[MAX_DATABASE_NAME_LEN]; - char tbl_name[MAX_TABLE_NAME_LEN]; - - fk_key = static_cast<Foreign_key*>(key); - - if (fk_key->columns.elements > 0) { - ulint i = 0; - Key_part_spec* column; - List_iterator<Key_part_spec> 
key_part_iterator( - fk_key->columns); - - /* Get all the foreign key column info for the - current table */ - while ((column = key_part_iterator++)) { - column_names[i] = column->field_name.str; - ut_ad(i < MAX_NUM_FK_COLUMNS); - i++; - } - - index = innobase_find_fk_index( - ha_alter_info, - table, col_names, - drop_index, n_drop_index, - column_names, i); - - /* MySQL would add a index in the creation - list if no such index for foreign table, - so we have to use DBUG_EXECUTE_IF to simulate - the scenario */ - DBUG_EXECUTE_IF("innodb_test_no_foreign_idx", - index = NULL;); - - /* Check whether there exist such - index in the the index create clause */ - if (!index && !innobase_find_equiv_index( - column_names, static_cast<uint>(i), - ha_alter_info->key_info_buffer, - ha_alter_info->index_add_buffer, - ha_alter_info->index_add_count)) { - my_error( - ER_FK_NO_INDEX_CHILD, - MYF(0), - fk_key->name.str - ? fk_key->name.str : "", - table_share->table_name.str); - goto err_exit; - } - - num_col = i; - } - - add_fk[num_fk] = dict_mem_foreign_create(); - -#ifndef __WIN__ - if(fk_key->ref_db.str) { - tablename_to_filename(fk_key->ref_db.str, db_name, - MAX_DATABASE_NAME_LEN); - db_namep = db_name; - db_name_len = strlen(db_name); - } - if (fk_key->ref_table.str) { - tablename_to_filename(fk_key->ref_table.str, tbl_name, - MAX_TABLE_NAME_LEN); - tbl_namep = tbl_name; - tbl_name_len = strlen(tbl_name); - } -#else - ut_ad(fk_key->ref_table.str); - tablename_to_filename(fk_key->ref_table.str, tbl_name, - MAX_TABLE_NAME_LEN); - innobase_casedn_str(tbl_name); - tbl_name_len = strlen(tbl_name); - tbl_namep = &tbl_name[0]; - - if (fk_key->ref_db.str != NULL) { - tablename_to_filename(fk_key->ref_db.str, db_name, - MAX_DATABASE_NAME_LEN); - innobase_casedn_str(db_name); - db_name_len = strlen(db_name); - db_namep = &db_name[0]; - } -#endif - mutex_enter(&dict_sys->mutex); - - referenced_table_name = dict_get_referenced_table( - table->name, - db_namep, - db_name_len, - 
tbl_namep, - tbl_name_len, - &referenced_table, - add_fk[num_fk]->heap); - - /* Test the case when referenced_table failed to - open, if trx->check_foreigns is not set, we should - still be able to add the foreign key */ - DBUG_EXECUTE_IF("innodb_test_open_ref_fail", - referenced_table = NULL;); - - if (!referenced_table && trx->check_foreigns) { - mutex_exit(&dict_sys->mutex); - my_error(ER_FK_CANNOT_OPEN_PARENT, - MYF(0), tbl_namep); - - goto err_exit; - } - - if (fk_key->ref_columns.elements > 0) { - ulint i = 0; - Key_part_spec* column; - List_iterator<Key_part_spec> key_part_iterator( - fk_key->ref_columns); - - while ((column = key_part_iterator++)) { - referenced_column_names[i] = - column->field_name.str; - ut_ad(i < MAX_NUM_FK_COLUMNS); - i++; - } - - if (referenced_table) { - referenced_index = - dict_foreign_find_index( - referenced_table, 0, - referenced_column_names, - i, index, - TRUE, FALSE, NULL, NULL, NULL); - - DBUG_EXECUTE_IF( - "innodb_test_no_reference_idx", - referenced_index = NULL;); - - /* Check whether there exist such - index in the the index create clause */ - if (!referenced_index) { - mutex_exit(&dict_sys->mutex); - my_error(ER_FK_NO_INDEX_PARENT, MYF(0), - fk_key->name.str - ? 
fk_key->name.str : "", - tbl_namep); - goto err_exit; - } - } else { - ut_a(!trx->check_foreigns); - } - - referenced_num_col = i; - } else { - /* Not possible to add a foreign key without a - referenced column */ - mutex_exit(&dict_sys->mutex); - my_error(ER_CANNOT_ADD_FOREIGN, MYF(0), tbl_namep); - goto err_exit; - } - - if (!innobase_init_foreign( - add_fk[num_fk], fk_key->name.str, - table, index, column_names, - num_col, referenced_table_name, - referenced_table, referenced_index, - referenced_column_names, referenced_num_col)) { - mutex_exit(&dict_sys->mutex); - my_error( - ER_DUP_CONSTRAINT_NAME, - MYF(0), - "FOREIGN KEY", add_fk[num_fk]->id); - goto err_exit; - } - - mutex_exit(&dict_sys->mutex); - - correct_option = innobase_set_foreign_key_option( - add_fk[num_fk], fk_key); - - DBUG_EXECUTE_IF("innodb_test_wrong_fk_option", - correct_option = false;); - - if (!correct_option) { - my_error(ER_FK_INCORRECT_OPTION, - MYF(0), - table_share->table_name.str, - add_fk[num_fk]->id); - goto err_exit; - } - - num_fk++; - } - - *n_add_fk = num_fk; - - return(true); -err_exit: - for (ulint i = 0; i <= num_fk; i++) { - if (add_fk[i]) { - dict_foreign_free(add_fk[i]); - } - } - - return(false); -} - -/*************************************************************//** -Copies an InnoDB column to a MySQL field. This function is -adapted from row_sel_field_store_in_mysql_format(). 
*/ -static -void -innobase_col_to_mysql( -/*==================*/ - const dict_col_t* col, /*!< in: InnoDB column */ - const uchar* data, /*!< in: InnoDB column data */ - ulint len, /*!< in: length of data, in bytes */ - Field* field) /*!< in/out: MySQL field */ -{ - uchar* ptr; - uchar* dest = field->ptr; - ulint flen = field->pack_length(); - - switch (col->mtype) { - case DATA_INT: - ut_ad(len == flen); - - /* Convert integer data from Innobase to little-endian - format, sign bit restored to normal */ - - for (ptr = dest + len; ptr != dest; ) { - *--ptr = *data++; - } - - if (!(field->flags & UNSIGNED_FLAG)) { - ((byte*) dest)[len - 1] ^= 0x80; - } - - break; - - case DATA_VARCHAR: - case DATA_VARMYSQL: - case DATA_BINARY: - field->reset(); - - if (field->type() == MYSQL_TYPE_VARCHAR) { - /* This is a >= 5.0.3 type true VARCHAR. Store the - length of the data to the first byte or the first - two bytes of dest. */ - - dest = row_mysql_store_true_var_len( - dest, len, flen - field->key_length()); - } - - /* Copy the actual data */ - memcpy(dest, data, len); - break; - - case DATA_BLOB: - /* Skip MySQL BLOBs when reporting an erroneous row - during index creation or table rebuild. */ - field->set_null(); - break; - -#ifdef UNIV_DEBUG - case DATA_MYSQL: - ut_ad(flen >= len); - ut_ad(DATA_MBMAXLEN(col->mbminmaxlen) - >= DATA_MBMINLEN(col->mbminmaxlen)); - memcpy(dest, data, len); - break; - - default: - case DATA_SYS_CHILD: - case DATA_SYS: - /* These column types should never be shipped to MySQL. */ - ut_ad(0); - - case DATA_FLOAT: - case DATA_DOUBLE: - case DATA_DECIMAL: - /* Above are the valid column types for MySQL data. */ - ut_ad(flen == len); - /* fall through */ - case DATA_FIXBINARY: - case DATA_CHAR: - /* We may have flen > len when there is a shorter - prefix on the CHAR and BINARY column. 
*/ - ut_ad(flen >= len); -#else /* UNIV_DEBUG */ - default: -#endif /* UNIV_DEBUG */ - memcpy(dest, data, len); - } -} - -/*************************************************************//** -Copies an InnoDB record to table->record[0]. */ -UNIV_INTERN -void -innobase_rec_to_mysql( -/*==================*/ - struct TABLE* table, /*!< in/out: MySQL table */ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: index */ - const ulint* offsets)/*!< in: rec_get_offsets( - rec, index, ...) */ -{ - uint n_fields = table->s->stored_fields; - uint sql_idx = 0; - - ut_ad(n_fields == dict_table_get_n_user_cols(index->table) - - !!(DICT_TF2_FLAG_IS_SET(index->table, - DICT_TF2_FTS_HAS_DOC_ID))); - - for (uint i = 0; i < n_fields; i++, sql_idx++) { - Field* field; - ulint ipos; - ulint ilen; - const uchar* ifield; - - while (!((field= table->field[sql_idx])->stored_in_db())) - sql_idx++; - - field->reset(); - - ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE, - NULL); - - if (ipos == ULINT_UNDEFINED - || rec_offs_nth_extern(offsets, ipos)) { -null_field: - field->set_null(); - continue; - } - - ifield = rec_get_nth_field(rec, offsets, ipos, &ilen); - - /* Assign the NULL flag */ - if (ilen == UNIV_SQL_NULL) { - ut_ad(field->real_maybe_null()); - goto null_field; - } - - field->set_notnull(); - - innobase_col_to_mysql( - dict_field_get_col( - dict_index_get_nth_field(index, ipos)), - ifield, ilen, field); - } -} - -/*************************************************************//** -Copies an InnoDB index entry to table->record[0]. 
*/
-UNIV_INTERN
-void
-innobase_fields_to_mysql(
-/*=====================*/
-	struct TABLE*		table,	/*!< in/out: MySQL table */
-	const dict_index_t*	index,	/*!< in: InnoDB index */
-	const dfield_t*		fields)	/*!< in: InnoDB index fields */
-{
-	uint	n_fields	= table->s->stored_fields;
-	uint	sql_idx		= 0;	/* position in table->field[]; advances past skipped fields */
-
-	ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
-	      - !!(DICT_TF2_FLAG_IS_SET(index->table,
-					DICT_TF2_FTS_HAS_DOC_ID)));
-
-	for (uint i = 0; i < n_fields; i++, sql_idx++) {
-		Field*		field;
-		ulint		ipos;
-
-		while (!((field= table->field[sql_idx])->stored_in_db()))	/* skip fields for which stored_in_db() is false */
-			sql_idx++;
-
-		field->reset();
-
-		ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE,
-							    NULL);
-
-		if (ipos == ULINT_UNDEFINED
-		    || dfield_is_ext(&fields[ipos])
-		    || dfield_is_null(&fields[ipos])) {
-
-			field->set_null();	/* column not found in the index, externally stored, or NULL */
-		} else {
-			field->set_notnull();
-
-			const dfield_t*	df	= &fields[ipos];
-
-			innobase_col_to_mysql(
-				dict_field_get_col(
-					dict_index_get_nth_field(index, ipos)),
-				static_cast<const uchar*>(dfield_get_data(df)),
-				dfield_get_len(df), field);
-		}
-	}
-}
-
-/*************************************************************//**
-Copies an InnoDB row to table->record[0].
-Walks the first table->s->stored_fields fields of the row; MySQL fields
-for which stored_in_db() is false are skipped. A field whose InnoDB
-value is externally stored or SQL NULL is set to NULL; every other
-value is converted with innobase_col_to_mysql(). */
-UNIV_INTERN
-void
-innobase_row_to_mysql(
-/*==================*/
-	struct TABLE*		table,	/*!< in/out: MySQL table */
-	const dict_table_t*	itab,	/*!< in: InnoDB table */
-	const dtuple_t*		row)	/*!< in: InnoDB row */
-{
-	uint	n_fields	= table->s->stored_fields;
-	uint	sql_idx		= 0;	/* position in table->field[]; advances past skipped fields */
-
-	/* The InnoDB row may contain an extra FTS_DOC_ID column at the end. */
-	ut_ad(row->n_fields == dict_table_get_n_cols(itab));
-	ut_ad(n_fields == row->n_fields - DATA_N_SYS_COLS
-	      - !!(DICT_TF2_FLAG_IS_SET(itab, DICT_TF2_FTS_HAS_DOC_ID)));
-
-	for (uint i = 0; i < n_fields; i++, sql_idx++) {
-		Field*		field;
-		const dfield_t*	df	= dtuple_get_nth_field(row, i);
-
-		while (!((field= table->field[sql_idx])->stored_in_db()))	/* skip fields for which stored_in_db() is false */
-			sql_idx++;
-
-		field->reset();
-
-		if (dfield_is_ext(df) || dfield_is_null(df)) {
-			field->set_null();
-		} else {
-			field->set_notnull();
-
-			innobase_col_to_mysql(
-				dict_table_get_nth_col(itab, i),
-				static_cast<const uchar*>(dfield_get_data(df)),
-				dfield_get_len(df), field);
-		}
-	}
-}
-
-/*************************************************************//**
-Resets table->record[0] by calling set_default() on each of the
-table->s->fields fields of the table. */
-UNIV_INTERN
-void
-innobase_rec_reset(
-/*===============*/
-	TABLE*			table)		/*!< in/out: MySQL table */
-{
-	uint	n_fields	= table->s->fields;
-	uint	i;
-
-	for (i = 0; i < n_fields; i++) {
-		table->field[i]->set_default();
-	}
-}
-
-/*******************************************************************//**
-This function checks that index keys are sensible.
-For every index listed in info->index_add_buffer it verifies that:
-(1) no other index being added has the same name,
-(2) no existing index of innodb_table has the same name, unless an
-index of that name is also listed in info->index_drop_buffer,
-(3) a key part on a DATA_INT, DATA_FLOAT, DATA_DOUBLE or DATA_DECIMAL
-column covers the whole column (no column prefix), and
-(4) no column appears twice in the same index.
-The first violation found is reported with my_error().
-@return	0 or error number (ER_WRONG_NAME_FOR_INDEX or
-ER_WRONG_KEY_COLUMN) */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-int
-innobase_check_index_keys(
-/*======================*/
-	const Alter_inplace_info*	info,
-				/*!< in: indexes to be created or dropped */
-	const dict_table_t*		innodb_table)
-				/*!< in: Existing indexes */
-{
-	for (uint key_num = 0; key_num < info->index_add_count;
-	     key_num++) {
-		const KEY&	key = info->key_info_buffer[
-			info->index_add_buffer[key_num]];
-
-		/* Check that the same index name does not appear
-		twice in indexes to be created. */
-
-		for (ulint i = 0; i < key_num; i++) {
-			const KEY&	key2 = info->key_info_buffer[
-				info->index_add_buffer[i]];
-
-			if (0 == strcmp(key.name, key2.name)) {
-				my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
-					 key.name);
-
-				return(ER_WRONG_NAME_FOR_INDEX);
-			}
-		}
-
-		/* Check that the same index name does not already exist. */
-
-		const dict_index_t* index;
-
-		for (index = dict_table_get_first_index(innodb_table);
-		     index; index = dict_table_get_next_index(index)) {
-
-			if (!strcmp(key.name, index->name)) {
-				break;
-			}
-		}
-
-		if (index) {	/* non-NULL only if the loop above stopped on a name match */
-			/* If a key by the same name is being created and
-			dropped, the name clash is OK. */
-			for (uint i = 0; i < info->index_drop_count;
-			     i++) {
-				const KEY*	drop_key
-					= info->index_drop_buffer[i];
-
-				if (0 == strcmp(key.name, drop_key->name)) {
-					goto name_ok;
-				}
-			}
-
-			my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key.name);
-
-			return(ER_WRONG_NAME_FOR_INDEX);
-		}
-
-name_ok:
-		for (ulint i = 0; i < key.user_defined_key_parts; i++) {
-			const KEY_PART_INFO&	key_part1
-				= key.key_part[i];
-			const Field*		field
-				= key_part1.field;
-			ibool			is_unsigned;
-
-			switch (get_innobase_type_from_mysql_type(
-					&is_unsigned, field)) {
-			default:
-				break;
-			case DATA_INT:
-			case DATA_FLOAT:
-			case DATA_DOUBLE:
-			case DATA_DECIMAL:
-				/* Check that MySQL does not try to
-				create a column prefix index field on
-				an inappropriate data type. A key part
-				that is at least as long as the field
-				is accepted; anything shorter falls
-				through to the error below. */
-
-				if (field->type() == MYSQL_TYPE_VARCHAR) {
-					if (key_part1.length
-					    >= field->pack_length()
-					    - ((Field_varstring*) field)
-					    ->length_bytes) {
-						break;
-					}
-				} else {
-					if (key_part1.length
-					    >= field->pack_length()) {
-						break;
-					}
-				}
-
-				my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
-					 field->field_name.str);
-				return(ER_WRONG_KEY_COLUMN);
-			}
-
-			/* Check that the same column does not appear
-			twice in the index. */
-
-			for (ulint j = 0; j < i; j++) {
-				const KEY_PART_INFO&	key_part2
-					= key.key_part[j];
-
-				if (key_part1.fieldnr != key_part2.fieldnr) {
-					continue;
-				}
-
-				my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
-					 field->field_name.str);
-				return(ER_WRONG_KEY_COLUMN);
-			}
-		}
-	}
-
-	return(0);
-}
-
-/*******************************************************************//**
-Create index field definition for key part.
-Fills in col_no, col_name and prefix_len of index_field. prefix_len is
-set to key_part->length when the column type is DATA_BLOB or when the
-key part is shorter than the field (for MYSQL_TYPE_VARCHAR: shorter
-than pack_length() minus length_bytes); otherwise it is set to 0. */
-static MY_ATTRIBUTE((nonnull(2,3)))
-void
-innobase_create_index_field_def(
-/*============================*/
-	const TABLE*		altered_table,	/*!< in: MySQL table that is
-						being altered, or NULL
-						if a new clustered index is
-						not being created */
-	const KEY_PART_INFO*	key_part,	/*!< in: MySQL key definition */
-	index_field_t*		index_field,	/*!< out: index field
-						definition for key_part */
-	const Field**		fields)		/*!< in: MySQL table fields;
-						only read when altered_table
-						is NULL */
-{
-	const Field*	field;
-	ibool		is_unsigned;
-	ulint		col_type;
-
-	DBUG_ENTER("innobase_create_index_field_def");
-
-	ut_ad(key_part);
-	ut_ad(index_field);
-
-	field = altered_table
-		? altered_table->field[key_part->fieldnr]
-		: key_part->field;
-	ut_a(field);
-
-	index_field->col_no = key_part->fieldnr;
-	index_field->col_name = altered_table ? field->field_name.str : fields[key_part->fieldnr]->field_name.str;
-
-	col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
-
-	if (DATA_BLOB == col_type
-	    || (key_part->length < field->pack_length()
-		&& field->type() != MYSQL_TYPE_VARCHAR)
-	    || (field->type() == MYSQL_TYPE_VARCHAR
-		&& key_part->length < field->pack_length()
-			- ((Field_varstring*) field)->length_bytes)) {
-
-		index_field->prefix_len = key_part->length;
-	} else {
-		index_field->prefix_len = 0;
-	}
-
-	DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Create index definition for key.
-The field array and the index name are allocated from heap. When
-new_clustered is false, TEMP_INDEX_PREFIX is prepended to the index
-name and the field definitions are built with altered_table set to
-NULL. */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_create_index_def(
-/*======================*/
-	const TABLE*		altered_table,	/*!< in: MySQL table that is
-						being altered */
-	const KEY*		keys,		/*!< in: key definitions */
-	ulint			key_number,	/*!< in: MySQL key number */
-	bool			new_clustered,	/*!< in: true if generating
-						a new clustered index
-						on the table */
-	bool			key_clustered,	/*!< in: true if this is
-						the new clustered index */
-	index_def_t*		index,		/*!< out: index definition */
-	mem_heap_t*		heap,		/*!< in: heap where memory
-						is allocated */
-	const Field**		fields)		/*!< in: MySQL table fields
-						*/
-{
-	const KEY*	key = &keys[key_number];
-	ulint		i;
-	ulint		len;
-	ulint		n_fields = key->user_defined_key_parts;
-	char*		index_name;
-
-	DBUG_ENTER("innobase_create_index_def");
-	DBUG_ASSERT(!key_clustered || new_clustered);
-
-	index->fields = static_cast<index_field_t*>(
-		mem_heap_alloc(heap, n_fields * sizeof *index->fields));
-
-	memset(index->fields, 0, n_fields * sizeof *index->fields);
-
-	index->ind_type = 0;
-	index->key_number = key_number;
-	index->n_fields = n_fields;
-	len = strlen(key->name) + 1;	/* includes the terminating NUL */
-	index->name = index_name = static_cast<char*>(
-		mem_heap_alloc(heap, len + !new_clustered));	/* one extra byte for the prefix when !new_clustered */
-
-	if (!new_clustered) {
-		*index_name++ = TEMP_INDEX_PREFIX;
-	}
-
-	memcpy(index_name, key->name, len);
-
-	if (key->flags & HA_NOSAME) {
-		index->ind_type |= DICT_UNIQUE;
-	}
-
-	if (key_clustered) {
-		DBUG_ASSERT(!(key->flags & HA_FULLTEXT));
-		index->ind_type |= DICT_CLUSTERED;
-	} else if (key->flags & HA_FULLTEXT) {
-		DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
-			      & ~(HA_FULLTEXT
-				  | HA_PACK_KEY
-				  | HA_BINARY_PACK_KEY)));
-		DBUG_ASSERT(!(key->flags & HA_NOSAME));
-		DBUG_ASSERT(!index->ind_type);
-		index->ind_type |= DICT_FTS;
-	}
-
-	if (!new_clustered) {
-		altered_table = NULL;
-	}
-
-	for (i = 0; i < n_fields; i++) {
-		innobase_create_index_field_def(
-			altered_table, &key->key_part[i], &index->fields[i], fields);
-	}
-
-	DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Check whether the table has the FTS_DOC_ID column.
-The stored fields of altered_table are searched first, comparing the
-names with my_strcasecmp(). A match whose name is not exactly
-FTS_DOC_ID_COL_NAME, or that is not a non-nullable unsigned
-MYSQL_TYPE_LONGLONG of pack length 8, is reported with my_error(); the
-function still returns true in that case and leaves *fts_doc_col_no
-at ULINT_UNDEFINED. If nothing matches and table is not NULL, the
-search continues over the columns of table that precede its last
-DATA_N_SYS_COLS columns, starting at the stored field count.
-@return whether there exists an FTS_DOC_ID column */
-static
-bool
-innobase_fts_check_doc_id_col(
-/*==========================*/
-	const dict_table_t*	table,		/*!< in: InnoDB table with
-						fulltext index, or NULL */
-	const TABLE*		altered_table,
-						/*!< in: MySQL table with
-						fulltext index */
-	ulint*			fts_doc_col_no)
-						/*!< out: The column number for
-						Doc ID, or ULINT_UNDEFINED
-						if it is of wrong type */
-{
-	*fts_doc_col_no = ULINT_UNDEFINED;
-
-	const uint n_cols = altered_table->s->stored_fields;
-	uint sql_idx = 0;
-	uint i;
-
-	for (i = 0; i < n_cols; i++, sql_idx++) {
-		const Field*	field;
-		while (!((field= altered_table->field[sql_idx])->
-			 stored_in_db()))
-			sql_idx++;
-		if (my_strcasecmp(system_charset_info,
-				  field->field_name.str, FTS_DOC_ID_COL_NAME)) {
-			continue;
-		}
-
-		if (strcmp(field->field_name.str, FTS_DOC_ID_COL_NAME)) {
-			my_error(ER_WRONG_COLUMN_NAME, MYF(0),
-				 field->field_name.str);
-		} else if (field->type() != MYSQL_TYPE_LONGLONG
-			   || field->pack_length() != 8
-			   || field->real_maybe_null()
-			   || !(field->flags & UNSIGNED_FLAG)) {
-			my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0),
-				 field->field_name.str);
-		} else {
-			*fts_doc_col_no = i;
-		}
-
-		return(true);
-	}
-
-	if (!table) {
-		return(false);
-	}
-
-	for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) {
-		const char*     name = dict_table_get_col_name(table, i);
-
-		if (strcmp(name, FTS_DOC_ID_COL_NAME) == 0) {
-#ifdef UNIV_DEBUG
-			const dict_col_t*       col;
-
-			col = dict_table_get_nth_col(table, i);
-
-			/* Because the FTS_DOC_ID does not exist in
-			the MySQL data dictionary, this must be the
-			internally created FTS_DOC_ID column. */
-			ut_ad(col->mtype == DATA_INT);
-			ut_ad(col->len == 8);
-			ut_ad(col->prtype & DATA_NOT_NULL);
-			ut_ad(col->prtype & DATA_UNSIGNED);
-#endif /* UNIV_DEBUG */
-			*fts_doc_col_no = i;
-			return(true);
-		}
-	}
-
-	return(false);
-}
-
-/*******************************************************************//**
-Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
-on the Doc ID column.
-The keys of altered_table (if not NULL) are examined first, then the
-indexes of table (if not NULL). The first index whose name matches
-FTS_DOC_ID_INDEX_NAME according to innobase_strcasecmp() decides the
-result.
-@return	the status of the FTS_DOC_ID index */
-UNIV_INTERN
-enum fts_doc_id_index_enum
-innobase_fts_check_doc_id_index(
-/*============================*/
-	const dict_table_t*	table,		/*!< in: table definition */
-	const TABLE*		altered_table,	/*!< in: MySQL table
-						that is being altered */
-	ulint*			fts_doc_col_no)	/*!< out: The column number for
-						Doc ID, or ULINT_UNDEFINED
-						if it is being created in
-						ha_alter_info; may be NULL;
-						written only when
-						FTS_EXIST_DOC_ID_INDEX is
-						returned */
-{
-	const dict_index_t*	index;
-	const dict_field_t*	field;
-
-	if (altered_table) {
-		/* Check if a unique index with the name of
-		FTS_DOC_ID_INDEX_NAME is being created. */
-
-		for (uint i = 0; i < altered_table->s->keys; i++) {
-			const KEY& key = altered_table->key_info[i];
-
-			if (innobase_strcasecmp(
-				    key.name, FTS_DOC_ID_INDEX_NAME)) {
-				continue;
-			}
-
-			if ((key.flags & HA_NOSAME)
-			    && key.user_defined_key_parts == 1
-			    && !strcmp(key.name, FTS_DOC_ID_INDEX_NAME)
-			    && !strcmp(key.key_part[0].field->field_name.str,
-				       FTS_DOC_ID_COL_NAME)) {
-				if (fts_doc_col_no) {
-					*fts_doc_col_no = ULINT_UNDEFINED;
-				}
-				return(FTS_EXIST_DOC_ID_INDEX);
-			} else {
-				return(FTS_INCORRECT_DOC_ID_INDEX);
-			}
-		}
-	}
-
-	if (!table) {
-		return(FTS_NOT_EXIST_DOC_ID_INDEX);
-	}
-
-	for (index = dict_table_get_first_index(table);
-	     index; index = dict_table_get_next_index(index)) {
-
-		/* Check if there exists a unique index with the name of
-		FTS_DOC_ID_INDEX_NAME */
-		if (innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME)) {
-			continue;
-		}
-
-		if (!dict_index_is_unique(index)
-		    || dict_index_get_n_unique(index) > 1
-		    || strcmp(index->name, FTS_DOC_ID_INDEX_NAME)) {
-			return(FTS_INCORRECT_DOC_ID_INDEX);
-		}
-
-		/* Check whether the index has FTS_DOC_ID as its
-		first column */
-		field = dict_index_get_nth_field(index, 0);
-
-		/* The column would be of a BIGINT data type */
-		if (strcmp(field->name, FTS_DOC_ID_COL_NAME) == 0
-		    && field->col->mtype == DATA_INT
-		    && field->col->len == 8
-		    && field->col->prtype & DATA_NOT_NULL) {
-			if (fts_doc_col_no) {
-				*fts_doc_col_no = dict_col_get_no(field->col);
-			}
-			return(FTS_EXIST_DOC_ID_INDEX);
-		} else {
-			return(FTS_INCORRECT_DOC_ID_INDEX);
-		}
-	}
-
-
-	/* Not found */
-	return(FTS_NOT_EXIST_DOC_ID_INDEX);
-}
-/*******************************************************************//**
-Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
-on the Doc ID column in MySQL create index definition.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
-FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format,
-FTS_NOT_EXIST_DOC_ID_INDEX if no key name matches FTS_DOC_ID_INDEX_NAME
-according to innobase_strcasecmp() */
-UNIV_INTERN
-enum fts_doc_id_index_enum
-innobase_fts_check_doc_id_index_in_def(
-/*===================================*/
-	ulint		n_key,		/*!< in: Number of keys */
-	const KEY*	key_info)	/*!< in: Key definition */
-{
-	/* Check whether there is a "FTS_DOC_ID_INDEX" in the to be built index
-	list */
-	for (ulint j = 0; j < n_key; j++) {
-		const KEY*	key = &key_info[j];
-
-		if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) {
-			continue;
-		}
-
-		/* Do a check on FTS DOC ID_INDEX, it must be unique,
-		named as "FTS_DOC_ID_INDEX" and on column "FTS_DOC_ID" */
-		if (!(key->flags & HA_NOSAME)
-		    || key->user_defined_key_parts != 1
-		    || strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
-		    || strcmp(key->key_part[0].field->field_name.str,
-			      FTS_DOC_ID_COL_NAME)) {
-			return(FTS_INCORRECT_DOC_ID_INDEX);
-		}
-
-		return(FTS_EXIST_DOC_ID_INDEX);
-	}
-
-	return(FTS_NOT_EXIST_DOC_ID_INDEX);
-}
-
-/*******************************************************************//**
-Create an index table where indexes are ordered as follows:
-
-IF a new primary key is defined for the table THEN
-
-	1) New primary key
-	2) The remaining keys in key_info
-
-ELSE
-
-	1) All new indexes in the order they arrive from MySQL
-
-ENDIF
-
-If add_fts_doc_idx is set on entry or set by this function, one more
-definition (a DICT_UNIQUE index on column fts_doc_id_col) is appended
-after the definitions above.
-
-@return key definitions */
-static MY_ATTRIBUTE((nonnull, warn_unused_result, malloc))
-index_def_t*
-innobase_create_key_defs(
-/*=====================*/
-	mem_heap_t*			heap,
-			/*!< in/out: memory heap where space for key
-			definitions are allocated */
-	const Alter_inplace_info*	ha_alter_info,
-			/*!< in: alter operation */
-	const TABLE*			altered_table,
-			/*!< in: MySQL table that is being altered */
-	ulint&				n_add,
-			/*!< in/out: number of indexes to be created */
-	ulint&				n_fts_add,
-			/*!< out: number of FTS indexes to be created */
-	bool				got_default_clust,
-			/*!< in: whether the table lacks a primary key */
-	ulint&				fts_doc_id_col,
-			/*!< in/out: The column number for Doc ID */
-	bool&				add_fts_doc_id,
-			/*!< in/out: whether we need to add new DOC ID
-			column for FTS index */
-	bool&				add_fts_doc_idx,
-			/*!< in/out: whether we need to add new DOC ID
-			index for FTS index */
-	const TABLE*			table)
-			/*!< in: MySQL table that is being altered;
-			passed to innobase_need_rebuild() */
-{
-	index_def_t*		indexdef;	/* next free slot in indexdefs */
-	index_def_t*		indexdefs;	/* start of the returned array */
-	bool			new_primary;
-	const uint*const	add
-		= ha_alter_info->index_add_buffer;
-	const KEY*const		key_info
-		= ha_alter_info->key_info_buffer;
-
-	DBUG_ENTER("innobase_create_key_defs");
-	DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_idx);
-	DBUG_ASSERT(ha_alter_info->index_add_count == n_add);
-
-	/* If there is a primary key, it is always the first index
-	defined for the innodb_table. */
-
-	new_primary = n_add > 0
-		&& !my_strcasecmp(system_charset_info,
-				  key_info[*add].name, "PRIMARY");
-	n_fts_add = 0;
-
-	/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
-	columns and if the index does not contain column prefix(es)
-	(only prefix/part of the column is indexed), MySQL will treat the
-	index as a PRIMARY KEY unless the table already has one. */
-
-	ut_ad(altered_table->s->primary_key == 0
-	      || altered_table->s->primary_key == MAX_KEY);
-
-	if (got_default_clust && !new_primary) {
-		new_primary = (altered_table->s->primary_key != MAX_KEY);
-	}
-
-	const bool rebuild = new_primary || add_fts_doc_id
-		|| innobase_need_rebuild(ha_alter_info, table);
-
-	/* Reserve one more space if new_primary is true, and we might
-	need to add the FTS_DOC_ID_INDEX */
-	indexdef = indexdefs = static_cast<index_def_t*>(
-		mem_heap_alloc(
-			heap, sizeof *indexdef
-			* (ha_alter_info->key_count
-			   + rebuild
-			   + got_default_clust)));
-
-	if (rebuild) {
-		ulint	primary_key_number;
-
-		if (new_primary) {
-			if (n_add == 0) {
-				DBUG_ASSERT(got_default_clust);
-				DBUG_ASSERT(altered_table->s->primary_key
-					    == 0);
-				primary_key_number = 0;
-			} else {
-				primary_key_number = *add;
-			}
-		} else if (got_default_clust) {
-			/* Create the GEN_CLUST_INDEX */
-			index_def_t*	index = indexdef++;
-
-			index->fields = NULL;
-			index->n_fields = 0;
-			index->ind_type = DICT_CLUSTERED;
-			index->name = mem_heap_strdup(
-				heap, innobase_index_reserve_name);
-			index->key_number = ~0;
-			primary_key_number = ULINT_UNDEFINED;	/* never equals i in the copy loop below */
-			goto created_clustered;
-		} else {
-			primary_key_number = 0;
-		}
-
-		/* Create the PRIMARY key index definition */
-		innobase_create_index_def(
-			altered_table, key_info, primary_key_number,
-			TRUE, TRUE, indexdef++, heap, (const Field **)altered_table->field);
-
-created_clustered:
-		n_add = 1;	/* counts the clustered index definition created above */
-
-		for (ulint i = 0; i < ha_alter_info->key_count; i++) {
-			if (i == primary_key_number) {
-				continue;
-			}
-			/* Copy the index definitions. */
-			innobase_create_index_def(
-				altered_table, key_info, i, TRUE, FALSE,
-				indexdef, heap, (const Field **)altered_table->field);
-
-			if (indexdef->ind_type & DICT_FTS) {
-				n_fts_add++;
-			}
-
-			indexdef++;
-			n_add++;
-		}
-
-		if (n_fts_add > 0) {
-			if (!add_fts_doc_id
-			    && !innobase_fts_check_doc_id_col(
-				    NULL, altered_table,
-				    &fts_doc_id_col)) {
-				fts_doc_id_col =
-				  altered_table->s->stored_fields;
-				add_fts_doc_id = true;
-			}
-
-			if (!add_fts_doc_idx) {
-				fts_doc_id_index_enum	ret;
-				ulint			doc_col_no;
-
-				ret = innobase_fts_check_doc_id_index(
-					NULL, altered_table, &doc_col_no);
-
-				/* This should have been checked before */
-				ut_ad(ret != FTS_INCORRECT_DOC_ID_INDEX);
-
-				if (ret == FTS_NOT_EXIST_DOC_ID_INDEX) {
-					add_fts_doc_idx = true;
-				} else {
-					ut_ad(ret == FTS_EXIST_DOC_ID_INDEX);
-					ut_ad(doc_col_no == ULINT_UNDEFINED
-					      || doc_col_no == fts_doc_id_col);
-				}
-			}
-		}
-	} else {
-		/* Create definitions for added secondary indexes. */
-
-		for (ulint i = 0; i < n_add; i++) {
-			innobase_create_index_def(
-				altered_table, key_info, add[i], FALSE, FALSE,
-				indexdef, heap, (const Field **)altered_table->field);
-
-			if (indexdef->ind_type & DICT_FTS) {
-				n_fts_add++;
-			}
-
-			indexdef++;
-		}
-	}
-
-	DBUG_ASSERT(indexdefs + n_add == indexdef);
-
-	if (add_fts_doc_idx) {
-		index_def_t*	index = indexdef++;
-
-		index->fields = static_cast<index_field_t*>(
-			mem_heap_alloc(heap, sizeof *index->fields));
-		memset(index->fields, 0, sizeof *index->fields);
-		index->n_fields = 1;
-		index->fields->col_no = fts_doc_id_col;
-		index->fields->prefix_len = 0;
-		index->ind_type = DICT_UNIQUE;
-
-		if (rebuild) {
-			index->name = mem_heap_strdup(
-				heap, FTS_DOC_ID_INDEX_NAME);
-			ut_ad(!add_fts_doc_id
-			      || fts_doc_id_col == altered_table->s->stored_fields);
-		} else {
-			char*	index_name;
-			index->name = index_name = static_cast<char*>(
-				mem_heap_alloc(
-					heap,
-					1 + sizeof FTS_DOC_ID_INDEX_NAME));
-			*index_name++ = TEMP_INDEX_PREFIX;
-			memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
-			       sizeof FTS_DOC_ID_INDEX_NAME);
-		}
-
-		/* TODO: assign a real MySQL key number for this */
-		index->key_number = ULINT_UNDEFINED;
-		n_add++;
-	}
-
-	DBUG_ASSERT(indexdef > indexdefs);
-	DBUG_ASSERT((ulint) (indexdef - indexdefs)
-		    <= ha_alter_info->key_count
-		    + add_fts_doc_idx + got_default_clust);
-	DBUG_ASSERT(ha_alter_info->index_add_count <= n_add);
-	DBUG_RETURN(indexdefs);
-}
-
-/*******************************************************************//**
-Check each index column size, make sure they do not exceed the max limit.
-Only the user_defined_key_parts key parts of the single key pointed to
-by key_info are examined.
-@return true if index column size exceeds limit */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_check_column_length(
-/*=========================*/
-	ulint		max_col_len,	/*!< in: maximum column length */
-	const KEY*	key_info)	/*!< in: the key whose key parts
-					are to be checked */
-{
-	for (ulint key_part = 0; key_part < key_info->user_defined_key_parts; key_part++) {
-		if (key_info->key_part[key_part].length > max_col_len) {
-			return(true);
-		}
-	}
-	return(false);
-}
-
-struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
-{
-	/** Dummy query graph */
-	que_thr_t*	thr;
-	/** reference to the prebuilt struct of the creating instance */
-	row_prebuilt_t*&prebuilt;
-	/** InnoDB indexes being created */
-	dict_index_t**	add_index;
-	/** MySQL key numbers for the InnoDB indexes that are being created */
-	const ulint*	add_key_numbers;
-	/** number of InnoDB indexes being created */
-	ulint		num_to_add_index;
-	/** InnoDB indexes being dropped */
-	dict_index_t**	drop_index;
-	/** number of InnoDB indexes being dropped */
-	const ulint	num_to_drop_index;
-	/** InnoDB foreign key constraints being dropped */
-	dict_foreign_t** drop_fk;
-	/** number of InnoDB foreign key constraints being dropped */
-	const ulint	num_to_drop_fk;
-	/** InnoDB foreign key constraints being added */
-	dict_foreign_t** add_fk;
-	/** number of InnoDB foreign key constraints being added */
-	const ulint	num_to_add_fk;
-	/** whether to create the indexes online */
-	bool		online;
-	/** memory heap; freed by the destructor */
-	mem_heap_t*	heap;
-	/** dictionary transaction */
-	trx_t*		trx;
-	/** original table (if rebuilt, differs from new_table) */
-	dict_table_t*	old_table;
-	/** table where the indexes are being created or dropped */
-	dict_table_t*	new_table;
-	/** mapping of old column numbers to new ones, or NULL */
-	const ulint*	col_map;
-	/** new column names, or NULL if nothing was renamed */
-	const char**	col_names;
-	/** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
-	const ulint	add_autoinc;
-	/** default values of ADD COLUMN, or NULL */
-	const dtuple_t*	add_cols;
-	/** autoinc sequence to use */
-	ib_sequence_t	sequence;
-	/** maximum auto-increment value */
-	ulonglong	max_autoinc;
-	/** temporary table name to use for old table when renaming tables */
-	const char*	tmp_name;
-
-	ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg,
-				dict_index_t** drop_arg,
-				ulint num_to_drop_arg,
-				dict_foreign_t** drop_fk_arg,
-				ulint num_to_drop_fk_arg,
-				dict_foreign_t** add_fk_arg,
-				ulint num_to_add_fk_arg,
-				bool online_arg,
-				mem_heap_t* heap_arg,
-				dict_table_t* new_table_arg,
-				const char** col_names_arg,
-				ulint add_autoinc_arg,
-				ulonglong autoinc_col_min_value_arg,
-				ulonglong autoinc_col_max_value_arg) :
-		inplace_alter_handler_ctx(),
-		prebuilt (prebuilt_arg),
-		add_index (0), add_key_numbers (0), num_to_add_index (0),
-		drop_index (drop_arg), num_to_drop_index (num_to_drop_arg),
-		drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
-		add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
-		online (online_arg), heap (heap_arg), trx (0),
-		old_table (prebuilt_arg->table),
-		new_table (new_table_arg),
-		col_map (0), col_names (col_names_arg),
-		add_autoinc (add_autoinc_arg),
-		add_cols (0),
-		sequence(prebuilt->trx->mysql_thd,
-			 autoinc_col_min_value_arg, autoinc_col_max_value_arg),
-		max_autoinc (0),
-		tmp_name (0)
-	{
-#ifdef UNIV_DEBUG
-		for (ulint i = 0; i < num_to_add_index; i++) {	/* num_to_add_index was just initialized to 0 above */
-			ut_ad(!add_index[i]->to_be_dropped);
-		}
-		for (ulint i = 0; i < num_to_drop_index; i++) {
-			ut_ad(drop_index[i]->to_be_dropped);
-		}
-#endif /* UNIV_DEBUG */
-
-		thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap);
-	}
-
-	~ha_innobase_inplace_ctx()
-	{
-		mem_heap_free(heap);
-	}
-
-	/** Determine if the table will be rebuilt.
-	@return whether the table will be rebuilt */
-	bool need_rebuild () const { return(old_table != new_table); }
-
-private:
-	// Disable copying
-	ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
-	ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&);
-};
-
-/********************************************************************//**
-Drop any indexes that we were not able to free previously due to
-open table handles.
-Does nothing unless table->drop_aborted is set. Debug builds assert
-that the caller owns dict_sys->mutex, that
-trx->dict_operation_lock_mode is RW_X_LATCH and that the dictionary
-operation of trx is TRX_DICT_OP_INDEX. */
-static
-void
-online_retry_drop_indexes_low(
-/*==========================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	trx_t*		trx)	/*!< in/out: transaction */
-{
-	ut_ad(mutex_own(&dict_sys->mutex));
-	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-	ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
-	/* We can have table->n_ref_count > 1, because other threads
-	may have prebuilt->table pointing to the table. However, these
-	other threads should be between statements, waiting for the
-	next statement to execute, or for a meta-data lock. */
-	ut_ad(table->n_ref_count >= 1);
-
-	if (table->drop_aborted) {
-		row_merge_drop_indexes(trx, table, TRUE);
-	}
-}
-
-/********************************************************************//**
-Drop any indexes that we were not able to free previously due to
-open table handles.
*/ -static MY_ATTRIBUTE((nonnull)) -void -online_retry_drop_indexes( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - THD* user_thd) /*!< in/out: MySQL connection */ -{ - if (table->drop_aborted) { - trx_t* trx = innobase_trx_allocate(user_thd); - - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - - row_mysql_lock_data_dictionary(trx); - online_retry_drop_indexes_low(table, trx); - trx_commit_for_mysql(trx); - row_mysql_unlock_data_dictionary(trx); - trx_free_for_mysql(trx); - } - -#ifdef UNIV_DEBUG - mutex_enter(&dict_sys->mutex); - dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE); - mutex_exit(&dict_sys->mutex); - ut_a(!table->drop_aborted); -#endif /* UNIV_DEBUG */ -} - -/********************************************************************//** -Commit a dictionary transaction and drop any indexes that we were not -able to free previously due to open table handles. */ -static MY_ATTRIBUTE((nonnull)) -void -online_retry_drop_indexes_with_trx( -/*===============================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Now that the dictionary is being locked, check if we can - drop any incompletely created indexes that may have been left - behind in rollback_inplace_alter_table() earlier. */ - if (table->drop_aborted) { - - trx->table_id = 0; - - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - - online_retry_drop_indexes_low(table, trx); - trx_commit_for_mysql(trx); - } -} - -/** Determines if InnoDB is dropping a foreign key constraint. 
-@param foreign the constraint -@param drop_fk constraints being dropped -@param n_drop_fk number of constraints that are being dropped -@return whether the constraint is being dropped */ -MY_ATTRIBUTE((pure, nonnull(1), warn_unused_result)) -inline -bool -innobase_dropping_foreign( - const dict_foreign_t* foreign, - dict_foreign_t** drop_fk, - ulint n_drop_fk) -{ - while (n_drop_fk--) { - if (*drop_fk++ == foreign) { - return(true); - } - } - - return(false); -} - -/** Determines if an InnoDB FOREIGN KEY constraint depends on a -column that is being dropped or modified to NOT NULL. -@param user_table InnoDB table as it is before the ALTER operation -@param col_name Name of the column being altered -@param drop_fk constraints being dropped -@param n_drop_fk number of constraints that are being dropped -@param drop true=drop column, false=set NOT NULL -@retval true Not allowed (will call my_error()) -@retval false Allowed -*/ -MY_ATTRIBUTE((pure, nonnull(1,4), warn_unused_result)) -static -bool -innobase_check_foreigns_low( - const dict_table_t* user_table, - dict_foreign_t** drop_fk, - ulint n_drop_fk, - const char* col_name, - bool drop) -{ - dict_foreign_t* foreign; - ut_ad(mutex_own(&dict_sys->mutex)); - - /* Check if any FOREIGN KEY constraints are defined on this - column. */ - - for (dict_foreign_set::const_iterator it = user_table->foreign_set.begin(); - it != user_table->foreign_set.end(); - ++it) { - - foreign = *it; - - if (!drop && !(foreign->type - & (DICT_FOREIGN_ON_DELETE_SET_NULL - | DICT_FOREIGN_ON_UPDATE_SET_NULL))) { - continue; - } - - if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) { - continue; - } - - for (unsigned f = 0; f < foreign->n_fields; f++) { - if (!strcmp(foreign->foreign_col_names[f], - col_name)) { - my_error(drop - ? 
ER_FK_COLUMN_CANNOT_DROP - : ER_FK_COLUMN_NOT_NULL, MYF(0), - col_name, foreign->id); - return(true); - } - } - } - - if (!drop) { - /* SET NULL clauses on foreign key constraints of - child tables affect the child tables, not the parent table. - The column can be NOT NULL in the parent table. */ - return(false); - } - - /* Check if any FOREIGN KEY constraints in other tables are - referring to the column that is being dropped. */ - for (dict_foreign_set::const_iterator it - = user_table->referenced_set.begin(); - it != user_table->referenced_set.end(); - ++it) { - - foreign = *it; - - if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) { - continue; - } - - for (unsigned f = 0; f < foreign->n_fields; f++) { - char display_name[FN_REFLEN]; - - if (strcmp(foreign->referenced_col_names[f], - col_name)) { - continue; - } - - char* buf_end = innobase_convert_name( - display_name, (sizeof display_name) - 1, - foreign->foreign_table_name, - strlen(foreign->foreign_table_name), - NULL, TRUE); - *buf_end = '\0'; - my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD, - MYF(0), col_name, foreign->id, - display_name); - - return(true); - } - } - - return(false); -} - -/** Determines if an InnoDB FOREIGN KEY constraint depends on a -column that is being dropped or modified to NOT NULL. 
-@param ha_alter_info Data used during in-place alter -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation -@param user_table InnoDB table as it is before the ALTER operation -@param drop_fk constraints being dropped -@param n_drop_fk number of constraints that are being dropped -@retval true Not allowed (will call my_error()) -@retval false Allowed -*/ -MY_ATTRIBUTE((pure, nonnull(1,2,3,4), warn_unused_result)) -static -bool -innobase_check_foreigns( - Alter_inplace_info* ha_alter_info, - const TABLE* altered_table, - const TABLE* old_table, - const dict_table_t* user_table, - dict_foreign_t** drop_fk, - ulint n_drop_fk) -{ - List_iterator_fast<Create_field> cf_it( - ha_alter_info->alter_info->create_list); - - for (Field** fp = old_table->field; *fp; fp++) { - cf_it.rewind(); - const Create_field* new_field; - - ut_ad(!(*fp)->real_maybe_null() - == !!((*fp)->flags & NOT_NULL_FLAG)); - - while ((new_field = cf_it++)) { - if (new_field->field == *fp) { - break; - } - } - - if (!new_field || (new_field->flags & NOT_NULL_FLAG)) { - if (innobase_check_foreigns_low( - user_table, drop_fk, n_drop_fk, - (*fp)->field_name.str, !new_field)) { - return(true); - } - } - } - - return(false); -} - -/** Convert a default value for ADD COLUMN. 
-A field that is_real_null() becomes an SQL NULL dfield. Otherwise a
-buffer of field->pack_length() bytes is allocated from heap and the
-value at field->ptr is stored through
-row_mysql_store_col_in_innobase_format().
-
-@param heap		Memory heap where allocated
-@param dfield	InnoDB data field to copy to
-@param field	MySQL value for the column
-@param comp		nonzero if in compact format
-@param prebuilt	not used by this function */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_build_col_map_add(
-/*=======================*/
-	mem_heap_t*	heap,
-	dfield_t*	dfield,
-	const Field*	field,
-	ulint		comp,
-	row_prebuilt_t*	prebuilt)
-{
-	if (field->is_real_null()) {
-		dfield_set_null(dfield);
-		return;
-	}
-
-	ulint	size	= field->pack_length();
-
-	byte*	buf	= static_cast<byte*>(mem_heap_alloc(heap, size));
-
-	row_mysql_store_col_in_innobase_format(
-		dfield, buf, TRUE, field->ptr, size, comp);
-}
-
-/** Construct the translation table for reordering, dropping or
-adding columns.
-
-The returned array has old_table->n_cols entries and is allocated from
-heap. Entries of dropped columns are ULINT_UNDEFINED. For each stored
-column in the create list that has no counterpart in table, the
-default value is converted into the corresponding field of add_cols.
-
-@param ha_alter_info	Data used during in-place alter
-@param altered_table	MySQL table that is being altered
-@param table		MySQL table as it is before the ALTER operation
-@param new_table	InnoDB table corresponding to MySQL altered_table
-@param old_table	InnoDB table corresponding to MYSQL table
-@param add_cols		Default values for ADD COLUMN, or NULL if no ADD COLUMN
-@param heap		Memory heap where allocated
-@param prebuilt		passed through to innobase_build_col_map_add()
-@return	array of integers, mapping column numbers in the table
-to column numbers in altered_table */
-static MY_ATTRIBUTE((nonnull(1,2,3,4,5,7), warn_unused_result))
-const ulint*
-innobase_build_col_map(
-/*===================*/
-	Alter_inplace_info*	ha_alter_info,
-	const TABLE*		altered_table,
-	const TABLE*		table,
-	const dict_table_t*	new_table,
-	const dict_table_t*	old_table,
-	dtuple_t*		add_cols,
-	mem_heap_t*		heap,
-	row_prebuilt_t*		prebuilt)
-{
-	uint old_i, old_innobase_i;
-	DBUG_ENTER("innobase_build_col_map");
-	DBUG_ASSERT(altered_table != table);
-	DBUG_ASSERT(new_table != old_table);
-	DBUG_ASSERT(dict_table_get_n_cols(new_table)
-		    >= altered_table->s->stored_fields + DATA_N_SYS_COLS);
-	DBUG_ASSERT(dict_table_get_n_cols(old_table)
-		    >= table->s->stored_fields + DATA_N_SYS_COLS);
-	DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags
-				     & Alter_inplace_info::ADD_COLUMN));
-	DBUG_ASSERT(!add_cols || dtuple_get_n_fields(add_cols)
-		    == dict_table_get_n_cols(new_table));
-
-	ulint*	col_map = static_cast<ulint*>(
-		mem_heap_alloc(heap, old_table->n_cols * sizeof *col_map));
-
-	List_iterator_fast<Create_field> cf_it(
-		ha_alter_info->alter_info->create_list);
-	uint i = 0, sql_idx = 0;	/* i counts stored new columns; sql_idx counts all create-list entries */
-
-	/* Any dropped columns will map to ULINT_UNDEFINED. */
-	for (old_innobase_i = 0;
-	     old_innobase_i + DATA_N_SYS_COLS < old_table->n_cols;
-	     old_innobase_i++) {
-		col_map[old_innobase_i] = ULINT_UNDEFINED;
-	}
-
-	while (const Create_field* new_field = cf_it++) {
-		if (!new_field->stored_in_db())
-		{
-			sql_idx++;
-			continue;
-		}
-		for (old_i = 0, old_innobase_i= 0;
-		     table->field[old_i];
-		     old_i++) {
-			const Field* field = table->field[old_i];
-			if (!table->field[old_i]->stored_in_db())
-				continue;
-			if (new_field->field == field) {
-				col_map[old_innobase_i] = i;
-				goto found_col;
-			}
-			old_innobase_i++;	/* advanced only for stored fields of the old table */
-		}
-
-		innobase_build_col_map_add(
-			heap, dtuple_get_nth_field(add_cols, i),
-			altered_table->field[sql_idx],
-			dict_table_is_comp(new_table), prebuilt);
-found_col:
-		i++;
-		sql_idx++;
-	}
-
-	DBUG_ASSERT(i == altered_table->s->stored_fields);
-
-	i = table->s->stored_fields;
-
-	/* Add the InnoDB hidden FTS_DOC_ID column, if any. */
-	if (i + DATA_N_SYS_COLS < old_table->n_cols) {
-		/* There should be exactly one extra field,
-		the FTS_DOC_ID. */
-		DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(old_table,
-						 DICT_TF2_FTS_HAS_DOC_ID));
-		DBUG_ASSERT(i + DATA_N_SYS_COLS + 1 == old_table->n_cols);
-		DBUG_ASSERT(!strcmp(dict_table_get_col_name(
-					    old_table, table->s->stored_fields),
-				    FTS_DOC_ID_COL_NAME));
-		if (altered_table->s->stored_fields + DATA_N_SYS_COLS
-		    < new_table->n_cols) {
-			DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(
-					    new_table,
-					    DICT_TF2_FTS_HAS_DOC_ID));
-			DBUG_ASSERT(altered_table->s->stored_fields
-				    + DATA_N_SYS_COLS + 1
-				    == new_table->n_cols);
-			col_map[i] = altered_table->s->stored_fields;
-		} else {
-			DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
-					    new_table,
-					    DICT_TF2_FTS_HAS_DOC_ID));
-			col_map[i] = ULINT_UNDEFINED;
-		}
-
-		i++;
-	} else {
-		DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
-				    old_table,
-				    DICT_TF2_FTS_HAS_DOC_ID));
-	}
-
-	for (; i < old_table->n_cols; i++) {
-		col_map[i] = i + new_table->n_cols - old_table->n_cols;	/* remaining columns keep their distance from the end of the table */
-	}
-
-	DBUG_RETURN(col_map);
-}
-
-/** Drop newly created FTS index related auxiliary table during
-FIC create index process, before fts_add_index is called.
-Every index of the table with DICT_FTS set in its type is passed to
-fts_drop_index_tables(); a failure does not stop the loop.
-@param table    table that was being rebuilt online
-@param trx	transaction
-@return		DB_SUCCESS if successful, otherwise last error code
-*/
-static
-dberr_t
-innobase_drop_fts_index_table(
-/*==========================*/
-        dict_table_t*   table,
-	trx_t*		trx)
-{
-	dberr_t		ret_err = DB_SUCCESS;
-
-	for (dict_index_t* index = dict_table_get_first_index(table);
-	     index != NULL;
-	     index = dict_table_get_next_index(index)) {
-		if (index->type & DICT_FTS) {
-			dberr_t	err;
-
-			err = fts_drop_index_tables(trx, index);
-
-			if (err != DB_SUCCESS) {
-				ret_err = err;
-			}
-		}
-	}
-
-	return(ret_err);
-}
-
-/** Get the new column names if any columns were renamed
-@param ha_alter_info	Data used during in-place alter
-@param altered_table	MySQL table that is being altered
-@param table		MySQL table as it is before the ALTER operation
-@param user_table	InnoDB table as it is before the ALTER operation
-@param heap		Memory heap for the allocation
-@return array of new
column names in rebuilt_table, or NULL if not renamed */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -const char** -innobase_get_col_names( - Alter_inplace_info* ha_alter_info, - const TABLE* altered_table, - const TABLE* table, - const dict_table_t* user_table, - mem_heap_t* heap) -{ - const char** cols; - uint i; - - DBUG_ENTER("innobase_get_col_names"); - DBUG_ASSERT(user_table->n_def > table->s->fields); - DBUG_ASSERT(ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME); - - cols = static_cast<const char**>( - mem_heap_zalloc(heap, user_table->n_def * sizeof *cols)); - - i = 0; - List_iterator_fast<Create_field> cf_it( - ha_alter_info->alter_info->create_list); - while (const Create_field* new_field = cf_it++) { - DBUG_ASSERT(i < altered_table->s->fields); - - for (uint old_i = 0; table->field[old_i]; old_i++) { - if (new_field->field == table->field[old_i]) { - cols[old_i] = new_field->field_name.str; - break; - } - } - - i++; - } - - /* Copy the internal column names. */ - i = table->s->fields; - cols[i] = dict_table_get_col_name(user_table, i); - - while (++i < user_table->n_def) { - cols[i] = cols[i - 1] + strlen(cols[i - 1]) + 1; - } - - DBUG_RETURN(cols); -} - -/** Update internal structures with concurrent writes blocked, -while preparing ALTER TABLE. - -@param ha_alter_info Data used during in-place alter -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation -@param table_name Table name in MySQL -@param flags Table and tablespace flags -@param flags2 Additional table flags -@param fts_doc_id_col The column number of FTS_DOC_ID -@param add_fts_doc_id Flag: add column FTS_DOC_ID? -@param add_fts_doc_id_idx Flag: add index FTS_DOC_ID_INDEX (FTS_DOC_ID)? 
- -@retval true Failure -@retval false Success -*/ -static MY_ATTRIBUTE((warn_unused_result, nonnull(1,2,3,4))) -bool -prepare_inplace_alter_table_dict( -/*=============================*/ - Alter_inplace_info* ha_alter_info, - const TABLE* altered_table, - const TABLE* old_table, - const char* table_name, - ulint flags, - ulint flags2, - ulint fts_doc_id_col, - bool add_fts_doc_id, - bool add_fts_doc_id_idx, - row_prebuilt_t* prebuilt) -{ - bool dict_locked = false; - ulint* add_key_nums; /* MySQL key numbers */ - index_def_t* index_defs; /* index definitions */ - dict_table_t* user_table; - dict_index_t* fts_index = NULL; - ulint new_clustered = 0; - dberr_t error; - ulint num_fts_index; - ha_innobase_inplace_ctx*ctx; - uint sql_idx; - - DBUG_ENTER("prepare_inplace_alter_table_dict"); - - ctx = static_cast<ha_innobase_inplace_ctx*> - (ha_alter_info->handler_ctx); - - DBUG_ASSERT((ctx->add_autoinc != ULINT_UNDEFINED) - == (ctx->sequence.m_max_value > 0)); - DBUG_ASSERT(!ctx->num_to_drop_index == !ctx->drop_index); - DBUG_ASSERT(!ctx->num_to_drop_fk == !ctx->drop_fk); - DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_id_idx); - DBUG_ASSERT(!add_fts_doc_id_idx - || innobase_fulltext_exist(altered_table)); - DBUG_ASSERT(!ctx->add_cols); - DBUG_ASSERT(!ctx->add_index); - DBUG_ASSERT(!ctx->add_key_numbers); - DBUG_ASSERT(!ctx->num_to_add_index); - - user_table = ctx->new_table; - - trx_start_if_not_started_xa(ctx->prebuilt->trx); - - /* Create a background transaction for the operations on - the data dictionary tables. */ - ctx->trx = innobase_trx_allocate(ctx->prebuilt->trx->mysql_thd); - trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX); - - DBUG_ASSERT(!ctx->trx->fake_changes); - - /* Create table containing all indexes to be built in this - ALTER TABLE ADD INDEX so that they are in the correct order - in the table. 
*/ - - ctx->num_to_add_index = ha_alter_info->index_add_count; - - ut_ad(ctx->prebuilt->trx->mysql_thd != NULL); - const char* path = thd_innodb_tmpdir( - ctx->prebuilt->trx->mysql_thd); - - index_defs = innobase_create_key_defs( - ctx->heap, ha_alter_info, altered_table, ctx->num_to_add_index, - num_fts_index, - row_table_got_default_clust_index(ctx->new_table), - fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx, - old_table); - - new_clustered = DICT_CLUSTERED & index_defs[0].ind_type; - - if (num_fts_index > 1) { - my_error(ER_INNODB_FT_LIMIT, MYF(0)); - goto error_handled; - } - - if (!ctx->online) { - /* This is not an online operation (LOCK=NONE). */ - } else if (ctx->add_autoinc == ULINT_UNDEFINED - && num_fts_index == 0 - && (!innobase_need_rebuild(ha_alter_info, old_table) - || !innobase_fulltext_exist(altered_table))) { - /* InnoDB can perform an online operation (LOCK=NONE). */ - } else { - /* This should have been blocked in - check_if_supported_inplace_alter(). */ - ut_ad(0); - my_error(ER_NOT_SUPPORTED_YET, MYF(0), - thd_query_string(ctx->prebuilt->trx->mysql_thd)->str); - goto error_handled; - } - - /* The primary index would be rebuilt if a FTS Doc ID - column is to be added, and the primary index definition - is just copied from old table and stored in indexdefs[0] */ - DBUG_ASSERT(!add_fts_doc_id || new_clustered); - DBUG_ASSERT(!!new_clustered == - (innobase_need_rebuild(ha_alter_info, old_table) - || add_fts_doc_id)); - - /* Allocate memory for dictionary index definitions */ - - ctx->add_index = static_cast<dict_index_t**>( - mem_heap_alloc(ctx->heap, ctx->num_to_add_index - * sizeof *ctx->add_index)); - ctx->add_key_numbers = add_key_nums = static_cast<ulint*>( - mem_heap_alloc(ctx->heap, ctx->num_to_add_index - * sizeof *ctx->add_key_numbers)); - - /* This transaction should be dictionary operation, so that - the data dictionary will be locked during crash recovery. 
*/ - - ut_ad(ctx->trx->dict_operation == TRX_DICT_OP_INDEX); - - /* Acquire a lock on the table before creating any indexes. */ - - if (ctx->online) { - error = DB_SUCCESS; - } else { - error = row_merge_lock_table( - ctx->prebuilt->trx, ctx->new_table, LOCK_S); - - if (error != DB_SUCCESS) { - - goto error_handling; - } - } - - /* Latch the InnoDB data dictionary exclusively so that no deadlocks - or lock waits can happen in it during an index create operation. */ - - row_mysql_lock_data_dictionary(ctx->trx); - dict_locked = true; - - /* Wait for background stats processing to stop using the table that - we are going to alter. We know bg stats will not start using it again - until we are holding the data dict locked and we are holding it here - at least until checking ut_ad(user_table->n_ref_count == 1) below. - XXX what may happen if bg stats opens the table after we - have unlocked data dictionary below? */ - dict_stats_wait_bg_to_stop_using_table(user_table, ctx->trx); - - online_retry_drop_indexes_low(ctx->new_table, ctx->trx); - - ut_d(dict_table_check_for_dup_indexes( - ctx->new_table, CHECK_ABORTED_OK)); - - /* If a new clustered index is defined for the table we need - to rebuild the table with a temporary name. 
*/ - - if (new_clustered) { - fil_space_crypt_t* crypt_data; - const char* new_table_name - = dict_mem_create_temporary_tablename( - ctx->heap, - ctx->new_table->name, - ctx->new_table->id); - ulint n_cols; - dtuple_t* add_cols; - ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY; - fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT; - - fil_space_t* space = fil_space_acquire(ctx->prebuilt->table->space); - crypt_data = space->crypt_data; - fil_space_release(space); - - if (crypt_data) { - key_id = crypt_data->key_id; - mode = crypt_data->encryption; - } - - if (innobase_check_foreigns( - ha_alter_info, altered_table, old_table, - user_table, ctx->drop_fk, ctx->num_to_drop_fk)) { - goto new_clustered_failed; - } - - n_cols = altered_table->s->stored_fields; - - if (add_fts_doc_id) { - n_cols++; - DBUG_ASSERT(flags2 & DICT_TF2_FTS); - DBUG_ASSERT(add_fts_doc_id_idx); - flags2 |= DICT_TF2_FTS_ADD_DOC_ID - | DICT_TF2_FTS_HAS_DOC_ID - | DICT_TF2_FTS; - } - - DBUG_ASSERT(!add_fts_doc_id_idx || (flags2 & DICT_TF2_FTS)); - - /* Create the table. */ - trx_set_dict_operation(ctx->trx, TRX_DICT_OP_TABLE); - - if (dict_table_get_low(new_table_name)) { - my_error(ER_TABLE_EXISTS_ERROR, MYF(0), - new_table_name); - goto new_clustered_failed; - } - - /* The initial space id 0 may be overridden later. */ - ctx->new_table = dict_mem_table_create( - new_table_name, 0, n_cols, flags, flags2); - /* The rebuilt indexed_table will use the renamed - column names. 
*/ - ctx->col_names = NULL; - - if (DICT_TF_HAS_DATA_DIR(flags)) { - ctx->new_table->data_dir_path = - mem_heap_strdup(ctx->new_table->heap, - user_table->data_dir_path); - } - - sql_idx= 0; - for (uint i = 0; i < altered_table->s->stored_fields; i++, sql_idx++) { - const Field* field; - while (!((field= altered_table->field[sql_idx])-> - stored_in_db())) - sql_idx++; - ulint is_unsigned; - ulint field_type - = (ulint) field->type(); - ulint col_type - = get_innobase_type_from_mysql_type( - &is_unsigned, field); - ulint charset_no; - ulint col_len; - - /* we assume in dtype_form_prtype() that this - fits in two bytes */ - ut_a(field_type <= MAX_CHAR_COLL_NUM); - - if (!field->real_maybe_null()) { - field_type |= DATA_NOT_NULL; - } - - if (field->binary()) { - field_type |= DATA_BINARY_TYPE; - } - - if (is_unsigned) { - field_type |= DATA_UNSIGNED; - } - - if (dtype_is_string_type(col_type)) { - charset_no = (ulint) field->charset()->number; - - if (charset_no > MAX_CHAR_COLL_NUM) { - dict_mem_table_free( - ctx->new_table); - my_error(ER_WRONG_KEY_COLUMN, MYF(0), - field->field_name.str); - goto new_clustered_failed; - } - } else { - charset_no = 0; - } - - col_len = field->pack_length(); - - /* The MySQL pack length contains 1 or 2 bytes - length field for a true VARCHAR. Let us - subtract that, so that the InnoDB column - length in the InnoDB data dictionary is the - real maximum byte length of the actual data. 
*/ - - if (field->type() == MYSQL_TYPE_VARCHAR) { - uint32 length_bytes - = static_cast<const Field_varstring*>( - field)->length_bytes; - - col_len -= length_bytes; - - if (length_bytes == 2) { - field_type |= DATA_LONG_TRUE_VARCHAR; - } - } - - if (dict_col_name_is_reserved(field->field_name.str)) { - dict_mem_table_free(ctx->new_table); - my_error(ER_WRONG_COLUMN_NAME, MYF(0), - field->field_name.str); - goto new_clustered_failed; - } - - dict_mem_table_add_col( - ctx->new_table, ctx->heap, - field->field_name.str, - col_type, - dtype_form_prtype(field_type, charset_no), - col_len); - } - - if (add_fts_doc_id) { - fts_add_doc_id_column(ctx->new_table, ctx->heap); - ctx->new_table->fts->doc_col = fts_doc_id_col; - ut_ad(fts_doc_id_col == altered_table->s->stored_fields); - } else if (ctx->new_table->fts) { - ctx->new_table->fts->doc_col = fts_doc_id_col; - } - - error = row_create_table_for_mysql( - ctx->new_table, ctx->trx, false, mode, key_id); - - switch (error) { - dict_table_t* temp_table; - case DB_SUCCESS: - /* We need to bump up the table ref count and - before we can use it we need to open the - table. The new_table must be in the data - dictionary cache, because we are still holding - the dict_sys->mutex. */ - ut_ad(mutex_own(&dict_sys->mutex)); - temp_table = dict_table_open_on_name( - ctx->new_table->name, TRUE, FALSE, - DICT_ERR_IGNORE_NONE); - ut_a(ctx->new_table == temp_table); - /* n_ref_count must be 1, because purge cannot - be executing on this very table as we are - holding dict_operation_lock X-latch. 
*/ - DBUG_ASSERT(ctx->new_table->n_ref_count == 1); - break; - case DB_TABLESPACE_EXISTS: - my_error(ER_TABLESPACE_EXISTS, MYF(0), - new_table_name); - goto new_clustered_failed; - case DB_DUPLICATE_KEY: - my_error(HA_ERR_TABLE_EXIST, MYF(0), - altered_table->s->table_name.str); - goto new_clustered_failed; - default: - my_error_innodb(error, table_name, flags); - new_clustered_failed: - DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx); - trx_rollback_to_savepoint(ctx->trx, NULL); - - ut_ad(user_table->n_ref_count == 1); - - online_retry_drop_indexes_with_trx( - user_table, ctx->trx); - goto err_exit; - } - - if (ha_alter_info->handler_flags - & Alter_inplace_info::ADD_COLUMN) { - add_cols = dtuple_create( - ctx->heap, - dict_table_get_n_cols(ctx->new_table)); - - dict_table_copy_types(add_cols, ctx->new_table); - } else { - add_cols = NULL; - } - - ctx->col_map = innobase_build_col_map( - ha_alter_info, altered_table, old_table, - ctx->new_table, user_table, - add_cols, ctx->heap, prebuilt); - ctx->add_cols = add_cols; - } else { - DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table)); - DBUG_ASSERT(old_table->s->primary_key - == altered_table->s->primary_key); - - if (!ctx->new_table->fts - && innobase_fulltext_exist(altered_table)) { - ctx->new_table->fts = fts_create( - ctx->new_table); - ctx->new_table->fts->doc_col = fts_doc_id_col; - } - } - - /* Assign table_id, so that no table id of - fts_create_index_tables() will be written to the undo logs. */ - DBUG_ASSERT(ctx->new_table->id != 0); - ctx->trx->table_id = ctx->new_table->id; - - /* Create the indexes in SYS_INDEXES and load into dictionary. 
*/ - - for (ulint a = 0; a < ctx->num_to_add_index; a++) { - - ctx->add_index[a] = row_merge_create_index( - ctx->trx, ctx->new_table, - &index_defs[a], ctx->col_names); - - add_key_nums[a] = index_defs[a].key_number; - - if (!ctx->add_index[a]) { - error = ctx->trx->error_state; - DBUG_ASSERT(error != DB_SUCCESS); - goto error_handling; - } - - if (ctx->add_index[a]->type & DICT_FTS) { - DBUG_ASSERT(num_fts_index); - DBUG_ASSERT(!fts_index); - DBUG_ASSERT(ctx->add_index[a]->type == DICT_FTS); - fts_index = ctx->add_index[a]; - } - - /* If only online ALTER TABLE operations have been - requested, allocate a modification log. If the table - will be locked anyway, the modification - log is unnecessary. When rebuilding the table - (new_clustered), we will allocate the log for the - clustered index of the old table, later. */ - if (new_clustered - || !ctx->online - || !user_table->is_readable() - || dict_table_is_discarded(user_table)) { - /* No need to allocate a modification log. */ - ut_ad(!ctx->add_index[a]->online_log); - } else if (ctx->add_index[a]->type & DICT_FTS) { - /* Fulltext indexes are not covered - by a modification log. */ - } else { - DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter", - error = DB_OUT_OF_MEMORY; - goto error_handling;); - rw_lock_x_lock(&ctx->add_index[a]->lock); - - bool ok = row_log_allocate(ctx->add_index[a], - NULL, true, NULL, - NULL, path); - rw_lock_x_unlock(&ctx->add_index[a]->lock); - - if (!ok) { - error = DB_OUT_OF_MEMORY; - goto error_handling; - } - } - } - - ut_ad(new_clustered == ctx->need_rebuild()); - - DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter", - error = DB_OUT_OF_MEMORY; - goto error_handling;); - - if (new_clustered && ctx->online) { - /* Allocate a log for online table rebuild. 
*/ - dict_index_t* clust_index = dict_table_get_first_index( - user_table); - - rw_lock_x_lock(&clust_index->lock); - bool ok = row_log_allocate( - clust_index, ctx->new_table, - !(ha_alter_info->handler_flags - & Alter_inplace_info::ADD_PK_INDEX), - ctx->add_cols, ctx->col_map, path); - rw_lock_x_unlock(&clust_index->lock); - - if (!ok) { - error = DB_OUT_OF_MEMORY; - goto error_handling; - } - } - - if (ctx->online) { - /* Assign a consistent read view for - row_merge_read_clustered_index(). */ - trx_assign_read_view(ctx->prebuilt->trx); - } - - if (fts_index) { - /* Ensure that the dictionary operation mode will - not change while creating the auxiliary tables. */ - trx_dict_op_t op = trx_get_dict_operation(ctx->trx); - -#ifdef UNIV_DEBUG - switch (op) { - case TRX_DICT_OP_NONE: - break; - case TRX_DICT_OP_TABLE: - case TRX_DICT_OP_INDEX: - goto op_ok; - } - ut_error; -op_ok: -#endif /* UNIV_DEBUG */ - ut_ad(ctx->trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS); - - /* This function will commit the transaction and reset - the trx_t::dict_operation flag on success. 
*/ - - error = fts_create_index_tables(ctx->trx, fts_index); - - DBUG_EXECUTE_IF("innodb_test_fail_after_fts_index_table", - error = DB_LOCK_WAIT_TIMEOUT; - goto error_handling;); - - if (error != DB_SUCCESS) { - goto error_handling; - } - - trx_start_for_ddl(ctx->trx, op); - - if (!ctx->new_table->fts - || ib_vector_size(ctx->new_table->fts->indexes) == 0) { - error = fts_create_common_tables( - ctx->trx, ctx->new_table, - user_table->name, TRUE); - - DBUG_EXECUTE_IF( - "innodb_test_fail_after_fts_common_table", - error = DB_LOCK_WAIT_TIMEOUT;); - - if (error != DB_SUCCESS) { - goto error_handling; - } - - ctx->new_table->fts->fts_status - |= TABLE_DICT_LOCKED; - - error = innobase_fts_load_stopword( - ctx->new_table, ctx->trx, - ctx->prebuilt->trx->mysql_thd) - ? DB_SUCCESS : DB_ERROR; - ctx->new_table->fts->fts_status - &= ~TABLE_DICT_LOCKED; - - if (error != DB_SUCCESS) { - goto error_handling; - } - } - - ut_ad(trx_get_dict_operation(ctx->trx) == op); - } - - DBUG_ASSERT(error == DB_SUCCESS); - - /* Commit the data dictionary transaction in order to release - the table locks on the system tables. This means that if - MySQL crashes while creating a new primary key inside - row_merge_build_indexes(), ctx->new_table will not be dropped - by trx_rollback_active(). It will have to be recovered or - dropped by the database administrator. */ - trx_commit_for_mysql(ctx->trx); - - row_mysql_unlock_data_dictionary(ctx->trx); - dict_locked = false; - - ut_a(ctx->trx->lock.n_active_thrs == 0); - - DBUG_EXECUTE_IF("crash_innodb_add_index_after", DBUG_SUICIDE();); - -error_handling: - /* After an error, remove all those index definitions from the - dictionary which were defined. 
*/ - - switch (error) { - case DB_SUCCESS: - ut_a(!dict_locked); - - ut_d(mutex_enter(&dict_sys->mutex)); - ut_d(dict_table_check_for_dup_indexes( - user_table, CHECK_PARTIAL_OK)); - ut_d(mutex_exit(&dict_sys->mutex)); - DBUG_RETURN(false); - case DB_TABLESPACE_EXISTS: - my_error(ER_TABLESPACE_EXISTS, MYF(0), "(unknown)"); - break; - case DB_DUPLICATE_KEY: - my_error(ER_DUP_KEY, MYF(0), "SYS_INDEXES"); - break; - case DB_OUT_OF_FILE_SPACE: - my_error_innodb(error, table_name, user_table->flags); - break; - default: - my_error_innodb(error, table_name, user_table->flags); - } - -error_handled: - - ctx->prebuilt->trx->error_info = NULL; - ctx->trx->error_state = DB_SUCCESS; - - if (!dict_locked) { - row_mysql_lock_data_dictionary(ctx->trx); - } - - if (new_clustered) { - if (ctx->need_rebuild()) { - - if (DICT_TF2_FLAG_IS_SET( - ctx->new_table, DICT_TF2_FTS)) { - innobase_drop_fts_index_table( - ctx->new_table, ctx->trx); - } - - dict_table_close(ctx->new_table, TRUE, FALSE); - -#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG - /* Nobody should have initialized the stats of the - newly created table yet. When this is the case, we - know that it has not been added for background stats - gathering. */ - ut_a(!ctx->new_table->stat_initialized); -#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */ - - row_merge_drop_table(ctx->trx, ctx->new_table); - - /* Free the log for online table rebuild, if - one was allocated. */ - - dict_index_t* clust_index = dict_table_get_first_index( - user_table); - - rw_lock_x_lock(&clust_index->lock); - - if (clust_index->online_log) { - ut_ad(ctx->online); - row_log_abort_sec(clust_index); - clust_index->online_status - = ONLINE_INDEX_COMPLETE; - } - - rw_lock_x_unlock(&clust_index->lock); - } - - trx_commit_for_mysql(ctx->trx); - /* n_ref_count must be 1, because purge cannot - be executing on this very table as we are - holding dict_operation_lock X-latch. 
*/ - DBUG_ASSERT(user_table->n_ref_count == 1 || ctx->online); - - online_retry_drop_indexes_with_trx(user_table, ctx->trx); - } else { - ut_ad(!ctx->need_rebuild()); - row_merge_drop_indexes(ctx->trx, user_table, TRUE); - trx_commit_for_mysql(ctx->trx); - } - - ut_d(dict_table_check_for_dup_indexes(user_table, CHECK_ALL_COMPLETE)); - ut_ad(!user_table->drop_aborted); - -err_exit: - /* Clear the to_be_dropped flag in the data dictionary cache. */ - for (ulint i = 0; i < ctx->num_to_drop_index; i++) { - DBUG_ASSERT(*ctx->drop_index[i]->name != TEMP_INDEX_PREFIX); - DBUG_ASSERT(ctx->drop_index[i]->to_be_dropped); - ctx->drop_index[i]->to_be_dropped = 0; - } - - row_mysql_unlock_data_dictionary(ctx->trx); - - trx_free_for_mysql(ctx->trx); - trx_commit_for_mysql(ctx->prebuilt->trx); - - delete ctx; - ha_alter_info->handler_ctx = NULL; - - DBUG_RETURN(true); -} - -/* Check whether an index is needed for the foreign key constraint. -If so, if it is dropped, is there an equivalent index can play its role. -@return true if the index is needed and can't be dropped */ -static MY_ATTRIBUTE((nonnull(1,2,3,5), warn_unused_result)) -bool -innobase_check_foreign_key_index( -/*=============================*/ - Alter_inplace_info* ha_alter_info, /*!< in: Structure describing - changes to be done by ALTER - TABLE */ - dict_index_t* index, /*!< in: index to check */ - dict_table_t* indexed_table, /*!< in: table that owns the - foreign keys */ - const char** col_names, /*!< in: column names, or NULL - for indexed_table->col_names */ - trx_t* trx, /*!< in/out: transaction */ - dict_foreign_t** drop_fk, /*!< in: Foreign key constraints - to drop */ - ulint n_drop_fk) /*!< in: Number of foreign keys - to drop */ -{ - ut_ad(index != NULL); - ut_ad(indexed_table != NULL); - - const dict_foreign_set* fks = &indexed_table->referenced_set; - - /* Check for all FK references from other tables to the index. 
*/ - for (dict_foreign_set::const_iterator it = fks->begin(); - it != fks->end(); ++it) { - - dict_foreign_t* foreign = *it; - if (foreign->referenced_index != index) { - continue; - } - ut_ad(indexed_table == foreign->referenced_table); - - if (NULL == dict_foreign_find_index( - indexed_table, col_names, - foreign->referenced_col_names, - foreign->n_fields, index, - /*check_charsets=*/TRUE, - /*check_null=*/FALSE, - 0,0,0) - && NULL == innobase_find_equiv_index( - foreign->referenced_col_names, - foreign->n_fields, - ha_alter_info->key_info_buffer, - ha_alter_info->index_add_buffer, - ha_alter_info->index_add_count)) { - - /* Index cannot be dropped. */ - trx->error_info = index; - return(true); - } - } - - fks = &indexed_table->foreign_set; - - /* Check for all FK references in current table using the index. */ - for (dict_foreign_set::const_iterator it = fks->begin(); - it != fks->end(); ++it) { - - dict_foreign_t* foreign = *it; - if (foreign->foreign_index != index) { - continue; - } - - ut_ad(indexed_table == foreign->foreign_table); - - if (!innobase_dropping_foreign( - foreign, drop_fk, n_drop_fk) - && NULL == dict_foreign_find_index( - indexed_table, col_names, - foreign->foreign_col_names, - foreign->n_fields, index, - /*check_charsets=*/TRUE, - /*check_null=*/FALSE, - 0, 0, 0) - && NULL == innobase_find_equiv_index( - foreign->foreign_col_names, - foreign->n_fields, - ha_alter_info->key_info_buffer, - ha_alter_info->index_add_buffer, - ha_alter_info->index_add_count)) { - - /* Index cannot be dropped. */ - trx->error_info = index; - return(true); - } - } - - return(false); -} - -/** Allows InnoDB to update internal structures with concurrent -writes blocked (provided that check_if_supported_inplace_alter() -did not return HA_ALTER_INPLACE_NO_LOCK). -This will be invoked before inplace_alter_table(). - -@param altered_table TABLE object for new version of table. 
-@param ha_alter_info Structure describing changes to be done -by ALTER TABLE and holding data used during in-place alter. - -@retval true Failure -@retval false Success -*/ -UNIV_INTERN -bool -ha_innobase::prepare_inplace_alter_table( -/*=====================================*/ - TABLE* altered_table, - Alter_inplace_info* ha_alter_info) -{ - dict_index_t** drop_index; /*!< Index to be dropped */ - ulint n_drop_index; /*!< Number of indexes to drop */ - dict_foreign_t**drop_fk; /*!< Foreign key constraints to drop */ - ulint n_drop_fk; /*!< Number of foreign keys to drop */ - dict_foreign_t**add_fk = NULL; /*!< Foreign key constraints to drop */ - ulint n_add_fk; /*!< Number of foreign keys to drop */ - dict_table_t* indexed_table; /*!< Table where indexes are created */ - mem_heap_t* heap; - const char** col_names; - int error; - ulint flags; - ulint flags2; - ulint max_col_len; - ulint add_autoinc_col_no = ULINT_UNDEFINED; - ulonglong autoinc_col_max_value = 0; - ulint fts_doc_col_no = ULINT_UNDEFINED; - bool add_fts_doc_id = false; - bool add_fts_doc_id_idx = false; - bool add_fts_idx = false; - - DBUG_ENTER("prepare_inplace_alter_table"); - DBUG_ASSERT(!ha_alter_info->handler_ctx); - DBUG_ASSERT(ha_alter_info->create_info); - DBUG_ASSERT(!srv_read_only_mode); - - if (UNIV_UNLIKELY(prebuilt->trx->fake_changes)) { - DBUG_RETURN(true); - } - - /* Init online ddl status variables */ - onlineddl_rowlog_rows = 0; - onlineddl_rowlog_pct_used = 0; - onlineddl_pct_progress = 0; - - MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE); - -#ifdef UNIV_DEBUG - for (dict_index_t* index = dict_table_get_first_index(prebuilt->table); - index; - index = dict_table_get_next_index(index)) { - ut_ad(!index->to_be_dropped); - } -#endif /* UNIV_DEBUG */ - - ut_d(mutex_enter(&dict_sys->mutex)); - ut_d(dict_table_check_for_dup_indexes( - prebuilt->table, CHECK_ABORTED_OK)); - ut_d(mutex_exit(&dict_sys->mutex)); - - if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) { - /* 
Nothing to do */ - goto func_exit; - } - - indexed_table = prebuilt->table; - - if (indexed_table->is_readable()) { - } else { - if (indexed_table->corrupted) { - /* Handled below */ - } else { - FilSpace space(indexed_table->space, true); - - if (space()) { - String str; - const char* engine= table_type(); - char buf[MAX_FULL_NAME_LEN]; - ut_format_name(indexed_table->name, TRUE, buf, sizeof(buf)); - - push_warning_printf(user_thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_DECRYPTION_FAILED, - "Table %s in file %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - buf, space()->chain.start->name); - - my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine); - DBUG_RETURN(true); - } - } - } - - if (indexed_table->corrupted - || dict_table_get_first_index(indexed_table) == NULL - || dict_index_is_corrupted( - dict_table_get_first_index(indexed_table))) { - /* The clustered index is corrupted. */ - my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0)); - DBUG_RETURN(true); - } - - if (ha_alter_info->handler_flags - & Alter_inplace_info::CHANGE_CREATE_OPTION) { - /* Check engine specific table options */ - if (const char* invalid_tbopt = check_table_options( - user_thd, altered_table, - ha_alter_info->create_info, - prebuilt->table->space != 0, - srv_file_format)) { - my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), - table_type(), invalid_tbopt); - goto err_exit_no_heap; - } - - if (const char* invalid_opt = create_options_are_invalid( - user_thd, altered_table, - ha_alter_info->create_info, - prebuilt->table->space != 0)) { - my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), - table_type(), invalid_opt); - goto err_exit_no_heap; - } - } - - /* Check if any index name is reserved. 
*/ - if (innobase_index_name_is_reserved( - user_thd, - ha_alter_info->key_info_buffer, - ha_alter_info->key_count)) { -err_exit_no_heap: - DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0); - if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) { - online_retry_drop_indexes(prebuilt->table, user_thd); - } - DBUG_RETURN(true); - } - - indexed_table = prebuilt->table; - - /* Check that index keys are sensible */ - error = innobase_check_index_keys(ha_alter_info, indexed_table); - - if (error) { - goto err_exit_no_heap; - } - - /* Prohibit renaming a column to something that the table - already contains. */ - if (ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME) { - List_iterator_fast<Create_field> cf_it( - ha_alter_info->alter_info->create_list); - - for (Field** fp = table->field; *fp; fp++) { - if (!((*fp)->flags & FIELD_IS_RENAMED)) { - continue; - } - - const char* name = 0; - - cf_it.rewind(); - while (Create_field* cf = cf_it++) { - if (cf->field == *fp) { - name = cf->field_name.str; - goto check_if_ok_to_rename; - } - } - - ut_error; -check_if_ok_to_rename: - /* Prohibit renaming a column from FTS_DOC_ID - if full-text indexes exist. */ - if (!my_strcasecmp(system_charset_info, - (*fp)->field_name.str, - FTS_DOC_ID_COL_NAME) - && innobase_fulltext_exist(altered_table)) { - my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, - MYF(0), name); - goto err_exit_no_heap; - } - - /* Prohibit renaming a column to an internal column. */ - const char* s = prebuilt->table->col_names; - unsigned j; - /* Skip user columns. - MySQL should have checked these already. - We want to allow renaming of c1 to c2, c2 to c1. 
*/ - for (j = 0; j < table->s->fields; j++) { - s += strlen(s) + 1; - } - - for (; j < prebuilt->table->n_def; j++) { - if (!my_strcasecmp( - system_charset_info, name, s)) { - my_error(ER_WRONG_COLUMN_NAME, MYF(0), - s); - goto err_exit_no_heap; - } - - s += strlen(s) + 1; - } - } - } - - if (!innobase_table_flags(altered_table, - ha_alter_info->create_info, - user_thd, - srv_file_per_table - || indexed_table->space != 0, - &flags, &flags2)) { - goto err_exit_no_heap; - } - - max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags); - - /* Check each index's column length to make sure they do not - exceed limit */ - for (ulint i = 0; i < ha_alter_info->index_add_count; i++) { - const KEY* key = &ha_alter_info->key_info_buffer[ - ha_alter_info->index_add_buffer[i]]; - - if (key->flags & HA_FULLTEXT) { - /* The column length does not matter for - fulltext search indexes. But, UNIQUE - fulltext indexes are not supported. */ - DBUG_ASSERT(!(key->flags & HA_NOSAME)); - DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK - & ~(HA_FULLTEXT - | HA_PACK_KEY - | HA_BINARY_PACK_KEY))); - add_fts_idx = true; - continue; - } - - if (innobase_check_column_length(max_col_len, key)) { - my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), - max_col_len); - goto err_exit_no_heap; - } - } - - /* We won't be allowed to add fts index to a table with - fts indexes already but without AUX_HEX_NAME set. - This means the aux tables of the table failed to - rename to hex format but new created aux tables - shall be in hex format, which is contradictory. */ - if (!DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS_AUX_HEX_NAME) - && indexed_table->fts != NULL && add_fts_idx) { - my_error(ER_INNODB_FT_AUX_NOT_HEX_ID, MYF(0)); - goto err_exit_no_heap; - } - - /* Check existing index definitions for too-long column - prefixes as well, in case max_col_len shrunk. 
*/ - for (const dict_index_t* index - = dict_table_get_first_index(indexed_table); - index; - index = dict_table_get_next_index(index)) { - if (index->type & DICT_FTS) { - DBUG_ASSERT(index->type == DICT_FTS - || (index->type & DICT_CORRUPT)); - - /* We need to drop any corrupted fts indexes - before we add a new fts index. */ - if (add_fts_idx && index->type & DICT_CORRUPT) { - ib_errf(user_thd, IB_LOG_LEVEL_ERROR, - ER_INNODB_INDEX_CORRUPT, - "Fulltext index '%s' is corrupt. " - "you should drop this index first.", - index->name); - - goto err_exit_no_heap; - } - - continue; - } - - for (ulint i = 0; i < dict_index_get_n_fields(index); i++) { - const dict_field_t* field - = dict_index_get_nth_field(index, i); - if (field->prefix_len > max_col_len) { - my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), - max_col_len); - goto err_exit_no_heap; - } - } - } - - n_drop_index = 0; - n_drop_fk = 0; - - if (ha_alter_info->handler_flags - & (INNOBASE_ALTER_NOREBUILD | INNOBASE_ALTER_REBUILD)) { - heap = mem_heap_create(1024); - - if (ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME) { - col_names = innobase_get_col_names( - ha_alter_info, altered_table, table, - indexed_table, heap); - } else { - col_names = NULL; - } - } else { - heap = NULL; - col_names = NULL; - } - - if (ha_alter_info->handler_flags - & Alter_inplace_info::DROP_FOREIGN_KEY) { - DBUG_ASSERT(ha_alter_info->alter_info->drop_list.elements > 0); - - drop_fk = static_cast<dict_foreign_t**>( - mem_heap_alloc( - heap, - ha_alter_info->alter_info->drop_list.elements - * sizeof(dict_foreign_t*))); - - List_iterator<Alter_drop> drop_it( - ha_alter_info->alter_info->drop_list); - - while (Alter_drop* drop = drop_it++) { - if (drop->type != Alter_drop::FOREIGN_KEY) { - continue; - } - - for (dict_foreign_set::iterator it - = prebuilt->table->foreign_set.begin(); - it != prebuilt->table->foreign_set.end(); - ++it) { - - dict_foreign_t* foreign = *it; - const char* fid = strchr(foreign->id, '/'); - - 
DBUG_ASSERT(fid); - /* If no database/ prefix was present in - the FOREIGN KEY constraint name, compare - to the full constraint name. */ - fid = fid ? fid + 1 : foreign->id; - - if (!my_strcasecmp(system_charset_info, - fid, drop->name)) { - drop_fk[n_drop_fk++] = foreign; - goto found_fk; - } - } - - my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0), - drop->type_name(), drop->name); - goto err_exit; -found_fk: - continue; - } - - DBUG_ASSERT(n_drop_fk > 0); - DBUG_ASSERT(n_drop_fk - == ha_alter_info->alter_info->drop_list.elements); - } else { - drop_fk = NULL; - } - - if (ha_alter_info->index_drop_count) { - dict_index_t* drop_primary = NULL; - - DBUG_ASSERT(ha_alter_info->handler_flags - & (Alter_inplace_info::DROP_INDEX - | Alter_inplace_info::DROP_UNIQUE_INDEX - | Alter_inplace_info::DROP_PK_INDEX)); - /* Check which indexes to drop. */ - drop_index = static_cast<dict_index_t**>( - mem_heap_alloc( - heap, (ha_alter_info->index_drop_count + 1) - * sizeof *drop_index)); - - for (uint i = 0; i < ha_alter_info->index_drop_count; i++) { - const KEY* key - = ha_alter_info->index_drop_buffer[i]; - dict_index_t* index - = dict_table_get_index_on_name_and_min_id( - indexed_table, key->name); - - if (!index) { - push_warning_printf( - user_thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_INDEX, - "InnoDB could not find key " - "with name %s", key->name); - } else { - ut_ad(!index->to_be_dropped); - if (!dict_index_is_clust(index)) { - drop_index[n_drop_index++] = index; - } else { - drop_primary = index; - } - } - } - - /* If all FULLTEXT indexes were removed, drop an - internal FTS_DOC_ID_INDEX as well, unless it exists in - the table. */ - - if (innobase_fulltext_exist(table) - && !innobase_fulltext_exist(altered_table) - && !DICT_TF2_FLAG_IS_SET( - indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) { - dict_index_t* fts_doc_index - = dict_table_get_index_on_name( - indexed_table, FTS_DOC_ID_INDEX_NAME); - - // Add some fault tolerance for non-debug builds. 
- if (fts_doc_index == NULL) { - goto check_if_can_drop_indexes; - } - - DBUG_ASSERT(!fts_doc_index->to_be_dropped); - - for (uint i = 0; i < table->s->keys; i++) { - if (!my_strcasecmp( - system_charset_info, - FTS_DOC_ID_INDEX_NAME, - table->key_info[i].name)) { - /* The index exists in the MySQL - data dictionary. Do not drop it, - even though it is no longer needed - by InnoDB fulltext search. */ - goto check_if_can_drop_indexes; - } - } - - drop_index[n_drop_index++] = fts_doc_index; - } - -check_if_can_drop_indexes: - /* Check if the indexes can be dropped. */ - - /* Prevent a race condition between DROP INDEX and - CREATE TABLE adding FOREIGN KEY constraints. */ - row_mysql_lock_data_dictionary(prebuilt->trx); - - if (!n_drop_index) { - drop_index = NULL; - } else { - /* Flag all indexes that are to be dropped. */ - for (ulint i = 0; i < n_drop_index; i++) { - ut_ad(!drop_index[i]->to_be_dropped); - drop_index[i]->to_be_dropped = 1; - } - } - - if (prebuilt->trx->check_foreigns) { - for (uint i = 0; i < n_drop_index; i++) { - dict_index_t* index = drop_index[i]; - - if (innobase_check_foreign_key_index( - ha_alter_info, index, - indexed_table, col_names, - prebuilt->trx, drop_fk, n_drop_fk)) { - row_mysql_unlock_data_dictionary( - prebuilt->trx); - prebuilt->trx->error_info = index; - print_error(HA_ERR_DROP_INDEX_FK, - MYF(0)); - goto err_exit; - } - } - - /* If a primary index is dropped, need to check - any depending foreign constraints get affected */ - if (drop_primary - && innobase_check_foreign_key_index( - ha_alter_info, drop_primary, - indexed_table, col_names, - prebuilt->trx, drop_fk, n_drop_fk)) { - row_mysql_unlock_data_dictionary(prebuilt->trx); - print_error(HA_ERR_DROP_INDEX_FK, MYF(0)); - goto err_exit; - } - } - - row_mysql_unlock_data_dictionary(prebuilt->trx); - } else { - drop_index = NULL; - } - - /* Check if any of the existing indexes are marked as corruption - and if they are, refuse adding more indexes. 
*/ - if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX) { - for (dict_index_t* index = dict_table_get_first_index(indexed_table); - index != NULL; index = dict_table_get_next_index(index)) { - - if (!index->to_be_dropped && dict_index_is_corrupted(index)) { - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name(index_name, sizeof index_name, - index->name, TRUE); - - my_error(ER_INDEX_CORRUPT, MYF(0), index_name); - goto err_exit; - } - } - } - - n_add_fk = 0; - - if (ha_alter_info->handler_flags - & Alter_inplace_info::ADD_FOREIGN_KEY) { - ut_ad(!prebuilt->trx->check_foreigns); - - add_fk = static_cast<dict_foreign_t**>( - mem_heap_zalloc( - heap, - ha_alter_info->alter_info->key_list.elements - * sizeof(dict_foreign_t*))); - - if (!innobase_get_foreign_key_info( - ha_alter_info, table_share, - prebuilt->table, col_names, - drop_index, n_drop_index, - add_fk, &n_add_fk, prebuilt->trx)) { -err_exit: - if (n_drop_index) { - row_mysql_lock_data_dictionary(prebuilt->trx); - - /* Clear the to_be_dropped flags, which might - have been set at this point. 
*/ - for (ulint i = 0; i < n_drop_index; i++) { - DBUG_ASSERT(*drop_index[i]->name - != TEMP_INDEX_PREFIX); - drop_index[i]->to_be_dropped = 0; - } - - row_mysql_unlock_data_dictionary(prebuilt->trx); - } - - if (heap) { - mem_heap_free(heap); - } - - goto err_exit_no_heap; - } - } - - if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA) - || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) - == Alter_inplace_info::CHANGE_CREATE_OPTION - && !innobase_need_rebuild(ha_alter_info, table))) { - - if (heap) { - ha_alter_info->handler_ctx - = new ha_innobase_inplace_ctx( - prebuilt, - drop_index, n_drop_index, - drop_fk, n_drop_fk, - add_fk, n_add_fk, - ha_alter_info->online, - heap, indexed_table, - col_names, ULINT_UNDEFINED, 0, 0); - } - -func_exit: - DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0); - if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) { - online_retry_drop_indexes(prebuilt->table, user_thd); - } - DBUG_RETURN(false); - } - - /* If we are to build a full-text search index, check whether - the table already has a DOC ID column. 
If not, we will need to - add a Doc ID hidden column and rebuild the primary index */ - if (innobase_fulltext_exist(altered_table)) { - ulint doc_col_no; - - if (!innobase_fts_check_doc_id_col( - prebuilt->table, altered_table, &fts_doc_col_no)) { - fts_doc_col_no = altered_table->s->stored_fields; - add_fts_doc_id = true; - add_fts_doc_id_idx = true; - - push_warning_printf( - user_thd, - Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_INDEX, - "InnoDB rebuilding table to add column " - FTS_DOC_ID_COL_NAME); - } else if (fts_doc_col_no == ULINT_UNDEFINED) { - goto err_exit; - } - - switch (innobase_fts_check_doc_id_index( - prebuilt->table, altered_table, &doc_col_no)) { - case FTS_NOT_EXIST_DOC_ID_INDEX: - add_fts_doc_id_idx = true; - break; - case FTS_INCORRECT_DOC_ID_INDEX: - my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0), - FTS_DOC_ID_INDEX_NAME); - goto err_exit; - case FTS_EXIST_DOC_ID_INDEX: - DBUG_ASSERT(doc_col_no == fts_doc_col_no - || doc_col_no == ULINT_UNDEFINED - || (ha_alter_info->handler_flags - & (Alter_inplace_info::ALTER_COLUMN_ORDER - | Alter_inplace_info::DROP_COLUMN - | Alter_inplace_info::ADD_COLUMN))); - } - } - - /* See if an AUTO_INCREMENT column was added. */ - uint i = 0, innodb_idx= 0; - List_iterator_fast<Create_field> cf_it( - ha_alter_info->alter_info->create_list); - while (const Create_field* new_field = cf_it++) { - const Field* field; - if (!new_field->stored_in_db()) { - i++; - continue; - } - - DBUG_ASSERT(i < altered_table->s->fields); - DBUG_ASSERT(innodb_idx < altered_table->s->stored_fields); - - for (uint old_i = 0; table->field[old_i]; old_i++) { - if (!table->field[old_i]->stored_in_db()) - continue; - if (new_field->field == table->field[old_i]) { - goto found_col; - } - } - - /* This is an added column. 
*/ - DBUG_ASSERT(!new_field->field); - DBUG_ASSERT(ha_alter_info->handler_flags - & Alter_inplace_info::ADD_COLUMN); - - field = altered_table->field[i]; - - DBUG_ASSERT((MTYP_TYPENR(field->unireg_check) - == Field::NEXT_NUMBER) - == !!(field->flags & AUTO_INCREMENT_FLAG)); - - if (field->flags & AUTO_INCREMENT_FLAG) { - if (add_autoinc_col_no != ULINT_UNDEFINED) { - /* This should have been blocked earlier. */ - ut_ad(0); - my_error(ER_WRONG_AUTO_KEY, MYF(0)); - goto err_exit; - } - add_autoinc_col_no = innodb_idx; - - autoinc_col_max_value = innobase_get_int_col_max_value( - field); - } -found_col: - i++; - innodb_idx++; - } - - DBUG_ASSERT(heap); - DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd); - DBUG_ASSERT(!ha_alter_info->handler_ctx); - - ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx( - prebuilt, - drop_index, n_drop_index, - drop_fk, n_drop_fk, add_fk, n_add_fk, - ha_alter_info->online, - heap, prebuilt->table, col_names, - add_autoinc_col_no, - ha_alter_info->create_info->auto_increment_value, - autoinc_col_max_value); - - DBUG_RETURN(prepare_inplace_alter_table_dict( - ha_alter_info, altered_table, table, - table_share->table_name.str, - flags, flags2, - fts_doc_col_no, add_fts_doc_id, - add_fts_doc_id_idx, prebuilt)); -} - -/** Get the name of an erroneous key. -@param[in] error_key_num InnoDB number of the erroneus key -@param[in] ha_alter_info changes that were being performed -@param[in] table InnoDB table -@return the name of the erroneous key */ -static -const char* -get_error_key_name( - ulint error_key_num, - const Alter_inplace_info* ha_alter_info, - const dict_table_t* table) -{ - if (error_key_num == ULINT_UNDEFINED) { - return(FTS_DOC_ID_INDEX_NAME); - } else if (ha_alter_info->key_count == 0) { - return(dict_table_get_first_index(table)->name); - } else { - return(ha_alter_info->key_info_buffer[error_key_num].name); - } -} - -/** Alter the table structure in-place with operations -specified using Alter_inplace_info. 
-The level of concurrency allowed during this operation depends
-on the return value from check_if_supported_inplace_alter().
-
-The function returns success without doing any work when the ALTER does
-not carry INNOBASE_ALTER_DATA, when it is only a CHANGE_CREATE_OPTION
-that needs no rebuild, or when the table is unreadable or discarded.
-Otherwise it builds the indexes with row_merge_build_indexes(), applies
-the online log with row_log_table_apply() when an online rebuild is in
-progress, and reports any resulting error through my_error() and friends.
-
-@param altered_table	TABLE object for new version of table.
-@param ha_alter_info	Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-
-@retval true		Failure
-@retval false		Success
-*/
-UNIV_INTERN
-bool
-ha_innobase::inplace_alter_table(
-/*=============================*/
-	TABLE*			altered_table,
-	Alter_inplace_info*	ha_alter_info)
-{
-	dberr_t	error;
-
-	DBUG_ENTER("inplace_alter_table");
-	DBUG_ASSERT(!srv_read_only_mode);
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-	ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-	DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter");
-
-	if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)) {
-ok_exit: /* shared success exit; also entered by goto from further down */
-		DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table");
-		DBUG_RETURN(false);
-	}
-
-	if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
-	    == Alter_inplace_info::CHANGE_CREATE_OPTION
-	    && !innobase_need_rebuild(ha_alter_info, table)) {
-		goto ok_exit;
-	}
-
-	ha_innobase_inplace_ctx*	ctx
-		= static_cast<ha_innobase_inplace_ctx*>
-		(ha_alter_info->handler_ctx);
-
-	DBUG_ASSERT(ctx);
-	DBUG_ASSERT(ctx->trx);
-	DBUG_ASSERT(ctx->prebuilt == prebuilt);
-
-	if (prebuilt->table->file_unreadable
-	    || dict_table_is_discarded(prebuilt->table)) {
-		goto all_done; /* jumps into the DB_SUCCESS case of the switch below */
-	}
-
-	/* Read the clustered index of the table and build
-	indexes based on this information using temporary
-	files and merge sort. */
-	DBUG_EXECUTE_IF("innodb_OOM_inplace_alter",
-			error = DB_OUT_OF_MEMORY; goto oom;);
-
-	error = row_merge_build_indexes(
-		prebuilt->trx,
-		prebuilt->table, ctx->new_table,
-		ctx->online,
-		ctx->add_index, ctx->add_key_numbers, ctx->num_to_add_index,
-		altered_table, ctx->add_cols, ctx->col_map,
-		ctx->add_autoinc, ctx->sequence);
-#ifndef DBUG_OFF
-oom: /* target of the innodb_OOM_inplace_alter debug injection above */
-#endif /* !DBUG_OFF */
-	if (error == DB_SUCCESS && ctx->online && ctx->need_rebuild()) {
-		DEBUG_SYNC_C("row_log_table_apply1_before");
-		error = row_log_table_apply(
-			ctx->thr, prebuilt->table, altered_table);
-	}
-
-	/* Init online ddl status variables */
-	onlineddl_rowlog_rows = 0;
-	onlineddl_rowlog_pct_used = 0;
-	onlineddl_pct_progress = 0;
-
-	DEBUG_SYNC_C("inplace_after_index_build");
-
-	DBUG_EXECUTE_IF("create_index_fail",
-			error = DB_DUPLICATE_KEY;
-			prebuilt->trx->error_key_num = ULINT_UNDEFINED;);
-
-	/* After an error, remove all those index definitions
-	from the dictionary which were defined.
-	NOTE(review): the switch below only reports the error to the
-	client; no index definition is removed in this function, so the
-	cleanup presumably happens in the rollback path - confirm. */
-
-	switch (error) {
-		KEY*	dup_key;
-	all_done:
-	case DB_SUCCESS:
-		ut_d(mutex_enter(&dict_sys->mutex));
-		ut_d(dict_table_check_for_dup_indexes(
-			     prebuilt->table, CHECK_PARTIAL_OK));
-		ut_d(mutex_exit(&dict_sys->mutex));
-		/* prebuilt->table->n_ref_count can be anything here,
-		given that we hold at most a shared lock on the table. */
-		goto ok_exit;
-	case DB_DUPLICATE_KEY:
-		if (prebuilt->trx->error_key_num == ULINT_UNDEFINED
-		    || ha_alter_info->key_count == 0) {
-			/* This should be the hidden index on
-			FTS_DOC_ID, or there is no PRIMARY KEY in the
-			table. Either way, we should be seeing and
-			reporting a bogus duplicate key error. */
-			dup_key = NULL;
-		} else {
-			DBUG_ASSERT(prebuilt->trx->error_key_num
-				    < ha_alter_info->key_count);
-			dup_key = &ha_alter_info->key_info_buffer[
-				prebuilt->trx->error_key_num];
-		}
-		print_keydup_error(altered_table, dup_key, MYF(0));
-		break;
-	case DB_ONLINE_LOG_TOO_BIG:
-		DBUG_ASSERT(ctx->online);
-		my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
-			 get_error_key_name(prebuilt->trx->error_key_num,
-					    ha_alter_info, prebuilt->table));
-		break;
-	case DB_INDEX_CORRUPT:
-		my_error(ER_INDEX_CORRUPT, MYF(0),
-			 get_error_key_name(prebuilt->trx->error_key_num,
-					    ha_alter_info, prebuilt->table));
-		break;
-	case DB_DECRYPTION_FAILED: {
-		String str;
-		const char* engine= table_type();
-		get_error_message(HA_ERR_DECRYPTION_FAILED, &str);
-		my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine);
-		break;
-	}
-	default:
-		my_error_innodb(error,
-				table_share->table_name.str,
-				prebuilt->table->flags);
-	}
-
-	/* prebuilt->table->n_ref_count can be anything here, given
-	that we hold at most a shared lock on the table.
-	Every failure case of the switch ends up here: the error has been
-	reported, so the transaction error state is reset before
-	returning failure. */
-	prebuilt->trx->error_info = NULL;
-	ctx->trx->error_state = DB_SUCCESS;
-
-	DBUG_RETURN(true);
-}
-
-/** Free the modification log for online table rebuild.
-@param table table that was being rebuilt online */ -static -void -innobase_online_rebuild_log_free( -/*=============================*/ - dict_table_t* table) -{ - dict_index_t* clust_index = dict_table_get_first_index(table); - - ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_x_lock(&clust_index->lock); - - if (clust_index->online_log) { - ut_ad(dict_index_get_online_status(clust_index) - == ONLINE_INDEX_CREATION); - clust_index->online_status = ONLINE_INDEX_COMPLETE; - row_log_free(clust_index->online_log); - DEBUG_SYNC_C("innodb_online_rebuild_log_free_aborted"); - } - - DBUG_ASSERT(dict_index_get_online_status(clust_index) - == ONLINE_INDEX_COMPLETE); - rw_lock_x_unlock(&clust_index->lock); -} - -/** Rollback a secondary index creation, drop the indexes with -temparary index prefix -@param user_table InnoDB table -@param table the TABLE -@param locked TRUE=table locked, FALSE=may need to do a lazy drop -@param trx the transaction -*/ -static MY_ATTRIBUTE((nonnull)) -void -innobase_rollback_sec_index( -/*========================*/ - dict_table_t* user_table, - const TABLE* table, - ibool locked, - trx_t* trx) -{ - row_merge_drop_indexes(trx, user_table, locked); - - /* Free the table->fts only if there is no FTS_DOC_ID - in the table */ - if (user_table->fts - && !DICT_TF2_FLAG_IS_SET(user_table, - DICT_TF2_FTS_HAS_DOC_ID) - && !innobase_fulltext_exist(table)) { - fts_free(user_table); - } -} - -/** Roll back the changes made during prepare_inplace_alter_table() -and inplace_alter_table() inside the storage engine. Note that the -allowed level of concurrency during this operation will be the same as -for inplace_alter_table() and thus might be higher than during -prepare_inplace_alter_table(). (E.g concurrent writes were blocked -during prepare, but might not be during commit). - -@param ha_alter_info Data used during in-place alter. 
-@param table		the TABLE
-@param prebuilt		the prebuilt struct
-
-If no context or no dictionary transaction exists, only the common exit
-work is done. Otherwise, under the data dictionary lock, a rebuilt copy
-of the table is dropped (after freeing the online rebuild log), or the
-newly added secondary indexes are rolled back; ctx->trx is then committed
-and freed. On every path the to-be-added foreign keys are freed, the
-to_be_dropped flags are cleared, prebuilt->trx is committed and
-MONITOR_PENDING_ALTER_TABLE is decremented.
-
-@retval true		Failure
-@retval false		Success
-*/
-inline MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-rollback_inplace_alter_table(
-/*=========================*/
-	Alter_inplace_info*	ha_alter_info,
-	const TABLE*		table,
-	row_prebuilt_t*		prebuilt)
-{
-	bool	fail	= false;
-
-	ha_innobase_inplace_ctx*	ctx
-		= static_cast<ha_innobase_inplace_ctx*>
-		(ha_alter_info->handler_ctx);
-
-	DBUG_ENTER("rollback_inplace_alter_table");
-
-	if (!ctx || !ctx->trx) {
-		/* If we have not started a transaction yet,
-		(almost) nothing has been or needs to be done. */
-		goto func_exit;
-	}
-
-	row_mysql_lock_data_dictionary(ctx->trx);
-
-	if (ctx->need_rebuild()) {
-		dberr_t	err;
-		ulint	flags	= ctx->new_table->flags; /* saved for error reporting below */
-
-		/* DML threads can access ctx->new_table via the
-		online rebuild log. Free it first. */
-		innobase_online_rebuild_log_free(prebuilt->table);
-
-		/* Since the FTS index specific auxiliary tables have
-		not yet been registered with "table->fts" by fts_add_index(),
-		we need to delete them explicitly here */
-		if (DICT_TF2_FLAG_IS_SET(ctx->new_table, DICT_TF2_FTS)) {
-
-			err = innobase_drop_fts_index_table(
-				ctx->new_table, ctx->trx);
-
-			if (err != DB_SUCCESS) {
-				my_error_innodb(
-					err, table->s->table_name.str,
-					flags);
-				fail = true; /* keep going: the table itself is still dropped */
-			}
-		}
-
-		/* Drop the table. */
-		dict_table_close(ctx->new_table, TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
-		/* Nobody should have initialized the stats of the
-		newly created table yet. When this is the case, we
-		know that it has not been added for background stats
-		gathering. */
-		ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
-		err = row_merge_drop_table(ctx->trx, ctx->new_table);
-
-		switch (err) {
-		case DB_SUCCESS:
-			break;
-		default:
-			my_error_innodb(err, table->s->table_name.str,
-					flags);
-			fail = true;
-		}
-	} else {
-		/* No rebuild: only secondary indexes were being added to
-		the existing table, so roll those back. */
-		DBUG_ASSERT(!(ha_alter_info->handler_flags
-			      & Alter_inplace_info::ADD_PK_INDEX));
-		DBUG_ASSERT(ctx->new_table == prebuilt->table);
-
-		trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
-
-		innobase_rollback_sec_index(
-			prebuilt->table, table, FALSE, ctx->trx);
-	}
-
-	trx_commit_for_mysql(ctx->trx);
-	row_mysql_unlock_data_dictionary(ctx->trx);
-	trx_free_for_mysql(ctx->trx);
-
-func_exit:
-#ifndef DBUG_OFF
-	dict_index_t* clust_index = dict_table_get_first_index(
-		prebuilt->table);
-	DBUG_ASSERT(!clust_index->online_log);
-	DBUG_ASSERT(dict_index_get_online_status(clust_index)
-		    == ONLINE_INDEX_COMPLETE);
-#endif /* !DBUG_OFF */
-
-	if (ctx) {
-		DBUG_ASSERT(ctx->prebuilt == prebuilt);
-
-		if (ctx->num_to_add_fk) {
-			for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
-				dict_foreign_free(ctx->add_fk[i]);
-			}
-		}
-
-		if (ctx->num_to_drop_index) {
-			row_mysql_lock_data_dictionary(prebuilt->trx);
-
-			/* Clear the to_be_dropped flags
-			in the data dictionary cache.
-			The flags may already have been cleared,
-			in case an error was detected in
-			commit_inplace_alter_table(). */
-			for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
-				dict_index_t*	index = ctx->drop_index[i];
-				DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
-
-				index->to_be_dropped = 0;
-			}
-
-			row_mysql_unlock_data_dictionary(prebuilt->trx);
-		}
-	}
-
-	trx_commit_for_mysql(prebuilt->trx);
-	MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
-	DBUG_RETURN(fail);
-}
-
-/** Drop a FOREIGN KEY constraint from the data dictionary tables.
-@param trx data dictionary transaction -@param table_name Table name in MySQL -@param foreign_id Foreign key constraint identifier -@retval true Failure -@retval false Success */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_drop_foreign_try( -/*======================*/ - trx_t* trx, - const char* table_name, - const char* foreign_id) -{ - DBUG_ENTER("innobase_drop_foreign_try"); - - DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(mutex_own(&dict_sys->mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - /* Drop the constraint from the data dictionary. */ - static const char sql[] = - "PROCEDURE DROP_FOREIGN_PROC () IS\n" - "BEGIN\n" - "DELETE FROM SYS_FOREIGN WHERE ID=:id;\n" - "DELETE FROM SYS_FOREIGN_COLS WHERE ID=:id;\n" - "END;\n"; - - dberr_t error; - pars_info_t* info; - - info = pars_info_create(); - pars_info_add_str_literal(info, "id", foreign_id); - - trx->op_info = "dropping foreign key constraint from dictionary"; - error = que_eval_sql(info, sql, FALSE, trx); - trx->op_info = ""; - - DBUG_EXECUTE_IF("ib_drop_foreign_error", - error = DB_OUT_OF_FILE_SPACE;); - - if (error != DB_SUCCESS) { - my_error_innodb(error, table_name, 0); - trx->error_state = DB_SUCCESS; - DBUG_RETURN(true); - } - - DBUG_RETURN(false); -} - -/** Rename a column in the data dictionary tables. 
-Unless new_clustered is set, the column name is first updated in
-SYS_COLUMNS (matched by table id, old name and position nth_col) and in
-SYS_FIELDS for every index field carrying the old name. In all cases
-SYS_FOREIGN_COLS is then updated for every constraint in the table's
-foreign_set (FOR_COL_NAME) and referenced_set (REF_COL_NAME) that uses
-the old name. When new_clustered is set, the constraints modified this
-way are finally removed from the data dictionary cache.
-@param user_table	InnoDB table that was being altered
-@param trx		data dictionary transaction
-@param table_name	Table name in MySQL
-@param nth_col		0-based index of the column
-@param from		old column name
-@param to		new column name
-@param new_clustered	whether the table has been rebuilt
-@retval true		Failure
-@retval false		Success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_rename_column_try(
-/*=======================*/
-	const dict_table_t*	user_table,
-	trx_t*			trx,
-	const char*		table_name,
-	ulint			nth_col,
-	const char*		from,
-	const char*		to,
-	bool			new_clustered)
-{
-	pars_info_t*	info;
-	dberr_t		error;
-
-	DBUG_ENTER("innobase_rename_column_try");
-
-	DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-	ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (new_clustered) {
-		goto rename_foreign; /* skip the SYS_COLUMNS and SYS_FIELDS updates */
-	}
-
-	info = pars_info_create();
-
-	pars_info_add_ull_literal(info, "tableid", user_table->id);
-	pars_info_add_int4_literal(info, "nth", nth_col);
-	pars_info_add_str_literal(info, "old", from);
-	pars_info_add_str_literal(info, "new", to);
-
-	trx->op_info = "renaming column in SYS_COLUMNS";
-
-	error = que_eval_sql(
-		info,
-		"PROCEDURE RENAME_SYS_COLUMNS_PROC () IS\n"
-		"BEGIN\n"
-		"UPDATE SYS_COLUMNS SET NAME=:new\n"
-		"WHERE TABLE_ID=:tableid AND NAME=:old\n"
-		"AND POS=:nth;\n"
-		"END;\n",
-		FALSE, trx);
-
-	DBUG_EXECUTE_IF("ib_rename_column_error",
-			error = DB_OUT_OF_FILE_SPACE;);
-
-	if (error != DB_SUCCESS) {
-err_exit: /* common failure exit; also entered by goto from the loops below */
-		my_error_innodb(error, table_name, 0);
-		trx->error_state = DB_SUCCESS;
-		trx->op_info = "";
-		DBUG_RETURN(true);
-	}
-
-	trx->op_info = "renaming column in SYS_FIELDS";
-
-	for (const dict_index_t* index = dict_table_get_first_index(
-		     user_table);
-	     index != NULL;
-	     index = dict_table_get_next_index(index)) {
-
-		for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
-			if (strcmp(dict_index_get_nth_field(index, i)->name,
-				   from)) {
-				continue; /* this index field is a different column */
-			}
-
-			info = pars_info_create();
-
-			pars_info_add_ull_literal(info, "indexid", index->id);
-			pars_info_add_int4_literal(info, "nth", i);
-			pars_info_add_str_literal(info, "old", from);
-			pars_info_add_str_literal(info, "new", to);
-
-			error = que_eval_sql(
-				info,
-				"PROCEDURE RENAME_SYS_FIELDS_PROC () IS\n"
-				"BEGIN\n"
-
-				"UPDATE SYS_FIELDS SET COL_NAME=:new\n"
-				"WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
-				"AND POS=:nth;\n"
-
-				/* Try again, in case there is a prefix_len
-				encoded in SYS_FIELDS.POS */
-
-				"UPDATE SYS_FIELDS SET COL_NAME=:new\n"
-				"WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
-				"AND POS>=65536*:nth AND POS<65536*(:nth+1);\n"
-
-				"END;\n",
-				FALSE, trx);
-
-			if (error != DB_SUCCESS) {
-				goto err_exit;
-			}
-		}
-	}
-
-rename_foreign:
-	trx->op_info = "renaming column in SYS_FOREIGN_COLS";
-
-	std::list<dict_foreign_t*>	fk_evict; /* constraints whose column names were changed */
-	bool		foreign_modified;
-
-	for (dict_foreign_set::const_iterator it = user_table->foreign_set.begin();
-	     it != user_table->foreign_set.end();
-	     ++it) {
-
-		dict_foreign_t*	foreign = *it;
-		foreign_modified = false;
-
-		for (unsigned i = 0; i < foreign->n_fields; i++) {
-			if (strcmp(foreign->foreign_col_names[i], from)) {
-				continue;
-			}
-
-			info = pars_info_create();
-
-			pars_info_add_str_literal(info, "id", foreign->id);
-			pars_info_add_int4_literal(info, "nth", i);
-			pars_info_add_str_literal(info, "old", from);
-			pars_info_add_str_literal(info, "new", to);
-
-			error = que_eval_sql(
-				info,
-				"PROCEDURE RENAME_SYS_FOREIGN_F_PROC () IS\n"
-				"BEGIN\n"
-				"UPDATE SYS_FOREIGN_COLS\n"
-				"SET FOR_COL_NAME=:new\n"
-				"WHERE ID=:id AND POS=:nth\n"
-				"AND FOR_COL_NAME=:old;\n"
-				"END;\n",
-				FALSE, trx);
-
-			if (error != DB_SUCCESS) {
-				goto err_exit;
-			}
-			foreign_modified = true;
-		}
-
-		if (foreign_modified) {
-			fk_evict.push_back(foreign);
-		}
-	}
-
-	for (dict_foreign_set::const_iterator it
-		= user_table->referenced_set.begin();
-	     it != user_table->referenced_set.end();
-	     ++it) {
-
-		foreign_modified = false;
-		dict_foreign_t*	foreign = *it;
-
-		for (unsigned i = 0; i < foreign->n_fields; i++) {
-			if (strcmp(foreign->referenced_col_names[i], from)) {
-				continue;
-			}
-
-			info = pars_info_create();
-
-			pars_info_add_str_literal(info, "id", foreign->id);
-			pars_info_add_int4_literal(info, "nth", i);
-			pars_info_add_str_literal(info, "old", from);
-			pars_info_add_str_literal(info, "new", to);
-
-			error = que_eval_sql(
-				info,
-				"PROCEDURE RENAME_SYS_FOREIGN_R_PROC () IS\n"
-				"BEGIN\n"
-				"UPDATE SYS_FOREIGN_COLS\n"
-				"SET REF_COL_NAME=:new\n"
-				"WHERE ID=:id AND POS=:nth\n"
-				"AND REF_COL_NAME=:old;\n"
-				"END;\n",
-				FALSE, trx);
-
-			if (error != DB_SUCCESS) {
-				goto err_exit;
-			}
-			foreign_modified = true;
-		}
-
-		if (foreign_modified) {
-			fk_evict.push_back(foreign);
-		}
-	}
-
-	if (new_clustered) { /* the collected constraints are evicted only after a rebuild */
-		std::for_each(fk_evict.begin(), fk_evict.end(),
-			      dict_foreign_remove_from_cache);
-	}
-
-	trx->op_info = "";
-	DBUG_RETURN(false);
-}
-
-/** Rename columns in the data dictionary tables.
-@param ha_alter_info	Data used during in-place alter.
-@param ctx In-place ALTER TABLE context -@param table the TABLE -@param trx data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_rename_columns_try( -/*========================*/ - Alter_inplace_info* ha_alter_info, - ha_innobase_inplace_ctx*ctx, - const TABLE* table, - trx_t* trx, - const char* table_name) -{ - List_iterator_fast<Create_field> cf_it( - ha_alter_info->alter_info->create_list); - uint i = 0; - - DBUG_ASSERT(ctx); - DBUG_ASSERT(ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME); - - for (Field** fp = table->field; *fp; fp++, i++) { - if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db())) { - continue; - } - - cf_it.rewind(); - while (Create_field* cf = cf_it++) { - if (cf->field == *fp) { - if (innobase_rename_column_try( - ctx->old_table, trx, table_name, i, - cf->field->field_name.str, - cf->field_name.str, - ctx->need_rebuild())) { - return(true); - } - goto processed_field; - } - } - - ut_error; -processed_field: - continue; - } - - return(false); -} - -/** Rename columns in the data dictionary cache -as part of commit_cache_norebuild(). -@param ha_alter_info Data used during in-place alter. 
-@param table		the TABLE
-@param user_table	InnoDB table that was being altered
-
-Does nothing unless the ALTER carries ALTER_COLUMN_NAME. For every renamed
-field that is stored in the engine, the matching entry of the create list
-supplies the new name, which is applied with dict_mem_table_col_rename().
-Fields that are not stored in the engine are skipped, exactly as
-innobase_rename_columns_try() skips them when updating the dictionary
-tables, so that the cache and the dictionary tables stay in step. */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_rename_columns_cache(
-/*==========================*/
-	Alter_inplace_info*	ha_alter_info,
-	const TABLE*		table,
-	dict_table_t*		user_table)
-{
-	if (!(ha_alter_info->handler_flags
-	      & Alter_inplace_info::ALTER_COLUMN_NAME)) {
-		return;
-	}
-
-	List_iterator_fast<Create_field> cf_it(
-		ha_alter_info->alter_info->create_list);
-	uint i = 0;
-
-	for (Field** fp = table->field; *fp; fp++, i++) {
-		/* A field that is not stored in the engine has no column
-		in user_table; renaming "column i" for it would touch an
-		unrelated column of the InnoDB table. */
-		if (!((*fp)->flags & FIELD_IS_RENAMED)
-		    || !(*fp)->stored_in_db()) {
-			continue;
-		}
-
-		cf_it.rewind();
-		while (Create_field* cf = cf_it++) {
-			if (cf->field == *fp) {
-				/* NOTE(review): i counts all fields of the
-				MySQL table, as in
-				innobase_rename_columns_try(); confirm that
-				this equals the InnoDB column position when
-				non-stored fields precede this one. */
-				dict_mem_table_col_rename(user_table, i,
-							  cf->field->field_name.str,
-							  cf->field_name.str);
-				goto processed_field;
-			}
-		}
-
-		/* A renamed field must appear in the create list. */
-		ut_error;
-processed_field:
-		continue;
-	}
-}
-
-/** Get the auto-increment value of the table on commit.
-Four cases are handled, in this order: no AUTO_INCREMENT column after the
-ALTER (0); an AUTO_INCREMENT column was added (last value of
-ctx->sequence); an AUTO_INCREMENT value was supplied without adding a
-column (the supplied value, or the next value after the current column
-maximum if the supplied one is not larger); otherwise the counter stored
-in the old InnoDB table.
-@param ha_alter_info	Data used during in-place alter
-@param ctx		In-place ALTER TABLE context
-@param altered_table	MySQL table that is being altered
-@param old_table	MySQL table as it is before the ALTER operation
-@return the next auto-increment value (0 if not present) */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulonglong
-commit_get_autoinc(
-/*===============*/
-	Alter_inplace_info*	ha_alter_info,
-	ha_innobase_inplace_ctx*ctx,
-	const TABLE*		altered_table,
-	const TABLE*		old_table)
-{
-	ulonglong		max_autoinc;
-
-	DBUG_ENTER("commit_get_autoinc");
-
-	if (!altered_table->found_next_number_field) {
-		/* There is no AUTO_INCREMENT column in the table
-		after the ALTER operation. */
-		max_autoinc = 0;
-	} else if (ctx->add_autoinc != ULINT_UNDEFINED) {
-		/* An AUTO_INCREMENT column was added. Get the last
-		value from the sequence, which may be based on a
-		supplied AUTO_INCREMENT value. */
-		max_autoinc = ctx->sequence.last();
-	} else if ((ha_alter_info->handler_flags
-		    & Alter_inplace_info::CHANGE_CREATE_OPTION)
-		   && (ha_alter_info->create_info->used_fields
-		       & HA_CREATE_USED_AUTO)) {
-		/* An AUTO_INCREMENT value was supplied, but the table was not
-		rebuilt. Get the user-supplied value or the last value from the
-		sequence. */
-		ib_uint64_t	max_value_table;
-		dberr_t		err;
-
-		Field*	autoinc_field =
-			old_table->found_next_number_field;
-		KEY*	autoinc_key =
-			old_table->key_info + old_table->s->next_number_index;
-
-		dict_index_t*	index = dict_table_get_index_on_name(
-			ctx->old_table, autoinc_key->name);
-
-		max_autoinc = ha_alter_info->create_info->auto_increment_value;
-
-		dict_table_autoinc_lock(ctx->old_table);
-
-		err = row_search_max_autoinc(
-			index, autoinc_field->field_name.str, &max_value_table);
-
-		if (err != DB_SUCCESS) {
-			ut_ad(0);
-			max_autoinc = 0;
-		} else if (max_autoinc <= max_value_table) {
-			/* The supplied value is not above the largest value
-			already in the column: continue after that value
-			instead. */
-			ulonglong	col_max_value;
-			ulonglong	offset;
-
-			col_max_value = innobase_get_int_col_max_value(
-				old_table->found_next_number_field);
-
-			offset = ctx->prebuilt->autoinc_offset;
-			max_autoinc = innobase_next_autoinc(
-				max_value_table, 1, 1, offset,
-				col_max_value);
-		}
-		dict_table_autoinc_unlock(ctx->old_table);
-	} else {
-		/* An AUTO_INCREMENT value was not specified.
-		Read the old counter value from the table. */
-		ut_ad(old_table->found_next_number_field);
-		dict_table_autoinc_lock(ctx->old_table);
-		max_autoinc = ctx->old_table->autoinc;
-		dict_table_autoinc_unlock(ctx->old_table);
-	}
-
-	DBUG_RETURN(max_autoinc);
-}
-
-/** Add or drop foreign key constraints to the data dictionary tables,
-but do not touch the data dictionary cache.
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param trx Data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success -*/ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -innobase_update_foreign_try( -/*========================*/ - ha_innobase_inplace_ctx*ctx, - trx_t* trx, - const char* table_name) -{ - ulint foreign_id; - ulint i; - - DBUG_ENTER("innobase_update_foreign_try"); - DBUG_ASSERT(ctx); - - foreign_id = dict_table_get_highest_foreign_id(ctx->new_table); - - foreign_id++; - - for (i = 0; i < ctx->num_to_add_fk; i++) { - dict_foreign_t* fk = ctx->add_fk[i]; - - ut_ad(fk->foreign_table == ctx->new_table - || fk->foreign_table == ctx->old_table); - - dberr_t error = dict_create_add_foreign_id( - &foreign_id, ctx->old_table->name, fk); - - if (error != DB_SUCCESS) { - my_error(ER_TOO_LONG_IDENT, MYF(0), - fk->id); - DBUG_RETURN(true); - } - - if (!fk->foreign_index) { - fk->foreign_index = dict_foreign_find_index( - ctx->new_table, ctx->col_names, - fk->foreign_col_names, - fk->n_fields, fk->referenced_index, TRUE, - fk->type - & (DICT_FOREIGN_ON_DELETE_SET_NULL - | DICT_FOREIGN_ON_UPDATE_SET_NULL), - NULL, NULL, NULL); - if (!fk->foreign_index) { - my_error(ER_FK_INCORRECT_OPTION, - MYF(0), table_name, fk->id); - DBUG_RETURN(true); - } - } - - /* The fk->foreign_col_names[] uses renamed column - names, while the columns in ctx->old_table have not - been renamed yet. 
*/ - error = dict_create_add_foreign_to_dictionary( - (dict_table_t*)ctx->old_table, ctx->old_table->name, fk, trx); - - DBUG_EXECUTE_IF( - "innodb_test_cannot_add_fk_system", - error = DB_ERROR;); - - if (error != DB_SUCCESS) { - my_error(ER_FK_FAIL_ADD_SYSTEM, MYF(0), - fk->id); - DBUG_RETURN(true); - } - } - - /* Drop the requested constraints from the data dictionary tables. */ for (i = 0; i < ctx->num_to_drop_fk; i++) { - dict_foreign_t* fk = ctx->drop_fk[i]; - - DBUG_ASSERT(fk->foreign_table == ctx->old_table); - - if (innobase_drop_foreign_try(trx, table_name, fk->id)) { - DBUG_RETURN(true); - } - } - - DBUG_RETURN(false); -} - -/** Update the foreign key constraint definitions in the data dictionary cache -after the changes to data dictionary tables were committed. -The constraints are reloaded for ctx->new_table if the table was rebuilt, -and for ctx->old_table otherwise. -@param ctx In-place ALTER TABLE context -@param user_thd MySQL connection -@return InnoDB error code (should always be DB_SUCCESS) */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -innobase_update_foreign_cache( -/*==========================*/ - ha_innobase_inplace_ctx* ctx, - THD* user_thd) -{ - dict_table_t* user_table; - dberr_t err = DB_SUCCESS; - - DBUG_ENTER("innobase_update_foreign_cache"); - - user_table = ctx->old_table; - - /* Discard the added foreign keys, because we will - load them from the data dictionary. */ - for (ulint i = 0; i < ctx->num_to_add_fk; i++) { - dict_foreign_t* fk = ctx->add_fk[i]; - dict_foreign_free(fk); - } - - if (ctx->need_rebuild()) { - /* The rebuilt table is already using the renamed - column names. No need to pass col_names or to drop - constraints from the data dictionary cache. */ - DBUG_ASSERT(!ctx->col_names); - DBUG_ASSERT(user_table->foreign_set.empty()); - DBUG_ASSERT(user_table->referenced_set.empty()); - user_table = ctx->new_table; - } else { - /* Drop the foreign key constraints if the - table was not rebuilt. If the table is rebuilt, - there would not be any foreign key constraints for - it yet in the data dictionary cache. 
*/ - for (ulint i = 0; i < ctx->num_to_drop_fk; i++) { - dict_foreign_t* fk = ctx->drop_fk[i]; - dict_foreign_remove_from_cache(fk); - } - } - - /* Load the old or added foreign keys from the data dictionary - and prevent the table from being evicted from the data - dictionary cache (work around the lack of WL#6049). */ - err = dict_load_foreigns(user_table->name, - ctx->col_names, false, true, - DICT_ERR_IGNORE_NONE); - - if (err == DB_CANNOT_ADD_CONSTRAINT) { - /* It is possible that existing foreign keys were - loaded with "foreign_key_checks" off, - so retry the loading with the charset check turned off. */ - err = dict_load_foreigns(user_table->name, - ctx->col_names, false, false, - DICT_ERR_IGNORE_NONE); - - /* If the load with "charset_check" off succeeded, warn - the user that the foreign keys were loaded with a mismatched - charset. */ - if (err == DB_SUCCESS) { - push_warning_printf( - user_thd, - Sql_condition::WARN_LEVEL_WARN, - ER_ALTER_INFO, - "Foreign key constraints for table '%s'" - " are loaded with charset check off", - user_table->name); - } - } - - DBUG_RETURN(err); -} - -/** Commit the changes made during prepare_inplace_alter_table() -and inplace_alter_table() inside the data dictionary tables, -when rebuilding the table. 
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param altered_table MySQL table that is being altered -@param old_table MySQL table as it is before the ALTER operation -@param trx Data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success -*/ -inline MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -commit_try_rebuild( -/*===============*/ - Alter_inplace_info* ha_alter_info, - ha_innobase_inplace_ctx*ctx, - TABLE* altered_table, - const TABLE* old_table, - trx_t* trx, - const char* table_name) -{ - dict_table_t* rebuilt_table = ctx->new_table; - dict_table_t* user_table = ctx->old_table; - - DBUG_ENTER("commit_try_rebuild"); - DBUG_ASSERT(ctx->need_rebuild()); - DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH); - DBUG_ASSERT(!(ha_alter_info->handler_flags - & Alter_inplace_info::DROP_FOREIGN_KEY) - || ctx->num_to_drop_fk > 0); - DBUG_ASSERT(ctx->num_to_drop_fk - == ha_alter_info->alter_info->drop_list.elements); - - for (dict_index_t* index = dict_table_get_first_index(rebuilt_table); - index; - index = dict_table_get_next_index(index)) { - DBUG_ASSERT(dict_index_get_online_status(index) - == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); - if (dict_index_is_corrupted(index)) { - my_error(ER_INDEX_CORRUPT, MYF(0), - index->name); - DBUG_RETURN(true); - } - } - - if (innobase_update_foreign_try(ctx, trx, table_name)) { - DBUG_RETURN(true); - } - - dberr_t error; - - /* Clear the to_be_dropped flag in the data dictionary cache - of user_table. */ - for (ulint i = 0; i < ctx->num_to_drop_index; i++) { - dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(index->table == user_table); - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); - DBUG_ASSERT(index->to_be_dropped); - index->to_be_dropped = 0; - } - - /* We copied the table. Any indexes that were requested to be - dropped were not created in the copy of the table. 
Apply any - last bit of the rebuild log and then rename the tables. */ - - if (ctx->online) { - DEBUG_SYNC_C("row_log_table_apply2_before"); - error = row_log_table_apply( - ctx->thr, user_table, altered_table); - ulint err_key = thr_get_trx(ctx->thr)->error_key_num; - - switch (error) { - KEY* dup_key; - case DB_SUCCESS: - break; - case DB_DUPLICATE_KEY: - if (err_key == ULINT_UNDEFINED) { - /* This should be the hidden index on - FTS_DOC_ID. */ - dup_key = NULL; - } else { - DBUG_ASSERT(err_key < - ha_alter_info->key_count); - dup_key = &ha_alter_info - ->key_info_buffer[err_key]; - } - print_keydup_error(altered_table, dup_key, MYF(0)); - DBUG_RETURN(true); - case DB_ONLINE_LOG_TOO_BIG: - my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0), - get_error_key_name(err_key, ha_alter_info, - rebuilt_table)); - DBUG_RETURN(true); - case DB_INDEX_CORRUPT: - my_error(ER_INDEX_CORRUPT, MYF(0), - get_error_key_name(err_key, ha_alter_info, - rebuilt_table)); - DBUG_RETURN(true); - default: - my_error_innodb(error, table_name, user_table->flags); - DBUG_RETURN(true); - } - } - - if ((ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME) - && innobase_rename_columns_try(ha_alter_info, ctx, old_table, - trx, table_name)) { - DBUG_RETURN(true); - } - - DBUG_EXECUTE_IF("ib_ddl_crash_before_rename", DBUG_SUICIDE();); - - /* The new table must inherit the discarded flag from the - "parent" table. */ - if (dict_table_is_discarded(user_table)) { - rebuilt_table->file_unreadable = true; - rebuilt_table->flags2 |= DICT_TF2_DISCARDED; - } - - /* We can now rename the old table as a temporary table, - rename the new temporary table as the old table and drop the - old table. First, we only do this in the data dictionary - tables. The actual renaming will be performed in - commit_cache_rebuild(), once the data dictionary transaction - has been successfully committed. 
*/ - - error = row_merge_rename_tables_dict( - user_table, rebuilt_table, ctx->tmp_name, trx); - - /* We must still be holding a table handle. */ - DBUG_ASSERT(user_table->n_ref_count >= 1); - - DBUG_EXECUTE_IF("ib_ddl_crash_after_rename", DBUG_SUICIDE();); - DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;); - - if (user_table->n_ref_count > 1) { - /* This should only occur when an innodb_memcached - connection with innodb_api_enable_mdl=off was started - before commit_inplace_alter_table() locked the data - dictionary. We must roll back the ALTER TABLE, because - we cannot drop a table while it is being used. */ - - /* Normally, n_ref_count must be 1, because purge - cannot be executing on this very table as we are - holding dict_operation_lock X-latch. */ - - error = DB_LOCK_WAIT_TIMEOUT; - } - - switch (error) { - case DB_SUCCESS: - DBUG_RETURN(false); - case DB_TABLESPACE_EXISTS: - ut_a(rebuilt_table->n_ref_count == 1); - my_error(ER_TABLESPACE_EXISTS, MYF(0), ctx->tmp_name); - DBUG_RETURN(true); - case DB_DUPLICATE_KEY: - ut_a(rebuilt_table->n_ref_count == 1); - my_error(ER_TABLE_EXISTS_ERROR, MYF(0), ctx->tmp_name); - DBUG_RETURN(true); - default: - my_error_innodb(error, table_name, user_table->flags); - DBUG_RETURN(true); - } -} - -/** Apply the changes made during commit_try_rebuild(), -to the data dictionary cache and the file system. -The old table is renamed to ctx->tmp_name and the new table takes over -the name that the old table had. -@param ctx In-place ALTER TABLE context */ -inline MY_ATTRIBUTE((nonnull)) -void -commit_cache_rebuild( -/*=================*/ - ha_innobase_inplace_ctx* ctx) -{ - dberr_t error; - - DBUG_ENTER("commit_cache_rebuild"); - DBUG_ASSERT(ctx->need_rebuild()); - DBUG_ASSERT(dict_table_is_discarded(ctx->old_table) - == dict_table_is_discarded(ctx->new_table)); - - /* Keep a copy of the original name, because the old table is renamed first. */ const char* old_name = mem_heap_strdup( - ctx->heap, ctx->old_table->name); - - /* We already committed and redo logged the renames, - so this must succeed. 
*/ - error = dict_table_rename_in_cache( - ctx->old_table, ctx->tmp_name, FALSE); - ut_a(error == DB_SUCCESS); - - DEBUG_SYNC_C("commit_cache_rebuild_middle"); - - error = dict_table_rename_in_cache( - ctx->new_table, old_name, FALSE); - ut_a(error == DB_SUCCESS); - - DBUG_VOID_RETURN; -} - -/** Collect the column numbers of the user-defined columns that belong -to the indexes which are being dropped. -@param[in] ctx In-place ALTER TABLE context -@param[out] drop_col_list set to which the column numbers are added; - it receives every column which is part of an index being dropped */ -static -void -get_col_list_to_be_dropped( - ha_innobase_inplace_ctx* ctx, - std::set<ulint>& drop_col_list) -{ - for (ulint index_count = 0; index_count < ctx->num_to_drop_index; - index_count++) { - dict_index_t* index = ctx->drop_index[index_count]; - - for (ulint col = 0; col < index->n_user_defined_cols; col++) { - ulint col_no = dict_index_get_nth_col_no(index, col); - drop_col_list.insert(col_no); - } - } -} - -/** Check whether a column is one of the user-defined columns of any index -of the table which is not going to be dropped. Indexes that have the -to_be_dropped flag set are skipped. -@param[in] table table object -@param[in] col_no column number of the column which is to be checked -@retval true the column is part of an index that is kept -@retval false the column is not part of any index that is kept */ -static -bool -check_col_exists_in_indexes( - const dict_table_t* table, - ulint col_no) -{ - for (dict_index_t* index = dict_table_get_first_index(table); index; - index = dict_table_get_next_index(index)) { - - if (index->to_be_dropped) { - continue; - } - - for (ulint col = 0; col < index->n_user_defined_cols; col++) { - - ulint index_col_no = dict_index_get_nth_col_no( - index, col); - if (col_no == index_col_no) { - return(true); - } - } - } - - return(false); -} - -/** Commit the changes made during prepare_inplace_alter_table() -and inplace_alter_table() inside the data dictionary tables, -when not rebuilding the table. 
-@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param old_table MySQL table as it is before the ALTER operation -@param trx Data dictionary transaction -@param table_name Table name in MySQL -@retval true Failure -@retval false Success -*/ -inline MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -commit_try_norebuild( -/*=================*/ - Alter_inplace_info* ha_alter_info, - ha_innobase_inplace_ctx*ctx, - const TABLE* old_table, - trx_t* trx, - const char* table_name) -{ - DBUG_ENTER("commit_try_norebuild"); - DBUG_ASSERT(!ctx->need_rebuild()); - DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH); - DBUG_ASSERT(!(ha_alter_info->handler_flags - & Alter_inplace_info::DROP_FOREIGN_KEY) - || ctx->num_to_drop_fk > 0); - DBUG_ASSERT(ctx->num_to_drop_fk - == ha_alter_info->alter_info->drop_list.elements); - - for (ulint i = 0; i < ctx->num_to_add_index; i++) { - dict_index_t* index = ctx->add_index[i]; - DBUG_ASSERT(dict_index_get_online_status(index) - == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX); - if (dict_index_is_corrupted(index)) { - /* Report a duplicate key - error for the index that was - flagged corrupted, most likely - because a duplicate value was - inserted (directly or by - rollback) after - ha_innobase::inplace_alter_table() - completed. - TODO: report this as a corruption - with a detailed reason once - WL#6379 has been implemented. - The name is reported without its - leading TEMP_INDEX_PREFIX byte. */ - my_error(ER_DUP_UNKNOWN_IN_INDEX, - MYF(0), index->name + 1); - DBUG_RETURN(true); - } - } - - if (innobase_update_foreign_try(ctx, trx, table_name)) { - DBUG_RETURN(true); - } - - dberr_t error; - - /* We altered the table in place. */ - /* Lose the TEMP_INDEX_PREFIX of the added indexes. 
*/ - for (ulint i = 0; i < ctx->num_to_add_index; i++) { - dict_index_t* index = ctx->add_index[i]; - DBUG_ASSERT(dict_index_get_online_status(index) - == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name - == TEMP_INDEX_PREFIX); - error = row_merge_rename_index_to_add( - trx, ctx->new_table->id, index->id); - if (error != DB_SUCCESS) { - sql_print_error( - "InnoDB: rename index to add: %lu\n", - (ulong) error); - DBUG_ASSERT(0); - my_error(ER_INTERNAL_ERROR, MYF(0), - "rename index to add"); - DBUG_RETURN(true); - } - } - - /* Drop any indexes that were requested to be dropped. - Rename them to TEMP_INDEX_PREFIX in the data - dictionary first. We do not bother to rename - index->name in the dictionary cache, because the index - is about to be freed after row_merge_drop_indexes_dict(). */ - - for (ulint i = 0; i < ctx->num_to_drop_index; i++) { - dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); - DBUG_ASSERT(index->table == ctx->new_table); - DBUG_ASSERT(index->to_be_dropped); - - error = row_merge_rename_index_to_drop( - trx, index->table->id, index->id); - if (error != DB_SUCCESS) { - sql_print_error( - "InnoDB: rename index to drop: %lu\n", - (ulong) error); - DBUG_ASSERT(0); - my_error(ER_INTERNAL_ERROR, MYF(0), - "rename index to drop"); - DBUG_RETURN(true); - } - } - - /* Column renames are applied last, and only if the ALTER requested any. */ if (!(ha_alter_info->handler_flags - & Alter_inplace_info::ALTER_COLUMN_NAME)) { - DBUG_RETURN(false); - } - - DBUG_RETURN(innobase_rename_columns_try(ha_alter_info, ctx, - old_table, trx, table_name)); -} - -/** Commit the changes to the data dictionary cache -after a successful commit_try_norebuild() call. 
-@param ctx In-place ALTER TABLE context -@param table the TABLE before the ALTER -@param trx Data dictionary transaction object -(will be started and committed) -@return whether all replacements were found for dropped indexes */ -inline MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -commit_cache_norebuild( -/*===================*/ - ha_innobase_inplace_ctx*ctx, - const TABLE* table, - trx_t* trx) -{ - DBUG_ENTER("commit_cache_norebuild"); - - bool found = true; - - DBUG_ASSERT(!ctx->need_rebuild()); - - std::set<ulint> drop_list; - std::set<ulint>::const_iterator col_it; - - /* For each column that is part of an index to be dropped, check if it is - part of any other index which is not being dropped. If it is not, then - set the ord_part of the column to 0. */ - get_col_list_to_be_dropped(ctx, drop_list); - - for(col_it = drop_list.begin(); col_it != drop_list.end(); ++col_it) { - if (!check_col_exists_in_indexes(ctx->new_table, *col_it)) { - ctx->new_table->cols[*col_it].ord_part = 0; - } - } - - for (ulint i = 0; i < ctx->num_to_add_index; i++) { - dict_index_t* index = ctx->add_index[i]; - DBUG_ASSERT(dict_index_get_online_status(index) - == ONLINE_INDEX_COMPLETE); - DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX); - /* Step over the TEMP_INDEX_PREFIX byte of the cached name. */ index->name++; - } - - if (ctx->num_to_drop_index) { - /* Really drop the indexes that were dropped. - The transaction had to be committed first - (after renaming the indexes), so that in the - event of a crash, crash recovery will drop the - indexes, because it drops all indexes whose - names start with TEMP_INDEX_PREFIX. Once we - have started dropping an index tree, there is - no way to roll it back. */ - - for (ulint i = 0; i < ctx->num_to_drop_index; i++) { - dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); - DBUG_ASSERT(index->table == ctx->new_table); - DBUG_ASSERT(index->to_be_dropped); - - /* Replace the indexes in foreign key - constraints if needed. 
*/ - - if (!dict_foreign_replace_index( - index->table, ctx->col_names, index)) { - found = false; - } - - /* Mark the index dropped - in the data dictionary cache. */ - rw_lock_x_lock(dict_index_get_lock(index)); - index->page = FIL_NULL; - rw_lock_x_unlock(dict_index_get_lock(index)); - } - - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - row_merge_drop_indexes_dict(trx, ctx->new_table->id); - - for (ulint i = 0; i < ctx->num_to_drop_index; i++) { - dict_index_t* index = ctx->drop_index[i]; - DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX); - DBUG_ASSERT(index->table == ctx->new_table); - - if (index->type & DICT_FTS) { - DBUG_ASSERT(index->type == DICT_FTS - || (index->type - & DICT_CORRUPT)); - DBUG_ASSERT(index->table->fts); - fts_drop_index(index->table, index, trx); - } - - dict_index_remove_from_cache(index->table, index); - } - - trx_commit_for_mysql(trx); - } - - DBUG_RETURN(found); -} - -/** Adjust the persistent statistics after non-rebuilding ALTER TABLE. -Remove statistics for dropped indexes and add statistics for created indexes. -FULLTEXT indexes are skipped. Nothing is done if persistent statistics are -not enabled for the table. A failure to drop index statistics is reported -as a warning. -@param ha_alter_info Data used during in-place alter -@param ctx In-place ALTER TABLE context -@param altered_table MySQL table that is being altered (not used here) -@param table_name Table name in MySQL (not used here) -@param thd MySQL connection -*/ -static -void -alter_stats_norebuild( -/*==================*/ - Alter_inplace_info* ha_alter_info, - ha_innobase_inplace_ctx* ctx, - TABLE* altered_table, - const char* table_name, - THD* thd) -{ - ulint i; - - DBUG_ENTER("alter_stats_norebuild"); - DBUG_ASSERT(!ctx->need_rebuild()); - - if (!dict_stats_is_persistent_enabled(ctx->new_table)) { - DBUG_VOID_RETURN; - } - - /* TODO: This will not drop the (unused) statistics for - FTS_DOC_ID_INDEX if it was a hidden index, dropped together - with the last remaining FULLTEXT index. 
*/ - for (i = 0; i < ha_alter_info->index_drop_count; i++) { - const KEY* key = ha_alter_info->index_drop_buffer[i]; - - if (key->flags & HA_FULLTEXT) { - /* There are no index cardinality - statistics for FULLTEXT indexes. */ - continue; - } - - char errstr[1024]; - - if (dict_stats_drop_index( - ctx->new_table->name, key->name, - errstr, sizeof errstr) != DB_SUCCESS) { - push_warning(thd, - Sql_condition::WARN_LEVEL_WARN, - ER_LOCK_WAIT_TIMEOUT, errstr); - } - } - - for (i = 0; i < ctx->num_to_add_index; i++) { - dict_index_t* index = ctx->add_index[i]; - DBUG_ASSERT(index->table == ctx->new_table); - - if (!(index->type & DICT_FTS)) { - dict_stats_init(ctx->new_table); - dict_stats_update_for_index(index); - } - } - - DBUG_VOID_RETURN; -} - -/** Adjust the persistent statistics after rebuilding ALTER TABLE. -Recalculate the persistent statistics of the rebuilt table. Nothing is done -if the table is discarded or if persistent statistics are not enabled for it. -A failure is reported as a warning. -@param table InnoDB table that was rebuilt by ALTER TABLE -@param table_name Table name in MySQL -@param thd MySQL connection -*/ -static -void -alter_stats_rebuild( -/*================*/ - dict_table_t* table, - const char* table_name, - THD* thd) -{ - DBUG_ENTER("alter_stats_rebuild"); - - if (dict_table_is_discarded(table) - || !dict_stats_is_persistent_enabled(table)) { - DBUG_VOID_RETURN; - } - - dberr_t ret; - - ret = dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT); - - if (ret != DB_SUCCESS) { - push_warning_printf( - thd, - Sql_condition::WARN_LEVEL_WARN, - ER_ALTER_INFO, - "Error updating stats for table '%s' " - "after table rebuild: %s", - table_name, ut_strerr(ret)); - } - - DBUG_VOID_RETURN; -} - -#ifndef DBUG_OFF -# define DBUG_INJECT_CRASH(prefix, count) \ -do { \ - char buf[32]; \ - ut_snprintf(buf, sizeof buf, prefix "_%u", count); \ - DBUG_EXECUTE_IF(buf, DBUG_SUICIDE();); \ -} while (0) -#else -# define DBUG_INJECT_CRASH(prefix, count) -#endif - -/** Commit or rollback the changes made during 
-prepare_inplace_alter_table() and inplace_alter_table() inside -the storage engine. Note that the allowed level of concurrency -during this operation will be the same as for -inplace_alter_table() and thus might be higher than during -prepare_inplace_alter_table(). (E.g concurrent writes were -blocked during prepare, but might not be during commit). -@param altered_table TABLE object for new version of table. -@param ha_alter_info Structure describing changes to be done -by ALTER TABLE and holding data used during in-place alter. -@param commit true => Commit, false => Rollback. -@retval true Failure -@retval false Success -*/ -UNIV_INTERN -bool -ha_innobase::commit_inplace_alter_table( -/*====================================*/ - TABLE* altered_table, - Alter_inplace_info* ha_alter_info, - bool commit) -{ - dberr_t error; - ha_innobase_inplace_ctx* ctx0 - = static_cast<ha_innobase_inplace_ctx*> - (ha_alter_info->handler_ctx); -#ifndef DBUG_OFF - uint crash_inject_count = 1; - uint crash_fail_inject_count = 1; - uint failure_inject_count = 1; -#endif - - DBUG_ENTER("commit_inplace_alter_table"); - DBUG_ASSERT(!srv_read_only_mode); - DBUG_ASSERT(!ctx0 || ctx0->prebuilt == prebuilt); - DBUG_ASSERT(!ctx0 || ctx0->old_table == prebuilt->table); - - DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter"); - - DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait"); - - if (!commit) { - /* A rollback is being requested. So far we may at - most have created some indexes. If any indexes were to - be dropped, they would actually be dropped in this - method if commit=true. 
*/ - DBUG_RETURN(rollback_inplace_alter_table( - ha_alter_info, table, prebuilt)); - } - - if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) { - DBUG_ASSERT(!ctx0); - MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); - ha_alter_info->group_commit_ctx = NULL; - DBUG_RETURN(false); - } - - DBUG_ASSERT(ctx0); - - inplace_alter_handler_ctx** ctx_array; - inplace_alter_handler_ctx* ctx_single[2]; - - if (ha_alter_info->group_commit_ctx) { - ctx_array = ha_alter_info->group_commit_ctx; - } else { - ctx_single[0] = ctx0; - ctx_single[1] = NULL; - ctx_array = ctx_single; - } - - DBUG_ASSERT(ctx0 == ctx_array[0]); - ut_ad(prebuilt->table == ctx0->old_table); - ha_alter_info->group_commit_ctx = NULL; - - /* Free the ctx->trx of other partitions, if any. We will only - use the ctx0->trx here. Others may have been allocated in - the prepare stage. */ - - for (inplace_alter_handler_ctx** pctx = &ctx_array[1]; *pctx; - pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - - if (ctx->trx) { - trx_free_for_mysql(ctx->trx); - ctx->trx = NULL; - } - } - - trx_start_if_not_started_xa(prebuilt->trx); - - for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - DBUG_ASSERT(ctx->prebuilt->trx == prebuilt->trx); - - /* If decryption failed for old table or new table - fail here. */ - if ((ctx->old_table->file_unreadable && - fil_space_get(ctx->old_table->space) != NULL)|| - (ctx->new_table->file_unreadable && - fil_space_get(ctx->new_table->space) != NULL)) { - String str; - const char* engine= table_type(); - get_error_message(HA_ERR_DECRYPTION_FAILED, &str); - my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine); - DBUG_RETURN(true); - } - - /* Exclusively lock the table, to ensure that no other - transaction is holding locks on the table while we - change the table definition. 
The MySQL meta-data lock - should normally guarantee that no conflicting locks - exist. However, FOREIGN KEY constraints checks and any - transactions collected during crash recovery could be - holding InnoDB locks only, not MySQL locks. */ - - error = row_merge_lock_table( - prebuilt->trx, ctx->old_table, LOCK_X); - - if (error != DB_SUCCESS) { - my_error_innodb( - error, table_share->table_name.str, 0); - DBUG_RETURN(true); - } - } - - DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table"); - - const bool new_clustered = ctx0->need_rebuild(); - trx_t* trx = ctx0->trx; - bool fail = false; - - if (new_clustered) { - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - DBUG_ASSERT(ctx->need_rebuild()); - - if (ctx->old_table->fts) { - ut_ad(!ctx->old_table->fts->add_wq); - fts_optimize_remove_table( - ctx->old_table); - } - - if (ctx->new_table->fts) { - ut_ad(!ctx->new_table->fts->add_wq); - fts_optimize_remove_table( - ctx->new_table); - } - } - } - - if (!trx) { - DBUG_ASSERT(!new_clustered); - trx = innobase_trx_allocate(user_thd); - } - - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - /* Latch the InnoDB data dictionary exclusively so that no deadlocks - or lock waits can happen in it during the data dictionary operation. */ - row_mysql_lock_data_dictionary(trx); - - /* Prevent the background statistics collection from accessing - the tables. 
*/ - for (;;) { - bool retry = false; - - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - - DBUG_ASSERT(new_clustered == ctx->need_rebuild()); - - if (new_clustered - && !dict_stats_stop_bg(ctx->old_table)) { - retry = true; - } - - if (!dict_stats_stop_bg(ctx->new_table)) { - retry = true; - } - } - - if (!retry) { - break; - } - - DICT_STATS_BG_YIELD(trx); - } - - /* Apply the changes to the data dictionary tables, for all - partitions. */ - - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx && !fail; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - - DBUG_ASSERT(new_clustered == ctx->need_rebuild()); - - ctx->max_autoinc = commit_get_autoinc( - ha_alter_info, ctx, altered_table, table); - - if (ctx->need_rebuild()) { - ctx->tmp_name = dict_mem_create_temporary_tablename( - ctx->heap, ctx->new_table->name, - ctx->new_table->id); - - fail = commit_try_rebuild( - ha_alter_info, ctx, altered_table, table, - trx, table_share->table_name.str); - } else { - fail = commit_try_norebuild( - ha_alter_info, ctx, table, trx, - table_share->table_name.str); - } - DBUG_INJECT_CRASH("ib_commit_inplace_crash", - crash_inject_count++); -#ifndef DBUG_OFF - { - /* Generate a dynamic dbug text. */ - char buf[32]; - ut_snprintf(buf, sizeof buf, "ib_commit_inplace_fail_%u", - failure_inject_count++); - DBUG_EXECUTE_IF(buf, - my_error(ER_INTERNAL_ERROR, MYF(0), - "Injected error!"); - fail = true; - ); - } -#endif - } - - /* Commit or roll back the changes to the data dictionary. 
*/ - - if (fail) { - trx_rollback_for_mysql(trx); - } else if (!new_clustered) { - trx_commit_for_mysql(trx); - } else { - mtr_t mtr; - mtr_start(&mtr); - - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - - DBUG_ASSERT(ctx->need_rebuild()); - /* Check for any possible problems for any - file operations that will be performed in - commit_cache_rebuild(), and if none, generate - the redo log for these operations. */ - error = fil_mtr_rename_log(ctx->old_table, - ctx->new_table, - ctx->tmp_name, &mtr); - if (error != DB_SUCCESS) { - /* Out of memory or a problem will occur - when renaming files. */ - fail = true; - my_error_innodb(error, ctx->old_table->name, - ctx->old_table->flags); - } - DBUG_INJECT_CRASH("ib_commit_inplace_crash", - crash_inject_count++); - } - - /* Test what happens on crash if the redo logs - are flushed to disk here. The log records - about the rename should not be committed, and - the data dictionary transaction should be - rolled back, restoring the old table. */ - DBUG_EXECUTE_IF("innodb_alter_commit_crash_before_commit", - log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - ut_ad(!trx->fts_trx); - - if (fail) { - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - mtr_commit(&mtr); - trx_rollback_for_mysql(trx); - } else { - /* The following call commits the - mini-transaction, making the data dictionary - transaction committed at mtr.end_lsn. The - transaction becomes 'durable' by the time when - log_buffer_flush_to_disk() returns. In the - logical sense the commit in the file-based - data structures happens here. */ - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); - ut_ad(trx->insert_undo || trx->update_undo); - - trx_commit_low(trx, &mtr); - } - - /* If server crashes here, the dictionary in - InnoDB and MySQL will differ. The .ibd files - and the .frm files must be swapped manually by - the administrator. No loss of data. 
*/ - DBUG_EXECUTE_IF("innodb_alter_commit_crash_after_commit", - log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - } - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - /* At this point, the changes to the persistent storage have - been committed or rolled back. What remains to be done is to - update the in-memory structures, close some handles, release - temporary files, and (unless we rolled back) update persistent - statistics. */ - - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*>(*pctx); - - DBUG_ASSERT(ctx->need_rebuild() == new_clustered); - - if (new_clustered) { - innobase_online_rebuild_log_free(ctx->old_table); - } - - if (fail) { - if (new_clustered) { - dict_table_close(ctx->new_table, - TRUE, FALSE); - -#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG - /* Nobody should have initialized the - stats of the newly created table - yet. When this is the case, we know - that it has not been added for - background stats gathering. */ - ut_a(!ctx->new_table->stat_initialized); -#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */ - - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - row_merge_drop_table(trx, ctx->new_table); - trx_commit_for_mysql(trx); - ctx->new_table = NULL; - } else { - /* We failed, but did not rebuild the table. - Roll back any ADD INDEX, or get rid of garbage - ADD INDEX that was left over from a previous - ALTER TABLE statement. 
*/ - trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); - innobase_rollback_sec_index( - ctx->new_table, table, TRUE, trx); - trx_commit_for_mysql(trx); - } - DBUG_INJECT_CRASH("ib_commit_inplace_crash_fail", - crash_fail_inject_count++); - - continue; - } - - innobase_copy_frm_flags_from_table_share( - ctx->new_table, altered_table->s); - - if (new_clustered) { - /* We will reload and refresh the - in-memory foreign key constraint - metadata. This is a rename operation - in preparing for dropping the old - table. Set the table to_be_dropped bit - here, so to make sure DML foreign key - constraint check does not use the - stale dict_foreign_t. This is done - because WL#6049 (FK MDL) has not been - implemented yet. */ - ctx->old_table->to_be_dropped = true; - - /* Rename the tablespace files. */ - commit_cache_rebuild(ctx); - - error = innobase_update_foreign_cache(ctx, user_thd); - if (error != DB_SUCCESS) { - goto foreign_fail; - } - } else { - error = innobase_update_foreign_cache(ctx, user_thd); - - if (error != DB_SUCCESS) { -foreign_fail: - /* The data dictionary cache - should be corrupted now. The - best solution should be to - kill and restart the server, - but the *.frm file has not - been replaced yet. */ - my_error(ER_CANNOT_ADD_FOREIGN, - MYF(0)); - sql_print_error( - "InnoDB: dict_load_foreigns()" - " returned %u for %s", - (unsigned) error, - thd_query_string(user_thd) - ->str); - ut_ad(0); - } else { - if (!commit_cache_norebuild( - ctx, table, trx)) { - ut_a(!prebuilt->trx->check_foreigns); - } - - innobase_rename_columns_cache( - ha_alter_info, table, - ctx->new_table); - } - } - DBUG_INJECT_CRASH("ib_commit_inplace_crash", - crash_inject_count++); - } - - /* Invalidate the index translation table. In partitioned - tables, there is one TABLE_SHARE (and also only one TABLE) - covering all partitions. 
*/ - share->idx_trans_tbl.index_count = 0; - - if (trx == ctx0->trx) { - ctx0->trx = NULL; - } - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - if (fail) { - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*> - (*pctx); - DBUG_ASSERT(ctx->need_rebuild() == new_clustered); - - ut_d(dict_table_check_for_dup_indexes( - ctx->old_table, - CHECK_ABORTED_OK)); - ut_a(fts_check_cached_index(ctx->old_table)); - DBUG_INJECT_CRASH("ib_commit_inplace_crash_fail", - crash_fail_inject_count++); - } - - row_mysql_unlock_data_dictionary(trx); - trx_free_for_mysql(trx); - DBUG_RETURN(true); - } - - /* Release the table locks. */ - trx_commit_for_mysql(prebuilt->trx); - - DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit", DBUG_SUICIDE();); - - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*> - (*pctx); - DBUG_ASSERT(ctx->need_rebuild() == new_clustered); - - if (altered_table->found_next_number_field) { - dict_table_t* t = ctx->new_table; - - dict_table_autoinc_lock(t); - dict_table_autoinc_initialize(t, ctx->max_autoinc); - dict_table_autoinc_unlock(t); - } - - bool add_fts = false; - - /* Publish the created fulltext index, if any. - Note that a fulltext index can be created without - creating the clustered index, if there already exists - a suitable FTS_DOC_ID column. If not, one will be - created, implying new_clustered */ - for (ulint i = 0; i < ctx->num_to_add_index; i++) { - dict_index_t* index = ctx->add_index[i]; - - if (index->type & DICT_FTS) { - DBUG_ASSERT(index->type == DICT_FTS); - /* We reset DICT_TF2_FTS here because the bit - is left unset when a drop proceeds the add. 
*/ - DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS); - fts_add_index(index, ctx->new_table); - add_fts = true; - } - } - - ut_d(dict_table_check_for_dup_indexes( - ctx->new_table, CHECK_ALL_COMPLETE)); - - if (add_fts) { - fts_optimize_add_table(ctx->new_table); - } - - ut_d(dict_table_check_for_dup_indexes( - ctx->new_table, CHECK_ABORTED_OK)); - ut_a(fts_check_cached_index(ctx->new_table)); - - if (new_clustered) { - /* Since the table has been rebuilt, we remove - all persistent statistics corresponding to the - old copy of the table (which was renamed to - ctx->tmp_name). */ - - char errstr[1024]; - - DBUG_ASSERT(0 == strcmp(ctx->old_table->name, - ctx->tmp_name)); - - if (dict_stats_drop_table( - ctx->new_table->name, - errstr, sizeof(errstr)) - != DB_SUCCESS) { - push_warning_printf( - user_thd, - Sql_condition::WARN_LEVEL_WARN, - ER_ALTER_INFO, - "Deleting persistent statistics" - " for rebuilt table '%s' in" - " InnoDB failed: %s", - table->s->table_name.str, - errstr); - } - - DBUG_EXECUTE_IF("ib_ddl_crash_before_commit", - DBUG_SUICIDE();); - - trx_t* const user_trx = prebuilt->trx; - - row_prebuilt_free(ctx->prebuilt, TRUE); - - /* Drop the copy of the old table, which was - renamed to ctx->tmp_name at the atomic DDL - transaction commit. If the system crashes - before this is completed, some orphan tables - with ctx->tmp_name may be recovered. */ - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - row_merge_drop_table(trx, ctx->old_table); - trx_commit_for_mysql(trx); - - /* Rebuild the prebuilt object. 
*/ - ctx->prebuilt = row_create_prebuilt( - ctx->new_table, altered_table->s->reclength); - trx_start_if_not_started(user_trx); - user_trx->will_lock++; - prebuilt->trx = user_trx; - } - DBUG_INJECT_CRASH("ib_commit_inplace_crash", - crash_inject_count++); - } - - row_mysql_unlock_data_dictionary(trx); - trx_free_for_mysql(trx); - - /* Rebuild index translation table now for temporary tables if we are - restoring secondary keys, as ha_innobase::open will not be called for - the next access. */ - if (dict_table_is_temporary(ctx0->new_table) - && ctx0->num_to_add_index > 0) { - ut_ad(!ctx0->num_to_drop_index); - ut_ad(!ctx0->num_to_drop_fk); - if (!innobase_build_index_translation(altered_table, - ctx0->new_table, - share)) { - MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); - DBUG_RETURN(true); - } - } - - /* TODO: The following code could be executed - while allowing concurrent access to the table - (MDL downgrade). */ - - if (new_clustered) { - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*> - (*pctx); - DBUG_ASSERT(ctx->need_rebuild()); - - alter_stats_rebuild( - ctx->new_table, table->s->table_name.str, - user_thd); - DBUG_INJECT_CRASH("ib_commit_inplace_crash", - crash_inject_count++); - } - } else { - for (inplace_alter_handler_ctx** pctx = ctx_array; - *pctx; pctx++) { - ha_innobase_inplace_ctx* ctx - = static_cast<ha_innobase_inplace_ctx*> - (*pctx); - DBUG_ASSERT(!ctx->need_rebuild()); - - alter_stats_norebuild( - ha_alter_info, ctx, altered_table, - table->s->table_name.str, user_thd); - DBUG_INJECT_CRASH("ib_commit_inplace_crash", - crash_inject_count++); - } - } - - /* TODO: Also perform DROP TABLE and DROP INDEX after - the MDL downgrade. 
*/ - -#ifndef DBUG_OFF - dict_index_t* clust_index = dict_table_get_first_index( - prebuilt->table); - DBUG_ASSERT(!clust_index->online_log); - DBUG_ASSERT(dict_index_get_online_status(clust_index) - == ONLINE_INDEX_COMPLETE); - - for (dict_index_t* index = dict_table_get_first_index( - prebuilt->table); - index; - index = dict_table_get_next_index(index)) { - DBUG_ASSERT(!index->to_be_dropped); - } -#endif /* DBUG_OFF */ - - MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE); - DBUG_RETURN(false); -} - -/** -@param thd - the session -@param start_value - the lower bound -@param max_value - the upper bound (inclusive) */ -UNIV_INTERN -ib_sequence_t::ib_sequence_t( - THD* thd, - ulonglong start_value, - ulonglong max_value) - : - m_max_value(max_value), - m_increment(0), - m_offset(0), - m_next_value(start_value), - m_eof(false) -{ - if (thd != 0 && m_max_value > 0) { - - thd_get_autoinc(thd, &m_offset, &m_increment); - - if (m_increment > 1 || m_offset > 1) { - - /* If there is an offset or increment specified - then we need to work out the exact next value. */ - - m_next_value = innobase_next_autoinc( - start_value, 1, - m_increment, m_offset, m_max_value); - - } else if (start_value == 0) { - /* The next value can never be 0. 
*/ - m_next_value = 1; - } - } else { - m_eof = true; - } -} - -/** -Postfix increment -@return the next value to insert */ -UNIV_INTERN -ulonglong -ib_sequence_t::operator++(int) UNIV_NOTHROW -{ - ulonglong current = m_next_value; - - ut_ad(!m_eof); - ut_ad(m_max_value > 0); - - m_next_value = innobase_next_autoinc( - current, 1, m_increment, m_offset, m_max_value); - - if (m_next_value == m_max_value && current == m_next_value) { - m_eof = true; - } - - return(current); -} diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc deleted file mode 100644 index 9cef04c4244..00000000000 --- a/storage/xtradb/handler/i_s.cc +++ /dev/null @@ -1,9647 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file handler/i_s.cc -InnoDB INFORMATION SCHEMA tables interface to MySQL. 
- -Created July 18, 2007 Vasil Dimov -Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits) -*******************************************************/ -#include "univ.i" -#include <my_global.h> -#ifndef MYSQL_SERVER -#define MYSQL_SERVER /* For Item_* classes */ -#include <item.h> -/* Prevent influence of this definition to other headers */ -#undef MYSQL_SERVER -#else -#include <mysql_priv.h> -#endif //MYSQL_SERVER - -#include <ctype.h> /*toupper*/ -#include <mysqld_error.h> -#include <sql_acl.h> - -#include <m_ctype.h> -#include <hash.h> -#include <myisampack.h> -#include <mysys_err.h> -#include <my_sys.h> -#include "i_s.h" -#include <sql_plugin.h> -#include <innodb_priv.h> - -#include "btr0pcur.h" -#include "btr0types.h" -#include "dict0dict.h" -#include "dict0load.h" -#include "buf0buddy.h" -#include "buf0buf.h" -#include "ibuf0ibuf.h" -#include "dict0mem.h" -#include "dict0types.h" -#include "ha_prototypes.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "trx0i_s.h" -#include "trx0trx.h" -#include "srv0mon.h" -#include "fut0fut.h" -#include "pars0pars.h" -#include "fts0types.h" -#include "fts0opt.h" -#include "fts0priv.h" -#include "log0online.h" -#include "btr0btr.h" -#include "page0zip.h" -#include "sync0arr.h" -#include "fil0fil.h" -#include "fil0crypt.h" - -/** structure associates a name string with a file page type and/or buffer -page state. */ -struct buf_page_desc_t{ - const char* type_str; /*!< String explain the page - type/state */ - ulint type_value; /*!< Page type or page state */ -}; - -/** Change buffer B-tree page */ -#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 1) - -/** Any states greater than I_S_PAGE_TYPE_IBUF would be treated as -unknown. 
*/ -#define I_S_PAGE_TYPE_UNKNOWN (I_S_PAGE_TYPE_IBUF + 1) - -/** We also define I_S_PAGE_TYPE_INDEX as the Index Page's position -in i_s_page_type[] array */ -#define I_S_PAGE_TYPE_INDEX 1 - -/** Name string for File Page Types */ -static buf_page_desc_t i_s_page_type[] = { - {"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED}, - {"INDEX", FIL_PAGE_INDEX}, - {"UNDO_LOG", FIL_PAGE_UNDO_LOG}, - {"INODE", FIL_PAGE_INODE}, - {"IBUF_FREE_LIST", FIL_PAGE_IBUF_FREE_LIST}, - {"IBUF_BITMAP", FIL_PAGE_IBUF_BITMAP}, - {"SYSTEM", FIL_PAGE_TYPE_SYS}, - {"TRX_SYSTEM", FIL_PAGE_TYPE_TRX_SYS}, - {"FILE_SPACE_HEADER", FIL_PAGE_TYPE_FSP_HDR}, - {"EXTENT_DESCRIPTOR", FIL_PAGE_TYPE_XDES}, - {"BLOB", FIL_PAGE_TYPE_BLOB}, - {"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB}, - {"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2}, - {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF}, - {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED}, - {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN} -}; - -/* Check if we can hold all page type in a 4 bit value */ -#if I_S_PAGE_TYPE_UNKNOWN > 1<<4 -# error "i_s_page_type[] is too large" -#endif - -/** This structure defines information we will fetch from pages -currently cached in the buffer pool. It will be used to populate -table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */ -struct buf_page_info_t{ - ulint block_id; /*!< Buffer Pool block ID */ - unsigned space_id:32; /*!< Tablespace ID */ - unsigned page_num:32; /*!< Page number/offset */ - unsigned access_time:32; /*!< Time of first access */ - unsigned pool_id:MAX_BUFFER_POOLS_BITS; - /*!< Buffer Pool ID. 
Must be less than - MAX_BUFFER_POOLS */ - unsigned flush_type:2; /*!< Flush type */ - unsigned io_fix:2; /*!< type of pending I/O operation */ - unsigned fix_count:19; /*!< Count of how manyfold this block - is bufferfixed */ - unsigned hashed:1; /*!< Whether hash index has been - built on this page */ - unsigned is_old:1; /*!< TRUE if the block is in the old - blocks in buf_pool->LRU_old */ - unsigned freed_page_clock:31; /*!< the value of - buf_pool->freed_page_clock */ - unsigned zip_ssize:PAGE_ZIP_SSIZE_BITS; - /*!< Compressed page size */ - unsigned page_state:BUF_PAGE_STATE_BITS; /*!< Page state */ - unsigned page_type:4; /*!< Page type */ - unsigned num_recs:UNIV_PAGE_SIZE_SHIFT_MAX-2; - /*!< Number of records on Page */ - unsigned data_size:UNIV_PAGE_SIZE_SHIFT_MAX; - /*!< Sum of the sizes of the records */ - lsn_t newest_mod; /*!< Log sequence number of - the youngest modification */ - lsn_t oldest_mod; /*!< Log sequence number of - the oldest modification */ - index_id_t index_id; /*!< Index ID if a index page */ -}; - -/* -Use the following types mapping: - -C type ST_FIELD_INFO::field_type ---------------------------------- -long MYSQL_TYPE_LONGLONG -(field_length=MY_INT64_NUM_DECIMAL_DIGITS) - -long unsigned MYSQL_TYPE_LONGLONG -(field_length=MY_INT64_NUM_DECIMAL_DIGITS, field_flags=MY_I_S_UNSIGNED) - -char* MYSQL_TYPE_STRING -(field_length=n) - -float MYSQL_TYPE_FLOAT -(field_length=0 is ignored) - -void* MYSQL_TYPE_LONGLONG -(field_length=MY_INT64_NUM_DECIMAL_DIGITS, field_flags=MY_I_S_UNSIGNED) - -boolean (if else) MYSQL_TYPE_LONG -(field_length=1) - -time_t MYSQL_TYPE_DATETIME -(field_length=0 ignored) ---------------------------------- -*/ - -/** Implemented on sync0arr.cc */ -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table. 
-Loop through each item on sync array, and extract the column -information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table. -@return 0 on success */ -UNIV_INTERN -int -sync_arr_fill_sys_semphore_waits_table( -/*===================================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ); /*!< in: condition (not used) */ - -/*******************************************************************//** -Common function to fill any of the dynamic tables: -INFORMATION_SCHEMA.innodb_trx -INFORMATION_SCHEMA.innodb_locks -INFORMATION_SCHEMA.innodb_lock_waits -@return 0 on success */ -static -int -trx_i_s_common_fill_table( -/*======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ); /*!< in: condition (not used) */ - -/*******************************************************************//** -Unbind a dynamic INFORMATION_SCHEMA table. -@return 0 on success */ -static -int -i_s_common_deinit( -/*==============*/ - void* p); /*!< in/out: table schema object */ -/*******************************************************************//** -Auxiliary function to store time_t value in MYSQL_TYPE_DATETIME -field. 
-@return 0 on success */ -static -int -field_store_time_t( -/*===============*/ - Field* field, /*!< in/out: target field for storage */ - time_t time) /*!< in: value to store */ -{ - MYSQL_TIME my_time; - struct tm tm_time; - - if (time) { -#if 0 - /* use this if you are sure that `variables' and `time_zone' - are always initialized */ - thd->variables.time_zone->gmt_sec_to_TIME( - &my_time, (my_time_t) time); -#else - localtime_r(&time, &tm_time); - localtime_to_TIME(&my_time, &tm_time); - my_time.time_type = MYSQL_TIMESTAMP_DATETIME; -#endif - } else { - memset(&my_time, 0, sizeof(my_time)); - } - - return(field->store_time(&my_time)); -} - -/*******************************************************************//** -Auxiliary function to store char* value in MYSQL_TYPE_STRING field. -@return 0 on success */ -int -field_store_string( -/*===============*/ - Field* field, /*!< in/out: target field for storage */ - const char* str) /*!< in: NUL-terminated utf-8 string, - or NULL */ -{ - int ret; - - if (str != NULL) { - - ret = field->store(str, static_cast<uint>(strlen(str)), - system_charset_info); - field->set_notnull(); - } else { - - ret = 0; /* success */ - field->set_null(); - } - - return(ret); -} - -/*******************************************************************//** -Store the name of an index in a MYSQL_TYPE_VARCHAR field. -Handles the names of incomplete secondary indexes. -@return 0 on success */ -static -int -field_store_index_name( -/*===================*/ - Field* field, /*!< in/out: target field for - storage */ - const char* index_name) /*!< in: NUL-terminated utf-8 - index name, possibly starting with - TEMP_INDEX_PREFIX */ -{ - int ret; - - ut_ad(index_name != NULL); - ut_ad(field->real_type() == MYSQL_TYPE_VARCHAR); - - /* Since TEMP_INDEX_PREFIX is not a valid UTF8, we need to convert - it to something else. 
*/ - if (index_name[0] == TEMP_INDEX_PREFIX) { - char buf[NAME_LEN + 1]; - buf[0] = '?'; - memcpy(buf + 1, index_name + 1, strlen(index_name)); - ret = field->store( - buf, static_cast<uint>(strlen(buf)), - system_charset_info); - } else { - ret = field->store( - index_name, static_cast<uint>(strlen(index_name)), - system_charset_info); - } - - field->set_notnull(); - - return(ret); -} - -/*******************************************************************//** -Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field. -If the value is ULINT_UNDEFINED then the field it set to NULL. -@return 0 on success */ -int -field_store_ulint( -/*==============*/ - Field* field, /*!< in/out: target field for storage */ - ulint n) /*!< in: value to store */ -{ - int ret; - - if (n != ULINT_UNDEFINED) { - - ret = field->store(static_cast<double>(n)); - field->set_notnull(); - } else { - - ret = 0; /* success */ - field->set_null(); - } - - return(ret); -} - -/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx */ -static ST_FIELD_INFO innodb_trx_fields_info[] = -{ -#define IDX_TRX_ID 0 - {STRUCT_FLD(field_name, "trx_id"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_STATE 1 - {STRUCT_FLD(field_name, "trx_state"), - STRUCT_FLD(field_length, TRX_QUE_STATE_STR_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_STARTED 2 - {STRUCT_FLD(field_name, "trx_started"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_REQUESTED_LOCK_ID 3 - 
{STRUCT_FLD(field_name, "trx_requested_lock_id"), - STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_WAIT_STARTED 4 - {STRUCT_FLD(field_name, "trx_wait_started"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_WEIGHT 5 - {STRUCT_FLD(field_name, "trx_weight"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_MYSQL_THREAD_ID 6 - {STRUCT_FLD(field_name, "trx_mysql_thread_id"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_QUERY 7 - {STRUCT_FLD(field_name, "trx_query"), - STRUCT_FLD(field_length, TRX_I_S_TRX_QUERY_MAX_LEN), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_OPERATION_STATE 8 - {STRUCT_FLD(field_name, "trx_operation_state"), - STRUCT_FLD(field_length, TRX_I_S_TRX_OP_STATE_MAX_LEN), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_TABLES_IN_USE 9 - {STRUCT_FLD(field_name, "trx_tables_in_use"), - STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_TABLES_LOCKED 10 - {STRUCT_FLD(field_name, "trx_tables_locked"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_LOCK_STRUCTS 11 - {STRUCT_FLD(field_name, "trx_lock_structs"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_LOCK_MEMORY_BYTES 12 - {STRUCT_FLD(field_name, "trx_lock_memory_bytes"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_ROWS_LOCKED 13 - {STRUCT_FLD(field_name, "trx_rows_locked"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_ROWS_MODIFIED 14 - {STRUCT_FLD(field_name, "trx_rows_modified"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_CONNCURRENCY_TICKETS 15 - {STRUCT_FLD(field_name, "trx_concurrency_tickets"), - STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_ISOLATION_LEVEL 16 - {STRUCT_FLD(field_name, "trx_isolation_level"), - STRUCT_FLD(field_length, TRX_I_S_TRX_ISOLATION_LEVEL_MAX_LEN), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_UNIQUE_CHECKS 17 - {STRUCT_FLD(field_name, "trx_unique_checks"), - STRUCT_FLD(field_length, 1), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 1), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_FOREIGN_KEY_CHECKS 18 - {STRUCT_FLD(field_name, "trx_foreign_key_checks"), - STRUCT_FLD(field_length, 1), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 1), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_LAST_FOREIGN_KEY_ERROR 19 - {STRUCT_FLD(field_name, "trx_last_foreign_key_error"), - STRUCT_FLD(field_length, TRX_I_S_TRX_FK_ERROR_MAX_LEN), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_ADAPTIVE_HASH_LATCHED 20 - {STRUCT_FLD(field_name, "trx_adaptive_hash_latched"), - STRUCT_FLD(field_length, 1), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_ADAPTIVE_HASH_TIMEOUT 21 - {STRUCT_FLD(field_name, "trx_adaptive_hash_timeout"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), 
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_READ_ONLY 22 - {STRUCT_FLD(field_name, "trx_is_read_only"), - STRUCT_FLD(field_length, 1), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TRX_AUTOCOMMIT_NON_LOCKING 23 - {STRUCT_FLD(field_name, "trx_autocommit_non_locking"), - STRUCT_FLD(field_length, 1), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_trx -table with it. -@return 0 on success */ -static -int -fill_innodb_trx_from_cache( -/*=======================*/ - trx_i_s_cache_t* cache, /*!< in: cache to read from */ - THD* thd, /*!< in: used to call - schema_table_store_record() */ - TABLE* table) /*!< in/out: fill this table */ -{ - Field** fields; - ulint rows_num; - char lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1]; - ulint i; - - DBUG_ENTER("fill_innodb_trx_from_cache"); - - fields = table->field; - - rows_num = trx_i_s_cache_get_rows_used(cache, - I_S_INNODB_TRX); - - for (i = 0; i < rows_num; i++) { - - i_s_trx_row_t* row; - char trx_id[TRX_ID_MAX_LEN + 1]; - - row = (i_s_trx_row_t*) - trx_i_s_cache_get_nth_row( - cache, I_S_INNODB_TRX, i); - - /* trx_id */ - ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, row->trx_id); - OK(field_store_string(fields[IDX_TRX_ID], trx_id)); - - /* trx_state */ - OK(field_store_string(fields[IDX_TRX_STATE], - row->trx_state)); - - /* trx_started */ - OK(field_store_time_t(fields[IDX_TRX_STARTED], - (time_t) row->trx_started)); - - /* trx_requested_lock_id */ - /* trx_wait_started */ - if (row->trx_wait_started != 0) { - - 
OK(field_store_string( - fields[IDX_TRX_REQUESTED_LOCK_ID], - trx_i_s_create_lock_id( - row->requested_lock_row, - lock_id, sizeof(lock_id)))); - /* field_store_string() sets it no notnull */ - - OK(field_store_time_t( - fields[IDX_TRX_WAIT_STARTED], - (time_t) row->trx_wait_started)); - fields[IDX_TRX_WAIT_STARTED]->set_notnull(); - } else { - - fields[IDX_TRX_REQUESTED_LOCK_ID]->set_null(); - fields[IDX_TRX_WAIT_STARTED]->set_null(); - } - - /* trx_weight */ - OK(fields[IDX_TRX_WEIGHT]->store((longlong) row->trx_weight, - true)); - - /* trx_mysql_thread_id */ - OK(fields[IDX_TRX_MYSQL_THREAD_ID]->store( - static_cast<double>(row->trx_mysql_thread_id))); - - /* trx_query */ - if (row->trx_query) { - /* store will do appropriate character set - conversion check */ - fields[IDX_TRX_QUERY]->store( - row->trx_query, - static_cast<uint>(strlen(row->trx_query)), - row->trx_query_cs); - fields[IDX_TRX_QUERY]->set_notnull(); - } else { - fields[IDX_TRX_QUERY]->set_null(); - } - - /* trx_operation_state */ - OK(field_store_string(fields[IDX_TRX_OPERATION_STATE], - row->trx_operation_state)); - - /* trx_tables_in_use */ - OK(fields[IDX_TRX_TABLES_IN_USE]->store( - (longlong) row->trx_tables_in_use, true)); - - /* trx_tables_locked */ - OK(fields[IDX_TRX_TABLES_LOCKED]->store( - (longlong) row->trx_tables_locked, true)); - - /* trx_lock_structs */ - OK(fields[IDX_TRX_LOCK_STRUCTS]->store( - (longlong) row->trx_lock_structs, true)); - - /* trx_lock_memory_bytes */ - OK(fields[IDX_TRX_LOCK_MEMORY_BYTES]->store( - (longlong) row->trx_lock_memory_bytes, true)); - - /* trx_rows_locked */ - OK(fields[IDX_TRX_ROWS_LOCKED]->store( - (longlong) row->trx_rows_locked, true)); - - /* trx_rows_modified */ - OK(fields[IDX_TRX_ROWS_MODIFIED]->store( - (longlong) row->trx_rows_modified, true)); - - /* trx_concurrency_tickets */ - OK(fields[IDX_TRX_CONNCURRENCY_TICKETS]->store( - (longlong) row->trx_concurrency_tickets, true)); - - /* trx_isolation_level */ - 
OK(field_store_string(fields[IDX_TRX_ISOLATION_LEVEL], - row->trx_isolation_level)); - - /* trx_unique_checks */ - OK(fields[IDX_TRX_UNIQUE_CHECKS]->store( - static_cast<double>(row->trx_unique_checks))); - - /* trx_foreign_key_checks */ - OK(fields[IDX_TRX_FOREIGN_KEY_CHECKS]->store( - static_cast<double>(row->trx_foreign_key_checks))); - - /* trx_last_foreign_key_error */ - OK(field_store_string(fields[IDX_TRX_LAST_FOREIGN_KEY_ERROR], - row->trx_foreign_key_error)); - - /* trx_adaptive_hash_latched */ - OK(fields[IDX_TRX_ADAPTIVE_HASH_LATCHED]->store( - static_cast<double>(row->trx_has_search_latch))); - - /* trx_adaptive_hash_timeout */ - OK(fields[IDX_TRX_ADAPTIVE_HASH_TIMEOUT]->store( - (longlong) row->trx_search_latch_timeout, true)); - - /* trx_is_read_only*/ - OK(fields[IDX_TRX_READ_ONLY]->store( - (longlong) row->trx_is_read_only, true)); - - /* trx_is_autocommit_non_locking */ - OK(fields[IDX_TRX_AUTOCOMMIT_NON_LOCKING]->store( - (longlong) row->trx_is_autocommit_non_locking, - true)); - - OK(schema_table_store_record(thd, table)); - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_trx -@return 0 on success */ -static -int -innodb_trx_init( -/*============*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_trx_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_trx_fields_info; - schema->fill_table = trx_i_s_common_fill_table; - - DBUG_RETURN(0); -} - -static struct st_mysql_information_schema i_s_info = -{ - MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION -}; - -UNIV_INTERN struct st_maria_plugin i_s_innodb_trx = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, 
"INNODB_TRX"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB transactions"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_trx_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_locks */ -static ST_FIELD_INFO innodb_locks_fields_info[] = -{ -#define IDX_LOCK_ID 0 - {STRUCT_FLD(field_name, "lock_id"), - STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_TRX_ID 1 - {STRUCT_FLD(field_name, "lock_trx_id"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_MODE 2 - {STRUCT_FLD(field_name, "lock_mode"), - /* S[,GAP] X[,GAP] IS[,GAP] IX[,GAP] AUTO_INC UNKNOWN */ - STRUCT_FLD(field_length, 32), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_TYPE 3 - {STRUCT_FLD(field_name, "lock_type"), - 
STRUCT_FLD(field_length, 32 /* RECORD|TABLE|UNKNOWN */), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_TABLE 4 - {STRUCT_FLD(field_name, "lock_table"), - STRUCT_FLD(field_length, 1024), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_INDEX 5 - {STRUCT_FLD(field_name, "lock_index"), - STRUCT_FLD(field_length, 1024), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_SPACE 6 - {STRUCT_FLD(field_name, "lock_space"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_PAGE 7 - {STRUCT_FLD(field_name, "lock_page"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_REC 8 - {STRUCT_FLD(field_name, "lock_rec"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_LOCK_DATA 9 - {STRUCT_FLD(field_name, "lock_data"), - STRUCT_FLD(field_length, TRX_I_S_LOCK_DATA_MAX_LEN), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - 
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Read data from cache buffer and fill the INFORMATION_SCHEMA.innodb_locks -table with it. -@return 0 on success */ -static -int -fill_innodb_locks_from_cache( -/*=========================*/ - trx_i_s_cache_t* cache, /*!< in: cache to read from */ - THD* thd, /*!< in: MySQL client connection */ - TABLE* table) /*!< in/out: fill this table */ -{ - Field** fields; - ulint rows_num; - char lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1]; - ulint i; - - DBUG_ENTER("fill_innodb_locks_from_cache"); - - fields = table->field; - - rows_num = trx_i_s_cache_get_rows_used(cache, - I_S_INNODB_LOCKS); - - for (i = 0; i < rows_num; i++) { - - i_s_locks_row_t* row; - char buf[MAX_FULL_NAME_LEN + 1]; - const char* bufend; - - char lock_trx_id[TRX_ID_MAX_LEN + 1]; - - row = (i_s_locks_row_t*) - trx_i_s_cache_get_nth_row( - cache, I_S_INNODB_LOCKS, i); - - /* lock_id */ - trx_i_s_create_lock_id(row, lock_id, sizeof(lock_id)); - OK(field_store_string(fields[IDX_LOCK_ID], - lock_id)); - - /* lock_trx_id */ - ut_snprintf(lock_trx_id, sizeof(lock_trx_id), - TRX_ID_FMT, row->lock_trx_id); - OK(field_store_string(fields[IDX_LOCK_TRX_ID], lock_trx_id)); - - /* lock_mode */ - OK(field_store_string(fields[IDX_LOCK_MODE], - row->lock_mode)); - - /* lock_type */ - OK(field_store_string(fields[IDX_LOCK_TYPE], - row->lock_type)); - - /* lock_table */ - bufend = innobase_convert_name(buf, sizeof(buf), - row->lock_table, - strlen(row->lock_table), - thd, TRUE); - OK(fields[IDX_LOCK_TABLE]->store( - buf, static_cast<uint>(bufend - buf), - system_charset_info)); - - /* lock_index */ - if (row->lock_index != NULL) { - OK(field_store_index_name(fields[IDX_LOCK_INDEX], - row->lock_index)); - } else { - fields[IDX_LOCK_INDEX]->set_null(); - } - - /* lock_space */ - 
OK(field_store_ulint(fields[IDX_LOCK_SPACE], - row->lock_space)); - - /* lock_page */ - OK(field_store_ulint(fields[IDX_LOCK_PAGE], - row->lock_page)); - - /* lock_rec */ - OK(field_store_ulint(fields[IDX_LOCK_REC], - row->lock_rec)); - - /* lock_data */ - OK(field_store_string(fields[IDX_LOCK_DATA], - row->lock_data)); - - OK(schema_table_store_record(thd, table)); - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_locks -@return 0 on success */ -static -int -innodb_locks_init( -/*==============*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_locks_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_locks_fields_info; - schema->fill_table = trx_i_s_common_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_locks = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_LOCKS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB conflicting locks"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_locks_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct 
st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_lock_waits */ -static ST_FIELD_INFO innodb_lock_waits_fields_info[] = -{ -#define IDX_REQUESTING_TRX_ID 0 - {STRUCT_FLD(field_name, "requesting_trx_id"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_REQUESTED_LOCK_ID 1 - {STRUCT_FLD(field_name, "requested_lock_id"), - STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BLOCKING_TRX_ID 2 - {STRUCT_FLD(field_name, "blocking_trx_id"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BLOCKING_LOCK_ID 3 - {STRUCT_FLD(field_name, "blocking_lock_id"), - STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Read data from cache buffer and fill the -INFORMATION_SCHEMA.innodb_lock_waits table with it. 
-@return 0 on success */ -static -int -fill_innodb_lock_waits_from_cache( -/*==============================*/ - trx_i_s_cache_t* cache, /*!< in: cache to read from */ - THD* thd, /*!< in: used to call - schema_table_store_record() */ - TABLE* table) /*!< in/out: fill this table */ -{ - Field** fields; - ulint rows_num; - char requested_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1]; - char blocking_lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1]; - ulint i; - - DBUG_ENTER("fill_innodb_lock_waits_from_cache"); - - fields = table->field; - - rows_num = trx_i_s_cache_get_rows_used(cache, - I_S_INNODB_LOCK_WAITS); - - for (i = 0; i < rows_num; i++) { - - i_s_lock_waits_row_t* row; - - char requesting_trx_id[TRX_ID_MAX_LEN + 1]; - char blocking_trx_id[TRX_ID_MAX_LEN + 1]; - - row = (i_s_lock_waits_row_t*) - trx_i_s_cache_get_nth_row( - cache, I_S_INNODB_LOCK_WAITS, i); - - /* requesting_trx_id */ - ut_snprintf(requesting_trx_id, sizeof(requesting_trx_id), - TRX_ID_FMT, row->requested_lock_row->lock_trx_id); - OK(field_store_string(fields[IDX_REQUESTING_TRX_ID], - requesting_trx_id)); - - /* requested_lock_id */ - OK(field_store_string( - fields[IDX_REQUESTED_LOCK_ID], - trx_i_s_create_lock_id( - row->requested_lock_row, - requested_lock_id, - sizeof(requested_lock_id)))); - - /* blocking_trx_id */ - ut_snprintf(blocking_trx_id, sizeof(blocking_trx_id), - TRX_ID_FMT, row->blocking_lock_row->lock_trx_id); - OK(field_store_string(fields[IDX_BLOCKING_TRX_ID], - blocking_trx_id)); - - /* blocking_lock_id */ - OK(field_store_string( - fields[IDX_BLOCKING_LOCK_ID], - trx_i_s_create_lock_id( - row->blocking_lock_row, - blocking_lock_id, - sizeof(blocking_lock_id)))); - - OK(schema_table_store_record(thd, table)); - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_lock_waits -@return 0 on success */ -static -int -innodb_lock_waits_init( -/*===================*/ - void* p) /*!< in/out: table schema 
object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_lock_waits_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_lock_waits_fields_info; - schema->fill_table = trx_i_s_common_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_lock_waits = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_LOCK_WAITS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB which lock is blocking which"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_lock_waits_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/*******************************************************************//** -Common function to fill any of the dynamic tables: -INFORMATION_SCHEMA.innodb_trx -INFORMATION_SCHEMA.innodb_locks -INFORMATION_SCHEMA.innodb_lock_waits -@return 0 on success */ -static -int -trx_i_s_common_fill_table( -/*======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - const char* table_name; - int ret; - 
trx_i_s_cache_t* cache; - - DBUG_ENTER("trx_i_s_common_fill_table"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - /* minimize the number of places where global variables are - referenced */ - cache = trx_i_s_cache; - - /* which table we have to fill? */ - table_name = tables->schema_table_name; - /* or table_name = tables->schema_table->table_name; */ - - RETURN_IF_INNODB_NOT_STARTED(table_name); - - /* update the cache */ - trx_i_s_cache_start_write(cache); - trx_i_s_possibly_fetch_data_into_cache(cache); - trx_i_s_cache_end_write(cache); - - if (trx_i_s_cache_is_truncated(cache)) { - - /* XXX show warning to user if possible */ - fprintf(stderr, "Warning: data in %s truncated due to " - "memory limit of %d bytes\n", table_name, - TRX_I_S_MEM_LIMIT); - } - - ret = 0; - - trx_i_s_cache_start_read(cache); - - if (innobase_strcasecmp(table_name, "innodb_trx") == 0) { - - if (fill_innodb_trx_from_cache( - cache, thd, tables->table) != 0) { - - ret = 1; - } - - } else if (innobase_strcasecmp(table_name, "innodb_locks") == 0) { - - if (fill_innodb_locks_from_cache( - cache, thd, tables->table) != 0) { - - ret = 1; - } - - } else if (innobase_strcasecmp(table_name, "innodb_lock_waits") == 0) { - - if (fill_innodb_lock_waits_from_cache( - cache, thd, tables->table) != 0) { - - ret = 1; - } - - } else { - - /* huh! what happened!? 
*/ - fprintf(stderr, - "InnoDB: trx_i_s_common_fill_table() was " - "called to fill unknown table: %s.\n" - "This function only knows how to fill " - "innodb_trx, innodb_locks and " - "innodb_lock_waits tables.\n", table_name); - - ret = 1; - } - - trx_i_s_cache_end_read(cache); - -#if 0 - DBUG_RETURN(ret); -#else - /* if this function returns something else than 0 then a - deadlock occurs between the mysqld server and mysql client, - see http://bugs.mysql.com/29900 ; when that bug is resolved - we can enable the DBUG_RETURN(ret) above */ - ret++; // silence a gcc46 warning - DBUG_RETURN(0); -#endif -} - -/* Fields of the dynamic table information_schema.innodb_cmp. */ -static ST_FIELD_INFO i_s_cmp_fields_info[] = -{ - {STRUCT_FLD(field_name, "page_size"), - STRUCT_FLD(field_length, 5), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, "Compressed Page Size"), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "compress_ops"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, "Total Number of Compressions"), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "compress_ops_ok"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, "Total Number of" - " Successful Compressions"), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "compress_time"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, "Total Duration of Compressions," - " in Seconds"), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "uncompress_ops"), - 
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, "Total Number of Decompressions"), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "uncompress_time"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, "Total Duration of Decompressions," - " in Seconds"), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmp or -innodb_cmp_reset. -@return 0 on success, 1 on failure */ -static -int -i_s_cmp_fill_low( -/*=============*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* , /*!< in: condition (ignored) */ - ibool reset) /*!< in: TRUE=reset cumulated counts */ -{ - TABLE* table = (TABLE*) tables->table; - int status = 0; - - DBUG_ENTER("i_s_cmp_fill_low"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - for (uint i = 0; i < PAGE_ZIP_SSIZE_MAX; i++) { - page_zip_stat_t* zip_stat = &page_zip_stat[i]; - - table->field[0]->store(UNIV_ZIP_SIZE_MIN << i); - - /* The cumulated counts are not protected by any - mutex. Thus, some operation in page0zip.cc could - increment a counter between the time we read it and - clear it. We could introduce mutex protection, but it - could cause a measureable performance hit in - page0zip.cc. 
*/ - table->field[1]->store( - static_cast<double>(zip_stat->compressed)); - table->field[2]->store( - static_cast<double>(zip_stat->compressed_ok)); - table->field[3]->store( - static_cast<double>(zip_stat->compressed_usec / 1000000)); - table->field[4]->store( - static_cast<double>(zip_stat->decompressed)); - table->field[5]->store( - static_cast<double>(zip_stat->decompressed_usec / 1000000)); - - if (reset) { - memset(zip_stat, 0, sizeof *zip_stat); - } - - if (schema_table_store_record(thd, table)) { - status = 1; - break; - } - } - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmp. -@return 0 on success, 1 on failure */ -static -int -i_s_cmp_fill( -/*=========*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* cond) /*!< in: condition (ignored) */ -{ - return(i_s_cmp_fill_low(thd, tables, cond, FALSE)); -} - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmp_reset. -@return 0 on success, 1 on failure */ -static -int -i_s_cmp_reset_fill( -/*===============*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* cond) /*!< in: condition (ignored) */ -{ - return(i_s_cmp_fill_low(thd, tables, cond, TRUE)); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_cmp. -@return 0 on success */ -static -int -i_s_cmp_init( -/*=========*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_cmp_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_cmp_fields_info; - schema->fill_table = i_s_cmp_fill; - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_cmp_reset. 
-@return 0 on success */ -static -int -i_s_cmp_reset_init( -/*===============*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_cmp_reset_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_cmp_fields_info; - schema->fill_table = i_s_cmp_reset_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_CMP"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Statistics for the InnoDB compression"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_cmp_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp_reset = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_CMP_RESET"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - 
STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Statistics for the InnoDB compression;" - " reset cumulated counts"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_cmp_reset_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/* Fields of the dynamic tables -information_schema.innodb_cmp_per_index and -information_schema.innodb_cmp_per_index_reset. */ -static ST_FIELD_INFO i_s_cmp_per_index_fields_info[] = -{ -#define IDX_DATABASE_NAME 0 - {STRUCT_FLD(field_name, "database_name"), - STRUCT_FLD(field_length, 192), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_TABLE_NAME 1 - {STRUCT_FLD(field_name, "table_name"), - STRUCT_FLD(field_length, 192), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_INDEX_NAME 2 - {STRUCT_FLD(field_name, "index_name"), - STRUCT_FLD(field_length, 192), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_COMPRESS_OPS 3 - {STRUCT_FLD(field_name, "compress_ops"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), 
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_COMPRESS_OPS_OK 4 - {STRUCT_FLD(field_name, "compress_ops_ok"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_COMPRESS_TIME 5 - {STRUCT_FLD(field_name, "compress_time"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_UNCOMPRESS_OPS 6 - {STRUCT_FLD(field_name, "uncompress_ops"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_UNCOMPRESS_TIME 7 - {STRUCT_FLD(field_name, "uncompress_time"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Fill the dynamic table -information_schema.innodb_cmp_per_index or -information_schema.innodb_cmp_per_index_reset. 
-@return 0 on success, 1 on failure */ -static -int -i_s_cmp_per_index_fill_low( -/*=======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* , /*!< in: condition (ignored) */ - ibool reset) /*!< in: TRUE=reset cumulated counts */ -{ - TABLE* table = tables->table; - Field** fields = table->field; - int status = 0; - - DBUG_ENTER("i_s_cmp_per_index_fill_low"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* Create a snapshot of the stats so we do not bump into lock - order violations with dict_sys->mutex below. */ - mutex_enter(&page_zip_stat_per_index_mutex); - page_zip_stat_per_index_t snap (page_zip_stat_per_index); - mutex_exit(&page_zip_stat_per_index_mutex); - - mutex_enter(&dict_sys->mutex); - - page_zip_stat_per_index_t::iterator iter; - ulint i; - - for (iter = snap.begin(), i = 0; iter != snap.end(); iter++, i++) { - - char name[192]; - dict_index_t* index = dict_index_find_on_id_low(iter->first); - - if (index != NULL) { - char db_utf8[MAX_DB_UTF8_LEN]; - char table_utf8[MAX_TABLE_UTF8_LEN]; - - dict_fs2utf8(index->table_name, - db_utf8, sizeof(db_utf8), - table_utf8, sizeof(table_utf8)); - - field_store_string(fields[IDX_DATABASE_NAME], db_utf8); - field_store_string(fields[IDX_TABLE_NAME], table_utf8); - field_store_index_name(fields[IDX_INDEX_NAME], - index->name); - } else { - /* index not found */ - ut_snprintf(name, sizeof(name), - "index_id:" IB_ID_FMT, iter->first); - field_store_string(fields[IDX_DATABASE_NAME], - "unknown"); - field_store_string(fields[IDX_TABLE_NAME], - "unknown"); - field_store_string(fields[IDX_INDEX_NAME], - name); - } - - fields[IDX_COMPRESS_OPS]->store( - static_cast<double>(iter->second.compressed)); - - fields[IDX_COMPRESS_OPS_OK]->store( - static_cast<double>(iter->second.compressed_ok)); - - fields[IDX_COMPRESS_TIME]->store( 
- static_cast<double>(iter->second.compressed_usec / 1000000)); - - fields[IDX_UNCOMPRESS_OPS]->store( - static_cast<double>(iter->second.decompressed)); - - fields[IDX_UNCOMPRESS_TIME]->store( - static_cast<double>(iter->second.decompressed_usec / 1000000)); - - if (schema_table_store_record(thd, table)) { - status = 1; - break; - } - - /* Release and reacquire the dict mutex to allow other - threads to proceed. This could eventually result in the - contents of INFORMATION_SCHEMA.innodb_cmp_per_index being - inconsistent, but it is an acceptable compromise. */ - if (i % 1000 == 0) { - mutex_exit(&dict_sys->mutex); - mutex_enter(&dict_sys->mutex); - } - } - - mutex_exit(&dict_sys->mutex); - - if (reset) { - page_zip_reset_stat_per_index(); - } - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmp_per_index. -@return 0 on success, 1 on failure */ -static -int -i_s_cmp_per_index_fill( -/*===================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* cond) /*!< in: condition (ignored) */ -{ - return(i_s_cmp_per_index_fill_low(thd, tables, cond, FALSE)); -} - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmp_per_index_reset. -@return 0 on success, 1 on failure */ -static -int -i_s_cmp_per_index_reset_fill( -/*=========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* cond) /*!< in: condition (ignored) */ -{ - return(i_s_cmp_per_index_fill_low(thd, tables, cond, TRUE)); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_cmp_per_index. 
-@return 0 on success */ -static -int -i_s_cmp_per_index_init( -/*===================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_cmp_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_cmp_per_index_fields_info; - schema->fill_table = i_s_cmp_per_index_fill; - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_cmp_per_index_reset. -@return 0 on success */ -static -int -i_s_cmp_per_index_reset_init( -/*=========================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_cmp_reset_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_cmp_per_index_fields_info; - schema->fill_table = i_s_cmp_per_index_reset_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_CMP_PER_INDEX"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index)"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_cmp_per_index_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, 
NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index_reset = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_CMP_PER_INDEX_RESET"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index);" - " reset cumulated counts"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_cmp_per_index_reset_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/* Fields of the dynamic table information_schema.innodb_cmpmem. 
*/
static ST_FIELD_INFO	i_s_cmpmem_fields_info[] =
{
	/* column 0 */
	{STRUCT_FLD(field_name,		"page_size"),
	 STRUCT_FLD(field_length,	5),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	0),
	 STRUCT_FLD(old_name,		"Buddy Block Size"),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* column 1 */
	{STRUCT_FLD(field_name,		"buffer_pool_instance"),
	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	0),
	 STRUCT_FLD(old_name,		"Buffer Pool Id"),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* column 2 */
	{STRUCT_FLD(field_name,		"pages_used"),
	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	0),
	 STRUCT_FLD(old_name,		"Currently in Use"),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* column 3 */
	{STRUCT_FLD(field_name,		"pages_free"),
	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	0),
	 STRUCT_FLD(old_name,		"Currently Available"),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* column 4: the only 64-bit column of this table */
	{STRUCT_FLD(field_name,		"relocation_ops"),
	 STRUCT_FLD(field_length,	MY_INT64_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONGLONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	0),
	 STRUCT_FLD(old_name,		"Total Number of Relocations"),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	/* column 5 */
	{STRUCT_FLD(field_name,		"relocation_time"),
	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
	 STRUCT_FLD(value,		0),
	 STRUCT_FLD(field_flags,	0),
	 STRUCT_FLD(old_name,
		    "Total Duration of Relocations, in Seconds"),
	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},

	END_OF_ST_FIELD_INFO
};

/*******************************************************************//**
Fill the dynamic table information_schema.innodb_cmpmem or
innodb_cmpmem_reset.
-@return 0 on success, 1 on failure */ -static -int -i_s_cmpmem_fill_low( -/*================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* , /*!< in: condition (ignored) */ - ibool reset) /*!< in: TRUE=reset cumulated counts */ -{ - int status = 0; - TABLE* table = (TABLE*) tables->table; - - DBUG_ENTER("i_s_cmpmem_fill_low"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - status = 0; - - buf_pool = buf_pool_from_array(i); - - mutex_enter(&buf_pool->zip_free_mutex); - - for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) { - buf_buddy_stat_t* buddy_stat; - - buddy_stat = &buf_pool->buddy_stat[x]; - - table->field[0]->store(BUF_BUDDY_LOW << x); - table->field[1]->store(static_cast<double>(i)); - table->field[2]->store(static_cast<double>( - buddy_stat->used)); - table->field[3]->store(static_cast<double>( - (x < BUF_BUDDY_SIZES) - ? UT_LIST_GET_LEN(buf_pool->zip_free[x]) - : 0)); - table->field[4]->store( - (longlong) buddy_stat->relocated, true); - table->field[5]->store( - static_cast<double>(buddy_stat->relocated_usec / 1000000)); - - if (reset) { - /* This is protected by - buf_pool->zip_free_mutex. */ - buddy_stat->relocated = 0; - buddy_stat->relocated_usec = 0; - } - - if (schema_table_store_record(thd, table)) { - status = 1; - break; - } - } - - mutex_exit(&buf_pool->zip_free_mutex); - - if (status) { - break; - } - } - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmpmem. 
-@return 0 on success, 1 on failure */ -static -int -i_s_cmpmem_fill( -/*============*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* cond) /*!< in: condition (ignored) */ -{ - return(i_s_cmpmem_fill_low(thd, tables, cond, FALSE)); -} - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_cmpmem_reset. -@return 0 on success, 1 on failure */ -static -int -i_s_cmpmem_reset_fill( -/*==================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* cond) /*!< in: condition (ignored) */ -{ - return(i_s_cmpmem_fill_low(thd, tables, cond, TRUE)); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_cmpmem. -@return 0 on success */ -static -int -i_s_cmpmem_init( -/*============*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_cmpmem_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_cmpmem_fields_info; - schema->fill_table = i_s_cmpmem_fill; - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_cmpmem_reset. 
-@return 0 on success */ -static -int -i_s_cmpmem_reset_init( -/*==================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_cmpmem_reset_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_cmpmem_fields_info; - schema->fill_table = i_s_cmpmem_reset_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_CMPMEM"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Statistics for the InnoDB compressed buffer pool"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_cmpmem_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem_reset = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_CMPMEM_RESET"), - - /* plugin author (for SHOW 
PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Statistics for the InnoDB compressed buffer pool;" - " reset cumulated counts"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_cmpmem_reset_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_metrics */ -static ST_FIELD_INFO innodb_metrics_fields_info[] = -{ -#define METRIC_NAME 0 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_SUBSYS 1 - {STRUCT_FLD(field_name, "SUBSYSTEM"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_VALUE_START 2 - {STRUCT_FLD(field_name, "COUNT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_MAX_VALUE_START 3 - {STRUCT_FLD(field_name, "MAX_COUNT"), - STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_MIN_VALUE_START 4 - {STRUCT_FLD(field_name, "MIN_COUNT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_AVG_VALUE_START 5 - {STRUCT_FLD(field_name, "AVG_COUNT"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_VALUE_RESET 6 - {STRUCT_FLD(field_name, "COUNT_RESET"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_MAX_VALUE_RESET 7 - {STRUCT_FLD(field_name, "MAX_COUNT_RESET"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_MIN_VALUE_RESET 8 - {STRUCT_FLD(field_name, "MIN_COUNT_RESET"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_AVG_VALUE_RESET 9 - {STRUCT_FLD(field_name, "AVG_COUNT_RESET"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - 
STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_START_TIME 10 - {STRUCT_FLD(field_name, "TIME_ENABLED"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_STOP_TIME 11 - {STRUCT_FLD(field_name, "TIME_DISABLED"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_TIME_ELAPSED 12 - {STRUCT_FLD(field_name, "TIME_ELAPSED"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_RESET_TIME 13 - {STRUCT_FLD(field_name, "TIME_RESET"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_STATUS 14 - {STRUCT_FLD(field_name, "STATUS"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_TYPE 15 - {STRUCT_FLD(field_name, "TYPE"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define METRIC_DESC 16 - {STRUCT_FLD(field_name, "COMMENT"), - STRUCT_FLD(field_length, 
NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Fill the information schema metrics table. -@return 0 on success */ -static -int -i_s_metrics_fill( -/*=============*/ - THD* thd, /*!< in: thread */ - TABLE* table_to_fill) /*!< in/out: fill this table */ -{ - int count; - Field** fields; - double time_diff = 0; - monitor_info_t* monitor_info; - mon_type_t min_val; - mon_type_t max_val; - - DBUG_ENTER("i_s_metrics_fill"); - fields = table_to_fill->field; - - for (count = 0; count < NUM_MONITOR; count++) { - monitor_info = srv_mon_get_info((monitor_id_t) count); - - /* A good place to sanity check the Monitor ID */ - ut_a(count == monitor_info->monitor_id); - - /* If the item refers to a Module, nothing to fill, - continue. */ - if ((monitor_info->monitor_type & MONITOR_MODULE) - || (monitor_info->monitor_type & MONITOR_HIDDEN)) { - continue; - } - - /* If this is an existing "status variable", and - its corresponding counter is still on, we need - to calculate the result from its corresponding - counter. */ - if (monitor_info->monitor_type & MONITOR_EXISTING - && MONITOR_IS_ON(count)) { - srv_mon_process_existing_counter((monitor_id_t) count, - MONITOR_GET_VALUE); - } - - /* Fill in counter's basic information */ - OK(field_store_string(fields[METRIC_NAME], - monitor_info->monitor_name)); - - OK(field_store_string(fields[METRIC_SUBSYS], - monitor_info->monitor_module)); - - OK(field_store_string(fields[METRIC_DESC], - monitor_info->monitor_desc)); - - /* Fill in counter values */ - OK(fields[METRIC_VALUE_RESET]->store( - MONITOR_VALUE(count), FALSE)); - - OK(fields[METRIC_VALUE_START]->store( - MONITOR_VALUE_SINCE_START(count), FALSE)); - - /* If the max value is MAX_RESERVED, counter max - value has not been updated. 
Set the column value - to NULL. */ - if (MONITOR_MAX_VALUE(count) == MAX_RESERVED - || MONITOR_MAX_MIN_NOT_INIT(count)) { - fields[METRIC_MAX_VALUE_RESET]->set_null(); - } else { - OK(fields[METRIC_MAX_VALUE_RESET]->store( - MONITOR_MAX_VALUE(count), FALSE)); - fields[METRIC_MAX_VALUE_RESET]->set_notnull(); - } - - /* If the min value is MAX_RESERVED, counter min - value has not been updated. Set the column value - to NULL. */ - if (MONITOR_MIN_VALUE(count) == MIN_RESERVED - || MONITOR_MAX_MIN_NOT_INIT(count)) { - fields[METRIC_MIN_VALUE_RESET]->set_null(); - } else { - OK(fields[METRIC_MIN_VALUE_RESET]->store( - MONITOR_MIN_VALUE(count), FALSE)); - fields[METRIC_MIN_VALUE_RESET]->set_notnull(); - } - - /* Calculate the max value since counter started */ - max_val = srv_mon_calc_max_since_start((monitor_id_t) count); - - if (max_val == MAX_RESERVED - || MONITOR_MAX_MIN_NOT_INIT(count)) { - fields[METRIC_MAX_VALUE_START]->set_null(); - } else { - OK(fields[METRIC_MAX_VALUE_START]->store( - max_val, FALSE)); - fields[METRIC_MAX_VALUE_START]->set_notnull(); - } - - /* Calculate the min value since counter started */ - min_val = srv_mon_calc_min_since_start((monitor_id_t) count); - - if (min_val == MIN_RESERVED - || MONITOR_MAX_MIN_NOT_INIT(count)) { - fields[METRIC_MIN_VALUE_START]->set_null(); - } else { - OK(fields[METRIC_MIN_VALUE_START]->store( - min_val, FALSE)); - - fields[METRIC_MIN_VALUE_START]->set_notnull(); - } - - /* If monitor has been enabled (no matter it is disabled - or not now), fill METRIC_START_TIME and METRIC_TIME_ELAPSED - field */ - if (MONITOR_FIELD(count, mon_start_time)) { - OK(field_store_time_t(fields[METRIC_START_TIME], - (time_t)MONITOR_FIELD(count, mon_start_time))); - fields[METRIC_START_TIME]->set_notnull(); - - /* If monitor is enabled, the TIME_ELAPSED is the - time difference between current and time when monitor - is enabled. 
Otherwise, it is the time difference - between time when monitor is enabled and time - when it is disabled */ - if (MONITOR_IS_ON(count)) { - time_diff = difftime(time(NULL), - MONITOR_FIELD(count, mon_start_time)); - } else { - time_diff = difftime( - MONITOR_FIELD(count, mon_stop_time), - MONITOR_FIELD(count, mon_start_time)); - } - - OK(fields[METRIC_TIME_ELAPSED]->store( - time_diff)); - fields[METRIC_TIME_ELAPSED]->set_notnull(); - } else { - fields[METRIC_START_TIME]->set_null(); - fields[METRIC_TIME_ELAPSED]->set_null(); - time_diff = 0; - } - - /* Unless MONITOR__NO_AVERAGE is marked, we will need - to calculate the average value. If this is a monitor set - owner marked by MONITOR_SET_OWNER, divide - the value by another counter (number of calls) designated - by monitor_info->monitor_related_id. - Otherwise average the counter value by the time between the - time that the counter is enabled and time it is disabled - or time it is sampled. */ - if (!(monitor_info->monitor_type & MONITOR_NO_AVERAGE) - && (monitor_info->monitor_type & MONITOR_SET_OWNER) - && monitor_info->monitor_related_id) { - mon_type_t value_start - = MONITOR_VALUE_SINCE_START( - monitor_info->monitor_related_id); - - if (value_start) { - OK(fields[METRIC_AVG_VALUE_START]->store( - MONITOR_VALUE_SINCE_START(count) - / value_start, FALSE)); - - fields[METRIC_AVG_VALUE_START]->set_notnull(); - } else { - fields[METRIC_AVG_VALUE_START]->set_null(); - } - - if (MONITOR_VALUE(monitor_info->monitor_related_id)) { - OK(fields[METRIC_AVG_VALUE_RESET]->store( - MONITOR_VALUE(count) - / MONITOR_VALUE( - monitor_info->monitor_related_id), - FALSE)); - } else { - fields[METRIC_AVG_VALUE_RESET]->set_null(); - } - } else if (!(monitor_info->monitor_type & MONITOR_NO_AVERAGE) - && !(monitor_info->monitor_type - & MONITOR_DISPLAY_CURRENT)) { - if (time_diff) { - OK(fields[METRIC_AVG_VALUE_START]->store( - (double) MONITOR_VALUE_SINCE_START( - count) / time_diff)); - 
fields[METRIC_AVG_VALUE_START]->set_notnull(); - } else { - fields[METRIC_AVG_VALUE_START]->set_null(); - } - - if (MONITOR_FIELD(count, mon_reset_time)) { - /* calculate the time difference since last - reset */ - if (MONITOR_IS_ON(count)) { - time_diff = difftime( - time(NULL), MONITOR_FIELD( - count, mon_reset_time)); - } else { - time_diff = difftime( - MONITOR_FIELD(count, mon_stop_time), - MONITOR_FIELD(count, mon_reset_time)); - } - } else { - time_diff = 0; - } - - if (time_diff) { - OK(fields[METRIC_AVG_VALUE_RESET]->store( - static_cast<double>( - MONITOR_VALUE(count) / time_diff))); - fields[METRIC_AVG_VALUE_RESET]->set_notnull(); - } else { - fields[METRIC_AVG_VALUE_RESET]->set_null(); - } - } else { - fields[METRIC_AVG_VALUE_START]->set_null(); - fields[METRIC_AVG_VALUE_RESET]->set_null(); - } - - - if (MONITOR_IS_ON(count)) { - /* If monitor is on, the stop time will set to NULL */ - fields[METRIC_STOP_TIME]->set_null(); - - /* Display latest Monitor Reset Time only if Monitor - counter is on. 
*/ - if (MONITOR_FIELD(count, mon_reset_time)) { - OK(field_store_time_t( - fields[METRIC_RESET_TIME], - (time_t)MONITOR_FIELD( - count, mon_reset_time))); - fields[METRIC_RESET_TIME]->set_notnull(); - } else { - fields[METRIC_RESET_TIME]->set_null(); - } - - /* Display the monitor status as "enabled" */ - OK(field_store_string(fields[METRIC_STATUS], - "enabled")); - } else { - if (MONITOR_FIELD(count, mon_stop_time)) { - OK(field_store_time_t(fields[METRIC_STOP_TIME], - (time_t)MONITOR_FIELD(count, mon_stop_time))); - fields[METRIC_STOP_TIME]->set_notnull(); - } else { - fields[METRIC_STOP_TIME]->set_null(); - } - - fields[METRIC_RESET_TIME]->set_null(); - - OK(field_store_string(fields[METRIC_STATUS], - "disabled")); - } - - if (monitor_info->monitor_type & MONITOR_DISPLAY_CURRENT) { - OK(field_store_string(fields[METRIC_TYPE], - "value")); - } else if (monitor_info->monitor_type & MONITOR_EXISTING) { - OK(field_store_string(fields[METRIC_TYPE], - "status_counter")); - } else if (monitor_info->monitor_type & MONITOR_SET_OWNER) { - OK(field_store_string(fields[METRIC_TYPE], - "set_owner")); - } else if ( monitor_info->monitor_type & MONITOR_SET_MEMBER) { - OK(field_store_string(fields[METRIC_TYPE], - "set_member")); - } else { - OK(field_store_string(fields[METRIC_TYPE], - "counter")); - } - - OK(schema_table_store_record(thd, table_to_fill)); - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Function to fill information schema metrics tables. 
-@return 0 on success */ -static -int -i_s_metrics_fill_table( -/*===================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - DBUG_ENTER("i_s_metrics_fill_table"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - i_s_metrics_fill(thd, tables->table); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_metrics -@return 0 on success */ -static -int -innodb_metrics_init( -/*================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_metrics_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_metrics_fields_info; - schema->fill_table = i_s_metrics_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_metrics = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_METRICS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB Metrics Info"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_metrics_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - 
STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; -/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_ft_default_stopword */ -static ST_FIELD_INFO i_s_stopword_fields_info[] = -{ -#define STOPWORD_VALUE 0 - {STRUCT_FLD(field_name, "value"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Fill the dynamic table information_schema.innodb_ft_default_stopword. -@return 0 on success, 1 on failure */ -static -int -i_s_stopword_fill( -/*==============*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - Field** fields; - ulint i = 0; - TABLE* table = (TABLE*) tables->table; - - DBUG_ENTER("i_s_stopword_fill"); - - fields = table->field; - - /* Fill with server default stopword list in array - fts_default_stopword */ - while (fts_default_stopword[i]) { - OK(field_store_string(fields[STOPWORD_VALUE], - fts_default_stopword[i])); - - OK(schema_table_store_record(thd, table)); - i++; - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table information_schema.innodb_ft_default_stopword. 
-@return 0 on success */ -static -int -i_s_stopword_init( -/*==============*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_stopword_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_stopword_fields_info; - schema->fill_table = i_s_stopword_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_default_stopword = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_FT_DEFAULT_STOPWORD"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "Default stopword list for InnoDB Full Text Search"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_stopword_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED -INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED */ -static ST_FIELD_INFO i_s_fts_doc_fields_info[] = -{ -#define I_S_FTS_DOC_ID 0 - {STRUCT_FLD(field_name, "DOC_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), 
- STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED or -INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED -@return 0 on success, 1 on failure */ -static -int -i_s_fts_deleted_generic_fill( -/*=========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - ibool being_deleted) /*!< in: BEING_DELTED table */ -{ - Field** fields; - TABLE* table = (TABLE*) tables->table; - trx_t* trx; - fts_table_t fts_table; - fts_doc_ids_t* deleted; - dict_table_t* user_table; - - DBUG_ENTER("i_s_fts_deleted_generic_fill"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - if (!fts_internal_tbl_name) { - DBUG_RETURN(0); - } - - /* Prevent DDL to drop fts aux tables. */ - rw_lock_s_lock(&dict_operation_lock); - - user_table = dict_table_open_on_name( - fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); - - if (!user_table) { - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(0); - } else if (!dict_table_has_fts_index(user_table)) { - dict_table_close(user_table, FALSE, FALSE); - - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(0); - } - - deleted = fts_doc_ids_create(); - - trx = trx_allocate_for_background(); - trx->op_info = "Select for FTS DELETE TABLE"; - - FTS_INIT_FTS_TABLE(&fts_table, - (being_deleted) ? 
"BEING_DELETED" : "DELETED", - FTS_COMMON_TABLE, user_table); - - fts_table_fetch_doc_ids(trx, &fts_table, deleted); - - fields = table->field; - - int ret = 0; - - for (ulint j = 0; j < ib_vector_size(deleted->doc_ids); ++j) { - doc_id_t doc_id; - - doc_id = *(doc_id_t*) ib_vector_get_const(deleted->doc_ids, j); - - BREAK_IF(ret = fields[I_S_FTS_DOC_ID]->store(doc_id, true)); - - BREAK_IF(ret = schema_table_store_record(thd, table)); - } - - trx_free_for_background(trx); - - fts_doc_ids_free(deleted); - - dict_table_close(user_table, FALSE, FALSE); - - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(ret); -} - -/*******************************************************************//** -Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED -@return 0 on success, 1 on failure */ -static -int -i_s_fts_deleted_fill( -/*=================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - DBUG_ENTER("i_s_fts_deleted_fill"); - - DBUG_RETURN(i_s_fts_deleted_generic_fill(thd, tables, FALSE)); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED -@return 0 on success */ -static -int -i_s_fts_deleted_init( -/*=================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_fts_deleted_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_fts_doc_fields_info; - schema->fill_table = i_s_fts_deleted_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_deleted = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_FT_DELETED"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ 
- STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "INNODB AUXILIARY FTS DELETED TABLE"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_fts_deleted_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/*******************************************************************//** -Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED -@return 0 on success, 1 on failure */ -static -int -i_s_fts_being_deleted_fill( -/*=======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - DBUG_ENTER("i_s_fts_being_deleted_fill"); - - DBUG_RETURN(i_s_fts_deleted_generic_fill(thd, tables, TRUE)); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED -@return 0 on success */ -static -int -i_s_fts_being_deleted_init( -/*=======================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_fts_deleted_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_fts_doc_fields_info; - schema->fill_table = i_s_fts_being_deleted_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_being_deleted = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* 
int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_FT_BEING_DELETED"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "INNODB AUXILIARY FTS BEING DELETED TABLE"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_fts_being_deleted_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED and -INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE */ -static ST_FIELD_INFO i_s_fts_index_fields_info[] = -{ -#define I_S_FTS_WORD 0 - {STRUCT_FLD(field_name, "WORD"), - STRUCT_FLD(field_length, FTS_MAX_WORD_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define I_S_FTS_FIRST_DOC_ID 1 - {STRUCT_FLD(field_name, "FIRST_DOC_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define 
I_S_FTS_LAST_DOC_ID 2 - {STRUCT_FLD(field_name, "LAST_DOC_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define I_S_FTS_DOC_COUNT 3 - {STRUCT_FLD(field_name, "DOC_COUNT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define I_S_FTS_ILIST_DOC_ID 4 - {STRUCT_FLD(field_name, "DOC_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define I_S_FTS_ILIST_DOC_POS 5 - {STRUCT_FLD(field_name, "POSITION"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Go through the Doc Node and its ilist, fill the dynamic table -INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED for one FTS index on the table. 
-@return 0 on success, 1 on failure */ -static -int -i_s_fts_index_cache_fill_one_index( -/*===============================*/ - fts_index_cache_t* index_cache, /*!< in: FTS index cache */ - THD* thd, /*!< in: thread */ - fts_string_t* conv_str, /*!< in/out: buffer */ - TABLE_LIST* tables) /*!< in/out: tables to fill */ -{ - TABLE* table = (TABLE*) tables->table; - Field** fields; - CHARSET_INFO* index_charset; - const ib_rbt_node_t* rbt_node; - uint dummy_errors; - char* word_str; - - DBUG_ENTER("i_s_fts_index_cache_fill_one_index"); - - fields = table->field; - - index_charset = index_cache->charset; - conv_str->f_n_char = 0; - - int ret = 0; - - /* Go through each word in the index cache */ - for (rbt_node = rbt_first(index_cache->words); - rbt_node; - rbt_node = rbt_next(index_cache->words, rbt_node)) { - fts_tokenizer_word_t* word; - - word = rbt_value(fts_tokenizer_word_t, rbt_node); - - /* Convert word from index charset to system_charset_info */ - if (index_charset->cset != system_charset_info->cset) { - conv_str->f_n_char = my_convert( - reinterpret_cast<char*>(conv_str->f_str), - static_cast<uint32>(conv_str->f_len), - system_charset_info, - reinterpret_cast<char*>(word->text.f_str), - static_cast<uint32>(word->text.f_len), - index_charset, &dummy_errors); - ut_ad(conv_str->f_n_char <= conv_str->f_len); - conv_str->f_str[conv_str->f_n_char] = 0; - word_str = reinterpret_cast<char*>(conv_str->f_str); - } else { - word_str = reinterpret_cast<char*>(word->text.f_str); - } - - /* Decrypt the ilist, and display Dod ID and word position */ - for (ulint i = 0; i < ib_vector_size(word->nodes); i++) { - fts_node_t* node; - byte* ptr; - ulint decoded = 0; - doc_id_t doc_id = 0; - - node = static_cast<fts_node_t*> (ib_vector_get( - word->nodes, i)); - - ptr = node->ilist; - - while (decoded < node->ilist_size) { - ulint pos = fts_decode_vlc(&ptr); - - doc_id += pos; - - /* Get position info */ - while (*ptr) { - pos = fts_decode_vlc(&ptr); - - OK(field_store_string( 
- fields[I_S_FTS_WORD], - word_str)); - - OK(fields[I_S_FTS_FIRST_DOC_ID]->store( - (longlong) node->first_doc_id, - true)); - - OK(fields[I_S_FTS_LAST_DOC_ID]->store( - (longlong) node->last_doc_id, - true)); - - OK(fields[I_S_FTS_DOC_COUNT]->store( - static_cast<double>(node->doc_count))); - - OK(fields[I_S_FTS_ILIST_DOC_ID]->store( - (longlong) doc_id, true)); - - OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - static_cast<double>(pos))); - - OK(schema_table_store_record( - thd, table)); - } - - ++ptr; - - decoded = ptr - (byte*) node->ilist; - } - } - } - - DBUG_RETURN(ret); -} -/*******************************************************************//** -Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED -@return 0 on success, 1 on failure */ -static -int -i_s_fts_index_cache_fill( -/*=====================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - dict_table_t* user_table; - fts_cache_t* cache; - - DBUG_ENTER("i_s_fts_index_cache_fill"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - if (!fts_internal_tbl_name) { - DBUG_RETURN(0); - } - - user_table = dict_table_open_on_name( - fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); - - if (!user_table) { - DBUG_RETURN(0); - } - - if (user_table->fts == NULL || user_table->fts->cache == NULL) { - dict_table_close(user_table, FALSE, FALSE); - - DBUG_RETURN(0); - } - - cache = user_table->fts->cache; - - ut_a(cache); - - int ret = 0; - fts_string_t conv_str; - conv_str.f_len = system_charset_info->mbmaxlen - * FTS_MAX_WORD_LEN_IN_CHAR; - conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len)); - - for (ulint i = 0; i < ib_vector_size(cache->indexes); i++) { - fts_index_cache_t* index_cache; - - index_cache = static_cast<fts_index_cache_t*> ( - ib_vector_get(cache->indexes, i)); - - BREAK_IF(ret = i_s_fts_index_cache_fill_one_index( - 
index_cache, thd, &conv_str, tables)); - } - - ut_free(conv_str.f_str); - - dict_table_close(user_table, FALSE, FALSE); - - DBUG_RETURN(ret); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE -@return 0 on success */ -static -int -i_s_fts_index_cache_init( -/*=====================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_fts_index_cache_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_fts_index_fields_info; - schema->fill_table = i_s_fts_index_cache_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_cache = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_FT_INDEX_CACHE"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "INNODB AUXILIARY FTS INDEX CACHED"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_fts_index_cache_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - 
-/*******************************************************************//** -Go through a FTS index auxiliary table, fetch its rows and fill -FTS word cache structure. -@return DB_SUCCESS on success, otherwise error code */ -static -dberr_t -i_s_fts_index_table_fill_selected( -/*==============================*/ - dict_index_t* index, /*!< in: FTS index */ - ib_vector_t* words, /*!< in/out: vector to hold - fetched words */ - ulint selected, /*!< in: selected FTS index */ - fts_string_t* word) /*!< in: word to select */ -{ - pars_info_t* info; - fts_table_t fts_table; - trx_t* trx; - que_t* graph; - dberr_t error; - fts_fetch_t fetch; - - info = pars_info_create(); - - fetch.read_arg = words; - fetch.read_record = fts_optimize_index_fetch_node; - fetch.total_memory = 0; - - DBUG_EXECUTE_IF("fts_instrument_result_cache_limit", - fts_result_cache_limit = 8192; - ); - - trx = trx_allocate_for_background(); - - trx->op_info = "fetching FTS index nodes"; - - pars_info_bind_function(info, "my_func", fetch.read_record, &fetch); - pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len); - - FTS_INIT_INDEX_TABLE(&fts_table, fts_get_suffix(selected), - FTS_INDEX_TABLE, index); - - graph = fts_parse_sql( - &fts_table, info, - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS" - " SELECT word, doc_count, first_doc_id, last_doc_id, " - "ilist\n" - " FROM %s WHERE word >= :word;\n" - "BEGIN\n" - "\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE c;"); - - for(;;) { - error = fts_eval_sql(trx, graph); - - if (error == DB_SUCCESS) { - fts_sql_commit(trx); - - break; - } else { - fts_sql_rollback(trx); - - ut_print_timestamp(stderr); - - if (error == DB_LOCK_WAIT_TIMEOUT) { - fprintf(stderr, " InnoDB: Warning: " - "lock wait timeout reading " - "FTS index. 
Retrying!\n"); - - trx->error_state = DB_SUCCESS; - } else { - fprintf(stderr, " InnoDB: Error: %d " - "while reading FTS index.\n", error); - break; - } - } - } - - mutex_enter(&dict_sys->mutex); - que_graph_free(graph); - mutex_exit(&dict_sys->mutex); - - trx_free_for_background(trx); - - if (fetch.total_memory >= fts_result_cache_limit) { - error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT; - } - - return(error); -} - -/*******************************************************************//** -Free words. */ -static -void -i_s_fts_index_table_free_one_fetch( -/*===============================*/ - ib_vector_t* words) /*!< in: words fetched */ -{ - for (ulint i = 0; i < ib_vector_size(words); i++) { - fts_word_t* word; - - word = static_cast<fts_word_t*>(ib_vector_get(words, i)); - - for (ulint j = 0; j < ib_vector_size(word->nodes); j++) { - fts_node_t* node; - - node = static_cast<fts_node_t*> (ib_vector_get( - word->nodes, j)); - ut_free(node->ilist); - } - - fts_word_free(word); - } - - ib_vector_reset(words); -} - -/*******************************************************************//** -Go through words, fill INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE. -@return 0 on success, 1 on failure */ -static -int -i_s_fts_index_table_fill_one_fetch( -/*===============================*/ - CHARSET_INFO* index_charset, /*!< in: FTS index charset */ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - ib_vector_t* words, /*!< in: words fetched */ - fts_string_t* conv_str, /*!< in: string for conversion*/ - bool has_more) /*!< in: has more to fetch */ -{ - TABLE* table = (TABLE*) tables->table; - Field** fields; - uint dummy_errors; - char* word_str; - ulint words_size; - int ret = 0; - - DBUG_ENTER("i_s_fts_index_table_fill_one_fetch"); - - fields = table->field; - - words_size = ib_vector_size(words); - if (has_more) { - /* the last word is not fetched completely. 
*/ - ut_ad(words_size > 1); - words_size -= 1; - } - - /* Go through each word in the index cache */ - for (ulint i = 0; i < words_size; i++) { - fts_word_t* word; - - word = static_cast<fts_word_t*>(ib_vector_get(words, i)); - - word->text.f_str[word->text.f_len] = 0; - - /* Convert word from index charset to system_charset_info */ - if (index_charset->cset != system_charset_info->cset) { - conv_str->f_n_char = my_convert( - reinterpret_cast<char*>(conv_str->f_str), - static_cast<uint32>(conv_str->f_len), - system_charset_info, - reinterpret_cast<char*>(word->text.f_str), - static_cast<uint32>(word->text.f_len), - index_charset, &dummy_errors); - ut_ad(conv_str->f_n_char <= conv_str->f_len); - conv_str->f_str[conv_str->f_n_char] = 0; - word_str = reinterpret_cast<char*>(conv_str->f_str); - } else { - word_str = reinterpret_cast<char*>(word->text.f_str); - } - - /* Decrypt the ilist, and display Dod ID and word position */ - for (ulint i = 0; i < ib_vector_size(word->nodes); i++) { - fts_node_t* node; - byte* ptr; - ulint decoded = 0; - doc_id_t doc_id = 0; - - node = static_cast<fts_node_t*> (ib_vector_get( - word->nodes, i)); - - ptr = node->ilist; - - while (decoded < node->ilist_size) { - ulint pos = fts_decode_vlc(&ptr); - - doc_id += pos; - - /* Get position info */ - while (*ptr) { - pos = fts_decode_vlc(&ptr); - - OK(field_store_string( - fields[I_S_FTS_WORD], - word_str)); - - OK(fields[I_S_FTS_FIRST_DOC_ID]->store( - (longlong) node->first_doc_id, - true)); - - OK(fields[I_S_FTS_LAST_DOC_ID]->store( - (longlong) node->last_doc_id, - true)); - - OK(fields[I_S_FTS_DOC_COUNT]->store( - static_cast<double>(node->doc_count))); - - OK(fields[I_S_FTS_ILIST_DOC_ID]->store( - (longlong) doc_id, true)); - - OK(fields[I_S_FTS_ILIST_DOC_POS]->store( - static_cast<double>(pos))); - - OK(schema_table_store_record( - thd, table)); - } - - ++ptr; - - decoded = ptr - (byte*) node->ilist; - } - } - } - - DBUG_RETURN(ret); -} - 
-/*******************************************************************//** -Go through a FTS index and its auxiliary tables, fetch rows in each table -and fill INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE. -@return 0 on success, 1 on failure */ -static -int -i_s_fts_index_table_fill_one_index( -/*===============================*/ - dict_index_t* index, /*!< in: FTS index */ - THD* thd, /*!< in: thread */ - fts_string_t* conv_str, /*!< in/out: buffer */ - TABLE_LIST* tables) /*!< in/out: tables to fill */ -{ - ib_vector_t* words; - mem_heap_t* heap; - fts_string_t word; - CHARSET_INFO* index_charset; - dberr_t error; - int ret = 0; - - DBUG_ENTER("i_s_fts_index_table_fill_one_index"); - DBUG_ASSERT(!dict_index_is_online_ddl(index)); - - heap = mem_heap_create(1024); - - words = ib_vector_create(ib_heap_allocator_create(heap), - sizeof(fts_word_t), 256); - - word.f_str = NULL; - word.f_len = 0; - word.f_n_char = 0; - - index_charset = fts_index_get_charset(index); - - /* Iterate through each auxiliary table as described in - fts_index_selector */ - for (ulint selected = 0; fts_index_selector[selected].value; - selected++) { - bool has_more = false; - - do { - /* Fetch from index */ - error = i_s_fts_index_table_fill_selected( - index, words, selected, &word); - - if (error == DB_SUCCESS) { - has_more = false; - } else if (error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT) { - has_more = true; - } else { - i_s_fts_index_table_free_one_fetch(words); - ret = 1; - goto func_exit; - } - - if (has_more) { - fts_word_t* last_word; - - /* Prepare start point for next fetch */ - last_word = static_cast<fts_word_t*>(ib_vector_last(words)); - ut_ad(last_word != NULL); - fts_utf8_string_dup(&word, &last_word->text, heap); - } - - /* Fill into tables */ - ret = i_s_fts_index_table_fill_one_fetch( - index_charset, thd, tables, words, conv_str, - has_more); - i_s_fts_index_table_free_one_fetch(words); - - if (ret != 0) { - goto func_exit; - } - } while (has_more); - } - -func_exit: - 
mem_heap_free(heap); - - DBUG_RETURN(ret); -} -/*******************************************************************//** -Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE -@return 0 on success, 1 on failure */ -static -int -i_s_fts_index_table_fill( -/*=====================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - dict_table_t* user_table; - dict_index_t* index; - - DBUG_ENTER("i_s_fts_index_table_fill"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - if (!fts_internal_tbl_name) { - DBUG_RETURN(0); - } - - /* Prevent DDL to drop fts aux tables. */ - rw_lock_s_lock(&dict_operation_lock); - - user_table = dict_table_open_on_name( - fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); - - if (!user_table) { - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(0); - } - - int ret = 0; - fts_string_t conv_str; - conv_str.f_len = system_charset_info->mbmaxlen - * FTS_MAX_WORD_LEN_IN_CHAR; - conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len)); - - for (index = dict_table_get_first_index(user_table); - index; index = dict_table_get_next_index(index)) { - if (index->type & DICT_FTS) { - BREAK_IF(ret = i_s_fts_index_table_fill_one_index( - index, thd, &conv_str, tables)); - } - } - - dict_table_close(user_table, FALSE, FALSE); - - rw_lock_s_unlock(&dict_operation_lock); - - ut_free(conv_str.f_str); - - DBUG_RETURN(ret); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE -@return 0 on success */ -static -int -i_s_fts_index_table_init( -/*=====================*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_fts_index_table_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_fts_index_fields_info; - schema->fill_table = 
i_s_fts_index_table_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_table = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_FT_INDEX_TABLE"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "INNODB AUXILIARY FTS INDEX TABLE"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_fts_index_table_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG */ -static ST_FIELD_INFO i_s_fts_config_fields_info[] = -{ -#define FTS_CONFIG_KEY 0 - {STRUCT_FLD(field_name, "KEY"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define FTS_CONFIG_VALUE 1 - {STRUCT_FLD(field_name, "VALUE"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -static const char* fts_config_key[] = { - FTS_OPTIMIZE_LIMIT_IN_SECS, - FTS_SYNCED_DOC_ID, - FTS_STOPWORD_TABLE_NAME, - FTS_USE_STOPWORD, - NULL -}; - -/*******************************************************************//** -Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG -@return 0 on success, 1 on failure */ -static -int -i_s_fts_config_fill( -/*================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - Field** fields; - TABLE* table = (TABLE*) tables->table; - trx_t* trx; - fts_table_t fts_table; - dict_table_t* user_table; - ulint i = 0; - dict_index_t* index = NULL; - unsigned char str[FTS_MAX_CONFIG_VALUE_LEN + 1]; - - DBUG_ENTER("i_s_fts_config_fill"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - if (!fts_internal_tbl_name) { - DBUG_RETURN(0); - } - - DEBUG_SYNC_C("i_s_fts_config_fille_check"); - - fields = table->field; - - /* Prevent DDL to drop fts aux tables. 
*/ - rw_lock_s_lock(&dict_operation_lock); - - user_table = dict_table_open_on_name( - fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE); - - if (!user_table) { - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(0); - } else if (!dict_table_has_fts_index(user_table)) { - dict_table_close(user_table, FALSE, FALSE); - - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(0); - } - - trx = trx_allocate_for_background(); - trx->op_info = "Select for FTS CONFIG TABLE"; - - FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, user_table); - - if (!ib_vector_is_empty(user_table->fts->indexes)) { - index = (dict_index_t*) ib_vector_getp_const( - user_table->fts->indexes, 0); - DBUG_ASSERT(!dict_index_is_online_ddl(index)); - } - - int ret = 0; - - while (fts_config_key[i]) { - fts_string_t value; - char* key_name; - ulint allocated = FALSE; - - value.f_len = FTS_MAX_CONFIG_VALUE_LEN; - - value.f_str = str; - - if (index - && strcmp(fts_config_key[i], FTS_TOTAL_WORD_COUNT) == 0) { - key_name = fts_config_create_index_param_name( - fts_config_key[i], index); - allocated = TRUE; - } else { - key_name = (char*) fts_config_key[i]; - } - - fts_config_get_value(trx, &fts_table, key_name, &value); - - if (allocated) { - ut_free(key_name); - } - - BREAK_IF(ret = field_store_string( - fields[FTS_CONFIG_KEY], fts_config_key[i])); - - BREAK_IF(ret = field_store_string( - fields[FTS_CONFIG_VALUE], - reinterpret_cast<const char*>(value.f_str))); - - BREAK_IF(ret = schema_table_store_record(thd, table)); - - i++; - } - - fts_sql_commit(trx); - - trx_free_for_background(trx); - - dict_table_close(user_table, FALSE, FALSE); - - rw_lock_s_unlock(&dict_operation_lock); - - DBUG_RETURN(ret); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG -@return 0 on success */ -static -int -i_s_fts_config_init( -/*=================*/ - void* p) /*!< in/out: table schema object */ -{ - 
DBUG_ENTER("i_s_fts_config_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_fts_config_fields_info; - schema->fill_table = i_s_fts_config_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_config = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_FT_CONFIG"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "INNODB AUXILIARY FTS CONFIG TABLE"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_fts_config_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/* Fields of the dynamic table INNODB_BUFFER_POOL_STATS. 
*/ -static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] = -{ -#define IDX_BUF_STATS_POOL_ID 0 - {STRUCT_FLD(field_name, "POOL_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_POOL_SIZE 1 - {STRUCT_FLD(field_name, "POOL_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_FREE_BUFFERS 2 - {STRUCT_FLD(field_name, "FREE_BUFFERS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_LRU_LEN 3 - {STRUCT_FLD(field_name, "DATABASE_PAGES"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_OLD_LRU_LEN 4 - {STRUCT_FLD(field_name, "OLD_DATABASE_PAGES"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_FLUSH_LIST_LEN 5 - {STRUCT_FLD(field_name, "MODIFIED_DATABASE_PAGES"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PENDING_ZIP 6 - {STRUCT_FLD(field_name, "PENDING_DECOMPRESS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PENDING_READ 7 - {STRUCT_FLD(field_name, "PENDING_READS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_FLUSH_LRU 8 - {STRUCT_FLD(field_name, "PENDING_FLUSH_LRU"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_FLUSH_LIST 9 - {STRUCT_FLD(field_name, "PENDING_FLUSH_LIST"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_YOUNG 10 - {STRUCT_FLD(field_name, "PAGES_MADE_YOUNG"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_NOT_YOUNG 11 - {STRUCT_FLD(field_name, "PAGES_NOT_MADE_YOUNG"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_YOUNG_RATE 12 - {STRUCT_FLD(field_name, "PAGES_MADE_YOUNG_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE 13 - {STRUCT_FLD(field_name, "PAGES_MADE_NOT_YOUNG_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_READ 14 - {STRUCT_FLD(field_name, "NUMBER_PAGES_READ"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_CREATED 15 - {STRUCT_FLD(field_name, "NUMBER_PAGES_CREATED"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_WRITTEN 16 - {STRUCT_FLD(field_name, "NUMBER_PAGES_WRITTEN"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_READ_RATE 17 - {STRUCT_FLD(field_name, "PAGES_READ_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, 
- -#define IDX_BUF_STATS_PAGE_CREATE_RATE 18 - {STRUCT_FLD(field_name, "PAGES_CREATE_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_PAGE_WRITTEN_RATE 19 - {STRUCT_FLD(field_name, "PAGES_WRITTEN_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_GET 20 - {STRUCT_FLD(field_name, "NUMBER_PAGES_GET"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_HIT_RATE 21 - {STRUCT_FLD(field_name, "HIT_RATE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_MADE_YOUNG_PCT 22 - {STRUCT_FLD(field_name, "YOUNG_MAKE_PER_THOUSAND_GETS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_NOT_MADE_YOUNG_PCT 23 - {STRUCT_FLD(field_name, "NOT_YOUNG_MAKE_PER_THOUSAND_GETS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define 
IDX_BUF_STATS_READ_AHREAD 24 - {STRUCT_FLD(field_name, "NUMBER_PAGES_READ_AHEAD"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_READ_AHEAD_EVICTED 25 - {STRUCT_FLD(field_name, "NUMBER_READ_AHEAD_EVICTED"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_READ_AHEAD_RATE 26 - {STRUCT_FLD(field_name, "READ_AHEAD_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_READ_AHEAD_EVICT_RATE 27 - {STRUCT_FLD(field_name, "READ_AHEAD_EVICTED_RATE"), - STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH), - STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_LRU_IO_SUM 28 - {STRUCT_FLD(field_name, "LRU_IO_TOTAL"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_LRU_IO_CUR 29 - {STRUCT_FLD(field_name, "LRU_IO_CURRENT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define 
IDX_BUF_STATS_UNZIP_SUM 30 - {STRUCT_FLD(field_name, "UNCOMPRESS_TOTAL"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_STATS_UNZIP_CUR 31 - {STRUCT_FLD(field_name, "UNCOMPRESS_CURRENT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Fill Information Schema table INNODB_BUFFER_POOL_STATS for a particular -buffer pool -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_stats_fill( -/*==================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - const buf_pool_info_t* info) /*!< in: buffer pool - information */ -{ - TABLE* table; - Field** fields; - - DBUG_ENTER("i_s_innodb_stats_fill"); - - table = tables->table; - - fields = table->field; - - OK(fields[IDX_BUF_STATS_POOL_ID]->store( - static_cast<double>(info->pool_unique_id))); - - OK(fields[IDX_BUF_STATS_POOL_SIZE]->store( - static_cast<double>(info->pool_size))); - - OK(fields[IDX_BUF_STATS_LRU_LEN]->store( - static_cast<double>(info->lru_len))); - - OK(fields[IDX_BUF_STATS_OLD_LRU_LEN]->store( - static_cast<double>(info->old_lru_len))); - - OK(fields[IDX_BUF_STATS_FREE_BUFFERS]->store( - static_cast<double>(info->free_list_len))); - - OK(fields[IDX_BUF_STATS_FLUSH_LIST_LEN]->store( - static_cast<double>(info->flush_list_len))); - - OK(fields[IDX_BUF_STATS_PENDING_ZIP]->store( - static_cast<double>(info->n_pend_unzip))); - - OK(fields[IDX_BUF_STATS_PENDING_READ]->store( - static_cast<double>(info->n_pend_reads))); - - 
OK(fields[IDX_BUF_STATS_FLUSH_LRU]->store( - static_cast<double>(info->n_pending_flush_lru))); - - OK(fields[IDX_BUF_STATS_FLUSH_LIST]->store( - static_cast<double>(info->n_pending_flush_list))); - - OK(fields[IDX_BUF_STATS_PAGE_YOUNG]->store( - static_cast<double>(info->n_pages_made_young))); - - OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG]->store( - static_cast<double>(info->n_pages_not_made_young))); - - OK(fields[IDX_BUF_STATS_PAGE_YOUNG_RATE]->store( - info->page_made_young_rate)); - - OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE]->store( - info->page_not_made_young_rate)); - - OK(fields[IDX_BUF_STATS_PAGE_READ]->store( - static_cast<double>(info->n_pages_read))); - - OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store( - static_cast<double>(info->n_pages_created))); - - OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store( - static_cast<double>(info->n_pages_written))); - - OK(fields[IDX_BUF_STATS_GET]->store( - static_cast<double>(info->n_page_gets))); - - OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store( - info->pages_read_rate)); - - OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store( - info->pages_created_rate)); - - OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store( - info->pages_written_rate)); - - if (info->n_page_get_delta) { - if (info->page_read_delta <= info->n_page_get_delta) { - OK(fields[IDX_BUF_STATS_HIT_RATE]->store( - static_cast<double>( - 1000 - (1000 * info->page_read_delta - / info->n_page_get_delta)))); - } else { - OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0)); - } - - OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store( - static_cast<double>( - 1000 * info->young_making_delta - / info->n_page_get_delta))); - - OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store( - static_cast<double>( - 1000 * info->not_young_making_delta - / info->n_page_get_delta))); - } else { - OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0)); - OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0)); - OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0)); - } - - 
OK(fields[IDX_BUF_STATS_READ_AHREAD]->store( - static_cast<double>(info->n_ra_pages_read))); - - OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICTED]->store( - static_cast<double>(info->n_ra_pages_evicted))); - - OK(fields[IDX_BUF_STATS_READ_AHEAD_RATE]->store( - info->pages_readahead_rate)); - - OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICT_RATE]->store( - info->pages_evicted_rate)); - - OK(fields[IDX_BUF_STATS_LRU_IO_SUM]->store( - static_cast<double>(info->io_sum))); - - OK(fields[IDX_BUF_STATS_LRU_IO_CUR]->store( - static_cast<double>(info->io_cur))); - - OK(fields[IDX_BUF_STATS_UNZIP_SUM]->store( - static_cast<double>(info->unzip_sum))); - - OK(fields[IDX_BUF_STATS_UNZIP_CUR]->store( - static_cast<double>(info->unzip_cur))); - - DBUG_RETURN(schema_table_store_record(thd, table)); -} - -/*******************************************************************//** -This is the function that loops through each buffer pool and fetch buffer -pool stats to information schema table: I_S_INNODB_BUFFER_POOL_STATS -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buffer_stats_fill_table( -/*===============================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - int status = 0; - buf_pool_info_t* pool_info; - - DBUG_ENTER("i_s_innodb_buffer_fill_general"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* Only allow the PROCESS privilege holder to access the stats */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - pool_info = (buf_pool_info_t*) mem_zalloc( - srv_buf_pool_instances * sizeof *pool_info); - - /* Walk through each buffer pool */ - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - /* Fetch individual buffer pool info */ - buf_stats_get_pool_info(buf_pool, i, pool_info); - - status = i_s_innodb_stats_fill(thd, tables, &pool_info[i]); - - /* If something goes 
wrong, break and return */ - if (status) { - break; - } - } - - mem_free(pool_info); - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS. -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buffer_pool_stats_init( -/*==============================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("i_s_innodb_buffer_pool_stats_init"); - - schema = reinterpret_cast<ST_SCHEMA_TABLE*>(p); - - schema->fields_info = i_s_innodb_buffer_stats_fields_info; - schema->fill_table = i_s_innodb_buffer_stats_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_stats = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_BUFFER_POOL_STATS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB Buffer Pool Statistics Information "), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_innodb_buffer_pool_stats_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/* 
Fields of the dynamic table INNODB_BUFFER_POOL_PAGE. */ -static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] = -{ -#define IDX_BUFFER_POOL_ID 0 - {STRUCT_FLD(field_name, "POOL_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_BLOCK_ID 1 - {STRUCT_FLD(field_name, "BLOCK_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_SPACE 2 - {STRUCT_FLD(field_name, "SPACE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_NUM 3 - {STRUCT_FLD(field_name, "PAGE_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_TYPE 4 - {STRUCT_FLD(field_name, "PAGE_TYPE"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_FLUSH_TYPE 5 - {STRUCT_FLD(field_name, "FLUSH_TYPE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, 
SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_FIX_COUNT 6 - {STRUCT_FLD(field_name, "FIX_COUNT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_HASHED 7 - {STRUCT_FLD(field_name, "IS_HASHED"), - STRUCT_FLD(field_length, 3), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_NEWEST_MOD 8 - {STRUCT_FLD(field_name, "NEWEST_MODIFICATION"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_OLDEST_MOD 9 - {STRUCT_FLD(field_name, "OLDEST_MODIFICATION"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_ACCESS_TIME 10 - {STRUCT_FLD(field_name, "ACCESS_TIME"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_TABLE_NAME 11 - {STRUCT_FLD(field_name, "TABLE_NAME"), - STRUCT_FLD(field_length, 1024), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_INDEX_NAME 12 - 
{STRUCT_FLD(field_name, "INDEX_NAME"), - STRUCT_FLD(field_length, 1024), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_NUM_RECS 13 - {STRUCT_FLD(field_name, "NUMBER_RECORDS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_DATA_SIZE 14 - {STRUCT_FLD(field_name, "DATA_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_ZIP_SIZE 15 - {STRUCT_FLD(field_name, "COMPRESSED_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_STATE 16 - {STRUCT_FLD(field_name, "PAGE_STATE"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_IO_FIX 17 - {STRUCT_FLD(field_name, "IO_FIX"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_IS_OLD 18 - {STRUCT_FLD(field_name, "IS_OLD"), - STRUCT_FLD(field_length, 3), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - 
STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUFFER_PAGE_FREE_CLOCK 19 - {STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Fill Information Schema table INNODB_BUFFER_PAGE with information -cached in the buf_page_info_t array -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buffer_page_fill( -/*========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - const buf_page_info_t* info_array, /*!< in: array cached page - info */ - ulint num_page) /*!< in: number of page info - cached */ -{ - TABLE* table; - Field** fields; - - DBUG_ENTER("i_s_innodb_buffer_page_fill"); - - table = tables->table; - - fields = table->field; - - /* Iterate through the cached array and fill the I_S table rows */ - for (ulint i = 0; i < num_page; i++) { - const buf_page_info_t* page_info; - char table_name[MAX_FULL_NAME_LEN + 1]; - const char* table_name_end = NULL; - const char* state_str; - enum buf_page_state state; - - page_info = info_array + i; - - state_str = NULL; - - OK(fields[IDX_BUFFER_POOL_ID]->store( - page_info->pool_id, true)); - - OK(fields[IDX_BUFFER_BLOCK_ID]->store( - page_info->block_id, true)); - - OK(fields[IDX_BUFFER_PAGE_SPACE]->store( - page_info->space_id, true)); - - OK(fields[IDX_BUFFER_PAGE_NUM]->store( - page_info->page_num, true)); - - OK(field_store_string( - fields[IDX_BUFFER_PAGE_TYPE], - i_s_page_type[page_info->page_type].type_str)); - - OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store( - page_info->flush_type, true)); - - 
OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store( - page_info->fix_count, true)); - - OK(field_store_string(fields[IDX_BUFFER_PAGE_HASHED], - page_info->hashed ? "YES" : "NO")); - - OK(fields[IDX_BUFFER_PAGE_NEWEST_MOD]->store( - (longlong) page_info->newest_mod, true)); - - OK(fields[IDX_BUFFER_PAGE_OLDEST_MOD]->store( - (longlong) page_info->oldest_mod, true)); - - OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store( - page_info->access_time, true)); - - fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_null(); - - fields[IDX_BUFFER_PAGE_INDEX_NAME]->set_null(); - - /* If this is an index page, fetch the index name - and table name */ - if (page_info->page_type == I_S_PAGE_TYPE_INDEX) { - bool ret = false; - - mutex_enter(&dict_sys->mutex); - - if (const dict_index_t* index = - dict_index_get_if_in_cache_low( - page_info->index_id)) { - table_name_end = innobase_convert_name( - table_name, sizeof(table_name), - index->table_name, - strlen(index->table_name), - thd, TRUE); - - ret = fields[IDX_BUFFER_PAGE_TABLE_NAME] - ->store(table_name, - static_cast<uint>( - table_name_end - - table_name), - system_charset_info) - || field_store_index_name( - fields - [IDX_BUFFER_PAGE_INDEX_NAME], - index->name); - } - - mutex_exit(&dict_sys->mutex); - - OK(ret); - - fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_notnull(); - } - - OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store( - page_info->num_recs, true)); - - OK(fields[IDX_BUFFER_PAGE_DATA_SIZE]->store( - page_info->data_size, true)); - - OK(fields[IDX_BUFFER_PAGE_ZIP_SIZE]->store( - page_info->zip_ssize - ? 
(UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize - : 0, true)); - -#if BUF_PAGE_STATE_BITS > 3 -# error "BUF_PAGE_STATE_BITS > 3, please ensure that all 1<<BUF_PAGE_STATE_BITS values are checked for" -#endif - state = static_cast<enum buf_page_state>(page_info->page_state); - - switch (state) { - /* First three states are for compression pages and - are not states we would get as we scan pages through - buffer blocks */ - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - state_str = NULL; - break; - case BUF_BLOCK_NOT_USED: - state_str = "NOT_USED"; - break; - case BUF_BLOCK_READY_FOR_USE: - state_str = "READY_FOR_USE"; - break; - case BUF_BLOCK_FILE_PAGE: - state_str = "FILE_PAGE"; - break; - case BUF_BLOCK_MEMORY: - state_str = "MEMORY"; - break; - case BUF_BLOCK_REMOVE_HASH: - state_str = "REMOVE_HASH"; - break; - }; - - OK(field_store_string(fields[IDX_BUFFER_PAGE_STATE], - state_str)); - - switch (page_info->io_fix) { - case BUF_IO_NONE: - state_str = "IO_NONE"; - break; - case BUF_IO_READ: - state_str = "IO_READ"; - break; - case BUF_IO_WRITE: - state_str = "IO_WRITE"; - break; - case BUF_IO_PIN: - state_str = "IO_PIN"; - break; - } - - OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], - state_str)); - - OK(field_store_string(fields[IDX_BUFFER_PAGE_IS_OLD], - (page_info->is_old) ? 
"YES" : "NO")); - - OK(fields[IDX_BUFFER_PAGE_FREE_CLOCK]->store( - page_info->freed_page_clock)); - - OK(schema_table_store_record(thd, table)); - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Set appropriate page type to a buf_page_info_t structure */ -static -void -i_s_innodb_set_page_type( -/*=====================*/ - buf_page_info_t*page_info, /*!< in/out: structure to fill with - scanned info */ - ulint page_type, /*!< in: page type */ - const byte* frame) /*!< in: buffer frame */ -{ - if (page_type == FIL_PAGE_INDEX) { - const page_t* page = (const page_t*) frame; - - page_info->index_id = btr_page_get_index_id(page); - - /* FIL_PAGE_INDEX is a bit special, its value - is defined as 17855, so we cannot use FIL_PAGE_INDEX - to index into i_s_page_type[] array, its array index - in the i_s_page_type[] array is I_S_PAGE_TYPE_INDEX - (1) for index pages or I_S_PAGE_TYPE_IBUF for - change buffer index pages */ - if (page_info->index_id - == static_cast<index_id_t>(DICT_IBUF_ID_MIN - + IBUF_SPACE_ID)) { - page_info->page_type = I_S_PAGE_TYPE_IBUF; - } else { - page_info->page_type = I_S_PAGE_TYPE_INDEX; - } - - page_info->data_size = (ulint)(page_header_get_field( - page, PAGE_HEAP_TOP) - (page_is_comp(page) - ? 
PAGE_NEW_SUPREMUM_END - : PAGE_OLD_SUPREMUM_END) - - page_header_get_field(page, PAGE_GARBAGE)); - - page_info->num_recs = page_get_n_recs(page); - } else if (page_type > FIL_PAGE_TYPE_LAST) { - /* Encountered an unknown page type */ - page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; - } else { - /* Make sure we get the right index into the - i_s_page_type[] array */ - ut_a(page_type == i_s_page_type[page_type].type_value); - - page_info->page_type = page_type; - } - - if (page_info->page_type == FIL_PAGE_TYPE_ZBLOB - || page_info->page_type == FIL_PAGE_TYPE_ZBLOB2) { - page_info->page_num = mach_read_from_4( - frame + FIL_PAGE_OFFSET); - page_info->space_id = mach_read_from_4( - frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - } -} -/*******************************************************************//** -Scans pages in the buffer cache, and collect their general information -into the buf_page_info_t array which is zero-filled. So any fields -that are not initialized in the function will default to 0 */ -static -void -i_s_innodb_buffer_page_get_info( -/*============================*/ - const buf_page_t*bpage, /*!< in: buffer pool page to scan */ - ulint pool_id, /*!< in: buffer pool id */ - ulint pos, /*!< in: buffer block position in - buffer pool or in the LRU list */ - buf_page_info_t*page_info) /*!< in: zero filled info structure; - out: structure filled with scanned - info */ -{ - ib_mutex_t* mutex = buf_page_get_mutex(bpage); - - ut_ad(pool_id < MAX_BUFFER_POOLS); - - page_info->pool_id = pool_id; - - page_info->block_id = pos; - - mutex_enter(mutex); - - page_info->page_state = buf_page_get_state(bpage); - - /* Only fetch information for buffers that map to a tablespace, - that is, buffer page with state BUF_BLOCK_ZIP_PAGE, - BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_FILE_PAGE */ - if (buf_page_in_file(bpage)) { - const byte* frame; - ulint page_type; - - page_info->space_id = buf_page_get_space(bpage); - - page_info->page_num = buf_page_get_page_no(bpage); - - 
page_info->flush_type = bpage->flush_type; - - page_info->fix_count = bpage->buf_fix_count; - - page_info->newest_mod = bpage->newest_modification; - - page_info->oldest_mod = bpage->oldest_modification; - - page_info->access_time = bpage->access_time; - - page_info->zip_ssize = bpage->zip.ssize; - - page_info->io_fix = bpage->io_fix; - - page_info->is_old = bpage->old; - - page_info->freed_page_clock = bpage->freed_page_clock; - - switch (buf_page_get_io_fix(bpage)) { - case BUF_IO_NONE: - case BUF_IO_WRITE: - case BUF_IO_PIN: - break; - case BUF_IO_READ: - page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; - mutex_exit(mutex); - return; - } - - if (page_info->page_state == BUF_BLOCK_FILE_PAGE) { - const buf_block_t*block; - - block = reinterpret_cast<const buf_block_t*>(bpage); - frame = block->frame; - page_info->hashed = (block->index != NULL); - } else { - ut_ad(page_info->zip_ssize); - frame = bpage->zip.data; - } - - page_type = fil_page_get_type(frame); - - i_s_innodb_set_page_type(page_info, page_type, frame); - } else { - page_info->page_type = I_S_PAGE_TYPE_UNKNOWN; - } - - mutex_exit(mutex); -} - -/*******************************************************************//** -This is the function that goes through each block of the buffer pool -and fetch information to information schema tables: INNODB_BUFFER_PAGE. -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_fill_buffer_pool( -/*========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - buf_pool_t* buf_pool, /*!< in: buffer pool to scan */ - const ulint pool_id) /*!< in: buffer pool id */ -{ - int status = 0; - mem_heap_t* heap; - - DBUG_ENTER("i_s_innodb_fill_buffer_pool"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - heap = mem_heap_create(10000); - - /* Go through each chunk of buffer pool. 
Currently, we only - have one single chunk for each buffer pool */ - for (ulint n = 0; n < buf_pool->n_chunks; n++) { - const buf_block_t* block; - ulint n_blocks; - buf_page_info_t* info_buffer; - ulint num_page; - ulint mem_size; - ulint chunk_size; - ulint num_to_process = 0; - ulint block_id = 0; - - /* Get buffer block of the nth chunk */ - block = buf_get_nth_chunk_block(buf_pool, n, &chunk_size); - num_page = 0; - - while (chunk_size > 0) { - /* we cache maximum MAX_BUF_INFO_CACHED number of - buffer page info */ - num_to_process = ut_min(chunk_size, - MAX_BUF_INFO_CACHED); - - mem_size = num_to_process * sizeof(buf_page_info_t); - - /* For each chunk, we'll pre-allocate information - structures to cache the page information read from - the buffer pool. Doing so before obtain any mutex */ - info_buffer = (buf_page_info_t*) mem_heap_zalloc( - heap, mem_size); - - /* GO through each block in the chunk */ - for (n_blocks = num_to_process; n_blocks--; block++) { - i_s_innodb_buffer_page_get_info( - &block->page, pool_id, block_id, - info_buffer + num_page); - block_id++; - num_page++; - } - - /* Fill in information schema table with information - just collected from the buffer chunk scan */ - status = i_s_innodb_buffer_page_fill( - thd, tables, info_buffer, - num_page); - - /* If something goes wrong, break and return */ - if (status) { - break; - } - - mem_heap_empty(heap); - chunk_size -= num_to_process; - num_page = 0; - } - } - - mem_heap_free(heap); - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Fill page information for pages in InnoDB buffer pool to the -dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buffer_page_fill_table( -/*==============================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - int status = 0; - - 
DBUG_ENTER("i_s_innodb_buffer_page_fill_table"); - - /* deny access to user without PROCESS privilege */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - /* Walk through each buffer pool */ - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - /* Fetch information from pages in this buffer pool, - and fill the corresponding I_S table */ - status = i_s_innodb_fill_buffer_pool(thd, tables, buf_pool, i); - - /* If something wrong, break and return */ - if (status) { - break; - } - } - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE. -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buffer_page_init( -/*========================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("i_s_innodb_buffer_page_init"); - - schema = reinterpret_cast<ST_SCHEMA_TABLE*>(p); - - schema->fields_info = i_s_innodb_buffer_page_fields_info; - schema->fill_table = i_s_innodb_buffer_page_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_BUFFER_PAGE"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB Buffer Page Information"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, 
i_s_innodb_buffer_page_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] = -{ -#define IDX_BUF_LRU_POOL_ID 0 - {STRUCT_FLD(field_name, "POOL_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_POS 1 - {STRUCT_FLD(field_name, "LRU_POSITION"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_SPACE 2 - {STRUCT_FLD(field_name, "SPACE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_NUM 3 - {STRUCT_FLD(field_name, "PAGE_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_TYPE 4 - {STRUCT_FLD(field_name, "PAGE_TYPE"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 
MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_FLUSH_TYPE 5 - {STRUCT_FLD(field_name, "FLUSH_TYPE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_FIX_COUNT 6 - {STRUCT_FLD(field_name, "FIX_COUNT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_HASHED 7 - {STRUCT_FLD(field_name, "IS_HASHED"), - STRUCT_FLD(field_length, 3), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_NEWEST_MOD 8 - {STRUCT_FLD(field_name, "NEWEST_MODIFICATION"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_OLDEST_MOD 9 - {STRUCT_FLD(field_name, "OLDEST_MODIFICATION"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_ACCESS_TIME 10 - {STRUCT_FLD(field_name, "ACCESS_TIME"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_TABLE_NAME 11 - {STRUCT_FLD(field_name, "TABLE_NAME"), - STRUCT_FLD(field_length, 1024), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_INDEX_NAME 12 - {STRUCT_FLD(field_name, "INDEX_NAME"), - STRUCT_FLD(field_length, 1024), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_NUM_RECS 13 - {STRUCT_FLD(field_name, "NUMBER_RECORDS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_DATA_SIZE 14 - {STRUCT_FLD(field_name, "DATA_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_ZIP_SIZE 15 - {STRUCT_FLD(field_name, "COMPRESSED_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_STATE 16 - {STRUCT_FLD(field_name, "COMPRESSED"), - STRUCT_FLD(field_length, 3), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_IO_FIX 17 - 
{STRUCT_FLD(field_name, "IO_FIX"), - STRUCT_FLD(field_length, 64), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_IS_OLD 18 - {STRUCT_FLD(field_name, "IS_OLD"), - STRUCT_FLD(field_length, 3), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define IDX_BUF_LRU_PAGE_FREE_CLOCK 19 - {STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Fill Information Schema table INNODB_BUFFER_PAGE_LRU with information -cached in the buf_page_info_t array -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buf_page_lru_fill( -/*=========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - const buf_page_info_t* info_array, /*!< in: array cached page - info */ - ulint num_page) /*!< in: number of page info - cached */ -{ - DBUG_ENTER("i_s_innodb_buf_page_lru_fill"); - - TABLE* table = tables->table; - Field** fields = table->field; - - /* Iterate through the cached array and fill the I_S table rows */ - for (ulint i = 0; i < num_page; i++) { - const buf_page_info_t* page_info; - char table_name[MAX_FULL_NAME_LEN + 1]; - const char* table_name_end = NULL; - const char* state_str; - enum buf_page_state state; - - state_str = NULL; - - page_info = info_array + i; - - OK(fields[IDX_BUF_LRU_POOL_ID]->store( - page_info->pool_id, true)); - OK(fields[IDX_BUF_LRU_POS]->store( - 
page_info->block_id, true)); - - OK(fields[IDX_BUF_LRU_PAGE_SPACE]->store( - page_info->space_id, true)); - - OK(fields[IDX_BUF_LRU_PAGE_NUM]->store( - page_info->page_num, true)); - - OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_TYPE], - i_s_page_type[page_info->page_type].type_str)); - - OK(fields[IDX_BUF_LRU_PAGE_FLUSH_TYPE]->store( - page_info->flush_type, true)); - - OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store( - page_info->fix_count, true)); - - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_HASHED], - page_info->hashed ? "YES" : "NO")); - - OK(fields[IDX_BUF_LRU_PAGE_NEWEST_MOD]->store( - page_info->newest_mod, true)); - - OK(fields[IDX_BUF_LRU_PAGE_OLDEST_MOD]->store( - page_info->oldest_mod, true)); - - OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store( - page_info->access_time, true)); - - fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_null(); - - fields[IDX_BUF_LRU_PAGE_INDEX_NAME]->set_null(); - - /* If this is an index page, fetch the index name - and table name */ - if (page_info->page_type == I_S_PAGE_TYPE_INDEX) { - bool ret = false; - - mutex_enter(&dict_sys->mutex); - - if (const dict_index_t* index = - dict_index_get_if_in_cache_low( - page_info->index_id)) { - table_name_end = innobase_convert_name( - table_name, sizeof(table_name), - index->table_name, - strlen(index->table_name), - thd, TRUE); - - ret = fields[IDX_BUF_LRU_PAGE_TABLE_NAME] - ->store(table_name, - static_cast<uint>( - table_name_end - - table_name), - system_charset_info) - || field_store_index_name( - fields - [IDX_BUF_LRU_PAGE_INDEX_NAME], - index->name); - } - - mutex_exit(&dict_sys->mutex); - - OK(ret); - - fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_notnull(); - } - - OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store( - page_info->num_recs, true)); - - OK(fields[IDX_BUF_LRU_PAGE_DATA_SIZE]->store( - page_info->data_size, true)); - - OK(fields[IDX_BUF_LRU_PAGE_ZIP_SIZE]->store( - page_info->zip_ssize - ? 
512 << page_info->zip_ssize : 0, true)); - - state = static_cast<enum buf_page_state>(page_info->page_state); - - switch (state) { - /* Compressed page */ - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - state_str = "YES"; - break; - /* Uncompressed page */ - case BUF_BLOCK_FILE_PAGE: - state_str = "NO"; - break; - /* We should not see following states */ - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - state_str = NULL; - break; - }; - - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_STATE], - state_str)); - - switch (page_info->io_fix) { - case BUF_IO_NONE: - state_str = "IO_NONE"; - break; - case BUF_IO_READ: - state_str = "IO_READ"; - break; - case BUF_IO_WRITE: - state_str = "IO_WRITE"; - break; - case BUF_IO_PIN: - state_str = "IO_PIN"; - break; - } - - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], - state_str)); - - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IS_OLD], - page_info->is_old ? "YES" : "NO")); - - OK(fields[IDX_BUF_LRU_PAGE_FREE_CLOCK]->store( - page_info->freed_page_clock, true)); - - OK(schema_table_store_record(thd, table)); - } - - DBUG_RETURN(0); -} - -/*******************************************************************//** -This is the function that goes through buffer pool's LRU list -and fetch information to INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU. 
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_fill_buffer_lru(
-/*=======================*/
-	THD*		thd,		/*!< in: thread */
-	TABLE_LIST*	tables,		/*!< in/out: tables to fill */
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool to scan */
-	const ulint	pool_id)	/*!< in: buffer pool id */
-{
-	DBUG_ENTER("i_s_innodb_fill_buffer_lru");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* The LRU list length is sampled and the snapshot buffer is
-	allocated under buf_pool->LRU_list_mutex, because
-	UT_LIST_GET_LEN(buf_pool->LRU) could change otherwise. */
-	mutex_enter(&buf_pool->LRU_list_mutex);
-
-	const ulint	n_pages = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	/* MY_WME: an error message is printed if the allocation fails */
-	buf_page_info_t* const	snapshot = (buf_page_info_t*) my_malloc(
-		n_pages * sizeof *snapshot, MYF(MY_WME));
-
-	if (snapshot != NULL) {
-		ulint	pos = 0;
-
-		memset(snapshot, 0, n_pages * sizeof *snapshot);
-
-		/* Copy the page information, walking the LRU list from its
-		last element towards the first, with the same collector that
-		INNODB_BUFFER_PAGE uses. */
-		for (const buf_page_t* bpage
-			     = UT_LIST_GET_LAST(buf_pool->LRU);
-		     bpage != NULL;
-		     bpage = UT_LIST_GET_PREV(LRU, bpage)) {
-
-			i_s_innodb_buffer_page_get_info(
-				bpage, pool_id, pos, snapshot + pos);
-
-			pos++;
-		}
-
-		ut_ad(pos == n_pages);
-		ut_ad(pos == UT_LIST_GET_LEN(buf_pool->LRU));
-	}
-
-	mutex_exit(&buf_pool->LRU_list_mutex);
-
-	if (snapshot == NULL) {
-		/* Out of memory: nothing was collected */
-		DBUG_RETURN(1);
-	}
-
-	/* The I_S rows are produced from the private copy, after the
-	LRU list mutex has been released. */
-	const int	status = i_s_innodb_buf_page_lru_fill(
-		thd, tables, snapshot, n_pages);
-
-	my_free(snapshot);
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Fill page information for pages in InnoDB buffer pool to the
-dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buf_page_lru_fill_table(
-/*===============================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (ignored) */ -{ - int status = 0; - - DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table"); - - /* deny access to any users that do not hold PROCESS_ACL */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - /* Walk through each buffer pool */ - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - /* Fetch information from pages in this buffer pool's LRU list, - and fill the corresponding I_S table */ - status = i_s_innodb_fill_buffer_lru(thd, tables, buf_pool, i); - - /* If something wrong, break and return */ - if (status) { - break; - } - } - - DBUG_RETURN(status); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU. -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_buffer_page_lru_init( -/*============================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("i_s_innodb_buffer_page_lru_init"); - - schema = reinterpret_cast<ST_SCHEMA_TABLE*>(p); - - schema->fields_info = i_s_innodb_buf_page_lru_fields_info; - schema->fill_table = i_s_innodb_buf_page_lru_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_BUFFER_PAGE_LRU"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB Buffer Page in 
LRU"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, i_s_innodb_buffer_page_lru_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/*******************************************************************//** -Unbind a dynamic INFORMATION_SCHEMA table. -@return 0 on success */ -static -int -i_s_common_deinit( -/*==============*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_common_deinit"); - - /* Do nothing */ - - DBUG_RETURN(0); -} - -/** SYS_TABLES ***************************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLES */ -static ST_FIELD_INFO innodb_sys_tables_fields_info[] = -{ -#define SYS_TABLES_ID 0 - {STRUCT_FLD(field_name, "TABLE_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_FLAG 2 - {STRUCT_FLD(field_name, "FLAG"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - 
STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_NUM_COLUMN 3 - {STRUCT_FLD(field_name, "N_COLS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_SPACE 4 - {STRUCT_FLD(field_name, "SPACE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_FILE_FORMAT 5 - {STRUCT_FLD(field_name, "FILE_FORMAT"), - STRUCT_FLD(field_length, 10), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_ROW_FORMAT 6 - {STRUCT_FLD(field_name, "ROW_FORMAT"), - STRUCT_FLD(field_length, 12), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLES_ZIP_PAGE_SIZE 7 - {STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Populate information_schema.innodb_sys_tables table with information -from SYS_TABLES. 
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_tables(
-/*=====================*/
-	THD*		thd,		/*!< in: thread */
-	dict_table_t*	table,		/*!< in: table */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	/* Decode the pieces of table->flags that the row reports */
-	const ulint	is_compact = DICT_TF_GET_COMPACT(table->flags);
-	const ulint	has_atomic_blobs
-		= DICT_TF_HAS_ATOMIC_BLOBS(table->flags);
-	const ulint	zip_page_size = dict_tf_get_zip_size(table->flags);
-
-	const char* const	file_format_name
-		= trx_sys_file_format_id_to_name(has_atomic_blobs);
-
-	/* Row format: not compact -> Redundant; compact without atomic
-	blobs -> Compact; otherwise Compressed when a compressed page
-	shift size is set in the flags, else Dynamic. */
-	const char*	row_format_name;
-
-	if (is_compact && has_atomic_blobs) {
-		row_format_name = (DICT_TF_GET_ZIP_SSIZE(table->flags))
-			? "Compressed"
-			: "Dynamic";
-	} else if (is_compact) {
-		row_format_name = "Compact";
-	} else {
-		row_format_name = "Redundant";
-	}
-
-	DBUG_ENTER("i_s_dict_fill_sys_tables");
-
-	Field** const	row = table_to_fill->field;
-
-	OK(row[SYS_TABLES_ID]->store(longlong(table->id), TRUE));
-
-	OK(field_store_string(row[SYS_TABLES_NAME], table->name));
-
-	OK(row[SYS_TABLES_FLAG]->store(table->flags));
-
-	OK(row[SYS_TABLES_NUM_COLUMN]->store(table->n_cols));
-
-	OK(row[SYS_TABLES_SPACE]->store(table->space));
-
-	OK(field_store_string(row[SYS_TABLES_FILE_FORMAT],
-			      file_format_name));
-
-	OK(field_store_string(row[SYS_TABLES_ROW_FORMAT],
-			      row_format_name));
-
-	OK(row[SYS_TABLES_ZIP_PAGE_SIZE]->store(
-		static_cast<double>(zip_page_size)));
-
-	/* All columns are set: append the row to the I_S table */
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Function to go through each record in SYS_TABLES table, and fill the
-information_schema.innodb_sys_tables table with related table information
-@return 0 on success */
-static
-int
-i_s_sys_tables_fill_table(
-/*======================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-
-	DBUG_ENTER("i_s_sys_tables_fill_table");
-
RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&(dict_sys->mutex)); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); - - while (rec) { - const char* err_msg; - dict_table_t* table_rec; - - /* Create and populate a dict_table_t structure with - information from SYS_TABLES row */ - err_msg = dict_process_sys_tables_rec_and_mtr_commit( - heap, rec, &table_rec, - DICT_TABLE_LOAD_FROM_RECORD, &mtr); - - mutex_exit(&dict_sys->mutex); - - if (!err_msg) { - i_s_dict_fill_sys_tables(thd, table_rec, tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - /* Since dict_process_sys_tables_rec_and_mtr_commit() - is called with DICT_TABLE_LOAD_FROM_RECORD, the table_rec - is created in dict_process_sys_tables_rec(), we will - need to free it */ - if (table_rec) { - dict_mem_table_free(table_rec); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_tables -@return 0 on success */ -static -int -innodb_sys_tables_init( -/*===================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_tables_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_tables_fields_info; - schema->fill_table = i_s_sys_tables_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tables = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - 
STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_TABLES"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_TABLES"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_tables_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** SYS_TABLESTATS ***********************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLESTATS */ -static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] = -{ -#define SYS_TABLESTATS_ID 0 - {STRUCT_FLD(field_name, "TABLE_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_INIT 2 - {STRUCT_FLD(field_name, "STATS_INITIALIZED"), - 
STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_NROW 3 - {STRUCT_FLD(field_name, "NUM_ROWS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_CLUST_SIZE 4 - {STRUCT_FLD(field_name, "CLUST_INDEX_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_INDEX_SIZE 5 - {STRUCT_FLD(field_name, "OTHER_INDEX_SIZE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_MODIFIED 6 - {STRUCT_FLD(field_name, "MODIFIED_COUNTER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_AUTONINC 7 - {STRUCT_FLD(field_name, "AUTOINC"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_TABLESTATS_TABLE_REF_COUNT 8 - {STRUCT_FLD(field_name, "REF_COUNT"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - 
STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	0),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Populate information_schema.innodb_sys_tablestats table with information
-from SYS_TABLES.
-The statistics columns (STATS_INITIALIZED, NUM_ROWS, CLUST_INDEX_SIZE,
-OTHER_INDEX_SIZE, MODIFIED_COUNTER) are read while the table's statistics
-latch is held in shared mode. The latch is released on every path before
-the function returns, including when storing one of those columns fails.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_tablestats(
-/*=========================*/
-	THD*		thd,		/*!< in: thread */
-	dict_table_t*	table,		/*!< in: table */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field**	fields;
-	bool	stats_store_failed;
-
-	DBUG_ENTER("i_s_dict_fill_sys_tablestats");
-
-	fields = table_to_fill->field;
-
-	OK(fields[SYS_TABLESTATS_ID]->store(longlong(table->id), TRUE));
-
-	OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name));
-
-	dict_table_stats_lock(table, RW_S_LATCH);
-
-	/* Do not use OK() while the statistics latch is held: it would
-	return from the function with the latch still locked. Collect the
-	outcome of the stores instead (the || chain stops at the first
-	failing store, as consecutive OK() calls would) and check it once
-	the latch has been released. */
-	if (table->stat_initialized) {
-		stats_store_failed
-			= field_store_string(fields[SYS_TABLESTATS_INIT],
-					     "Initialized")
-			|| fields[SYS_TABLESTATS_NROW]->store(
-				table->stat_n_rows, TRUE)
-			|| fields[SYS_TABLESTATS_CLUST_SIZE]->store(
-				static_cast<double>(
-					table->stat_clustered_index_size))
-			|| fields[SYS_TABLESTATS_INDEX_SIZE]->store(
-				static_cast<double>(
-					table->stat_sum_of_other_index_sizes))
-			|| fields[SYS_TABLESTATS_MODIFIED]->store(
-				static_cast<double>(
-					table->stat_modified_counter));
-	} else {
-		stats_store_failed
-			= field_store_string(fields[SYS_TABLESTATS_INIT],
-					     "Uninitialized")
-			|| fields[SYS_TABLESTATS_NROW]->store(0, TRUE)
-			|| fields[SYS_TABLESTATS_CLUST_SIZE]->store(0)
-			|| fields[SYS_TABLESTATS_INDEX_SIZE]->store(0)
-			|| fields[SYS_TABLESTATS_MODIFIED]->store(0);
-	}
-
-	dict_table_stats_unlock(table, RW_S_LATCH);
-
-	OK(stats_store_failed);
-
-	OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
-
-	OK(fields[SYS_TABLESTATS_TABLE_REF_COUNT]->store(
-		static_cast<double>(table->n_ref_count)));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Function to go through each record in SYS_TABLES table, and fill the
-information_schema.innodb_sys_tablestats table with table statistics
-related information.
-dict_sys->mutex is held only while a SYS_TABLES record is being read: it is
-released before the row is handed to i_s_dict_fill_sys_tablestats() and
-taken again, together with a restarted mini-transaction, to move to the
-next record. A record that cannot be processed is reported as a warning
-and skipped. A caller for which check_global_access() rejects PROCESS_ACL
-gets an empty result (return value 0), not an error.
-@return 0 on success */
-static
-int
-i_s_sys_tables_fill_table_stats(
-/*============================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-
-	DBUG_ENTER("i_s_sys_tables_fill_table_stats");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without PROCESS_ACL privilege */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	heap = mem_heap_create(1000);
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
-
-	while (rec) {
-		const char*	err_msg;
-		dict_table_t*	table_rec;
-
-		/* Fetch the dict_table_t structure corresponding to
-		this SYS_TABLES record. Judging by the helper's name it
-		also commits mtr - TODO confirm against its definition. */
-		err_msg = dict_process_sys_tables_rec_and_mtr_commit(
-			heap, rec, &table_rec,
-			DICT_TABLE_LOAD_FROM_CACHE, &mtr);
-
-		mutex_exit(&dict_sys->mutex);
-
-		/* NOTE(review): table_rec is requested with
-		DICT_TABLE_LOAD_FROM_CACHE and is used below after
-		dict_sys->mutex has been released - confirm that the object
-		cannot be freed in between. The return value of
-		i_s_dict_fill_sys_tablestats() is not checked here. */
-		if (!err_msg) {
-			i_s_dict_fill_sys_tablestats(thd, table_rec,
-						     tables->table);
-		} else {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Get the next record: take the mutex again and start a
-		new mini-transaction before moving the cursor */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_tablestats
-@return 0 on success */
-static
-int
-innodb_sys_tablestats_init(
-/*=======================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-
ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_tablestats_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_tablestats_fields_info; - schema->fill_table = i_s_sys_tables_fill_table_stats; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tablestats = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_TABLESTATS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_TABLESTATS"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_tablestats_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** SYS_INDEXES **************************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_INDEXES */ -static ST_FIELD_INFO innodb_sysindex_fields_info[] = -{ -#define SYS_INDEX_ID 0 - {STRUCT_FLD(field_name, "INDEX_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, 
- -#define SYS_INDEX_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_INDEX_TABLE_ID 2 - {STRUCT_FLD(field_name, "TABLE_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_INDEX_TYPE 3 - {STRUCT_FLD(field_name, "TYPE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_INDEX_NUM_FIELDS 4 - {STRUCT_FLD(field_name, "N_FIELDS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_INDEX_PAGE_NO 5 - {STRUCT_FLD(field_name, "PAGE_NO"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_INDEX_SPACE 6 - {STRUCT_FLD(field_name, "SPACE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to populate the information_schema.innodb_sys_indexes table with -collected index information -@return 0 on 
success */
-static
-int
-i_s_dict_fill_sys_indexes(
-/*======================*/
-	THD*		thd,		/*!< in: thread */
-	table_id_t	table_id,	/*!< in: table id */
-	dict_index_t*	index,		/*!< in: populated dict_index_t
-					struct with index info */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("i_s_dict_fill_sys_indexes");
-
-	Field** const	row = table_to_fill->field;
-
-	/* The name column is written first, then the numeric ones */
-	OK(field_store_index_name(row[SYS_INDEX_NAME], index->name));
-
-	OK(row[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
-
-	OK(row[SYS_INDEX_TABLE_ID]->store(longlong(table_id), TRUE));
-
-	OK(row[SYS_INDEX_TYPE]->store(index->type));
-
-	OK(row[SYS_INDEX_NUM_FIELDS]->store(index->n_fields));
-
-	/* An index whose page is FIL_NULL (ULINT32_UNDEFINED) is
-	reported with PAGE_NO -1 */
-	if (index->page != FIL_NULL) {
-		OK(row[SYS_INDEX_PAGE_NO]->store(index->page));
-	} else {
-		OK(row[SYS_INDEX_PAGE_NO]->store(-1));
-	}
-
-	OK(row[SYS_INDEX_SPACE]->store(index->space));
-
-	/* All columns are set: append the row to the I_S table */
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Function to go through each record in SYS_INDEXES table, and fill the
-information_schema.innodb_sys_indexes table with related index information
-@return 0 on success */
-static
-int
-i_s_sys_indexes_fill_table(
-/*=======================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-
-	DBUG_ENTER("i_s_sys_indexes_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without PROCESS_ACL privilege */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	heap = mem_heap_create(1000);
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	/* Start scan the SYS_INDEXES table */
-	rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
-
-	/* Process each record
in the table */ - while (rec) { - const char* err_msg; - table_id_t table_id; - dict_index_t index_rec; - - /* Populate a dict_index_t structure with information from - a SYS_INDEXES row */ - err_msg = dict_process_sys_indexes_rec(heap, rec, &index_rec, - &table_id); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (!err_msg) { - i_s_dict_fill_sys_indexes(thd, table_id, &index_rec, - tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_indexes -@return 0 on success */ -static -int -innodb_sys_indexes_init( -/*====================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_indexes_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sysindex_fields_info; - schema->fill_table = i_s_sys_indexes_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_indexes = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_INDEXES"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_INDEXES"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the 
function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_indexes_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** SYS_COLUMNS **************************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_COLUMNS */ -static ST_FIELD_INFO innodb_sys_columns_fields_info[] = -{ -#define SYS_COLUMN_TABLE_ID 0 - {STRUCT_FLD(field_name, "TABLE_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_COLUMN_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_COLUMN_POSITION 2 - {STRUCT_FLD(field_name, "POS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_COLUMN_MTYPE 3 - {STRUCT_FLD(field_name, "MTYPE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_COLUMN__PRTYPE 4 - 
{STRUCT_FLD(field_name, "PRTYPE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_COLUMN_COLUMN_LEN 5 - {STRUCT_FLD(field_name, "LEN"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to populate the information_schema.innodb_sys_columns with -related column information -@return 0 on success */ -static -int -i_s_dict_fill_sys_columns( -/*======================*/ - THD* thd, /*!< in: thread */ - table_id_t table_id, /*!< in: table ID */ - const char* col_name, /*!< in: column name */ - dict_col_t* column, /*!< in: dict_col_t struct holding - more column information */ - TABLE* table_to_fill) /*!< in/out: fill this table */ -{ - Field** fields; - - DBUG_ENTER("i_s_dict_fill_sys_columns"); - - fields = table_to_fill->field; - - OK(fields[SYS_COLUMN_TABLE_ID]->store(longlong(table_id), TRUE)); - - OK(field_store_string(fields[SYS_COLUMN_NAME], col_name)); - - OK(fields[SYS_COLUMN_POSITION]->store(column->ind)); - - OK(fields[SYS_COLUMN_MTYPE]->store(column->mtype)); - - OK(fields[SYS_COLUMN__PRTYPE]->store(column->prtype)); - - OK(fields[SYS_COLUMN_COLUMN_LEN]->store(column->len)); - - OK(schema_table_store_record(thd, table_to_fill)); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Function to fill information_schema.innodb_sys_columns with information -collected by scanning SYS_COLUMNS table. 
-@return 0 on success */ -static -int -i_s_sys_columns_fill_table( -/*=======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - btr_pcur_t pcur; - const rec_t* rec; - const char* col_name; - mem_heap_t* heap; - mtr_t mtr; - - DBUG_ENTER("i_s_sys_columns_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_COLUMNS); - - while (rec) { - const char* err_msg; - dict_col_t column_rec; - table_id_t table_id; - - /* populate a dict_col_t structure with information from - a SYS_COLUMNS row */ - err_msg = dict_process_sys_columns_rec(heap, rec, &column_rec, - &table_id, &col_name); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (!err_msg) { - i_s_dict_fill_sys_columns(thd, table_id, col_name, - &column_rec, - tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_columns -@return 0 on success */ -static -int -innodb_sys_columns_init( -/*====================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_columns_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_columns_fields_info; - schema->fill_table = i_s_sys_columns_fill_table; - - 
DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_COLUMNS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_COLUMNS"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_columns_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** SYS_FIELDS ***************************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FIELDS */ -static ST_FIELD_INFO innodb_sys_fields_fields_info[] = -{ -#define SYS_FIELD_INDEX_ID 0 - {STRUCT_FLD(field_name, "INDEX_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FIELD_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - 
STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FIELD_POS 2 - {STRUCT_FLD(field_name, "POS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to fill information_schema.innodb_sys_fields with information -collected by scanning SYS_FIELDS table. -@return 0 on success */ -static -int -i_s_dict_fill_sys_fields( -/*=====================*/ - THD* thd, /*!< in: thread */ - index_id_t index_id, /*!< in: index id for the field */ - dict_field_t* field, /*!< in: table */ - ulint pos, /*!< in: Field position */ - TABLE* table_to_fill) /*!< in/out: fill this table */ -{ - Field** fields; - - DBUG_ENTER("i_s_dict_fill_sys_fields"); - - fields = table_to_fill->field; - - OK(fields[SYS_FIELD_INDEX_ID]->store(longlong(index_id), TRUE)); - - OK(field_store_string(fields[SYS_FIELD_NAME], field->name)); - - OK(fields[SYS_FIELD_POS]->store(static_cast<double>(pos))); - - OK(schema_table_store_record(thd, table_to_fill)); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Function to go through each record in SYS_FIELDS table, and fill the -information_schema.innodb_sys_fields table with related index field -information -@return 0 on success */ -static -int -i_s_sys_fields_fill_table( -/*======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - index_id_t last_id; - mtr_t mtr; - - DBUG_ENTER("i_s_sys_fields_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - 
if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - - /* will save last index id so that we know whether we move to - the next index. This is used to calculate prefix length */ - last_id = 0; - - rec = dict_startscan_system(&pcur, &mtr, SYS_FIELDS); - - while (rec) { - ulint pos; - const char* err_msg; - index_id_t index_id; - dict_field_t field_rec; - - /* Populate a dict_field_t structure with information from - a SYS_FIELDS row */ - err_msg = dict_process_sys_fields_rec(heap, rec, &field_rec, - &pos, &index_id, last_id); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (!err_msg) { - i_s_dict_fill_sys_fields(thd, index_id, &field_rec, - pos, tables->table); - last_id = index_id; - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_fields -@return 0 on success */ -static -int -innodb_sys_fields_init( -/*===================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_field_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_fields_fields_info; - schema->fill_table = i_s_sys_fields_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_fields = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* 
const char* */ - STRUCT_FLD(name, "INNODB_SYS_FIELDS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_FIELDS"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_fields_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** SYS_FOREIGN ********************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN */ -static ST_FIELD_INFO innodb_sys_foreign_fields_info[] = -{ -#define SYS_FOREIGN_ID 0 - {STRUCT_FLD(field_name, "ID"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FOREIGN_FOR_NAME 1 - {STRUCT_FLD(field_name, "FOR_NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FOREIGN_REF_NAME 2 - {STRUCT_FLD(field_name, "REF_NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define 
SYS_FOREIGN_NUM_COL 3 - {STRUCT_FLD(field_name, "N_COLS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FOREIGN_TYPE 4 - {STRUCT_FLD(field_name, "TYPE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to fill information_schema.innodb_sys_foreign with information -collected by scanning SYS_FOREIGN table. -@return 0 on success */ -static -int -i_s_dict_fill_sys_foreign( -/*======================*/ - THD* thd, /*!< in: thread */ - dict_foreign_t* foreign, /*!< in: table */ - TABLE* table_to_fill) /*!< in/out: fill this table */ -{ - Field** fields; - - DBUG_ENTER("i_s_dict_fill_sys_foreign"); - - fields = table_to_fill->field; - - OK(field_store_string(fields[SYS_FOREIGN_ID], foreign->id)); - - OK(field_store_string(fields[SYS_FOREIGN_FOR_NAME], - foreign->foreign_table_name)); - - OK(field_store_string(fields[SYS_FOREIGN_REF_NAME], - foreign->referenced_table_name)); - - OK(fields[SYS_FOREIGN_NUM_COL]->store(foreign->n_fields)); - - OK(fields[SYS_FOREIGN_TYPE]->store(foreign->type)); - - OK(schema_table_store_record(thd, table_to_fill)); - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.innodb_sys_foreign table. Loop -through each record in SYS_FOREIGN, and extract the foreign key -information. 
-@return 0 on success */ -static -int -i_s_sys_foreign_fill_table( -/*=======================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - mtr_t mtr; - - DBUG_ENTER("i_s_sys_foreign_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN); - - while (rec) { - const char* err_msg; - dict_foreign_t foreign_rec; - - /* Populate a dict_foreign_t structure with information from - a SYS_FOREIGN row */ - err_msg = dict_process_sys_foreign_rec(heap, rec, &foreign_rec); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (!err_msg) { - i_s_dict_fill_sys_foreign(thd, &foreign_rec, - tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mtr_start(&mtr); - mutex_enter(&dict_sys->mutex); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign -@return 0 on success */ -static -int -innodb_sys_foreign_init( -/*====================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_foreign_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_foreign_fields_info; - schema->fill_table = i_s_sys_foreign_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign 
= -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_FOREIGN"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_FOREIGN"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_foreign_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** SYS_FOREIGN_COLS ********************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS */ -static ST_FIELD_INFO innodb_sys_foreign_cols_fields_info[] = -{ -#define SYS_FOREIGN_COL_ID 0 - {STRUCT_FLD(field_name, "ID"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FOREIGN_COL_FOR_NAME 1 - {STRUCT_FLD(field_name, "FOR_COL_NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FOREIGN_COL_REF_NAME 
2 - {STRUCT_FLD(field_name, "REF_COL_NAME"), - STRUCT_FLD(field_length, NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define SYS_FOREIGN_COL_POS 3 - {STRUCT_FLD(field_name, "POS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to fill information_schema.innodb_sys_foreign_cols with information -collected by scanning SYS_FOREIGN_COLS table. -@return 0 on success */ -static -int -i_s_dict_fill_sys_foreign_cols( -/*==========================*/ - THD* thd, /*!< in: thread */ - const char* name, /*!< in: foreign key constraint name */ - const char* for_col_name, /*!< in: referencing column name*/ - const char* ref_col_name, /*!< in: referenced column - name */ - ulint pos, /*!< in: column position */ - TABLE* table_to_fill) /*!< in/out: fill this table */ -{ - Field** fields; - - DBUG_ENTER("i_s_dict_fill_sys_foreign_cols"); - - fields = table_to_fill->field; - - OK(field_store_string(fields[SYS_FOREIGN_COL_ID], name)); - - OK(field_store_string(fields[SYS_FOREIGN_COL_FOR_NAME], for_col_name)); - - OK(field_store_string(fields[SYS_FOREIGN_COL_REF_NAME], ref_col_name)); - - OK(fields[SYS_FOREIGN_COL_POS]->store(static_cast<double>(pos))); - - OK(schema_table_store_record(thd, table_to_fill)); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.innodb_sys_foreign_cols table. Loop -through each record in SYS_FOREIGN_COLS, and extract the foreign key column -information and fill the INFORMATION_SCHEMA.innodb_sys_foreign_cols table. 
-@return 0 on success */
-static
-int
-i_s_sys_foreign_cols_fill_table(
-/*============================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-
-	DBUG_ENTER("i_s_sys_foreign_cols_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without PROCESS_ACL privilege; the table is
-	then left empty and 0 is still returned */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	/* The heap is created only after the access check, so the early
-	return above has nothing to free */
-	heap = mem_heap_create(1000);
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN_COLS);
-
-	while (rec) {
-		const char*	err_msg;
-		const char*	name;
-		const char*	for_col_name;
-		const char*	ref_col_name;
-		ulint		pos;
-
-		/* Extract necessary information from a SYS_FOREIGN_COLS row */
-		err_msg = dict_process_sys_foreign_col_rec(
-			heap, rec, &name, &for_col_name, &ref_col_name, &pos);
-
-		/* Commit the mini-transaction and release the dictionary
-		mutex before the row is written to the result table */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (!err_msg) {
-			/* The return value is not checked here */
-			i_s_dict_fill_sys_foreign_cols(
-				thd, name, for_col_name, ref_col_name, pos,
-				tables->table);
-		} else {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		}
-
-		/* Reuse the same heap for the next record */
-		mem_heap_empty(heap);
-
-		/* Get the next record */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	/* The loop exits with the mutex held and the mini-transaction
-	started, so release both here */
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign_cols
-@return 0 on success */
-static
-int
-innodb_sys_foreign_cols_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	ST_SCHEMA_TABLE*	schema;
-
-	DBUG_ENTER("innodb_sys_foreign_cols_init");
-
-	schema = (ST_SCHEMA_TABLE*) p;
-
-	/* Install the column layout and the fill callback */
-	schema->fields_info = innodb_sys_foreign_cols_fields_info;
-	schema->fill_table = i_s_sys_foreign_cols_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS */
-UNIV_INTERN struct st_maria_plugin	i_s_innodb_sys_foreign_cols =
-{
-	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
-	/* int */
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
-	/* pointer to type-specific plugin descriptor */
-	/* void* */
-	STRUCT_FLD(info, &i_s_info),
-
-	/* plugin name */
-	/* const char* */
-	STRUCT_FLD(name, "INNODB_SYS_FOREIGN_COLS"),
-
-	/* plugin author (for SHOW PLUGINS) */
-	/* const char* */
-	STRUCT_FLD(author, plugin_author),
-
-	/* general descriptive text (for SHOW PLUGINS) */
-	/* const char* */
-	STRUCT_FLD(descr, "InnoDB SYS_FOREIGN_COLS"),
-
-	/* the plugin license (PLUGIN_LICENSE_XXX) */
-	/* int */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
-	/* the function to invoke when plugin is loaded */
-	/* int (*)(void*); */
-	STRUCT_FLD(init, innodb_sys_foreign_cols_init),
-
-	/* the function to invoke when plugin is unloaded */
-	/* int (*)(void*); */
-	STRUCT_FLD(deinit, i_s_common_deinit),
-
-	/* plugin version (for SHOW PLUGINS) */
-	/* unsigned int */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
-	/* struct st_mysql_show_var* */
-	STRUCT_FLD(status_vars, NULL),
-
-	/* struct st_mysql_sys_var** */
-	STRUCT_FLD(system_vars, NULL),
-
-	/* version string and maturity level, given positionally */
-	INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_TABLESPACES ********************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.
-Each #define is the array index of the entry that follows it. */
-static ST_FIELD_INFO	innodb_sys_tablespaces_fields_info[] =
-{
-#define SYS_TABLESPACES_SPACE		0
-	{STRUCT_FLD(field_name,		"SPACE"),
-	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_NAME		1
-	{STRUCT_FLD(field_name,		"NAME"),
-	 STRUCT_FLD(field_length,	MAX_FULL_NAME_LEN + 1),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	0),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-	/* The macro is plural (FLAGS) while the column is named "FLAG" */
-#define SYS_TABLESPACES_FLAGS		2
-	{STRUCT_FLD(field_name,		"FLAG"),
-	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_FILE_FORMAT	3
-	{STRUCT_FLD(field_name,		"FILE_FORMAT"),
-	 STRUCT_FLD(field_length,	10),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_ROW_FORMAT	4
-	{STRUCT_FLD(field_name,		"ROW_FORMAT"),
-	 STRUCT_FLD(field_length,	22),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_PAGE_SIZE	5
-	{STRUCT_FLD(field_name,		"PAGE_SIZE"),
-	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_ZIP_PAGE_SIZE	6
-	{STRUCT_FLD(field_name,		"ZIP_PAGE_SIZE"),
-	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-
-};
-
-/**********************************************************************//**
-Function to fill INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES with information
-collected by scanning SYS_TABLESPACES table.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_tablespaces(
-/*==========================*/
-	THD*		thd,		/*!< in: thread */
-	ulint		space,		/*!< in: space ID */
-	const char*	name,		/*!< in: tablespace name */
-	ulint		flags,		/*!< in: tablespace flags */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field**		fields;
-	ulint		atomic_blobs	= FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
-	ulint		page_size	= fsp_flags_get_page_size(flags);
-	ulint		zip_size	= fsp_flags_get_zip_size(flags);
-	const char*	file_format;
-	const char*	row_format;
-
-	DBUG_ENTER("i_s_dict_fill_sys_tablespaces");
-
-	file_format = trx_sys_file_format_id_to_name(atomic_blobs);
-
-	/* Derive the row format label from the tablespace flags. The
-	condition is parenthesized explicitly so that it does not depend
-	on how the macro happens to expand.
-	NOTE(review): a DICT_TF_* macro is applied to tablespace flags
-	here; presumably the compressed-size bits share the same layout
-	in both flag sets - confirm. */
-	if (!atomic_blobs) {
-		row_format = "Compact or Redundant";
-	} else if (DICT_TF_GET_ZIP_SSIZE(flags)) {
-		row_format = "Compressed";
-	} else {
-		row_format = "Dynamic";
-	}
-
-	/* One store per column of the result row.
-	NOTE(review): OK() is defined outside this chunk; presumably it
-	returns early on a non-zero result - confirm. */
-	fields = table_to_fill->field;
-
-	OK(fields[SYS_TABLESPACES_SPACE]->store(
-		static_cast<double>(space)));
-
-	OK(field_store_string(fields[SYS_TABLESPACES_NAME], name));
-
-	OK(fields[SYS_TABLESPACES_FLAGS]->store(
-		static_cast<double>(flags)));
-
-	OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT],
-			      file_format));
-
-	OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT],
-			      row_format));
-
-	OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store(
-		static_cast<double>(page_size)));
-
-	OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store(
-		static_cast<double>(zip_size)));
-
-	/* Append the populated row to the result table */
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
-Loop through each record in SYS_TABLESPACES, and extract the column
-information and fill the INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
-@return 0 on success */
-static
-int
-i_s_sys_tablespaces_fill_table(
-/*===========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	mtr_t		mini_trx;
-	btr_pcur_t	cursor;
-	mem_heap_t*	scratch;
-	const rec_t*	row;
-
-	DBUG_ENTER("i_s_sys_tablespaces_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* Users lacking PROCESS_ACL get an empty table, not an error */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	scratch = mem_heap_create(1000);
-
-	/* Position the cursor on the first SYS_TABLESPACES record while
-	holding the dictionary mutex inside a mini-transaction */
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mini_trx);
-	row = dict_startscan_system(&cursor, &mini_trx, SYS_TABLESPACES);
-
-	while (row != NULL) {
-		ulint		space_id;
-		ulint		space_flags;
-		const char*	space_name;
-		const char*	problem;
-
-		/* Decode the current row into plain values */
-		problem = dict_process_sys_tablespaces(
-			scratch, row, &space_id, &space_name, &space_flags);
-
-		/* Let go of the mini-transaction and the mutex before
-		touching the result table */
-		mtr_commit(&mini_trx);
-		mutex_exit(&dict_sys->mutex);
-
-		if (problem != NULL) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    problem);
-		} else {
-			i_s_dict_fill_sys_tablespaces(
-				thd, space_id, space_name, space_flags,
-				tables->table);
-		}
-
-		mem_heap_empty(scratch);
-
-		/* Re-acquire and step the cursor to the following row */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mini_trx);
-		row = dict_getnext_system(&cursor, &mini_trx);
-	}
-
-	/* Leaving the loop still owns both, so release them */
-	mtr_commit(&mini_trx);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(scratch);
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Hook INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES up to its column list
-and fill function
-@return 0 on success */
-static
-int
-innodb_sys_tablespaces_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	ST_SCHEMA_TABLE*	schema_table;
-
-	DBUG_ENTER("innodb_sys_tablespaces_init");
-
-	schema_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_tablespaces_fill_table;
-	schema_table->fields_info = innodb_sys_tablespaces_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.
-NOTE(review): this descriptor is declared as st_mysql_plugin while the
-other descriptors in this file are st_maria_plugin, yet it sets
-version_info and maturity below - confirm the type matches the extern
-declaration. */
-UNIV_INTERN struct st_mysql_plugin	i_s_innodb_sys_tablespaces =
-{
-	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
-	/* int */
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
-	/* pointer to type-specific plugin descriptor */
-	/* void* */
-	STRUCT_FLD(info, &i_s_info),
-
-	/* plugin name */
-	/* const char* */
-	STRUCT_FLD(name, "INNODB_SYS_TABLESPACES"),
-
-	/* plugin author (for SHOW PLUGINS) */
-	/* const char* */
-	STRUCT_FLD(author, plugin_author),
-
-	/* general descriptive text (for SHOW PLUGINS) */
-	/* const char* */
-	STRUCT_FLD(descr, "InnoDB SYS_TABLESPACES"),
-
-	/* the plugin license (PLUGIN_LICENSE_XXX) */
-	/* int */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
-	/* the function to invoke when plugin is loaded */
-	/* int (*)(void*); */
-	STRUCT_FLD(init, innodb_sys_tablespaces_init),
-
-	/* the function to invoke when plugin is unloaded */
-	/* int (*)(void*); */
-	STRUCT_FLD(deinit, i_s_common_deinit),
-
-	/* plugin version (for SHOW PLUGINS) */
-	/* unsigned int */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
-	/* struct st_mysql_show_var* */
-	STRUCT_FLD(status_vars, NULL),
-
-	/* struct st_mysql_sys_var** */
-	STRUCT_FLD(system_vars, NULL),
-
-	/* version string and maturity level */
-	STRUCT_FLD(version_info, INNODB_VERSION_STR),
-	STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/** SYS_DATAFILES ************************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES.
-Each #define is the array index of the entry that follows it. */
-static ST_FIELD_INFO	innodb_sys_datafiles_fields_info[] =
-{
-#define SYS_DATAFILES_SPACE		0
-	{STRUCT_FLD(field_name,		"SPACE"),
-	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-#define SYS_DATAFILES_PATH		1
-	{STRUCT_FLD(field_name,		"PATH"),
-	 STRUCT_FLD(field_length,	OS_FILE_MAX_PATH),
-	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
-	 STRUCT_FLD(value,		0),
-	 STRUCT_FLD(field_flags,	0),
-	 STRUCT_FLD(old_name,		""),
-	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Function to fill INFORMATION_SCHEMA.INNODB_SYS_DATAFILES with information
-collected by scanning SYS_DATAFILES table.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_datafiles(
-/*========================*/
-	THD*		thd,		/*!< in: thread */
-	ulint		space,		/*!< in: space ID */
-	const char*	path,		/*!< in: absolute path */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field**		fields;
-
-	DBUG_ENTER("i_s_dict_fill_sys_datafiles");
-
-	/* One store per column of the result row.
-	NOTE(review): OK() is defined outside this chunk; presumably it
-	returns early on a non-zero result - confirm. */
-	fields = table_to_fill->field;
-
-	OK(field_store_ulint(fields[SYS_DATAFILES_SPACE], space));
-
-	OK(field_store_string(fields[SYS_DATAFILES_PATH], path));
-
-	/* Append the populated row to the result table */
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_SYS_DATAFILES table.
-Loop through each record in SYS_DATAFILES, and extract the column
-information and fill the INFORMATION_SCHEMA.INNODB_SYS_DATAFILES table.
-@return 0 on success */ -static -int -i_s_sys_datafiles_fill_table( -/*=========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - mtr_t mtr; - - DBUG_ENTER("i_s_sys_datafiles_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL, true)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_DATAFILES); - - while (rec) { - const char* err_msg; - ulint space; - const char* path; - - /* Extract necessary information from a SYS_DATAFILES row */ - err_msg = dict_process_sys_datafiles( - heap, rec, &space, &path); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (!err_msg) { - i_s_dict_fill_sys_datafiles( - thd, space, path, tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES -@return 0 on success */ -static -int -innodb_sys_datafiles_init( -/*======================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_datafiles_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_datafiles_fields_info; - schema->fill_table = i_s_sys_datafiles_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin 
i_s_innodb_sys_datafiles = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_DATAFILES"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_DATAFILES"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_datafiles_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -static ST_FIELD_INFO i_s_innodb_changed_pages_info[] = -{ - {STRUCT_FLD(field_name, "space_id"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "page_id"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "start_lsn"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 
0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "end_lsn"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*********************************************************************** - This function implements ICP for I_S.INNODB_CHANGED_PAGES by parsing a - condition and getting lower and upper bounds for start and end LSNs if the - condition corresponds to a certain pattern. - - In the most general form, we understand queries like - - SELECT * FROM INNODB_CHANGED_PAGES - WHERE START_LSN > num1 AND START_LSN < num2 - AND END_LSN > num3 AND END_LSN < num4; - - That's why the pattern syntax is: - - pattern: comp | and_comp; - comp: lsn < int_num | lsn <= int_num | int_num > lsn | int_num >= lsn; - lsn: start_lsn | end_lsn; - and_comp: expression AND expression | expression AND and_comp; - expression: comp | any_other_expression; - - The two bounds are handled differently: the lower bound is used to find the - correct starting _file_, the upper bound the last _block_ that needs reading. - - Lower bound conditions are handled in the following way: start_lsn >= X - specifies that the reading must start from the file that has the highest - starting LSN less than or equal to X. start_lsn > X is equivalent to - start_lsn >= X + 1. For end_lsn, end_lsn >= X is treated as - start_lsn >= X - 1 and end_lsn > X as start_lsn >= X. - - For the upper bound, suppose the condition is start_lsn < 100, this means we - have to read all blocks with start_lsn < 100. Which is equivalent to reading - all the blocks with end_lsn <= 99, or just end_lsn < 100. 
That's why it's - enough to find maximum lsn value, doesn't matter if this is start or end lsn - and compare it with "start_lsn" field. LSN <= 100 is treated as LSN < 101. - - Example: - - SELECT * FROM INNODB_CHANGED_PAGES - WHERE - start_lsn > 10 AND - end_lsn <= 1111 AND - 555 > end_lsn AND - page_id = 100; - - end_lsn will be set to 555, start_lsn will be set 11. - - Support for other functions (equal, NULL-safe equal, BETWEEN, IN, etc.) will - be added on demand. - -*/ -static -void -limit_lsn_range_from_condition( -/*===========================*/ - TABLE* table, /*!<in: table */ - Item* cond, /*!<in: condition */ - ib_uint64_t* start_lsn, /*!<in/out: minumum LSN */ - ib_uint64_t* end_lsn) /*!<in/out: maximum LSN */ -{ - enum Item_func::Functype func_type; - - if (cond->type() != Item::COND_ITEM && - cond->type() != Item::FUNC_ITEM) - return; - - func_type = ((Item_func*) cond)->functype(); - - switch (func_type) - { - case Item_func::COND_AND_FUNC: - { - List_iterator<Item> li(*((Item_cond*) cond) - ->argument_list()); - Item *item; - - while ((item= li++)) { - limit_lsn_range_from_condition(table, item, start_lsn, - end_lsn); - } - break; - } - case Item_func::LT_FUNC: - case Item_func::LE_FUNC: - case Item_func::GT_FUNC: - case Item_func::GE_FUNC: - { - Item *left; - Item *right; - Item_field *item_field; - ib_uint64_t tmp_result; - ibool is_end_lsn; - - /* a <= b equals to b >= a that's why we just exchange "left" - and "right" in the case of ">" or ">=" function. We don't - touch the operation itself. 
*/ - if (((Item_func*) cond)->functype() == Item_func::LT_FUNC - || ((Item_func*) cond)->functype() == Item_func::LE_FUNC) { - left = ((Item_func*) cond)->arguments()[0]; - right = ((Item_func*) cond)->arguments()[1]; - } else { - left = ((Item_func*) cond)->arguments()[1]; - right = ((Item_func*) cond)->arguments()[0]; - } - - if (left->type() == Item::FIELD_ITEM) { - item_field = (Item_field *)left; - } else if (right->type() == Item::FIELD_ITEM) { - item_field = (Item_field *)right; - } else { - return; - } - - /* Check if the current field belongs to our table */ - if (table != item_field->field->table) { - return; - } - - /* Check if the field is START_LSN or END_LSN */ - /* END_LSN */ - is_end_lsn = table->field[3]->eq(item_field->field); - - if (/* START_LSN */ !table->field[2]->eq(item_field->field) - && !is_end_lsn) { - return; - } - - if (left->type() == Item::FIELD_ITEM - && right->type() == Item::INT_ITEM) { - - /* The case of start_lsn|end_lsn <|<= const - "end_lsn <=? const" gives a valid upper bound. - "start_lsn <=? const" is not a valid upper bound. - */ - - if (is_end_lsn) { - tmp_result = right->val_int(); - if (((func_type == Item_func::LE_FUNC) - || (func_type == Item_func::GE_FUNC)) - && (tmp_result != IB_UINT64_MAX)) { - - tmp_result++; - } - if (tmp_result < *end_lsn) { - *end_lsn = tmp_result; - } - } - - } else if (left->type() == Item::INT_ITEM - && right->type() == Item::FIELD_ITEM) { - - /* The case of const <|<= start_lsn|end_lsn - turning it around: start_lsn|end_lsn >|>= const - "start_lsn >=? const " is a valid loer bound. - "end_lsn >=? const" is not a valid lower bound. 
- */ - - if (!is_end_lsn) { - tmp_result = left->val_int(); - if (is_end_lsn && tmp_result != 0) { - tmp_result--; - } - if (((func_type == Item_func::LT_FUNC) - || (func_type == Item_func::GT_FUNC)) - && (tmp_result != IB_UINT64_MAX)) { - - tmp_result++; - } - if (tmp_result > *start_lsn) { - *start_lsn = tmp_result; - } - } - } - - break; - } - default:; - } -} - -/*********************************************************************** -Fill the dynamic table information_schema.innodb_changed_pages. -@return 0 on success, 1 on failure */ -static -int -i_s_innodb_changed_pages_fill( -/*==========================*/ - THD* thd, /*!<in: thread */ - TABLE_LIST* tables, /*!<in/out: tables to fill */ - Item* cond) /*!<in: condition */ -{ - TABLE* table = (TABLE *) tables->table; - log_bitmap_iterator_t i; - ib_uint64_t output_rows_num = 0UL; - lsn_t max_lsn = LSN_MAX; - lsn_t min_lsn = 0ULL; - int ret = 0; - - DBUG_ENTER("i_s_innodb_changed_pages_fill"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL, true)) { - - DBUG_RETURN(0); - } - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - if (cond) { - limit_lsn_range_from_condition(table, cond, &min_lsn, - &max_lsn); - } - - /* If the log tracker is running and our max_lsn > current tracked LSN, - cap the max lsn so that we don't try to read any partial runs as the - tracked LSN advances. 
*/ - if (srv_track_changed_pages) { - ib_uint64_t tracked_lsn = log_get_tracked_lsn(); - if (max_lsn > tracked_lsn) - max_lsn = tracked_lsn; - } - - if (!log_online_bitmap_iterator_init(&i, min_lsn, max_lsn)) { - my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0)); - DBUG_RETURN(1); - } - - while(log_online_bitmap_iterator_next(&i) && - (!srv_max_changed_pages || - output_rows_num < srv_max_changed_pages)) - { - if (!LOG_BITMAP_ITERATOR_PAGE_CHANGED(i)) - continue; - - /* SPACE_ID */ - table->field[0]->store( - LOG_BITMAP_ITERATOR_SPACE_ID(i)); - /* PAGE_ID */ - table->field[1]->store( - LOG_BITMAP_ITERATOR_PAGE_NUM(i)); - /* START_LSN */ - table->field[2]->store( - LOG_BITMAP_ITERATOR_START_LSN(i), true); - /* END_LSN */ - table->field[3]->store( - LOG_BITMAP_ITERATOR_END_LSN(i), true); - - /* - I_S tables are in-memory tables. If bitmap file is big enough - a lot of memory can be used to store the table. But the size - of used memory can be diminished if we store only data which - corresponds to some conditions (in WHERE sql clause). Here - conditions are checked for the field values stored above. - - Conditions are checked twice. The first is here (during table - generation) and the second during query execution. Maybe it - makes sense to use some flag in THD object to avoid double - checking. 
- */ - if (cond && !cond->val_int()) - continue; - - if (schema_table_store_record(thd, table)) - { - log_online_bitmap_iterator_release(&i); - my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0)); - DBUG_RETURN(1); - } - - ++output_rows_num; - } - - if (i.failed) { - my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0)); - ret = 1; - } - - log_online_bitmap_iterator_release(&i); - DBUG_RETURN(ret); -} - -static -int -i_s_innodb_changed_pages_init( -/*==========================*/ - void* p) -{ - DBUG_ENTER("i_s_innodb_changed_pages_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_innodb_changed_pages_info; - schema->fill_table = i_s_innodb_changed_pages_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_changed_pages = -{ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - STRUCT_FLD(info, &i_s_info), - STRUCT_FLD(name, "INNODB_CHANGED_PAGES"), - STRUCT_FLD(author, "Percona"), - STRUCT_FLD(descr, "InnoDB CHANGED_PAGES table"), - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - STRUCT_FLD(init, i_s_innodb_changed_pages_init), - STRUCT_FLD(deinit, i_s_common_deinit), - STRUCT_FLD(version, 0x0100 /* 1.0 */), - STRUCT_FLD(status_vars, NULL), - STRUCT_FLD(system_vars, NULL), - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** TABLESPACES_ENCRYPTION ********************************************/ -/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION */ -static ST_FIELD_INFO innodb_tablespaces_encryption_fields_info[] = -{ -#define TABLESPACES_ENCRYPTION_SPACE 0 - {STRUCT_FLD(field_name, "SPACE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - 
STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME 2 - {STRUCT_FLD(field_name, "ENCRYPTION_SCHEME"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS 3 - {STRUCT_FLD(field_name, "KEYSERVER_REQUESTS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_MIN_KEY_VERSION 4 - {STRUCT_FLD(field_name, "MIN_KEY_VERSION"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION 5 - {STRUCT_FLD(field_name, "CURRENT_KEY_VERSION"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER 6 - {STRUCT_FLD(field_name, "KEY_ROTATION_PAGE_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER 7 - {STRUCT_FLD(field_name, 
"KEY_ROTATION_MAX_PAGE_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_CURRENT_KEY_ID 8 - {STRUCT_FLD(field_name, "CURRENT_KEY_ID"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING 9 - {STRUCT_FLD(field_name, "ROTATING_OR_FLUSHING"), - STRUCT_FLD(field_length, 1), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION -with information collected by scanning SYS_TABLESPACES table. 
-@param[in] thd thread handle -@param[in] space Tablespace -@param[in] table_to_fill I_S table to fill -@return 0 on success */ -static -int -i_s_dict_fill_tablespaces_encryption( - THD* thd, - fil_space_t* space, - TABLE* table_to_fill) -{ - Field** fields; - struct fil_space_crypt_status_t status; - - DBUG_ENTER("i_s_dict_fill_tablespaces_encryption"); - - fields = table_to_fill->field; - - fil_space_crypt_get_status(space, &status); - - OK(fields[TABLESPACES_ENCRYPTION_SPACE]->store(space->id)); - - OK(field_store_string(fields[TABLESPACES_ENCRYPTION_NAME], - space->name)); - - OK(fields[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store( - status.scheme)); - OK(fields[TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS]->store( - status.keyserver_requests)); - OK(fields[TABLESPACES_ENCRYPTION_MIN_KEY_VERSION]->store( - status.min_key_version)); - OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION]->store( - status.current_key_version)); - OK(fields[TABLESPACES_ENCRYPTION_CURRENT_KEY_ID]->store( - status.key_id)); - OK(fields[TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING]->store( - (status.rotating || status.flushing) ? 1 : 0)); - - if (status.rotating) { - fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->set_notnull(); - OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER]->store( - status.rotate_next_page_number)); - fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->set_notnull(); - OK(fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER]->store( - status.rotate_max_page_number)); - } else { - fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER] - ->set_null(); - fields[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER] - ->set_null(); - } - - OK(schema_table_store_record(thd, table_to_fill)); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table. 
-Loop through each record in TABLESPACES_ENCRYPTION, and extract the column -information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table. -@return 0 on success */ -static -int -i_s_tablespaces_encryption_fill_table( -/*===========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - mtr_t mtr; - bool found_space_0 = false; - - DBUG_ENTER("i_s_tablespaces_encryption_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, SUPER_ACL)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); - - while (rec) { - const char* err_msg; - ulint space_id; - const char* name; - ulint flags; - - /* Extract necessary information from a SYS_TABLESPACES row */ - err_msg = dict_process_sys_tablespaces( - heap, rec, &space_id, &name, &flags); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (space_id == 0) { - found_space_0 = true; - } - - fil_space_t* space = fil_space_acquire_silent(space_id); - - if (!err_msg && space) { - i_s_dict_fill_tablespaces_encryption( - thd, space, tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - if (space) { - fil_space_release(space); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - if (found_space_0 == false) { - /* space 0 does for what ever unknown reason not show up - * in iteration above, add it manually */ - - fil_space_t* space = fil_space_acquire_silent(0); - - 
i_s_dict_fill_tablespaces_encryption( - thd, space, tables->table); - - fil_space_release(space); - } - - DBUG_RETURN(0); -} -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION -@return 0 on success */ -static -int -innodb_tablespaces_encryption_init( -/*========================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_tablespaces_encryption_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_tablespaces_encryption_fields_info; - schema->fill_table = i_s_tablespaces_encryption_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_encryption = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_TABLESPACES_ENCRYPTION"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, "Google Inc"), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB TABLESPACES_ENCRYPTION"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_BSD), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_tablespaces_encryption_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE -}; - -/** 
TABLESPACES_SCRUBBING ********************************************/ -/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING */ -static ST_FIELD_INFO innodb_tablespaces_scrubbing_fields_info[] = -{ -#define TABLESPACES_SCRUBBING_SPACE 0 - {STRUCT_FLD(field_name, "SPACE"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_NAME 1 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_COMPRESSED 2 - {STRUCT_FLD(field_name, "COMPRESSED"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED 3 - {STRUCT_FLD(field_name, "LAST_SCRUB_COMPLETED"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED 4 - {STRUCT_FLD(field_name, "CURRENT_SCRUB_STARTED"), - STRUCT_FLD(field_length, 0), - STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS 5 - {STRUCT_FLD(field_name, "CURRENT_SCRUB_ACTIVE_THREADS"), - STRUCT_FLD(field_length, 
MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER 6 - {STRUCT_FLD(field_name, "CURRENT_SCRUB_PAGE_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER 7 - {STRUCT_FLD(field_name, "CURRENT_SCRUB_MAX_PAGE_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/**********************************************************************//** -Function to fill INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING -with information collected by scanning SYS_TABLESPACES table and -fil_space. -@param[in] thd Thread handle -@param[in] space Tablespace -@param[in] table_to_fill I_S table -@return 0 on success */ -static -int -i_s_dict_fill_tablespaces_scrubbing( - THD* thd, - fil_space_t* space, - TABLE* table_to_fill) -{ - Field** fields; - struct fil_space_scrub_status_t status; - - DBUG_ENTER("i_s_dict_fill_tablespaces_scrubbing"); - - fields = table_to_fill->field; - - fil_space_get_scrub_status(space, &status); - - OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space->id)); - - OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME], - space->name)); - - OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store( - status.compressed ? 
1 : 0)); - - if (status.last_scrub_completed == 0) { - fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]->set_null(); - } else { - fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED] - ->set_notnull(); - OK(field_store_time_t( - fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED], - status.last_scrub_completed)); - } - - int field_numbers[] = { - TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED, - TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS, - TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER, - TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER }; - - if (status.scrubbing) { - for (uint i = 0; i < array_elements(field_numbers); i++) { - fields[field_numbers[i]]->set_notnull(); - } - - OK(field_store_time_t( - fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED], - status.current_scrub_started)); - OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS] - ->store(status.current_scrub_active_threads)); - OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER] - ->store(status.current_scrub_page_number)); - OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER] - ->store(status.current_scrub_max_page_number)); - } else { - for (uint i = 0; i < array_elements(field_numbers); i++) { - fields[field_numbers[i]]->set_null(); - } - } - - OK(schema_table_store_record(thd, table_to_fill)); - - DBUG_RETURN(0); -} -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table. -Loop through each record in TABLESPACES_SCRUBBING, and extract the column -information and fill the INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table. 
-@return 0 on success */ -static -int -i_s_tablespaces_scrubbing_fill_table( -/*===========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - btr_pcur_t pcur; - const rec_t* rec; - mem_heap_t* heap; - mtr_t mtr; - bool found_space_0 = false; - - DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without SUPER_ACL privilege */ - if (check_global_access(thd, SUPER_ACL)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(1000); - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - - rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES); - - while (rec) { - const char* err_msg; - ulint space_id; - const char* name; - ulint flags; - - /* Extract necessary information from a SYS_TABLESPACES row */ - err_msg = dict_process_sys_tablespaces( - heap, rec, &space_id, &name, &flags); - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - - if (space_id == 0) { - found_space_0 = true; - } - - fil_space_t* space = fil_space_acquire_silent(space_id); - - if (!err_msg && space) { - i_s_dict_fill_tablespaces_scrubbing( - thd, space, tables->table); - } else { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_CANT_FIND_SYSTEM_REC, "%s", - err_msg); - } - - if (space) { - fil_space_release(space); - } - - mem_heap_empty(heap); - - /* Get the next record */ - mutex_enter(&dict_sys->mutex); - mtr_start(&mtr); - rec = dict_getnext_system(&pcur, &mtr); - } - - mtr_commit(&mtr); - mutex_exit(&dict_sys->mutex); - mem_heap_free(heap); - - if (found_space_0 == false) { - /* space 0 does for what ever unknown reason not show up - * in iteration above, add it manually */ - fil_space_t* space = fil_space_acquire_silent(0); - - i_s_dict_fill_tablespaces_scrubbing( - thd, space, tables->table); - - fil_space_release(space); - } - - DBUG_RETURN(0); -} 
-/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING -@return 0 on success */ -static -int -innodb_tablespaces_scrubbing_init( -/*========================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_tablespaces_scrubbing_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_tablespaces_scrubbing_fields_info; - schema->fill_table = i_s_tablespaces_scrubbing_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_scrubbing = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_TABLESPACES_SCRUBBING"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, "Google Inc"), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB TABLESPACES_SCRUBBING"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_BSD), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_tablespaces_scrubbing_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - /* Maria extension */ - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE) -}; - -/** INNODB_MUTEXES *********************************************/ -/* Fields 
of the dynamic table INFORMATION_SCHEMA.INNODB_MUTEXES */ -static ST_FIELD_INFO innodb_mutexes_fields_info[] = -{ -#define MUTEXES_NAME 0 - {STRUCT_FLD(field_name, "NAME"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, -#define MUTEXES_CREATE_FILE 1 - {STRUCT_FLD(field_name, "CREATE_FILE"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, -#define MUTEXES_CREATE_LINE 2 - {STRUCT_FLD(field_name, "CREATE_LINE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, -#define MUTEXES_OS_WAITS 3 - {STRUCT_FLD(field_name, "OS_WAITS"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.INNODB_MUTEXES table. -Loop through each record in mutex and rw_lock lists, and extract the column -information and fill the INFORMATION_SCHEMA.INNODB_MUTEXES table. 
-@return 0 on success */ -static -int -i_s_innodb_mutexes_fill_table( -/*==========================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - ib_mutex_t* mutex; - rw_lock_t* lock; - ulint block_mutex_oswait_count = 0; - ulint block_lock_oswait_count = 0; - ib_mutex_t* block_mutex = NULL; - rw_lock_t* block_lock = NULL; - Field** fields = tables->table->field; - - DBUG_ENTER("i_s_innodb_mutexes_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(0); - } - - mutex_enter(&mutex_list_mutex); - - for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL; - mutex = UT_LIST_GET_NEXT(list, mutex)) { - if (mutex->count_os_wait == 0) { - continue; - } - - if (buf_pool_is_block_mutex(mutex)) { - block_mutex = mutex; - block_mutex_oswait_count += mutex->count_os_wait; - continue; - } - - OK(field_store_string(fields[MUTEXES_NAME], mutex->cmutex_name)); - OK(field_store_string(fields[MUTEXES_CREATE_FILE], innobase_basename(mutex->cfile_name))); - OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], mutex->cline)); - OK(field_store_ulint(fields[MUTEXES_OS_WAITS], (longlong)mutex->count_os_wait)); - OK(schema_table_store_record(thd, tables->table)); - } - - if (block_mutex) { - char buf1[IO_SIZE]; - - my_snprintf(buf1, sizeof buf1, "combined %s", - innobase_basename(block_mutex->cfile_name)); - - OK(field_store_string(fields[MUTEXES_NAME], block_mutex->cmutex_name)); - OK(field_store_string(fields[MUTEXES_CREATE_FILE], buf1)); - OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], block_mutex->cline)); - OK(field_store_ulint(fields[MUTEXES_OS_WAITS], (longlong)block_mutex_oswait_count)); - OK(schema_table_store_record(thd, tables->table)); - } - - mutex_exit(&mutex_list_mutex); - - mutex_enter(&rw_lock_list_mutex); - - for (lock = 
UT_LIST_GET_FIRST(rw_lock_list); lock != NULL; - lock = UT_LIST_GET_NEXT(list, lock)) { - if (lock->count_os_wait == 0) { - continue; - } - - if (buf_pool_is_block_lock(lock)) { - block_lock = lock; - block_lock_oswait_count += lock->count_os_wait; - continue; - } - - OK(field_store_string(fields[MUTEXES_NAME], lock->lock_name)); - OK(field_store_string(fields[MUTEXES_CREATE_FILE], innobase_basename(lock->cfile_name))); - OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], lock->cline)); - OK(field_store_ulint(fields[MUTEXES_OS_WAITS], (longlong)lock->count_os_wait)); - OK(schema_table_store_record(thd, tables->table)); - } - - if (block_lock) { - char buf1[IO_SIZE]; - - my_snprintf(buf1, sizeof buf1, "combined %s", - innobase_basename(block_lock->cfile_name)); - - OK(field_store_string(fields[MUTEXES_NAME], block_lock->lock_name)); - OK(field_store_string(fields[MUTEXES_CREATE_FILE], buf1)); - OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], block_lock->cline)); - OK(field_store_ulint(fields[MUTEXES_OS_WAITS], (longlong)block_lock_oswait_count)); - OK(schema_table_store_record(thd, tables->table)); - } - - mutex_exit(&rw_lock_list_mutex); - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_MUTEXES -@return 0 on success */ -static -int -innodb_mutexes_init( -/*================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_mutexes_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_mutexes_fields_info; - schema->fill_table = i_s_innodb_mutexes_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_mutexes = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - 
STRUCT_FLD(name, "INNODB_MUTEXES"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_DATAFILES"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_mutexes_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - /* Maria extension */ - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -/** SYS_SEMAPHORE_WAITS ************************************************/ -/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS */ -static ST_FIELD_INFO innodb_sys_semaphore_waits_fields_info[] = -{ - // SYS_SEMAPHORE_WAITS_THREAD_ID 0 - {STRUCT_FLD(field_name, "THREAD_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_OBJECT_NAME 1 - {STRUCT_FLD(field_name, "OBJECT_NAME"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_FILE 2 - {STRUCT_FLD(field_name, "FILE"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, 
MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_LINE 3 - {STRUCT_FLD(field_name, "LINE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_WAIT_TIME 4 - {STRUCT_FLD(field_name, "WAIT_TIME"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_WAIT_OBJECT 5 - {STRUCT_FLD(field_name, "WAIT_OBJECT"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_WAIT_TYPE 6 - {STRUCT_FLD(field_name, "WAIT_TYPE"), - STRUCT_FLD(field_length, 16), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID 7 - {STRUCT_FLD(field_name, "HOLDER_THREAD_ID"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_HOLDER_FILE 8 - {STRUCT_FLD(field_name, "HOLDER_FILE"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 
MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_HOLDER_LINE 9 - {STRUCT_FLD(field_name, "HOLDER_LINE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_CREATED_FILE 10 - {STRUCT_FLD(field_name, "CREATED_FILE"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_CREATED_LINE 11 - {STRUCT_FLD(field_name, "CREATED_LINE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_WRITER_THREAD 12 - {STRUCT_FLD(field_name, "WRITER_THREAD"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_RESERVATION_MODE 13 - {STRUCT_FLD(field_name, "RESERVATION_MODE"), - STRUCT_FLD(field_length, 16), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_READERS 14 - {STRUCT_FLD(field_name, "READERS"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15 - {STRUCT_FLD(field_name, "WAITERS_FLAG"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_LOCK_WORD 16 - {STRUCT_FLD(field_name, "LOCK_WORD"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_LAST_READER_FILE 17 - {STRUCT_FLD(field_name, "LAST_READER_FILE"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_LAST_READER_LINE 18 - {STRUCT_FLD(field_name, "LAST_READER_LINE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 19 - {STRUCT_FLD(field_name, "LAST_WRITER_FILE"), - STRUCT_FLD(field_length, OS_FILE_MAX_PATH), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 20 - {STRUCT_FLD(field_name, "LAST_WRITER_LINE"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - 
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - // SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 21 - {STRUCT_FLD(field_name, "OS_WAIT_COUNT"), - STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - - - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS -@return 0 on success */ -static -int -innodb_sys_semaphore_waits_init( -/*============================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_sys_semaphore_waits_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_sys_semaphore_waits_fields_info; - schema->fill_table = sync_arr_fill_sys_semphore_waits_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_semaphore_waits = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "INNODB_SYS_SEMAPHORE_WAITS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, maria_plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "InnoDB SYS_SEMAPHORE_WAITS"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_sys_semaphore_waits_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned 
int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - /* Maria extension */ - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -static ST_FIELD_INFO innodb_changed_page_bitmaps_fields_info[] = -{ - {STRUCT_FLD(field_name, "dummy"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - END_OF_ST_FIELD_INFO -}; - -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.CHANGED_PAGE_BITMAPS -@return 0 on success */ -static -int -fill_changed_page_bitmaps_table( -/*============================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - Field** fields = tables->table->field; - DBUG_ENTER("fill_changed_page_bitmaps"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(0); - } - OK(field_store_ulint(fields[0], 0)); - OK(schema_table_store_record(thd, tables->table)); - - DBUG_RETURN(0); -} - -/*******************************************************************//** -Flush support for changed_page_bitmaps table. 
-@return 0 on success */ -static -int -flush_changed_page_bitmaps() -/*========================*/ -{ - DBUG_ENTER("flush_changed_page_bitmaps"); - if (srv_track_changed_pages) { - os_event_reset(srv_checkpoint_completed_event); - log_online_follow_redo_log(); - } - DBUG_RETURN(0); -} - -/*******************************************************************//** -Bind the dynamic table INFORMATION_SCHEMA.CHANGED_PAGE_BITMAP -@return 0 on success */ -static -int -innodb_changed_page_bitmaps_init( -/*=============================*/ - void* p) /*!< in/out: table schema object */ -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("innodb_changed_page_bitmaps_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = innodb_changed_page_bitmaps_fields_info; - schema->fill_table = fill_changed_page_bitmaps_table; - schema->reset_table= flush_changed_page_bitmaps; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_innodb_changed_page_bitmaps = -{ - /* the plugin type (a MYSQL_XXX_PLUGIN value) */ - /* int */ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - - /* pointer to type-specific plugin descriptor */ - /* void* */ - STRUCT_FLD(info, &i_s_info), - - /* plugin name */ - /* const char* */ - STRUCT_FLD(name, "CHANGED_PAGE_BITMAPS"), - - /* plugin author (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(author, maria_plugin_author), - - /* general descriptive text (for SHOW PLUGINS) */ - /* const char* */ - STRUCT_FLD(descr, "XtraDB dummy changed_page_bitmaps table"), - - /* the plugin license (PLUGIN_LICENSE_XXX) */ - /* int */ - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - - /* the function to invoke when plugin is loaded */ - /* int (*)(void*); */ - STRUCT_FLD(init, innodb_changed_page_bitmaps_init), - - /* the function to invoke when plugin is unloaded */ - /* int (*)(void*); */ - STRUCT_FLD(deinit, i_s_common_deinit), - - /* plugin version (for SHOW PLUGINS) */ - /* unsigned int */ - STRUCT_FLD(version, INNODB_VERSION_SHORT), - - /* struct 
st_mysql_show_var* */ - STRUCT_FLD(status_vars, NULL), - - /* struct st_mysql_sys_var** */ - STRUCT_FLD(system_vars, NULL), - - /* Maria extension */ - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h deleted file mode 100644 index 4bb3ea33462..00000000000 --- a/storage/xtradb/handler/i_s.h +++ /dev/null @@ -1,159 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyrigth (c) 2014, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file handler/i_s.h -InnoDB INFORMATION SCHEMA tables interface to MySQL. 
- -Created July 18, 2007 Vasil Dimov -Modified Dec 29, 2014 Jan Lindström -*******************************************************/ - -#ifndef i_s_h -#define i_s_h - -const char plugin_author[] = "Oracle Corporation"; -const char maria_plugin_author[] = "MariaDB Corporation"; - -#define st_mysql_plugin st_maria_plugin - -extern struct st_maria_plugin i_s_innodb_trx; -extern struct st_mysql_plugin i_s_innodb_trx; -extern struct st_mysql_plugin i_s_innodb_locks; -extern struct st_mysql_plugin i_s_innodb_lock_waits; -extern struct st_mysql_plugin i_s_innodb_cmp; -extern struct st_mysql_plugin i_s_innodb_cmp_reset; -extern struct st_mysql_plugin i_s_innodb_cmp_per_index; -extern struct st_mysql_plugin i_s_innodb_cmp_per_index_reset; -extern struct st_mysql_plugin i_s_innodb_cmpmem; -extern struct st_mysql_plugin i_s_innodb_cmpmem_reset; -extern struct st_mysql_plugin i_s_innodb_metrics; -extern struct st_mysql_plugin i_s_innodb_ft_default_stopword; -extern struct st_mysql_plugin i_s_innodb_ft_deleted; -extern struct st_mysql_plugin i_s_innodb_ft_being_deleted; -extern struct st_mysql_plugin i_s_innodb_ft_index_cache; -extern struct st_mysql_plugin i_s_innodb_ft_index_table; -extern struct st_mysql_plugin i_s_innodb_ft_config; -extern struct st_mysql_plugin i_s_innodb_buffer_page; -extern struct st_mysql_plugin i_s_innodb_buffer_page_lru; -extern struct st_mysql_plugin i_s_innodb_buffer_stats; -extern struct st_mysql_plugin i_s_innodb_sys_tables; -extern struct st_mysql_plugin i_s_innodb_sys_tablestats; -extern struct st_mysql_plugin i_s_innodb_sys_indexes; -extern struct st_mysql_plugin i_s_innodb_sys_columns; -extern struct st_mysql_plugin i_s_innodb_sys_fields; -extern struct st_mysql_plugin i_s_innodb_sys_foreign; -extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols; -extern struct st_mysql_plugin i_s_innodb_sys_tablespaces; -extern struct st_mysql_plugin i_s_innodb_sys_datafiles; -extern struct st_mysql_plugin i_s_innodb_changed_pages; -extern struct 
st_mysql_plugin i_s_innodb_mutexes; -extern struct st_maria_plugin i_s_innodb_tablespaces_encryption; -extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing; -extern struct st_mysql_plugin i_s_innodb_sys_semaphore_waits; -extern struct st_mysql_plugin i_s_innodb_changed_page_bitmaps; - -/** maximum number of buffer page info we would cache. */ -#define MAX_BUF_INFO_CACHED 10000 - -#define OK(expr) \ - if ((expr) != 0) { \ - DBUG_RETURN(1); \ - } - -#define BREAK_IF(expr) if ((expr)) break - -#define RETURN_IF_INNODB_NOT_STARTED(plugin_name) \ -do { \ - if (!srv_was_started) { \ - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, \ - ER_CANT_FIND_SYSTEM_REC, \ - "InnoDB: SELECTing from " \ - "INFORMATION_SCHEMA.%s but " \ - "the InnoDB storage engine " \ - "is not installed", plugin_name); \ - DBUG_RETURN(0); \ - } \ -} while (0) - -#if !defined __STRICT_ANSI__ && defined __GNUC__ && (__GNUC__) > 2 && !defined __INTEL_COMPILER && !defined __clang__ -#ifdef HAVE_C99_INITIALIZERS -#define STRUCT_FLD(name, value) .name = value -#else -#define STRUCT_FLD(name, value) name: value -#endif /* HAVE_C99_INITIALIZERS */ -#else -#define STRUCT_FLD(name, value) value -#endif - -/* Don't use a static const variable here, as some C++ compilers (notably -HPUX aCC: HP ANSI C++ B3910B A.03.65) can't handle it. 
*/ -#define END_OF_ST_FIELD_INFO \ - {STRUCT_FLD(field_name, NULL), \ - STRUCT_FLD(field_length, 0), \ - STRUCT_FLD(field_type, MYSQL_TYPE_NULL), \ - STRUCT_FLD(value, 0), \ - STRUCT_FLD(field_flags, 0), \ - STRUCT_FLD(old_name, ""), \ - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)} - -/** Fields on INFORMATION_SCHEMA.SYS_SEMAMPHORE_WAITS table */ -#define SYS_SEMAPHORE_WAITS_THREAD_ID 0 -#define SYS_SEMAPHORE_WAITS_OBJECT_NAME 1 -#define SYS_SEMAPHORE_WAITS_FILE 2 -#define SYS_SEMAPHORE_WAITS_LINE 3 -#define SYS_SEMAPHORE_WAITS_WAIT_TIME 4 -#define SYS_SEMAPHORE_WAITS_WAIT_OBJECT 5 -#define SYS_SEMAPHORE_WAITS_WAIT_TYPE 6 -#define SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID 7 -#define SYS_SEMAPHORE_WAITS_HOLDER_FILE 8 -#define SYS_SEMAPHORE_WAITS_HOLDER_LINE 9 -#define SYS_SEMAPHORE_WAITS_CREATED_FILE 10 -#define SYS_SEMAPHORE_WAITS_CREATED_LINE 11 -#define SYS_SEMAPHORE_WAITS_WRITER_THREAD 12 -#define SYS_SEMAPHORE_WAITS_RESERVATION_MODE 13 -#define SYS_SEMAPHORE_WAITS_READERS 14 -#define SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15 -#define SYS_SEMAPHORE_WAITS_LOCK_WORD 16 -#define SYS_SEMAPHORE_WAITS_LAST_READER_FILE 17 -#define SYS_SEMAPHORE_WAITS_LAST_READER_LINE 18 -#define SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 19 -#define SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 20 -#define SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 21 - -/*******************************************************************//** -Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field. -If the value is ULINT_UNDEFINED then the field it set to NULL. -@return 0 on success */ -int -field_store_ulint( -/*==============*/ - Field* field, /*!< in/out: target field for storage */ - ulint n); /*!< in: value to store */ - -/*******************************************************************//** -Auxiliary function to store char* value in MYSQL_TYPE_STRING field. 
-@return 0 on success */ -int -field_store_string( -/*===============*/ - Field* field, /*!< in/out: target field for storage */ - const char* str); /*!< in: NUL-terminated utf-8 string, - or NULL */ -#endif /* i_s_h */ diff --git a/storage/xtradb/handler/xtradb_i_s.cc b/storage/xtradb/handler/xtradb_i_s.cc deleted file mode 100644 index eaf7da733bf..00000000000 --- a/storage/xtradb/handler/xtradb_i_s.cc +++ /dev/null @@ -1,544 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2010-2012, Percona Inc. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -#include "univ.i" -#include <mysqld_error.h> -#include <sql_acl.h> // PROCESS_ACL - -#include <m_ctype.h> -#include <hash.h> -#include <myisampack.h> -#include <mysys_err.h> -#include <my_sys.h> -#include "i_s.h" -#include <sql_plugin.h> -#include <innodb_priv.h> - -#include <read0i_s.h> -#include <trx0i_s.h> -#include "srv0start.h" /* for srv_was_started */ -#include <btr0pcur.h> /* btr_pcur_t */ -#include <btr0sea.h> /* btr_search_sys */ -#include <log0recv.h> /* recv_sys */ -#include <fil0fil.h> - -/* for XTRADB_RSEG table */ -#include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */ -#include "trx0rseg.h" /* for trx_rseg_struct */ -#include "trx0sys.h" /* for trx_sys */ - -#define PLUGIN_AUTHOR "Percona Inc." - -static int field_store_blob(Field*, const char*, uint) __attribute__((unused)); -/** Auxiliary function to store (char*, len) value in MYSQL_TYPE_BLOB -field. 
-@return 0 on success */ -static -int -field_store_blob( - Field* field, /*!< in/out: target field for storage */ - const char* data, /*!< in: pointer to data, or NULL */ - uint data_len) /*!< in: data length */ -{ - int ret; - - if (data != NULL) { - ret = field->store(data, data_len, system_charset_info); - field->set_notnull(); - } else { - ret = 0; /* success */ - field->set_null(); - } - - return(ret); -} - -static -int -i_s_common_deinit( -/*==============*/ - void* p) /*!< in/out: table schema object */ -{ - DBUG_ENTER("i_s_common_deinit"); - - /* Do nothing */ - - DBUG_RETURN(0); -} - -static ST_FIELD_INFO xtradb_read_view_fields_info[] = -{ -#define READ_VIEW_UNDO_NUMBER 0 - {STRUCT_FLD(field_name, "READ_VIEW_UNDO_NUMBER"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define READ_VIEW_LOW_LIMIT_NUMBER 1 - {STRUCT_FLD(field_name, "READ_VIEW_LOW_LIMIT_TRX_NUMBER"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define READ_VIEW_UPPER_LIMIT_ID 2 - {STRUCT_FLD(field_name, "READ_VIEW_UPPER_LIMIT_TRX_ID"), - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define READ_VIEW_LOW_LIMIT_ID 3 - {STRUCT_FLD(field_name, "READ_VIEW_LOW_LIMIT_TRX_ID"), - - STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -static int 
xtradb_read_view_fill_table(THD* thd, TABLE_LIST* tables, Item*) -{ - const char* table_name; - Field** fields; - TABLE* table; - char trx_id[TRX_ID_MAX_LEN + 1]; - - - DBUG_ENTER("xtradb_read_view_fill_table"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL)) { - - DBUG_RETURN(0); - } - - table_name = tables->schema_table_name; - table = tables->table; - fields = table->field; - - RETURN_IF_INNODB_NOT_STARTED(table_name); - - i_s_xtradb_read_view_t read_view; - - if (read_fill_i_s_xtradb_read_view(&read_view) == NULL) - DBUG_RETURN(0); - - OK(field_store_ulint(fields[READ_VIEW_UNDO_NUMBER], read_view.undo_no)); - - ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, read_view.low_limit_no); - OK(field_store_string(fields[READ_VIEW_LOW_LIMIT_NUMBER], trx_id)); - - ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, read_view.up_limit_id); - OK(field_store_string(fields[READ_VIEW_UPPER_LIMIT_ID], trx_id)); - - ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, read_view.low_limit_id); - OK(field_store_string(fields[READ_VIEW_LOW_LIMIT_ID], trx_id)); - - OK(schema_table_store_record(thd, table)); - - DBUG_RETURN(0); -} - - -static int xtradb_read_view_init(void* p) -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("xtradb_read_view_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = xtradb_read_view_fields_info; - schema->fill_table = xtradb_read_view_fill_table; - - DBUG_RETURN(0); -} - -static struct st_mysql_information_schema i_s_info = -{ - MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION -}; - -UNIV_INTERN struct st_mysql_plugin i_s_xtradb_read_view = -{ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - STRUCT_FLD(info, &i_s_info), - STRUCT_FLD(name, "XTRADB_READ_VIEW"), - STRUCT_FLD(author, PLUGIN_AUTHOR), - STRUCT_FLD(descr, "InnoDB Read View information"), - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - STRUCT_FLD(init, xtradb_read_view_init), - STRUCT_FLD(deinit, i_s_common_deinit), - STRUCT_FLD(version, 
INNODB_VERSION_SHORT), - STRUCT_FLD(status_vars, NULL), - STRUCT_FLD(system_vars, NULL), - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - -static ST_FIELD_INFO xtradb_internal_hash_tables_fields_info[] = -{ -#define INT_HASH_TABLES_NAME 0 - {STRUCT_FLD(field_name, "INTERNAL_HASH_TABLE_NAME"), - STRUCT_FLD(field_length, 100), - STRUCT_FLD(field_type, MYSQL_TYPE_STRING), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, 0), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define INT_HASH_TABLES_TOTAL 1 - {STRUCT_FLD(field_name, "TOTAL_MEMORY"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define INT_HASH_TABLES_CONSTANT 2 - {STRUCT_FLD(field_name, "CONSTANT_MEMORY"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - -#define INT_HASH_TABLES_VARIABLE 3 - {STRUCT_FLD(field_name, "VARIABLE_MEMORY"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -static int xtradb_internal_hash_tables_fill_table(THD* thd, TABLE_LIST* tables, Item*) -{ - const char* table_name; - Field** fields; - TABLE* table; - ulong btr_search_sys_constant; - ulong btr_search_sys_variable; - - DBUG_ENTER("xtradb_internal_hash_tables_fill_table"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL)) { - - DBUG_RETURN(0); - } - - table_name = 
tables->schema_table_name; - table = tables->table; - fields = table->field; - - RETURN_IF_INNODB_NOT_STARTED(table_name); - - /* Calculate AHI constant and variable memory allocations */ - - btr_search_sys_constant = 0; - btr_search_sys_variable = 0; - - ut_ad(btr_search_sys->hash_tables); - - for (ulint i = 0; i < btr_search_index_num; i++) { - hash_table_t* ht = btr_search_sys->hash_tables[i]; - - ut_ad(ht); - ut_ad(ht->heap); - - /* Multiple mutexes/heaps are currently never used for adaptive - hash index tables. */ - ut_ad(!ht->n_sync_obj); - ut_ad(!ht->heaps); - - btr_search_sys_variable += mem_heap_get_size(ht->heap); - btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t); - } - - OK(field_store_string(fields[INT_HASH_TABLES_NAME], - "Adaptive hash index")); - OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL], - btr_search_sys_variable + btr_search_sys_constant)); - OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT], - btr_search_sys_constant)); - OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], - btr_search_sys_variable)); - OK(schema_table_store_record(thd, table)); - - { - OK(field_store_string(fields[INT_HASH_TABLES_NAME], - "Page hash (buffer pool 0 only)")); - OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL], - (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)))); - OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT], - (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)))); - OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], 0)); - OK(schema_table_store_record(thd, table)); - - } - - if (dict_sys) - { - OK(field_store_string(fields[INT_HASH_TABLES_NAME], - "Dictionary Cache")); - OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL], - ((dict_sys->table_hash->n_cells - + dict_sys->table_id_hash->n_cells - ) * sizeof(hash_cell_t) - + dict_sys->size))); - OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT], - ((dict_sys->table_hash->n_cells - + dict_sys->table_id_hash->n_cells - ) * 
sizeof(hash_cell_t)))); - OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], - dict_sys->size)); - OK(schema_table_store_record(thd, table)); - } - - { - OK(field_store_string(fields[INT_HASH_TABLES_NAME], - "File system")); - OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL], - (ulong) (fil_system_hash_cells() - * sizeof(hash_cell_t) - + fil_system_hash_nodes()))); - OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT], - (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)))); - OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], - (ulong) fil_system_hash_nodes())); - OK(schema_table_store_record(thd, table)); - - } - - { - ulint lock_sys_constant, lock_sys_variable; - - trx_i_s_get_lock_sys_memory_usage(&lock_sys_constant, - &lock_sys_variable); - - OK(field_store_string(fields[INT_HASH_TABLES_NAME], "Lock System")); - OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL], - lock_sys_constant + lock_sys_variable)); - OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT], - lock_sys_constant)); - OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], - lock_sys_variable)); - OK(schema_table_store_record(thd, table)); - } - - if (recv_sys) - { - ulint recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash) - ? mem_heap_get_size(recv_sys->heap) : 0); - - OK(field_store_string(fields[INT_HASH_TABLES_NAME], "Recovery System")); - OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL], - ((recv_sys->addr_hash) ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0) + recv_sys_subtotal)); - OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT], - ((recv_sys->addr_hash) ? 
(recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0))); - OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], - recv_sys_subtotal)); - OK(schema_table_store_record(thd, table)); - } - - DBUG_RETURN(0); -} - -static int xtradb_internal_hash_tables_init(void* p) -{ - ST_SCHEMA_TABLE* schema; - - DBUG_ENTER("xtradb_internal_hash_tables_init"); - - schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = xtradb_internal_hash_tables_fields_info; - schema->fill_table = xtradb_internal_hash_tables_fill_table; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_xtradb_internal_hash_tables = -{ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - STRUCT_FLD(info, &i_s_info), - STRUCT_FLD(name, "XTRADB_INTERNAL_HASH_TABLES"), - STRUCT_FLD(author, PLUGIN_AUTHOR), - STRUCT_FLD(descr, "InnoDB internal hash tables information"), - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - STRUCT_FLD(init, xtradb_internal_hash_tables_init), - STRUCT_FLD(deinit, i_s_common_deinit), - STRUCT_FLD(version, INNODB_VERSION_SHORT), - STRUCT_FLD(status_vars, NULL), - STRUCT_FLD(system_vars, NULL), - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; - - -/*********************************************************************** -*/ -static ST_FIELD_INFO i_s_xtradb_rseg_fields_info[] = -{ - {STRUCT_FLD(field_name, "rseg_id"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "space_id"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "zip_size"), - STRUCT_FLD(field_length, 
MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "page_no"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "max_size"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - {STRUCT_FLD(field_name, "curr_size"), - STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS), - STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), - STRUCT_FLD(value, 0), - STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), - STRUCT_FLD(old_name, ""), - STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}, - - END_OF_ST_FIELD_INFO -}; - -static -int -i_s_xtradb_rseg_fill( -/*=================*/ - THD* thd, /* in: thread */ - TABLE_LIST* tables, /* in/out: tables to fill */ - Item* ) /* in: condition (ignored) */ -{ - TABLE* table = (TABLE *) tables->table; - int status = 0; - trx_rseg_t* rseg; - - DBUG_ENTER("i_s_xtradb_rseg_fill"); - - /* deny access to non-superusers */ - if (check_global_access(thd, PROCESS_ACL)) { - - DBUG_RETURN(0); - } - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - for(int i=0; i < TRX_SYS_N_RSEGS; i++) - { - rseg = trx_sys->rseg_array[i]; - if (!rseg) - continue; - - table->field[0]->store(rseg->id); - table->field[1]->store(rseg->space); - table->field[2]->store(rseg->zip_size); - table->field[3]->store(rseg->page_no); - table->field[4]->store(rseg->max_size); - table->field[5]->store(rseg->curr_size); - - if (schema_table_store_record(thd, table)) { - status = 1; - 
break; - } - } - - DBUG_RETURN(status); -} - -static -int -i_s_xtradb_rseg_init( -/*=================*/ - /* out: 0 on success */ - void* p) /* in/out: table schema object */ -{ - DBUG_ENTER("i_s_xtradb_rseg_init"); - ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p; - - schema->fields_info = i_s_xtradb_rseg_fields_info; - schema->fill_table = i_s_xtradb_rseg_fill; - - DBUG_RETURN(0); -} - -UNIV_INTERN struct st_mysql_plugin i_s_xtradb_rseg = -{ - STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), - STRUCT_FLD(info, &i_s_info), - STRUCT_FLD(name, "XTRADB_RSEG"), - STRUCT_FLD(author, PLUGIN_AUTHOR), - STRUCT_FLD(descr, "InnoDB rollback segment information"), - STRUCT_FLD(license, PLUGIN_LICENSE_GPL), - STRUCT_FLD(init, i_s_xtradb_rseg_init), - STRUCT_FLD(deinit, i_s_common_deinit), - STRUCT_FLD(version, INNODB_VERSION_SHORT), - STRUCT_FLD(status_vars, NULL), - STRUCT_FLD(system_vars, NULL), - STRUCT_FLD(version_info, INNODB_VERSION_STR), - STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), -}; diff --git a/storage/xtradb/handler/xtradb_i_s.h b/storage/xtradb/handler/xtradb_i_s.h deleted file mode 100644 index 994bc11c1b8..00000000000 --- a/storage/xtradb/handler/xtradb_i_s.h +++ /dev/null @@ -1,27 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010-2012, Percona Inc. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -#ifndef XTRADB_I_S_H -#define XTRADB_I_S_H - -extern struct st_mysql_plugin i_s_xtradb_read_view; -extern struct st_mysql_plugin i_s_xtradb_internal_hash_tables; -extern struct st_mysql_plugin i_s_xtradb_rseg; - -#endif /* XTRADB_I_S_H */ diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc deleted file mode 100644 index 0445bb557e1..00000000000 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ /dev/null @@ -1,5276 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file ibuf/ibuf0ibuf.cc -Insert buffer - -Created 7/19/1997 Heikki Tuuri -*******************************************************/ - -#include "ibuf0ibuf.h" - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -UNIV_INTERN my_bool srv_ibuf_disable_background_merge; -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - -/** Number of bits describing a single page */ -#define IBUF_BITS_PER_PAGE 4 -#if IBUF_BITS_PER_PAGE % 2 -# error "IBUF_BITS_PER_PAGE must be an even number!" -#endif -/** The start address for an insert buffer bitmap page bitmap */ -#define IBUF_BITMAP PAGE_DATA - -#ifdef UNIV_NONINL -#include "ibuf0ibuf.ic" -#endif - -#ifndef UNIV_HOTBACKUP - -#include "buf0buf.h" -#include "buf0rea.h" -#include "fsp0fsp.h" -#include "trx0sys.h" -#include "fil0fil.h" -#include "rem0rec.h" -#include "btr0cur.h" -#include "btr0pcur.h" -#include "btr0btr.h" -#include "btr0sea.h" -#include "row0upd.h" -#include "sync0sync.h" -#include "dict0boot.h" -#include "fut0lst.h" -#include "lock0lock.h" -#include "log0recv.h" -#include "que0que.h" -#include "srv0start.h" /* srv_shutdown_state */ -#include "ha_prototypes.h" -#include "rem0cmp.h" - -/* STRUCTURE OF AN INSERT BUFFER RECORD - -In versions < 4.1.x: - -1. The first field is the page number. -2. The second field is an array which stores type info for each subsequent - field. We store the information which affects the ordering of records, and - also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it - is 10 bytes. -3. Next we have the fields of the actual index record. 
- -In versions >= 4.1.x: - -Note that contary to what we planned in the 1990's, there will only be one -insert buffer tree, and that is in the system tablespace of InnoDB. - -1. The first field is the space id. -2. The second field is a one-byte marker (0) which differentiates records from - the < 4.1.x storage format. -3. The third field is the page number. -4. The fourth field contains the type info, where we have also added 2 bytes to - store the charset. In the compressed table format of 5.0.x we must add more - information here so that we can build a dummy 'index' struct which 5.0.x - can use in the binary search on the index page in the ibuf merge phase. -5. The rest of the fields contain the fields of the actual index record. - -In versions >= 5.0.3: - -The first byte of the fourth field is an additional marker (0) if the record -is in the compact format. The presence of this marker can be detected by -looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE. - -The high-order bit of the character set field in the type info is the -"nullable" flag for the field. - -In versions >= 5.5: - -The optional marker byte at the start of the fourth field is replaced by -mandatory 3 fields, totaling 4 bytes: - - 1. 2 bytes: Counter field, used to sort records within a (space id, page - no) in the order they were added. This is needed so that for example the - sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled - correctly. - - 2. 1 byte: Operation type (see ibuf_op_t). - - 3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT. - -To ensure older records, which do not have counters to enforce correct -sorting, are merged before any new records, ibuf_insert checks if we're -trying to insert to a position that contains old-style records, and if so, -refuses the insert. Thus, ibuf pages are gradually converted to the new -format as their corresponding buffer pool pages are read into memory. 
-*/ - - -/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM - -If an OS thread performs any operation that brings in disk pages from -non-system tablespaces into the buffer pool, or creates such a page there, -then the operation may have as a side effect an insert buffer index tree -compression. Thus, the tree latch of the insert buffer tree may be acquired -in the x-mode, and also the file space latch of the system tablespace may -be acquired in the x-mode. - -Also, an insert to an index in a non-system tablespace can have the same -effect. How do we know this cannot lead to a deadlock of OS threads? There -is a problem with the i\o-handler threads: they break the latching order -because they own x-latches to pages which are on a lower level than the -insert buffer tree latch, its page latches, and the tablespace latch an -insert buffer operation can reserve. - -The solution is the following: Let all the tree and page latches connected -with the insert buffer be later in the latching order than the fsp latch and -fsp page latches. - -Insert buffer pages must be such that the insert buffer is never invoked -when these pages are accessed as this would result in a recursion violating -the latching order. We let a special i/o-handler thread take care of i/o to -the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap -pages and the first inode page, which contains the inode of the ibuf tree: let -us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead -access both non-ibuf and ibuf pages. - -Then an i/o-handler for the insert buffer never needs to access recursively the -insert buffer tree and thus obeys the latching order. On the other hand, other -i/o-handlers for other tablespaces may require access to the insert buffer, -but because all kinds of latches they need to access there are later in the -latching order, no violation of the latching order occurs in this case, -either. 
- -A problem is how to grow and contract an insert buffer tree. As it is later -in the latching order than the fsp management, we have to reserve the fsp -latch first, before adding or removing pages from the insert buffer tree. -We let the insert buffer tree have its own file space management: a free -list of pages linked to the tree root. To prevent recursive using of the -insert buffer when adding pages to the tree, we must first load these pages -to memory, obtaining a latch on them, and only after that add them to the -free list of the insert buffer tree. More difficult is removing of pages -from the free list. If there is an excess of pages in the free list of the -ibuf tree, they might be needed if some thread reserves the fsp latch, -intending to allocate more file space. So we do the following: if a thread -reserves the fsp latch, we check the writer count field of the latch. If -this field has value 1, it means that the thread did not own the latch -before entering the fsp system, and the mtr of the thread contains no -modifications to the fsp pages. Now we are free to reserve the ibuf latch, -and check if there is an excess of pages in the free list. We can then, in a -separate mini-transaction, take them out of the free list and free them to -the fsp system. - -To avoid deadlocks in the ibuf system, we divide file pages into three levels: - -(1) non-ibuf pages, -(2) ibuf tree pages and the pages in the ibuf tree free list, and -(3) ibuf bitmap pages. - -No OS thread is allowed to access higher level pages if it has latches to -lower level pages; even if the thread owns a B-tree latch it must not access -the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead -is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle -exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively -level 2 i/o. 
However, if an OS thread does the i/o handling for itself, i.e., -it uses synchronous aio, it can access any pages, as long as it obeys the -access order rules. */ - -/** Table name for the insert buffer. */ -#define IBUF_TABLE_NAME "SYS_IBUF_TABLE" - -/** Operations that can currently be buffered. */ -UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL; - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/** Flag to control insert buffer debugging. */ -UNIV_INTERN uint ibuf_debug; -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - -/** The insert buffer control structure */ -UNIV_INTERN ibuf_t* ibuf = NULL; - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; -UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key; -UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_IBUF_COUNT_DEBUG -/** Number of tablespaces in the ibuf_counts array */ -#define IBUF_COUNT_N_SPACES 4 -/** Number of pages within each tablespace in the ibuf_counts array */ -#define IBUF_COUNT_N_PAGES 130000 - -/** Buffered entry counts for file pages, used in debugging */ -static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES]; - -/******************************************************************//** -Checks that the indexes to ibuf_counts[][] are within limits. 
*/ -UNIV_INLINE -void -ibuf_count_check( -/*=============*/ - ulint space_id, /*!< in: space identifier */ - ulint page_no) /*!< in: page number */ -{ - if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) { - return; - } - - fprintf(stderr, - "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n" - "InnoDB: and breaks crash recovery.\n" - "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n" - "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n", - (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES, - (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES); - ut_error; -} -#endif - -/** @name Offsets to the per-page bits in the insert buffer bitmap */ -/* @{ */ -#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the - amount of free space */ -#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered - changes for the page */ -#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of - the ibuf tree, excluding the - root page, or is in the free - list of the ibuf */ -/* @} */ - -#define IBUF_REC_FIELD_SPACE 0 /*!< in the pre-4.1 format, - the page number. later, the space_id */ -#define IBUF_REC_FIELD_MARKER 1 /*!< starting with 4.1, a marker - consisting of 1 byte that is 0 */ -#define IBUF_REC_FIELD_PAGE 2 /*!< starting with 4.1, the - page number */ -#define IBUF_REC_FIELD_METADATA 3 /* the metadata field */ -#define IBUF_REC_FIELD_USER 4 /* first user field */ - -/* Various constants for checking the type of an ibuf record and extracting -data from it. For details, see the description of the record format at the -top of this file. */ - -/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record -The fourth column in the MySQL 5.5 format contains an operation -type, counter, and some flags. 
*/ -/* @{ */ -#define IBUF_REC_INFO_SIZE 4 /*!< Combined size of info fields at - the beginning of the fourth field */ -#if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE -# error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" -#endif - -/* Offsets for the fields at the beginning of the fourth field */ -#define IBUF_REC_OFFSET_COUNTER 0 /*!< Operation counter */ -#define IBUF_REC_OFFSET_TYPE 2 /*!< Type of operation */ -#define IBUF_REC_OFFSET_FLAGS 3 /*!< Additional flags */ - -/* Record flag masks */ -#define IBUF_REC_COMPACT 0x1 /*!< Set in - IBUF_REC_OFFSET_FLAGS if the - user index is in COMPACT - format or later */ - - -/** The mutex used to block pessimistic inserts to ibuf trees */ -static ib_mutex_t ibuf_pessimistic_insert_mutex; - -/** The mutex protecting the insert buffer structs */ -static ib_mutex_t ibuf_mutex; - -/** The mutex protecting the insert buffer bitmaps */ -static ib_mutex_t ibuf_bitmap_mutex; - -/** The area in pages from which contract looks for page numbers for merge */ -#define IBUF_MERGE_AREA 8UL - -/** Inside the merge area, pages which have at most 1 per this number less -buffered entries compared to maximum volume that can buffered for a single -page are merged along with the page whose buffer became full */ -#define IBUF_MERGE_THRESHOLD 4 - -/** In ibuf_contract at most this number of pages is read to memory in one -batch, in order to merge the entries for them in the insert buffer */ -#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA - -/** If the combined size of the ibuf trees exceeds ibuf->max_size by this -many pages, we start to contract it in connection to inserts there, using -non-synchronous contract */ -#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0 - -/** If the combined size of the ibuf trees exceeds ibuf->max_size by this -many pages, we start to contract it in connection to inserts there, using -synchronous contract */ -#define IBUF_CONTRACT_ON_INSERT_SYNC 5 - -/** If the combined size of the ibuf trees 
exceeds ibuf->max_size by -this many pages, we start to contract it synchronous contract, but do -not insert */ -#define IBUF_CONTRACT_DO_NOT_INSERT 10 - -/* TODO: how to cope with drop table if there are records in the insert -buffer for the indexes of the table? Is there actually any problem, -because ibuf merge is done to a page when it is read in, and it is -still physically like the index page even if the index would have been -dropped! So, there seems to be no problem. */ - -/******************************************************************//** -Sets the flag in the current mini-transaction record indicating we're -inside an insert buffer routine. */ -UNIV_INLINE -void -ibuf_enter( -/*=======*/ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(!mtr->inside_ibuf); - mtr->inside_ibuf = TRUE; -} - -/******************************************************************//** -Sets the flag in the current mini-transaction record indicating we're -exiting an insert buffer routine. */ -UNIV_INLINE -void -ibuf_exit( -/*======*/ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(mtr->inside_ibuf); - mtr->inside_ibuf = FALSE; -} - -/**************************************************************//** -Commits an insert buffer mini-transaction and sets the persistent -cursor latch mode to BTR_NO_LATCHES, that is, detaches the cursor. */ -UNIV_INLINE -void -ibuf_btr_pcur_commit_specify_mtr( -/*=============================*/ - btr_pcur_t* pcur, /*!< in/out: persistent cursor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_d(ibuf_exit(mtr)); - btr_pcur_commit_specify_mtr(pcur, mtr); -} - -/******************************************************************//** -Gets the ibuf header page and x-latches it. 
-@return insert buffer header page */ -static -page_t* -ibuf_header_page_get( -/*=================*/ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - - ut_ad(!ibuf_inside(mtr)); - page_t* page = NULL; - - block = buf_page_get( - IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr); - - if (!block->page.encrypted) { - buf_block_dbg_add_level(block, SYNC_IBUF_HEADER); - - page = buf_block_get_frame(block); - } - - return page; -} - -/******************************************************************//** -Gets the root page and x-latches it. -@return insert buffer tree root page */ -static -page_t* -ibuf_tree_root_get( -/*===============*/ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - page_t* root; - - ut_ad(ibuf_inside(mtr)); - ut_ad(mutex_own(&ibuf_mutex)); - - mtr_x_lock(dict_index_get_lock(ibuf->index), mtr); - - block = buf_page_get( - IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); - - root = buf_block_get_frame(block); - - ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); - ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO); - ut_ad(ibuf->empty == page_is_empty(root)); - - return(root); -} - -#ifdef UNIV_IBUF_COUNT_DEBUG -/******************************************************************//** -Gets the ibuf count for a given page. -@return number of entries in the insert buffer currently buffered for -this page */ -UNIV_INTERN -ulint -ibuf_count_get( -/*===========*/ - ulint space, /*!< in: space id */ - ulint page_no)/*!< in: page number */ -{ - ibuf_count_check(space, page_no); - - return(ibuf_counts[space][page_no]); -} - -/******************************************************************//** -Sets the ibuf count for a given page. 
*/ -static -void -ibuf_count_set( -/*===========*/ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: page number */ - ulint val) /*!< in: value to set */ -{ - ibuf_count_check(space, page_no); - ut_a(val < UNIV_PAGE_SIZE); - - ibuf_counts[space][page_no] = val; -} -#endif - -/******************************************************************//** -Closes insert buffer and frees the data structures. */ -UNIV_INTERN -void -ibuf_close(void) -/*============*/ -{ - mutex_free(&ibuf_pessimistic_insert_mutex); - memset(&ibuf_pessimistic_insert_mutex, - 0x0, sizeof(ibuf_pessimistic_insert_mutex)); - - mutex_free(&ibuf_mutex); - memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex)); - - mutex_free(&ibuf_bitmap_mutex); - memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex)); - - mem_free(ibuf); - ibuf = NULL; -} - -/******************************************************************//** -Function to pass ibuf status variables */ -UNIV_INTERN -void -ibuf_export_ibuf_status( -/*====================*/ - ulint* size, - ulint* free_list, - ulint* segment_size, - ulint* merges, - ulint* merged_inserts, - ulint* merged_delete_marks, - ulint* merged_deletes, - ulint* discarded_inserts, - ulint* discarded_delete_marks, - ulint* discarded_deletes) -{ - *size - = ibuf->size; - *free_list - = ibuf->free_list_len; - *segment_size - = ibuf->seg_size; - *merges - = ibuf->n_merges; - *merged_inserts - = ibuf->n_merged_ops[IBUF_OP_INSERT]; - *merged_delete_marks - = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK]; - *merged_deletes - = ibuf->n_merged_ops[IBUF_OP_DELETE]; - *discarded_inserts - = ibuf->n_discarded_ops[IBUF_OP_INSERT]; - *discarded_delete_marks - = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK]; - *discarded_deletes - = ibuf->n_discarded_ops[IBUF_OP_DELETE]; -} - -/******************************************************************//** -Updates the size information of the ibuf, assuming the segment size has not -changed. 
*/ -static -void -ibuf_size_update( -/*=============*/ - const page_t* root, /*!< in: ibuf tree root */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(mutex_own(&ibuf_mutex)); - - ibuf->free_list_len = flst_get_len(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, mtr); - - ibuf->height = 1 + btr_page_get_level(root, mtr); - - /* the '1 +' is the ibuf header page */ - ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len); -} - -/******************************************************************//** -Creates the insert buffer data structure at a database startup and initializes -the data structures for the insert buffer. -@return DB_SUCCESS or failure */ -UNIV_INTERN -dberr_t -ibuf_init_at_db_start(void) -/*=======================*/ -{ - page_t* root; - mtr_t mtr; - dict_table_t* table; - mem_heap_t* heap; - dict_index_t* index; - ulint n_used; - page_t* header_page; - dberr_t error= DB_SUCCESS; - - ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t))); - - /* At startup we intialize ibuf to have a maximum of - CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the - buffer pool size. Once ibuf struct is initialized this - value is updated with the user supplied size by calling - ibuf_max_size_update(). 
*/ - ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE) - * CHANGE_BUFFER_DEFAULT_SIZE) / 100; - - mutex_create(ibuf_pessimistic_insert_mutex_key, - &ibuf_pessimistic_insert_mutex, - SYNC_IBUF_PESS_INSERT_MUTEX); - - mutex_create(ibuf_mutex_key, - &ibuf_mutex, SYNC_IBUF_MUTEX); - - mutex_create(ibuf_bitmap_mutex_key, - &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX); - - mtr_start(&mtr); - - mutex_enter(&ibuf_mutex); - - mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr); - - header_page = ibuf_header_page_get(&mtr); - - if (!header_page) { - return (DB_DECRYPTION_FAILED); - } - - fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - &n_used, &mtr); - ibuf_enter(&mtr); - - ut_ad(n_used >= 2); - - ibuf->seg_size = n_used; - - { - buf_block_t* block; - - block = buf_page_get( - IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - - root = buf_block_get_frame(block); - } - - ibuf_size_update(root, &mtr); - mutex_exit(&ibuf_mutex); - - ibuf->empty = page_is_empty(root); - ibuf_mtr_commit(&mtr); - - heap = mem_heap_create(450); - - /* Use old-style record format for the insert buffer. 
*/ - table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0); - - dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0); - - table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID; - - dict_table_add_to_cache(table, FALSE, heap); - mem_heap_free(heap); - - index = dict_mem_index_create( - IBUF_TABLE_NAME, "CLUST_IND", - IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1); - - dict_mem_index_add_field(index, "DUMMY_COLUMN", 0); - - index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID; - btr_search_index_init(index); - - error = dict_index_add_to_cache(table, index, - FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE); - ut_a(error == DB_SUCCESS); - - ibuf->index = dict_table_get_first_index(table); - return (error); -} - -/*********************************************************************//** -Updates the max_size value for ibuf. */ -UNIV_INTERN -void -ibuf_max_size_update( -/*=================*/ - ulint new_val) /*!< in: new value in terms of - percentage of the buffer pool size */ -{ - ulint new_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE) - * new_val) / 100; - mutex_enter(&ibuf_mutex); - ibuf->max_size = new_size; - mutex_exit(&ibuf_mutex); -} - - -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Initializes an ibuf bitmap page. 
*/ -UNIV_INTERN -void -ibuf_bitmap_page_init( -/*==================*/ - buf_block_t* block, /*!< in: bitmap page */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* page; - ulint byte_offset; - ulint zip_size = buf_block_get_zip_size(block); - - ut_a(ut_is_2pow(zip_size)); - - page = buf_block_get_frame(block); - fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP); - - /* Write all zeros to the bitmap. byte_offset is the bitmap length - in bytes: IBUF_BITS_PER_PAGE bits for each of UNIV_PAGE_SIZE pages - (uncompressed) or zip_size pages (compressed). */ - - if (!zip_size) { - byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE - * IBUF_BITS_PER_PAGE); - } else { - byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE); - } - - memset(page + IBUF_BITMAP, 0, byte_offset); - - /* The remaining area (up to the page trailer) is uninitialized. */ - -#ifndef UNIV_HOTBACKUP - mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr); -#endif /* !UNIV_HOTBACKUP */ -} - -/*********************************************************************//** -Parses a redo log record of an ibuf bitmap page init. Nothing is consumed -from the buffer: ptr is returned unchanged, and the bitmap page is -re-initialized only when a block is supplied. -@return end of log record or NULL */ -UNIV_INTERN -byte* -ibuf_parse_bitmap_init( -/*===================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ - buf_block_t* block, /*!< in: block or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - if (block) { - ibuf_bitmap_page_init(block, mtr); - } - - return(ptr); -} -#ifndef UNIV_HOTBACKUP -# ifdef UNIV_DEBUG -/** Gets the desired bits for a given page from a bitmap page. -@param page in: bitmap page -@param offset in: page whose bits to get -@param zs in: compressed page size in bytes; 0 for uncompressed pages -@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... -@param mtr in: mini-transaction holding an x-latch on the bitmap page -@return value of bits */ -# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \ - ibuf_bitmap_page_get_bits_low(page, offset, zs, \ - MTR_MEMO_PAGE_X_FIX, mtr, bit) -# else /* UNIV_DEBUG */ -/** Gets the desired bits for a given page from a bitmap page. 
-@param page in: bitmap page -@param offset in: page whose bits to get -@param zs in: compressed page size in bytes; 0 for uncompressed pages -@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... -@param mtr in: mini-transaction holding an x-latch on the bitmap page -@return value of bits */ -# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \ - ibuf_bitmap_page_get_bits_low(page, offset, zs, bit) -# endif /* UNIV_DEBUG */ - -/********************************************************************//** -Gets the desired bits for a given page from a bitmap page. For -IBUF_BITMAP_FREE the result is a two-bit value built from the addressed bit -(high-order) and the bit after it (low-order); for any other bit the result -is that single bit. -@return value of bits */ -UNIV_INLINE -ulint -ibuf_bitmap_page_get_bits_low( -/*==========================*/ - const page_t* page, /*!< in: bitmap page */ - ulint page_no,/*!< in: page whose bits to get */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ -#ifdef UNIV_DEBUG - ulint latch_type, - /*!< in: MTR_MEMO_PAGE_X_FIX, - MTR_MEMO_BUF_FIX, ... */ - mtr_t* mtr, /*!< in: mini-transaction holding latch_type - on the bitmap page */ -#endif /* UNIV_DEBUG */ - ulint bit) /*!< in: IBUF_BITMAP_FREE, - IBUF_BITMAP_BUFFERED, ... 
*/ -{ - ulint byte_offset; - ulint bit_offset; - ulint map_byte; - ulint value; - - ut_ad(bit < IBUF_BITS_PER_PAGE); -#if IBUF_BITS_PER_PAGE % 2 -# error "IBUF_BITS_PER_PAGE % 2 != 0" -#endif - ut_ad(ut_is_2pow(zip_size)); - ut_ad(mtr_memo_contains_page(mtr, page, latch_type)); - - /* Position of the wanted bit, counted in bits from IBUF_BITMAP. */ - if (!zip_size) { - bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE - + bit; - } else { - bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE - + bit; - } - - byte_offset = bit_offset / 8; - bit_offset = bit_offset % 8; - - ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE); - - map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset); - - value = ut_bit_get_nth(map_byte, bit_offset); - - if (bit == IBUF_BITMAP_FREE) { - ut_ad(bit_offset + 1 < 8); - - value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1); - } - - return(value); -} - -/********************************************************************//** -Sets the desired bit for a given page in a bitmap page. The containing byte -is read, modified and written back through mlog_write_ulint() under mtr. -IBUF_BITMAP_FREE stores a two-bit value (debug builds assert val <= 3); any -other bit stores a single bit (debug builds assert val <= 1). */ -static -void -ibuf_bitmap_page_set_bits( -/*======================*/ - page_t* page, /*!< in: bitmap page */ - ulint page_no,/*!< in: page whose bits to set */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... 
*/ - ulint val, /*!< in: value to set */ - mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */ -{ - ulint byte_offset; - ulint bit_offset; - ulint map_byte; - - ut_ad(bit < IBUF_BITS_PER_PAGE); -#if IBUF_BITS_PER_PAGE % 2 -# error "IBUF_BITS_PER_PAGE % 2 != 0" -#endif - ut_ad(ut_is_2pow(zip_size)); - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE) - || (0 == ibuf_count_get(page_get_space_id(page), - page_no))); -#endif - /* Same bit addressing as in ibuf_bitmap_page_get_bits_low(). */ - if (!zip_size) { - bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE - + bit; - } else { - bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE - + bit; - } - - byte_offset = bit_offset / 8; - bit_offset = bit_offset % 8; - - ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE); - - map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset); - - if (bit == IBUF_BITMAP_FREE) { - ut_ad(bit_offset + 1 < 8); - ut_ad(val <= 3); - - map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2); - map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2); - } else { - ut_ad(val <= 1); - map_byte = ut_bit_set_nth(map_byte, bit_offset, val); - } - - mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte, - MLOG_1BYTE, mtr); -} - -/********************************************************************//** -Calculates the bitmap page number for a given page number. 
-@return the bitmap page number where the file page is mapped */ -UNIV_INLINE -ulint -ibuf_bitmap_page_no_calc( -/*=====================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no) /*!< in: tablespace page number */ -{ - ut_ad(ut_is_2pow(zip_size)); - - /* Round page_no down to a multiple of the page size in bytes - (UNIV_PAGE_SIZE or zip_size), then add the fixed bitmap offset. */ - if (!zip_size) { - return(FSP_IBUF_BITMAP_OFFSET - + (page_no & ~(UNIV_PAGE_SIZE - 1))); - } else { - return(FSP_IBUF_BITMAP_OFFSET - + (page_no & ~(zip_size - 1))); - } -} - -/********************************************************************//** -Gets the ibuf bitmap page where the bits describing a given file page are -stored. -@return bitmap page where the file page is mapped, that is, the bitmap -page containing the descriptor bits for the file page; the bitmap page -is x-latched; NULL if buf_page_get_gen() reported an error */ -static -page_t* -ibuf_bitmap_get_map_page_func( -/*==========================*/ - ulint space, /*!< in: space id of the file page */ - ulint page_no,/*!< in: page number of the file page */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block = NULL; - dberr_t err = DB_SUCCESS; - - block = buf_page_get_gen(space, zip_size, - ibuf_bitmap_page_no_calc(zip_size, page_no), - RW_X_LATCH, NULL, BUF_GET, - file, line, mtr, &err); - - if (err != DB_SUCCESS) { - return NULL; - } - - buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP); - - return(buf_block_get_frame(block)); -} - -/********************************************************************//** -Gets the ibuf bitmap page where the bits describing a given file page are -stored. 
-@return bitmap page where the file page is mapped, that is, the bitmap -page containing the descriptor bits for the file page; the bitmap page -is x-latched -@param space in: space id of the file page -@param page_no in: page number of the file page -@param zip_size in: compressed page size in bytes; 0 for uncompressed pages -@param mtr in: mini-transaction */ -#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \ - ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \ - __FILE__, __LINE__, mtr) - -/************************************************************************//** -Sets the free bits of the page in the ibuf bitmap, using the mini-transaction -supplied by the caller (no mini-transaction is started or committed here). -Returns without doing anything if block is NULL, if the block has no frame, -or if the page is not a leaf page. */ -UNIV_INLINE -void -ibuf_set_free_bits_low( -/*===================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - const buf_block_t* block, /*!< in: index page; free bits are set if - the index is non-clustered and page - level is 0 */ - ulint val, /*!< in: value to set: < 4 */ - mtr_t* mtr) /*!< in/out: mtr */ -{ - page_t* bitmap_page; - ulint space; - ulint page_no; - buf_frame_t* frame; - - if (!block) { - return; - } - - frame = buf_block_get_frame(block); - - if (!frame || !page_is_leaf(frame)) { - return; - } - - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); /* NOTE(review): ibuf_bitmap_get_map_page_func() can return NULL, and the result is passed on unchecked below -- confirm this cannot happen here */ -#ifdef UNIV_IBUF_DEBUG -# if 0 - fprintf(stderr, - "Setting space %lu page %lu free bits to %lu should be %lu\n", - space, page_no, val, - ibuf_index_page_calc_free(zip_size, block)); -# endif - - ut_a(val <= ibuf_index_page_calc_free(zip_size, block)); -#endif /* UNIV_IBUF_DEBUG */ - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, val, mtr); -} - 
-/************************************************************************//** -Sets the free bit of the page in the ibuf bitmap. This is done in a separate -mini-transaction, hence this operation does not restrict further work to only -ibuf bitmap operations, which would result if the latch to the bitmap page -were kept. Non-leaf pages are ignored: the function returns before starting -the mini-transaction. */ -UNIV_INTERN -void -ibuf_set_free_bits_func( -/*====================*/ - buf_block_t* block, /*!< in: index page of a non-clustered index; - free bit is reset if page level is 0 */ -#ifdef UNIV_IBUF_DEBUG - ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum - value which the bits must have before - setting; this is for debugging */ -#endif /* UNIV_IBUF_DEBUG */ - ulint val) /*!< in: value to set: < 4 */ -{ - mtr_t mtr; - page_t* page; - page_t* bitmap_page; - ulint space; - ulint page_no; - ulint zip_size; - - page = buf_block_get_frame(block); - - if (!page_is_leaf(page)) { - - return; - } - - mtr_start(&mtr); - - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - zip_size = buf_block_get_zip_size(block); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr); /* NOTE(review): result is not checked for NULL before use -- confirm */ - -#ifdef UNIV_IBUF_DEBUG - if (max_val != ULINT_UNDEFINED) { - ulint old_val; - - old_val = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, &mtr); -# if 0 - if (old_val != max_val) { - fprintf(stderr, - "Ibuf: page %lu old val %lu max val %lu\n", - page_get_page_no(page), - old_val, max_val); - } -# endif - - ut_a(old_val <= max_val); - } -# if 0 - fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n", - page_get_page_no(page), val, - ibuf_index_page_calc_free(zip_size, block)); -# endif - - ut_a(val <= ibuf_index_page_calc_free(zip_size, block)); -#endif /* UNIV_IBUF_DEBUG */ - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, val, &mtr); - mtr_commit(&mtr); -} - -/************************************************************************//** -Resets the free bits 
of the page in the ibuf bitmap. This is done in a -separate mini-transaction, hence this operation does not restrict -further work to only ibuf bitmap operations, which would result if the -latch to the bitmap page were kept. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is safe -to decrement or reset the bits in the bitmap in a mini-transaction -that is committed before the mini-transaction that affects the free -space. */ -UNIV_INTERN -void -ibuf_reset_free_bits( -/*=================*/ - buf_block_t* block) /*!< in: index page; free bits are set to 0 - if the index is a non-clustered - non-unique, and page level is 0 */ -{ - ibuf_set_free_bits(block, 0, ULINT_UNDEFINED); -} - -/**********************************************************************//** -Updates the free bits for an uncompressed page to reflect the present -state. Does this in the mtr given, which means that the latching -order rules virtually prevent any further operations for this OS -thread until mtr is committed. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is safe -to set the free bits in the same mini-transaction that updated the -page. The bitmap is written only when the free-bits value computed from -max_ins_size differs from the value computed from the page as it is now. */ -UNIV_INTERN -void -ibuf_update_free_bits_low( -/*======================*/ - const buf_block_t* block, /*!< in: index page */ - ulint max_ins_size, /*!< in: value of - maximum insert size - with reorganize before - the latest operation - performed to the page */ - mtr_t* mtr) /*!< in/out: mtr */ -{ - ulint before; - ulint after; - - ut_a(!buf_block_get_page_zip(block)); - - before = ibuf_index_page_calc_free_bits(0, max_ins_size); - - after = ibuf_index_page_calc_free(0, block); - - /* This approach cannot be used on compressed pages, since the - computed value of "before" often does not match the current - state of the bitmap. This is because the free space may - increase or decrease when a compressed page is reorganized. 
*/ - if (before != after) { - ibuf_set_free_bits_low(0, block, after, mtr); - } -} - -/**********************************************************************//** -Updates the free bits for a compressed page to reflect the present -state. Does this in the mtr given, which means that the latching -order rules virtually prevent any further operations for this OS -thread until mtr is committed. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is safe -to set the free bits in the same mini-transaction that updated the -page. The block must be non-NULL, have a frame holding a leaf page, and -belong to a compressed tablespace (zip_size != 0); each of these is -asserted. */ -UNIV_INTERN -void -ibuf_update_free_bits_zip( -/*======================*/ - buf_block_t* block, /*!< in/out: index page */ - mtr_t* mtr) /*!< in/out: mtr */ -{ - page_t* bitmap_page; - ulint space; - ulint page_no; - ulint zip_size; - ulint after; - - /* Check the pointer before the accessors below dereference it; - asserting afterwards could never fire for a NULL block. */ - ut_a(block); - - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - zip_size = buf_block_get_zip_size(block); - - buf_frame_t* frame = buf_block_get_frame(block); - - ut_a(frame && page_is_leaf(frame)); - ut_a(zip_size); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); - - after = ibuf_index_page_calc_free_zip(zip_size, block); - - if (after == 0) { - /* We move the page to the front of the buffer pool LRU list: - the purpose of this is to prevent those pages to which we - cannot make inserts using the insert buffer from slipping - out of the buffer pool */ - - buf_page_make_young(&block->page); - } - - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, after, mtr); -} - -/**********************************************************************//** -Updates the free bits for the two pages to reflect the present state. -Does this in the mtr given, which means that the latching order rules -virtually prevent any further operations until mtr is committed. -NOTE: The free bits in the insert buffer bitmap must never exceed the -free space on a page. 
It is safe to set the free bits in the same -mini-transaction that updated the pages. Both updates go through -ibuf_set_free_bits_low() while ibuf_bitmap_mutex is held. */ -UNIV_INTERN -void -ibuf_update_free_bits_for_two_pages_low( -/*====================================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - buf_block_t* block1, /*!< in: index page */ - buf_block_t* block2, /*!< in: index page */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint state; - - /* As we have to x-latch two random bitmap pages, we have to acquire - the bitmap mutex to prevent a deadlock with a similar operation - performed by another OS thread. */ - - mutex_enter(&ibuf_bitmap_mutex); - - state = ibuf_index_page_calc_free(zip_size, block1); - - ibuf_set_free_bits_low(zip_size, block1, state, mtr); - - state = ibuf_index_page_calc_free(zip_size, block2); - - ibuf_set_free_bits_low(zip_size, block2, state, mtr); - - mutex_exit(&ibuf_bitmap_mutex); -} - -/**********************************************************************//** -Returns TRUE if the page is one of the fixed address ibuf pages: either the -ibuf tree root page (IBUF_TREE_ROOT_PAGE_NO in IBUF_SPACE_ID) or, in any -tablespace, a page for which ibuf_bitmap_page() is true. -@return TRUE if a fixed address ibuf i/o page */ -UNIV_INLINE -ibool -ibuf_fixed_addr_page( -/*=================*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no)/*!< in: page number */ -{ - return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO) - || ibuf_bitmap_page(zip_size, page_no)); -} - -/***********************************************************************//** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. -Must not be called when recv_no_ibuf_operations==TRUE. 
-@return TRUE if level 2 or level 3 page */ -UNIV_INTERN -ibool -ibuf_page_low( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number */ -#ifdef UNIV_DEBUG - ibool x_latch,/*!< in: FALSE if relaxed check - (avoid latching the bitmap page) */ -#endif /* UNIV_DEBUG */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr which will contain an - x-latch to the bitmap page if the page - is not one of the fixed address ibuf - pages, or NULL, in which case a new - transaction is created. */ -{ - ibool ret; - mtr_t local_mtr; - page_t* bitmap_page; - - ut_ad(!recv_no_ibuf_operations); - ut_ad(x_latch || mtr == NULL); - - if (ibuf_fixed_addr_page(space, zip_size, page_no)) { - - return(TRUE); - } else if (space != IBUF_SPACE_ID) { - - return(FALSE); /* outside IBUF_SPACE_ID only the fixed-address pages count */ - } - - ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE); - -#ifdef UNIV_DEBUG - if (!x_latch) { - mtr_start(&local_mtr); - - /* Get the bitmap page without a page latch, so that - we will not be violating the latching order when - another bitmap page has already been latched by this - thread. The page will be buffer-fixed, and thus it - cannot be removed or relocated while we are looking at - it. The contents of the page could change, but the - IBUF_BITMAP_IBUF bit that we are interested in should - not be modified by any other thread. Nobody should be - calling ibuf_add_free_page() or ibuf_remove_free_page() - while the page is linked to the insert buffer b-tree. 
*/ - - bitmap_page = buf_block_get_frame( - buf_page_get_gen( - space, zip_size, - ibuf_bitmap_page_no_calc(zip_size, page_no), - RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, - file, line, &local_mtr)); - - ret = ibuf_bitmap_page_get_bits_low( - bitmap_page, page_no, zip_size, - MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF); - - mtr_commit(&local_mtr); - return(ret); - } -#endif /* UNIV_DEBUG */ - - if (mtr == NULL) { - mtr = &local_mtr; /* no caller mtr: use a private one, committed below */ - mtr_start(mtr); - } - - bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size, - file, line, mtr); /* NOTE(review): may return NULL; the result is used unchecked -- confirm */ - - ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_IBUF, mtr); - - if (mtr == &local_mtr) { - mtr_commit(mtr); - } - - return(ret); -} - -#ifdef UNIV_DEBUG -# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(mtr,rec) -#else /* UNIV_DEBUG */ -# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(rec) -#endif /* UNIV_DEBUG */ - -/********************************************************************//** -Returns the page number field of an ibuf record. 
-@return page number */ -static -ulint -ibuf_rec_get_page_no_func( -/*======================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* rec) /*!< in: ibuf record */ -{ - const byte* field; - ulint len; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - ut_ad(rec_get_n_fields_old(rec) > 2); - - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); - - ut_a(len == 1); /* only records with a 1-byte marker field are accepted */ - - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len); - - ut_a(len == 4); /* the page number is read as a 4-byte field */ - - return(mach_read_from_4(field)); -} - -#ifdef UNIV_DEBUG -# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(mtr,rec) -#else /* UNIV_DEBUG */ -# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(rec) -#endif /* UNIV_DEBUG */ - -/********************************************************************//** -Returns the space id field of an ibuf record. For < 4.1.x format records -returns 0. 
-@return space id */ -static -ulint -ibuf_rec_get_space_func( -/*====================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* rec) /*!< in: ibuf record */ -{ - const byte* field; - ulint len; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - ut_ad(rec_get_n_fields_old(rec) > 2); - - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); - - ut_a(len == 1); /* NOTE(review): a marker longer than 1 byte aborts here, so the "returns 0" case in the header comment looks unreachable -- confirm */ - - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); - - ut_a(len == 4); - - return(mach_read_from_4(field)); -} - -#ifdef UNIV_DEBUG -# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \ - ibuf_rec_get_info_func(mtr,rec,op,comp,info_len,counter) -#else /* UNIV_DEBUG */ -# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \ - ibuf_rec_get_info_func(rec,op,comp,info_len,counter) -#endif -/****************************************************************//** -Get various information about an ibuf record in >= 4.1.x format. Each of -the four output pointers may be NULL, in which case that item is simply not -returned. The values are first computed into local variables and validated -before anything is stored through the pointers. */ -static -void -ibuf_rec_get_info_func( -/*===================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* rec, /*!< in: ibuf record */ - ibuf_op_t* op, /*!< out: operation type, or NULL */ - ibool* comp, /*!< out: compact flag, or NULL */ - ulint* info_len, /*!< out: length of info fields at the - start of the fourth field, or - NULL */ - ulint* counter) /*!< out: counter value, or NULL */ -{ - const byte* types; - ulint fields; - ulint len; - - /* Local variables to shadow arguments. 
*/ - ibuf_op_t op_local; - ibool comp_local; - ulint info_len_local; - ulint counter_local; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - fields = rec_get_n_fields_old(rec); - ut_a(fields > IBUF_REC_FIELD_USER); - - types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); - - /* Whatever is left over after whole per-field type entries is the - length of the info header at the start of the metadata field. */ - info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE; - - switch (info_len_local) { - case 0: - case 1: - /* No operation/counter header: treated as an insert, and the - remainder (0 or 1) itself serves as the compact flag. */ - op_local = IBUF_OP_INSERT; - comp_local = info_len_local; - ut_ad(!counter); - counter_local = ULINT_UNDEFINED; - break; - - case IBUF_REC_INFO_SIZE: - op_local = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE]; - comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT; - counter_local = mach_read_from_2( - types + IBUF_REC_OFFSET_COUNTER); - break; - - default: - ut_error; - } - - ut_a(op_local < IBUF_OP_COUNT); - ut_a((len - info_len_local) == - (fields - IBUF_REC_FIELD_USER) - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - if (op) { - *op = op_local; - } - - if (comp) { - *comp = comp_local; - } - - if (info_len) { - *info_len = info_len_local; - } - - if (counter) { - *counter = counter_local; - } -} - -#ifdef UNIV_DEBUG -# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(mtr,rec) -#else /* UNIV_DEBUG */ -# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(rec) -#endif - -/****************************************************************//** -Returns the operation type field of an ibuf record. 
-@return operation type */ -static -ibuf_op_t -ibuf_rec_get_op_type_func( -/*======================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* rec) /*!< in: ibuf record */ -{ - ulint len; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - ut_ad(rec_get_n_fields_old(rec) > 2); - - /* Only the length of the marker field is needed here. */ - (void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); - - if (len > 1) { - /* This is a < 4.1.x format record */ - - return(IBUF_OP_INSERT); - } else { - ibuf_op_t op; - - ibuf_rec_get_info(mtr, rec, &op, NULL, NULL, NULL); - - return(op); - } -} - -/****************************************************************//** -Read the first two bytes from a record's fourth field (counter field in new -records; something else in older records). The record format is not checked -here: the bytes are returned whenever the record has more than -IBUF_REC_FIELD_METADATA fields and that field is at least 2 bytes long. -@return "counter" field, or ULINT_UNDEFINED if for some reason it -can't be read */ -UNIV_INTERN -ulint -ibuf_rec_get_counter( -/*=================*/ - const rec_t* rec) /*!< in: ibuf record */ -{ - const byte* ptr; - ulint len; - - if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) { - - return(ULINT_UNDEFINED); - } - - ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); - - if (len >= 2) { - - return(mach_read_from_2(ptr)); - } else { - - return(ULINT_UNDEFINED); - } -} - -/****************************************************************//** -Add accumulated operation counts to a permanent array. Both arrays must be -of size IBUF_OP_COUNT. 
*/ -static -void -ibuf_add_ops( -/*=========*/ - ulint* arr, /*!< in/out: array to modify */ - const ulint* ops) /*!< in: operation counts */ - -{ - ulint i; - -#ifndef HAVE_ATOMIC_BUILTINS - ut_ad(mutex_own(&ibuf_mutex)); -#endif /* !HAVE_ATOMIC_BUILTINS */ - - /* With atomic builtins each slot is bumped atomically; otherwise a - plain addition is used and debug builds assert that ibuf_mutex is - owned (see above). */ - for (i = 0; i < IBUF_OP_COUNT; i++) { -#ifdef HAVE_ATOMIC_BUILTINS - os_atomic_increment_ulint(&arr[i], ops[i]); -#else /* HAVE_ATOMIC_BUILTINS */ - arr[i] += ops[i]; -#endif /* HAVE_ATOMIC_BUILTINS */ - } -} - -/****************************************************************//** -Print operation counts. The array must be of size IBUF_OP_COUNT. The output -is a single line of "name count" pairs separated by ", ", in the order -insert, delete mark, delete, and terminated by a newline. */ -static -void -ibuf_print_ops( -/*===========*/ - const ulint* ops, /*!< in: operation counts */ - FILE* file) /*!< in: file where to print */ -{ - static const char* op_names[] = { - "insert", - "delete mark", - "delete" - }; - ulint i; - - ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT); - - for (i = 0; i < IBUF_OP_COUNT; i++) { - fprintf(file, "%s %lu%s", op_names[i], - (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : ""); - } - - putc('\n', file); -} - -/********************************************************************//** -Creates a dummy index for inserting a record to a non-clustered index. The -table and the index are both named "IBUF_DUMMY" and are created with -dict_mem_table_create() and dict_mem_index_create(); release them with -ibuf_dummy_index_free(). -@return dummy index */ -static -dict_index_t* -ibuf_dummy_index_create( -/*====================*/ - ulint n, /*!< in: number of fields */ - ibool comp) /*!< in: TRUE=use compact record format */ -{ - dict_table_t* table; - dict_index_t* index; - - table = dict_mem_table_create("IBUF_DUMMY", - DICT_HDR_SPACE, n, - comp ? 
DICT_TF_COMPACT : 0, 0); - - index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", - DICT_HDR_SPACE, 0, n); - - index->table = table; - - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - index->cached = TRUE; - - return(index); -} -/********************************************************************//** -Add a column to the dummy index. The column is first appended to the dummy -table with the main type, precise type and length taken from type, and the -newly added table column is then added to the index with prefix length len. */ -static -void -ibuf_dummy_index_add_col( -/*=====================*/ - dict_index_t* index, /*!< in: dummy index */ - const dtype_t* type, /*!< in: the data type of the column */ - ulint len) /*!< in: length of the column */ -{ - ulint i = index->table->n_def; /* position the new column will get */ - dict_mem_table_add_col(index->table, NULL, NULL, - dtype_get_mtype(type), - dtype_get_prtype(type), - dtype_get_len(type)); - dict_index_add_col(index, index->table, - dict_table_get_nth_col(index->table, i), len); -} -/********************************************************************//** -Deallocates a dummy index for inserting a record to a non-clustered index. -Frees the index and the table it points to; the table pointer is saved first -because the index is freed before the table. 
*/ -static -void -ibuf_dummy_index_free( -/*==================*/ - dict_index_t* index) /*!< in, own: dummy index */ -{ - dict_table_t* table = index->table; - - dict_mem_index_free(index); - dict_mem_table_free(table); -} - -#ifdef UNIV_DEBUG -# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \ - ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex) -#else /* UNIV_DEBUG */ -# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \ - ibuf_build_entry_from_ibuf_rec_func(ibuf_rec,heap,pindex) -#endif - -/*********************************************************************//** -Builds the entry used to - -1) IBUF_OP_INSERT: insert into a non-clustered index - -2) IBUF_OP_DELETE_MARK: find the record whose delete-mark flag we need to - activate - -3) IBUF_OP_DELETE: find the record we need to delete - -when we have the corresponding record in an ibuf index. 
- -NOTE that as we copy pointers to fields in ibuf_rec, the caller must -hold a latch to the ibuf_rec page as long as the entry is used! - -The dummy index returned through pindex gets one column per user field of -the record; the header comment of ibuf_dummy_index_free() describes how it -is released. - -@return own: entry to insert to a non-clustered index */ -static -dtuple_t* -ibuf_build_entry_from_ibuf_rec_func( -/*================================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* ibuf_rec, /*!< in: record in an insert buffer */ - mem_heap_t* heap, /*!< in: heap where built */ - dict_index_t** pindex) /*!< out, own: dummy index that - describes the entry */ -{ - dtuple_t* tuple; - dfield_t* field; - ulint n_fields; - const byte* types; - const byte* data; - ulint len; - ulint info_len; - ulint i; - ulint comp; - dict_index_t* index; - - ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - - data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len); - - ut_a(len == 1); - ut_a(*data == 0); - ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER); - - n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER; - - tuple = dtuple_create(heap, n_fields); - - types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len); - - ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL); - - index = ibuf_dummy_index_create(n_fields, comp); - - /* Skip the info header so that types points at the first - per-field type entry. */ - len -= info_len; - types += info_len; - - ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field_old( - ibuf_rec, i + IBUF_REC_FIELD_USER, &len); - - /* The tuple field points into ibuf_rec; nothing is copied. */ - dfield_set_data(field, data, len); - - dtype_new_read_for_order_and_null_size( - dfield_get_type(field), - types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - ibuf_dummy_index_add_col(index, dfield_get_type(field), len); - } - - /* Prevent an ut_ad() failure in page_zip_write_rec() by - adding system columns to the dummy 
table pointed to by the - dummy secondary index. The insert buffer is only used for - secondary indexes, whose records never contain any system - columns, such as DB_TRX_ID. */ - ut_d(dict_table_add_system_columns(index->table, index->table->heap)); - - *pindex = index; - - return(tuple); -} - -/******************************************************************//** -Get the data size. Sums the stored length of n_fields user fields of rec, -starting at field IBUF_REC_FIELD_USER; a field whose length is UNIV_SQL_NULL -is counted as dtype_get_sql_null_size() of the type read from its entry in -types. -@return size of fields */ -UNIV_INLINE -ulint -ibuf_rec_get_size( -/*==============*/ - const rec_t* rec, /*!< in: ibuf record */ - const byte* types, /*!< in: fields */ - ulint n_fields, /*!< in: number of fields */ - ulint comp) /*!< in: 0=ROW_FORMAT=REDUNDANT, - nonzero=ROW_FORMAT=COMPACT */ -{ - ulint i; - ulint field_offset; - ulint types_offset; - ulint size = 0; - - field_offset = IBUF_REC_FIELD_USER; - types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE; - - for (i = 0; i < n_fields; i++) { - ulint len; - dtype_t dtype; - - /* Only the field length is used; the returned offset is ignored. */ - rec_get_nth_field_offs_old(rec, i + field_offset, &len); - - if (len != UNIV_SQL_NULL) { - size += len; - } else { - dtype_new_read_for_order_and_null_size(&dtype, types); - - size += dtype_get_sql_null_size(&dtype, comp); - } - - types += types_offset; - } - - return(size); -} - -#ifdef UNIV_DEBUG -# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(mtr,rec) -#else /* UNIV_DEBUG */ -# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(rec) -#endif - -/********************************************************************//** -Returns the space taken by a stored non-clustered index entry if converted to -an index record. 
-@return size of index record in bytes + an upper limit of the space -taken in the page directory */ -static -ulint -ibuf_rec_get_volume_func( -/*=====================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* ibuf_rec)/*!< in: ibuf record */ -{ - ulint len; - const byte* data; - const byte* types; - ulint n_fields; - ulint data_size; - ulint comp; - ibuf_op_t op; - ulint info_len; - - ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - ut_ad(rec_get_n_fields_old(ibuf_rec) > 2); - - data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len); - ut_a(len == 1); - ut_a(*data == 0); - - types = rec_get_nth_field_old( - ibuf_rec, IBUF_REC_FIELD_METADATA, &len); - - ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL); - - if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) { - /* Delete-marking a record doesn't take any - additional space, and while deleting a record - actually frees up space, we have to play it safe and - pretend it takes no additional space (the record - might not exist, etc.). 
*/ - - return(0); - } else if (comp) { - dtuple_t* entry; - ulint volume; - dict_index_t* dummy_index; - mem_heap_t* heap = mem_heap_create(500); - - entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec, - heap, &dummy_index); - - volume = rec_get_converted_size(dummy_index, entry, 0); - - ibuf_dummy_index_free(dummy_index); - mem_heap_free(heap); - - return(volume + page_dir_calc_reserved_space(1)); - } - - types += info_len; - n_fields = rec_get_n_fields_old(ibuf_rec) - - IBUF_REC_FIELD_USER; - - data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp); - - return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0) - + page_dir_calc_reserved_space(1)); -} - -/*********************************************************************//** -Builds the tuple to insert to an ibuf tree when we have an entry for a -non-clustered index. - -NOTE that the original entry must be kept because we copy pointers to -its fields. - -@return own: entry to insert into an ibuf index tree */ -static -dtuple_t* -ibuf_entry_build( -/*=============*/ - ibuf_op_t op, /*!< in: operation type */ - dict_index_t* index, /*!< in: non-clustered index */ - const dtuple_t* entry, /*!< in: entry for a non-clustered index */ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: index page number where entry should - be inserted */ - ulint counter,/*!< in: counter value; - ULINT_UNDEFINED=not used */ - mem_heap_t* heap) /*!< in: heap into which to build */ -{ - dtuple_t* tuple; - dfield_t* field; - const dfield_t* entry_field; - ulint n_fields; - byte* buf; - byte* ti; - byte* type_info; - ulint i; - - ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT); - ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF); - ut_ad(op < IBUF_OP_COUNT); - - /* We have to build a tuple with the following fields: - - 1-4) These are described at the top of this file. - - 5) The rest of the fields are copied from the entry. 
- - All fields in the tuple are ordered like the type binary in our - insert buffer tree. */ - - n_fields = dtuple_get_n_fields(entry); - - tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER); - - /* 1) Space Id */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - mach_write_to_4(buf, space); - - dfield_set_data(field, buf, 4); - - /* 2) Marker byte */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 1)); - - /* We set the marker byte zero */ - - mach_write_to_1(buf, 0); - - dfield_set_data(field, buf, 1); - - /* 3) Page number */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - mach_write_to_4(buf, page_no); - - dfield_set_data(field, buf, 4); - - /* 4) Type info, part #1 */ - - if (counter == ULINT_UNDEFINED) { - i = dict_table_is_comp(index->table) ? 1 : 0; - } else { - ut_ad(counter <= 0xFFFF); - i = IBUF_REC_INFO_SIZE; - } - - ti = type_info = static_cast<byte*>( - mem_heap_alloc( - heap, - i + n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE)); - - switch (i) { - default: - ut_error; - break; - case 1: - /* set the flag for ROW_FORMAT=COMPACT */ - *ti++ = 0; - /* fall through */ - case 0: - /* the old format does not allow delete buffering */ - ut_ad(op == IBUF_OP_INSERT); - break; - case IBUF_REC_INFO_SIZE: - mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter); - - ti[IBUF_REC_OFFSET_TYPE] = (byte) op; - ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table) - ? 
IBUF_REC_COMPACT : 0; - ti += IBUF_REC_INFO_SIZE; - break; - } - - /* 5+) Fields from the entry */ - - for (i = 0; i < n_fields; i++) { - ulint fixed_len; - const dict_field_t* ifield; - - field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER); - entry_field = dtuple_get_nth_field(entry, i); - dfield_copy(field, entry_field); - - ifield = dict_index_get_nth_field(index, i); - /* Prefix index columns of fixed-length columns are of - fixed length. However, in the function call below, - dfield_get_type(entry_field) contains the fixed length - of the column in the clustered index. Replace it with - the fixed length of the secondary index column. */ - fixed_len = ifield->fixed_len; - -#ifdef UNIV_DEBUG - if (fixed_len) { - /* dict_index_add_col() should guarantee these */ - ut_ad(fixed_len <= (ulint) - dfield_get_type(entry_field)->len); - if (ifield->prefix_len) { - ut_ad(ifield->prefix_len == fixed_len); - } else { - ut_ad(fixed_len == (ulint) - dfield_get_type(entry_field)->len); - } - } -#endif /* UNIV_DEBUG */ - - dtype_new_store_for_order_and_null_size( - ti, dfield_get_type(entry_field), fixed_len); - ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE; - } - - /* 4) Type info, part #2 */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA); - - dfield_set_data(field, type_info, ti - type_info); - - /* Set all the types in the new tuple binary */ - - dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER); - - return(tuple); -} - -/*********************************************************************//** -Builds a search tuple used to search buffered inserts for an index page. -This is for >= 4.1.x format records. 
-@return own: search tuple */ -static -dtuple_t* -ibuf_search_tuple_build( -/*====================*/ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: index page number */ - mem_heap_t* heap) /*!< in: heap into which to build */ -{ - dtuple_t* tuple; - dfield_t* field; - byte* buf; - - tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA); - - /* Store the space id in tuple */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - mach_write_to_4(buf, space); - - dfield_set_data(field, buf, 4); - - /* Store the new format record marker byte */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 1)); - - mach_write_to_1(buf, 0); - - dfield_set_data(field, buf, 1); - - /* Store the page number in tuple */ - - field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); - - mach_write_to_4(buf, page_no); - - dfield_set_data(field, buf, 4); - - dtuple_set_types_binary(tuple, IBUF_REC_FIELD_METADATA); - - return(tuple); -} - -/*********************************************************************//** -Checks if there are enough pages in the free list of the ibuf tree that we -dare to start a pessimistic insert to the insert buffer. -@return TRUE if enough free pages in list */ -UNIV_INLINE -ibool -ibuf_data_enough_free_for_insert(void) -/*==================================*/ -{ - ut_ad(mutex_own(&ibuf_mutex)); - - /* We want a big margin of free pages, because a B-tree can sometimes - grow in size also if records are deleted from it, as the node pointers - can change, and we must make sure that we are able to delete the - inserts buffered for pages that we read to the buffer pool, without - any risk of running out of free space in the insert buffer. 
*/ - - return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height); -} - -/*********************************************************************//** -Checks if there are enough pages in the free list of the ibuf tree that we -should remove them and free to the file space management. -@return TRUE if enough free pages in list */ -UNIV_INLINE -ibool -ibuf_data_too_much_free(void) -/*=========================*/ -{ - ut_ad(mutex_own(&ibuf_mutex)); - - return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height); -} - -/*********************************************************************//** -Allocates a new page from the ibuf file segment and adds it to the free -list. -@return TRUE on success, FALSE if no space left */ -static -ibool -ibuf_add_free_page(void) -/*====================*/ -{ - mtr_t mtr; - page_t* header_page; - ulint flags; - ulint zip_size; - buf_block_t* block; - page_t* page; - page_t* root; - page_t* bitmap_page; - - mtr_start(&mtr); - - /* Acquire the fsp latch before the ibuf header, obeying the latching - order */ - mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); - - header_page = ibuf_header_page_get(&mtr); - - /* Allocate a new page: NOTE that if the page has been a part of a - non-clustered index which has subsequently been dropped, then the - page may have buffered inserts in the insert buffer, and these - should be deleted from there. These get deleted when the page - allocation creates the page in buffer. Thus the call below may end - up calling the insert buffer routines and, as we yet have no latches - to insert buffer tree pages, these routines can run without a risk - of a deadlock. This is the reason why we created a special ibuf - header page apart from the ibuf tree. 
*/ - - block = fseg_alloc_free_page( - header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP, - &mtr); - - if (block == NULL) { - mtr_commit(&mtr); - - return(FALSE); - } - - ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1); - ibuf_enter(&mtr); - mutex_enter(&ibuf_mutex); - root = ibuf_tree_root_get(&mtr); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); - page = buf_block_get_frame(block); - - /* Add the page to the free list and update the ibuf size data */ - - flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST, - MLOG_2BYTES, &mtr); - - ibuf->seg_size++; - ibuf->free_list_len++; - - /* Set the bit indicating that this page is now an ibuf tree page - (level 2 page) */ - - bitmap_page = ibuf_bitmap_get_map_page( - IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr); - - mutex_exit(&ibuf_mutex); - - ibuf_bitmap_page_set_bits( - bitmap_page, buf_block_get_page_no(block), zip_size, - IBUF_BITMAP_IBUF, TRUE, &mtr); - - ibuf_mtr_commit(&mtr); - - return(TRUE); -} - -/*********************************************************************//** -Removes a page from the free list and frees it to the fsp system. 
*/ -static -void -ibuf_remove_free_page(void) -/*=======================*/ -{ - mtr_t mtr; - mtr_t mtr2; - page_t* header_page; - ulint flags; - ulint zip_size; - ulint page_no; - page_t* page; - page_t* root; - page_t* bitmap_page; - - mtr_start(&mtr); - - /* Acquire the fsp latch before the ibuf header, obeying the latching - order */ - mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr); - zip_size = fsp_flags_get_zip_size(flags); - - header_page = ibuf_header_page_get(&mtr); - - /* Prevent pessimistic inserts to insert buffer trees for a while */ - ibuf_enter(&mtr); - mutex_enter(&ibuf_pessimistic_insert_mutex); - mutex_enter(&ibuf_mutex); - - if (!ibuf_data_too_much_free()) { - - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - - ibuf_mtr_commit(&mtr); - - return; - } - - ibuf_mtr_start(&mtr2); - - root = ibuf_tree_root_get(&mtr2); - - mutex_exit(&ibuf_mutex); - - page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - &mtr2).page; - - /* NOTE that we must release the latch on the ibuf tree root - because in fseg_free_page we access level 1 pages, and the root - is a level 2 page. */ - - ibuf_mtr_commit(&mtr2); - ibuf_exit(&mtr); - - /* Since pessimistic inserts were prevented, we know that the - page is still in the free list. NOTE that also deletes may take - pages from the free list, but they take them from the start, and - the free list was so long that they cannot have taken the last - page from it. 
*/ - - fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - IBUF_SPACE_ID, page_no, &mtr); - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ - - ibuf_enter(&mtr); - - mutex_enter(&ibuf_mutex); - - root = ibuf_tree_root_get(&mtr); - - ut_ad(page_no == flst_get_last(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, &mtr).page); - - { - buf_block_t* block; - - block = buf_page_get( - IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - - page = buf_block_get_frame(block); - } - - /* Remove the page from the free list and update the ibuf size data */ - - flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); - - mutex_exit(&ibuf_pessimistic_insert_mutex); - - ibuf->seg_size--; - ibuf->free_list_len--; - - /* Set the bit indicating that this page is no more an ibuf tree page - (level 2 page) */ - - bitmap_page = ibuf_bitmap_get_map_page( - IBUF_SPACE_ID, page_no, zip_size, &mtr); - - mutex_exit(&ibuf_mutex); - - ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr); - -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no); -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ - ibuf_mtr_commit(&mtr); -} - -/***********************************************************************//** -Frees excess pages from the ibuf free list. This function is called when an OS -thread calls fsp services to allocate a new file segment, or a new page to a -file segment, and the thread did not own the fsp latch before this call. 
*/ -UNIV_INTERN -void -ibuf_free_excess_pages(void) -/*========================*/ -{ - ulint i; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL), - RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(rw_lock_get_x_lock_count( - fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1); - - /* NOTE: We require that the thread did not own the latch before, - because then we know that we can obey the correct latching order - for ibuf latches */ - - if (!ibuf) { - /* Not yet initialized; not sure if this is possible, but - does no harm to check for it. */ - - return; - } - - /* Free at most a few pages at a time, so that we do not delay the - requested service too much */ - - for (i = 0; i < 4; i++) { - - ibool too_much_free; - - mutex_enter(&ibuf_mutex); - too_much_free = ibuf_data_too_much_free(); - mutex_exit(&ibuf_mutex); - - if (!too_much_free) { - return; - } - - ibuf_remove_free_page(); - } -} - -#ifdef UNIV_DEBUG -# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \ - ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored) -#else /* UNIV_DEBUG */ -# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \ - ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored) -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Reads page numbers from a leaf in an ibuf tree. 
-@return a lower limit for the combined volume of records which will be -merged */ -static -ulint -ibuf_get_merge_page_nos_func( -/*=========================*/ - ibool contract,/*!< in: TRUE if this function is called to - contract the tree, FALSE if this is called - when a single page becomes full and we look - if it pays to read also nearby pages */ - const rec_t* rec, /*!< in: insert buffer record */ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction holding rec */ -#endif /* UNIV_DEBUG */ - ulint* space_ids,/*!< in/out: space id's of the pages */ - ib_int64_t* space_versions,/*!< in/out: tablespace version - timestamps; used to prevent reading in old - pages after DISCARD + IMPORT tablespace */ - ulint* page_nos,/*!< in/out: buffer for at least - IBUF_MAX_N_PAGES_MERGED many page numbers; - the page numbers are in an ascending order */ - ulint* n_stored)/*!< out: number of page numbers stored to - page_nos in this function */ -{ - ulint prev_page_no; - ulint prev_space_id; - ulint first_page_no; - ulint first_space_id; - ulint rec_page_no; - ulint rec_space_id; - ulint sum_volumes; - ulint volume_for_page; - ulint rec_volume; - ulint limit; - ulint n_pages; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - - *n_stored = 0; - - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4); - - if (page_rec_is_supremum(rec)) { - - rec = page_rec_get_prev_const(rec); - } - - if (page_rec_is_infimum(rec)) { - - rec = page_rec_get_next_const(rec); - } - - if (page_rec_is_supremum(rec)) { - - return(0); - } - - first_page_no = ibuf_rec_get_page_no(mtr, rec); - first_space_id = ibuf_rec_get_space(mtr, rec); - n_pages = 0; - prev_page_no = 0; - prev_space_id = 0; - - /* Go backwards from the first rec until we reach the border of the - 'merge area', or the page start or the limit of storeable pages is - reached */ - - while (!page_rec_is_infimum(rec) 
&& UNIV_LIKELY(n_pages < limit)) { - - rec_page_no = ibuf_rec_get_page_no(mtr, rec); - rec_space_id = ibuf_rec_get_space(mtr, rec); - - if (rec_space_id != first_space_id - || (rec_page_no / IBUF_MERGE_AREA) - != (first_page_no / IBUF_MERGE_AREA)) { - - break; - } - - if (rec_page_no != prev_page_no - || rec_space_id != prev_space_id) { - n_pages++; - } - - prev_page_no = rec_page_no; - prev_space_id = rec_space_id; - - rec = page_rec_get_prev_const(rec); - } - - rec = page_rec_get_next_const(rec); - - /* At the loop start there is no prev page; we mark this with a pair - of space id, page no (0, 0) for which there can never be entries in - the insert buffer */ - - prev_page_no = 0; - prev_space_id = 0; - sum_volumes = 0; - volume_for_page = 0; - - while (*n_stored < limit) { - if (page_rec_is_supremum(rec)) { - /* When no more records available, mark this with - another 'impossible' pair of space id, page no */ - rec_page_no = 1; - rec_space_id = 0; - } else { - rec_page_no = ibuf_rec_get_page_no(mtr, rec); - rec_space_id = ibuf_rec_get_space(mtr, rec); - /* In the system tablespace, the smallest - possible secondary index leaf page number is - bigger than IBUF_TREE_ROOT_PAGE_NO (4). In - other tablespaces, the clustered index tree is - created at page 3, which makes page 4 the - smallest possible secondary index leaf page - (and that only after DROP INDEX). 
*/ - ut_ad(rec_page_no - > (ulint) IBUF_TREE_ROOT_PAGE_NO - - (rec_space_id != 0)); - } - -#ifdef UNIV_IBUF_DEBUG - ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED); -#endif - if ((rec_space_id != prev_space_id - || rec_page_no != prev_page_no) - && (prev_space_id != 0 || prev_page_no != 0)) { - - if (contract - || (prev_page_no == first_page_no - && prev_space_id == first_space_id) - || (volume_for_page - > ((IBUF_MERGE_THRESHOLD - 1) - * 4 * UNIV_PAGE_SIZE - / IBUF_PAGE_SIZE_PER_FREE_SPACE) - / IBUF_MERGE_THRESHOLD)) { - - space_ids[*n_stored] = prev_space_id; - space_versions[*n_stored] - = fil_space_get_version(prev_space_id); - page_nos[*n_stored] = prev_page_no; - - (*n_stored)++; - - sum_volumes += volume_for_page; - } - - if (rec_space_id != first_space_id - || rec_page_no / IBUF_MERGE_AREA - != first_page_no / IBUF_MERGE_AREA) { - - break; - } - - volume_for_page = 0; - } - - if (rec_page_no == 1 && rec_space_id == 0) { - /* Supremum record */ - - break; - } - - rec_volume = ibuf_rec_get_volume(mtr, rec); - - volume_for_page += rec_volume; - - prev_page_no = rec_page_no; - prev_space_id = rec_space_id; - - rec = page_rec_get_next_const(rec); - } - -#ifdef UNIV_IBUF_DEBUG - ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED); -#endif -#if 0 - fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n", - *n_stored, sum_volumes); -#endif - return(sum_volumes); -} - -/*******************************************************************//** -Get the matching records for space id. 
-@return current rec or NULL */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -const rec_t* -ibuf_get_user_rec( -/*===============*/ - btr_pcur_t* pcur, /*!< in: the current cursor */ - mtr_t* mtr) /*!< in: mini transaction */ -{ - do { - const rec_t* rec = btr_pcur_get_rec(pcur); - - if (page_rec_is_user_rec(rec)) { - return(rec); - } - } while (btr_pcur_move_to_next(pcur, mtr)); - - return(NULL); -} - -/*********************************************************************//** -Reads page numbers for a space id from an ibuf tree. -@return a lower limit for the combined volume of records which will be -merged */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ulint -ibuf_get_merge_pages( -/*=================*/ - btr_pcur_t* pcur, /*!< in/out: cursor */ - ulint space, /*!< in: space for which to merge */ - ulint limit, /*!< in: max page numbers to read */ - ulint* pages, /*!< out: pages read */ - ulint* spaces, /*!< out: spaces read */ - ib_int64_t* versions,/*!< out: space versions read */ - ulint* n_pages,/*!< out: number of pages read */ - mtr_t* mtr) /*!< in: mini transaction */ -{ - const rec_t* rec; - ulint volume = 0; - ib_int64_t version = fil_space_get_version(space); - - ut_a(space != ULINT_UNDEFINED); - - *n_pages = 0; - - while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0 - && ibuf_rec_get_space(mtr, rec) == space - && *n_pages < limit) { - - ulint page_no = ibuf_rec_get_page_no(mtr, rec); - - if (*n_pages == 0 || pages[*n_pages - 1] != page_no) { - spaces[*n_pages] = space; - pages[*n_pages] = page_no; - versions[*n_pages] = version; - ++*n_pages; - } - - volume += ibuf_rec_get_volume(mtr, rec); - - btr_pcur_move_to_next(pcur, mtr); - } - - return(volume); -} - -/*********************************************************************//** -Contracts insert buffer trees by reading pages to the buffer pool. 
-@return a lower limit for the combined size in bytes of entries which -will be merged from ibuf trees to the pages read, 0 if ibuf is -empty */ -static -ulint -ibuf_merge_pages( -/*=============*/ - ulint* n_pages, /*!< out: number of pages to which merged */ - bool sync) /*!< in: true if the caller wants to wait for - the issued read with the highest tablespace - address to complete */ -{ - mtr_t mtr; - btr_pcur_t pcur; - ulint sum_sizes; - ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; - ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; - ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; - - *n_pages = 0; - - ibuf_mtr_start(&mtr); - - /* Open a cursor to a randomly chosen leaf of the tree, at a random - position within the leaf */ - - btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr); - - ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); - - if (page_is_empty(btr_pcur_get_page(&pcur))) { - /* If a B-tree page is empty, it must be the root page - and the whole B-tree must be empty. InnoDB does not - allow empty B-tree pages other than the root. */ - ut_ad(ibuf->empty); - ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) - == IBUF_SPACE_ID); - ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) - == FSP_IBUF_TREE_ROOT_PAGE_NO); - - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - - return(0); - } - - sum_sizes = ibuf_get_merge_page_nos(TRUE, - btr_pcur_get_rec(&pcur), &mtr, - space_ids, space_versions, - page_nos, n_pages); -#if 0 /* defined UNIV_IBUF_DEBUG */ - fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", - sync, *n_pages, sum_sizes); -#endif - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - - buf_read_ibuf_merge_pages( - sync, space_ids, space_versions, page_nos, *n_pages); - - return(sum_sizes + 1); -} - -/*********************************************************************//** -Contracts insert buffer trees by reading pages referring to space_id -to the buffer pool. 
-@returns number of pages merged.*/ -UNIV_INTERN -ulint -ibuf_merge_space( -/*=============*/ - ulint space) /*!< in: tablespace id to merge */ -{ - mtr_t mtr; - btr_pcur_t pcur; - mem_heap_t* heap = mem_heap_create(512); - dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap); - ulint n_pages = 0; - - ut_ad(space < SRV_LOG_SPACE_FIRST_ID); - - ibuf_mtr_start(&mtr); - - /* Position the cursor on the first matching record. */ - - btr_pcur_open( - ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, - &mtr); - - mem_heap_free(heap); - - ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); - - ulint sum_sizes = 0; - ulint pages[IBUF_MAX_N_PAGES_MERGED]; - ulint spaces[IBUF_MAX_N_PAGES_MERGED]; - ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED]; - - if (page_is_empty(btr_pcur_get_page(&pcur))) { - /* If a B-tree page is empty, it must be the root page - and the whole B-tree must be empty. InnoDB does not - allow empty B-tree pages other than the root. */ - ut_ad(ibuf->empty); - ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) - == IBUF_SPACE_ID); - ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) - == FSP_IBUF_TREE_ROOT_PAGE_NO); - - } else { - - sum_sizes = ibuf_get_merge_pages( - &pcur, space, IBUF_MAX_N_PAGES_MERGED, - &pages[0], &spaces[0], &versions[0], &n_pages, - &mtr); - ib_logf(IB_LOG_LEVEL_INFO,"\n Size of pages merged %lu" - ,sum_sizes); - } - - ibuf_mtr_commit(&mtr); - - btr_pcur_close(&pcur); - - if (n_pages > 0) { - -#ifdef UNIV_DEBUG - ut_ad(n_pages <= UT_ARR_SIZE(pages)); - - for (ulint i = 0; i < n_pages; ++i) { - ut_ad(spaces[i] == space); - ut_ad(i == 0 || versions[i] == versions[i - 1]); - } -#endif /* UNIV_DEBUG */ - - buf_read_ibuf_merge_pages( - true, spaces, versions, pages, n_pages); - } - - return(n_pages); -} - -/** Contract the change buffer by reading pages to the buffer pool. 
-@param[out] n_pages number of pages merged -@param[in] sync whether the caller waits for -the issued reads to complete -@return a lower limit for the combined size in bytes of entries which -will be merged from ibuf trees to the pages read, 0 if ibuf is -empty */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ulint -ibuf_merge( -/*=======*/ - ulint* n_pages, /*!< out: number of pages to - which merged */ - bool sync) /*!< in: TRUE if the caller - wants to wait for the issued - read with the highest - tablespace address to complete */ -{ - *n_pages = 0; - - /* We perform a dirty read of ibuf->empty, without latching - the insert buffer root page. We trust this dirty read except - when a slow shutdown is being executed. During a slow - shutdown, the insert buffer merge must be completed. */ - - if (ibuf->empty && !srv_shutdown_state) { - return(0); -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - } else if (ibuf_debug) { - return(0); -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - } else { - return(ibuf_merge_pages(n_pages, sync)); - } -} - -/** Contract the change buffer by reading pages to the buffer pool. -@param[in] sync whether the caller waits for -the issued reads to complete -@return a lower limit for the combined size in bytes of entries which -will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ -static -ulint -ibuf_contract( -/*==========*/ - bool sync) /*!< in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ -{ - ulint n_pages; - - return(ibuf_merge_pages(&n_pages, sync)); -} - -/** Contract the change buffer by reading pages to the buffer pool. -@param[in] full If true, do a full contraction based -on PCT_IO(100). If false, the size of contract batch is determined -based on the current size of the change buffer. 
-@return a lower limit for the combined size in bytes of entries which -will be merged from ibuf trees to the pages read, 0 if ibuf is -empty */ -UNIV_INTERN -ulint -ibuf_merge_in_background( -/*=====================*/ - bool full) /*!< in: TRUE if the caller wants to - do a full contract based on PCT_IO(100). - If FALSE then the size of contract - batch is determined based on the - current size of the ibuf tree. */ -{ - ulint sum_bytes = 0; - ulint sum_pages = 0; - ulint n_pag2; - ulint n_pages; - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - if (srv_ibuf_disable_background_merge) { - return(0); - } -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - - if (full) { - /* Caller has requested a full batch */ - n_pages = PCT_IO(100); - } else { - /* By default we do a batch of 5% of the io_capacity */ - n_pages = PCT_IO(5); - - mutex_enter(&ibuf_mutex); - - /* If the ibuf->size is more than half the max_size - then we make more agreesive contraction. - +1 is to avoid division by zero. */ - if (ibuf->size > ibuf->max_size / 2) { - ulint diff = ibuf->size - ibuf->max_size / 2; - n_pages += PCT_IO((diff * 100) - / (ibuf->max_size + 1)); - } - - mutex_exit(&ibuf_mutex); - } - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - if (ibuf_debug) { - return(0); - } -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - - while (sum_pages < n_pages) { - ulint n_bytes; - - n_bytes = ibuf_merge(&n_pag2, false); - - if (n_bytes == 0) { - return(sum_bytes); - } - - sum_bytes += n_bytes; - sum_pages += n_pag2; - - srv_inc_activity_count(true); - } - - return(sum_bytes); -} - -/*********************************************************************//** -Contract insert buffer trees after insert if they are too big. 
*/
UNIV_INLINE
void
ibuf_contract_after_insert(
/*=======================*/
	ulint	entry_size)	/*!< in: size of a record which was inserted
				into an ibuf tree */
{
	/* ibuf->size and ibuf->max_size are read without holding
	ibuf_mutex, to reduce contention on it. ibuf->max_size stays
	constant after ibuf_init_at_db_start(); ibuf->size should be
	protected by ibuf_mutex, but as it fits in a machine word this
	dirty read should be OK. At worst we do some excessive
	ibuf_contract() calls or occasionally skip one. */
	const ulint	size = ibuf->size;
	const ulint	max_size = ibuf->max_size;

	if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
		/* Not too big yet */
		return;
	}

	const ibool	sync = (size >= max_size
				+ IBUF_CONTRACT_ON_INSERT_SYNC);

	/* Contract at least entry_size many bytes, stopping early if a
	round reports 0 */
	ulint	sum_sizes = 0;
	ulint	contracted;

	do {
		contracted = ibuf_contract(sync);
		sum_sizes += contracted;
	} while (contracted > 0 && sum_sizes < entry_size);
}

/*********************************************************************//**
Determine if an insert buffer record has been encountered already.
-@return TRUE if a new record, FALSE if possible duplicate */ -static -ibool -ibuf_get_volume_buffered_hash( -/*==========================*/ - const rec_t* rec, /*!< in: ibuf record in post-4.1 format */ - const byte* types, /*!< in: fields */ - const byte* data, /*!< in: start of user record data */ - ulint comp, /*!< in: 0=ROW_FORMAT=REDUNDANT, - nonzero=ROW_FORMAT=COMPACT */ - ulint* hash, /*!< in/out: hash array */ - ulint size) /*!< in: number of elements in hash array */ -{ - ulint len; - ulint fold; - ulint bitmask; - - len = ibuf_rec_get_size( - rec, types, - rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, comp); - fold = ut_fold_binary(data, len); - - hash += (fold / (CHAR_BIT * sizeof *hash)) % size; - bitmask = static_cast<ulint>(1) << (fold % (CHAR_BIT * sizeof(*hash))); - - if (*hash & bitmask) { - - return(FALSE); - } - - /* We have not seen this record yet. Insert it. */ - *hash |= bitmask; - - return(TRUE); -} - -#ifdef UNIV_DEBUG -# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \ - ibuf_get_volume_buffered_count_func(mtr,rec,hash,size,n_recs) -#else /* UNIV_DEBUG */ -# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \ - ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs) -#endif -/*********************************************************************//** -Update the estimate of the number of records on a page, and -get the space taken by merging the buffered record to the index page. 
-@return size of index record in bytes + an upper limit of the space -taken in the page directory */ -static -ulint -ibuf_get_volume_buffered_count_func( -/*================================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction owning rec */ -#endif /* UNIV_DEBUG */ - const rec_t* rec, /*!< in: insert buffer record */ - ulint* hash, /*!< in/out: hash array */ - ulint size, /*!< in: number of elements in hash array */ - lint* n_recs) /*!< in/out: estimated number of records - on the page that rec points to */ -{ - ulint len; - ibuf_op_t ibuf_op; - const byte* types; - ulint n_fields; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(ibuf_inside(mtr)); - - n_fields = rec_get_n_fields_old(rec); - ut_ad(n_fields > IBUF_REC_FIELD_USER); - n_fields -= IBUF_REC_FIELD_USER; - - rec_get_nth_field_offs_old(rec, 1, &len); - /* This function is only invoked when buffering new - operations. All pre-4.1 records should have been merged - when the database was started up. */ - ut_a(len == 1); - - if (rec_get_deleted_flag(rec, 0)) { - /* This record has been merged already, - but apparently the system crashed before - the change was discarded from the buffer. - Pretend that the record does not exist. */ - return(0); - } - - types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); - - switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE, - IBUF_REC_INFO_SIZE)) { - default: - ut_error; - case 0: - /* This ROW_TYPE=REDUNDANT record does not include an - operation counter. Exclude it from the *n_recs, - because deletes cannot be buffered if there are - old-style inserts buffered for the page. */ - - len = ibuf_rec_get_size(rec, types, n_fields, 0); - - return(len - + rec_get_converted_extra_size(len, n_fields, 0) - + page_dir_calc_reserved_space(1)); - case 1: - /* This ROW_TYPE=COMPACT record does not include an - operation counter. 
Exclude it from the *n_recs, - because deletes cannot be buffered if there are - old-style inserts buffered for the page. */ - goto get_volume_comp; - - case IBUF_REC_INFO_SIZE: - ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE]; - break; - } - - switch (ibuf_op) { - case IBUF_OP_INSERT: - /* Inserts can be done by updating a delete-marked record. - Because delete-mark and insert operations can be pointing to - the same records, we must not count duplicates. */ - case IBUF_OP_DELETE_MARK: - /* There must be a record to delete-mark. - See if this record has been already buffered. */ - if (n_recs && ibuf_get_volume_buffered_hash( - rec, types + IBUF_REC_INFO_SIZE, - types + len, - types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT, - hash, size)) { - (*n_recs)++; - } - - if (ibuf_op == IBUF_OP_DELETE_MARK) { - /* Setting the delete-mark flag does not - affect the available space on the page. */ - return(0); - } - break; - case IBUF_OP_DELETE: - /* A record will be removed from the page. */ - if (n_recs) { - (*n_recs)--; - } - /* While deleting a record actually frees up space, - we have to play it safe and pretend that it takes no - additional space (the record might not exist, etc.). */ - return(0); - default: - ut_error; - } - - ut_ad(ibuf_op == IBUF_OP_INSERT); - -get_volume_comp: - { - dtuple_t* entry; - ulint volume; - dict_index_t* dummy_index; - mem_heap_t* heap = mem_heap_create(500); - - entry = ibuf_build_entry_from_ibuf_rec( - mtr, rec, heap, &dummy_index); - - volume = rec_get_converted_size(dummy_index, entry, 0); - - ibuf_dummy_index_free(dummy_index); - mem_heap_free(heap); - - return(volume + page_dir_calc_reserved_space(1)); - } -} - -/*********************************************************************//** -Gets an upper limit for the combined size of entries buffered in the insert -buffer for a given page. 
-@return upper limit for the volume of buffered inserts for the index -page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span -several pages in the insert buffer */ -static -ulint -ibuf_get_volume_buffered( -/*=====================*/ - const btr_pcur_t*pcur, /*!< in: pcur positioned at a place in an - insert buffer tree where we would insert an - entry for the index page whose number is - page_no, latch mode has to be BTR_MODIFY_PREV - or BTR_MODIFY_TREE */ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: page number of an index page */ - lint* n_recs, /*!< in/out: minimum number of records on the - page after the buffered changes have been - applied, or NULL to disable the counting */ - mtr_t* mtr) /*!< in: mini-transaction of pcur */ -{ - ulint volume; - const rec_t* rec; - const page_t* page; - ulint prev_page_no; - const page_t* prev_page; - ulint next_page_no; - const page_t* next_page; - /* bitmap of buffered recs */ - ulint hash_bitmap[128 / sizeof(ulint)]; - - ut_ad((pcur->latch_mode == BTR_MODIFY_PREV) - || (pcur->latch_mode == BTR_MODIFY_TREE)); - - /* Count the volume of inserts earlier in the alphabetical order than - pcur */ - - volume = 0; - - if (n_recs) { - memset(hash_bitmap, 0, sizeof hash_bitmap); - } - - rec = btr_pcur_get_rec(pcur); - page = page_align(rec); - ut_ad(page_validate(page, ibuf->index)); - - if (page_rec_is_supremum(rec)) { - rec = page_rec_get_prev_const(rec); - } - - for (; !page_rec_is_infimum(rec); - rec = page_rec_get_prev_const(rec)) { - ut_ad(page_align(rec) == page); - - if (page_no != ibuf_rec_get_page_no(mtr, rec) - || space != ibuf_rec_get_space(mtr, rec)) { - - goto count_later; - } - - volume += ibuf_get_volume_buffered_count( - mtr, rec, - hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); - } - - /* Look at the previous page */ - - prev_page_no = btr_page_get_prev(page, mtr); - - if (prev_page_no == FIL_NULL) { - - goto count_later; - } - - { - buf_block_t* block; - - block = buf_page_get( - 
IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, - mtr); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - - - prev_page = buf_block_get_frame(block); - ut_ad(page_validate(prev_page, ibuf->index)); - } - -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - rec = page_get_supremum_rec(prev_page); - rec = page_rec_get_prev_const(rec); - - for (;; rec = page_rec_get_prev_const(rec)) { - ut_ad(page_align(rec) == prev_page); - - if (page_rec_is_infimum(rec)) { - - /* We cannot go to yet a previous page, because we - do not have the x-latch on it, and cannot acquire one - because of the latching order: we have to give up */ - - return(UNIV_PAGE_SIZE); - } - - if (page_no != ibuf_rec_get_page_no(mtr, rec) - || space != ibuf_rec_get_space(mtr, rec)) { - - goto count_later; - } - - volume += ibuf_get_volume_buffered_count( - mtr, rec, - hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); - } - -count_later: - rec = btr_pcur_get_rec(pcur); - - if (!page_rec_is_supremum(rec)) { - rec = page_rec_get_next_const(rec); - } - - for (; !page_rec_is_supremum(rec); - rec = page_rec_get_next_const(rec)) { - if (page_no != ibuf_rec_get_page_no(mtr, rec) - || space != ibuf_rec_get_space(mtr, rec)) { - - return(volume); - } - - volume += ibuf_get_volume_buffered_count( - mtr, rec, - hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); - } - - /* Look at the next page */ - - next_page_no = btr_page_get_next(page, mtr); - - if (next_page_no == FIL_NULL) { - - return(volume); - } - - { - buf_block_t* block; - - block = buf_page_get( - IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, - mtr); - - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - - - next_page = buf_block_get_frame(block); - ut_ad(page_validate(next_page, ibuf->index)); - } - -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - rec = page_get_infimum_rec(next_page); - rec = 
page_rec_get_next_const(rec); - - for (;; rec = page_rec_get_next_const(rec)) { - ut_ad(page_align(rec) == next_page); - - if (page_rec_is_supremum(rec)) { - - /* We give up */ - - return(UNIV_PAGE_SIZE); - } - - if (page_no != ibuf_rec_get_page_no(mtr, rec) - || space != ibuf_rec_get_space(mtr, rec)) { - - return(volume); - } - - volume += ibuf_get_volume_buffered_count( - mtr, rec, - hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); - } -} - -/*********************************************************************//** -Reads the biggest tablespace id from the high end of the insert buffer -tree and updates the counter in fil_system. */ -UNIV_INTERN -void -ibuf_update_max_tablespace_id(void) -/*===============================*/ -{ - ulint max_space_id; - const rec_t* rec; - const byte* field; - ulint len; - btr_pcur_t pcur; - mtr_t mtr; - - ut_a(!dict_table_is_comp(ibuf->index->table)); - - ibuf_mtr_start(&mtr); - - btr_pcur_open_at_index_side( - false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); - - btr_pcur_move_to_prev(&pcur, &mtr); - - if (btr_pcur_is_before_first_on_page(&pcur)) { - /* The tree is empty */ - - max_space_id = 0; - } else { - rec = btr_pcur_get_rec(&pcur); - - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); - - ut_a(len == 4); - - max_space_id = mach_read_from_4(field); - } - - ibuf_mtr_commit(&mtr); - - /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */ - - fil_set_max_space_id_if_bigger(max_space_id); -} - -#ifdef UNIV_DEBUG -# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \ - ibuf_get_entry_counter_low_func(mtr,rec,space,page_no) -#else /* UNIV_DEBUG */ -# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \ - ibuf_get_entry_counter_low_func(rec,space,page_no) -#endif -/****************************************************************//** -Helper function for ibuf_get_entry_counter_func. 
Checks if rec is for -(space, page_no), and if so, reads counter value from it and returns -that + 1. -@retval ULINT_UNDEFINED if the record does not contain any counter -@retval 0 if the record is not for (space, page_no) -@retval 1 + previous counter value, otherwise */ -static -ulint -ibuf_get_entry_counter_low_func( -/*============================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction of rec */ -#endif /* UNIV_DEBUG */ - const rec_t* rec, /*!< in: insert buffer record */ - ulint space, /*!< in: space id */ - ulint page_no) /*!< in: page number */ -{ - ulint counter; - const byte* field; - ulint len; - - ut_ad(ibuf_inside(mtr)); - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); - ut_ad(rec_get_n_fields_old(rec) > 2); - - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); - - ut_a(len == 1); - - /* Check the tablespace identifier. */ - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); - - ut_a(len == 4); - - if (mach_read_from_4(field) != space) { - - return(0); - } - - /* Check the page offset. */ - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len); - ut_a(len == 4); - - if (mach_read_from_4(field) != page_no) { - - return(0); - } - - /* Check if the record contains a counter field. 
*/ - field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); - - switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { - default: - ut_error; - case 0: /* ROW_FORMAT=REDUNDANT */ - case 1: /* ROW_FORMAT=COMPACT */ - return(ULINT_UNDEFINED); - - case IBUF_REC_INFO_SIZE: - counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER); - ut_a(counter < 0xFFFF); - return(counter + 1); - } -} - -#ifdef UNIV_DEBUG -# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ - ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf) -#else /* UNIV_DEBUG */ -# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ - ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf) -#endif - -/****************************************************************//** -Calculate the counter field for an entry based on the current -last record in ibuf for (space, page_no). -@return the counter field, or ULINT_UNDEFINED -if we should abort this insertion to ibuf */ -static -ulint -ibuf_get_entry_counter_func( -/*========================*/ - ulint space, /*!< in: space id of entry */ - ulint page_no, /*!< in: page number of entry */ - const rec_t* rec, /*!< in: the record preceding the - insertion point */ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in: mini-transaction */ -#endif /* UNIV_DEBUG */ - ibool only_leaf) /*!< in: TRUE if this is the only - leaf page that can contain entries - for (space,page_no), that is, there - was no exact match for (space,page_no) - in the node pointer */ -{ - ut_ad(ibuf_inside(mtr)); - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_validate(page_align(rec), ibuf->index)); - - if (page_rec_is_supremum(rec)) { - /* This is just for safety. The record should be a - page infimum or a user record. 
*/ - ut_ad(0); - return(ULINT_UNDEFINED); - } else if (!page_rec_is_infimum(rec)) { - return(ibuf_get_entry_counter_low(mtr, rec, space, page_no)); - } else if (only_leaf - || fil_page_get_prev(page_align(rec)) == FIL_NULL) { - /* The parent node pointer did not contain the - searched for (space, page_no), which means that the - search ended on the correct page regardless of the - counter value, and since we're at the infimum record, - there are no existing records. */ - return(0); - } else { - /* We used to read the previous page here. It would - break the latching order, because the caller has - buffer-fixed an insert buffer bitmap page. */ - return(ULINT_UNDEFINED); - } -} - -/*********************************************************************//** -Buffer an operation in the insert/delete buffer, instead of doing it -directly to the disk page, if this is possible. -@return DB_SUCCESS, DB_STRONG_FAIL or other error */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -ibuf_insert_low( -/*============*/ - ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */ - ibuf_op_t op, /*!< in: operation type */ - ibool no_counter, - /*!< in: TRUE=use 5.0.3 format; - FALSE=allow delete buffering */ - const dtuple_t* entry, /*!< in: index entry to insert */ - ulint entry_size, - /*!< in: rec_get_converted_size(index, entry) */ - dict_index_t* index, /*!< in: index where to insert; must not be - unique or clustered */ - ulint space, /*!< in: space id where to insert */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number where to insert */ - que_thr_t* thr) /*!< in: query thread */ -{ - big_rec_t* dummy_big_rec; - btr_pcur_t pcur; - btr_cur_t* cursor; - dtuple_t* ibuf_entry; - mem_heap_t* offsets_heap = NULL; - mem_heap_t* heap; - ulint* offsets = NULL; - ulint buffered; - lint min_n_recs; - rec_t* ins_rec; - ibool old_bit_value; - page_t* bitmap_page; - buf_block_t* block; - page_t* root; - dberr_t err; - ibool 
do_merge; - ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; - ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; - ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; - ulint n_stored; - mtr_t mtr; - mtr_t bitmap_mtr; - - ut_a(!dict_index_is_clust(index)); - ut_ad(dtuple_check_typed(entry)); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(!no_counter || op == IBUF_OP_INSERT); - ut_a(op < IBUF_OP_COUNT); - - ut_ad(!(thr_get_trx(thr)->fake_changes)); - - do_merge = FALSE; - - /* Perform dirty reads of ibuf->size and ibuf->max_size, to - reduce ibuf_mutex contention. Given that ibuf->max_size and - ibuf->size fit in a machine word, this should be OK; at worst - we are doing some excessive ibuf_contract() or occasionally - skipping an ibuf_contract(). */ - if (ibuf->max_size == 0) { - return(DB_STRONG_FAIL); - } - - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { - /* Insert buffer is now too big, contract it but do not try - to insert */ - - -#ifdef UNIV_IBUF_DEBUG - fputs("Ibuf too big\n", stderr); -#endif - ibuf_contract(true); - - return(DB_STRONG_FAIL); - } - - heap = mem_heap_create(1024); - - /* Build the entry which contains the space id and the page number - as the first fields and the type information for other fields, and - which will be inserted to the insert buffer. Using a counter value - of 0xFFFF we find the last record for (space, page_no), from which - we can then read the counter value N and use N + 1 in the record we - insert. (We patch the ibuf_entry's counter field to the correct - value just before actually inserting the entry.) */ - - ibuf_entry = ibuf_entry_build( - op, index, entry, space, page_no, - no_counter ? ULINT_UNDEFINED : 0xFFFF, heap); - - /* Open a cursor to the insert buffer tree to calculate if we can add - the new entry to it without exceeding the free space limit for the - page. 
*/ - - if (mode == BTR_MODIFY_TREE) { - for (;;) { - mutex_enter(&ibuf_pessimistic_insert_mutex); - mutex_enter(&ibuf_mutex); - - if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) { - - break; - } - - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - - if (UNIV_UNLIKELY(!ibuf_add_free_page())) { - - mem_heap_free(heap); - return(DB_STRONG_FAIL); - } - } - } - - ibuf_mtr_start(&mtr); - - btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); - ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); - - /* Find out the volume of already buffered inserts for the same index - page */ - min_n_recs = 0; - buffered = ibuf_get_volume_buffered(&pcur, space, page_no, - op == IBUF_OP_DELETE - ? &min_n_recs - : NULL, &mtr); - - if (op == IBUF_OP_DELETE - && (min_n_recs < 2 - || buf_pool_watch_occurred(space, page_no))) { - /* The page could become empty after the record is - deleted, or the page has been read in to the buffer - pool. Refuse to buffer the operation. */ - - /* The buffer pool watch is needed for IBUF_OP_DELETE - because of latching order considerations. We can - check buf_pool_watch_occurred() only after latching - the insert buffer B-tree pages that contain buffered - changes for the page. We never buffer IBUF_OP_DELETE, - unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have - been previously buffered for the page. Because there - are buffered operations for the page, the insert - buffer B-tree page latches held by mtr will guarantee - that no changes for the user page will be merged - before mtr_commit(&mtr). We must not mtr_commit(&mtr) - until after the IBUF_OP_DELETE has been buffered. 
*/ - -fail_exit: - if (mode == BTR_MODIFY_TREE) { - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - } - - err = DB_STRONG_FAIL; - goto func_exit; - } - - /* After this point, the page could still be loaded to the - buffer pool, but we do not have to care about it, since we are - holding a latch on the insert buffer leaf page that contains - buffered changes for (space, page_no). If the page enters the - buffer pool, buf_page_io_complete() for (space, page_no) will - have to acquire a latch on the same insert buffer leaf page, - which it cannot do until we have buffered the IBUF_OP_DELETE - and done mtr_commit(&mtr) to release the latch. */ - -#ifdef UNIV_IBUF_COUNT_DEBUG - ut_a((buffered == 0) || ibuf_count_get(space, page_no)); -#endif - ibuf_mtr_start(&bitmap_mtr); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, - zip_size, &bitmap_mtr); - - /* We check if the index page is suitable for buffered entries */ - - if (buf_page_peek(space, page_no) - || lock_rec_expl_exist_on_page(space, page_no)) { - - ibuf_mtr_commit(&bitmap_mtr); - goto fail_exit; - } - - if (op == IBUF_OP_INSERT) { - ulint bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE, - &bitmap_mtr); - - if (buffered + entry_size + page_dir_calc_reserved_space(1) - > ibuf_index_page_calc_free_from_bits(zip_size, bits)) { - /* Release the bitmap page latch early. */ - ibuf_mtr_commit(&bitmap_mtr); - - /* It may not fit */ - do_merge = TRUE; - - ibuf_get_merge_page_nos(FALSE, - btr_pcur_get_rec(&pcur), &mtr, - space_ids, space_versions, - page_nos, &n_stored); - - goto fail_exit; - } - } - - if (!no_counter) { - /* Patch correct counter value to the entry to - insert. This can change the insert position, which can - result in the need to abort in some cases. 
*/ - ulint counter = ibuf_get_entry_counter( - space, page_no, btr_pcur_get_rec(&pcur), &mtr, - btr_pcur_get_btr_cur(&pcur)->low_match - < IBUF_REC_FIELD_METADATA); - dfield_t* field; - - if (counter == ULINT_UNDEFINED) { - ibuf_mtr_commit(&bitmap_mtr); - goto fail_exit; - } - - field = dtuple_get_nth_field( - ibuf_entry, IBUF_REC_FIELD_METADATA); - mach_write_to_2( - (byte*) dfield_get_data(field) - + IBUF_REC_OFFSET_COUNTER, counter); - } - - /* Set the bitmap bit denoting that the insert buffer contains - buffered entries for this index page, if the bit is not set yet */ - - old_bit_value = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, - IBUF_BITMAP_BUFFERED, &bitmap_mtr); - - if (!old_bit_value) { - ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_BUFFERED, TRUE, - &bitmap_mtr); - } - - ibuf_mtr_commit(&bitmap_mtr); - - cursor = btr_pcur_get_btr_cur(&pcur); - - if (mode == BTR_MODIFY_PREV) { - err = btr_cur_optimistic_insert( - BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, - cursor, &offsets, &offsets_heap, - ibuf_entry, &ins_rec, - &dummy_big_rec, 0, thr, &mtr); - block = btr_cur_get_block(cursor); - ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); - - /* If this is the root page, update ibuf->empty. */ - if (UNIV_UNLIKELY(buf_block_get_page_no(block) - == FSP_IBUF_TREE_ROOT_PAGE_NO)) { - const page_t* root = buf_block_get_frame(block); - - ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); - ut_ad(page_get_page_no(root) - == FSP_IBUF_TREE_ROOT_PAGE_NO); - - ibuf->empty = page_is_empty(root); - } - } else { - ut_ad(mode == BTR_MODIFY_TREE); - - /* We acquire an x-latch to the root page before the insert, - because a pessimistic insert releases the tree x-latch, - which would cause the x-latching of the root after that to - break the latching order. 
*/ - - root = ibuf_tree_root_get(&mtr); - - err = btr_cur_optimistic_insert( - BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, - cursor, &offsets, &offsets_heap, - ibuf_entry, &ins_rec, - &dummy_big_rec, 0, thr, &mtr); - - if (err == DB_FAIL) { - err = btr_cur_pessimistic_insert( - BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, - cursor, &offsets, &offsets_heap, - ibuf_entry, &ins_rec, - &dummy_big_rec, 0, thr, &mtr); - } - - mutex_exit(&ibuf_pessimistic_insert_mutex); - ibuf_size_update(root, &mtr); - mutex_exit(&ibuf_mutex); - ibuf->empty = page_is_empty(root); - - block = btr_cur_get_block(cursor); - ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); - } - - if (offsets_heap) { - mem_heap_free(offsets_heap); - } - - if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { - /* Update the page max trx id field */ - page_update_max_trx_id(block, NULL, - thr_get_trx(thr)->id, &mtr); - } - -func_exit: -#ifdef UNIV_IBUF_COUNT_DEBUG - if (err == DB_SUCCESS) { - fprintf(stderr, - "Incrementing ibuf count of space %lu page %lu\n" - "from %lu by 1\n", space, page_no, - ibuf_count_get(space, page_no)); - - ibuf_count_set(space, page_no, - ibuf_count_get(space, page_no) + 1); - } -#endif - - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - - mem_heap_free(heap); - - if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) { - ibuf_contract_after_insert(entry_size); - } - - if (do_merge) { -#ifdef UNIV_IBUF_DEBUG - ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); -#endif - buf_read_ibuf_merge_pages(false, space_ids, space_versions, - page_nos, n_stored); - } - - return(err); -} - -/*********************************************************************//** -Buffer an operation in the insert/delete buffer, instead of doing it -directly to the disk page, if this is possible. Does not do it if the index -is clustered or unique. 
-@return TRUE if success */ -UNIV_INTERN -ibool -ibuf_insert( -/*========*/ - ibuf_op_t op, /*!< in: operation type */ - const dtuple_t* entry, /*!< in: index entry to insert */ - dict_index_t* index, /*!< in: index where to insert */ - ulint space, /*!< in: space id where to insert */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number where to insert */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - ulint entry_size; - ibool no_counter; - /* Read the settable global variable ibuf_use only once in - this function, so that we will have a consistent view of it. */ - ibuf_use_t use = ibuf_use; - DBUG_ENTER("ibuf_insert"); - - DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld", - op, space, page_no)); - - ut_ad(dtuple_check_typed(entry)); - ut_ad(ut_is_2pow(zip_size)); - - ut_a(!dict_index_is_clust(index)); - - no_counter = use <= IBUF_USE_INSERT; - - switch (op) { - case IBUF_OP_INSERT: - switch (use) { - case IBUF_USE_NONE: - case IBUF_USE_DELETE: - case IBUF_USE_DELETE_MARK: - DBUG_RETURN(FALSE); - case IBUF_USE_INSERT: - case IBUF_USE_INSERT_DELETE_MARK: - case IBUF_USE_ALL: - goto check_watch; - case IBUF_USE_COUNT: - break; - } - break; - case IBUF_OP_DELETE_MARK: - switch (use) { - case IBUF_USE_NONE: - case IBUF_USE_INSERT: - DBUG_RETURN(FALSE); - case IBUF_USE_DELETE_MARK: - case IBUF_USE_DELETE: - case IBUF_USE_INSERT_DELETE_MARK: - case IBUF_USE_ALL: - ut_ad(!no_counter); - goto check_watch; - case IBUF_USE_COUNT: - break; - } - break; - case IBUF_OP_DELETE: - switch (use) { - case IBUF_USE_NONE: - case IBUF_USE_INSERT: - case IBUF_USE_INSERT_DELETE_MARK: - DBUG_RETURN(FALSE); - case IBUF_USE_DELETE_MARK: - case IBUF_USE_DELETE: - case IBUF_USE_ALL: - ut_ad(!no_counter); - goto skip_watch; - case IBUF_USE_COUNT: - break; - } - break; - case IBUF_OP_COUNT: - break; - } - - /* unknown op or use */ - ut_error; - -check_watch: - /* If a thread attempts to buffer an insert on a page while a - 
purge is in progress on the same page, the purge must not be - buffered, because it could remove a record that was - re-inserted later. For simplicity, we block the buffering of - all operations on a page that has a purge pending. - - We do not check this in the IBUF_OP_DELETE case, because that - would always trigger the buffer pool watch during purge and - thus prevent the buffering of delete operations. We assume - that the issuer of IBUF_OP_DELETE has called - buf_pool_watch_set(space, page_no). */ - - { - buf_page_t* bpage; - buf_pool_t* buf_pool = buf_pool_get(space, page_no); - bpage = buf_page_get_also_watch(buf_pool, space, page_no); - - if (UNIV_LIKELY_NULL(bpage)) { - /* A buffer pool watch has been set or the - page has been read into the buffer pool. - Do not buffer the request. If a purge operation - is being buffered, have this request executed - directly on the page in the buffer pool after the - buffered entries for this page have been merged. */ - DBUG_RETURN(FALSE); - } - } - -skip_watch: - entry_size = rec_get_converted_size(index, entry, 0); - - if (entry_size - >= page_get_free_space_of_empty(dict_table_is_comp(index->table)) - / 2) { - - DBUG_RETURN(FALSE); - } - - err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter, - entry, entry_size, - index, space, zip_size, page_no, thr); - if (err == DB_FAIL) { - err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter, - entry, entry_size, - index, space, zip_size, page_no, thr); - } - - if (err == DB_SUCCESS) { -#ifdef UNIV_IBUF_DEBUG - /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n", - page_no, index->name); */ -#endif - DBUG_RETURN(TRUE); - - } else { - ut_a(err == DB_STRONG_FAIL || err == DB_TOO_BIG_RECORD); - - DBUG_RETURN(FALSE); - } -} - -/********************************************************************//** -During merge, inserts to an index page a secondary index entry extracted -from the insert buffer. 
-@return newly inserted record */ -static MY_ATTRIBUTE((nonnull)) -rec_t* -ibuf_insert_to_index_page_low( -/*==========================*/ - const dtuple_t* entry, /*!< in: buffered entry to insert */ - buf_block_t* block, /*!< in/out: index page where the buffered - entry should be placed */ - dict_index_t* index, /*!< in: record descriptor */ - ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t* heap, /*!< in/out: memory heap */ - mtr_t* mtr, /*!< in/out: mtr */ - page_cur_t* page_cur)/*!< in/out: cursor positioned on the record - after which to insert the buffered entry */ -{ - const page_t* page; - ulint space; - ulint page_no; - ulint zip_size; - const page_t* bitmap_page; - ulint old_bits; - rec_t* rec; - DBUG_ENTER("ibuf_insert_to_index_page_low"); - - rec = page_cur_tuple_insert(page_cur, entry, index, - offsets, &heap, 0, mtr); - if (rec != NULL) { - DBUG_RETURN(rec); - } - - /* Page reorganization or recompression should already have - been attempted by page_cur_tuple_insert(). Besides, per - ibuf_index_page_calc_free_zip() the page should not have been - recompressed or reorganized. */ - ut_ad(!buf_block_get_page_zip(block)); - - /* If the record did not fit, reorganize */ - - btr_page_reorganize(page_cur, index, mtr); - - /* This time the record must fit */ - - rec = page_cur_tuple_insert(page_cur, entry, index, - offsets, &heap, 0, mtr); - if (rec != NULL) { - DBUG_RETURN(rec); - } - - page = buf_block_get_frame(block); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: Insert buffer insert fails;" - " page free %lu, dtuple size %lu\n", - (ulong) page_get_max_insert_size(page, 1), - (ulong) rec_get_converted_size(index, entry, 0)); - fputs("InnoDB: Cannot insert index record ", stderr); - dtuple_print(stderr, entry); - fputs("\nInnoDB: The table where this index record belongs\n" - "InnoDB: is now probably corrupt. 
Please run CHECK TABLE on\n" - "InnoDB: that table.\n", stderr); - - space = page_get_space_id(page); - zip_size = buf_block_get_zip_size(block); - page_no = page_get_page_no(page); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); - old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, mtr); - - fprintf(stderr, - "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n", - (ulong) space, (ulong) page_no, - (ulong) zip_size, (ulong) old_bits); - - fputs("InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - ut_ad(0); - DBUG_RETURN(NULL); -} - -/************************************************************************ -During merge, inserts to an index page a secondary index entry extracted -from the insert buffer. */ -static -void -ibuf_insert_to_index_page( -/*======================*/ - const dtuple_t* entry, /*!< in: buffered entry to insert */ - buf_block_t* block, /*!< in/out: index page where the buffered entry - should be placed */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t page_cur; - ulint low_match; - page_t* page = buf_block_get_frame(block); - rec_t* rec; - ulint* offsets; - mem_heap_t* heap; - - DBUG_ENTER("ibuf_insert_to_index_page"); - - DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block))); - DBUG_PRINT("ibuf", ("index name: %s", index->name)); - DBUG_PRINT("ibuf", ("online status: %d", - dict_index_get_online_status(index))); - - ut_ad(ibuf_inside(mtr)); - ut_ad(dtuple_check_typed(entry)); - ut_ad(!buf_block_align(page)->index); - - if (UNIV_UNLIKELY(dict_table_is_comp(index->table) - != (ibool)!!page_is_comp(page))) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the 'compact' flag does not match!\n", - stderr); - goto dump; - } - - rec = page_rec_get_next(page_get_infimum_rec(page)); - - if (page_rec_is_supremum(rec)) { - 
fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the index page is empty!\n", - stderr); - goto dump; - } - - if (UNIV_UNLIKELY(rec_get_n_fields(rec, index) - != dtuple_get_n_fields(entry))) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the number of fields does not match!\n", - stderr); -dump: - buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH); - - dtuple_print(stderr, entry); - ut_ad(0); - - fputs("InnoDB: The table where where" - " this index record belongs\n" - "InnoDB: is now probably corrupt." - " Please run CHECK TABLE on\n" - "InnoDB: your tables.\n" - "InnoDB: Submit a detailed bug report to" - " http://bugs.mysql.com!\n", stderr); - - DBUG_VOID_RETURN; - } - - low_match = page_cur_search(block, index, entry, - PAGE_CUR_LE, &page_cur); - - heap = mem_heap_create( - sizeof(upd_t) - + REC_OFFS_HEADER_SIZE * sizeof(*offsets) - + dtuple_get_n_fields(entry) - * (sizeof(upd_field_t) + sizeof *offsets)); - - if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) { - upd_t* update; - page_zip_des_t* page_zip; - - rec = page_cur_get_rec(&page_cur); - - /* This is based on - row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */ - ut_ad(rec_get_deleted_flag(rec, page_is_comp(page))); - - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, - &heap); - update = row_upd_build_sec_rec_difference_binary( - rec, index, offsets, entry, heap); - - page_zip = buf_block_get_page_zip(block); - - if (update->n_fields == 0) { - /* The records only differ in the delete-mark. - Clear the delete-mark, like we did before - Bug #56680 was fixed. */ - btr_cur_set_deleted_flag_for_ibuf( - rec, page_zip, FALSE, mtr); - goto updated_in_place; - } - - /* Copy the info bits. Clear the delete-mark. 
*/ - update->info_bits = rec_get_info_bits(rec, page_is_comp(page)); - update->info_bits &= ~REC_INFO_DELETED_FLAG; - - /* We cannot invoke btr_cur_optimistic_update() here, - because we do not have a btr_cur_t or que_thr_t, - as the insert buffer merge occurs at a very low level. */ - if (!row_upd_changes_field_size_or_external(index, offsets, - update) - && (!page_zip || btr_cur_update_alloc_zip( - page_zip, &page_cur, index, offsets, - rec_offs_size(offsets), false, mtr, NULL))) { - /* This is the easy case. Do something similar - to btr_cur_update_in_place(). */ - rec = page_cur_get_rec(&page_cur); - row_upd_rec_in_place(rec, index, offsets, - update, page_zip); - - /* Log the update in place operation. During recovery - MLOG_COMP_REC_UPDATE_IN_PLACE/MLOG_REC_UPDATE_IN_PLACE - expects trx_id, roll_ptr for secondary indexes. So we - just write dummy trx_id(0), roll_ptr(0) */ - btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, - index, update, 0, 0, mtr); - DBUG_EXECUTE_IF( - "crash_after_log_ibuf_upd_inplace", - log_buffer_flush_to_disk(); - ib_logf(IB_LOG_LEVEL_INFO, - "Wrote log record for ibuf update in " - "place operation"); - DBUG_SUICIDE(); - ); - - goto updated_in_place; - } - - /* btr_cur_update_alloc_zip() may have changed this */ - rec = page_cur_get_rec(&page_cur); - - /* A collation may identify values that differ in - storage length. - Some examples (1 or 2 bytes): - utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I - utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S - utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS - - latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S - - Examples of a character (3-byte UTF-8 sequence) - identified with 2 or 4 characters (1-byte UTF-8 sequences): - - utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO - utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN - */ - - /* Delete the different-length record, and insert the - buffered one. 
*/ - - lock_rec_store_on_page_infimum(block, rec); - page_cur_delete_rec(&page_cur, index, offsets, mtr); - page_cur_move_to_prev(&page_cur); - rec = ibuf_insert_to_index_page_low(entry, block, index, - &offsets, heap, mtr, - &page_cur); - - ut_ad(!cmp_dtuple_rec(entry, rec, offsets)); - lock_rec_restore_from_page_infimum(block, rec, block); - } else { - offsets = NULL; - ibuf_insert_to_index_page_low(entry, block, index, - &offsets, heap, mtr, - &page_cur); - } -updated_in_place: - mem_heap_free(heap); - - DBUG_VOID_RETURN; -} - -/****************************************************************//** -During merge, sets the delete mark on a record for a secondary index -entry. */ -static -void -ibuf_set_del_mark( -/*==============*/ - const dtuple_t* entry, /*!< in: entry */ - buf_block_t* block, /*!< in/out: block */ - const dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_cur_t page_cur; - ulint low_match; - - ut_ad(ibuf_inside(mtr)); - ut_ad(dtuple_check_typed(entry)); - - low_match = page_cur_search( - block, index, entry, PAGE_CUR_LE, &page_cur); - - if (low_match == dtuple_get_n_fields(entry)) { - rec_t* rec; - page_zip_des_t* page_zip; - - rec = page_cur_get_rec(&page_cur); - page_zip = page_cur_get_page_zip(&page_cur); - - /* Delete mark the old index record. According to a - comment in row_upd_sec_index_entry(), it can already - have been delete marked if a lock wait occurred in - row_ins_sec_index_entry() in a previous invocation of - row_upd_sec_index_entry(). 
*/ - - if (UNIV_LIKELY - (!rec_get_deleted_flag( - rec, dict_table_is_comp(index->table)))) { - btr_cur_set_deleted_flag_for_ibuf(rec, page_zip, - TRUE, mtr); - } - } else { - const page_t* page - = page_cur_get_page(&page_cur); - const buf_block_t* block - = page_cur_get_block(&page_cur); - - ut_print_timestamp(stderr); - fputs(" InnoDB: unable to find a record to delete-mark\n", - stderr); - fputs("InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, page_cur_get_rec(&page_cur), index); - fprintf(stderr, "\nspace %u offset %u" - " (%u records, index id %llu)\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned) buf_block_get_space(block), - (unsigned) buf_block_get_page_no(block), - (unsigned) page_get_n_recs(page), - (ulonglong) btr_page_get_index_id(page)); - ut_ad(0); - } -} - -/****************************************************************//** -During merge, delete a record for a secondary index entry. */ -static -void -ibuf_delete( -/*========*/ - const dtuple_t* entry, /*!< in: entry */ - buf_block_t* block, /*!< in/out: block */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in/out: mtr; must be committed - before latching any further pages */ -{ - page_cur_t page_cur; - ulint low_match; - - ut_ad(ibuf_inside(mtr)); - ut_ad(dtuple_check_typed(entry)); - - low_match = page_cur_search( - block, index, entry, PAGE_CUR_LE, &page_cur); - - if (low_match == dtuple_get_n_fields(entry)) { - page_zip_des_t* page_zip= buf_block_get_page_zip(block); - page_t* page = buf_block_get_frame(block); - rec_t* rec = page_cur_get_rec(&page_cur); - - /* TODO: the below should probably be a separate function, - it's a bastardized version of btr_cur_optimistic_delete. 
*/ - - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - mem_heap_t* heap = NULL; - ulint max_ins_size = 0; - - rec_offs_init(offsets_); - - offsets = rec_get_offsets( - rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (page_get_n_recs(page) <= 1 - || !(REC_INFO_DELETED_FLAG - & rec_get_info_bits(rec, page_is_comp(page)))) { - /* Refuse to purge the last record or a - record that has not been marked for deletion. */ - ut_print_timestamp(stderr); - fputs(" InnoDB: unable to purge a record\n", - stderr); - fputs("InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print_new(stderr, rec, offsets); - fprintf(stderr, "\nspace %u offset %u" - " (%u records, index id %llu)\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned) buf_block_get_space(block), - (unsigned) buf_block_get_page_no(block), - (unsigned) page_get_n_recs(page), - (ulonglong) btr_page_get_index_id(page)); - - ut_ad(0); - return; - } - - lock_update_delete(block, rec); - - if (!page_zip) { - max_ins_size - = page_get_max_insert_size_after_reorganize( - page, 1); - } -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - page_cur_delete_rec(&page_cur, index, offsets, mtr); -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - if (page_zip) { - ibuf_update_free_bits_zip(block, mtr); - } else { - ibuf_update_free_bits_low(block, max_ins_size, mtr); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } else { - /* The record must have been purged already. 
*/ - } -} - -/*********************************************************************//** -Restores insert buffer tree cursor position -@return TRUE if the position was restored; FALSE if not */ -static MY_ATTRIBUTE((nonnull)) -ibool -ibuf_restore_pos( -/*=============*/ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: index page number where the record - should belong */ - const dtuple_t* search_tuple, - /*!< in: search tuple for entries of page_no */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ - btr_pcur_t* pcur, /*!< in/out: persistent cursor whose - position is to be restored */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE); - - if (btr_pcur_restore_position(mode, pcur, mtr)) { - - return(TRUE); - } - - if (fil_space_get_flags(space) == ULINT_UNDEFINED) { - /* The tablespace has been dropped. It is possible - that another thread has deleted the insert buffer - entry. Do not complain. */ - ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); - } else { - fprintf(stderr, - "InnoDB: ERROR: Submit the output to" - " http://bugs.mysql.com\n" - "InnoDB: ibuf cursor restoration fails!\n" - "InnoDB: ibuf record inserted to page %lu:%lu\n", - (ulong) space, (ulong) page_no); - fflush(stderr); - - rec_print_old(stderr, btr_pcur_get_rec(pcur)); - rec_print_old(stderr, pcur->old_rec); - dtuple_print(stderr, search_tuple); - - rec_print_old(stderr, - page_rec_get_next(btr_pcur_get_rec(pcur))); - fflush(stderr); - - ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); - ut_ad(0); - } - - return(FALSE); -} - -/*********************************************************************//** -Deletes from ibuf the record on which pcur is positioned. If we have to -resort to a pessimistic delete, this function commits mtr and closes -the cursor. 
-@return TRUE if mtr was committed and pcur closed in this operation */ -static MY_ATTRIBUTE((warn_unused_result)) -ibool -ibuf_delete_rec( -/*============*/ - ulint space, /*!< in: space id */ - ulint page_no,/*!< in: index page number that the record - should belong to */ - btr_pcur_t* pcur, /*!< in: pcur positioned on the record to - delete, having latch mode BTR_MODIFY_LEAF */ - const dtuple_t* search_tuple, - /*!< in: search tuple for entries of page_no */ - mtr_t* mtr) /*!< in: mtr */ -{ - ibool success; - page_t* root; - dberr_t err; - - ut_ad(ibuf_inside(mtr)); - ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur))); - ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no); - ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space); - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG - if (ibuf_debug == 2) { - /* Inject a fault (crash). We do this before trying - optimistic delete, because a pessimistic delete in the - change buffer would require a larger test case. */ - - /* Flag the buffered record as processed, to avoid - an assertion failure after crash recovery. */ - btr_cur_set_deleted_flag_for_ibuf( - btr_pcur_get_rec(pcur), NULL, TRUE, mtr); - ibuf_mtr_commit(mtr); - log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE); - DBUG_SUICIDE(); - } -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - - success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), - 0, mtr); - - if (success) { - if (page_is_empty(btr_pcur_get_page(pcur))) { - /* If a B-tree page is empty, it must be the root page - and the whole B-tree must be empty. InnoDB does not - allow empty B-tree pages other than the root. */ - root = btr_pcur_get_page(pcur); - - ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); - ut_ad(page_get_page_no(root) - == FSP_IBUF_TREE_ROOT_PAGE_NO); - - /* ibuf->empty is protected by the root page latch. - Before the deletion, it had to be FALSE. 
*/ - ut_ad(!ibuf->empty); - ibuf->empty = true; - } - -#ifdef UNIV_IBUF_COUNT_DEBUG - fprintf(stderr, - "Decrementing ibuf count of space %lu page %lu\n" - "from %lu by 1\n", space, page_no, - ibuf_count_get(space, page_no)); - ibuf_count_set(space, page_no, - ibuf_count_get(space, page_no) - 1); -#endif - return(FALSE); - } - - ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur))); - ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no); - ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space); - - /* We have to resort to a pessimistic delete from ibuf. - Delete-mark the record so that it will not be applied again, - in case the server crashes before the pessimistic delete is - made persistent. */ - btr_cur_set_deleted_flag_for_ibuf( - btr_pcur_get_rec(pcur), NULL, TRUE, mtr); - - btr_pcur_store_position(pcur, mtr); - ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); - - ibuf_mtr_start(mtr); - mutex_enter(&ibuf_mutex); - - if (!ibuf_restore_pos(space, page_no, search_tuple, - BTR_MODIFY_TREE, pcur, mtr)) { - - mutex_exit(&ibuf_mutex); - ut_ad(mtr->state == MTR_COMMITTED); - goto func_exit; - } - - root = ibuf_tree_root_get(mtr); - - btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0, - RB_NONE, mtr); - ut_a(err == DB_SUCCESS); - -#ifdef UNIV_IBUF_COUNT_DEBUG - ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); -#endif - ibuf_size_update(root, mtr); - mutex_exit(&ibuf_mutex); - - ibuf->empty = page_is_empty(root); - ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); - -func_exit: - ut_ad(mtr->state == MTR_COMMITTED); - btr_pcur_close(pcur); - - return(TRUE); -} - -/*********************************************************************//** -When an index page is read from a disk to the buffer pool, this function -applies any buffered operations to the page and deletes the entries from the -insert buffer. 
If the page is not read, but created in the buffer pool, this -function deletes its buffered entries from the insert buffer; there can -exist entries for such a page if the page belonged to an index which -subsequently was dropped. */ -UNIV_INTERN -void -ibuf_merge_or_delete_for_page( -/*==========================*/ - buf_block_t* block, /*!< in: if page has been read from - disk, pointer to the page x-latched, - else NULL */ - ulint space_id,/*!< in: space id of the index page */ - ulint page_no,/*!< in: page number of the index page */ - ulint zip_size,/*!< in: compressed page size in bytes, - or 0 */ - ibool update_ibuf_bitmap)/*!< in: normally this is set - to TRUE, but if we have deleted or are - deleting the tablespace, then we - naturally do not want to update a - non-existent bitmap page */ -{ - mem_heap_t* heap; - btr_pcur_t pcur; - dtuple_t* search_tuple; -#ifdef UNIV_IBUF_DEBUG - ulint volume = 0; -#endif - page_zip_des_t* page_zip = NULL; - ibool corruption_noticed = FALSE; - mtr_t mtr; - fil_space_t* space = NULL; - - /* Counts for merged & discarded operations. */ - ulint mops[IBUF_OP_COUNT]; - ulint dops[IBUF_OP_COUNT]; - - ut_ad(!block || buf_block_get_space(block) == space_id); - ut_ad(!block || buf_block_get_page_no(block) == page_no); - ut_ad(!block || buf_block_get_zip_size(block) == zip_size); - ut_ad(!block || buf_block_get_io_fix_unlocked(block) == BUF_IO_READ); - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE - || trx_sys_hdr_page(space_id, page_no)) { - return; - } - - /* We cannot refer to zip_size in the following, because - zip_size is passed as ULINT_UNDEFINED (it is unknown) when - buf_read_ibuf_merge_pages() is merging (discarding) changes - for a dropped tablespace. When block != NULL or - update_ibuf_bitmap is specified, the zip_size must be known. - That is why we will repeat the check below, with zip_size in - place of 0. 
Passing zip_size as 0 assumes that the - uncompressed page size always is a power-of-2 multiple of the - compressed page size. */ - - if (ibuf_fixed_addr_page(space_id, 0, page_no) - || fsp_descr_page(0, page_no)) { - return; - } - - if (UNIV_LIKELY(update_ibuf_bitmap)) { - ut_a(ut_is_2pow(zip_size)); - - if (ibuf_fixed_addr_page(space_id, zip_size, page_no) - || fsp_descr_page(zip_size, page_no)) { - return; - } - - /* If the following returns space, we get the counter - incremented, and must decrement it when we leave this - function. When the counter is > 0, that prevents tablespace - from being dropped. */ - - space = fil_space_acquire(space_id); - - if (UNIV_UNLIKELY(!space)) { - /* Do not try to read the bitmap page from space; - just delete the ibuf records for the page */ - - block = NULL; - update_ibuf_bitmap = FALSE; - } else { - page_t* bitmap_page = NULL; - ulint bitmap_bits = 0; - - ibuf_mtr_start(&mtr); - - bitmap_page = ibuf_bitmap_get_map_page( - space_id, page_no, zip_size, &mtr); - - if (bitmap_page && - fil_page_get_type(bitmap_page) != FIL_PAGE_TYPE_ALLOCATED) { - bitmap_bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, - IBUF_BITMAP_BUFFERED, &mtr); - } - - ibuf_mtr_commit(&mtr); - - if (!bitmap_bits) { - /* No inserts buffered for this page */ - - if (space) { - fil_space_release(space); - } - - return; - } - } - } else if (block - && (ibuf_fixed_addr_page(space_id, zip_size, page_no) - || fsp_descr_page(zip_size, page_no))) { - - return; - } - - heap = mem_heap_create(512); - - search_tuple = ibuf_search_tuple_build(space_id, page_no, heap); - - if (block) { - /* Move the ownership of the x-latch on the page to this OS - thread, so that we can acquire a second x-latch on it. This - is needed for the insert operations to the index page to pass - the debug checks. 
*/ - - rw_lock_x_lock_move_ownership(&(block->lock)); - page_zip = buf_block_get_page_zip(block); - - if (UNIV_UNLIKELY(fil_page_get_type(block->frame) - != FIL_PAGE_INDEX) - || UNIV_UNLIKELY(!page_is_leaf(block->frame))) { - - page_t* bitmap_page; - - corruption_noticed = TRUE; - - ut_print_timestamp(stderr); - - ibuf_mtr_start(&mtr); - - fputs(" InnoDB: Dump of the ibuf bitmap page:\n", - stderr); - - bitmap_page = ibuf_bitmap_get_map_page(space_id, page_no, - zip_size, &mtr); - if (bitmap_page == NULL) - { - fputs("InnoDB: cannot retrieve bitmap page\n", - stderr); - } else { - buf_page_print(bitmap_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - } - ibuf_mtr_commit(&mtr); - - fputs("\nInnoDB: Dump of the page:\n", stderr); - - buf_page_print(block->frame, 0, - BUF_PAGE_PRINT_NO_CRASH); - - fprintf(stderr, - "InnoDB: Error: corruption in the tablespace." - " Bitmap shows insert\n" - "InnoDB: buffer records to page n:o %lu" - " though the page\n" - "InnoDB: type is %lu, which is" - " not an index leaf page!\n" - "InnoDB: We try to resolve the problem" - " by skipping the insert buffer\n" - "InnoDB: merge for this page." 
- " Please run CHECK TABLE on your tables\n" - "InnoDB: to determine if they are corrupt" - " after this.\n\n" - "InnoDB: Please submit a detailed bug report" - " to http://bugs.mysql.com\n\n", - (ulong) page_no, - (ulong) - fil_page_get_type(block->frame)); - ut_ad(0); - } - } - - memset(mops, 0, sizeof(mops)); - memset(dops, 0, sizeof(dops)); - -loop: - ibuf_mtr_start(&mtr); - - /* Position pcur in the insert buffer at the first entry for this - index page */ - btr_pcur_open_on_user_rec( - ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, - &pcur, &mtr); - - if (block) { - ibool success; - - success = buf_page_get_known_nowait( - RW_X_LATCH, block, - BUF_KEEP_OLD, __FILE__, __LINE__, &mtr); - - ut_a(success); - - /* This is a user page (secondary index leaf page), - but we pretend that it is a change buffer page in - order to obey the latching order. This should be OK, - because buffered changes are applied immediately while - the block is io-fixed. Other threads must not try to - latch an io-fixed block. 
*/ - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); - } - - if (!btr_pcur_is_on_user_rec(&pcur)) { - ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); - - goto reset_bit; - } - - for (;;) { - rec_t* rec; - - ut_ad(btr_pcur_is_on_user_rec(&pcur)); - - rec = btr_pcur_get_rec(&pcur); - - /* Check if the entry is for this index page */ - if (ibuf_rec_get_page_no(&mtr, rec) != page_no - || ibuf_rec_get_space(&mtr, rec) != space_id) { - - if (block) { - page_header_reset_last_insert( - block->frame, page_zip, &mtr); - } - - goto reset_bit; - } - - if (UNIV_UNLIKELY(corruption_noticed)) { - fputs("InnoDB: Discarding record\n ", stderr); - rec_print_old(stderr, rec); - fputs("\nInnoDB: from the insert buffer!\n\n", stderr); - } else if (block && !rec_get_deleted_flag(rec, 0)) { - /* Now we have at pcur a record which should be - applied on the index page; NOTE that the call below - copies pointers to fields in rec, and we must - keep the latch to the rec page until the - insertion is finished! 
*/ - dtuple_t* entry; - trx_id_t max_trx_id; - dict_index_t* dummy_index; - ibuf_op_t op = ibuf_rec_get_op_type(&mtr, rec); - - max_trx_id = page_get_max_trx_id(page_align(rec)); - page_update_max_trx_id(block, page_zip, max_trx_id, - &mtr); - - ut_ad(page_validate(page_align(rec), ibuf->index)); - - entry = ibuf_build_entry_from_ibuf_rec( - &mtr, rec, heap, &dummy_index); - - ut_ad(page_validate(block->frame, dummy_index)); - - switch (op) { - ibool success; - case IBUF_OP_INSERT: -#ifdef UNIV_IBUF_DEBUG - volume += rec_get_converted_size( - dummy_index, entry, 0); - - volume += page_dir_calc_reserved_space(1); - - ut_a(volume <= 4 * UNIV_PAGE_SIZE - / IBUF_PAGE_SIZE_PER_FREE_SPACE); -#endif - ibuf_insert_to_index_page( - entry, block, dummy_index, &mtr); - break; - - case IBUF_OP_DELETE_MARK: - ibuf_set_del_mark( - entry, block, dummy_index, &mtr); - break; - - case IBUF_OP_DELETE: - ibuf_delete(entry, block, dummy_index, &mtr); - /* Because ibuf_delete() will latch an - insert buffer bitmap page, commit mtr - before latching any further pages. - Store and restore the cursor position. */ - ut_ad(rec == btr_pcur_get_rec(&pcur)); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(ibuf_rec_get_page_no(&mtr, rec) - == page_no); - ut_ad(ibuf_rec_get_space(&mtr, rec) == space_id); - - /* Mark the change buffer record processed, - so that it will not be merged again in case - the server crashes between the following - mtr_commit() and the subsequent mtr_commit() - of deleting the change buffer record. */ - - btr_cur_set_deleted_flag_for_ibuf( - btr_pcur_get_rec(&pcur), NULL, - TRUE, &mtr); - - btr_pcur_store_position(&pcur, &mtr); - ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr); - - ibuf_mtr_start(&mtr); - - success = buf_page_get_known_nowait( - RW_X_LATCH, block, - BUF_KEEP_OLD, - __FILE__, __LINE__, &mtr); - ut_a(success); - - /* This is a user page (secondary - index leaf page), but it should be OK - to use too low latching order for it, - as the block is io-fixed. 
*/ - buf_block_dbg_add_level( - block, SYNC_IBUF_TREE_NODE); - - if (!ibuf_restore_pos(space_id, page_no, - search_tuple, - BTR_MODIFY_LEAF, - &pcur, &mtr)) { - - ut_ad(mtr.state == MTR_COMMITTED); - mops[op]++; - ibuf_dummy_index_free(dummy_index); - goto loop; - } - - break; - default: - ut_error; - } - - mops[op]++; - - ibuf_dummy_index_free(dummy_index); - } else { - dops[ibuf_rec_get_op_type(&mtr, rec)]++; - } - - /* Delete the record from ibuf */ - if (ibuf_delete_rec(space_id, page_no, &pcur, search_tuple, - &mtr)) { - /* Deletion was pessimistic and mtr was committed: - we start from the beginning again */ - - ut_ad(mtr.state == MTR_COMMITTED); - goto loop; - } else if (btr_pcur_is_after_last_on_page(&pcur)) { - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - - goto loop; - } - } - -reset_bit: - if (UNIV_LIKELY(update_ibuf_bitmap)) { - page_t* bitmap_page; - - bitmap_page = ibuf_bitmap_get_map_page( - space_id, page_no, zip_size, &mtr); - - ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, - IBUF_BITMAP_BUFFERED, FALSE, &mtr); - - if (block) { - ulint old_bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, &mtr); - - ulint new_bits = ibuf_index_page_calc_free( - zip_size, block); - - if (old_bits != new_bits) { - ibuf_bitmap_page_set_bits( - bitmap_page, page_no, zip_size, - IBUF_BITMAP_FREE, new_bits, &mtr); - } - } - } - - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - mem_heap_free(heap); - -#ifdef HAVE_ATOMIC_BUILTINS - os_atomic_increment_ulint(&ibuf->n_merges, 1); - ibuf_add_ops(ibuf->n_merged_ops, mops); - ibuf_add_ops(ibuf->n_discarded_ops, dops); -#else /* HAVE_ATOMIC_BUILTINS */ - /* Protect our statistics keeping from race conditions */ - mutex_enter(&ibuf_mutex); - - ibuf->n_merges++; - ibuf_add_ops(ibuf->n_merged_ops, mops); - ibuf_add_ops(ibuf->n_discarded_ops, dops); - - mutex_exit(&ibuf_mutex); -#endif /* HAVE_ATOMIC_BUILTINS */ - - if (space) { - fil_space_release(space); - } - -#ifdef 
UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(space_id, page_no) == 0); -#endif -} - -/*********************************************************************//** -Deletes all entries in the insert buffer for a given space id. This is used -in DISCARD TABLESPACE and IMPORT TABLESPACE. -NOTE: this does not update the page free bitmaps in the space. The space will -become CORRUPT when you call this function! */ -UNIV_INTERN -void -ibuf_delete_for_discarded_space( -/*============================*/ - ulint space) /*!< in: space id */ -{ - mem_heap_t* heap; - btr_pcur_t pcur; - dtuple_t* search_tuple; - const rec_t* ibuf_rec; - ulint page_no; - mtr_t mtr; - - /* Counts for discarded operations. */ - ulint dops[IBUF_OP_COUNT]; - - heap = mem_heap_create(512); - - /* Use page number 0 to build the search tuple so that we get the - cursor positioned at the first entry for this space id */ - - search_tuple = ibuf_search_tuple_build(space, 0, heap); - - memset(dops, 0, sizeof(dops)); -loop: - ibuf_mtr_start(&mtr); - - /* Position pcur in the insert buffer at the first entry for the - space */ - btr_pcur_open_on_user_rec( - ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, - &pcur, &mtr); - - if (!btr_pcur_is_on_user_rec(&pcur)) { - ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); - - goto leave_loop; - } - - for (;;) { - ut_ad(btr_pcur_is_on_user_rec(&pcur)); - - ibuf_rec = btr_pcur_get_rec(&pcur); - - /* Check if the entry is for this space */ - if (ibuf_rec_get_space(&mtr, ibuf_rec) != space) { - - goto leave_loop; - } - - page_no = ibuf_rec_get_page_no(&mtr, ibuf_rec); - - dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++; - - /* Delete the record from ibuf */ - if (ibuf_delete_rec(space, page_no, &pcur, search_tuple, - &mtr)) { - /* Deletion was pessimistic and mtr was committed: - we start from the beginning again */ - - ut_ad(mtr.state == MTR_COMMITTED); - goto loop; - } - - if (btr_pcur_is_after_last_on_page(&pcur)) { - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); 
- - goto loop; - } - } - -leave_loop: - ibuf_mtr_commit(&mtr); - btr_pcur_close(&pcur); - -#ifdef HAVE_ATOMIC_BUILTINS - ibuf_add_ops(ibuf->n_discarded_ops, dops); -#else /* HAVE_ATOMIC_BUILTINS */ - /* Protect our statistics keeping from race conditions */ - mutex_enter(&ibuf_mutex); - ibuf_add_ops(ibuf->n_discarded_ops, dops); - mutex_exit(&ibuf_mutex); -#endif /* HAVE_ATOMIC_BUILTINS */ - - mem_heap_free(heap); -} - -/******************************************************************//** -Looks if the insert buffer is empty. -@return true if empty */ -UNIV_INTERN -bool -ibuf_is_empty(void) -/*===============*/ -{ - bool is_empty; - const page_t* root; - mtr_t mtr; - - ibuf_mtr_start(&mtr); - - mutex_enter(&ibuf_mutex); - root = ibuf_tree_root_get(&mtr); - mutex_exit(&ibuf_mutex); - - is_empty = page_is_empty(root); - ut_a(is_empty == ibuf->empty); - ibuf_mtr_commit(&mtr); - - return(is_empty); -} - -/******************************************************************//** -Prints info of ibuf. 
*/ -UNIV_INTERN -void -ibuf_print( -/*=======*/ - FILE* file) /*!< in: file where to print */ -{ -#ifdef UNIV_IBUF_COUNT_DEBUG - ulint i; - ulint j; -#endif - - mutex_enter(&ibuf_mutex); - - fprintf(file, - "Ibuf: size %lu, free list len %lu," - " seg size %lu, %lu merges\n", - (ulong) ibuf->size, - (ulong) ibuf->free_list_len, - (ulong) ibuf->seg_size, - (ulong) ibuf->n_merges); - - fputs("merged operations:\n ", file); - ibuf_print_ops(ibuf->n_merged_ops, file); - - fputs("discarded operations:\n ", file); - ibuf_print_ops(ibuf->n_discarded_ops, file); - -#ifdef UNIV_IBUF_COUNT_DEBUG - for (i = 0; i < IBUF_COUNT_N_SPACES; i++) { - for (j = 0; j < IBUF_COUNT_N_PAGES; j++) { - ulint count = ibuf_count_get(i, j); - - if (count > 0) { - fprintf(stderr, - "Ibuf count for space/page %lu/%lu" - " is %lu\n", - (ulong) i, (ulong) j, (ulong) count); - } - } - } -#endif /* UNIV_IBUF_COUNT_DEBUG */ - - mutex_exit(&ibuf_mutex); -} - -/******************************************************************//** -Checks the insert buffer bitmaps on IMPORT TABLESPACE. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -ibuf_check_bitmap_on_import( -/*========================*/ - const trx_t* trx, /*!< in: transaction */ - ulint space_id) /*!< in: tablespace identifier */ -{ - ulint zip_size; - ulint page_size; - ulint size; - ulint page_no; - - ut_ad(space_id); - ut_ad(trx->mysql_thd); - - zip_size = fil_space_get_zip_size(space_id); - - if (zip_size == ULINT_UNDEFINED) { - return(DB_TABLE_NOT_FOUND); - } - - size = fil_space_get_size(space_id); - - if (size == 0) { - return(DB_TABLE_NOT_FOUND); - } - - mutex_enter(&ibuf_mutex); - - page_size = zip_size ? 
zip_size : UNIV_PAGE_SIZE; - - for (page_no = 0; page_no < size; page_no += page_size) { - mtr_t mtr; - page_t* bitmap_page; - ulint i; - - if (trx_is_interrupted(trx)) { - mutex_exit(&ibuf_mutex); - return(DB_INTERRUPTED); - } - - mtr_start(&mtr); - - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - - ibuf_enter(&mtr); - - bitmap_page = ibuf_bitmap_get_map_page( - space_id, page_no, zip_size, &mtr); - - for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) { - const ulint offset = page_no + i; - - if (ibuf_bitmap_page_get_bits( - bitmap_page, offset, zip_size, - IBUF_BITMAP_IBUF, &mtr)) { - - mutex_exit(&ibuf_mutex); - ibuf_exit(&mtr); - mtr_commit(&mtr); - - ib_errf(trx->mysql_thd, - IB_LOG_LEVEL_ERROR, - ER_INNODB_INDEX_CORRUPT, - "Space %u page %u" - " is wrongly flagged to belong to the" - " insert buffer", - (unsigned) space_id, - (unsigned) offset); - - return(DB_CORRUPTION); - } - - if (ibuf_bitmap_page_get_bits( - bitmap_page, offset, zip_size, - IBUF_BITMAP_BUFFERED, &mtr)) { - - ib_errf(trx->mysql_thd, - IB_LOG_LEVEL_WARN, - ER_INNODB_INDEX_CORRUPT, - "Buffered changes" - " for space %u page %u are lost", - (unsigned) space_id, - (unsigned) offset); - - /* Tolerate this error, so that - slightly corrupted tables can be - imported and dumped. Clear the bit. */ - ibuf_bitmap_page_set_bits( - bitmap_page, offset, zip_size, - IBUF_BITMAP_BUFFERED, FALSE, &mtr); - } - } - - ibuf_exit(&mtr); - mtr_commit(&mtr); - } - - mutex_exit(&ibuf_mutex); - return(DB_SUCCESS); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/api0api.h b/storage/xtradb/include/api0api.h deleted file mode 100644 index 500bf4fe3b2..00000000000 --- a/storage/xtradb/include/api0api.h +++ /dev/null @@ -1,1312 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/api0api.h -InnoDB Native API - -2008-08-01 Created by Sunny Bains. -3/20/2011 Jimmy Yang extracted from Embedded InnoDB -*******************************************************/ - -#ifndef api0api_h -#define api0api_h - -#include "db0err.h" -#include <stdio.h> - -#ifdef _MSC_VER -#define strncasecmp _strnicmp -#define strcasecmp _stricmp -#endif - -#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) -#define UNIV_NO_IGNORE MY_ATTRIBUTE ((warn_unused_result)) -#else -#define UNIV_NO_IGNORE -#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */ - -/* See comment about ib_bool_t as to why the two macros are unsigned long. */ -/** The boolean value of "true" used internally within InnoDB */ -#define IB_TRUE 0x1UL -/** The boolean value of "false" used internally within InnoDB */ -#define IB_FALSE 0x0UL - -/* Basic types used by the InnoDB API. 
*/ -/** All InnoDB error codes are represented by ib_err_t */ -typedef enum dberr_t ib_err_t; -/** Representation of a byte within InnoDB */ -typedef unsigned char ib_byte_t; -/** Representation of an unsigned long int within InnoDB */ -typedef unsigned long int ib_ulint_t; - -/* We assume C99 support except when using VisualStudio. */ -#if !defined(_MSC_VER) -#include <stdint.h> -#endif /* _MSC_VER */ - -/* Integer types used by the API. Microsft VS defines its own types -and we use the Microsoft types when building with Visual Studio. */ -#if defined(_MSC_VER) -/** A signed 8 bit integral type. */ -typedef __int8 ib_i8_t; -#else -/** A signed 8 bit integral type. */ -typedef int8_t ib_i8_t; -#endif - -#if defined(_MSC_VER) -/** An unsigned 8 bit integral type. */ -typedef unsigned __int8 ib_u8_t; -#else -/** An unsigned 8 bit integral type. */ -typedef uint8_t ib_u8_t; -#endif - -#if defined(_MSC_VER) -/** A signed 16 bit integral type. */ -typedef __int16 ib_i16_t; -#else -/** A signed 16 bit integral type. */ -typedef int16_t ib_i16_t; -#endif - -#if defined(_MSC_VER) -/** An unsigned 16 bit integral type. */ -typedef unsigned __int16 ib_u16_t; -#else -/** An unsigned 16 bit integral type. */ -typedef uint16_t ib_u16_t; -#endif - -#if defined(_MSC_VER) -/** A signed 32 bit integral type. */ -typedef __int32 ib_i32_t; -#else -/** A signed 32 bit integral type. */ -typedef int32_t ib_i32_t; -#endif - -#if defined(_MSC_VER) -/** An unsigned 32 bit integral type. */ -typedef unsigned __int32 ib_u32_t; -#else -/** An unsigned 32 bit integral type. */ -typedef uint32_t ib_u32_t; -#endif - -#if defined(_MSC_VER) -/** A signed 64 bit integral type. */ -typedef __int64 ib_i64_t; -#else -/** A signed 64 bit integral type. */ -typedef int64_t ib_i64_t; -#endif - -#if defined(_MSC_VER) -/** An unsigned 64 bit integral type. */ -typedef unsigned __int64 ib_u64_t; -#else -/** An unsigned 64 bit integral type. 
*/ -typedef uint64_t ib_u64_t; -#endif - -typedef void* ib_opaque_t; -typedef ib_opaque_t ib_charset_t; -typedef ib_ulint_t ib_bool_t; -typedef ib_u64_t ib_id_u64_t; - -/** @enum ib_cfg_type_t Possible types for a configuration variable. */ -typedef enum { - IB_CFG_IBOOL, /*!< The configuration parameter is - of type ibool */ - - /* XXX Can we avoid having different types for ulint and ulong? - - On Win64 "unsigned long" is 32 bits - - ulong is always defined as "unsigned long" - - On Win64 ulint is defined as 64 bit integer - => On Win64 ulint != ulong. - If we typecast all ulong and ulint variables to the smaller type - ulong, then we will cut the range of the ulint variables. - This is not a problem for most ulint variables because their max - allowed values do not exceed 2^32-1 (e.g. log_groups is ulint - but its max allowed value is 10). BUT buffer_pool_size and - log_file_size allow up to 2^64-1. */ - - IB_CFG_ULINT, /*!< The configuration parameter is - of type ulint */ - - IB_CFG_ULONG, /*!< The configuration parameter is - of type ulong */ - - IB_CFG_TEXT, /*!< The configuration parameter is - of type char* */ - - IB_CFG_CB /*!< The configuration parameter is - a callback parameter */ -} ib_cfg_type_t; - -/** @enum ib_col_type_t column types that are supported. */ -typedef enum { - IB_VARCHAR = 1, /*!< Character varying length. The - column is not padded. */ - - IB_CHAR = 2, /*!< Fixed length character string. The - column is padded to the right. */ - - IB_BINARY = 3, /*!< Fixed length binary, similar to - IB_CHAR but the column is not padded - to the right. */ - - IB_VARBINARY = 4, /*!< Variable length binary */ - - IB_BLOB = 5, /*!< Binary large object, or - a TEXT type */ - - IB_INT = 6, /*!< Integer: can be any size - from 1 - 8 bytes. If the size is - 1, 2, 4 and 8 bytes then you can use - the typed read and write functions. For - other sizes you will need to use the - ib_col_get_value() function and do the - conversion yourself. 
*/ - - IB_SYS = 8, /*!< System column, this column can - be one of DATA_TRX_ID, DATA_ROLL_PTR - or DATA_ROW_ID. */ - - IB_FLOAT = 9, /*!< C (float) floating point value. */ - - IB_DOUBLE = 10, /*!> C (double) floating point value. */ - - IB_DECIMAL = 11, /*!< Decimal stored as an ASCII - string */ - - IB_VARCHAR_ANYCHARSET = 12, /*!< Any charset, varying length */ - - IB_CHAR_ANYCHARSET = 13 /*!< Any charset, fixed length */ - -} ib_col_type_t; - -/** @enum ib_tbl_fmt_t InnoDB table format types */ -typedef enum { - IB_TBL_REDUNDANT, /*!< Redundant row format, the column - type and length is stored in the row.*/ - - IB_TBL_COMPACT, /*!< Compact row format, the column - type is not stored in the row. The - length is stored in the row but the - storage format uses a compact format - to store the length of the column data - and record data storage format also - uses less storage. */ - - IB_TBL_DYNAMIC, /*!< Compact row format. BLOB prefixes - are not stored in the clustered index */ - - IB_TBL_COMPRESSED /*!< Similar to dynamic format but - with pages compressed */ -} ib_tbl_fmt_t; - -/** @enum ib_col_attr_t InnoDB column attributes */ -typedef enum { - IB_COL_NONE = 0, /*!< No special attributes. */ - - IB_COL_NOT_NULL = 1, /*!< Column data can't be NULL. */ - - IB_COL_UNSIGNED = 2, /*!< Column is IB_INT and unsigned. */ - - IB_COL_NOT_USED = 4, /*!< Future use, reserved. */ - - IB_COL_CUSTOM1 = 8, /*!< Custom precision type, this is - a bit that is ignored by InnoDB and so - can be set and queried by users. */ - - IB_COL_CUSTOM2 = 16, /*!< Custom precision type, this is - a bit that is ignored by InnoDB and so - can be set and queried by users. */ - - IB_COL_CUSTOM3 = 32 /*!< Custom precision type, this is - a bit that is ignored by InnoDB and so - can be set and queried by users. */ -} ib_col_attr_t; - -/* Note: must match lock0types.h */ -/** @enum ib_lck_mode_t InnoDB lock modes. 
*/ -typedef enum { - IB_LOCK_IS = 0, /*!< Intention shared, an intention - lock should be used to lock tables */ - - IB_LOCK_IX, /*!< Intention exclusive, an intention - lock should be used to lock tables */ - - IB_LOCK_S, /*!< Shared locks should be used to - lock rows */ - - IB_LOCK_X, /*!< Exclusive locks should be used to - lock rows*/ - - IB_LOCK_TABLE_X, /*!< exclusive table lock */ - - IB_LOCK_NONE, /*!< This is used internally to note - consistent read */ - - IB_LOCK_NUM = IB_LOCK_NONE /*!< number of lock modes */ -} ib_lck_mode_t; - -typedef enum { - IB_CLUSTERED = 1, /*!< clustered index */ - IB_UNIQUE = 2 /*!< unique index */ -} ib_index_type_t; - -/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto(). -Note: Values must match those found in page0cur.h */ -typedef enum { - IB_CUR_G = 1, /*!< If search key is not found then - position the cursor on the row that - is greater than the search key */ - - IB_CUR_GE = 2, /*!< If the search key not found then - position the cursor on the row that - is greater than or equal to the search - key */ - - IB_CUR_L = 3, /*!< If search key is not found then - position the cursor on the row that - is less than the search key */ - - IB_CUR_LE = 4 /*!< If search key is not found then - position the cursor on the row that - is less than or equal to the search - key */ -} ib_srch_mode_t; - -/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */ -typedef enum { - IB_CLOSEST_MATCH, /*!< Closest match possible */ - - IB_EXACT_MATCH, /*!< Search using a complete key - value */ - - IB_EXACT_PREFIX /*!< Search using a key prefix which - must match to rows: the prefix may - contain an incomplete field (the - last field in prefix may be just - a prefix of a fixed length column) */ -} ib_match_mode_t; - -/** @struct ib_col_meta_t InnoDB column meta data. 
*/ -typedef struct { - ib_col_type_t type; /*!< Type of the column */ - - ib_col_attr_t attr; /*!< Column attributes */ - - ib_u32_t type_len; /*!< Length of type */ - - ib_u16_t client_type; /*!< 16 bits of data relevant only to - the client. InnoDB doesn't care */ - - ib_charset_t* charset; /*!< Column charset */ -} ib_col_meta_t; - -/* Note: Must be in sync with trx0trx.h */ -/** @enum ib_trx_state_t The transaction state can be queried using the -ib_trx_state() function. The InnoDB deadlock monitor can roll back a -transaction and users should be prepared for this, especially where there -is high contention. The way to determine the state of the transaction is to -query it's state and check. */ -typedef enum { - IB_TRX_NOT_STARTED, /*!< Has not started yet, the - transaction has not ben started yet.*/ - - IB_TRX_ACTIVE, /*!< The transaction is currently - active and needs to be either - committed or rolled back. */ - - IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */ - - IB_TRX_PREPARED /*!< Support for 2PC/XA */ -} ib_trx_state_t; - -/* Note: Must be in sync with trx0trx.h */ -/** @enum ib_trx_level_t Transaction isolation levels */ -typedef enum { - IB_TRX_READ_UNCOMMITTED = 0, /*!< Dirty read: non-locking SELECTs are - performed so that we do not look at a - possible earlier version of a record; - thus they are not 'consistent' reads - under this isolation level; otherwise - like level 2 */ - - IB_TRX_READ_COMMITTED = 1, /*!< Somewhat Oracle-like isolation, - except that in range UPDATE and DELETE - we must block phantom rows with - next-key locks; SELECT ... FOR UPDATE - and ... 
LOCK IN SHARE MODE only lock - the index records, NOT the gaps before - them, and thus allow free inserting; - each consistent read reads its own - snapshot */ - - IB_TRX_REPEATABLE_READ = 2, /*!< All consistent reads in the same - trx read the same snapshot; full - next-key locking used in locking reads - to block insertions into gaps */ - - IB_TRX_SERIALIZABLE = 3 /*!< All plain SELECTs are converted to - LOCK IN SHARE MODE reads */ -} ib_trx_level_t; - -/** Generical InnoDB callback prototype. */ -typedef void (*ib_cb_t)(void); - -#define IB_CFG_BINLOG_ENABLED 0x1 -#define IB_CFG_MDL_ENABLED 0x2 -#define IB_CFG_DISABLE_ROWLOCK 0x4 - -/** The first argument to the InnoDB message logging function. By default -it's set to stderr. You should treat ib_msg_stream_t as a void*, since -it will probably change in the future. */ -typedef FILE* ib_msg_stream_t; - -/** All log messages are written to this function.It should have the same -behavior as fprintf(3). */ -typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...); - -/* Note: This is to make it easy for API users to have type -checking for arguments to our functions. Making it ib_opaque_t -by itself will result in pointer decay resulting in subverting -of the compiler's type checking. */ - -/** InnoDB tuple handle. This handle can refer to either a cluster index -tuple or a secondary index tuple. There are two types of tuples for each -type of index, making a total of four types of tuple handles. There -is a tuple for reading the entire row contents and another for searching -on the index key. */ -typedef struct ib_tuple_t* ib_tpl_t; - -/** InnoDB transaction handle, all database operations need to be covered -by transactions. This handle represents a transaction. The handle can be -created with ib_trx_begin(), you commit your changes with ib_trx_commit() -and undo your changes using ib_trx_rollback(). 
If the InnoDB deadlock -monitor rolls back the transaction then you need to free the transaction -using the function ib_trx_release(). You can query the state of an InnoDB -transaction by calling ib_trx_state(). */ -typedef struct trx_t* ib_trx_t; - -/** InnoDB cursor handle */ -typedef struct ib_cursor_t* ib_crsr_t; - -/*************************************************************//** -This function is used to compare two data fields for which the data type -is such that we must use the client code to compare them. - -@param col_meta column meta data -@param p1 key -@oaram p1_len key length -@param p2 second key -@param p2_len second key length -@return 1, 0, -1, if a is greater, equal, less than b, respectively */ - -typedef int (*ib_client_cmp_t)( - const ib_col_meta_t* col_meta, - const ib_byte_t* p1, - ib_ulint_t p1_len, - const ib_byte_t* p2, - ib_ulint_t p2_len); - -/* This should be the same as univ.i */ -/** Represents SQL_NULL length */ -#define IB_SQL_NULL 0xFFFFFFFF -/** The number of system columns in a row. */ -#define IB_N_SYS_COLS 3 - -/** The maximum length of a text column. */ -#define MAX_TEXT_LEN 4096 - -/* MySQL uses 3 byte UTF-8 encoding. */ -/** The maximum length of a column name in a table schema. */ -#define IB_MAX_COL_NAME_LEN (64 * 3) - -/** The maximum length of a table name (plus database name). */ -#define IB_MAX_TABLE_NAME_LEN (64 * 3) * 2 - -/*****************************************************************//** -Start a transaction that's been rolled back. This special function -exists for the case when InnoDB's deadlock detector has rolledack -a transaction. While the transaction has been rolled back the handle -is still valid and can be reused by calling this function. If you -don't want to reuse the transaction handle then you can free the handle -by calling ib_trx_release(). 
-@return innobase txn handle */ - -ib_err_t -ib_trx_start( -/*=========*/ - ib_trx_t ib_trx, /*!< in: transaction to restart */ - ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */ - ib_bool_t read_write, /*!< in: true if read write - transaction */ - ib_bool_t auto_commit, /*!< in: auto commit after each - single DML */ - void* thd); /*!< in: THD */ - -/*****************************************************************//** -Begin a transaction. This will allocate a new transaction handle and -put the transaction in the active state. -@return innobase txn handle */ - -ib_trx_t -ib_trx_begin( -/*=========*/ - ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */ - ib_bool_t read_write, /*!< in: true if read write - transaction */ - ib_bool_t auto_commit); /*!< in: auto commit after each - single DML */ - -/*****************************************************************//** -Query the transaction's state. This function can be used to check for -the state of the transaction in case it has been rolled back by the -InnoDB deadlock detector. Note that when a transaction is selected as -a victim for rollback, InnoDB will always return an appropriate error -code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and -@see DB_LOCK_WAIT_TIMEOUT -@return transaction state */ - -ib_trx_state_t -ib_trx_state( -/*=========*/ - ib_trx_t ib_trx); /*!< in: trx handle */ - - -/*****************************************************************//** -Check if the transaction is read_only */ -ib_u32_t -ib_trx_read_only( -/*=============*/ - ib_trx_t ib_trx); /*!< in: trx handle */ - -/*****************************************************************//** -Release the resources of the transaction. If the transaction was -selected as a victim by InnoDB and rolled back then use this function -to free the transaction handle. 
-@return DB_SUCCESS or err code */ - -ib_err_t -ib_trx_release( -/*===========*/ - ib_trx_t ib_trx); /*!< in: trx handle */ - -/*****************************************************************//** -Commit a transaction. This function will release the schema latches too. -It will also free the transaction handle. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_trx_commit( -/*==========*/ - ib_trx_t ib_trx); /*!< in: trx handle */ - -/*****************************************************************//** -Rollback a transaction. This function will release the schema latches too. -It will also free the transaction handle. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_trx_rollback( -/*============*/ - ib_trx_t ib_trx); /*!< in: trx handle */ - -/*****************************************************************//** -Open an InnoDB table and return a cursor handle to it. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_open_table_using_id( -/*==========================*/ - ib_id_u64_t table_id, /*!< in: table id of table to open */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */ - -/*****************************************************************//** -Open an InnoDB index and return a cursor handle to it. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_open_index_using_id( -/*==========================*/ - ib_id_u64_t index_id, /*!< in: index id of index to open */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */ - -/*****************************************************************//** -Open an InnoDB secondary index cursor and return a cursor handle to it. 
-@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_open_index_using_name( -/*============================*/ - ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */ - const char* index_name, /*!< in: secondary index name */ - ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */ - int* idx_type, /*!< out: index is cluster index */ - ib_id_u64_t* idx_id); /*!< out: index id */ - -/*****************************************************************//** -Open an InnoDB table by name and return a cursor handle to it. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_open_table( -/*=================*/ - const char* name, /*!< in: table name */ - ib_trx_t ib_trx, /*!< in: Current transaction handle - can be NULL */ - ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */ - -/*****************************************************************//** -Reset the cursor. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_reset( -/*============*/ - ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - - -/*****************************************************************//** -set a cursor trx to NULL*/ - -void -ib_cursor_clear_trx( -/*================*/ - ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - -/*****************************************************************//** -Close an InnoDB table and free the cursor. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_close( -/*============*/ - ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - -/*****************************************************************//** -Close the table, decrement n_ref_count count. 
-@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_close_table( -/*==================*/ - ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - -/*****************************************************************//** -update the cursor with new transactions and also reset the cursor -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_new_trx( -/*==============*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_trx_t ib_trx); /*!< in: transaction */ - -/*****************************************************************//** -Commit the transaction in a cursor -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_commit_trx( -/*=================*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_trx_t ib_trx); /*!< in: transaction */ - -/********************************************************************//** -Open a table using the table name, if found then increment table ref count. -@return table instance if found */ - -void* -ib_open_table_by_name( -/*==================*/ - const char* name); /*!< in: table name to lookup */ - -/*****************************************************************//** -Insert a row to a table. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_insert_row( -/*=================*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */ - const ib_tpl_t ib_tpl); /*!< in: tuple to insert */ - -/*****************************************************************//** -Update a row in a table. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_update_row( -/*=================*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */ - const ib_tpl_t ib_new_tpl); /*!< in: New tuple to update */ - -/*****************************************************************//** -Delete a row in a table. 
-@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_delete_row( -/*=================*/ - ib_crsr_t ib_crsr); /*!< in: cursor instance */ - -/*****************************************************************//** -Read current row. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_read_row( -/*===============*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_tpl_t ib_tpl, /*!< out: read cols into this tuple */ - void** row_buf, /*!< in/out: row buffer */ - ib_ulint_t* row_len); /*!< in/out: row buffer len */ - -/*****************************************************************//** -Move cursor to the first record in the table. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_first( -/*============*/ - ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */ - -/*****************************************************************//** -Move cursor to the last record in the table. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_last( -/*===========*/ - ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */ - -/*****************************************************************//** -Move cursor to the next record in the table. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_next( -/*===========*/ - ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */ - -/*****************************************************************//** -Search for key. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_cursor_moveto( -/*=============*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_tpl_t ib_tpl, /*!< in: Key to search for */ - ib_srch_mode_t ib_srch_mode); /*!< in: search mode */ - -/*****************************************************************//** -Set the match mode for ib_cursor_move(). 
*/ - -void -ib_cursor_set_match_mode( -/*=====================*/ - ib_crsr_t ib_crsr, /*!< in: Cursor instance */ - ib_match_mode_t match_mode); /*!< in: ib_cursor_moveto match mode */ - -/*****************************************************************//** -Set a column of the tuple. Make a copy using the tuple's heap. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_col_set_value( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t col_no, /*!< in: column index in tuple */ - const void* src, /*!< in: data value */ - ib_ulint_t len, /*!< in: data value len */ - ib_bool_t need_cpy); /*!< in: if need memcpy */ - - -/*****************************************************************//** -Get the size of the data available in the column the tuple. -@return bytes avail or IB_SQL_NULL */ - -ib_ulint_t -ib_col_get_len( -/*===========*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i); /*!< in: column index in tuple */ - -/*****************************************************************//** -Copy a column value from the tuple. -@return bytes copied or IB_SQL_NULL */ - -ib_ulint_t -ib_col_copy_value( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: tuple instance */ - ib_ulint_t i, /*!< in: column index in tuple */ - void* dst, /*!< out: copied data value */ - ib_ulint_t len); /*!< in: max data value len to copy */ - -/*************************************************************//** -Read a signed int 8 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_i8( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i8_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read an unsigned int 8 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_u8( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u8_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read a signed int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_i16( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i16_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read an unsigned int 16 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_u16( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u16_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read a signed int 32 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_i32( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i32_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read an unsigned int 32 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_u32( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u32_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read a signed int 64 bit column from an InnoDB tuple. 
-@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_i64( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_i64_t* ival); /*!< out: integer value */ - -/*************************************************************//** -Read an unsigned int 64 bit column from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_u64( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_u64_t* ival); /*!< out: integer value */ - -/*****************************************************************//** -Get a column value pointer from the tuple. -@return NULL or pointer to buffer */ - -const void* -ib_col_get_value( -/*=============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i); /*!< in: column number */ - -/*****************************************************************//** -Get a column type, length and attributes from the tuple. -@return len of column data */ - -ib_ulint_t -ib_col_get_meta( -/*============*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t i, /*!< in: column number */ - ib_col_meta_t* ib_col_meta); /*!< out: column meta data */ - -/*****************************************************************//** -"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple. -@return new tuple, or NULL */ - -ib_tpl_t -ib_tuple_clear( -/*============*/ - ib_tpl_t ib_tpl); /*!< in: InnoDB tuple */ - -/*****************************************************************//** -Create a new cluster key search tuple and copy the contents of the -secondary index key tuple columns that refer to the cluster index record -to the cluster key. It does a deep copy of the column data. 
-@return DB_SUCCESS or error code */ - -ib_err_t -ib_tuple_get_cluster_key( -/*=====================*/ - ib_crsr_t ib_crsr, /*!< in: secondary index cursor */ - ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */ - const ib_tpl_t ib_src_tpl); /*!< in: source tuple */ - -/*****************************************************************//** -Copy the contents of source tuple to destination tuple. The tuples -must be of the same type and belong to the same table/index. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_tuple_copy( -/*==========*/ - ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */ - const ib_tpl_t ib_src_tpl); /*!< in: source tuple */ - -/*****************************************************************//** -Create an InnoDB tuple used for index/table search. -@return tuple for current index */ - -ib_tpl_t -ib_sec_search_tuple_create( -/*=======================*/ - ib_crsr_t ib_crsr); /*!< in: Cursor instance */ - -/*****************************************************************//** -Create an InnoDB tuple used for index/table search. -@return tuple for current index */ - -ib_tpl_t -ib_sec_read_tuple_create( -/*=====================*/ - ib_crsr_t ib_crsr); /*!< in: Cursor instance */ - -/*****************************************************************//** -Create an InnoDB tuple used for table key operations. -@return tuple for current table */ - -ib_tpl_t -ib_clust_search_tuple_create( -/*=========================*/ - ib_crsr_t ib_crsr); /*!< in: Cursor instance */ - -/*****************************************************************//** -Create an InnoDB tuple for table row operations. -@return tuple for current table */ - -ib_tpl_t -ib_clust_read_tuple_create( -/*=======================*/ - ib_crsr_t ib_crsr); /*!< in: Cursor instance */ - -/*****************************************************************//** -Return the number of user columns in the tuple definition. 
-@return number of user columns */ - -ib_ulint_t -ib_tuple_get_n_user_cols( -/*=====================*/ - const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */ - -/*****************************************************************//** -Return the number of columns in the tuple definition. -@return number of columns */ - -ib_ulint_t -ib_tuple_get_n_cols( -/*================*/ - const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */ - -/*****************************************************************//** -Destroy an InnoDB tuple. */ - -void -ib_tuple_delete( -/*============*/ - ib_tpl_t ib_tpl); /*!< in,own: Tuple instance to delete */ - -/*****************************************************************//** -Truncate a table. The cursor handle will be closed and set to NULL -on success. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_cursor_truncate( -/*===============*/ - ib_crsr_t* ib_crsr, /*!< in/out: cursor for table - to truncate */ - ib_id_u64_t* table_id); /*!< out: new table id */ - -/*****************************************************************//** -Get a table id. -@return DB_SUCCESS if found */ - -ib_err_t -ib_table_get_id( -/*============*/ - const char* table_name, /*!< in: table to find */ - ib_id_u64_t* table_id); /*!< out: table id if found */ - -/*****************************************************************//** -Get an index id. -@return DB_SUCCESS if found */ - -ib_err_t -ib_index_get_id( -/*============*/ - const char* table_name, /*!< in: find index for this table */ - const char* index_name, /*!< in: index to find */ - ib_id_u64_t* index_id); /*!< out: index id if found */ - -/*****************************************************************//** -Check if cursor is positioned. 
-@return IB_TRUE if positioned */ - -ib_bool_t -ib_cursor_is_positioned( -/*====================*/ - const ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */ - -/*****************************************************************//** -Checks if the data dictionary is latched in exclusive mode by a -user transaction. -@return TRUE if exclusive latch */ - -ib_bool_t -ib_schema_lock_is_exclusive( -/*========================*/ - const ib_trx_t ib_trx); /*!< in: transaction */ - -/*****************************************************************//** -Lock an InnoDB cursor/table. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_cursor_lock( -/*===========*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */ - -/*****************************************************************//** -Set the Lock an InnoDB table using the table id. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_table_lock( -/*===========*/ - ib_trx_t ib_trx, /*!< in/out: transaction */ - ib_id_u64_t table_id, /*!< in: table id */ - ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */ - -/*****************************************************************//** -Set the Lock mode of the cursor. -@return DB_SUCCESS or error code */ - -ib_err_t -ib_cursor_set_lock_mode( -/*====================*/ - ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */ - ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */ - -/*****************************************************************//** -Set need to access clustered index record flag. */ - -void -ib_cursor_set_cluster_access( -/*=========================*/ - ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. 
-@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_i8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i8_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_i16( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i16_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_i32( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i32_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_i64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_i64_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format.
-@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_u8( -/*==============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u8_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_u16( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u16_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_u32( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u32_t val); /*!< in: value to write */ - -/*****************************************************************//** -Write an integer value to a column. Integers are stored in big-endian -format and will need to be converted from the host format. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_u64( -/*===============*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - ib_u64_t val); /*!< in: value to write */ - -/*****************************************************************//** -Inform the cursor that it's the start of an SQL statement. */ - -void -ib_cursor_stmt_begin( -/*=================*/ - ib_crsr_t ib_crsr); /*!< in: cursor */ - -/*****************************************************************//** -Write a double value to a column.
-@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_double( -/*==================*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - int col_no, /*!< in: column number */ - double val); /*!< in: value to write */ - -/*************************************************************//** -Read a double column value from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_double( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t col_no, /*!< in: column number */ - double* dval); /*!< out: double value */ - -/*****************************************************************//** -Write a float value to a column. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_write_float( -/*=================*/ - ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */ - int col_no, /*!< in: column number */ - float val); /*!< in: value to write */ - -/*************************************************************//** -Read a float value from an InnoDB tuple. -@return DB_SUCCESS or error */ - -ib_err_t -ib_tuple_read_float( -/*================*/ - ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */ - ib_ulint_t col_no, /*!< in: column number */ - float* fval); /*!< out: float value */ - -/*****************************************************************//** -Get a column name from the cursor. -@return name of the column */ - -const char* -ib_col_get_name( -/*============*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_ulint_t i); /*!< in: column index in tuple */ - -/*****************************************************************//** -Get an index field name from the cursor. -@return name of the field */ - -const char* -ib_get_idx_field_name( -/*==================*/ - ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */ - ib_ulint_t i); /*!< in: column index in tuple */ - -/*****************************************************************//** -Truncate a table.
-@return DB_SUCCESS or error code */ - -ib_err_t -ib_table_truncate( -/*==============*/ - const char* table_name, /*!< in: table name */ - ib_id_u64_t* table_id); /*!< out: new table id */ - -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. -@return DB_SUCCESS or error number */ - -ib_err_t -ib_close_thd( -/*=========*/ - void* thd); /*!< in: handle to the MySQL - thread of the user whose resources - should be free'd */ - -/*****************************************************************//** -Get generic configure status -@return configure status*/ - -int -ib_cfg_get_cfg(); -/*============*/ - -/*****************************************************************//** -Increase/decrease the memcached sync count of table to sync memcached -DML with SQL DDLs. -@return DB_SUCCESS or error number */ -ib_err_t -ib_cursor_set_memcached_sync( -/*=========================*/ - ib_crsr_t ib_crsr, /*!< in: cursor */ - ib_bool_t flag); /*!< in: true for increasing */ - -/*****************************************************************//** -Check whether the table name conforms to our requirements. Currently -we only do a simple check for the presence of a '/'. -@return DB_SUCCESS or err code */ - -ib_err_t -ib_table_name_check( -/*================*/ - const char* name); /*!< in: table name to check */ - -/*****************************************************************//** -Return isolation configuration set by "innodb_api_trx_level" -@return trx isolation level*/ - -ib_trx_state_t -ib_cfg_trx_level(); -/*==============*/ - -/*****************************************************************//** -Return configure value for background commit interval (in seconds) -@return background commit interval (in seconds) */ - -ib_ulint_t -ib_cfg_bk_commit_interval(); -/*=======================*/ - -/*****************************************************************//** -Get a trx start time. 
-@return trx start_time */ - -ib_u64_t -ib_trx_get_start_time( -/*==================*/ - ib_trx_t ib_trx); /*!< in: transaction */ - -#endif /* api0api_h */ diff --git a/storage/xtradb/include/api0misc.h b/storage/xtradb/include/api0misc.h deleted file mode 100644 index fcd748390d1..00000000000 --- a/storage/xtradb/include/api0misc.h +++ /dev/null @@ -1,78 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/api0misc.h -InnoDB Native API - -3/20/2011 Jimmy Yang extracted from Embedded InnoDB -2008 Created by Sunny Bains -*******************************************************/ - -#ifndef api0misc_h -#define api0misc_h - -#include "univ.i" -#include "os0file.h" -#include "que0que.h" -#include "trx0trx.h" - -/** Whether binlog is enabled for applications using InnoDB APIs */ -extern my_bool ib_binlog_enabled; - -/** Whether MySQL MDL is enabled for applications using InnoDB APIs */ -extern my_bool ib_mdl_enabled; - -/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */ -extern my_bool ib_disable_row_lock; - -/** configure value for transaction 
isolation level */ -extern ulong ib_trx_level_setting; - -/** configure value for background commit interval (in seconds) */ -extern ulong ib_bk_commit_interval; - -/******************************************************************** -Handles user errors and lock waits detected by the database engine. -@return TRUE if it was a lock wait and we should continue running -the query thread */ -UNIV_INTERN -ibool -ib_handle_errors( -/*=============*/ - dberr_t* new_err, /*!< out: possible new error - encountered in lock wait, or if - no new error, the value of - trx->error_state at the entry of this - function */ - trx_t* trx, /*!< in: transaction */ - que_thr_t* thr, /*!< in: query thread */ - trx_savept_t* savept); /*!< in: savepoint or NULL */ - -/************************************************************************* -Sets a lock on a table. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -ib_trx_lock_table_with_retry( -/*=========================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode); /*!< in: lock mode */ - -#endif /* api0misc_h */ diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h deleted file mode 100644 index 9ab62f7739f..00000000000 --- a/storage/xtradb/include/btr0btr.h +++ /dev/null @@ -1,883 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2015, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/btr0btr.h -The B-tree - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#ifndef btr0btr_h -#define btr0btr_h - -#include "univ.i" - -#include "dict0dict.h" -#include "data0data.h" -#include "page0cur.h" -#include "mtr0mtr.h" -#include "btr0types.h" - -#ifndef UNIV_HOTBACKUP -/** Maximum record size which can be stored on a page, without using the -special big record storage structure */ -#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200) - -/** @brief Maximum depth of a B-tree in InnoDB. - -Note that this isn't a maximum as such; none of the tree operations -avoid producing trees bigger than this. It is instead a "max depth -that other code must work with", useful for e.g. fixed-size arrays -that must store some information about each level in a tree. In other -words: if a B-tree with bigger depth than this is encountered, it is -not acceptable for it to lead to mysterious memory corruption, but it -is acceptable for the program to die with a clear assert failure. */ -#define BTR_MAX_LEVELS 100 - -/** Latching modes for btr_cur_search_to_nth_level(). */ -enum btr_latch_mode { - /** Search a record on a leaf page and S-latch it. */ - BTR_SEARCH_LEAF = RW_S_LATCH, - /** (Prepare to) modify a record on a leaf page and X-latch it. */ - BTR_MODIFY_LEAF = RW_X_LATCH, - /** Obtain no latches. */ - BTR_NO_LATCHES = RW_NO_LATCH, - /** Start modifying the entire B-tree. */ - BTR_MODIFY_TREE = 33, - /** Continue modifying the entire B-tree. */ - BTR_CONT_MODIFY_TREE = 34, - /** Search the previous record. 
*/ - BTR_SEARCH_PREV = 35, - /** Modify the previous record. */ - BTR_MODIFY_PREV = 36, - /** Weaker BTR_MODIFY_TREE that does not lock the leaf page siblings, - used for fake changes. */ - BTR_SEARCH_TREE = 37 /* BTR_MODIFY_TREE | 4 */ -}; - -/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */ - -/** If this is ORed to btr_latch_mode, it means that the search tuple -will be inserted to the index, at the searched position. -When the record is not in the buffer pool, try to use the insert buffer. */ -#define BTR_INSERT 512 - -/** This flag ORed to btr_latch_mode says that we do the search in query -optimization */ -#define BTR_ESTIMATE 1024 - -/** This flag ORed to BTR_INSERT says that we can ignore possible -UNIQUE definition on secondary indexes when we decide if we can use -the insert buffer to speed up inserts */ -#define BTR_IGNORE_SEC_UNIQUE 2048 - -/** Try to delete mark the record at the searched position using the -insert/delete buffer when the record is not in the buffer pool. */ -#define BTR_DELETE_MARK 4096 - -/** Try to purge the record at the searched position using the insert/delete -buffer when the record is not in the buffer pool. */ -#define BTR_DELETE 8192 - -/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is -already holding an S latch on the index tree */ -#define BTR_ALREADY_S_LATCHED 16384 - -#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \ - ((latch_mode) & ~(BTR_INSERT \ - | BTR_DELETE_MARK \ - | BTR_DELETE \ - | BTR_ESTIMATE \ - | BTR_IGNORE_SEC_UNIQUE \ - | BTR_ALREADY_S_LATCHED)) -#endif /* UNIV_HOTBACKUP */ - -/**************************************************************//** -Report that an index page is corrupted. */ -UNIV_INTERN -void -btr_corruption_report( -/*==================*/ - const buf_block_t* block, /*!< in: corrupted block */ - const dict_index_t* index) /*!< in: index tree */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); - -/** Assert that a B-tree page is not corrupted. 
-@param block buffer block containing a B-tree page -@param index the B-tree index */ -#define btr_assert_not_corrupted(block, index) \ - if ((ibool) !!page_is_comp(buf_block_get_frame(block)) \ - != dict_table_is_comp((index)->table)) { \ - btr_corruption_report(block, index); \ - ut_error; \ - } - -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_BLOB_DEBUG -# include "ut0rbt.h" -/** An index->blobs entry for keeping track of off-page column references */ -struct btr_blob_dbg_t -{ - unsigned blob_page_no:32; /*!< first BLOB page number */ - unsigned ref_page_no:32; /*!< referring page number */ - unsigned ref_heap_no:16; /*!< referring heap number */ - unsigned ref_field_no:10; /*!< referring field number */ - unsigned owner:1; /*!< TRUE if BLOB owner */ - unsigned always_owner:1; /*!< TRUE if always - has been the BLOB owner; - reset to TRUE on B-tree - page splits and merges */ - unsigned del:1; /*!< TRUE if currently - delete-marked */ -}; - -/**************************************************************//** -Add a reference to an off-page column to the index->blobs map. */ -UNIV_INTERN -void -btr_blob_dbg_add_blob( -/*==================*/ - const rec_t* rec, /*!< in: clustered index record */ - ulint field_no, /*!< in: number of off-page column */ - ulint page_no, /*!< in: start page of the column */ - dict_index_t* index, /*!< in/out: index tree */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Display the references to off-page columns. -This function is to be called from a debugger, -for example when a breakpoint on ut_dbg_assertion_failed is hit. */ -UNIV_INTERN -void -btr_blob_dbg_print( -/*===============*/ - const dict_index_t* index) /*!< in: index tree */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Check that there are no references to off-page columns from or to -the given page. 
Invoked when freeing or clearing a page. -@return TRUE when no orphan references exist */ -UNIV_INTERN -ibool -btr_blob_dbg_is_empty( -/*==================*/ - dict_index_t* index, /*!< in: index */ - ulint page_no) /*!< in: page number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/**************************************************************//** -Modify the 'deleted' flag of a record. */ -UNIV_INTERN -void -btr_blob_dbg_set_deleted_flag( -/*==========================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ - ibool del) /*!< in: TRUE=deleted, FALSE=exists */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Change the ownership of an off-page column. */ -UNIV_INTERN -void -btr_blob_dbg_owner( -/*===============*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: rec_get_offs(rec, index) */ - ulint i, /*!< in: ith field in rec */ - ibool own) /*!< in: TRUE=owned, FALSE=disowned */ - MY_ATTRIBUTE((nonnull)); -/** Assert that there are no BLOB references to or from the given page. */ -# define btr_blob_dbg_assert_empty(index, page_no) \ - ut_a(btr_blob_dbg_is_empty(index, page_no)) -#else /* UNIV_BLOB_DEBUG */ -# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx) ((void) 0) -# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0) -# define btr_blob_dbg_owner(rec, index, offsets, i, val) ((void) 0) -# define btr_blob_dbg_assert_empty(index, page_no) ((void) 0) -#endif /* UNIV_BLOB_DEBUG */ - -/**************************************************************//** -Gets the root node of a tree and x-latches it. 
-@return root page, x-latched */ -UNIV_INTERN -page_t* -btr_root_get( -/*=========*/ - const dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); - -/**************************************************************//** -Checks and adjusts the root node of a tree during IMPORT TABLESPACE. -@return error code, or DB_SUCCESS */ -UNIV_INTERN -dberr_t -btr_root_adjust_on_import( -/*======================*/ - const dict_index_t* index) /*!< in: index tree */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/**************************************************************//** -Gets the height of the B-tree (the level of the root, when the leaf -level is assumed to be 0). The caller must hold an S or X latch on -the index. -@return tree height (level of the root) */ -UNIV_INTERN -ulint -btr_height_get( -/*===========*/ - dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Gets a buffer page and declares its latching order level. */ -UNIV_INLINE -buf_block_t* -btr_block_get_func( -/*===============*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - dict_index_t* index, /*!< in: index tree, may be NULL - if it is not an insert buffer tree */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -# ifdef UNIV_SYNC_DEBUG -/** Gets a buffer page and declares its latching order level. 
-@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param page_no page number -@param mode latch mode -@param index index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle -@return the block descriptor */ -# define btr_block_get(space,zip_size,page_no,mode,index,mtr) \ - btr_block_get_func(space,zip_size,page_no,mode, \ - __FILE__,__LINE__,index,mtr) -# else /* UNIV_SYNC_DEBUG */ -/** Gets a buffer page and declares its latching order level. -@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param page_no page number -@param mode latch mode -@param idx index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle -@return the block descriptor */ -# define btr_block_get(space,zip_size,page_no,mode,idx,mtr) \ - btr_block_get_func(space,zip_size,page_no,mode, \ - __FILE__,__LINE__,idx,mtr) -# endif /* UNIV_SYNC_DEBUG */ -/** Gets a buffer page and declares its latching order level. -@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param root_page_no page number -@param mode latch mode -@param index index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle -@return the uncompressed page frame */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, - ulint zip_size, - ulint root_page_no, - ulint mode, - dict_index_t* index, - mtr_t* mtr) - MY_ATTRIBUTE((warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/**************************************************************//** -Gets the index id field of a page.
-@return index id */ -UNIV_INLINE -index_id_t -btr_page_get_index_id( -/*==================*/ - const page_t* page) /*!< in: index page */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Gets the node level field in an index page. -@return level, leaf level == 0 */ -UNIV_INLINE -ulint -btr_page_get_level_low( -/*===================*/ - const page_t* page) /*!< in: index page */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -#define btr_page_get_level(page, mtr) btr_page_get_level_low(page) -/********************************************************//** -Gets the next index page number. -@return next page number */ -UNIV_INLINE -ulint -btr_page_get_next( -/*==============*/ - const page_t* page, /*!< in: index page */ - mtr_t* mtr) /*!< in: mini-transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************//** -Gets the previous index page number. -@return prev page number */ -UNIV_INLINE -ulint -btr_page_get_prev( -/*==============*/ - const page_t* page, /*!< in: index page */ - mtr_t* mtr) /*!< in: mini-transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Gets pointer to the previous user record in the tree. It is assumed -that the caller has appropriate latches on the page and its neighbor. -@return previous user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_prev_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the previous page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Gets pointer to the next user record in the tree. It is assumed -that the caller has appropriate latches on the page and its neighbor. 
-@return next user record, NULL if there is none */ -UNIV_INTERN -rec_t* -btr_get_next_user_rec( -/*==================*/ - rec_t* rec, /*!< in: record on leaf level */ - mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if - needed, also to the next page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Releases the latch on a leaf page and bufferunfixes it. */ -UNIV_INLINE -void -btr_leaf_page_release( -/*==================*/ - buf_block_t* block, /*!< in: buffer block */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Gets the child node file address in a node pointer. -NOTE: the offsets array must contain all offsets for the record since -we read the last field according to offsets and assume that it contains -the child page number. In other words offsets must have been retrieved -with rec_get_offsets(n_fields=ULINT_UNDEFINED). -@return child node address */ -UNIV_INLINE -ulint -btr_node_ptr_get_child_page_no( -/*===========================*/ - const rec_t* rec, /*!< in: node pointer record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/************************************************************//** -Creates the root node for a new index tree. 
-@return page number of the created root, FIL_NULL if did not succeed */ -UNIV_INTERN -ulint -btr_create( -/*=======*/ - ulint type, /*!< in: type of the index */ - ulint space, /*!< in: space where created */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - index_id_t index_id,/*!< in: index id */ - dict_index_t* index, /*!< in: index */ - mtr_t* mtr) /*!< in: mini-transaction handle */ - MY_ATTRIBUTE((nonnull)); -/************************************************************//** -Frees a B-tree except the root page, which MUST be freed after this -by calling btr_free_root. */ -UNIV_INTERN -void -btr_free_but_not_root( -/*==================*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no); /*!< in: root page number */ -/************************************************************//** -Frees the B-tree root page. Other tree MUST already have been freed. */ -UNIV_INTERN -void -btr_free_root( -/*==========*/ - ulint space, /*!< in: space where created */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint root_page_no, /*!< in: root page number */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Makes tree one level higher by splitting the root, and inserts -the tuple. It is assumed that mtr contains an x-latch on the tree. -NOTE that the operation of this function must always succeed, -we cannot reverse it: therefore enough free disk space must be -guaranteed to be available before this function is called. 
-@return inserted record */ -UNIV_INTERN -rec_t* -btr_root_raise_and_insert( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor at which to insert: must be - on the root page; when the function returns, - the cursor is positioned on the predecessor - of the inserted record */ - ulint** offsets,/*!< out: offsets on inserted record */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap - that can be emptied, or NULL */ - const dtuple_t* tuple, /*!< in: tuple to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull(2,3,4,7), warn_unused_result)); -/*************************************************************//** -Reorganizes an index page. - -IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. This has to -be done either within the same mini-transaction, or by invoking -ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, -IBUF_BITMAP_FREE is unaffected by reorganization. - -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN -bool -btr_page_reorganize_low( -/*====================*/ - bool recovery,/*!< in: true if called in recovery: - locks should not be updated, i.e., - there cannot exist locks on the - page, and a hash index should not be - dropped: it cannot exist */ - ulint z_level,/*!< in: compression level to be used - if dealing with compressed page */ - page_cur_t* cursor, /*!< in/out: page cursor */ - dict_index_t* index, /*!< in: the index tree of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Reorganizes an index page. 
- -IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. This has to -be done either within the same mini-transaction, or by invoking -ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, -IBUF_BITMAP_FREE is unaffected by reorganization. - -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN -bool -btr_page_reorganize( -/*================*/ - page_cur_t* cursor, /*!< in/out: page cursor */ - dict_index_t* index, /*!< in: the index tree of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Decides if the page should be split at the convergence point of -inserts converging to left. -@return TRUE if split recommended */ -UNIV_INTERN -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec)/*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple should be first */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Decides if the page should be split at the convergence point of -inserts converging to right. -@return TRUE if split recommended */ -UNIV_INTERN -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - btr_cur_t* cursor, /*!< in: cursor at which to insert */ - rec_t** split_rec)/*!< out: if split recommended, - the first record on upper half page, - or NULL if tuple should be first */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Splits an index page to halves and inserts the tuple. It is assumed -that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is -released within this function! 
NOTE that the operation of this -function must always succeed, we cannot reverse it: therefore enough -free disk space (2 pages) must be guaranteed to be available before -this function is called. - -@return inserted record */ -UNIV_INTERN -rec_t* -btr_page_split_and_insert( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor at which to insert; when the - function returns, the cursor is positioned - on the predecessor of the inserted record */ - ulint** offsets,/*!< out: offsets on inserted record */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap - that can be emptied, or NULL */ - const dtuple_t* tuple, /*!< in: tuple to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull(2,3,4,7), warn_unused_result)); -/*******************************************************//** -Inserts a data tuple to a tree on a non-leaf level. It is assumed -that mtr holds an x-latch on the tree. */ -UNIV_INTERN -void -btr_insert_on_non_leaf_level_func( -/*==============================*/ - ulint flags, /*!< in: undo logging and locking flags */ - dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: level, must be > 0 */ - dtuple_t* tuple, /*!< in: the record to be inserted */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -# define btr_insert_on_non_leaf_level(f,i,l,t,m) \ - btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m) -#endif /* !UNIV_HOTBACKUP */ -/****************************************************************//** -Sets a record as the predefined minimum record. 
*/ -UNIV_INTERN -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /*!< in/out: record */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Deletes on the upper level the node pointer to a page. */ -UNIV_INTERN -void -btr_node_ptr_delete( -/*================*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: page whose node pointer is deleted */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -#ifdef UNIV_DEBUG -/************************************************************//** -Checks that the node pointer to a page is appropriate. -@return TRUE */ -UNIV_INTERN -ibool -btr_check_node_ptr( -/*===============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: index page */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ -/*************************************************************//** -Tries to merge the page first to the left immediate brother if such a -brother exists, and the node pointers to the current page and to the -brother reside on the same page. If the left brother does not satisfy these -conditions, looks at the right brother. If the page is the only one on that -level lifts the records of the page to the father page, thus reducing the -tree height. It is assumed that mtr holds an x-latch on the tree and on the -page. If cursor is on the leaf level, mtr must also hold x-latches to -the brothers, if they exist. 
-@return TRUE on success */ -UNIV_INTERN -ibool -btr_compress( -/*=========*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Discards a page from a B-tree. This is used to remove the last record from -a B-tree page: the whole page must be removed at the same time. This cannot -be used for the root page, which is allowed to be empty. */ -UNIV_INTERN -void -btr_discard_page( -/*=============*/ - btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on - the root page */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/****************************************************************//** -Parses the redo log record for setting an index record as the predefined -minimum record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_parse_set_min_rec_mark( -/*=======================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - ulint comp, /*!< in: nonzero=compact page format */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ - MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); -/***********************************************************//** -Parses a redo log record of reorganizing a page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_parse_page_reorganize( -/*======================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - dict_index_t* index, /*!< in: record descriptor */ - bool compressed,/*!< in: true if compressed page */ - buf_block_t* block, /*!< in: page to be reorganized, or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3), warn_unused_result)); -#ifndef UNIV_HOTBACKUP -/**************************************************************//** -Gets the number of pages in a B-tree. -@return number of pages, or ULINT_UNDEFINED if the index is unavailable */ -UNIV_INTERN -ulint -btr_get_size( -/*=========*/ - dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Gets the number of reserved and used pages in a B-tree. -@return number of pages reserved, or ULINT_UNDEFINED if the index -is unavailable */ -UNIV_INTERN -ulint -btr_get_size_and_reserved( -/*======================*/ - dict_index_t* index, /*!< in: index */ - ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ - ulint* used, /*!< out: number of pages used (<= reserved) */ - mtr_t* mtr) /*!< in/out: mini-transaction where index - is s-latched */ - __attribute__((nonnull)); - -/**************************************************************//** -Allocates a new file page to be used in an index tree. NOTE: we assume -that the caller has made the reservation for free extents! 
-@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded -(init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -UNIV_INTERN -buf_block_t* -btr_page_alloc( -/*===========*/ - dict_index_t* index, /*!< in: index tree */ - ulint hint_page_no, /*!< in: hint of a good page */ - byte file_direction, /*!< in: direction where a possible - page split is made */ - ulint level, /*!< in: level where the page is placed - in the tree */ - mtr_t* mtr, /*!< in/out: mini-transaction - for the allocation */ - mtr_t* init_mtr) /*!< in/out: mini-transaction - for x-latching and initializing - the page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Frees a file page used in an index tree. NOTE: cannot free field external -storage pages because the page must contain info on its level. */ -UNIV_INTERN -void -btr_page_free( -/*==========*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Frees a file page used in an index tree. Can be used also to BLOB -external storage pages, because the page level 0 can be given as an -argument. */ -UNIV_INTERN -void -btr_page_free_low( -/*==============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: block to be freed, x-latched */ - ulint level, /*!< in: page level */ - bool blob, /*!< in: blob page */ - mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull)); -/*************************************************************//** -Reorganizes an index page. - -IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. 
This has to -be done either within the same mini-transaction, or by invoking -ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages, -IBUF_BITMAP_FREE is unaffected by reorganization. - -@retval true if the operation was successful -@retval false if it is a compressed page, and recompression failed */ -UNIV_INTERN -bool -btr_page_reorganize_block( -/*======================*/ - bool recovery,/*!< in: true if called in recovery: - locks should not be updated, i.e., - there cannot exist locks on the - page, and a hash index should not be - dropped: it cannot exist */ - ulint z_level,/*!< in: compression level to be used - if dealing with compressed page */ - buf_block_t* block, /*!< in/out: B-tree page */ - dict_index_t* index, /*!< in: the index tree of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); - -#ifdef UNIV_BTR_PRINT -/*************************************************************//** -Prints size info of a B-tree. */ -UNIV_INTERN -void -btr_print_size( -/*===========*/ - dict_index_t* index) /*!< in: index tree */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Prints directories and other info of all nodes in the index. */ -UNIV_INTERN -void -btr_print_index( -/*============*/ - dict_index_t* index, /*!< in: index */ - ulint width) /*!< in: print this many entries from start - and end */ - MY_ATTRIBUTE((nonnull)); -#endif /* UNIV_BTR_PRINT */ -/************************************************************//** -Checks the size and number of fields in a record based on the definition of -the index. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -btr_index_rec_validate( -/*===================*/ - const rec_t* rec, /*!< in: index record */ - const dict_index_t* index, /*!< in: index */ - ibool dump_on_error) /*!< in: TRUE if the function - should print hex dump of record - and page on error */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Checks the consistency of an index tree. -@return DB_SUCCESS if ok, error code if not */ -UNIV_INTERN -dberr_t -btr_validate_index( -/*===============*/ - dict_index_t* index, /*!< in: index */ - const trx_t* trx) /*!< in: transaction or 0 */ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); - -#ifdef UNIV_SYNC_DEBUG -/*************************************************************//** -Removes a page from the level list of pages. -@param space in: space where removed -@param zip_size in: compressed page size in bytes, or 0 for uncompressed -@param page in/out: page to remove -@param index in: index tree -@param mtr in/out: mini-transaction */ -# define btr_level_list_remove(space,zip_size,page,index,mtr) \ - btr_level_list_remove_func(space,zip_size,page,index,mtr) -#else /* UNIV_SYNC_DEBUG */ -/*************************************************************//** -Removes a page from the level list of pages. -@param space in: space where removed -@param zip_size in: compressed page size in bytes, or 0 for uncompressed -@param page in/out: page to remove -@param index in: index tree -@param mtr in/out: mini-transaction */ -# define btr_level_list_remove(space,zip_size,page,index,mtr) \ - btr_level_list_remove_func(space,zip_size,page,index,mtr) -#endif /* UNIV_SYNC_DEBUG */ - -/*************************************************************//** -Removes a page from the level list of pages. 
*/ -UNIV_INTERN -void -btr_level_list_remove_func( -/*=======================*/ - ulint space, /*!< in: space where removed */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - page_t* page, /*!< in/out: page to remove */ - dict_index_t* index, /*!< in: index tree */ - mtr_t* mtr); /*!< in/out: mini-transaction */ - -/*************************************************************//** -If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. -@return father block */ -UNIV_INTERN -buf_block_t* -btr_lift_page_up( -/*=============*/ - dict_index_t* index, /*!< in: index tree */ - buf_block_t* block, /*!< in: page which is the only on its level; - must not be empty: use - btr_discard_only_page_on_level if the last - record from the page should be removed */ - mtr_t* mtr) /*!< in: mtr */ - __attribute__((nonnull)); - -#define BTR_N_LEAF_PAGES 1 -#define BTR_TOTAL_SIZE 2 -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_NONINL -#include "btr0btr.ic" -#endif - -/**************************************************************** -Global variable controlling if scrubbing should be performed */ -extern my_bool srv_immediate_scrub_data_uncompressed; - -#endif diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic deleted file mode 100644 index 0f5f025d6a3..00000000000 --- a/storage/xtradb/include/btr0btr.ic +++ /dev/null @@ -1,335 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/btr0btr.ic -The B-tree - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" -#ifndef UNIV_HOTBACKUP -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "page0zip.h" - -#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level - (not really a hard limit). - Used in debug assertions - in btr_page_set_level and - btr_page_get_level_low */ - -/**************************************************************//** -Gets a buffer page and declares its latching order level. 
*/ -UNIV_INLINE -buf_block_t* -btr_block_get_func( -/*===============*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - ulint mode, /*!< in: latch mode */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - dict_index_t* index, /*!< in: index tree, may be NULL - if it is not an insert buffer tree */ - mtr_t* mtr) /*!< in/out: mtr */ -{ - buf_block_t* block; - dberr_t err; - - block = buf_page_get_gen(space, zip_size, page_no, mode, - NULL, BUF_GET, file, line, mtr, &err); - - if (err == DB_DECRYPTION_FAILED) { - if (index && index->table) { - index->table->file_unreadable = true; - } - } - - if (block) { - if (mode != RW_NO_LATCH) { - - buf_block_dbg_add_level( - block, index != NULL && dict_index_is_ibuf(index) - ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE); - } - } - - return(block); -} - -/**************************************************************//** -Sets the index id field of a page. */ -UNIV_INLINE -void -btr_page_set_index_id( -/*==================*/ - page_t* page, /*!< in: page to be created */ - page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed - part will be updated, or NULL */ - index_id_t id, /*!< in: index id */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (page_zip) { - mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id); - page_zip_write_header(page_zip, - page + (PAGE_HEADER + PAGE_INDEX_ID), - 8, mtr); - } else { - mlog_write_ull(page + (PAGE_HEADER + PAGE_INDEX_ID), id, mtr); - } -} - -/** Gets a buffer page and declares its latching order level. 
-@param space tablespace identifier -@param zip_size compressed page size in bytes or 0 for uncompressed pages -@param page_no page number -@param mode latch mode -@param idx index tree, may be NULL if not the insert buffer tree -@param mtr mini-transaction handle -@return the uncompressed page frame */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, - ulint zip_size, - ulint root_page_no, - ulint mode, - dict_index_t* index, - mtr_t* mtr) -{ - buf_block_t* block=NULL; - buf_frame_t* frame=NULL; - - block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr); - - if (block) { - frame = buf_block_get_frame(block); - } - - return ((page_t*)frame); -} - -#endif /* !UNIV_HOTBACKUP */ - -/**************************************************************//** -Gets the index id field of a page. -@return index id */ -UNIV_INLINE -index_id_t -btr_page_get_index_id( -/*==================*/ - const page_t* page) /*!< in: index page */ -{ - return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Gets the node level field in an index page. -@return level, leaf level == 0 */ -UNIV_INLINE -ulint -btr_page_get_level_low( -/*===================*/ - const page_t* page) /*!< in: index page */ -{ - ulint level; - - ut_ad(page); - - level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL); - - ut_ad(level <= BTR_MAX_NODE_LEVEL); - - return(level); -} - -/********************************************************//** -Sets the node level field in an index page. 
*/ -UNIV_INLINE -void -btr_page_set_level( -/*===============*/ - page_t* page, /*!< in: index page */ - page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed - part will be updated, or NULL */ - ulint level, /*!< in: level, leaf level == 0 */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ut_ad(page && mtr); - ut_ad(level <= BTR_MAX_NODE_LEVEL); - - if (page_zip) { - mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level); - page_zip_write_header(page_zip, - page + (PAGE_HEADER + PAGE_LEVEL), - 2, mtr); - } else { - mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level, - MLOG_2BYTES, mtr); - } -} - -/********************************************************//** -Gets the next index page number. -@return next page number */ -UNIV_INLINE -ulint -btr_page_get_next( -/*==============*/ - const page_t* page, /*!< in: index page */ - mtr_t* mtr MY_ATTRIBUTE((unused))) - /*!< in: mini-transaction handle */ -{ - ut_ad(page != NULL); - ut_ad(mtr != NULL); -#ifndef UNIV_INNOCHECKSUM - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX)); -#endif /* UNIV_INNOCHECKSUM */ - return(mach_read_from_4(page + FIL_PAGE_NEXT)); -} - -/********************************************************//** -Sets the next index page field. */ -UNIV_INLINE -void -btr_page_set_next( -/*==============*/ - page_t* page, /*!< in: index page */ - page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed - part will be updated, or NULL */ - ulint next, /*!< in: next page number */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ut_ad(page != NULL); - ut_ad(mtr != NULL); - - if (page_zip) { - mach_write_to_4(page + FIL_PAGE_NEXT, next); - page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr); - } else { - mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr); - } -} - -/********************************************************//** -Gets the previous index page number. 
-@return prev page number */ -UNIV_INLINE -ulint -btr_page_get_prev( -/*==============*/ - const page_t* page, /*!< in: index page */ - mtr_t* mtr MY_ATTRIBUTE((unused))) /*!< in: mini-transaction handle */ -{ - ut_ad(page != NULL); - ut_ad(mtr != NULL); - - return(mach_read_from_4(page + FIL_PAGE_PREV)); -} - -/********************************************************//** -Sets the previous index page field. */ -UNIV_INLINE -void -btr_page_set_prev( -/*==============*/ - page_t* page, /*!< in: index page */ - page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed - part will be updated, or NULL */ - ulint prev, /*!< in: previous page number */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ut_ad(page != NULL); - ut_ad(mtr != NULL); - - if (page_zip) { - mach_write_to_4(page + FIL_PAGE_PREV, prev); - page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr); - } else { - mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr); - } -} - -/**************************************************************//** -Gets the child node file address in a node pointer. -NOTE: the offsets array must contain all offsets for the record since -we read the last field according to offsets and assume that it contains -the child page number. In other words offsets must have been retrieved -with rec_get_offsets(n_fields=ULINT_UNDEFINED). 
-@return child node address */ -UNIV_INLINE -ulint -btr_node_ptr_get_child_page_no( -/*===========================*/ - const rec_t* rec, /*!< in: node pointer record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - const byte* field; - ulint len; - ulint page_no; - - ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); - - /* The child address is in the last field */ - field = rec_get_nth_field(rec, offsets, - rec_offs_n_fields(offsets) - 1, &len); - - ut_ad(len == 4); - - page_no = mach_read_from_4(field); - - if (page_no == 0) { - fprintf(stderr, - "InnoDB: a nonsensical page number 0" - " in a node ptr record at offset %lu\n", - (ulong) page_offset(rec)); - buf_page_print(page_align(rec), 0, 0); - ut_ad(0); - } - - return(page_no); -} - -/**************************************************************//** -Releases the latches on a leaf page and bufferunfixes it. */ -UNIV_INLINE -void -btr_leaf_page_release( -/*==================*/ - buf_block_t* block, /*!< in: buffer block */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)); - - mtr_memo_release(mtr, block, - latch_mode == BTR_SEARCH_LEAF - ? MTR_MEMO_PAGE_S_FIX - : MTR_MEMO_PAGE_X_FIX); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h deleted file mode 100644 index e478b33bf8e..00000000000 --- a/storage/xtradb/include/btr0cur.h +++ /dev/null @@ -1,946 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/btr0cur.h -The index tree cursor - -Created 10/16/1994 Heikki Tuuri -*******************************************************/ - -#ifndef btr0cur_h -#define btr0cur_h - -#include "univ.i" -#include "dict0dict.h" -#include "page0cur.h" -#include "btr0types.h" - -/** Mode flags for btr_cur operations; these can be ORed */ -enum { - /** do no undo logging */ - BTR_NO_UNDO_LOG_FLAG = 1, - /** do no record lock checking */ - BTR_NO_LOCKING_FLAG = 2, - /** sys fields will be found in the update vector or inserted - entry */ - BTR_KEEP_SYS_FLAG = 4, - /** btr_cur_pessimistic_update() must keep cursor position - when moving columns to big_rec */ - BTR_KEEP_POS_FLAG = 8, - /** the caller is creating the index or wants to bypass the - index->info.online creation log */ - BTR_CREATE_FLAG = 16, - /** the caller of btr_cur_optimistic_update() or - btr_cur_update_in_place() will take care of - updating IBUF_BITMAP_FREE */ - BTR_KEEP_IBUF_BITMAP = 32 -}; - -#ifndef UNIV_HOTBACKUP -#include "que0types.h" -#include "row0types.h" -#include "ha0ha.h" - -#define BTR_CUR_ADAPT -#define BTR_CUR_HASH_ADAPT - -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the 
page cursor component of a tree cursor. -@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_cur_get_page_cur( -/*=================*/ - const btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Returns the buffer block on which the tree cursor is positioned. -@return pointer to buffer block */ -UNIV_INLINE -buf_block_t* -btr_cur_get_block( -/*==============*/ - const btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Returns the record pointer of a tree cursor. -@return pointer to record */ -UNIV_INLINE -rec_t* -btr_cur_get_rec( -/*============*/ - const btr_cur_t* cursor);/*!< in: tree cursor */ -#else /* UNIV_DEBUG */ -# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur) -# define btr_cur_get_block(cursor) ((cursor)->page_cur.block) -# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec) -#endif /* UNIV_DEBUG */ -/*********************************************************//** -Returns the compressed page on which the tree cursor is positioned. -@return pointer to compressed page, or NULL if the page is not compressed */ -UNIV_INLINE -page_zip_des_t* -btr_cur_get_page_zip( -/*=================*/ - btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Invalidates a tree cursor by setting record pointer to NULL. */ -UNIV_INLINE -void -btr_cur_invalidate( -/*===============*/ - btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Returns the page of a tree cursor. -@return pointer to page */ -UNIV_INLINE -page_t* -btr_cur_get_page( -/*=============*/ - btr_cur_t* cursor);/*!< in: tree cursor */ -/*********************************************************//** -Returns the index of a cursor. 
-@param cursor b-tree cursor -@return index */ -#define btr_cur_get_index(cursor) ((cursor)->index) -/*********************************************************//** -Positions a tree cursor at a given record. */ -UNIV_INLINE -void -btr_cur_position( -/*=============*/ - dict_index_t* index, /*!< in: index */ - rec_t* rec, /*!< in: record in tree */ - buf_block_t* block, /*!< in: buffer block of rec */ - btr_cur_t* cursor);/*!< in: cursor */ -/********************************************************************//** -Searches an index tree and positions a tree cursor on a given level. -NOTE: n_fields_cmp in tuple must be set so that it cannot be compared -to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then -cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */ -UNIV_INTERN -dberr_t -btr_cur_search_to_nth_level( -/*========================*/ - dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: the tree level of search */ - const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in - tuple must be set so that it cannot get - compared to the node ptr page number field! */ - ulint mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be PAGE_CUR_LE, - not PAGE_CUR_GE, as the latter may end up on - the previous page of the record! Inserts - should always be made using PAGE_CUR_LE to - search the position! 
*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with - at most one of BTR_INSERT, BTR_DELETE_MARK, - BTR_DELETE, or BTR_ESTIMATE; - cursor->left_block is used to store a pointer - to the left neighbor page, in the cases - BTR_SEARCH_PREV and BTR_MODIFY_PREV; - NOTE that if has_search_latch - is != 0, we maybe do not have a latch set - on the cursor page, we assume - the caller uses his search latch - to protect the record! */ - btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is - s- or x-latched, but see also above! */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Opens a cursor at either end of an index. */ -UNIV_INTERN -dberr_t -btr_cur_open_at_index_side_func( -/*============================*/ - bool from_left, /*!< in: true if open to the low end, - false if to the high end */ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: latch mode */ - btr_cur_t* cursor, /*!< in/out: cursor */ - ulint level, /*!< in: level to search for - (0=leaf) */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \ - btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m) -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. */ -UNIV_INTERN -void -btr_cur_open_at_rnd_pos_func( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ - btr_cur_t* cursor, /*!< in/out: B-tree cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -#define btr_cur_open_at_rnd_pos(i,l,c,m) \ - btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m) -/*************************************************************//** -Tries to perform an insert to a page in an index tree, next to cursor. -It is assumed that mtr holds an x-latch on the page. The operation does -not succeed if there is too little space on the page. If there is just -one record on the page, the insert will always succeed; this is to -prevent trying to split a page with just one record. -@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */ -UNIV_INTERN -dberr_t -btr_cur_optimistic_insert( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags: if not - zero, the parameters index and thr should be - specified */ - btr_cur_t* cursor, /*!< in: cursor on page after which to insert; - cursor stays valid */ - ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap */ - dtuple_t* entry, /*!< in/out: entry to insert */ - rec_t** rec, /*!< out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller */ - ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in/out: query thread; can be NULL if - !(~flags - & (BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG)) */ - mtr_t* mtr) /*!< in/out: mini-transaction; - if this function returns DB_SUCCESS on - a leaf page of a secondary index in a - compressed tablespace, the caller must - mtr_commit(mtr) before latching - any further pages */ - MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result)); -/*************************************************************//** -Performs an insert on a page of an index tree. 
It is assumed that mtr -holds an x-latch on the tree and on the cursor page. If the insert is -made on the leaf level, to avoid deadlocks, mtr must also own x-latches -to brothers of page, if those brothers exist. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -btr_cur_pessimistic_insert( -/*=======================*/ - ulint flags, /*!< in: undo logging and locking flags: if not - zero, the parameter thr should be - specified; if no undo logging is specified, - then the caller must have reserved enough - free extents in the file space so that the - insertion will certainly succeed */ - btr_cur_t* cursor, /*!< in: cursor after which to insert; - cursor stays valid */ - ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap - that can be emptied */ - dtuple_t* entry, /*!< in/out: entry to insert */ - rec_t** rec, /*!< out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller */ - ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in/out: query thread; can be NULL if - !(~flags - & (BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG)) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result)); -/*************************************************************//** -See if there is enough place in the page modification log to log -an update-in-place. - -@retval false if out of space; IBUF_BITMAP_FREE will be reset -outside mtr if the page was recompressed -@retval true if enough place; - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is -a secondary index leaf page. This has to be done either within the -same mini-transaction, or by invoking ibuf_reset_free_bits() before -mtr_commit(mtr). 
*/ -UNIV_INTERN -bool -btr_cur_update_alloc_zip_func( -/*==========================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - page_cur_t* cursor, /*!< in/out: B-tree page cursor */ - dict_index_t* index, /*!< in: the index corresponding to cursor */ -#ifdef UNIV_DEBUG - ulint* offsets,/*!< in/out: offsets of the cursor record */ -#endif /* UNIV_DEBUG */ - ulint length, /*!< in: size needed */ - bool create, /*!< in: true=delete-and-insert, - false=update-in-place */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - trx_t* trx) /*!< in: NULL or transaction */ - MY_ATTRIBUTE((warn_unused_result)); - -#ifdef UNIV_DEBUG -# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr,trx) \ - btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr,trx) -#else /* UNIV_DEBUG */ -# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr,trx) \ - btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr,trx) -#endif /* UNIV_DEBUG */ -/*************************************************************//** -Updates a record when the update causes no size changes in its fields. 
-@return locking or undo log related error code, or -@retval DB_SUCCESS on success -@retval DB_ZIP_OVERFLOW if there is not enough space left -on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */ -UNIV_INTERN -dberr_t -btr_cur_update_in_place( -/*====================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */ - const upd_t* update, /*!< in: update vector */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction; if this - is a secondary index, the caller must - mtr_commit(mtr) before latching any - further pages */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -/***********************************************************//** -Writes a redo log record of updating a record in-place. */ -UNIV_INTERN -void -btr_cur_update_in_place_log( -/*========================*/ - ulint flags, /*!< in: flags */ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update, /*!< in: update vector */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr, /*!< in: roll ptr */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Tries to update a record on a page in an index tree. It is assumed that mtr -holds an x-latch on the page. The operation does not succeed if there is too -little space on the page or if the update would result in too empty a page, -so that tree compression is recommended. 
-@return error code, including -@retval DB_SUCCESS on success -@retval DB_OVERFLOW if the updated record does not fit -@retval DB_UNDERFLOW if the page would become too empty -@retval DB_ZIP_OVERFLOW if there is not enough space left -on the compressed page */ -UNIV_INTERN -dberr_t -btr_cur_optimistic_update( -/*======================*/ - ulint flags, /*!< in: undo logging and locking flags */ - btr_cur_t* cursor, /*!< in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ - mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */ - const upd_t* update, /*!< in: update vector; this must also - contain trx id and roll ptr fields */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction; if this - is a secondary index, the caller must - mtr_commit(mtr) before latching any - further pages */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -/*************************************************************//** -Performs an update of a record on a page of a tree. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. If the -update is made on the leaf level, to avoid deadlocks, mtr must also -own x-latches to brothers of page, if those brothers exist. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -btr_cur_pessimistic_update( -/*=======================*/ - ulint flags, /*!< in: undo logging, locking, and rollback - flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ - ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ - mem_heap_t** offsets_heap, - /*!< in/out: pointer to memory heap - that can be emptied */ - mem_heap_t* entry_heap, - /*!< in/out: memory heap for allocating - big_rec and the index tuple */ - big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller */ - const upd_t* update, /*!< in: update vector; this is allowed to also - contain trx id and roll ptr fields, but - the values in update vector have no effect */ - ulint cmpl_info,/*!< in: compiler info on secondary index - updates */ - que_thr_t* thr, /*!< in: query thread */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction; must be committed - before latching any further pages */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -/***********************************************************//** -Marks a clustered index record deleted. Writes an undo log record to -undo log on this delete marking. Writes in the trx id field the id -of the deleting transaction, and in the roll ptr field pointer to the -undo log record created. 
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN -dberr_t -btr_cur_del_mark_set_clust_rec( -/*===========================*/ - buf_block_t* block, /*!< in/out: buffer block of the record */ - rec_t* rec, /*!< in/out: record */ - dict_index_t* index, /*!< in: clustered index of the record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec) */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((warn_unused_result)); -/***********************************************************//** -Sets a secondary index record delete mark to TRUE or FALSE. -@return DB_SUCCESS, DB_LOCK_WAIT, or error number */ -UNIV_INTERN -dberr_t -btr_cur_del_mark_set_sec_rec( -/*=========================*/ - ulint flags, /*!< in: locking flag */ - btr_cur_t* cursor, /*!< in: cursor */ - ibool val, /*!< in: value to set */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((warn_unused_result)); -/*************************************************************//** -Tries to compress a page of the tree if it seems useful. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. To avoid -deadlocks, mtr must also own x-latches to brothers of page, if those -brothers exist. NOTE: it is assumed that the caller has reserved enough -free extents so that the compression will always succeed if done! -@return TRUE if compression occurred */ -UNIV_INTERN -ibool -btr_cur_compress_if_useful( -/*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; - cursor does not stay valid if compression - occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/*******************************************************//** -Removes the record on which the tree cursor is positioned. 
It is assumed -that the mtr has an x-latch on the page where the cursor is positioned, -but no latch on the whole tree. -@return TRUE if success, i.e., the page did not become too empty */ -UNIV_INTERN -ibool -btr_cur_optimistic_delete_func( -/*===========================*/ - btr_cur_t* cursor, /*!< in: cursor on the record to delete; - cursor stays valid: if deletion succeeds, - on function exit it points to the successor - of the deleted record */ -# ifdef UNIV_DEBUG - ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ -# endif /* UNIV_DEBUG */ - mtr_t* mtr) /*!< in: mtr; if this function returns - TRUE on a leaf page of a secondary - index, the mtr must be committed - before latching any further pages */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -# ifdef UNIV_DEBUG -# define btr_cur_optimistic_delete(cursor, flags, mtr) \ - btr_cur_optimistic_delete_func(cursor, flags, mtr) -# else /* UNIV_DEBUG */ -# define btr_cur_optimistic_delete(cursor, flags, mtr) \ - btr_cur_optimistic_delete_func(cursor, mtr) -# endif /* UNIV_DEBUG */ -/*************************************************************//** -Removes the record on which the tree cursor is positioned. Tries -to compress the page if its fillfactor drops below a threshold -or if it is the only page on the level. It is assumed that mtr holds -an x-latch on the tree and on the cursor page. To avoid deadlocks, -mtr must also own x-latches to brothers of page, if those brothers -exist. 
-@return TRUE if compression occurred */ -UNIV_INTERN -ibool -btr_cur_pessimistic_delete( -/*=======================*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; - the latter may occur because we may have - to update node pointers on upper levels, - and in the case of variable length keys - these may actually grow in size */ - ibool has_reserved_extents, /*!< in: TRUE if the - caller has already reserved enough free - extents so that he knows that the operation - will succeed */ - btr_cur_t* cursor, /*!< in: cursor on the record to delete; - if compression does not occur, the cursor - stays valid: it points to successor of - deleted record on function exit */ - ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************//** -Parses a redo log record of updating a record in-place. -@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_cur_parse_update_in_place( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in/out: page or NULL */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - dict_index_t* index); /*!< in: index corresponding to page */ -/****************************************************************//** -Parses the redo log record for delete marking or unmarking of a clustered -index record. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_cur_parse_del_mark_set_clust_rec( -/*=================================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in/out: page or NULL */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - dict_index_t* index); /*!< in: index corresponding to page */ -/****************************************************************//** -Parses the redo log record for delete marking or unmarking of a secondary -index record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -btr_cur_parse_del_mark_set_sec_rec( -/*===============================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in/out: page or NULL */ - page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */ -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Estimates the number of rows in a given index range. -@return estimated number of rows */ -UNIV_INTERN -ib_int64_t -btr_estimate_n_rows_in_range( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */ - ulint mode1, /*!< in: search mode for range start */ - const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */ - ulint mode2, /*!< in: search mode for range end */ - trx_t* trx); /*!< in: trx */ -/*******************************************************************//** -Estimates the number of different key values in a given index, for -each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed -0..n_uniq-1) and the number of pages that were sampled is saved in -index->stat_n_sample_sizes[]. 
-If innodb_stats_method is nulls_ignored, we also record the number of -non-null values for each prefix and stored the estimates in -array index->stat_n_non_null_key_vals. */ -UNIV_INTERN -void -btr_estimate_number_of_different_key_vals( -/*======================================*/ - dict_index_t* index); /*!< in: index */ - -/** Gets the externally stored size of a record, in units of a database page. -@param[in] rec record -@param[in] offsets array returned by rec_get_offsets() -@return externally stored part, in units of a database page */ - -ulint -btr_rec_get_externally_stored_len( - const rec_t* rec, - const ulint* offsets); - -/*******************************************************************//** -Marks non-updated off-page fields as disowned by this record. The ownership -must be transferred to the updated record which is inserted elsewhere in the -index tree. In purge only the owner of externally stored field is allowed -to free the field. */ -UNIV_INTERN -void -btr_cur_disown_inherited_fields( -/*============================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed - part will be updated, or NULL */ - rec_t* rec, /*!< in/out: record in a clustered index */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - const upd_t* update, /*!< in: update vector */ - mtr_t* mtr); /*!< in/out: mini-transaction */ - -/** Operation code for btr_store_big_rec_extern_fields(). */ -enum blob_op { - /** Store off-page columns for a freshly inserted record */ - BTR_STORE_INSERT = 0, - /** Store off-page columns for an insert by update */ - BTR_STORE_INSERT_UPDATE, - /** Store off-page columns for an update */ - BTR_STORE_UPDATE -}; - -/*******************************************************************//** -Determine if an operation on off-page columns is an update. 
-@return TRUE if op != BTR_STORE_INSERT */ -UNIV_INLINE -ibool -btr_blob_op_is_update( -/*==================*/ - enum blob_op op) /*!< in: operation */ - MY_ATTRIBUTE((warn_unused_result)); - -/*******************************************************************//** -Stores the fields in big_rec_vec to the tablespace and puts pointers to -them in rec. The extern flags in rec will have to be set beforehand. -The fields are stored on pages allocated from leaf node -file segment of the index tree. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -UNIV_INTERN -dberr_t -btr_store_big_rec_extern_fields( -/*============================*/ - dict_index_t* index, /*!< in: index of rec; the index tree - MUST be X-latched */ - buf_block_t* rec_block, /*!< in/out: block containing rec */ - rec_t* rec, /*!< in/out: record */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index); - the "external storage" flags in offsets - will not correspond to rec when - this function returns */ - const big_rec_t*big_rec_vec, /*!< in: vector containing fields - to be stored externally */ - mtr_t* btr_mtr, /*!< in: mtr containing the - latches to the clustered index */ - enum blob_op op) /*!< in: operation code */ - MY_ATTRIBUTE((warn_unused_result)); - -/*******************************************************************//** -Frees the space in an externally stored field to the file space -management if the field in data owns the externally stored field, -in a rollback we may have the additional condition that the field must -not be inherited. */ -UNIV_INTERN -void -btr_free_externally_stored_field( -/*=============================*/ - dict_index_t* index, /*!< in: index of the data, the index - tree MUST be X-latched; if the tree - height is 1, then also the root page - must be X-latched! 
(this is relevant - in the case this function is called - from purge where 'data' is located on - an undo log page, not an index - page) */ - byte* field_ref, /*!< in/out: field reference */ - const rec_t* rec, /*!< in: record containing field_ref, for - page_zip_write_blob_ptr(), or NULL */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index), - or NULL */ - page_zip_des_t* page_zip, /*!< in: compressed page corresponding - to rec, or NULL if rec == NULL */ - ulint i, /*!< in: field number of field_ref; - ignored if rec == NULL */ - enum trx_rb_ctx rb_ctx, /*!< in: rollback context */ - mtr_t* local_mtr); /*!< in: mtr containing the latch to - data and an X-latch to the index - tree */ -/*******************************************************************//** -Copies the prefix of an externally stored field of a record. The -clustered index record must be protected by a lock or a page latch. -@return the length of the copied field, or 0 if the column was being -or has been deleted */ -UNIV_INTERN -ulint -btr_copy_externally_stored_field_prefix( -/*====================================*/ - byte* buf, /*!< out: the field, or a prefix of it */ - ulint len, /*!< in: length of buf, in bytes */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint local_len,/*!< in: length of data, in bytes */ - trx_t* trx); /*!< in: transaction handle */ -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. The -clustered index record must be protected by a lock or a page latch. 
-@return the whole field copied to heap */ -UNIV_INTERN -byte* -btr_copy_externally_stored_field( -/*=============================*/ - ulint* len, /*!< out: length of the whole field */ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part; must be protected by - a lock or a page latch */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint local_len,/*!< in: length of data */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx); /*!< in: transaction handle */ -/*******************************************************************//** -Copies an externally stored field of a record to mem heap. -@return the field copied to heap, or NULL if the field is incomplete */ -UNIV_INTERN -byte* -btr_rec_copy_externally_stored_field( -/*=================================*/ - const rec_t* rec, /*!< in: record in a clustered index; - must be protected by a lock or a page latch */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint zip_size,/*!< in: nonzero=compressed BLOB page size, - zero for uncompressed BLOBs */ - ulint no, /*!< in: field number */ - ulint* len, /*!< out: length of the field */ - mem_heap_t* heap, /*!< in: mem heap */ - trx_t* trx); /*!< in: transaction handle */ -/*******************************************************************//** -Flags the data tuple fields that are marked as extern storage in the -update vector. We use this function to remember which fields we must -mark as extern storage in a record inserted for an update. 
-@return number of flagged external columns */ -UNIV_INTERN -ulint -btr_push_update_extern_fields( -/*==========================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const upd_t* update, /*!< in: update vector */ - mem_heap_t* heap); /*!< in: memory heap */ -/***********************************************************//** -Sets a secondary index record's delete mark to the given value. This -function is only used by the insert buffer merge mechanism. */ -UNIV_INTERN -void -btr_cur_set_deleted_flag_for_ibuf( -/*==============================*/ - rec_t* rec, /*!< in/out: record */ - page_zip_des_t* page_zip, /*!< in/out: compressed page - corresponding to rec, or NULL - when the tablespace is - uncompressed */ - ibool val, /*!< in: value to set */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/*######################################################################*/ - -/** In the pessimistic delete, if the page data size drops below this -limit, merging it to a neighbor is tried */ -#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2) - -/** A slot in the path array. We store here info on a search path down the -tree. Each slot contains data on a single level of the tree. 
*/ - -struct btr_path_t{ - ulint nth_rec; /*!< index of the record - where the page cursor stopped on - this level (index in alphabetical - order); value ULINT_UNDEFINED - denotes array end */ - ulint n_recs; /*!< number of records on the page */ - ulint page_no; /*!< no of the page containing the record */ - ulint page_level; /*!< level of the page, if later we fetch - the page under page_no and it is on a different - level then we know that the tree has been - reorganized */ -}; - -#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */ - -/** Values for the flag documenting the used search method */ -enum btr_cur_method { - BTR_CUR_HASH = 1, /*!< successful shortcut using - the hash index */ - BTR_CUR_HASH_FAIL, /*!< failure using hash, success using - binary search: the misleading hash - reference is stored in the field - hash_node, and might be necessary to - update */ - BTR_CUR_BINARY, /*!< success using the binary search */ - BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to - the insert buffer */ - BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete - mark in the insert/delete buffer */ - BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in - the insert/delete buffer */ - BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */ -}; - -/** The tree cursor: the definition appears here only for the compiler -to know struct size! 
*/ -struct btr_cur_t { - dict_index_t* index; /*!< index where positioned */ - page_cur_t page_cur; /*!< page cursor */ - purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */ - buf_block_t* left_block; /*!< this field is used to store - a pointer to the left neighbor - page, in the cases - BTR_SEARCH_PREV and - BTR_MODIFY_PREV */ - /*------------------------------*/ - que_thr_t* thr; /*!< this field is only used - when btr_cur_search_to_nth_level - is called for an index entry - insertion: the calling query - thread is passed here to be - used in the insert buffer */ - /*------------------------------*/ - /** The following fields are used in - btr_cur_search_to_nth_level to pass information: */ - /* @{ */ - enum btr_cur_method flag; /*!< Search method used */ - ulint tree_height; /*!< Tree height if the search is done - for a pessimistic insert or update - operation */ - ulint up_match; /*!< If the search mode was PAGE_CUR_LE, - the number of matched fields to - the first user record to the right of - the cursor record after - btr_cur_search_to_nth_level; - for the mode PAGE_CUR_GE, the matched - fields to the first user record AT THE - CURSOR or to the right of it; - NOTE that the up_match and low_match - values may exceed the correct values - for comparison to the adjacent user - record if that record is on a - different leaf page! (See the note in - row_ins_duplicate_error_in_clust.) */ - ulint up_bytes; /*!< number of matched bytes to the - right at the time cursor positioned; - only used internally in searches: not - defined after the search */ - ulint low_match; /*!< if search mode was PAGE_CUR_LE, - the number of matched fields to the - first user record AT THE CURSOR or - to the left of it after - btr_cur_search_to_nth_level; - NOT defined for PAGE_CUR_GE or any - other search modes; see also the NOTE - in up_match! 
*/ - ulint low_bytes; /*!< number of matched bytes to the - right at the time cursor positioned; - only used internally in searches: not - defined after the search */ - ulint n_fields; /*!< prefix length used in a hash - search if hash_node != NULL */ - ulint n_bytes; /*!< hash prefix bytes if hash_node != - NULL */ - ulint fold; /*!< fold value used in the search if - flag is BTR_CUR_HASH */ - /* @} */ - btr_path_t* path_arr; /*!< in estimating the number of - rows in range, we store in this array - information of the path through - the tree */ -}; - -/** If pessimistic delete fails because of lack of file space, there -is still a good chance of success a little later. Try this many -times. */ -#define BTR_CUR_RETRY_DELETE_N_TIMES 100 -/** If pessimistic delete fails because of lack of file space, there -is still a good chance of success a little later. Sleep this many -microseconds between retries. */ -#define BTR_CUR_RETRY_SLEEP_TIME 50000 - -/** The reference in a field for which data is stored on a different page. -The reference is at the end of the 'locally' stored part of the field. -'Locally' means storage in the index record. -We store locally a long enough prefix of each column so that we can determine -the ordering parts of each index record without looking into the externally -stored part. */ -/*-------------------------------------- @{ */ -#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */ -#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */ -#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header - on that page */ -#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the - length of the externally - stored part of the BLOB. - The 2 highest bits are - reserved to the flags below. 
*/ -/*-------------------------------------- @} */ -/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */ - -/** The most significant bit of BTR_EXTERN_LEN (i.e., the most -significant bit of the byte at smallest address) is set to 1 if this -field does not 'own' the externally stored field; only the owner field -is allowed to free the field in purge! */ -#define BTR_EXTERN_OWNER_FLAG 128 -/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the -second most significant bit of the byte at smallest address) is 1 then -it means that the externally stored field was inherited from an -earlier version of the row. In rollback we are not allowed to free an -inherited external field. */ -#define BTR_EXTERN_INHERITED_FLAG 64 - -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ -extern ulint btr_cur_n_non_sea; -/** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ -extern ulint btr_cur_n_sea; -/** Old value of btr_cur_n_non_sea. Copied by -srv_refresh_innodb_monitor_stats(). Referenced by -srv_printf_innodb_monitor(). */ -extern ulint btr_cur_n_non_sea_old; -/** Old value of btr_cur_n_sea. Copied by -srv_refresh_innodb_monitor_stats(). Referenced by -srv_printf_innodb_monitor(). */ -extern ulint btr_cur_n_sea_old; -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/* Flag to limit optimistic insert records */ -extern uint btr_cur_limit_optimistic_insert_debug; -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_NONINL -#include "btr0cur.ic" -#endif - -#endif diff --git a/storage/xtradb/include/btr0cur.ic b/storage/xtradb/include/btr0cur.ic deleted file mode 100644 index 43ee3304c0e..00000000000 --- a/storage/xtradb/include/btr0cur.ic +++ /dev/null @@ -1,223 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/btr0cur.ic -The index tree cursor - -Created 10/16/1994 Heikki Tuuri -*******************************************************/ - -#ifndef UNIV_HOTBACKUP -#include "btr0btr.h" - -#ifdef UNIV_DEBUG -# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\ -if (btr_cur_limit_optimistic_insert_debug > 1\ - && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\ - CODE;\ -} -#else -# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE) -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the page cursor component of a tree cursor. -@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_cur_get_page_cur( -/*=================*/ - const btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(&((btr_cur_t*) cursor)->page_cur); -} - -/*********************************************************//** -Returns the buffer block on which the tree cursor is positioned. 
-@return pointer to buffer block */ -UNIV_INLINE -buf_block_t* -btr_cur_get_block( -/*==============*/ - const btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(page_cur_get_block(btr_cur_get_page_cur(cursor))); -} - -/*********************************************************//** -Returns the record pointer of a tree cursor. -@return pointer to record */ -UNIV_INLINE -rec_t* -btr_cur_get_rec( -/*============*/ - const btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(page_cur_get_rec(btr_cur_get_page_cur(cursor))); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************//** -Returns the compressed page on which the tree cursor is positioned. -@return pointer to compressed page, or NULL if the page is not compressed */ -UNIV_INLINE -page_zip_des_t* -btr_cur_get_page_zip( -/*=================*/ - btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(buf_block_get_page_zip(btr_cur_get_block(cursor))); -} - -/*********************************************************//** -Invalidates a tree cursor by setting record pointer to NULL. */ -UNIV_INLINE -void -btr_cur_invalidate( -/*===============*/ - btr_cur_t* cursor) /*!< in: tree cursor */ -{ - page_cur_invalidate(&(cursor->page_cur)); -} - -/*********************************************************//** -Returns the page of a tree cursor. -@return pointer to page */ -UNIV_INLINE -page_t* -btr_cur_get_page( -/*=============*/ - btr_cur_t* cursor) /*!< in: tree cursor */ -{ - return(page_align(page_cur_get_rec(&(cursor->page_cur)))); -} - -/*********************************************************//** -Positions a tree cursor at a given record. 
*/ -UNIV_INLINE -void -btr_cur_position( -/*=============*/ - dict_index_t* index, /*!< in: index */ - rec_t* rec, /*!< in: record in tree */ - buf_block_t* block, /*!< in: buffer block of rec */ - btr_cur_t* cursor) /*!< out: cursor */ -{ - ut_ad(page_align(rec) == block->frame); - - page_cur_position(rec, block, btr_cur_get_page_cur(cursor)); - - cursor->index = index; -} - -/*********************************************************************//** -Checks if compressing an index page where a btr cursor is placed makes -sense. -@return TRUE if compression is recommended */ -UNIV_INLINE -ibool -btr_cur_compress_recommendation( -/*============================*/ - btr_cur_t* cursor, /*!< in: btr cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - const page_t* page; - - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); - - page = btr_cur_get_page(cursor); - - LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2, - return(FALSE)); - - if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) - || ((btr_page_get_next(page, mtr) == FIL_NULL) - && (btr_page_get_prev(page, mtr) == FIL_NULL))) { - - /* The page fillfactor has dropped below a predefined - minimum value OR the level in the B-tree contains just - one page: we recommend compression if this is not the - root page. */ - - return(dict_index_get_page(cursor->index) - != page_get_page_no(page)); - } - - return(FALSE); -} - -/*********************************************************************//** -Checks if the record on which the cursor is placed can be deleted without -making tree compression necessary (or, recommended). 
-@return TRUE if can be deleted without recommended compression */ -UNIV_INLINE -ibool -btr_cur_can_delete_without_compress( -/*================================*/ - btr_cur_t* cursor, /*!< in: btr cursor */ - ulint rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* page; - - ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), - MTR_MEMO_PAGE_X_FIX)); - - page = btr_cur_get_page(cursor); - - if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) - || ((btr_page_get_next(page, mtr) == FIL_NULL) - && (btr_page_get_prev(page, mtr) == FIL_NULL)) - || (page_get_n_recs(page) < 2)) { - - /* The page fillfactor will drop below a predefined - minimum value, OR the level in the B-tree contains just - one page, OR the page will become empty: we recommend - compression if this is not the root page. */ - - return(dict_index_get_page(cursor->index) - == page_get_page_no(page)); - } - - return(TRUE); -} - -/*******************************************************************//** -Determine if an operation on off-page columns is an update. -@return TRUE if op != BTR_STORE_INSERT */ -UNIV_INLINE -ibool -btr_blob_op_is_update( -/*==================*/ - enum blob_op op) /*!< in: operation */ -{ - switch (op) { - case BTR_STORE_INSERT: - return(FALSE); - case BTR_STORE_INSERT_UPDATE: - case BTR_STORE_UPDATE: - return(TRUE); - } - - ut_ad(0); - return(FALSE); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/btr0defragment.h b/storage/xtradb/include/btr0defragment.h deleted file mode 100644 index 477824c1a35..00000000000 --- a/storage/xtradb/include/btr0defragment.h +++ /dev/null @@ -1,102 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2017, MariaDB Corporation. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -#ifndef btr0defragment_h -#define btr0defragment_h - -#include "univ.i" - -#ifndef UNIV_HOTBACKUP - -#include "btr0pcur.h" - -/* Max number of pages to consider at once during defragmentation. */ -#define BTR_DEFRAGMENT_MAX_N_PAGES 32 - -/** Statistics counters of btr_defragment */ -extern ulint btr_defragment_compression_failures; -extern ulint btr_defragment_failures; -extern ulint btr_defragment_count; - -/** Item in the work queue for btr_defragment_thread. */ -struct btr_defragment_item_t -{ - btr_pcur_t* pcur; /* persistent cursor where - btr_defragment_n_pages should start */ - os_event_t event; /* if not null, signal after work - is done */ - bool removed; /* marks the item as removed */ - ulonglong last_processed; /* timestamp of the last time this index - was processed by the defragment thread */ - - btr_defragment_item_t(btr_pcur_t* pcur, os_event_t event); - ~btr_defragment_item_t(); -}; - -/******************************************************************//** -Initialize defragmentation. */ -void -btr_defragment_init(void); -/******************************************************************//** -Shutdown defragmentation.
*/ -void -btr_defragment_shutdown(); -/******************************************************************//** -Check whether the given index is in btr_defragment_wq. */ -bool -btr_defragment_find_index( - dict_index_t* index); /*!< Index to find. */ -/******************************************************************//** -Add an index to btr_defragment_wq. Return a pointer to os_event if this -is a synchronized defragmentation. */ -os_event_t -btr_defragment_add_index( - dict_index_t* index, /*!< index to be added */ - bool async, /*!< whether this is an async - defragmentation */ - dberr_t* err); /*!< out: error code */ -/******************************************************************//** -When a table is dropped, this function is called to mark the table as removed -in btr_defragment_wq. The difference between this function and the -remove_index function is that this one will not NULL the event. */ -void -btr_defragment_remove_table( - dict_table_t* table); /*!< Table to be removed. */ -/******************************************************************//** -Mark an index as removed from btr_defragment_wq. */ -void -btr_defragment_remove_index( - dict_index_t* index); /*!< Index to be removed.
*/ -/*********************************************************************//** -Check whether we should save defragmentation statistics to persistent storage.*/ -UNIV_INTERN -void -btr_defragment_save_defrag_stats_if_needed( - dict_index_t* index); /*!< in: index */ - -/** Merge consecutive b-tree pages into fewer pages to defragment indexes */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(btr_defragment_thread)(void*); - -/** Whether btr_defragment_thread is active */ -extern bool btr_defragment_thread_active; - -#endif /* !UNIV_HOTBACKUP */ -#endif diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h deleted file mode 100644 index dafe14ce556..00000000000 --- a/storage/xtradb/include/btr0pcur.h +++ /dev/null @@ -1,548 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/btr0pcur.h -The index tree persistent cursor - -Created 2/23/1996 Heikki Tuuri -*******************************************************/ - -#ifndef btr0pcur_h -#define btr0pcur_h - -#include "univ.i" -#include "dict0dict.h" -#include "data0data.h" -#include "mtr0mtr.h" -#include "page0cur.h" -#include "btr0cur.h" -#include "btr0btr.h" -#include "btr0types.h" - -/* Relative positions for a stored cursor position */ -#define BTR_PCUR_ON 1 -#define BTR_PCUR_BEFORE 2 -#define BTR_PCUR_AFTER 3 -/* Note that if the tree is not empty, btr_pcur_store_position does not -use the following, but only uses the above three alternatives, where the -position is stored relative to a specific record: this makes implementation -of a scroll cursor easier */ -#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */ -#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */ - -/**************************************************************//** -Allocates memory for a persistent cursor object and initializes the cursor. -@return own: persistent cursor */ -UNIV_INTERN -btr_pcur_t* -btr_pcur_create_for_mysql(void); -/*============================*/ - -/**************************************************************//** -Resets a persistent cursor object, freeing ::old_rec_buf if it is -allocated and resetting the other members to their initial values. */ -UNIV_INTERN -void -btr_pcur_reset( -/*===========*/ - btr_pcur_t* cursor);/*!< in, out: persistent cursor */ - -/**************************************************************//** -Frees the memory for a persistent cursor object. 
*/ -UNIV_INTERN -void -btr_pcur_free_for_mysql( -/*====================*/ - btr_pcur_t* cursor); /*!< in, own: persistent cursor */ -/**************************************************************//** -Copies the stored position of a pcur to another pcur. */ -UNIV_INTERN -void -btr_pcur_copy_stored_position( -/*==========================*/ - btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the - position info */ - btr_pcur_t* pcur_donate); /*!< in: pcur from which the info is - copied */ -/**************************************************************//** -Sets the old_rec_buf field to NULL. */ -UNIV_INLINE -void -btr_pcur_init( -/*==========*/ - btr_pcur_t* pcur); /*!< in: persistent cursor */ -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. */ -UNIV_INLINE -void -btr_pcur_open_low( -/*==============*/ - dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: level in the btree */ - const dtuple_t* tuple, /*!< in: tuple on which the search is done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -#define btr_pcur_open(i,t,md,l,c,m) \ - btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m) -/**************************************************************//** -Opens a persistent cursor to an index tree without initializing the -cursor.
*/ -UNIV_INLINE -dberr_t -btr_pcur_open_with_no_init_func( -/*============================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page of the - record! */ - ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...; - NOTE that if has_search_latch != 0 then - we maybe do not acquire a latch on the cursor - page, but assume that the caller uses his - btr search latch to protect the record! */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \ - btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m) - -/*****************************************************************//** -Opens a persistent cursor at either end of an index. */ -UNIV_INLINE -dberr_t -btr_pcur_open_at_index_side( -/*========================*/ - bool from_left, /*!< in: true if open to the low end, - false if to the high end */ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: latch mode */ - btr_pcur_t* pcur, /*!< in/out: cursor */ - bool init_pcur, /*!< in: whether to initialize pcur */ - ulint level, /*!< in: level to search for - (0=leaf) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Gets the up_match value for a pcur after a search. 
-@return number of matched fields at the cursor or to the right if -search mode was PAGE_CUR_GE, otherwise undefined */ -UNIV_INLINE -ulint -btr_pcur_get_up_match( -/*==================*/ - const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/**************************************************************//** -Gets the low_match value for a pcur after a search. -@return number of matched fields at the cursor or to the right if -search mode was PAGE_CUR_LE, otherwise undefined */ -UNIV_INLINE -ulint -btr_pcur_get_low_match( -/*===================*/ - const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/**************************************************************//** -If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first -user record satisfying the search condition, in the case PAGE_CUR_L or -PAGE_CUR_LE, on the last user record. If no such user record exists, then -in the first case sets the cursor after last in tree, and in the latter case -before first in tree. The latching mode must be BTR_SEARCH_LEAF or -BTR_MODIFY_LEAF. */ -UNIV_INTERN -void -btr_pcur_open_on_user_rec_func( -/*===========================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent - cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \ - btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m) -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. 
*/ -UNIV_INLINE -void -btr_pcur_open_at_rnd_pos_func( -/*==========================*/ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -#define btr_pcur_open_at_rnd_pos(i,l,c,m) \ - btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m) -/**************************************************************//** -Frees the possible memory heap of a persistent cursor and sets the latch -mode of the persistent cursor to BTR_NO_LATCHES. -WARNING: this function does not release the latch on the page where the -cursor is currently positioned. The latch is acquired by the -"move to next/previous" family of functions. Since recursive shared locks -are not allowed, you must take care (if using the cursor in S-mode) to -manually release the latch by either calling -btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr) -or by committing the mini-transaction right after btr_pcur_close(). -A subsequent attempt to crawl the same page in the same mtr would cause -an assertion failure. */ -UNIV_INLINE -void -btr_pcur_close( -/*===========*/ - btr_pcur_t* cursor); /*!< in: persistent cursor */ -/**************************************************************//** -The position of the cursor is stored by taking an initial segment of the -record the cursor is positioned on, before, or after, and copying it to the -cursor data structure, or just setting a flag if the cursor is before the -first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the -page where the cursor is positioned must not be empty if the index tree is -not totally empty!
*/ -UNIV_INTERN -void -btr_pcur_store_position( -/*====================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr); /*!< in: mtr */ -/**************************************************************//** -Restores the stored position of a persistent cursor bufferfixing the page and -obtaining the specified latches. If the cursor position was saved when the -(1) cursor was positioned on a user record: this function restores the position -to the last record LESS OR EQUAL to the stored record; -(2) cursor was positioned on a page infimum record: restores the position to -the last record LESS than the user record which was the successor of the page -infimum; -(3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. -(4) cursor was positioned before the first or after the last in an empty tree: -restores to before first or after the last in the tree. -@return TRUE if the cursor position was stored when it was on a user -record and it can be restored on a user record whose ordering fields -are identical to the ones of the original user record */ -UNIV_INTERN -ibool -btr_pcur_restore_position_func( -/*===========================*/ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: detached persistent cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mtr */ -#define btr_pcur_restore_position(l,cur,mtr) \ - btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr) -/*********************************************************//** -Gets the rel_pos field for a cursor whose position has been stored. -@return BTR_PCUR_ON, ... 
*/ -UNIV_INLINE -ulint -btr_pcur_get_rel_pos( -/*=================*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/**************************************************************//** -Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES, -that is, the cursor becomes detached. -Function btr_pcur_store_position should be used before calling this, -if restoration of cursor is wanted later. */ -UNIV_INLINE -void -btr_pcur_commit_specify_mtr( -/*========================*/ - btr_pcur_t* pcur, /*!< in: persistent cursor */ - mtr_t* mtr); /*!< in: mtr to commit */ -/*********************************************************//** -Moves the persistent cursor to the next record in the tree. If no records are -left, the cursor stays 'after last in tree'. -@return TRUE if the cursor was not after last in tree */ -UNIV_INLINE -ibool -btr_pcur_move_to_next( -/*==================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Moves the persistent cursor to the previous record in the tree. If no records -are left, the cursor stays 'before first in tree'. -@return TRUE if the cursor was not before first in tree */ -UNIV_INTERN -ibool -btr_pcur_move_to_prev( -/*==================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Moves the persistent cursor to the last record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_last_on_page( -/*==========================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Moves the persistent cursor to the next user record in the tree. If no user -records are left, the cursor ends up 'after last in tree'. 
-@return TRUE if the cursor moved forward, ending on a user record */ -UNIV_INLINE -ibool -btr_pcur_move_to_next_user_rec( -/*===========================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Moves the persistent cursor to the first record on the next page. -Releases the latch on the current page, and bufferunfixes it. -Note that there must not be modifications on the current page, -as then the x-latch can be released only in mtr_commit. */ -UNIV_INTERN -void -btr_pcur_move_to_next_page( -/*=======================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the - last record of the current page */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Moves the persistent cursor backward if it is on the first record -of the page. Releases the latch on the current page, and bufferunfixes -it. Note that to prevent a possible deadlock, the operation first -stores the position of the cursor, releases the leaf latch, acquires -necessary latches and restores the cursor position again before returning. -The alphabetical position of the cursor is guaranteed to be sensible -on return, but it may happen that the cursor is not positioned on the -last record of any page, because the structure of the tree may have -changed while the cursor had no latches. */ -UNIV_INTERN -void -btr_pcur_move_backward_from_page( -/*=============================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the - first record of the current page */ - mtr_t* mtr); /*!< in: mtr */ -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the btr cursor component of a persistent cursor. 
-@return pointer to btr cursor component */ -UNIV_INLINE -btr_cur_t* -btr_pcur_get_btr_cur( -/*=================*/ - const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/*********************************************************//** -Returns the page cursor component of a persistent cursor. -@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_pcur_get_page_cur( -/*==================*/ - const btr_pcur_t* cursor); /*!< in: persistent cursor */ -/*********************************************************//** -Returns the page of a persistent cursor. -@return pointer to the page */ -UNIV_INLINE -page_t* -btr_pcur_get_page( -/*==============*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Returns the buffer block of a persistent cursor. -@return pointer to the block */ -UNIV_INLINE -buf_block_t* -btr_pcur_get_block( -/*===============*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Returns the record of a persistent cursor. -@return pointer to the record */ -UNIV_INLINE -rec_t* -btr_pcur_get_rec( -/*=============*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -#else /* UNIV_DEBUG */ -# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur) -# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur) -# define btr_pcur_get_page(cursor) ((cursor)->btr_cur.page_cur.block->frame) -# define btr_pcur_get_block(cursor) ((cursor)->btr_cur.page_cur.block) -# define btr_pcur_get_rec(cursor) ((cursor)->btr_cur.page_cur.rec) -#endif /* UNIV_DEBUG */ -/*********************************************************//** -Checks if the persistent cursor is on a user record. 
*/ -UNIV_INLINE -ibool -btr_pcur_is_on_user_rec( -/*====================*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Checks if the persistent cursor is after the last user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_on_page( -/*===========================*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Checks if the persistent cursor is before the first user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_on_page( -/*=============================*/ - const btr_pcur_t* cursor);/*!< in: persistent cursor */ -/*********************************************************//** -Checks if the persistent cursor is before the first user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_in_tree( -/*=============================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Checks if the persistent cursor is after the last user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_in_tree( -/*===========================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************//** -Moves the persistent cursor to the next record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_next_on_page( -/*==========================*/ - btr_pcur_t* cursor);/*!< in/out: persistent cursor */ -/*********************************************************//** -Moves the persistent cursor to the previous record on the same page. 
*/ -UNIV_INLINE -void -btr_pcur_move_to_prev_on_page( -/*==========================*/ - btr_pcur_t* cursor);/*!< in/out: persistent cursor */ -/*********************************************************//** -Moves the persistent cursor to the infimum record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_before_first_on_page( -/*===============================*/ - btr_pcur_t* cursor); /*!< in/out: persistent cursor */ - -/** Position state of persistent B-tree cursor. */ -enum pcur_pos_t { - /** The persistent cursor is not positioned. */ - BTR_PCUR_NOT_POSITIONED = 0, - /** The persistent cursor was previously positioned. - TODO: currently, the state can be BTR_PCUR_IS_POSITIONED, - though it really should be BTR_PCUR_WAS_POSITIONED, - because we have no obligation to commit the cursor with - mtr; similarly latch_mode may be out of date. This can - lead to problems if btr_pcur is not used the right way; - all current code should be ok. */ - BTR_PCUR_WAS_POSITIONED, - /** The persistent cursor is positioned by optimistic get to the same - record as it was positioned at. Not used for rel_pos == BTR_PCUR_ON. - It may need adjustment depending on previous/current search direction - and rel_pos. */ - BTR_PCUR_IS_POSITIONED_OPTIMISTIC, - /** The persistent cursor is positioned by index search. - Or optimistic get for rel_pos == BTR_PCUR_ON. */ - BTR_PCUR_IS_POSITIONED -}; - -/* The persistent B-tree cursor structure. This is used mainly for SQL -selects, updates, and deletes. */ - -struct btr_pcur_t{ - btr_cur_t btr_cur; /*!< a B-tree cursor */ - ulint latch_mode; /*!< see the TODO note on BTR_PCUR_WAS_POSITIONED in enum pcur_pos_t above!
- BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, - BTR_MODIFY_TREE, or BTR_NO_LATCHES, - depending on the latching state of - the page and tree where the cursor is - positioned; BTR_NO_LATCHES means that - the cursor is not currently positioned: - we say then that the cursor is - detached; it can be restored to - attached if the old position was - stored in old_rec */ - ulint old_stored; /*!< BTR_PCUR_OLD_STORED - or BTR_PCUR_OLD_NOT_STORED */ - rec_t* old_rec; /*!< if cursor position is stored, - contains an initial segment of the - latest record cursor was positioned - either on, before, or after */ - ulint old_n_fields; /*!< number of fields in old_rec */ - ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or - BTR_PCUR_AFTER, depending on whether - cursor was on, before, or after the - old_rec record */ - buf_block_t* block_when_stored;/* buffer block when the position was - stored */ - ib_uint64_t modify_clock; /*!< the modify clock value of the - buffer block when the cursor position - was stored */ - enum pcur_pos_t pos_state; /*!< btr_pcur_store_position() and - btr_pcur_restore_position() state. */ - ulint search_mode; /*!< PAGE_CUR_G, ... */ - trx_t* trx_if_known; /*!< the transaction, if we know it; - otherwise this field is not defined; - can ONLY BE USED in error prints in - fatal assertion failures! */ - /*-----------------------------*/ - /* NOTE that the following fields may possess dynamically allocated - memory which should be freed if not needed anymore! 
*/ - - byte* old_rec_buf; /*!< NULL, or a dynamically allocated - buffer for old_rec */ - ulint buf_size; /*!< old_rec_buf size if old_rec_buf - is not NULL */ -}; - -#define BTR_PCUR_OLD_STORED 908467085 -#define BTR_PCUR_OLD_NOT_STORED 122766467 - -#ifndef UNIV_NONINL -#include "btr0pcur.ic" -#endif - -#endif diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic deleted file mode 100644 index 1cd13824542..00000000000 --- a/storage/xtradb/include/btr0pcur.ic +++ /dev/null @@ -1,612 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/btr0pcur.ic -The index tree persistent cursor - -Created 2/23/1996 Heikki Tuuri -*******************************************************/ - - -/*********************************************************//** -Gets the rel_pos field for a cursor whose position has been stored. -@return BTR_PCUR_ON, ... 
*/ -UNIV_INLINE -ulint -btr_pcur_get_rel_pos( -/*=================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor); - ut_ad(cursor->old_rec); - ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); - ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED - || cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(cursor->rel_pos); -} - -#ifdef UNIV_DEBUG -/*********************************************************//** -Returns the btr cursor component of a persistent cursor. -@return pointer to btr cursor component */ -UNIV_INLINE -btr_cur_t* -btr_pcur_get_btr_cur( -/*=================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - const btr_cur_t* btr_cur = &cursor->btr_cur; - return((btr_cur_t*) btr_cur); -} - -/*********************************************************//** -Returns the page cursor component of a persistent cursor. -@return pointer to page cursor component */ -UNIV_INLINE -page_cur_t* -btr_pcur_get_page_cur( -/*==================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor))); -} - -/*********************************************************//** -Returns the page of a persistent cursor. -@return pointer to the page */ -UNIV_INLINE -page_t* -btr_pcur_get_page( -/*==============*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor))); -} - -/*********************************************************//** -Returns the buffer block of a persistent cursor. 
-@return pointer to the block */ -UNIV_INLINE -buf_block_t* -btr_pcur_get_block( -/*===============*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor))); -} - -/*********************************************************//** -Returns the record of a persistent cursor. -@return pointer to the record */ -UNIV_INLINE -rec_t* -btr_pcur_get_rec( -/*=============*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor))); -} -#endif /* UNIV_DEBUG */ - -/**************************************************************//** -Gets the up_match value for a pcur after a search. -@return number of matched fields at the cursor or to the right if -search mode was PAGE_CUR_GE, otherwise undefined */ -UNIV_INLINE -ulint -btr_pcur_get_up_match( -/*==================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - const btr_cur_t* btr_cursor; - - ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - ut_ad(btr_cursor->up_match != ULINT_UNDEFINED); - - return(btr_cursor->up_match); -} - -/**************************************************************//** -Gets the low_match value for a pcur after a search. 
-@return number of matched fields at the cursor or to the right if -search mode was PAGE_CUR_LE, otherwise undefined */ -UNIV_INLINE -ulint -btr_pcur_get_low_match( -/*===================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - const btr_cur_t* btr_cursor; - - ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); - - btr_cursor = btr_pcur_get_btr_cur(cursor); - ut_ad(btr_cursor->low_match != ULINT_UNDEFINED); - - return(btr_cursor->low_match); -} - -/*********************************************************//** -Checks if the persistent cursor is after the last user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_on_page( -/*===========================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); -} - -/*********************************************************//** -Checks if the persistent cursor is before the first user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_on_page( -/*=============================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); -} - -/*********************************************************//** -Checks if the persistent cursor is on a user record. 
*/ -UNIV_INLINE -ibool -btr_pcur_is_on_user_rec( -/*====================*/ - const btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if (btr_pcur_is_before_first_on_page(cursor) - || btr_pcur_is_after_last_on_page(cursor)) { - - return(FALSE); - } - - return(TRUE); -} - -/*********************************************************//** -Checks if the persistent cursor is before the first user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_in_tree( -/*=============================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { - - return(FALSE); - } - - return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); -} - -/*********************************************************//** -Checks if the persistent cursor is after the last user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_in_tree( -/*===========================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { - - return(FALSE); - } - - return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); -} - -/*********************************************************//** -Moves the persistent cursor to the next record on the same page. 
*/ -UNIV_INLINE -void -btr_pcur_move_to_next_on_page( -/*==========================*/ - btr_pcur_t* cursor) /*!< in/out: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_move_to_next(btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/*********************************************************//** -Moves the persistent cursor to the previous record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_prev_on_page( -/*==========================*/ - btr_pcur_t* cursor) /*!< in/out: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/*********************************************************//** -Moves the persistent cursor to the last record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_last_on_page( -/*==========================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor */ - mtr_t* mtr) /*!< in: mtr */ -{ - UT_NOT_USED(mtr); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_set_after_last(btr_pcur_get_block(cursor), - btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/*********************************************************//** -Moves the persistent cursor to the next user record in the tree. If no user -records are left, the cursor ends up 'after last in tree'. 
-@return TRUE if the cursor moved forward, ending on a user record */ -UNIV_INLINE -ibool -btr_pcur_move_to_next_user_rec( -/*===========================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -loop: - if (btr_pcur_is_after_last_on_page(cursor)) { - - if (btr_pcur_is_after_last_in_tree(cursor, mtr)) { - - return(FALSE); - } - - btr_pcur_move_to_next_page(cursor, mtr); - } else { - btr_pcur_move_to_next_on_page(cursor); - } - - if (btr_pcur_is_on_user_rec(cursor)) { - - return(TRUE); - } - - goto loop; -} - -/*********************************************************//** -Moves the persistent cursor to the next record in the tree. If no records are -left, the cursor stays 'after last in tree'. -@return TRUE if the cursor was not after last in tree */ -UNIV_INLINE -ibool -btr_pcur_move_to_next( -/*==================*/ - btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - if (btr_pcur_is_after_last_on_page(cursor)) { - - if (btr_pcur_is_after_last_in_tree(cursor, mtr)) { - - return(FALSE); - } - - btr_pcur_move_to_next_page(cursor, mtr); - - return(TRUE); - } - - btr_pcur_move_to_next_on_page(cursor); - - return(TRUE); -} - -/**************************************************************//** -Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES, -that is, the cursor becomes detached. -Function btr_pcur_store_position should be used before calling this, -if restoration of cursor is wanted later. 
*/ -UNIV_INLINE -void -btr_pcur_commit_specify_mtr( -/*========================*/ - btr_pcur_t* pcur, /*!< in: persistent cursor */ - mtr_t* mtr) /*!< in: mtr to commit */ -{ - ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - - pcur->latch_mode = BTR_NO_LATCHES; - - mtr_commit(mtr); - - pcur->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/**************************************************************//** -Sets the old_rec_buf field to NULL. */ -UNIV_INLINE -void -btr_pcur_init( -/*==========*/ - btr_pcur_t* pcur) /*!< in: persistent cursor */ -{ - pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; - pcur->old_rec_buf = NULL; - pcur->old_rec = NULL; -} - -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. */ -UNIV_INLINE -void -btr_pcur_open_low( -/*==============*/ - dict_index_t* index, /*!< in: index */ - ulint level, /*!< in: level in the btree */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... 
*/ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - btr_cur_t* btr_cursor; - - /* Initialize the cursor */ - - btr_pcur_init(cursor); - - cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = mode; - - /* Search with the tree cursor */ - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode, - btr_cursor, 0, file, line, mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - - cursor->trx_if_known = NULL; -} - -/**************************************************************//** -Opens an persistent cursor to an index tree without initializing the -cursor. */ -UNIV_INLINE -dberr_t -btr_pcur_open_with_no_init_func( -/*============================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* tuple, /*!< in: tuple on which search done */ - ulint mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page of the - record! */ - ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...; - NOTE that if has_search_latch != 0 then - we maybe do not acquire a latch on the cursor - page, but assume that the caller uses his - btr search latch to protect the record! 
*/ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - btr_cur_t* btr_cursor; - dberr_t err = DB_SUCCESS; - - cursor->latch_mode = latch_mode; - cursor->search_mode = mode; - - /* Search with the tree cursor */ - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - err = btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, has_search_latch, - file, line, mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->trx_if_known = NULL; - return err; -} - -/*****************************************************************//** -Opens a persistent cursor at either end of an index. */ -UNIV_INLINE -dberr_t -btr_pcur_open_at_index_side( -/*========================*/ - bool from_left, /*!< in: true if open to the low end, - false if to the high end */ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: latch mode */ - btr_pcur_t* pcur, /*!< in/out: cursor */ - bool init_pcur, /*!< in: whether to initialize pcur */ - ulint level, /*!< in: level to search for - (0=leaf) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - dberr_t err = DB_SUCCESS; - - pcur->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - - pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L; - - if (init_pcur) { - btr_pcur_init(pcur); - } - - err = btr_cur_open_at_index_side(from_left, index, latch_mode, - btr_pcur_get_btr_cur(pcur), level, mtr); - pcur->pos_state = BTR_PCUR_IS_POSITIONED; - - pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; - - pcur->trx_if_known = NULL; - - return (err); -} - -/**********************************************************************//** -Positions a cursor at a randomly chosen position within a B-tree. 
*/ -UNIV_INLINE -void -btr_pcur_open_at_rnd_pos_func( -/*==========================*/ - dict_index_t* index, /*!< in: index */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in/out: B-tree pcur */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr */ -{ - /* Initialize the cursor */ - - cursor->latch_mode = latch_mode; - cursor->search_mode = PAGE_CUR_G; - - btr_pcur_init(cursor); - - btr_cur_open_at_rnd_pos_func(index, latch_mode, - btr_pcur_get_btr_cur(cursor), - file, line, mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->trx_if_known = NULL; -} - -/**************************************************************//** -Frees the possible memory heap of a persistent cursor and sets the latch -mode of the persistent cursor to BTR_NO_LATCHES. -WARNING: this function does not release the latch on the page where the -cursor is currently positioned. The latch is acquired by the -"move to next/previous" family of functions. Since recursive shared locks -are not allowed, you must take care (if using the cursor in S-mode) to -manually release the latch by either calling -btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr) -or by committing the mini-transaction right after btr_pcur_close(). -A subsequent attempt to crawl the same page in the same mtr would cause -an assertion failure. 
*/ -UNIV_INLINE -void -btr_pcur_close( -/*===========*/ - btr_pcur_t* cursor) /*!< in: persistent cursor */ -{ - if (cursor->old_rec_buf != NULL) { - - mem_free(cursor->old_rec_buf); - - cursor->old_rec = NULL; - cursor->old_rec_buf = NULL; - } - - cursor->btr_cur.page_cur.rec = NULL; - cursor->btr_cur.page_cur.block = NULL; - cursor->old_rec = NULL; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->latch_mode = BTR_NO_LATCHES; - cursor->pos_state = BTR_PCUR_NOT_POSITIONED; - - cursor->trx_if_known = NULL; -} - -/*********************************************************//** -Moves the persistent cursor to the infimum record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_before_first_on_page( -/*===============================*/ - btr_pcur_t* cursor) /*!< in/out: persistent cursor */ -{ - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_set_before_first(btr_pcur_get_block(cursor), - btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} diff --git a/storage/xtradb/include/btr0scrub.h b/storage/xtradb/include/btr0scrub.h deleted file mode 100644 index 608266c206d..00000000000 --- a/storage/xtradb/include/btr0scrub.h +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2014 Google - -#ifndef btr0scrub_h -#define btr0scrub_h - -#include "univ.i" - -#include "dict0dict.h" -#include "data0data.h" -#include "page0cur.h" -#include "mtr0mtr.h" -#include "btr0types.h" - -/** - * enum describing page allocation status - */ -enum btr_scrub_page_allocation_status_t { - BTR_SCRUB_PAGE_FREE, - BTR_SCRUB_PAGE_ALLOCATED, - BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN -}; - -/** -* constants returned by btr_page_needs_scrubbing & btr_scrub_recheck_page -*/ -#define BTR_SCRUB_PAGE 1 /* page should be scrubbed */ -#define BTR_SCRUB_SKIP_PAGE 2 /* no scrub & no action */ -#define BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE 3 /* no scrub & close table */ -#define BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE 4 /* no scrub & complete space */ -#define 
BTR_SCRUB_TURNED_OFF 5 /* we detected that scrubbing - was disabled by global - variable */ - -/**************************************************************//** -struct for keeping scrub statistics. */ -struct btr_scrub_stat_t { - /* page reorganizations */ - ulint page_reorganizations; - /* page splits */ - ulint page_splits; - /* scrub failures */ - ulint page_split_failures_underflow; - ulint page_split_failures_out_of_filespace; - ulint page_split_failures_missing_index; - ulint page_split_failures_unknown; -}; - -/**************************************************************//** -struct for thread local scrub state. */ -struct btr_scrub_t { - - /* current space */ - ulint space; - - /* is scrubbing enabled for this space */ - bool scrubbing; - - /* is current space compressed */ - bool compressed; - - dict_table_t* current_table; - dict_index_t* current_index; - /* savepoint for X_LATCH of block */ - ulint savepoint; - - /* statistic counters */ - btr_scrub_stat_t scrub_stat; -}; - -/********************************************************************* -Init scrub global variables */ -UNIV_INTERN -void -btr_scrub_init(); - -/********************************************************************* -Cleanup scrub globals */ -UNIV_INTERN -void -btr_scrub_cleanup(); - -/*********************************************************************** -Return crypt statistics */ -UNIV_INTERN -void -btr_scrub_total_stat( -/*==================*/ - btr_scrub_stat_t *stat); /*!< out: stats to update */ - -/**************************************************************//** -Check if a page needs scrubbing -* @return BTR_SCRUB_PAGE if page should be scrubbed -* else btr_scrub_skip_page should be called -* with this return value (and without any latches held) -*/ -UNIV_INTERN -int -btr_page_needs_scrubbing( -/*=====================*/ - btr_scrub_t* scrub_data, /*!< in: scrub data */ - buf_block_t* block, /*!< in: block to check, latched */ - btr_scrub_page_allocation_status_t 
allocated); /*!< in: is block - allocated, free or - unknown */ - -/**************************************************************** -Recheck if a page needs scrubbing, and if it does load appropriate -table and index -* @return BTR_SCRUB_PAGE if page should be scrubbed -* else btr_scrub_skip_page should be called -* with this return value (and without any latches held) -*/ -UNIV_INTERN -int -btr_scrub_recheck_page( -/*====================*/ - btr_scrub_t* scrub_data, /*!< inut: scrub data */ - buf_block_t* block, /*!< in: block */ - btr_scrub_page_allocation_status_t allocated, /*!< in: is block - allocated or free */ - mtr_t* mtr); /*!< in: mtr */ - -/**************************************************************** -Perform actual scrubbing of page */ -UNIV_INTERN -int -btr_scrub_page( -/*============*/ - btr_scrub_t* scrub_data, /*!< in/out: scrub data */ - buf_block_t* block, /*!< in: block */ - btr_scrub_page_allocation_status_t allocated, /*!< in: is block - allocated or free */ - mtr_t* mtr); /*!< in: mtr */ - -/**************************************************************** -Perform cleanup needed for a page not needing scrubbing */ -UNIV_INTERN -void -btr_scrub_skip_page( -/*============*/ - btr_scrub_t* scrub_data, /*!< in/out: scrub data */ - int needs_scrubbing); /*!< in: return value from - btr_page_needs_scrubbing or - btr_scrub_recheck_page which encodes what kind - of cleanup is needed */ - -/**************************************************************** -Start iterating a space -* @return true if scrubbing is turned on */ -UNIV_INTERN -bool -btr_scrub_start_space( -/*===================*/ - ulint space, /*!< in: space */ - btr_scrub_t* scrub_data); /*!< in/out: scrub data */ - -/**************************************************************** -Complete iterating a space -* @return true if space was scrubbed */ -UNIV_INTERN -bool -btr_scrub_complete_space( -/*=====================*/ - btr_scrub_t* scrub_data); /*!< in/out: scrub data */ - -#endif 
diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h deleted file mode 100644 index 66c27607013..00000000000 --- a/storage/xtradb/include/btr0sea.h +++ /dev/null @@ -1,356 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/btr0sea.h -The index tree adaptive search - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#ifndef btr0sea_h -#define btr0sea_h - -#include "univ.i" - -#include "rem0rec.h" -#include "dict0dict.h" -#include "btr0types.h" -#include "mtr0mtr.h" -#include "ha0ha.h" - -/*****************************************************************//** -Creates and initializes the adaptive search system at a database start. */ -UNIV_INTERN -void -btr_search_sys_create( -/*==================*/ - ulint hash_size); /*!< in: hash index hash table size */ -/*****************************************************************//** -Frees the adaptive search system at a database shutdown. 
*/ -UNIV_INTERN -void -btr_search_sys_free(void); -/*=====================*/ - -/********************************************************************//** -Disable the adaptive hash search system and empty the index. */ -UNIV_INTERN -void -btr_search_disable(void); -/*====================*/ -/********************************************************************//** -Enable the adaptive hash search system. */ -UNIV_INTERN -void -btr_search_enable(void); -/*====================*/ - -/********************************************************************//** -Returns search info for an index. -@return search info; search mutex reserved */ -UNIV_INLINE -btr_search_t* -btr_search_get_info( -/*================*/ - dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull)); -/*****************************************************************//** -Creates and initializes a search info struct. -@return own: search info struct */ -UNIV_INTERN -btr_search_t* -btr_search_info_create( -/*===================*/ - mem_heap_t* heap); /*!< in: heap where created */ -/*****************************************************************//** -Returns the value of ref_count. The value is protected by -the latch of the AHI partition corresponding to this index. -@return ref_count value. */ -UNIV_INTERN -ulint -btr_search_info_get_ref_count( -/*==========================*/ - btr_search_t* info, /*!< in: search info. */ - dict_index_t* index); /*!< in: index */ -/*********************************************************************//** -Updates the search info. */ -UNIV_INLINE -void -btr_search_info_update( -/*===================*/ - dict_index_t* index, /*!< in: index of the cursor */ - btr_cur_t* cursor);/*!< in: cursor which was just positioned */ -/******************************************************************//** -Tries to guess the right search position based on the hash search info -of the index. 
Note that if mode is PAGE_CUR_LE, which is used in inserts, -and the function returns TRUE, then cursor->up_match and cursor->low_match -both have sensible values. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -btr_search_guess_on_hash( -/*=====================*/ - dict_index_t* index, /*!< in: index */ - btr_search_t* info, /*!< in: index search info */ - const dtuple_t* tuple, /*!< in: logical record */ - ulint mode, /*!< in: PAGE_CUR_L, ... */ - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */ - btr_cur_t* cursor, /*!< out: tree cursor */ - ulint has_search_latch,/*!< in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, RW_X_LATCH, or 0 */ - mtr_t* mtr); /*!< in: mtr */ -/********************************************************************//** -Moves or deletes hash entries for moved records. If new_page is already hashed, -then the hash index for page, if any, is dropped. If new_page is not hashed, -and page is hashed, then a new hash index is built to new_page with the same -parameters as page (this often happens when a page is split). */ -UNIV_INTERN -void -btr_search_move_or_delete_hash_entries( -/*===================================*/ - buf_block_t* new_block, /*!< in: records are copied - to this page */ - buf_block_t* block, /*!< in: index page from which - records were copied, and the - copied records will be deleted - from this page */ - dict_index_t* index); /*!< in: record descriptor */ -/********************************************************************//** -Drops a page hash index. */ -UNIV_INTERN -void -btr_search_drop_page_hash_index( -/*============================*/ - buf_block_t* block); /*!< in: block containing index page, - s- or x-latched, or an index page - for which we know that - block->buf_fix_count == 0 */ -/********************************************************************//** -Drops a possible page hash index when a page is evicted from the buffer pool -or freed in a file segment. 
*/ -UNIV_INTERN -void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no); /*!< in: page number */ -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. */ -UNIV_INTERN -void -btr_search_update_hash_node_on_insert( -/*==================================*/ - btr_cur_t* cursor);/*!< in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -/********************************************************************//** -Updates the page hash index when a single record is inserted on a page. */ -UNIV_INTERN -void -btr_search_update_hash_on_insert( -/*=============================*/ - btr_cur_t* cursor);/*!< in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -/********************************************************************//** -Updates the page hash index when a single record is deleted from a page. */ -UNIV_INTERN -void -btr_search_update_hash_on_delete( -/*=============================*/ - btr_cur_t* cursor);/*!< in: cursor which was positioned on the - record to delete using btr_cur_search_..., - the record is not yet deleted */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/********************************************************************//** -Validates the search system. -@return TRUE if ok */ -UNIV_INTERN -ibool -btr_search_validate(void); -/*======================*/ -#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ - -/********************************************************************//** -Returns the adaptive hash index table for a given index key. 
-@return the adaptive hash index table for a given index key */ -UNIV_INLINE -hash_table_t* -btr_search_get_hash_table( -/*======================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - -/********************************************************************//** -Returns the adaptive hash index latch for a given index key. -@return the adaptive hash index latch for a given index key */ -UNIV_INLINE -prio_rw_lock_t* -btr_search_get_latch( -/*=================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - -/*********************************************************************//** -Returns the AHI partition number corresponding to a given index ID. */ -UNIV_INLINE -ulint -btr_search_get_key( -/*===============*/ - index_id_t index_id) /*!< in: index ID */ - MY_ATTRIBUTE((pure,warn_unused_result)); - -/*********************************************************************//** -Initializes AHI-related fields in a newly created index. */ -UNIV_INLINE -void -btr_search_index_init( -/*===============*/ - dict_index_t* index); /*!< in: index */ - -/********************************************************************//** -Latches all adaptive hash index latches in exclusive mode. */ -UNIV_INLINE -void -btr_search_x_lock_all(void); -/*========================*/ - -/********************************************************************//** -Unlatches all adaptive hash index latches in exclusive mode. */ -UNIV_INLINE -void -btr_search_x_unlock_all(void); -/*==========================*/ - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Checks if the thread has locked all the adaptive hash index latches in the -specified mode. - -@return true if all latches are locked by the current thread, false -otherwise. 
*/ -UNIV_INLINE -bool -btr_search_own_all( -/*===============*/ - ulint lock_type) - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Checks if the thread owns any adaptive hash latches in either S or X mode. -@return true if the thread owns at least one latch in any mode. */ -UNIV_INLINE -bool -btr_search_own_any(void) -/*=====================*/ - MY_ATTRIBUTE((warn_unused_result)); -#endif - -/** The search info struct in an index */ -struct btr_search_t{ - ulint ref_count; /*!< Number of blocks in this index tree - that have search index built - i.e. block->index points to this index. - Protected by btr_search_latch except - when during initialization in - btr_search_info_create(). */ - - /* @{ The following fields are not protected by any latch. - Unfortunately, this means that they must be aligned to - the machine word, i.e., they cannot be turned into bit-fields. */ - buf_block_t* root_guess;/*!< the root page frame when it was last time - fetched, or NULL */ - ulint hash_analysis; /*!< when this exceeds - BTR_SEARCH_HASH_ANALYSIS, the hash - analysis starts; this is reset if no - success noticed */ - ibool last_hash_succ; /*!< TRUE if the last search would have - succeeded, or did succeed, using the hash - index; NOTE that the value here is not exact: - it is not calculated for every search, and the - calculation itself is not always accurate! */ - ulint n_hash_potential; - /*!< number of consecutive searches - which would have succeeded, or did succeed, - using the hash index; - the range is 0 .. 
BTR_SEARCH_BUILD_LIMIT + 5 */ - /* @} */ - /*---------------------- @{ */ - ulint n_fields; /*!< recommended prefix length for hash search: - number of full fields */ - ulint n_bytes; /*!< recommended prefix: number of bytes in - an incomplete field - @see BTR_PAGE_MAX_REC_SIZE */ - ibool left_side; /*!< TRUE or FALSE, depending on whether - the leftmost record of several records with - the same prefix should be indexed in the - hash index */ - /*---------------------- @} */ -#ifdef UNIV_SEARCH_PERF_STAT - ulint n_hash_succ; /*!< number of successful hash searches thus - far */ - ulint n_hash_fail; /*!< number of failed hash searches */ - ulint n_patt_succ; /*!< number of successful pattern searches thus - far */ - ulint n_searches; /*!< number of searches */ -#endif /* UNIV_SEARCH_PERF_STAT */ -#ifdef UNIV_DEBUG - ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */ -/** value of btr_search_t::magic_n, used in assertions */ -# define BTR_SEARCH_MAGIC_N 1112765 -#endif /* UNIV_DEBUG */ -}; - -/** The hash index system */ -struct btr_search_sys_t{ - hash_table_t** hash_tables; /*!< the array of adaptive hash index - tables, mapping dtuple_fold values to - rec_t pointers on index pages */ -}; - -/** The adaptive hash index */ -extern btr_search_sys_t* btr_search_sys; - -/** After change in n_fields or n_bytes in info, this many rounds are waited -before starting the hash analysis again: this is to save CPU time when there -is no hope in building a hash index. */ -#define BTR_SEARCH_HASH_ANALYSIS 17 - -/** Limit of consecutive searches for trying a search shortcut on the search -pattern */ -#define BTR_SEARCH_ON_PATTERN_LIMIT 3 - -/** Limit of consecutive searches for trying a search shortcut using -the hash index */ -#define BTR_SEARCH_ON_HASH_LIMIT 3 - -/** We do this many searches before trying to keep the search latch -over calls from MySQL. If we notice someone waiting for the latch, we -again set this much timeout. This is to reduce contention. 
*/ -#define BTR_SEA_TIMEOUT 10000 - -#ifndef UNIV_NONINL -#include "btr0sea.ic" -#endif - -#endif diff --git a/storage/xtradb/include/btr0sea.ic b/storage/xtradb/include/btr0sea.ic deleted file mode 100644 index e963d8a8449..00000000000 --- a/storage/xtradb/include/btr0sea.ic +++ /dev/null @@ -1,210 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/btr0sea.ic -The index tree adaptive search - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#include "dict0mem.h" -#include "btr0cur.h" -#include "buf0buf.h" - -/*********************************************************************//** -Updates the search info. */ -UNIV_INTERN -void -btr_search_info_update_slow( -/*========================*/ - btr_search_t* info, /*!< in/out: search info */ - btr_cur_t* cursor);/*!< in: cursor which was just positioned */ - -/********************************************************************//** -Returns search info for an index. 
-@return search info; search mutex reserved */ -UNIV_INLINE -btr_search_t* -btr_search_get_info( -/*================*/ - dict_index_t* index) /*!< in: index */ -{ - return(index->search_info); -} - -/*********************************************************************//** -Updates the search info. */ -UNIV_INLINE -void -btr_search_info_update( -/*===================*/ - dict_index_t* index, /*!< in: index of the cursor */ - btr_cur_t* cursor) /*!< in: cursor which was just positioned */ -{ - btr_search_t* info; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - info = btr_search_get_info(index); - - info->hash_analysis++; - - if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) { - - /* Do nothing */ - - return; - - } - - ut_ad(cursor->flag != BTR_CUR_HASH); - - btr_search_info_update_slow(info, cursor); -} - -/********************************************************************//** -Returns the adaptive hash index table for a given index key. -@return the adaptive hash index table for a given index key */ -UNIV_INLINE -hash_table_t* -btr_search_get_hash_table( -/*======================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->search_table); - - return(index->search_table); -} - -/********************************************************************//** -Returns the adaptive hash index latch for a given index key. 
-@return the adaptive hash index latch for a given index key */ -UNIV_INLINE -prio_rw_lock_t* -btr_search_get_latch( -/*=================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->search_latch >= btr_search_latch_arr && - index->search_latch < btr_search_latch_arr + - btr_search_index_num); - - return(index->search_latch); -} - -/*********************************************************************//** -Returns the AHI partition number corresponding to a given index ID. */ -UNIV_INLINE -ulint -btr_search_get_key( -/*===============*/ - index_id_t index_id) /*!< in: index ID */ -{ - return(index_id % btr_search_index_num); -} - -/*********************************************************************//** -Initializes AHI-related fields in a newly created index. */ -UNIV_INLINE -void -btr_search_index_init( -/*===============*/ - dict_index_t* index) /*!< in: index */ -{ - index->search_latch = - &btr_search_latch_arr[btr_search_get_key(index->id)]; - index->search_table = - btr_search_sys->hash_tables[btr_search_get_key(index->id)]; -} - -/********************************************************************//** -Latches all adaptive hash index latches in exclusive mode. */ -UNIV_INLINE -void -btr_search_x_lock_all(void) -/*=======================*/ -{ - ulint i; - - for (i = 0; i < btr_search_index_num; i++) { - rw_lock_x_lock(&btr_search_latch_arr[i]); - } -} - -/********************************************************************//** -Unlatches all adaptive hash index latches in exclusive mode. */ -UNIV_INLINE -void -btr_search_x_unlock_all(void) -/*==========================*/ -{ - ulint i; - - for (i = 0; i < btr_search_index_num; i++) { - rw_lock_x_unlock(&btr_search_latch_arr[i]); - } -} - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Checks if the thread has locked all the adaptive hash index latches in the -specified mode. 
- -@return true if all latches are locked by the current thread, false -otherwise. */ -UNIV_INLINE -bool -btr_search_own_all( -/*===============*/ - ulint lock_type) -{ - ulint i; - - for (i = 0; i < btr_search_index_num; i++) { - if (!rw_lock_own(&btr_search_latch_arr[i], lock_type)) { - return(false); - } - } - - return(true); -} - -/********************************************************************//** -Checks if the thread owns any adaptive hash latches in either S or X mode. -@return true if the thread owns at least one latch in any mode. */ -UNIV_INLINE -bool -btr_search_own_any(void) -/*====================*/ -{ - ulint i; - - for (i = 0; i < btr_search_index_num; i++) { - if (rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_SHARED) || - rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_EX)) { - return(true); - } - } - - return(false); -} -#endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/xtradb/include/btr0types.h b/storage/xtradb/include/btr0types.h deleted file mode 100644 index 4bc9c72eccc..00000000000 --- a/storage/xtradb/include/btr0types.h +++ /dev/null @@ -1,204 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/btr0types.h -The index tree general types - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#ifndef btr0types_h -#define btr0types_h - -#include "univ.i" - -#include "rem0types.h" -#include "page0types.h" -#include "sync0rw.h" - -/** Persistent cursor */ -struct btr_pcur_t; -/** B-tree cursor */ -struct btr_cur_t; -/** B-tree search information for the adaptive hash index */ -struct btr_search_t; - -#ifndef UNIV_HOTBACKUP - -/** @brief The array of latches protecting the adaptive search partitions - -These latches protect the -(1) hash index from the corresponding AHI partition; -(2) columns of a record to which we have a pointer in the hash index; - -but do NOT protect: - -(3) next record offset field in a record; -(4) next or previous records on the same page. - -Bear in mind (3) and (4) when using the hash indexes. -*/ - -extern prio_rw_lock_t* btr_search_latch_arr; - -#endif /* UNIV_HOTBACKUP */ - -/** Flag: has the search system been enabled? -Protected by btr_search_latch. */ -extern char btr_search_enabled; - -/** Number of adaptive hash index partitions */ -extern ulint btr_search_index_num; - -#ifdef UNIV_BLOB_DEBUG -# include "buf0types.h" -/** An index->blobs entry for keeping track of off-page column references */ -struct btr_blob_dbg_t; - -/** Insert to index->blobs a reference to an off-page column. 
-@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_insert( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); - -/** Remove from index->blobs a reference to an off-page column. -@param index the index tree -@param b the reference -@param ctx context (for logging) */ -UNIV_INTERN -void -btr_blob_dbg_rbt_delete( -/*====================*/ - dict_index_t* index, /*!< in/out: index tree */ - const btr_blob_dbg_t* b, /*!< in: the reference */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); - -/**************************************************************//** -Add to index->blobs any references to off-page columns from a record. -@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add_rec( -/*=================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Remove from index->blobs any references to off-page columns from a record. -@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove_rec( -/*====================*/ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in/out: index */ - const ulint* offsets,/*!< in: offsets */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Count and add to index->blobs any references to off-page columns -from records on a page. 
-@return number of references added */ -UNIV_INTERN -ulint -btr_blob_dbg_add( -/*=============*/ - const page_t* page, /*!< in: rewritten page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Count and remove from index->blobs any references to off-page columns -from records on a page. -Used when reorganizing a page, before copying the records. -@return number of references removed */ -UNIV_INTERN -ulint -btr_blob_dbg_remove( -/*================*/ - const page_t* page, /*!< in: b-tree page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Restore in index->blobs any references to off-page columns -Used when page reorganize fails due to compressed page overflow. */ -UNIV_INTERN -void -btr_blob_dbg_restore( -/*=================*/ - const page_t* npage, /*!< in: page that failed to compress */ - const page_t* page, /*!< in: copy of original page */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx) /*!< in: context (for logging) */ - MY_ATTRIBUTE((nonnull)); - -/** Operation that processes the BLOB references of an index record -@param[in] rec record on index page -@param[in/out] index the index tree of the record -@param[in] offsets rec_get_offsets(rec,index) -@param[in] ctx context (for logging) -@return number of BLOB references processed */ -typedef ulint (*btr_blob_dbg_op_f) -(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx); - -/**************************************************************//** -Count and process all references to off-page columns on a page. 
-@return number of references processed */ -UNIV_INTERN -ulint -btr_blob_dbg_op( -/*============*/ - const page_t* page, /*!< in: B-tree leaf page */ - const rec_t* rec, /*!< in: record to start from - (NULL to process the whole page) */ - dict_index_t* index, /*!< in/out: index */ - const char* ctx, /*!< in: context (for logging) */ - const btr_blob_dbg_op_f op) /*!< in: operation on records */ - MY_ATTRIBUTE((nonnull(1,3,4,5))); -#else /* UNIV_BLOB_DEBUG */ -# define btr_blob_dbg_add_rec(rec, index, offsets, ctx) ((void) 0) -# define btr_blob_dbg_add(page, index, ctx) ((void) 0) -# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx) ((void) 0) -# define btr_blob_dbg_remove(page, index, ctx) ((void) 0) -# define btr_blob_dbg_restore(npage, page, index, ctx) ((void) 0) -# define btr_blob_dbg_op(page, rec, index, ctx, op) ((void) 0) -#endif /* UNIV_BLOB_DEBUG */ - -/** The size of a reference to data stored on a different page. -The reference is stored at the end of the prefix of the field -in the index record. */ -#define BTR_EXTERN_FIELD_REF_SIZE 20 - -/** A BLOB field reference full of zero, for use in assertions and tests. -Initially, BLOB field references are set to zero, in -dtuple_convert_big_rec(). */ -extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]; - -#endif diff --git a/storage/xtradb/include/buf0buddy.h b/storage/xtradb/include/buf0buddy.h deleted file mode 100644 index 09768dda92f..00000000000 --- a/storage/xtradb/include/buf0buddy.h +++ /dev/null @@ -1,77 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0buddy.h -Binary buddy allocator for compressed pages - -Created December 2006 by Marko Makela -*******************************************************/ - -#ifndef buf0buddy_h -#define buf0buddy_h - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE -#endif - -#include "univ.i" -#include "buf0types.h" - -/**********************************************************************//** -Allocate a block. The thread calling this function must hold -buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any -block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired. -This function should only be used for allocating compressed page frames. -@return allocated block, never NULL */ -UNIV_INLINE -byte* -buf_buddy_alloc( -/*============*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool in which - the page resides */ - ulint size, /*!< in: compressed page size - (between UNIV_ZIP_SIZE_MIN and - UNIV_PAGE_SIZE) */ - ibool* lru) /*!< in: pointer to a variable - that will be assigned TRUE if - storage was allocated from the - LRU list and buf_pool->LRU_list_mutex - was temporarily released */ - MY_ATTRIBUTE((malloc, nonnull)); - -/**********************************************************************//** -Deallocate a block. 
*/ -UNIV_INLINE -void -buf_buddy_free( -/*===========*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool in which - the block resides */ - void* buf, /*!< in: block to be freed, must not - be pointed to by the buffer pool */ - ulint size) /*!< in: block size, - up to UNIV_PAGE_SIZE */ - MY_ATTRIBUTE((nonnull)); - -#ifndef UNIV_NONINL -# include "buf0buddy.ic" -#endif - -#endif /* buf0buddy_h */ diff --git a/storage/xtradb/include/buf0buddy.ic b/storage/xtradb/include/buf0buddy.ic deleted file mode 100644 index a5fb510dd19..00000000000 --- a/storage/xtradb/include/buf0buddy.ic +++ /dev/null @@ -1,142 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0buddy.ic -Binary buddy allocator for compressed pages - -Created December 2006 by Marko Makela -*******************************************************/ - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE -#endif - -#include "buf0buf.h" -#include "buf0buddy.h" -#include "ut0ut.h" -#include "sync0sync.h" - -/**********************************************************************//** -Allocate a block. 
The thread calling this function must hold -buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any -block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired. -@return allocated block, never NULL */ -UNIV_INTERN -void* -buf_buddy_alloc_low( -/*================*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint i, /*!< in: index of buf_pool->zip_free[], - or BUF_BUDDY_SIZES */ - ibool* lru) /*!< in: pointer to a variable that - will be assigned TRUE if storage was - allocated from the LRU list and - buf_pool->LRU_list_mutex was - temporarily released */ - MY_ATTRIBUTE((malloc)); - -/**********************************************************************//** -Deallocate a block. */ -UNIV_INTERN -void -buf_buddy_free_low( -/*===============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - void* buf, /*!< in: block to be freed, must not be - pointed to by the buffer pool */ - ulint i) /*!< in: index of buf_pool->zip_free[], - or BUF_BUDDY_SIZES */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Get the index of buf_pool->zip_free[] for a given block size. -@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ -UNIV_INLINE -ulint -buf_buddy_get_slot( -/*===============*/ - ulint size) /*!< in: block size */ -{ - ulint i; - ulint s; - - ut_ad(size >= UNIV_ZIP_SIZE_MIN); - - for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) { - } - - ut_ad(i <= BUF_BUDDY_SIZES); - return(i); -} - -/**********************************************************************//** -Allocate a block. The thread calling this function must hold -buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any -block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired. -This function should only be used for allocating compressed page frames. 
-@return allocated block, never NULL */ -UNIV_INLINE -byte* -buf_buddy_alloc( -/*============*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool in which - the page resides */ - ulint size, /*!< in: compressed page size - (between UNIV_ZIP_SIZE_MIN and - UNIV_PAGE_SIZE) */ - ibool* lru) /*!< in: pointer to a variable - that will be assigned TRUE if - storage was allocated from the - LRU list and buf_pool->LRU_list_mutex - was temporarily released */ -{ - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(ut_is_2pow(size)); - ut_ad(size >= UNIV_ZIP_SIZE_MIN); - ut_ad(size <= UNIV_PAGE_SIZE); - - return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), - lru)); -} - -/**********************************************************************//** -Deallocate a block. */ -UNIV_INLINE -void -buf_buddy_free( -/*===========*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool in which - the block resides */ - void* buf, /*!< in: block to be freed, must not - be pointed to by the buffer pool */ - ulint size) /*!< in: block size, - up to UNIV_PAGE_SIZE */ -{ - ut_ad(ut_is_2pow(size)); - ut_ad(size >= UNIV_ZIP_SIZE_MIN); - ut_ad(size <= UNIV_PAGE_SIZE); - - buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size)); -} - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h deleted file mode 100644 index 1899165ace0..00000000000 --- a/storage/xtradb/include/buf0buf.h +++ /dev/null @@ -1,2353 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0buf.h -The database buffer pool high-level routines - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0buf_h -#define buf0buf_h - -#include "univ.i" -#include "fil0fil.h" -#include "mtr0types.h" -#include "buf0types.h" -#include "hash0hash.h" -#include "ut0byte.h" -#include "page0types.h" -#ifndef UNIV_HOTBACKUP -#include "ut0rbt.h" -#include "os0proc.h" -#include "log0log.h" - -/** @name Modes for buf_page_get_gen */ -/* @{ */ -#define BUF_GET 10 /*!< get always */ -#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */ -#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make - the block young in the LRU list */ -#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but - set no latch; we have - separated this case, because - it is error-prone programming - not to set a latch, and it - should be used with care */ -#define BUF_GET_IF_IN_POOL_OR_WATCH 15 - /*!< Get the page only if it's in the - buffer pool, if not then set a watch - on the page. */ -#define BUF_GET_POSSIBLY_FREED 16 - /*!< Like BUF_GET, but do not mind - if the file page has been freed. 
*/ -/* @} */ -/** @name Modes for buf_page_get_known_nowait */ -/* @{ */ -#define BUF_MAKE_YOUNG 51 /*!< Move the block to the - start of the LRU list if there - is a danger that the block - would drift out of the buffer - pool*/ -#define BUF_KEEP_OLD 52 /*!< Preserve the current LRU - position of the block. */ -/* @} */ - -#define MAX_BUFFER_POOLS_BITS 6 /*!< Number of bits to representing - a buffer pool ID */ - -#define MAX_BUFFER_POOLS (1 << MAX_BUFFER_POOLS_BITS) - /*!< The maximum number of buffer - pools that can be defined */ - -#define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1) - /*!< Maximum number of concurrent - buffer pool watches */ -#define MAX_PAGE_HASH_LOCKS 1024 /*!< The maximum number of - page_hash locks */ - -extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools - of the database */ -#ifdef UNIV_DEBUG -extern ibool buf_debug_prints;/*!< If this is set TRUE, the program - prints info whenever read or flush - occurs */ -#endif /* UNIV_DEBUG */ -extern ulint srv_buf_pool_instances; -extern ulint srv_buf_pool_curr_size; -#else /* !UNIV_HOTBACKUP */ -extern buf_block_t* back_block1; /*!< first block, for --apply-log */ -extern buf_block_t* back_block2; /*!< second block, for page reorganize */ -#endif /* !UNIV_HOTBACKUP */ - -/** @brief States of a control block -@see buf_page_t - -The enumeration values must be 0..7. 
*/ -enum buf_page_state { - BUF_BLOCK_POOL_WATCH, /*!< a sentinel for the buffer pool - watch, element of buf_pool->watch[] */ - BUF_BLOCK_ZIP_PAGE, /*!< contains a clean - compressed page */ - BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed - page that is in the - buf_pool->flush_list */ - - BUF_BLOCK_NOT_USED, /*!< is in the free list; - must be after the BUF_BLOCK_ZIP_ - constants for compressed-only pages - @see buf_block_state_valid() */ - BUF_BLOCK_READY_FOR_USE, /*!< when buf_LRU_get_free_block - returns a block, it is in this state */ - BUF_BLOCK_FILE_PAGE, /*!< contains a buffered file page */ - BUF_BLOCK_MEMORY, /*!< contains some main memory - object */ - BUF_BLOCK_REMOVE_HASH /*!< hash index should be removed - before putting to the free list */ -}; - - -/** This structure defines information we will fetch from each buffer pool. It -will be used to print table IO stats */ -struct buf_pool_info_t{ - /* General buffer pool info */ - ulint pool_unique_id; /*!< Buffer Pool ID */ - ulint pool_size; /*!< Buffer Pool size in pages */ - ulint pool_size_bytes; - ulint lru_len; /*!< Length of buf_pool->LRU */ - ulint old_lru_len; /*!< buf_pool->LRU_old_len */ - ulint free_list_len; /*!< Length of buf_pool->free list */ - ulint flush_list_len; /*!< Length of buf_pool->flush_list */ - ulint n_pend_unzip; /*!< buf_pool->n_pend_unzip, pages - pending decompress */ - ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages - pending read */ - ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */ - ulint n_pending_flush_single_page;/*!< Pages pending to be - flushed as part of single page - flushes issued by various user - threads */ - ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH - LIST */ - ulint n_pages_made_young; /*!< number of pages made young */ - ulint n_pages_not_made_young; /*!< number of pages not made young */ - ulint n_pages_read; /*!< buf_pool->n_pages_read */ - ulint n_pages_created; /*!< buf_pool->n_pages_created */ - ulint 
n_pages_written; /*!< buf_pool->n_pages_written */ - ulint n_page_gets; /*!< buf_pool->n_page_gets */ - ulint n_ra_pages_read_rnd; /*!< buf_pool->n_ra_pages_read_rnd, - number of pages readahead */ - ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number - of pages readahead */ - ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted, - number of readahead pages evicted - without access */ - ulint n_page_get_delta; /*!< num of buffer pool page gets since - last printout */ - - /* Buffer pool access stats */ - double page_made_young_rate; /*!< page made young rate in pages - per second */ - double page_not_made_young_rate;/*!< page not made young rate - in pages per second */ - double pages_read_rate; /*!< num of pages read per second */ - double pages_created_rate; /*!< num of pages create per second */ - double pages_written_rate; /*!< num of pages written per second */ - ulint page_read_delta; /*!< num of pages read since last - printout */ - ulint young_making_delta; /*!< num of pages made young since - last printout */ - ulint not_young_making_delta; /*!< num of pages not make young since - last printout */ - - /* Statistics about read ahead algorithm. 
*/ - double pages_readahead_rnd_rate;/*!< random readahead rate in pages per - second */ - double pages_readahead_rate; /*!< readahead rate in pages per - second */ - double pages_evicted_rate; /*!< rate of readahead page evicted - without access, in pages per second */ - - /* Stats about LRU eviction */ - ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU - list */ - /* Counters for LRU policy */ - ulint io_sum; /*!< buf_LRU_stat_sum.io */ - ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO - for current interval */ - ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */ - ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num - pages decompressed in current - interval */ -}; - -/** The occupied bytes of lists in all buffer pools */ -struct buf_pools_list_size_t { - ulint LRU_bytes; /*!< LRU size in bytes */ - ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */ - ulint flush_list_bytes; /*!< flush_list size in bytes */ -}; - -#ifndef UNIV_HOTBACKUP - -/********************************************************************//** -Creates the buffer pool. -@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */ -UNIV_INTERN -dberr_t -buf_pool_init( -/*=========*/ - ulint size, /*!< in: Size of the total pool in bytes */ - ulint n_instances); /*!< in: Number of instances */ -/********************************************************************//** -Frees the buffer pool at shutdown. This must not be invoked before -freeing all mutexes. */ -UNIV_INTERN -void -buf_pool_free( -/*==========*/ - ulint n_instances); /*!< in: numbere of instances to free */ - -/********************************************************************//** -Clears the adaptive hash index on all pages in the buffer pool. */ -UNIV_INTERN -void -buf_pool_clear_hash_index(void); -/*===========================*/ - -/********************************************************************//** -Relocate a buffer control block. Relocates the block on the LRU list -and in buf_pool->page_hash. 
Does not relocate bpage->list. -The caller must take care of relocating bpage->list. */ -UNIV_INTERN -void -buf_relocate( -/*=========*/ - buf_page_t* bpage, /*!< in/out: control block being relocated; - buf_page_get_state(bpage) must be - BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */ - buf_page_t* dpage); /*!< in/out: destination control block */ -/*********************************************************************//** -Gets the current size of buffer buf_pool in bytes. -@return size in bytes */ -UNIV_INLINE -ulint -buf_pool_get_curr_size(void); -/*========================*/ -/*********************************************************************//** -Gets the current size of buffer buf_pool in frames. -@return size in pages */ -UNIV_INLINE -ulint -buf_pool_get_n_pages(void); -/*=======================*/ -/********************************************************************//** -Gets the smallest oldest_modification lsn for any page in the pool. Returns -zero if all modified pages have been flushed to disk. -@return oldest modification in pool, zero if none */ -UNIV_INTERN -lsn_t -buf_pool_get_oldest_modification(void); -/*==================================*/ - -/********************************************************************//** -Gets the smallest oldest_modification lsn for any page in the pool. Returns -zero if all modified pages have been flushed to disk. -@return oldest modification in pool, zero if none */ -UNIV_INTERN -lsn_t -buf_pool_get_oldest_modification_peek(void); -/*=======================================*/ - -/********************************************************************//** -Allocates a buf_page_t descriptor. This function must succeed. In case -of failure we assert in this function. */ -UNIV_INLINE -buf_page_t* -buf_page_alloc_descriptor(void) -/*===========================*/ - MY_ATTRIBUTE((malloc)); -/********************************************************************//** -Free a buf_page_t descriptor. 
*/ -UNIV_INLINE -void -buf_page_free_descriptor( -/*=====================*/ - buf_page_t* bpage) /*!< in: bpage descriptor to free. */ - MY_ATTRIBUTE((nonnull)); - -/********************************************************************//** -Allocates a buffer block. -@return own: the allocated block, in state BUF_BLOCK_MEMORY */ -UNIV_INTERN -buf_block_t* -buf_block_alloc( -/*============*/ - buf_pool_t* buf_pool); /*!< in: buffer pool instance, - or NULL for round-robin selection - of the buffer pool */ -/********************************************************************//** -Frees a buffer block which does not contain a file page. */ -UNIV_INLINE -void -buf_block_free( -/*===========*/ - buf_block_t* block); /*!< in, own: block to be freed */ -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Copies contents of a buffer frame to a given buffer. -@return buf */ -UNIV_INLINE -byte* -buf_frame_copy( -/*===========*/ - byte* buf, /*!< in: buffer to copy to */ - const buf_frame_t* frame); /*!< in: buffer frame */ -#ifndef UNIV_HOTBACKUP -/**************************************************************//** -NOTE! The following macros should be used instead of buf_page_get_gen, -to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed -in LA! */ -#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\ - SP, ZS, OF, LA, NULL,\ - BUF_GET, __FILE__, __LINE__, MTR) -/**************************************************************//** -Use these macros to bufferfix a page with no latching. Remember not to -read the contents of the page unless you know it is safe. Do not modify -the contents of the page! We have separated this case, because it is -error-prone programming not to set a latch, and it should be used -with care. 
*/ -#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\ - SP, ZS, OF, RW_NO_LATCH, NULL,\ - BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) -/********************************************************************//** -This is the general function used to get optimistic access to a database -page. -@return TRUE if success */ -UNIV_INTERN -ibool -buf_page_optimistic_get( -/*====================*/ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /*!< in: guessed block */ - ib_uint64_t modify_clock,/*!< in: modify clock value */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mini-transaction */ -/********************************************************************//** -This is used to get access to a known database page, when no waiting can be -done. -@return TRUE if success */ -UNIV_INTERN -ibool -buf_page_get_known_nowait( -/*======================*/ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /*!< in: the known page */ - ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mini-transaction */ - -/*******************************************************************//** -Given a tablespace id and page number tries to get that page. If the -page is not in the buffer pool it is not loaded and NULL is returned. -Suitable for using when holding the lock_sys_t::mutex. */ -UNIV_INTERN -buf_block_t* -buf_page_try_get_func( -/*==================*/ - ulint space_id,/*!< in: tablespace id */ - ulint page_no,/*!< in: page number */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ - bool possibly_freed, /*!< in: don't mind if page is freed */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr); /*!< in: mini-transaction */ - -/** Tries to get a page. 
If the page is not in the buffer pool it is -not loaded. Suitable for using when holding the lock_sys_t::mutex. -Expands to a single buf_page_try_get_func() call expression that requests -RW_S_LATCH; the macro carries no trailing semicolon, so the caller -terminates the statement and the result can be used inside an expression. -@param space_id in: tablespace id -@param page_no in: page number -@param mtr in: mini-transaction -@return the page if in buffer pool, NULL if not */ -#define buf_page_try_get(space_id, page_no, mtr) \ - buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \ - __FILE__, __LINE__, mtr) - -/********************************************************************//** -Get read access to a compressed page (usually of type -FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). -The page must be released with buf_page_release_zip(). -NOTE: the page is not protected by any latch. Mutual exclusion has to -be implemented at a higher level. In other words, all possible -accesses to a given page through this function must be protected by -the same set of mutexes or latches. -@return pointer to the block, or NULL if not compressed */ -UNIV_INTERN -buf_page_t* -buf_page_get_zip( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size */ - ulint offset);/*!< in: page number */ -/********************************************************************//** -This is the general function used to get access to a database page.
-@return pointer to the block or NULL */ -UNIV_INTERN -buf_block_t* -buf_page_get_gen( -/*=============*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint offset, /*!< in: page number */ - ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_block_t* guess, /*!< in: guessed block or NULL */ - ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH or - BUF_GET_IF_IN_POOL_OR_WATCH */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr, /*!< in: mini-transaction */ - dberr_t* err = NULL); /*!< out: error code */ -/********************************************************************//** -Initializes a page to the buffer buf_pool. The page is usually not read -from a file even if it cannot be found in the buffer buf_pool. This is one -of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_get_gen). -@return pointer to the block, page bufferfixed */ -UNIV_INTERN -buf_block_t* -buf_page_create( -/*============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space in units of - a page */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -#else /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. 
*/ -UNIV_INTERN -void -buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space - in units of a page */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - buf_block_t* block); /*!< in: block to init */ -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Releases a compressed-only page acquired with buf_page_get_zip(). */ -UNIV_INLINE -void -buf_page_release_zip( -/*=================*/ - buf_page_t* bpage); /*!< in: buffer block */ -/********************************************************************//** -Decrements the bufferfix count of a buffer control block and releases -a latch, if specified. */ -UNIV_INLINE -void -buf_page_release( -/*=============*/ - buf_block_t* block, /*!< in: buffer block */ - ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH, - RW_NO_LATCH */ -/********************************************************************//** -Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from slipping out of -the buffer pool. */ -UNIV_INTERN -void -buf_page_make_young( -/*================*/ - buf_page_t* bpage); /*!< in: buffer block of a file page */ -/********************************************************************//** -Returns TRUE if the page can be found in the buffer pool hash table. - -NOTE that it is possible that the page is not yet read from disk, -though. - -@return TRUE if found in the page hash table */ -UNIV_INLINE -ibool -buf_page_peek( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG -/********************************************************************//** -Sets file_page_was_freed TRUE if the page is found in the buffer pool. 
-This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. -@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN -buf_page_t* -buf_page_set_file_page_was_freed( -/*=============================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -/********************************************************************//** -Sets file_page_was_freed FALSE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. -NOTE(review): the usage sentence above is identical to the one on -buf_page_set_file_page_was_freed(); presumably this variant is meant for -when the page is allocated again -- confirm against the callers. -@return control block if found in page hash table, otherwise NULL */ -UNIV_INTERN -buf_page_t* -buf_page_reset_file_page_was_freed( -/*===============================*/ - ulint space, /*!< in: space id */ - ulint offset); /*!< in: page number */ -#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ -/********************************************************************//** -Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ -UNIV_INLINE -ulint -buf_page_get_freed_page_clock( -/*==========================*/ - const buf_page_t* bpage) /*!< in: block */ - MY_ATTRIBUTE((pure)); -/********************************************************************//** -Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ -UNIV_INLINE -ulint -buf_block_get_freed_page_clock( -/*===========================*/ - const buf_block_t* block) /*!< in: block */ - MY_ATTRIBUTE((pure)); - -/********************************************************************//** -Tells if a block is still close enough to the MRU end of the LRU list -meaning that it is not in danger of getting evicted and also implying -that it has been accessed recently. -Note that this is for heuristics only and does not reserve buffer pool -mutex.
-@return TRUE if block is close to MRU end of LRU */ -UNIV_INLINE -ibool -buf_page_peek_if_young( -/*===================*/ - const buf_page_t* bpage); /*!< in: block */ -/********************************************************************//** -Recommends a move of a block to the start of the LRU list if there is danger -of dropping from the buffer pool. NOTE: does not reserve the buffer pool -mutex. -@return TRUE if should be made younger */ -UNIV_INLINE -ibool -buf_page_peek_if_too_old( -/*=====================*/ - const buf_page_t* bpage); /*!< in: block to make younger */ -/********************************************************************//** -Gets the youngest modification log sequence number for a frame. -Returns zero if not file page or no modification occurred yet. -@return newest modification to page */ -UNIV_INLINE -lsn_t -buf_page_get_newest_modification( -/*=============================*/ - const buf_page_t* bpage); /*!< in: block containing the - page frame */ -/********************************************************************//** -Increments the modify clock of a frame by 1. The caller must (1) own the -LRU list mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ -UNIV_INLINE -void -buf_block_modify_clock_inc( -/*=======================*/ - buf_block_t* block); /*!< in: block */ -/********************************************************************//** -Returns the value of the modify clock. The caller must have an s-lock -or x-lock on the block. -@return value */ -UNIV_INLINE -ib_uint64_t -buf_block_get_modify_clock( -/*=======================*/ - buf_block_t* block); /*!< in: block */ -/*******************************************************************//** -Increments the bufferfix count. 
*/ -UNIV_INLINE -void -buf_block_buf_fix_inc_func( -/*=======================*/ -# ifdef UNIV_SYNC_DEBUG - const char* file, /*!< in: file name */ - ulint line, /*!< in: line */ -# endif /* UNIV_SYNC_DEBUG */ - buf_block_t* block) /*!< in/out: block to bufferfix */ - MY_ATTRIBUTE((nonnull)); - -/*******************************************************************//** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_fix( -/*===========*/ - buf_block_t* block); /*!< in/out: block to bufferfix */ - -/*******************************************************************//** -Decrements the bufferfix count. -NOTE(review): this text used to be a copy of the buf_block_fix() comment -("Increments"); confirm the wording against the definition. */ -UNIV_INLINE -void -buf_block_unfix( -/*=============*/ - buf_block_t* block); /*!< in/out: block to unfix */ - -# ifdef UNIV_SYNC_DEBUG -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) -# else /* UNIV_SYNC_DEBUG */ -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f unused in this variant (not forwarded) -@param l unused in this variant (not forwarded) */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) -# endif /* UNIV_SYNC_DEBUG */ -#else /* !UNIV_HOTBACKUP */ -# define buf_block_modify_clock_inc(block) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/** Checks if the page is in crc32 checksum format. -@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in crc32 checksum format */ -bool -buf_page_is_checksum_valid_crc32( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) - MY_ATTRIBUTE((warn_unused_result)); - -/** Checks if the page is in innodb checksum format.
-@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in innodb checksum format */ -bool -buf_page_is_checksum_valid_innodb( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) - MY_ATTRIBUTE((warn_unused_result)); - -/** Checks if the page is in none checksum format. -@param[in] read_buf database page -@param[in] checksum_field1 new checksum field -@param[in] checksum_field2 old checksum field -@return true if the page is in none checksum format */ -bool -buf_page_is_checksum_valid_none( - const byte* read_buf, - ulint checksum_field1, - ulint checksum_field2) - MY_ATTRIBUTE((warn_unused_result)); - -/** Check if a page is corrupt. -@param[in] check_lsn true if LSN should be checked -@param[in] read_buf Page to be checked -@param[in] zip_size compressed size or 0 -@param[in] space Pointer to tablespace -@return true if corrupted, false if not */ -UNIV_INTERN -bool -buf_page_is_corrupted( - bool check_lsn, - const byte* read_buf, - ulint zip_size, - const fil_space_t* space) - MY_ATTRIBUTE((warn_unused_result)); -/** Check if a page is all zeroes. -@param[in] read_buf database page -@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 -@return whether the page is all zeroes */ -UNIV_INTERN -bool -buf_page_is_zeroes(const byte* read_buf, ulint zip_size); -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Gets the space id, page offset, and byte offset within page of a -pointer pointing to a buffer frame containing a file page. */ -UNIV_INLINE -void -buf_ptr_get_fsp_addr( -/*=================*/ - const void* ptr, /*!< in: pointer to a buffer frame */ - ulint* space, /*!< out: space id */ - fil_addr_t* addr); /*!< out: page offset and byte offset */ -/**********************************************************************//** -Gets the hash value of a block. 
This can be used in searches in the -lock hash table. -@return lock hash value */ -UNIV_INLINE -ulint -buf_block_get_lock_hash_val( -/*========================*/ - const buf_block_t* block) /*!< in: block */ - MY_ATTRIBUTE((pure)); -#ifdef UNIV_DEBUG -/*********************************************************************//** -Finds a block in the buffer pool that points to a -given compressed page. -@return buffer block pointing to the compressed page, or NULL */ -UNIV_INTERN -buf_block_t* -buf_pool_contains_zip( -/*==================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - const void* data); /*!< in: pointer to compressed page */ -#endif /* UNIV_DEBUG */ - -/*********************************************************************** -FIXME_FTS: Gets the frame the pointer is pointing to. */ -UNIV_INLINE -buf_frame_t* -buf_frame_align( -/*============*/ - /* out: pointer to frame */ - byte* ptr); /* in: pointer to a frame */ - - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/*********************************************************************//** -Validates the buffer pool data structure. -@return TRUE */ -UNIV_INTERN -ibool -buf_validate(void); -/*==============*/ -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/*********************************************************************//** -Prints info of the buffer pool data structure. */ -UNIV_INTERN -void -buf_print(void); -/*============*/ -#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ -enum buf_page_print_flags { - /** Do not crash at the end of buf_page_print(). */ - BUF_PAGE_PRINT_NO_CRASH = 1, - /** Do not print the full page dump. */ - BUF_PAGE_PRINT_NO_FULL = 2 -}; - -/********************************************************************//** -Prints a page to stderr. 
*/ -UNIV_INTERN -void -buf_page_print( -/*===========*/ - const byte* read_buf, /*!< in: a database page */ - ulint zip_size, /*!< in: compressed page size, or - 0 for uncompressed pages */ - ulint flags) /*!< in: 0 or - BUF_PAGE_PRINT_NO_CRASH or - BUF_PAGE_PRINT_NO_FULL */ - UNIV_COLD; -/********************************************************************//** -Decompress a block. -@return TRUE if successful */ -UNIV_INTERN -ibool -buf_zip_decompress( -/*===============*/ - buf_block_t* block, /*!< in/out: block */ - ibool check); /*!< in: TRUE=verify the page checksum */ -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_DEBUG -/*********************************************************************//** -Returns the number of latched pages in the buffer pool. -@return number of latched pages */ -UNIV_INTERN -ulint -buf_get_latched_pages_number(void); -/*==============================*/ -#endif /* UNIV_DEBUG */ -/*********************************************************************//** -Returns the number of pending buf pool read ios. -@return number of pending read I/O operations */ -UNIV_INTERN -ulint -buf_get_n_pending_read_ios(void); -/*============================*/ -/*********************************************************************//** -Prints info of the buffer i/o. */ -UNIV_INTERN -void -buf_print_io( -/*=========*/ - FILE* file); /*!< in: file where to print */ -/*******************************************************************//** -Collect buffer pool stats information for a buffer pool. 
Also -record aggregated stats if there are more than one buffer pool -in the server */ -UNIV_INTERN -void -buf_stats_get_pool_info( -/*====================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool */ - ulint pool_id, /*!< in: buffer pool ID */ - buf_pool_info_t* all_pool_info); /*!< in/out: buffer pool info - to fill */ -/*********************************************************************//** -Returns the ratio in percents of modified pages in the buffer pool / -database pages in the buffer pool. -@return modified page percentage ratio */ -UNIV_INTERN -double -buf_get_modified_ratio_pct(void); -/*============================*/ -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN -void -buf_refresh_io_stats( -/*=================*/ - buf_pool_t* buf_pool); /*!< buffer pool instance */ -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN -void -buf_refresh_io_stats_all(void); -/*=================*/ -/*********************************************************************//** -Asserts that all file pages in the buffer are in a replaceable state. -@return TRUE */ -UNIV_INTERN -ibool -buf_all_freed(void); -/*===============*/ -/*********************************************************************//** -Checks that there currently are no pending i/o-operations for the buffer -pool. -@return number of pending i/o operations */ -UNIV_INTERN -ulint -buf_pool_check_no_pending_io(void); -/*==============================*/ -/*********************************************************************//** -Invalidates the file pages in the buffer pool when an archive recovery is -completed. All the file pages buffered must be in a replaceable state when -this function is called: not latched and not modified. 
*/ -UNIV_INTERN -void -buf_pool_invalidate(void); -/*=====================*/ -#endif /* !UNIV_HOTBACKUP */ - -/*======================================================================== ---------------------------- LOWER LEVEL ROUTINES ------------------------- -=========================================================================*/ - -#ifdef UNIV_SYNC_DEBUG -/*********************************************************************//** -Adds latch level info for the rw-lock protecting the buffer frame. This -should be called in the debug version after a successful latching of a -page if we know the latching order level of the acquired latch. */ -UNIV_INLINE -void -buf_block_dbg_add_level( -/*====================*/ - buf_block_t* block, /*!< in: buffer page - where we have acquired latch */ - ulint level); /*!< in: latching order level */ -#else /* UNIV_SYNC_DEBUG */ -# define buf_block_dbg_add_level(block, level) /* nothing */ -#endif /* UNIV_SYNC_DEBUG */ -/*********************************************************************//** -Gets the state of a block. -@return state */ -UNIV_INLINE -enum buf_page_state -buf_page_get_state( -/*===============*/ - const buf_page_t* bpage); /*!< in: pointer to the control - block */ -/*********************************************************************//** -Gets the state name for state of a block -@return name or "CORRUPTED" */ -UNIV_INLINE -const char* -buf_get_state_name( -/*===============*/ - const buf_block_t* block); /*!< in: pointer to the control - block */ -/*********************************************************************//** -Gets the state of a block. -@return state */ -UNIV_INLINE -enum buf_page_state -buf_block_get_state( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Sets the state of a block. 
*/ -UNIV_INLINE -void -buf_page_set_state( -/*===============*/ - buf_page_t* bpage, /*!< in/out: pointer to control block */ - enum buf_page_state state); /*!< in: state */ -/*********************************************************************//** -Sets the state of a block. */ -UNIV_INLINE -void -buf_block_set_state( -/*================*/ - buf_block_t* block, /*!< in/out: pointer to control block */ - enum buf_page_state state); /*!< in: state */ -/*********************************************************************//** -Determines if a block is mapped to a tablespace. -@return TRUE if mapped */ -UNIV_INLINE -ibool -buf_page_in_file( -/*=============*/ - const buf_page_t* bpage) /*!< in: pointer to control block */ - MY_ATTRIBUTE((pure)); -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Determines if a block should be on unzip_LRU list. -@return TRUE if block belongs to unzip_LRU */ -UNIV_INLINE -ibool -buf_page_belongs_to_unzip_LRU( -/*==========================*/ - const buf_page_t* bpage) /*!< in: pointer to control block */ - MY_ATTRIBUTE((pure)); - -/*********************************************************************//** -Gets the mutex of a block. -@return pointer to mutex protecting bpage */ -UNIV_INLINE -ib_mutex_t* -buf_page_get_mutex( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to control block */ - MY_ATTRIBUTE((pure)); - -/*********************************************************************//** -Get the flush type of a page. -@return flush type */ -UNIV_INLINE -buf_flush_t -buf_page_get_flush_type( -/*====================*/ - const buf_page_t* bpage) /*!< in: buffer page */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Set the flush type of a page. 
*/ -UNIV_INLINE -void -buf_page_set_flush_type( -/*====================*/ - buf_page_t* bpage, /*!< in: buffer page */ - buf_flush_t flush_type); /*!< in: flush type */ -/*********************************************************************//** -Map a block to a file page. */ -UNIV_INLINE -void -buf_block_set_file_page( -/*====================*/ - buf_block_t* block, /*!< in/out: pointer to control block */ - ulint space, /*!< in: tablespace id */ - ulint page_no);/*!< in: page number */ -/*********************************************************************//** -Gets the io_fix state of a block. -@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_page_get_io_fix( -/*================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the io_fix state of a block. -@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_block_get_io_fix( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the io_fix state of a block. Does not assert that the -buf_page_get_mutex() mutex is held, to be used in the cases where it is safe -not to hold it. -@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_page_get_io_fix_unlocked( -/*=========================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Sets the io_fix state of a block. */ -UNIV_INLINE -void -buf_page_set_io_fix( -/*================*/ - buf_page_t* bpage, /*!< in/out: control block */ - enum buf_io_fix io_fix);/*!< in: io_fix state */ -/*********************************************************************//** -Sets the io_fix state of a block. 
*/ -UNIV_INLINE -void -buf_block_set_io_fix( -/*=================*/ - buf_block_t* block, /*!< in/out: control block */ - enum buf_io_fix io_fix);/*!< in: io_fix state */ -/*********************************************************************//** -Makes a block sticky. A sticky block implies that even after we release -the buf_pool->LRU_list_mutex and the block->mutex: -* it cannot be removed from the flush_list -* the block descriptor cannot be relocated -* it cannot be removed from the LRU list -Note that: -* the block can still change its position in the LRU list -* the next and previous pointers can change. */ -UNIV_INLINE -void -buf_page_set_sticky( -/*================*/ - buf_page_t* bpage); /*!< in/out: control block */ -/*********************************************************************//** -Removes stickiness of a block. */ -UNIV_INLINE -void -buf_page_unset_sticky( -/*==================*/ - buf_page_t* bpage); /*!< in/out: control block */ -/********************************************************************//** -Determine if a buffer block can be relocated in memory. The block -can be dirty, but it must not be I/O-fixed or bufferfixed. */ -UNIV_INLINE -ibool -buf_page_can_relocate( -/*==================*/ - const buf_page_t* bpage) /*!< control block being relocated */ - MY_ATTRIBUTE((pure)); - -/*********************************************************************//** -Determine if a block has been flagged old. -@return TRUE if old */ -UNIV_INLINE -ibool -buf_page_is_old( -/*============*/ - const buf_page_t* bpage) /*!< in: control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Flag a block old. */ -UNIV_INLINE -void -buf_page_set_old( -/*=============*/ - buf_page_t* bpage, /*!< in/out: control block */ - ibool old); /*!< in: old */ -/*********************************************************************//** -Determine the time of first access of a block in the buffer pool. 
-@return ut_time_ms() at the time of first access, 0 if not accessed */ -UNIV_INLINE -unsigned -buf_page_is_accessed( -/*=================*/ - const buf_page_t* bpage) /*!< in: control block */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************************//** -Flag a block accessed. */ -UNIV_INLINE -void -buf_page_set_accessed( -/*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Gets the buf_block_t handle of a buffered file block if an uncompressed -page frame exists, or NULL. Note: even though bpage is not declared a -const we don't update its value. It is safe to make this pure. -@return control block, or NULL */ -UNIV_INLINE -buf_block_t* -buf_page_get_block( -/*===============*/ - buf_page_t* bpage) /*!< in: control block, or NULL */ - MY_ATTRIBUTE((pure)); -#endif /* !UNIV_HOTBACKUP */ -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets a pointer to the memory frame of a block. -@return pointer to the frame */ -UNIV_INLINE -buf_frame_t* -buf_block_get_frame( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); - -/*********************************************************************//** -Gets a pointer to the memory frame of a block, where block is known not to be -NULL. -@return pointer to the frame */ -UNIV_INLINE -buf_frame_t* -buf_nonnull_block_get_frame( - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); - -#else /* UNIV_DEBUG */ -# define buf_block_get_frame(block) (block ? (block)->frame : 0) -# define buf_nonnull_block_get_frame(block) ((block)->frame) -#endif /* UNIV_DEBUG */ -/*********************************************************************//** -Gets the space id of a block. 
-@return space id */ -UNIV_INLINE -ulint -buf_page_get_space( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the space id of a block. -@return space id */ -UNIV_INLINE -ulint -buf_block_get_space( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_page_get_page_no( -/*=================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_block_get_page_no( -/*==================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_page_get_zip_size( -/*==================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_block_get_zip_size( -/*===================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ - MY_ATTRIBUTE((pure)); -/*********************************************************************//** -Gets the compressed page descriptor corresponding to an uncompressed page -if applicable. */ -#define buf_block_get_page_zip(block) \ - ((block)->page.zip.data ? 
&(block)->page.zip : NULL) -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Gets the block to whose frame the pointer is pointing to. -@return pointer to block, never NULL */ -UNIV_INTERN -buf_block_t* -buf_block_align( -/*============*/ - const byte* ptr); /*!< in: pointer to a frame */ -/********************************************************************//** -Find out if a pointer belongs to a buf_block_t. It can be a pointer to -the buf_block_t itself or a member of it -@return TRUE if ptr belongs to a buf_block_t struct */ -UNIV_INTERN -ibool -buf_pointer_is_block_field( -/*=======================*/ - const void* ptr); /*!< in: pointer not - dereferenced */ -/** Find out if a pointer corresponds to a buf_block_t::mutex. -@param m in: mutex candidate -@return TRUE if m is a buf_block_t::mutex */ -#define buf_pool_is_block_mutex(m) \ - buf_pointer_is_block_field((const void*)(m)) -/** Find out if a pointer corresponds to a buf_block_t::lock. -@param l in: rw-lock candidate -@return TRUE if l is a buf_block_t::lock */ -#define buf_pool_is_block_lock(l) \ - buf_pointer_is_block_field((const void*)(l)) - -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG -/*********************************************************************//** -Gets the compressed page descriptor corresponding to an uncompressed page -if applicable. -@return compressed page descriptor, or NULL */ -UNIV_INLINE -const page_zip_des_t* -buf_frame_get_page_zip( -/*===================*/ - const byte* ptr); /*!< in: pointer to the page */ -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -/********************************************************************//** -Function which inits a page for read to the buffer buf_pool. If the page is -(1) already in buf_pool, or -(2) if we specify to read only ibuf pages and the page is not an ibuf page, or -(3) if the space is deleted or being deleted, -then this function does nothing. 
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock -on the buffer frame. The io-handler must take care that the flag is cleared -and the lock released later. -@return pointer to the block or NULL */ -UNIV_INTERN -buf_page_t* -buf_page_init_for_read( -/*===================*/ - dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size, or 0 */ - ibool unzip, /*!< in: TRUE=request uncompressed page */ - ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong - version of the tablespace in case we have done - DISCARD + IMPORT */ - ulint offset);/*!< in: page number */ -/** Complete a read or write request of a file page to or from the buffer pool. -@param[in,out] bpage Page to complete -@return whether the operation succeeded -@retval DB_SUCCESS always when writing, or if a read page was OK -@retval DB_PAGE_CORRUPTED if the checksum fails on a page read -@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but - after decryption normal page checksum does - not match */ -UNIV_INTERN -dberr_t -buf_page_io_complete(buf_page_t* bpage) - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Calculates a folded value of a file page address to use in the page hash -table. -@return the folded value */ -UNIV_INLINE -ulint -buf_page_address_fold( -/*==================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page within space */ - MY_ATTRIBUTE((const)); -/********************************************************************//** -Calculates the index of a buffer pool to the buf_pool[] array. 
-@return the position of the buffer pool in buf_pool[] */ -UNIV_INLINE -ulint -buf_pool_index( -/*===========*/ - const buf_pool_t* buf_pool) /*!< in: buffer pool */ - MY_ATTRIBUTE((nonnull, const)); -/******************************************************************//** -Returns the buffer pool instance given a page instance -@return buf_pool */ -UNIV_INLINE -buf_pool_t* -buf_pool_from_bpage( -/*================*/ - const buf_page_t* bpage); /*!< in: buffer pool page */ -/******************************************************************//** -Returns the buffer pool instance given a block instance -@return buf_pool */ -UNIV_INLINE -buf_pool_t* -buf_pool_from_block( -/*================*/ - const buf_block_t* block); /*!< in: block */ -/******************************************************************//** -Returns the buffer pool instance given space and offset of page -@return buffer pool */ -UNIV_INLINE -buf_pool_t* -buf_pool_get( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: offset of the page within space */ -/******************************************************************//** -Returns the buffer pool instance given its array index -@return buffer pool */ -UNIV_INLINE -buf_pool_t* -buf_pool_from_array( -/*================*/ - ulint index); /*!< in: array index to get - buffer pool instance from */ -/******************************************************************//** -Returns the control block of a file page, NULL if not found. -@return block, NULL if not found */ -UNIV_INLINE -buf_page_t* -buf_page_hash_get_low( -/*==================*/ - buf_pool_t* buf_pool,/*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page within space */ - ulint fold); /*!< in: buf_page_address_fold(space, offset) */ -/******************************************************************//** -Returns the control block of a file page, NULL if not found. 
-If the block is found and lock is not NULL then the appropriate -page_hash lock is acquired in the specified lock mode. Otherwise, -mode value is ignored. It is up to the caller to release the -lock. If the block is found and the lock is NULL then the page_hash -lock is released by this function. -@return block, NULL if not found, or watch sentinel (if watch is true) */ -UNIV_INLINE -buf_page_t* -buf_page_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. */ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - prio_rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode, /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. Ignored if - lock == NULL */ - bool watch = false); /*!< in: if true, return watch - sentinel also. */ -/******************************************************************//** -Returns the control block of a file page, NULL if not found. -If the block is found and lock is not NULL then the appropriate -page_hash lock is acquired in the specified lock mode. Otherwise, -mode value is ignored. It is up to the caller to release the -lock. If the block is found and the lock is NULL then the page_hash -lock is released by this function. -@return block, NULL if not found */ -UNIV_INLINE -buf_block_t* -buf_block_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. */ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - prio_rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. 
If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode); /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. Ignored if - lock == NULL */ -/* There are four different ways we can try to get a bpage or block -from the page hash: -1) Caller already holds the appropriate page hash lock: in the case call -buf_page_hash_get_low() function. -2) Caller wants to hold page hash lock in x-mode -3) Caller wants to hold page hash lock in s-mode -4) Caller doesn't want to hold page hash lock */ -#define buf_page_hash_get_s_locked(b, s, o, l) \ - buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED) -#define buf_page_hash_get_x_locked(b, s, o, l) \ - buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX) -#define buf_page_hash_get(b, s, o) \ - buf_page_hash_get_locked(b, s, o, NULL, 0) -#define buf_page_get_also_watch(b, s, o) \ - buf_page_hash_get_locked(b, s, o, NULL, 0, true) - -#define buf_block_hash_get_s_locked(b, s, o, l) \ - buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED) -#define buf_block_hash_get_x_locked(b, s, o, l) \ - buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX) -#define buf_block_hash_get(b, s, o) \ - buf_block_hash_get_locked(b, s, o, NULL, 0) - -/*********************************************************************//** -Gets the current length of the free list of buffer blocks. -@return length of the free list */ -UNIV_INTERN -ulint -buf_get_free_list_len(void); -/*=======================*/ - -/********************************************************************//** -Determine if a block is a sentinel for a buffer pool watch. -@return TRUE if a sentinel for a buffer pool watch, FALSE if not */ -UNIV_INTERN -ibool -buf_pool_watch_is_sentinel( -/*=======================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - const buf_page_t* bpage) /*!< in: block */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/****************************************************************//** -Add watch for the given page to be read in. 
Caller must have -appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race -condition with buf_LRU_free_page inserting the same page into the page hash. -This function may release the hash_lock and reacquire it. -@return NULL if watch set, block if the page is in the buffer pool */ -UNIV_INTERN -buf_page_t* -buf_pool_watch_set( -/*===============*/ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - ulint fold) /*!< in: buf_page_address_fold(space, offset) */ - MY_ATTRIBUTE((warn_unused_result)); -/****************************************************************//** -Stop watching if the page has been read in. -buf_pool_watch_set(space,offset) must have returned NULL before. */ -UNIV_INTERN -void -buf_pool_watch_unset( -/*=================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ -/****************************************************************//** -Check if the page has been read in. -This may only be called after buf_pool_watch_set(space,offset) -has returned NULL and before invoking buf_pool_watch_unset(space,offset). -@return FALSE if the given page was not read in, TRUE if it was */ -UNIV_INTERN -ibool -buf_pool_watch_occurred( -/*====================*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Get total buffer pool statistics. */ -UNIV_INTERN -void -buf_get_total_list_len( -/*===================*/ - ulint* LRU_len, /*!< out: length of all LRU lists */ - ulint* free_len, /*!< out: length of all free lists */ - ulint* flush_list_len);/*!< out: length of all flush lists */ -/********************************************************************//** -Get total list size in bytes from all buffer pools. 
*/ -UNIV_INTERN -void -buf_get_total_list_size_in_bytes( -/*=============================*/ - buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes - in all buffer pools */ -/********************************************************************//** -Get total buffer pool statistics. */ -UNIV_INTERN -void -buf_get_total_stat( -/*===============*/ - buf_pool_stat_t*tot_stat); /*!< out: buffer pool stats */ -/*********************************************************************//** -Get the nth chunk's buffer block in the specified buffer pool. -@return the nth chunk's buffer block. */ -UNIV_INLINE -buf_block_t* -buf_get_nth_chunk_block( -/*====================*/ - const buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint n, /*!< in: nth chunk in the buffer pool */ - ulint* chunk_size); /*!< in: chunk size */ - -/********************************************************************//** -Calculate the checksum of a page from compressed table and update the page. */ -UNIV_INTERN -void -buf_flush_update_zip_checksum( -/*==========================*/ - buf_frame_t* page, /*!< in/out: Page to update */ - ulint zip_size, /*!< in: Compressed page size */ - lsn_t lsn); /*!< in: Lsn to stamp on the page */ - -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/********************************************************************//** -Checks if buf_pool->zip_mutex is owned and is serving for a given page as its -block mutex. -@return true if buf_pool->zip_mutex is owned. */ -UNIV_INLINE -bool -buf_own_zip_mutex_for_page( -/*=======================*/ - const buf_page_t* bpage) - MY_ATTRIBUTE((nonnull,warn_unused_result)); -#endif /* UNIV_DEBUG */ - -/** Encryption and page_compression hook that is called just before -a page is written to disk. 
-@param[in,out] space tablespace -@param[in,out] bpage buffer page -@param[in] src_frame physical page frame that is being encrypted -@return page frame to be written to file -(may be src_frame or an encrypted/compressed copy of it) */ -UNIV_INTERN -byte* -buf_page_encrypt_before_write( - fil_space_t* space, - buf_page_t* bpage, - byte* src_frame); - -/********************************************************************** -The hook that is called after page is written to disk. -The function releases any resources needed for encryption that was allocated -in buf_page_encrypt_before_write */ -UNIV_INTERN -ibool -buf_page_encrypt_after_write( -/*=========================*/ - buf_page_t* page); /*!< in/out: buffer page that was flushed */ - -/********************************************************************//** -The hook that is called just before a page is read from disk. -The function allocates memory that is used to temporarily store disk content -before getting decrypted */ -UNIV_INTERN -byte* -buf_page_decrypt_before_read( -/*=========================*/ - buf_page_t* page, /*!< in/out: buffer page read from disk */ - ulint zip_size); /*!< in: compressed page size, or 0 */ - -/********************************************************************//** -The hook that is called just after a page is read from disk. -The function decrypt disk content into buf_page_t and releases the -temporary buffer that was allocated in buf_page_decrypt_before_read */ -UNIV_INTERN -bool -buf_page_decrypt_after_read( -/*========================*/ - buf_page_t* page); /*!< in/out: buffer page read from disk */ - -/** @brief The temporary memory structure. - -NOTE! The definition appears here only for other modules of this -directory (buf) to see it. Do not use from outside! */ - -typedef struct { - bool reserved; /*!< true if this slot is reserved - */ - byte* crypt_buf; /*!< for encryption the data needs to be - copied to a separate buffer before it's - encrypted&written. 
this as a page can be - read while it's being flushed */ - byte* comp_buf; /*!< for compression we need - temporal buffer because page - can be read while it's being flushed */ - byte* out_buf; /*!< resulting buffer after - encryption/compression. This is a - pointer and not allocated. */ -} buf_tmp_buffer_t; - -/** The common buffer control block structure -for compressed and uncompressed frames */ - -/** Number of bits used for buffer page states. */ -#define BUF_PAGE_STATE_BITS 3 - -struct buf_page_t{ - /** @name General fields - None of these bit-fields must be modified without holding - buf_page_get_mutex() [buf_block_t::mutex or - buf_pool->zip_mutex], since they can be stored in the same - machine word. */ - /* @{ */ - - ib_uint32_t space; /*!< tablespace id. */ - ib_uint32_t offset; /*!< page number. */ - /** count of how manyfold this block is currently bufferfixed */ -#ifdef PAGE_ATOMIC_REF_COUNT - ib_uint32_t buf_fix_count; - - /** type of pending I/O operation; Transitions from BUF_IO_NONE to - BUF_IO_WRITE and back are protected by the buf_page_get_mutex() mutex - and the corresponding flush state mutex. The flush state mutex - protection for io_fix and flush_type is not strictly required, but it - ensures consistent buffer pool instance state snapshots in - buf_pool_validate_instance(). @see enum buf_io_fix */ - byte io_fix; - - byte state; -#else - unsigned buf_fix_count:19; - - /** type of pending I/O operation; also protected by - buf_pool->mutex for writes only @see enum buf_io_fix */ - unsigned io_fix:2; - - /*!< state of the control block. - State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY - need not be protected by buf_page_get_mutex(). @see enum buf_page_state. - State changes that are relevant to page_hash are additionally protected - by the appropriate page_hash mutex i.e.: if a page is in page_hash or - is being added to/removed from page_hash then the corresponding changes - must also be protected by page_hash mutex. 
*/ - unsigned state:BUF_PAGE_STATE_BITS; - -#endif /* PAGE_ATOMIC_REF_COUNT */ - -#ifndef UNIV_HOTBACKUP - unsigned flush_type:2; /*!< if this block is currently being - flushed to disk, this tells the - flush_type. Writes during flushing - protected by buf_page_get_mutex_enter() - mutex and the corresponding flush state - mutex. - @see buf_flush_t */ - unsigned buf_pool_index:6;/*!< index number of the buffer pool - that this block belongs to */ -# if MAX_BUFFER_POOLS > 64 -# error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6" -# endif - /* @} */ -#endif /* !UNIV_HOTBACKUP */ - page_zip_des_t zip; /*!< compressed page; zip.data - (but not the data it points to) is - protected by buf_pool->zip_mutex; - state == BUF_BLOCK_ZIP_PAGE and - zip.data == NULL means an active - buf_pool->watch */ - - ulint write_size; /* Write size is set when this - page is first time written and then - if written again we check is TRIM - operation needed. */ - - bool encrypted; /*!< page is still encrypted */ - - ulint real_size; /*!< Real size of the page - Normal pages == UNIV_PAGE_SIZE - page compressed pages, payload - size alligned to sector boundary. 
- */ - - buf_tmp_buffer_t* slot; /*!< Slot for temporary memory - used for encryption/compression - or NULL */ -#ifndef UNIV_HOTBACKUP - buf_page_t* hash; /*!< node used in chaining to - buf_pool->page_hash or - buf_pool->zip_hash */ -#ifdef UNIV_DEBUG - ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */ - ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */ -#endif /* UNIV_DEBUG */ - - /** @name Page flushing fields */ - /* @{ */ - - UT_LIST_NODE_T(buf_page_t) list; - /*!< based on state, this is a - list node, protected either by - a corresponding list mutex, - in one of the following lists in - buf_pool: - - - BUF_BLOCK_NOT_USED: free - - BUF_BLOCK_FILE_PAGE: flush_list - - BUF_BLOCK_ZIP_DIRTY: flush_list - - BUF_BLOCK_ZIP_PAGE: zip_clean - - If bpage is part of flush_list - then the node pointers are - covered by buf_pool->flush_list_mutex. - Otherwise these pointers are - protected by a corresponding list - mutex. - - The contents of the list node - is undefined if !in_flush_list - && state == BUF_BLOCK_FILE_PAGE, - or if state is one of - BUF_BLOCK_MEMORY, - BUF_BLOCK_REMOVE_HASH or - BUF_BLOCK_READY_IN_USE. */ - -#ifdef UNIV_DEBUG - ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list; - when buf_pool->flush_list_mutex is - free, the following should hold: - in_flush_list - == (state == BUF_BLOCK_FILE_PAGE - || state == BUF_BLOCK_ZIP_DIRTY) - Writes to this field must be - covered by both block->mutex - and buf_pool->flush_list_mutex. Hence - reads can happen while holding - any one of the two mutexes */ - ibool in_free_list; /*!< TRUE if in buf_pool->free; when - buf_pool->free_list_mutex is free, the - following should hold: in_free_list - == (state == BUF_BLOCK_NOT_USED) */ -#endif /* UNIV_DEBUG */ - lsn_t newest_modification; - /*!< log sequence number of - the youngest modification to - this block, zero if not - modified. 
Protected by block - mutex */ - lsn_t oldest_modification; - /*!< log sequence number of - the START of the log entry - written of the oldest - modification to this block - which has not yet been flushed - on disk; zero if all - modifications are on disk. - Writes to this field must be - covered by both block->mutex - and buf_pool->flush_list_mutex. Hence - reads can happen while holding - any one of the two mutexes */ - /* @} */ - /** @name LRU replacement algorithm fields */ - /* @{ */ - - UT_LIST_NODE_T(buf_page_t) LRU; - /*!< node of the LRU list */ -#ifdef UNIV_DEBUG - ibool in_LRU_list; /*!< TRUE if the page is in - the LRU list; used in - debugging */ -#endif /* UNIV_DEBUG */ - unsigned old:1; /*!< TRUE if the block is in the old - blocks in buf_pool->LRU_old. Protected - by the LRU list mutex. May be read for - heuristics purposes under the block - mutex instead. */ - unsigned freed_page_clock:31;/*!< the value of - buf_pool->freed_page_clock - when this block was the last - time put to the head of the - LRU list; a thread is allowed - to read this for heuristic - purposes without holding any - mutex or latch */ - /* @} */ - unsigned access_time; /*!< time of first access, or - 0 if the block was never accessed - in the buffer pool. Protected by - block mutex */ - ibool is_corrupt; -# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG - ibool file_page_was_freed; - /*!< this is set to TRUE when - fsp frees a page in buffer pool; - protected by buf_pool->zip_mutex - or buf_block_t::mutex. 
*/ -# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ -}; - -/** The buffer control block structure */ - -struct buf_block_t{ - - /** @name General fields */ - /* @{ */ - - buf_page_t page; /*!< page information; this must - be the first field, so that - buf_pool->page_hash can point - to buf_page_t or buf_block_t */ - byte* frame; /*!< pointer to buffer frame which - is of size UNIV_PAGE_SIZE, and - aligned to an address divisible by - UNIV_PAGE_SIZE */ -#ifndef UNIV_HOTBACKUP - UT_LIST_NODE_T(buf_block_t) unzip_LRU; - /*!< node of the decompressed LRU list; - a block is in the unzip_LRU list - if page.state == BUF_BLOCK_FILE_PAGE - and page.zip.data != NULL */ -#ifdef UNIV_DEBUG - ibool in_unzip_LRU_list;/*!< TRUE if the page is in the - decompressed LRU list; - used in debugging */ -#endif /* UNIV_DEBUG */ - ib_mutex_t mutex; /*!< mutex protecting this block: - state, io_fix, buf_fix_count, - and accessed; we introduce this new - mutex in InnoDB-5.1 to relieve - contention on the buffer pool mutex */ - rw_lock_t lock; /*!< read-write lock of the buffer - frame */ - unsigned lock_hash_val:32;/*!< hashed value of the page address - in the record lock hash table; - protected by buf_block_t::lock - (or buf_block_t::mutex in - buf_page_get_gen(), - buf_page_init_for_read() - and buf_page_create()) */ - ibool check_index_page_at_flush; - /*!< TRUE if we know that this is - an index page, and want the database - to check its consistency before flush; - note that there may be pages in the - buffer pool which are index pages, - but this flag is not set because - we do not keep track of all pages; - NOT protected by any mutex */ - /* @} */ - /** @name Optimistic search field */ - /* @{ */ - - ib_uint64_t modify_clock; /*!< this clock is incremented every - time a pointer to a record on the - page may become obsolete; this is - used in the optimistic cursor - positioning: if the modify clock has - not changed, we know that the pointer - is 
still valid; this field may be - changed if the thread (1) owns the LRU - list mutex and the page is not - bufferfixed, or (2) the thread has an - x-latch on the block */ - /* @} */ - /** @name Hash search fields (unprotected) - NOTE that these fields are NOT protected by any semaphore! */ - /* @{ */ - - ulint n_hash_helps; /*!< counter which controls building - of a new hash index for the page */ - ulint n_fields; /*!< recommended prefix length for hash - search: number of full fields */ - ulint n_bytes; /*!< recommended prefix: number of bytes - in an incomplete field */ - ibool left_side; /*!< TRUE or FALSE, depending on - whether the leftmost record of several - records with the same prefix should be - indexed in the hash index */ - /* @} */ - - /** @name Hash search fields - These 5 fields may only be modified when we have - an x-latch on btr_search_latch AND - - we are holding an s-latch or x-latch on buf_block_t::lock or - - we know that buf_block_t::buf_fix_count == 0. - - An exception to this is when we init or create a page - in the buffer pool in buf0buf.cc. - - Another exception is that assigning block->index = NULL - is allowed whenever holding an x-latch on btr_search_latch. */ - - /* @{ */ - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ulint n_pointers; /*!< used in debugging: the number of - pointers in the adaptive hash index - pointing to this frame */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - unsigned curr_n_fields:10;/*!< prefix length for hash indexing: - number of full fields */ - unsigned curr_n_bytes:15;/*!< number of bytes in hash - indexing */ - unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */ - dict_index_t* index; /*!< Index for which the - adaptive hash index has been - created, or NULL if the page - does not exist in the - index. Note that it does not - guarantee that the index is - complete, though: there may - have been hash collisions, - record deletions, etc. 
*/ - /* @} */ -# ifdef UNIV_SYNC_DEBUG - /** @name Debug fields */ - /* @{ */ - rw_lock_t debug_latch; /*!< in the debug version, each thread - which bufferfixes the block acquires - an s-latch here; so we can use the - debug utilities in sync0rw */ - /* @} */ -# endif -#endif /* !UNIV_HOTBACKUP */ -}; - -/** Check if a buf_block_t object is in a valid state -@param block buffer block -@return TRUE if valid */ -#define buf_block_state_valid(block) \ -(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \ - && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH)) - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Compute the hash fold value for blocks in buf_pool->zip_hash. */ -/* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) -/* @} */ - -/** Struct that is embedded in the free zip blocks */ -struct buf_buddy_free_t { - union { - ulint size; /*!< size of the block */ - byte bytes[FIL_PAGE_DATA]; - /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID] - == BUF_BUDDY_FREE_STAMP denotes a free - block. If the space_id field of buddy - block != BUF_BUDDY_FREE_STAMP, the block - is not in any zip_free list. If the - space_id is BUF_BUDDY_FREE_STAMP then - stamp[0] will contain the - buddy block size. */ - } stamp; - - buf_page_t bpage; /*!< Embedded bpage descriptor */ - UT_LIST_NODE_T(buf_buddy_free_t) list; - /*!< Node of zip_free list */ -}; - -/** @brief The buffer pool statistics structure. */ -struct buf_pool_stat_t{ - ulint n_page_gets; /*!< number of page gets performed; - also successful searches through - the adaptive hash index are - counted as page gets. */ - ulint n_pages_read; /*!< number read operations. Accessed - atomically. */ - ulint n_pages_written;/*!< number write operations. 
Accessed - atomically.*/ - ulint n_pages_created;/*!< number of pages created - in the pool with no read */ - ulint n_ra_pages_read_rnd;/*!< number of pages read in - as part of random read ahead */ - ulint n_ra_pages_read;/*!< number of pages read in - as part of read ahead */ - ulint n_ra_pages_evicted;/*!< number of read ahead - pages that are evicted without - being accessed */ - ulint n_pages_made_young; /*!< number of pages made young, in - calls to buf_LRU_make_block_young() */ - ulint n_pages_not_made_young; /*!< number of pages not made - young because the first access - was not long enough ago, in - buf_page_peek_if_too_old() */ - ulint LRU_bytes; /*!< LRU size in bytes */ - ulint flush_list_bytes;/*!< flush_list size in bytes */ - ulint buf_lru_flush_page_count; -}; - -/** Statistics of buddy blocks of a given size. */ -struct buf_buddy_stat_t { - /** Number of blocks allocated from the buddy system. */ - ulint used; - /** Number of blocks relocated by the buddy system. */ - ib_uint64_t relocated; - /** Total duration of block relocations, in microseconds. */ - ib_uint64_t relocated_usec; -}; - -/** @brief The temporary memory array structure. - -NOTE! The definition appears here only for other modules of this -directory (buf) to see it. Do not use from outside! */ - -typedef struct { - ulint n_slots; /*!< Total number of slots */ - buf_tmp_buffer_t *slots; /*!< Pointer to the slots in the - array */ -} buf_tmp_array_t; - -/** @brief The buffer pool structure. - -NOTE! The definition appears here only for other modules of this -directory (buf) to see it. Do not use from outside! 
*/ - -struct buf_pool_t{ - - /** @name General fields */ - /* @{ */ - ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer - pool instance, protects compressed - only pages (of type buf_page_t, not - buf_block_t */ - ib_prio_mutex_t LRU_list_mutex; - ib_prio_mutex_t free_list_mutex; - ib_mutex_t zip_free_mutex; - ib_mutex_t zip_hash_mutex; - ib_mutex_t flush_state_mutex; /*!< Flush state protection - mutex */ - ulint instance_no; /*!< Array index of this buffer - pool instance */ - ulint old_pool_size; /*!< Old pool size in bytes */ - ulint curr_pool_size; /*!< Current pool size in bytes */ - ulint LRU_old_ratio; /*!< Reserve this much of the buffer - pool for "old" blocks */ -#ifdef UNIV_DEBUG - ulint buddy_n_frames; /*!< Number of frames allocated from - the buffer pool to the buddy system */ -#endif - ulint n_chunks; /*!< number of buffer pool chunks */ - buf_chunk_t* chunks; /*!< buffer pool chunks */ - ulint curr_size; /*!< current pool size in pages */ - ulint read_ahead_area;/*!< size in pages of the area which - the read-ahead algorithms read if - invoked */ - hash_table_t* page_hash; /*!< hash table of buf_page_t or - buf_block_t file pages, - buf_page_in_file() == TRUE, - indexed by (space_id, offset). - page_hash is protected by an - array of mutexes. */ - hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks - whose frames are allocated to the - zip buddy system, - indexed by block->frame */ - ulint n_pend_reads; /*!< number of pending read - operations. Accessed atomically */ - ulint n_pend_unzip; /*!< number of pending decompressions. - Accesssed atomically */ - - time_t last_printout_time; - /*!< when buf_print_io was last time - called. Accesses not protected */ - buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1]; - /*!< Statistics of buddy system, - indexed by block size. Protected by - zip_free_mutex. 
*/ - buf_pool_stat_t stat; /*!< current statistics */ - buf_pool_stat_t old_stat; /*!< old statistics */ - - /* @} */ - - /** @name Page flushing algorithm fields */ - - /* @{ */ - - ib_mutex_t flush_list_mutex;/*!< mutex protecting the - flush list access. This mutex - protects flush_list, flush_rbt - and bpage::list pointers when - the bpage is on flush_list. It - also protects writes to - bpage::oldest_modification and - flush_list_hp */ - const buf_page_t* flush_list_hp;/*!< "hazard pointer" - used during scan of flush_list - while doing flush list batch. - Protected by flush_list_mutex */ - UT_LIST_BASE_NODE_T(buf_page_t) flush_list; - /*!< base node of the modified block - list */ - ibool init_flush[BUF_FLUSH_N_TYPES]; - /*!< this is TRUE when a flush of the - given type is being initialized. - Protected by flush_state_mutex. */ - ulint n_flush[BUF_FLUSH_N_TYPES]; - /*!< this is the number of pending - writes in the given flush type. - Protected by flush_state_mutex. */ - os_event_t no_flush[BUF_FLUSH_N_TYPES]; - /*!< this is in the set state - when there is no flush batch - of the given type running; - os_event_set() and os_event_reset() - are protected by - buf_pool_t::flush_state_mutex */ - ib_rbt_t* flush_rbt; /*!< a red-black tree is used - exclusively during recovery to - speed up insertions in the - flush_list. This tree contains - blocks in order of - oldest_modification LSN and is - kept in sync with the - flush_list. - Each member of the tree MUST - also be on the flush_list. - This tree is relevant only in - recovery and is set to NULL - once the recovery is over. - Protected by flush_list_mutex */ - ulint freed_page_clock;/*!< a sequence number used - to count the number of buffer - blocks removed from the end of - the LRU list; NOTE that this - counter may wrap around at 4 - billion! A thread is allowed - to read this for heuristic - purposes without holding any - mutex or latch. 
For non-heuristic - purposes protected by LRU_list_mutex */ - ibool try_LRU_scan; /*!< Set to FALSE when an LRU - scan for free block fails. This - flag is used to avoid repeated - scans of LRU list when we know - that there is no free block - available in the scan depth for - eviction. Set to TRUE whenever - we flush a batch from the - buffer pool. Accessed atomically. */ - /* @} */ - - /** @name LRU replacement algorithm fields */ - /* @{ */ - - UT_LIST_BASE_NODE_T(buf_page_t) free; - /*!< base node of the free - block list */ - UT_LIST_BASE_NODE_T(buf_page_t) LRU; - /*!< base node of the LRU list */ - buf_page_t* LRU_old; /*!< pointer to the about - LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV - oldest blocks in the LRU list; - NULL if LRU length less than - BUF_LRU_OLD_MIN_LEN; - NOTE: when LRU_old != NULL, its length - should always equal LRU_old_len */ - ulint LRU_old_len; /*!< length of the LRU list from - the block to which LRU_old points - onward, including that block; - see buf0lru.cc for the restrictions - on this value; 0 if LRU_old == NULL; - NOTE: LRU_old_len must be adjusted - whenever LRU_old shrinks or grows! */ - - UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU; - /*!< base node of the - unzip_LRU list. The list is protected - by LRU list mutex. */ - - /* @} */ - /** @name Buddy allocator fields - The buddy allocator is used for allocating compressed page - frames and buf_page_t descriptors of blocks that exist - in the buffer pool only in compressed form. */ - /* @{ */ -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG - UT_LIST_BASE_NODE_T(buf_page_t) zip_clean; - /*!< unmodified compressed pages */ -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX]; - /*!< buddy free lists */ - - buf_page_t* watch; - /*!< Sentinel records for buffer - pool watches. 
*/ - - buf_tmp_array_t* tmp_arr; - /*!< Array for temporal memory - used in compression and encryption */ - -#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN -# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN" -#endif - /* @} */ -}; - -/** @name Accessors for buffer pool mutexes -Use these instead of accessing buffer pool mutexes directly. */ -/* @{ */ - -/** Test if flush list mutex is owned. */ -#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex) - -/** Acquire the flush list mutex. */ -#define buf_flush_list_mutex_enter(b) do { \ - mutex_enter(&b->flush_list_mutex); \ -} while (0) -/** Release the flush list mutex. */ -# define buf_flush_list_mutex_exit(b) do { \ - mutex_exit(&b->flush_list_mutex); \ -} while (0) - -/** Test if block->mutex is owned. */ -#define buf_block_mutex_own(b) mutex_own(&(b)->mutex) - -/** Acquire the block->mutex. */ -#define buf_block_mutex_enter(b) do { \ - mutex_enter(&(b)->mutex); \ -} while (0) - -/** Release the trx->mutex. */ -#define buf_block_mutex_exit(b) do { \ - mutex_exit(&(b)->mutex); \ -} while (0) - - -/** Get appropriate page_hash_lock. */ -# define buf_page_hash_lock_get(b, f) \ - hash_get_lock(b->page_hash, f) - -#ifdef UNIV_SYNC_DEBUG -/** Test if page_hash lock is held in s-mode. */ -# define buf_page_hash_lock_held_s(b, p) \ - rw_lock_own(buf_page_hash_lock_get(b, \ - buf_page_address_fold(p->space, \ - p->offset)), \ - RW_LOCK_SHARED) - -/** Test if page_hash lock is held in x-mode. */ -# define buf_page_hash_lock_held_x(b, p) \ - rw_lock_own(buf_page_hash_lock_get(b, \ - buf_page_address_fold(p->space, \ - p->offset)), \ - RW_LOCK_EX) - -/** Test if page_hash lock is held in x or s-mode. 
*/ -# define buf_page_hash_lock_held_s_or_x(b, p) \ - (buf_page_hash_lock_held_s(b, p) \ - || buf_page_hash_lock_held_x(b, p)) - -# define buf_block_hash_lock_held_s(b, p) \ - buf_page_hash_lock_held_s(b, &(p->page)) - -# define buf_block_hash_lock_held_x(b, p) \ - buf_page_hash_lock_held_x(b, &(p->page)) - -# define buf_block_hash_lock_held_s_or_x(b, p) \ - buf_page_hash_lock_held_s_or_x(b, &(p->page)) -#else /* UNIV_SYNC_DEBUG */ -# define buf_page_hash_lock_held_s(b, p) (TRUE) -# define buf_page_hash_lock_held_x(b, p) (TRUE) -# define buf_page_hash_lock_held_s_or_x(b, p) (TRUE) -# define buf_block_hash_lock_held_s(b, p) (TRUE) -# define buf_block_hash_lock_held_x(b, p) (TRUE) -# define buf_block_hash_lock_held_s_or_x(b, p) (TRUE) -#endif /* UNIV_SYNC_DEBUG */ - -#endif /* !UNIV_HOTBACKUP */ -/* @} */ - -/********************************************************************** -Let us list the consistency conditions for different control block states. - -NOT_USED: is in free list, not in LRU list, not in flush list, nor - page hash table -READY_FOR_USE: is not in free list, LRU list, or flush list, nor page - hash table -MEMORY: is not in free list, LRU list, or flush list, nor page - hash table -FILE_PAGE: space and offset are defined, is in page hash table - if io_fix == BUF_IO_WRITE, - pool: no_flush[flush_type] is in reset state, - pool: n_flush[flush_type] > 0 - - (1) if buf_fix_count == 0, then - is in LRU list, not in free list - is in flush list, - if and only if oldest_modification > 0 - is x-locked, - if and only if io_fix == BUF_IO_READ - is s-locked, - if and only if io_fix == BUF_IO_WRITE - - (2) if buf_fix_count > 0, then - is not in LRU list, not in free list - is in flush list, - if and only if oldest_modification > 0 - if io_fix == BUF_IO_READ, - is x-locked - if io_fix == BUF_IO_WRITE, - is s-locked - -State transitions: - -NOT_USED => READY_FOR_USE -READY_FOR_USE => MEMORY -READY_FOR_USE => FILE_PAGE -MEMORY => NOT_USED -FILE_PAGE => NOT_USED 
NOTE: This transition is allowed if and only if - (1) buf_fix_count == 0, - (2) oldest_modification == 0, and - (3) io_fix == 0. -*/ - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/** Functor to validate the LRU list: asserts that the element is flagged -as being in the LRU list. */ -struct CheckInLRUList { - void operator()(const buf_page_t* elem) const - { - ut_a(elem->in_LRU_list); - } -}; - -/** Functor to validate the free list: asserts that the element is flagged -as being in the free list. */ -struct CheckInFreeList { - void operator()(const buf_page_t* elem) const - { - ut_a(elem->in_free_list); - } -}; - -/** Functor to validate the unzip_LRU list: asserts that the block is flagged -as being in both the LRU list and the unzip_LRU list. */ -struct CheckUnzipLRUAndLRUList { - void operator()(const buf_block_t* elem) const - { - ut_a(elem->page.in_LRU_list); - ut_a(elem->in_unzip_LRU_list); - } -}; -#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */ - -/*********************************************************************//** -Acquire LRU list mutex */ -void -buf_pool_mutex_enter( -/*=================*/ - buf_pool_t* buf_pool); /*!< in: buffer pool */ -/*********************************************************************//** -Exit LRU list mutex */ -void -buf_pool_mutex_exit( -/*================*/ - buf_pool_t* buf_pool); /*!< in: buffer pool */ - -#ifndef UNIV_NONINL -#include "buf0buf.ic" -#endif - -#endif diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic deleted file mode 100644 index 20721b28ef2..00000000000 --- a/storage/xtradb/include/buf0buf.ic +++ /dev/null @@ -1,1561 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2014, 2015, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0buf.ic -The database buffer buf_pool - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "mtr0mtr.h" -#ifndef UNIV_HOTBACKUP -#include "buf0flu.h" -#include "buf0lru.h" -#include "buf0rea.h" - -/** A chunk of buffers. The buffer pool is allocated in chunks. */ -struct buf_chunk_t{ - ulint mem_size; /*!< allocated size of the chunk */ - ulint size; /*!< size of frames[] and blocks[] */ - void* mem; /*!< pointer to the memory area which - was allocated for the frames */ - buf_block_t* blocks; /*!< array of buffer control blocks */ -}; - - -#include "srv0srv.h" - -/*********************************************************************//** -Gets the current size of buffer buf_pool in bytes. -@return size in bytes */ -UNIV_INLINE -ulint -buf_pool_get_curr_size(void) -/*========================*/ -{ - return(srv_buf_pool_curr_size); -} - -/********************************************************************//** -Calculates the index of a buffer pool to the buf_pool[] array. 
-@return the position of the buffer pool in buf_pool[] */ -UNIV_INLINE -ulint -buf_pool_index( -/*===========*/ - const buf_pool_t* buf_pool) /*!< in: buffer pool */ -{ - ulint i = buf_pool - buf_pool_ptr; - ut_ad(i < MAX_BUFFER_POOLS); - ut_ad(i < srv_buf_pool_instances); - return(i); -} - -/******************************************************************//** -Returns the buffer pool instance given a page instance -@return buf_pool */ -UNIV_INLINE -buf_pool_t* -buf_pool_from_bpage( -/*================*/ - const buf_page_t* bpage) /*!< in: buffer pool page */ -{ - ulint i; - i = bpage->buf_pool_index; - ut_ad(i < srv_buf_pool_instances); - return(&buf_pool_ptr[i]); -} - -/******************************************************************//** -Returns the buffer pool instance given a block instance -@return buf_pool */ -UNIV_INLINE -buf_pool_t* -buf_pool_from_block( -/*================*/ - const buf_block_t* block) /*!< in: block */ -{ - return(buf_pool_from_bpage(&block->page)); -} - -/*********************************************************************//** -Gets the current size of buffer buf_pool in pages. -@return size in pages*/ -UNIV_INLINE -ulint -buf_pool_get_n_pages(void) -/*======================*/ -{ - return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE); -} - -/********************************************************************//** -Reads the freed_page_clock of a buffer block. -@return freed_page_clock */ -UNIV_INLINE -ulint -buf_page_get_freed_page_clock( -/*==========================*/ - const buf_page_t* bpage) /*!< in: block */ -{ - /* This is sometimes read without holding any buffer pool mutex. */ - return(bpage->freed_page_clock); -} - -/********************************************************************//** -Reads the freed_page_clock of a buffer block. 
-@return freed_page_clock */ -UNIV_INLINE -ulint -buf_block_get_freed_page_clock( -/*===========================*/ - const buf_block_t* block) /*!< in: block */ -{ - return(buf_page_get_freed_page_clock(&block->page)); -} - -/********************************************************************//** -Tells if a block is still close enough to the MRU end of the LRU list -meaning that it is not in danger of getting evicted and also implying -that it has been accessed recently. -Note that this is for heuristics only and does not reserve buffer pool -mutex. -@return TRUE if block is close to MRU end of LRU */ -UNIV_INLINE -ibool -buf_page_peek_if_young( -/*===================*/ - const buf_page_t* bpage) /*!< in: block */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - /* FIXME: bpage->freed_page_clock is 31 bits */ - return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) - < ((ulint) bpage->freed_page_clock - + (buf_pool->curr_size - * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio) - / (BUF_LRU_OLD_RATIO_DIV * 4)))); -} - -/********************************************************************//** -Recommends a move of a block to the start of the LRU list if there is danger -of dropping from the buffer pool. NOTE: does not reserve the buffer pool -mutex. -@return TRUE if should be made younger */ -UNIV_INLINE -ibool -buf_page_peek_if_too_old( -/*=====================*/ - const buf_page_t* bpage) /*!< in: block to make younger */ -{ - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - if (buf_pool->freed_page_clock == 0) { - /* If eviction has not started yet, do not update the - statistics or move blocks in the LRU list. This is - either the warm-up phase or an in-memory workload. 
*/ - return(FALSE); - } else if (buf_LRU_old_threshold_ms && bpage->old) { - unsigned access_time = buf_page_is_accessed(bpage); - - if (access_time > 0 - && ((ib_uint32_t) (ut_time_ms() - access_time)) - >= buf_LRU_old_threshold_ms) { - return(TRUE); - } - - buf_pool->stat.n_pages_not_made_young++; - return(FALSE); - } else { - return(!buf_page_peek_if_young(bpage)); - } -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Gets the state of a block. -@return state */ -UNIV_INLINE -enum buf_page_state -buf_page_get_state( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - enum buf_page_state state = (enum buf_page_state) bpage->state; - -#ifdef UNIV_DEBUG - switch (state) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_FILE_PAGE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - break; - default: - ut_error; - } -#endif /* UNIV_DEBUG */ - - return(state); -} -/*********************************************************************//** -Gets the state of a block. 
-@return state */ -UNIV_INLINE -enum buf_page_state -buf_block_get_state( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - return(buf_page_get_state(&block->page)); -} - -/*********************************************************************//** -Gets the name of the state of a block, for use in diagnostic output. -Each state maps to the spelling of its own enum constant. -@return name or "CORRUPTED" */ -UNIV_INLINE -const char* -buf_get_state_name( -/*===============*/ - const buf_block_t* block) /*!< in: pointer to the control - block */ -{ - enum buf_page_state state = buf_page_get_state(&block->page); - - switch (state) { - case BUF_BLOCK_POOL_WATCH: - return (const char *) "BUF_BLOCK_POOL_WATCH"; - case BUF_BLOCK_ZIP_PAGE: - return (const char *) "BUF_BLOCK_ZIP_PAGE"; - case BUF_BLOCK_ZIP_DIRTY: - return (const char *) "BUF_BLOCK_ZIP_DIRTY"; - case BUF_BLOCK_NOT_USED: - return (const char *) "BUF_BLOCK_NOT_USED"; - case BUF_BLOCK_READY_FOR_USE: - return (const char *) "BUF_BLOCK_READY_FOR_USE"; - case BUF_BLOCK_FILE_PAGE: - return (const char *) "BUF_BLOCK_FILE_PAGE"; - case BUF_BLOCK_MEMORY: - return (const char *) "BUF_BLOCK_MEMORY"; - case BUF_BLOCK_REMOVE_HASH: - return (const char *) "BUF_BLOCK_REMOVE_HASH"; - default: - return (const char *) "CORRUPTED"; - } -} - -/*********************************************************************//** -Sets the state of a block. 
*/ -UNIV_INLINE -void -buf_page_set_state( -/*===============*/ - buf_page_t* bpage, /*!< in/out: pointer to control block */ - enum buf_page_state state) /*!< in: state */ -{ -#ifdef UNIV_DEBUG - enum buf_page_state old_state = buf_page_get_state(bpage); - - switch (old_state) { - case BUF_BLOCK_POOL_WATCH: - ut_error; - break; - case BUF_BLOCK_ZIP_PAGE: - ut_a(state == BUF_BLOCK_ZIP_DIRTY); - break; - case BUF_BLOCK_ZIP_DIRTY: - ut_a(state == BUF_BLOCK_ZIP_PAGE); - break; - case BUF_BLOCK_NOT_USED: - ut_a(state == BUF_BLOCK_READY_FOR_USE); - break; - case BUF_BLOCK_READY_FOR_USE: - ut_a(state == BUF_BLOCK_MEMORY - || state == BUF_BLOCK_FILE_PAGE - || state == BUF_BLOCK_NOT_USED); - break; - case BUF_BLOCK_MEMORY: - ut_a(state == BUF_BLOCK_NOT_USED); - break; - case BUF_BLOCK_FILE_PAGE: - if (!(state == BUF_BLOCK_NOT_USED - || state == BUF_BLOCK_REMOVE_HASH)) { - const char *old_state_name = buf_get_state_name((buf_block_t*)bpage); - bpage->state = state; - - fprintf(stderr, - "InnoDB: Error: block old state %d (%s) " - " new state %d (%s) not correct\n", - old_state, - old_state_name, - state, - buf_get_state_name((buf_block_t*)bpage)); - } - - ut_a(state == BUF_BLOCK_NOT_USED - || state == BUF_BLOCK_REMOVE_HASH); - break; - case BUF_BLOCK_REMOVE_HASH: - ut_a(state == BUF_BLOCK_MEMORY); - break; - } -#endif /* UNIV_DEBUG */ - - bpage->state = state; -} - -/*********************************************************************//** -Sets the state of a block. */ -UNIV_INLINE -void -buf_block_set_state( -/*================*/ - buf_block_t* block, /*!< in/out: pointer to control block */ - enum buf_page_state state) /*!< in: state */ -{ - buf_page_set_state(&block->page, state); -} - -/*********************************************************************//** -Determines if a block is mapped to a tablespace. 
-@return TRUE if mapped */ -UNIV_INLINE -ibool -buf_page_in_file( -/*=============*/ - const buf_page_t* bpage) /*!< in: pointer to control block */ -{ - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - ut_error; - break; - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - case BUF_BLOCK_FILE_PAGE: - return(TRUE); - case BUF_BLOCK_NOT_USED: - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - break; - } - - return(FALSE); -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Determines if a block should be on unzip_LRU list. -@return TRUE if block belongs to unzip_LRU */ -UNIV_INLINE -ibool -buf_page_belongs_to_unzip_LRU( -/*==========================*/ - const buf_page_t* bpage) /*!< in: pointer to control block */ -{ - ut_ad(buf_page_in_file(bpage)); - - return(bpage->zip.data - && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); -} - -/*********************************************************************//** -Gets the mutex of a block. -@return pointer to mutex protecting bpage */ -UNIV_INLINE -ib_mutex_t* -buf_page_get_mutex( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to control block */ -{ - switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_POOL_WATCH: - ut_error; - return(NULL); - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: { - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - - return(&buf_pool->zip_mutex); - } - default: - return(&((buf_block_t*) bpage)->mutex); - } -} - -/*********************************************************************//** -Get the flush type of a page. 
-@return flush type */ -UNIV_INLINE -buf_flush_t -buf_page_get_flush_type( -/*====================*/ - const buf_page_t* bpage) /*!< in: buffer page */ -{ - buf_flush_t flush_type = (buf_flush_t) bpage->flush_type; - -#ifdef UNIV_DEBUG - switch (flush_type) { - case BUF_FLUSH_LRU: - case BUF_FLUSH_LIST: - case BUF_FLUSH_SINGLE_PAGE: - return(flush_type); - case BUF_FLUSH_N_TYPES: - ut_error; - } - ut_error; -#endif /* UNIV_DEBUG */ - return(flush_type); -} -/*********************************************************************//** -Set the flush type of a page. */ -UNIV_INLINE -void -buf_page_set_flush_type( -/*====================*/ - buf_page_t* bpage, /*!< in: buffer page */ - buf_flush_t flush_type) /*!< in: flush type */ -{ - bpage->flush_type = flush_type; - ut_ad(buf_page_get_flush_type(bpage) == flush_type); -} - -/*********************************************************************//** -Map a block to a file page. */ -UNIV_INLINE -void -buf_block_set_file_page( -/*====================*/ - buf_block_t* block, /*!< in/out: pointer to control block */ - ulint space, /*!< in: tablespace id */ - ulint page_no)/*!< in: page number */ -{ - buf_block_set_state(block, BUF_BLOCK_FILE_PAGE); - block->page.space = static_cast<ib_uint32_t>(space); - block->page.offset = static_cast<ib_uint32_t>(page_no); -} - -/*********************************************************************//** -Gets the io_fix state of a block. -@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_page_get_io_fix( -/*================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - return buf_page_get_io_fix_unlocked(bpage); -} - -/*********************************************************************//** -Gets the io_fix state of a block. Does not assert that the -buf_page_get_mutex() mutex is held, to be used in the cases where it is safe -not to hold it. 
-@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_page_get_io_fix_unlocked( -/*=========================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix; -#ifdef UNIV_DEBUG - switch (io_fix) { - case BUF_IO_NONE: - case BUF_IO_READ: - case BUF_IO_WRITE: - case BUF_IO_PIN: - return(io_fix); - } - ut_error; -#endif /* UNIV_DEBUG */ - return(io_fix); -} - -/*********************************************************************//** -Gets the io_fix state of a block. -@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_block_get_io_fix( -/*=================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - return(buf_page_get_io_fix(&block->page)); -} - -/*********************************************************************//** -Gets the io_fix state of a block. Does not assert that the -buf_page_get_mutex() mutex is held, to be used in the cases where it is safe -not to hold it. -@return io_fix state */ -UNIV_INLINE -enum buf_io_fix -buf_block_get_io_fix_unlocked( -/*==========================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - return(buf_page_get_io_fix_unlocked(&block->page)); -} - - -/*********************************************************************//** -Sets the io_fix state of a block. */ -UNIV_INLINE -void -buf_page_set_io_fix( -/*================*/ - buf_page_t* bpage, /*!< in/out: control block */ - enum buf_io_fix io_fix) /*!< in: io_fix state */ -{ - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - - bpage->io_fix = io_fix; - ut_ad(buf_page_get_io_fix(bpage) == io_fix); -} - -/*********************************************************************//** -Sets the io_fix state of a block. 
*/ -UNIV_INLINE -void -buf_block_set_io_fix( -/*=================*/ - buf_block_t* block, /*!< in/out: control block */ - enum buf_io_fix io_fix) /*!< in: io_fix state */ -{ - buf_page_set_io_fix(&block->page, io_fix); -} - -/*********************************************************************//** -Makes a block sticky. A sticky block implies that even after we release -the buf_pool->LRU_list_mutex and the block->mutex: -* it cannot be removed from the flush_list -* the block descriptor cannot be relocated -* it cannot be removed from the LRU list -Note that: -* the block can still change its position in the LRU list -* the next and previous pointers can change. */ -UNIV_INLINE -void -buf_page_set_sticky( -/*================*/ - buf_page_t* bpage) /*!< in/out: control block */ -{ -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); -#endif - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE); - ut_ad(bpage->in_LRU_list); - - bpage->io_fix = BUF_IO_PIN; -} - -/*********************************************************************//** -Removes stickiness of a block. */ -UNIV_INLINE -void -buf_page_unset_sticky( -/*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ -{ - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN); - - bpage->io_fix = BUF_IO_NONE; -} - -/********************************************************************//** -Determine if a buffer block can be relocated in memory. The block -can be dirty, but it must not be I/O-fixed or bufferfixed. 
*/ -UNIV_INLINE -ibool -buf_page_can_relocate( -/*==================*/ - const buf_page_t* bpage) /*!< control block being relocated */ -{ - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - ut_ad(buf_page_in_file(bpage)); - ut_ad(bpage->in_LRU_list); - - return(buf_page_get_io_fix(bpage) == BUF_IO_NONE - && bpage->buf_fix_count == 0); -} - -/*********************************************************************//** -Determine if a block has been flagged old. -@return TRUE if old */ -UNIV_INLINE -ibool -buf_page_is_old( -/*============*/ - const buf_page_t* bpage) /*!< in: control block */ -{ -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); -#endif - /* Buffer page mutex is not strictly required here for heuristic - purposes even if LRU mutex is not being held. Keep the assertion - for now since all the callers hold it. */ - ut_ad(mutex_own(buf_page_get_mutex(bpage)) - || mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(buf_page_in_file(bpage)); - - return(bpage->old); -} - -/*********************************************************************//** -Flag a block old. */ -UNIV_INLINE -void -buf_page_set_old( -/*=============*/ - buf_page_t* bpage, /*!< in/out: control block */ - ibool old) /*!< in: old */ -{ -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); -#endif /* UNIV_DEBUG */ - ut_a(buf_page_in_file(bpage)); - ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); - ut_ad(bpage->in_LRU_list); - -#ifdef UNIV_LRU_DEBUG - ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL)); - /* If a block is flagged "old", the LRU_old list must exist. */ - ut_a(!old || buf_pool->LRU_old); - - if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) { - const buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); - const buf_page_t* next = UT_LIST_GET_NEXT(LRU, bpage); - if (prev->old == next->old) { - ut_a(prev->old == old); - } else { - ut_a(!prev->old); - ut_a(buf_pool->LRU_old == (old ? 
bpage : next)); - } - } -#endif /* UNIV_LRU_DEBUG */ - - bpage->old = old; -} - -/*********************************************************************//** -Determine the time of first access of a block in the buffer pool. -@return ut_time_ms() at the time of first access, 0 if not accessed */ -UNIV_INLINE -unsigned -buf_page_is_accessed( -/*=================*/ - const buf_page_t* bpage) /*!< in: control block */ -{ - ut_ad(buf_page_in_file(bpage)); - - return(bpage->access_time); -} - -/*********************************************************************//** -Flag a block accessed. */ -UNIV_INLINE -void -buf_page_set_accessed( -/*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ -{ - ut_ad(mutex_own(buf_page_get_mutex(bpage))); - - ut_a(buf_page_in_file(bpage)); - - if (bpage->access_time == 0) { - /* Make this the time of the first access. */ - bpage->access_time = static_cast<uint>(ut_time_ms()); - } -} - -/*********************************************************************//** -Gets the buf_block_t handle of a buffered file block if an uncompressed -page frame exists, or NULL. -@return control block, or NULL */ -UNIV_INLINE -buf_block_t* -buf_page_get_block( -/*===============*/ - buf_page_t* bpage) /*!< in: control block, or NULL */ -{ - if (bpage != NULL) { -#ifdef UNIV_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage) - || mutex_own(&buf_pool->LRU_list_mutex)); -#endif - ut_ad(buf_page_in_file(bpage)); - - if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { - return((buf_block_t*) bpage); - } - } - - return(NULL); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets a pointer to the memory frame of a block. 
-@return pointer to the frame */ -UNIV_INLINE -buf_frame_t* -buf_block_get_frame( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - if (!block) { - return NULL; - } - - SRV_CORRUPT_TABLE_CHECK(block, return(0);); - - return(buf_nonnull_block_get_frame(block)); -} - -/*********************************************************************//** -Gets a pointer to the memory frame of a block, where block is known not to be -NULL. -@return pointer to the frame */ -UNIV_INLINE -buf_frame_t* -buf_nonnull_block_get_frame( -/*========================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - switch (buf_block_get_state(block)) { - case BUF_BLOCK_POOL_WATCH: - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - case BUF_BLOCK_NOT_USED: - if (block->page.encrypted) { - goto ok; - } - ut_error; - break; - case BUF_BLOCK_FILE_PAGE: -# ifndef UNIV_HOTBACKUP - ut_a(block->page.buf_fix_count > 0); -# endif /* !UNIV_HOTBACKUP */ - /* fall through */ - case BUF_BLOCK_READY_FOR_USE: - case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: - goto ok; - } - ut_error; -ok: - return((buf_frame_t*) block->frame); -} - -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Gets the space id of a block. -@return space id */ -UNIV_INLINE -ulint -buf_page_get_space( -/*===============*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - ut_ad(bpage); - ut_a(buf_page_in_file(bpage)); - - return(bpage->space); -} - -/*********************************************************************//** -Gets the space id of a block. 
-@return space id */ -UNIV_INLINE -ulint -buf_block_get_space( -/*================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - ut_ad(block); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - - return(block->page.space); -} - -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_page_get_page_no( -/*=================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - ut_ad(bpage); - ut_a(buf_page_in_file(bpage)); - - return(bpage->offset); -} -/*********************************************************************** -FIXME_FTS Gets the frame the pointer is pointing to. */ -UNIV_INLINE -buf_frame_t* -buf_frame_align( -/*============*/ - /* out: pointer to frame */ - byte* ptr) /* in: pointer to a frame */ -{ - buf_frame_t* frame; - - ut_ad(ptr); - - frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE); - - return(frame); -} - -/*********************************************************************//** -Gets the page number of a block. -@return page number */ -UNIV_INLINE -ulint -buf_block_get_page_no( -/*==================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - ut_ad(block); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - - return(block->page.offset); -} - -/*********************************************************************//** -Gets the compressed page size of a block. -@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_page_get_zip_size( -/*==================*/ - const buf_page_t* bpage) /*!< in: pointer to the control block */ -{ - return(bpage->zip.ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0); -} - -/*********************************************************************//** -Gets the compressed page size of a block. 
-@return compressed page size, or 0 */ -UNIV_INLINE -ulint -buf_block_get_zip_size( -/*===================*/ - const buf_block_t* block) /*!< in: pointer to the control block */ -{ - return(block->page.zip.ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0); -} - -#ifndef UNIV_HOTBACKUP -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG -/*********************************************************************//** -Gets the compressed page descriptor corresponding to an uncompressed page -if applicable. -@return compressed page descriptor, or NULL */ -UNIV_INLINE -const page_zip_des_t* -buf_frame_get_page_zip( -/*===================*/ - const byte* ptr) /*!< in: pointer to the page */ -{ - return(buf_block_get_page_zip(buf_block_align(ptr))); -} -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Gets the space id, page offset, and byte offset within page of a -pointer pointing to a buffer frame containing a file page. */ -UNIV_INLINE -void -buf_ptr_get_fsp_addr( -/*=================*/ - const void* ptr, /*!< in: pointer to a buffer frame */ - ulint* space, /*!< out: space id */ - fil_addr_t* addr) /*!< out: page offset and byte offset */ -{ - const page_t* page = (const page_t*) ut_align_down(ptr, - UNIV_PAGE_SIZE); - - *space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET); - addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Gets the hash value of the page the pointer is pointing to. This can be used -in searches in the lock hash table. 
-@return lock hash value */
-UNIV_INLINE
-ulint
-buf_block_get_lock_hash_val(
-/*========================*/
-	const buf_block_t*	block)	/*!< in: block */
-{
-	ut_ad(block);
-	ut_ad(buf_page_in_file(&block->page));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
-	      || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-	return(block->lock_hash_val);
-}
-
-/********************************************************************//**
-Allocates a buf_page_t descriptor with ut_malloc(). This function must
-succeed.
-NOTE(review): the original comment said "in case of failure we assert in
-this function", but no assertion is visible here; presumably ut_malloc()
-itself handles allocation failure - confirm.
-The memset() is wrapped in ut_d(), so callers must not rely on the
-descriptor being zero-filled.
-@return the allocated descriptor. */
-UNIV_INLINE
-buf_page_t*
-buf_page_alloc_descriptor(void)
-/*===========================*/
-{
-	buf_page_t*	bpage;
-
-	bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
-	ut_d(memset(bpage, 0, sizeof *bpage));
-	UNIV_MEM_ALLOC(bpage, sizeof *bpage);
-
-	return(bpage);
-}
-
-/********************************************************************//**
-Frees a buf_page_t descriptor with ut_free(). */
-UNIV_INLINE
-void
-buf_page_free_descriptor(
-/*=====================*/
-	buf_page_t*	bpage)	/*!< in: bpage descriptor to free. */
-{
-	ut_free(bpage);
-}
-
-/********************************************************************//**
-Frees a buffer block which does not contain a file page. The block mutex
-is acquired and released inside this function. */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
-	buf_block_t*	block)	/*!< in, own: block to be freed */
-{
-	mutex_enter(&block->mutex);
-
-	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
-	buf_LRU_block_free_non_file_page(block);
-
-	mutex_exit(&block->mutex);
-}
-
-/********************************************************************//**
-Gets the buffer that holds the page data, chosen in this order:
-(1) bpage->slot->out_buf, if a slot with an output buffer is attached,
-(2) bpage->zip.data, if set,
-(3) the frame of the buf_block_t that bpage is cast to.
-@return pointer to the selected buffer */
-UNIV_INLINE
-void *
-buf_page_get_frame(
-/*===============*/
-	const buf_page_t*	bpage) /*!< in: buffer pool page */
-{
-	/* In encryption/compression buffer pool page may contain extra
-	buffer where result is stored. */
-	if (bpage->slot && bpage->slot->out_buf) {
-		return bpage->slot->out_buf;
-	} else if (bpage->zip.data) {
-		return bpage->zip.data;
-	} else {
-		return ((buf_block_t*) bpage)->frame;
-	}
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Copies UNIV_PAGE_SIZE bytes from a buffer frame to a given buffer.
-@return buf */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
-	byte*			buf,	/*!< out: buffer to copy to */
-	const buf_frame_t*	frame)	/*!< in: buffer frame */
-{
-	ut_ad(buf && frame);
-
-	ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
-
-	return(buf);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value, (space << 20) + space + offset */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
-{
-	return((space << 20) + space + offset);
-}
-
-/********************************************************************//**
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet.
-The page mutex returned by buf_page_get_mutex() is held while the value
-is read.
-@return newest modification to page */
-UNIV_INLINE
-lsn_t
-buf_page_get_newest_modification(
-/*=============================*/
-	const buf_page_t*	bpage)	/*!< in: block containing the
-					page frame */
-{
-	lsn_t		lsn;
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
-
-	mutex_enter(block_mutex);
-
-	if (buf_page_in_file(bpage)) {
-		lsn = bpage->newest_modification;
-	} else {
-		lsn = 0;
-	}
-
-	mutex_exit(block_mutex);
-
-	return(lsn);
-}
-
-/********************************************************************//**
-Increments the modify clock of a frame by 1. The caller must (1) own the
-LRU list mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block.
 */
-UNIV_INLINE
-void
-buf_block_modify_clock_inc(
-/*=======================*/
-	buf_block_t*	block)	/*!< in/out: block whose modify_clock
-				is incremented */
-{
-#ifdef UNIV_SYNC_DEBUG
-	buf_pool_t*	buf_pool = buf_pool_from_bpage((buf_page_t*) block);
-
-	ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
-	       && (block->page.buf_fix_count == 0))
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
-	block->modify_clock++;
-}
-
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block (checked only under UNIV_SYNC_DEBUG).
-@return value of block->modify_clock */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
-	buf_block_t*	block)	/*!< in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
-	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
-	return(block->modify_clock);
-}
-
-/*******************************************************************//**
-Increments the bufferfix count. The caller must not own the page mutex
-(checked with ut_ad): when PAGE_ATOMIC_REF_COUNT is not defined the
-mutex is acquired here around the increment; otherwise the count is
-incremented with os_atomic_increment_uint32(). */
-UNIV_INLINE
-void
-buf_block_fix(
-/*===========*/
-	buf_block_t*	block)	/*!< in/out: block to bufferfix */
-{
-	ut_ad(!mutex_own(buf_page_get_mutex(&block->page)));
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
-
-	mutex_enter(block_mutex);
-	++block->page.buf_fix_count;
-	mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-}
-
-/*******************************************************************//**
-Increments the bufferfix count.
 */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_func(
-/*=======================*/
-#ifdef UNIV_SYNC_DEBUG
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line */
-#endif /* UNIV_SYNC_DEBUG */
-	buf_block_t*	block)	/*!< in/out: block to bufferfix */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ibool	ret;
-
-	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
-	ut_a(ret);	/* the no-wait s-lock on debug_latch must succeed */
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
-	ut_ad(mutex_own(&block->mutex));	/* unlike buf_block_fix(), the
-						caller already holds the
-						block mutex here */
-
-	++block->page.buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-}
-
-/*******************************************************************//**
-Decrements the bufferfix count. The count must be positive and the caller
-must not own the page mutex (both checked with ut_ad): when
-PAGE_ATOMIC_REF_COUNT is not defined the mutex is acquired here around
-the decrement. */
-UNIV_INLINE
-void
-buf_block_unfix(
-/*============*/
-	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
-{
-	ut_ad(block->page.buf_fix_count > 0);
-	ut_ad(!mutex_own(buf_page_get_mutex(&block->page)));
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(&block->page);
-
-	mutex_enter(block_mutex);
-	--block->page.buf_fix_count;
-	mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-}
-
-/*******************************************************************//**
-Decrements the bufferfix count.
 */
-UNIV_INLINE
-void
-buf_block_buf_fix_dec(
-/*==================*/
-	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
-{
-	ut_ad(block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
-	mutex_enter(&block->mutex);
-	--block->page.buf_fix_count;
-	mutex_exit(&block->mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&block->debug_latch);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/******************************************************************//**
-Returns the buffer pool instance given space and offset of page.
-The low 6 bits of offset are discarded before folding, so all pages of a
-space that share the same (offset >> 6), i.e. each aligned run of 64
-page numbers, map to the same instance. The instance index is
-buf_page_address_fold(space, offset >> 6) % srv_buf_pool_instances.
-@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_get(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
-{
-	ulint	fold;
-	ulint	index;
-	ulint	ignored_offset;
-
-	ignored_offset = offset >> 6; /* 2log of BUF_READ_AHEAD_AREA (64)*/
-	fold = buf_page_address_fold(space, ignored_offset);
-	index = fold % srv_buf_pool_instances;
-	return(&buf_pool_ptr[index]);
-}
-
-/******************************************************************//**
-Returns the buffer pool instance given its array index. The index is
-range-checked only by debug assertions (ut_ad).
-@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_array(
-/*================*/
-	ulint	index)	/*!< in: array index to get
-			buffer pool instance from; must be less than
-			both MAX_BUFFER_POOLS and
-			srv_buf_pool_instances */
-{
-	ut_ad(index < MAX_BUFFER_POOLS);
-	ut_ad(index < srv_buf_pool_instances);
-	return(&buf_pool_ptr[index]);
-}
-
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get_low(
-/*==================*/
-	buf_pool_t*	buf_pool,/*!< in: buffer pool instance */
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space */
-	ulint		fold)	/*!< in: buf_page_address_fold(space, offset) */
-{
-	buf_page_t*	bpage;
-
-#ifdef UNIV_SYNC_DEBUG
-	ulint		hash_fold;
-	prio_rw_lock_t*	hash_lock;
-
-	hash_fold = buf_page_address_fold(space, offset);
-	ut_ad(hash_fold == fold);
-
-	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
-	ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)
-	      || rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Look for the page in the hash table; a match requires both the
-	space id and the page offset to be equal. */
-
-	HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
-		    ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
-			  && buf_page_in_file(bpage)),
-		    bpage->space == space && bpage->offset == offset);
-	if (bpage) {
-		ut_a(buf_page_in_file(bpage));
-		ut_ad(bpage->in_page_hash);
-		ut_ad(!bpage->in_zip_hash);
-	}
-
-	return(bpage);
-}
-
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-The page_hash lock for the page is always acquired for the lookup:
-in lock_mode when lock is not NULL, in RW_LOCK_SHARED mode otherwise.
-If the block is found and lock is not NULL then the page_hash lock is
-kept and returned in *lock; it is up to the caller to release it.
-If the block is found and lock is NULL, or if the block is not found
-or is a watch sentinel, the page_hash lock is released by this function
-and *lock (when lock is not NULL) is left as NULL.
-@return block, NULL if not found, or watch sentinel (if watch is true) */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get_locked(
-/*=====================*/
-					/*!< out: pointer to the bpage,
-					or NULL; if NULL, hash_lock
-					is also NULL. */
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	prio_rw_lock_t**	lock,	/*!< in/out: lock of the page
-					hash acquired if bpage is
-					found. NULL otherwise. If NULL
-					is passed then the hash_lock
-					is released by this function */
-	ulint		lock_mode,	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
-	bool		watch)		/*!< in: if true, return watch
-					sentinel also. */
-{
-	buf_page_t*	bpage = NULL;
-	ulint		fold;
-	prio_rw_lock_t*	hash_lock;
-	ulint		mode = RW_LOCK_SHARED;
-
-	if (lock != NULL) {
-		*lock = NULL;
-		ut_ad(lock_mode == RW_LOCK_EX
-		      || lock_mode == RW_LOCK_SHARED);
-		mode = lock_mode;
-	}
-
-	fold = buf_page_address_fold(space, offset);
-	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
-	      && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (mode == RW_LOCK_SHARED) {
-		rw_lock_s_lock(hash_lock);
-	} else {
-		rw_lock_x_lock(hash_lock);
-	}
-
-	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
-	if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) {
-		if (!watch) {
-			bpage = NULL;
-		}
-		goto unlock_and_exit;
-	}
-
-	ut_ad(buf_page_in_file(bpage));
-	ut_ad(offset == bpage->offset);
-	ut_ad(space == bpage->space);
-
-	if (lock == NULL) {
-		/* The caller wants us to release the page_hash lock */
-		goto unlock_and_exit;
-	} else {
-		/* To be released by the caller */
-		*lock = hash_lock;
-		goto exit;
-	}
-
-unlock_and_exit:
-	if (mode == RW_LOCK_SHARED) {
-		rw_lock_s_unlock(hash_lock);
-	} else {
-		rw_lock_x_unlock(hash_lock);
-	}
-exit:
-	return(bpage);
-}
-
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-If the block is found and lock is not NULL then the appropriate
-page_hash lock is acquired in the specified lock mode. Otherwise,
-mode value is ignored. It is up to the caller to release the
-lock. If the block is found and the lock is NULL then the page_hash
-lock is released by this function.
-@return block, NULL if not found */ -UNIV_INLINE -buf_block_t* -buf_block_hash_get_locked( -/*=====================*/ - /*!< out: pointer to the bpage, - or NULL; if NULL, hash_lock - is also NULL. */ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: page number */ - prio_rw_lock_t** lock, /*!< in/out: lock of the page - hash acquired if bpage is - found. NULL otherwise. If NULL - is passed then the hash_lock - is released by this function */ - ulint lock_mode) /*!< in: RW_LOCK_EX or - RW_LOCK_SHARED. Ignored if - lock == NULL */ -{ - buf_page_t* bpage = buf_page_hash_get_locked(buf_pool, - space, - offset, - lock, - lock_mode); - buf_block_t* block = buf_page_get_block(bpage); - - if (block) { - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!lock || rw_lock_own(*lock, lock_mode)); -#endif /* UNIV_SYNC_DEBUG */ - return(block); - } else if (bpage) { - /* It is not a block. Just a bpage */ - ut_ad(buf_page_in_file(bpage)); - - if (lock) { - if (lock_mode == RW_LOCK_SHARED) { - rw_lock_s_unlock(*lock); - } else { - rw_lock_x_unlock(*lock); - } - } - *lock = NULL; - return(NULL); - } - - ut_ad(!bpage); - ut_ad(lock == NULL ||*lock == NULL); - return(NULL); -} - -/********************************************************************//** -Returns TRUE if the page can be found in the buffer pool hash table. - -NOTE that it is possible that the page is not yet read from disk, -though. - -@return TRUE if found in the page hash table */ -UNIV_INLINE -ibool -buf_page_peek( -/*==========*/ - ulint space, /*!< in: space id */ - ulint offset) /*!< in: page number */ -{ - buf_pool_t* buf_pool = buf_pool_get(space, offset); - - return(buf_page_hash_get(buf_pool, space, offset) != NULL); -} - -/********************************************************************//** -Releases a compressed-only page acquired with buf_page_get_zip(). 
 */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
-	buf_page_t*	bpage)	/*!< in: buffer block */
-{
-	buf_block_t*	block;
-
-	block = (buf_block_t*) bpage;
-
-	switch (buf_page_get_state(bpage)) {
-	case BUF_BLOCK_FILE_PAGE:
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_s_unlock(&block->debug_latch);
-#endif /* UNIV_SYNC_DEBUG */
-		/* Fall through */
-	case BUF_BLOCK_ZIP_PAGE:
-	case BUF_BLOCK_ZIP_DIRTY:
-		buf_block_unfix(block);
-		return;
-
-	case BUF_BLOCK_POOL_WATCH:
-	case BUF_BLOCK_NOT_USED:
-	case BUF_BLOCK_READY_FOR_USE:
-	case BUF_BLOCK_MEMORY:
-	case BUF_BLOCK_REMOVE_HASH:
-		break;
-	}
-
-	ut_error;	/* reached only for a state that cannot be released */
-}
-
-/********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. The block state must be BUF_BLOCK_FILE_PAGE
-(checked with ut_a). The latch is released before the bufferfix count
-is decremented. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
-	buf_block_t*	block,		/*!< in: buffer block */
-	ulint		rw_latch)	/*!< in: RW_S_LATCH, RW_X_LATCH,
-					RW_NO_LATCH; any value other than
-					the first two releases no latch */
-{
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&(block->debug_latch));
-#endif /* UNIV_SYNC_DEBUG */
-	if (rw_latch == RW_S_LATCH) {
-		rw_lock_s_unlock(&(block->lock));
-	} else if (rw_latch == RW_X_LATCH) {
-		rw_lock_x_unlock(&(block->lock));
-	}
-
-	buf_block_unfix(block);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*********************************************************************//**
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch.
 */
-UNIV_INLINE
-void
-buf_block_dbg_add_level(
-/*====================*/
-	buf_block_t*	block,	/*!< in: buffer page
-				where we have acquired latch */
-	ulint		level)	/*!< in: latching order level */
-{
-	sync_thread_add_level(&block->lock, level, FALSE);
-}
-
-#endif /* UNIV_SYNC_DEBUG */
-/*********************************************************************//**
-Get the nth chunk's buffer block in the specified buffer pool.
-n is not range-checked here.
-@return the nth chunk's buffer block (chunk->blocks). */
-UNIV_INLINE
-buf_block_t*
-buf_get_nth_chunk_block(
-/*====================*/
-	const buf_pool_t* buf_pool,	/*!< in: buffer pool instance */
-	ulint		n,		/*!< in: nth chunk in the buffer pool */
-	ulint*		chunk_size)	/*!< out: size of the nth chunk
-					(chunk->size) */
-{
-	const buf_chunk_t*	chunk;
-
-	chunk = buf_pool->chunks + n;
-	*chunk_size = chunk->size;
-	return(chunk->blocks);
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Checks if buf_pool->zip_mutex is owned and is serving for a given page as its
-block mutex.
-@return true if buf_pool->zip_mutex is owned. */
-UNIV_INLINE
-bool
-buf_own_zip_mutex_for_page(
-/*=======================*/
-	const buf_page_t*	bpage)	/*!< in: page in state
-					BUF_BLOCK_ZIP_PAGE or
-					BUF_BLOCK_ZIP_DIRTY */
-{
-	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-
-	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE
-	      || buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-	ut_ad(buf_page_get_mutex(bpage) == &buf_pool->zip_mutex);
-
-	return(mutex_own(&buf_pool->zip_mutex));
-}
-#endif /* UNIV_DEBUG */
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/buf0checksum.h b/storage/xtradb/include/buf0checksum.h
deleted file mode 100644
index 6818345f965..00000000000
--- a/storage/xtradb/include/buf0checksum.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0checksum.h -Buffer pool checksum functions, also linked from /extra/innochecksum.cc - -Created Aug 11, 2011 Vasil Dimov -*******************************************************/ - -#ifndef buf0checksum_h -#define buf0checksum_h - -#include "univ.i" - -#include "buf0types.h" - -/** Magic value to use instead of checksums when they are disabled */ -#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL - -/********************************************************************//** -Calculates a page CRC32 which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN -ib_uint32_t -buf_calc_page_crc32( -/*================*/ - const byte* page); /*!< in: buffer page */ - -/********************************************************************//** -Calculates a page checksum which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. 
-@return checksum */ -UNIV_INTERN -ulint -buf_calc_page_new_checksum( -/*=======================*/ - const byte* page); /*!< in: buffer page */ - -/********************************************************************//** -In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only -looked at the first few bytes of the page. This calculates that old -checksum. -NOTE: we must first store the new formula checksum to -FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum -because this takes that field as an input! -@return checksum */ -UNIV_INTERN -ulint -buf_calc_page_old_checksum( -/*=======================*/ - const byte* page); /*!< in: buffer page */ - -/********************************************************************//** -Return a printable string describing the checksum algorithm. -@return algorithm name */ -UNIV_INTERN -const char* -buf_checksum_algorithm_name( -/*========================*/ - srv_checksum_algorithm_t algo); /*!< in: algorithm */ - -extern ulong srv_checksum_algorithm; - -#endif /* buf0checksum_h */ diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h deleted file mode 100644 index 7b7464761cc..00000000000 --- a/storage/xtradb/include/buf0dblwr.h +++ /dev/null @@ -1,167 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0dblwr.h -Doublewrite buffer module - -Created 2011/12/19 Inaam Rana -*******************************************************/ - -#ifndef buf0dblwr_h -#define buf0dblwr_h - -#include "univ.i" -#include "ut0byte.h" -#include "log0log.h" -#include "log0recv.h" - -#ifndef UNIV_HOTBACKUP - -/** Doublewrite system */ -extern buf_dblwr_t* buf_dblwr; -/** Set to TRUE when the doublewrite buffer is being created */ -extern ibool buf_dblwr_being_created; - -/** Create the doublewrite buffer if the doublewrite buffer header -is not present in the TRX_SYS page. -@return whether the operation succeeded -@retval true if the doublewrite buffer exists or was created -@retval false if the creation failed (too small first data file) */ -UNIV_INTERN -bool -buf_dblwr_create() - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************//** -At a database startup initializes the doublewrite buffer memory structure if -we already have a doublewrite buffer created in the data files. If we are -upgrading to an InnoDB version which supports multiple tablespaces, then this -function performs the necessary update operations. If we are in a crash -recovery, this function loads the pages from double write buffer into memory. */ -void -buf_dblwr_init_or_load_pages( -/*=========================*/ - pfs_os_file_t file, - char* path, - bool load_corrupt_pages); - -/****************************************************************//** -Process the double write buffer pages. 
 */
-void
-buf_dblwr_process(void);
-/*===================*/
-
-/****************************************************************//**
-Frees the doublewrite buffer. */
-UNIV_INTERN
-void
-buf_dblwr_free(void);
-/*================*/
-/********************************************************************//**
-Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
-void
-buf_dblwr_update(
-/*=============*/
-	const buf_page_t*	bpage,	/*!< in: buffer block descriptor */
-	buf_flush_t		flush_type);/*!< in: flush type */
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-buf_dblwr_page_inside(
-/*==================*/
-	ulint	page_no);	/*!< in: page number */
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_dblwr_flush_buffered_writes() and waits for free
-space to appear. */
-UNIV_INTERN
-void
-buf_dblwr_add_to_batch(
-/*====================*/
-	buf_page_t*	bpage);	/*!< in: buffer block to write */
-/********************************************************************//**
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur. */
-UNIV_INTERN
-void
-buf_dblwr_flush_buffered_writes(void);
-/*=================================*/
-/********************************************************************//**
-Writes a page to the doublewrite buffer on disk, sync it, then write
-the page to the datafile and sync the datafile. This function is used
-for single page flushes.
If all the buffers allocated for single page -flushes in the doublewrite buffer are in use we wait here for one to -become free. We are guaranteed that a slot will become free because any -thread that is using a slot must also release the slot before leaving -this function. */ -UNIV_INTERN -void -buf_dblwr_write_single_page( -/*========================*/ - buf_page_t* bpage, /*!< in: buffer block to write */ - bool sync); /*!< in: true if sync IO requested */ - -/** Doublewrite control struct */ -struct buf_dblwr_t{ - ib_mutex_t mutex; /*!< mutex protecting the first_free - field and write_buf */ - ulint block1; /*!< the page number of the first - doublewrite block (64 pages) */ - ulint block2; /*!< page number of the second block */ - ulint first_free;/*!< first free position in write_buf - measured in units of UNIV_PAGE_SIZE */ - ulint b_reserved;/*!< number of slots currently reserved - for batch flush. */ - os_event_t b_event;/*!< event where threads wait for a - batch flush to end; - os_event_set() and os_event_reset() - are protected by buf_dblwr_t::mutex */ - ulint s_reserved;/*!< number of slots currently - reserved for single page flushes. */ - os_event_t s_event;/*!< event where threads wait for a - single page flush slot. Protected by mutex. */ - bool* in_use; /*!< flag used to indicate if a slot is - in use. Only used for single page - flushes. */ - bool batch_running;/*!< set to TRUE if currently a batch - is being written from the doublewrite - buffer. 
*/ - byte* write_buf;/*!< write buffer used in writing to the - doublewrite buffer, aligned to an - address divisible by UNIV_PAGE_SIZE - (which is required by Windows aio) */ - byte* write_buf_unaligned;/*!< pointer to write_buf, - but unaligned */ - buf_page_t** buf_block_arr;/*!< array to store pointers to - the buffer blocks which have been - cached to write_buf */ -}; - - -#endif /* UNIV_HOTBACKUP */ - -#endif diff --git a/storage/xtradb/include/buf0dump.h b/storage/xtradb/include/buf0dump.h deleted file mode 100644 index c704a8e97e0..00000000000 --- a/storage/xtradb/include/buf0dump.h +++ /dev/null @@ -1,72 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file buf/buf0dump.h -Implements a buffer pool dump/load. - -Created April 08, 2011 Vasil Dimov -*******************************************************/ - -#ifndef buf0dump_h -#define buf0dump_h - -#include "univ.i" - -/*****************************************************************//** -Wakes up the buffer pool dump/load thread and instructs it to start -a dump. 
This function is called by MySQL code via buffer_pool_dump_now() -and it should return immediately because the whole MySQL is frozen during -its execution. */ -UNIV_INTERN -void -buf_dump_start(); -/*============*/ - -/*****************************************************************//** -Wakes up the buffer pool dump/load thread and instructs it to start -a load. This function is called by MySQL code via buffer_pool_load_now() -and it should return immediately because the whole MySQL is frozen during -its execution. */ -UNIV_INTERN -void -buf_load_start(); -/*============*/ - -/*****************************************************************//** -Aborts a currently running buffer pool load. This function is called by -MySQL code via buffer_pool_load_abort() and it should return immediately -because the whole MySQL is frozen during its execution. */ -UNIV_INTERN -void -buf_load_abort(); -/*============*/ - -/*****************************************************************//** -This is the main thread for buffer pool dump/load. It waits for an -event and when waked up either performs a dump or load and sleeps -again. -@return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(buf_dump_thread)( -/*============================*/ - void* arg); /*!< in: a dummy parameter - required by os_thread_create */ - -#endif /* buf0dump_h */ diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h deleted file mode 100644 index 6089baf81e8..00000000000 --- a/storage/xtradb/include/buf0flu.h +++ /dev/null @@ -1,382 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0flu.h -The database buffer pool flush algorithm - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0flu_h -#define buf0flu_h - -#include "univ.i" -#include "ut0byte.h" -#include "log0log.h" -#ifndef UNIV_HOTBACKUP -#include "mtr0types.h" -#include "buf0types.h" - -/** Flag indicating if the page_cleaner is in active state. */ -extern bool buf_page_cleaner_is_active; - -/** Flag indicating if the lru_manager is in active state. */ -extern bool buf_lru_manager_is_active; - -/** Handled page counters for a single flush */ -struct flush_counters_t { - ulint flushed; /*!< number of dirty pages flushed */ - ulint evicted; /*!< number of clean pages evicted */ - ulint unzip_LRU_evicted;/*!< number of uncompressed page images - evicted */ -}; - -/********************************************************************//** -Remove a block from the flush list of modified blocks. */ -UNIV_INTERN -void -buf_flush_remove( -/*=============*/ - buf_page_t* bpage); /*!< in: pointer to the block in question */ -/*******************************************************************//** -Relocates a buffer control block on the flush_list. -Note that it is assumed that the contents of bpage has already been -copied to dpage. 
*/ -UNIV_INTERN -void -buf_flush_relocate_on_flush_list( -/*=============================*/ - buf_page_t* bpage, /*!< in/out: control block being moved */ - buf_page_t* dpage); /*!< in/out: destination block */ -/********************************************************************//** -Updates the flush system data structures when a write is completed. */ -UNIV_INTERN -void -buf_flush_write_complete( -/*=====================*/ - buf_page_t* bpage); /*!< in: pointer to the block in question */ -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Initializes a page for writing to the tablespace. */ -UNIV_INTERN -void -buf_flush_init_for_writing( -/*=======================*/ - byte* page, /*!< in/out: page */ - void* page_zip_, /*!< in/out: compressed page, or NULL */ - lsn_t newest_lsn); /*!< in: newest modification lsn - to the page */ -#ifndef UNIV_HOTBACKUP -# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/********************************************************************//** -Writes a flushable page asynchronously from the buffer pool to a file. -NOTE: block and LRU list mutexes must be held upon entering this function, and -they will be released by this function after flushing. This is loosely based on -buf_flush_batch() and buf_flush_page(). -@return TRUE if the page was flushed and the mutexes released */ -UNIV_INTERN -ibool -buf_flush_page_try( -/*===============*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - buf_block_t* block) /*!< in/out: buffer control block */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush list of -all buffer pool instances. -NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully for each buffer pool -instance. 
false if another batch of same type was already running in -at least one of the buffer pool instance */ -UNIV_INTERN -bool -buf_flush_list( -/*===========*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed); /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ -/******************************************************************//** -This function picks up a single dirty page from the tail of the LRU -list, flushes it, removes it from page_hash and LRU list and puts -it on the free list. It is called from user threads when they are -unable to find a replacable page at the tail of the LRU list i.e.: -when the background LRU flushing in the page_cleaner thread is not -fast enough to keep pace with the workload. -@return TRUE if success. */ -UNIV_INTERN -ibool -buf_flush_single_page_from_LRU( -/*===========================*/ - buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */ -/******************************************************************//** -Waits until a flush batch of the given type ends */ -UNIV_INTERN -void -buf_flush_wait_batch_end( -/*=====================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_flush_t type); /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ -/******************************************************************//** -Waits until a flush batch of the given type ends. This is called by -a thread that only wants to wait for a flush to end but doesn't do -any flushing itself. 
*/ -UNIV_INTERN -void -buf_flush_wait_batch_end_wait_only( -/*===============================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_flush_t type); /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ -/********************************************************************//** -This function should be called at a mini-transaction commit, if a page was -modified in it. Puts the block to the list of modified blocks, if it not -already in it. */ -UNIV_INLINE -void -buf_flush_note_modification( -/*========================*/ - buf_block_t* block, /*!< in: block which is modified */ - mtr_t* mtr); /*!< in: mtr */ -/********************************************************************//** -This function should be called when recovery has modified a buffer page. */ -UNIV_INLINE -void -buf_flush_recv_note_modification( -/*=============================*/ - buf_block_t* block, /*!< in: block which is modified */ - lsn_t start_lsn, /*!< in: start lsn of the first mtr in a - set of mtr's */ - lsn_t end_lsn); /*!< in: end lsn of the last mtr in the - set of mtr's */ -/********************************************************************//** -Returns TRUE if the file page block is immediately suitable for replacement, -i.e., transition FILE_PAGE => NOT_USED allowed. -@return TRUE if can replace immediately */ -UNIV_INTERN -ibool -buf_flush_ready_for_replace( -/*========================*/ - buf_page_t* bpage); /*!< in: buffer control block, must be - buf_page_in_file(bpage) and in the LRU list */ -/******************************************************************//** -page_cleaner thread tasked with flushing dirty pages from the buffer -pool flush lists. As of now we'll have only one instance of this thread. 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(buf_flush_page_cleaner_thread)( -/*==========================================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ -/******************************************************************//** -lru_manager thread tasked with performing LRU flushes and evictions to refill -the buffer pool free lists. As of now we'll have only one instance of this -thread. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(buf_flush_lru_manager_thread)( -/*=========================================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ -/*********************************************************************//** -Clears up tail of the LRU lists: -* Put replaceable pages at the tail of LRU to the free list -* Flush dirty pages at the tail of LRU to the disk -The depth to which we scan each buffer pool is controlled by dynamic -config parameter innodb_LRU_scan_depth. -@return number of pages flushed */ -UNIV_INTERN -ulint -buf_flush_LRU_tail(void); -/*====================*/ -/*********************************************************************//** -Wait for any possible LRU flushes that are in progress to end. */ -UNIV_INTERN -void -buf_flush_wait_LRU_batch_end(void); -/*==============================*/ - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/******************************************************************//** -Validates the flush list. -@return TRUE if ok */ -UNIV_INTERN -ibool -buf_flush_validate( -/*===============*/ - buf_pool_t* buf_pool); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -/********************************************************************//** -Initialize the red-black tree to speed up insertions into the flush_list -during recovery process. Should be called at the start of recovery -process before any page has been read/written. 
*/ -UNIV_INTERN -void -buf_flush_init_flush_rbt(void); -/*==========================*/ - -/********************************************************************//** -Frees up the red-black tree. */ -UNIV_INTERN -void -buf_flush_free_flush_rbt(void); -/*==========================*/ - -/********************************************************************//** -Writes a flushable page asynchronously from the buffer pool to a file. -NOTE: in simulated aio we must call -os_aio_simulated_wake_handler_threads after we have posted a batch of -writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this -function, and they will be released by this function if it returns true. -LRU_list_mutex must be held iff performing a single page flush and will be -released by the function if it returns true. -@return TRUE if the page was flushed */ -UNIV_INTERN -bool -buf_flush_page( -/*===========*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_page_t* bpage, /*!< in: buffer control block */ - buf_flush_t flush_type, /*!< in: type of flush */ - bool sync); /*!< in: true if sync IO request */ -/********************************************************************//** -Returns true if the block is modified and ready for flushing. -@return true if can flush immediately */ -UNIV_INTERN -bool -buf_flush_ready_for_flush( -/*======================*/ - buf_page_t* bpage, /*!< in: buffer control block, must be - buf_page_in_file(bpage) */ - buf_flush_t flush_type)/*!< in: type of flush */ - MY_ATTRIBUTE((warn_unused_result)); - -#ifdef UNIV_DEBUG -/******************************************************************//** -Check if there are any dirty pages that belong to a space id in the flush -list in a particular buffer pool. 
-@return number of dirty pages present in a single buffer pool */ -UNIV_INTERN -ulint -buf_pool_get_dirty_pages_count( -/*===========================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool */ - ulint id); /*!< in: space id to check */ -/******************************************************************//** -Check if there are any dirty pages that belong to a space id in the flush list. -@return count of dirty pages present in all the buffer pools */ -UNIV_INTERN -ulint -buf_flush_get_dirty_pages_count( -/*============================*/ - ulint id); /*!< in: space id to check */ -#endif /* UNIV_DEBUG */ - -#endif /* !UNIV_HOTBACKUP */ - -/******************************************************************//** -Check if a flush list flush is in progress for any buffer pool instance, or if -all the instances are clean, for heuristic purposes. -@return true if flush list flush is in progress or buffer pool is clean */ -UNIV_INLINE -bool -buf_flush_flush_list_in_progress(void) -/*==================================*/ - MY_ATTRIBUTE((warn_unused_result)); - -/** If LRU list of a buf_pool is less than this size then LRU eviction -should not happen. This is because when we do LRU flushing we also put -the blocks on free list. If LRU list is very small then we can end up -in thrashing. 
*/ -#define BUF_LRU_MIN_LEN 256 - -/******************************************************************//** -Start a buffer flush batch for LRU or flush list */ -ibool -buf_flush_start( -/*============*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ - -/******************************************************************//** -End a buffer flush batch for LRU or flush list */ -void -buf_flush_end( -/*==========*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU - or BUF_FLUSH_LIST */ - -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list or flush_list. -NOTE 1: in the case of an LRU flush the calling thread may own latches to -pages: to avoid deadlocks, this function must be written so that it cannot -end up waiting for these latches! NOTE 2: in the case of a flush list flush, -the calling thread is not allowed to own any latches on pages! 
-@return number of blocks for which the write request was queued */ -__attribute__((nonnull)) -void -buf_flush_batch( -/*============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or - BUF_FLUSH_LIST; if BUF_FLUSH_LIST, - then the caller must not own any - latches on pages */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST - all blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - bool limited_lru_scan,/*!< in: for LRU flushes, if true, - allow to scan only up to - srv_LRU_scan_depth pages in total */ - flush_counters_t* n); /*!< out: flushed/evicted page - counts */ - - -/******************************************************************//** -Gather the aggregated stats for both flush list and LRU list flushing */ -void -buf_flush_common( -/*=============*/ - buf_flush_t flush_type, /*!< in: type of flush */ - ulint page_count); /*!< in: number of pages flushed */ - -#ifndef UNIV_NONINL -#include "buf0flu.ic" -#endif - -#endif diff --git a/storage/xtradb/include/buf0flu.ic b/storage/xtradb/include/buf0flu.ic deleted file mode 100644 index 06fa49754cd..00000000000 --- a/storage/xtradb/include/buf0flu.ic +++ /dev/null @@ -1,167 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0flu.ic -The database buffer pool flush algorithm - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef UNIV_HOTBACKUP -#include "buf0buf.h" -#include "mtr0mtr.h" -#include "srv0srv.h" - -/********************************************************************//** -Inserts a modified block into the flush list. */ -UNIV_INTERN -void -buf_flush_insert_into_flush_list( -/*=============================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_block_t* block, /*!< in/out: block which is modified */ - lsn_t lsn); /*!< in: oldest modification */ -/********************************************************************//** -Inserts a modified block into the flush list in the right sorted position. -This function is used by recovery, because there the modifications do not -necessarily come in the order of lsn's. */ -UNIV_INTERN -void -buf_flush_insert_sorted_into_flush_list( -/*====================================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - buf_block_t* block, /*!< in/out: block which is modified */ - lsn_t lsn); /*!< in: oldest modification */ - -/********************************************************************//** -This function should be called at a mini-transaction commit, if a page was -modified in it. Puts the block to the list of modified blocks, if it is not -already in it. 
*/ -UNIV_INLINE -void -buf_flush_note_modification( -/*========================*/ - buf_block_t* block, /*!< in: block which is modified */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_pool_t* buf_pool = buf_pool_from_block(block); - - ut_ad(!srv_read_only_mode); - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(block->page.buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(!mtr->made_dirty || log_flush_order_mutex_own()); - - ut_ad(mtr->start_lsn != 0); - ut_ad(mtr->modifications); - - mutex_enter(&block->mutex); - ut_ad(block->page.newest_modification <= mtr->end_lsn); - - block->page.newest_modification = mtr->end_lsn; - - if (!block->page.oldest_modification) { - ut_a(mtr->made_dirty); - ut_ad(log_flush_order_mutex_own()); - buf_flush_insert_into_flush_list( - buf_pool, block, mtr->start_lsn); - } else { - ut_ad(block->page.oldest_modification <= mtr->start_lsn); - } - - mutex_exit(&block->mutex); - - srv_stats.buf_pool_write_requests.inc(); -} - -/********************************************************************//** -This function should be called when recovery has modified a buffer page. 
*/ -UNIV_INLINE -void -buf_flush_recv_note_modification( -/*=============================*/ - buf_block_t* block, /*!< in: block which is modified */ - lsn_t start_lsn, /*!< in: start lsn of the first mtr in a - set of mtr's */ - lsn_t end_lsn) /*!< in: end lsn of the last mtr in the - set of mtr's */ -{ - buf_pool_t* buf_pool = buf_pool_from_block(block); - - ut_ad(!srv_read_only_mode); - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(block->page.buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(log_flush_order_mutex_own()); - - ut_ad(start_lsn != 0); - ut_ad(block->page.newest_modification <= end_lsn); - - mutex_enter(&block->mutex); - block->page.newest_modification = end_lsn; - - if (!block->page.oldest_modification) { - buf_flush_insert_sorted_into_flush_list( - buf_pool, block, start_lsn); - } else { - ut_ad(block->page.oldest_modification <= start_lsn); - } - - mutex_exit(&block->mutex); - -} -#endif /* !UNIV_HOTBACKUP */ - -/******************************************************************//** -Check if a flush list flush is in progress for any buffer pool instance, or if -all the instances are clean, for heuristic purposes. 
-@return true if flush list flush is in progress or buffer pool is clean */ -UNIV_INLINE -bool -buf_flush_flush_list_in_progress(void) -/*==================================*/ -{ - bool all_clean = true; - - for (ulint i = 0; i < srv_buf_pool_instances; i++) { - - const buf_pool_t* buf_pool = buf_pool_from_array(i); - if (buf_pool->init_flush[BUF_FLUSH_LIST] - || buf_pool->n_flush[BUF_FLUSH_LIST]) { - - return(true); - } - - if (all_clean) { - - all_clean = (UT_LIST_GET_LEN(buf_pool->flush_list) - == 0); - } - - } - return(all_clean); -} diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h deleted file mode 100644 index f056c6c4116..00000000000 --- a/storage/xtradb/include/buf0lru.h +++ /dev/null @@ -1,314 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0lru.h -The database buffer pool LRU replacement algorithm - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0lru_h -#define buf0lru_h - -#include "univ.i" -#ifndef UNIV_HOTBACKUP -#ifndef UNIV_INNOCHECKSUM -#include "ut0byte.h" -#endif -#include "buf0types.h" - -// Forward declaration -struct trx_t; - -/******************************************************************//** -Returns TRUE if less than 25 % of the buffer pool is available. This can be -used in heuristics to prevent huge transactions eating up the whole buffer -pool for their locks. -@return TRUE if less than 25 % of buffer pool left */ -UNIV_INTERN -ibool -buf_LRU_buf_pool_running_out(void); -/*==============================*/ - -/*####################################################################### -These are low-level functions -#########################################################################*/ - -/** Minimum LRU list length for which the LRU_old pointer is defined */ -#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */ - -/******************************************************************//** -Flushes all dirty pages or removes all pages belonging -to a given tablespace. A PROBLEM: if readahead is being started, what -guarantees that it will not try to read in pages after this operation -has completed? 
*/ -UNIV_INTERN -void -buf_LRU_flush_or_remove_pages( -/*==========================*/ - ulint id, /*!< in: space id */ - buf_remove_t buf_remove, /*!< in: remove or flush strategy */ - const trx_t* trx); /*!< to check if the operation must - be interrupted */ - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/********************************************************************//** -Insert a compressed block into buf_pool->zip_clean in the LRU order. */ -UNIV_INTERN -void -buf_LRU_insert_zip_clean( -/*=====================*/ - buf_page_t* bpage); /*!< in: pointer to the block in question */ -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - -/******************************************************************//** -Try to free a block. If bpage is a descriptor of a compressed-only -page, the descriptor object will be freed as well. - -NOTE: If this function returns true, it will release the LRU list mutex, -and temporarily release and relock the buf_page_get_mutex() mutex. -Furthermore, the page frame will no longer be accessible via bpage. If this -function returns false, the buf_page_get_mutex() might be temporarily released -and relocked too. - -The caller must hold the LRU list and buf_page_get_mutex() mutexes. - -@return true if freed, false otherwise. */ -UNIV_INTERN -bool -buf_LRU_free_page( -/*==============*/ - buf_page_t* bpage, /*!< in: block to be freed */ - bool zip) /*!< in: true if should remove also the - compressed page of an uncompressed page */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Try to free a replaceable block. -@return TRUE if found and freed */ -UNIV_INTERN -ibool -buf_LRU_scan_and_free_block( -/*========================*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ibool scan_all) /*!< in: scan whole LRU list - if TRUE, otherwise scan only - 'old' blocks. 
*/ - MY_ATTRIBUTE((nonnull,warn_unused_result)); -/******************************************************************//** -Returns a free block from the buf_pool. The block is taken off the -free list. If it is empty, returns NULL. -@return a free control block, or NULL if the buf_block->free list is empty */ -UNIV_INTERN -buf_block_t* -buf_LRU_get_free_only( -/*==================*/ - buf_pool_t* buf_pool); /*!< buffer pool instance */ -/******************************************************************//** -Returns a free block from the buf_pool. The block is taken off the -free list. If it is empty, blocks are moved from the end of the -LRU list to the free list. -This function is called from a user thread when it needs a clean -block to read in a page. Note that we only ever get a block from -the free list. Even when we flush a page or find a page in LRU scan -we put it to free list to be used. -* iteration 0: - * get a block from free list, success:done - * if there is an LRU flush batch in progress: - * wait for batch to end: retry free list - * if buf_pool->try_LRU_scan is set - * scan LRU up to srv_LRU_scan_depth to find a clean block - * the above will put the block on free list - * success:retry the free list - * flush one dirty page from tail of LRU to disk - * the above will put the block on free list - * success: retry the free list -* iteration 1: - * same as iteration 0 except: - * scan whole LRU list - * scan LRU list even if buf_pool->try_LRU_scan is not set -* iteration > 1: - * same as iteration 1 but sleep 100ms -@return the free control block, in state BUF_BLOCK_READY_FOR_USE */ -UNIV_INTERN -buf_block_t* -buf_LRU_get_free_block( -/*===================*/ - buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ - MY_ATTRIBUTE((nonnull,warn_unused_result)); -/******************************************************************//** -Determines if the unzip_LRU list should be used for evicting a victim -instead of the general LRU list. 
-@return TRUE if should use unzip_LRU */ -UNIV_INTERN -ibool -buf_LRU_evict_from_unzip_LRU( -/*=========================*/ - buf_pool_t* buf_pool); -/******************************************************************//** -Puts a block back to the free list. */ -UNIV_INTERN -void -buf_LRU_block_free_non_file_page( -/*=============================*/ - buf_block_t* block); /*!< in: block, must not contain a file page */ -/******************************************************************//** -Adds a block to the LRU list. Please make sure that the zip_size is -already set into the page zip when invoking the function, so that we -can get correct zip_size from the buffer page when adding a block -into LRU */ -UNIV_INTERN -void -buf_LRU_add_block( -/*==============*/ - buf_page_t* bpage, /*!< in: control block */ - ibool old); /*!< in: TRUE if should be put to the old - blocks in the LRU list, else put to the - start; if the LRU list is very short, added to - the start regardless of this parameter */ -/******************************************************************//** -Adds a block to the LRU list of decompressed zip pages. */ -UNIV_INTERN -void -buf_unzip_LRU_add_block( -/*====================*/ - buf_block_t* block, /*!< in: control block */ - ibool old); /*!< in: TRUE if should be put to the end - of the list, else put to the start */ -/******************************************************************//** -Moves a block to the start of the LRU list. */ -UNIV_INTERN -void -buf_LRU_make_block_young( -/*=====================*/ - buf_page_t* bpage); /*!< in: control block */ -/******************************************************************//** -Moves a block to the end of the LRU list. */ -UNIV_INTERN -void -buf_LRU_make_block_old( -/*===================*/ - buf_page_t* bpage); /*!< in: control block */ -/**********************************************************************//** -Updates buf_pool->LRU_old_ratio. 
-@return updated old_pct */ -UNIV_INTERN -ulint -buf_LRU_old_ratio_update( -/*=====================*/ - uint old_pct,/*!< in: Reserve this percentage of - the buffer pool for "old" blocks. */ - ibool adjust);/*!< in: TRUE=adjust the LRU list; - FALSE=just assign buf_pool->LRU_old_ratio - during the initialization of InnoDB */ -/********************************************************************//** -Update the historical stats that we are collecting for LRU eviction -policy at the end of each interval. */ -UNIV_INTERN -void -buf_LRU_stat_update(void); -/*=====================*/ - -/******************************************************************//** -Remove one page from LRU list and put it to free list */ -UNIV_INTERN -void -buf_LRU_free_one_page( -/*==================*/ - buf_page_t* bpage) /*!< in/out: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ - MY_ATTRIBUTE((nonnull)); - -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/**********************************************************************//** -Validates the LRU list. -@return TRUE */ -UNIV_INTERN -ibool -buf_LRU_validate(void); -/*==================*/ -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/**********************************************************************//** -Prints the LRU list. */ -UNIV_INTERN -void -buf_LRU_print(void); -/*===============*/ -#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ - -/** @name Heuristics for detecting index scan @{ */ -/** The denominator of buf_pool->LRU_old_ratio. */ -#define BUF_LRU_OLD_RATIO_DIV 1024 -/** Maximum value of buf_pool->LRU_old_ratio. -@see buf_LRU_old_adjust_len -@see buf_pool->LRU_old_ratio_update */ -#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV -/** Minimum value of buf_pool->LRU_old_ratio. 
-@see buf_LRU_old_adjust_len -@see buf_pool->LRU_old_ratio_update -The minimum must exceed -(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */ -#define BUF_LRU_OLD_RATIO_MIN 51 - -#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX -# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX" -#endif -#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV -# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV" -#endif - -/** Move blocks to "new" LRU list only if the first access was at -least this many milliseconds ago. Not protected by any mutex or latch. */ -extern uint buf_LRU_old_threshold_ms; -/* @} */ - -/** @brief Statistics for selecting the LRU list for eviction. - -These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O -and page_zip_decompress() operations. Based on the statistics we decide -if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */ -struct buf_LRU_stat_t -{ - ulint io; /**< Counter of buffer pool I/O operations. */ - ulint unzip; /**< Counter of page_zip_decompress operations. */ -}; - -/** Current operation counters. Not protected by any mutex. -Cleared by buf_LRU_stat_update(). */ -extern buf_LRU_stat_t buf_LRU_stat_cur; - -/** Running sum of past values of buf_LRU_stat_cur. -Updated by buf_LRU_stat_update(). */ -extern buf_LRU_stat_t buf_LRU_stat_sum; - -/********************************************************************//** -Increments the I/O counter in buf_LRU_stat_cur. */ -#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++ -/********************************************************************//** -Increments the page_zip_decompress() counter in buf_LRU_stat_cur. 
*/ -#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++ - -#ifndef UNIV_NONINL -#include "buf0lru.ic" -#endif - -#endif /* !UNIV_HOTBACKUP */ - -#endif diff --git a/storage/xtradb/include/buf0lru.ic b/storage/xtradb/include/buf0lru.ic deleted file mode 100644 index 6e0da7a2588..00000000000 --- a/storage/xtradb/include/buf0lru.ic +++ /dev/null @@ -1,25 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0lru.ic -The database buffer replacement algorithm - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - diff --git a/storage/xtradb/include/buf0mtflu.h b/storage/xtradb/include/buf0mtflu.h deleted file mode 100644 index 0475335bbf5..00000000000 --- a/storage/xtradb/include/buf0mtflu.h +++ /dev/null @@ -1,95 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2014 SkySQL Ab. All Rights Reserved. -Copyright (C) 2014 Fusion-io. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/buf0mtflu.h -Multi-threadef flush method interface function prototypes - -Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com - Dhananjoy Das DDas@fusionio.com -***********************************************************************/ - -#ifndef buf0mtflu_h -#define buf0mtflu_h - -/******************************************************************//** -Add exit work item to work queue to signal multi-threded flush -threads that they should exit. -*/ -void -buf_mtflu_io_thread_exit(void); -/*===========================*/ - -/******************************************************************//** -Initialize multi-threaded flush thread syncronization data. -@return Initialized multi-threaded flush thread syncroniztion data. 
*/ -void* -buf_mtflu_handler_init( -/*===================*/ - ulint n_threads, /*!< in: Number of threads to create */ - ulint wrk_cnt); /*!< in: Number of work items */ - -/******************************************************************//** -Return true if multi-threaded flush is initialized -@return true if initialized, false if not */ -bool -buf_mtflu_init_done(void); -/*======================*/ - -/*********************************************************************//** -Clears up tail of the LRU lists: -* Put replaceable pages at the tail of LRU to the free list -* Flush dirty pages at the tail of LRU to the disk -The depth to which we scan each buffer pool is controlled by dynamic -config parameter innodb_LRU_scan_depth. -@return total pages flushed */ -UNIV_INTERN -ulint -buf_mtflu_flush_LRU_tail(void); -/*===========================*/ - -/*******************************************************************//** -Multi-threaded version of buf_flush_list -*/ -bool -buf_mtflu_flush_list( -/*=================*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed); /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ - -/*********************************************************************//** -Set correct thread identifiers to io thread array based on -information we have. 
*/ -void -buf_mtflu_set_thread_ids( -/*=====================*/ - ulint n_threads, /*!<in: Number of threads to fill */ - void* ctx, /*!<in: thread context */ - os_thread_id_t* thread_ids); /*!<in: thread id array */ - -#endif diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h deleted file mode 100644 index ab73108a71e..00000000000 --- a/storage/xtradb/include/buf0rea.h +++ /dev/null @@ -1,190 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0rea.h -The database buffer read - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0rea_h -#define buf0rea_h - -#include "univ.i" -#include "buf0types.h" - -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. 
- -@param[in] space space_id -@param[in] zip_size compressed page size in bytes, or 0 -@param[in] offset page number -@param[in] trx transaction -@return DB_SUCCESS if page has been read and is not corrupted, -@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted, -@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but -after decryption normal page checksum does not match. -@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */ -UNIV_INTERN -dberr_t -buf_read_page( - ulint space, - ulint zip_size, - ulint offset, - trx_t* trx); - -/********************************************************************//** -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. -@param[in] space Tablespace id -@param[in] offset Page number */ -UNIV_INTERN -void -buf_read_page_async( - ulint space, - ulint offset); - -/********************************************************************//** -Applies a random read-ahead in buf_pool if there are at least a threshold -value of accessed pages from the random read-ahead area. Does not read any -page, not even the one at the position (space, offset), if the read-ahead -mechanism is not activated. NOTE 1: the calling thread may own latches on -pages: to avoid deadlocks this function must be written such that it cannot -end up waiting for these latches! NOTE 2: the calling thread must want -access to the page given: this rule is set to prevent unintended read-aheads -performed by ibuf routines, a situation which could result in a deadlock if -the OS does not support asynchronous i/o. -@return number of page read requests issued; NOTE that if we read ibuf -pages, it may happen that the page at the given page number does not -get read even if we return a positive value! 
-@return number of page read requests issued */ -UNIV_INTERN -ulint -buf_read_ahead_random( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, - or 0 */ - ulint offset, /*!< in: page number of a page which - the current thread wants to access */ - ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf - routine */ - trx_t* trx); -/********************************************************************//** -Applies linear read-ahead if in the buf_pool the page is a border page of -a linear read-ahead area and all the pages in the area have been accessed. -Does not read any page if the read-ahead mechanism is not activated. Note -that the algorithm looks at the 'natural' adjacent successor and -predecessor of the page, which on the leaf level of a B-tree are the next -and previous page in the chain of leaves. To know these, the page specified -in (space, offset) must already be present in the buf_pool. Thus, the -natural way to use this function is to call it when a page in the buf_pool -is accessed the first time, calling this function just after it has been -bufferfixed. -NOTE 1: as this function looks at the natural predecessor and successor -fields on the page, what happens, if these are not initialized to any -sensible value? No problem, before applying read-ahead we check that the -area to read is within the span of the space, if not, read-ahead is not -applied. An uninitialized value may result in a useless read operation, but -only very improbably. -NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this -function must be written such that it cannot end up waiting for these -latches! -NOTE 3: the calling thread must want access to the page given: this rule is -set to prevent unintended read-aheads performed by ibuf routines, a situation -which could result in a deadlock if the OS does not support asynchronous io. 
-@return number of page read requests issued */ -UNIV_INTERN -ulint -buf_read_ahead_linear( -/*==================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes, or 0 */ - ulint offset, /*!< in: page number; see NOTE 3 above */ - ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */ - trx_t* trx); -/********************************************************************//** -Issues read requests for pages which the ibuf module wants to read in, in -order to contract the insert buffer tree. Technically, this function is like -a read-ahead function. */ -UNIV_INTERN -void -buf_read_ibuf_merge_pages( -/*======================*/ - bool sync, /*!< in: true if the caller - wants this function to wait - for the highest address page - to get read in, before this - function returns */ - const ulint* space_ids, /*!< in: array of space ids */ - const ib_int64_t* space_versions,/*!< in: the spaces must have - this version number - (timestamp), otherwise we - discard the read; we use this - to cancel reads if DISCARD + - IMPORT may have changed the - tablespace size */ - const ulint* page_nos, /*!< in: array of page numbers - to read, with the highest page - number the last in the - array */ - ulint n_stored); /*!< in: number of elements - in the arrays */ -/********************************************************************//** -Issues read requests for pages which recovery wants to read in. 
*/ -UNIV_INTERN -void -buf_read_recv_pages( -/*================*/ - ibool sync, /*!< in: TRUE if the caller - wants this function to wait - for the highest address page - to get read in, before this - function returns */ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in - bytes, or 0 */ - const ulint* page_nos, /*!< in: array of page numbers - to read, with the highest page - number the last in the - array */ - ulint n_stored); /*!< in: number of page numbers - in the array */ - -/** The size in pages of the area which the read-ahead algorithms read if -invoked */ -#define BUF_READ_AHEAD_AREA(b) ((b)->read_ahead_area) - -/** @name Modes used in read-ahead @{ */ -/** read only pages belonging to the insert buffer tree */ -#define BUF_READ_IBUF_PAGES_ONLY 131 -/** read any page */ -#define BUF_READ_ANY_PAGE 132 -/** read any page, but ignore (return an error) if a page does not exist -instead of crashing like BUF_READ_ANY_PAGE does */ -#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024 -/* @} */ - -#endif diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h deleted file mode 100644 index 4eb5ea18cef..00000000000 --- a/storage/xtradb/include/buf0types.h +++ /dev/null @@ -1,157 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/buf0types.h -The database buffer pool global types for the directory - -Created 11/17/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0types_h -#define buf0types_h - -#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS) -#define PAGE_ATOMIC_REF_COUNT -#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */ - -/** Buffer page (uncompressed or compressed) */ -struct buf_page_t; -/** Buffer block for which an uncompressed page exists */ -struct buf_block_t; -/** Buffer pool chunk comprising buf_block_t */ -struct buf_chunk_t; -/** Buffer pool comprising buf_chunk_t */ -struct buf_pool_t; -/** Buffer pool statistics struct */ -struct buf_pool_stat_t; -/** Buffer pool buddy statistics struct */ -struct buf_buddy_stat_t; -/** Doublewrite memory struct */ -struct buf_dblwr_t; - -/** A buffer frame. @see page_t */ -typedef byte buf_frame_t; - -/** Flags for flush types */ -enum buf_flush_t { - BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */ - BUF_FLUSH_LIST, /*!< flush via the flush list - of dirty blocks */ - BUF_FLUSH_SINGLE_PAGE, /*!< flush via the LRU list - but only a single page */ - BUF_FLUSH_N_TYPES /*!< index of last element + 1 */ -}; - -/** Algorithm to remove the pages for a tablespace from the buffer pool. -See buf_LRU_flush_or_remove_pages(). 
*/ -enum buf_remove_t { - BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer - pool, don't write or sync to disk */ - BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove only, from the flush list, - don't write or sync to disk */ - BUF_REMOVE_FLUSH_WRITE /*!< Flush dirty pages to disk only - don't remove from the buffer pool */ -}; - -/** Flags for io_fix types */ -enum buf_io_fix { - BUF_IO_NONE = 0, /**< no pending I/O */ - BUF_IO_READ, /**< read pending */ - BUF_IO_WRITE, /**< write pending */ - BUF_IO_PIN /**< disallow relocation of - block and its removal of from - the flush_list */ -}; - -/** Alternatives for srv_checksum_algorithm, which can be changed by -setting innodb_checksum_algorithm */ -enum srv_checksum_algorithm_t { - SRV_CHECKSUM_ALGORITHM_CRC32, /*!< Write crc32, allow crc32, - innodb or none when reading */ - SRV_CHECKSUM_ALGORITHM_STRICT_CRC32, /*!< Write crc32, allow crc32 - when reading */ - SRV_CHECKSUM_ALGORITHM_INNODB, /*!< Write innodb, allow crc32, - innodb or none when reading */ - SRV_CHECKSUM_ALGORITHM_STRICT_INNODB, /*!< Write innodb, allow - innodb when reading */ - SRV_CHECKSUM_ALGORITHM_NONE, /*!< Write none, allow crc32, - innodb or none when reading */ - SRV_CHECKSUM_ALGORITHM_STRICT_NONE /*!< Write none, allow none - when reading */ -}; - -/** Alternatives for srv_cleaner_lsn_age_factor, set through -innodb_cleaner_lsn_age_factor variable */ -enum srv_cleaner_lsn_age_factor_t { - SRV_CLEANER_LSN_AGE_FACTOR_LEGACY, /*!< Original Oracle MySQL 5.6 - formula */ - SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT - /*!< Percona Server 5.6 formula - that returns lower values than - legacy option for low - checkpoint ages, and higher - values for high ages. This has - the effect of stabilizing the - checkpoint age higher. 
*/ -}; - -/** Alternatives for srv_foreground_preflush, set through -innodb_foreground_preflush variable */ -enum srv_foreground_preflush_t { - SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH, /*!< Original Oracle MySQL 5.6 - behavior of performing a sync - flush list flush */ - SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF /*!< Exponential backoff wait - for the page cleaner to flush - for us */ -}; - -/** Alternatives for srv_empty_free_list_algorithm, set through -innodb_empty_free_list_algorithm variable */ -enum srv_empty_free_list_t { - SRV_EMPTY_FREE_LIST_LEGACY, /*!< Original Oracle MySQL 5.6 - algorithm */ - SRV_EMPTY_FREE_LIST_BACKOFF /*!< Percona Server 5.6 algorithm that - loops in a progressive backoff until a - free page is produced by the cleaner - thread */ -}; - -/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ -/* @{ */ -/** Zip shift value for the smallest page size */ -#define BUF_BUDDY_LOW_SHIFT UNIV_ZIP_SIZE_SHIFT_MIN - -/** Smallest buddy page size */ -#define BUF_BUDDY_LOW (1U << BUF_BUDDY_LOW_SHIFT) - -/** Actual number of buddy sizes based on current page size */ -#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT) - -/** Maximum number of buddy sizes based on the max page size */ -#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX \ - - BUF_BUDDY_LOW_SHIFT) - -/** twice the maximum block size of the buddy system; -the underlying memory is aligned by this amount: -this must be equal to UNIV_PAGE_SIZE */ -#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES) -/* @} */ - -#endif /* buf0types.h */ diff --git a/storage/xtradb/include/data0data.h b/storage/xtradb/include/data0data.h deleted file mode 100644 index 1d954bfc07c..00000000000 --- a/storage/xtradb/include/data0data.h +++ /dev/null @@ -1,536 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/data0data.h -SQL data field and tuple - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef data0data_h -#define data0data_h - -#include "univ.i" - -#include "data0types.h" -#include "data0type.h" -#include "mem0mem.h" -#include "dict0types.h" - -/** Storage for overflow data in a big record, that is, a clustered -index record which needs external storage of data fields */ -struct big_rec_t; - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets pointer to the type struct of SQL data field. -@return pointer to the type struct */ -UNIV_INLINE -dtype_t* -dfield_get_type( -/*============*/ - const dfield_t* field) /*!< in: SQL data field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets pointer to the data in a field. 
-@return pointer to data */ -UNIV_INLINE -void* -dfield_get_data( -/*============*/ - const dfield_t* field) /*!< in: field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#else /* UNIV_DEBUG */ -# define dfield_get_type(field) (&(field)->type) -# define dfield_get_data(field) ((field)->data) -#endif /* UNIV_DEBUG */ -/*********************************************************************//** -Sets the type struct of SQL data field. */ -UNIV_INLINE -void -dfield_set_type( -/*============*/ - dfield_t* field, /*!< in: SQL data field */ - const dtype_t* type) /*!< in: pointer to data type struct */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Gets length of field data. -@return length of data; UNIV_SQL_NULL if SQL null data */ -UNIV_INLINE -ulint -dfield_get_len( -/*===========*/ - const dfield_t* field) /*!< in: field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Sets length in a field. 
*/ -UNIV_INLINE -void -dfield_set_len( -/*===========*/ - dfield_t* field, /*!< in: field */ - ulint len) /*!< in: length or UNIV_SQL_NULL */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Determines if a field is SQL NULL -@return nonzero if SQL null data */ -UNIV_INLINE -ulint -dfield_is_null( -/*===========*/ - const dfield_t* field) /*!< in: field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Determines if a field is externally stored -@return nonzero if externally stored */ -UNIV_INLINE -ulint -dfield_is_ext( -/*==========*/ - const dfield_t* field) /*!< in: field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Sets the "external storage" flag */ -UNIV_INLINE -void -dfield_set_ext( -/*===========*/ - dfield_t* field) /*!< in/out: field */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Sets pointer to the data and length in a field. */ -UNIV_INLINE -void -dfield_set_data( -/*============*/ - dfield_t* field, /*!< in: field */ - const void* data, /*!< in: data */ - ulint len) /*!< in: length or UNIV_SQL_NULL */ - MY_ATTRIBUTE((nonnull(1))); -/*********************************************************************//** -Sets a data field to SQL NULL. */ -UNIV_INLINE -void -dfield_set_null( -/*============*/ - dfield_t* field) /*!< in/out: field */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Writes an SQL null field full of zeros. */ -UNIV_INLINE -void -data_write_sql_null( -/*================*/ - byte* data, /*!< in: pointer to a buffer of size len */ - ulint len) /*!< in: SQL null size in bytes */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Copies the data and len fields. 
*/ -UNIV_INLINE -void -dfield_copy_data( -/*=============*/ - dfield_t* field1, /*!< out: field to copy to */ - const dfield_t* field2) /*!< in: field to copy from */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Copies a data field to another. */ -UNIV_INLINE -void -dfield_copy( -/*========*/ - dfield_t* field1, /*!< out: field to copy to */ - const dfield_t* field2) /*!< in: field to copy from */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Copies the data pointed to by a data field. */ -UNIV_INLINE -void -dfield_dup( -/*=======*/ - dfield_t* field, /*!< in/out: data field */ - mem_heap_t* heap) /*!< in: memory heap where allocated */ - MY_ATTRIBUTE((nonnull)); -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Tests if two data fields are equal. -If len==0, tests the data length and content for equality. -If len>0, tests the first len bytes of the content for equality. -@return TRUE if both fields are NULL or if they are equal */ -UNIV_INLINE -ibool -dfield_datas_are_binary_equal( -/*==========================*/ - const dfield_t* field1, /*!< in: field */ - const dfield_t* field2, /*!< in: field */ - ulint len) /*!< in: maximum prefix to compare, - or 0 to compare the whole field length */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Tests if dfield data length and content is equal to the given. -@return TRUE if equal */ -UNIV_INLINE -ibool -dfield_data_is_binary_equal( -/*========================*/ - const dfield_t* field, /*!< in: field */ - ulint len, /*!< in: data length or UNIV_SQL_NULL */ - const byte* data) /*!< in: data */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Gets number of fields in a data tuple. 
-@return number of fields */ -UNIV_INLINE -ulint -dtuple_get_n_fields( -/*================*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets nth field of a tuple. -@return nth field */ -UNIV_INLINE -dfield_t* -dtuple_get_nth_field( -/*=================*/ - const dtuple_t* tuple, /*!< in: tuple */ - ulint n); /*!< in: index of field */ -#else /* UNIV_DEBUG */ -# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n)) -#endif /* UNIV_DEBUG */ -/*********************************************************************//** -Gets info bits in a data tuple. -@return info bits */ -UNIV_INLINE -ulint -dtuple_get_info_bits( -/*=================*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Sets info bits in a data tuple. */ -UNIV_INLINE -void -dtuple_set_info_bits( -/*=================*/ - dtuple_t* tuple, /*!< in: tuple */ - ulint info_bits) /*!< in: info bits */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Gets number of fields used in record comparisons. -@return number of fields used in comparisons in rem0cmp.* */ -UNIV_INLINE -ulint -dtuple_get_n_fields_cmp( -/*====================*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets number of fields used in record comparisons. 
*/ -UNIV_INLINE -void -dtuple_set_n_fields_cmp( -/*====================*/ - dtuple_t* tuple, /*!< in: tuple */ - ulint n_fields_cmp) /*!< in: number of fields used in - comparisons in rem0cmp.* */ - MY_ATTRIBUTE((nonnull)); - -/* Estimate the number of bytes that are going to be allocated when -creating a new dtuple_t object */ -#define DTUPLE_EST_ALLOC(n_fields) \ - (sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t)) - -/**********************************************************//** -Creates a data tuple from an already allocated chunk of memory. -The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). -The default value for number of fields used in record comparisons -for this tuple is n_fields. -@return created tuple (inside buf) */ -UNIV_INLINE -dtuple_t* -dtuple_create_from_mem( -/*===================*/ - void* buf, /*!< in, out: buffer to use */ - ulint buf_size, /*!< in: buffer size */ - ulint n_fields) /*!< in: number of fields */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/**********************************************************//** -Creates a data tuple to a memory heap. The default value for number -of fields used in record comparisons for this tuple is n_fields. -@return own: created tuple */ -UNIV_INLINE -dtuple_t* -dtuple_create( -/*==========*/ - mem_heap_t* heap, /*!< in: memory heap where the tuple - is created, DTUPLE_EST_ALLOC(n_fields) - bytes will be allocated from this heap */ - ulint n_fields)/*!< in: number of fields */ - MY_ATTRIBUTE((nonnull, malloc)); - -/*********************************************************************//** -Sets number of fields used in a tuple. Normally this is set in -dtuple_create, but if you want later to set it smaller, you can use this. 
*/ -UNIV_INTERN -void -dtuple_set_n_fields( -/*================*/ - dtuple_t* tuple, /*!< in: tuple */ - ulint n_fields) /*!< in: number of fields */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Copies a data tuple to another. This is a shallow copy; if a deep copy -is desired, dfield_dup() will have to be invoked on each field. -@return own: copy of tuple */ -UNIV_INLINE -dtuple_t* -dtuple_copy( -/*========*/ - const dtuple_t* tuple, /*!< in: tuple to copy from */ - mem_heap_t* heap) /*!< in: memory heap - where the tuple is created */ - MY_ATTRIBUTE((nonnull, malloc)); -/**********************************************************//** -The following function returns the sum of data lengths of a tuple. The space -occupied by the field structs or the tuple struct is not counted. -@return sum of data lens */ -UNIV_INLINE -ulint -dtuple_get_data_size( -/*=================*/ - const dtuple_t* tuple, /*!< in: typed data tuple */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Computes the number of externally stored fields in a data tuple. -@return number of fields */ -UNIV_INLINE -ulint -dtuple_get_n_ext( -/*=============*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull)); -/************************************************************//** -Compare two data tuples, respecting the collation of character fields. -@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively, -than tuple2 */ -UNIV_INTERN -int -dtuple_coll_cmp( -/*============*/ - const dtuple_t* tuple1, /*!< in: tuple 1 */ - const dtuple_t* tuple2) /*!< in: tuple 2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/************************************************************//** -Folds a prefix given as the number of fields of a tuple. 
-@return the folded value */ -UNIV_INLINE -ulint -dtuple_fold( -/*========*/ - const dtuple_t* tuple, /*!< in: the tuple */ - ulint n_fields,/*!< in: number of complete fields to fold */ - ulint n_bytes,/*!< in: number of bytes to fold in an - incomplete last field */ - index_id_t tree_id)/*!< in: index tree id */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/*******************************************************************//** -Sets types of fields binary in a tuple. */ -UNIV_INLINE -void -dtuple_set_types_binary( -/*====================*/ - dtuple_t* tuple, /*!< in: data tuple */ - ulint n) /*!< in: number of fields to set */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Checks if a dtuple contains an SQL null value. -@return TRUE if some field is SQL null */ -UNIV_INLINE -ibool -dtuple_contains_null( -/*=================*/ - const dtuple_t* tuple) /*!< in: dtuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************//** -Checks that a data field is typed. Asserts an error if not. -@return TRUE if ok */ -UNIV_INTERN -ibool -dfield_check_typed( -/*===============*/ - const dfield_t* field) /*!< in: data field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************//** -Checks that a data tuple is typed. Asserts an error if not. -@return TRUE if ok */ -UNIV_INTERN -ibool -dtuple_check_typed( -/*===============*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************//** -Checks that a data tuple is typed. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -dtuple_check_typed_no_assert( -/*=========================*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifdef UNIV_DEBUG -/**********************************************************//** -Validates the consistency of a tuple which must be complete, i.e, -all fields must have been set. -@return TRUE if ok */ -UNIV_INTERN -ibool -dtuple_validate( -/*============*/ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ -/*************************************************************//** -Pretty prints a dfield value according to its data type. */ -UNIV_INTERN -void -dfield_print( -/*=========*/ - const dfield_t* dfield) /*!< in: dfield */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Pretty prints a dfield value according to its data type. Also the hex string -is printed if a string contains non-printable characters. */ -UNIV_INTERN -void -dfield_print_also_hex( -/*==================*/ - const dfield_t* dfield) /*!< in: dfield */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************//** -The following function prints the contents of a tuple. */ -UNIV_INTERN -void -dtuple_print( -/*=========*/ - FILE* f, /*!< in: output stream */ - const dtuple_t* tuple) /*!< in: tuple */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Moves parts of long fields in entry to the big record vector so that -the size of tuple drops below the maximum record size allowed in the -database. Moves data only from those fields which are not necessary -to determine uniquely the insertion place of the tuple in the index. 
-@return own: created big record vector, NULL if we are not able to -shorten the entry enough, i.e., if there are too many fixed-length or -short fields in entry or the index is clustered */ -UNIV_INTERN -big_rec_t* -dtuple_convert_big_rec( -/*===================*/ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in/out: index entry */ - ulint* n_ext) /*!< in/out: number of - externally stored columns */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); -/**************************************************************//** -Puts back to entry the data stored in vector. Note that to ensure the -fields in entry can accommodate the data, vector must have been created -from entry with dtuple_convert_big_rec. */ -UNIV_INTERN -void -dtuple_convert_back_big_rec( -/*========================*/ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: entry whose data was put to vector */ - big_rec_t* vector) /*!< in, own: big rec vector; it is - freed in this function */ - MY_ATTRIBUTE((nonnull)); -/**************************************************************//** -Frees the memory in a big rec vector. 
*/ -UNIV_INLINE -void -dtuple_big_rec_free( -/*================*/ - big_rec_t* vector) /*!< in, own: big rec vector; it is - freed in this function */ - MY_ATTRIBUTE((nonnull)); - -/*######################################################################*/ - -/** Structure for an SQL data field */ -struct dfield_t{ - void* data; /*!< pointer to data */ - unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */ - unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */ - dtype_t type; /*!< type of data */ -}; - -/** Structure for an SQL data tuple of fields (logical record) */ -struct dtuple_t { - ulint info_bits; /*!< info bits of an index record: - the default is 0; this field is used - if an index record is built from - a data tuple */ - ulint n_fields; /*!< number of fields in dtuple */ - ulint n_fields_cmp; /*!< number of fields which should - be used in comparison services - of rem0cmp.*; the index search - is performed by comparing only these - fields, others are ignored; the - default value in dtuple creation is - the same value as n_fields */ - dfield_t* fields; /*!< fields */ - UT_LIST_NODE_T(dtuple_t) tuple_list; - /*!< data tuples can be linked into a - list using this field */ -#ifdef UNIV_DEBUG - ulint magic_n; /*!< magic number, used in - debug assertions */ -/** Value of dtuple_t::magic_n */ -# define DATA_TUPLE_MAGIC_N 65478679 -#endif /* UNIV_DEBUG */ -}; - -/** A slot for a field in a big rec vector */ -struct big_rec_field_t { - ulint field_no; /*!< field number in record */ - ulint len; /*!< stored data length, in bytes */ - const void* data; /*!< stored data */ -}; - -/** Storage format for overflow data in a big record, that is, a -clustered index record which needs external storage of data fields */ -struct big_rec_t { - mem_heap_t* heap; /*!< memory heap from which - allocated */ - ulint n_fields; /*!< number of stored fields */ - big_rec_field_t*fields; /*!< stored fields */ -}; - -#ifndef UNIV_NONINL -#include "data0data.ic" 
-#endif - -#endif diff --git a/storage/xtradb/include/data0data.ic b/storage/xtradb/include/data0data.ic deleted file mode 100644 index 11499ab928c..00000000000 --- a/storage/xtradb/include/data0data.ic +++ /dev/null @@ -1,651 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/data0data.ic -SQL data field and tuple - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "mem0mem.h" -#include "ut0rnd.h" - -#ifdef UNIV_DEBUG -/** Dummy variable to catch access to uninitialized fields. In the -debug version, dtuple_create() will make all fields of dtuple_t point -to data_error. */ -extern byte data_error; - -/*********************************************************************//** -Gets pointer to the type struct of SQL data field. 
-@return pointer to the type struct */ -UNIV_INLINE -dtype_t* -dfield_get_type( -/*============*/ - const dfield_t* field) /*!< in: SQL data field */ -{ - ut_ad(field); - - return((dtype_t*) &(field->type)); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Sets the type struct of SQL data field. */ -UNIV_INLINE -void -dfield_set_type( -/*============*/ - dfield_t* field, /*!< in: SQL data field */ - const dtype_t* type) /*!< in: pointer to data type struct */ -{ - ut_ad(field != NULL); - ut_ad(type != NULL); - - field->type = *type; -} - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets pointer to the data in a field. -@return pointer to data */ -UNIV_INLINE -void* -dfield_get_data( -/*============*/ - const dfield_t* field) /*!< in: field */ -{ - ut_ad(field); - ut_ad((field->len == UNIV_SQL_NULL) - || (field->data != &data_error)); - - return((void*) field->data); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Gets length of field data. -@return length of data; UNIV_SQL_NULL if SQL null data */ -UNIV_INLINE -ulint -dfield_get_len( -/*===========*/ - const dfield_t* field) /*!< in: field */ -{ - ut_ad(field); - ut_ad((field->len == UNIV_SQL_NULL) - || (field->data != &data_error)); - - return(field->len); -} - -/*********************************************************************//** -Sets length in a field. 
*/ -UNIV_INLINE -void -dfield_set_len( -/*===========*/ - dfield_t* field, /*!< in: field */ - ulint len) /*!< in: length or UNIV_SQL_NULL */ -{ - ut_ad(field); -#ifdef UNIV_VALGRIND_DEBUG - if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len); -#endif /* UNIV_VALGRIND_DEBUG */ - - field->ext = 0; - field->len = len; -} - -/*********************************************************************//** -Determines if a field is SQL NULL -@return nonzero if SQL null data */ -UNIV_INLINE -ulint -dfield_is_null( -/*===========*/ - const dfield_t* field) /*!< in: field */ -{ - ut_ad(field); - - return(field->len == UNIV_SQL_NULL); -} - -/*********************************************************************//** -Determines if a field is externally stored -@return nonzero if externally stored */ -UNIV_INLINE -ulint -dfield_is_ext( -/*==========*/ - const dfield_t* field) /*!< in: field */ -{ - ut_ad(field); - - return(field->ext); -} - -/*********************************************************************//** -Sets the "external storage" flag */ -UNIV_INLINE -void -dfield_set_ext( -/*===========*/ - dfield_t* field) /*!< in/out: field */ -{ - ut_ad(field); - - field->ext = 1; -} - -/*********************************************************************//** -Sets pointer to the data and length in a field. */ -UNIV_INLINE -void -dfield_set_data( -/*============*/ - dfield_t* field, /*!< in: field */ - const void* data, /*!< in: data */ - ulint len) /*!< in: length or UNIV_SQL_NULL */ -{ - ut_ad(field); - -#ifdef UNIV_VALGRIND_DEBUG - if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len); -#endif /* UNIV_VALGRIND_DEBUG */ - field->data = (void*) data; - field->ext = 0; - field->len = len; -} - -/*********************************************************************//** -Sets a data field to SQL NULL. 
*/ -UNIV_INLINE -void -dfield_set_null( -/*============*/ - dfield_t* field) /*!< in/out: field */ -{ - dfield_set_data(field, NULL, UNIV_SQL_NULL); -} - -/*********************************************************************//** -Copies the data and len fields. */ -UNIV_INLINE -void -dfield_copy_data( -/*=============*/ - dfield_t* field1, /*!< out: field to copy to */ - const dfield_t* field2) /*!< in: field to copy from */ -{ - ut_ad(field1 != NULL); - ut_ad(field2 != NULL); - - field1->data = field2->data; - field1->len = field2->len; - field1->ext = field2->ext; -} - -/*********************************************************************//** -Copies a data field to another. */ -UNIV_INLINE -void -dfield_copy( -/*========*/ - dfield_t* field1, /*!< out: field to copy to */ - const dfield_t* field2) /*!< in: field to copy from */ -{ - *field1 = *field2; -} - -/*********************************************************************//** -Copies the data pointed to by a data field. */ -UNIV_INLINE -void -dfield_dup( -/*=======*/ - dfield_t* field, /*!< in/out: data field */ - mem_heap_t* heap) /*!< in: memory heap where allocated */ -{ - if (!dfield_is_null(field)) { - UNIV_MEM_ASSERT_RW(field->data, field->len); - field->data = mem_heap_dup(heap, field->data, field->len); - } -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Tests if two data fields are equal. -If len==0, tests the data length and content for equality. -If len>0, tests the first len bytes of the content for equality. 
-@return TRUE if both fields are NULL or if they are equal */ -UNIV_INLINE -ibool -dfield_datas_are_binary_equal( -/*==========================*/ - const dfield_t* field1, /*!< in: field */ - const dfield_t* field2, /*!< in: field */ - ulint len) /*!< in: maximum prefix to compare, - or 0 to compare the whole field length */ -{ - ulint len2 = len; - - if (field1->len == UNIV_SQL_NULL || len == 0 || field1->len < len) { - len = field1->len; - } - - if (field2->len == UNIV_SQL_NULL || len2 == 0 || field2->len < len2) { - len2 = field2->len; - } - - return(len == len2 - && (len == UNIV_SQL_NULL - || !memcmp(field1->data, field2->data, len))); -} - -/*********************************************************************//** -Tests if dfield data length and content is equal to the given. -@return TRUE if equal */ -UNIV_INLINE -ibool -dfield_data_is_binary_equal( -/*========================*/ - const dfield_t* field, /*!< in: field */ - ulint len, /*!< in: data length or UNIV_SQL_NULL */ - const byte* data) /*!< in: data */ -{ - return(len == dfield_get_len(field) - && (len == UNIV_SQL_NULL - || !memcmp(dfield_get_data(field), data, len))); -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Gets info bits in a data tuple. -@return info bits */ -UNIV_INLINE -ulint -dtuple_get_info_bits( -/*=================*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - ut_ad(tuple); - - return(tuple->info_bits); -} - -/*********************************************************************//** -Sets info bits in a data tuple. */ -UNIV_INLINE -void -dtuple_set_info_bits( -/*=================*/ - dtuple_t* tuple, /*!< in: tuple */ - ulint info_bits) /*!< in: info bits */ -{ - ut_ad(tuple); - - tuple->info_bits = info_bits; -} - -/*********************************************************************//** -Gets number of fields used in record comparisons. 
-@return number of fields used in comparisons in rem0cmp.* */ -UNIV_INLINE -ulint -dtuple_get_n_fields_cmp( -/*====================*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - ut_ad(tuple); - - return(tuple->n_fields_cmp); -} - -/*********************************************************************//** -Sets number of fields used in record comparisons. */ -UNIV_INLINE -void -dtuple_set_n_fields_cmp( -/*====================*/ - dtuple_t* tuple, /*!< in: tuple */ - ulint n_fields_cmp) /*!< in: number of fields used in - comparisons in rem0cmp.* */ -{ - ut_ad(tuple); - ut_ad(n_fields_cmp <= tuple->n_fields); - - tuple->n_fields_cmp = n_fields_cmp; -} - -/*********************************************************************//** -Gets number of fields in a data tuple. -@return number of fields */ -UNIV_INLINE -ulint -dtuple_get_n_fields( -/*================*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - ut_ad(tuple); - - return(tuple->n_fields); -} - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Gets nth field of a tuple. -@return nth field */ -UNIV_INLINE -dfield_t* -dtuple_get_nth_field( -/*=================*/ - const dtuple_t* tuple, /*!< in: tuple */ - ulint n) /*!< in: index of field */ -{ - ut_ad(tuple); - ut_ad(n < tuple->n_fields); - - return((dfield_t*) tuple->fields + n); -} -#endif /* UNIV_DEBUG */ - -/**********************************************************//** -Creates a data tuple from an already allocated chunk of memory. -The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields). -The default value for number of fields used in record comparisons -for this tuple is n_fields. 
-@return created tuple (inside buf) */ -UNIV_INLINE -dtuple_t* -dtuple_create_from_mem( -/*===================*/ - void* buf, /*!< in, out: buffer to use */ - ulint buf_size, /*!< in: buffer size */ - ulint n_fields) /*!< in: number of fields */ -{ - dtuple_t* tuple; - - ut_ad(buf != NULL); - ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields)); - - tuple = (dtuple_t*) buf; - tuple->info_bits = 0; - tuple->n_fields = n_fields; - tuple->n_fields_cmp = n_fields; - tuple->fields = (dfield_t*) &tuple[1]; - -#ifdef UNIV_DEBUG - tuple->magic_n = DATA_TUPLE_MAGIC_N; - - { /* In the debug version, initialize fields to an error value */ - ulint i; - - for (i = 0; i < n_fields; i++) { - dfield_t* field; - - field = dtuple_get_nth_field(tuple, i); - - dfield_set_len(field, UNIV_SQL_NULL); - field->data = &data_error; - dfield_get_type(field)->mtype = DATA_ERROR; - } - } -#endif - UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields); - UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields); - return(tuple); -} - -/**********************************************************//** -Creates a data tuple to a memory heap. The default value for number -of fields used in record comparisons for this tuple is n_fields. -@return own: created tuple */ -UNIV_INLINE -dtuple_t* -dtuple_create( -/*==========*/ - mem_heap_t* heap, /*!< in: memory heap where the tuple - is created, DTUPLE_EST_ALLOC(n_fields) - bytes will be allocated from this heap */ - ulint n_fields) /*!< in: number of fields */ -{ - void* buf; - ulint buf_size; - dtuple_t* tuple; - - ut_ad(heap); - - buf_size = DTUPLE_EST_ALLOC(n_fields); - buf = mem_heap_alloc(heap, buf_size); - - tuple = dtuple_create_from_mem(buf, buf_size, n_fields); - - return(tuple); -} - -/*********************************************************************//** -Copies a data tuple to another. This is a shallow copy; if a deep copy -is desired, dfield_dup() will have to be invoked on each field. 
-@return own: copy of tuple */ -UNIV_INLINE -dtuple_t* -dtuple_copy( -/*========*/ - const dtuple_t* tuple, /*!< in: tuple to copy from */ - mem_heap_t* heap) /*!< in: memory heap - where the tuple is created */ -{ - ulint n_fields = dtuple_get_n_fields(tuple); - dtuple_t* new_tuple = dtuple_create(heap, n_fields); - ulint i; - - for (i = 0; i < n_fields; i++) { - dfield_copy(dtuple_get_nth_field(new_tuple, i), - dtuple_get_nth_field(tuple, i)); - } - - return(new_tuple); -} - -/**********************************************************//** -The following function returns the sum of data lengths of a tuple. The space -occupied by the field structs or the tuple struct is not counted. Neither -is possible space in externally stored parts of the field. -@return sum of data lengths */ -UNIV_INLINE -ulint -dtuple_get_data_size( -/*=================*/ - const dtuple_t* tuple, /*!< in: typed data tuple */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ -{ - const dfield_t* field; - ulint n_fields; - ulint len; - ulint i; - ulint sum = 0; - - ut_ad(tuple); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - - n_fields = tuple->n_fields; - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - len = dfield_get_len(field); - - if (len == UNIV_SQL_NULL) { - len = dtype_get_sql_null_size(dfield_get_type(field), - comp); - } - - sum += len; - } - - return(sum); -} - -/*********************************************************************//** -Computes the number of externally stored fields in a data tuple. 
-@return number of externally stored fields */ -UNIV_INLINE -ulint -dtuple_get_n_ext( -/*=============*/ - const dtuple_t* tuple) /*!< in: tuple */ -{ - ulint n_ext = 0; - ulint n_fields = tuple->n_fields; - ulint i; - - ut_ad(tuple); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - - for (i = 0; i < n_fields; i++) { - n_ext += dtuple_get_nth_field(tuple, i)->ext; - } - - return(n_ext); -} - -/*******************************************************************//** -Sets types of fields binary in a tuple. */ -UNIV_INLINE -void -dtuple_set_types_binary( -/*====================*/ - dtuple_t* tuple, /*!< in: data tuple */ - ulint n) /*!< in: number of fields to set */ -{ - dtype_t* dfield_type; - ulint i; - - for (i = 0; i < n; i++) { - dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i)); - dtype_set(dfield_type, DATA_BINARY, 0, 0); - } -} - -/************************************************************//** -Folds a prefix given as the number of fields of a tuple. 
-@return the folded value */ -UNIV_INLINE -ulint -dtuple_fold( -/*========*/ - const dtuple_t* tuple, /*!< in: the tuple */ - ulint n_fields,/*!< in: number of complete fields to fold */ - ulint n_bytes,/*!< in: number of bytes to fold in an - incomplete last field */ - index_id_t tree_id)/*!< in: index tree id */ -{ - const dfield_t* field; - ulint i; - const byte* data; - ulint len; - ulint fold; - - ut_ad(tuple); - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(tuple)); - - fold = ut_fold_ull(tree_id); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = (const byte*) dfield_get_data(field); - len = dfield_get_len(field); - - if (len != UNIV_SQL_NULL) { - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - if (n_bytes > 0) { - field = dtuple_get_nth_field(tuple, i); - - data = (const byte*) dfield_get_data(field); - len = dfield_get_len(field); - - if (len != UNIV_SQL_NULL) { - if (len > n_bytes) { - len = n_bytes; - } - - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - return(fold); -} - -/**********************************************************************//** -Writes an SQL null field full of zeros. */ -UNIV_INLINE -void -data_write_sql_null( -/*================*/ - byte* data, /*!< in: pointer to a buffer of size len */ - ulint len) /*!< in: SQL null size in bytes */ -{ - memset(data, 0, len); -} - -/**********************************************************************//** -Checks if a dtuple contains an SQL null value. 
-@return TRUE if some field is SQL null */ -UNIV_INLINE -ibool -dtuple_contains_null( -/*=================*/ - const dtuple_t* tuple) /*!< in: dtuple */ -{ - ulint n; - ulint i; - - n = dtuple_get_n_fields(tuple); - - for (i = 0; i < n; i++) { - if (dfield_is_null(dtuple_get_nth_field(tuple, i))) { - - return(TRUE); - } - } - - return(FALSE); -} - -/**************************************************************//** -Frees the memory in a big rec vector. */ -UNIV_INLINE -void -dtuple_big_rec_free( -/*================*/ - big_rec_t* vector) /*!< in, own: big rec vector; it is - freed in this function */ -{ - mem_heap_free(vector->heap); -} diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h deleted file mode 100644 index df6b6a41c11..00000000000 --- a/storage/xtradb/include/data0type.h +++ /dev/null @@ -1,545 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/data0type.h -Data types - -Created 1/16/1996 Heikki Tuuri -*******************************************************/ - -#ifndef data0type_h -#define data0type_h - -#include "univ.i" - -extern ulint data_mysql_default_charset_coll; -#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8 -#define DATA_MYSQL_BINARY_CHARSET_COLL 63 - -/* SQL data type struct */ -struct dtype_t; - -/* SQL Like operator comparison types */ -enum ib_like_t { - IB_LIKE_EXACT, /* e.g. STRING */ - IB_LIKE_PREFIX, /* e.g., STRING% */ - IB_LIKE_SUFFIX, /* e.g., %STRING */ - IB_LIKE_SUBSTR, /* e.g., %STRING% */ - IB_LIKE_REGEXP /* Future */ -}; - -/*-------------------------------------------*/ -/* The 'MAIN TYPE' of a column */ -#define DATA_MISSING 0 /* missing column */ -#define DATA_VARCHAR 1 /* character varying of the - latin1_swedish_ci charset-collation; note - that the MySQL format for this, DATA_BINARY, - DATA_VARMYSQL, is also affected by whether the - 'precise type' contains - DATA_MYSQL_TRUE_VARCHAR */ -#define DATA_CHAR 2 /* fixed length character of the - latin1_swedish_ci charset-collation */ -#define DATA_FIXBINARY 3 /* binary string of fixed length */ -#define DATA_BINARY 4 /* binary string */ -#define DATA_BLOB 5 /* binary large object, or a TEXT type; - if prtype & DATA_BINARY_TYPE == 0, then this is - actually a TEXT column (or a BLOB created - with < 4.0.14; since column prefix indexes - came only in 4.0.14, the missing flag in BLOBs - created before that does not cause any harm) */ -#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */ -#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */ 
-#define DATA_SYS 8 /* system column */ - -/* Data types >= DATA_FLOAT must be compared using the whole field, not as -binary strings */ - -#define DATA_FLOAT 9 -#define DATA_DOUBLE 10 -#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */ -#define DATA_VARMYSQL 12 /* any charset varying length char */ -#define DATA_MYSQL 13 /* any charset fixed length char */ - /* NOTE that 4.1.1 used DATA_MYSQL and - DATA_VARMYSQL for all character sets, and the - charset-collation for tables created with it - can also be latin1_swedish_ci */ -#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() - requires the values are <= 63 */ -/*-------------------------------------------*/ -/* The 'PRECISE TYPE' of a column */ -/* -Tables created by a MySQL user have the following convention: - -- In the least significant byte in the precise type we store the MySQL type -code (not applicable for system columns). - -- In the second least significant byte we OR flags DATA_NOT_NULL, -DATA_UNSIGNED, DATA_BINARY_TYPE. - -- In the third least significant byte of the precise type of string types we -store the MySQL charset-collation code. In DATA_BLOB columns created with -< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there -are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no -problem, though. - -Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the -precise type, since the charset was always the default charset of the MySQL -installation. If the stored charset code is 0 in the system table SYS_COLUMNS -of InnoDB, that means that the default charset of this MySQL installation -should be used. 
- -When loading a table definition from the system tables to the InnoDB data -dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check -if the stored charset-collation is 0, and if that is the case and the type is -a non-binary string, replace that 0 by the default charset-collation code of -this MySQL installation. In short, in old tables, the charset-collation code -in the system tables on disk can be 0, but in in-memory data structures -(dtype_t), the charset-collation code is always != 0 for non-binary string -types. - -In new tables, in binary string types, the charset-collation code is the -MySQL code for the 'binary charset', that is, != 0. - -For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those -DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci, -InnoDB performs all comparisons internally, without resorting to the MySQL -comparison functions. This is to save CPU time. - -InnoDB's own internal system tables have different precise types for their -columns, and for them the precise type is usually not used at all. -*/ - -#define DATA_ENGLISH 4 /* English language character string: this - is a relic from pre-MySQL time and only used - for InnoDB's own system tables */ -#define DATA_ERROR 111 /* another relic from pre-MySQL time */ - -#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL - type from the precise type */ -#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3 - format true VARCHAR */ - -/* Precise data types for system columns and the length of those columns; -NOTE: the values must run from 0 up in the order given! 
All codes must -be less than 256 */ -#define DATA_ROW_ID 0 /* row id: a 48-bit integer */ -#define DATA_ROW_ID_LEN 6 /* stored length for row id */ - -#define DATA_TRX_ID 1 /* transaction id: 6 bytes */ -#define DATA_TRX_ID_LEN 6 - -#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ -#define DATA_ROLL_PTR_LEN 7 - -#define DATA_N_SYS_COLS 3 /* number of system columns defined above */ - -#define DATA_FTS_DOC_ID 3 /* Used as FTS DOC ID column */ - -#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */ - -/* Flags ORed to the precise data type */ -#define DATA_NOT_NULL 256 /* this is ORed to the precise type when - the column is declared as NOT NULL */ -#define DATA_UNSIGNED 512 /* this id ORed to the precise type when - we have an unsigned integer type */ -#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character - string, this is ORed to the precise type: - this only holds for tables created with - >= MySQL-4.0.14 */ -/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1. - In earlier versions this was set for some - BLOB columns. -*/ -#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data - type when the column is true VARCHAR where - MySQL uses 2 bytes to store the data len; - for shorter VARCHARs MySQL uses only 1 byte */ -/*-------------------------------------------*/ - -/* This many bytes we need to store the type information affecting the -alphabetical order for a single field and decide the storage size of an -SQL null*/ -#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4 -/* In the >= 4.1.x storage format we add 2 bytes more so that we can also -store the charset-collation number; one byte is left unused, though */ -#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6 - -/* Maximum multi-byte character length in bytes, plus 1 */ -#define DATA_MBMAX 5 - -/* Pack mbminlen, mbmaxlen to mbminmaxlen. 
*/ -#define DATA_MBMINMAXLEN(mbminlen, mbmaxlen) \ - ((mbmaxlen) * DATA_MBMAX + (mbminlen)) -/* Get mbminlen from mbminmaxlen. Cast the result of UNIV_EXPECT to ulint -because in GCC it returns a long. */ -#define DATA_MBMINLEN(mbminmaxlen) ((ulint) \ - UNIV_EXPECT(((mbminmaxlen) % DATA_MBMAX), \ - 1)) -/* Get mbmaxlen from mbminmaxlen. */ -#define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX)) - -/* We now support 15 bits (up to 32767) collation number */ -#define MAX_CHAR_COLL_NUM 32767 - -/* Mask to get the Charset Collation number (0x7fff) */ -#define CHAR_COLL_MASK MAX_CHAR_COLL_NUM - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Gets the MySQL type code from a dtype. -@return MySQL type code; this is NOT an InnoDB type code! */ -UNIV_INLINE -ulint -dtype_get_mysql_type( -/*=================*/ - const dtype_t* type); /*!< in: type struct */ -/*********************************************************************//** -Determine how many bytes the first n characters of the given string occupy. -If the string is shorter than n characters, returns the number of bytes -the characters in the string occupy. -@return length of the prefix, in bytes */ -UNIV_INTERN -ulint -dtype_get_at_most_n_mbchars( -/*========================*/ - ulint prtype, /*!< in: precise type */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multi-byte character */ - ulint prefix_len, /*!< in: length of the requested - prefix, in characters, multiplied by - dtype_get_mbmaxlen(dtype) */ - ulint data_len, /*!< in: length of str (in bytes) */ - const char* str); /*!< in: the string whose prefix - length is being determined */ -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Checks if a data main type is a string type. Also a BLOB is considered a -string type. 
-@return TRUE if string type */ -UNIV_INTERN -ibool -dtype_is_string_type( -/*=================*/ - ulint mtype); /*!< in: InnoDB main data type code: DATA_CHAR, ... */ -/*********************************************************************//** -Checks if a type is a binary string type. Note that for tables created with -< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For -those DATA_BLOB columns this function currently returns FALSE. -@return TRUE if binary string type */ -UNIV_INTERN -ibool -dtype_is_binary_string_type( -/*========================*/ - ulint mtype, /*!< in: main data type */ - ulint prtype);/*!< in: precise type */ -/*********************************************************************//** -Checks if a type is a non-binary string type. That is, dtype_is_string_type is -TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created -with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. -For those DATA_BLOB columns this function currently returns TRUE. -@return TRUE if non-binary string type */ -UNIV_INTERN -ibool -dtype_is_non_binary_string_type( -/*============================*/ - ulint mtype, /*!< in: main data type */ - ulint prtype);/*!< in: precise type */ -/*********************************************************************//** -Sets a data type structure. */ -UNIV_INLINE -void -dtype_set( -/*======*/ - dtype_t* type, /*!< in: type struct to init */ - ulint mtype, /*!< in: main data type */ - ulint prtype, /*!< in: precise type */ - ulint len); /*!< in: precision of type */ -/*********************************************************************//** -Copies a data type structure. */ -UNIV_INLINE -void -dtype_copy( -/*=======*/ - dtype_t* type1, /*!< in: type struct to copy to */ - const dtype_t* type2); /*!< in: type struct to copy from */ -/*********************************************************************//** -Gets the SQL main data type. 
-@return SQL main data type */ -UNIV_INLINE -ulint -dtype_get_mtype( -/*============*/ - const dtype_t* type); /*!< in: data type */ -/*********************************************************************//** -Gets the precise data type. -@return precise data type */ -UNIV_INLINE -ulint -dtype_get_prtype( -/*=============*/ - const dtype_t* type); /*!< in: data type */ -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Compute the mbminlen and mbmaxlen members of a data type structure. */ -UNIV_INLINE -void -dtype_get_mblen( -/*============*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type (and collation) */ - ulint* mbminlen, /*!< out: minimum length of a - multi-byte character */ - ulint* mbmaxlen); /*!< out: maximum length of a - multi-byte character */ -/*********************************************************************//** -Gets the MySQL charset-collation code for MySQL string types. -@return MySQL charset-collation code */ -UNIV_INLINE -ulint -dtype_get_charset_coll( -/*===================*/ - ulint prtype);/*!< in: precise data type */ -/*********************************************************************//** -Forms a precise type from the < 4.1.2 format precise type plus the -charset-collation code. -@return precise type, including the charset-collation code */ -UNIV_INTERN -ulint -dtype_form_prtype( -/*==============*/ - ulint old_prtype, /*!< in: the MySQL type code and the flags - DATA_BINARY_TYPE etc. */ - ulint charset_coll); /*!< in: MySQL charset-collation code */ -/*********************************************************************//** -Determines if a MySQL string type is a subset of UTF-8. This function -may return false negatives, in case further character-set collation -codes are introduced in MySQL later. 
-@return TRUE if a subset of UTF-8 */ -UNIV_INLINE -ibool -dtype_is_utf8( -/*==========*/ - ulint prtype);/*!< in: precise data type */ -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Gets the type length. -@return fixed length of the type, in bytes, or 0 if variable-length */ -UNIV_INLINE -ulint -dtype_get_len( -/*==========*/ - const dtype_t* type); /*!< in: data type */ -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Gets the minimum length of a character, in bytes. -@return minimum length of a char, in bytes, or 0 if this is not a -character type */ -UNIV_INLINE -ulint -dtype_get_mbminlen( -/*===============*/ - const dtype_t* type); /*!< in: type */ -/*********************************************************************//** -Gets the maximum length of a character, in bytes. -@return maximum length of a char, in bytes, or 0 if this is not a -character type */ -UNIV_INLINE -ulint -dtype_get_mbmaxlen( -/*===============*/ - const dtype_t* type); /*!< in: type */ -/*********************************************************************//** -Sets the minimum and maximum length of a character, in bytes. */ -UNIV_INLINE -void -dtype_set_mbminmaxlen( -/*==================*/ - dtype_t* type, /*!< in/out: type */ - ulint mbminlen, /*!< in: minimum length of a char, - in bytes, or 0 if this is not - a character type */ - ulint mbmaxlen); /*!< in: maximum length of a char, - in bytes, or 0 if this is not - a character type */ -/*********************************************************************//** -Gets the padding character code for the type. 
-@return padding character code, or ULINT_UNDEFINED if no padding specified */ -UNIV_INLINE -ulint -dtype_get_pad_char( -/*===============*/ - ulint mtype, /*!< in: main type */ - ulint prtype); /*!< in: precise type */ -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************************//** -Returns the size of a fixed size data type, 0 if not a fixed size type. -@return fixed size, or 0 */ -UNIV_INLINE -ulint -dtype_get_fixed_size_low( -/*=====================*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - ulint len, /*!< in: length */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of a - multibyte character, in bytes */ - ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */ -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Returns the minimum size of a data type. -@return minimum size */ -UNIV_INLINE -ulint -dtype_get_min_size_low( -/*===================*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - ulint len, /*!< in: length */ - ulint mbminmaxlen); /*!< in: minimum and maximum length of a - multibyte character */ -/***********************************************************************//** -Returns the maximum size of a data type. Note: types in system tables may be -incomplete and return incorrect information. -@return maximum size */ -UNIV_INLINE -ulint -dtype_get_max_size_low( -/*===================*/ - ulint mtype, /*!< in: main type */ - ulint len); /*!< in: length */ -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************************//** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. -For fixed length types it is the fixed length of the type, otherwise 0. 
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */ -UNIV_INLINE -ulint -dtype_get_sql_null_size( -/*====================*/ - const dtype_t* type, /*!< in: type */ - ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */ -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the < 4.1.x storage format. */ -UNIV_INLINE -void -dtype_read_for_order_and_null_size( -/*===============================*/ - dtype_t* type, /*!< in: type struct */ - const byte* buf); /*!< in: buffer for the stored order info */ -/**********************************************************************//** -Stores for a type the information which determines its alphabetical ordering -and the storage size of an SQL NULL value. This is the >= 4.1.x storage -format. */ -UNIV_INLINE -void -dtype_new_store_for_order_and_null_size( -/*====================================*/ - byte* buf, /*!< in: buffer for - DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE - bytes where we store the info */ - const dtype_t* type, /*!< in: type struct */ - ulint prefix_len);/*!< in: prefix length to - replace type->len, or 0 */ -/**********************************************************************//** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the >= 4.1.x storage -format. */ -UNIV_INLINE -void -dtype_new_read_for_order_and_null_size( -/*===================================*/ - dtype_t* type, /*!< in: type struct */ - const byte* buf); /*!< in: buffer for stored type order info */ - -/*********************************************************************//** -Returns the type's SQL name (e.g. 
BIGINT UNSIGNED) from mtype,prtype,len -@return the SQL type name */ -UNIV_INLINE -char* -dtype_sql_name( -/*===========*/ - unsigned mtype, /*!< in: mtype */ - unsigned prtype, /*!< in: prtype */ - unsigned len, /*!< in: len */ - char* name, /*!< out: SQL name */ - unsigned name_sz);/*!< in: size of the name buffer */ - -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Validates a data type structure. -@return TRUE if ok */ -UNIV_INTERN -ibool -dtype_validate( -/*===========*/ - const dtype_t* type); /*!< in: type struct to validate */ -/*********************************************************************//** -Prints a data type structure. */ -UNIV_INTERN -void -dtype_print( -/*========*/ - const dtype_t* type); /*!< in: type */ - -/* Structure for an SQL data type. -If you add fields to this structure, be sure to initialize them everywhere. -This structure is initialized in the following functions: -dtype_set() -dtype_read_for_order_and_null_size() -dtype_new_read_for_order_and_null_size() -sym_tab_add_null_lit() */ - -struct dtype_t{ - unsigned prtype:32; /*!< precise type; MySQL data - type, charset code, flags to - indicate nullability, - signedness, whether this is a - binary string, whether this is - a true VARCHAR where MySQL - uses 2 bytes to store the length */ - unsigned mtype:8; /*!< main data type */ - - /* the remaining fields do not affect alphabetical ordering: */ - - unsigned len:16; /*!< length; for MySQL data this - is field->pack_length(), - except that for a >= 5.0.3 - type true VARCHAR this is the - maximum byte length of the - string data (in addition to - the string, MySQL uses 1 or 2 - bytes to store the string length) */ -#ifndef UNIV_HOTBACKUP - unsigned mbminmaxlen:5; /*!< minimum and maximum length of a - character, in bytes; - DATA_MBMINMAXLEN(mbminlen,mbmaxlen); - mbminlen=DATA_MBMINLEN(mbminmaxlen); - mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */ -#endif /* !UNIV_HOTBACKUP */ -}; - 
-#ifndef UNIV_NONINL -#include "data0type.ic" -#endif - -#endif diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic deleted file mode 100644 index 8f5cee0fd5f..00000000000 --- a/storage/xtradb/include/data0type.ic +++ /dev/null @@ -1,714 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/data0type.ic -Data types - -Created 1/16/1996 Heikki Tuuri -*******************************************************/ - -#include <string.h> /* strlen() */ - -#include "mach0data.h" -#include "rem0types.h" /* ZIP_COLUMN_HEADER_LENGTH */ -#ifndef UNIV_HOTBACKUP -# include "ha_prototypes.h" - -/*********************************************************************//** -Gets the MySQL charset-collation code for MySQL string types. 
-@return MySQL charset-collation code */ -UNIV_INLINE -ulint -dtype_get_charset_coll( -/*===================*/ - ulint prtype) /*!< in: precise data type */ -{ - return((prtype >> 16) & CHAR_COLL_MASK); -} - -/*********************************************************************//** -Determines if a MySQL string type is a subset of UTF-8. This function -may return false negatives, in case further character-set collation -codes are introduced in MySQL later. -@return TRUE if a subset of UTF-8 */ -UNIV_INLINE -ibool -dtype_is_utf8( -/*==========*/ - ulint prtype) /*!< in: precise data type */ -{ - /* These codes have been copied from strings/ctype-extra.c - and strings/ctype-utf8.c. */ - switch (dtype_get_charset_coll(prtype)) { - case 11: /* ascii_general_ci */ - case 65: /* ascii_bin */ - case 33: /* utf8_general_ci */ - case 83: /* utf8_bin */ - case 254: /* utf8_general_cs */ - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************//** -Gets the MySQL type code from a dtype. -@return MySQL type code; this is NOT an InnoDB type code! */ -UNIV_INLINE -ulint -dtype_get_mysql_type( -/*=================*/ - const dtype_t* type) /*!< in: type struct */ -{ - return(type->prtype & 0xFFUL); -} - -/*********************************************************************//** -Compute the mbminlen and mbmaxlen members of a data type structure. 
*/ -UNIV_INLINE -void -dtype_get_mblen( -/*============*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type (and collation) */ - ulint* mbminlen, /*!< out: minimum length of a - multi-byte character */ - ulint* mbmaxlen) /*!< out: maximum length of a - multi-byte character */ -{ - if (dtype_is_string_type(mtype)) { - innobase_get_cset_width(dtype_get_charset_coll(prtype), - mbminlen, mbmaxlen); - ut_ad(*mbminlen <= *mbmaxlen); - ut_ad(*mbminlen < DATA_MBMAX); - ut_ad(*mbmaxlen < DATA_MBMAX); - } else { - *mbminlen = *mbmaxlen = 0; - } -} - -/*********************************************************************//** -Sets the minimum and maximum length of a character, in bytes. */ -UNIV_INLINE -void -dtype_set_mbminmaxlen( -/*==================*/ - dtype_t* type, /*!< in/out: type */ - ulint mbminlen, /*!< in: minimum length of a char, - in bytes, or 0 if this is not - a character type */ - ulint mbmaxlen) /*!< in: maximum length of a char, - in bytes, or 0 if this is not - a character type */ -{ - ut_ad(mbminlen < DATA_MBMAX); - ut_ad(mbmaxlen < DATA_MBMAX); - ut_ad(mbminlen <= mbmaxlen); - - type->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen); -} - -/*********************************************************************//** -Compute the mbminlen and mbmaxlen members of a data type structure. */ -UNIV_INLINE -void -dtype_set_mblen( -/*============*/ - dtype_t* type) /*!< in/out: type */ -{ - ulint mbminlen; - ulint mbmaxlen; - - dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen); - dtype_set_mbminmaxlen(type, mbminlen, mbmaxlen); - - ut_ad(dtype_validate(type)); -} -#else /* !UNIV_HOTBACKUP */ -# define dtype_set_mblen(type) (void) 0 -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Sets a data type structure. 
*/ -UNIV_INLINE -void -dtype_set( -/*======*/ - dtype_t* type, /*!< in: type struct to init */ - ulint mtype, /*!< in: main data type */ - ulint prtype, /*!< in: precise type */ - ulint len) /*!< in: precision of type */ -{ - ut_ad(type); - ut_ad(mtype <= DATA_MTYPE_MAX); - - type->mtype = mtype; - type->prtype = prtype; - type->len = len; - - dtype_set_mblen(type); -} - -/*********************************************************************//** -Copies a data type structure. */ -UNIV_INLINE -void -dtype_copy( -/*=======*/ - dtype_t* type1, /*!< in: type struct to copy to */ - const dtype_t* type2) /*!< in: type struct to copy from */ -{ - *type1 = *type2; - - ut_ad(dtype_validate(type1)); -} - -/*********************************************************************//** -Gets the SQL main data type. -@return SQL main data type */ -UNIV_INLINE -ulint -dtype_get_mtype( -/*============*/ - const dtype_t* type) /*!< in: data type */ -{ - ut_ad(type); - - return(type->mtype); -} - -/*********************************************************************//** -Gets the precise data type. -@return precise data type */ -UNIV_INLINE -ulint -dtype_get_prtype( -/*=============*/ - const dtype_t* type) /*!< in: data type */ -{ - ut_ad(type); - - return(type->prtype); -} - -/*********************************************************************//** -Gets the type length. -@return fixed length of the type, in bytes, or 0 if variable-length */ -UNIV_INLINE -ulint -dtype_get_len( -/*==========*/ - const dtype_t* type) /*!< in: data type */ -{ - ut_ad(type); - - return(type->len); -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Gets the minimum length of a character, in bytes. 
-@return minimum length of a char, in bytes, or 0 if this is not a -character type */ -UNIV_INLINE -ulint -dtype_get_mbminlen( -/*===============*/ - const dtype_t* type) /*!< in: type */ -{ - ut_ad(type); - return(DATA_MBMINLEN(type->mbminmaxlen)); -} -/*********************************************************************//** -Gets the maximum length of a character, in bytes. -@return maximum length of a char, in bytes, or 0 if this is not a -character type */ -UNIV_INLINE -ulint -dtype_get_mbmaxlen( -/*===============*/ - const dtype_t* type) /*!< in: type */ -{ - ut_ad(type); - return(DATA_MBMAXLEN(type->mbminmaxlen)); -} - -/*********************************************************************//** -Gets the padding character code for a type. -@return padding character code, or ULINT_UNDEFINED if no padding specified */ -UNIV_INLINE -ulint -dtype_get_pad_char( -/*===============*/ - ulint mtype, /*!< in: main type */ - ulint prtype) /*!< in: precise type */ -{ - switch (mtype) { - case DATA_FIXBINARY: - case DATA_BINARY: - if (dtype_get_charset_coll(prtype) - == DATA_MYSQL_BINARY_CHARSET_COLL) { - /* Starting from 5.0.18, do not pad - VARBINARY or BINARY columns. */ - return(ULINT_UNDEFINED); - } - /* Fall through */ - case DATA_CHAR: - case DATA_VARCHAR: - case DATA_MYSQL: - case DATA_VARMYSQL: - /* Space is the padding character for all char and binary - strings, and starting from 5.0.3, also for TEXT strings. */ - - return(0x20); - case DATA_BLOB: - if (!(prtype & DATA_BINARY_TYPE)) { - return(0x20); - } - /* Fall through */ - default: - /* No padding specified */ - return(ULINT_UNDEFINED); - } -} - -/**********************************************************************//** -Stores for a type the information which determines its alphabetical ordering -and the storage size of an SQL NULL value. This is the >= 4.1.x storage -format. 
*/ -UNIV_INLINE -void -dtype_new_store_for_order_and_null_size( -/*====================================*/ - byte* buf, /*!< in: buffer for - DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE - bytes where we store the info */ - const dtype_t* type, /*!< in: type struct */ - ulint prefix_len)/*!< in: prefix length to - replace type->len, or 0 */ -{ -#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE -#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" -#endif - ulint len; - - ut_ad(type); - ut_ad(type->mtype >= DATA_VARCHAR); - ut_ad(type->mtype <= DATA_MYSQL); - - buf[0] = (byte)(type->mtype & 0xFFUL); - - if (type->prtype & DATA_BINARY_TYPE) { - buf[0] |= 128; - } - - /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) { - buf[0] |= 64; - } - */ - - buf[1] = (byte)(type->prtype & 0xFFUL); - - len = prefix_len ? prefix_len : type->len; - - mach_write_to_2(buf + 2, len & 0xFFFFUL); - - ut_ad(dtype_get_charset_coll(type->prtype) <= MAX_CHAR_COLL_NUM); - mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); - - if (type->prtype & DATA_NOT_NULL) { - buf[4] |= 128; - } -} - -/**********************************************************************//** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the < 4.1.x -storage format. 
*/ -UNIV_INLINE -void -dtype_read_for_order_and_null_size( -/*===============================*/ - dtype_t* type, /*!< in: type struct */ - const byte* buf) /*!< in: buffer for stored type order info */ -{ -#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE -# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE" -#endif - - type->mtype = buf[0] & 63; - type->prtype = buf[1]; - - if (buf[0] & 128) { - type->prtype |= DATA_BINARY_TYPE; - } - - type->len = mach_read_from_2(buf + 2); - - type->prtype = dtype_form_prtype(type->prtype, - data_mysql_default_charset_coll); - dtype_set_mblen(type); -} - -/**********************************************************************//** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the >= 4.1.x -storage format. */ -UNIV_INLINE -void -dtype_new_read_for_order_and_null_size( -/*===================================*/ - dtype_t* type, /*!< in: type struct */ - const byte* buf) /*!< in: buffer for stored type order info */ -{ - ulint charset_coll; - -#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE -#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" -#endif - - type->mtype = buf[0] & 63; - type->prtype = buf[1]; - - if (buf[0] & 128) { - type->prtype |= DATA_BINARY_TYPE; - } - - if (buf[4] & 128) { - type->prtype |= DATA_NOT_NULL; - } - - type->len = mach_read_from_2(buf + 2); - - charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK; - - if (dtype_is_string_type(type->mtype)) { - ut_a(charset_coll <= MAX_CHAR_COLL_NUM); - - if (charset_coll == 0) { - /* This insert buffer record was inserted with MySQL - version < 4.1.2, and the charset-collation code was not - explicitly stored to dtype->prtype at that time. It - must be the default charset-collation of this MySQL - installation. 
*/ - - charset_coll = data_mysql_default_charset_coll; - } - - type->prtype = dtype_form_prtype(type->prtype, charset_coll); - } - dtype_set_mblen(type); -} - -/*********************************************************************//** -Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len -@return the SQL type name */ -UNIV_INLINE -char* -dtype_sql_name( -/*===========*/ - unsigned mtype, /*!< in: mtype */ - unsigned prtype, /*!< in: prtype */ - unsigned len, /*!< in: len */ - char* name, /*!< out: SQL name */ - unsigned name_sz)/*!< in: size of the name buffer */ -{ - -#define APPEND_UNSIGNED() \ - do { \ - if (prtype & DATA_UNSIGNED) { \ - ut_snprintf(name + strlen(name), \ - name_sz - strlen(name), \ - " UNSIGNED"); \ - } \ - } while (0) - - ut_snprintf(name, name_sz, "UNKNOWN"); - - switch (mtype) { - case DATA_INT: - switch (len) { - case 1: - ut_snprintf(name, name_sz, "TINYINT"); - break; - case 2: - ut_snprintf(name, name_sz, "SMALLINT"); - break; - case 3: - ut_snprintf(name, name_sz, "MEDIUMINT"); - break; - case 4: - ut_snprintf(name, name_sz, "INT"); - break; - case 8: - ut_snprintf(name, name_sz, "BIGINT"); - break; - } - APPEND_UNSIGNED(); - break; - case DATA_FLOAT: - ut_snprintf(name, name_sz, "FLOAT"); - APPEND_UNSIGNED(); - break; - case DATA_DOUBLE: - ut_snprintf(name, name_sz, "DOUBLE"); - APPEND_UNSIGNED(); - break; - case DATA_FIXBINARY: - ut_snprintf(name, name_sz, "BINARY(%u)", len); - break; - case DATA_CHAR: - case DATA_MYSQL: - ut_snprintf(name, name_sz, "CHAR(%u)", len); - break; - case DATA_VARCHAR: - case DATA_VARMYSQL: - ut_snprintf(name, name_sz, "VARCHAR(%u)", len); - break; - case DATA_BINARY: - ut_snprintf(name, name_sz, "VARBINARY(%u)", len); - break; - case DATA_BLOB: - switch (len) { - case 9: - ut_snprintf(name, name_sz, "TINYBLOB"); - break; - case 10: - ut_snprintf(name, name_sz, "BLOB"); - break; - case 11: - ut_snprintf(name, name_sz, "MEDIUMBLOB"); - break; - case 12: - ut_snprintf(name, name_sz, 
"LONGBLOB"); - break; - } - } - - if (prtype & DATA_NOT_NULL) { - ut_snprintf(name + strlen(name), - name_sz - strlen(name), - " NOT NULL"); - } - - return(name); -} - -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************************//** -Returns the size of a fixed size data type, 0 if not a fixed size type. -@return fixed size, or 0 */ -UNIV_INLINE -ulint -dtype_get_fixed_size_low( -/*=====================*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - ulint len, /*!< in: length */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multibyte character, in bytes */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ -{ - switch (mtype) { - case DATA_SYS: -#ifdef UNIV_DEBUG - switch (prtype & DATA_MYSQL_TYPE_MASK) { - case DATA_ROW_ID: - ut_ad(len == DATA_ROW_ID_LEN); - break; - case DATA_TRX_ID: - ut_ad(len == DATA_TRX_ID_LEN); - break; - case DATA_ROLL_PTR: - ut_ad(len == DATA_ROLL_PTR_LEN); - break; - default: - ut_ad(0); - return(0); - } -#endif /* UNIV_DEBUG */ - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - return(len); - case DATA_MYSQL: -#ifndef UNIV_HOTBACKUP - if (prtype & DATA_BINARY_TYPE) { - return(len); - } else if (!comp) { - return(len); - } else { -#ifdef UNIV_DEBUG - ulint i_mbminlen, i_mbmaxlen; - - innobase_get_cset_width( - dtype_get_charset_coll(prtype), - &i_mbminlen, &i_mbmaxlen); - - ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen) - == mbminmaxlen); -#endif /* UNIV_DEBUG */ - if (DATA_MBMINLEN(mbminmaxlen) - == DATA_MBMAXLEN(mbminmaxlen)) { - return(len); - } - } -#else /* !UNIV_HOTBACKUP */ - return(len); -#endif /* !UNIV_HOTBACKUP */ - /* Treat as variable-length. 
*/ - /* Fall through */ - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_DECIMAL: - case DATA_VARMYSQL: - case DATA_BLOB: - return(0); - default: - ut_error; - } - - return(0); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Returns the minimum size of a data type. -@return minimum size */ -UNIV_INLINE -ulint -dtype_get_min_size_low( -/*===================*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - ulint len, /*!< in: length */ - ulint mbminmaxlen) /*!< in: minimum and maximum length of a - multi-byte character */ -{ - switch (mtype) { - case DATA_SYS: -#ifdef UNIV_DEBUG - switch (prtype & DATA_MYSQL_TYPE_MASK) { - case DATA_ROW_ID: - ut_ad(len == DATA_ROW_ID_LEN); - break; - case DATA_TRX_ID: - ut_ad(len == DATA_TRX_ID_LEN); - break; - case DATA_ROLL_PTR: - ut_ad(len == DATA_ROLL_PTR_LEN); - break; - default: - ut_ad(0); - return(0); - } -#endif /* UNIV_DEBUG */ - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - return(len); - case DATA_MYSQL: - if (prtype & DATA_BINARY_TYPE) { - return(len); - } else { - ulint mbminlen = DATA_MBMINLEN(mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen); - - if (mbminlen == mbmaxlen) { - return(len); - } - - /* this is a variable-length character set */ - ut_a(mbminlen > 0); - ut_a(mbmaxlen > mbminlen); - ut_a(len % mbmaxlen == 0); - return(len * mbminlen / mbmaxlen); - } - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_DECIMAL: - case DATA_VARMYSQL: - case DATA_BLOB: - return(0); - default: - ut_error; - } - - return(0); -} - -/***********************************************************************//** -Returns the maximum size of a data type. Note: types in system tables may be -incomplete and return incorrect information. 
-@return maximum size */ -UNIV_INLINE -ulint -dtype_get_max_size_low( -/*===================*/ - ulint mtype, /*!< in: main type */ - ulint len) /*!< in: length */ -{ - switch (mtype) { - case DATA_SYS: - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - case DATA_MYSQL: - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_DECIMAL: - case DATA_VARMYSQL: - return(len); - case DATA_BLOB: - break; - default: - ut_error; - } - - return(ULINT_MAX); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************************//** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. -For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ -UNIV_INLINE -ulint -dtype_get_sql_null_size( -/*====================*/ - const dtype_t* type, /*!< in: type */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ -{ -#ifndef UNIV_HOTBACKUP - return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len, - type->mbminmaxlen, comp)); -#else /* !UNIV_HOTBACKUP */ - return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len, - 0, 0)); -#endif /* !UNIV_HOTBACKUP */ -} diff --git a/storage/xtradb/include/data0types.h b/storage/xtradb/include/data0types.h deleted file mode 100644 index bd2bb577611..00000000000 --- a/storage/xtradb/include/data0types.h +++ /dev/null @@ -1,36 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2000, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/data0types.h -Some type definitions - -Created 9/21/2000 Heikki Tuuri -*************************************************************************/ - -#ifndef data0types_h -#define data0types_h - -/* SQL data field struct */ -struct dfield_t; - -/* SQL data tuple struct */ -struct dtuple_t; - -#endif - diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h deleted file mode 100644 index 8bd3beda110..00000000000 --- a/storage/xtradb/include/db0err.h +++ /dev/null @@ -1,170 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/db0err.h -Global error codes for the database - -Created 5/24/1996 Heikki Tuuri -*******************************************************/ - -#ifndef db0err_h -#define db0err_h - - -enum dberr_t { - DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new - explicit record lock was created */ - DB_SUCCESS = 10, - - /* The following are error codes */ - DB_ERROR, - DB_INTERRUPTED, - DB_OUT_OF_MEMORY, - DB_OUT_OF_FILE_SPACE, - DB_LOCK_WAIT, - DB_DEADLOCK, - DB_ROLLBACK, - DB_DUPLICATE_KEY, - DB_QUE_THR_SUSPENDED, - DB_MISSING_HISTORY, /*!< required history data has been - deleted due to lack of space in - rollback segment */ - DB_CLUSTER_NOT_FOUND = 30, - DB_TABLE_NOT_FOUND, - DB_MUST_GET_MORE_FILE_SPACE, /*!< the database has to be stopped - and restarted with more file space */ - DB_TABLE_IS_BEING_USED, - DB_TOO_BIG_RECORD, /*!< a record in an index would not fit - on a compressed page, or it would - become bigger than 1/2 free space in - an uncompressed page frame */ - DB_LOCK_WAIT_TIMEOUT, /*!< lock wait lasted too long */ - DB_NO_REFERENCED_ROW, /*!< referenced key value not found - for a foreign key in an insert or - update of a row */ - DB_ROW_IS_REFERENCED, /*!< cannot delete or update a row - because it contains a key value - which is referenced */ - DB_CANNOT_ADD_CONSTRAINT, /*!< adding a foreign key constraint - to a table failed */ - DB_CORRUPTION, /*!< data structure corruption noticed */ - DB_CANNOT_DROP_CONSTRAINT, /*!< dropping a foreign key constraint - from a table failed */ - DB_NO_SAVEPOINT, /*!< no savepoint exists with the given - name */ - DB_TABLESPACE_EXISTS, /*!< we cannot create a new 
single-table - tablespace because a file of the same - name already exists */ - DB_TABLESPACE_DELETED, /*!< tablespace was deleted or is - being dropped right now */ - DB_TABLESPACE_NOT_FOUND, /*!< Attempt to delete a tablespace - instance that was not found in the - tablespace hash table */ - DB_LOCK_TABLE_FULL, /*!< lock structs have exhausted the - buffer pool (for big transactions, - InnoDB stores the lock structs in the - buffer pool) */ - DB_FOREIGN_DUPLICATE_KEY, /*!< foreign key constraints - activated by the operation would - lead to a duplicate key in some - table */ - DB_TOO_MANY_CONCURRENT_TRXS, /*!< when InnoDB runs out of the - preconfigured undo slots, this can - only happen when there are too many - concurrent transactions */ - DB_UNSUPPORTED, /*!< when InnoDB sees any artefact or - a feature that it can't recognize or - work with e.g., FT indexes created by - a later version of the engine. */ - - DB_INVALID_NULL, /*!< a NOT NULL column was found to - be NULL during table rebuild */ - - DB_STATS_DO_NOT_EXIST, /*!< an operation that requires the - persistent storage, used for recording - table and index statistics, was - requested but this storage does not - exist itself or the stats for a given - table do not exist */ - DB_FOREIGN_EXCEED_MAX_CASCADE, /*!< Foreign key constraint related - cascading delete/update exceeds - maximum allowed depth */ - DB_CHILD_NO_INDEX, /*!< the child (foreign) table does - not have an index that contains the - foreign keys as its prefix columns */ - DB_PARENT_NO_INDEX, /*!< the parent table does not - have an index that contains the - foreign keys as its prefix columns */ - DB_TOO_BIG_INDEX_COL, /*!< index column size exceeds - maximum limit */ - DB_INDEX_CORRUPT, /*!< we have corrupted index */ - DB_UNDO_RECORD_TOO_BIG, /*!< the undo log record is too big */ - DB_READ_ONLY, /*!< Update operation attempted in - a read-only transaction */ - DB_FTS_INVALID_DOCID, /*!< FTS Doc ID cannot be zero */ - DB_TABLE_IN_FK_CHECK, 
/*!< table is being used in foreign - key check */ - DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big - during online index creation */ - - DB_IO_ERROR, /*!< Generic IO error */ - DB_IDENTIFIER_TOO_LONG, /*!< Identifier name too long */ - DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory - exceeds result cache limit */ - DB_TEMP_FILE_WRITE_FAILURE, /*!< Temp file write failure */ - DB_FTS_TOO_MANY_WORDS_IN_PHRASE, - /*!< Too many words in a phrase */ - DB_TOO_BIG_FOR_REDO, /*!< Record length greater than 10% - of redo log */ - DB_DECRYPTION_FAILED, /*!< Tablespace encrypted and - decrypt operation failed because - of missing key management plugin, - or missing or incorrect key or - incorrect AES method or algorithm. */ - DB_PAGE_CORRUPTED, /*!< Page read from tablespace is - corrupted. */ - /* The following are partial failure codes */ - DB_FAIL = 1000, - DB_OVERFLOW, - DB_UNDERFLOW, - DB_STRONG_FAIL, - DB_ZIP_OVERFLOW, - DB_RECORD_NOT_FOUND = 1500, - DB_END_OF_INDEX, - DB_DICT_CHANGED, /*!< Some part of table dictionary has - changed. Such as index dropped or - foreign key dropped */ - - DB_SEARCH_ABORTED_BY_USER= 1533, - - /* The following are API only error codes. */ - DB_DATA_MISMATCH = 2000, /*!< Column update or read failed - because the types mismatch */ - - DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the - schema to be locked in exclusive mode - and if it's not then that API function - will return this error code */ - - DB_NOT_FOUND /*!< Generic error code for "Not found" - type of errors */ -}; - -#endif diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h deleted file mode 100644 index 4fd9b0b7f98..00000000000 --- a/storage/xtradb/include/dict0boot.h +++ /dev/null @@ -1,343 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation.
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0boot.h -Data dictionary creation and booting - -Created 4/18/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0boot_h -#define dict0boot_h - -#include "univ.i" - -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "ut0byte.h" -#include "buf0buf.h" -#include "fsp0fsp.h" -#include "dict0dict.h" - -typedef byte dict_hdr_t; - -/**********************************************************************//** -Gets a pointer to the dictionary header and x-latches its page. -@return pointer to the dictionary header, page x-latched */ -UNIV_INTERN -dict_hdr_t* -dict_hdr_get( -/*=========*/ - mtr_t* mtr); /*!< in: mtr */ -/**********************************************************************//** -Returns a new table, index, or space id. 
*/ -UNIV_INTERN -void -dict_hdr_get_new_id( -/*================*/ - table_id_t* table_id, /*!< out: table id - (not assigned if NULL) */ - index_id_t* index_id, /*!< out: index id - (not assigned if NULL) */ - ulint* space_id); /*!< out: space id - (not assigned if NULL) */ -/**********************************************************************//** -Writes the current value of the row id counter to the dictionary header file -page. */ -UNIV_INTERN -void -dict_hdr_flush_row_id(void); -/*=======================*/ -/**********************************************************************//** -Returns a new row id. -@return the new id */ -UNIV_INLINE -row_id_t -dict_sys_get_new_row_id(void); -/*=========================*/ -/**********************************************************************//** -Reads a row id from a record or other 6-byte stored form. -@return row id */ -UNIV_INLINE -row_id_t -dict_sys_read_row_id( -/*=================*/ - const byte* field); /*!< in: record field */ -/**********************************************************************//** -Writes a row id to a record or other 6-byte stored form. */ -UNIV_INLINE -void -dict_sys_write_row_id( -/*==================*/ - byte* field, /*!< in: record field */ - row_id_t row_id);/*!< in: row id */ -/*****************************************************************//** -Initializes the data dictionary memory structures when the database is -started. This function is also called when the data dictionary is created. -@return DB_SUCCESS or error code. */ -UNIV_INTERN -dberr_t -dict_boot(void) -/*===========*/ - MY_ATTRIBUTE((warn_unused_result)); - -/*****************************************************************//** -Creates and initializes the data dictionary at the server bootstrap. -@return DB_SUCCESS or error code. 
*/ -UNIV_INTERN -dberr_t -dict_create(void) -/*=============*/ - MY_ATTRIBUTE((warn_unused_result)); - -/*********************************************************************//** -Check if a table id belongs to system table. -@return true if the table id belongs to a system table. */ -UNIV_INLINE -bool -dict_is_sys_table( -/*==============*/ - table_id_t id) /*!< in: table id to check */ - MY_ATTRIBUTE((warn_unused_result)); - -/* Space id and page no where the dictionary header resides */ -#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ -#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO - -/* The ids for the basic system tables and their indexes */ -#define DICT_TABLES_ID 1 -#define DICT_COLUMNS_ID 2 -#define DICT_INDEXES_ID 3 -#define DICT_FIELDS_ID 4 -/* The following is a secondary index on SYS_TABLES */ -#define DICT_TABLE_IDS_ID 5 - -#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start - from this number, except for basic - system tables and their above defined - indexes; ibuf tables and indexes are - assigned as the id the number - DICT_IBUF_ID_MIN plus the space id */ - -/* The offset of the dictionary header on the page */ -#define DICT_HDR FSEG_PAGE_DATA - -/*-------------------------------------------------------------*/ -/* Dictionary header offsets */ -#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ -#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ -#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/ -#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID*/ -#define DICT_HDR_TABLES 32 /* Root of SYS_TABLES clust index */ -#define DICT_HDR_TABLE_IDS 36 /* Root of SYS_TABLE_IDS sec index */ -#define DICT_HDR_COLUMNS 40 /* Root of SYS_COLUMNS clust index */ -#define DICT_HDR_INDEXES 44 /* Root of SYS_INDEXES clust index */ -#define DICT_HDR_FIELDS 48 /* Root of SYS_FIELDS clust index */ - -#define DICT_HDR_FSEG_HEADER 56 /* 
Segment header for the tablespace - segment into which the dictionary - header is created */ -/*-------------------------------------------------------------*/ - -/* The columns in SYS_TABLES */ -enum dict_col_sys_tables_enum { - DICT_COL__SYS_TABLES__NAME = 0, - DICT_COL__SYS_TABLES__ID = 1, - DICT_COL__SYS_TABLES__N_COLS = 2, - DICT_COL__SYS_TABLES__TYPE = 3, - DICT_COL__SYS_TABLES__MIX_ID = 4, - DICT_COL__SYS_TABLES__MIX_LEN = 5, - DICT_COL__SYS_TABLES__CLUSTER_ID = 6, - DICT_COL__SYS_TABLES__SPACE = 7, - DICT_NUM_COLS__SYS_TABLES = 8 -}; -/* The field numbers in the SYS_TABLES clustered index */ -enum dict_fld_sys_tables_enum { - DICT_FLD__SYS_TABLES__NAME = 0, - DICT_FLD__SYS_TABLES__DB_TRX_ID = 1, - DICT_FLD__SYS_TABLES__DB_ROLL_PTR = 2, - DICT_FLD__SYS_TABLES__ID = 3, - DICT_FLD__SYS_TABLES__N_COLS = 4, - DICT_FLD__SYS_TABLES__TYPE = 5, - DICT_FLD__SYS_TABLES__MIX_ID = 6, - DICT_FLD__SYS_TABLES__MIX_LEN = 7, - DICT_FLD__SYS_TABLES__CLUSTER_ID = 8, - DICT_FLD__SYS_TABLES__SPACE = 9, - DICT_NUM_FIELDS__SYS_TABLES = 10 -}; -/* The field numbers in the SYS_TABLE_IDS index */ -enum dict_fld_sys_table_ids_enum { - DICT_FLD__SYS_TABLE_IDS__ID = 0, - DICT_FLD__SYS_TABLE_IDS__NAME = 1, - DICT_NUM_FIELDS__SYS_TABLE_IDS = 2 -}; -/* The columns in SYS_COLUMNS */ -enum dict_col_sys_columns_enum { - DICT_COL__SYS_COLUMNS__TABLE_ID = 0, - DICT_COL__SYS_COLUMNS__POS = 1, - DICT_COL__SYS_COLUMNS__NAME = 2, - DICT_COL__SYS_COLUMNS__MTYPE = 3, - DICT_COL__SYS_COLUMNS__PRTYPE = 4, - DICT_COL__SYS_COLUMNS__LEN = 5, - DICT_COL__SYS_COLUMNS__PREC = 6, - DICT_NUM_COLS__SYS_COLUMNS = 7 -}; -/* The field numbers in the SYS_COLUMNS clustered index */ -enum dict_fld_sys_columns_enum { - DICT_FLD__SYS_COLUMNS__TABLE_ID = 0, - DICT_FLD__SYS_COLUMNS__POS = 1, - DICT_FLD__SYS_COLUMNS__DB_TRX_ID = 2, - DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR = 3, - DICT_FLD__SYS_COLUMNS__NAME = 4, - DICT_FLD__SYS_COLUMNS__MTYPE = 5, - DICT_FLD__SYS_COLUMNS__PRTYPE = 6, - DICT_FLD__SYS_COLUMNS__LEN = 7, - 
DICT_FLD__SYS_COLUMNS__PREC = 8, - DICT_NUM_FIELDS__SYS_COLUMNS = 9 -}; -/* The columns in SYS_INDEXES */ -enum dict_col_sys_indexes_enum { - DICT_COL__SYS_INDEXES__TABLE_ID = 0, - DICT_COL__SYS_INDEXES__ID = 1, - DICT_COL__SYS_INDEXES__NAME = 2, - DICT_COL__SYS_INDEXES__N_FIELDS = 3, - DICT_COL__SYS_INDEXES__TYPE = 4, - DICT_COL__SYS_INDEXES__SPACE = 5, - DICT_COL__SYS_INDEXES__PAGE_NO = 6, - DICT_NUM_COLS__SYS_INDEXES = 7 -}; -/* The field numbers in the SYS_INDEXES clustered index */ -enum dict_fld_sys_indexes_enum { - DICT_FLD__SYS_INDEXES__TABLE_ID = 0, - DICT_FLD__SYS_INDEXES__ID = 1, - DICT_FLD__SYS_INDEXES__DB_TRX_ID = 2, - DICT_FLD__SYS_INDEXES__DB_ROLL_PTR = 3, - DICT_FLD__SYS_INDEXES__NAME = 4, - DICT_FLD__SYS_INDEXES__N_FIELDS = 5, - DICT_FLD__SYS_INDEXES__TYPE = 6, - DICT_FLD__SYS_INDEXES__SPACE = 7, - DICT_FLD__SYS_INDEXES__PAGE_NO = 8, - DICT_NUM_FIELDS__SYS_INDEXES = 9 -}; -/* The columns in SYS_FIELDS */ -enum dict_col_sys_fields_enum { - DICT_COL__SYS_FIELDS__INDEX_ID = 0, - DICT_COL__SYS_FIELDS__POS = 1, - DICT_COL__SYS_FIELDS__COL_NAME = 2, - DICT_NUM_COLS__SYS_FIELDS = 3 -}; -/* The field numbers in the SYS_FIELDS clustered index */ -enum dict_fld_sys_fields_enum { - DICT_FLD__SYS_FIELDS__INDEX_ID = 0, - DICT_FLD__SYS_FIELDS__POS = 1, - DICT_FLD__SYS_FIELDS__DB_TRX_ID = 2, - DICT_FLD__SYS_FIELDS__DB_ROLL_PTR = 3, - DICT_FLD__SYS_FIELDS__COL_NAME = 4, - DICT_NUM_FIELDS__SYS_FIELDS = 5 -}; -/* The columns in SYS_FOREIGN */ -enum dict_col_sys_foreign_enum { - DICT_COL__SYS_FOREIGN__ID = 0, - DICT_COL__SYS_FOREIGN__FOR_NAME = 1, - DICT_COL__SYS_FOREIGN__REF_NAME = 2, - DICT_COL__SYS_FOREIGN__N_COLS = 3, - DICT_NUM_COLS__SYS_FOREIGN = 4 -}; -/* The field numbers in the SYS_FOREIGN clustered index */ -enum dict_fld_sys_foreign_enum { - DICT_FLD__SYS_FOREIGN__ID = 0, - DICT_FLD__SYS_FOREIGN__DB_TRX_ID = 1, - DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR = 2, - DICT_FLD__SYS_FOREIGN__FOR_NAME = 3, - DICT_FLD__SYS_FOREIGN__REF_NAME = 4, - 
DICT_FLD__SYS_FOREIGN__N_COLS = 5, - DICT_NUM_FIELDS__SYS_FOREIGN = 6 -}; -/* The field numbers in the SYS_FOREIGN_FOR_NAME secondary index */ -enum dict_fld_sys_foreign_for_name_enum { - DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME = 0, - DICT_FLD__SYS_FOREIGN_FOR_NAME__ID = 1, - DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME = 2 -}; -/* The columns in SYS_FOREIGN_COLS */ -enum dict_col_sys_foreign_cols_enum { - DICT_COL__SYS_FOREIGN_COLS__ID = 0, - DICT_COL__SYS_FOREIGN_COLS__POS = 1, - DICT_COL__SYS_FOREIGN_COLS__FOR_COL_NAME = 2, - DICT_COL__SYS_FOREIGN_COLS__REF_COL_NAME = 3, - DICT_NUM_COLS__SYS_FOREIGN_COLS = 4 -}; -/* The field numbers in the SYS_FOREIGN_COLS clustered index */ -enum dict_fld_sys_foreign_cols_enum { - DICT_FLD__SYS_FOREIGN_COLS__ID = 0, - DICT_FLD__SYS_FOREIGN_COLS__POS = 1, - DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID = 2, - DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR = 3, - DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME = 4, - DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5, - DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6 -}; -/* The columns in SYS_TABLESPACES */ -enum dict_col_sys_tablespaces_enum { - DICT_COL__SYS_TABLESPACES__SPACE = 0, - DICT_COL__SYS_TABLESPACES__NAME = 1, - DICT_COL__SYS_TABLESPACES__FLAGS = 2, - DICT_NUM_COLS__SYS_TABLESPACES = 3 -}; -/* The field numbers in the SYS_TABLESPACES clustered index */ -enum dict_fld_sys_tablespaces_enum { - DICT_FLD__SYS_TABLESPACES__SPACE = 0, - DICT_FLD__SYS_TABLESPACES__DB_TRX_ID = 1, - DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR = 2, - DICT_FLD__SYS_TABLESPACES__NAME = 3, - DICT_FLD__SYS_TABLESPACES__FLAGS = 4, - DICT_NUM_FIELDS__SYS_TABLESPACES = 5 -}; -/* The columns in SYS_DATAFILES */ -enum dict_col_sys_datafiles_enum { - DICT_COL__SYS_DATAFILES__SPACE = 0, - DICT_COL__SYS_DATAFILES__PATH = 1, - DICT_NUM_COLS__SYS_DATAFILES = 2 -}; -/* The field numbers in the SYS_DATAFILES clustered index */ -enum dict_fld_sys_datafiles_enum { - DICT_FLD__SYS_DATAFILES__SPACE = 0, - DICT_FLD__SYS_DATAFILES__DB_TRX_ID = 1, - 
DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR = 2, - DICT_FLD__SYS_DATAFILES__PATH = 3, - DICT_NUM_FIELDS__SYS_DATAFILES = 4 -}; - -/* A number of the columns above occur in multiple tables. These are the -length of thos fields. */ -#define DICT_FLD_LEN_SPACE 4 -#define DICT_FLD_LEN_FLAGS 4 - -/* When a row id which is zero modulo this number (which must be a power of -two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is -updated */ -#define DICT_HDR_ROW_ID_WRITE_MARGIN 256 - -#ifndef UNIV_NONINL -#include "dict0boot.ic" -#endif - -#endif diff --git a/storage/xtradb/include/dict0boot.ic b/storage/xtradb/include/dict0boot.ic deleted file mode 100644 index 42e91ee930e..00000000000 --- a/storage/xtradb/include/dict0boot.ic +++ /dev/null @@ -1,95 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0boot.ic -Data dictionary creation and booting - -Created 4/18/1996 Heikki Tuuri -*******************************************************/ - -/**********************************************************************//** -Returns a new row id. -@return the new id */ -UNIV_INLINE -row_id_t -dict_sys_get_new_row_id(void) -/*=========================*/ -{ - row_id_t id; - - mutex_enter(&(dict_sys->mutex)); - - id = dict_sys->row_id; - - if (0 == (id % DICT_HDR_ROW_ID_WRITE_MARGIN)) { - - dict_hdr_flush_row_id(); - } - - dict_sys->row_id++; - - mutex_exit(&(dict_sys->mutex)); - - return(id); -} - -/**********************************************************************//** -Reads a row id from a record or other 6-byte stored form. -@return row id */ -UNIV_INLINE -row_id_t -dict_sys_read_row_id( -/*=================*/ - const byte* field) /*!< in: record field */ -{ -#if DATA_ROW_ID_LEN != 6 -# error "DATA_ROW_ID_LEN != 6" -#endif - - return(mach_read_from_6(field)); -} - -/**********************************************************************//** -Writes a row id to a record or other 6-byte stored form. */ -UNIV_INLINE -void -dict_sys_write_row_id( -/*==================*/ - byte* field, /*!< in: record field */ - row_id_t row_id) /*!< in: row id */ -{ -#if DATA_ROW_ID_LEN != 6 -# error "DATA_ROW_ID_LEN != 6" -#endif - - mach_write_to_6(field, row_id); -} - -/*********************************************************************//** -Check if a table id belongs to system table. -@return true if the table id belongs to a system table. 
*/ -UNIV_INLINE -bool -dict_is_sys_table( -/*==============*/ - table_id_t id) /*!< in: table id to check */ -{ - return(id < DICT_HDR_FIRST_ID); -} diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h deleted file mode 100644 index 082048b8bbd..00000000000 --- a/storage/xtradb/include/dict0crea.h +++ /dev/null @@ -1,266 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0crea.h -Database object creation - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0crea_h -#define dict0crea_h - -#include "univ.i" -#include "dict0types.h" -#include "dict0dict.h" -#include "que0types.h" -#include "row0types.h" -#include "mtr0mtr.h" -#include "fil0crypt.h" - -/*********************************************************************//** -Creates a table create graph. 
-@return own: table create node */ -UNIV_INTERN -tab_node_t* -tab_create_graph_create( -/*====================*/ - dict_table_t* table, /*!< in: table to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit, /*!< in: true if the commit node should be - added to the query graph */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id);/*!< in: encryption key_id */ -/*********************************************************************//** -Creates an index create graph. -@return own: index create node */ -UNIV_INTERN -ind_node_t* -ind_create_graph_create( -/*====================*/ - dict_index_t* index, /*!< in: index to create, built as a memory data - structure */ - mem_heap_t* heap, /*!< in: heap where created */ - bool commit);/*!< in: true if the commit node should be - added to the query graph */ -/***********************************************************//** -Creates a table. This is a high-level function used in SQL execution graphs. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -dict_create_table_step( -/*===================*/ - que_thr_t* thr); /*!< in: query thread */ -/***********************************************************//** -Creates an index. This is a high-level function used in SQL execution -graphs. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -dict_create_index_step( -/*===================*/ - que_thr_t* thr); /*!< in: query thread */ -/*******************************************************************//** -Truncates the index tree associated with a row in SYS_INDEXES table. 
-@return new root page number, or FIL_NULL on failure */ -UNIV_INTERN -ulint -dict_truncate_index_tree( -/*=====================*/ - dict_table_t* table, /*!< in: the table the index belongs to */ - ulint space, /*!< in: 0=truncate, - nonzero=create the index tree in the - given tablespace */ - btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to - record in the clustered index of - SYS_INDEXES table. The cursor may be - repositioned in this call. */ - mtr_t* mtr); /*!< in: mtr having the latch - on the record page. The mtr may be - committed and restarted in this call. */ -/*******************************************************************//** -Drops the index tree associated with a row in SYS_INDEXES table. */ -UNIV_INTERN -void -dict_drop_index_tree( -/*=================*/ - rec_t* rec, /*!< in/out: record in the clustered index - of SYS_INDEXES table */ - mtr_t* mtr); /*!< in: mtr having the latch on the record page */ -/****************************************************************//** -Creates the foreign key constraints system tables inside InnoDB -at server bootstrap or server start if they are not found or are -not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_create_or_check_foreign_constraint_tables(void); -/*================================================*/ - -/********************************************************************//** -Construct foreign key constraint defintion from data dictionary information. -*/ -UNIV_INTERN -char* -dict_foreign_def_get( -/*=================*/ - dict_foreign_t* foreign,/*!< in: foreign */ - trx_t* trx); /*!< in: trx */ - -/********************************************************************//** -Generate a foreign key constraint name when it was not named by the user. 
-A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER, -where the numbers start from 1, and are given locally for this table, that is, -the number is not global, as it used to be before MySQL 4.0.18. */ -UNIV_INLINE -dberr_t -dict_create_add_foreign_id( -/*=======================*/ - ulint* id_nr, /*!< in/out: number to use in id generation; - incremented if used */ - const char* name, /*!< in: table name */ - dict_foreign_t* foreign)/*!< in/out: foreign key */ - MY_ATTRIBUTE((nonnull)); - -/** Adds the given set of foreign key objects to the dictionary tables -in the database. This function does not modify the dictionary cache. The -caller must ensure that all foreign key objects contain a valid constraint -name in foreign->id. -@param[in] local_fk_set set of foreign key objects, to be added to -the dictionary tables -@param[in] table table to which the foreign key objects in -local_fk_set belong to -@param[in,out] trx transaction -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_add_foreigns_to_dictionary( -/*===================================*/ - const dict_foreign_set& local_fk_set, - const dict_table_t* table, - trx_t* trx) - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/****************************************************************//** -Creates the tablespaces and datafiles system tables inside InnoDB -at server bootstrap or server start if they are not found or are -not of the right form. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_create_or_check_sys_tablespace(void); -/*=====================================*/ - -/********************************************************************//** -Add a single tablespace definition to the data dictionary tables in the -database. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_add_tablespace_to_dictionary( -/*=====================================*/ - ulint space, /*!< in: tablespace id */ - const char* name, /*!< in: tablespace name */ - ulint flags, /*!< in: tablespace flags */ - const char* path, /*!< in: tablespace path */ - trx_t* trx, /*!< in: transaction */ - bool commit); /*!< in: if true then commit the - transaction */ - -/********************************************************************//** -Add a foreign key definition to the data dictionary tables. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_add_foreign_to_dictionary( -/*==================================*/ - dict_table_t* table, /*!< in: table */ - const char* name, /*!< in: table name */ - const dict_foreign_t* foreign,/*!< in: foreign key */ - trx_t* trx) /*!< in/out: dictionary transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/* Table create node structure */ -struct tab_node_t{ - que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */ - dict_table_t* table; /*!< table to create, built as a memory data - structure with dict_mem_... 
functions */ - ins_node_t* tab_def; /* child node which does the insert of - the table definition; the row to be inserted - is built by the parent node */ - ins_node_t* col_def; /* child node which does the inserts of - the column definitions; the row to be inserted - is built by the parent node */ - commit_node_t* commit_node; - /* child node which performs a commit after - a successful table creation */ - /*----------------------*/ - /* Local storage for this graph node */ - ulint state; /*!< node execution state */ - ulint col_no; /*!< next column definition to insert */ - ulint key_id; /*!< encryption key_id */ - fil_encryption_t mode; /*!< encryption mode */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ -}; - -/* Table create node states */ -#define TABLE_BUILD_TABLE_DEF 1 -#define TABLE_BUILD_COL_DEF 2 -#define TABLE_COMMIT_WORK 3 -#define TABLE_ADD_TO_CACHE 4 -#define TABLE_COMPLETED 5 - -/* Index create node struct */ - -struct ind_node_t{ - que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */ - dict_index_t* index; /*!< index to create, built as a memory data - structure with dict_mem_... 
functions */ - ins_node_t* ind_def; /* child node which does the insert of - the index definition; the row to be inserted - is built by the parent node */ - ins_node_t* field_def; /* child node which does the inserts of - the field definitions; the row to be inserted - is built by the parent node */ - commit_node_t* commit_node; - /* child node which performs a commit after - a successful index creation */ - /*----------------------*/ - /* Local storage for this graph node */ - ulint state; /*!< node execution state */ - ulint page_no;/* root page number of the index */ - dict_table_t* table; /*!< table which owns the index */ - dtuple_t* ind_row;/* index definition row built */ - ulint field_no;/* next field definition to insert */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage */ -}; - -/* Index create node states */ -#define INDEX_BUILD_INDEX_DEF 1 -#define INDEX_BUILD_FIELD_DEF 2 -#define INDEX_CREATE_INDEX_TREE 3 -#define INDEX_COMMIT_WORK 4 -#define INDEX_ADD_TO_CACHE 5 - -#ifndef UNIV_NONINL -#include "dict0crea.ic" -#endif - -#endif diff --git a/storage/xtradb/include/dict0crea.ic b/storage/xtradb/include/dict0crea.ic deleted file mode 100644 index 1cbaa47032b..00000000000 --- a/storage/xtradb/include/dict0crea.ic +++ /dev/null @@ -1,98 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0crea.ic -Database object creation - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#include "mem0mem.h" - -/*********************************************************************//** -Checks if a table name contains the string "/#sql" which denotes temporary -tables in MySQL. -@return true if temporary table */ -UNIV_INTERN -bool -row_is_mysql_tmp_table_name( -/*========================*/ - const char* name) MY_ATTRIBUTE((warn_unused_result)); - /*!< in: table name in the form - 'database/tablename' */ - - -/********************************************************************//** -Generate a foreign key constraint name when it was not named by the user. -A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER, -where the numbers start from 1, and are given locally for this table, that is, -the number is not global, as it used to be before MySQL 4.0.18. 
*/ -UNIV_INLINE -dberr_t -dict_create_add_foreign_id( -/*=======================*/ - ulint* id_nr, /*!< in/out: number to use in id generation; - incremented if used */ - const char* name, /*!< in: table name */ - dict_foreign_t* foreign)/*!< in/out: foreign key */ -{ - if (foreign->id == NULL) { - /* Generate a new constraint id */ - ulint namelen = strlen(name); - char* id = static_cast<char*>( - mem_heap_alloc(foreign->heap, - namelen + 20)); - - if (row_is_mysql_tmp_table_name(name)) { - - /* no overflow if number < 1e13 */ - sprintf(id, "%s_ibfk_%lu", name, - (ulong) (*id_nr)++); - } else { - char table_name[MAX_TABLE_NAME_LEN + 20] = ""; - uint errors = 0; - - strncpy(table_name, name, - MAX_TABLE_NAME_LEN + 20); - - innobase_convert_to_system_charset( - strchr(table_name, '/') + 1, - strchr(name, '/') + 1, - MAX_TABLE_NAME_LEN, &errors); - - if (errors) { - strncpy(table_name, name, - MAX_TABLE_NAME_LEN + 20); - } - - /* no overflow if number < 1e13 */ - sprintf(id, "%s_ibfk_%lu", table_name, - (ulong) (*id_nr)++); - - if (innobase_check_identifier_length( - strchr(id,'/') + 1)) { - return(DB_IDENTIFIER_TOO_LONG); - } - } - foreign->id = id; - } - - return(DB_SUCCESS); -} - diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h deleted file mode 100644 index 0290b884ece..00000000000 --- a/storage/xtradb/include/dict0dict.h +++ /dev/null @@ -1,1907 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0dict.h -Data dictionary system - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0dict_h -#define dict0dict_h - -#include "univ.i" -#include "db0err.h" -#include "dict0types.h" -#include "dict0mem.h" -#include "data0type.h" -#include "data0data.h" -#include "mem0mem.h" -#include "rem0types.h" -#include "ut0mem.h" -#include "ut0lst.h" -#include "hash0hash.h" -#include "ut0rnd.h" -#include "ut0byte.h" -#include "trx0types.h" -#include "row0types.h" -#include "fsp0fsp.h" -#include "dict0pagecompress.h" - -extern bool innodb_table_stats_not_found; -extern bool innodb_index_stats_not_found; - -#ifndef UNIV_HOTBACKUP -# include "sync0sync.h" -# include "sync0rw.h" -/******************************************************************//** -Makes all characters in a NUL-terminated UTF-8 string lower case. */ -UNIV_INTERN -void -dict_casedn_str( -/*============*/ - char* a) /*!< in/out: string to put in lower case */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Get the database name length in a table name. 
-@return database name length */ -UNIV_INTERN -ulint -dict_get_db_name_len( -/*=================*/ - const char* name) /*!< in: table name in the form - dbname '/' tablename */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Open a table from its database and table name, this is currently used by -foreign constraint parser to get the referenced table. -@return complete table name with database and table name, allocated from -heap memory passed in */ -UNIV_INTERN -char* -dict_get_referenced_table( -/*======================*/ - const char* name, /*!< in: foreign key table name */ - const char* database_name, /*!< in: table db name */ - ulint database_name_len,/*!< in: db name length */ - const char* table_name, /*!< in: table name */ - ulint table_name_len, /*!< in: table name length */ - dict_table_t** table, /*!< out: table object or NULL */ - mem_heap_t* heap); /*!< in: heap memory */ -/*********************************************************************//** -Frees a foreign key struct. */ - -void -dict_foreign_free( -/*==============*/ - dict_foreign_t* foreign); /*!< in, own: foreign key struct */ -/*********************************************************************//** -Finds the highest [number] for foreign key constraints of the table. Looks -only at the >= 4.0.18-format id's, which are of the form -databasename/tablename_ibfk_[number]. -@return highest number, 0 if table has no new format foreign key constraints */ -UNIV_INTERN -ulint -dict_table_get_highest_foreign_id( -/*==============================*/ - dict_table_t* table); /*!< in: table in the dictionary - memory cache */ -/********************************************************************//** -Return the end of table name where we have removed dbname and '/'. 
-@return table name */ -UNIV_INTERN -const char* -dict_remove_db_name( -/*================*/ - const char* name) /*!< in: table name in the form - dbname '/' tablename */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** Operation to perform when opening a table */ -enum dict_table_op_t { - /** Expect the tablespace to exist. */ - DICT_TABLE_OP_NORMAL = 0, - /** Drop any orphan indexes after an aborted online index creation */ - DICT_TABLE_OP_DROP_ORPHAN, - /** Silently load the tablespace if it does not exist, - and do not load the definitions of incomplete indexes. */ - DICT_TABLE_OP_LOAD_TABLESPACE, - /** Open the table only if it's in table cache. */ - DICT_TABLE_OP_OPEN_ONLY_IF_CACHED -}; - -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INTERN -dict_table_t* -dict_table_open_on_id( -/*==================*/ - table_id_t table_id, /*!< in: table id */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - dict_table_op_t table_op) /*!< in: operation to perform */ - __attribute__((warn_unused_result)); - -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INTERN -dict_table_t* -dict_table_open_on_index_id( -/*==================*/ - table_id_t table_id, /*!< in: table id */ - bool dict_locked) /*!< in: TRUE=data dictionary locked */ - __attribute__((warn_unused_result)); -/********************************************************************//** -Decrements the count of open handles to a table. 
*/ -UNIV_INTERN -void -dict_table_close( -/*=============*/ - dict_table_t* table, /*!< in/out: table */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - ibool try_drop) /*!< in: TRUE=try to drop any orphan - indexes after an aborted online - index creation */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Inits the data dictionary module. */ -UNIV_INTERN -void -dict_init(void); -/*===========*/ -/********************************************************************//** -Gets the space id of every table of the data dictionary and makes a linear -list and a hash table of them to the data dictionary cache. This function -can be called at database startup if we did not need to do a crash recovery. -In crash recovery we must scan the space id's from the .ibd files in MySQL -database directories. */ -UNIV_INTERN -void -dict_load_space_id_list(void); -/*=========================*/ -/*********************************************************************//** -Gets the minimum number of bytes per character. -@return minimum multi-byte char size, in bytes */ -UNIV_INLINE -ulint -dict_col_get_mbminlen( -/*==================*/ - const dict_col_t* col) /*!< in: column */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets the maximum number of bytes per character. -@return maximum multi-byte char size, in bytes */ -UNIV_INLINE -ulint -dict_col_get_mbmaxlen( -/*==================*/ - const dict_col_t* col) /*!< in: column */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Sets the minimum and maximum number of bytes per character. 
*/ -UNIV_INLINE -void -dict_col_set_mbminmaxlen( -/*=====================*/ - dict_col_t* col, /*!< in/out: column */ - ulint mbminlen, /*!< in: minimum multi-byte - character size, in bytes */ - ulint mbmaxlen) /*!< in: minimum multi-byte - character size, in bytes */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Gets the column data type. */ -UNIV_INLINE -void -dict_col_copy_type( -/*===============*/ - const dict_col_t* col, /*!< in: column */ - dtype_t* type) /*!< out: data type */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Determine bytes of column prefix to be stored in the undo log. Please -note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix -needs to be stored in the undo log. -@return bytes of column prefix to be stored in the undo log */ -UNIV_INLINE -ulint -dict_max_field_len_store_undo( -/*==========================*/ - dict_table_t* table, /*!< in: table */ - const dict_col_t* col) /*!< in: column which index prefix - is based on */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -#ifdef UNIV_DEBUG -/*********************************************************************//** -Assert that a column and a data type match. -@return TRUE */ -UNIV_INLINE -ibool -dict_col_type_assert_equal( -/*=======================*/ - const dict_col_t* col, /*!< in: column */ - const dtype_t* type) /*!< in: data type */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Returns the minimum size of the column. 
-@return minimum size */ -UNIV_INLINE -ulint -dict_col_get_min_size( -/*==================*/ - const dict_col_t* col) /*!< in: column */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************************//** -Returns the maximum size of the column. -@return maximum size */ -UNIV_INLINE -ulint -dict_col_get_max_size( -/*==================*/ - const dict_col_t* col) /*!< in: column */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************************//** -Returns the size of a fixed size column, 0 if not a fixed size column. -@return fixed size, or 0 */ -UNIV_INLINE -ulint -dict_col_get_fixed_size( -/*====================*/ - const dict_col_t* col, /*!< in: column */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************************//** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. -For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ -UNIV_INLINE -ulint -dict_col_get_sql_null_size( -/*=======================*/ - const dict_col_t* col, /*!< in: column */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets the column number. -@return col->ind, table column position (starting from 0) */ -UNIV_INLINE -ulint -dict_col_get_no( -/*============*/ - const dict_col_t* col) /*!< in: column */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets the column position in the clustered index. 
*/ -UNIV_INLINE -ulint -dict_col_get_clust_pos( -/*===================*/ - const dict_col_t* col, /*!< in: table column */ - const dict_index_t* clust_index) /*!< in: clustered index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/****************************************************************//** -If the given column name is reserved for InnoDB system columns, return -TRUE. -@return TRUE if name is reserved */ -UNIV_INTERN -ibool -dict_col_name_is_reserved( -/*======================*/ - const char* name) /*!< in: column name */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Acquire the autoinc lock. */ -UNIV_INTERN -void -dict_table_autoinc_lock( -/*====================*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Unconditionally set the autoinc counter. */ -UNIV_INTERN -void -dict_table_autoinc_initialize( -/*==========================*/ - dict_table_t* table, /*!< in/out: table */ - ib_uint64_t value) /*!< in: next value to assign to a row */ - MY_ATTRIBUTE((nonnull)); - -/** Store autoinc value when the table is evicted. -@param[in] table table evicted */ -UNIV_INTERN -void -dict_table_autoinc_store( - const dict_table_t* table); - -/** Restore autoinc value when the table is loaded. -@param[in] table table loaded */ -UNIV_INTERN -void -dict_table_autoinc_restore( - dict_table_t* table); - -/********************************************************************//** -Reads the next autoinc value (== autoinc counter value), 0 if not yet -initialized. 
-@return value for a new row, or 0 */ -UNIV_INTERN -ib_uint64_t -dict_table_autoinc_read( -/*====================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Updates the autoinc counter if the value supplied is greater than the -current value. */ -UNIV_INTERN -void -dict_table_autoinc_update_if_greater( -/*=================================*/ - - dict_table_t* table, /*!< in/out: table */ - ib_uint64_t value) /*!< in: value which was assigned to a row */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Release the autoinc lock. */ -UNIV_INTERN -void -dict_table_autoinc_unlock( -/*======================*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/**********************************************************************//** -Adds system columns to a table object. */ -UNIV_INTERN -void -dict_table_add_system_columns( -/*==========================*/ - dict_table_t* table, /*!< in/out: table */ - mem_heap_t* heap) /*!< in: temporary heap */ - MY_ATTRIBUTE((nonnull)); -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Adds a table object to the dictionary cache. */ -UNIV_INTERN -void -dict_table_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ibool can_be_evicted, /*!< in: TRUE if can be evicted*/ - mem_heap_t* heap) /*!< in: temporary heap */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Removes a table object from the dictionary cache. 
*/ -UNIV_INTERN -void -dict_table_remove_from_cache( -/*=========================*/ - dict_table_t* table) /*!< in, own: table */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -UNIV_INTERN -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict); /*!< in: TRUE if table being evicted - to make room in the table LRU list */ -/**********************************************************************//** -Renames a table object. -@return TRUE if success */ -UNIV_INTERN -dberr_t -dict_table_rename_in_cache( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - const char* new_name, /*!< in: new name */ - ibool rename_also_foreigns) - /*!< in: in ALTER TABLE we want - to preserve the original table name - in constraints which reference it */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Removes an index from the dictionary cache. */ -UNIV_INTERN -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Change the id of a table object in the dictionary cache. This is used in -DISCARD TABLESPACE. */ -UNIV_INTERN -void -dict_table_change_id_in_cache( -/*==========================*/ - dict_table_t* table, /*!< in/out: table object already in cache */ - table_id_t new_id) /*!< in: new id to set */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Removes a foreign constraint struct from the dictionary cache. 
*/ -UNIV_INTERN -void -dict_foreign_remove_from_cache( -/*===========================*/ - dict_foreign_t* foreign) /*!< in, own: foreign constraint */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Adds a foreign key constraint object to the dictionary cache. May free -the object if there already is an object with the same identifier in. -At least one of foreign table or referenced table must already be in -the dictionary cache! -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_foreign_add_to_cache( -/*======================*/ - dict_foreign_t* foreign, - /*!< in, own: foreign key constraint */ - const char** col_names, - /*!< in: column names, or NULL to use - foreign->foreign_table->col_names */ - bool check_charsets, - /*!< in: whether to check charset - compatibility */ - dict_err_ignore_t ignore_err) - /*!< in: error to be ignored */ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/*********************************************************************//** -Checks if a table is referenced by foreign keys. -@return TRUE if table is referenced by a foreign key */ -UNIV_INTERN -ibool -dict_table_is_referenced_by_foreign_key( -/*====================================*/ - const dict_table_t* table) /*!< in: InnoDB table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Replace the index passed in with another equivalent index in the -foreign key lists of the table. 
-@return whether all replacements were found */ -UNIV_INTERN -bool -dict_foreign_replace_index( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - const dict_index_t* index) /*!< in: index to be replaced */ - MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)); -/**********************************************************************//** -Determines whether a string starts with the specified keyword. -@return TRUE if str starts with keyword */ -UNIV_INTERN -ibool -dict_str_starts_with_keyword( -/*=========================*/ - THD* thd, /*!< in: MySQL thread handle */ - const char* str, /*!< in: string to scan for keyword */ - const char* keyword) /*!< in: keyword to look for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary -the foreign key constraints declared in the string. This function -should be called after the indexes for a table have been created. -Each foreign key constraint must be accompanied with indexes in -bot participating tables. The indexes are allowed to contain more -fields than mentioned in the constraint. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_create_foreign_constraints( -/*============================*/ - trx_t* trx, /*!< in: transaction */ - const char* sql_string, /*!< in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES - table2(c, d), table2 can be written - also with the database - name before it: test.table2; the - default database id the database of - parameter name */ - size_t sql_length, /*!< in: length of sql_string */ - const char* name, /*!< in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /*!< in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. -@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the -constraint id does not match */ -UNIV_INTERN -dberr_t -dict_foreign_parse_drop_constraints( -/*================================*/ - mem_heap_t* heap, /*!< in: heap from which we can - allocate memory */ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table, /*!< in: table */ - ulint* n, /*!< out: number of constraints - to drop */ - const char*** constraints_to_drop) /*!< out: id's of the - constraints to drop */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Returns a table object and increments its open handle count. -NOTE! This is a high-level function to be used mainly from outside the -'dict' directory. Inside this directory dict_table_get_low -is usually the appropriate function. 
-@return table, NULL if does not exist */ -UNIV_INTERN -dict_table_t* -dict_table_open_on_name( -/*====================*/ - const char* table_name, /*!< in: table name */ - ibool dict_locked, /*!< in: TRUE=data dictionary locked */ - ibool try_drop, /*!< in: TRUE=try to drop any orphan - indexes after an aborted online - index creation */ - dict_err_ignore_t - ignore_err) /*!< in: error to be ignored when - loading the table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Tries to find an index whose first fields are the columns in the array, -in the same order and is not marked for deletion and is not the same -as types_idx. -@return matching index, NULL if not found */ -UNIV_INTERN -dict_index_t* -dict_foreign_find_index( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - const char** columns,/*!< in: array of column names */ - ulint n_cols, /*!< in: number of columns */ - const dict_index_t* types_idx, - /*!< in: NULL or an index - whose types the column types - must match */ - bool check_charsets, - /*!< in: whether to check - charsets. only has an effect - if types_idx != NULL */ - ulint check_null, - /*!< in: nonzero if none of - the columns must be declared - NOT NULL */ - ulint* error, /*!< out: error code */ - ulint* err_col_no, - /*!< out: column number where - error happened */ - dict_index_t** err_index) - /*!< out: index where error - happened */ - MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)); -/**********************************************************************//** -Returns a column's name. -@return column name. NOTE: not guaranteed to stay valid if table is -modified in any way (columns added, etc.). 
*/ -UNIV_INTERN -const char* -dict_table_get_col_name( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - ulint col_nr) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Returns a column's name. -@return column name. NOTE: not guaranteed to stay valid if table is -modified in any way (columns added, etc.). */ -UNIV_INTERN -const char* -dict_table_get_col_name_for_mysql( -/*==============================*/ - const dict_table_t* table, /*!< in: table */ - const char* col_name)/*!< in: MySQL table column name */ - __attribute__((nonnull, warn_unused_result)); -/**********************************************************************//** -Prints a table data. */ -UNIV_INTERN -void -dict_table_print( -/*=============*/ - dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Outputs info on foreign keys of a table. */ -UNIV_INTERN -std::string -dict_print_info_on_foreign_keys( -/*============================*/ - ibool create_table_format, /*!< in: if TRUE then print in - a format suitable to be inserted into - a CREATE TABLE, otherwise in the format - of SHOW TABLE STATUS */ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table); /*!< in: table */ -/**********************************************************************//** -Outputs info on a foreign key of a table in a format suitable for -CREATE TABLE. */ -UNIV_INTERN -std::string -dict_print_info_on_foreign_key_in_create_format( -/*============================================*/ - trx_t* trx, /*!< in: transaction */ - dict_foreign_t* foreign, /*!< in: foreign key constraint */ - ibool add_newline); /*!< in: whether to add a newline */ -/********************************************************************//** -Displays the names of the index and the table. 
*/ -UNIV_INTERN -void -dict_index_name_print( -/*==================*/ - FILE* file, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: index to print */ - MY_ATTRIBUTE((nonnull(1,3))); -/*********************************************************************//** -Tries to find an index whose first fields are the columns in the array, -in the same order and is not marked for deletion and is not the same -as types_idx. -@return matching index, NULL if not found */ -UNIV_INTERN -bool -dict_foreign_qualify_index( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - const char** col_names, - /*!< in: column names, or NULL - to use table->col_names */ - const char** columns,/*!< in: array of column names */ - ulint n_cols, /*!< in: number of columns */ - const dict_index_t* index, /*!< in: index to check */ - const dict_index_t* types_idx, - /*!< in: NULL or an index - whose types the column types - must match */ - bool check_charsets, - /*!< in: whether to check - charsets. only has an effect - if types_idx != NULL */ - ulint check_null, - /*!< in: nonzero if none of - the columns must be declared - NOT NULL */ - ulint* error, /*!< out: error code */ - ulint* err_col_no, - /*!< out: column number where - error happened */ - dict_index_t** err_index) - /*!< out: index where error - happened */ - - MY_ATTRIBUTE((nonnull(1,3), warn_unused_result)); -#ifdef UNIV_DEBUG -/********************************************************************//** -Gets the first index on the table (the clustered index). -@return index, NULL if none exists */ -UNIV_INLINE -dict_index_t* -dict_table_get_first_index( -/*=======================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the last index on the table. 
-@return index, NULL if none exists */ -UNIV_INLINE -dict_index_t* -dict_table_get_last_index( -/*=======================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the next index on the table. -@return index, NULL if none left */ -UNIV_INLINE -dict_index_t* -dict_table_get_next_index( -/*======================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#else /* UNIV_DEBUG */ -# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes) -# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes) -# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index) -#endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -/* Skip corrupted index */ -#define dict_table_skip_corrupt_index(index) \ - while (index && dict_index_is_corrupted(index)) { \ - index = dict_table_get_next_index(index); \ - } - -/* Get the next non-corrupt index */ -#define dict_table_next_uncorrupted_index(index) \ -do { \ - index = dict_table_get_next_index(index); \ - dict_table_skip_corrupt_index(index); \ -} while (0) - -/********************************************************************//** -Check whether the index is the clustered index. -@return nonzero for clustered index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_clust( -/*================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Check whether the index is unique. 
-@return nonzero for unique index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_unique( -/*=================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Check whether the index is the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_ibuf( -/*===============*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Check whether the index is a secondary index or the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_sec_or_ibuf( -/*======================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - -/************************************************************************ -Gets the all the FTS indexes for the table. NOTE: must not be called for -tables which do not have an FTS-index. */ -UNIV_INTERN -ulint -dict_table_get_all_fts_indexes( -/*===========================*/ - /* out: number of indexes collected */ - dict_table_t* table, /* in: table */ - ib_vector_t* indexes)/* out: vector for collecting FTS indexes */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Gets the number of user-defined columns in a table in the dictionary -cache. -@return number of user-defined (e.g., not ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_user_cols( -/*=======================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Gets the number of system columns in a table in the dictionary cache. 
-@return number of system (e.g., ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_sys_cols( -/*======================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/********************************************************************//** -Gets the number of all columns (also system) in a table in the dictionary -cache. -@return number of columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_cols( -/*==================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Gets the approximately estimated number of rows in the table. -@return estimated number of rows */ -UNIV_INLINE -ib_uint64_t -dict_table_get_n_rows( -/*==================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Increment the number of rows in the table by one. -Notice that this operation is not protected by any latch, the number is -approximate. */ -UNIV_INLINE -void -dict_table_n_rows_inc( -/*==================*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Decrement the number of rows in the table by one. -Notice that this operation is not protected by any latch, the number is -approximate. */ -UNIV_INLINE -void -dict_table_n_rows_dec( -/*==================*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); -#ifdef UNIV_DEBUG -/********************************************************************//** -Gets the nth column of a table. 
-@return pointer to column object */ -UNIV_INLINE -dict_col_t* -dict_table_get_nth_col( -/*===================*/ - const dict_table_t* table, /*!< in: table */ - ulint pos) /*!< in: position of column */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the given system column of a table. -@return pointer to column object */ -UNIV_INLINE -dict_col_t* -dict_table_get_sys_col( -/*===================*/ - const dict_table_t* table, /*!< in: table */ - ulint sys) /*!< in: DATA_ROW_ID, ... */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#else /* UNIV_DEBUG */ -#define dict_table_get_nth_col(table, pos) \ -((table)->cols + (pos)) -#define dict_table_get_sys_col(table, sys) \ -((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS) -#endif /* UNIV_DEBUG */ -/********************************************************************//** -Gets the given system column number of a table. -@return column number */ -UNIV_INLINE -ulint -dict_table_get_sys_col_no( -/*======================*/ - const dict_table_t* table, /*!< in: table */ - ulint sys) /*!< in: DATA_ROW_ID, ... */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Returns the minimum data size of an index record. -@return minimum data size in bytes */ -UNIV_INLINE -ulint -dict_index_get_min_size( -/*====================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Check whether the table uses the compact page format. 
-@return TRUE if table uses the compact page format */ -UNIV_INLINE -ibool -dict_table_is_comp( -/*===============*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Determine the file format of a table. -@return file format version */ -UNIV_INLINE -ulint -dict_table_get_format( -/*==================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Determine the file format from a dict_table_t::flags. -@return file format version */ -UNIV_INLINE -ulint -dict_tf_get_format( -/*===============*/ - ulint flags) /*!< in: dict_table_t::flags */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Set the various values in a dict_table_t::flags pointer. */ -UNIV_INLINE -void -dict_tf_set( -/*========*/ - ulint* flags, /*!< in/out: table */ - rec_format_t format, /*!< in: file format */ - ulint zip_ssize, /*!< in: zip shift size */ - bool remote_path, /*!< in: table uses DATA DIRECTORY - */ - bool page_compressed,/*!< in: table uses page compressed - pages */ - ulint page_compression_level, /*!< in: table page compression - level */ - ulint atomic_writes) /*!< in: table atomic - writes option value*/ - __attribute__((nonnull)); -/********************************************************************//** -Convert a 32 bit integer table flags to the 32 bit integer that is -written into the tablespace header at the offset FSP_SPACE_FLAGS and is -also stored in the fil_space_t::flags field. The following chart shows -the translation of the low order bit. Other bits are the same. 
-========================= Low order bit ========================== - | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC -dict_table_t::flags | 0 | 1 | 1 | 1 -fil_space_t::flags | 0 | 0 | 1 | 1 -================================================================== -@return tablespace flags (fil_space_t::flags) */ -UNIV_INLINE -ulint -dict_tf_to_fsp_flags( -/*=================*/ - ulint flags) /*!< in: dict_table_t::flags */ - MY_ATTRIBUTE((const)); -/********************************************************************//** -Extract the compressed page size from table flags. -@return compressed page size, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_tf_get_zip_size( -/*=================*/ - ulint flags) /*!< in: flags */ - MY_ATTRIBUTE((const)); -/********************************************************************//** -Check whether the table uses the compressed compact page format. -@return compressed page size, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_table_zip_size( -/*================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Obtain exclusive locks on all index trees of the table. This is to prevent -accessing index trees while InnoDB is updating internal metadata for -operations such as truncate tables. */ -UNIV_INLINE -void -dict_table_x_lock_indexes( -/*======================*/ - dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Release the exclusive locks on all index tree. */ -UNIV_INLINE -void -dict_table_x_unlock_indexes( -/*========================*/ - dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Checks if a column is in the ordering columns of the clustered index of a -table. 
Column prefixes are treated like whole columns. -@return TRUE if the column, or its prefix, is in the clustered key */ -UNIV_INTERN -ibool -dict_table_col_in_clustered_key( -/*============================*/ - const dict_table_t* table, /*!< in: table */ - ulint n) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Check if the table has an FTS index. -@return TRUE if table has an FTS index */ -UNIV_INLINE -ibool -dict_table_has_fts_index( -/*=====================*/ - dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Copies types of columns contained in table to tuple and sets all -fields of the tuple to the SQL NULL value. This function should -be called right after dtuple_create(). */ -UNIV_INTERN -void -dict_table_copy_types( -/*==================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************** -Wait until all the background threads of the given table have exited, i.e., -bg_threads == 0. Note: bg_threads_mutex must be reserved when -calling this. */ -UNIV_INTERN -void -dict_table_wait_for_bg_threads_to_exit( -/*===================================*/ - dict_table_t* table, /* in: table */ - ulint delay) /* in: time in microseconds to wait between - checks of bg_threads. */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Looks for an index with the given id. NOTE that we do not reserve -the dictionary mutex: this function is for emergency purposes like -printing info of a corrupt database page! 
-@return index or NULL if not found from cache */ -UNIV_INTERN -dict_index_t* -dict_index_find_on_id_low( -/*======================*/ - index_id_t id) /*!< in: index id */ - MY_ATTRIBUTE((warn_unused_result)); -/**********************************************************************//** -Make room in the table cache by evicting an unused table. The unused table -should not be part of FK relationship and currently not used in any user -transaction. There is no guarantee that it will remove a table. -@return number of tables evicted. */ -UNIV_INTERN -ulint -dict_make_room_in_cache( -/*====================*/ - ulint max_tables, /*!< in: max tables allowed in cache */ - ulint pct_check); /*!< in: max percent to check */ -/**********************************************************************//** -Adds an index to the dictionary cache. -@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ -UNIV_INTERN -dberr_t -dict_index_add_to_cache( -/*====================*/ - dict_table_t* table, /*!< in: table on which the index is */ - dict_index_t* index, /*!< in, own: index; NOTE! The index memory - object is freed in this function! */ - ulint page_no,/*!< in: root page number of the index */ - ibool strict) /*!< in: TRUE=refuse to create the index - if records could be too big to fit in - an B-tree page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Removes an index from the dictionary cache. */ -UNIV_INTERN -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Gets the number of fields in the internal representation of an index, -including fields added by the dictionary system. 
-@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_fields( -/*====================*/ - const dict_index_t* index) /*!< in: an internal - representation of index (in - the dictionary cache) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the number of fields in the internal representation of an index -that uniquely determine the position of an index entry in the index, if -we do not take multiversioning into account: in the B-tree use the value -returned by dict_index_get_n_unique_in_tree. -@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_unique( -/*====================*/ - const dict_index_t* index) /*!< in: an internal representation - of index (in the dictionary cache) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the number of fields in the internal representation of an index -which uniquely determine the position of an index entry in the index, if -we also take multiversioning into account. -@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_unique_in_tree( -/*============================*/ - const dict_index_t* index) /*!< in: an internal representation - of index (in the dictionary cache) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the number of user-defined ordering fields in the index. In the internal -representation we add the row id to the ordering fields to make all indexes -unique, but this function returns the number of fields the user defined -in the index as ordering fields. 
-@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_ordering_defined_by_user( -/*======================================*/ - const dict_index_t* index) /*!< in: an internal representation - of index (in the dictionary cache) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifdef UNIV_DEBUG -/********************************************************************//** -Gets the nth field of an index. -@return pointer to field object */ -UNIV_INLINE -dict_field_t* -dict_index_get_nth_field( -/*=====================*/ - const dict_index_t* index, /*!< in: index */ - ulint pos) /*!< in: position of field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#else /* UNIV_DEBUG */ -# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos)) -#endif /* UNIV_DEBUG */ -/********************************************************************//** -Gets pointer to the nth column in an index. -@return column */ -UNIV_INLINE -const dict_col_t* -dict_index_get_nth_col( -/*===================*/ - const dict_index_t* index, /*!< in: index */ - ulint pos) /*!< in: position of the field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Gets the column number of the nth field in an index. -@return column number */ -UNIV_INLINE -ulint -dict_index_get_nth_col_no( -/*======================*/ - const dict_index_t* index, /*!< in: index */ - ulint pos) /*!< in: position of the field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Looks for column n in an index. 
-@return position in internal representation of the index; -ULINT_UNDEFINED if not contained */ -UNIV_INLINE -ulint -dict_index_get_nth_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint n, /*!< in: column number */ - ulint* prefix_col_pos) /*!< out: col num if prefix */ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/********************************************************************//** -Looks for column n in an index. -@return position in internal representation of the index; -ULINT_UNDEFINED if not contained */ -UNIV_INTERN -ulint -dict_index_get_nth_col_or_prefix_pos( -/*=================================*/ - const dict_index_t* index, /*!< in: index */ - ulint n, /*!< in: column number */ - ibool inc_prefix, /*!< in: TRUE=consider - column prefixes too */ - ulint* prefix_col_pos) /*!< out: col num if prefix */ - - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/********************************************************************//** -Returns TRUE if the index contains a column or a prefix of that column. -@return TRUE if contains the column or its prefix */ -UNIV_INTERN -ibool -dict_index_contains_col_or_prefix( -/*==============================*/ - const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Looks for a matching field in an index. The column has to be the same. The -column in index must be complete, or must contain a prefix longer than the -column in index2. That is, we must be able to construct the prefix in index2 -from the prefix in index. 
-@return position in internal representation of the index; -ULINT_UNDEFINED if not contained */ -UNIV_INTERN -ulint -dict_index_get_nth_field_pos( -/*=========================*/ - const dict_index_t* index, /*!< in: index from which to search */ - const dict_index_t* index2, /*!< in: index */ - ulint n) /*!< in: field number in index2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Looks for column n position in the clustered index. -@return position in internal representation of the clustered index */ -UNIV_INTERN -ulint -dict_table_get_nth_col_pos( -/*=======================*/ - const dict_table_t* table, /*!< in: table */ - ulint n) /*!< in: column number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Returns the position of a system column in an index. -@return position, ULINT_UNDEFINED if not contained */ -UNIV_INLINE -ulint -dict_index_get_sys_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint type) /*!< in: DATA_ROW_ID, ... */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Adds a column to index. */ -UNIV_INTERN -void -dict_index_add_col( -/*===============*/ - dict_index_t* index, /*!< in/out: index */ - const dict_table_t* table, /*!< in: table */ - dict_col_t* col, /*!< in: column */ - ulint prefix_len) /*!< in: column prefix length */ - MY_ATTRIBUTE((nonnull)); -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Copies types of fields contained in index to tuple. 
*/ -UNIV_INTERN -void -dict_index_copy_types( -/*==================*/ - dtuple_t* tuple, /*!< in/out: data tuple */ - const dict_index_t* index, /*!< in: index */ - ulint n_fields) /*!< in: number of - field types to copy */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Gets the field column. -@return field->col, pointer to the table column */ -UNIV_INLINE -const dict_col_t* -dict_field_get_col( -/*===============*/ - const dict_field_t* field) /*!< in: index field */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Returns an index object if it is found in the dictionary cache. -Assumes that dict_sys->mutex is already being held. -@return index, NULL if not found */ -UNIV_INTERN -dict_index_t* -dict_index_get_if_in_cache_low( -/*===========================*/ - index_id_t index_id) /*!< in: index id */ - MY_ATTRIBUTE((warn_unused_result)); -#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG -/**********************************************************************//** -Returns an index object if it is found in the dictionary cache. -@return index, NULL if not found */ -UNIV_INTERN -dict_index_t* -dict_index_get_if_in_cache( -/*=======================*/ - index_id_t index_id) /*!< in: index id */ - MY_ATTRIBUTE((warn_unused_result)); -#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#ifdef UNIV_DEBUG -/**********************************************************************//** -Checks that a tuple has n_fields_cmp value in a sensible range, so that -no comparison can occur with the page number field in a node pointer. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -dict_index_check_search_tuple( -/*==========================*/ - const dict_index_t* index, /*!< in: index tree */ - const dtuple_t* tuple) /*!< in: tuple used in a search */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/** Whether and when to allow temporary index names */ -enum check_name { - /** Require all indexes to be complete. */ - CHECK_ALL_COMPLETE, - /** Allow aborted online index creation. */ - CHECK_ABORTED_OK, - /** Allow partial indexes to exist. */ - CHECK_PARTIAL_OK -}; -/**********************************************************************//** -Check for duplicate index entries in a table [using the index name] */ -UNIV_INTERN -void -dict_table_check_for_dup_indexes( -/*=============================*/ - const dict_table_t* table, /*!< in: Check for dup indexes - in this table */ - enum check_name check) /*!< in: whether and when to allow - temporary index names */ - MY_ATTRIBUTE((nonnull)); -#endif /* UNIV_DEBUG */ -/**********************************************************************//** -Builds a node pointer out of a physical record and a page number. -@return own: node pointer */ -UNIV_INTERN -dtuple_t* -dict_index_build_node_ptr( -/*======================*/ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record for which to build node - pointer */ - ulint page_no,/*!< in: page number to put in node - pointer */ - mem_heap_t* heap, /*!< in: memory heap where pointer - created */ - ulint level) /*!< in: level of rec in tree: - 0 means leaf level */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Copies an initial segment of a physical record, long enough to specify an -index entry uniquely. 
-@return pointer to the prefix record */ -UNIV_INTERN -rec_t* -dict_index_copy_rec_order_prefix( -/*=============================*/ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record for which to - copy prefix */ - ulint* n_fields,/*!< out: number of fields copied */ - byte** buf, /*!< in/out: memory buffer for the - copied prefix, or NULL */ - ulint* buf_size)/*!< in/out: buffer size */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Builds a typed data tuple out of a physical record. -@return own: data tuple */ -UNIV_INTERN -dtuple_t* -dict_index_build_data_tuple( -/*========================*/ - dict_index_t* index, /*!< in: index */ - rec_t* rec, /*!< in: record for which to build data tuple */ - ulint n_fields,/*!< in: number of data fields */ - mem_heap_t* heap) /*!< in: memory heap where tuple created */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets the space id of the root of the index tree. -@return space id */ -UNIV_INLINE -ulint -dict_index_get_space( -/*=================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Sets the space id of the root of the index tree. */ -UNIV_INLINE -void -dict_index_set_space( -/*=================*/ - dict_index_t* index, /*!< in/out: index */ - ulint space) /*!< in: space id */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Gets the page number of the root of the index tree. 
-@return page number */ -UNIV_INLINE -ulint -dict_index_get_page( -/*================*/ - const dict_index_t* tree) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Gets the read-write lock of the index tree. -@return read-write lock */ -UNIV_INLINE -prio_rw_lock_t* -dict_index_get_lock( -/*================*/ - dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Returns free space reserved for future updates of records. This is -relevant only in the case of many consecutive inserts, as updates -which make the records bigger might fragment the index. -@return number of free bytes on page, reserved for updates */ -UNIV_INLINE -ulint -dict_index_get_space_reserve(void); -/*==============================*/ - -/* Online index creation @{ */ -/********************************************************************//** -Gets the status of online index creation. -@return the status */ -UNIV_INLINE -enum online_index_status -dict_index_get_online_status( -/*=========================*/ - const dict_index_t* index) /*!< in: secondary index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Sets the status of online index creation. */ -UNIV_INLINE -void -dict_index_set_online_status( -/*=========================*/ - dict_index_t* index, /*!< in/out: index */ - enum online_index_status status) /*!< in: status */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Determines if a secondary index is being or has been created online, -or if the table is being rebuilt online, allowing concurrent modifications -to the table. 
-@retval true if the index is being or has been built online, or -if this is a clustered index and the table is being or has been rebuilt online -@retval false if the index has been created or the table has been -rebuilt completely */ -UNIV_INLINE -bool -dict_index_is_online_ddl( -/*=====================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Calculates the minimum record length in an index. */ -UNIV_INTERN -ulint -dict_index_calc_min_rec_len( -/*========================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Reserves the dictionary system mutex for MySQL. */ -UNIV_INTERN -void -dict_mutex_enter_for_mysql_func(const char * file, ulint line); -/*============================*/ - -#define dict_mutex_enter_for_mysql() \ - dict_mutex_enter_for_mysql_func(__FILE__, __LINE__) - -/********************************************************************//** -Releases the dictionary system mutex for MySQL. */ -UNIV_INTERN -void -dict_mutex_exit_for_mysql(void); -/*===========================*/ - -/** Create a dict_table_t's stats latch or delay for lazy creation. -This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. -@param[in,out] table table whose stats latch to create -@param[in] enabled if false then the latch is disabled -and dict_table_stats_lock()/unlock() become noop on this table. */ - -void -dict_table_stats_latch_create( - dict_table_t* table, - bool enabled); - -/** Destroy a dict_table_t's stats latch. -This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. 
-@param[in,out] table table whose stats latch to destroy */ - -void -dict_table_stats_latch_destroy( - dict_table_t* table); - -/**********************************************************************//** -Lock the appropriate latch to protect a given table's statistics. -table->id is used to pick the corresponding latch from a global array of -latches. */ -UNIV_INTERN -void -dict_table_stats_lock( -/*==================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ -/**********************************************************************//** -Unlock the latch that has been locked by dict_table_stats_lock() */ -UNIV_INTERN -void -dict_table_stats_unlock( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ -/********************************************************************//** -Checks if the database name in two table names is the same. -@return TRUE if same db name */ -UNIV_INTERN -ibool -dict_tables_have_same_db( -/*=====================*/ - const char* name1, /*!< in: table name in the form - dbname '/' tablename */ - const char* name2) /*!< in: table name in the form - dbname '/' tablename */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Removes an index from the cache */ -UNIV_INTERN -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /*!< in/out: table */ - dict_index_t* index) /*!< in, own: index */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Get index by name -@return index, NULL if does not exist */ -UNIV_INTERN -dict_index_t* -dict_table_get_index_on_name( -/*=========================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); 
-/**********************************************************************//** -Looks for an index with the given id given a table instance. -@return index or NULL */ -UNIV_INTERN -dict_index_t* -dict_table_find_index_on_id( -/*========================*/ - const dict_table_t* table, /*!< in: table instance */ - index_id_t id) /*!< in: index id */ - __attribute__((nonnull, warn_unused_result)); -/**********************************************************************//** -In case there is more than one index with the same name return the index -with the min(id). -@return index, NULL if does not exist */ -UNIV_INTERN -dict_index_t* -dict_table_get_index_on_name_and_min_id( -/*====================================*/ - dict_table_t* table, /*!< in: table */ - const char* name) /*!< in: name of the index to find */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************** -Check whether a column exists in an FTS index. */ -UNIV_INLINE -ulint -dict_table_is_fts_column( -/*=====================*/ - /* out: ULINT_UNDEFINED if no match else - the offset within the vector */ - ib_vector_t* indexes,/* in: vector containing only FTS indexes */ - ulint col_no) /* in: col number to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Move a table to the non LRU end of the LRU list. */ -UNIV_INTERN -void -dict_table_move_from_lru_to_non_lru( -/*================================*/ - dict_table_t* table) /*!< in: table to move from LRU to non-LRU */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Move a table to the LRU list from the non-LRU list. 
*/ -UNIV_INTERN -void -dict_table_move_from_non_lru_to_lru( -/*================================*/ - dict_table_t* table) /*!< in: table to move from non-LRU to LRU */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Move to the most recently used segment of the LRU list. */ -UNIV_INTERN -void -dict_move_to_mru( -/*=============*/ - dict_table_t* table) /*!< in: table to move to MRU */ - MY_ATTRIBUTE((nonnull)); - -/** Maximum number of columns in a foreign key constraint. Please Note MySQL -has a much lower limit on the number of columns allowed in a foreign key -constraint */ -#define MAX_NUM_FK_COLUMNS 500 - -/* Buffers for storing detailed information about the latest foreign key -and unique key errors */ -extern FILE* dict_foreign_err_file; -extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */ - -/** the dictionary system */ -extern dict_sys_t* dict_sys; -/** the data dictionary rw-latch protecting dict_sys */ -extern rw_lock_t dict_operation_lock; - -typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t; - -/* Dictionary system struct */ -struct dict_sys_t{ - ib_prio_mutex_t mutex; /*!< mutex protecting the data - dictionary; protects also the - disk-based dictionary system tables; - this mutex serializes CREATE TABLE - and DROP TABLE, as well as reading - the dictionary data for a table from - system tables */ - row_id_t row_id; /*!< the next row id to assign; - NOTE that at a checkpoint this - must be written to the dict system - header and flushed to a file; in - recovery this must be derived from - the log records */ - hash_table_t* table_hash; /*!< hash table of the tables, based - on name */ - hash_table_t* table_id_hash; /*!< hash table of the tables, based - on id */ - ulint size; /*!< varying space in bytes occupied - by the data dictionary table and - index objects */ - dict_table_t* sys_tables; /*!< SYS_TABLES table */ - dict_table_t* sys_columns; /*!< SYS_COLUMNS 
table */ - dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ - dict_table_t* sys_fields; /*!< SYS_FIELDS table */ - - /*=============================*/ - UT_LIST_BASE_NODE_T(dict_table_t) - table_LRU; /*!< List of tables that can be evicted - from the cache */ - UT_LIST_BASE_NODE_T(dict_table_t) - table_non_LRU; /*!< List of tables that can't be - evicted from the cache */ - autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc - when table is evicted */ -}; -#endif /* !UNIV_HOTBACKUP */ - -/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ -extern dict_index_t* dict_ind_redundant; -/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */ -extern dict_index_t* dict_ind_compact; - -/**********************************************************************//** -Inits dict_ind_redundant and dict_ind_compact. */ -UNIV_INTERN -void -dict_ind_init(void); -/*===============*/ - -/* Auxiliary structs for checking a table definition @{ */ - -/* This struct is used to specify the name and type that a column must -have when checking a table's schema. 
*/ -struct dict_col_meta_t { - const char* name; /* column name */ - ulint mtype; /* required column main type */ - ulint prtype_mask; /* required column precise type mask; - if this is non-zero then all the - bits it has set must also be set - in the column's prtype */ - ulint len; /* required column length */ -}; - -/* This struct is used for checking whether a given table exists and -whether it has a predefined schema (number of columns and columns names -and types) */ -struct dict_table_schema_t { - const char* table_name; /* the name of the table whose - structure we are checking */ - ulint n_cols; /* the number of columns the - table must have */ - dict_col_meta_t* columns; /* metadata for the columns; - this array has n_cols - elements */ - ulint n_foreign; /* number of foreign keys this - table has, pointing to other - tables (where this table is - FK child) */ - ulint n_referenced; /* number of foreign keys other - tables have, pointing to this - table (where this table is - parent) */ -}; -/* @} */ - -/*********************************************************************//** -Checks whether a table exists and whether it has the given structure. -The table must have the same number of columns with the same names and -types. The order of the columns does not matter. -The caller must own the dictionary mutex. -dict_table_schema_check() @{ -@return DB_SUCCESS if the table exists and contains the necessary columns */ -UNIV_INTERN -dberr_t -dict_table_schema_check( -/*====================*/ - dict_table_schema_t* req_schema, /*!< in/out: required table - schema */ - char* errstr, /*!< out: human readable error - message if != DB_SUCCESS and - != DB_TABLE_NOT_FOUND is - returned */ - size_t errstr_sz) /*!< in: errstr size */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/* @} */ - -/*********************************************************************//** -Converts a database and table name from filesystem encoding -(e.g. 
d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two -strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be -at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */ -UNIV_INTERN -void -dict_fs2utf8( -/*=========*/ - const char* db_and_table, /*!< in: database and table names, - e.g. d@i1b/a@q1b@1Kc */ - char* db_utf8, /*!< out: database name, e.g. dцb */ - size_t db_utf8_size, /*!< in: dbname_utf8 size */ - char* table_utf8, /*!< out: table name, e.g. aюbØc */ - size_t table_utf8_size)/*!< in: table_utf8 size */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Closes the data dictionary module. */ -UNIV_INTERN -void -dict_close(void); -/*============*/ -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Check whether the index is corrupted. -@return nonzero for corrupted index, zero for valid indexes */ -UNIV_INLINE -ulint -dict_index_is_corrupted( -/*====================*/ - const dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((warn_unused_result)); - -#endif /* !UNIV_HOTBACKUP */ -/**********************************************************************//** -Flags an index and table corrupted both in the data dictionary cache -and in the system table SYS_INDEXES. */ -UNIV_INTERN -void -dict_set_corrupted( -/*===============*/ - dict_index_t* index, /*!< in/out: index */ - trx_t* trx, /*!< in/out: transaction */ - const char* ctx) /*!< in: context */ - UNIV_COLD; - -/**********************************************************************//** -Flags an index corrupted in the data dictionary cache only. 
This -is used mostly to mark a corrupted index when index's own dictionary -is corrupted, and we force to load such index for repair purpose */ -UNIV_INTERN -void -dict_set_corrupted_index_cache_only( -/*================================*/ - dict_index_t* index, /*!< in/out: index */ - dict_table_t* table); /*!< in/out: table */ - -/**********************************************************************//** -Flags a table with specified space_id corrupted in the table dictionary -cache. -@return TRUE if successful */ -UNIV_INTERN -ibool -dict_set_corrupted_by_space( -/*========================*/ - ulint space_id); /*!< in: space ID */ - -/**********************************************************************//** -Flags a table with specified space_id encrypted in the data dictionary -cache -@param[in] space_id Tablespace id */ -UNIV_INTERN -void -dict_set_encrypted_by_space( - ulint space_id); - -/********************************************************************//** -Validate the table flags. -@return true if valid. */ -UNIV_INLINE -bool -dict_tf_is_valid( -/*=============*/ - ulint flags) /*!< in: table flags */ - MY_ATTRIBUTE((warn_unused_result)); - -/********************************************************************//** -Check if the tablespace for the table has been discarded. -@return true if the tablespace has been discarded. */ -UNIV_INLINE -bool -dict_table_is_discarded( -/*====================*/ - const dict_table_t* table) /*!< in: table to check */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/********************************************************************//** -Check if it is a temporary table. -@return true if temporary table flag is set. 
*/ -UNIV_INLINE -bool -dict_table_is_temporary( -/*====================*/ - const dict_table_t* table) /*!< in: table to check */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -This function should be called whenever a page is successfully -compressed. Updates the compression padding information. */ -UNIV_INTERN -void -dict_index_zip_success( -/*===================*/ - dict_index_t* index) /*!< in/out: index to be updated. */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -This function should be called whenever a page compression attempt -fails. Updates the compression padding information. */ -UNIV_INTERN -void -dict_index_zip_failure( -/*===================*/ - dict_index_t* index) /*!< in/out: index to be updated. */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Return the optimal page size, for which page will likely compress. -@return page size beyond which page may not compress*/ -UNIV_INTERN -ulint -dict_index_zip_pad_optimal_page_size( -/*=================================*/ - dict_index_t* index) /*!< in: index for which page size - is requested */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Convert table flag to row format string. 
-@return row format name */ -UNIV_INTERN -const char* -dict_tf_to_row_format_string( -/*=========================*/ - ulint table_flag); /*!< in: row format setting */ -/*****************************************************************//** -Get index by first field of the index -@return index which is having first field matches -with the field present in field_index position of table */ -UNIV_INLINE -dict_index_t* -dict_table_get_index_on_first_col( -/*==============================*/ - const dict_table_t* table, /*!< in: table */ - ulint col_index); /*!< in: position of column - in table */ - -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_NONINL -#include "dict0dict.ic" -#endif - -#endif diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic deleted file mode 100644 index f68d4e176da..00000000000 --- a/storage/xtradb/include/dict0dict.ic +++ /dev/null @@ -1,1588 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/dict0dict.ic -Data dictionary system - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - -#include "data0type.h" -#ifndef UNIV_HOTBACKUP -#include "dict0load.h" -#include "rem0types.h" -#include "fsp0fsp.h" -#include "srv0srv.h" -#include "sync0rw.h" /* RW_S_LATCH */ - -/*********************************************************************//** -Gets the minimum number of bytes per character. -@return minimum multi-byte char size, in bytes */ -UNIV_INLINE -ulint -dict_col_get_mbminlen( -/*==================*/ - const dict_col_t* col) /*!< in: column */ -{ - return(DATA_MBMINLEN(col->mbminmaxlen)); -} -/*********************************************************************//** -Gets the maximum number of bytes per character. -@return maximum multi-byte char size, in bytes */ -UNIV_INLINE -ulint -dict_col_get_mbmaxlen( -/*==================*/ - const dict_col_t* col) /*!< in: column */ -{ - return(DATA_MBMAXLEN(col->mbminmaxlen)); -} -/*********************************************************************//** -Sets the minimum and maximum number of bytes per character. 
*/ -UNIV_INLINE -void -dict_col_set_mbminmaxlen( -/*=====================*/ - dict_col_t* col, /*!< in/out: column */ - ulint mbminlen, /*!< in: minimum multi-byte - character size, in bytes */ - ulint mbmaxlen) /*!< in: minimum multi-byte - character size, in bytes */ -{ - ut_ad(mbminlen < DATA_MBMAX); - ut_ad(mbmaxlen < DATA_MBMAX); - ut_ad(mbminlen <= mbmaxlen); - - col->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen); -} -/*********************************************************************//** -Gets the column data type. */ -UNIV_INLINE -void -dict_col_copy_type( -/*===============*/ - const dict_col_t* col, /*!< in: column */ - dtype_t* type) /*!< out: data type */ -{ - ut_ad(col != NULL); - ut_ad(type != NULL); - - type->mtype = col->mtype; - type->prtype = col->prtype; - type->len = col->len; - type->mbminmaxlen = col->mbminmaxlen; -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Assert that a column and a data type match. -@return TRUE */ -UNIV_INLINE -ibool -dict_col_type_assert_equal( -/*=======================*/ - const dict_col_t* col, /*!< in: column */ - const dtype_t* type) /*!< in: data type */ -{ - ut_ad(col); - ut_ad(type); - - ut_ad(col->mtype == type->mtype); - ut_ad(col->prtype == type->prtype); - //ut_ad(col->len == type->len); -# ifndef UNIV_HOTBACKUP - ut_ad(col->mbminmaxlen == type->mbminmaxlen); -# endif /* !UNIV_HOTBACKUP */ - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Returns the minimum size of the column. 
-@return minimum size */ -UNIV_INLINE -ulint -dict_col_get_min_size( -/*==================*/ - const dict_col_t* col) /*!< in: column */ -{ - return(dtype_get_min_size_low(col->mtype, col->prtype, col->len, - col->mbminmaxlen)); -} -/***********************************************************************//** -Returns the maximum size of the column. -@return maximum size */ -UNIV_INLINE -ulint -dict_col_get_max_size( -/*==================*/ - const dict_col_t* col) /*!< in: column */ -{ - return(dtype_get_max_size_low(col->mtype, col->len)); -} -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************************//** -Returns the size of a fixed size column, 0 if not a fixed size column. -@return fixed size, or 0 */ -UNIV_INLINE -ulint -dict_col_get_fixed_size( -/*====================*/ - const dict_col_t* col, /*!< in: column */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ -{ - return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len, - col->mbminmaxlen, comp)); -} -/***********************************************************************//** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. -For fixed length types it is the fixed length of the type, otherwise 0. -@return SQL null storage size in ROW_FORMAT=REDUNDANT */ -UNIV_INLINE -ulint -dict_col_get_sql_null_size( -/*=======================*/ - const dict_col_t* col, /*!< in: column */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ -{ - return(dict_col_get_fixed_size(col, comp)); -} - -/*********************************************************************//** -Gets the column number. -@return col->ind, table column position (starting from 0) */ -UNIV_INLINE -ulint -dict_col_get_no( -/*============*/ - const dict_col_t* col) /*!< in: column */ -{ - ut_ad(col); - - return(col->ind); -} - -/*********************************************************************//** -Gets the column position in the clustered index. 
*/ -UNIV_INLINE -ulint -dict_col_get_clust_pos( -/*===================*/ - const dict_col_t* col, /*!< in: table column */ - const dict_index_t* clust_index) /*!< in: clustered index */ -{ - ulint i; - - ut_ad(col); - ut_ad(clust_index); - ut_ad(dict_index_is_clust(clust_index)); - - for (i = 0; i < clust_index->n_def; i++) { - const dict_field_t* field = &clust_index->fields[i]; - - if (!field->prefix_len && field->col == col) { - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_DEBUG -/********************************************************************//** -Gets the first index on the table (the clustered index). -@return index, NULL if none exists */ -UNIV_INLINE -dict_index_t* -dict_table_get_first_index( -/*=======================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes)); -} - -/********************************************************************//** -Gets the last index on the table. -@return index, NULL if none exists */ -UNIV_INLINE -dict_index_t* -dict_table_get_last_index( -/*=======================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(UT_LIST_GET_LAST((const_cast<dict_table_t*>(table)) - ->indexes)); -} - -/********************************************************************//** -Gets the next index on the table. -@return index, NULL if none left */ -UNIV_INLINE -dict_index_t* -dict_table_get_next_index( -/*======================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index)); -} -#endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************************//** -Check whether the index is the clustered index. 
-@return nonzero for clustered index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_clust( -/*================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_CLUSTERED); -} -/********************************************************************//** -Check whether the index is unique. -@return nonzero for unique index, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_unique( -/*=================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_UNIQUE); -} - -/********************************************************************//** -Check whether the index is the insert buffer tree. -@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_ibuf( -/*===============*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_IBUF); -} - -/********************************************************************//** -Check whether the index is an universal index tree. -@return nonzero for universal tree, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_univ( -/*===============*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type & DICT_UNIVERSAL); -} - -/********************************************************************//** -Check whether the index is a secondary index or the insert buffer tree. 
-@return nonzero for insert buffer, zero for other indexes */ -UNIV_INLINE -ulint -dict_index_is_sec_or_ibuf( -/*======================*/ - const dict_index_t* index) /*!< in: index */ -{ - ulint type; - - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - type = index->type; - - return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF)); -} - -/********************************************************************//** -Gets the number of user-defined columns in a table in the dictionary -cache. -@return number of user-defined (e.g., not ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_user_cols( -/*=======================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(table->n_cols - DATA_N_SYS_COLS); -} - -/********************************************************************//** -Gets the number of system columns in a table in the dictionary cache. -@return number of system (e.g., ROW_ID) columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_sys_cols( -/*======================*/ - const dict_table_t* table MY_ATTRIBUTE((unused))) /*!< in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(table->cached); - - return(DATA_N_SYS_COLS); -} - -/********************************************************************//** -Gets the number of all columns (also system) in a table in the dictionary -cache. -@return number of columns of a table */ -UNIV_INLINE -ulint -dict_table_get_n_cols( -/*==================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(table->n_cols); -} - -/********************************************************************//** -Gets the approximately estimated number of rows in the table. 
-@return estimated number of rows */ -UNIV_INLINE -ib_uint64_t -dict_table_get_n_rows( -/*==================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table->stat_initialized); - - return(table->stat_n_rows); -} - -/********************************************************************//** -Increment the number of rows in the table by one. -Notice that this operation is not protected by any latch, the number is -approximate. */ -UNIV_INLINE -void -dict_table_n_rows_inc( -/*==================*/ - dict_table_t* table) /*!< in/out: table */ -{ - if (table->stat_initialized) { - ib_uint64_t n_rows = table->stat_n_rows; - if (n_rows < 0xFFFFFFFFFFFFFFFFULL) { - table->stat_n_rows = n_rows + 1; - } - } -} - -/********************************************************************//** -Decrement the number of rows in the table by one. -Notice that this operation is not protected by any latch, the number is -approximate. */ -UNIV_INLINE -void -dict_table_n_rows_dec( -/*==================*/ - dict_table_t* table) /*!< in/out: table */ -{ - if (table->stat_initialized) { - ib_uint64_t n_rows = table->stat_n_rows; - if (n_rows > 0) { - table->stat_n_rows = n_rows - 1; - } - } -} - -#ifdef UNIV_DEBUG -/********************************************************************//** -Gets the nth column of a table. -@return pointer to column object */ -UNIV_INLINE -dict_col_t* -dict_table_get_nth_col( -/*===================*/ - const dict_table_t* table, /*!< in: table */ - ulint pos) /*!< in: position of column */ -{ - ut_ad(table); - ut_ad(pos < table->n_def); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return((dict_col_t*) (table->cols) + pos); -} - -/********************************************************************//** -Gets the given system column of a table. -@return pointer to column object */ -UNIV_INLINE -dict_col_t* -dict_table_get_sys_col( -/*===================*/ - const dict_table_t* table, /*!< in: table */ - ulint sys) /*!< in: DATA_ROW_ID, ... 
*/ -{ - dict_col_t* col; - - ut_ad(table); - ut_ad(sys < DATA_N_SYS_COLS); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - col = dict_table_get_nth_col(table, table->n_cols - - DATA_N_SYS_COLS + sys); - ut_ad(col->mtype == DATA_SYS); - ut_ad(col->prtype == (sys | DATA_NOT_NULL)); - - return(col); -} -#endif /* UNIV_DEBUG */ - -/********************************************************************//** -Gets the given system column number of a table. -@return column number */ -UNIV_INLINE -ulint -dict_table_get_sys_col_no( -/*======================*/ - const dict_table_t* table, /*!< in: table */ - ulint sys) /*!< in: DATA_ROW_ID, ... */ -{ - ut_ad(table); - ut_ad(sys < DATA_N_SYS_COLS); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(table->n_cols - DATA_N_SYS_COLS + sys); -} - -/********************************************************************//** -Check whether the table uses the compact page format. -@return TRUE if table uses the compact page format */ -UNIV_INLINE -ibool -dict_table_is_comp( -/*===============*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - -#if DICT_TF_COMPACT != 1 -#error "DICT_TF_COMPACT must be 1" -#endif - - return(table->flags & DICT_TF_COMPACT); -} - -/************************************************************************ -Check if the table has an FTS index. */ -UNIV_INLINE -ibool -dict_table_has_fts_index( -/*=====================*/ - /* out: TRUE if table has an FTS index */ - dict_table_t* table) /* in: table */ -{ - ut_ad(table); - - return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)); -} - -/********************************************************************//** -Validate the table flags. -@return true if valid. 
*/ -UNIV_INLINE -bool -dict_tf_is_valid( -/*=============*/ - ulint flags) /*!< in: table flags */ -{ - ulint compact = DICT_TF_GET_COMPACT(flags); - ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); - ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); - ulint unused = DICT_TF_GET_UNUSED(flags); - ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); - ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); - ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); - ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); - - /* Make sure there are no bits that we do not know about. */ - if (unused != 0) { - fprintf(stderr, - "InnoDB: Error: table unused flags are " ULINTPF - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact " ULINTPF " atomic_blobs " ULINTPF - "\nInnoDB: unused " ULINTPF " data_dir " ULINTPF - " zip_ssize " ULINTPF - "\nInnoDB: page_compression " ULINTPF - " page_compression_level " ULINTPF - "\nInnoDB: atomic_writes " ULINTPF "\n", - unused, - compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - - return(false); - - } else if (atomic_blobs) { - /* Barracuda row formats COMPRESSED and DYNAMIC build on - the page structure introduced for the COMPACT row format - by allowing keys in secondary indexes to be made from - data stored off-page in the clustered index. 
*/ - - if (!compact) { - fprintf(stderr, - "InnoDB: Error: table compact flags are " - ULINTPF - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact " ULINTPF - " atomic_blobs " ULINTPF "\n" - "InnoDB: unused " ULINTPF - " data_dir " ULINTPF " zip_ssize " ULINTPF - "\nInnoDB: page_compression " ULINTPF - " page_compression_level " ULINTPF - "\nInnoDB: atomic_writes " ULINTPF "\n", - compact, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); - } - - } else if (zip_ssize) { - - /* Antelope does not support COMPRESSED row format. */ - fprintf(stderr, - "InnoDB: Error: table flags are " ULINTPF - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact " ULINTPF " atomic_blobs " ULINTPF - "\nInnoDB: unused " ULINTPF " data_dir " ULINTPF - " zip_ssize " ULINTPF - "\nInnoDB: page_compression " ULINTPF - " page_compression_level " ULINTPF - "\nInnoDB: atomic_writes " ULINTPF "\n", - flags, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); - } - - if (zip_ssize) { - - /* COMPRESSED row format must have compact and atomic_blobs - bits set and validate the number is within allowed range. 
*/ - - if (!compact - || !atomic_blobs - || zip_ssize > PAGE_ZIP_SSIZE_MAX) { - - fprintf(stderr, - "InnoDB: Error: table compact flags are " - ULINTPF - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact " ULINTPF - " atomic_blobs " ULINTPF "\n" - "InnoDB: unused " ULINTPF - " data_dir " ULINTPF " zip_ssize " ULINTPF - "\nInnoDB: page_compression " ULINTPF - " page_compression_level " ULINTPF - "\nInnoDB: atomic_writes " ULINTPF "\n", - flags, - compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - - ); - return(false); - } - } - - if (page_compression || page_compression_level) { - /* Page compression format must have compact and - atomic_blobs and page_compression_level requires - page_compression */ - if (!compact - || !page_compression - || !atomic_blobs) { - - fprintf(stderr, - "InnoDB: Error: table flags are " ULINTPF - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact " ULINTPF - " atomic_blobs " ULINTPF "\n" - "InnoDB: unused " ULINTPF - " data_dir " ULINTPF " zip_ssize " ULINTPF - "\nInnoDB: page_compression " ULINTPF - " page_compression_level " ULINTPF - "\nInnoDB: atomic_writes " ULINTPF "\n", - flags, compact, atomic_blobs, unused, data_dir, zip_ssize, - page_compression, page_compression_level, atomic_writes - ); - return(false); - } - } - - if (atomic_writes) { - - if(atomic_writes > ATOMIC_WRITES_OFF) { - - fprintf(stderr, - "InnoDB: Error: table flags are " ULINTPF - " in the data dictionary and are corrupted\n" - "InnoDB: Error: data dictionary flags are\n" - "InnoDB: compact " ULINTPF - " atomic_blobs " ULINTPF "\n" - "InnoDB: unused " ULINTPF - " data_dir " ULINTPF " zip_ssize " ULINTPF - "\nInnoDB: page_compression " ULINTPF - " page_compression_level " ULINTPF - "\nInnoDB: atomic_writes " ULINTPF "\n", - flags, compact, atomic_blobs, unused, data_dir, zip_ssize, - 
page_compression, page_compression_level, atomic_writes - ); - return(false); - } - } - - /* CREATE TABLE ... DATA DIRECTORY is supported for any row format, - so the DATA_DIR flag is compatible with all other table flags. */ - - return(true); -} - -/********************************************************************//** -Validate a SYS_TABLES TYPE field and return it. -@return Same as input after validating it as a SYS_TABLES TYPE field. -If there is an error, return ULINT_UNDEFINED. */ -UNIV_INLINE -ulint -dict_sys_tables_type_validate( -/*==========================*/ - ulint type, /*!< in: SYS_TABLES.TYPE */ - ulint n_cols) /*!< in: SYS_TABLES.N_COLS */ -{ - ulint low_order_bit = DICT_TF_GET_COMPACT(type); - ulint redundant = !(n_cols & DICT_N_COLS_COMPACT); - ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); - ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); - ulint unused = DICT_TF_GET_UNUSED(type); - ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); - ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); - ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); - - /* The low order bit of SYS_TABLES.TYPE is always set to 1. - If the format is UNIV_FORMAT_B or higher, this field is the same - as dict_table_t::flags. Zero is not allowed here. */ - if (!low_order_bit) { - return(ULINT_UNDEFINED); - } - - if (redundant) { - if (zip_ssize || atomic_blobs) { - return(ULINT_UNDEFINED); - } - } - - /* Make sure there are no bits that we do not know about. */ - if (unused) { - return(ULINT_UNDEFINED); - } - - if (atomic_blobs) { - /* Barracuda row formats COMPRESSED and DYNAMIC build on - the page structure introduced for the COMPACT row format - by allowing keys in secondary indexes to be made from - data stored off-page in the clustered index. - - The DICT_N_COLS_COMPACT flag should be in N_COLS, - but we already know that. */ - } else if (zip_ssize) { - /* Antelope does not support COMPRESSED format. 
*/ - return(ULINT_UNDEFINED); - } - - if (zip_ssize) { - /* COMPRESSED row format must have low_order_bit and - atomic_blobs bits set and the DICT_N_COLS_COMPACT flag - should be in N_COLS, but we already know about the - low_order_bit and DICT_N_COLS_COMPACT flags. */ - if (!atomic_blobs) { - return(ULINT_UNDEFINED); - } - - /* Validate that the number is within allowed range. */ - if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { - return(ULINT_UNDEFINED); - } - } - - /* There is nothing to validate for the data_dir field. - CREATE TABLE ... DATA DIRECTORY is supported for any row - format, so the DATA_DIR flag is compatible with any other - table flags. However, it is not used with TEMPORARY tables.*/ - - if (page_compression || page_compression_level) { - /* page compressed row format must have low_order_bit and - atomic_blobs bits set and the DICT_N_COLS_COMPACT flag - should be in N_COLS, but we already know about the - low_order_bit and DICT_N_COLS_COMPACT flags. */ - - if (!atomic_blobs || !page_compression) { - return(ULINT_UNDEFINED); - } - } - - /* Validate that the atomic writes number is within allowed range. */ - if (atomic_writes > ATOMIC_WRITES_OFF) { - return(ULINT_UNDEFINED); - } - - /* Return the validated SYS_TABLES.TYPE. */ - return(type); -} - -/********************************************************************//** -Determine the file format from dict_table_t::flags -The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any -other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set. 
-@return file format version */ -UNIV_INLINE -rec_format_t -dict_tf_get_rec_format( -/*===================*/ - ulint flags) /*!< in: dict_table_t::flags */ -{ - ut_a(dict_tf_is_valid(flags)); - - if (!DICT_TF_GET_COMPACT(flags)) { - return(REC_FORMAT_REDUNDANT); - } - - if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) { - return(REC_FORMAT_COMPACT); - } - - if (DICT_TF_GET_ZIP_SSIZE(flags)) { - return(REC_FORMAT_COMPRESSED); - } - - return(REC_FORMAT_DYNAMIC); -} - -/********************************************************************//** -Determine the file format from a dict_table_t::flags. -@return file format version */ -UNIV_INLINE -ulint -dict_tf_get_format( -/*===============*/ - ulint flags) /*!< in: dict_table_t::flags */ -{ - if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) { - return(UNIV_FORMAT_B); - } - - return(UNIV_FORMAT_A); -} - -/********************************************************************//** -Determine the file format of a table. -@return file format version */ -UNIV_INLINE -ulint -dict_table_get_format( -/*==================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - - return(dict_tf_get_format(table->flags)); -} - -/********************************************************************//** -Set the file format and zip size in a dict_table_t::flags. If zip size -is not needed, it should be 0. 
*/
UNIV_INLINE
void
dict_tf_set(
/*========*/
	ulint*		flags,		/*!< in/out: table flags */
	rec_format_t	format,		/*!< in: file format */
	ulint		zip_ssize,	/*!< in: zip shift size */
	bool		use_data_dir,	/*!< in: table uses DATA DIRECTORY
					*/
	bool		page_compressed,/*!< in: table uses page compressed
					pages */
	ulint		page_compression_level, /*!< in: table page compression
						level */
	ulint		atomic_writes)	/*!< in: table atomic writes setup */
{
	/* Bit that marks the Barracuda (off-page BLOB) formats. */
	const ulint	atomic_blobs_bit = 1 << DICT_TF_POS_ATOMIC_BLOBS;

	/* Step 1: the row format fixes the base value of the flags. */
	switch (format) {
	case REC_FORMAT_REDUNDANT:
		ut_ad(zip_ssize == 0);
		*flags = 0;
		break;
	case REC_FORMAT_COMPACT:
		ut_ad(zip_ssize == 0);
		*flags = DICT_TF_COMPACT;
		break;
	case REC_FORMAT_COMPRESSED:
		/* The only format that stores a zip shift size. */
		*flags = DICT_TF_COMPACT
			| atomic_blobs_bit
			| (zip_ssize << DICT_TF_POS_ZIP_SSIZE);
		break;
	case REC_FORMAT_DYNAMIC:
		ut_ad(zip_ssize == 0);
		*flags = DICT_TF_COMPACT | atomic_blobs_bit;
		break;
	}

	/* Step 2: optional attributes are OR-ed on top. */
	if (use_data_dir) {
		*flags |= (1 << DICT_TF_POS_DATA_DIR);
	}

	if (page_compressed) {
		/* Page compression also turns on the atomic blobs bit. */
		*flags |= atomic_blobs_bit;
		*flags |= (1 << DICT_TF_POS_PAGE_COMPRESSION);
		*flags |= (page_compression_level
			   << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);

		ut_ad(zip_ssize == 0);
		ut_ad(dict_tf_get_page_compression(*flags) == TRUE);
		ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level);
	}

	/* Step 3: store the atomic writes setting and verify that it
	reads back unchanged. */
	atomic_writes_t awrites = (atomic_writes_t)atomic_writes;

	*flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
	ut_a(dict_tf_get_atomic_writes(*flags) == awrites);
}

/********************************************************************//**
Convert a 32 bit integer table flags to the 32 bit integer that is
written into the tablespace header at the offset FSP_SPACE_FLAGS and is
also stored in the fil_space_t::flags field.  The following chart shows
the translation of the low order bit.  Other bits are the same.
-========================= Low order bit ========================== - | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC -dict_table_t::flags | 0 | 1 | 1 | 1 -fil_space_t::flags | 0 | 0 | 1 | 1 -================================================================== -@return tablespace flags (fil_space_t::flags) */ -UNIV_INLINE -ulint -dict_tf_to_fsp_flags( -/*=================*/ - ulint table_flags) /*!< in: dict_table_t::flags */ -{ - ulint fsp_flags; - ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL( - table_flags); - ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); - - ut_ad((DICT_TF_GET_PAGE_COMPRESSION(table_flags) == 0) - == (page_compression_level == 0)); - - DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", - return(ULINT_UNDEFINED);); - - /* Adjust bit zero. */ - fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0; - - /* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */ - fsp_flags |= table_flags - & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS); - - fsp_flags |= FSP_FLAGS_PAGE_SSIZE(); - - if (page_compression_level) { - fsp_flags |= FSP_FLAGS_MASK_PAGE_COMPRESSION; - } - - ut_a(fsp_flags_is_valid(fsp_flags)); - - if (DICT_TF_HAS_DATA_DIR(table_flags)) { - fsp_flags |= 1U << FSP_FLAGS_MEM_DATA_DIR; - } - - fsp_flags |= atomic_writes << FSP_FLAGS_MEM_ATOMIC_WRITES; - fsp_flags |= page_compression_level << FSP_FLAGS_MEM_COMPRESSION_LEVEL; - - return(fsp_flags); -} - -/********************************************************************//** -Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags -The following chart shows the translation of the low order bit. -Other bits are the same.
-========================= Low order bit ========================== - | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC -SYS_TABLES.TYPE | 1 | 1 | 1 -dict_table_t::flags | 0 | 1 | 1 -================================================================== -@return ulint containing dict_table_t::flags */ -UNIV_INLINE -ulint -dict_sys_tables_type_to_tf( -/*=======================*/ - ulint type, /*!< in: SYS_TABLES.TYPE field */ - ulint n_cols) /*!< in: SYS_TABLES.N_COLS field */ -{ - ulint flags; - ulint redundant = !(n_cols & DICT_N_COLS_COMPACT); - - /* Adjust bit zero. */ - flags = redundant ? 0 : 1; - - /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, - PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ - flags |= type & (DICT_TF_MASK_ZIP_SSIZE - | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR - | DICT_TF_MASK_PAGE_COMPRESSION - | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL - | DICT_TF_MASK_ATOMIC_WRITES - - ); - - return(flags); -} - -/********************************************************************//** -Convert a 32 bit integer table flags to the 32bit integer that is written -to a SYS_TABLES.TYPE field. The following chart shows the translation of -the low order bit. Other bits are the same. -========================= Low order bit ========================== - | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC -dict_table_t::flags | 0 | 1 | 1 -SYS_TABLES.TYPE | 1 | 1 | 1 -================================================================== -@return ulint containing SYS_TABLES.TYPE */ -UNIV_INLINE -ulint -dict_tf_to_sys_tables_type( -/*=======================*/ - ulint flags) /*!< in: dict_table_t::flags */ -{ - ulint type; - - ut_a(dict_tf_is_valid(flags)); - - /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */ - type = 1; - - /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, - PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same.
*/ - type |= flags & (DICT_TF_MASK_ZIP_SSIZE - | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR - | DICT_TF_MASK_PAGE_COMPRESSION - | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL - | DICT_TF_MASK_ATOMIC_WRITES); - - return(type); -} - -/********************************************************************//** -Extract the compressed page size from dict_table_t::flags. -These flags are in memory, so assert that they are valid. -@return compressed page size, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_tf_get_zip_size( -/*=================*/ - ulint flags) /*!< in: flags */ -{ - ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); - ulint zip_size = (zip_ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize - : 0); - - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - - return(zip_size); -} - -/********************************************************************//** -Get the compressed page size of a table, as encoded in table->flags. -@return compressed page size, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_table_zip_size( -/*================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - - return(dict_tf_get_zip_size(table->flags)); -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Obtain exclusive locks on all index trees of the table. This is to prevent -accessing index trees while InnoDB is updating internal metadata for -operations such as truncate tables.
*/ -UNIV_INLINE -void -dict_table_x_lock_indexes( -/*======================*/ - dict_table_t* table) /*!< in: table */ -{ - dict_index_t* index; - - ut_a(table); - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Loop through each index of the table and lock it in exclusive mode */ - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - rw_lock_x_lock(dict_index_get_lock(index)); - } -} - -/*********************************************************************//** -Release the exclusive locks on all index trees of the table. */ -UNIV_INLINE -void -dict_table_x_unlock_indexes( -/*========================*/ - dict_table_t* table) /*!< in: table */ -{ - dict_index_t* index; - - ut_a(table); - ut_ad(mutex_own(&(dict_sys->mutex))); - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - rw_lock_x_unlock(dict_index_get_lock(index)); - } -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************************//** -Gets the number of fields in the internal representation of an index, -including fields added by the dictionary system. -@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_fields( -/*====================*/ - const dict_index_t* index) /*!< in: an internal - representation of index (in - the dictionary cache) */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->n_fields); -} - -/********************************************************************//** -Gets the number of fields in the internal representation of an index -that uniquely determine the position of an index entry in the index, if -we do not take multiversioning into account: in the B-tree use the value -returned by dict_index_get_n_unique_in_tree.
-@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_unique( -/*====================*/ - const dict_index_t* index) /*!< in: an internal representation - of index (in the dictionary cache) */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - - return(index->n_uniq); -} - -/********************************************************************//** -Gets the number of fields in the internal representation of an index -which uniquely determine the position of an index entry in the index, if -we also take multiversioning into account. For a clustered index this is -dict_index_get_n_unique(); for any other index it is -dict_index_get_n_fields(). -@return number of fields */ -UNIV_INLINE -ulint -dict_index_get_n_unique_in_tree( -/*============================*/ - const dict_index_t* index) /*!< in: an internal representation - of index (in the dictionary cache) */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - - if (dict_index_is_clust(index)) { - - return(dict_index_get_n_unique(index)); - } - - return(dict_index_get_n_fields(index)); -} - -/********************************************************************//** -Gets the number of user-defined ordering fields in the index. In the internal -representation of clustered indexes we add the row id to the ordering fields -to make a clustered index unique, but this function returns the number of -fields the user defined in the index as ordering fields. -@return number of fields (index->n_user_defined_cols) */ -UNIV_INLINE -ulint -dict_index_get_n_ordering_defined_by_user( -/*======================================*/ - const dict_index_t* index) /*!< in: an internal representation - of index (in the dictionary cache) */ -{ - return(index->n_user_defined_cols); -} - -#ifdef UNIV_DEBUG -/********************************************************************//** -Gets the nth field of an index.
-@return pointer to field object */ -UNIV_INLINE -dict_field_t* -dict_index_get_nth_field( -/*=====================*/ - const dict_index_t* index, /*!< in: index */ - ulint pos) /*!< in: position of field */ -{ - ut_ad(index); - ut_ad(pos < index->n_def); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return((dict_field_t*) (index->fields) + pos); -} -#endif /* UNIV_DEBUG */ - -/********************************************************************//** -Returns the position of a system column in an index. -@return position, ULINT_UNDEFINED if not contained */ -UNIV_INLINE -ulint -dict_index_get_sys_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint type) /*!< in: DATA_ROW_ID, ... */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(!dict_index_is_univ(index)); - - if (dict_index_is_clust(index)) { - - return(dict_col_get_clust_pos( - dict_table_get_sys_col(index->table, type), - index)); - } - - return(dict_index_get_nth_col_pos( - index, dict_table_get_sys_col_no(index->table, type), - NULL)); -} - -/*********************************************************************//** -Gets the field column. -@return field->col, pointer to the table column */ -UNIV_INLINE -const dict_col_t* -dict_field_get_col( -/*===============*/ - const dict_field_t* field) /*!< in: index field */ -{ - ut_ad(field); - - return(field->col); -} - -/********************************************************************//** -Gets pointer to the nth column in an index. -@return column */ -UNIV_INLINE -const dict_col_t* -dict_index_get_nth_col( -/*===================*/ - const dict_index_t* index, /*!< in: index */ - ulint pos) /*!< in: position of the field */ -{ - return(dict_field_get_col(dict_index_get_nth_field(index, pos))); -} - -/********************************************************************//** -Gets the column number of the nth field in an index.
-@return column number */ -UNIV_INLINE -ulint -dict_index_get_nth_col_no( -/*======================*/ - const dict_index_t* index, /*!< in: index */ - ulint pos) /*!< in: position of the field */ -{ - return(dict_col_get_no(dict_index_get_nth_col(index, pos))); -} - -/********************************************************************//** -Looks for column n in an index. -@return position in internal representation of the index; -ULINT_UNDEFINED if not contained */ -UNIV_INLINE -ulint -dict_index_get_nth_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint n, /*!< in: column number */ - ulint* prefix_col_pos) /*!< out: col num if prefix */ -{ - return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE, - prefix_col_pos)); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Returns the minimum data size of an index record, computed as the sum of -dict_col_get_min_size() over all fields of the index. -@return minimum data size in bytes */ -UNIV_INLINE -ulint -dict_index_get_min_size( -/*====================*/ - const dict_index_t* index) /*!< in: index */ -{ - ulint n = dict_index_get_n_fields(index); - ulint size = 0; - - while (n--) { - size += dict_col_get_min_size(dict_index_get_nth_col(index, - n)); - } - - return(size); -} - -/*********************************************************************//** -Gets the space id of the root of the index tree. -@return space id */ -UNIV_INLINE -ulint -dict_index_get_space( -/*=================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->space); -} - -/*********************************************************************//** -Sets the space id of the root of the index tree.
*/ -UNIV_INLINE -void -dict_index_set_space( -/*=================*/ - dict_index_t* index, /*!< in/out: index */ - ulint space) /*!< in: space id */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - index->space = space; -} - -/*********************************************************************//** -Gets the page number of the root of the index tree. -@return page number */ -UNIV_INLINE -ulint -dict_index_get_page( -/*================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->page); -} - -/*********************************************************************//** -Gets the read-write lock of the index tree. -@return read-write lock (pointer to index->lock) */ -UNIV_INLINE -prio_rw_lock_t* -dict_index_get_lock( -/*================*/ - dict_index_t* index) /*!< in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(&(index->lock)); -} - -/********************************************************************//** -Returns free space reserved for future updates of records. This is -relevant only in the case of many consecutive inserts, as updates -which make the records bigger might fragment the index. -@return number of free bytes on page, reserved for updates -(UNIV_PAGE_SIZE / 16) */ -UNIV_INLINE -ulint -dict_index_get_space_reserve(void) -/*==============================*/ -{ - return(UNIV_PAGE_SIZE / 16); -} - -/********************************************************************//** -Gets the status of online index creation.
-@return the status */ -UNIV_INLINE -enum online_index_status -dict_index_get_online_status( -/*=========================*/ - const dict_index_t* index) /*!< in: index */ -{ - enum online_index_status status; - - status = (enum online_index_status) index->online_status; - - /* Without the index->lock protection, the online - status can change from ONLINE_INDEX_CREATION to - ONLINE_INDEX_COMPLETE (or ONLINE_INDEX_ABORTED) in - row_log_apply() once log application is done. So, to be - sure the status is ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE, - always recheck it after acquiring index->lock. */ - -#ifdef UNIV_DEBUG - switch (status) { - case ONLINE_INDEX_COMPLETE: - case ONLINE_INDEX_CREATION: - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - return(status); - } - ut_error; -#endif /* UNIV_DEBUG */ - return(status); -} - -/********************************************************************//** -Sets the status of online index creation. In debug builds the current -status is checked first: ONLINE_INDEX_ABORTED may only change to -ONLINE_INDEX_ABORTED_DROPPED, and ONLINE_INDEX_ABORTED_DROPPED may not -change at all. */ -UNIV_INLINE -void -dict_index_set_online_status( -/*=========================*/ - dict_index_t* index, /*!< in/out: index */ - enum online_index_status status) /*!< in: status */ -{ - ut_ad(!(index->type & DICT_FTS)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ -#ifdef UNIV_DEBUG - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_COMPLETE: - case ONLINE_INDEX_CREATION: - break; - case ONLINE_INDEX_ABORTED: - ut_ad(status == ONLINE_INDEX_ABORTED_DROPPED); - break; - case ONLINE_INDEX_ABORTED_DROPPED: - ut_error; - } -#endif /* UNIV_DEBUG */ - - index->online_status = status; - ut_ad(dict_index_get_online_status(index) == status); -} - -/********************************************************************//** -Determines if a secondary index is being or has been created online, -or if the table is being rebuilt online, allowing concurrent modifications -to the table.
-@retval true if the index is being or has been built online, or -if this is a clustered index and the table is being or has been rebuilt online -@retval false if the index has been created or the table has been -rebuilt completely */ -UNIV_INLINE -bool -dict_index_is_online_ddl( -/*=====================*/ - const dict_index_t* index) /*!< in: index */ -{ -#ifdef UNIV_DEBUG - if (dict_index_is_clust(index)) { - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_CREATION: - return(true); - case ONLINE_INDEX_COMPLETE: - return(false); - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - break; - } - ut_ad(0); - return(false); - } -#endif /* UNIV_DEBUG */ - - return(UNIV_UNLIKELY(dict_index_get_online_status(index) - != ONLINE_INDEX_COMPLETE)); -} - -/**********************************************************************//** -Check whether a column exists in an FTS index. -@return the offset within the vector of the first index for which -dict_index_contains_col_or_prefix() holds, or ULINT_UNDEFINED if no match */ -UNIV_INLINE -ulint -dict_table_is_fts_column( -/*=====================*/ - ib_vector_t* indexes,/*!< in: vector containing only FTS indexes */ - ulint col_no) /*!< in: col number to search for */ - -{ - ulint i; - - for (i = 0; i < ib_vector_size(indexes); ++i) { - dict_index_t* index; - - index = (dict_index_t*) ib_vector_getp(indexes, i); - - if (dict_index_contains_col_or_prefix(index, col_no)) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Determine bytes of column prefix to be stored in the undo log. Please -note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix -needs to be stored in the undo log.
-@return bytes of column prefix to be stored in the undo log */ -UNIV_INLINE -ulint -dict_max_field_len_store_undo( -/*==========================*/ - dict_table_t* table, /*!< in: table */ - const dict_col_t* col) /*!< in: column which index prefix - is based on */ -{ - ulint prefix_len = 0; - - if (dict_table_get_format(table) >= UNIV_FORMAT_B) - { - prefix_len = col->max_prefix - ? col->max_prefix - : DICT_MAX_FIELD_LEN_BY_FORMAT(table); - } - - return(prefix_len); -} - -/********************************************************************//** -Check whether the index is corrupted. An index whose table is flagged as -corrupted is reported as corrupted, too. -@return nonzero for corrupted index, zero for valid indexes */ -UNIV_INLINE -ulint -dict_index_is_corrupted( -/*====================*/ - const dict_index_t* index) /*!< in: index */ -{ - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return((index->type & DICT_CORRUPT) - || (index->table && index->table->corrupted)); -} - -/********************************************************************//** -Check if the tablespace for the table has been discarded. -@return true if the tablespace has been discarded. */ -UNIV_INLINE -bool -dict_table_is_discarded( -/*====================*/ - const dict_table_t* table) /*!< in: table to check */ -{ - return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED)); -} - -/********************************************************************//** -Check if it is a temporary table. -@return true if temporary table flag is set.
*/ -UNIV_INLINE -bool -dict_table_is_temporary( -/*====================*/ - const dict_table_t* table) /*!< in: table to check */ -{ - return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)); -} - -/**********************************************************************//** -Get the index whose first field is a given column of the table. -@return the first index of the table whose first field is the column -at position col_index of the table; ut_error is raised if there is none */ -UNIV_INLINE -dict_index_t* -dict_table_get_index_on_first_col( -/*==============================*/ - const dict_table_t* table, /*!< in: table */ - ulint col_index) /*!< in: position of column - in table */ -{ - ut_ad(col_index < table->n_cols); - - dict_col_t* column = dict_table_get_nth_col(table, col_index); - - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; index = dict_table_get_next_index(index)) { - - if (index->fields[0].col == column) { - return(index); - } - } - ut_error; - return(0); -} - -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h deleted file mode 100644 index 1a720de5bb6..00000000000 --- a/storage/xtradb/include/dict0load.h +++ /dev/null @@ -1,430 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0load.h -Loads to the memory cache database object definitions -from dictionary tables - -Created 4/24/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0load_h -#define dict0load_h - -#include "univ.i" -#include "dict0types.h" -#include "trx0types.h" -#include "ut0byte.h" -#include "mem0mem.h" -#include "btr0types.h" - -/** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */ -enum dict_system_id_t { - SYS_TABLES = 0, - SYS_INDEXES, - SYS_COLUMNS, - SYS_FIELDS, - SYS_FOREIGN, - SYS_FOREIGN_COLS, - SYS_TABLESPACES, - SYS_DATAFILES, - - /* This must be the last item. It defines the number of system tables. */ - SYS_NUM_SYSTEM_TABLES -}; - -/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */ -enum dict_table_info_t { - DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t - structure with information from - a SYS_TABLES record */ - DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t - is in the cache; if so, return it */ -}; - -/** Check type for dict_check_tablespaces_and_store_max_id() */ -enum dict_check_t { - /** No user tablespaces have been opened - (no crash recovery, no transactions recovered). */ - DICT_CHECK_NONE_LOADED = 0, - /** Some user tablespaces may have been opened - (no crash recovery; recovered table locks for transactions). */ - DICT_CHECK_SOME_LOADED, - /** All user tablespaces have been opened (crash recovery). */ - DICT_CHECK_ALL_LOADED -}; - -/********************************************************************//** -In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary -to what we already read with fil_load_single_table_tablespaces(). - -In a normal startup, we create the tablespace objects for every table in -InnoDB's data dictionary, if the corresponding .ibd file exists. -We also scan the biggest space id, and store it to fil_system. */ -UNIV_INTERN -void -dict_check_tablespaces_and_store_max_id( -/*====================================*/ - dict_check_t dict_check); /*!< in: how to check */ -/********************************************************************//** -Finds the first table name in the given database. -@return own: table name, NULL if does not exist; the caller must free -the memory in the string! */ -UNIV_INTERN -char* -dict_get_first_table_name_in_db( -/*============================*/ - const char* name); /*!< in: database name which ends in '/' */ - -/********************************************************************//** -Loads a table definition from a SYS_TABLES record to dict_table_t. -Does not load any columns or indexes. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_table_low( -/*================*/ - const char* name, /*!< in: table name */ - const rec_t* rec, /*!< in: SYS_TABLES record */ - dict_table_t** table); /*!< out,own: table, or NULL */ -/********************************************************************//** -Loads a table column definition from a SYS_COLUMNS record to -dict_table_t.
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_column_low( -/*=================*/ - dict_table_t* table, /*!< in/out: table, could be NULL - if we just populate a dict_col_t - struct with information from - a SYS_COLUMNS record */ - mem_heap_t* heap, /*!< in/out: memory heap - for temporary storage */ - dict_col_t* column, /*!< out: dict_col_t to fill, - or NULL if table != NULL */ - table_id_t* table_id, /*!< out: table id */ - const char** col_name, /*!< out: column name */ - const rec_t* rec); /*!< in: SYS_COLUMNS record */ -/********************************************************************//** -Loads an index definition from a SYS_INDEXES record to dict_index_t. -If allocate=TRUE, we will create a dict_index_t structure and fill it -accordingly. If allocate=FALSE, the dict_index_t will be supplied by -the caller and filled with information read from the record. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_index_low( -/*================*/ - byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if allocate=TRUE - and "out" when allocate=FALSE */ - const char* table_name, /*!< in: table name */ - mem_heap_t* heap, /*!< in/out: temporary memory heap */ - const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool allocate, /*!< in: TRUE=allocate *index, - FALSE=fill in a pre-allocated - *index */ - dict_index_t** index); /*!< out,own: index, or NULL */ -/********************************************************************//** -Loads an index field definition from a SYS_FIELDS record to -dict_index_t.
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_load_field_low( -/*================*/ - byte* index_id, /*!< in/out: index id (8 bytes); - an "in" value if index != NULL - and "out" if index == NULL */ - dict_index_t* index, /*!< in/out: index, could be NULL - if we just populate a dict_field_t - struct with information from - a SYS_FIELDS record */ - dict_field_t* sys_field, /*!< out: dict_field_t to be - filled */ - ulint* pos, /*!< out: field position */ - byte* last_index_id, /*!< in: last index id */ - mem_heap_t* heap, /*!< in/out: memory heap - for temporary storage */ - const rec_t* rec); /*!< in: SYS_FIELDS record */ -/********************************************************************//** -Using the table->heap, copy the null-terminated filepath into -table->data_dir_path and put a null byte before the extension. -This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path. -Make this data directory path only if it has not yet been saved. */ -UNIV_INTERN -void -dict_save_data_dir_path( -/*====================*/ - dict_table_t* table, /*!< in/out: table */ - char* filepath); /*!< in: filepath of tablespace */ -/*****************************************************************//** -Make sure the data_file_name is saved in dict_table_t if needed. Try to -read it from the file dictionary first, then from SYS_DATAFILES. */ -UNIV_INTERN -void -dict_get_and_save_data_dir_path( -/*============================*/ - dict_table_t* table, /*!< in/out: table */ - bool dict_mutex_own); /*!< in: true if dict_sys->mutex - is already owned by the caller */ -/********************************************************************//** -Loads a table definition and also all its index definitions, and also -the cluster definition if the table is a member in a cluster. Also loads -all foreign key constraints where the foreign key is in the table or where -a foreign key references columns in this table.
-@return table, NULL if does not exist; if the table is stored in an -.ibd file, but the file does not exist, then we set the -ibd_file_missing flag TRUE in the table object we return */ -UNIV_INTERN -dict_table_t* -dict_load_table( -/*============*/ - const char* name, /*!< in: table name in the - databasename/tablename format */ - ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */ - dict_err_ignore_t ignore_err); - /*!< in: errors to be ignored when loading - the table and its index definitions */ -/***********************************************************************//** -Loads a table object based on the table id. -@return table; NULL if table does not exist */ -UNIV_INTERN -dict_table_t* -dict_load_table_on_id( -/*==================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err); /*!< in: errors to ignore - when loading the table */ -/********************************************************************//** -This function is called when the database is booted. -Loads system table index definitions except for the clustered index which -is added to the dictionary cache at booting before calling this function. */ -UNIV_INTERN -void -dict_load_sys_table( -/*================*/ - dict_table_t* table); /*!< in: system table */ -/***********************************************************************//** -Loads foreign key constraints where the table is either the foreign key -holder or where the table is referenced by a foreign key. Adds these -constraints to the data dictionary. Note that we know that the dictionary -cache already contains all constraints where the other relevant table is -already in the dictionary cache.
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_load_foreigns( -/*===============*/ - const char* table_name, /*!< in: table name */ - const char** col_names, /*!< in: column names, or NULL - to use table->col_names */ - bool check_recursive,/*!< in: Whether to check - recursive load of tables - chained by FK */ - bool check_charsets, /*!< in: whether to check - charset compatibility */ - dict_err_ignore_t ignore_err) /*!< in: error to be ignored */ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); -/********************************************************************//** -Prints to the standard output information on all tables found in the data -dictionary system table. */ -UNIV_INTERN -void -dict_print(void); -/*============*/ - -/********************************************************************//** -This function opens a system table and returns the first record. -@return first record of the system table */ -UNIV_INTERN -const rec_t* -dict_startscan_system( -/*==================*/ - btr_pcur_t* pcur, /*!< out: persistent cursor to - the record */ - mtr_t* mtr, /*!< in: the mini-transaction */ - dict_system_id_t system_id); /*!< in: which system table to open */ -/********************************************************************//** -This function gets the next system table record as we scan the table. -@return the record if found, NULL if end of scan. */ -UNIV_INTERN -const rec_t* -dict_getnext_system( -/*================*/ - btr_pcur_t* pcur, /*!< in/out: persistent cursor - to the record */ - mtr_t* mtr); /*!< in: the mini-transaction */ -/********************************************************************//** -This function processes one SYS_TABLES record and populate the dict_table_t -struct for the table. Extracted out of dict_print() to be used by -both monitor table output and information schema innodb_sys_tables output.
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_tables_rec_and_mtr_commit( -/*=======================================*/ - mem_heap_t* heap, /*!< in: temporary memory heap */ - const rec_t* rec, /*!< in: SYS_TABLES record */ - dict_table_t** table, /*!< out: dict_table_t to fill */ - dict_table_info_t status, /*!< in: status bit controls - options such as whether we shall - look for dict_table_t from cache - first */ - mtr_t* mtr); /*!< in/out: mini-transaction, - will be committed */ -/********************************************************************//** -This function parses a SYS_INDEXES record and populates a dict_index_t -structure with the information from the record. For detailed information -about SYS_INDEXES fields, please refer to dict_boot() function. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_indexes_rec( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_INDEXES rec */ - dict_index_t* index, /*!< out: dict_index_t to be - filled */ - table_id_t* table_id); /*!< out: table id */ -/********************************************************************//** -This function parses a SYS_COLUMNS record and populates a dict_col_t -structure with the information from the record. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_columns_rec( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_COLUMNS rec */ - dict_col_t* column, /*!< out: dict_col_t to be filled */ - table_id_t* table_id, /*!< out: table id */ - const char** col_name); /*!< out: column name */ -/********************************************************************//** -This function parses a SYS_FIELDS record and populate a dict_field_t -structure with the information from the record.
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_fields_rec( -/*========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_FIELDS rec */ - dict_field_t* sys_field, /*!< out: dict_field_t to be - filled */ - ulint* pos, /*!< out: Field position */ - index_id_t* index_id, /*!< out: current index id */ - index_id_t last_id); /*!< in: previous index id */ -/********************************************************************//** -This function parses a SYS_FOREIGN record and populate a dict_foreign_t -structure with the information from the record. For detail information -about SYS_FOREIGN fields, please refer to dict_load_foreign() function -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_foreign_rec( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_FOREIGN rec */ - dict_foreign_t* foreign); /*!< out: dict_foreign_t to be - filled */ -/********************************************************************//** -This function parses a SYS_FOREIGN_COLS record and extract necessary -information from the record and return to caller. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_foreign_col_rec( -/*=============================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_FOREIGN_COLS rec */ - const char** name, /*!< out: foreign key constraint name */ - const char** for_col_name, /*!< out: referencing column name */ - const char** ref_col_name, /*!< out: referenced column name - in referenced table */ - ulint* pos); /*!< out: column position */ -/********************************************************************//** -This function parses a SYS_TABLESPACES record, extracts necessary -information from the record and returns to caller. 
-@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_tablespaces( -/*=========================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */ - ulint* space, /*!< out: space id */ - const char** name, /*!< out: tablespace name */ - ulint* flags); /*!< out: tablespace flags */ -/********************************************************************//** -This function parses a SYS_DATAFILES record, extracts necessary -information from the record and returns to caller. -@return error message, or NULL on success */ -UNIV_INTERN -const char* -dict_process_sys_datafiles( -/*=======================*/ - mem_heap_t* heap, /*!< in/out: heap memory */ - const rec_t* rec, /*!< in: current SYS_DATAFILES rec */ - ulint* space, /*!< out: space id */ - const char** path); /*!< out: datafile path */ - -/********************************************************************//** -Get the filepath for a spaceid from SYS_DATAFILES. This function provides -a temporary heap which is used for the table lookup, but not for the path. -The caller must free the memory for the path returned. This function can -return NULL if the space ID is not found in SYS_DATAFILES, then the caller -will assume that the ibd file is in the normal datadir. -@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for -the given space ID. NULL if space ID is zero or not found. */ -UNIV_INTERN -char* -dict_get_first_path( -/*================*/ - ulint space, /*!< in: space id */ - const char* name); /*!< in: tablespace name */ -/********************************************************************//** -Update the record for space_id in SYS_TABLESPACES to this filepath.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN -dberr_t -dict_update_filepath( -/*=================*/ - ulint space_id, /*!< in: space id */ - const char* filepath); /*!< in: filepath */ -/********************************************************************//** -Insert records into SYS_TABLESPACES and SYS_DATAFILES. -@return DB_SUCCESS if OK, dberr_t if the insert failed */ -UNIV_INTERN -dberr_t -dict_insert_tablespace_and_filepath( -/*================================*/ - ulint space, /*!< in: space id */ - const char* name, /*!< in: tablespace name */ - const char* filepath, /*!< in: filepath */ - ulint fsp_flags); /*!< in: tablespace flags */ - -#ifndef UNIV_NONINL -#include "dict0load.ic" -#endif - -#endif diff --git a/storage/xtradb/include/dict0load.ic b/storage/xtradb/include/dict0load.ic deleted file mode 100644 index 2c0f1ff38a5..00000000000 --- a/storage/xtradb/include/dict0load.ic +++ /dev/null @@ -1,26 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0load.ic -Loads to the memory cache database object definitions -from dictionary tables - -Created 4/24/1996 Heikki Tuuri -*******************************************************/ - diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h deleted file mode 100644 index 2a4422fc18b..00000000000 --- a/storage/xtradb/include/dict0mem.h +++ /dev/null @@ -1,1522 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0mem.h -Data dictionary memory object creation - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0mem_h -#define dict0mem_h - -#include "univ.i" - -#ifndef UNIV_INNOCHECKSUM - -#include "dict0types.h" -#include "data0type.h" -#include "mem0mem.h" -#include "row0types.h" -#include "rem0types.h" -#include "btr0types.h" -#ifndef UNIV_HOTBACKUP -# include "lock0types.h" -# include "que0types.h" -# include "sync0rw.h" -#endif /* !UNIV_HOTBACKUP */ -#include "ut0mem.h" -#include "ut0lst.h" -#include "ut0rnd.h" -#include "ut0byte.h" -#include "hash0hash.h" -#include "trx0types.h" -#include "fts0fts.h" -#include "os0once.h" -#include "fil0fil.h" -#include <my_crypt.h> -#include "fil0crypt.h" -#include <set> -#include <algorithm> -#include <iterator> -#include <ostream> - -/* Forward declaration. 
*/ -struct ib_rbt_t; - -/** Type flags of an index: OR'ing of the flags is allowed to define a -combination of types */ -/* @{ */ -#define DICT_CLUSTERED 1 /*!< clustered index */ -#define DICT_UNIQUE 2 /*!< unique index */ -#define DICT_UNIVERSAL 4 /*!< index which can contain records from any - other index */ -#define DICT_IBUF 8 /*!< insert buffer tree */ -#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag - in SYS_INDEXES.TYPE */ -#define DICT_FTS 32 /* FTS index; can't be combined with the - other flags */ - -#define DICT_IT_BITS 6 /*!< number of bits used for - SYS_INDEXES.TYPE */ -/* @} */ - -#if 0 /* not implemented, retained for history */ -/** Types for a table object */ -#define DICT_TABLE_ORDINARY 1 /*!< ordinary table */ -#define DICT_TABLE_CLUSTER_MEMBER 2 -#define DICT_TABLE_CLUSTER 3 /* this means that the table is - really a cluster definition */ -#endif - -/* Table and tablespace flags are generally not used for the Antelope file -format except for the low order bit, which is used differently depending on -where the flags are stored. - -==================== Low order flags bit ========================= - | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC -SYS_TABLES.TYPE | 1 | 1 | 1 -dict_table_t::flags | 0 | 1 | 1 -FSP_SPACE_FLAGS | 0 | 0 | 1 -fil_space_t::flags | 0 | 0 | 1 - -Before the 5.1 plugin, SYS_TABLES.TYPE was always DICT_TABLE_ORDINARY (1) -and the tablespace flags field was always 0. In the 5.1 plugin, these fields -were repurposed to identify compressed and dynamic row formats. - -The following types and constants describe the flags found in dict_table_t -and SYS_TABLES.TYPE. Similar flags found in fil_space_t and FSP_SPACE_FLAGS -are described in fsp0fsp.h. */ - -/* @{ */ -/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */ -#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. 
*/ -/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */ -#define DICT_TF_COMPACT 1 /*!< Compact row format. */ - -/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether -the Compact page format is used, i.e. ROW_FORMAT != REDUNDANT */ -#define DICT_N_COLS_COMPACT 0x80000000UL - -#endif /* !UNIV_INNOCHECKSUM */ - -/** Width of the COMPACT flag */ -#define DICT_TF_WIDTH_COMPACT 1 -/** Width of the ZIP_SSIZE flag */ -#define DICT_TF_WIDTH_ZIP_SSIZE 4 -/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up -BLOB and TEXT fields, storing the first 768 bytes in the clustered index. -Barracuda row formats store the whole blob or text field off-page atomically. -Secondary indexes are created from this external data using row_ext_t -to cache the BLOB prefixes. */ -#define DICT_TF_WIDTH_ATOMIC_BLOBS 1 -/** If a table is created with the MYSQL option DATA DIRECTORY and -innodb-file-per-table, an older engine will not be able to find that table. -This flag prevents older engines from attempting to open the table and -allows InnoDB to update_create_info() accordingly.
*/ -#define DICT_TF_WIDTH_DATA_DIR 1 - -/** -Width of the page compression flag -*/ -#define DICT_TF_WIDTH_PAGE_COMPRESSION 1 -#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 - -/** -Width of the page encryption flag -*/ -#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1 -#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8 - -/** -Width of atomic writes flag -DEFAULT=0, ON = 1, OFF = 2 -*/ -#define DICT_TF_WIDTH_ATOMIC_WRITES 2 - -/** Width of all the currently known table flags */ -#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ - + DICT_TF_WIDTH_ZIP_SSIZE \ - + DICT_TF_WIDTH_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_DATA_DIR \ - + DICT_TF_WIDTH_PAGE_COMPRESSION \ - + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ - + DICT_TF_WIDTH_ATOMIC_WRITES \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) - -/** A mask of all the known/used bits in table flags */ -#define DICT_TF_BIT_MASK (~(~0U << DICT_TF_BITS)) - -/** Zero relative shift position of the COMPACT field */ -#define DICT_TF_POS_COMPACT 0 -/** Zero relative shift position of the ZIP_SSIZE field */ -#define DICT_TF_POS_ZIP_SSIZE (DICT_TF_POS_COMPACT \ - + DICT_TF_WIDTH_COMPACT) -/** Zero relative shift position of the ATOMIC_BLOBS field */ -#define DICT_TF_POS_ATOMIC_BLOBS (DICT_TF_POS_ZIP_SSIZE \ - + DICT_TF_WIDTH_ZIP_SSIZE) -/** Zero relative shift position of the DATA_DIR field */ -#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_ATOMIC_BLOBS) -/** Zero relative shift position of the PAGE_COMPRESSION field */ -#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \ - + DICT_TF_WIDTH_DATA_DIR) -/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ -#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ - + DICT_TF_WIDTH_PAGE_COMPRESSION) -/** Zero relative shift position of the ATOMIC_WRITES field */ -#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ - + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) - -/** Zero relative shift position of 
the PAGE_ENCRYPTION field */ -#define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \ - + DICT_TF_WIDTH_ATOMIC_WRITES) -/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */ -#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION) -/** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \ - + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY) - -/** Bit mask of the COMPACT field */ -#define DICT_TF_MASK_COMPACT \ - ((~(~0U << DICT_TF_WIDTH_COMPACT)) \ - << DICT_TF_POS_COMPACT) -/** Bit mask of the ZIP_SSIZE field */ -#define DICT_TF_MASK_ZIP_SSIZE \ - ((~(~0U << DICT_TF_WIDTH_ZIP_SSIZE)) \ - << DICT_TF_POS_ZIP_SSIZE) -/** Bit mask of the ATOMIC_BLOBS field */ -#define DICT_TF_MASK_ATOMIC_BLOBS \ - ((~(~0U << DICT_TF_WIDTH_ATOMIC_BLOBS)) \ - << DICT_TF_POS_ATOMIC_BLOBS) -/** Bit mask of the DATA_DIR field */ -#define DICT_TF_MASK_DATA_DIR \ - ((~(~0U << DICT_TF_WIDTH_DATA_DIR)) \ - << DICT_TF_POS_DATA_DIR) -/** Bit mask of the PAGE_COMPRESSION field */ -#define DICT_TF_MASK_PAGE_COMPRESSION \ - ((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION)) \ - << DICT_TF_POS_PAGE_COMPRESSION) -/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ -#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \ - ((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \ - << DICT_TF_POS_PAGE_COMPRESSION_LEVEL) -/** Bit mask of the ATOMIC_WRITES field */ -#define DICT_TF_MASK_ATOMIC_WRITES \ - ((~(~0U << DICT_TF_WIDTH_ATOMIC_WRITES)) \ - << DICT_TF_POS_ATOMIC_WRITES) -/** Bit mask of the PAGE_ENCRYPTION field */ -#define DICT_TF_MASK_PAGE_ENCRYPTION \ - ((~(~0U << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \ - << DICT_TF_POS_PAGE_ENCRYPTION) -/** Bit mask of the PAGE_ENCRYPTION_KEY field */ -#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \ - ((~(~0U << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \ - << DICT_TF_POS_PAGE_ENCRYPTION_KEY) - -/** Return the value of the COMPACT field */ -#define 
DICT_TF_GET_COMPACT(flags) \ - (((flags) & DICT_TF_MASK_COMPACT) \ - >> DICT_TF_POS_COMPACT) -/* NOTE: every accessor in this group parenthesizes its 'flags' argument, -so that a compound argument such as (a | b) is masked and shifted as a -whole instead of being split by operator precedence. */ -/** Return the value of the ZIP_SSIZE field */ -#define DICT_TF_GET_ZIP_SSIZE(flags) \ - (((flags) & DICT_TF_MASK_ZIP_SSIZE) \ - >> DICT_TF_POS_ZIP_SSIZE) -/** Return the value of the ATOMIC_BLOBS field */ -#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \ - (((flags) & DICT_TF_MASK_ATOMIC_BLOBS) \ - >> DICT_TF_POS_ATOMIC_BLOBS) -/** Return the value of the DATA_DIR field */ -#define DICT_TF_HAS_DATA_DIR(flags) \ - (((flags) & DICT_TF_MASK_DATA_DIR) \ - >> DICT_TF_POS_DATA_DIR) - -/** Return the contents of the PAGE_ENCRYPTION field */ -#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \ - (((flags) & DICT_TF_MASK_PAGE_ENCRYPTION) \ - >> DICT_TF_POS_PAGE_ENCRYPTION) -/** Return the contents of the PAGE_ENCRYPTION_KEY field */ -#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \ - (((flags) & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \ - >> DICT_TF_POS_PAGE_ENCRYPTION_KEY) - - -/** Return the contents of the UNUSED bits (everything at or above -DICT_TF_POS_UNUSED) */ -#define DICT_TF_GET_UNUSED(flags) \ - ((flags) >> DICT_TF_POS_UNUSED) - -/** Return the value of the PAGE_COMPRESSION field */ -#define DICT_TF_GET_PAGE_COMPRESSION(flags) \ - (((flags) & DICT_TF_MASK_PAGE_COMPRESSION) \ - >> DICT_TF_POS_PAGE_COMPRESSION) -/** Return the value of the PAGE_COMPRESSION_LEVEL field */ -#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \ - (((flags) & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \ - >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL) -/** Return the value of the ATOMIC_WRITES field */ -#define DICT_TF_GET_ATOMIC_WRITES(flags) \ - (((flags) & DICT_TF_MASK_ATOMIC_WRITES) \ - >> DICT_TF_POS_ATOMIC_WRITES) -/* @} */ - -#ifndef UNIV_INNOCHECKSUM - -/** @brief Table Flags set number 2. - -These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags -will be written as 0. The column may contain garbage for tables -created with old versions of InnoDB that only implemented -ROW_FORMAT=REDUNDANT.
InnoDB engines do not check these flags -for unknown bits in order to protect backward incompatibility. */ -/* @{ */ -/** Total number of bits in table->flags2. */ -#define DICT_TF2_BITS 7 -#define DICT_TF2_BIT_MASK ~(~0U << DICT_TF2_BITS) - -/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */ -#define DICT_TF2_TEMPORARY 1 -/** The table has an internal defined DOC ID column */ -#define DICT_TF2_FTS_HAS_DOC_ID 2 -/** The table has an FTS index */ -#define DICT_TF2_FTS 4 -/** Need to add Doc ID column for FTS index build. -This is a transient bit for index build */ -#define DICT_TF2_FTS_ADD_DOC_ID 8 -/** This bit is used during table creation to indicate that it will -use its own tablespace instead of the system tablespace. */ -#define DICT_TF2_USE_TABLESPACE 16 - -/** Set when we discard/detach the tablespace */ -#define DICT_TF2_DISCARDED 32 - -/** This bit is set if all aux table names (both common tables and -index tables) of a FTS table are in HEX format. */ -#define DICT_TF2_FTS_AUX_HEX_NAME 64 -/* @} */ - -#define DICT_TF2_FLAG_SET(table, flag) \ - (table->flags2 |= (flag)) - -#define DICT_TF2_FLAG_IS_SET(table, flag) \ - (table->flags2 & (flag)) - -#define DICT_TF2_FLAG_UNSET(table, flag) \ - (table->flags2 &= ~(flag)) - -/** Tables could be chained together with Foreign key constraint. When -first load the parent table, we would load all of its descedents. -This could result in rescursive calls and out of stack error eventually. -DICT_FK_MAX_RECURSIVE_LOAD defines the maximum number of recursive loads, -when exceeded, the child table will not be loaded. It will be loaded when -the foreign constraint check needs to be run. */ -#define DICT_FK_MAX_RECURSIVE_LOAD 20 - -/** Similarly, when tables are chained together with foreign key constraints -with on cascading delete/update clause, delete from parent table could -result in recursive cascading calls. This defines the maximum number of -such cascading deletes/updates allowed. 
When exceeded, the delete from -parent table will fail, and user has to drop excessive foreign constraint -before proceeds. */ -#define FK_MAX_CASCADE_DEL 255 - -/**********************************************************************//** -Creates a table memory object. -@return own: table object */ -UNIV_INTERN -dict_table_t* -dict_mem_table_create( -/*==================*/ - const char* name, /*!< in: table name */ - ulint space, /*!< in: space where the clustered index - of the table is placed */ - ulint n_cols, /*!< in: number of columns */ - ulint flags, /*!< in: table flags */ - ulint flags2); /*!< in: table flags2 */ -/**********************************************************************//** -Determines if a table belongs to a system database -@return true if table belong to a system database */ -UNIV_INTERN -bool -dict_mem_table_is_system( -/*==================*/ - char *name); /*!< in: table name */ -/****************************************************************//** -Free a table memory object. */ -UNIV_INTERN -void -dict_mem_table_free( -/*================*/ - dict_table_t* table); /*!< in: table */ -/**********************************************************************//** -Adds a column definition to a table. */ -UNIV_INTERN -void -dict_mem_table_add_col( -/*===================*/ - dict_table_t* table, /*!< in: table */ - mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */ - const char* name, /*!< in: column name, or NULL */ - ulint mtype, /*!< in: main datatype */ - ulint prtype, /*!< in: precise type */ - ulint len) /*!< in: precision */ - MY_ATTRIBUTE((nonnull(1))); -/**********************************************************************//** -Renames a column of a table in the data dictionary cache. 
*/ -UNIV_INTERN -void -dict_mem_table_col_rename( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - unsigned nth_col,/*!< in: column index */ - const char* from, /*!< in: old column name */ - const char* to) /*!< in: new column name */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -This function populates a dict_col_t memory structure with -supplied information. */ -UNIV_INTERN -void -dict_mem_fill_column_struct( -/*========================*/ - dict_col_t* column, /*!< out: column struct to be - filled */ - ulint col_pos, /*!< in: column position */ - ulint mtype, /*!< in: main data type */ - ulint prtype, /*!< in: precise type */ - ulint col_len); /*!< in: column length */ -/**********************************************************************//** -This function populates a dict_index_t index memory structure with -supplied information. */ -UNIV_INLINE -void -dict_mem_fill_index_struct( -/*=======================*/ - dict_index_t* index, /*!< out: index to be filled */ - mem_heap_t* heap, /*!< in: memory heap */ - const char* table_name, /*!< in: table name */ - const char* index_name, /*!< in: index name */ - ulint space, /*!< in: space where the index tree is - placed, ignored if the index is of - the clustered type */ - ulint type, /*!< in: DICT_UNIQUE, - DICT_CLUSTERED, ... ORed */ - ulint n_fields); /*!< in: number of fields */ -/**********************************************************************//** -Creates an index memory object. -@return own: index object */ -UNIV_INTERN -dict_index_t* -dict_mem_index_create( -/*==================*/ - const char* table_name, /*!< in: table name */ - const char* index_name, /*!< in: index name */ - ulint space, /*!< in: space where the index tree is - placed, ignored if the index is of - the clustered type */ - ulint type, /*!< in: DICT_UNIQUE, - DICT_CLUSTERED, ...
ORed */ - ulint n_fields); /*!< in: number of fields */ -/**********************************************************************//** -Adds a field definition to an index. NOTE: does not take a copy -of the column name if the field is a column. The memory occupied -by the column name may be released only after publishing the index. */ -UNIV_INTERN -void -dict_mem_index_add_field( -/*=====================*/ - dict_index_t* index, /*!< in: index */ - const char* name, /*!< in: column name */ - ulint prefix_len); /*!< in: 0 or the column prefix length - in a MySQL index like - INDEX (textcol(25)) */ -/**********************************************************************//** -Frees an index memory object. */ -UNIV_INTERN -void -dict_mem_index_free( -/*================*/ - dict_index_t* index); /*!< in: index */ -/**********************************************************************//** -Creates and initializes a foreign constraint memory object. -@return own: foreign constraint struct */ -UNIV_INTERN -dict_foreign_t* -dict_mem_foreign_create(void); -/*=========================*/ - -/**********************************************************************//** -Sets the foreign_table_name_lookup pointer based on the value of -lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup -will point to foreign_table_name. If 2, then another string is -allocated from the heap and set to lower case. */ -UNIV_INTERN -void -dict_mem_foreign_table_name_lookup_set( -/*===================================*/ - dict_foreign_t* foreign, /*!< in/out: foreign struct */ - ibool do_alloc); /*!< in: is an alloc needed */ - -/**********************************************************************//** -Sets the referenced_table_name_lookup pointer based on the value of -lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup -will point to referenced_table_name. If 2, then another string is -allocated from the heap and set to lower case. 
*/ -UNIV_INTERN -void -dict_mem_referenced_table_name_lookup_set( -/*======================================*/ - dict_foreign_t* foreign, /*!< in/out: foreign struct */ - ibool do_alloc); /*!< in: is an alloc needed */ - -/** Create a temporary tablename like "#sql-ibtid-inc" where - tid = the Table ID - inc = a randomly initialized number that is incremented for each file -The table ID is a 64 bit integer, can use up to 20 digits, and is -initialized at bootstrap. The second number is 32 bits, can use up to 10 -digits, and is initialized at startup to a randomly distributed number. -It is hoped that the combination of these two numbers will provide a -reasonably unique temporary file name. -@param[in] heap A memory heap -@param[in] dbtab Table name in the form database/table name -@param[in] id Table id -@return A unique temporary tablename suitable for InnoDB use */ -UNIV_INTERN -char* -dict_mem_create_temporary_tablename( - mem_heap_t* heap, - const char* dbtab, - table_id_t id); - -/** Initialize dict memory variables */ - -void -dict_mem_init(void); - -/** Data structure for a column in a table */ -struct dict_col_t{ - /*----------------------*/ - /** The following are copied from dtype_t, - so that all bit-fields can be packed tightly.
*/ - /* @{ */ - unsigned prtype:32; /*!< precise type; MySQL data - type, charset code, flags to - indicate nullability, - signedness, whether this is a - binary string, whether this is - a true VARCHAR where MySQL - uses 2 bytes to store the length */ - unsigned mtype:8; /*!< main data type */ - - /* the remaining fields do not affect alphabetical ordering: */ - - unsigned len:16; /*!< length; for MySQL data this - is field->pack_length(), - except that for a >= 5.0.3 - type true VARCHAR this is the - maximum byte length of the - string data (in addition to - the string, MySQL uses 1 or 2 - bytes to store the string length) */ - - unsigned mbminmaxlen:5; /*!< minimum and maximum length of a - character, in bytes; - DATA_MBMINMAXLEN(mbminlen,mbmaxlen); - mbminlen=DATA_MBMINLEN(mbminmaxlen); - mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */ - /*----------------------*/ - /* End of definitions copied from dtype_t */ - /* @} */ - - unsigned ind:10; /*!< table column position - (starting from 0) */ - unsigned ord_part:1; /*!< nonzero if this column - appears in the ordering fields - of an index */ - unsigned max_prefix:12; /*!< maximum index prefix length on - this column. Our current max limit is - 3072 for Barracuda table */ -}; - -/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and -is the maximum indexed column length (or indexed prefix length) in -ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format, -any fixed-length field that is longer than this will be encoded as -a variable-length field. - -It is set to 3*256, so that one can create a column prefix index on -256 characters of a TEXT or VARCHAR column also in the UTF-8 -charset. In that charset, a character may take at most 3 bytes. This -constant MUST NOT BE CHANGED, or the compatibility of InnoDB data -files would be at risk! */ -#define DICT_ANTELOPE_MAX_INDEX_COL_LEN REC_ANTELOPE_MAX_INDEX_COL_LEN - -/** Find out maximum indexed column length by its table format.
-For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum -field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For -Barracuda row formats COMPRESSED and DYNAMIC, the length could -be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */ -#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \ - ((dict_table_get_format(table) < UNIV_FORMAT_B) \ - ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \ - : REC_VERSION_56_MAX_INDEX_COL_LEN) - -#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \ - ((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B) \ - ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \ - : REC_VERSION_56_MAX_INDEX_COL_LEN) - -/** Defines the maximum fixed length column size */ -#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN -#ifdef WITH_WSREP -#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500 -#endif /* WITH_WSREP */ - -/** Data structure for a field in an index */ -struct dict_field_t{ - dict_col_t* col; /*!< pointer to the table column */ - const char* name; /*!< name of the column */ - unsigned prefix_len:12; /*!< 0 or the length of the column - prefix in bytes in a MySQL index of - type, e.g., INDEX (textcol(25)); - must be smaller than - DICT_MAX_FIELD_LEN_BY_FORMAT; - NOTE that in the UTF-8 charset, MySQL - sets this to (mbmaxlen * the prefix len) - in UTF-8 chars */ - unsigned fixed_len:10; /*!< 0 or the fixed length of the - column if smaller than - DICT_ANTELOPE_MAX_INDEX_COL_LEN */ -}; - -/**********************************************************************//** -PADDING HEURISTIC BASED ON LINEAR INCREASE OF PADDING TO AVOID -COMPRESSION FAILURES -(Note: this is relevant only for compressed indexes) -GOAL: Avoid compression failures by maintaining information about the -compressibility of data. If data is not very compressible then leave -some extra space 'padding' in the uncompressed page making it more -likely that compression of less than fully packed uncompressed page will -succeed. 
- -This padding heuristic works by increasing the pad linearly until the -desired failure rate is reached. A "round" is a fixed number of -compression operations. -After each round, the compression failure rate for that round is -computed. If the failure rate is too high, then padding is incremented -by a fixed value, otherwise it's left intact. -If the compression failure is lower than the desired rate for a fixed -number of consecutive rounds, then the padding is decreased by a fixed -value. This is done to prevent overshooting the padding value, -and to accommodate the possible change in data compressibility. */ - -/** Number of zip ops in one round. */ -#define ZIP_PAD_ROUND_LEN (128) - -/** Number of successful rounds after which the padding is decreased */ -#define ZIP_PAD_SUCCESSFUL_ROUND_LIMIT (5) - -/** Amount by which padding is increased. */ -#define ZIP_PAD_INCR (128) - -/** Percentage of compression failures that are allowed in a single -round */ -extern ulong zip_failure_threshold_pct; - -/** Maximum percentage of a page that can be allowed as a pad to avoid -compression failures */ -extern ulong zip_pad_max; - -/** Data structure to hold information about how much space in -an uncompressed page should be left as padding to avoid compression -failures. This estimate is based on a self-adapting heuristic. */ -struct zip_pad_info_t { - os_fast_mutex_t* - mutex; /*!< mutex protecting the info */ - ulint pad; /*!< number of bytes used as pad */ - ulint success;/*!< successful compression ops during - current round */ - ulint failure;/*!< failed compression ops during - current round */ - ulint n_rounds;/*!< number of currently successful - rounds */ - volatile os_once::state_t - mutex_created; - /*!< Creation state of mutex member */ -}; - -/** Number of samples of data size kept when page compression fails for -a certain index.*/ -#define STAT_DEFRAG_DATA_SIZE_N_SAMPLE 10 - -/** Data structure for an index. 
Most fields will be -initialized to 0, NULL or FALSE in dict_mem_index_create(). */ -struct dict_index_t{ - index_id_t id; /*!< id of the index */ - prio_rw_lock_t* search_latch; /*!< latch protecting the AHI partition - corresponding to this index */ - hash_table_t* search_table; /*!< hash table protected by - search_latch */ - mem_heap_t* heap; /*!< memory heap */ - const char* name; /*!< index name */ - const char* table_name;/*!< table name */ - dict_table_t* table; /*!< back pointer to table */ -#ifndef UNIV_HOTBACKUP - unsigned space:32; - /*!< space where the index tree is placed */ - unsigned page:32;/*!< index tree root page number */ -#endif /* !UNIV_HOTBACKUP */ - unsigned type:DICT_IT_BITS; - /*!< index type: DICT_CLUSTERED, DICT_UNIQUE, - DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT - flags ORed */ -#define MAX_KEY_LENGTH_BITS 12 - unsigned trx_id_offset:MAX_KEY_LENGTH_BITS; - /*!< position of the trx id column - in a clustered index record, if the fields - before it are known to be of a fixed size, - 0 otherwise */ -#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH -# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH -#endif - unsigned n_user_defined_cols:10; - /*!< number of columns the user defined to - be in the index: in the internal - representation we add more columns */ - unsigned n_uniq:10;/*!< number of fields from the beginning - which are enough to determine an index - entry uniquely */ - unsigned n_def:10;/*!< number of fields defined so far */ - unsigned n_fields:10;/*!< number of fields in the index */ - unsigned n_nullable:10;/*!< number of nullable fields */ - unsigned cached:1;/*!< TRUE if the index object is in the - dictionary cache */ - unsigned to_be_dropped:1; - /*!< TRUE if the index is to be dropped; - protected by dict_operation_lock */ - unsigned online_status:2; - /*!< enum online_index_status. - Transitions from ONLINE_INDEX_COMPLETE (to - ONLINE_INDEX_CREATION) are protected - by dict_operation_lock and - dict_sys->mutex. 
Other changes are - protected by index->lock. */ - dict_field_t* fields; /*!< array of field descriptions */ -#ifndef UNIV_HOTBACKUP - UT_LIST_NODE_T(dict_index_t) - indexes;/*!< list of indexes of the table */ - btr_search_t* search_info; - /*!< info used in optimistic searches */ - row_log_t* online_log; - /*!< the log of modifications - during online index creation; - valid when online_status is - ONLINE_INDEX_CREATION */ - /*----------------------*/ - /** Statistics for query optimization */ - /* @{ */ - ib_uint64_t* stat_n_diff_key_vals; - /*!< approximate number of different - key values for this index, for each - n-column prefix where 1 <= n <= - dict_get_n_unique(index) (the array is - indexed from 0 to n_uniq-1); we - periodically calculate new - estimates */ - ib_uint64_t* stat_n_sample_sizes; - /*!< number of pages that were sampled - to calculate each of stat_n_diff_key_vals[], - e.g. stat_n_sample_sizes[3] pages were sampled - to get the number stat_n_diff_key_vals[3]. */ - ib_uint64_t* stat_n_non_null_key_vals; - /*!< approximate number of non-null key values - for this index, for each column where - 1 <= n <= dict_get_n_unique(index) (the array - is indexed from 0 to n_uniq-1); This - is used when innodb_stats_method is - "nulls_ignored". */ - ulint stat_index_size; - /*!< approximate index size in - database pages */ - ulint stat_n_leaf_pages; - /*!< approximate number of leaf pages in the - index tree */ - bool stats_error_printed; - /*!< has a persistent statistics error - already been printed for this index? */ - /* @} */ - /** Statistics for defragmentation, these numbers are estimations and - could be very inaccurate at certain times, e.g. right after restart, - during defragmentation, etc. */ - /* @{ */ - ulint stat_defrag_modified_counter; - ulint stat_defrag_n_pages_freed; - /*!< number of pages freed by defragmentation. */ - ulint stat_defrag_n_page_split; - /*!< number of page splits since last full index - defragmentation. 
*/ - ulint stat_defrag_data_size_sample[STAT_DEFRAG_DATA_SIZE_N_SAMPLE]; - /*!< data size when compression failure happened - the most recent STAT_DEFRAG_DATA_SIZE_N_SAMPLE - times. */ - ulint stat_defrag_sample_next_slot; - /*!< in which slot the next sample should be - saved. */ - /* @} */ - prio_rw_lock_t lock; /*!< read-write lock protecting the - upper levels of the index tree */ - trx_id_t trx_id; /*!< id of the transaction that created this - index, or 0 if the index existed - when InnoDB was started up */ - zip_pad_info_t zip_pad;/*!< Information about state of - compression failures and successes */ -#endif /* !UNIV_HOTBACKUP */ -#ifdef UNIV_BLOB_DEBUG - ib_mutex_t blobs_mutex; - /*!< mutex protecting blobs */ - ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no) - to first_blob_page_no; protected by - blobs_mutex; @see btr_blob_dbg_t */ -#endif /* UNIV_BLOB_DEBUG */ - - bool is_readable() const; /*!< @return whether the table of this index is readable; defined inline after dict_table_t */ - -#ifdef UNIV_DEBUG - ulint magic_n;/*!< magic number */ -/** Value of dict_index_t::magic_n */ -# define DICT_INDEX_MAGIC_N 76789786 -#endif -}; - -/** The status of online index creation */ -enum online_index_status { - /** the index is complete and ready for access */ - ONLINE_INDEX_COMPLETE = 0, - /** the index is being created, online - (allowing concurrent modifications) */ - ONLINE_INDEX_CREATION, - /** secondary index creation was aborted and the index - should be dropped as soon as index->table->n_ref_count reaches 0, - or online table rebuild was aborted and the clustered index - of the original table should soon be restored to - ONLINE_INDEX_COMPLETE */ - ONLINE_INDEX_ABORTED, - /** the online index creation was aborted, the index was - dropped from the data dictionary and the tablespace, and it - should be dropped from the data dictionary cache as soon as - index->table->n_ref_count reaches 0. */ - ONLINE_INDEX_ABORTED_DROPPED -}; - -/** Data structure for a foreign key constraint; an example: -FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). 
Most fields will be -initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */ -struct dict_foreign_t{ - mem_heap_t* heap; /*!< this object is allocated from - this memory heap */ - char* id; /*!< id of the constraint as a - null-terminated string */ - unsigned n_fields:10; /*!< number of indexes' first fields - for which the foreign key - constraint is defined: we allow the - indexes to contain more fields than - mentioned in the constraint, as long - as the first fields are as mentioned */ - unsigned type:6; /*!< 0 or an ORed combination of the - DICT_FOREIGN_ON_DELETE_... and - DICT_FOREIGN_ON_UPDATE_... flags */ - char* foreign_table_name;/*!< foreign table name */ - char* foreign_table_name_lookup; - /*!< foreign table name used for dict lookup */ - dict_table_t* foreign_table; /*!< table where the foreign key is */ - const char** foreign_col_names;/*!< names of the columns in the - foreign key */ - char* referenced_table_name;/*!< referenced table name */ - char* referenced_table_name_lookup; - /*!< referenced table name for dict lookup*/ - dict_table_t* referenced_table;/*!< table where the referenced key - is */ - const char** referenced_col_names;/*!< names of the referenced - columns in the referenced table */ - dict_index_t* foreign_index; /*!< foreign index; we require that - both tables contain explicitly defined - indexes for the constraint: InnoDB - does not generate new indexes - implicitly */ - dict_index_t* referenced_index;/*!< referenced index */ -}; - -std::ostream& -operator<< (std::ostream& out, const dict_foreign_t& foreign); /* stream output of one foreign key constraint; declared only, defined elsewhere */ - -struct dict_foreign_print { /* function object that writes each foreign key it is called with to the stream given at construction */ - - dict_foreign_print(std::ostream& out) - : m_out(out) - {} - - void operator()(const dict_foreign_t* foreign) { - m_out << *foreign; - } -private: - std::ostream& m_out; /* destination stream; held by reference */ -}; - -/** Compare two dict_foreign_t objects using their ids. Used in the ordering -of dict_table_t::foreign_set and dict_table_t::referenced_set. 
It returns -true if the first argument is considered to go before the second in the -strict weak ordering it defines, and false otherwise. */ -struct dict_foreign_compare { - - bool operator()( - const dict_foreign_t* lhs, - const dict_foreign_t* rhs) const - { - return(ut_strcmp(lhs->id, rhs->id) < 0); - } -}; - -/** A predicate function object to test whether a foreign key has the given -index as its referenced index. operator() returns true if -foreign->referenced_index is the index given at construction, false -otherwise. */ -struct dict_foreign_with_index { - - dict_foreign_with_index(const dict_index_t* index) - : m_index(index) - {} - - bool operator()(const dict_foreign_t* foreign) const - { - return(foreign->referenced_index == m_index); - } - - const dict_index_t* m_index; /* index to look for; compared by pointer */ -}; - -/** A function object to check if the foreign constraint is between different -tables. Returns true if foreign key constraint is between different tables, -false otherwise. */ -struct dict_foreign_different_tables { - - bool operator()(const dict_foreign_t* foreign) const - { - return(foreign->foreign_table != foreign->referenced_table); - } -}; - -/** A function object to check if the foreign key constraint has the same -name as given. If the full name of the foreign key constraint doesn't match, -then, check if removing the database name from the foreign key constraint -matches. Return true if it matches, false otherwise. 
*/ -struct dict_foreign_matches_id { - - dict_foreign_matches_id(const char* id) - : m_id(id) - {} - - bool operator()(const dict_foreign_t* foreign) const - { - if (0 == innobase_strcasecmp(foreign->id, m_id)) { /* the full constraint id compares equal */ - return(true); - } - if (const char* pos = strchr(foreign->id, '/')) { /* retry with the part after the first '/', i.e. without the database name */ - if (0 == innobase_strcasecmp(m_id, pos + 1)) { - return(true); - } - } - return(false); - } - - const char* m_id; /* name to match; the pointer is stored, the string is not copied */ -}; - -typedef std::set<dict_foreign_t*, dict_foreign_compare> dict_foreign_set; /* set of foreign keys ordered by dict_foreign_compare, i.e. by id */ - -std::ostream& -operator<< (std::ostream& out, const dict_foreign_set& fk_set); - -/** Function object to check if a foreign key object is there -in the given foreign key set or not. It returns true if the -foreign key is not found, false otherwise */ -struct dict_foreign_not_exists { - dict_foreign_not_exists(const dict_foreign_set& obj_) - : m_foreigns(obj_) - {} - - /* Return true if the given foreign key is not found in the set - given at construction */ - bool operator()(dict_foreign_t* const & foreign) const { - return(m_foreigns.find(foreign) == m_foreigns.end()); - } -private: - const dict_foreign_set& m_foreigns; /* set to search; held by reference */ -}; - -/** Validate the search order in the foreign key set. -@param[in] fk_set the foreign key set to be validated -@return true if search order is fine in the set, false otherwise. */ -bool -dict_foreign_set_validate( - const dict_foreign_set& fk_set); - -/** Validate the search order in the foreign key sets of the table -(foreign_set and referenced_set). -@param[in] table table whose foreign key sets are to be validated -@return true if foreign key sets are fine, false otherwise. */ -bool -dict_foreign_set_validate( - const dict_table_t& table); - -/*********************************************************************//** -Frees a foreign key struct. 
*/ -inline -void -dict_foreign_free( -/*==============*/ - dict_foreign_t* foreign) /*!< in, own: foreign key struct */ -{ - mem_heap_free(foreign->heap); /* the struct is allocated from its own heap (see dict_foreign_t::heap) */ -} - -/** The destructor will free all the foreign key constraints in the set -by calling dict_foreign_free() on each of the foreign key constraints. -This is used to free the allocated memory when a local set goes out -of scope. */ -struct dict_foreign_set_free { - - dict_foreign_set_free(const dict_foreign_set& foreign_set) - : m_foreign_set(foreign_set) - {} - - ~dict_foreign_set_free() - { - std::for_each(m_foreign_set.begin(), - m_foreign_set.end(), - dict_foreign_free); - } - - const dict_foreign_set& m_foreign_set; /* set whose elements are freed; held by reference, the set itself is not modified */ -}; - -/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that -a foreign key constraint is enforced, therefore RESTRICT just means no flag */ -/* @{ */ -#define DICT_FOREIGN_ON_DELETE_CASCADE 1 /*!< ON DELETE CASCADE */ -#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 /*!< ON DELETE SET NULL */ -#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 /*!< ON UPDATE CASCADE */ -#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 /*!< ON UPDATE SET NULL */ -#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 /*!< ON DELETE NO ACTION */ -#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */ -/* @} */ - -/* This flag is for synchronizing SQL DDL and memcached DML. -If table->memcached_sync_count == DICT_TABLE_IN_DDL, there is DDL running on -the table and DML from memcached will be blocked. */ -#define DICT_TABLE_IN_DDL -1 - -/** These are used when MySQL FRM and InnoDB data dictionary are -in inconsistent state. */ -typedef enum { - DICT_FRM_CONSISTENT = 0, /*!< Consistent state */ - DICT_FRM_NO_PK = 1, /*!< MySQL has no primary key - but InnoDB dictionary has - non-generated one. */ - DICT_NO_PK_FRM_HAS = 2, /*!< MySQL has primary key but - InnoDB dictionary has not. */ - DICT_FRM_INCONSISTENT_KEYS = 3 /*!< Key count mismatch */ -} dict_frm_t; - -/** Data structure for a database table. 
Most fields will be -initialized to 0, NULL or FALSE in dict_mem_table_create(). */ -struct dict_table_t{ - - - table_id_t id; /*!< id of the table */ - mem_heap_t* heap; /*!< memory heap */ - char* name; /*!< table name */ - const char* dir_path_of_temp_table;/*!< NULL or the directory path - where a TEMPORARY table that was explicitly - created by a user should be placed if - innodb_file_per_table is defined in my.cnf; - in Unix this is usually /tmp/..., in Windows - temp\... */ - char* data_dir_path; /*!< NULL or the directory path - specified by DATA DIRECTORY */ - unsigned space:32; - /*!< space where the clustered index of the - table is placed */ - unsigned flags:DICT_TF_BITS; /*!< DICT_TF_... */ - unsigned flags2:DICT_TF2_BITS; /*!< DICT_TF2_... */ - unsigned file_unreadable:1; - /*!< true if this is in a single-table - tablespace and the .ibd file is missing - or page decryption failed and page is - corrupted; then we must return in - ha_innodb.cc an error if the - user tries to query such table */ - unsigned cached:1;/*!< TRUE if the table object has been added - to the dictionary cache */ - unsigned to_be_dropped:1; - /*!< TRUE if the table is to be dropped, but - not yet actually dropped (it could be in the bk - drop list); It is turned on at the beginning - of row_drop_table_for_mysql() and turned off - just before we start to update system tables - for the drop. 
It is protected by - dict_operation_lock */ - unsigned n_def:10;/*!< number of columns defined so far */ - unsigned n_cols:10;/*!< number of columns */ - unsigned can_be_evicted:1; - /*!< TRUE if it's not an InnoDB system table - or a table that has no FK relationships */ - unsigned corrupted:1; - /*!< TRUE if table is corrupted */ - unsigned drop_aborted:1; - /*!< TRUE if some indexes should be dropped - after ONLINE_INDEX_ABORTED - or ONLINE_INDEX_ABORTED_DROPPED */ - dict_col_t* cols; /*!< array of column descriptions */ - const char* col_names; - /*!< Column names packed in a character string - "name1\0name2\0...nameN\0". Until - the string contains n_cols, it will be - allocated from a temporary heap. The final - string will be allocated from table->heap. */ - bool is_system_db; - /*!< True if the table belongs to a system - database (mysql, information_schema or - performance_schema) */ - dict_frm_t dict_frm_mismatch; - /*!< not equal to DICT_FRM_CONSISTENT if data - dictionary information and - MySQL FRM information mismatch. */ -#ifndef UNIV_HOTBACKUP - hash_node_t name_hash; /*!< hash chain node */ - hash_node_t id_hash; /*!< hash chain node */ - UT_LIST_BASE_NODE_T(dict_index_t) - indexes; /*!< list of indexes of the table */ - - dict_foreign_set foreign_set; - /*!< set of foreign key constraints - in the table; these refer to columns - in other tables */ - - dict_foreign_set referenced_set; - /*!< set of foreign key constraints - which refer to this table */ - - UT_LIST_NODE_T(dict_table_t) - table_LRU; /*!< node of the LRU list of tables */ - unsigned fk_max_recusive_level:8; - /*!< maximum recursive level we support when - loading tables chained together with FK - constraints. 
If exceeds this level, we will - stop loading child table into memory along with - its parent table */ - ulint n_foreign_key_checks_running; - /*!< count of how many foreign key check - operations are currently being performed - on the table: we cannot drop the table while - there are foreign key checks running on - it! */ - trx_id_t def_trx_id; - /*!< transaction id that last touched - the table definition, either when - loading the definition or CREATE - TABLE, or ALTER TABLE (prepare, - commit, and rollback phases) */ - trx_id_t query_cache_inv_trx_id; - /*!< transactions whose trx id is - smaller than this number are not - allowed to store to the MySQL query - cache or retrieve from it; when a trx - with undo logs commits, it sets this - to the value of the trx id counter for - the tables it had an IX lock on */ -#ifdef UNIV_DEBUG - /*----------------------*/ - ibool does_not_fit_in_memory; - /*!< this field is used to specify in - simulations tables which are so big - that disk should be accessed: disk - access is simulated by putting the - thread to sleep for a while; NOTE that - this flag is not stored to the data - dictionary on disk, and the database - will forget about value TRUE if it has - to reload the table definition from - disk */ -#endif /* UNIV_DEBUG */ - /*----------------------*/ - unsigned big_rows:1; - /*!< flag: TRUE if the maximum length of - a single row exceeds BIG_ROW_SIZE; - initialized in dict_table_add_to_cache() */ - /** Statistics for query optimization */ - /* @{ */ - - volatile os_once::state_t stats_latch_created; - /*!< Creation state of 'stats_latch'. 
*/ - - rw_lock_t* stats_latch; /*!< this latch protects: - dict_table_t::stat_initialized - dict_table_t::stat_n_rows (*) - dict_table_t::stat_clustered_index_size - dict_table_t::stat_sum_of_other_index_sizes - dict_table_t::stat_modified_counter (*) - dict_table_t::indexes*::stat_n_diff_key_vals[] - dict_table_t::indexes*::stat_index_size - dict_table_t::indexes*::stat_n_leaf_pages - (*) those are not always protected for - performance reasons. */ - unsigned stat_initialized:1; /*!< TRUE if statistics have - been calculated the first time - after database startup or table creation */ -#define DICT_TABLE_IN_USED -1 - lint memcached_sync_count; - /*!< count of how many handles are opened - to this table from memcached; DDL on the - table is NOT allowed until this count - goes to zero. If it's -1, means there's DDL - on the table, DML from memcached will be - blocked. */ - ib_time_t stats_last_recalc; - /*!< Timestamp of last recalc of the stats */ - ib_uint32_t stat_persistent; - /*!< The two bits below are set in the - ::stat_persistent member and have the following - meaning: - 1. _ON=0, _OFF=0, no explicit persistent stats - setting for this table, the value of the global - srv_stats_persistent is used to determine - whether the table has persistent stats enabled - or not - 2. _ON=0, _OFF=1, persistent stats are - explicitly disabled for this table, regardless - of the value of the global srv_stats_persistent - 3. _ON=1, _OFF=0, persistent stats are - explicitly enabled for this table, regardless - of the value of the global srv_stats_persistent - 4. _ON=1, _OFF=1, not allowed, we assert if - this ever happens. */ -#define DICT_STATS_PERSISTENT_ON (1 << 1) -#define DICT_STATS_PERSISTENT_OFF (1 << 2) - ib_uint32_t stats_auto_recalc; - /*!< The two bits below are set in the - ::stats_auto_recalc member and have - the following meaning: - 1. 
_ON=0, _OFF=0, no explicit auto recalc - setting for this table, the value of the global - srv_stats_persistent_auto_recalc is used to - determine whether the table has auto recalc - enabled or not - 2. _ON=0, _OFF=1, auto recalc is explicitly - disabled for this table, regardless of the - value of the global - srv_stats_persistent_auto_recalc - 3. _ON=1, _OFF=0, auto recalc is explicitly - enabled for this table, regardless of the - value of the global - srv_stats_persistent_auto_recalc - 4. _ON=1, _OFF=1, not allowed, we assert if - this ever happens. */ -#define DICT_STATS_AUTO_RECALC_ON (1 << 1) -#define DICT_STATS_AUTO_RECALC_OFF (1 << 2) - ulint stats_sample_pages; - /*!< the number of pages to sample for this - table during persistent stats estimation; - if this is 0, then the value of the global - srv_stats_persistent_sample_pages will be - used instead. */ - ib_uint64_t stat_n_rows; - /*!< approximate number of rows in the table; - we periodically calculate new estimates */ - ulint stat_clustered_index_size; - /*!< approximate clustered index size in - database pages */ - ulint stat_sum_of_other_index_sizes; - /*!< other indexes in database pages */ - ib_uint64_t stat_modified_counter; - /*!< when a row is inserted, updated, - or deleted, - we add 1 to this number; we calculate new - estimates for the stat_... values for the - table and the indexes when about 1 / 16 of - table has been modified; - also when the estimate operation is - called for MySQL SHOW TABLE STATUS; the - counter is reset to zero at statistics - calculation; this counter is not protected by - any latch, because this is only used for - heuristics */ - -#define BG_STAT_IN_PROGRESS ((byte)(1 << 0)) - /*!< BG_STAT_IN_PROGRESS is set in - stats_bg_flag when the background - stats code is working on this table. The DROP - TABLE code waits for this to be cleared - before proceeding. 
*/ -#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1)) - /*!< BG_STAT_SHOULD_QUIT is set in - stats_bg_flag when DROP TABLE starts - waiting on BG_STAT_IN_PROGRESS to be cleared, - the background stats thread will detect this - and will eventually quit sooner */ -#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2)) - /*!< BG_SCRUB_IN_PROGRESS is set in - stats_bg_flag when the background - scrub code is working on this table. The DROP - TABLE code waits for this to be cleared - before proceeding. */ - -#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS) - - byte stats_bg_flag; - /*!< see BG_STAT_* above. - Writes are covered by dict_sys->mutex. - Dirty reads are possible. */ - bool stats_error_printed; - /*!< Has persistent stats error been - already printed for this table ? */ - /* @} */ - /*----------------------*/ - /** The following fields are used by the - AUTOINC code. The actual collection of - tables locked during AUTOINC read/write is - kept in trx_t. In order to quickly determine - whether a transaction has locked the AUTOINC - lock we keep a pointer to the transaction - here in the autoinc_trx variable. This is to - avoid acquiring the lock_sys_t::mutex and - scanning the vector in trx_t. 
- - When an AUTOINC lock has to wait, the - corresponding lock instance is created on - the trx lock heap rather than use the - pre-allocated instance in autoinc_lock below.*/ - /* @{ */ - lock_t* autoinc_lock; - /*!< a buffer for an AUTOINC lock - for this table: we allocate the memory here - so that individual transactions can get it - and release it without a need to allocate - space from the lock heap of the trx: - otherwise the lock heap would grow rapidly - if we do a large insert from a select */ - ib_mutex_t* autoinc_mutex; - /*!< mutex protecting the autoincrement - counter */ - - /** Creation state of autoinc_mutex member */ - volatile os_once::state_t - autoinc_mutex_created; - - ib_uint64_t autoinc;/*!< autoinc counter value to give to the - next inserted row */ - ulong n_waiting_or_granted_auto_inc_locks; - /*!< This counter is used to track the number - of granted and pending autoinc locks on this - table. This value is set after acquiring the - lock_sys_t::mutex but we peek the contents to - determine whether other transactions have - acquired the AUTOINC lock or not. Of course - only one transaction can be granted the - lock but there can be multiple waiters. */ - const trx_t* autoinc_trx; - /*!< The transaction that currently holds the - AUTOINC lock on this table. - Protected by lock_sys->mutex. */ - fts_t* fts; /*!< FTS specific state variables */ - /* @} */ - /*----------------------*/ - - ib_quiesce_t quiesce;/*!< Quiescing states, protected by the - dict_index_t::lock. ie. we can only change - the state if we acquire all the latches - (dict_index_t::lock) in X mode of this table's - indexes. */ - - /*----------------------*/ - ulint n_rec_locks; - /*!< Count of the number of record locks on - this table. We use this to determine whether - we can evict the table from the dictionary - cache. It is protected by lock_sys->mutex. 
*/ - ulint n_ref_count; - /*!< count of how many handles are opened - to this table; dropping of the table is - NOT allowed until this count gets to zero; - MySQL does NOT itself check the number of - open handles at drop */ - UT_LIST_BASE_NODE_T(lock_t) - locks; /*!< list of locks on the table; protected - by lock_sys->mutex */ - -#endif /* !UNIV_HOTBACKUP */ - - /** Check whether the table is readable, i.e. the - file_unreadable flag (missing .ibd file of a single-table - tablespace, page decryption failure and/or page corruption) - is not set. - @return true if table is readable - @retval false if table is not readable */ - inline bool is_readable() const - { - return(UNIV_LIKELY(!file_unreadable)); - } - -#ifdef UNIV_DEBUG - ulint magic_n;/*!< magic number */ -/** Value of dict_table_t::magic_n */ -# define DICT_TABLE_MAGIC_N 76333786 -#endif /* UNIV_DEBUG */ -}; - -/** Check whether the table that this index belongs to is readable, -i.e. table->file_unreadable (missing .ibd file, page decryption failure -and/or page corruption) is not set. -@return true if table is readable -@retval false if table is not readable */ -inline bool dict_index_t::is_readable() const -{ - return(UNIV_LIKELY(!table->file_unreadable)); -} - -/** A function object to add the foreign key constraint to the referenced set -of the referenced table, if it exists in the dictionary cache. */ -struct dict_foreign_add_to_referenced_table { - void operator()(dict_foreign_t* foreign) const - { - if (dict_table_t* table = foreign->referenced_table) { - std::pair<dict_foreign_set::iterator, bool> ret - = table->referenced_set.insert(foreign); - ut_a(ret.second); /* the constraint must not already have been in the set */ - } - } -}; - -/** Destroy the autoinc latch of the given table. -This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. 
-@param[in,out] table table whose stats latch to destroy */ -inline -void -dict_table_autoinc_destroy( - dict_table_t* table) /* in/out: table whose autoinc_mutex (not a stats latch) is freed */ -{ - if (table->autoinc_mutex_created == os_once::DONE - && table->autoinc_mutex != NULL) { /* only free a mutex that was actually created */ - mutex_free(table->autoinc_mutex); - delete table->autoinc_mutex; - } -} - -/** Allocate and init the autoinc latch of a given table. -This function must not be called concurrently on the same table object. -@param[in,out] table_void table whose autoinc latch to create */ -void -dict_table_autoinc_alloc( - void* table_void); - -/** Allocate and init the zip_pad_mutex of a given index. -This function must not be called concurrently on the same index object. -@param[in,out] index_void index whose zip_pad_mutex to create */ -void -dict_index_zip_pad_alloc( - void* index_void); - -/** Request for lazy creation of the autoinc latch of a given table. -This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. -@param[in,out] table table whose autoinc latch is to be created. */ -inline -void -dict_table_autoinc_create_lazy( - dict_table_t* table) -{ -#ifdef HAVE_ATOMIC_BUILTINS - table->autoinc_mutex = NULL; - table->autoinc_mutex_created = os_once::NEVER_DONE; /* nothing is allocated here; creation is deferred */ -#else /* HAVE_ATOMIC_BUILTINS */ - dict_table_autoinc_alloc(table); /* without atomic builtins the mutex is allocated right away */ - table->autoinc_mutex_created = os_once::DONE; -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/** Request a lazy creation of dict_index_t::zip_pad::mutex. -This function is only called from either single threaded environment -or from a thread that has not shared the table object with other threads. 
-@param[in,out] index index whose zip_pad mutex is to be created */ -inline -void -dict_index_zip_pad_mutex_create_lazy( - dict_index_t* index) -{ -#ifdef HAVE_ATOMIC_BUILTINS - index->zip_pad.mutex = NULL; - index->zip_pad.mutex_created = os_once::NEVER_DONE; /* nothing is allocated here; creation is deferred */ -#else /* HAVE_ATOMIC_BUILTINS */ - dict_index_zip_pad_alloc(index); /* without atomic builtins the mutex is allocated right away */ - index->zip_pad.mutex_created = os_once::DONE; -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/** Destroy the zip_pad_mutex of the given index. -This function is only called from either single threaded environment -or from a thread that has not shared the index object with other threads. -@param[in,out] index index whose zip_pad mutex to destroy */ -inline -void -dict_index_zip_pad_mutex_destroy( - dict_index_t* index) -{ - if (index->zip_pad.mutex_created == os_once::DONE - && index->zip_pad.mutex != NULL) { /* only free a mutex that was actually created */ - os_fast_mutex_free(index->zip_pad.mutex); - delete index->zip_pad.mutex; - } -} - -/** Release the zip_pad_mutex of a given index. -@param[in,out] index index whose zip_pad_mutex is to be released */ -inline -void -dict_index_zip_pad_unlock( - dict_index_t* index) -{ - os_fast_mutex_unlock(index->zip_pad.mutex); -} - -#ifdef UNIV_DEBUG -/** Check if the current thread owns the autoinc_mutex of a given table. -@param[in] table the autoinc_mutex belongs to this table -@return true, if the current thread owns the autoinc_mutex, false otherwise.*/ -inline -bool -dict_table_autoinc_own( - const dict_table_t* table) -{ - return(mutex_own(table->autoinc_mutex)); -} -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_NONINL -#include "dict0mem.ic" -#endif - -#endif /* !UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/xtradb/include/dict0mem.ic b/storage/xtradb/include/dict0mem.ic deleted file mode 100644 index 38d51f61789..00000000000 --- a/storage/xtradb/include/dict0mem.ic +++ /dev/null @@ -1,74 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. 
All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/dict0mem.ic -Data dictionary memory object creation - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - -#include "data0type.h" -#include "dict0mem.h" -#include "fil0fil.h" - -/**********************************************************************//** -This function populates a dict_index_t index memory structure with -supplied information. If a heap is given, the index name is copied to it -and the field array is allocated from it; otherwise the name pointer is -stored as is and no field array is allocated. */ -UNIV_INLINE -void -dict_mem_fill_index_struct( -/*=======================*/ - dict_index_t* index, /*!< out: index to be filled */ - mem_heap_t* heap, /*!< in: memory heap */ - const char* table_name, /*!< in: table name */ - const char* index_name, /*!< in: index name */ - ulint space, /*!< in: space where the index tree is - placed, ignored if the index is of - the clustered type */ - ulint type, /*!< in: DICT_UNIQUE, - DICT_CLUSTERED, ... 
ORed */ - ulint n_fields) /*!< in: number of fields */ -{ - - if (heap) { - index->heap = heap; - index->name = mem_heap_strdup(heap, index_name); - index->fields = (dict_field_t*) mem_heap_alloc( - heap, 1 + n_fields * sizeof(dict_field_t)); - } else { - index->name = index_name; /* no heap: the caller's string is referenced, not copied */ - index->heap = NULL; - index->fields = NULL; - } - - /* Assign a ulint to the DICT_IT_BITS-wide bit-field - dict_index_t::type. Only the low-order bits that fit - in the bit-field are kept. */ - index->type = type; -#ifndef UNIV_HOTBACKUP - index->space = (unsigned int) space; - index->page = FIL_NULL; -#endif /* !UNIV_HOTBACKUP */ - index->table_name = table_name; - index->n_fields = (unsigned int) n_fields; - /* The '1 +' in the mem_heap_alloc() call above prevents - allocation of an empty mem block when n_fields is 0 */ -#ifdef UNIV_DEBUG - index->magic_n = DICT_INDEX_MAGIC_N; -#endif /* UNIV_DEBUG */ -} diff --git a/storage/xtradb/include/dict0pagecompress.h b/storage/xtradb/include/dict0pagecompress.h deleted file mode 100644 index 6503c86ffa2..00000000000 --- a/storage/xtradb/include/dict0pagecompress.h +++ /dev/null @@ -1,83 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/dict0pagecompress.h -Helper functions for extracting/storing page compression information -to dictionary. - -Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com -***********************************************************************/ - -#ifndef dict0pagecompress_h -#define dict0pagecompress_h - -/********************************************************************//** -Extract the page compression level from table flags. -@return page compression level, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_tf_get_page_compression_level( -/*===============================*/ - ulint flags) /*!< in: flags */ - __attribute__((const)); -/********************************************************************//** -Extract the page compression flag from table flags. -@return page compression flag, or false if not compressed */ -UNIV_INLINE -ibool -dict_tf_get_page_compression( -/*==========================*/ - ulint flags) /*!< in: flags */ - __attribute__((const)); - -/********************************************************************//** -Get the page compression level of a table (extracted from table->flags). -@return page compression level, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_table_page_compression_level( -/*==============================*/ - const dict_table_t* table) /*!< in: table */ - __attribute__((const)); - -/********************************************************************//** -Extract the atomic writes flag from table flags. 
-@return true if atomic writes are used, false if not used */ -UNIV_INLINE -atomic_writes_t -dict_tf_get_atomic_writes( -/*======================*/ - ulint flags) /*!< in: flags */ - __attribute__((const)); - -/********************************************************************//** -Check whether the table uses the atomic writes. -@return true if atomic writes is used, false if not */ -UNIV_INLINE -atomic_writes_t -dict_table_get_atomic_writes( -/*=========================*/ - const dict_table_t* table); /*!< in: table */ - - -#ifndef UNIV_NONINL -#include "dict0pagecompress.ic" -#endif - -#endif diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic deleted file mode 100644 index 13c2b46c51c..00000000000 --- a/storage/xtradb/include/dict0pagecompress.ic +++ /dev/null @@ -1,105 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/dict0pagecompress.ic -Inline implementation for helper functions for extracting/storing -page compression and atomic writes information to dictionary. 
- -Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com -***********************************************************************/ - -/********************************************************************//** -Extract the page compression level from dict_table_t::flags. -These flags are in memory, so assert that they are valid. -@return page compression level, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_tf_get_page_compression_level( -/*===============================*/ - ulint flags) /*!< in: flags */ -{ - ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); - - ut_ad(page_compression_level <= 9); - - return(page_compression_level); -} - -/********************************************************************//** -Check whether the table uses the page compression page format. -@return page compression level, or 0 if not compressed */ -UNIV_INLINE -ulint -dict_table_page_compression_level( -/*==============================*/ - const dict_table_t* table) /*!< in: table */ -{ - ut_ad(table); - ut_ad(dict_tf_get_page_compression(table->flags)); - - return(dict_tf_get_page_compression_level(table->flags)); -} - -/********************************************************************//** -Check whether the table uses the page compression page format. -@return true if page compressed, false if not */ -UNIV_INLINE -ibool -dict_tf_get_page_compression( -/*=========================*/ - ulint flags) /*!< in: flags */ -{ - return(DICT_TF_GET_PAGE_COMPRESSION(flags)); -} - -/********************************************************************//** -Check whether the table uses the page compression page format. 
-@return true if page compressed, false if not */ -UNIV_INLINE -ibool -dict_table_is_page_compressed( -/*==========================*/ - const dict_table_t* table) /*!< in: table */ -{ - return (dict_tf_get_page_compression(table->flags)); -} - -/********************************************************************//** -Extract the atomic writes flag from table flags. -@return enumerated value of atomic writes */ -UNIV_INLINE -atomic_writes_t -dict_tf_get_atomic_writes( -/*======================*/ - ulint flags) /*!< in: flags */ -{ - return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags)); -} - -/********************************************************************//** -Check whether the table uses the atomic writes. -@return enumerated value of atomic writes */ -UNIV_INLINE -atomic_writes_t -dict_table_get_atomic_writes( -/*=========================*/ - const dict_table_t* table) /*!< in: table */ -{ - return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags)); -} diff --git a/storage/xtradb/include/dict0priv.h b/storage/xtradb/include/dict0priv.h deleted file mode 100644 index e034662aba0..00000000000 --- a/storage/xtradb/include/dict0priv.h +++ /dev/null @@ -1,64 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0priv.h -Data dictionary private functions - -Created Fri 2 Jul 2010 13:30:38 EST - Sunny Bains -*******************************************************/ - -#ifndef dict0priv_h -#define dict0priv_h - -/**********************************************************************//** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. Note: Not to be called from outside dict0*c functions. -@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_get_low( -/*===============*/ - const char* table_name); /*!< in: table name */ - -/**********************************************************************//** -Checks if a table is in the dictionary cache. -@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_check_if_in_cache_low( -/*=============================*/ - const char* table_name); /*!< in: table name */ - -/**********************************************************************//** -Returns a table object based on table id. 
-@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_open_on_id_low( -/*=====================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err, /*!< in: errors to ignore - when loading the table */ - ibool open_only_if_in_cache); - -#ifndef UNIV_NONINL -#include "dict0priv.ic" -#endif - -#endif /* dict0priv.h */ diff --git a/storage/xtradb/include/dict0priv.ic b/storage/xtradb/include/dict0priv.ic deleted file mode 100644 index 983218af78a..00000000000 --- a/storage/xtradb/include/dict0priv.ic +++ /dev/null @@ -1,126 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/dict0priv.ic -Data dictionary system private include file - -Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains -***********************************************************************/ - -#include "dict0dict.h" -#include "dict0load.h" -#include "dict0priv.h" -#ifndef UNIV_HOTBACKUP - -/**********************************************************************//** -Gets a table; loads it to the dictionary cache if necessary. 
A low-level -function. -@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_get_low( -/*===============*/ - const char* table_name) /*!< in: table name */ -{ - dict_table_t* table; - - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = dict_table_check_if_in_cache_low(table_name); - - if (table && table->corrupted) { - fprintf(stderr, "InnoDB: table"); - ut_print_name(stderr, NULL, TRUE, table->name); - if (srv_load_corrupted) { - fputs(" is corrupted, but" - " innodb_force_load_corrupted is set\n", stderr); - } else { - fputs(" is corrupted\n", stderr); - return(NULL); - } - } - - if (table == NULL) { - table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); - } - - ut_ad(!table || table->cached); - - return(table); -} - -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_open_on_id_low( -/*======================*/ - table_id_t table_id, /*!< in: table id */ - dict_err_ignore_t ignore_err, /*!< in: errors to ignore - when loading the table */ - ibool open_only_if_in_cache) -{ - dict_table_t* table; - ulint fold; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Look for the table name in the hash table */ - fold = ut_fold_ull(table_id); - - HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, - dict_table_t*, table, ut_ad(table->cached), - table->id == table_id); - if (table == NULL && !open_only_if_in_cache) { - table = dict_load_table_on_id(table_id, ignore_err); - } - - ut_ad(!table || table->cached); - - /* TODO: should get the type information from MySQL */ - - return(table); -} - -/**********************************************************************//** -Checks if a table is in the dictionary cache. 
-@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_check_if_in_cache_low( -/*=============================*/ - const char* table_name) /*!< in: table name */ -{ - dict_table_t* table; - ulint table_fold; - - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Look for the table name in the hash table */ - table_fold = ut_fold_string(table_name); - - HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, - dict_table_t*, table, ut_ad(table->cached), - !strcmp(table->name, table_name)); - return(table); -} -#endif /*! UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/dict0stats.h b/storage/xtradb/include/dict0stats.h deleted file mode 100644 index 72501bf9429..00000000000 --- a/storage/xtradb/include/dict0stats.h +++ /dev/null @@ -1,235 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2009, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0stats.h -Code used for calculating and manipulating table statistics. 
- -Created Jan 06, 2010 Vasil Dimov -*******************************************************/ - -#ifndef dict0stats_h -#define dict0stats_h - -#include "univ.i" - -#include "db0err.h" -#include "dict0types.h" -#include "trx0types.h" - -enum dict_stats_upd_option_t { - DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the - statistics using a precise and slow - algo and save them to the persistent - storage, if the persistent storage is - not present then emit a warning and - fall back to transient stats */ - DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics - using an imprecise quick algo - without saving the results - persistently */ - DICT_STATS_EMPTY_TABLE, /* Write all zeros (or 1 where it makes sense) - into a table and its indexes' statistics - members. The resulting stats correspond to an - empty table. If the table is using persistent - statistics, then they are saved on disk. */ - DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats - from the persistent storage if the in-memory - structures have not been initialized yet, - otherwise do nothing */ -}; - -/*********************************************************************//** -Calculates new estimates for table and index statistics. This function -is relatively quick and is used to calculate transient statistics that -are not saved on disk. -This was the only way to calculate statistics before the -Persistent Statistics feature was introduced. */ -UNIV_INTERN -void -dict_stats_update_transient( -/*========================*/ - dict_table_t* table); /*!< in/out: table */ - -/*********************************************************************//** -Set the persistent statistics flag for a given table. This is set only -in the in-memory table object and is not saved on disk. It will be read -from the .frm file upon first open from MySQL after a server restart. 
*/ -UNIV_INLINE -void -dict_stats_set_persistent( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool ps_on, /*!< in: persistent stats explicitly enabled */ - ibool ps_off) /*!< in: persistent stats explicitly disabled */ - MY_ATTRIBUTE((nonnull)); - -/*********************************************************************//** -Check whether persistent statistics is enabled for a given table. -@return TRUE if enabled, FALSE otherwise */ -UNIV_INLINE -ibool -dict_stats_is_persistent_enabled( -/*=============================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Set the auto recalc flag for a given table (only honored for a persistent -stats enabled table). The flag is set only in the in-memory table object -and is not saved in InnoDB files. It will be read from the .frm file upon -first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_auto_recalc_set( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool auto_recalc_on, /*!< in: explicitly enabled */ - ibool auto_recalc_off); /*!< in: explicitly disabled */ - -/*********************************************************************//** -Check whether auto recalc is enabled for a given table. -@return TRUE if enabled, FALSE otherwise */ -UNIV_INLINE -ibool -dict_stats_auto_recalc_is_enabled( -/*==============================*/ - const dict_table_t* table); /*!< in: table */ - -/*********************************************************************//** -Initialize table's stats for the first time when opening a table. */ -UNIV_INLINE -void -dict_stats_init( -/*============*/ - dict_table_t* table); /*!< in/out: table */ - -/*********************************************************************//** -Deinitialize table's stats after the last close of the table. 
This is -used to detect "FLUSH TABLE" and refresh the stats upon next open. */ -UNIV_INLINE -void -dict_stats_deinit( -/*==============*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((nonnull)); - -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. -@return DB_* error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -dict_stats_update( -/*==============*/ - dict_table_t* table, /*!< in/out: table */ - dict_stats_upd_option_t stats_upd_option); - /*!< in: whether to (re) calc - the stats or to fetch them from - the persistent storage */ - -/*********************************************************************//** -Removes the information for a particular index's stats from the persistent -storage if it exists and if there is data stored for this index. -This function creates its own trx and commits it. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_drop_index( -/*==================*/ - const char* tname, /*!< in: table name */ - const char* iname, /*!< in: index name */ - char* errstr, /*!< out: error message if != DB_SUCCESS - is returned */ - ulint errstr_sz);/*!< in: size of the errstr buffer */ - -/*********************************************************************//** -Removes the statistics for a table and all of its indexes from the -persistent storage if it exists and if there is data stored for the table. -This function creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_drop_table( -/*==================*/ - const char* table_name, /*!< in: table name */ - char* errstr, /*!< out: error message - if != DB_SUCCESS is returned */ - ulint errstr_sz); /*!< in: size of errstr buffer */ - -/*********************************************************************//** -Fetches or calculates new estimates for index statistics. 
*/ -UNIV_INTERN -void -dict_stats_update_for_index( -/*========================*/ - dict_index_t* index) /*!< in/out: index */ - MY_ATTRIBUTE((nonnull)); - -/*********************************************************************//** -Renames a table in InnoDB persistent stats storage. -This function creates its own transaction and commits it. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_rename_table( -/*====================*/ - const char* old_name, /*!< in: old table name */ - const char* new_name, /*!< in: new table name */ - char* errstr, /*!< out: error string if != DB_SUCCESS - is returned */ - size_t errstr_sz); /*!< in: errstr size */ - -/*********************************************************************//** -Save defragmentation result. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_save_defrag_summary( - dict_index_t* index); /*!< in: index */ - -/*********************************************************************//** -Save defragmentation stats for a given index. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -dict_stats_save_defrag_stats( - dict_index_t* index); /*!< in: index */ - -/**********************************************************************//** -Clear defragmentation summary. */ -UNIV_INTERN -void -dict_stats_empty_defrag_summary( -/*==================*/ - dict_index_t* index); /*!< in: index to clear defragmentation stats */ - -/**********************************************************************//** -Clear defragmentation related index stats. 
*/ -UNIV_INTERN -void -dict_stats_empty_defrag_stats( -/*==================*/ - dict_index_t* index); /*!< in: index to clear defragmentation stats */ - - -#ifndef UNIV_NONINL -#include "dict0stats.ic" -#endif - -#endif /* dict0stats_h */ diff --git a/storage/xtradb/include/dict0stats.ic b/storage/xtradb/include/dict0stats.ic deleted file mode 100644 index ec9a9065470..00000000000 --- a/storage/xtradb/include/dict0stats.ic +++ /dev/null @@ -1,236 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0stats.ic -Code used for calculating and manipulating table statistics. - -Created Jan 23, 2012 Vasil Dimov -*******************************************************/ - -#include "univ.i" -#include "dict0dict.h" /* dict_table_stats_lock() */ -#include "dict0types.h" /* dict_table_t */ -#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */ - -/*********************************************************************//** -Set the persistent statistics flag for a given table. This is set only -in the in-memory table object and is not saved on disk. 
It will be read -from the .frm file upon first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_set_persistent( -/*======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool ps_on, /*!< in: persistent stats explicitly enabled */ - ibool ps_off) /*!< in: persistent stats explicitly disabled */ -{ - /* Not allowed to have both flags set, but a CREATE or ALTER - statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would - end up having both set. In this case we clear the OFF flag. */ - if (ps_on && ps_off) { - ps_off = FALSE; - } - - ib_uint32_t stat_persistent = 0; - - if (ps_on) { - stat_persistent |= DICT_STATS_PERSISTENT_ON; - } - - if (ps_off) { - stat_persistent |= DICT_STATS_PERSISTENT_OFF; - } - - /* we rely on this assignment to be atomic */ - table->stat_persistent = stat_persistent; -} - -/*********************************************************************//** -Check whether persistent statistics is enabled for a given table. -@return TRUE if enabled, FALSE otherwise */ -UNIV_INLINE -ibool -dict_stats_is_persistent_enabled( -/*=============================*/ - const dict_table_t* table) /*!< in: table */ -{ - /* Because of the nature of this check (non-locking) it is possible - that a table becomes: - * PS-disabled immediately after this function has returned TRUE or - * PS-enabled immediately after this function has returned FALSE. - This means that it is possible that we do: - + dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has - just been PS-disabled or - + dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has - just been PS-enabled. - This is acceptable. Avoiding this would mean that we would have to - protect the ::stat_persistent with dict_table_stats_lock() like the - other ::stat_ members which would be too big performance penalty, - especially when this function is called from - row_update_statistics_if_needed(). 
*/ - - /* we rely on this read to be atomic */ - ib_uint32_t stat_persistent = table->stat_persistent; - - if (stat_persistent & DICT_STATS_PERSISTENT_ON) { - ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF)); - return(TRUE); - } else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) { - return(FALSE); - } else { - return(srv_stats_persistent); - } -} - -/*********************************************************************//** -Set the auto recalc flag for a given table (only honored for a persistent -stats enabled table). The flag is set only in the in-memory table object -and is not saved in InnoDB files. It will be read from the .frm file upon -first open from MySQL after a server restart. */ -UNIV_INLINE -void -dict_stats_auto_recalc_set( -/*=======================*/ - dict_table_t* table, /*!< in/out: table */ - ibool auto_recalc_on, /*!< in: explicitly enabled */ - ibool auto_recalc_off) /*!< in: explicitly disabled */ -{ - ut_ad(!auto_recalc_on || !auto_recalc_off); - - ib_uint32_t stats_auto_recalc = 0; - - if (auto_recalc_on) { - stats_auto_recalc |= DICT_STATS_AUTO_RECALC_ON; - } - - if (auto_recalc_off) { - stats_auto_recalc |= DICT_STATS_AUTO_RECALC_OFF; - } - - /* we rely on this assignment to be atomic */ - table->stats_auto_recalc = stats_auto_recalc; -} - -/*********************************************************************//** -Check whether auto recalc is enabled for a given table. 
-@return TRUE if enabled, FALSE otherwise */ -UNIV_INLINE -ibool -dict_stats_auto_recalc_is_enabled( -/*==============================*/ - const dict_table_t* table) /*!< in: table */ -{ - /* we rely on this read to be atomic */ - ib_uint32_t stats_auto_recalc = table->stats_auto_recalc; - - if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) { - ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF)); - return(TRUE); - } else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) { - return(FALSE); - } else { - return(srv_stats_auto_recalc); - } -} - -/*********************************************************************//** -Initialize table's stats for the first time when opening a table. */ -UNIV_INLINE -void -dict_stats_init( -/*============*/ - dict_table_t* table) /*!< in/out: table */ -{ - ut_ad(!mutex_own(&dict_sys->mutex)); - - if (table->stat_initialized) { - return; - } - - dict_stats_upd_option_t opt; - - if (dict_stats_is_persistent_enabled(table)) { - opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY; - } else { - opt = DICT_STATS_RECALC_TRANSIENT; - } - - dict_stats_update(table, opt); -} - -/*********************************************************************//** -Deinitialize table's stats after the last close of the table. This is -used to detect "FLUSH TABLE" and refresh the stats upon next open. 
*/ -UNIV_INLINE -void -dict_stats_deinit( -/*==============*/ - dict_table_t* table) /*!< in/out: table */ -{ - ut_ad(mutex_own(&dict_sys->mutex)); - - ut_a(table->n_ref_count == 0); - - dict_table_stats_lock(table, RW_X_LATCH); - - if (!table->stat_initialized) { - dict_table_stats_unlock(table, RW_X_LATCH); - return; - } - - table->stat_initialized = FALSE; - -#ifdef UNIV_DEBUG_VALGRIND - UNIV_MEM_INVALID(&table->stat_n_rows, - sizeof(table->stat_n_rows)); - UNIV_MEM_INVALID(&table->stat_clustered_index_size, - sizeof(table->stat_clustered_index_size)); - UNIV_MEM_INVALID(&table->stat_sum_of_other_index_sizes, - sizeof(table->stat_sum_of_other_index_sizes)); - UNIV_MEM_INVALID(&table->stat_modified_counter, - sizeof(table->stat_modified_counter)); - - dict_index_t* index; - - for (index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - - ulint n_uniq = dict_index_get_n_unique(index); - - UNIV_MEM_INVALID( - index->stat_n_diff_key_vals, - n_uniq * sizeof(index->stat_n_diff_key_vals[0])); - UNIV_MEM_INVALID( - index->stat_n_sample_sizes, - n_uniq * sizeof(index->stat_n_sample_sizes[0])); - UNIV_MEM_INVALID( - index->stat_n_non_null_key_vals, - n_uniq * sizeof(index->stat_n_non_null_key_vals[0])); - UNIV_MEM_INVALID( - &index->stat_index_size, - sizeof(index->stat_index_size)); - UNIV_MEM_INVALID( - &index->stat_n_leaf_pages, - sizeof(index->stat_n_leaf_pages)); - } -#endif /* UNIV_DEBUG_VALGRIND */ - - dict_table_stats_unlock(table, RW_X_LATCH); -} diff --git a/storage/xtradb/include/dict0stats_bg.h b/storage/xtradb/include/dict0stats_bg.h deleted file mode 100644 index 8f3385eb22b..00000000000 --- a/storage/xtradb/include/dict0stats_bg.h +++ /dev/null @@ -1,155 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0stats_bg.h -Code used for background table and index stats gathering. - -Created Apr 26, 2012 Vasil Dimov -*******************************************************/ - -#ifndef dict0stats_bg_h -#define dict0stats_bg_h - -#include "univ.i" - -#include "dict0types.h" /* dict_table_t, table_id_t */ -#include "os0sync.h" /* os_event_t */ -#include "os0thread.h" /* DECLARE_THREAD */ - -/** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add() -or shutdown. Not protected by any mutex. */ -extern os_event_t dict_stats_event; - -/*****************************************************************//** -Add a table to the recalc pool, which is processed by the -background stats gathering thread. Only the table id is added to the -list, so the table can be closed after being enqueued and it will be -opened when needed. If the table does not exist later (has been DROPped), -then it will be removed from the pool and skipped. */ -UNIV_INTERN -void -dict_stats_recalc_pool_add( -/*=======================*/ - const dict_table_t* table); /*!< in: table to add */ - -/*****************************************************************//** -Delete a given table from the auto recalc pool. 
-dict_stats_recalc_pool_del() */ -UNIV_INTERN -void -dict_stats_recalc_pool_del( -/*=======================*/ - const dict_table_t* table); /*!< in: table to remove */ - -/*****************************************************************//** -Add an index in a table to the defrag pool, which is processed by the -background stats gathering thread. Only the table id and index id are -added to the list, so the table can be closed after being enqueued and -it will be opened when needed. If the table or index does not exist later -(has been DROPped), then it will be removed from the pool and skipped. */ -UNIV_INTERN -void -dict_stats_defrag_pool_add( -/*=======================*/ - const dict_index_t* index); /*!< in: table to add */ - -/*****************************************************************//** -Delete a given index from the auto defrag pool. */ -UNIV_INTERN -void -dict_stats_defrag_pool_del( -/*=======================*/ - const dict_table_t* table, /*!<in: if given, remove - all entries for the table */ - const dict_index_t* index); /*!< in: index to remove */ - -/** Yield the data dictionary latch when waiting -for the background thread to stop accessing a table. -@param trx transaction holding the data dictionary locks */ -#define DICT_STATS_BG_YIELD(trx) do { \ - row_mysql_unlock_data_dictionary(trx); \ - os_thread_sleep(250000); \ - row_mysql_lock_data_dictionary(trx); \ -} while (0) - -/*****************************************************************//** -Request the background collection of statistics to stop for a table. -@retval true when no background process is active -@retval false when it is not safe to modify the table definition */ -UNIV_INLINE -bool -dict_stats_stop_bg( -/*===============*/ - dict_table_t* table) /*!< in/out: table */ - MY_ATTRIBUTE((warn_unused_result)); - -/*****************************************************************//** -Wait until background stats thread has stopped using the specified table. 
-The caller must have locked the data dictionary using -row_mysql_lock_data_dictionary() and this function may unlock it temporarily -and restore the lock before it exits. -The background stats thread is guaranteed not to start using the specified -table after this function returns and before the caller unlocks the data -dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag -under dict_sys->mutex. */ -UNIV_INTERN -void -dict_stats_wait_bg_to_stop_using_table( -/*===================================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx); /*!< in/out: transaction to use for - unlocking/locking the data dict */ -/*****************************************************************//** -Initialize global variables needed for the operation of dict_stats_thread(). -Must be called before dict_stats_thread() is started. */ -UNIV_INTERN -void -dict_stats_thread_init(); -/*====================*/ - -/*****************************************************************//** -Free resources allocated by dict_stats_thread_init(), must be called -after dict_stats_thread() has exited. */ -UNIV_INTERN -void -dict_stats_thread_deinit(); -/*======================*/ - -/*****************************************************************//** -This is the thread for background stats gathering. It pops tables from -the auto recalc list and processes them, eventually recalculating their -statistics. -@return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(dict_stats_thread)( -/*==============================*/ - void* arg); /*!< in: a dummy parameter - required by os_thread_create */ - -/** Shut down the dict_stats_thread. 
*/ -void -dict_stats_shutdown(); - -# ifndef UNIV_NONINL -# include "dict0stats_bg.ic" -# endif - -#endif /* dict0stats_bg_h */ diff --git a/storage/xtradb/include/dict0stats_bg.ic b/storage/xtradb/include/dict0stats_bg.ic deleted file mode 100644 index 87e3225de58..00000000000 --- a/storage/xtradb/include/dict0stats_bg.ic +++ /dev/null @@ -1,45 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0stats_bg.ic -Code used for background table and index stats gathering. - -Created Feb 8, 2013 Marko Makela -*******************************************************/ - -/*****************************************************************//** -Request the background collection of statistics to stop for a table. 
-@retval true when no background process is active -@retval false when it is not safe to modify the table definition */ -UNIV_INLINE -bool -dict_stats_stop_bg( -/*===============*/ - dict_table_t* table) /*!< in/out: table */ -{ - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&dict_sys->mutex)); - - if (!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)) { - return(true); - } - - table->stats_bg_flag |= BG_STAT_SHOULD_QUIT; - return(false); -} diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h deleted file mode 100644 index 909fdf9cf3d..00000000000 --- a/storage/xtradb/include/dict0types.h +++ /dev/null @@ -1,100 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dict0types.h -Data dictionary global types - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0types_h -#define dict0types_h - -struct dict_sys_t; -struct dict_col_t; -struct dict_field_t; -struct dict_index_t; -struct dict_table_t; -struct dict_foreign_t; - -struct ind_node_t; -struct tab_node_t; - -/* Space id and page no where the dictionary header resides */ -#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ -#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO - -/* The IDs of the ibuf table and indexes are assigned as the number -DICT_IBUF_ID_MIN plus the space id */ -#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL - -typedef ib_id_t table_id_t; -typedef ib_id_t index_id_t; - -/** Error to ignore when we load table dictionary into memory. However, -the table and index will be marked as "corrupted", and the caller will -be responsible for dealing with the corrupted table or index. -Note: please define the DICT_ERR_IGNORE_* values as bits, so they can -be or-ed together */ -enum dict_err_ignore_t { - DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */ - DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root - page is FIL_NULL or incorrect value */ - DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */ - DICT_ERR_IGNORE_FK_NOKEY = 4, /*!< ignore error if any foreign - key is missing */ - DICT_ERR_IGNORE_RECOVER_LOCK = 8, - /*!< Used when recovering table locks - for resurrected transactions. - Silently load a missing - tablespace, and do not load - incomplete index definitions. 
*/ - DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */ -}; - -/** Quiescing states for flushing tables to disk. */ -enum ib_quiesce_t { - QUIESCE_NONE, - QUIESCE_START, /*!< Initialise, prepare to start */ - QUIESCE_COMPLETE /*!< All done */ -}; - -/** Prefix for tmp tables, adopted from sql/table.h */ -#define tmp_file_prefix "#sql" -#define tmp_file_prefix_length 4 -#define TEMP_FILE_PREFIX_INNODB "#sql-ib" - -#define TEMP_TABLE_PREFIX "#sql" -#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX - - -/** Enum values for atomic_writes table option */ -typedef enum { - ATOMIC_WRITES_DEFAULT = 0, - ATOMIC_WRITES_ON = 1, - ATOMIC_WRITES_OFF = 2 -} atomic_writes_t; - -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/** Flag to control insert buffer debugging. */ -extern uint ibuf_debug; -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - -#endif diff --git a/storage/xtradb/include/dyn0dyn.h b/storage/xtradb/include/dyn0dyn.h deleted file mode 100644 index 20963a1472b..00000000000 --- a/storage/xtradb/include/dyn0dyn.h +++ /dev/null @@ -1,197 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dyn0dyn.h -The dynamically allocated array - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dyn0dyn_h -#define dyn0dyn_h - -#include "univ.i" -#include "ut0lst.h" -#include "mem0mem.h" - -/** A block in a dynamically allocated array */ -struct dyn_block_t; -/** Dynamically allocated array */ -typedef dyn_block_t dyn_array_t; - -/** This is the initial 'payload' size of a dynamic array; -this must be > MLOG_BUF_MARGIN + 30! */ -#define DYN_ARRAY_DATA_SIZE 512 - -/*********************************************************************//** -Initializes a dynamic array. -@return initialized dyn array */ -UNIV_INLINE -dyn_array_t* -dyn_array_create( -/*=============*/ - dyn_array_t* arr); /*!< in/out memory buffer of - size sizeof(dyn_array_t) */ -/************************************************************//** -Frees a dynamic array. */ -UNIV_INLINE -void -dyn_array_free( -/*===========*/ - dyn_array_t* arr) /*!< in,own: dyn array */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to a buffer in it. -After copying the elements, the caller must close the buffer using -dyn_array_close. -@return pointer to the buffer */ -UNIV_INLINE -byte* -dyn_array_open( -/*===========*/ - dyn_array_t* arr, /*!< in: dynamic array */ - ulint size) /*!< in: size in bytes of the buffer; MUST be - smaller than DYN_ARRAY_DATA_SIZE! 
*/ - MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************************//** -Closes the buffer returned by dyn_array_open. */ -UNIV_INLINE -void -dyn_array_close( -/*============*/ - dyn_array_t* arr, /*!< in: dynamic array */ - const byte* ptr); /*!< in: end of used space */ -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to -the added element. The caller must copy the element to -the pointer returned. -@return pointer to the element */ -UNIV_INLINE -void* -dyn_array_push( -/*===========*/ - dyn_array_t* arr, /*!< in/out: dynamic array */ - ulint size) /*!< in: size in bytes of the element */ - MY_ATTRIBUTE((warn_unused_result)); -/************************************************************//** -Returns pointer to an element in dyn array. -@return pointer to element */ -UNIV_INLINE -void* -dyn_array_get_element( -/*==================*/ - const dyn_array_t* arr, /*!< in: dyn array */ - ulint pos) /*!< in: position of element - in bytes from array start */ - MY_ATTRIBUTE((warn_unused_result)); -/************************************************************//** -Returns the size of stored data in a dyn array. -@return data size in bytes */ -UNIV_INLINE -ulint -dyn_array_get_data_size( -/*====================*/ - const dyn_array_t* arr) /*!< in: dyn array */ - MY_ATTRIBUTE((warn_unused_result)); -/************************************************************//** -Gets the first block in a dyn array. -@param arr dyn array -@return first block */ -#define dyn_array_get_first_block(arr) (arr) -/************************************************************//** -Gets the last block in a dyn array. -@param arr dyn array -@return last block */ -#define dyn_array_get_last_block(arr) \ - ((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr)) -/********************************************************************//** -Gets the next block in a dyn array. 
-@param arr dyn array -@param block dyn array block -@return pointer to next, NULL if end of list */ -#define dyn_array_get_next_block(arr, block) \ - ((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL) -/********************************************************************//** -Gets the previous block in a dyn array. -@param arr dyn array -@param block dyn array block -@return pointer to previous, NULL if end of list */ -#define dyn_array_get_prev_block(arr, block) \ - ((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL) -/********************************************************************//** -Gets the number of used bytes in a dyn array block. -@return number of bytes used */ -UNIV_INLINE -ulint -dyn_block_get_used( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************************//** -Gets pointer to the start of data in a dyn array block. -@return pointer to data */ -UNIV_INLINE -byte* -dyn_block_get_data( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ - MY_ATTRIBUTE((warn_unused_result)); -/********************************************************//** -Pushes n bytes to a dyn array. */ -UNIV_INLINE -void -dyn_push_string( -/*============*/ - dyn_array_t* arr, /*!< in/out: dyn array */ - const byte* str, /*!< in: string to write */ - ulint len) /*!< in: string length */ - MY_ATTRIBUTE((nonnull)); - -/*#################################################################*/ - -/** @brief A block in a dynamically allocated array. -NOTE! Do not access the fields of the struct directly: the definition -appears here only for the compiler to know its size! 
*/ -struct dyn_block_t{ - mem_heap_t* heap; /*!< in the first block this is != NULL - if dynamic allocation has been needed */ - ulint used; /*!< number of data bytes used in this block; - DYN_BLOCK_FULL_FLAG is set when the block - becomes full */ - byte data[DYN_ARRAY_DATA_SIZE]; - /*!< storage for array elements */ - UT_LIST_BASE_NODE_T(dyn_block_t) base; - /*!< linear list of dyn blocks: this node is - used only in the first block */ - UT_LIST_NODE_T(dyn_block_t) list; - /*!< linear list node: used in all blocks */ -#ifdef UNIV_DEBUG - ulint buf_end;/*!< only in the debug version: if dyn - array is opened, this is the buffer - end offset, else this is 0 */ - ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */ -#endif -}; - - -#ifndef UNIV_NONINL -#include "dyn0dyn.ic" -#endif - -#endif diff --git a/storage/xtradb/include/dyn0dyn.ic b/storage/xtradb/include/dyn0dyn.ic deleted file mode 100644 index 6e97649245e..00000000000 --- a/storage/xtradb/include/dyn0dyn.ic +++ /dev/null @@ -1,298 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/dyn0dyn.ic -The dynamically allocated array - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -/** Value of dyn_block_t::magic_n */ -#define DYN_BLOCK_MAGIC_N 375767 -/** Flag for dyn_block_t::used that indicates a full block */ -#define DYN_BLOCK_FULL_FLAG 0x1000000UL - -/************************************************************//** -Adds a new block to a dyn array. -@return created block */ -UNIV_INTERN -dyn_block_t* -dyn_array_add_block( -/*================*/ - dyn_array_t* arr) /*!< in/out: dyn array */ - MY_ATTRIBUTE((warn_unused_result)); - -/********************************************************************//** -Gets the number of used bytes in a dyn array block. -@return number of bytes used */ -UNIV_INLINE -ulint -dyn_block_get_used( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ -{ - return((block->used) & ~DYN_BLOCK_FULL_FLAG); -} - -/********************************************************************//** -Gets pointer to the start of data in a dyn array block. -@return pointer to data */ -UNIV_INLINE -byte* -dyn_block_get_data( -/*===============*/ - const dyn_block_t* block) /*!< in: dyn array block */ -{ - ut_ad(block); - - return(const_cast<byte*>(block->data)); -} - -/*********************************************************************//** -Initializes a dynamic array. 
-@return initialized dyn array */ -UNIV_INLINE -dyn_array_t* -dyn_array_create( -/*=============*/ - dyn_array_t* arr) /*!< in/out: memory buffer of - size sizeof(dyn_array_t) */ -{ -#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG -# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG" -#endif - - arr->heap = NULL; - arr->used = 0; - - ut_d(arr->buf_end = 0); - ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N); - - return(arr); -} - -/************************************************************//** -Frees a dynamic array. */ -UNIV_INLINE -void -dyn_array_free( -/*===========*/ - dyn_array_t* arr) /*!< in: dyn array */ -{ - if (arr->heap != NULL) { - mem_heap_free(arr->heap); - } - - ut_d(arr->magic_n = 0); -} - -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to the added element. -The caller must copy the element to the pointer returned. -@return pointer to the element */ -UNIV_INLINE -void* -dyn_array_push( -/*===========*/ - dyn_array_t* arr, /*!< in/out: dynamic array */ - ulint size) /*!< in: size in bytes of the element */ -{ - dyn_block_t* block; - ulint used; - - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - ut_ad(size <= DYN_ARRAY_DATA_SIZE); - ut_ad(size); - - block = arr; - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - /* Get the last array block */ - - block = dyn_array_get_last_block(arr); - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - block = dyn_array_add_block(arr); - } - } - - used = block->used; - - block->used = used + size; - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - - return(block->data + used); -} - -/*********************************************************************//** -Makes room on top of a dyn array and returns a pointer to a buffer in it. -After copying the elements, the caller must close the buffer using -dyn_array_close. 
-@return pointer to the buffer */ -UNIV_INLINE -byte* -dyn_array_open( -/*===========*/ - dyn_array_t* arr, /*!< in: dynamic array */ - ulint size) /*!< in: size in bytes of the buffer; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -{ - dyn_block_t* block; - - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - ut_ad(size <= DYN_ARRAY_DATA_SIZE); - ut_ad(size); - - block = arr; - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - /* Get the last array block */ - - block = dyn_array_get_last_block(arr); - - if (block->used + size > DYN_ARRAY_DATA_SIZE) { - block = dyn_array_add_block(arr); - ut_a(size <= DYN_ARRAY_DATA_SIZE); - } - } - - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - ut_ad(arr->buf_end == 0); - ut_d(arr->buf_end = block->used + size); - - return(block->data + block->used); -} - -/*********************************************************************//** -Closes the buffer returned by dyn_array_open. */ -UNIV_INLINE -void -dyn_array_close( -/*============*/ - dyn_array_t* arr, /*!< in/out: dynamic array */ - const byte* ptr) /*!< in: end of used space */ -{ - dyn_block_t* block; - - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - block = dyn_array_get_last_block(arr); - - ut_ad(arr->buf_end + block->data >= ptr); - - block->used = ptr - block->data; - - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - - ut_d(arr->buf_end = 0); -} - -/************************************************************//** -Returns pointer to an element in dyn array. 
-@return pointer to element */ -UNIV_INLINE -void* -dyn_array_get_element( -/*==================*/ - const dyn_array_t* arr, /*!< in: dyn array */ - ulint pos) /*!< in: position of element - in bytes from array start */ -{ - const dyn_block_t* block; - - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - /* Get the first array block */ - block = dyn_array_get_first_block(arr); - - if (arr->heap != NULL) { - for (;;) { - ulint used = dyn_block_get_used(block); - - if (pos < used) { - break; - } - - pos -= used; - block = UT_LIST_GET_NEXT(list, block); - ut_ad(block); - } - } - - ut_ad(block); - ut_ad(dyn_block_get_used(block) >= pos); - - return(const_cast<byte*>(block->data) + pos); -} - -/************************************************************//** -Returns the size of stored data in a dyn array. -@return data size in bytes */ -UNIV_INLINE -ulint -dyn_array_get_data_size( -/*====================*/ - const dyn_array_t* arr) /*!< in: dyn array */ -{ - const dyn_block_t* block; - ulint sum = 0; - - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - if (arr->heap == NULL) { - - return(arr->used); - } - - /* Get the first array block */ - block = dyn_array_get_first_block(arr); - - while (block != NULL) { - sum += dyn_block_get_used(block); - block = dyn_array_get_next_block(arr, block); - } - - return(sum); -} - -/********************************************************//** -Pushes n bytes to a dyn array. 
*/ -UNIV_INLINE -void -dyn_push_string( -/*============*/ - dyn_array_t* arr, /*!< in/out: dyn array */ - const byte* str, /*!< in: string to write */ - ulint len) /*!< in: string length */ -{ - ulint n_copied; - - while (len > 0) { - if (len > DYN_ARRAY_DATA_SIZE) { - n_copied = DYN_ARRAY_DATA_SIZE; - } else { - n_copied = len; - } - - memcpy(dyn_array_push(arr, n_copied), str, n_copied); - - str += n_copied; - len -= n_copied; - } -} diff --git a/storage/xtradb/include/eval0eval.h b/storage/xtradb/include/eval0eval.h deleted file mode 100644 index e3b1e6c16b6..00000000000 --- a/storage/xtradb/include/eval0eval.h +++ /dev/null @@ -1,114 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/eval0eval.h -SQL evaluator: evaluates simple data structures, like expressions, in -a query graph - -Created 12/29/1997 Heikki Tuuri -*******************************************************/ - -#ifndef eval0eval_h -#define eval0eval_h - -#include "univ.i" -#include "que0types.h" -#include "pars0sym.h" -#include "pars0pars.h" - -/*****************************************************************//** -Free the buffer from global dynamic memory for a value of a que_node, -if it has been allocated in the above function. The freeing for pushed -column values is done in sel_col_prefetch_buf_free. */ -UNIV_INTERN -void -eval_node_free_val_buf( -/*===================*/ - que_node_t* node); /*!< in: query graph node */ -/*****************************************************************//** -Evaluates a symbol table symbol. */ -UNIV_INLINE -void -eval_sym( -/*=====*/ - sym_node_t* sym_node); /*!< in: symbol table node */ -/*****************************************************************//** -Evaluates an expression. */ -UNIV_INLINE -void -eval_exp( -/*=====*/ - que_node_t* exp_node); /*!< in: expression */ -/*****************************************************************//** -Sets an integer value as the value of an expression node. */ -UNIV_INLINE -void -eval_node_set_int_val( -/*==================*/ - que_node_t* node, /*!< in: expression node */ - lint val); /*!< in: value to set */ -/*****************************************************************//** -Gets an integer value from an expression node. 
-@return integer value */ -UNIV_INLINE -lint -eval_node_get_int_val( -/*==================*/ - que_node_t* node); /*!< in: expression node */ -/*****************************************************************//** -Copies a binary string value as the value of a query graph node. Allocates a -new buffer if necessary. */ -UNIV_INLINE -void -eval_node_copy_and_alloc_val( -/*=========================*/ - que_node_t* node, /*!< in: query graph node */ - const byte* str, /*!< in: binary string */ - ulint len); /*!< in: string length or UNIV_SQL_NULL */ -/*****************************************************************//** -Copies a query node value to another node. */ -UNIV_INLINE -void -eval_node_copy_val( -/*===============*/ - que_node_t* node1, /*!< in: node to copy to */ - que_node_t* node2); /*!< in: node to copy from */ -/*****************************************************************//** -Gets a iboolean value from a query node. -@return iboolean value */ -UNIV_INLINE -ibool -eval_node_get_ibool_val( -/*====================*/ - que_node_t* node); /*!< in: query graph node */ -/*****************************************************************//** -Evaluates a comparison node. -@return the result of the comparison */ -UNIV_INTERN -ibool -eval_cmp( -/*=====*/ - func_node_t* cmp_node); /*!< in: comparison node */ - - -#ifndef UNIV_NONINL -#include "eval0eval.ic" -#endif - -#endif diff --git a/storage/xtradb/include/eval0eval.ic b/storage/xtradb/include/eval0eval.ic deleted file mode 100644 index e4b1dd08017..00000000000 --- a/storage/xtradb/include/eval0eval.ic +++ /dev/null @@ -1,255 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/eval0eval.ic -SQL evaluator: evaluates simple data structures, like expressions, in -a query graph - -Created 12/29/1997 Heikki Tuuri -*******************************************************/ - -#include "que0que.h" -#include "rem0cmp.h" -#include "pars0grm.h" - -/*****************************************************************//** -Evaluates a function node. */ -UNIV_INTERN -void -eval_func( -/*======*/ - func_node_t* func_node); /*!< in: function node */ -/*****************************************************************//** -Allocate a buffer from global dynamic memory for a value of a que_node. -NOTE that this memory must be explicitly freed when the query graph is -freed. If the node already has allocated buffer, that buffer is freed -here. NOTE that this is the only function where dynamic memory should be -allocated for a query node val field. -@return pointer to allocated buffer */ -UNIV_INTERN -byte* -eval_node_alloc_val_buf( -/*====================*/ - que_node_t* node, /*!< in: query graph node; sets the val field - data field to point to the new buffer, and - len field equal to size */ - ulint size); /*!< in: buffer size */ - - -/*****************************************************************//** -Allocates a new buffer if needed. 
-@return pointer to buffer */ -UNIV_INLINE -byte* -eval_node_ensure_val_buf( -/*=====================*/ - que_node_t* node, /*!< in: query graph node; sets the val field - data field to point to the new buffer, and - len field equal to size */ - ulint size) /*!< in: buffer size */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(node); - dfield_set_len(dfield, size); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - if (!data || que_node_get_val_buf_size(node) < size) { - - data = eval_node_alloc_val_buf(node, size); - } - - return(data); -} - -/*****************************************************************//** -Evaluates a symbol table symbol. */ -UNIV_INLINE -void -eval_sym( -/*=====*/ - sym_node_t* sym_node) /*!< in: symbol table node */ -{ - - ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL); - - if (sym_node->indirection) { - /* The symbol table node is an alias for a variable or a - column */ - - dfield_copy_data(que_node_get_val(sym_node), - que_node_get_val(sym_node->indirection)); - } -} - -/*****************************************************************//** -Evaluates an expression. */ -UNIV_INLINE -void -eval_exp( -/*=====*/ - que_node_t* exp_node) /*!< in: expression */ -{ - if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) { - - eval_sym((sym_node_t*) exp_node); - - return; - } - - eval_func(static_cast<func_node_t*>(exp_node)); -} - -/*****************************************************************//** -Sets an integer value as the value of an expression node. 
*/ -UNIV_INLINE -void -eval_node_set_int_val( -/*==================*/ - que_node_t* node, /*!< in: expression node */ - lint val) /*!< in: value to set */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(node); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - if (data == NULL) { - data = eval_node_alloc_val_buf(node, 4); - } - - ut_ad(dfield_get_len(dfield) == 4); - - mach_write_to_4(data, (ulint) val); -} - -/*****************************************************************//** -Gets an integer non-SQL null value from an expression node. -@return integer value */ -UNIV_INLINE -lint -eval_node_get_int_val( -/*==================*/ - que_node_t* node) /*!< in: expression node */ -{ - const byte* ptr; - dfield_t* dfield; - - dfield = que_node_get_val(node); - ptr = static_cast<byte*>(dfield_get_data(dfield)); - - ut_ad(dfield_get_len(dfield) == 4); - - return((int) mach_read_from_4(ptr)); -} - -/*****************************************************************//** -Gets a iboolean value from a query node. -@return iboolean value */ -UNIV_INLINE -ibool -eval_node_get_ibool_val( -/*====================*/ - que_node_t* node) /*!< in: query graph node */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(node); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - ut_ad(data != NULL); - - return(mach_read_from_1(data)); -} - -/*****************************************************************//** -Sets a iboolean value as the value of a function node. 
*/ -UNIV_INLINE -void -eval_node_set_ibool_val( -/*====================*/ - func_node_t* func_node, /*!< in: function node */ - ibool val) /*!< in: value to set */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(func_node); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - if (data == NULL) { - /* Allocate 1 byte to hold the value */ - - data = eval_node_alloc_val_buf(func_node, 1); - } - - ut_ad(dfield_get_len(dfield) == 1); - - mach_write_to_1(data, val); -} - -/*****************************************************************//** -Copies a binary string value as the value of a query graph node. Allocates a -new buffer if necessary. */ -UNIV_INLINE -void -eval_node_copy_and_alloc_val( -/*=========================*/ - que_node_t* node, /*!< in: query graph node */ - const byte* str, /*!< in: binary string */ - ulint len) /*!< in: string length or UNIV_SQL_NULL */ -{ - byte* data; - - if (len == UNIV_SQL_NULL) { - dfield_set_len(que_node_get_val(node), len); - - return; - } - - data = eval_node_ensure_val_buf(node, len); - - ut_memcpy(data, str, len); -} - -/*****************************************************************//** -Copies a query node value to another node. */ -UNIV_INLINE -void -eval_node_copy_val( -/*===============*/ - que_node_t* node1, /*!< in: node to copy to */ - que_node_t* node2) /*!< in: node to copy from */ -{ - dfield_t* dfield2; - - dfield2 = que_node_get_val(node2); - - eval_node_copy_and_alloc_val( - node1, - static_cast<byte*>(dfield_get_data(dfield2)), - dfield_get_len(dfield2)); -} diff --git a/storage/xtradb/include/eval0proc.h b/storage/xtradb/include/eval0proc.h deleted file mode 100644 index 7755fb10343..00000000000 --- a/storage/xtradb/include/eval0proc.h +++ /dev/null @@ -1,104 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/eval0proc.h -Executes SQL stored procedures and their control structures - -Created 1/20/1998 Heikki Tuuri -*******************************************************/ - -#ifndef eval0proc_h -#define eval0proc_h - -#include "univ.i" -#include "que0types.h" -#include "pars0sym.h" -#include "pars0pars.h" - -/**********************************************************************//** -Performs an execution step of a procedure node. -@return query thread to run next or NULL */ -UNIV_INLINE -que_thr_t* -proc_step( -/*======*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of an if-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -if_step( -/*====*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of a while-statement node. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -while_step( -/*=======*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of a for-loop node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -for_step( -/*=====*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of an assignment statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -assign_step( -/*========*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of a procedure call node. -@return query thread to run next or NULL */ -UNIV_INLINE -que_thr_t* -proc_eval_step( -/*===========*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of an exit statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -exit_step( -/*======*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of a return-statement node. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -return_step( -/*========*/ - que_thr_t* thr); /*!< in: query thread */ - - -#ifndef UNIV_NONINL -#include "eval0proc.ic" -#endif - -#endif diff --git a/storage/xtradb/include/eval0proc.ic b/storage/xtradb/include/eval0proc.ic deleted file mode 100644 index 81418bae2c9..00000000000 --- a/storage/xtradb/include/eval0proc.ic +++ /dev/null @@ -1,88 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/eval0proc.ic -Executes SQL stored procedures and their control structures - -Created 1/20/1998 Heikki Tuuri -*******************************************************/ - -#include "pars0pars.h" -#include "que0que.h" -#include "eval0eval.h" - -/**********************************************************************//** -Performs an execution step of a procedure node. -@return query thread to run next or NULL */ -UNIV_INLINE -que_thr_t* -proc_step( -/*======*/ - que_thr_t* thr) /*!< in: query thread */ -{ - proc_node_t* node; - - ut_ad(thr); - - node = static_cast<proc_node_t*>(thr->run_node); - ut_ad(que_node_get_type(node) == QUE_NODE_PROC); - - if (thr->prev_node == que_node_get_parent(node)) { - /* Start execution from the first statement in the statement - list */ - - thr->run_node = node->stat_list; - } else { - /* Move to the next statement */ - ut_ad(que_node_get_next(thr->prev_node) == NULL); - - thr->run_node = NULL; - } - - if (thr->run_node == NULL) { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/**********************************************************************//** -Performs an execution step of a procedure call node. 
-@return query thread to run next or NULL */ -UNIV_INLINE -que_thr_t* -proc_eval_step( -/*===========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - func_node_t* node; - - ut_ad(thr); - - node = static_cast<func_node_t*>(thr->run_node); - ut_ad(que_node_get_type(node) == QUE_NODE_FUNC); - - /* Evaluate the procedure */ - - eval_exp(node); - - thr->run_node = que_node_get_parent(node); - - return(thr); -} diff --git a/storage/xtradb/include/fil0crypt.h b/storage/xtradb/include/fil0crypt.h deleted file mode 100644 index 228dfb895fe..00000000000 --- a/storage/xtradb/include/fil0crypt.h +++ /dev/null @@ -1,511 +0,0 @@ -/***************************************************************************** -Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fil0crypt.h -The low-level file system encryption support functions - -Created 04/01/2015 Jan Lindström -*******************************************************/ - -#ifndef fil0crypt_h -#define fil0crypt_h - -#include "os0sync.h" - -/** -* Magic pattern in start of crypt data on page 0 -*/ -#define MAGIC_SZ 6 - -static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = { - 's', 0xE, 0xC, 'R', 'E', 't' }; - -/* This key will be used if nothing else is given */ -#define FIL_DEFAULT_ENCRYPTION_KEY ENCRYPTION_KEY_SYSTEM_DATA - -extern os_event_t fil_crypt_threads_event; - -/** - * CRYPT_SCHEME_UNENCRYPTED - * - * Used as intermediate state when convering a space from unencrypted - * to encrypted - */ -/** - * CRYPT_SCHEME_1 - * - * xxx is AES_CTR or AES_CBC (or another block cypher with the same key and iv lengths) - * L = AES_ECB(KEY, IV) - * CRYPT(PAGE) = xxx(KEY=L, IV=C, PAGE) - */ - -#define CRYPT_SCHEME_1 1 -#define CRYPT_SCHEME_1_IV_LEN 16 -#define CRYPT_SCHEME_UNENCRYPTED 0 - -/* Cached L or key for given key_version */ -struct key_struct -{ - uint key_version; /*!< Version of the key */ - uint key_length; /*!< Key length */ - unsigned char key[MY_AES_MAX_KEY_LENGTH]; /*!< Cached key - (that is L in CRYPT_SCHEME_1) */ -}; - -/** is encryption enabled */ -extern ulong srv_encrypt_tables; - -#ifdef UNIV_PFS_MUTEX -extern mysql_pfs_key_t fil_crypt_data_mutex_key; -#endif - -/** Mutex helper for crypt_data->scheme -@param[in, out] schme encryption scheme -@param[in] exit should we exit or enter mutex ? 
*/ -void -crypt_data_scheme_locker( - st_encryption_scheme* scheme, - int exit); - -struct fil_space_rotate_state_t -{ - time_t start_time; /*!< time when rotation started */ - ulint active_threads; /*!< active threads in space */ - ulint next_offset; /*!< next "free" offset */ - ulint max_offset; /*!< max offset needing to be rotated */ - uint min_key_version_found; /*!< min key version found but not - rotated */ - lsn_t end_lsn; /*!< max lsn created when rotating this - space */ - bool starting; /*!< initial write of IV */ - bool flushing; /*!< space is being flushed at end of rotate */ - struct { - bool is_active; /*!< is scrubbing active in this space */ - time_t last_scrub_completed; /*!< when was last scrub - completed */ - } scrubbing; -}; - -struct fil_space_crypt_t : st_encryption_scheme -{ - public: - /** Constructor. Does not initialize the members! - The object is expected to be placed in a buffer that - has been zero-initialized. */ - fil_space_crypt_t( - uint new_type, - uint new_min_key_version, - uint new_key_id, - fil_encryption_t new_encryption) - : st_encryption_scheme(), - min_key_version(new_min_key_version), - page0_offset(0), - encryption(new_encryption), - key_found(0), - rotate_state() - { - key_id = new_key_id; - my_random_bytes(iv, sizeof(iv)); - mutex_create(fil_crypt_data_mutex_key, - &mutex, SYNC_NO_ORDER_CHECK); - locker = crypt_data_scheme_locker; - type = new_type; - - if (new_encryption == FIL_ENCRYPTION_OFF || - (!srv_encrypt_tables && - new_encryption == FIL_ENCRYPTION_DEFAULT)) { - type = CRYPT_SCHEME_UNENCRYPTED; - } else { - type = CRYPT_SCHEME_1; - min_key_version = key_get_latest_version(); - } - - key_found = min_key_version; - } - - /** Destructor */ - ~fil_space_crypt_t() - { - mutex_free(&mutex); - } - - /** Get latest key version from encryption plugin - @retval key_version or - @retval ENCRYPTION_KEY_VERSION_INVALID if used key_id - is not found from encryption plugin. 
*/ - uint key_get_latest_version(void); - - /** Returns true if key was found from encryption plugin - and false if not. */ - bool is_key_found() const { - return key_found != ENCRYPTION_KEY_VERSION_INVALID; - } - - /** Returns true if tablespace should be encrypted */ - bool should_encrypt() const { - return ((encryption == FIL_ENCRYPTION_ON) || - (srv_encrypt_tables && - encryption == FIL_ENCRYPTION_DEFAULT)); - } - - /** Return true if tablespace is encrypted. */ - bool is_encrypted() const { - return (encryption != FIL_ENCRYPTION_OFF); - } - - /** Return true if default tablespace encryption is used, */ - bool is_default_encryption() const { - return (encryption == FIL_ENCRYPTION_DEFAULT); - } - - /** Return true if tablespace is not encrypted. */ - bool not_encrypted() const { - return (encryption == FIL_ENCRYPTION_OFF); - } - - /** Write crypt data to a page (0) - @param[in,out] page0 Page 0 where to write - @param[in,out] mtr Minitransaction */ - void write_page0(byte* page0, mtr_t* mtr); - - uint min_key_version; // min key version for this space - ulint page0_offset; // byte offset on page 0 for crypt data - fil_encryption_t encryption; // Encryption setup - - ib_mutex_t mutex; // mutex protecting following variables - - /** Return code from encryption_key_get_latest_version. - If ENCRYPTION_KEY_VERSION_INVALID encryption plugin - could not find the key and there is no need to call - get_latest_key_version again as keys are read only - at startup. 
*/ - uint key_found; - - fil_space_rotate_state_t rotate_state; -}; - -/** Status info about encryption */ -struct fil_space_crypt_status_t { - ulint space; /*!< tablespace id */ - ulint scheme; /*!< encryption scheme */ - uint min_key_version; /*!< min key version */ - uint current_key_version;/*!< current key version */ - uint keyserver_requests;/*!< no of key requests to key server */ - ulint key_id; /*!< current key_id */ - bool rotating; /*!< is key rotation ongoing */ - bool flushing; /*!< is flush at end of rotation ongoing */ - ulint rotate_next_page_number; /*!< next page if key rotating */ - ulint rotate_max_page_number; /*!< max page if key rotating */ -}; - -/** Statistics about encryption key rotation */ -struct fil_crypt_stat_t { - ulint pages_read_from_cache; - ulint pages_read_from_disk; - ulint pages_modified; - ulint pages_flushed; - ulint estimated_iops; -}; - -/** Status info about scrubbing */ -struct fil_space_scrub_status_t { - ulint space; /*!< tablespace id */ - bool compressed; /*!< is space compressed */ - time_t last_scrub_completed; /*!< when was last scrub completed */ - bool scrubbing; /*!< is scrubbing ongoing */ - time_t current_scrub_started; /*!< when started current scrubbing */ - ulint current_scrub_active_threads; /*!< current scrub active threads */ - ulint current_scrub_page_number; /*!< current scrub page no */ - ulint current_scrub_max_page_number; /*!< current scrub max page no */ -}; - -/********************************************************************* -Init space crypt */ -UNIV_INTERN -void -fil_space_crypt_init(); - -/********************************************************************* -Cleanup space crypt */ -UNIV_INTERN -void -fil_space_crypt_cleanup(); - -/****************************************************************** -Create a fil_space_crypt_t object -@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or - FIL_ENCRYPTION_ON or - FIL_ENCRYPTION_OFF - -@param[in] key_id Encryption key id -@return crypt object */ 
-UNIV_INTERN -fil_space_crypt_t* -fil_space_create_crypt_data( - fil_encryption_t encrypt_mode, - uint key_id) - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************** -Merge fil_space_crypt_t object -@param[in,out] dst Destination cryp data -@param[in] src Source crypt data */ -UNIV_INTERN -void -fil_space_merge_crypt_data( - fil_space_crypt_t* dst, - const fil_space_crypt_t* src); - -/****************************************************************** -Read crypt data from a page (0) -@param[in] space space_id -@param[in] page Page 0 -@param[in] offset Offset to crypt data -@return crypt data from page 0 or NULL. */ -UNIV_INTERN -fil_space_crypt_t* -fil_space_read_crypt_data( - ulint space, - const byte* page, - ulint offset) - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************** -Free a crypt data object -@param[in,out] crypt_data crypt data to be freed */ -UNIV_INTERN -void -fil_space_destroy_crypt_data( - fil_space_crypt_t **crypt_data); - -/****************************************************************** -Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry -@param[in] ptr Log entry start -@param[in] end_ptr Log entry end -@param[in] block buffer block -@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED -@return position on log buffer */ -UNIV_INTERN -byte* -fil_parse_write_crypt_data( - byte* ptr, - const byte* end_ptr, - const buf_block_t* block, - dberr_t* err) - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************** -Encrypt a buffer -@param[in,out] crypt_data Crypt data -@param[in] space space_id -@param[in] offset Page offset -@param[in] lsn Log sequence number -@param[in] src_frame Page to encrypt -@param[in] zip_size Compressed size or 0 -@param[in,out] dst_frame Output buffer -@return encrypted buffer or NULL */ -UNIV_INTERN -byte* -fil_encrypt_buf( - fil_space_crypt_t* crypt_data, - ulint space, - 
ulint offset, - lsn_t lsn, - const byte* src_frame, - ulint zip_size, - byte* dst_frame) - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************** -Encrypt a page - -@param[in] space Tablespace -@param[in] offset Page offset -@param[in] lsn Log sequence number -@param[in] src_frame Page to encrypt -@param[in,out] dst_frame Output buffer -@return encrypted buffer or NULL */ -UNIV_INTERN -byte* -fil_space_encrypt( - const fil_space_t* space, - ulint offset, - lsn_t lsn, - byte* src_frame, - byte* dst_frame) - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************** -Decrypt a page -@param[in,out] crypt_data crypt_data -@param[in] tmp_frame Temporary buffer -@param[in] page_size Page size -@param[in,out] src_frame Page to decrypt -@param[out] err DB_SUCCESS or error -@return true if page decrypted, false if not.*/ -UNIV_INTERN -bool -fil_space_decrypt( - fil_space_crypt_t* crypt_data, - byte* tmp_frame, - ulint page_size, - byte* src_frame, - dberr_t* err); - -/****************************************************************** -Decrypt a page -@param[in] space Tablespace -@param[in] tmp_frame Temporary buffer used for decrypting -@param[in] page_size Page size -@param[in,out] src_frame Page to decrypt -@param[out] decrypted true if page was decrypted -@return decrypted page, or original not encrypted page if decryption is -not needed.*/ -UNIV_INTERN -byte* -fil_space_decrypt( - const fil_space_t* space, - byte* tmp_frame, - byte* src_frame, - bool* decrypted) - MY_ATTRIBUTE((warn_unused_result)); - -/****************************************************************** -Calculate post encryption checksum -@param[in] zip_size zip_size or 0 -@param[in] dst_frame Block where checksum is calculated -@return page checksum or BUF_NO_CHECKSUM_MAGIC -not needed. 
*/ -UNIV_INTERN -ulint -fil_crypt_calculate_checksum( - ulint zip_size, - const byte* dst_frame) - MY_ATTRIBUTE((warn_unused_result)); - -/********************************************************************* -Verify that post encryption checksum match calculated checksum. -This function should be called only if tablespace contains crypt_data -metadata (this is strong indication that tablespace is encrypted). -Function also verifies that traditional checksum does not match -calculated checksum as if it does page could be valid unencrypted, -encrypted, or corrupted. - -@param[in] page Page to verify -@param[in] zip_size zip size -@param[in] space Tablespace -@param[in] pageno Page no -@return true if page is encrypted AND OK, false otherwise */ -UNIV_INTERN -bool -fil_space_verify_crypt_checksum( - byte* page, - ulint zip_size, - const fil_space_t* space, - ulint pageno) - MY_ATTRIBUTE((warn_unused_result)); - -/********************************************************************* -Adjust thread count for key rotation -@param[in] enw_cnt Number of threads to be used */ -UNIV_INTERN -void -fil_crypt_set_thread_cnt( - uint new_cnt); - -/********************************************************************* -Adjust max key age -@param[in] val New max key age */ -UNIV_INTERN -void -fil_crypt_set_rotate_key_age( - uint val); - -/********************************************************************* -Adjust rotation iops -@param[in] val New max roation iops */ -UNIV_INTERN -void -fil_crypt_set_rotation_iops( - uint val); - -/********************************************************************* -Adjust encrypt tables -@param[in] val New setting for innodb-encrypt-tables */ -UNIV_INTERN -void -fil_crypt_set_encrypt_tables( - uint val); - -/********************************************************************* -Init threads for key rotation */ -UNIV_INTERN -void -fil_crypt_threads_init(); - -/********************************************************************* -Clean up key 
rotation threads resources */ -UNIV_INTERN -void -fil_crypt_threads_cleanup(); - -/********************************************************************* -Wait for crypt threads to stop accessing space -@param[in] space Tablespace */ -UNIV_INTERN -void -fil_space_crypt_close_tablespace( - const fil_space_t* space); - -/********************************************************************* -Get crypt status for a space (used by information_schema) -@param[in] space Tablespace -@param[out] status Crypt status -return 0 if crypt data present */ -UNIV_INTERN -void -fil_space_crypt_get_status( - const fil_space_t* space, - struct fil_space_crypt_status_t* status); - -/********************************************************************* -Return crypt statistics -@param[out] stat Crypt statistics */ -UNIV_INTERN -void -fil_crypt_total_stat( - fil_crypt_stat_t *stat); - -/********************************************************************* -Get scrub status for a space (used by information_schema) - -@param[in] space Tablespace -@param[out] status Scrub status -return 0 if data found */ -UNIV_INTERN -void -fil_space_get_scrub_status( - const fil_space_t* space, - struct fil_space_scrub_status_t* status); - -#ifndef UNIV_NONINL -#include "fil0crypt.ic" -#endif - -#endif /* fil0crypt_h */ diff --git a/storage/xtradb/include/fil0crypt.ic b/storage/xtradb/include/fil0crypt.ic deleted file mode 100644 index cb9ba083466..00000000000 --- a/storage/xtradb/include/fil0crypt.ic +++ /dev/null @@ -1,36 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2015, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fil0fil.h -The low-level file system encryption support functions - -Created 04/01/2015 Jan Lindström -*******************************************************/ - -/*******************************************************************//** -Find out whether the page is page encrypted -@return true if page is page encrypted, false if not */ -UNIV_INLINE -bool -fil_page_is_encrypted( -/*==================*/ - const byte *buf) /*!< in: page */ -{ - return(mach_read_from_4(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0); -} diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h deleted file mode 100644 index 6eab5db6883..00000000000 --- a/storage/xtradb/include/fil0fil.h +++ /dev/null @@ -1,1540 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fil0fil.h -The low-level file system - -Created 10/25/1995 Heikki Tuuri -*******************************************************/ - -#ifndef fil0fil_h -#define fil0fil_h -#include "univ.i" - -#ifndef UNIV_INNOCHECKSUM - -#include "dict0types.h" -#include "ut0byte.h" -#include "os0file.h" -#include "hash0hash.h" -#ifndef UNIV_HOTBACKUP -#include "sync0rw.h" -#include "ibuf0types.h" -#include "log0log.h" -#endif /* !UNIV_HOTBACKUP */ -#include "trx0types.h" - -#include <list> - -// Forward declaration -struct trx_t; - -typedef std::list<const char*> space_name_list_t; - -/** When mysqld is run, the default directory "." is the mysqld datadir, -but in the MySQL Embedded Server Library and mysqlbackup it is not the default -directory, and we must set the base file path explicitly */ -extern const char* fil_path_to_mysql_datadir; - -/** Initial size of a single-table tablespace in pages */ -#define FIL_IBD_FILE_INITIAL_SIZE 4 - -/** 'null' (undefined) page offset in the context of file spaces */ -#define FIL_NULL ULINT32_UNDEFINED - -/* Space address data type; this is intended to be used when -addresses accurate to a byte are stored in file pages. If the page part -of the address is FIL_NULL, the address is considered undefined. 
*/ - -typedef byte fil_faddr_t; /*!< 'type' definition in C: an address - stored in a file page is a string of bytes */ -#define FIL_ADDR_PAGE 0 /* first in address is the page offset */ -#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/ - -#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */ - -/** File space address */ -struct fil_addr_t{ - ulint page; /*!< page number within a space */ - ulint boffset; /*!< byte offset within the page */ -}; - -/** The null file address */ -extern fil_addr_t fil_addr_null; - -#endif /* !UNIV_INNOCHECKSUM */ - -/** The byte offsets on a file page for various variables @{ */ -#define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the - page belongs to (== 0) but in later - versions the 'new' checksum of the - page */ -#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */ -#define FIL_PAGE_PREV 8 /*!< if there is a 'natural' - predecessor of the page, its - offset. Otherwise FIL_NULL. - This field is not set on BLOB - pages, which are stored as a - singly-linked list. See also - FIL_PAGE_NEXT. */ -#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor - of the page, its offset. - Otherwise FIL_NULL. - B-tree index pages - (FIL_PAGE_TYPE contains FIL_PAGE_INDEX) - on the same PAGE_LEVEL are maintained - as a doubly linked list via - FIL_PAGE_PREV and FIL_PAGE_NEXT - in the collation order of the - smallest user record on each page. */ -#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest - modification log record to the page */ -#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,..., - 2 bytes. - - The contents of this field can only - be trusted in the following case: - if the page is an uncompressed - B-tree index page, then it is - guaranteed that the value is - FIL_PAGE_INDEX. - The opposite does not hold. - - In tablespaces created by - MySQL/InnoDB 5.1.7 or later, the - contents of this field is valid - for all uncompressed pages. 
*/ -#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page - in a system tablespace data file - (ibdata*, not *.ibd): the file has - been flushed to disk at least up - to this lsn - for other pages: a 32-bit key version - used to encrypt the page + 32-bit checksum - or 64 bits of zero if no encryption - */ -#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this - contains the space id of the page */ -#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID - -#define FIL_PAGE_DATA 38 /*!< start of the data on the page */ -/* Following are used when page compression is used */ - -#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store - actual payload data size on - compressed pages. */ -#define FIL_PAGE_COMPRESSION_METHOD_SIZE 2 - /*!< Number of bytes used to store - actual compression method. */ -/* @} */ -/** File page trailer @{ */ -#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used - to store the page checksum, the - last 4 bytes should be identical - to the last 4 bytes of FIL_PAGE_LSN */ -#define FIL_PAGE_DATA_END 8 /*!< size of the page trailer */ -/* @} */ - -/** File page types (values of FIL_PAGE_TYPE) @{ */ -#define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 37401 /*!< Page is compressed and - then encrypted */ -#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */ -#define FIL_PAGE_INDEX 17855 /*!< B-tree node */ -#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ -#define FIL_PAGE_INODE 3 /*!< Index node */ -#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */ -/* File page types introduced in MySQL/InnoDB 5.1.7 */ -#define FIL_PAGE_TYPE_ALLOCATED 0 /*!< Freshly allocated page */ -#define FIL_PAGE_IBUF_BITMAP 5 /*!< Insert buffer bitmap */ -#define FIL_PAGE_TYPE_SYS 6 /*!< System page */ -#define FIL_PAGE_TYPE_TRX_SYS 7 /*!< Transaction system data */ -#define FIL_PAGE_TYPE_FSP_HDR 8 /*!< File space header */ -#define FIL_PAGE_TYPE_XDES 9 /*!< Extent 
descriptor page */ -#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ -#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ -#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ -#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */ -#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED - /*!< Last page type */ -/* @} */ - -#ifndef UNIV_INNOCHECKSUM - -/** Space types @{ */ -#define FIL_TABLESPACE 501 /*!< tablespace */ -#define FIL_LOG 502 /*!< redo log */ -/* @} */ - -/** Structure containing encryption specification */ -struct fil_space_crypt_t; - -/** Enum values for encryption table option */ -enum fil_encryption_t { - /** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */ - FIL_ENCRYPTION_DEFAULT, - /** Encrypted */ - FIL_ENCRYPTION_ON, - /** Not encrypted */ - FIL_ENCRYPTION_OFF -}; - -/** The number of fsyncs done to the log */ -extern ulint fil_n_log_flushes; - -/** Number of pending redo log flushes */ -extern ulint fil_n_pending_log_flushes; -/** Number of pending tablespace flushes */ -extern ulint fil_n_pending_tablespace_flushes; - -/** Number of files currently open */ -extern ulint fil_n_file_opened; - -struct fsp_open_info { - ibool success; /*!< Has the tablespace been opened? */ - const char* check_msg; /*!< fil_check_first_page() message */ - ibool valid; /*!< Is the tablespace valid? 
*/ - pfs_os_file_t file; /*!< File handle */ - char* filepath; /*!< File path to open */ - ulint id; /*!< Space ID */ - ulint flags; /*!< Tablespace flags */ - ulint encryption_error; /*!< if an encryption error occurs */ - fil_space_crypt_t* crypt_data; /*!< crypt data */ - dict_table_t* table; /*!< table */ -}; - -struct fil_space_t; - -/** File node of a tablespace or the log data space */ -struct fil_node_t { - fil_space_t* space; /*!< backpointer to the space where this node - belongs */ - char* name; /*!< path to the file */ - ibool open; /*!< TRUE if file open */ - pfs_os_file_t handle; /*!< OS handle to the file, if file open */ - os_event_t sync_event;/*!< Condition event to group and - serialize calls to fsync; - os_event_set() and os_event_reset() - are protected by fil_system_t::mutex */ - ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw - device or a raw disk partition */ - ulint size; /*!< size of the file in database pages, 0 if - not known yet; the possible last incomplete - megabyte may be ignored if space == 0 */ - ulint n_pending; - /*!< count of pending i/o's on this file; - closing of the file is not allowed if - this is > 0 */ - ulint n_pending_flushes; - /*!< count of pending flushes on this file; - closing of the file is not allowed if - this is > 0 */ - ibool being_extended; - /*!< TRUE if the node is currently - being extended. 
*/ - ib_int64_t modification_counter;/*!< when we write to the file we - increment this by one */ - ib_int64_t flush_counter;/*!< up to what - modification_counter value we have - flushed the modifications to disk */ - ulint file_block_size;/*!< file system block size */ - UT_LIST_NODE_T(fil_node_t) chain; - /*!< link field for the file chain */ - UT_LIST_NODE_T(fil_node_t) LRU; - /*!< link field for the LRU list */ - ulint magic_n;/*!< FIL_NODE_MAGIC_N */ -}; - -/** Value of fil_node_t::magic_n */ -#define FIL_NODE_MAGIC_N 89389 - -/** Tablespace or log data space: let us call them by a common name space */ -struct fil_space_t { - char* name; /*!< space name = the path to the first file in - it */ - ulint id; /*!< space id */ - ib_int64_t tablespace_version; - /*!< in DISCARD/IMPORT this timestamp - is used to check if we should ignore - an insert buffer merge request for a - page because it actually was for the - previous incarnation of the space */ - ibool stop_ios;/*!< TRUE if we want to rename the - .ibd file of tablespace and want to - stop temporarily posting of new i/o - requests on the file */ - bool stop_new_ops; - /*!< we set this true when we start - deleting a single-table tablespace. - When this is set following new ops - are not allowed: - * read IO request - * ibuf merge - * file flush - Note that we can still possibly have - new write operations because we don't - check this flag when doing flush - batches. 
*/ - ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or - FIL_ARCH_LOG */ - UT_LIST_BASE_NODE_T(fil_node_t) chain; - /*!< base node for the file chain */ - ulint size; /*!< space size in pages; 0 if a single-table - tablespace whose size we do not know yet; - last incomplete megabytes in data files may be - ignored if space == 0 */ - ulint recv_size; - /*!< recovered tablespace size in pages; - 0 if no size change was read from the redo log, - or if the size change was implemented */ - ulint flags; /*!< FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags; - see fsp0fsp.h, - fsp_flags_is_valid(), - fsp_flags_get_zip_size() */ - ulint n_reserved_extents; - /*!< number of reserved free extents for - ongoing operations like B-tree page split */ - ulint n_pending_flushes; /*!< this is positive when flushing - the tablespace to disk; dropping of the - tablespace is forbidden if this is positive */ - /** Number of pending buffer pool operations accessing the tablespace - without holding a table lock or dict_operation_lock S-latch - that would prevent the table (and tablespace) from being - dropped. An example is change buffer merge. - The tablespace cannot be dropped while this is nonzero, - or while fil_node_t::n_pending is nonzero. - Protected by fil_system->mutex. */ - ulint n_pending_ops; - /** Number of pending block read or write operations - (when a write is imminent or a read has recently completed). - The tablespace object cannot be freed while this is nonzero, - but it can be detached from fil_system. - Note that fil_node_t::n_pending tracks actual pending I/O requests. - Protected by fil_system->mutex. 
*/ - ulint n_pending_ios; - hash_node_t hash; /*!< hash chain node */ - hash_node_t name_hash;/*!< hash chain the name_hash table */ -#ifndef UNIV_HOTBACKUP - prio_rw_lock_t latch; /*!< latch protecting the file space storage - allocation */ -#endif /* !UNIV_HOTBACKUP */ - - UT_LIST_NODE_T(fil_space_t) unflushed_spaces; - /*!< list of spaces with at least one unflushed - file we have written to */ - bool is_in_unflushed_spaces; - /*!< true if this space is currently in - unflushed_spaces */ - /** True if srv_pass_corrupt_table=true and tablespace contains - corrupted page. */ - bool is_corrupt; - /*!< true if tablespace corrupted */ - bool printed_compression_failure; - /*!< true if we have already printed - compression failure */ - fil_space_crypt_t* crypt_data; - /*!< tablespace crypt data or NULL */ - ulint file_block_size; - /*!< file system block size */ - - UT_LIST_NODE_T(fil_space_t) space_list; - /*!< list of all spaces */ - - /*!< Protected by fil_system */ - UT_LIST_NODE_T(fil_space_t) rotation_list; - /*!< list of spaces needing - key rotation */ - - bool is_in_rotation_list; - /*!< true if this space is - currently in key rotation list */ - - ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ - - /** @return whether the tablespace is about to be dropped or truncated */ - bool is_stopping() const - { - return stop_new_ops; - } -}; - -/** Value of fil_space_t::magic_n */ -#define FIL_SPACE_MAGIC_N 89472 - -/** The tablespace memory cache; also the totality of logs (the log -data space) is stored here; below we talk about tablespaces, but also -the ib_logfiles form a 'space' and it is handled here */ -struct fil_system_t { -#ifndef UNIV_HOTBACKUP - ib_mutex_t mutex; /*!< The mutex protecting the cache */ -#endif /* !UNIV_HOTBACKUP */ - hash_table_t* spaces; /*!< The hash table of spaces in the - system; they are hashed on the space - id */ - hash_table_t* name_hash; /*!< hash table based on the space - name */ - UT_LIST_BASE_NODE_T(fil_node_t) LRU; - /*!< base node 
for the LRU list of the - most recently used open files with no - pending i/o's; if we start an i/o on - the file, we first remove it from this - list, and return it to the start of - the list when the i/o ends; - log files and the system tablespace are - not put to this list: they are opened - after the startup, and kept open until - shutdown */ - UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces; - /*!< base node for the list of those - tablespaces whose files contain - unflushed writes; those spaces have - at least one file node where - modification_counter > flush_counter */ - ulint n_open; /*!< number of files currently open */ - ulint max_n_open; /*!< n_open is not allowed to exceed - this */ - ib_int64_t modification_counter;/*!< when we write to a file we - increment this by one */ - ulint max_assigned_id;/*!< maximum space id in the existing - tables, or assigned during the time - mysqld has been up; at an InnoDB - startup we scan the data dictionary - and set here the maximum of the - space id's of the tables there */ - ib_int64_t tablespace_version; - /*!< a counter which is incremented for - every space object memory creation; - every space mem object gets a - 'timestamp' from this; in DISCARD/ - IMPORT this is used to check if we - should ignore an insert buffer merge - request */ - UT_LIST_BASE_NODE_T(fil_space_t) space_list; - /*!< list of all file spaces */ - - UT_LIST_BASE_NODE_T(fil_space_t) rotation_list; - /*!< list of all file spaces needing - key rotation.*/ - - ibool space_id_reuse_warned; - /* !< TRUE if fil_space_create() - has issued a warning about - potential space_id reuse */ -}; - -/** The tablespace memory cache. This variable is NULL before the module is -initialized. */ -extern fil_system_t* fil_system; - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Returns the version number of a tablespace, -1 if not found. 
-@return version number, -1 if the tablespace does not exist in the -memory cache */ -UNIV_INTERN -ib_int64_t -fil_space_get_version( -/*==================*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** -Returns the latch of a file space. -@return latch protecting storage allocation */ -UNIV_INTERN -prio_rw_lock_t* -fil_space_get_latch( -/*================*/ - ulint id, /*!< in: space id */ - ulint* zip_size);/*!< out: compressed page size, or - 0 for uncompressed tablespaces */ -/*******************************************************************//** -Returns the type of a file space. -@return FIL_TABLESPACE or FIL_LOG */ -UNIV_INTERN -ulint -fil_space_get_type( -/*===============*/ - ulint id); /*!< in: space id */ -#endif /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Appends a new file to the chain of files of a space. File must be closed. -@return pointer to the file name, or NULL on error */ -UNIV_INTERN -char* -fil_node_create( -/*============*/ - const char* name, /*!< in: file name (file must be closed) */ - ulint size, /*!< in: file size in database blocks, rounded - downwards to an integer */ - ulint id, /*!< in: space id where to append */ - ibool is_raw) /*!< in: TRUE if a raw device or - a raw disk partition */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#ifdef UNIV_LOG_ARCHIVE -/****************************************************************//** -Drops files from the start of a file space, so that its size is cut by -the amount given. */ -UNIV_INTERN -void -fil_space_truncate_start( -/*=====================*/ - ulint id, /*!< in: space id */ - ulint trunc_len); /*!< in: truncate by this much; it is an error - if this does not equal to the combined size of - some initial files in the space */ -/****************************************************************//** -Check is there node in file space with given name. 
*/ -UNIV_INTERN -ibool -fil_space_contains_node( -/*====================*/ - ulint id, /*!< in: space id */ - char* node_name); /*!< in: node name */ -#endif /* UNIV_LOG_ARCHIVE */ -/*******************************************************************//** -Creates a space memory object and puts it to the 'fil system' hash table. -If there is an error, prints an error message to the .err log. -@param[in] name Space name -@param[in] id Space id -@param[in] flags Tablespace flags -@param[in] purpose FIL_TABLESPACE or FIL_LOG if log -@param[in] crypt_data Encryption information -@param[in] create_table True if this is create table -@param[in] mode Encryption mode -@return TRUE if success */ -UNIV_INTERN -bool -fil_space_create( - const char* name, - ulint id, - ulint flags, - ulint purpose, - fil_space_crypt_t* crypt_data, - bool create_table, - fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT); - -/*******************************************************************//** -Assigns a new space id for a new single-table tablespace. This works simply by -incrementing the global counter. If 4 billion ids are not enough, we may need -to recycle ids. -@return TRUE if assigned, FALSE if not */ -UNIV_INTERN -ibool -fil_assign_new_space_id( -/*====================*/ - ulint* space_id); /*!< in/out: space id */ -/*******************************************************************//** -Returns the path from the first fil_node_t found for the space ID sent. -The caller is responsible for freeing the memory allocated here for the -value returned. -@return a copy of fil_node_t::name (the file path), NULL if space is zero or -not found. */ -UNIV_INTERN -char* -fil_space_get_first_path( -/*=====================*/ - ulint id); /*!< in: space id */ -/** Set the recovered size of a tablespace in pages.
-@param id tablespace ID -@param size recovered size in pages */ -UNIV_INTERN -void -fil_space_set_recv_size(ulint id, ulint size); -/*******************************************************************//** -Returns the size of the space in pages. The tablespace must be cached in the -memory cache. -@return space size, 0 if space not found */ -UNIV_INTERN -ulint -fil_space_get_size( -/*===============*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** -Returns the flags of the space. The tablespace must be cached -in the memory cache. -@return flags, ULINT_UNDEFINED if space not found */ -UNIV_INTERN -ulint -fil_space_get_flags( -/*================*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** -Returns the compressed page size of the space, or 0 if the space -is not compressed. The tablespace must be cached in the memory cache. -@return compressed page size, ULINT_UNDEFINED if space not found */ -UNIV_INTERN -ulint -fil_space_get_zip_size( -/*===================*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** -Checks if the pair space, page_no refers to an existing page in a tablespace -file space. The tablespace must be cached in the memory cache. -@return TRUE if the address is meaningful */ -UNIV_INTERN -ibool -fil_check_adress_in_tablespace( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint page_no);/*!< in: page number */ -/****************************************************************//** -Initializes the tablespace memory cache. */ -UNIV_INTERN -void -fil_init( -/*=====*/ - ulint hash_size, /*!< in: hash table size */ - ulint max_n_open); /*!< in: max number of open files */ -/*******************************************************************//** -Closes the tablespace memory cache; presumably the counterpart of fil_init(). -NOTE(review): this text used to repeat the fil_init() description -("Initializes ..."); confirm the exact behavior against the definition.
*/ -UNIV_INTERN -void -fil_close(void); -/*===========*/ -/*******************************************************************//** -Opens all log files and system tablespace data files. They stay open until the -database server shutdown. This should be called at a server startup after the -space objects for the log and the system tablespace have been created. The -purpose of this operation is to make sure we never run out of file descriptors -if we need to read from the insert buffer or to write to the log. */ -UNIV_INTERN -void -fil_open_log_and_system_tablespace_files(void); -/*==========================================*/ -/*******************************************************************//** -Closes all open files. There must not be any pending i/o's or not flushed -modifications in the files. */ -UNIV_INTERN -void -fil_close_all_files(void); -/*=====================*/ -/*******************************************************************//** -Closes the redo log files. There must not be any pending i/o's or not -flushed modifications in the files. */ -UNIV_INTERN -void -fil_close_log_files( -/*================*/ - bool free); /*!< in: whether to free the memory object */ -/*******************************************************************//** -Sets the max tablespace id counter if the given number is bigger than the -previous value. */ -UNIV_INTERN -void -fil_set_max_space_id_if_bigger( -/*===========================*/ - ulint max_id);/*!< in: maximum known id */ - -#ifndef UNIV_HOTBACKUP - -/** Write the flushed LSN to the page header of the first page in the -system tablespace. -@param[in] lsn flushed LSN -@return DB_SUCCESS or error number */ -dberr_t -fil_write_flushed_lsn( - lsn_t lsn) - MY_ATTRIBUTE((warn_unused_result)); - -/** Acquire a tablespace when it could be dropped concurrently. -Used by background threads that do not necessarily hold proper locks -for concurrency control. 
-@param[in] id tablespace ID -@param[in] silent whether to silently ignore missing tablespaces -@return the tablespace -@retval NULL if missing or being deleted or truncated */ -UNIV_INTERN -fil_space_t* -fil_space_acquire_low(ulint id, bool silent) - MY_ATTRIBUTE((warn_unused_result)); - -/** Acquire a tablespace when it could be dropped concurrently. -Used by background threads that do not necessarily hold proper locks -for concurrency control. -Calls fil_space_acquire_low() with silent=false, so a missing tablespace -is not silently ignored. -@param[in] id tablespace ID -@return the tablespace -@retval NULL if missing or being deleted or truncated */ -inline -fil_space_t* -fil_space_acquire(ulint id) -{ - return(fil_space_acquire_low(id, false)); -} - -/** Acquire a tablespace that may not exist. -Used by background threads that do not necessarily hold proper locks -for concurrency control. -Calls fil_space_acquire_low() with silent=true. -@param[in] id tablespace ID -@return the tablespace -@retval NULL if missing or being deleted */ -inline -fil_space_t* -fil_space_acquire_silent(ulint id) -{ - return(fil_space_acquire_low(id, true)); -} - -/** Release a tablespace acquired with fil_space_acquire(). -@param[in,out] space tablespace to release */ -UNIV_INTERN -void -fil_space_release(fil_space_t* space); - -/** Acquire a tablespace for reading or writing a block, -when it could be dropped concurrently. -@param[in] id tablespace ID -@return the tablespace -@retval NULL if missing */ -UNIV_INTERN -fil_space_t* -fil_space_acquire_for_io(ulint id); - -/** Release a tablespace acquired with fil_space_acquire_for_io(). -@param[in,out] space tablespace to release */ -UNIV_INTERN -void -fil_space_release_for_io(fil_space_t* space); - -/** Return the next fil_space_t. -Once started, the caller must keep calling this until it returns NULL. -fil_space_acquire() and fil_space_release() are invoked here which -blocks a concurrent operation from dropping the tablespace.
-@param[in,out] prev_space Pointer to the previous fil_space_t. -If NULL, use the first fil_space_t on fil_system->space_list. -@return pointer to the next fil_space_t. -@retval NULL if this was the last */ -UNIV_INTERN -fil_space_t* -fil_space_next( - fil_space_t* prev_space) - MY_ATTRIBUTE((warn_unused_result)); - -/** Return the next fil_space_t from key rotation list. -Once started, the caller must keep calling this until it returns NULL. -fil_space_acquire() and fil_space_release() are invoked here which -blocks a concurrent operation from dropping the tablespace. -@param[in,out] prev_space Pointer to the previous fil_space_t. -If NULL, use the first fil_space_t on fil_system->rotation_list. -@return pointer to the next fil_space_t. -@retval NULL if this was the last */ -UNIV_INTERN -fil_space_t* -fil_space_keyrotate_next( - fil_space_t* prev_space) - MY_ATTRIBUTE((warn_unused_result)); - -/** Wrapper with reference-counting for a fil_space_t. -The destructor calls fil_space_release() on the wrapped pointer if it is -not NULL. -NOTE(review): the implicitly generated copy constructor and copy assignment -are not suppressed, so copying a FilSpace would make two destructors release -the same tablespace; avoid copying instances. */ -class FilSpace -{ -public: - /** Default constructor: Use this when reference counting - is done outside this wrapper. */ - FilSpace() : m_space(NULL) {} - - /** Constructor: Look up the tablespace and increment the - reference count if found. - @param[in] space_id tablespace ID - @param[in] silent whether not to print any errors */ - explicit FilSpace(ulint space_id, bool silent = false) - : m_space(fil_space_acquire_low(space_id, silent)) {} - - /** Assignment operator: This assumes that fil_space_acquire() - has already been done for the fil_space_t. The caller must - assign NULL if it calls fil_space_release(). - A previously wrapped pointer is overwritten without being released. - @param[in] space tablespace to assign - @return reference to this wrapper */ - class FilSpace& operator=(fil_space_t* space) - { - /* fil_space_acquire() must have been invoked. */ - ut_ad(space == NULL || space->n_pending_ops > 0); - m_space = space; - return(*this); - } - - /** Destructor - Decrement the reference count if a fil_space_t - is still assigned.
*/ - ~FilSpace() - { - if (m_space != NULL) { - fil_space_release(m_space); - } - } - - /** Implicit type conversion - @return the wrapped object */ - operator const fil_space_t*() const - { - return(m_space); - } - - /** Explicit type conversion - @return the wrapped object */ - const fil_space_t* operator()() const - { - return(m_space); - } - -private: - /** The wrapped pointer */ - fil_space_t* m_space; -}; - -/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from -the first page of a first data file at database startup. -@param[in] data_file open data file -@param[in] one_read_already true if first datafile is already - read -@param[out] flags FSP_SPACE_FLAGS -@param[out] space_id tablespace ID -@param[out] flushed_lsn flushed lsn value -@param[out] crypt_data encryption crypt data -@retval NULL on success, or if innodb_force_recovery is set -@return pointer to an error message string */ -UNIV_INTERN -const char* -fil_read_first_page( - pfs_os_file_t data_file, - ibool one_read_already, - ulint* flags, - ulint* space_id, - lsn_t* flushed_lsn, - fil_space_crypt_t** crypt_data) - MY_ATTRIBUTE((warn_unused_result)); - -#endif /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Parses the body of a log record written about an .ibd file operation. That is, -the log record part after the standard (type, space id, page no) header of the -log record. - -If desired, also replays the delete or rename operation if the .ibd file -exists and the space id in it matches. Replays the create operation if a file -at that path does not exist yet. If the database directory for the file to be -created does not exist, then we create the directory, too. - -Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to -the datadir that we should use in replaying the file operations.
-@return end of log record, or NULL if the record was not completely -contained between ptr and end_ptr */ -UNIV_INTERN -byte* -fil_op_log_parse_or_replay( -/*=======================*/ - byte* ptr, /*!< in: buffer containing the log record body, - or an initial segment of it, if the record does - not fit completely between ptr and end_ptr */ - byte* end_ptr, /*!< in: buffer end */ - ulint type, /*!< in: the type of this log record */ - ulint space_id, /*!< in: the space id of the tablespace in - question, or 0 if the log record should - only be parsed but not replayed */ - ulint log_flags); /*!< in: redo log flags - (stored in the page number parameter) */ -/*******************************************************************//** -Deletes a single-table tablespace. The tablespace must be cached in the -memory cache. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_delete_tablespace( -/*==================*/ - ulint id, /*!< in: space id */ - buf_remove_t buf_remove); /*!< in: specify the action to take - on the table's pages in the buffer - pool */ -/*******************************************************************//** -Closes a single-table tablespace. The tablespace must be cached in the -memory cache. Free all pages used by the tablespace. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_close_tablespace( -/*=================*/ - trx_t* trx, /*!< in/out: Transaction covering the close */ - ulint id); /*!< in: space id */ -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Discards a single-table tablespace. The tablespace must be cached in the -memory cache. Discarding is like deleting a tablespace, but - - 1. We do not drop the table from the data dictionary; - - 2. We remove all insert buffer entries for the tablespace immediately; - in DROP TABLE they are only removed gradually in the background; - - 3.
When the user does IMPORT TABLESPACE, the tablespace will have the - same id as it originally had. - - 4. Free all the pages in use by the tablespace if rename=TRUE. -@return DB_SUCCESS or error */ -UNIV_INTERN -dberr_t -fil_discard_tablespace( -/*===================*/ - ulint id) /*!< in: space id */ - MY_ATTRIBUTE((warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ - -/** Test if a tablespace file can be renamed to a new filepath by checking -if that the old filepath exists and the new filepath does not exist. -@param[in] space_id tablespace id -@param[in] old_path old filepath -@param[in] new_path new filepath -@param[in] is_discarded whether the tablespace is discarded -@return innodb error code */ -dberr_t -fil_rename_tablespace_check( - ulint space_id, - const char* old_path, - const char* new_path, - bool is_discarded); - -/*******************************************************************//** -Renames a single-table tablespace. The tablespace must be cached in the -tablespace memory cache. -@return TRUE if success */ -UNIV_INTERN -ibool -fil_rename_tablespace( -/*==================*/ - const char* old_name_in, /*!< in: old table name in the - standard databasename/tablename - format of InnoDB, or NULL if we - do the rename based on the space - id only */ - ulint id, /*!< in: space id */ - const char* new_name, /*!< in: new table name in the - standard databasename/tablename - format of InnoDB */ - const char* new_path); /*!< in: new full datafile path - if the tablespace is remotely - located, or NULL if it is located - in the normal data directory. */ - -/*******************************************************************//** -Allocates a file name for a single-table tablespace. The string must be freed -by caller with mem_free(). 
-@return own: file name */ -UNIV_INTERN -char* -fil_make_ibd_name( -/*==============*/ - const char* name, /*!< in: table name or a dir path */ - bool is_full_path); /*!< in: TRUE if it is a dir path */ -/*******************************************************************//** -Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link). -The string must be freed by caller with mem_free(). -@return own: file name */ -UNIV_INTERN -char* -fil_make_isl_name( -/*==============*/ - const char* name); /*!< in: table name */ -/*******************************************************************//** -Creates a new InnoDB Symbolic Link (ISL) file. It is always created -under the 'datadir' of MySQL. The datadir is the directory of a -running mysqld program. We can refer to it by simply using the path '.'. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_link_file( -/*=================*/ - const char* tablename, /*!< in: tablename */ - const char* filepath); /*!< in: pathname of tablespace */ -/*******************************************************************//** -Deletes an InnoDB Symbolic Link (ISL) file. */ -UNIV_INTERN -void -fil_delete_link_file( -/*==================*/ - const char* tablename); /*!< in: name of table */ -/*******************************************************************//** -Reads an InnoDB Symbolic Link (ISL) file. -It is always created under the 'datadir' of MySQL. The name is of the -form {databasename}/{tablename}. and the isl file is expected to be in a -'{databasename}' directory called '{tablename}.isl'. The caller must free -the memory of the null-terminated path returned if it is not null. -@return own: filepath found in link file, NULL if not found. 
*/ -UNIV_INTERN -char* -fil_read_link_file( -/*===============*/ - const char* name); /*!< in: tablespace name */ - -#include "fil0crypt.h" - -/*******************************************************************//** -Creates a new single-table tablespace to a database directory of MySQL. -Database directories are under the 'datadir' of MySQL. The datadir is the -directory of a running mysqld program. We can refer to it by simply the -path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp -dir of the mysqld server. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_create_new_single_table_tablespace( -/*===================================*/ - ulint space_id, /*!< in: space id */ - const char* tablename, /*!< in: the table name in the usual - databasename/tablename format - of InnoDB */ - const char* dir_path, /*!< in: NULL or a dir path */ - ulint flags, /*!< in: tablespace flags */ - ulint flags2, /*!< in: table flags2 */ - ulint size, /*!< in: the initial size of the - tablespace file in pages, - must be >= FIL_IBD_FILE_INITIAL_SIZE */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ - MY_ATTRIBUTE((nonnull(2), warn_unused_result)); -#ifndef UNIV_HOTBACKUP -/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations. -(Typically when upgrading from MariaDB 10.1.0..10.1.20.) -@param[in] space_id tablespace ID -@param[in] flags desired tablespace flags */ -UNIV_INTERN -void -fsp_flags_try_adjust(ulint space_id, ulint flags); - -/********************************************************************//** -Tries to open a single-table tablespace and optionally checks the space id is -right in it. If does not succeed, prints an error message to the .err log. This -function is used to open a tablespace when we start up mysqld, and also in -IMPORT TABLESPACE. 
-NOTE that we assume this operation is used either at the database startup -or under the protection of the dictionary mutex, so that two users cannot -race here. This operation does not leave the file associated with the -tablespace open, but closes it after we have looked at the space id in it. - -If the validate boolean is set, we read the first page of the file and -check that the space id in the file is what we expect. We assume that -this function runs much faster if no check is made, since accessing the -file inode probably is much faster (the OS caches them) than accessing -the first page of the file. This boolean may be initially FALSE, but if -a remote tablespace is found it will be changed to true. - -If the fix_dict boolean is set, then it is safe to use an internal SQL -statement to update the dictionary tables if they are incorrect. - -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_open_single_table_tablespace( -/*=============================*/ - bool validate, /*!< in: Do we validate tablespace? */ - bool fix_dict, /*!< in: Can we fix the dictionary? */ - ulint id, /*!< in: space id */ - ulint flags, /*!< in: expected FSP_SPACE_FLAGS */ - const char* tablename, /*!< in: table name in the - databasename/tablename format */ - const char* filepath) /*!< in: tablespace filepath */ - __attribute__((nonnull(5), warn_unused_result)); - -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -At the server startup, if we need crash recovery, scans the database -directories under the MySQL datadir, looking for .ibd files. Those files are -single-table tablespaces. We need to know the space id in each of them so that -we know into which file we should look to check the contents of a page stored -in the doublewrite buffer, also to know where to apply log records where the -space id is != 0. 
-@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fil_load_single_table_tablespaces(ibool (*pred)(const char*, const char*)=0); -/*===================================*/ -/*******************************************************************//** -Returns TRUE if a single-table tablespace does not exist in the memory cache, -or is being deleted there. -@return TRUE if does not exist or is being deleted */ -UNIV_INTERN -ibool -fil_tablespace_deleted_or_being_deleted_in_mem( -/*===========================================*/ - ulint id, /*!< in: space id */ - ib_int64_t version);/*!< in: tablespace_version should be this; if - you pass -1 as the value of this, then this - parameter is ignored */ -/*******************************************************************//** -Returns TRUE if a single-table tablespace exists in the memory cache. -@return TRUE if exists */ -UNIV_INTERN -ibool -fil_tablespace_exists_in_mem( -/*=========================*/ - ulint id); /*!< in: space id */ -#ifndef UNIV_HOTBACKUP -/** Check if a matching tablespace exists in the InnoDB tablespace memory -cache. Note that if we have not done a crash recovery at the database startup, -there may be many tablespaces which are not yet in the memory cache. 
-@return whether a matching tablespace exists in the memory cache */ -UNIV_INTERN -bool -fil_space_for_table_exists_in_mem( -/*==============================*/ - ulint id, /*!< in: space id */ - const char* name, /*!< in: table name in the standard - 'databasename/tablename' format */ - bool print_error_if_does_not_exist, - /*!< in: print detailed error - information to the .err log if a - matching tablespace is not found from - memory */ - bool remove_from_data_dict_if_does_not_exist, - /*!< in: remove from the data dictionary - if tablespace does not exist */ - bool adjust_space, /*!< in: whether to adjust the space id - when a tablespace mismatch is found */ - mem_heap_t* heap, /*!< in: heap memory */ - table_id_t table_id, /*!< in: table id */ - ulint table_flags); /*!< in: table flags */ -#else /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Extends all tablespaces to the size stored in the space header. During the -mysqlbackup --apply-log phase we extended the spaces on-demand so that log -records could be applied, but that may have left spaces still too small -compared to the size stored in the space header. */ -UNIV_INTERN -void -fil_extend_tablespaces_to_stored_len(void); -/*======================================*/ -#endif /* !UNIV_HOTBACKUP */ -/**********************************************************************//** -Tries to extend a data file so that it would accommodate the number of pages -given. The tablespace must be cached in the memory cache. If the space is big -enough already, does nothing.
-@return TRUE if success */ -UNIV_INTERN -ibool -fil_extend_space_to_desired_size( -/*=============================*/ - ulint* actual_size, /*!< out: size of the space after extension; - if we ran out of disk space this may be lower - than the desired size */ - ulint space_id, /*!< in: space id */ - ulint size_after_extend);/*!< in: desired size in pages after the - extension; if the current space size is bigger - than this already, the function does nothing */ -/*******************************************************************//** -Tries to reserve free extents in a file space. -@return TRUE if succeed */ -UNIV_INTERN -ibool -fil_space_reserve_free_extents( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint n_free_now, /*!< in: number of free extents now */ - ulint n_to_reserve); /*!< in: how many one wants to reserve */ -/*******************************************************************//** -Releases free extents in a file space. */ -UNIV_INTERN -void -fil_space_release_free_extents( -/*===========================*/ - ulint id, /*!< in: space id */ - ulint n_reserved); /*!< in: how many one reserved */ -/*******************************************************************//** -Gets the number of reserved extents. If the database is silent, this number -should be zero. */ -UNIV_INTERN -ulint -fil_space_get_n_reserved_extents( -/*=============================*/ - ulint id); /*!< in: space id */ -/********************************************************************//** -Reads or writes data. This operation is asynchronous (aio). 
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do -i/o on a tablespace which does not exist */ -UNIV_INTERN -dberr_t -_fil_io( -/*===*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, - ORed to OS_FILE_LOG, if a log i/o - and ORed to OS_AIO_SIMULATED_WAKE_LATER - if simulated aio and we want to post a - batch of i/os; NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! */ - bool sync, /*!< in: true if synchronous aio is desired */ - ulint space_id, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint block_offset, /*!< in: offset in number of blocks */ - ulint byte_offset, /*!< in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len, /*!< in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ - void* buf, /*!< in/out: buffer where to store read data - or from where to write; in aio this must be - appropriately aligned */ - void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ - ulint* write_size, /*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - trx_t* trx) /*!< in: trx */ - - __attribute__((nonnull(8))); - -#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size) \ - _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size, NULL) - -/** Determine the block size of the data file. 
-@param[in] space tablespace -@param[in] offset page number -@return block size */ -UNIV_INTERN -ulint -fil_space_get_block_size(const fil_space_t* space, unsigned offset); - -/**********************************************************************//** -Waits for an aio operation to complete. This function is used to write the -handler for completed requests. The aio array of pending requests is divided -into segments (see os0file.cc for more info). The thread specifies which -segment it wants to wait for. */ -UNIV_INTERN -void -fil_aio_wait( -/*=========*/ - ulint segment); /*!< in: the number of the segment in the aio - array to wait for */ -/**********************************************************************//** -Flushes to disk possible writes cached by the OS. If the space does not exist -or is being dropped, does not do anything. */ -UNIV_INTERN -void -fil_flush( -/*======*/ - ulint space_id); /*!< in: file space id (this can be a group of - log files or a tablespace of the database) */ -/** Flush a tablespace. -@param[in,out] space tablespace to flush */ -UNIV_INTERN -void -fil_flush(fil_space_t* space); - -/** Flush to disk the writes in file spaces of the given type -possibly cached by the OS. -@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */ -UNIV_INTERN -void -fil_flush_file_spaces(ulint purpose); -/******************************************************************//** -Checks the consistency of the tablespace cache. -@return TRUE if ok */ -UNIV_INTERN -ibool -fil_validate(void); -/*==============*/ -/********************************************************************//** -Returns TRUE if file address is undefined. -@return TRUE if undefined */ -UNIV_INTERN -ibool -fil_addr_is_null( -/*=============*/ - fil_addr_t addr); /*!< in: address */ -/********************************************************************//** -Get the predecessor of a file page. 
-@return FIL_PAGE_PREV */ -UNIV_INTERN -ulint -fil_page_get_prev( -/*==============*/ - const byte* page); /*!< in: file page */ -/********************************************************************//** -Get the successor of a file page. -@return FIL_PAGE_NEXT */ -UNIV_INTERN -ulint -fil_page_get_next( -/*==============*/ - const byte* page); /*!< in: file page */ -/*********************************************************************//** -Sets the file page type. */ -UNIV_INTERN -void -fil_page_set_type( -/*==============*/ - byte* page, /*!< in/out: file page */ - ulint type); /*!< in: type */ -/*********************************************************************//** -Gets the file page type. -@return type; NOTE that if the type has not been written to page, the -return value not defined */ -UNIV_INTERN -ulint -fil_page_get_type( -/*==============*/ - const byte* page); /*!< in: file page */ - -/*******************************************************************//** -Returns TRUE if a single-table tablespace is being deleted. -@return TRUE if being deleted */ -UNIV_INTERN -ibool -fil_tablespace_is_being_deleted( -/*============================*/ - ulint id); /*!< in: space id */ - -/********************************************************************//** -Delete the tablespace file and any related files like .cfg. -This should not be called for temporary tables. */ -UNIV_INTERN -void -fil_delete_file( -/*============*/ - const char* path); /*!< in: filepath of the ibd tablespace */ - -/** Callback functor. */ -struct PageCallback { - - /** - Default constructor */ - PageCallback() - : - m_zip_size(), - m_page_size(), - m_filepath() UNIV_NOTHROW {} - - virtual ~PageCallback() UNIV_NOTHROW {} - - /** - Called for page 0 in the tablespace file at the start. 
- @param file_size - size of the file in bytes - @param block - contents of the first page in the tablespace file - @retval DB_SUCCESS or error code.*/ - virtual dberr_t init( - os_offset_t file_size, - const buf_block_t* block) UNIV_NOTHROW = 0; - - /** - Called for every page in the tablespace. If the page was not - updated then its state must be set to BUF_PAGE_NOT_USED. For - compressed tables the page descriptor memory will be at offset: - block->frame + UNIV_PAGE_SIZE; - @param offset - physical offset within the file - @param block - block read from file, note it is not from the buffer pool - @retval DB_SUCCESS or error code. */ - virtual dberr_t operator()( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW = 0; - - /** - Set the name of the physical file and the file handle that is used - to open it for the file that is being iterated over. - @param filename - then physical name of the tablespace file. - @param file - OS file handle */ - void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW - { - m_file = file; - m_filepath = filename; - } - - /** - @return the space id of the tablespace */ - virtual ulint get_space_id() const UNIV_NOTHROW = 0; - - /** The compressed page size - @return the compressed page size */ - ulint get_zip_size() const - { - return(m_zip_size); - } - - /** - Set the tablespace compressed table size. - @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */ - dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW; - - /** The compressed page size - @return the compressed page size */ - ulint get_page_size() const - { - return(m_page_size); - } - - /** Compressed table page size */ - ulint m_zip_size; - - /** The tablespace page size. */ - ulint m_page_size; - - /** File handle to the tablespace */ - pfs_os_file_t m_file; - - /** Physical file path. 
*/ - const char* m_filepath; - -protected: - // Disable copying - PageCallback(const PageCallback&); - PageCallback& operator=(const PageCallback&); -}; - -/********************************************************************//** -Iterate over all the pages in the tablespace. -@param table - the table definiton in the server -@param n_io_buffers - number of blocks to read and write together -@param callback - functor that will do the page updates -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fil_tablespace_iterate( -/*===================*/ - dict_table_t* table, - ulint n_io_buffers, - PageCallback& callback) - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*******************************************************************//** -Checks if a single-table tablespace for a given table name exists in the -tablespace memory cache. -@return space id, ULINT_UNDEFINED if not found */ -UNIV_INTERN -ulint -fil_get_space_id_for_table( -/*=======================*/ - const char* name); /*!< in: table name in the standard - 'databasename/tablename' format */ - -/** -Iterate over all the spaces in the space list and fetch the -tablespace names. It will return a copy of the name that must be -freed by the caller using: delete[]. -@return DB_SUCCESS if all OK. */ -UNIV_INTERN -dberr_t -fil_get_space_names( -/*================*/ - space_name_list_t& space_name_list) - /*!< in/out: Vector for collecting the names. 
*/ - MY_ATTRIBUTE((warn_unused_result)); - -/** Generate redo log for swapping two .ibd files -@param[in] old_table old table -@param[in] new_table new table -@param[in] tmp_name temporary table name -@param[in,out] mtr mini-transaction -@return innodb error code */ -UNIV_INTERN -dberr_t -fil_mtr_rename_log( - const dict_table_t* old_table, - const dict_table_t* new_table, - const char* tmp_name, - mtr_t* mtr) - MY_ATTRIBUTE((nonnull)); - -/*******************************************************************//** -Finds the given page_no of the given space id from the double write buffer, -and copies it to the corresponding .ibd file. -@return true if copy was successful, or false. */ -bool -fil_user_tablespace_restore_page( -/*==============================*/ - fsp_open_info* fsp, /* in: contains space id and .ibd - file information */ - ulint page_no); /* in: page_no to obtain from double - write buffer */ - -/*******************************************************************//** -Returns a pointer to the file_space_t that is in the memory cache -associated with a space id. -@return file_space_t pointer, NULL if space not found */ -fil_space_t* -fil_space_get( -/*==========*/ - ulint id); /*!< in: space id */ -#endif /* !UNIV_INNOCHECKSUM */ - -/************************************************************************* -Return local hash table informations. */ - -ulint -fil_system_hash_cells(void); -/*========================*/ - -ulint -fil_system_hash_nodes(void); -/*========================*/ - -/************************************************************************* -functions to access is_corrupt flag of fil_space_t*/ - -void -fil_space_set_corrupt( -/*==================*/ - ulint space_id); - -/** Acquire the fil_system mutex. */ -#define fil_system_enter() mutex_enter(&fil_system->mutex) -/** Release the fil_system mutex. 
*/ -#define fil_system_exit() mutex_exit(&fil_system->mutex) - -#ifndef UNIV_INNOCHECKSUM -/*******************************************************************//** -Returns the table space by a given id, NULL if not found. */ -fil_space_t* -fil_space_found_by_id( -/*==================*/ - ulint id); /*!< in: space id */ - -/*******************************************************************//** -Returns the table space by a given id, NULL if not found. */ -fil_space_t* -fil_space_get_by_id( -/*================*/ - ulint id); /*!< in: space id */ - -#endif /* UNIV_INNOCHECKSUM */ - -/****************************************************************//** -Does error handling when a file operation fails. -@return TRUE if we should retry the operation */ -ibool -os_file_handle_error_no_exit( -/*=========================*/ - const char* name, /*!< in: name of a file or NULL */ - const char* operation, /*!< in: operation */ - ibool on_error_silent,/*!< in: if TRUE then don't print - any message to the log. */ - const char* file, /*!< in: file name */ - const ulint line); /*!< in: line */ - -/*******************************************************************//** -Return page type name */ -UNIV_INLINE -const char* -fil_get_page_type_name( -/*===================*/ - ulint page_type); /*!< in: FIL_PAGE_TYPE */ - -#ifndef UNIV_NONINL -#include "fil0fil.ic" -#endif - -#endif /* fil0fil_h */ diff --git a/storage/xtradb/include/fil0fil.ic b/storage/xtradb/include/fil0fil.ic deleted file mode 100644 index 6c2504c9f8c..00000000000 --- a/storage/xtradb/include/fil0fil.ic +++ /dev/null @@ -1,148 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fil0fil.ic -The low-level file system support functions - -Created 31/03/2015 Jan Lindström -*******************************************************/ - -#ifndef fil0fil_ic -#define fil0fil_ic - -/*******************************************************************//** -Return page type name */ -UNIV_INLINE -const char* -fil_get_page_type_name( -/*===================*/ - ulint page_type) /*!< in: FIL_PAGE_TYPE */ -{ - switch(page_type) { - case FIL_PAGE_PAGE_COMPRESSED: - return "PAGE_COMPRESSED"; - case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED: - return "PAGE_COMPRESSED_ENCRYPTED"; - case FIL_PAGE_INDEX: - return "INDEX"; - case FIL_PAGE_UNDO_LOG: - return "UNDO LOG"; - case FIL_PAGE_INODE: - return "INODE"; - case FIL_PAGE_IBUF_FREE_LIST: - return "IBUF_FREE_LIST"; - case FIL_PAGE_TYPE_ALLOCATED: - return "ALLOCATED"; - case FIL_PAGE_IBUF_BITMAP: - return "IBUF_BITMAP"; - case FIL_PAGE_TYPE_SYS: - return "SYS"; - case FIL_PAGE_TYPE_TRX_SYS: - return "TRX_SYS"; - case FIL_PAGE_TYPE_FSP_HDR: - return "FSP_HDR"; - case FIL_PAGE_TYPE_XDES: - return "XDES"; - case FIL_PAGE_TYPE_BLOB: - return "BLOB"; - case FIL_PAGE_TYPE_ZBLOB: - return "ZBLOB"; - case FIL_PAGE_TYPE_ZBLOB2: - return "ZBLOB2"; - case FIL_PAGE_TYPE_COMPRESSED: - return "ORACLE PAGE COMPRESSED"; - } - - return "PAGE TYPE CORRUPTED"; - -} - -/****************************************************************//** -Get block 
size from fil node -@return block size*/ -UNIV_INLINE -ulint -fil_node_get_block_size( -/*====================*/ - fil_node_t* node) /*!< in: Node where to get block - size */ -{ - return (node->file_block_size); -} - -/****************************************************************//** -Validate page type. -@return true if valid, false if not */ -UNIV_INLINE -bool -fil_page_type_validate( - const byte* page) /*!< in: page */ -{ -#ifdef UNIV_DEBUG - ulint page_type = mach_read_from_2(page + FIL_PAGE_TYPE); - - /* Validate page type */ - if (!((page_type == FIL_PAGE_PAGE_COMPRESSED || - page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED || - page_type == FIL_PAGE_INDEX || - page_type == FIL_PAGE_UNDO_LOG || - page_type == FIL_PAGE_INODE || - page_type == FIL_PAGE_IBUF_FREE_LIST || - page_type == FIL_PAGE_TYPE_ALLOCATED || - page_type == FIL_PAGE_IBUF_BITMAP || - page_type == FIL_PAGE_TYPE_SYS || - page_type == FIL_PAGE_TYPE_TRX_SYS || - page_type == FIL_PAGE_TYPE_FSP_HDR || - page_type == FIL_PAGE_TYPE_XDES || - page_type == FIL_PAGE_TYPE_BLOB || - page_type == FIL_PAGE_TYPE_ZBLOB || - page_type == FIL_PAGE_TYPE_ZBLOB2 || - page_type == FIL_PAGE_TYPE_COMPRESSED))) { - - ulint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); - bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED); - bool page_compressed_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); - ulint space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - ulint offset = mach_read_from_4(page + FIL_PAGE_OFFSET); - ib_uint64_t lsn = mach_read_from_8(page + FIL_PAGE_LSN); - ulint compressed_len = mach_read_from_2(page + FIL_PAGE_DATA); - fil_system_enter(); - fil_space_t* rspace = fil_space_get_by_id(space); - fil_system_exit(); - - /* Dump out the page info */ - fprintf(stderr, "InnoDB: Page " ULINTPF ":" ULINTPF - " name %s page_type " ULINTPF " page_type_name %s\n" - "InnoDB: key_version " ULINTPF - " page_compressed %d page_compressed_encrypted 
%d lsn " - LSN_PF " compressed_len " ULINTPF "\n", - space, offset, rspace->name, page_type, - fil_get_page_type_name(page_type), - key_version, - page_compressed, page_compressed_encrypted, - lsn, compressed_len); - ut_error; - return false; - } - -#endif /* UNIV_DEBUG */ - return true; -} - -#endif /* fil0fil_ic */ diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h deleted file mode 100644 index 03e16699ce3..00000000000 --- a/storage/xtradb/include/fil0pagecompress.h +++ /dev/null @@ -1,132 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2017 MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -#ifndef fil0pagecompress_h -#define fil0pagecompress_h - -#include "fsp0fsp.h" -#include "fsp0pagecompress.h" - -/******************************************************************//** -@file include/fil0pagecompress.h -Helper functions for extracting/storing page compression and -atomic writes information to table space. 
- -Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com -***********************************************************************/ - -/*******************************************************************//** -Find out wheather the page is index page or not -@return true if page type index page, false if not */ -UNIV_INLINE -ibool -fil_page_is_index_page( -/*===================*/ - byte *buf); /*!< in: page */ - -/****************************************************************//** -Get the name of the compression algorithm used for page -compression. -@return compression algorithm name or "UNKNOWN" if not known*/ -UNIV_INLINE -const char* -fil_get_compression_alg_name( -/*=========================*/ - ulint comp_alg); /*!<in: compression algorithm number */ - -/****************************************************************//** -For page compressed pages compress the page before actual write -operation. -@return compressed page to be written*/ -UNIV_INTERN -byte* -fil_compress_page( -/*==============*/ - fil_space_t* space, /*!< in,out: tablespace (NULL during IMPORT) */ - byte* buf, /*!< in: buffer from which to write; in aio - this must be appropriately aligned */ - byte* out_buf, /*!< out: compressed buffer */ - ulint len, /*!< in: length of input buffer.*/ - ulint level, /* in: compression level */ - ulint block_size, /*!< in: block size */ - bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len); /*!< out: actual length of compressed - page */ - -/****************************************************************//** -For page compressed pages decompress the page after actual read -operation. */ -UNIV_INTERN -void -fil_decompress_page( -/*================*/ - byte* page_buf, /*!< in: preallocated buffer or NULL */ - byte* buf, /*!< out: buffer from which to read; in aio - this must be appropriately aligned */ - ulong len, /*!< in: length of output buffer.*/ - ulint* write_size, /*!< in/out: Actual payload size of - the compressed data. 
*/ - bool return_error=false); - /*!< in: true if only an error should - be produced when decompression fails. - By default this parameter is false. */ - -/****************************************************************//** -Get space id from fil node -@return space id*/ -UNIV_INTERN -ulint -fil_node_get_space_id( -/*==================*/ - fil_node_t* node); /*!< in: Node where to get space id*/ - -/****************************************************************//** -Get block size from fil node -@return block size*/ -UNIV_INLINE -ulint -fil_node_get_block_size( - fil_node_t* node); /*!< in: Node where to get block - size */ -/*******************************************************************//** -Find out wheather the page is page compressed -@return true if page is page compressed*/ -UNIV_INLINE -ibool -fil_page_is_compressed( -/*===================*/ - byte* buf); /*!< in: page */ - -/*******************************************************************//** -Find out wheather the page is page compressed -@return true if page is page compressed*/ -UNIV_INLINE -ibool -fil_page_is_compressed_encrypted( -/*=============================*/ - byte* buf); /*!< in: page */ - -/*******************************************************************//** -Find out wheather the page is page compressed with lzo method -@return true if page is page compressed with lzo method*/ -UNIV_INLINE -ibool -fil_page_is_lzo_compressed( -/*=======================*/ - byte* buf); /*!< in: page */ -#endif diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h deleted file mode 100644 index 715572199ab..00000000000 --- a/storage/xtradb/include/fsp0fsp.h +++ /dev/null @@ -1,1068 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fsp0fsp.h -File space management - -Created 12/18/1995 Heikki Tuuri -*******************************************************/ - -#ifndef fsp0fsp_h -#define fsp0fsp_h - -#include "univ.i" - -#ifndef UNIV_INNOCHECKSUM - -#include "mtr0mtr.h" -#include "fut0lst.h" -#include "ut0byte.h" -#include "page0types.h" -#include "fsp0types.h" - -#endif /* !UNIV_INNOCHECKSUM */ - -/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */ - -/** Width of the POST_ANTELOPE flag */ -#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1 -/** Number of flag bits used to indicate the tablespace zip page size */ -#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4 -/** Width of the ATOMIC_BLOBS flag. The ability to break up a long -column into an in-record prefix and an externally stored part is available -to the two Barracuda row formats COMPRESSED and DYNAMIC. 
*/ -#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1 -/** Number of flag bits used to indicate the tablespace page size */ -#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4 -/** Number of reserved bits */ -#define FSP_FLAGS_WIDTH_RESERVED 6 -/** Number of flag bits used to indicate the page compression */ -#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 - -/** Width of all the currently known persistent tablespace flags */ -#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ - + FSP_FLAGS_WIDTH_ZIP_SSIZE \ - + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ - + FSP_FLAGS_WIDTH_PAGE_SSIZE \ - + FSP_FLAGS_WIDTH_RESERVED \ - + FSP_FLAGS_WIDTH_PAGE_COMPRESSION) - -/** A mask of all the known/used bits in FSP_SPACE_FLAGS */ -#define FSP_FLAGS_MASK (~(~0U << FSP_FLAGS_WIDTH)) - -/* FSP_SPACE_FLAGS position and name in MySQL 5.6/MariaDB 10.0 or older -and MariaDB 10.1.20 or older MariaDB 10.1 and in MariaDB 10.1.21 -or newer. -MySQL 5.6 MariaDB 10.1.x MariaDB 10.1.21 -==================================================================== -Below flags in same offset -==================================================================== -0: POST_ANTELOPE 0:POST_ANTELOPE 0: POST_ANTELOPE -1..4: ZIP_SSIZE(0..5) 1..4:ZIP_SSIZE(0..5) 1..4: ZIP_SSIZE(0..5) -(NOTE: bit 4 is always 0) -5: ATOMIC_BLOBS 5:ATOMIC_BLOBS 5: ATOMIC_BLOBS -===================================================================== -Below note the order difference: -===================================================================== -6..9: PAGE_SSIZE(3..7) 6: COMPRESSION 6..9: PAGE_SSIZE(3..7) -10: DATA_DIR 7..10: COMP_LEVEL(0..9) 10: RESERVED (5.6 DATA_DIR) -===================================================================== -The flags below were in incorrect position in MariaDB 10.1, -or have been introduced in MySQL 5.7 or 8.0: -===================================================================== -11: UNUSED 11..12:ATOMIC_WRITES 11: RESERVED (5.7 SHARED) - 12: RESERVED (5.7 TEMPORARY) - 13..15:PAGE_SSIZE(3..7) 13: RESERVED (5.7 ENCRYPTION) - 
14: RESERVED (8.0 SDI) - 15: RESERVED - 16: PAGE_SSIZE_msb(0) 16: COMPRESSION - 17: DATA_DIR 17: UNUSED - 18: UNUSED -===================================================================== -The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS: -===================================================================== - 25: DATA_DIR - 26..27: ATOMIC_WRITES - 28..31: COMPRESSION_LEVEL -*/ - -/** A mask of the memory-only flags in fil_space_t::flags */ -#define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR) - -/** Zero relative shift position of the DATA_DIR flag */ -#define FSP_FLAGS_MEM_DATA_DIR 25 -/** Zero relative shift position of the ATOMIC_WRITES field */ -#define FSP_FLAGS_MEM_ATOMIC_WRITES 26 -/** Zero relative shift position of the COMPRESSION_LEVEL field */ -#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28 - -/** Zero relative shift position of the POST_ANTELOPE field */ -#define FSP_FLAGS_POS_POST_ANTELOPE 0 -/** Zero relative shift position of the ZIP_SSIZE field */ -#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \ - + FSP_FLAGS_WIDTH_POST_ANTELOPE) -/** Zero relative shift position of the ATOMIC_BLOBS field */ -#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \ - + FSP_FLAGS_WIDTH_ZIP_SSIZE) -/** Zero relative shift position of the start of the PAGE_SSIZE bits */ -#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \ - + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) -/** Zero relative shift position of the start of the RESERVED bits -these are only used in MySQL 5.7 and used for compatibility. 
*/ -#define FSP_FLAGS_POS_RESERVED (FSP_FLAGS_POS_PAGE_SSIZE \ - + FSP_FLAGS_WIDTH_PAGE_SSIZE) -/** Zero relative shift position of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_RESERVED \ - + FSP_FLAGS_WIDTH_RESERVED) - -/** Bit mask of the POST_ANTELOPE field */ -#define FSP_FLAGS_MASK_POST_ANTELOPE \ - ((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \ - << FSP_FLAGS_POS_POST_ANTELOPE) -/** Bit mask of the ZIP_SSIZE field */ -#define FSP_FLAGS_MASK_ZIP_SSIZE \ - ((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \ - << FSP_FLAGS_POS_ZIP_SSIZE) -/** Bit mask of the ATOMIC_BLOBS field */ -#define FSP_FLAGS_MASK_ATOMIC_BLOBS \ - ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \ - << FSP_FLAGS_POS_ATOMIC_BLOBS) -/** Bit mask of the PAGE_SSIZE field */ -#define FSP_FLAGS_MASK_PAGE_SSIZE \ - ((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \ - << FSP_FLAGS_POS_PAGE_SSIZE) -/** Bit mask of the RESERVED1 field */ -#define FSP_FLAGS_MASK_RESERVED \ - ((~(~0U << FSP_FLAGS_WIDTH_RESERVED)) \ - << FSP_FLAGS_POS_RESERVED) -/** Bit mask of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_MASK_PAGE_COMPRESSION \ - ((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \ - << FSP_FLAGS_POS_PAGE_COMPRESSION) - -/** Bit mask of the in-memory ATOMIC_WRITES field */ -#define FSP_FLAGS_MASK_MEM_ATOMIC_WRITES \ - (3U << FSP_FLAGS_MEM_ATOMIC_WRITES) - -/** Bit mask of the in-memory COMPRESSION_LEVEL field */ -#define FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL \ - (15U << FSP_FLAGS_MEM_COMPRESSION_LEVEL) - -/** Return the value of the POST_ANTELOPE field */ -#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ - ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \ - >> FSP_FLAGS_POS_POST_ANTELOPE) -/** Return the value of the ZIP_SSIZE field */ -#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \ - ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \ - >> FSP_FLAGS_POS_ZIP_SSIZE) -/** Return the value of the ATOMIC_BLOBS field */ -#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \ - ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \ - >> 
FSP_FLAGS_POS_ATOMIC_BLOBS) -/** Return the value of the PAGE_SSIZE field */ -#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \ - >> FSP_FLAGS_POS_PAGE_SSIZE) -/** @return the RESERVED flags */ -#define FSP_FLAGS_GET_RESERVED(flags) \ - ((flags & FSP_FLAGS_MASK_RESERVED) \ - >> FSP_FLAGS_POS_RESERVED) -/** @return the PAGE_COMPRESSION flag */ -#define FSP_FLAGS_HAS_PAGE_COMPRESSION(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ - >> FSP_FLAGS_POS_PAGE_COMPRESSION) - -/** Return the contents of the UNUSED bits */ -#define FSP_FLAGS_GET_UNUSED(flags) \ - (flags >> FSP_FLAGS_POS_UNUSED) - -/** @return the PAGE_SSIZE flags for the current innodb_page_size */ -#define FSP_FLAGS_PAGE_SSIZE() \ - ((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) ? \ - 0 : (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \ - << FSP_FLAGS_POS_PAGE_SSIZE) - -/** @return the value of the DATA_DIR field */ -#define FSP_FLAGS_HAS_DATA_DIR(flags) \ - (flags & 1U << FSP_FLAGS_MEM_DATA_DIR) -/** @return the COMPRESSION_LEVEL field */ -#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ - ((flags & FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL) \ - >> FSP_FLAGS_MEM_COMPRESSION_LEVEL) -/** @return the ATOMIC_WRITES field */ -#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ - ((flags & FSP_FLAGS_MASK_MEM_ATOMIC_WRITES) \ - >> FSP_FLAGS_MEM_ATOMIC_WRITES) - -/* Compatibility macros for MariaDB 10.1.20 or older 10.1 see -table above. 
*/ -/** Zero relative shift position of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101 \ - (FSP_FLAGS_POS_ATOMIC_BLOBS \ - + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) -/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ -#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101 \ - (FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101 + 1) -/** Zero relative shift position of the ATOMIC_WRITES field */ -#define FSP_FLAGS_POS_ATOMIC_WRITES_MARIADB101 \ - (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101 + 4) -/** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE_MARIADB101 \ - (FSP_FLAGS_POS_ATOMIC_WRITES_MARIADB101 + 2) - -/** Bit mask of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_MASK_PAGE_COMPRESSION_MARIADB101 \ - (1U << FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101) -/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ -#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL_MARIADB101 \ - (15U << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101) -/** Bit mask of the ATOMIC_WRITES field */ -#define FSP_FLAGS_MASK_ATOMIC_WRITES_MARIADB101 \ - (3U << FSP_FLAGS_POS_ATOMIC_WRITES_MARIADB101) -/** Bit mask of the PAGE_SSIZE field */ -#define FSP_FLAGS_MASK_PAGE_SSIZE_MARIADB101 \ - (15U << FSP_FLAGS_POS_PAGE_SSIZE_MARIADB101) - -/** Return the value of the PAGE_COMPRESSION field */ -#define FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_MARIADB101) \ - >> FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101) -/** Return the value of the PAGE_COMPRESSION_LEVEL field */ -#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL_MARIADB101(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL_MARIADB101) \ - >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101) -/** Return the value of the PAGE_SSIZE field */ -#define FSP_FLAGS_GET_PAGE_SSIZE_MARIADB101(flags) \ - ((flags & FSP_FLAGS_MASK_PAGE_SSIZE_MARIADB101) \ - >> FSP_FLAGS_POS_PAGE_SSIZE_MARIADB101) - -/* @} */ - -/* @defgroup 
Tablespace Header Constants (moved from fsp0fsp.c) @{ */ - -/** Offset of the space header within a file page */ -#define FSP_HEADER_OFFSET FIL_PAGE_DATA - -/* The data structures in files are defined just as byte strings in C */ -typedef byte fsp_header_t; -typedef byte xdes_t; - -/* SPACE HEADER - ============ - -File space header data structure: this data structure is contained in the -first page of a space. The space for this header is reserved in every extent -descriptor page, but used only in the first. */ - -/*-------------------------------------*/ -#define FSP_SPACE_ID 0 /* space id */ -#define FSP_NOT_USED 4 /* this field contained a value up to - which we know that the modifications - in the database have been flushed to - the file space; not used now */ -#define FSP_SIZE 8 /* Current size of the space in - pages */ -#define FSP_FREE_LIMIT 12 /* Minimum page number for which the - free list has not been initialized: - the pages >= this limit are, by - definition, free; note that in a - single-table tablespace where size - < 64 pages, this number is 64, i.e., - we have initialized the space - about the first extent, but have not - physically allocted those pages to the - file */ -#define FSP_SPACE_FLAGS 16 /* fsp_space_t.flags, similar to - dict_table_t::flags */ -#define FSP_FRAG_N_USED 20 /* number of used pages in the - FSP_FREE_FRAG list */ -#define FSP_FREE 24 /* list of free extents */ -#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE) - /* list of partially free extents not - belonging to any segment */ -#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE) - /* list of full extents not belonging - to any segment */ -#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE) - /* 8 bytes which give the first unused - segment id */ -#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE) - /* list of pages containing segment - headers, where all the segment inode - slots are reserved */ -#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE) - /* list 
of pages containing segment - headers, where not all the segment - header slots are reserved */ -/*-------------------------------------*/ -/* File space header size */ -#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE) - -#define FSP_FREE_ADD 4 /* this many free extents are added - to the free list from above - FSP_FREE_LIMIT at a time */ -/* @} */ - -#ifndef UNIV_INNOCHECKSUM - -/* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */ - -/* FILE SEGMENT INODE - ================== - -Segment inode which is created for each segment in a tablespace. NOTE: in -purge we assume that a segment having only one currently used page can be -freed in a few steps, so that the freeing cannot fill the file buffer with -bufferfixed file pages. */ - -typedef byte fseg_inode_t; - -#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA - /* the list node for linking - segment inode pages */ - -#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE) -/*-------------------------------------*/ -#define FSEG_ID 0 /* 8 bytes of segment id: if this is 0, - it means that the header is unused */ -#define FSEG_NOT_FULL_N_USED 8 - /* number of used segment pages in - the FSEG_NOT_FULL list */ -#define FSEG_FREE 12 - /* list of free extents of this - segment */ -#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE) - /* list of partially free extents */ -#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE) - /* list of full extents */ -#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE) - /* magic number used in debugging */ -#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE) - /* array of individual pages - belonging to this segment in fsp - fragment extent lists */ -#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2) - /* number of slots in the array for - the fragment pages */ -#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its - page number within space, FIL_NULL - means that the slot is not in use */ -/*-------------------------------------*/ -#define FSEG_INODE_SIZE \ 
- (16 + 3 * FLST_BASE_NODE_SIZE \ - + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) - -#define FSP_SEG_INODES_PER_PAGE(zip_size) \ - (((zip_size ? zip_size : UNIV_PAGE_SIZE) \ - - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) - /* Number of segment inodes which fit on a - single page */ - -#define FSEG_MAGIC_N_VALUE 97937874 - -#define FSEG_FILLFACTOR 8 /* If this value is x, then if - the number of unused but reserved - pages in a segment is less than - reserved pages * 1/x, and there are - at least FSEG_FRAG_LIMIT used pages, - then we allow a new empty extent to - be added to the segment in - fseg_alloc_free_page. Otherwise, we - use unused pages of the segment. */ - -#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS - /* If the segment has >= this many - used pages, it may be expanded by - allocating extents to the segment; - until that only individual fragment - pages are allocated from the space */ - -#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment - is at least this many extents, we - allow extents to be put to the free - list of the extent: at most - FSEG_FREE_LIST_MAX_LEN many */ -#define FSEG_FREE_LIST_MAX_LEN 4 -/* @} */ - -/* @defgroup Extent Descriptor Constants (moved from fsp0fsp.c) @{ */ - -/* EXTENT DESCRIPTOR - ================= - -File extent descriptor data structure: contains bits to tell which pages in -the extent are free and which contain old tuple version to clean. 
*/ - -/*-------------------------------------*/ -#define XDES_ID 0 /* The identifier of the segment - to which this extent belongs */ -#define XDES_FLST_NODE 8 /* The list node data structure - for the descriptors */ -#define XDES_STATE (FLST_NODE_SIZE + 8) - /* contains state information - of the extent */ -#define XDES_BITMAP (FLST_NODE_SIZE + 12) - /* Descriptor bitmap of the pages - in the extent */ -/*-------------------------------------*/ - -#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */ -#define XDES_FREE_BIT 0 /* Index of the bit which tells if - the page is free */ -#define XDES_CLEAN_BIT 1 /* NOTE: currently not used! - Index of the bit which tells if - there are old versions of tuples - on the page */ -/* States of a descriptor */ -#define XDES_FREE 1 /* extent is in free list of space */ -#define XDES_FREE_FRAG 2 /* extent is in free fragment list of - space */ -#define XDES_FULL_FRAG 3 /* extent is in full fragment list of - space */ -#define XDES_FSEG 4 /* extent belongs to a segment */ - -/** File extent data structure size in bytes. */ -#define XDES_SIZE \ - (XDES_BITMAP \ - + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE)) - -/** File extent data structure size in bytes for MAX page size. */ -#define XDES_SIZE_MAX \ - (XDES_BITMAP \ - + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MAX * XDES_BITS_PER_PAGE)) - -/** File extent data structure size in bytes for MIN page size. */ -#define XDES_SIZE_MIN \ - (XDES_BITMAP \ - + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MIN * XDES_BITS_PER_PAGE)) - -/** Offset of the descriptor array on a descriptor page */ -#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) - -/* @} */ - -/**********************************************************************//** -Initializes the file space system. */ -UNIV_INTERN -void -fsp_init(void); -/*==========*/ -/**********************************************************************//** -Gets the size of the system tablespace from the tablespace header. 
If -we do not have an auto-extending data file, this should be equal to -the size of the data files. If there is an auto-extending data file, -this can be smaller. -@return size in pages */ -UNIV_INTERN -ulint -fsp_header_get_tablespace_size(void); -/*================================*/ -/**********************************************************************//** -Reads the file space size stored in the header page. -@return tablespace size stored in the space header */ -UNIV_INTERN -ulint -fsp_get_size_low( -/*=============*/ - page_t* page); /*!< in: header page (page 0 in the tablespace) */ -/**********************************************************************//** -Reads the space id from the first page of a tablespace. -@return space id, ULINT UNDEFINED if error */ -UNIV_INTERN -ulint -fsp_header_get_space_id( -/*====================*/ - const page_t* page); /*!< in: first page of a tablespace */ -/**********************************************************************//** -Reads the space flags from the first page of a tablespace. -@return flags */ -UNIV_INTERN -ulint -fsp_header_get_flags( -/*=================*/ - const page_t* page); /*!< in: first page of a tablespace */ -/**********************************************************************//** -Reads the compressed page size from the first page of a tablespace. -@return compressed page size in bytes, or 0 if uncompressed */ -UNIV_INTERN -ulint -fsp_header_get_zip_size( -/*====================*/ - const page_t* page); /*!< in: first page of a tablespace */ -/**********************************************************************//** -Writes the space id and flags to a tablespace header. The flags contain -row type, physical/compressed page size, and logical/uncompressed page -size of the tablespace. 
*/ -UNIV_INTERN -void -fsp_header_init_fields( -/*===================*/ - page_t* page, /*!< in/out: first page in the space */ - ulint space_id, /*!< in: space id */ - ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS): - 0, or table->flags if newer than COMPACT */ -/** Initialize a tablespace header. -@param[in] space_id space id -@param[in] size current size in blocks -@param[in,out] mtr mini-transaction */ -UNIV_INTERN -void -fsp_header_init(ulint space_id, ulint size, mtr_t* mtr); - -/**********************************************************************//** -Increases the space size field of a space. */ -UNIV_INTERN -void -fsp_header_inc_size( -/*================*/ - ulint space, /*!< in: space id */ - ulint size_inc, /*!< in: size increment in pages */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Creates a new segment. -@return the block where the segment header is placed, x-latched, NULL -if could not create segment because of lack of space */ -UNIV_INTERN -buf_block_t* -fseg_create( -/*========*/ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /*!< in: byte offset of the created segment header - on the page */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Creates a new segment. 
-@return the block where the segment header is placed, x-latched, NULL -if could not create segment because of lack of space */ -UNIV_INTERN -buf_block_t* -fseg_create_general( -/*================*/ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /*!< in: byte offset of the created segment header - on the page */ - ibool has_done_reservation, /*!< in: TRUE if the caller has already - done the reservation for the pages with - fsp_reserve_free_extents (at least 2 extents: one for - the inode and the other for the segment) then there is - no need to do the check for this individual - operation */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Calculates the number of pages reserved by a segment, and how many pages are -currently used. -@return number of reserved pages */ -UNIV_INTERN -ulint -fseg_n_reserved_pages( -/*==================*/ - fseg_header_t* header, /*!< in: segment header */ - ulint* used, /*!< out: number of pages used (<= reserved) */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize -file space fragmentation. 
-@param[in/out] seg_header segment header -@param[in] hint hint of which page would be desirable -@param[in] direction if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR -@param[in/out] mtr mini-transaction -@return X-latched block, or NULL if no page could be allocated */ -#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \ - fseg_alloc_free_page_general(seg_header, hint, direction, \ - FALSE, mtr, mtr) -/**********************************************************************//** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. -@retval NULL if no page could be allocated -@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded -(init_mtr == mtr, or the page was not previously freed in mtr) -@retval block (not allocated or initialized) otherwise */ -UNIV_INTERN -buf_block_t* -fseg_alloc_free_page_general( -/*=========================*/ - fseg_header_t* seg_header,/*!< in/out: segment header */ - ulint hint, /*!< in: hint of which page would be - desirable */ - byte direction,/*!< in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - ibool has_done_reservation, /*!< in: TRUE if the caller has - already done the reservation for the page - with fsp_reserve_free_extents, then there - is no need to do the check for this individual - page */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction - in which the page should be initialized. - If init_mtr!=mtr, but the page is already - latched in mtr, do not initialize the page. 
*/ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -/**********************************************************************//** -Reserves free pages from a tablespace. All mini-transactions which may -use several pages from the tablespace should call this function beforehand -and reserve enough free extents so that they certainly will be able -to do their operation, like a B-tree page split, fully. Reservations -must be released with function fil_space_release_free_extents! - -The alloc_type below has the following meaning: FSP_NORMAL means an -operation which will probably result in more space usage, like an -insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are -deleting rows, then this allocation will in the long run result in -less space usage (after a purge); FSP_CLEANING means allocation done -in a physical record delete (like in a purge) or other cleaning operation -which will result in less space usage in the long run. We prefer the latter -two types of allocation: when space is scarce, FSP_NORMAL allocations -will not succeed, but the latter two allocations will succeed, if possible. -The purpose is to avoid dead end where the database is full but the -user cannot free any space because these freeing operations temporarily -reserve some space. - -Single-table tablespaces whose size is < 32 pages are a special case. In this -function we would liberally reserve several 64 page extents for every page -split or merge in a B-tree. But we do not want to waste disk space if the table -only occupies < 32 pages. That is why we apply different rules in that special -case, just ensuring that there are 3 free pages available. 
-@return TRUE if we were able to make the reservation */ -UNIV_INTERN -ibool -fsp_reserve_free_extents( -/*=====================*/ - ulint* n_reserved,/*!< out: number of extents actually reserved; if we - return TRUE and the tablespace size is < 64 pages, - then this can be 0, otherwise it is n_ext */ - ulint space, /*!< in: space id */ - ulint n_ext, /*!< in: number of extents to reserve */ - ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ - mtr_t* mtr); /*!< in: mini-transaction */ -/**********************************************************************//** -This function should be used to get information on how much we still -will be able to insert new data to the database without running out the -tablespace. Only free extents are taken into account and we also subtract -the safety margin required by the above function fsp_reserve_free_extents. -@return available space in kB */ -UNIV_INTERN -ullint -fsp_get_available_space_in_free_extents( -/*====================================*/ - ulint space); /*!< in: space id */ -/**********************************************************************//** -Frees a single page of a segment. */ -UNIV_INTERN -void -fseg_free_page( -/*===========*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page offset */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Checks if a single page of a segment is free. -@return true if free */ -UNIV_INTERN -bool -fseg_page_is_free( -/*==============*/ - fseg_header_t* seg_header, /*!< in: segment header */ - ulint space, /*!< in: space id */ - ulint page) /*!< in: page offset */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************************//** -Frees part of a segment. 
This function can be used to free a segment -by repeatedly calling this function in different mini-transactions. -Doing the freeing in a single mini-transaction might result in -too big a mini-transaction. -@return TRUE if freeing completed */ -UNIV_INTERN -ibool -fseg_free_step( -/*===========*/ - fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header - resides on the first page of the frag list - of the segment, this pointer becomes obsolete - after the last freeing step */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************************//** -Frees part of a segment. Differs from fseg_free_step because this function -leaves the header page unfreed. -@return TRUE if freeing completed, except the header page */ -UNIV_INTERN -ibool -fseg_free_step_not_header( -/*======================*/ - fseg_header_t* header, /*!< in: segment header which must reside on - the first fragment page of the segment */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/***********************************************************************//** -Checks if a page address is an extent descriptor page address. -@return TRUE if a descriptor page */ -UNIV_INLINE -ibool -fsp_descr_page( -/*===========*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no);/*!< in: page number */ -/***********************************************************//** -Parses a redo log record of a file page init. -@return end of log record or NULL */ -UNIV_INTERN -byte* -fsp_parse_init_file_page( -/*=====================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr, /*!< in: buffer end */ - buf_block_t* block); /*!< in: block or NULL */ -/*******************************************************************//** -Validates the file space system and its segments. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -fsp_validate( -/*=========*/ - ulint space); /*!< in: space id */ -/*******************************************************************//** -Prints info of a file space. */ -UNIV_INTERN -void -fsp_print( -/*======*/ - ulint space); /*!< in: space id */ -#ifdef UNIV_DEBUG -/*******************************************************************//** -Validates a segment. -@return TRUE if ok */ -UNIV_INTERN -ibool -fseg_validate( -/*==========*/ - fseg_header_t* header, /*!< in: segment header */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -#endif /* UNIV_DEBUG */ -#ifdef UNIV_BTR_PRINT -/*******************************************************************//** -Writes info of a segment. */ -UNIV_INTERN -void -fseg_print( -/*=======*/ - fseg_header_t* header, /*!< in: segment header */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -#endif /* UNIV_BTR_PRINT */ - -/** Validate the tablespace flags, which are stored in the -tablespace header at offset FSP_SPACE_FLAGS. -@param[in] flags the contents of FSP_SPACE_FLAGS -@return whether the flags are correct (not in the buggy 10.1) format */ -MY_ATTRIBUTE((warn_unused_result, const)) -UNIV_INLINE -bool -fsp_flags_is_valid(ulint flags) -{ - DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", - return(false);); - if (flags == 0) { - return(true); - } - if (flags & ~FSP_FLAGS_MASK) { - return(false); - } - if ((flags & (FSP_FLAGS_MASK_POST_ANTELOPE | FSP_FLAGS_MASK_ATOMIC_BLOBS)) - == FSP_FLAGS_MASK_ATOMIC_BLOBS) { - /* If the "atomic blobs" flag (indicating - ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED) flag - is set, then the "post Antelope" (ROW_FORMAT!=REDUNDANT) flag - must also be set. */ - return(false); - } - /* Bits 10..14 should be 0b0000d where d is the DATA_DIR flag - of MySQL 5.6 and MariaDB 10.0, which we ignore. 
- In the buggy FSP_SPACE_FLAGS written by MariaDB 10.1.0 to 10.1.20, - bits 10..14 would be nonzero 0bsssaa where sss is - nonzero PAGE_SSIZE (3, 4, 6, or 7) - and aa is ATOMIC_WRITES (not 0b11). */ - if (FSP_FLAGS_GET_RESERVED(flags) & ~1) { - return(false); - } - - const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); - if (ssize == 1 || ssize == 2 || ssize == 5 || ssize & 8) { - /* the page_size is not between 4k and 64k; - 16k should be encoded as 0, not 5 */ - return(false); - } - const ulint zssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); - if (zssize == 0) { - /* not ROW_FORMAT=COMPRESSED */ - } else if (zssize > (ssize ? ssize : 5)) { - /* invalid KEY_BLOCK_SIZE */ - return(false); - } else if (~flags & (FSP_FLAGS_MASK_POST_ANTELOPE - | FSP_FLAGS_MASK_ATOMIC_BLOBS)) { - /* both these flags should be set for - ROW_FORMAT=COMPRESSED */ - return(false); - } - - return(true); -} - -/** Convert FSP_SPACE_FLAGS from the buggy MariaDB 10.1.0..10.1.20 format. -@param[in] flags the contents of FSP_SPACE_FLAGS -@return the flags corrected from the buggy MariaDB 10.1 format -@retval ULINT_UNDEFINED if the flags are not in the buggy 10.1 format */ -MY_ATTRIBUTE((warn_unused_result, const)) -UNIV_INLINE -ulint -fsp_flags_convert_from_101(ulint flags) -{ - DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", - return(ULINT_UNDEFINED);); - if (flags == 0) { - return(flags); - } - - if (flags >> 18) { - /* The most significant FSP_SPACE_FLAGS bit that was ever set - by MariaDB 10.1.0 to 10.1.20 was bit 17 (misplaced DATA_DIR flag). - The flags must be less than 1<<18 in order to be valid. */ - return(ULINT_UNDEFINED); - } - - if ((flags & (FSP_FLAGS_MASK_POST_ANTELOPE | FSP_FLAGS_MASK_ATOMIC_BLOBS)) - == FSP_FLAGS_MASK_ATOMIC_BLOBS) { - /* If the "atomic blobs" flag (indicating - ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED) flag - is set, then the "post Antelope" (ROW_FORMAT!=REDUNDANT) flag - must also be set. 
*/ - return(ULINT_UNDEFINED); - } - - /* Bits 6..10 denote compression in MariaDB 10.1.0 to 10.1.20. - They must be either 0b00000 or 0b00011 through 0b10011. - In correct versions, these bits would be - 0bd0sss where d is the DATA_DIR flag (garbage bit) and - sss is the PAGE_SSIZE (3, 4, 6, or 7). - - NOTE: MariaDB 10.1.0 to 10.1.20 can misinterpret - uncompressed data files with innodb_page_size=4k or 64k as - compressed innodb_page_size=16k files. Below is an exhaustive - state space analysis. - - -0by1zzz: impossible (the bit 4 must be clean; see above) - -0b101xx: DATA_DIR, innodb_page_size>4k: invalid (COMPRESSION_LEVEL>9) - +0bx0011: innodb_page_size=4k: - !!! Misinterpreted as COMPRESSION_LEVEL=9 or 1, COMPRESSION=1. - -0bx0010: impossible, because sss must be 0b011 or 0b1xx - -0bx0001: impossible, because sss must be 0b011 or 0b1xx - -0b10000: DATA_DIR, innodb_page_size=16: - invalid (COMPRESSION_LEVEL=8 but COMPRESSION=0) - +0b00111: no DATA_DIR, innodb_page_size=64k: - !!! Misinterpreted as COMPRESSION_LEVEL=3, COMPRESSION=1. - -0b00101: impossible, because sss must be 0 for 16k, not 0b101 - -0b001x0: no DATA_DIR, innodb_page_size=32k or 8k: - invalid (COMPRESSION_LEVEL=3 but COMPRESSION=0) - +0b00000: innodb_page_size=16k (looks like COMPRESSION=0) - ??? Could actually be compressed; see PAGE_SSIZE below */ - const ulint level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL_MARIADB101( - flags); - if (FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags) != (level != 0) - || level > 9) { - /* The compression flags are not in the buggy MariaDB - 10.1 format. */ - return(ULINT_UNDEFINED); - } - if (!(~flags & FSP_FLAGS_MASK_ATOMIC_WRITES_MARIADB101)) { - /* The ATOMIC_WRITES flags cannot be 0b11. - (The bits 11..12 should actually never be 0b11, - because in MySQL they would be SHARED|TEMPORARY.) 
*/ - return(ULINT_UNDEFINED); - } - - /* Bits 13..16 are the wrong position for PAGE_SSIZE, and they - should contain one of the values 3,4,6,7, that is, be of the form - 0b0011 or 0b01xx (except 0b0110). - In correct versions, these bits should be 0bc0se - where c is the MariaDB COMPRESSED flag - and e is the MySQL 5.7 ENCRYPTION flag - and s is the MySQL 8.0 SDI flag. MariaDB can only support s=0, e=0. - - Compressed innodb_page_size=16k tables with correct FSP_SPACE_FLAGS - will be properly rejected by older MariaDB 10.1.x because they - would read as PAGE_SSIZE>=8 which is not valid. */ - - const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE_MARIADB101(flags); - if (ssize == 1 || ssize == 2 || ssize == 5 || ssize & 8) { - /* the page_size is not between 4k and 64k; - 16k should be encoded as 0, not 5 */ - return(ULINT_UNDEFINED); - } - const ulint zssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); - if (zssize == 0) { - /* not ROW_FORMAT=COMPRESSED */ - } else if (zssize > (ssize ? ssize : 5)) { - /* invalid KEY_BLOCK_SIZE */ - return(ULINT_UNDEFINED); - } else if (~flags & (FSP_FLAGS_MASK_POST_ANTELOPE - | FSP_FLAGS_MASK_ATOMIC_BLOBS)) { - /* both these flags should be set for - ROW_FORMAT=COMPRESSED */ - return(ULINT_UNDEFINED); - } - - flags = ((flags & 0x3f) | ssize << FSP_FLAGS_POS_PAGE_SSIZE - | FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags) - << FSP_FLAGS_POS_PAGE_COMPRESSION); - ut_ad(fsp_flags_is_valid(flags)); - return(flags); -} - -/** Compare tablespace flags. 
-@param[in] expected expected flags from dict_tf_to_fsp_flags() -@param[in] actual flags read from FSP_SPACE_FLAGS -@return whether the flags match */ -MY_ATTRIBUTE((warn_unused_result)) -UNIV_INLINE -bool -fsp_flags_match(ulint expected, ulint actual) -{ - expected &= ~FSP_FLAGS_MEM_MASK; - ut_ad(fsp_flags_is_valid(expected)); - - if (actual == expected) { - return(true); - } - - actual = fsp_flags_convert_from_101(actual); - return(actual == expected); -} - -/********************************************************************//** -Determine if the tablespace is compressed from dict_table_t::flags. -@return TRUE if compressed, FALSE if not compressed */ -UNIV_INLINE -ibool -fsp_flags_is_compressed( -/*====================*/ - ulint flags); /*!< in: tablespace flags */ - -/********************************************************************//** -Calculates the descriptor index within a descriptor page. -@return descriptor index */ -UNIV_INLINE -ulint -xdes_calc_descriptor_index( -/*=======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset); /*!< in: page offset */ - -/**********************************************************************//** -Gets a descriptor bit of a page. -@return TRUE if free */ -UNIV_INLINE -ibool -xdes_get_bit( -/*=========*/ - const xdes_t* descr, /*!< in: descriptor */ - ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset);/*!< in: page offset within extent: - 0 ... FSP_EXTENT_SIZE - 1 */ - -/********************************************************************//** -Calculates the page where the descriptor of a page resides. 
-@return descriptor page offset */ -UNIV_INLINE -ulint -xdes_calc_descriptor_page( -/*======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset); /*!< in: page offset */ - -#endif /* !UNIV_INNOCHECKSUM */ - -/********************************************************************//** -Extract the zip size from tablespace flags. A tablespace has only one -physical page size whether that page is compressed or not. -@return compressed page size of the file-per-table tablespace in bytes, -or zero if the table is not compressed. */ -UNIV_INLINE -ulint -fsp_flags_get_zip_size( -/*====================*/ - ulint flags); /*!< in: tablespace flags */ -/********************************************************************//** -Extract the page size from tablespace flags. -@return page size of the tablespace in bytes */ -UNIV_INLINE -ulint -fsp_flags_get_page_size( -/*====================*/ - ulint flags); /*!< in: tablespace flags */ - -/********************************************************************* -Compute offset after xdes where crypt data can be stored -@param[in] zip_size Compressed size or 0 -@return offset */ -UNIV_INTERN -ulint -fsp_header_get_crypt_offset( - const ulint zip_size) - MY_ATTRIBUTE((warn_unused_result)); - -#define fsp_page_is_free(space,page,mtr) \ - fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__) - -/**********************************************************************//** -Checks if a single page is free. 
-@return true if free */ -UNIV_INTERN -bool -fsp_page_is_free_func( -/*==============*/ - ulint space, /*!< in: space id */ - ulint page, /*!< in: page offset */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - const char *file, - ulint line); - -#ifndef UNIV_NONINL -#include "fsp0fsp.ic" -#endif - -#endif diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic deleted file mode 100644 index ee4cb1f32c7..00000000000 --- a/storage/xtradb/include/fsp0fsp.ic +++ /dev/null @@ -1,202 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/fsp0fsp.ic -File space management - -Created 12/18/1995 Heikki Tuuri -*******************************************************/ - -#ifndef UNIV_INNOCHECKSUM - -/***********************************************************************//** -Checks if a page address is an extent descriptor page address. 
-@return TRUE if a descriptor page */ -UNIV_INLINE -ibool -fsp_descr_page( -/*===========*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no)/*!< in: page number */ -{ - ut_ad(ut_is_2pow(zip_size)); - - if (!zip_size) { - return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET); - } - - return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET); -} - -/********************************************************************//** -Determine if the tablespace is compressed from dict_table_t::flags. -@return TRUE if compressed, FALSE if not compressed */ -UNIV_INLINE -ibool -fsp_flags_is_compressed( -/*====================*/ - ulint flags) /*!< in: tablespace flags */ -{ - return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0); -} - -#endif /* !UNIV_INNOCHECKSUM */ - -/********************************************************************//** -Extract the zip size from tablespace flags. -@return compressed page size of the file-per-table tablespace in bytes, -or zero if the table is not compressed. */ -UNIV_INLINE -ulint -fsp_flags_get_zip_size( -/*===================*/ - ulint flags) /*!< in: tablespace flags */ -{ - ulint zip_size = 0; - ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); - - /* Convert from a 'log2 minus 9' to a page size in bytes. */ - if (ssize) { - zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); - - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - } - - return(zip_size); -} - -/********************************************************************//** -Extract the page size from tablespace flags. -@return page size of the tablespace in bytes */ -UNIV_INLINE -ulint -fsp_flags_get_page_size( -/*====================*/ - ulint flags) /*!< in: tablespace flags */ -{ - ulint page_size = 0; - ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); - - /* Convert from a 'log2 minus 9' to a page size in bytes. 
*/ - if (UNIV_UNLIKELY(ssize)) { - page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); - - ut_ad(page_size <= UNIV_PAGE_SIZE_MAX); - } else { - /* If the page size was not stored, then it is the - original 16k. */ - page_size = UNIV_PAGE_SIZE_ORIG; - } - - return(page_size); -} - -#ifndef UNIV_INNOCHECKSUM -/********************************************************************//** -Calculates the descriptor index within a descriptor page. -@return descriptor index */ -UNIV_INLINE -ulint -xdes_calc_descriptor_index( -/*=======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset) /*!< in: page offset */ -{ - ut_ad(ut_is_2pow(zip_size)); - - if (zip_size == 0) { - return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE) - / FSP_EXTENT_SIZE); - } else { - return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE); - } -} - -/**********************************************************************//** -Gets a descriptor bit of a page. -@return TRUE if free */ -UNIV_INLINE -ibool -xdes_get_bit( -/*=========*/ - const xdes_t* descr, /*!< in: descriptor */ - ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset) /*!< in: page offset within extent: - 0 ... FSP_EXTENT_SIZE - 1 */ -{ - ut_ad(offset < FSP_EXTENT_SIZE); - ut_ad(bit == XDES_FREE_BIT || bit == XDES_CLEAN_BIT); - - ulint index = bit + XDES_BITS_PER_PAGE * offset; - - ulint bit_index = index % 8; - ulint byte_index = index / 8; - - return(ut_bit_get_nth( - mach_read_ulint(descr + XDES_BITMAP + byte_index, - MLOG_1BYTE), - bit_index)); -} - -/********************************************************************//** -Calculates the page where the descriptor of a page resides. 
-@return descriptor page offset */ -UNIV_INLINE -ulint -xdes_calc_descriptor_page( -/*======================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint offset) /*!< in: page offset */ -{ -#ifndef DOXYGEN /* Doxygen gets confused by these */ -# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \ - + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \ - * XDES_SIZE_MAX -# error -# endif -# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \ - + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \ - * XDES_SIZE_MIN -# error -# endif -#endif /* !DOXYGEN */ - - ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET - + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE) - * XDES_SIZE); - ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET - + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE) - * XDES_SIZE); - - ut_ad(ut_is_2pow(zip_size)); - - if (zip_size == 0) { - return(ut_2pow_round(offset, UNIV_PAGE_SIZE)); - } else { - ut_ad(zip_size > XDES_ARR_OFFSET - + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE); - return(ut_2pow_round(offset, zip_size)); - } -} - -#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/xtradb/include/fsp0pagecompress.h b/storage/xtradb/include/fsp0pagecompress.h deleted file mode 100644 index c623d11c326..00000000000 --- a/storage/xtradb/include/fsp0pagecompress.h +++ /dev/null @@ -1,75 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fsp0pagecompress.h -Helper functions for extracting/storing page compression and -atomic writes information to file space. - -Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com -***********************************************************************/ - -#ifndef fsp0pagecompress_h -#define fsp0pagecompress_h - -/* Supported page compression methods */ - -#define PAGE_UNCOMPRESSED 0 -#define PAGE_ZLIB_ALGORITHM 1 -#define PAGE_LZ4_ALGORITHM 2 -#define PAGE_LZO_ALGORITHM 3 -#define PAGE_LZMA_ALGORITHM 4 -#define PAGE_BZIP2_ALGORITHM 5 -#define PAGE_SNAPPY_ALGORITHM 6 -#define PAGE_ALGORITHM_LAST PAGE_SNAPPY_ALGORITHM - -/**********************************************************************//** -Reads the page compression level from the first page of a tablespace. -@return page compression level, or 0 if uncompressed */ -UNIV_INTERN -ulint -fsp_header_get_compression_level( -/*=============================*/ - const page_t* page); /*!< in: first page of a tablespace */ - -/********************************************************************//** -Extract the page compression level from tablespace flags. -A tablespace has only one physical page compression level -whether that page is compressed or not. -@return page compression level of the file-per-table tablespace, -or zero if the table is not compressed. 
*/ -UNIV_INLINE -ulint -fsp_flags_get_page_compression_level( -/*=================================*/ - ulint flags); /*!< in: tablespace flags */ - -/********************************************************************//** -Determine whether the tablespace is using atomic writes from the tablespace flags. -@return atomic writes setting (atomic_writes_t) stored in the flags */ -UNIV_INLINE -atomic_writes_t -fsp_flags_get_atomic_writes( -/*========================*/ - ulint flags); /*!< in: tablespace flags */ - -#ifndef UNIV_NONINL -#include "fsp0pagecompress.ic" -#endif - -#endif diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic deleted file mode 100644 index 14f968e319e..00000000000 --- a/storage/xtradb/include/fsp0pagecompress.ic +++ /dev/null @@ -1,142 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fsp0pagecompress.ic -Implementation for helper functions for extracting/storing page -compression and atomic writes information to file space.
- -Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com - -***********************************************************************/ - -/********************************************************************//** -Extract the page compression level from the tablespace flags. -@return page compression level or 0 if not compressed */ -UNIV_INLINE -ulint -fsp_flags_get_page_compression_level( -/*=================================*/ - ulint flags) /*!< in: tablespace flags */ -{ - return(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)); -} - -/********************************************************************//** -Extract the atomic writes setting from the tablespace flags. -@return the FSP_FLAGS_GET_ATOMIC_WRITES() value cast to atomic_writes_t */ -UNIV_INLINE -atomic_writes_t -fsp_flags_get_atomic_writes( -/*========================*/ - ulint flags) /*!< in: tablespace flags */ -{ - return((atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags)); -} - -/*******************************************************************//** -Find out whether the page is an index page or not -@return true if the page type is FIL_PAGE_INDEX, false if not */ -UNIV_INLINE -ibool -fil_page_is_index_page( -/*===================*/ - byte* buf) /*!< in: page */ -{ - return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX); -} - -/*******************************************************************//** -Find out whether the page is page compressed -@return true if the page type is FIL_PAGE_PAGE_COMPRESSED, false if not */ -UNIV_INLINE -ibool -fil_page_is_compressed( -/*===================*/ - byte* buf) /*!< in: page */ -{ - return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); -} - -/*******************************************************************//** -Find out whether the page is page compressed and encrypted -@return true if the page type is FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED, false if not */ -UNIV_INLINE -ibool -fil_page_is_compressed_encrypted( -/*=============================*/ - byte* buf) /*!< in: page */ -{ -
return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); -} - -/****************************************************************//** -Get the name of the compression algorithm used for page -compression. -@return compression algorithm name or "NULL" if not known */ -UNIV_INLINE -const char* -fil_get_compression_alg_name( -/*=========================*/ - ulint comp_alg) /*!<in: compression algorithm number */ -{ - switch(comp_alg) { - case PAGE_UNCOMPRESSED: - return ("uncompressed"); - break; - case PAGE_ZLIB_ALGORITHM: - return ("ZLIB"); - break; - case PAGE_LZ4_ALGORITHM: - return ("LZ4"); - break; - case PAGE_LZO_ALGORITHM: - return ("LZO"); - break; - case PAGE_LZMA_ALGORITHM: - return ("LZMA"); - break; - case PAGE_BZIP2_ALGORITHM: - return ("BZIP2"); - break; - case PAGE_SNAPPY_ALGORITHM: - return ("SNAPPY"); - break; - /* No default case: unknown values fall through to the return below */ - } - - return ("NULL"); -} - -#ifndef UNIV_INNOCHECKSUM -/*******************************************************************//** -Find out whether the page is page compressed with lzo method -@return true if page is page compressed with lzo method, false if not */ -UNIV_INLINE -ibool -fil_page_is_lzo_compressed( -/*=======================*/ - byte* buf) /*!< in: page */ -{ - return((mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED && - mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == PAGE_LZO_ALGORITHM) || - (mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED && - mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE) == PAGE_LZO_ALGORITHM)); -} - -#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/xtradb/include/fsp0types.h b/storage/xtradb/include/fsp0types.h deleted file mode 100644 index 7152d65054f..00000000000 --- a/storage/xtradb/include/fsp0types.h +++ /dev/null @@ -1,130 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its
affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/****************************************************** -@file include/fsp0types.h -File space management types - -Created May 26, 2009 Vasil Dimov -*******************************************************/ - -#ifndef fsp0types_h -#define fsp0types_h - -#include "univ.i" - -#include "fil0fil.h" /* for FIL_PAGE_DATA */ - -/** @name Flags for inserting records in order -If records are inserted in order, there are the following -flags to tell this (their type is made byte for the compiler -to warn if direction and hint parameters are switched in -fseg_alloc_free_page) */ -/* @{ */ -#define FSP_UP ((byte)111) /*!< alphabetically upwards */ -#define FSP_DOWN ((byte)112) /*!< alphabetically downwards */ -#define FSP_NO_DIR ((byte)113) /*!< no order */ -/* @} */ - -/** File space extent size in pages -page size | file space extent size -----------+----------------------- - 4 KiB | 256 pages = 1 MiB - 8 KiB | 128 pages = 1 MiB - 16 KiB | 64 pages = 1 MiB - 32 KiB | 64 pages = 2 MiB - 64 KiB | 64 pages = 4 MiB -*/ -/** File space extent size (one megabyte if default two or four if not) in pages */ -#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? 
\ - (1048576U / UNIV_PAGE_SIZE) : \ - ((UNIV_PAGE_SIZE <= (32768)) ? \ - (2097152U / UNIV_PAGE_SIZE) : \ - (4194304U / UNIV_PAGE_SIZE)))) - -/** File space extent size (four megabytes) in pages for MAX page size */ -#define FSP_EXTENT_SIZE_MAX (4194304U / UNIV_PAGE_SIZE_MAX) - -/** File space extent size (one megabyte) in pages for MIN page size */ -#define FSP_EXTENT_SIZE_MIN (1048576U / UNIV_PAGE_SIZE_MIN) - -/** On a page of any file segment, data may be put starting from this -offset */ -#define FSEG_PAGE_DATA FIL_PAGE_DATA - -/** @name File segment header -The file segment header points to the inode describing the file segment. */ -/* @{ */ -/** Data type for file segment header */ -typedef byte fseg_header_t; - -#define FSEG_HDR_SPACE 0 /*!< space id of the inode */ -#define FSEG_HDR_PAGE_NO 4 /*!< page number of the inode */ -#define FSEG_HDR_OFFSET 8 /*!< byte offset of the inode */ - -#define FSEG_HEADER_SIZE 10 /*!< Length of the file system - header, in bytes */ -/* @} */ - -/** Flags for fsp_reserve_free_extents @{ */ -#define FSP_NORMAL 1000000 -#define FSP_UNDO 2000000 -#define FSP_CLEANING 3000000 -/* @} */ - -/* Number of pages described in a single descriptor page: currently each page -description takes less than 1 byte; a descriptor page is repeated every -this many file pages */ -/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */ -/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */ - -/** @name The space low address page map -The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated -every XDES_DESCRIBED_PER_PAGE pages in every tablespace. 
*/ -/* @{ */ -/*--------------------------------------*/ -#define FSP_XDES_OFFSET 0 /*!< extent descriptor */ -#define FSP_IBUF_BITMAP_OFFSET 1 /*!< insert buffer bitmap */ - /* The ibuf bitmap pages are the ones whose - page number is the number above plus a - multiple of XDES_DESCRIBED_PER_PAGE */ - -#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */ - /* The following pages exist - in the system tablespace (space 0). */ -#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer - header page, in - tablespace 0 */ -#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer - B-tree root page in - tablespace 0 */ - /* The ibuf tree root page number in - tablespace 0; its fseg inode is on the page - number FSP_FIRST_INODE_PAGE_NO */ -#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction - system header, in - tablespace 0 */ -#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment - page, in tablespace 0 */ -#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header - page, in tablespace 0 */ -/*--------------------------------------*/ -/* @} */ - -#endif /* fsp0types_h */ diff --git a/storage/xtradb/include/fts0ast.h b/storage/xtradb/include/fts0ast.h deleted file mode 100644 index 6229869e8d0..00000000000 --- a/storage/xtradb/include/fts0ast.h +++ /dev/null @@ -1,342 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0ast.h -The FTS query parser (AST) abstract syntax tree routines - -Created 2007/03/16/03 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_FST0AST_H -#define INNOBASE_FST0AST_H - -#include "mem0mem.h" -#include "ha_prototypes.h" - -#define exit(x) abort() - -/* The type of AST Node */ -enum fts_ast_type_t { - FTS_AST_OPER, /*!< Operator */ - FTS_AST_NUMB, /*!< Number */ - FTS_AST_TERM, /*!< Term (or word) */ - FTS_AST_TEXT, /*!< Text string */ - FTS_AST_LIST, /*!< Expression list */ - FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ -}; - -/* The FTS query operators that we support */ -enum fts_ast_oper_t { - FTS_NONE, /*!< No operator */ - - FTS_IGNORE, /*!< Ignore rows that contain - this word */ - - FTS_EXIST, /*!< Include rows that contain - this word */ - - FTS_NEGATE, /*!< Include rows that contain - this word but rank them - lower */ - - FTS_INCR_RATING, /*!< Increase the rank for this - word */ - - FTS_DECR_RATING, /*!< Decrease the rank for this - word */ - - FTS_DISTANCE, /*!< Proximity distance */ - FTS_IGNORE_SKIP, /*!< Transient node operator - signifies that this is an - FTS_IGNORE node, and ignored in - the first pass of - fts_ast_visit() */ - FTS_EXIST_SKIP /*!< Transient node operator - signifies that this is an - FTS_EXIST node, and ignored in - the first pass of - fts_ast_visit() */ -}; - -/* Data types used by the FTS parser */ -struct fts_lexer_t; -struct fts_ast_node_t; -struct fts_ast_state_t; -struct fts_ast_string_t; - -typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); -
-/******************************************************************** -Parse the string using the lexer setup within state.*/ -int -fts_parse( -/*======*/ - /* out: 0 on OK, 1 on error */ - fts_ast_state_t* state); /*!< in: ast state instance.*/ - -/******************************************************************** -Create an AST operator node */ -extern -fts_ast_node_t* -fts_ast_create_node_oper( -/*=====================*/ - void* arg, /*!< in: ast state */ - fts_ast_oper_t oper); /*!< in: ast operator */ -/******************************************************************** -Create an AST term node, makes a copy of ptr */ -extern -fts_ast_node_t* -fts_ast_create_node_term( -/*=====================*/ - void* arg, /*!< in: ast state */ - const fts_ast_string_t* ptr); /*!< in: term string */ -/******************************************************************** -Create an AST text node */ -extern -fts_ast_node_t* -fts_ast_create_node_text( -/*=====================*/ - void* arg, /*!< in: ast state */ - const fts_ast_string_t* ptr); /*!< in: text string */ -/******************************************************************** -Create an AST expr list node */ -extern -fts_ast_node_t* -fts_ast_create_node_list( -/*=====================*/ - void* arg, /*!< in: ast state */ - fts_ast_node_t* expr); /*!< in: ast expr */ -/******************************************************************** -Create a sub-expression list node. This function takes ownership of -expr and is responsible for deleting it. 
*/ -extern -fts_ast_node_t* -fts_ast_create_node_subexp_list( -/*============================*/ - /* out: new node */ - void* arg, /*!< in: ast state instance */ - fts_ast_node_t* expr); /*!< in: ast expr instance */ -/******************************************************************** -Set the wildcard attribute of a term.*/ -extern -void -fts_ast_term_set_wildcard( -/*======================*/ - fts_ast_node_t* node); /*!< in: term to change */ -/******************************************************************** -Set the proximity attribute of a text node. */ - -void -fts_ast_term_set_distance( -/*======================*/ - fts_ast_node_t* node, /*!< in/out: text node */ - ulint distance); /*!< in: the text proximity - distance */ -/********************************************************************//** -Free a fts_ast_node_t instance. -@return next node to free */ -UNIV_INTERN -fts_ast_node_t* -fts_ast_free_node( -/*==============*/ - fts_ast_node_t* node); /*!< in: node to free */ -/******************************************************************** -Add a sub-expression to an AST*/ -extern -fts_ast_node_t* -fts_ast_add_node( -/*=============*/ - fts_ast_node_t* list, /*!< in: list node instance */ - fts_ast_node_t* node); /*!< in: (sub) expr to add */ -/******************************************************************** -Print the AST node recursively.*/ -extern -void -fts_ast_node_print( -/*===============*/ - fts_ast_node_t* node); /*!< in: ast node to print */ -/******************************************************************** -For tracking node allocations, in case there is an error during parsing.*/ -extern -void -fts_ast_state_add_node( -/*===================*/ - fts_ast_state_t*state, /*!< in: ast state instance */ - fts_ast_node_t* node); /*!< in: node to add to state */ -/******************************************************************** -Free node and expr allocations.*/ -extern -void -fts_ast_state_free( -/*===============*/ -
fts_ast_state_t*state); /*!< in: state instance - to free */ -/******************************************************************//** -Traverse the AST - in-order traversal. -@return DB_SUCCESS if all went well */ -UNIV_INTERN -dberr_t -fts_ast_visit( -/*==========*/ - fts_ast_oper_t oper, /*!< in: FTS operator */ - fts_ast_node_t* node, /*!< in: instance to traverse*/ - fts_ast_callback visitor, /*!< in: callback */ - void* arg, /*!< in: callback arg */ - bool* has_ignore) /*!< out: whether we encounter - and ignored processing an - operator, currently we only - ignore FTS_IGNORE operator */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*****************************************************************//** -Process (nested) sub-expression, create a new result set to store the -sub-expression result by processing nodes under current sub-expression -list. Merge the sub-expression result with that of parent expression list. -@return DB_SUCCESS if all went well */ -UNIV_INTERN -dberr_t -fts_ast_visit_sub_exp( -/*==================*/ - fts_ast_node_t* node, /*!< in: instance to traverse*/ - fts_ast_callback visitor, /*!< in: callback */ - void* arg) /*!< in: callback arg */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************** -Create a lex instance.*/ -UNIV_INTERN -fts_lexer_t* -fts_lexer_create( -/*=============*/ - ibool boolean_mode, /*!< in: query type */ - const byte* query, /*!< in: query string */ - ulint query_len) /*!< in: query string len */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); -/******************************************************************** -Free an fts_lexer_t instance.*/ -UNIV_INTERN -void -fts_lexer_free( -/*===========*/ - fts_lexer_t* fts_lexer) /*!< in: lexer instance to - free */ - MY_ATTRIBUTE((nonnull)); - -/** -Create an ast string object, with NUL-terminator, so the string -has one more byte than len -@param[in] str pointer to string -@param[in] len length 
of the string -@return ast string with NUL-terminator */ -UNIV_INTERN -fts_ast_string_t* -fts_ast_string_create( - const byte* str, - ulint len); - -/** -Free an ast string instance -@param[in,out] ast_str string to free */ -UNIV_INTERN -void -fts_ast_string_free( - fts_ast_string_t* ast_str); - -/** -Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul -@param[in] ast_str string to translate -@param[in] base the base -@return translated number */ -UNIV_INTERN -ulint -fts_ast_string_to_ul( - const fts_ast_string_t* ast_str, - int base); - -/** -Print the ast string -@param[in] ast_str string to print */ -UNIV_INTERN -void -fts_ast_string_print( - const fts_ast_string_t* ast_str); - -/* String of length len. -We always store the string of length len with a terminating '\0', -regardless of whether there is any 0x00 in the string itself */ -struct fts_ast_string_t { - /** Pointer to string. */ - byte* str; - - /** Length of the string. */ - ulint len; -}; - -/* Query term type */ -struct fts_ast_term_t { - fts_ast_string_t* ptr; /*!< Pointer to term string.*/ - ibool wildcard; /*!< TRUE if wild card set.*/ -}; - -/* Query text type */ -struct fts_ast_text_t { - fts_ast_string_t* ptr; /*!< Pointer to text string.*/ - ulint distance; /*!< > 0 if proximity distance - set */ -}; - -/* The list of nodes in an expr list */ -struct fts_ast_list_t { - fts_ast_node_t* head; /*!< Children list head */ - fts_ast_node_t* tail; /*!< Children list tail */ -}; - -/* FTS AST node to store the term, text, operator and sub-expressions.*/ -struct fts_ast_node_t { - fts_ast_type_t type; /*!< The type of node */ - fts_ast_text_t text; /*!< Text node */ - fts_ast_term_t term; /*!< Term node */ - fts_ast_oper_t oper; /*!< Operator value */ - fts_ast_list_t list; /*!< Expression list */ - fts_ast_node_t* next; /*!< Link for expr list */ - fts_ast_node_t* next_alloc; /*!< For tracking allocations */ - bool visited; /*!< whether this node is - already processed */ -}; - -/* To track state
during parsing */ -struct fts_ast_state_t { - mem_heap_t* heap; /*!< Heap to use for alloc */ - fts_ast_node_t* root; /*!< If all goes OK, then this - will point to the root.*/ - - fts_ast_list_t list; /*!< List of nodes allocated */ - - fts_lexer_t* lexer; /*!< Lexer callback + arg */ - CHARSET_INFO* charset; /*!< charset used for - tokenization */ -}; - -#ifdef UNIV_DEBUG -const char* -fts_ast_oper_name_get(fts_ast_oper_t oper); -const char* -fts_ast_node_type_get(fts_ast_type_t type); -#endif /* UNIV_DEBUG */ - -#endif /* INNOBASE_FST0AST_H */ diff --git a/storage/xtradb/include/fts0blex.h b/storage/xtradb/include/fts0blex.h deleted file mode 100644 index d0e4cae0678..00000000000 --- a/storage/xtradb/include/fts0blex.h +++ /dev/null @@ -1,349 +0,0 @@ -#ifndef fts0bHEADER_H -#define fts0bHEADER_H 1 -#define fts0bIN_HEADER 1 - -#line 6 "../include/fts0blex.h" - -#line 8 "../include/fts0blex.h" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types.
- */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. */ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* An opaque pointer. */ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void* yyscan_t; -#endif - -/* For convenience, these vars (plus the bison vars far below) - are macros in the reentrant scanner. 
*/ -#define yyin yyg->yyin_r -#define yyout yyg->yyout_r -#define yyextra yyg->yyextra_r -#define yyleng yyg->yyleng_r -#define yytext yyg->yytext_r -#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) -#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) -#define yy_flex_debug yyg->yy_flex_debug_r - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else -#define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ -#endif - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. 
*/ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -void fts0brestart (FILE *input_file ,yyscan_t yyscanner ); -void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); -void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -void fts0bpop_buffer_state (yyscan_t yyscanner ); - -YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); - -void *fts0balloc (yy_size_t ,yyscan_t yyscanner ); -void *fts0brealloc (void *,yy_size_t ,yyscan_t yyscanner ); -void fts0bfree (void * ,yyscan_t yyscanner ); - -/* Begin user sect3 */ - -#define fts0bwrap(n) 1 -#define YY_SKIP_YYWRAP - -#define yytext_ptr yytext_r - -#ifdef YY_HEADER_EXPORT_START_CONDITIONS -#define INITIAL 0 - -#endif - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -int fts0blex_init (yyscan_t* scanner); - -int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); - -/* Accessor methods to globals. - These are made visible to non-reentrant scanners for convenience. 
*/ - -int fts0blex_destroy (yyscan_t yyscanner ); - -int fts0bget_debug (yyscan_t yyscanner ); - -void fts0bset_debug (int debug_flag ,yyscan_t yyscanner ); - -YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner ); - -void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); - -FILE *fts0bget_in (yyscan_t yyscanner ); - -void fts0bset_in (FILE * in_str ,yyscan_t yyscanner ); - -FILE *fts0bget_out (yyscan_t yyscanner ); - -void fts0bset_out (FILE * out_str ,yyscan_t yyscanner ); - -int fts0bget_leng (yyscan_t yyscanner ); - -char *fts0bget_text (yyscan_t yyscanner ); - -int fts0bget_lineno (yyscan_t yyscanner ); - -void fts0bset_lineno (int line_number ,yyscan_t yyscanner ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int fts0bwrap (yyscan_t yyscanner ); -#else -extern int fts0bwrap (yyscan_t yyscanner ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); -#endif - -#ifndef YY_NO_INPUT - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else -#define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. 
- */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int fts0blex (yyscan_t yyscanner); - -#define YY_DECL int fts0blex (yyscan_t yyscanner) -#endif /* !YY_DECL */ - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - -#undef YY_NEW_FILE -#undef YY_FLUSH_BUFFER -#undef yy_set_bol -#undef yy_new_buffer -#undef yy_set_interactive -#undef YY_DO_BEFORE_ACTION - -#ifdef YY_DECL_IS_OURS -#undef YY_DECL_IS_OURS -#undef YY_DECL -#endif - -#line 73 "fts0blex.l" - - -#line 348 "../include/fts0blex.h" -#undef fts0bIN_HEADER -#endif /* fts0bHEADER_H */ diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h deleted file mode 100644 index 7aa7055640c..00000000000 --- a/storage/xtradb/include/fts0fts.h +++ /dev/null @@ -1,1064 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. All Rights reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0fts.h -Full text search header file - -Created 2011/09/02 Sunny Bains -***********************************************************************/ - -#ifndef fts0fts_h -#define fts0fts_h - -#include "univ.i" - -#include "data0type.h" -#include "data0types.h" -#include "dict0types.h" -#include "hash0hash.h" -#include "mem0mem.h" -#include "rem0types.h" -#include "row0types.h" -#include "trx0types.h" -#include "ut0vec.h" -#include "ut0rbt.h" -#include "ut0wqueue.h" -#include "que0types.h" -#include "ft_global.h" - -/** "NULL" value of a document id. */ -#define FTS_NULL_DOC_ID 0 - -/** FTS hidden column that is used to map to and from the row */ -#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID" - -/** The name of the index created by FTS */ -#define FTS_DOC_ID_INDEX_NAME "FTS_DOC_ID_INDEX" - -#define FTS_DOC_ID_INDEX_NAME_LEN 16 - -/** Doc ID is a 8 byte value */ -#define FTS_DOC_ID_LEN 8 - -/** The number of fields to sort when we build FT index with -FIC. Three fields are sort: (word, doc_id, position) */ -#define FTS_NUM_FIELDS_SORT 3 - -/** Maximum number of rows in a table, smaller than which, we will -optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */ -#define MAX_DOC_ID_OPT_VAL 1073741824 - -/** Document id type. */ -typedef ib_uint64_t doc_id_t; - -/** doc_id_t printf format */ -#define FTS_DOC_ID_FORMAT IB_ID_FMT - -/** Convert document id to the InnoDB (BIG ENDIAN) storage format. */ -#define fts_write_doc_id(d, s) mach_write_to_8(d, s) - -/** Read a document id to internal format. 
*/ -#define fts_read_doc_id(s) mach_read_from_8(s) - -/** Bind the doc id to a variable */ -#define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v) - -/** Defines for FTS query mode, they have the same values as -those defined in mysql file ft_global.h */ -#define FTS_NL 0 -#define FTS_BOOL 1 -#define FTS_SORTED 2 -#define FTS_EXPAND 4 -#define FTS_PROXIMITY 8 -#define FTS_PHRASE 16 -#define FTS_OPT_RANKING 32 - -#define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND" - -/** Threshold where our optimize thread automatically kicks in */ -#define FTS_OPTIMIZE_THRESHOLD 10000000 - -/** Threshold to avoid exhausting of doc ids. Consecutive doc id difference -should not exceed FTS_DOC_ID_MAX_STEP */ -#define FTS_DOC_ID_MAX_STEP 65535 - -/** Variable specifying the FTS parallel sort degree */ -extern ulong fts_sort_pll_degree; - -/** Variable specifying the number of word to optimize for each optimize table -call */ -extern ulong fts_num_word_optimize; - -/** Variable specifying whether we do additional FTS diagnostic printout -in the log */ -extern char fts_enable_diag_print; - -/** FTS rank type, which will be between 0 .. 1 inclusive */ -typedef float fts_rank_t; - -/** Type of a row during a transaction. FTS_NOTHING means the row can be -forgotten from the FTS system's POV, FTS_INVALID is an internal value used -to mark invalid states. - -NOTE: Do not change the order or value of these, fts_trx_row_get_new_state -depends on them being exactly as they are. */ -enum fts_row_state { - FTS_INSERT = 0, - FTS_MODIFY, - FTS_DELETE, - FTS_NOTHING, - FTS_INVALID -}; - -/** The FTS table types. 
*/ -enum fts_table_type_t { - FTS_INDEX_TABLE, /*!< FTS auxiliary table that is - specific to a particular FTS index - on a table */ - - FTS_COMMON_TABLE /*!< FTS auxiliary table that is common - for all FTS index on a table */ -}; - -struct fts_doc_t; -struct fts_cache_t; -struct fts_token_t; -struct fts_doc_ids_t; -struct fts_index_cache_t; - - -/** Initialize the "fts_table" for internal query into FTS auxiliary -tables */ -#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table)\ -do { \ - (fts_table)->suffix = m_suffix; \ - (fts_table)->type = m_type; \ - (fts_table)->table_id = m_table->id; \ - (fts_table)->parent = m_table->name; \ - (fts_table)->table = m_table; \ -} while (0); - -#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index)\ -do { \ - (fts_table)->suffix = m_suffix; \ - (fts_table)->type = m_type; \ - (fts_table)->table_id = m_index->table->id; \ - (fts_table)->parent = m_index->table->name; \ - (fts_table)->table = m_index->table; \ - (fts_table)->index_id = m_index->id; \ -} while (0); - -/** Information about changes in a single transaction affecting -the FTS system. */ -struct fts_trx_t { - trx_t* trx; /*!< InnoDB transaction */ - - ib_vector_t* savepoints; /*!< Active savepoints, must have at - least one element, the implied - savepoint */ - ib_vector_t* last_stmt; /*!< last_stmt */ - - mem_heap_t* heap; /*!< heap */ -}; - -/** Information required for transaction savepoint handling. */ -struct fts_savepoint_t { - char* name; /*!< First entry is always NULL, the - default instance. Otherwise the name - of the savepoint */ - - ib_rbt_t* tables; /*!< Modified FTS tables */ -}; - -/** Information about changed rows in a transaction for a single table. 
*/ -struct fts_trx_table_t { - dict_table_t* table; /*!< table */ - - fts_trx_t* fts_trx; /*!< link to parent */ - - ib_rbt_t* rows; /*!< rows changed; indexed by doc-id, - cells are fts_trx_row_t* */ - - fts_doc_ids_t* added_doc_ids; /*!< list of added doc ids (NULL until - the first addition) */ - - /*!< for adding doc ids */ - que_t* docs_added_graph; -}; - -/** Information about one changed row in a transaction. */ -struct fts_trx_row_t { - doc_id_t doc_id; /*!< Id of the ins/upd/del document */ - - fts_row_state state; /*!< state of the row */ - - ib_vector_t* fts_indexes; /*!< The indexes that are affected */ -}; - -/** List of document ids that were added during a transaction. This -list is passed on to a background 'Add' thread and OPTIMIZE, so it -needs its own memory heap. */ -struct fts_doc_ids_t { - ib_vector_t* doc_ids; /*!< document ids (each element is - of type doc_id_t). */ - - ib_alloc_t* self_heap; /*!< Allocator used to create an - instance of this type and the - doc_ids vector */ -}; - -// FIXME: Get rid of this if possible. -/** Since MySQL's character set support for Unicode is woefully inadequate -(it supports basic operations like isalpha etc. only for 8-bit characters), -we have to implement our own. We use UTF-16 without surrogate processing -as our in-memory format. This typedef is a single such character. */ -typedef unsigned short ib_uc_t; - -/** An UTF-16 ro UTF-8 string. */ -struct fts_string_t { - byte* f_str; /*!< string, not necessary terminated in - any way */ - ulint f_len; /*!< Length of the string in bytes */ - ulint f_n_char; /*!< Number of characters */ -}; - -/** Query ranked doc ids. */ -struct fts_ranking_t { - doc_id_t doc_id; /*!< Document id */ - - fts_rank_t rank; /*!< Rank is between 0 .. 1 */ - - byte* words; /*!< this contains the words - that were queried - and found in this document */ - ulint words_len; /*!< words len */ -}; - -/** Query result. 
*/ -struct fts_result_t { - ib_rbt_node_t* current; /*!< Current element */ - - ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t - indexed by doc id */ - ib_rbt_t* rankings_by_rank;/*!< RB tree of type fts_ranking_t - indexed by rank */ -}; - -/** This is used to generate the FTS auxiliary table name, we need the -table id and the index id to generate the column specific FTS auxiliary -table name. */ -struct fts_table_t { - const char* parent; /*!< Parent table name, this is - required only for the database - name */ - - fts_table_type_t - type; /*!< The auxiliary table type */ - - table_id_t table_id; /*!< The table id */ - - index_id_t index_id; /*!< The index id */ - - const char* suffix; /*!< The suffix of the fts auxiliary - table name, can be NULL, not used - everywhere (yet) */ - const dict_table_t* - table; /*!< Parent table */ - CHARSET_INFO* charset; /*!< charset info if it is for FTS - index auxiliary table */ -}; - -enum fts_status { - BG_THREAD_STOP = 1, /*!< TRUE if the FTS background thread - has finished reading the ADDED table, - meaning more items can be added to - the table. */ - - BG_THREAD_READY = 2, /*!< TRUE if the FTS background thread - is ready */ - - ADD_THREAD_STARTED = 4, /*!< TRUE if the FTS add thread - has started */ - - ADDED_TABLE_SYNCED = 8, /*!< TRUE if the ADDED table record is - sync-ed after crash recovery */ - - TABLE_DICT_LOCKED = 16 /*!< Set if the table has - dict_sys->mutex */ -}; - -typedef enum fts_status fts_status_t; - -/** The state of the FTS sub system. */ -struct fts_t { - /*!< mutex protecting bg_threads* and - fts_add_wq. 
*/ - ib_mutex_t bg_threads_mutex; - - ulint bg_threads; /*!< number of background threads - accessing this table */ - - /*!< TRUE if background threads running - should stop themselves */ - ulint fts_status; /*!< Status bit regarding fts - running state */ - - ib_wqueue_t* add_wq; /*!< Work queue for scheduling jobs - for the FTS 'Add' thread, or NULL - if the thread has not yet been - created. Each work item is a - fts_trx_doc_ids_t*. */ - - fts_cache_t* cache; /*!< FTS memory buffer for this table, - or NULL if the table has no FTS - index. */ - - ulint doc_col; /*!< FTS doc id hidden column number - in the CLUSTERED index. */ - - ib_vector_t* indexes; /*!< Vector of FTS indexes, this is - mainly for caching purposes. */ - mem_heap_t* fts_heap; /*!< heap for fts_t allocation */ -}; - -struct fts_stopword_t; - -/** status bits for fts_stopword_t status field. */ -#define STOPWORD_NOT_INIT 0x1 -#define STOPWORD_OFF 0x2 -#define STOPWORD_FROM_DEFAULT 0x4 -#define STOPWORD_USER_TABLE 0x8 - -extern const char* fts_default_stopword[]; - -/** Variable specifying the maximum FTS cache size for each table */ -extern ulong fts_max_cache_size; - -/** Variable specifying the total memory allocated for FTS cache */ -extern ulong fts_max_total_cache_size; - -/** Variable specifying the FTS result cache limit for each query */ -extern ulong fts_result_cache_limit; - -/** Variable specifying the maximum FTS max token size */ -extern ulong fts_max_token_size; - -/** Variable specifying the minimum FTS max token size */ -extern ulong fts_min_token_size; - -/** Whether the total memory used for FTS cache is exhausted, and we will -need a sync to free some memory */ -extern bool fts_need_sync; - -/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */ -#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4) - -/** Maximum possible Fulltext word length (in characters) */ -#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN - -/** Variable specifying the table that has 
Fulltext index to display its -content through information schema table */ -extern char* fts_internal_tbl_name; -extern char* fts_internal_tbl_name2; - -#define fts_que_graph_free(graph) \ -do { \ - mutex_enter(&dict_sys->mutex); \ - que_graph_free(graph); \ - mutex_exit(&dict_sys->mutex); \ -} while (0) - -/******************************************************************//** -Create a FTS cache. */ -UNIV_INTERN -fts_cache_t* -fts_cache_create( -/*=============*/ - dict_table_t* table); /*!< table owns the FTS cache */ - -/******************************************************************//** -Create a FTS index cache. -@return Index Cache */ -UNIV_INTERN -fts_index_cache_t* -fts_cache_index_cache_create( -/*=========================*/ - dict_table_t* table, /*!< in: table with FTS index */ - dict_index_t* index); /*!< in: FTS index */ - -/******************************************************************//** -Get the next available document id. This function creates a new -transaction to generate the document id. -@return DB_SUCCESS if OK */ -UNIV_INTERN -dberr_t -fts_get_next_doc_id( -/*================*/ - const dict_table_t* table, /*!< in: table */ - doc_id_t* doc_id) /*!< out: new document id */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Update the next and last Doc ID in the CONFIG table to be the input -"doc_id" value (+ 1). We would do so after each FTS index build or -table truncate */ -UNIV_INTERN -void -fts_update_next_doc_id( -/*===================*/ - trx_t* trx, /*!< in/out: transaction */ - const dict_table_t* table, /*!< in: table */ - const char* table_name, /*!< in: table name, or NULL */ - doc_id_t doc_id) /*!< in: DOC ID to set */ - MY_ATTRIBUTE((nonnull(2))); - -/******************************************************************//** -Create a new document id . 
-@return DB_SUCCESS if all went well else error */ -UNIV_INTERN -dberr_t -fts_create_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: row is of this - table. */ - dtuple_t* row, /*!< in/out: add doc id - value to this row. This is the - current row that is being - inserted. */ - mem_heap_t* heap) /*!< in: heap */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Create a new fts_doc_ids_t. -@return new fts_doc_ids_t. */ -UNIV_INTERN -fts_doc_ids_t* -fts_doc_ids_create(void); -/*=====================*/ - -/******************************************************************//** -Free a fts_doc_ids_t. */ -UNIV_INTERN -void -fts_doc_ids_free( -/*=============*/ - fts_doc_ids_t* doc_ids); /*!< in: doc_ids to free */ - -/******************************************************************//** -Notify the FTS system about an operation on an FTS-indexed table. */ -UNIV_INTERN -void -fts_trx_add_op( -/*===========*/ - trx_t* trx, /*!< in: InnoDB transaction */ - dict_table_t* table, /*!< in: table */ - doc_id_t doc_id, /*!< in: doc id */ - fts_row_state state, /*!< in: state of the row */ - ib_vector_t* fts_indexes) /*!< in: FTS indexes affected - (NULL=all) */ - MY_ATTRIBUTE((nonnull(1,2))); - -/******************************************************************//** -Free an FTS trx. */ -UNIV_INTERN -void -fts_trx_free( -/*=========*/ - fts_trx_t* fts_trx); /*!< in, own: FTS trx */ - -/******************************************************************//** -Creates the common ancillary tables needed for supporting an FTS index -on the given table. row_mysql_lock_data_dictionary must have been -called before this. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_create_common_tables( -/*=====================*/ - trx_t* trx, /*!< in: transaction handle */ - const dict_table_t* - table, /*!< in: table with one FTS - index */ - const char* name, /*!< in: table name */ - bool skip_doc_id_index) /*!< in: Skip index on doc id */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Wrapper function of fts_create_index_tables_low(), create auxiliary -tables for an FTS index -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_create_index_tables( -/*====================*/ - trx_t* trx, /*!< in: transaction handle */ - const dict_index_t* index) /*!< in: the FTS index - instance */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Creates the column specific ancillary tables needed for supporting an -FTS index on the given table. row_mysql_lock_data_dictionary must have -been called before this. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_create_index_tables_low( -/*========================*/ - trx_t* trx, /*!< in: transaction handle */ - const dict_index_t* - index, /*!< in: the FTS index - instance */ - const char* table_name, /*!< in: the table name */ - table_id_t table_id) /*!< in: the table id */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Add the FTS document id hidden column. */ -UNIV_INTERN -void -fts_add_doc_id_column( -/*==================*/ - dict_table_t* table, /*!< in/out: Table with FTS index */ - mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */ - MY_ATTRIBUTE((nonnull(1))); - -/*********************************************************************//** -Drops the ancillary tables needed for supporting an FTS index on the -given table. row_mysql_lock_data_dictionary must have been called before -this. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_drop_tables( -/*============*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table has the FTS - index */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -The given transaction is about to be committed; do whatever is necessary -from the FTS system's POV. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_commit( -/*=======*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*******************************************************************//** -FTS Query entry point. -@return DB_SUCCESS if successful otherwise error code */ -UNIV_INTERN -dberr_t -fts_query( -/*======*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index to search */ - uint flags, /*!< in: FTS search mode */ - const byte* query, /*!< in: FTS query */ - ulint query_len, /*!< in: FTS query string len - in bytes */ - fts_result_t** result) /*!< out: query result, to be - freed by the caller.*/ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/******************************************************************//** -Retrieve the FTS Relevance Ranking result for doc with doc_id -@return the relevance ranking value. */ -UNIV_INTERN -float -fts_retrieve_ranking( -/*=================*/ - fts_result_t* result, /*!< in: FTS result structure */ - doc_id_t doc_id); /*!< in: the interested document - doc_id */ - -/******************************************************************//** -FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */ -UNIV_INTERN -void -fts_query_sort_result_on_rank( -/*==========================*/ - fts_result_t* result); /*!< out: result instance - to sort.*/ - -/******************************************************************//** -FTS Query free result, returned by fts_query(). 
*/ -UNIV_INTERN -void -fts_query_free_result( -/*==================*/ - fts_result_t* result); /*!< in: result instance - to free.*/ - -/******************************************************************//** -Extract the doc id from the FTS hidden column. */ -UNIV_INTERN -doc_id_t -fts_get_doc_id_from_row( -/*====================*/ - dict_table_t* table, /*!< in: table */ - dtuple_t* row); /*!< in: row whose FTS doc id we - want to extract.*/ - -/******************************************************************//** -Extract the doc id from the FTS hidden column. */ -UNIV_INTERN -doc_id_t -fts_get_doc_id_from_rec( -/*====================*/ - dict_table_t* table, /*!< in: table */ - const rec_t* rec, /*!< in: rec */ - mem_heap_t* heap); /*!< in: heap */ - -/******************************************************************//** -Update the query graph with a new document id. -@return Doc ID used */ -UNIV_INTERN -doc_id_t -fts_update_doc_id( -/*==============*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* ufield, /*!< out: update node */ - doc_id_t* next_doc_id); /*!< out: buffer for writing */ - -/******************************************************************//** -FTS initialize. */ -UNIV_INTERN -void -fts_startup(void); -/*==============*/ - -/******************************************************************//** -Signal FTS threads to initiate shutdown. */ -UNIV_INTERN -void -fts_start_shutdown( -/*===============*/ - dict_table_t* table, /*!< in: table with FTS - indexes */ - fts_t* fts); /*!< in: fts instance to - shutdown */ - -/******************************************************************//** -Wait for FTS threads to shutdown. */ -UNIV_INTERN -void -fts_shutdown( -/*=========*/ - dict_table_t* table, /*!< in: table with FTS - indexes */ - fts_t* fts); /*!< in: fts instance to - shutdown */ - -/******************************************************************//** -Create an instance of fts_t. 
-@return instance of fts_t */ -UNIV_INTERN -fts_t* -fts_create( -/*=======*/ - dict_table_t* table); /*!< out: table with FTS - indexes */ - -/**********************************************************************//** -Free the FTS resources. */ -UNIV_INTERN -void -fts_free( -/*=====*/ - dict_table_t* table); /*!< in/out: table with - FTS indexes */ - -/*********************************************************************//** -Run OPTIMIZE on the given table. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -fts_optimize_table( -/*===============*/ - dict_table_t* table) /*!< in: table to optimiza */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Startup the optimize thread and create the work queue. */ -UNIV_INTERN -void -fts_optimize_init(void); -/*====================*/ - -/**********************************************************************//** -Check whether the work queue is initialized. -@return TRUE if optimze queue is initialized. */ -UNIV_INTERN -ibool -fts_optimize_is_init(void); -/*======================*/ - -/****************************************************************//** -Drops index ancillary tables for a FTS index -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_drop_index_tables( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index) /*!< in: Index to drop */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/******************************************************************//** -Remove the table from the OPTIMIZER's list. We do wait for -acknowledgement from the consumer of the message. */ -UNIV_INTERN -void -fts_optimize_remove_table( -/*======================*/ - dict_table_t* table); /*!< in: table to remove */ - -/** Send sync fts cache for the table. 
-@param[in] table table to sync */ -UNIV_INTERN -void -fts_optimize_request_sync_table( - dict_table_t* table); - -/**********************************************************************//** -Signal the optimize thread to prepare for shutdown. */ -UNIV_INTERN -void -fts_optimize_start_shutdown(void); -/*==============================*/ - -/**********************************************************************//** -Inform optimize to clean up. */ -UNIV_INTERN -void -fts_optimize_end(void); -/*===================*/ - -/**********************************************************************//** -Take a FTS savepoint. */ -UNIV_INTERN -void -fts_savepoint_take( -/*===============*/ - trx_t* trx, /*!< in: transaction */ - fts_trx_t* fts_trx, /*!< in: fts transaction */ - const char* name) /*!< in: savepoint name */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Refresh last statement savepoint. */ -UNIV_INTERN -void -fts_savepoint_laststmt_refresh( -/*===========================*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Release the savepoint data identified by name. */ -UNIV_INTERN -void -fts_savepoint_release( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - const char* name); /*!< in: savepoint name */ - -/**********************************************************************//** -Free the FTS cache. */ -UNIV_INTERN -void -fts_cache_destroy( -/*==============*/ - fts_cache_t* cache); /*!< in: cache*/ - -/** Clear cache. -@param[in,out] cache fts cache */ -UNIV_INTERN -void -fts_cache_clear( - fts_cache_t* cache); - -/*********************************************************************//** -Initialize things in cache. 
*/ -UNIV_INTERN -void -fts_cache_init( -/*===========*/ - fts_cache_t* cache); /*!< in: cache */ - -/*********************************************************************//** -Rollback to and including savepoint indentified by name. */ -UNIV_INTERN -void -fts_savepoint_rollback( -/*===================*/ - trx_t* trx, /*!< in: transaction */ - const char* name); /*!< in: savepoint name */ - -/*********************************************************************//** -Rollback to and including savepoint indentified by name. */ -UNIV_INTERN -void -fts_savepoint_rollback_last_stmt( -/*=============================*/ - trx_t* trx); /*!< in: transaction */ - -/***********************************************************************//** -Drop all orphaned FTS auxiliary tables, those that don't have a parent -table or FTS index defined on them. */ -UNIV_INTERN -void -fts_drop_orphaned_tables(void); -/*==========================*/ - -/* Get parent table name if it's a fts aux table -@param[in] aux_table_name aux table name -@param[in] aux_table_len aux table length -@return parent table name, or NULL */ -char* -fts_get_parent_table_name( - const char* aux_table_name, - ulint aux_table_len); - -/******************************************************************//** -Since we do a horizontal split on the index table, we need to drop -all the split tables. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_drop_index_split_tables( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index) /*!< in: fts instance */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** Run SYNC on the table, i.e., write out data from the cache to the -FTS auxiliary INDEX table and clear the cache at the end. 
-@param[in,out] table fts table -@param[in] unlock_cache whether unlock cache when write node -@param[in] wait whether wait for existing sync to finish -@param[in] has_dict whether has dict operation lock -@return DB_SUCCESS on success, error code on failure. */ -UNIV_INTERN -dberr_t -fts_sync_table( - dict_table_t* table, - bool unlock_cache, - bool wait, - bool has_dict); - -/****************************************************************//** -Free the query graph but check whether dict_sys->mutex is already -held */ -UNIV_INTERN -void -fts_que_graph_free_check_lock( -/*==========================*/ - fts_table_t* fts_table, /*!< in: FTS table */ - const fts_index_cache_t*index_cache, /*!< in: FTS index cache */ - que_t* graph); /*!< in: query graph */ - -/****************************************************************//** -Create an FTS index cache. */ -UNIV_INTERN -CHARSET_INFO* -fts_index_get_charset( -/*==================*/ - dict_index_t* index); /*!< in: FTS index */ - -/*********************************************************************//** -Get the initial Doc ID by consulting the CONFIG table -@return initial Doc ID */ -UNIV_INTERN -doc_id_t -fts_init_doc_id( -/*============*/ - const dict_table_t* table); /*!< in: table */ - -/******************************************************************//** -compare two character string according to their charset. */ -extern -int -innobase_fts_text_cmp( -/*==================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - -/******************************************************************//** -Makes all characters in a string lower case. 
*/ -extern -size_t -innobase_fts_casedn_str( -/*====================*/ - CHARSET_INFO* cs, /*!< in: Character set */ - char* src, /*!< in: string to put in - lower case */ - size_t src_len, /*!< in: input string length */ - char* dst, /*!< in: buffer for result - string */ - size_t dst_len); /*!< in: buffer size */ - - -/******************************************************************//** -compare two character string according to their charset. */ -extern -int -innobase_fts_text_cmp_prefix( -/*=========================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - -/*************************************************************//** -Get the next token from the given string and store it in *token. */ -extern -ulint -innobase_mysql_fts_get_token( -/*=========================*/ - CHARSET_INFO* charset, /*!< in: Character set */ - const byte* start, /*!< in: start of text */ - const byte* end, /*!< in: one character past - end of text */ - fts_string_t* token, /*!< out: token's text */ - ulint* offset); /*!< out: offset to token, - measured as characters from - 'start' */ - -/*********************************************************************//** -Fetch COUNT(*) from specified table. -@return the number of rows in the table */ -UNIV_INTERN -ulint -fts_get_rows_count( -/*===============*/ - fts_table_t* fts_table); /*!< in: fts table to read */ - -/*************************************************************//** -Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists -@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */ -UNIV_INTERN -doc_id_t -fts_get_max_doc_id( -/*===============*/ - dict_table_t* table); /*!< in: user table */ - -/******************************************************************//** -Check whether user supplied stopword table exists and is of -the right format. 
-@return the stopword column charset if qualifies */ -UNIV_INTERN -CHARSET_INFO* -fts_valid_stopword_table( -/*=====================*/ - const char* stopword_table_name); /*!< in: Stopword table - name */ -/****************************************************************//** -This function loads specified stopword into FTS cache -@return TRUE if success */ -UNIV_INTERN -ibool -fts_load_stopword( -/*==============*/ - const dict_table_t* - table, /*!< in: Table with FTS */ - trx_t* trx, /*!< in: Transaction */ - const char* global_stopword_table, /*!< in: Global stopword table - name */ - const char* session_stopword_table, /*!< in: Session stopword table - name */ - ibool stopword_is_on, /*!< in: Whether stopword - option is turned on/off */ - ibool reload); /*!< in: Whether it is during - reload of FTS table */ - -/****************************************************************//** -Create the vector of fts_get_doc_t instances. -@return vector of fts_get_doc_t instances */ -UNIV_INTERN -ib_vector_t* -fts_get_docs_create( -/*================*/ - fts_cache_t* cache); /*!< in: fts cache */ - -/****************************************************************//** -Read the rows from the FTS index -@return DB_SUCCESS if OK */ -UNIV_INTERN -dberr_t -fts_table_fetch_doc_ids( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: aux table */ - fts_doc_ids_t* doc_ids); /*!< in: For collecting - doc ids */ -/****************************************************************//** -This function brings FTS index in sync when FTS index is first -used. 
There are documents that have not yet sync-ed to auxiliary -tables from last server abnormally shutdown, we will need to bring -such document into FTS cache before any further operations -@return TRUE if all OK */ -UNIV_INTERN -ibool -fts_init_index( -/*===========*/ - dict_table_t* table, /*!< in: Table with FTS */ - ibool has_cache_lock); /*!< in: Whether we already - have cache lock */ -/*******************************************************************//** -Add a newly created index to the FTS cache */ -UNIV_INTERN -void -fts_add_index( -/*==========*/ - dict_index_t* index, /*!< FTS index to be added */ - dict_table_t* table); /*!< table */ - -/*******************************************************************//** -Drop auxiliary tables related to an FTS index -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fts_drop_index( -/*===========*/ - dict_table_t* table, /*!< in: Table where indexes are dropped */ - dict_index_t* index, /*!< in: Index to be dropped */ - trx_t* trx) /*!< in: Transaction for the drop */ - MY_ATTRIBUTE((nonnull)); - -/****************************************************************//** -Rename auxiliary tables for all FTS indexes of a table -@return DB_SUCCESS or error code */ - -dberr_t -fts_rename_aux_tables( -/*==================*/ - dict_table_t* table, /*!< in: user Table */ - const char* new_name, /*!< in: new table name */ - trx_t* trx); /*!< in: transaction */ - -/*******************************************************************//** -Check that the indexes in fts->indexes are also present in the index cache -and in the table->indexes list -@return TRUE if all indexes match */ -UNIV_INTERN -ibool -fts_check_cached_index( -/*===================*/ - dict_table_t* table); /*!< in: Table where indexes are dropped */ -#endif /*!< fts0fts.h */ - diff --git a/storage/xtradb/include/fts0opt.h b/storage/xtradb/include/fts0opt.h deleted file mode 100644 index 92eaf8270d2..00000000000 --- a/storage/xtradb/include/fts0opt.h +++ /dev/null @@ -1,37
+0,0 @@ -/***************************************************************************** - -Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0opt.h -Full Text Search optimize thread - -Created 2011-02-15 Jimmy Yang -***********************************************************************/ -#ifndef INNODB_FTS0OPT_H -#define INNODB_FTS0OPT_H - -/******************************************************************** -Callback function to fetch the rows in an FTS INDEX record. */ -UNIV_INTERN -ibool -fts_optimize_index_fetch_node( -/*==========================*/ - /* out: always returns non-NULL */ - void* row, /* in: sel_node_t* */ - void* user_arg); /* in: pointer to ib_vector_t */ -#endif diff --git a/storage/xtradb/include/fts0pars.h b/storage/xtradb/include/fts0pars.h deleted file mode 100644 index 8108e811599..00000000000 --- a/storage/xtradb/include/fts0pars.h +++ /dev/null @@ -1,72 +0,0 @@ -/* A Bison parser, made by GNU Bison 2.5. */ - -/* Bison interface for Yacc-like parsers in C - - Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc. 
- - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - FTS_OPER = 258, - FTS_TEXT = 259, - FTS_TERM = 260, - FTS_NUMB = 261 - }; -#endif - - - -#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED -typedef union YYSTYPE -{ - -/* Line 2068 of yacc.c */ -#line 61 "fts0pars.y" - - int oper; - fts_ast_string_t* token; - fts_ast_node_t* node; - - - -/* Line 2068 of yacc.c */ -#line 64 "fts0pars.hh" -} YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -#endif - - - - diff --git a/storage/xtradb/include/fts0priv.h b/storage/xtradb/include/fts0priv.h deleted file mode 100644 index 2d4e9d88fd1..00000000000 --- a/storage/xtradb/include/fts0priv.h +++ /dev/null @@ -1,653 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0priv.h -Full text search internal header file - -Created 2011/09/02 Sunny Bains -***********************************************************************/ - -#ifndef INNOBASE_FTS0PRIV_H -#define INNOBASE_FTS0PRIV_H - -#include "dict0dict.h" -#include "pars0pars.h" -#include "que0que.h" -#include "que0types.h" -#include "fts0types.h" - -/* The various states of the FTS sub system pertaining to a table with -FTS indexes defined on it. */ -enum fts_table_state_enum { - /*!< This must be 0 since we insert - a hard coded '0' at create time - to the config table */ - - FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */ - - FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */ - - FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when - it's safe to do so */ -}; - -typedef enum fts_table_state_enum fts_table_state_t; - -/** The default time to wait for the background thread (in microseconds). */ -#define FTS_MAX_BACKGROUND_THREAD_WAIT 10000 - -/** Maximum number of iterations to wait before we complain */ -#define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000 - -/** The maximum length of the config table's name column in bytes. -NOTE(review): this comment previously said "value column", identical to -the FTS_MAX_CONFIG_VALUE_LEN description; "name column" is inferred from -the macro name - confirm. */ -#define FTS_MAX_CONFIG_NAME_LEN 64 - -/** The maximum length of the config table's value column in bytes */ -#define FTS_MAX_CONFIG_VALUE_LEN 1024 - -/** Approx. upper limit of ilist length in bytes.
*/ -#define FTS_ILIST_MAX_SIZE (64 * 1024) - -/** FTS config table name parameters */ - -/** The number of seconds after which an OPTIMIZE run will stop */ -#define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit" - -/** The next doc id */ -#define FTS_SYNCED_DOC_ID "synced_doc_id" - -/** The last word that was OPTIMIZED */ -#define FTS_LAST_OPTIMIZED_WORD "last_optimized_word" - -/** Total number of documents that have been deleted. The next_doc_id -minus this count gives us the total number of documents. */ -#define FTS_TOTAL_DELETED_COUNT "deleted_doc_count" - -/** Total number of words parsed from all documents */ -#define FTS_TOTAL_WORD_COUNT "total_word_count" - -/** Start of optimize of an FTS index */ -#define FTS_OPTIMIZE_START_TIME "optimize_start_time" - -/** End of optimize for an FTS index */ -#define FTS_OPTIMIZE_END_TIME "optimize_end_time" - -/** User specified stopword table name */ -#define FTS_STOPWORD_TABLE_NAME "stopword_table_name" - -/** Whether to use (turn on/off) stopword */ -#define FTS_USE_STOPWORD "use_stopword" - -/** State of the FTS system for this table. It can be one of - RUNNING, OPTIMIZING, DELETED. */ -#define FTS_TABLE_STATE "table_state" - -/** The minimum length of an FTS auxiliary table name's id component, -e.g., for an auxiliary table name - - FTS_<TABLE_ID>_SUFFIX - -This constant is the minimum buffer length required to store the <TABLE_ID> -component. -*/ -#define FTS_AUX_MIN_TABLE_ID_LENGTH 48 - -/** Maximum length of an integer stored in the config table value column. */ -#define FTS_MAX_INT_LEN 32 - -/******************************************************************//** -Parse an SQL string. %s is replaced with the table's id.
-@return query graph */ -UNIV_INTERN -que_t* -fts_parse_sql( -/*==========*/ - fts_table_t* fts_table, /*!< in: FTS aux table */ - pars_info_t* info, /*!< in: info struct, or NULL */ - const char* sql) /*!< in: SQL string to evaluate */ - MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result)); -/******************************************************************//** -Evaluate a parsed SQL statement -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_eval_sql( -/*=========*/ - trx_t* trx, /*!< in: transaction */ - que_t* graph) /*!< in: Parsed statement */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Construct the name of an ancillary FTS table for the given table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN -char* -fts_get_table_name( -/*===============*/ - const fts_table_t* - fts_table) /*!< in: FTS aux table info */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); -/******************************************************************//** -Construct the column specification part of the SQL string for selecting the -indexed FTS columns for the given table. Adds the necessary bound -ids to the given 'info' and returns the SQL string. 
Examples: - -One indexed column named "text": - - "$sel0", - info/ids: sel0 -> "text" - -Two indexed columns named "subject" and "content": - - "$sel0, $sel1", - info/ids: sel0 -> "subject", sel1 -> "content", -@return heap-allocated WHERE string */ -UNIV_INTERN -const char* -fts_get_select_columns_str( -/*=======================*/ - dict_index_t* index, /*!< in: FTS index */ - pars_info_t* info, /*!< in/out: parser info */ - mem_heap_t* heap) /*!< in: memory heap */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether -we want to get Doc whose ID is equal to or greater or smaller than supplied -ID */ -#define FTS_FETCH_DOC_BY_ID_EQUAL 1 -#define FTS_FETCH_DOC_BY_ID_LARGE 2 -#define FTS_FETCH_DOC_BY_ID_SMALL 3 - -/*************************************************************//** -Fetch document (= a single row's indexed text) with the given -document id. -@return: DB_SUCCESS if fetch is successful, else error */ -UNIV_INTERN -dberr_t -fts_doc_fetch_by_doc_id( -/*====================*/ - fts_get_doc_t* get_doc, /*!< in: state */ - doc_id_t doc_id, /*!< in: id of document to fetch */ - dict_index_t* index_to_use, /*!< in: caller supplied FTS index, - or NULL */ - ulint option, /*!< in: search option, if it is - greater than doc_id or equal */ - fts_sql_callback - callback, /*!< in: callback to read - records */ - void* arg) /*!< in: callback arg */ - MY_ATTRIBUTE((nonnull(6))); - -/*******************************************************************//** -Callback function for fetch that stores the text of an FTS document, -converting each column to UTF-16. 
-@return always FALSE */ -UNIV_INTERN -ibool -fts_query_expansion_fetch_doc( -/*==========================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: fts_doc_t* */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************** -Write out a single word's data as new entry/entries in the INDEX table. -@return DB_SUCCESS if all OK. */ -UNIV_INTERN -dberr_t -fts_write_node( -/*===========*/ - trx_t* trx, /*!< in: transaction */ - que_t** graph, /*!< in: query graph */ - fts_table_t* fts_table, /*!< in: the FTS aux index */ - fts_string_t* word, /*!< in: word in UTF-8 */ - fts_node_t* node) /*!< in: node columns */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Tokenize a document. */ -UNIV_INTERN -void -fts_tokenize_document( -/*==================*/ - fts_doc_t* doc, /*!< in/out: document to - tokenize */ - fts_doc_t* result) /*!< out: if provided, save - result tokens here */ - MY_ATTRIBUTE((nonnull(1))); - -/*******************************************************************//** -Continue to tokenize a document. */ -UNIV_INTERN -void -fts_tokenize_document_next( -/*=======================*/ - fts_doc_t* doc, /*!< in/out: document to - tokenize */ - ulint add_pos, /*!< in: add this position to all - tokens from this tokenization */ - fts_doc_t* result) /*!< out: if provided, save - result tokens here */ - MY_ATTRIBUTE((nonnull(1))); -/******************************************************************//** -Initialize a document. 
*/ -UNIV_INTERN -void -fts_doc_init( -/*=========*/ - fts_doc_t* doc) /*!< in: doc to initialize */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************************//** -Do a binary search for a doc id in the array -@return +ve index if found, -ve index where it should be - inserted if not found */ -UNIV_INTERN -int -fts_bsearch( -/*========*/ - fts_update_t* array, /*!< in: array to search */ - int lower, /*!< in: lower bound of array */ - int upper, /*!< in: upper bound of array */ - doc_id_t doc_id) /*!< in: doc id to lookup */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Free document. */ -UNIV_INTERN -void -fts_doc_free( -/*=========*/ - fts_doc_t* doc) /*!< in: document */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Free fts_optimizer_word_t instance. */ -UNIV_INTERN -void -fts_word_free( -/*==========*/ - fts_word_t* word) /*!< in: instance to free. */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Read the rows from the FTS index -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_index_fetch_nodes( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - que_t** graph, /*!< in: prepared statement */ - fts_table_t* fts_table, /*!< in: FTS aux table */ - const fts_string_t* - word, /*!< in: the word to fetch */ - fts_fetch_t* fetch) /*!< in: fetch callback. */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Create a fts_optimizer_word_t instance.
-@return new instance */ -UNIV_INTERN -fts_word_t* -fts_word_init( -/*==========*/ - fts_word_t* word, /*!< in: word to initialize */ - byte* utf8, /*!< in: UTF-8 string */ - ulint len) /*!< in: length of string in bytes */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Compare two fts_trx_table_t instances, we actually compare the -table id's here. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_trx_table_cmp( -/*==============*/ - const void* v1, /*!< in: id1 */ - const void* v2) /*!< in: id2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Compare a table id with a trx_table_t table id. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_trx_table_id_cmp( -/*=================*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Commit a transaction. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -fts_sql_commit( -/*===========*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Rollback a transaction. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -fts_sql_rollback( -/*=============*/ - trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Parse an SQL string. %s is replaced with the table's id. 
Don't acquire -the dict mutex -@return query graph */ -UNIV_INTERN -que_t* -fts_parse_sql_no_dict_lock( -/*=======================*/ - fts_table_t* fts_table, /*!< in: table with FTS index */ - pars_info_t* info, /*!< in: parser info */ - const char* sql) /*!< in: SQL string to evaluate */ - MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result)); -/******************************************************************//** -Get value from config table. The caller must ensure that enough -space is allocated for value to hold the column contents -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_get_value( -/*=================*/ - trx_t* trx, /* transaction */ - fts_table_t* fts_table, /*!< in: the indexed FTS table */ - const char* name, /*!< in: get config value for - this parameter name */ - fts_string_t* value) /*!< out: value read from - config table */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Get value specific to an FTS index from the config table. The caller -must ensure that enough space is allocated for value to hold the -column contents. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_get_index_value( -/*=======================*/ - trx_t* trx, /*!< transaction */ - dict_index_t* index, /*!< in: index */ - const char* param, /*!< in: get config value for - this parameter name */ - fts_string_t* value) /*!< out: value read from - config table */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Set the value in the config table for name. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_set_value( -/*=================*/ - trx_t* trx, /*!< transaction */ - fts_table_t* fts_table, /*!< in: the indexed FTS table */ - const char* name, /*!< in: set config value for - this parameter name */ - const fts_string_t* - value) /*!< in: value to update */ - MY_ATTRIBUTE((nonnull)); -/****************************************************************//** -Set an ulint value in the config table. -@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -fts_config_set_ulint( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: the indexed FTS table */ - const char* name, /*!< in: param name */ - ulint int_value) /*!< in: value */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Set the value specific to an FTS index in the config table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_set_index_value( -/*=======================*/ - trx_t* trx, /*!< transaction */ - dict_index_t* index, /*!< in: index */ - const char* param, /*!< in: set config value for - this parameter name */ - fts_string_t* value) /*!< in: value to set. - NOTE(review): previously documented - as "out: value read from config - table", apparently copied from the - getter - confirm */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Increment the value in the config table for column name. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_increment_value( -/*=======================*/ - trx_t* trx, /*!< transaction */ - fts_table_t* fts_table, /*!< in: the indexed FTS table */ - const char* name, /*!< in: increment config value - for this parameter name */ - ulint delta) /*!< in: increment by this much */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Increment the per index value in the config table for column name.
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_increment_index_value( -/*=============================*/ - trx_t* trx, /*!< transaction */ - dict_index_t* index, /*!< in: FTS index */ - const char* name, /*!< in: increment config value - for this parameter name */ - ulint delta) /*!< in: increment by this much */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Get an ulint value from the config table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_get_index_ulint( -/*=======================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - const char* name, /*!< in: param name */ - ulint* int_value) /*!< out: value */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Set an ulint value in the config table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_set_index_ulint( -/*=======================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: FTS index */ - const char* name, /*!< in: param name */ - ulint int_value) /*!< in: value */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Get an ulint value from the config table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_config_get_ulint( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - fts_table_t* fts_table, /*!< in: the indexed FTS table */ - const char* name, /*!< in: param name */ - ulint* int_value) /*!< out: value */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Search cache for word.
-@return the word node vector if found else NULL */ -UNIV_INTERN -const ib_vector_t* -fts_cache_find_word( -/*================*/ - const fts_index_cache_t* - index_cache, /*!< in: cache to search */ - const fts_string_t* - text) /*!< in: word to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Check cache for deleted doc id. -@return TRUE if deleted */ -UNIV_INTERN -ibool -fts_cache_is_deleted_doc_id( -/*========================*/ - const fts_cache_t* - cache, /*!< in: cache to search */ - doc_id_t doc_id) /*!< in: doc id to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Append deleted doc ids to vector and sort the vector. */ -UNIV_INTERN -void -fts_cache_append_deleted_doc_ids( -/*=============================*/ - const fts_cache_t* - cache, /*!< in: cache to use */ - ib_vector_t* vector); /*!< in: append to this vector */ -/******************************************************************//** -Wait for the background thread to start. We poll to detect change -of state, which is acceptable, since the wait should happen only -once during startup. -@return TRUE if the thread started else FALSE (i.e. timed out) */ -UNIV_INTERN -ibool -fts_wait_for_background_thread_to_start( -/*====================================*/ - dict_table_t* table, /*!< in: table to which the thread - is attached */ - ulint max_wait); /*!< in: time in microseconds, if set - to 0 then it disables timeout - checking */ -#ifdef FTS_DOC_STATS_DEBUG -/******************************************************************//** -Get the total number of words in the FTS for a particular FTS index.
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -fts_get_total_word_count( -/*=====================*/ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: for this index */ - ulint* total) /*!< out: total words */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif -/******************************************************************//** -Search the index specific cache for a particular FTS index. -@return the index specific cache else NULL */ -UNIV_INTERN -fts_index_cache_t* -fts_find_index_cache( -/*================*/ - const fts_cache_t* - cache, /*!< in: cache to search */ - const dict_index_t* - index) /*!< in: index to search for */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Write the table id to the given buffer (including final NUL). Buffer must be -at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. -@return number of bytes written */ -UNIV_INLINE -int -fts_write_object_id( -/*================*/ - ib_id_t id, /*!< in: a table/index id */ - char* str, /*!< in: buffer to write the id to */ - bool hex_format MY_ATTRIBUTE((unused))) - /*!< in: true for fixed hex format, - false for old ambiguous format */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Read the table id from the string generated by fts_write_object_id(). -@return TRUE if parse successful */ -UNIV_INLINE -ibool -fts_read_object_id( -/*===============*/ - ib_id_t* id, /*!< out: a table id */ - const char* str) /*!< in: buffer to read from */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Get the table id. 
-@return number of bytes written */ -UNIV_INTERN -int -fts_get_table_id( -/*=============*/ - const fts_table_t* - fts_table, /*!< in: FTS Auxiliary table */ - char* table_id) /*!< out: table id, must be at least - FTS_AUX_MIN_TABLE_ID_LENGTH bytes - long */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Add the table to the OPTIMIZER's list. */ -UNIV_INTERN -void -fts_optimize_add_table( -/*===================*/ - dict_table_t* table) /*!< in: table to add */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Optimize a table. */ -UNIV_INTERN -void -fts_optimize_do_table( -/*==================*/ - dict_table_t* table) /*!< in: table to optimize */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Construct the prefix name of an FTS table. -@return own: table name, must be freed with mem_free() */ -UNIV_INTERN -char* -fts_get_table_name_prefix( -/*======================*/ - const fts_table_t* - fts_table) /*!< in: Auxiliary table type */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); -/******************************************************************//** -Add node positions. */ -UNIV_INTERN -void -fts_cache_node_add_positions( -/*=========================*/ - fts_cache_t* cache, /*!< in: cache */ - fts_node_t* node, /*!< in: word node */ - doc_id_t doc_id, /*!< in: doc id */ - ib_vector_t* positions) /*!< in: fts_token_t::positions */ - MY_ATTRIBUTE((nonnull(2,4))); - -/******************************************************************//** -Create the config table name for retrieving index specific value.
-@return index config parameter name */ -UNIV_INTERN -char* -fts_config_create_index_param_name( -/*===============================*/ - const char* param, /*!< in: base name of param */ - const dict_index_t* index) /*!< in: index for config */ - MY_ATTRIBUTE((nonnull, malloc, warn_unused_result)); - -#ifndef UNIV_NONINL -#include "fts0priv.ic" -#endif - -#endif /* INNOBASE_FTS0PRIV_H */ diff --git a/storage/xtradb/include/fts0priv.ic b/storage/xtradb/include/fts0priv.ic deleted file mode 100644 index 88f2d67c7b8..00000000000 --- a/storage/xtradb/include/fts0priv.ic +++ /dev/null @@ -1,130 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0priv.ic -Full text search internal header file - -Created 2011/11/12 Sunny Bains -***********************************************************************/ - -/******************************************************************//** -Write the table id to the given buffer (including final NUL). Buffer must be -at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. 
-@return number of bytes written */ -UNIV_INLINE -int -fts_write_object_id( -/*================*/ - ib_id_t id, /* in: a table/index id */ - char* str, /* in: buffer to write the id to */ - bool hex_format MY_ATTRIBUTE((unused))) - /* in: true for fixed hex format, - false for old ambiguous format */ -{ - -#ifdef _WIN32 - - DBUG_EXECUTE_IF("innodb_test_wrong_non_windows_fts_aux_table_name", - return(sprintf(str, UINT64PFx, id));); - - /* Use this to construct old(5.6.14 and 5.7.3) windows - ambiguous aux table names */ - DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - return(sprintf(str, "%016llu", id));); - -#else /* _WIN32 */ - - /* Use this to construct old(5.6.14 and 5.7.3) windows - ambiguous aux table names */ - DBUG_EXECUTE_IF("innodb_test_wrong_windows_fts_aux_table_name", - return(sprintf(str, "%016" PRIu64, id));); - - DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - return(sprintf(str, UINT64PFx, id));); - -#endif /* _WIN32 */ - - /* As above, but this is only for those tables failing to rename. */ - if (!hex_format) { -#ifdef _WIN32 - // FIXME: Use ut_snprintf(), so does following one. - return(sprintf(str, "%016llu", id)); -#else /* _WIN32 */ - return(sprintf(str, "%016" PRIu64, id)); -#endif /* _WIN32 */ - } - - return(sprintf(str, UINT64PFx, id)); -} - -/******************************************************************//** -Read the table id from the string generated by fts_write_object_id(). -@return TRUE if parse successful */ -UNIV_INLINE -ibool -fts_read_object_id( -/*===============*/ - ib_id_t* id, /* out: an id */ - const char* str) /* in: buffer to read from */ -{ - /* NOTE: this func doesn't care about whether current table - is set with HEX_NAME, the user of the id read here will check - if the id is HEX or DEC and do the right thing with it. */ - return(sscanf(str, UINT64PFx, id) == 1); -} - -/******************************************************************//** -Compare two fts_trx_table_t instances. 
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_trx_table_cmp( -/*==============*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ -{ - const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table; - const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table; - - return((table1->id > table2->id) - ? 1 - : (table1->id == table2->id) - ? 0 - : -1); -} - -/******************************************************************//** -Compare a table id with a fts_trx_table_t table id. -@return < 0 if n1 < n2, 0 if n1 == n2,> 0 if n1 > n2 */ -UNIV_INLINE -int -fts_trx_table_id_cmp( -/*=================*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ -{ - const ullint* table_id = (const ullint*) p1; - const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table; - - return((*table_id > table2->id) - ? 1 - : (*table_id == table2->id) - ? 0 - : -1); -} diff --git a/storage/xtradb/include/fts0tlex.h b/storage/xtradb/include/fts0tlex.h deleted file mode 100644 index f91533803e8..00000000000 --- a/storage/xtradb/include/fts0tlex.h +++ /dev/null @@ -1,349 +0,0 @@ -#ifndef fts0tHEADER_H -#define fts0tHEADER_H 1 -#define fts0tIN_HEADER 1 - -#line 6 "../include/fts0tlex.h" - -#line 8 "../include/fts0tlex.h" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 35 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. 
*/ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. */ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* An opaque pointer. */ -#ifndef YY_TYPEDEF_YY_SCANNER_T -#define YY_TYPEDEF_YY_SCANNER_T -typedef void* yyscan_t; -#endif - -/* For convenience, these vars (plus the bison vars far below) - are macros in the reentrant scanner. 
*/ -#define yyin yyg->yyin_r -#define yyout yyg->yyout_r -#define yyextra yyg->yyextra_r -#define yyleng yyg->yyleng_r -#define yytext yyg->yytext_r -#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) -#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) -#define yy_flex_debug yyg->yy_flex_debug_r - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else -#define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ -#endif - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. 
*/ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -void fts0trestart (FILE *input_file ,yyscan_t yyscanner ); -void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); -void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); -void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); -void fts0tpop_buffer_state (yyscan_t yyscanner ); - -YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); -YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); - -void *fts0talloc (yy_size_t ,yyscan_t yyscanner ); -void *fts0trealloc (void *,yy_size_t ,yyscan_t yyscanner ); -void fts0tfree (void * ,yyscan_t yyscanner ); - -/* Begin user sect3 */ - -#define fts0twrap(n) 1 -#define YY_SKIP_YYWRAP - -#define yytext_ptr yytext_r - -#ifdef YY_HEADER_EXPORT_START_CONDITIONS -#define INITIAL 0 - -#endif - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -int fts0tlex_init (yyscan_t* scanner); - -int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); - -/* Accessor methods to globals. - These are made visible to non-reentrant scanners for convenience. 
*/ - -int fts0tlex_destroy (yyscan_t yyscanner ); - -int fts0tget_debug (yyscan_t yyscanner ); - -void fts0tset_debug (int debug_flag ,yyscan_t yyscanner ); - -YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner ); - -void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); - -FILE *fts0tget_in (yyscan_t yyscanner ); - -void fts0tset_in (FILE * in_str ,yyscan_t yyscanner ); - -FILE *fts0tget_out (yyscan_t yyscanner ); - -void fts0tset_out (FILE * out_str ,yyscan_t yyscanner ); - -int fts0tget_leng (yyscan_t yyscanner ); - -char *fts0tget_text (yyscan_t yyscanner ); - -int fts0tget_lineno (yyscan_t yyscanner ); - -void fts0tset_lineno (int line_number ,yyscan_t yyscanner ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int fts0twrap (yyscan_t yyscanner ); -#else -extern int fts0twrap (yyscan_t yyscanner ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); -#endif - -#ifndef YY_NO_INPUT - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else -#define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. 
- */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int fts0tlex (yyscan_t yyscanner); - -#define YY_DECL int fts0tlex (yyscan_t yyscanner) -#endif /* !YY_DECL */ - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - -#undef YY_NEW_FILE -#undef YY_FLUSH_BUFFER -#undef yy_set_bol -#undef yy_new_buffer -#undef yy_set_interactive -#undef YY_DO_BEFORE_ACTION - -#ifdef YY_DECL_IS_OURS -#undef YY_DECL_IS_OURS -#undef YY_DECL -#endif - -#line 68 "fts0tlex.l" - - -#line 348 "../include/fts0tlex.h" -#undef fts0tIN_HEADER -#endif /* fts0tHEADER_H */ diff --git a/storage/xtradb/include/fts0types.h b/storage/xtradb/include/fts0types.h deleted file mode 100644 index 0dad75d8f1b..00000000000 --- a/storage/xtradb/include/fts0types.h +++ /dev/null @@ -1,480 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0types.h -Full text search types file - -Created 2007-03-27 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_FTS0TYPES_H -#define INNOBASE_FTS0TYPES_H - -#include "que0types.h" -#include "ut0byte.h" -#include "fut0fut.h" -#include "ut0rbt.h" -#include "fts0fts.h" - -/** Types used within FTS. */ -struct fts_que_t; -struct fts_node_t; -struct fts_utf8_str_t; - -/** Callbacks used within FTS. */ -typedef pars_user_func_cb_t fts_sql_callback; -typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len); - -/** Statistics relevant to a particular document, used during retrieval. */ -struct fts_doc_stats_t { - doc_id_t doc_id; /*!< Document id */ - ulint word_count; /*!< Total words in the document */ -}; - -/** It's main purpose is to store the SQL prepared statements that -are required to retrieve a document from the database. */ -struct fts_get_doc_t { - fts_index_cache_t* - index_cache; /*!< The index cache instance */ - - /*!< Parsed sql statement */ - que_t* get_document_graph; - fts_cache_t* cache; /*!< The parent cache */ -}; - -/** Since we can have multiple FTS indexes on a table, we keep a -per index cache of words etc. */ -struct fts_index_cache_t { - dict_index_t* index; /*!< The FTS index instance */ - - ib_rbt_t* words; /*!< Nodes; indexed by fts_string_t*, - cells are fts_tokenizer_word_t*.*/ - - ib_vector_t* doc_stats; /*!< Array of the fts_doc_stats_t - contained in the memory buffer. - Must be in sorted order (ascending). 
- The ideal choice is an rb tree but - the rb tree imposes a space overhead - that we can do without */ - - que_t** ins_graph; /*!< Insert query graphs */ - - que_t** sel_graph; /*!< Select query graphs */ - CHARSET_INFO* charset; /*!< charset */ -}; - -/** For supporting the tracking of updates on multiple FTS indexes we need -to track which FTS indexes need to be updated. For INSERT and DELETE we -update all fts indexes. */ -struct fts_update_t { - doc_id_t doc_id; /*!< The doc id affected */ - - ib_vector_t* fts_indexes; /*!< The FTS indexes that need to be - updated. A NULL value means all - indexes need to be updated. This - vector is not allocated on the heap - and so must be freed explicitly, - when we are done with it */ -}; - -/** Stop word control infotmation. */ -struct fts_stopword_t { - ulint status; /*!< Status of the stopword tree */ - ib_alloc_t* heap; /*!< The memory allocator to use */ - ib_rbt_t* cached_stopword;/*!< This stores all active stopwords */ - CHARSET_INFO* charset; /*!< charset for stopword */ -}; - -/** The SYNC state of the cache. There is one instance of this struct -associated with each ADD thread. */ -struct fts_sync_t { - trx_t* trx; /*!< The transaction used for SYNCing - the cache to disk */ - dict_table_t* table; /*!< Table with FTS index(es) */ - ulint max_cache_size; /*!< Max size in bytes of the cache */ - ibool cache_full; /*!< flag, when true it indicates that - we need to sync the cache to disk */ - ulint lower_index; /*!< the start index of the doc id - vector from where to start adding - documents to the FTS cache */ - ulint upper_index; /*!< max index of the doc id vector to - add to the FTS cache */ - ibool interrupted; /*!< TRUE if SYNC was interrupted */ - doc_id_t min_doc_id; /*!< The smallest doc id added to the - cache. 
It should equal to - doc_ids[lower_index] */ - doc_id_t max_doc_id; /*!< The doc id at which the cache was - noted as being full, we use this to - set the upper_limit field */ - ib_time_t start_time; /*!< SYNC start time */ - bool in_progress; /*!< flag whether sync is in progress.*/ - bool unlock_cache; /*!< flag whether unlock cache when - write fts node */ - os_event_t event; /*!< sync finish event; - only os_event_set() and os_event_wait() - are used */ -}; - -/** The cache for the FTS system. It is a memory-based inverted index -that new entries are added to, until it grows over the configured maximum -size, at which time its contents are written to the INDEX table. */ -struct fts_cache_t { - rw_lock_t lock; /*!< lock protecting all access to the - memory buffer. FIXME: this needs to - be our new upgrade-capable rw-lock */ - - rw_lock_t init_lock; /*!< lock used for the cache - intialization, it has different - SYNC level as above cache lock */ - - ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */ - - ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */ - - ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */ - - ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each - element is of type fts_update_t */ - - ib_vector_t* indexes; /*!< We store the stats and inverted - index for the individual FTS indexes - in this vector. Each element is - an instance of fts_index_cache_t */ - - ib_vector_t* get_docs; /*!< information required to read - the document from the table. Each - element is of type fts_doc_t */ - - ulint total_size; /*!< total size consumed by the ilist - field of all nodes. SYNC is run - whenever this gets too big */ - fts_sync_t* sync; /*!< sync structure to sync data to - disk */ - ib_alloc_t* sync_heap; /*!< The heap allocator, for indexes - and deleted_doc_ids, ie. 
transient - objects, they are recreated after - a SYNC is completed */ - - ib_alloc_t* self_heap; /*!< This heap is the heap out of - which an instance of the cache itself - was created. Objects created using - this heap will last for the lifetime - of the cache */ - - doc_id_t next_doc_id; /*!< Next doc id */ - - doc_id_t synced_doc_id; /*!< Doc ID sync-ed to CONFIG table */ - - doc_id_t first_doc_id; /*!< first doc id since this table - was opened */ - - ulint deleted; /*!< Number of doc ids deleted since - last optimized. This variable is - covered by deleted_lock */ - - ulint added; /*!< Number of doc ids added since last - optimized. This variable is covered by - the deleted lock */ - - fts_stopword_t stopword_info; /*!< Cached stopwords for the FTS */ - mem_heap_t* cache_heap; /*!< Cache Heap */ -}; - -/** Columns of the FTS auxiliary INDEX table */ -struct fts_node_t { - doc_id_t first_doc_id; /*!< First document id in ilist. */ - - doc_id_t last_doc_id; /*!< Last document id in ilist. */ - - byte* ilist; /*!< Binary list of documents & word - positions the token appears in. - TODO: For now, these are simply - ut_malloc'd, but if testing shows - that they waste memory unacceptably, a - special memory allocator will have - to be written */ - - ulint doc_count; /*!< Number of doc ids in ilist */ - - ulint ilist_size; /*!< Used size of ilist in bytes. */ - - ulint ilist_size_alloc; - /*!< Allocated size of ilist in - bytes */ - bool synced; /*!< flag whether the node is synced */ -}; - -/** A tokenizer word. Contains information about one word. */ -struct fts_tokenizer_word_t { - fts_string_t text; /*!< Token text. 
*/ - - ib_vector_t* nodes; /*!< Word node ilists, each element is - of type fts_node_t */ -}; - -/** Word text plus it's array of nodes as on disk in FTS index */ -struct fts_word_t { - fts_string_t text; /*!< Word value in UTF-8 */ - ib_vector_t* nodes; /*!< Nodes read from disk */ - - ib_alloc_t* heap_alloc; /*!< For handling all allocations */ -}; - -/** Callback for reading and filtering nodes that are read from FTS index */ -struct fts_fetch_t { - void* read_arg; /*!< Arg for the sql_callback */ - - fts_sql_callback - read_record; /*!< Callback for reading index - record */ - ulint total_memory; /*!< Total memory used */ -}; - -/** For horizontally splitting an FTS auxiliary index */ -struct fts_index_selector_t { - ulint value; /*!< Character value at which - to split */ - - const char* suffix; /*!< FTS aux index suffix */ -}; - -/** This type represents a single document. */ -struct fts_doc_t { - fts_string_t text; /*!< document text */ - - ibool found; /*!< TRUE if the document was found - successfully in the database */ - - ib_rbt_t* tokens; /*!< This is filled when the document - is tokenized. Tokens; indexed by - fts_string_t*, cells are of type - fts_token_t* */ - - ib_alloc_t* self_heap; /*!< An instance of this type is - allocated from this heap along - with any objects that have the - same lifespan, most notably - the vector of token positions */ - CHARSET_INFO* charset; /*!< Document's charset info */ -}; - -/** A token and its positions within a document. */ -struct fts_token_t { - fts_string_t text; /*!< token text */ - - ib_vector_t* positions; /*!< an array of the positions the - token is found in; each item is - actually an ulint. */ -}; - -/** It's defined in fts/fts0fts.c */ -extern const fts_index_selector_t fts_index_selector[]; - -/******************************************************************//** -Compare two UTF-8 strings. 
*/ -UNIV_INLINE -int -fts_utf8_string_cmp( -/*================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - -/******************************************************************//** -Compare two UTF-8 strings, and return match (0) if -passed in "key" value equals or is the prefix of the "node" value. */ -UNIV_INLINE -int -fts_utf8_string_cmp_prefix( -/*=======================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - -/******************************************************************//** -Compare two fts_trx_row_t instances doc_ids. */ -UNIV_INLINE -int -fts_trx_row_doc_id_cmp( -/*===================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: id1 */ - const void* p2); /*!< in: id2 */ - -/******************************************************************//** -Compare two fts_ranking_t instances doc_ids. */ -UNIV_INLINE -int -fts_ranking_doc_id_cmp( -/*===================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: id1 */ - const void* p2); /*!< in: id2 */ - -/******************************************************************//** -Compare two fts_update_t instances doc_ids. 
*/ -UNIV_INLINE -int -fts_update_doc_id_cmp( -/*==================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - const void* p1, /*!< in: id1 */ - const void* p2); /*!< in: id2 */ - -/******************************************************************//** -Decode and return the integer that was encoded using our VLC scheme.*/ -UNIV_INLINE -ulint -fts_decode_vlc( -/*===========*/ - /*!< out: value decoded */ - byte** ptr); /*!< in: ptr to decode from, this ptr is - incremented by the number of bytes decoded */ - -/******************************************************************//** -Duplicate an UTF-8 string. */ -UNIV_INLINE -void -fts_utf8_string_dup( -/*================*/ - /*!< out: - < 0 if n1 < n2, - 0 if n1 == n2, - > 0 if n1 > n2 */ - fts_string_t* dst, /*!< in: dup to here */ - const fts_string_t* src, /*!< in: src string */ - mem_heap_t* heap); /*!< in: heap to use */ - -/******************************************************************//** -Return length of val if it were encoded using our VLC scheme. */ -UNIV_INLINE -ulint -fts_get_encoded_len( -/*================*/ - /*!< out: length of value - encoded, in bytes */ - ulint val); /*!< in: value to encode */ - -/******************************************************************//** -Encode an integer using our VLC scheme and return the length in bytes. */ -UNIV_INLINE -ulint -fts_encode_int( -/*===========*/ - /*!< out: length of value - encoded, in bytes */ - ulint val, /*!< in: value to encode */ - byte* buf); /*!< in: buffer, must have - enough space */ - -/******************************************************************//** -Decode a UTF-8 character. 
- -http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf: - - Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte -00000000 0xxxxxxx 0xxxxxxx -00000yyy yyxxxxxx 110yyyyy 10xxxxxx -zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx -000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx - -This function decodes UTF-8 sequences up to 6 bytes (31 bits). - -On error *ptr will point to the first byte that was not correctly -decoded. This will hopefully help in resyncing the input. */ -UNIV_INLINE -ulint -fts_utf8_decode( -/*============*/ - /*!< out: UTF8_ERROR if *ptr - did not point to a valid - UTF-8 sequence, or the - Unicode code point. */ - const byte** ptr); /*!< in/out: pointer to - UTF-8 string. The - pointer is advanced to - the start of the next - character. */ - -/******************************************************************//** -Lowercase an UTF-8 string. */ -UNIV_INLINE -void -fts_utf8_tolower( -/*=============*/ - fts_string_t* str); /*!< in: string */ - -/******************************************************************//** -Get the selected FTS aux INDEX suffix. */ -UNIV_INLINE -const char* -fts_get_suffix( -/*===========*/ - ulint selected); /*!< in: selected index */ - -/******************************************************************** -Get the number of index selectors. */ -UNIV_INLINE -ulint -fts_get_n_selectors(void); -/*=====================*/ - -/******************************************************************//** -Select the FTS auxiliary index for the given string. -@return the index to use for the string */ -UNIV_INLINE -ulint -fts_select_index( -/*=============*/ - const CHARSET_INFO* cs, /*!< Charset */ - const byte* str, /*!< in: word string */ - ulint len); /*!< in: string length */ - -/******************************************************************** -Select the next FTS auxiliary index for the given character. 
-@return the next index to use for character */ -UNIV_INLINE -ulint -fts_select_next_index( -/*==================*/ - const CHARSET_INFO* cs, /*!< Charset */ - const byte* str, /*!< in: string */ - ulint len); /*!< in: string length */ - -#ifndef UNIV_NONINL -#include "fts0types.ic" -#include "fts0vlc.ic" -#endif - -#endif /* INNOBASE_FTS0TYPES_H */ diff --git a/storage/xtradb/include/fts0types.ic b/storage/xtradb/include/fts0types.ic deleted file mode 100644 index f0dfd023a70..00000000000 --- a/storage/xtradb/include/fts0types.ic +++ /dev/null @@ -1,388 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0types.ic -Full text search types. - -Created 2007-03-27 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_FTS0TYPES_IC -#define INNOBASE_FTS0TYPES_IC - -#include <ctype.h> - -#include "rem0cmp.h" -#include "ha_prototypes.h" - -extern const ulint UTF8_ERROR; - -/* Determine if a UTF-8 continuation byte is valid. 
*/ -#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80) - -/******************************************************************//** -Duplicate an UTF-8 string. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -void -fts_utf8_string_dup( -/*================*/ - fts_string_t* dst, /*!< in: dup to here */ - const fts_string_t* src, /*!< in: src string */ - mem_heap_t* heap) /*!< in: heap to use */ -{ - dst->f_str = (byte*)mem_heap_alloc(heap, src->f_len + 1); - memcpy(dst->f_str, src->f_str, src->f_len); - - dst->f_len = src->f_len; - dst->f_str[src->f_len] = 0; - dst->f_n_char = src->f_n_char; -} - -/******************************************************************//** -Compare two fts_trx_row_t doc_ids. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_trx_row_doc_id_cmp( -/*===================*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ -{ - const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1; - const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2; - - return((int)(tr1->doc_id - tr2->doc_id)); -} - -/******************************************************************//** -Compare two fts_ranking_t doc_ids. -@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ -UNIV_INLINE -int -fts_ranking_doc_id_cmp( -/*===================*/ - const void* p1, /*!< in: id1 */ - const void* p2) /*!< in: id2 */ -{ - const fts_ranking_t* rk1 = (const fts_ranking_t*) p1; - const fts_ranking_t* rk2 = (const fts_ranking_t*) p2; - - return((int)(rk1->doc_id - rk2->doc_id)); -} - -/******************************************************************//** -Compare two fts_update_t doc_ids. 
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_update_doc_id_cmp(
-/*==================*/
-	const void*	p1,		/*!< in: pointer to fts_update_t 1 */
-	const void*	p2)		/*!< in: pointer to fts_update_t 2 */
-{
-	const fts_update_t*	up1 = (const fts_update_t*) p1;
-	const fts_update_t*	up2 = (const fts_update_t*) p2;
-
-	/* Compare directly instead of returning the difference cast to
-	int: a difference that does not fit in an int would yield the
-	wrong sign or a spurious 0. */
-	if (up1->doc_id < up2->doc_id) {
-		return(-1);
-	}
-
-	return(up1->doc_id > up2->doc_id ? 1 : 0);
-}
-
-
-/******************************************************************//**
-Lowercase an UTF-8 string in place by handing str->f_str to
-innobase_casedn_str(). Only the pointer is passed on; f_len and f_n_char
-are left as they were.
-NOTE(review): this presumably requires f_str to be NUL-terminated;
-confirm against innobase_casedn_str(). */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
-	fts_string_t*	str)		/*!< in/out: string */
-{
-	innobase_casedn_str((char*) str->f_str);
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings over their full lengths, using
-cmp_data_data_slow_varchar().
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
-	const void*	p1,		/*!< in: key (fts_string_t) */
-	const void*	p2)		/*!< in: node (fts_string_t) */
-{
-	const fts_string_t*	s1 = (const fts_string_t*) p1;
-	const fts_string_t*	s2 = (const fts_string_t*) p2;
-
-	return(cmp_data_data_slow_varchar(
-			s1->f_str, s1->f_len, s2->f_str, s2->f_len));
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings, and return match (0) if
-passed in "key" value equals or is the prefix of the "node" value.
-Only the first min(key length, node length) bytes are compared.
-@return the non-zero result of comparing the common leading bytes if they
-differ; otherwise 1 if the key is longer than the node, and 0 if the key
-equals the node or is a prefix of it */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
-	const void*	p1,		/*!< in: key (fts_string_t) */
-	const void*	p2)		/*!< in: node (fts_string_t) */
-{
-	int			result;
-	ulint			len;
-
-	const fts_string_t*	s1 = (const fts_string_t*) p1;
-	const fts_string_t*	s2 = (const fts_string_t*) p2;
-
-	len = ut_min(s1->f_len, s2->f_len);
-
-	result = cmp_data_data_slow_varchar(s1->f_str, len, s2->f_str, len);
-
-	if (result) {
-		return(result);
-	}
-
-	/* The common part is equal; a key longer than the node cannot be
-	a prefix of it. */
-	if (s1->f_len > s2->f_len) {
-		return(1);
-	}
-
-	return(0);
-}
-
-/******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
-   Scalar Value                1st Byte  2nd Byte  3rd Byte  4th Byte
-00000000 0xxxxxxx              0xxxxxxx
-00000yyy yyxxxxxx              110yyyyy  10xxxxxx
-zzzzyyyy yyxxxxxx              1110zzzz  10yyyyyy  10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx     11110uuu  10zzzzzz  10yyyyyy  10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-There is no length argument: continuation bytes are read until the
-sequence is complete or a byte fails fts_utf8_is_valid() (a NUL byte
-fails it).
-
-On error *ptr is left as follows: after the lead byte if the lead byte
-itself is invalid; on the offending byte if a continuation byte is
-invalid; after the whole sequence if the decoded value is rejected.
-This will hopefully help in resyncing the input.
-Overlong encodings (value below the minimum for the sequence length) are
-rejected only when UNIV_DEBUG is defined.
-@return UTF8_ERROR if *ptr did not point to a valid
-UTF-8 sequence, or the Unicode code point. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
-	const byte**	ptr)		/*!< in/out: pointer to
-					UTF-8 string. The
-					pointer is advanced to
-					the start of the next
-					character. */
-{
-	const byte*	p = *ptr;
-	ulint		ch = *p++;
-#ifdef UNIV_DEBUG
-	/* Smallest code point that needs the detected sequence length;
-	only tracked (and checked) in debug builds. */
-	ulint		min_ch;
-#endif /* UNIV_DEBUG */
-
-	/* Classify the lead byte, keep its payload bits in ch, then jump
-	to the label that reads the remaining continuation bytes. */
-	if (UNIV_LIKELY(ch < 0x80)) {
-		/* 0xxxxxxx */
-	} else if (UNIV_UNLIKELY(ch < 0xC0)) {
-		/* A continuation byte cannot start a code. */
-		goto err_exit;
-	} else if (ch < 0xE0) {
-		/* 110yyyyy 10xxxxxx */
-		ch &= 0x1F;
-		ut_d(min_ch = 0x80);
-		goto get1;
-	} else if (ch < 0xF0) {
-		/* 1110zzzz 10yyyyyy 10xxxxxx */
-		ch &= 0x0F;
-		ut_d(min_ch = 0x800);
-		goto get2;
-	} else if (ch < 0xF8) {
-		/* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
-		ch &= 0x07;
-		ut_d(min_ch = 0x10000);
-		goto get3;
-	} else if (ch < 0xFC) {
-		/* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
-		ch &= 0x03;
-		ut_d(min_ch = 0x200000);
-		goto get4;
-	} else if (ch < 0xFE) {
-		/* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
-		/* The lead byte's marker bits are not masked off here;
-		the five 6-bit shifts move them above bit 31 and the
-		0xFFFFFFFF mask below discards them. */
-		ut_d(min_ch = 0x4000000);
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get4:
-		/* Four continuation bytes still to read. */
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get3:
-		/* Three continuation bytes still to read. */
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get2:
-		/* Two continuation bytes still to read. */
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-get1:
-		/* Last continuation byte. */
-		if (!fts_utf8_is_valid(*p)) {
-			goto err_exit;
-		}
-		ch <<= 6;
-		ch |= (*p++) & 0x3F;
-
-		/* The following is needed in the 6-byte case
-		when ulint is wider than 32 bits. */
-		ch &= 0xFFFFFFFF;
-
-		/* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
-		and U+FFFE and U+FFFF cannot occur in valid UTF-8. */
-
-		if (	(ch >= 0xD800 && ch <= 0xDFFF)
-#ifdef UNIV_DEBUG
-			|| ch < min_ch
-#endif /* UNIV_DEBUG */
-			|| ch == 0xFFFE || ch == 0xFFFF) {
-
-			ch = UTF8_ERROR;
-		}
-	} else {
-		/* Lead bytes 0xFE and 0xFF land here directly; every
-		other failure arrives through the label. */
-err_exit:
-		ch = UTF8_ERROR;
-	}
-
-	*ptr = p;
-
-	return(ch);
-}
-
-/******************************************************************//**
-Get the first character's code position for FTS index partition.
-Declared here only; the definition is in another translation unit. */
-extern
-ulint
-innobase_strnxfrm(
-/*==============*/
-	const CHARSET_INFO*	cs,	/*!< in: Character set */
-	const uchar*		p2,	/*!< in: string */
-	const ulint		len2);	/*!< in: string length */
-
-/******************************************************************//**
-Select the FTS auxiliary index for the given character.
-The value obtained from innobase_strnxfrm() is looked up in
-fts_index_selector[], which is scanned linearly until an entry whose
-value is 0.
-NOTE(review): the scan presumably relies on the entries being sorted in
-ascending order of value; confirm against the table definition.
-@return the index to use for the string: the position of the entry equal
-to the value; otherwise the position just before the first entry greater
-than the value (0 if that is the first entry); otherwise the last entry */
-UNIV_INLINE
-ulint
-fts_select_index(
-/*=============*/
-	const CHARSET_INFO*	cs,	/*!< in: Charset */
-	const byte*		str,	/*!< in: string */
-	ulint			len)	/*!< in: string length */
-{
-	ulint			selected = 0;
-	ulint			value = innobase_strnxfrm(cs, str, len);
-
-	while (fts_index_selector[selected].value != 0) {
-
-		if (fts_index_selector[selected].value == value) {
-
-			return(selected);
-
-		} else if (fts_index_selector[selected].value > value) {
-
-			/* Step back to the preceding entry, but never
-			below the first one. */
-			return(selected > 0 ? selected - 1 : 0);
-		}
-
-		++selected;
-	}
-
-	/* The terminator was reached, so selected is the number of
-	entries; debug builds assert that there are at least two. */
-	ut_ad(selected > 1);
-
-	return(selected - 1);
-}
-
-/******************************************************************//**
-Select the next FTS auxiliary index for the given character.
-@return the next index to use for character */ -UNIV_INLINE -ulint -fts_select_next_index( -/*==================*/ - const CHARSET_INFO* cs, /*!< in: Charset */ - const byte* str, /*!< in: string */ - ulint len) /*!< in: string length */ -{ - ulint selected = 0; - ulint value = innobase_strnxfrm(cs, str, len); - - while (fts_index_selector[selected].value != 0) { - - if (fts_index_selector[selected].value == value) { - - return(selected + 1); - - } else if (fts_index_selector[selected].value > value) { - - return(selected); - } - - ++selected; - } - - ut_ad(selected > 0); - - return((ulint) selected); -} - -/******************************************************************//** -Return the selected FTS aux index suffix. */ -UNIV_INLINE -const char* -fts_get_suffix( -/*===========*/ - ulint selected) /*!< in: selected index */ -{ - return(fts_index_selector[selected].suffix); -} - -/******************************************************************//** -Get the number of index selectors. -@return The number of selectors */ -UNIV_INLINE -ulint -fts_get_n_selectors(void) -/*=====================*/ -{ - ulint i = 0; - - // FIXME: This is a hack - while (fts_index_selector[i].value != 0) { - ++i; - } - - return(i); -} - -#endif /* INNOBASE_FTS0TYPES_IC */ diff --git a/storage/xtradb/include/fts0vlc.ic b/storage/xtradb/include/fts0vlc.ic deleted file mode 100644 index e79bcf59347..00000000000 --- a/storage/xtradb/include/fts0vlc.ic +++ /dev/null @@ -1,142 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fts0vlc.ic -Full text variable length integer encoding/decoding. - -Created 2007-03-27 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_FTS0VLC_IC -#define INNOBASE_FTS0VLC_IC - -#include "fts0types.h" - -/******************************************************************//** -Return length of val if it were encoded using our VLC scheme. -FIXME: We will need to be able encode 8 bytes value -@return length of value encoded, in bytes */ -UNIV_INLINE -ulint -fts_get_encoded_len( -/*================*/ - ulint val) /* in: value to encode */ -{ - if (val <= 127) { - return(1); - } else if (val <= 16383) { - return(2); - } else if (val <= 2097151) { - return(3); - } else if (val <= 268435455) { - return(4); - } else { - /* Possibly we should care that on 64-bit machines ulint can - contain values that we can't encode in 5 bytes, but - fts_encode_int doesn't handle them either so it doesn't much - matter. */ - - return(5); - } -} - -/******************************************************************//** -Encode an integer using our VLC scheme and return the length in bytes. 
-@return length of value encoded, in bytes */ -UNIV_INLINE -ulint -fts_encode_int( -/*===========*/ - ulint val, /* in: value to encode */ - byte* buf) /* in: buffer, must have enough space */ -{ - ulint len; - - if (val <= 127) { - *buf = (byte) val; - - len = 1; - } else if (val <= 16383) { - *buf++ = (byte)(val >> 7); - *buf = (byte)(val & 0x7F); - - len = 2; - } else if (val <= 2097151) { - *buf++ = (byte)(val >> 14); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 3; - } else if (val <= 268435455) { - *buf++ = (byte)(val >> 21); - *buf++ = (byte)((val >> 14) & 0x7F); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 4; - } else { - /* Best to keep the limitations of the 32/64 bit versions - identical, at least for the time being. */ - ut_ad(val <= 4294967295u); - - *buf++ = (byte)(val >> 28); - *buf++ = (byte)((val >> 21) & 0x7F); - *buf++ = (byte)((val >> 14) & 0x7F); - *buf++ = (byte)((val >> 7) & 0x7F); - *buf = (byte)(val & 0x7F); - - len = 5; - } - - /* High-bit on means "last byte in the encoded integer". */ - *buf |= 0x80; - - return(len); -} - -/******************************************************************//** -Decode and return the integer that was encoded using our VLC scheme. -@return value decoded */ -UNIV_INLINE -ulint -fts_decode_vlc( -/*===========*/ - byte** ptr) /* in: ptr to decode from, this ptr is - incremented by the number of bytes decoded */ -{ - ulint val = 0; - - for (;;) { - byte b = **ptr; - - ++*ptr; - val |= (b & 0x7F); - - /* High-bit on means "last byte in the encoded integer". 
*/ - if (b & 0x80) { - break; - } else { - val <<= 7; - } - } - - return(val); -} - -#endif diff --git a/storage/xtradb/include/fut0fut.h b/storage/xtradb/include/fut0fut.h deleted file mode 100644 index 851cdb44cdf..00000000000 --- a/storage/xtradb/include/fut0fut.h +++ /dev/null @@ -1,55 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fut0fut.h -File-based utilities - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - - -#ifndef fut0fut_h -#define fut0fut_h - -#include "univ.i" - -#include "fil0fil.h" -#include "mtr0mtr.h" - -/********************************************************************//** -Gets a pointer to a file address and latches the page. 
-@return pointer to a byte in a frame; the file page in the frame is -bufferfixed and latched */ -UNIV_INLINE -byte* -fut_get_ptr( -/*========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - fil_addr_t addr, /*!< in: file address */ - ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */ - mtr_t* mtr); /*!< in: mtr handle */ - -#ifndef UNIV_NONINL -#include "fut0fut.ic" -#endif - -#endif - diff --git a/storage/xtradb/include/fut0fut.ic b/storage/xtradb/include/fut0fut.ic deleted file mode 100644 index 15c964df6c7..00000000000 --- a/storage/xtradb/include/fut0fut.ic +++ /dev/null @@ -1,60 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fut0fut.ic -File-based utilities - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - -#include "srv0srv.h" -#include "sync0rw.h" -#include "buf0buf.h" - -/********************************************************************//** -Gets a pointer to a file address and latches the page. 
-@return pointer to a byte in a frame; the file page in the frame is
-bufferfixed and latched */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	fil_addr_t	addr,	/*!< in: file address */
-	ulint		rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
-	mtr_t*		mtr)	/*!< in: mtr handle */
-{
-	buf_block_t*	block;
-	byte*		ptr;
-
-	/* Debug-only argument checks: the byte offset must lie inside
-	one page and only S or X latching may be requested. */
-	ut_ad(addr.boffset < UNIV_PAGE_SIZE);
-	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
-	/* Fetch the page addr.page of the tablespace with the requested
-	latch, within the mini-transaction. */
-	block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
-
-	/* NOTE(review): `return(0);` is handed to the macro as the action
-	to take, so this function presumably returns a null pointer when
-	the check trips; confirm against SRV_CORRUPT_TABLE_CHECK and make
-	sure callers handle NULL. */
-	SRV_CORRUPT_TABLE_CHECK(block, return(0););
-
-	/* The result points addr.boffset bytes into the block's frame. */
-	ptr = buf_block_get_frame(block) + addr.boffset;
-
-	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
-	return(ptr);
-}
diff --git a/storage/xtradb/include/fut0lst.h b/storage/xtradb/include/fut0lst.h
deleted file mode 100644
index 8554cc60cdd..00000000000
--- a/storage/xtradb/include/fut0lst.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fut0lst.h -File-based list utilities - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef fut0lst_h -#define fut0lst_h - -#include "univ.i" - -#include "fil0fil.h" -#include "mtr0mtr.h" - - -/* The C 'types' of base node and list node: these should be used to -write self-documenting code. Of course, the sizeof macro cannot be -applied to these types! */ - -typedef byte flst_base_node_t; -typedef byte flst_node_t; - -/* The physical size of a list base node in bytes */ -#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) - -/* The physical size of a list node in bytes */ -#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Initializes a list base node. */ -UNIV_INLINE -void -flst_init( -/*======*/ - flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Adds a node as the last node in a list. */ -UNIV_INTERN -void -flst_add_last( -/*==========*/ - flst_base_node_t* base, /*!< in: pointer to base node of list */ - flst_node_t* node, /*!< in: node to add */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Adds a node as the first node in a list. 
*/ -UNIV_INTERN -void -flst_add_first( -/*===========*/ - flst_base_node_t* base, /*!< in: pointer to base node of list */ - flst_node_t* node, /*!< in: node to add */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Inserts a node after another in a list. */ -UNIV_INTERN -void -flst_insert_after( -/*==============*/ - flst_base_node_t* base, /*!< in: pointer to base node of list */ - flst_node_t* node1, /*!< in: node to insert after */ - flst_node_t* node2, /*!< in: node to add */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Inserts a node before another in a list. */ -UNIV_INTERN -void -flst_insert_before( -/*===============*/ - flst_base_node_t* base, /*!< in: pointer to base node of list */ - flst_node_t* node2, /*!< in: node to insert */ - flst_node_t* node3, /*!< in: node to insert before */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Removes a node. */ -UNIV_INTERN -void -flst_remove( -/*========*/ - flst_base_node_t* base, /*!< in: pointer to base node of list */ - flst_node_t* node2, /*!< in: node to remove */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Gets list length. -@return length */ -UNIV_INLINE -ulint -flst_get_len( -/*=========*/ - const flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Gets list first node address. 
-@return file address */ -UNIV_INLINE -fil_addr_t -flst_get_first( -/*===========*/ - const flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Gets list last node address. -@return file address */ -UNIV_INLINE -fil_addr_t -flst_get_last( -/*==========*/ - const flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Gets list next node address. -@return file address */ -UNIV_INLINE -fil_addr_t -flst_get_next_addr( -/*===============*/ - const flst_node_t* node, /*!< in: pointer to node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Gets list prev node address. -@return file address */ -UNIV_INLINE -fil_addr_t -flst_get_prev_addr( -/*===============*/ - const flst_node_t* node, /*!< in: pointer to node */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Writes a file address. */ -UNIV_INLINE -void -flst_write_addr( -/*============*/ - fil_faddr_t* faddr, /*!< in: pointer to file faddress */ - fil_addr_t addr, /*!< in: file address */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Reads a file address. -@return file address */ -UNIV_INLINE -fil_addr_t -flst_read_addr( -/*===========*/ - const fil_faddr_t* faddr, /*!< in: pointer to file faddress */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************************//** -Validates a file-based list. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -flst_validate( -/*==========*/ - const flst_base_node_t* base, /*!< in: pointer to base node of list */ - mtr_t* mtr1); /*!< in: mtr */ -/********************************************************************//** -Prints info of a file-based list. */ -UNIV_INTERN -void -flst_print( -/*=======*/ - const flst_base_node_t* base, /*!< in: pointer to base node of list */ - mtr_t* mtr); /*!< in: mtr */ - - -#ifndef UNIV_NONINL -#include "fut0lst.ic" -#endif - -#endif /* !UNIV_HOTBACKUP */ - -#endif diff --git a/storage/xtradb/include/fut0lst.ic b/storage/xtradb/include/fut0lst.ic deleted file mode 100644 index d18cf21378f..00000000000 --- a/storage/xtradb/include/fut0lst.ic +++ /dev/null @@ -1,167 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/fut0lst.ic -File-based list utilities - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "fut0fut.h" -#include "mtr0log.h" -#include "buf0buf.h" - -/* We define the field offsets of a node for the list */ -#define FLST_PREV 0 /* 6-byte address of the previous list element; - the page part of address is FIL_NULL, if no - previous element */ -#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next - list element; the page part of address - is FIL_NULL, if no next element */ - -/* We define the field offsets of a base node for the list */ -#define FLST_LEN 0 /* 32-bit list length field */ -#define FLST_FIRST 4 /* 6-byte address of the first element - of the list; undefined if empty list */ -#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the - last element of the list; undefined - if empty list */ - -/********************************************************************//** -Writes a file address. 
*/ -UNIV_INLINE -void -flst_write_addr( -/*============*/ - fil_faddr_t* faddr, /*!< in: pointer to file faddress */ - fil_addr_t addr, /*!< in: file address */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ut_ad(faddr && mtr); - ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX)); - ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA); - ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA); - - mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr); - mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset, - MLOG_2BYTES, mtr); -} - -/********************************************************************//** -Reads a file address. -@return file address */ -UNIV_INLINE -fil_addr_t -flst_read_addr( -/*===========*/ - const fil_faddr_t* faddr, /*!< in: pointer to file faddress */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - fil_addr_t addr; - - ut_ad(faddr && mtr); - - addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr); - addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES, - mtr); - ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA); - ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA); - return(addr); -} - -/********************************************************************//** -Initializes a list base node. */ -UNIV_INLINE -void -flst_init( -/*======*/ - flst_base_node_t* base, /*!< in: pointer to base node */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); - - mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr); - flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr); - flst_write_addr(base + FLST_LAST, fil_addr_null, mtr); -} - -/********************************************************************//** -Gets list length. 
-@return length */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
-	const flst_base_node_t*	base,	/*!< in: pointer to base node */
-	mtr_t*			mtr)	/*!< in: mini-transaction handle */
-{
-	/* The length is the 4-byte field at offset FLST_LEN. */
-	const flst_base_node_t*	len_field = base + FLST_LEN;
-
-	return(mtr_read_ulint(len_field, MLOG_4BYTES, mtr));
-}
-
-/********************************************************************//**
-Gets the address stored in the FLST_FIRST field of the base node.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
-	const flst_base_node_t*	base,	/*!< in: pointer to base node */
-	mtr_t*			mtr)	/*!< in: mini-transaction handle */
-{
-	const flst_base_node_t*	first_field = base + FLST_FIRST;
-
-	return(flst_read_addr(first_field, mtr));
-}
-
-/********************************************************************//**
-Gets the address stored in the FLST_LAST field of the base node.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
-	const flst_base_node_t*	base,	/*!< in: pointer to base node */
-	mtr_t*			mtr)	/*!< in: mini-transaction handle */
-{
-	const flst_base_node_t*	last_field = base + FLST_LAST;
-
-	return(flst_read_addr(last_field, mtr));
-}
-
-/********************************************************************//**
-Gets the address stored in the FLST_NEXT field of a list node.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
-	const flst_node_t*	node,	/*!< in: pointer to node */
-	mtr_t*			mtr)	/*!< in: mini-transaction handle */
-{
-	const flst_node_t*	next_field = node + FLST_NEXT;
-
-	return(flst_read_addr(next_field, mtr));
-}
-
-/********************************************************************//**
-Gets list prev node address.
-@return file address */ -UNIV_INLINE -fil_addr_t -flst_get_prev_addr( -/*===============*/ - const flst_node_t* node, /*!< in: pointer to node */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - return(flst_read_addr(node + FLST_PREV, mtr)); -} diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h deleted file mode 100644 index 58eb581e76a..00000000000 --- a/storage/xtradb/include/ha0ha.h +++ /dev/null @@ -1,265 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ha0ha.h -The hash table with external chains - -Created 8/18/1994 Heikki Tuuri -*******************************************************/ - -#ifndef ha0ha_h -#define ha0ha_h - -#include "univ.i" - -#include "hash0hash.h" -#include "page0types.h" -#include "buf0types.h" -#include "rem0types.h" - -/*************************************************************//** -Looks for an element in a hash table. 
-@return pointer to the data of the first hash table node in chain -having the fold number, NULL if not found */ -UNIV_INLINE -const rec_t* -ha_search_and_get_data( -/*===================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: folded value of the searched data */ -/*********************************************************//** -Looks for an element when we know the pointer to the data and updates -the pointer to data if found. -@return TRUE if found */ -UNIV_INTERN -ibool -ha_search_and_update_if_found_func( -/*===============================*/ - hash_table_t* table, /*!< in/out: hash table */ - ulint fold, /*!< in: folded value of the searched data */ - const rec_t* data, /*!< in: pointer to the data */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - buf_block_t* new_block,/*!< in: block containing new_data */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* new_data);/*!< in: new pointer to the data */ - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/** Looks for an element when we know the pointer to the data and -updates the pointer to data if found. -@param table in/out: hash table -@param fold in: folded value of the searched data -@param data in: pointer to the data -@param new_block in: block containing new_data -@param new_data in: new pointer to the data */ -# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \ - ha_search_and_update_if_found_func(table,fold,data,new_block,new_data) -#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -/** Looks for an element when we know the pointer to the data and -updates the pointer to data if found. 
-@param table in/out: hash table -@param fold in: folded value of the searched data -@param data in: pointer to the data -@param new_block ignored: block containing new_data -@param new_data in: new pointer to the data */ -# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \ - ha_search_and_update_if_found_func(table,fold,data,new_data) -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -/*************************************************************//** -Creates a hash table with at least n array cells. The actual number -of cells is chosen to be a prime number slightly bigger than n. -@return own: created table */ -UNIV_INTERN -hash_table_t* -ha_create_func( -/*===========*/ - ulint n, /*!< in: number of array cells */ -#ifdef UNIV_SYNC_DEBUG - ulint mutex_level, /*!< in: level of the mutexes in the latching - order: this is used in the debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes, /*!< in: number of mutexes to protect the - hash table: must be a power of 2, or 0 */ - ulint type); /*!< in: type of datastructure for which - the memory heap is going to be used e.g.: - MEM_HEAP_FOR_BTR_SEARCH or - MEM_HEAP_FOR_PAGE_HASH */ -#ifdef UNIV_SYNC_DEBUG -/** Creates a hash table. -@return own: created table -@param n_c in: number of array cells. The actual number of cells is -chosen to be a slightly bigger prime number. -@param level in: level of the mutexes in the latching order -@param n_m in: number of mutexes to protect the hash table; - must be a power of 2, or 0 */ -# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) -#else /* UNIV_SYNC_DEBUG */ -/** Creates a hash table. -@return own: created table -@param n_c in: number of array cells. The actual number of cells is -chosen to be a slightly bigger prime number. 
-@param level ignored: level of the mutexes in the latching order -@param n_m in: number of mutexes to protect the hash table; - must be a power of 2, or 0 */ -# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) -#endif /* UNIV_SYNC_DEBUG */ - -/*************************************************************//** -Empties a hash table and frees the memory heaps. */ -UNIV_INTERN -void -ha_clear( -/*=====*/ - hash_table_t* table); /*!< in, own: hash table */ - -/*************************************************************//** -Inserts an entry into a hash table. If an entry with the same fold number -is found, its node is updated to point to the new data, and no new node -is inserted. -@return TRUE if succeed, FALSE if no more memory could be allocated */ -UNIV_INTERN -ibool -ha_insert_for_fold_func( -/*====================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: folded value of data; if a node with - the same fold value already exists, it is - updated to point to the new data, and no new - node is created! */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - buf_block_t* block, /*!< in: buffer block containing the data */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* data); /*!< in: data, must not be NULL */ - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/** -Inserts an entry into a hash table. If an entry with the same fold number -is found, its node is updated to point to the new data, and no new node -is inserted. -@return TRUE if succeed, FALSE if no more memory could be allocated -@param t in: hash table -@param f in: folded value of data -@param b in: buffer block containing the data -@param d in: data, must not be NULL */ -# define ha_insert_for_fold(t,f,b,d) do { \ - ha_insert_for_fold_func(t,f,b,d); \ - MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \ -} while(0) -#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -/** -Inserts an entry into a hash table.
If an entry with the same fold number -is found, its node is updated to point to the new data, and no new node -is inserted. -@return TRUE if succeed, FALSE if no more memory could be allocated -@param t in: hash table -@param f in: folded value of data -@param b ignored: buffer block containing the data -@param d in: data, must not be NULL */ -# define ha_insert_for_fold(t,f,b,d) do { \ - ha_insert_for_fold_func(t,f,d); \ - MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \ -} while (0) -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - -/*********************************************************//** -Looks for an element when we know the pointer to the data and deletes -it from the hash table if found. -@return TRUE if found */ -UNIV_INLINE -ibool -ha_search_and_delete_if_found( -/*==========================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: folded value of the searched data */ - const rec_t* data); /*!< in: pointer to the data */ -#ifndef UNIV_HOTBACKUP -/*****************************************************************//** -Removes from the chain determined by fold all nodes whose data pointer -points to the page given. */ -UNIV_INTERN -void -ha_remove_all_nodes_to_page( -/*========================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: fold value */ - const page_t* page); /*!< in: buffer page */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/*************************************************************//** -Validates a given range of the cells in hash table. -@return TRUE if ok */ -UNIV_INTERN -ibool -ha_validate( -/*========*/ - hash_table_t* table, /*!< in: hash table */ - ulint start_index, /*!< in: start index */ - ulint end_index); /*!< in: end index */ -#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ -/*************************************************************//** -Prints info of a hash table. 
*/ -UNIV_INTERN -void -ha_print_info( -/*==========*/ - FILE* file, /*!< in: file where to print */ - hash_table_t* table); /*!< in: hash table */ -#endif /* !UNIV_HOTBACKUP */ - -/** The hash table external chain node */ -struct ha_node_t { - ha_node_t* next; /*!< next chain node or NULL if none */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - buf_block_t* block; /*!< buffer block containing the data, or NULL */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* data; /*!< pointer to the data */ - ulint fold; /*!< fold value for the data */ -}; - -#ifdef UNIV_DEBUG -/********************************************************************//** -Assert that the synchronization object in a hash operation involving -possible change in the hash table is held. -Note that in case of mutexes we assert that mutex is owned while in case -of rw-locks we assert that it is held in exclusive mode. */ -UNIV_INLINE -void -hash_assert_can_modify( -/*===================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold value */ -/********************************************************************//** -Assert that the synchronization object in a hash search operation is held. -Note that in case of mutexes we assert that mutex is owned while in case -of rw-locks we assert that it is held either in x-mode or s-mode. 
*/ -UNIV_INLINE -void -hash_assert_can_search( -/*===================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold value */ -#else /* UNIV_DEBUG */ -#define hash_assert_can_modify(t, f) -#define hash_assert_can_search(t, f) -#endif /* UNIV_DEBUG */ - - -#ifndef UNIV_NONINL -#include "ha0ha.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ha0ha.ic b/storage/xtradb/include/ha0ha.ic deleted file mode 100644 index 9d0e396e200..00000000000 --- a/storage/xtradb/include/ha0ha.ic +++ /dev/null @@ -1,246 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ha0ha.ic -The hash table with external chains - -Created 8/18/1994 Heikki Tuuri -*************************************************************************/ - -#include "ut0rnd.h" -#include "mem0mem.h" -#include "btr0types.h" - -/***********************************************************//** -Deletes a hash node. 
*/ -UNIV_INTERN -void -ha_delete_hash_node( -/*================*/ - hash_table_t* table, /*!< in: hash table */ - ha_node_t* del_node); /*!< in: node to be deleted */ - -/******************************************************************//** -Gets a hash node data. -@return pointer to the data */ -UNIV_INLINE -const rec_t* -ha_node_get_data( -/*=============*/ - const ha_node_t* node) /*!< in: hash chain node */ -{ - return(node->data); -} - -/******************************************************************//** -Sets hash node data. */ -UNIV_INLINE -void -ha_node_set_data_func( -/*==================*/ - ha_node_t* node, /*!< in: hash chain node */ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - buf_block_t* block, /*!< in: buffer block containing the data */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - const rec_t* data) /*!< in: pointer to the data */ -{ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - node->block = block; -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - node->data = data; -} - -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -/** Sets hash node data. -@param n in: hash chain node -@param b in: buffer block containing the data -@param d in: pointer to the data */ -# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d) -#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ -/** Sets hash node data. -@param n in: hash chain node -@param b in: buffer block containing the data -@param d in: pointer to the data */ -# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d) -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - -/******************************************************************//** -Gets the next node in a hash chain. -@return next node, NULL if none */ -UNIV_INLINE -ha_node_t* -ha_chain_get_next( -/*==============*/ - ha_node_t* node) /*!< in: hash chain node */ -{ - return(node->next); -} - -/******************************************************************//** -Gets the first node in a hash chain. 
-@return first node, NULL if none */ -UNIV_INLINE -ha_node_t* -ha_chain_get_first( -/*===============*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold value determining the chain */ -{ - return((ha_node_t*) - hash_get_nth_cell(table, hash_calc_hash(fold, table))->node); -} - -#ifdef UNIV_DEBUG -/********************************************************************//** -Assert that the synchronization object in a hash operation involving -possible change in the hash table is held. -Note that in case of mutexes we assert that mutex is owned while in case -of rw-locks we assert that it is held in exclusive mode. */ -UNIV_INLINE -void -hash_assert_can_modify( -/*===================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold value */ -{ - if (table->type == HASH_TABLE_SYNC_MUTEX) { - ut_ad(mutex_own(hash_get_mutex(table, fold))); - } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) { -# ifdef UNIV_SYNC_DEBUG - prio_rw_lock_t* lock = hash_get_lock(table, fold); - ut_ad(rw_lock_own(lock, RW_LOCK_EX)); -# endif - } else { - ut_ad(table->type == HASH_TABLE_SYNC_NONE); - } -} - -/********************************************************************//** -Assert that the synchronization object in a hash search operation is held. -Note that in case of mutexes we assert that mutex is owned while in case -of rw-locks we assert that it is held either in x-mode or s-mode. 
*/ -UNIV_INLINE -void -hash_assert_can_search( -/*===================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold value */ -{ - if (table->type == HASH_TABLE_SYNC_MUTEX) { - ut_ad(mutex_own(hash_get_mutex(table, fold))); - } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) { -# ifdef UNIV_SYNC_DEBUG - prio_rw_lock_t* lock = hash_get_lock(table, fold); - ut_ad(rw_lock_own(lock, RW_LOCK_EX) - || rw_lock_own(lock, RW_LOCK_SHARED)); -# endif - } else { - ut_ad(table->type == HASH_TABLE_SYNC_NONE); - } -} -#endif /* UNIV_DEBUG */ - -/*************************************************************//** -Looks for an element in a hash table. -@return pointer to the data of the first hash table node in chain -having the fold number, NULL if not found */ -UNIV_INLINE -const rec_t* -ha_search_and_get_data( -/*===================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: folded value of the searched data */ -{ - ha_node_t* node; - - hash_assert_can_search(table, fold); - ut_ad(btr_search_enabled); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (node->fold == fold) { - - return(node->data); - } - - node = ha_chain_get_next(node); - } - - return(NULL); -} - -/*********************************************************//** -Looks for an element when we know the pointer to the data. 
-@return pointer to the hash table node, NULL if not found in the table */ -UNIV_INLINE -ha_node_t* -ha_search_with_data( -/*================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: folded value of the searched data */ - const rec_t* data) /*!< in: pointer to the data */ -{ - ha_node_t* node; - - hash_assert_can_search(table, fold); - - ut_ad(btr_search_enabled); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (node->data == data) { - - return(node); - } - - node = ha_chain_get_next(node); - } - - return(NULL); -} - -/*********************************************************//** -Looks for an element when we know the pointer to the data, and deletes -it from the hash table, if found. -@return TRUE if found */ -UNIV_INLINE -ibool -ha_search_and_delete_if_found( -/*==========================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold, /*!< in: folded value of the searched data */ - const rec_t* data) /*!< in: pointer to the data */ -{ - ha_node_t* node; - - hash_assert_can_modify(table, fold); - ut_ad(btr_search_enabled); - - node = ha_search_with_data(table, fold, data); - - if (node) { - ha_delete_hash_node(table, node); - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/xtradb/include/ha0storage.h b/storage/xtradb/include/ha0storage.h deleted file mode 100644 index 0073930b502..00000000000 --- a/storage/xtradb/include/ha0storage.h +++ /dev/null @@ -1,140 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ha0storage.h -Hash storage. -Provides a data structure that stores chunks of data in -its own storage, avoiding duplicates. - -Created September 22, 2007 Vasil Dimov -*******************************************************/ - -#ifndef ha0storage_h -#define ha0storage_h - -#include "univ.i" - -/** This value is used by default by ha_storage_create(). More memory -is allocated later when/if it is needed. */ -#define HA_STORAGE_DEFAULT_HEAP_BYTES 1024 - -/** This value is used by default by ha_storage_create(). It is a -constant per ha_storage's lifetime. */ -#define HA_STORAGE_DEFAULT_HASH_CELLS 4096 - -/** Hash storage */ -struct ha_storage_t; - -/*******************************************************************//** -Creates a hash storage. If any of the parameters is 0, then a default -value is used. -@return own: hash storage */ -UNIV_INLINE -ha_storage_t* -ha_storage_create( -/*==============*/ - ulint initial_heap_bytes, /*!< in: initial heap's size */ - ulint initial_hash_cells); /*!< in: initial number of cells - in the hash table */ - -/*******************************************************************//** -Copies data into the storage and returns a pointer to the copy. If the -same data chunk is already present, then pointer to it is returned. -Data chunks are considered to be equal if len1 == len2 and -memcmp(data1, data2, len1) == 0. 
If "data" is not present (and thus -data_len bytes need to be allocated) and the size of storage is going to -become more than "memlim" then "data" is not added and NULL is returned. -To disable this behavior "memlim" can be set to 0, which stands for -"no limit". -@return pointer to the copy */ -UNIV_INTERN -const void* -ha_storage_put_memlim( -/*==================*/ - ha_storage_t* storage, /*!< in/out: hash storage */ - const void* data, /*!< in: data to store */ - ulint data_len, /*!< in: data length */ - ulint memlim); /*!< in: memory limit to obey */ - -/*******************************************************************//** -Same as ha_storage_put_memlim() but without memory limit. -@param storage in/out: hash storage -@param data in: data to store -@param data_len in: data length -@return pointer to the copy of the string */ -#define ha_storage_put(storage, data, data_len) \ - ha_storage_put_memlim((storage), (data), (data_len), 0) - -/*******************************************************************//** -Copies string into the storage and returns a pointer to the copy. If the -same string is already present, then pointer to it is returned. -Strings are considered to be equal if strcmp(str1, str2) == 0. -@param storage in/out: hash storage -@param str in: string to put -@return pointer to the copy of the string */ -#define ha_storage_put_str(storage, str) \ - ((const char*) ha_storage_put((storage), (str), strlen(str) + 1)) - -/*******************************************************************//** -Copies string into the storage and returns a pointer to the copy obeying -a memory limit. -If the same string is already present, then pointer to it is returned. -Strings are considered to be equal if strcmp(str1, str2) == 0. 
-@param storage in/out: hash storage -@param str in: string to put -@param memlim in: memory limit to obey -@return pointer to the copy of the string */ -#define ha_storage_put_str_memlim(storage, str, memlim) \ - ((const char*) ha_storage_put_memlim((storage), (str), \ - strlen(str) + 1, (memlim))) - -/*******************************************************************//** -Empties a hash storage, freeing memory occupied by data chunks. -This invalidates any pointers previously returned by ha_storage_put(). -The hash storage is not invalidated itself and can be used again. */ -UNIV_INLINE -void -ha_storage_empty( -/*=============*/ - ha_storage_t** storage); /*!< in/out: hash storage */ - -/*******************************************************************//** -Frees a hash storage and everything it contains, it cannot be used after -this call. -This invalidates any pointers previously returned by ha_storage_put(). */ -UNIV_INLINE -void -ha_storage_free( -/*============*/ - ha_storage_t* storage); /*!< in, own: hash storage */ - -/*******************************************************************//** -Gets the size of the memory used by a storage. -@return bytes used */ -UNIV_INLINE -ulint -ha_storage_get_size( -/*================*/ - const ha_storage_t* storage); /*!< in: hash storage */ - -#ifndef UNIV_NONINL -#include "ha0storage.ic" -#endif - -#endif /* ha0storage_h */ diff --git a/storage/xtradb/include/ha0storage.ic b/storage/xtradb/include/ha0storage.ic deleted file mode 100644 index 7150ca045ec..00000000000 --- a/storage/xtradb/include/ha0storage.ic +++ /dev/null @@ -1,146 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ha0storage.ic -Hash storage. -Provides a data structure that stores chunks of data in -its own storage, avoiding duplicates. - -Created September 24, 2007 Vasil Dimov -*******************************************************/ - -#include "univ.i" -#include "ha0storage.h" -#include "hash0hash.h" -#include "mem0mem.h" - -/** Hash storage for strings */ -struct ha_storage_t { - mem_heap_t* heap; /*!< memory heap from which memory is - allocated */ - hash_table_t* hash; /*!< hash table used to avoid - duplicates */ -}; - -/** Objects of this type are stored in ha_storage_t */ -struct ha_storage_node_t { - ulint data_len;/*!< length of the data */ - const void* data; /*!< pointer to data */ - ha_storage_node_t* next; /*!< next node in hash chain */ -}; - -/*******************************************************************//** -Creates a hash storage. If any of the parameters is 0, then a default -value is used. 
-@return own: hash storage */ -UNIV_INLINE -ha_storage_t* -ha_storage_create( -/*==============*/ - ulint initial_heap_bytes, /*!< in: initial heap's size */ - ulint initial_hash_cells) /*!< in: initial number of cells - in the hash table */ -{ - ha_storage_t* storage; - mem_heap_t* heap; - - if (initial_heap_bytes == 0) { - - initial_heap_bytes = HA_STORAGE_DEFAULT_HEAP_BYTES; - } - - if (initial_hash_cells == 0) { - - initial_hash_cells = HA_STORAGE_DEFAULT_HASH_CELLS; - } - - /* we put "storage" within "storage->heap" */ - - heap = mem_heap_create(sizeof(ha_storage_t) - + initial_heap_bytes); - - storage = (ha_storage_t*) mem_heap_alloc(heap, - sizeof(ha_storage_t)); - - storage->heap = heap; - storage->hash = hash_create(initial_hash_cells); - - return(storage); -} - -/*******************************************************************//** -Empties a hash storage, freeing memory occupied by data chunks. -This invalidates any pointers previously returned by ha_storage_put(). -The hash storage is not invalidated itself and can be used again. */ -UNIV_INLINE -void -ha_storage_empty( -/*=============*/ - ha_storage_t** storage) /*!< in/out: hash storage */ -{ - ha_storage_t temp_storage; - - temp_storage.heap = (*storage)->heap; - temp_storage.hash = (*storage)->hash; - - hash_table_clear(temp_storage.hash); - mem_heap_empty(temp_storage.heap); - - *storage = (ha_storage_t*) mem_heap_alloc(temp_storage.heap, - sizeof(ha_storage_t)); - - (*storage)->heap = temp_storage.heap; - (*storage)->hash = temp_storage.hash; -} - -/*******************************************************************//** -Frees a hash storage and everything it contains, it cannot be used after -this call. -This invalidates any pointers previously returned by ha_storage_put(). 
*/ -UNIV_INLINE -void -ha_storage_free( -/*============*/ - ha_storage_t* storage) /*!< in, own: hash storage */ -{ - /* order is important because the pointer storage->hash is - within the heap */ - hash_table_free(storage->hash); - mem_heap_free(storage->heap); -} - -/*******************************************************************//** -Gets the size of the memory used by a storage. -@return bytes used */ -UNIV_INLINE -ulint -ha_storage_get_size( -/*================*/ - const ha_storage_t* storage) /*!< in: hash storage */ -{ - ulint ret; - - ret = mem_heap_get_size(storage->heap); - - /* this assumes hash->heap and hash->heaps are NULL */ - ret += sizeof(hash_table_t); - ret += sizeof(hash_cell_t) * hash_get_n_cells(storage->hash); - - return(ret); -} diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h deleted file mode 100644 index b053be9e61d..00000000000 --- a/storage/xtradb/include/ha_prototypes.h +++ /dev/null @@ -1,692 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ha_prototypes.h -Prototypes for global functions in ha_innodb.cc that are called by -InnoDB C code - -Created 5/11/2006 Osku Salerma -************************************************************************/ - -#ifndef HA_INNODB_PROTOTYPES_H -#define HA_INNODB_PROTOTYPES_H - -#include "my_dbug.h" -#include "my_compare.h" -#include "my_sys.h" -#include "m_string.h" -#include "my_base.h" -#include "dur_prop.h" - -#ifndef UNIV_INNOCHECKSUM -#include "mysqld_error.h" -#include "debug_sync.h" -#include "trx0types.h" -#endif - -#include "m_ctype.h" /* CHARSET_INFO */ - -// Forward declarations -class Field; -struct fts_string_t; - -/*********************************************************************//** -Wrapper around MySQL's copy_and_convert function. -@return number of bytes copied to 'to' */ -UNIV_INTERN -ulint -innobase_convert_string( -/*====================*/ - void* to, /*!< out: converted string */ - ulint to_length, /*!< in: number of bytes reserved - for the converted string */ - CHARSET_INFO* to_cs, /*!< in: character set to convert to */ - const void* from, /*!< in: string to convert */ - ulint from_length, /*!< in: number of bytes to convert */ - CHARSET_INFO* from_cs, /*!< in: character set to convert - from */ - uint* errors); /*!< out: number of errors encountered - during the conversion */ - -/*******************************************************************//** -Formats the raw data in "data" (in InnoDB on-disk format) that is of -type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes -the result to "buf". The result is converted to "system_charset_info". 
-Not more than "buf_size" bytes are written to "buf". -The result is always NUL-terminated (provided buf_size > 0) and the -number of bytes that were written to "buf" is returned (including the -terminating NUL). -@return number of bytes that were written */ -UNIV_INTERN -ulint -innobase_raw_format( -/*================*/ - const char* data, /*!< in: raw data */ - ulint data_len, /*!< in: raw data length - in bytes */ - ulint charset_coll, /*!< in: charset collation */ - char* buf, /*!< out: output buffer */ - ulint buf_size); /*!< in: output buffer size - in bytes */ - -#ifndef UNIV_INNOCHECKSUM - -/*****************************************************************//** -Invalidates the MySQL query cache for the table. */ -UNIV_INTERN -void -innobase_invalidate_query_cache( -/*============================*/ - trx_t* trx, /*!< in: transaction which - modifies the table */ - const char* full_name, /*!< in: concatenation of - database name, null char NUL, - table name, null char NUL; - NOTE that in Windows this is - always in LOWER CASE! */ - ulint full_name_len); /*!< in: full name length where - also the null chars count */ - -#endif /* #ifndef UNIV_INNOCHECKSUM */ - -/*****************************************************************//** -Convert a table or index name to the MySQL system_charset_info (UTF-8) -and quote it if needed. -@return pointer to the end of buf */ -UNIV_INTERN -char* -innobase_convert_name( -/*==================*/ - char* buf, /*!< out: buffer for converted identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* id, /*!< in: identifier to convert */ - ulint idlen, /*!< in: length of id, in bytes */ - THD* thd, /*!< in: MySQL connection thread, or NULL */ - ibool table_id);/*!< in: TRUE=id is a table or database name; - FALSE=id is an index name */ - -/******************************************************************//** -Returns true if the thread is the replication thread on the slave -server. 
Used in srv_conc_enter_innodb() to determine if the thread -should be allowed to enter InnoDB - the replication thread is treated -differently than other threads. Also used in -srv_conc_force_exit_innodb(). -@return true if thd is the replication thread */ -UNIV_INTERN -ibool -thd_is_replication_slave_thread( -/*============================*/ - THD* thd); /*!< in: thread handle */ - -/******************************************************************//** -Gets information on the durability property requested by thread. -Used when writing either a prepare or commit record to the log -buffer. -@return the durability property. */ -UNIV_INTERN -enum durability_properties -thd_requested_durability( -/*=====================*/ - const THD* thd) /*!< in: thread handle */ - MY_ATTRIBUTE((warn_unused_result)); - -/******************************************************************//** -Returns true if the transaction this thread is processing has edited -non-transactional tables. Used by the deadlock detector when deciding -which transaction to rollback in case of a deadlock - we try to avoid -rolling back transactions that have edited non-transactional tables. -@return true if non-transactional tables have been edited */ -UNIV_INTERN -ibool -thd_has_edited_nontrans_tables( -/*===========================*/ - THD* thd); /*!< in: thread handle */ - -/** -Get high resolution timestamp for the current query start time. - -@retval timestamp in microseconds precision -*/ -unsigned long long thd_query_start_micro(const MYSQL_THD thd); - -/*************************************************************//** -Prints info of a THD object (== user session thread) to the given file. 
*/ -UNIV_INTERN -void -innobase_mysql_print_thd( -/*=====================*/ - FILE* f, /*!< in: output stream */ - THD* thd, /*!< in: pointer to a MySQL THD object */ - uint max_query_len); /*!< in: max query length to print, or 0 to - use the default max length */ - -/*****************************************************************//** -Log code calls this whenever log has been written and/or flushed up -to a new position. We use this to notify upper layer of a new commit -checkpoint when necessary.*/ -UNIV_INTERN -void -innobase_mysql_log_notify( -/*===============*/ - ib_uint64_t write_lsn, /*!< in: LSN written to log file */ - ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */ - -/*************************************************************//** -InnoDB uses this function to compare two data fields for which the data type -is such that we must use MySQL code to compare them. -@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -UNIV_INTERN -int -innobase_mysql_cmp( -/*===============*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length) /*!< in: data field length, - not UNIV_SQL_NULL */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**************************************************************//** -Converts a MySQL type to an InnoDB type. Note that this function returns -the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 -VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. -@return DATA_BINARY, DATA_VARCHAR, ... 
*/ -UNIV_INTERN -ulint -get_innobase_type_from_mysql_type( -/*==============================*/ - ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an - 'unsigned type'; - at least ENUM and SET, - and unsigned integer - types are 'unsigned types' */ - const void* field) /*!< in: MySQL Field */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************************//** -Get the variable length bounds of the given character set. */ -UNIV_INTERN -void -innobase_get_cset_width( -/*====================*/ - ulint cset, /*!< in: MySQL charset-collation code */ - ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */ - ulint* mbmaxlen); /*!< out: maximum length of a char (in bytes) */ - -/******************************************************************//** -Compares NUL-terminated UTF-8 strings case insensitively. -@return 0 if a=b, <0 if a<b, >0 if a>b */ -UNIV_INTERN -int -innobase_strcasecmp( -/*================*/ - const char* a, /*!< in: first string to compare */ - const char* b); /*!< in: second string to compare */ - -/******************************************************************//** -Compares NUL-terminated UTF-8 strings case insensitively. The -second string contains wildcards. -@return 0 if a match is found, 1 if not */ -UNIV_INTERN -int -innobase_wildcasecmp( -/*=================*/ - const char* a, /*!< in: string to compare */ - const char* b); /*!< in: wildcard string to compare */ - -/******************************************************************//** -Strip dir name from a full path name and return only its file name. -@return file name or "null" if no file name */ -UNIV_INTERN -const char* -innobase_basename( -/*==============*/ - const char* path_name); /*!< in: full path name */ - -/******************************************************************//** -Returns true if the thread is executing a SELECT statement.
-@return true if thd is executing SELECT */ -UNIV_INTERN -ibool -thd_is_select( -/*==========*/ - const THD* thd); /*!< in: thread handle */ - -/******************************************************************//** -Converts an identifier to a table name. */ -UNIV_INTERN -void -innobase_convert_from_table_id( -/*===========================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len); /*!< in: length of 'to', in bytes; should - be at least 5 * strlen(from) + 1 */ -/******************************************************************//** -Converts an identifier to UTF-8. */ -UNIV_INTERN -void -innobase_convert_from_id( -/*=====================*/ - struct charset_info_st* cs, /*!< in: the 'from' character set */ - char* to, /*!< out: converted identifier */ - const char* from, /*!< in: identifier to convert */ - ulint len); /*!< in: length of 'to', in bytes; - should be at least 3 * strlen(from) + 1 */ -/******************************************************************//** -Makes all characters in a NUL-terminated UTF-8 string lower case. */ -UNIV_INTERN -void -innobase_casedn_str( -/*================*/ - char* a); /*!< in/out: string to put in lower case */ - -#ifdef WITH_WSREP -UNIV_INTERN -int -wsrep_innobase_kill_one_trx(void * const thd_ptr, - const trx_t * const bf_trx, - trx_t *victim_trx, - ibool signal); -int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number, - unsigned char* str, unsigned int str_length, - unsigned int buf_length); -#endif /* WITH_WSREP */ -/**********************************************************************//** -Determines the connection character set. 
-@return connection character set */ -UNIV_INTERN -struct charset_info_st* -innobase_get_charset( -/*=================*/ - THD* thd); /*!< in: MySQL thread handle */ -/**********************************************************************//** -Determines the current SQL statement. -@return SQL statement string */ -UNIV_INTERN -const char* -innobase_get_stmt( -/*==============*/ - THD* thd, /*!< in: MySQL thread handle */ - size_t* length) /*!< out: length of the SQL statement */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -This function is used to find the storage length in bytes of the first n -characters for prefix indexes using a multibyte character set. The function -finds charset information and returns length of prefix_len characters in the -index field in bytes. -@return number of bytes occupied by the first n characters */ -UNIV_INTERN -ulint -innobase_get_at_most_n_mbchars( -/*===========================*/ - ulint charset_id, /*!< in: character set id */ - ulint prefix_len, /*!< in: prefix length in bytes of the index - (this has to be divided by mbmaxlen to get the - number of CHARACTERS n in the prefix) */ - ulint data_len, /*!< in: length of the string in bytes */ - const char* str); /*!< in: character string */ - -/*************************************************************//** -InnoDB index push-down condition check -@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ -UNIV_INTERN -enum icp_result -innobase_index_cond( -/*================*/ - void* file) /*!< in/out: pointer to ha_innobase */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Returns true if the thread supports XA, -global value of innodb_supports_xa if thd is NULL. 
-@return true if thd supports XA */ -UNIV_INTERN -ibool -thd_supports_xa( -/*============*/ - THD* thd); /*!< in: thread handle, or NULL to query - the global innodb_supports_xa */ - -/** Get status of innodb_tmpdir. -@param[in] thd thread handle, or NULL to query - the global innodb_tmpdir. -@retval NULL if innodb_tmpdir="" */ -UNIV_INTERN -const char* -thd_innodb_tmpdir( - THD* thd); - -/******************************************************************//** -Check the status of fake changes mode (innodb_fake_changes). -@return true if fake changes mode is enabled. */ -UNIV_INTERN -ibool -thd_fake_changes( -/*=============*/ - THD* thd); /*!< in: thread handle, or NULL to query - the global innodb_fake_changes */ - -/******************************************************************//** -Returns the lock wait timeout for the current connection. -@return the lock wait timeout, in seconds */ -UNIV_INTERN -ulong -thd_lock_wait_timeout( -/*==================*/ - THD* thd); /*!< in: thread handle, or NULL to query - the global innodb_lock_wait_timeout */ -/******************************************************************//** -Add up the time waited for the lock for the current query. */ -UNIV_INTERN -void -thd_set_lock_wait_time( -/*===================*/ - THD* thd, /*!< in/out: thread handle */ - ulint value); /*!< in: time waited for the lock */ - -/**********************************************************************//** -Get the current setting of the table_cache_size global parameter. We do -a dirty read because for one there is no synchronization object and -secondly there is little harm in doing so even if we get a torn read. 
-@return value of table_cache_size */ -UNIV_INTERN -ulint -innobase_get_table_cache_size(void); -/*===============================*/ - -/******************************************************************//** -NOTE(review): undocumented in this header; judging by its name it presumably -returns the innodb_flush_log_at_trx_commit setting that applies to thd -(confirm against the definition). */ -ulong -thd_flush_log_at_trx_commit( -/*================================*/ - void* thd); - -/**********************************************************************//** -Get the current setting of the lower_case_table_names global parameter from -mysqld.cc. We do a dirty read because for one there is no synchronization -object and secondly there is little harm in doing so even if we get a torn -read. -@return value of lower_case_table_names */ -UNIV_INTERN -ulint -innobase_get_lower_case_table_names(void); -/*=====================================*/ - -/*****************************************************************//** -Frees a possible InnoDB trx object associated with the current THD. -@return 0 or error number */ -UNIV_INTERN -int -innobase_close_thd( -/*===============*/ - THD* thd); /*!< in: MySQL thread handle for - which to close the connection */ -/*************************************************************//** -Get the next token from the given string and store it in *token. */ -UNIV_INTERN -ulint -innobase_mysql_fts_get_token( -/*=========================*/ - CHARSET_INFO* charset, /*!< in: Character set */ - const byte* start, /*!< in: start of text */ - const byte* end, /*!< in: one character past end of - text */ - fts_string_t* token, /*!< out: token's text */ - ulint* offset); /*!< out: offset to token, - measured as characters from - 'start' */ - -/******************************************************************//** -compare two character string case insensitively according to their charset. 
*/ -UNIV_INTERN -int -innobase_fts_text_case_cmp( -/*=======================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - -/****************************************************************//** -Get FTS field charset info from the field's prtype -@return charset info */ -UNIV_INTERN -CHARSET_INFO* -innobase_get_fts_charset( -/*=====================*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number);/*!< in: number of the charset */ -/******************************************************************//** -Returns true if transaction should be flagged as read-only. -@return true if the thd is marked as read-only */ -UNIV_INTERN -ibool -thd_trx_is_read_only( -/*=================*/ - THD* thd); /*!< in/out: thread handle */ - -/******************************************************************//** -Check if the transaction is an auto-commit transaction. TRUE also -implies that it is a SELECT (read-only) transaction. -@return true if the transaction is an auto commit read-only transaction. */ -UNIV_INTERN -ibool -thd_trx_is_auto_commit( -/*===================*/ - THD* thd); /*!< in: thread handle, or NULL */ - -/*****************************************************************//** -A wrapper function of innobase_convert_name(): converts a table or -index name to the MySQL system_charset_info (UTF-8) and quotes it if needed. -The result is stored in buf; the function is declared void and returns -nothing. */ -UNIV_INTERN -void -innobase_format_name( -/*==================*/ - char* buf, /*!< out: buffer for converted - identifier */ - ulint buflen, /*!< in: length of buf, in bytes */ - const char* name, /*!< in: index or table name - to format */ - ibool is_index_name) /*!< in: index name */ - MY_ATTRIBUTE((nonnull)); - -/** Corresponds to Sql_condition:enum_warning_level. 
*/ -enum ib_log_level_t { - IB_LOG_LEVEL_INFO, - IB_LOG_LEVEL_WARN, - IB_LOG_LEVEL_ERROR, - IB_LOG_LEVEL_FATAL -}; - -/******************************************************************//** -Use this when the args are first converted to a formatted string and then -passed to the format string from errmsg-utf8.txt. The error message format -must be: "Some string ... %s". - -Push a warning message to the client, it is a wrapper around: - -void push_warning_printf( - THD *thd, Sql_condition::enum_warning_level level, - uint code, const char *format, ...); -*/ -UNIV_INTERN -void -ib_errf( -/*====*/ - THD* thd, /*!< in/out: session */ - ib_log_level_t level, /*!< in: warning level */ - ib_uint32_t code, /*!< MySQL error code */ - const char* format, /*!< printf format */ - ...) /*!< Args */ - MY_ATTRIBUTE((format(printf, 4, 5))); - -/******************************************************************//** -Use this when the args are passed to the format string from -errmsg-utf8.txt directly as is. - -Push a warning message to the client, it is a wrapper around: - -void push_warning_printf( - THD *thd, Sql_condition::enum_warning_level level, - uint code, const char *format, ...); -*/ -UNIV_INTERN -void -ib_senderrf( -/*========*/ - THD* thd, /*!< in/out: session */ - ib_log_level_t level, /*!< in: warning level */ - ib_uint32_t code, /*!< MySQL error code */ - ...); /*!< Args */ - -/******************************************************************//** -Write a message to the MySQL log, prefixed with "InnoDB: ". -Wrapper around sql_print_information() */ -UNIV_INTERN -void -ib_logf( -/*====*/ - ib_log_level_t level, /*!< in: warning level */ - const char* format, /*!< printf format */ - ...) /*!< Args */ - MY_ATTRIBUTE((format(printf, 2, 3))); - -/******************************************************************//** -Returns the NUL terminated value of glob_hostname. -@return pointer to glob_hostname. 
*/ -UNIV_INTERN -const char* -server_get_hostname(); -/*=================*/ - -/******************************************************************//** -Get the error message format string. -@return the format string or 0 if not found. */ -UNIV_INTERN -const char* -innobase_get_err_msg( -/*=================*/ - int error_code); /*!< in: MySQL error code */ - -/*********************************************************************//** -Compute the next autoinc value. - -For MySQL replication the autoincrement values can be partitioned among -the nodes. The offset is the start or origin of the autoincrement value -for a particular node. For n nodes the increment will be n and the offset -will be in the interval [1, n]. The formula tries to allocate the next -value for a particular node. - -Note: This function is also called with increment set to the number of -values we want to reserve for multi-value inserts e.g., - - INSERT INTO T VALUES(), (), (); - -innobase_next_autoinc() will be called with increment set to 3 where -autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for -the multi-value INSERT above. -@return the next value */ -UNIV_INTERN -ulonglong -innobase_next_autoinc( -/*==================*/ - ulonglong current, /*!< in: Current value */ - ulonglong need, /*!< in: count of values needed */ - ulonglong step, /*!< in: AUTOINC increment step */ - ulonglong offset, /*!< in: AUTOINC offset */ - ulonglong max_value) /*!< in: max value for type */ - MY_ATTRIBUTE((pure, warn_unused_result)); - -/********************************************************************//** -Get the upper limit of the MySQL integral and floating-point type. 
-@return maximum allowed value for the field */ -UNIV_INTERN -ulonglong -innobase_get_int_col_max_value( -/*===========================*/ - const Field* field) /*!< in: MySQL field */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/********************************************************************** -Check if the length of the identifier exceeds the maximum allowed. -The input to this function is an identifier in charset my_charset_filename. -@return true when the identifier is too long. */ -UNIV_INTERN -my_bool -innobase_check_identifier_length( -/*=============================*/ - const char* id); /* in: identifier to check. it must belong - to charset my_charset_filename */ - -/********************************************************************** -Converts an identifier from my_charset_filename to UTF-8 charset. */ -uint -innobase_convert_to_system_charset( -/*===============================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len, /* in: length of 'to', in bytes */ - uint* errors); /* out: error return */ - -/********************************************************************** -Converts an identifier to charset my_charset_filename. -NOTE(review): the source charset is presumably the system charset (UTF-8), -i.e. the inverse of innobase_convert_to_system_charset(); confirm against -the definition. */ -uint -innobase_convert_to_filename_charset( -/*=================================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len); /* in: length of 'to', in bytes */ - -/********************************************************************//** -Helper function to push warnings from InnoDB internals to SQL-layer. 
*/ -UNIV_INTERN -void -ib_push_warning( - trx_t* trx, /*!< in: trx */ - ulint error, /*!< in: error code to push as warning */ - const char *format,/*!< in: warning message */ - ...); -/********************************************************************//** -Helper function to push warnings from InnoDB internals to SQL-layer. 
*/ -UNIV_INTERN -void -ib_push_warning( - void* ithd, /*!< in: thd */ - ulint error, /*!< in: error code to push as warning */ - const char *format,/*!< in: warning message */ - ...); - -/*****************************************************************//** -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. An example: -test/mytable. On Windows normalization puts both the database name and the -table name always to lower case if "set_lower_case" is set to TRUE. */ -void -normalize_table_name_low( -/*=====================*/ - char* norm_name, /*!< out: normalized name as a - null-terminated string */ - const char* name, /*!< in: table name string */ - ibool set_lower_case); /*!< in: TRUE if we want to set - name to lower case */ -#endif /* HA_INNODB_PROTOTYPES_H */ diff --git a/storage/xtradb/include/handler0alter.h b/storage/xtradb/include/handler0alter.h deleted file mode 100644 index 3dd6c99eb6d..00000000000 --- a/storage/xtradb/include/handler0alter.h +++ /dev/null @@ -1,114 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/handler0alter.h -Smart ALTER TABLE -*******************************************************/ - -/*************************************************************//** -Copies an InnoDB record to table->record[0]. */ -UNIV_INTERN -void -innobase_rec_to_mysql( -/*==================*/ - struct TABLE* table, /*!< in/out: MySQL table */ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: index */ - const ulint* offsets)/*!< in: rec_get_offsets( - rec, index, ...) */ - MY_ATTRIBUTE((nonnull)); - -/*************************************************************//** -Copies an InnoDB index entry to table->record[0]. */ -UNIV_INTERN -void -innobase_fields_to_mysql( -/*=====================*/ - struct TABLE* table, /*!< in/out: MySQL table */ - const dict_index_t* index, /*!< in: InnoDB index */ - const dfield_t* fields) /*!< in: InnoDB index fields */ - MY_ATTRIBUTE((nonnull)); - -/*************************************************************//** -Copies an InnoDB row to table->record[0]. */ -UNIV_INTERN -void -innobase_row_to_mysql( -/*==================*/ - struct TABLE* table, /*!< in/out: MySQL table */ - const dict_table_t* itab, /*!< in: InnoDB table */ - const dtuple_t* row) /*!< in: InnoDB row */ - MY_ATTRIBUTE((nonnull)); - -/*************************************************************//** -Resets table->record[0]. */ -UNIV_INTERN -void -innobase_rec_reset( -/*===============*/ - struct TABLE* table) /*!< in/out: MySQL table */ - MY_ATTRIBUTE((nonnull)); - -/** Generate the next autoinc based on a snapshot of the session -auto_increment_increment and auto_increment_offset variables. 
*/ -struct ib_sequence_t { - - /** - @param thd - the session - @param start_value - the lower bound - @param max_value - the upper bound (inclusive) */ - ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value); - - /** - Postfix increment - @return the value to insert */ - ulonglong operator++(int) UNIV_NOTHROW; - - /** Check if the autoinc "sequence" is exhausted. - @return true if the sequence is exhausted */ - bool eof() const UNIV_NOTHROW - { - return(m_eof); - } - - /** - @return the next value in the sequence */ - ulonglong last() const UNIV_NOTHROW - { - ut_ad(m_next_value > 0); - - return(m_next_value); - } - - /** Maximum column value if adding an AUTOINC column else 0. Once - we reach the end of the sequence it will be set to ~0. - NOTE(review): this member is declared const, so it cannot be assigned - after construction; confirm which member actually becomes ~0. */ - const ulonglong m_max_value; - - /** Value of auto_increment_increment */ - ulong m_increment; - - /** Value of auto_increment_offset */ - ulong m_offset; - - /** Next value in the sequence */ - ulonglong m_next_value; - - /** true if no more values left in the sequence */ - bool m_eof; -}; diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h deleted file mode 100644 index 68d3c6ace4e..00000000000 --- a/storage/xtradb/include/hash0hash.h +++ /dev/null @@ -1,603 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/hash0hash.h -The simple hash table utility - -Created 5/20/1997 Heikki Tuuri -*******************************************************/ - -#ifndef hash0hash_h -#define hash0hash_h - -#include "univ.i" -#include "mem0mem.h" -#ifndef UNIV_HOTBACKUP -# include "sync0sync.h" -# include "sync0rw.h" -#endif /* !UNIV_HOTBACKUP */ - -struct hash_table_t; -struct hash_cell_t; - -typedef void* hash_node_t; - -/* Fix Bug #13859: symbol collision between imap/mysql */ -#define hash_create hash0_create - -/* Different types of hash_table based on the synchronization -method used for it. */ -enum hash_table_sync_t { - HASH_TABLE_SYNC_NONE = 0, /*!< Don't use any internal - synchronization objects for - this hash_table. */ - HASH_TABLE_SYNC_MUTEX, /*!< Use mutexes to control - access to this hash_table. */ - HASH_TABLE_SYNC_RW_LOCK /*!< Use rw_locks to control - access to this hash_table. */ -}; - -/*************************************************************//** -Creates a hash table with >= n array cells. The actual number -of cells is chosen to be a prime number slightly bigger than n. -@return own: created table */ -UNIV_INTERN -hash_table_t* -hash_create( -/*========*/ - ulint n); /*!< in: number of array cells */ -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Creates a sync object array to protect a hash table. -::sync_obj can be mutexes or rw_locks depending on the type of -hash table. 
*/ -UNIV_INTERN -void -hash_create_sync_obj_func( -/*======================*/ - hash_table_t* table, /*!< in: hash table */ - enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX - or HASH_TABLE_SYNC_RW_LOCK */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level,/*!< in: latching order level - of the mutexes: used in the - debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_sync_obj);/*!< in: number of sync objects, - must be a power of 2 */ -#ifdef UNIV_SYNC_DEBUG -# define hash_create_sync_obj(t, s, n, level) \ - hash_create_sync_obj_func(t, s, level, n) -#else /* UNIV_SYNC_DEBUG */ -# define hash_create_sync_obj(t, s, n, level) \ - hash_create_sync_obj_func(t, s, n) -#endif /* UNIV_SYNC_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -/*************************************************************//** -Frees a hash table. */ -UNIV_INTERN -void -hash_table_free( -/*============*/ - hash_table_t* table); /*!< in, own: hash table */ -/**************************************************************//** -Calculates the hash value from a folded value. -@return hashed value */ -UNIV_INLINE -ulint -hash_calc_hash( -/*===========*/ - ulint fold, /*!< in: folded value */ - hash_table_t* table); /*!< in: hash table */ -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Assert that the mutex for the table is held */ -# define HASH_ASSERT_OWN(TABLE, FOLD) \ - ut_ad((TABLE)->type != HASH_TABLE_SYNC_MUTEX \ - || (mutex_own(hash_get_mutex((TABLE), FOLD)))); -#else /* !UNIV_HOTBACKUP */ -# define HASH_ASSERT_OWN(TABLE, FOLD) -#endif /* !UNIV_HOTBACKUP */ - -/*******************************************************************//** -Inserts a struct to a hash table. 
*/ - -#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\ -do {\ - hash_cell_t* cell3333;\ - TYPE* struct3333;\ -\ - HASH_ASSERT_OWN(TABLE, FOLD)\ -\ - (DATA)->NAME = NULL;\ -\ - cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ -\ - if (cell3333->node == NULL) {\ - cell3333->node = DATA;\ - } else {\ - struct3333 = (TYPE*) cell3333->node;\ -\ - while (struct3333->NAME != NULL) {\ -\ - struct3333 = (TYPE*) struct3333->NAME;\ - }\ -\ - struct3333->NAME = DATA;\ - }\ -} while (0) - -#ifdef WITH_WSREP -/*******************************************************************//** -Inserts a struct to the head of hash table. */ - -#define HASH_PREPEND(TYPE, NAME, TABLE, FOLD, DATA) \ -do { \ - hash_cell_t* cell3333; \ - TYPE* struct3333; \ - \ - HASH_ASSERT_OWN(TABLE, FOLD) \ - \ - (DATA)->NAME = NULL; \ - \ - cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ - \ - if (cell3333->node == NULL) { \ - cell3333->node = DATA; \ - DATA->NAME = NULL; \ - } else { \ - struct3333 = (TYPE*) cell3333->node; \ - \ - DATA->NAME = struct3333; \ - \ - cell3333->node = DATA; \ - } \ -} while (0) -#endif /*WITH_WSREP */ -#ifdef UNIV_HASH_DEBUG -# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1) -# define HASH_INVALIDATE(DATA, NAME) *(void**) (&DATA->NAME) = (void*) -1 -#else -# define HASH_ASSERT_VALID(DATA) do {} while (0) -# define HASH_INVALIDATE(DATA, NAME) do {} while (0) -#endif - -/*******************************************************************//** -Deletes a struct from a hash table. 
*/ - -#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\ -do {\ - hash_cell_t* cell3333;\ - TYPE* struct3333;\ -\ - HASH_ASSERT_OWN(TABLE, FOLD)\ -\ - cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ -\ - if (cell3333->node == DATA) {\ - HASH_ASSERT_VALID(DATA->NAME);\ - cell3333->node = DATA->NAME;\ - } else {\ - struct3333 = (TYPE*) cell3333->node;\ -\ - while (struct3333->NAME != DATA) {\ -\ - struct3333 = (TYPE*) struct3333->NAME;\ - ut_a(struct3333);\ - }\ -\ - struct3333->NAME = DATA->NAME;\ - }\ - HASH_INVALIDATE(DATA, NAME);\ -} while (0) - -/*******************************************************************//** -Gets the first struct in a hash chain, NULL if none. */ - -#define HASH_GET_FIRST(TABLE, HASH_VAL)\ - (hash_get_nth_cell(TABLE, HASH_VAL)->node) - -/*******************************************************************//** -Gets the next struct in a hash chain, NULL if none. */ - -#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME) - -/********************************************************************//** -Looks for a struct in a hash table. */ -#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\ -{\ -\ - HASH_ASSERT_OWN(TABLE, FOLD)\ -\ - (DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\ - HASH_ASSERT_VALID(DATA);\ -\ - while ((DATA) != NULL) {\ - ASSERTION;\ - if (TEST) {\ - break;\ - } else {\ - HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\ - (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\ - }\ - }\ -} - -/********************************************************************//** -Looks for an item in all hash buckets. 
*/ -#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST) \ -do { \ - ulint i3333; \ - \ - for (i3333 = (TABLE)->n_cells; i3333--; ) { \ - (DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333); \ - \ - while ((DATA) != NULL) { \ - HASH_ASSERT_VALID(DATA); \ - ASSERTION; \ - \ - if (TEST) { \ - break; \ - } \ - \ - (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA); \ - } \ - \ - if ((DATA) != NULL) { \ - break; \ - } \ - } \ -} while (0) - -/************************************************************//** -Gets the nth cell in a hash table. -@return pointer to cell */ -UNIV_INLINE -hash_cell_t* -hash_get_nth_cell( -/*==============*/ - hash_table_t* table, /*!< in: hash table */ - ulint n); /*!< in: cell index */ - -/*************************************************************//** -Clears a hash table so that all the cells become empty. */ -UNIV_INLINE -void -hash_table_clear( -/*=============*/ - hash_table_t* table); /*!< in/out: hash table */ - -/*************************************************************//** -Returns the number of cells in a hash table. -@return number of cells */ -UNIV_INLINE -ulint -hash_get_n_cells( -/*=============*/ - hash_table_t* table); /*!< in: table */ -/*******************************************************************//** -Deletes a struct which is stored in the heap of the hash table, and compacts -the heap. The fold value must be stored in the struct NODE in a field named -'fold'. */ - -#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\ -do {\ - TYPE* node111;\ - TYPE* top_node111;\ - hash_cell_t* cell111;\ - ulint fold111;\ -\ - fold111 = (NODE)->fold;\ -\ - HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\ -\ - top_node111 = (TYPE*) mem_heap_get_top(\ - hash_get_heap(TABLE, fold111),\ - sizeof(TYPE));\ -\ - /* If the node to remove is not the top node in the heap, compact the\ - heap of nodes by moving the top node in the place of NODE. 
*/\ -\ - if (NODE != top_node111) {\ -\ - /* Copy the top node in place of NODE */\ -\ - *(NODE) = *top_node111;\ -\ - cell111 = hash_get_nth_cell(TABLE,\ - hash_calc_hash(top_node111->fold, TABLE));\ -\ - /* Look for the pointer to the top node, to update it */\ -\ - if (cell111->node == top_node111) {\ - /* The top node is the first in the chain */\ -\ - cell111->node = NODE;\ - } else {\ - /* We have to look for the predecessor of the top\ - node */\ - node111 = static_cast<TYPE*>(cell111->node);\ -\ - while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\ -\ - node111 = static_cast<TYPE*>(\ - HASH_GET_NEXT(NAME, node111));\ - }\ -\ - /* Now we have the predecessor node */\ -\ - node111->NAME = NODE;\ - }\ - }\ -\ - /* Free the space occupied by the top node */\ -\ - mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\ -} while (0) - -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Move all hash table entries from OLD_TABLE to NEW_TABLE. */ - -#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \ -do {\ - ulint i2222;\ - ulint cell_count2222;\ -\ - cell_count2222 = hash_get_n_cells(OLD_TABLE);\ -\ - for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ - NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\ -\ - while (node2222) {\ - NODE_TYPE* next2222 = node2222->PTR_NAME;\ - ulint fold2222 = FOLD_FUNC(node2222);\ -\ - HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\ - fold2222, node2222);\ -\ - node2222 = next2222;\ - }\ - }\ -} while (0) - -/************************************************************//** -Gets the sync object index for a fold value in a hash table. -@return index */ -UNIV_INLINE -ulint -hash_get_sync_obj_index( -/*====================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Gets the nth heap in a hash table. 
-@return mem heap */ -UNIV_INLINE -mem_heap_t* -hash_get_nth_heap( -/*==============*/ - hash_table_t* table, /*!< in: hash table */ - ulint i); /*!< in: index of the heap */ -/************************************************************//** -Gets the heap for a fold value in a hash table. -@return mem heap */ -UNIV_INLINE -mem_heap_t* -hash_get_heap( -/*==========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Gets the nth mutex in a hash table. -@return mutex */ -UNIV_INLINE -ib_prio_mutex_t* -hash_get_nth_mutex( -/*===============*/ - hash_table_t* table, /*!< in: hash table */ - ulint i); /*!< in: index of the mutex */ -/************************************************************//** -Gets the nth rw_lock in a hash table. -@return rw_lock */ -UNIV_INLINE -prio_rw_lock_t* -hash_get_nth_lock( -/*==============*/ - hash_table_t* table, /*!< in: hash table */ - ulint i); /*!< in: index of the rw_lock */ -/************************************************************//** -Gets the mutex for a fold value in a hash table. -@return mutex */ -UNIV_INLINE -ib_prio_mutex_t* -hash_get_mutex( -/*===========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Gets the rw_lock for a fold value in a hash table. -@return rw_lock */ -UNIV_INLINE -prio_rw_lock_t* -hash_get_lock( -/*==========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Reserves the mutex for a fold value in a hash table. */ -UNIV_INTERN -void -hash_mutex_enter( -/*=============*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Releases the mutex for a fold value in a hash table. 
*/ -UNIV_INTERN -void -hash_mutex_exit( -/*============*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Reserves all the mutexes of a hash table, in an ascending order. */ -UNIV_INTERN -void -hash_mutex_enter_all( -/*=================*/ - hash_table_t* table); /*!< in: hash table */ -/************************************************************//** -Releases all the mutexes of a hash table. */ -UNIV_INTERN -void -hash_mutex_exit_all( -/*================*/ - hash_table_t* table); /*!< in: hash table */ -/************************************************************//** -Releases all but the passed in mutex of a hash table. */ -UNIV_INTERN -void -hash_mutex_exit_all_but( -/*====================*/ - hash_table_t* table, /*!< in: hash table */ - ib_prio_mutex_t* keep_mutex); /*!< in: mutex to keep */ -/************************************************************//** -s-lock a lock for a fold value in a hash table. */ -UNIV_INTERN -void -hash_lock_s( -/*========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -x-lock a lock for a fold value in a hash table. */ -UNIV_INTERN -void -hash_lock_x( -/*========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -unlock an s-lock for a fold value in a hash table. */ -UNIV_INTERN -void -hash_unlock_s( -/*==========*/ - - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -unlock x-lock for a fold value in a hash table. 
*/ -UNIV_INTERN -void -hash_unlock_x( -/*==========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold); /*!< in: fold */ -/************************************************************//** -Reserves all the locks of a hash table, in an ascending order. */ -UNIV_INTERN -void -hash_lock_x_all( -/*============*/ - hash_table_t* table); /*!< in: hash table */ -/************************************************************//** -Releases all the locks of a hash table, in an ascending order. */ -UNIV_INTERN -void -hash_unlock_x_all( -/*==============*/ - hash_table_t* table); /*!< in: hash table */ -/************************************************************//** -Releases all but passed in lock of a hash table, */ -UNIV_INTERN -void -hash_unlock_x_all_but( -/*==================*/ - hash_table_t* table, /*!< in: hash table */ - prio_rw_lock_t* keep_lock); /*!< in: lock to keep */ - -#else /* !UNIV_HOTBACKUP */ -# define hash_get_heap(table, fold) ((table)->heap) -# define hash_mutex_enter(table, fold) ((void) 0) -# define hash_mutex_exit(table, fold) ((void) 0) -# define hash_mutex_enter_all(table) ((void) 0) -# define hash_mutex_exit_all(table) ((void) 0) -# define hash_mutex_exit_all_but(t, m) ((void) 0) -# define hash_lock_s(t, f) ((void) 0) -# define hash_lock_x(t, f) ((void) 0) -# define hash_unlock_s(t, f) ((void) 0) -# define hash_unlock_x(t, f) ((void) 0) -# define hash_lock_x_all(t) ((void) 0) -# define hash_unlock_x_all(t) ((void) 0) -# define hash_unlock_x_all_but(t, l) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -struct hash_cell_t{ - void* node; /*!< hash chain node, NULL if none */ -}; - -/* The hash table structure */ -struct hash_table_t { - enum hash_table_sync_t type; /*<! type of hash_table. 
*/ -#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG -# ifndef UNIV_HOTBACKUP - ibool adaptive;/* TRUE if this is the hash - table of the adaptive hash - index */ -# endif /* !UNIV_HOTBACKUP */ -#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - ulint n_cells;/* number of cells in the hash table */ - hash_cell_t* array; /*!< pointer to cell array */ -#ifndef UNIV_HOTBACKUP - ulint n_sync_obj;/* if sync_objs != NULL, then - the number of either the number - of mutexes or the number of - rw_locks depending on the type. - Must be a power of 2 */ - union { - ib_prio_mutex_t* mutexes; - /* NULL, or an array of mutexes - used to protect segments of the - hash table */ - prio_rw_lock_t* rw_locks;/* NULL, or an array of rw_lcoks - used to protect segments of the - hash table */ - } sync_obj; - - mem_heap_t** heaps; /*!< if this is non-NULL, hash - chain nodes for external chaining - can be allocated from these memory - heaps; there are then n_mutexes - many of these heaps */ -#endif /* !UNIV_HOTBACKUP */ - mem_heap_t* heap; -#ifdef UNIV_DEBUG - ulint magic_n; -# define HASH_TABLE_MAGIC_N 76561114 -#endif /* UNIV_DEBUG */ -}; - -#ifndef UNIV_NONINL -#include "hash0hash.ic" -#endif - -#endif diff --git a/storage/xtradb/include/hash0hash.ic b/storage/xtradb/include/hash0hash.ic deleted file mode 100644 index e4822538e19..00000000000 --- a/storage/xtradb/include/hash0hash.ic +++ /dev/null @@ -1,225 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/hash0hash.ic -The simple hash table utility - -Created 5/20/1997 Heikki Tuuri -*******************************************************/ - -#include "ut0rnd.h" - -/************************************************************//** -Gets the nth cell in a hash table. -@return pointer to cell */ -UNIV_INLINE -hash_cell_t* -hash_get_nth_cell( -/*==============*/ - hash_table_t* table, /*!< in: hash table */ - ulint n) /*!< in: cell index */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(n < table->n_cells); - - return(table->array + n); -} - -/*************************************************************//** -Clears a hash table so that all the cells become empty. */ -UNIV_INLINE -void -hash_table_clear( -/*=============*/ - hash_table_t* table) /*!< in/out: hash table */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - memset(table->array, 0x0, - table->n_cells * sizeof(*table->array)); -} - -/*************************************************************//** -Returns the number of cells in a hash table. -@return number of cells */ -UNIV_INLINE -ulint -hash_get_n_cells( -/*=============*/ - hash_table_t* table) /*!< in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - return(table->n_cells); -} - -/**************************************************************//** -Calculates the hash value from a folded value. 
-@return hashed value */ -UNIV_INLINE -ulint -hash_calc_hash( -/*===========*/ - ulint fold, /*!< in: folded value */ - hash_table_t* table) /*!< in: hash table */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - return(ut_hash_ulint(fold, table->n_cells)); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Gets the sync object index for a fold value in a hash table. -@return index */ -UNIV_INLINE -ulint -hash_get_sync_obj_index( -/*====================*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(table->type != HASH_TABLE_SYNC_NONE); - ut_ad(ut_is_2pow(table->n_sync_obj)); - return(ut_2pow_remainder(hash_calc_hash(fold, table), - table->n_sync_obj)); -} - -/************************************************************//** -Gets the nth heap in a hash table. -@return mem heap */ -UNIV_INLINE -mem_heap_t* -hash_get_nth_heap( -/*==============*/ - hash_table_t* table, /*!< in: hash table */ - ulint i) /*!< in: index of the heap */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(table->type != HASH_TABLE_SYNC_NONE); - ut_ad(i < table->n_sync_obj); - - return(table->heaps[i]); -} - -/************************************************************//** -Gets the heap for a fold value in a hash table. -@return mem heap */ -UNIV_INLINE -mem_heap_t* -hash_get_heap( -/*==========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - - if (table->heap) { - return(table->heap); - } - - i = hash_get_sync_obj_index(table, fold); - - return(hash_get_nth_heap(table, i)); -} - -/************************************************************//** -Gets the nth mutex in a hash table. 
-@return mutex */ -UNIV_INLINE -ib_prio_mutex_t* -hash_get_nth_mutex( -/*===============*/ - hash_table_t* table, /*!< in: hash table */ - ulint i) /*!< in: index of the mutex */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); - ut_ad(i < table->n_sync_obj); - - return(table->sync_obj.mutexes + i); -} - -/************************************************************//** -Gets the mutex for a fold value in a hash table. -@return mutex */ -UNIV_INLINE -ib_prio_mutex_t* -hash_get_mutex( -/*===========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - - i = hash_get_sync_obj_index(table, fold); - - return(hash_get_nth_mutex(table, i)); -} - -/************************************************************//** -Gets the nth rw_lock in a hash table. -@return rw_lock */ -UNIV_INLINE -prio_rw_lock_t* -hash_get_nth_lock( -/*==============*/ - hash_table_t* table, /*!< in: hash table */ - ulint i) /*!< in: index of the rw_lock */ -{ - ut_ad(table); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - ut_ad(i < table->n_sync_obj); - - return(table->sync_obj.rw_locks + i); -} - -/************************************************************//** -Gets the rw_lock for a fold value in a hash table. 
-@return rw_lock */ -UNIV_INLINE -prio_rw_lock_t* -hash_get_lock( -/*==========*/ - hash_table_t* table, /*!< in: hash table */ - ulint fold) /*!< in: fold */ -{ - ulint i; - - ut_ad(table); - ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); - ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - - i = hash_get_sync_obj_index(table, fold); - - return(hash_get_nth_lock(table, i)); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h deleted file mode 100644 index 0b325b68a84..00000000000 --- a/storage/xtradb/include/ibuf0ibuf.h +++ /dev/null @@ -1,493 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ibuf0ibuf.h -Insert buffer - -Created 7/19/1997 Heikki Tuuri -*******************************************************/ - -#ifndef ibuf0ibuf_h -#define ibuf0ibuf_h - -#include "univ.i" - -#include "mtr0mtr.h" -#include "dict0mem.h" -#include "fsp0fsp.h" - -#ifndef UNIV_HOTBACKUP -# include "ibuf0types.h" - -/** Default value for maximum on-disk size of change buffer in terms -of percentage of the buffer pool. */ -#define CHANGE_BUFFER_DEFAULT_SIZE (25) - -/* Possible operations buffered in the insert/whatever buffer. See -ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */ -typedef enum { - IBUF_OP_INSERT = 0, - IBUF_OP_DELETE_MARK = 1, - IBUF_OP_DELETE = 2, - - /* Number of different operation types. */ - IBUF_OP_COUNT = 3 -} ibuf_op_t; - -/** Combinations of operations that can be buffered. Because the enum -values are used for indexing innobase_change_buffering_values[], they -should start at 0 and there should not be any gaps. */ -typedef enum { - IBUF_USE_NONE = 0, - IBUF_USE_INSERT, /* insert */ - IBUF_USE_DELETE_MARK, /* delete */ - IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */ - IBUF_USE_DELETE, /* delete+purge */ - IBUF_USE_ALL, /* insert+delete+purge */ - - IBUF_USE_COUNT /* number of entries in ibuf_use_t */ -} ibuf_use_t; - -/** Operations that can currently be buffered. */ -extern ibuf_use_t ibuf_use; - -/** The insert buffer control structure */ -extern ibuf_t* ibuf; - -/* The purpose of the insert buffer is to reduce random disk access. 
-When we wish to insert a record into a non-unique secondary index and -the B-tree leaf page where the record belongs to is not in the buffer -pool, we insert the record into the insert buffer B-tree, indexed by -(space_id, page_no). When the page is eventually read into the buffer -pool, we look up the insert buffer B-tree for any modifications to the -page, and apply these upon the completion of the read operation. This -is called the insert buffer merge. */ - -/* The insert buffer merge must always succeed. To guarantee this, -the insert buffer subsystem keeps track of the free space in pages for -which it can buffer operations. Two bits per page in the insert -buffer bitmap indicate the available space in coarse increments. The -free bits in the insert buffer bitmap must never exceed the free space -on a page. It is safe to decrement or reset the bits in the bitmap in -a mini-transaction that is committed before the mini-transaction that -affects the free space. It is unsafe to increment the bits in a -separately committed mini-transaction, because in crash recovery, the -free bits could momentarily be set too high. */ - -/******************************************************************//** -Creates the insert buffer data structure at a database startup. -@return DB_SUCCESS or failure */ -UNIV_INTERN -dberr_t -ibuf_init_at_db_start(void); -/*=======================*/ -/*********************************************************************//** -Updates the max_size value for ibuf. */ -UNIV_INTERN -void -ibuf_max_size_update( -/*=================*/ - ulint new_val); /*!< in: new value in terms of - percentage of the buffer pool size */ -/*********************************************************************//** -Reads the biggest tablespace id from the high end of the insert buffer -tree and updates the counter in fil_system. 
*/ -UNIV_INTERN -void -ibuf_update_max_tablespace_id(void); -/*===============================*/ -/***************************************************************//** -Starts an insert buffer mini-transaction. */ -UNIV_INLINE -void -ibuf_mtr_start( -/*===========*/ - mtr_t* mtr) /*!< out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/***************************************************************//** -Commits an insert buffer mini-transaction. */ -UNIV_INLINE -void -ibuf_mtr_commit( -/*============*/ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Initializes an ibuf bitmap page. */ -UNIV_INTERN -void -ibuf_bitmap_page_init( -/*==================*/ - buf_block_t* block, /*!< in: bitmap page */ - mtr_t* mtr); /*!< in: mtr */ -/************************************************************************//** -Resets the free bits of the page in the ibuf bitmap. This is done in a -separate mini-transaction, hence this operation does not restrict -further work to only ibuf bitmap operations, which would result if the -latch to the bitmap page were kept. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is safe -to decrement or reset the bits in the bitmap in a mini-transaction -that is committed before the mini-transaction that affects the free -space. */ -UNIV_INTERN -void -ibuf_reset_free_bits( -/*=================*/ - buf_block_t* block); /*!< in: index page; free bits are set to 0 - if the index is a non-clustered - non-unique, and page level is 0 */ -/************************************************************************//** -Updates the free bits of an uncompressed page in the ibuf bitmap if -there is not enough free on the page any more. 
This is done in a -separate mini-transaction, hence this operation does not restrict -further work to only ibuf bitmap operations, which would result if the -latch to the bitmap page were kept. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is -unsafe to increment the bits in a separately committed -mini-transaction, because in crash recovery, the free bits could -momentarily be set too high. It is only safe to use this function for -decrementing the free bits. Should more free space become available, -we must not update the free bits here, because that would break crash -recovery. */ -UNIV_INLINE -void -ibuf_update_free_bits_if_full( -/*==========================*/ - buf_block_t* block, /*!< in: index page to which we have added new - records; the free bits are updated if the - index is non-clustered and non-unique and - the page level is 0, and the page becomes - fuller */ - ulint max_ins_size,/*!< in: value of maximum insert size with - reorganize before the latest operation - performed to the page */ - ulint increase);/*!< in: upper limit for the additional space - used in the latest operation, if known, or - ULINT_UNDEFINED */ -/**********************************************************************//** -Updates the free bits for an uncompressed page to reflect the present -state. Does this in the mtr given, which means that the latching -order rules virtually prevent any further operations for this OS -thread until mtr is committed. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is safe -to set the free bits in the same mini-transaction that updated the -page. 
*/ -UNIV_INTERN -void -ibuf_update_free_bits_low( -/*======================*/ - const buf_block_t* block, /*!< in: index page */ - ulint max_ins_size, /*!< in: value of - maximum insert size - with reorganize before - the latest operation - performed to the page */ - mtr_t* mtr); /*!< in/out: mtr */ -/**********************************************************************//** -Updates the free bits for a compressed page to reflect the present -state. Does this in the mtr given, which means that the latching -order rules virtually prevent any further operations for this OS -thread until mtr is committed. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is safe -to set the free bits in the same mini-transaction that updated the -page. */ -UNIV_INTERN -void -ibuf_update_free_bits_zip( -/*======================*/ - buf_block_t* block, /*!< in/out: index page */ - mtr_t* mtr); /*!< in/out: mtr */ -/**********************************************************************//** -Updates the free bits for the two pages to reflect the present state. -Does this in the mtr given, which means that the latching order rules -virtually prevent any further operations until mtr is committed. -NOTE: The free bits in the insert buffer bitmap must never exceed the -free space on a page. It is safe to set the free bits in the same -mini-transaction that updated the pages. */ -UNIV_INTERN -void -ibuf_update_free_bits_for_two_pages_low( -/*====================================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - buf_block_t* block1, /*!< in: index page */ - buf_block_t* block2, /*!< in: index page */ - mtr_t* mtr); /*!< in: mtr */ -/**********************************************************************//** -A basic partial test if an insert to the insert buffer could be possible and -recommended. 
*/ -UNIV_INLINE -ibool -ibuf_should_try( -/*============*/ - dict_index_t* index, /*!< in: index where to insert */ - ulint ignore_sec_unique); /*!< in: if != 0, we should - ignore UNIQUE constraint on - a secondary index when we - decide */ -/******************************************************************//** -Returns TRUE if the current OS thread is performing an insert buffer -routine. - -For instance, a read-ahead of non-ibuf pages is forbidden by threads -that are executing an insert buffer routine. -@return TRUE if inside an insert buffer routine */ -UNIV_INLINE -ibool -ibuf_inside( -/*========*/ - const mtr_t* mtr) /*!< in: mini-transaction */ - MY_ATTRIBUTE((nonnull, pure)); -/***********************************************************************//** -Checks if a page address is an ibuf bitmap page (level 3 page) address. -@return TRUE if a bitmap page */ -UNIV_INLINE -ibool -ibuf_bitmap_page( -/*=============*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no);/*!< in: page number */ -/***********************************************************************//** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. -Must not be called when recv_no_ibuf_operations==TRUE. -@return TRUE if level 2 or level 3 page */ -UNIV_INTERN -ibool -ibuf_page_low( -/*==========*/ - ulint space, /*!< in: space id */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number */ -#ifdef UNIV_DEBUG - ibool x_latch,/*!< in: FALSE if relaxed check - (avoid latching the bitmap page) */ -#endif /* UNIV_DEBUG */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line where called */ - mtr_t* mtr) /*!< in: mtr which will contain an - x-latch to the bitmap page if the page - is not one of the fixed address ibuf - pages, or NULL, in which case a new - transaction is created. 
*/ - MY_ATTRIBUTE((warn_unused_result)); -#ifdef UNIV_DEBUG -/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of -pages. Must not be called when recv_no_ibuf_operations==TRUE. -@param space tablespace identifier -@param zip_size compressed page size in bytes, or 0 -@param page_no page number -@param mtr mini-transaction or NULL -@return TRUE if level 2 or level 3 page */ -# define ibuf_page(space, zip_size, page_no, mtr) \ - ibuf_page_low(space, zip_size, page_no, TRUE, __FILE__, __LINE__, mtr) -#else /* UVIV_DEBUG */ -/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of -pages. Must not be called when recv_no_ibuf_operations==TRUE. -@param space tablespace identifier -@param zip_size compressed page size in bytes, or 0 -@param page_no page number -@param mtr mini-transaction or NULL -@return TRUE if level 2 or level 3 page */ -# define ibuf_page(space, zip_size, page_no, mtr) \ - ibuf_page_low(space, zip_size, page_no, __FILE__, __LINE__, mtr) -#endif /* UVIV_DEBUG */ -/***********************************************************************//** -Frees excess pages from the ibuf free list. This function is called when an OS -thread calls fsp services to allocate a new file segment, or a new page to a -file segment, and the thread did not own the fsp latch before this call. */ -UNIV_INTERN -void -ibuf_free_excess_pages(void); -/*========================*/ -/*********************************************************************//** -Buffer an operation in the insert/delete buffer, instead of doing it -directly to the disk page, if this is possible. Does not do it if the index -is clustered or unique. 
-@return TRUE if success */ -UNIV_INTERN -ibool -ibuf_insert( -/*========*/ - ibuf_op_t op, /*!< in: operation type */ - const dtuple_t* entry, /*!< in: index entry to insert */ - dict_index_t* index, /*!< in: index where to insert */ - ulint space, /*!< in: space id where to insert */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no,/*!< in: page number where to insert */ - que_thr_t* thr); /*!< in: query thread */ -/*********************************************************************//** -When an index page is read from a disk to the buffer pool, this function -applies any buffered operations to the page and deletes the entries from the -insert buffer. If the page is not read, but created in the buffer pool, this -function deletes its buffered entries from the insert buffer; there can -exist entries for such a page if the page belonged to an index which -subsequently was dropped. */ -UNIV_INTERN -void -ibuf_merge_or_delete_for_page( -/*==========================*/ - buf_block_t* block, /*!< in: if page has been read from - disk, pointer to the page x-latched, - else NULL */ - ulint space, /*!< in: space id of the index page */ - ulint page_no,/*!< in: page number of the index page */ - ulint zip_size,/*!< in: compressed page size in bytes, - or 0 */ - ibool update_ibuf_bitmap);/*!< in: normally this is set - to TRUE, but if we have deleted or are - deleting the tablespace, then we - naturally do not want to update a - non-existent bitmap page */ -/*********************************************************************//** -Deletes all entries in the insert buffer for a given space id. This is used -in DISCARD TABLESPACE and IMPORT TABLESPACE. -NOTE: this does not update the page free bitmaps in the space. The space will -become CORRUPT when you call this function! 
*/ -UNIV_INTERN -void -ibuf_delete_for_discarded_space( -/*============================*/ - ulint space); /*!< in: space id */ -/** Contract the change buffer by reading pages to the buffer pool. -@param[in] full If true, do a full contraction based -on PCT_IO(100). If false, the size of contract batch is determined -based on the current size of the change buffer. -@return a lower limit for the combined size in bytes of entries which -will be merged from ibuf trees to the pages read, 0 if ibuf is -empty */ -UNIV_INTERN -ulint -ibuf_merge_in_background( - bool full); /*!< in: TRUE if the caller wants to - do a full contract based on PCT_IO(100). - If FALSE then the size of contract - batch is determined based on the - current size of the ibuf tree. */ - -/** Contracts insert buffer trees by reading pages referring to space_id -to the buffer pool. -@returns number of pages merged.*/ -UNIV_INTERN -ulint -ibuf_merge_space( -/*=============*/ - ulint space); /*!< in: space id */ - -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Parses a redo log record of an ibuf bitmap page init. -@return end of log record or NULL */ -UNIV_INTERN -byte* -ibuf_parse_bitmap_init( -/*===================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: block or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_IBUF_COUNT_DEBUG -/******************************************************************//** -Gets the ibuf count for a given page. -@return number of entries in the insert buffer currently buffered for -this page */ -UNIV_INTERN -ulint -ibuf_count_get( -/*===========*/ - ulint space, /*!< in: space id */ - ulint page_no);/*!< in: page number */ -#endif -/******************************************************************//** -Looks if the insert buffer is empty. 
-@return true if empty */ -UNIV_INTERN -bool -ibuf_is_empty(void); -/*===============*/ -/******************************************************************//** -Prints info of ibuf. */ -UNIV_INTERN -void -ibuf_print( -/*=======*/ - FILE* file); /*!< in: file where to print */ -/******************************************************************** -Read the first two bytes from a record's fourth field (counter field in new -records; something else in older records). -@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */ -UNIV_INTERN -ulint -ibuf_rec_get_counter( -/*=================*/ - const rec_t* rec); /*!< in: ibuf record */ -/******************************************************************//** -Closes insert buffer and frees the data structures. */ -UNIV_INTERN -void -ibuf_close(void); -/*============*/ -/******************************************************************//** -Function to pass ibuf status variables */ -UNIV_INTERN -void -ibuf_export_ibuf_status( -/*====================*/ - ulint* size, - ulint* free_list, - ulint* segment_size, - ulint* merges, - ulint* merged_inserts, - ulint* merged_delete_marks, - ulint* merged_deletes, - ulint* discarded_inserts, - ulint* discarded_delete_marks, - ulint* discarded_deletes); - -/******************************************************************//** -Checks the insert buffer bitmaps on IMPORT TABLESPACE. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -ibuf_check_bitmap_on_import( -/*========================*/ - const trx_t* trx, /*!< in: transaction */ - ulint space_id) /*!< in: tablespace identifier */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO -#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO - -#endif /* !UNIV_HOTBACKUP */ - -/* The ibuf header page currently contains only the file segment header -for the file segment from which the pages for the ibuf tree are allocated */ -#define IBUF_HEADER PAGE_DATA -#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */ - -/* The insert buffer tree itself is always located in space 0. */ -#define IBUF_SPACE_ID 0 - -#ifndef UNIV_NONINL -#include "ibuf0ibuf.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ibuf0ibuf.ic b/storage/xtradb/include/ibuf0ibuf.ic deleted file mode 100644 index a5df9f7b6b4..00000000000 --- a/storage/xtradb/include/ibuf0ibuf.ic +++ /dev/null @@ -1,368 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ibuf0ibuf.ic -Insert buffer - -Created 7/19/1997 Heikki Tuuri -*******************************************************/ - -#include "page0page.h" -#include "page0zip.h" -#ifndef UNIV_HOTBACKUP -#include "buf0lru.h" - -/** An index page must contain at least UNIV_PAGE_SIZE / -IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to -buffer inserts to this page. If there is this much of free space, the -corresponding bits are set in the ibuf bitmap. */ -#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32 - -/***************************************************************//** -Starts an insert buffer mini-transaction. */ -UNIV_INLINE -void -ibuf_mtr_start( -/*===========*/ - mtr_t* mtr) /*!< out: mini-transaction */ -{ - mtr_start(mtr); - mtr->inside_ibuf = TRUE; -} -/***************************************************************//** -Commits an insert buffer mini-transaction. */ -UNIV_INLINE -void -ibuf_mtr_commit( -/*============*/ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(mtr->inside_ibuf); - ut_d(mtr->inside_ibuf = FALSE); - mtr_commit(mtr); -} - -/** Insert buffer struct */ -struct ibuf_t{ - ulint size; /*!< current size of the ibuf index - tree, in pages */ - ulint max_size; /*!< recommended maximum size of the - ibuf index tree, in pages */ - ulint seg_size; /*!< allocated pages of the file - segment containing ibuf header and - tree */ - bool empty; /*!< Protected by the page - latch of the root page of the - insert buffer tree - (FSP_IBUF_TREE_ROOT_PAGE_NO). true - if and only if the insert - buffer tree is empty. 
*/ - ulint free_list_len; /*!< length of the free list */ - ulint height; /*!< tree height */ - dict_index_t* index; /*!< insert buffer index */ - - ulint n_merges; /*!< number of pages merged */ - ulint n_merged_ops[IBUF_OP_COUNT]; - /*!< number of operations of each type - merged to index pages */ - ulint n_discarded_ops[IBUF_OP_COUNT]; - /*!< number of operations of each type - discarded without merging due to the - tablespace being deleted or the - index being dropped */ -}; - -/************************************************************************//** -Sets the free bit of the page in the ibuf bitmap. This is done in a separate -mini-transaction, hence this operation does not restrict further work to only -ibuf bitmap operations, which would result if the latch to the bitmap page -were kept. */ -UNIV_INTERN -void -ibuf_set_free_bits_func( -/*====================*/ - buf_block_t* block, /*!< in: index page of a non-clustered index; - free bit is reset if page level is 0 */ -#ifdef UNIV_IBUF_DEBUG - ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum - value which the bits must have before - setting; this is for debugging */ -#endif /* UNIV_IBUF_DEBUG */ - ulint val); /*!< in: value to set: < 4 */ -#ifdef UNIV_IBUF_DEBUG -# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v) -#else /* UNIV_IBUF_DEBUG */ -# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v) -#endif /* UNIV_IBUF_DEBUG */ - -/**********************************************************************//** -A basic partial test if an insert to the insert buffer could be possible and -recommended. 
*/ -UNIV_INLINE -ibool -ibuf_should_try( -/*============*/ - dict_index_t* index, /*!< in: index where to insert */ - ulint ignore_sec_unique) /*!< in: if != 0, we should - ignore UNIQUE constraint on - a secondary index when we - decide */ -{ - return(ibuf_use != IBUF_USE_NONE - && ibuf->max_size != 0 - && !dict_index_is_clust(index) - && index->table->quiesce == QUIESCE_NONE - && (ignore_sec_unique || !dict_index_is_unique(index)) - && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE); -} - -/******************************************************************//** -Returns TRUE if the current OS thread is performing an insert buffer -routine. - -For instance, a read-ahead of non-ibuf pages is forbidden by threads -that are executing an insert buffer routine. -@return TRUE if inside an insert buffer routine */ -UNIV_INLINE -ibool -ibuf_inside( -/*========*/ - const mtr_t* mtr) /*!< in: mini-transaction */ -{ - return(mtr->inside_ibuf); -} - -/***********************************************************************//** -Checks if a page address is an ibuf bitmap page address. -@return TRUE if a bitmap page */ -UNIV_INLINE -ibool -ibuf_bitmap_page( -/*=============*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint page_no)/*!< in: page number */ -{ - ut_ad(ut_is_2pow(zip_size)); - - if (!zip_size) { - return((page_no & (UNIV_PAGE_SIZE - 1)) - == FSP_IBUF_BITMAP_OFFSET); - } - - return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET); -} - -/*********************************************************************//** -Translates the free space on a page to a value in the ibuf bitmap. 
-@return value for ibuf bitmap bits */ -UNIV_INLINE -ulint -ibuf_index_page_calc_free_bits( -/*===========================*/ - ulint zip_size, /*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint max_ins_size) /*!< in: maximum insert size after reorganize - for the page */ -{ - ulint n; - ut_ad(ut_is_2pow(zip_size)); - ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - - if (zip_size) { - n = max_ins_size - / (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE); - } else { - n = max_ins_size - / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); - } - - if (n == 3) { - n = 2; - } - - if (n > 3) { - n = 3; - } - - return(n); -} - -/*********************************************************************//** -Translates the ibuf free bits to the free space on a page in bytes. -@return maximum insert size after reorganize for the page */ -UNIV_INLINE -ulint -ibuf_index_page_calc_free_from_bits( -/*================================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - ulint bits) /*!< in: value for ibuf bitmap bits */ -{ - ut_ad(bits < 4); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - - if (zip_size) { - if (bits == 3) { - return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE); - } - - return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE); - } - - if (bits == 3) { - return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); - } - - return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE)); -} - -/*********************************************************************//** -Translates the free space on a compressed page to a value in the ibuf bitmap. 
-@return value for ibuf bitmap bits */ -UNIV_INLINE -ulint -ibuf_index_page_calc_free_zip( -/*==========================*/ - ulint zip_size, - /*!< in: compressed page size in bytes */ - const buf_block_t* block) /*!< in: buffer block */ -{ - ulint max_ins_size; - const page_zip_des_t* page_zip; - lint zip_max_ins; - - ut_ad(zip_size == buf_block_get_zip_size(block)); - ut_ad(zip_size); - - /* Consider the maximum insert size on the uncompressed page - without reorganizing the page. We must not assume anything - about the compression ratio. If zip_max_ins > max_ins_size and - there is 1/4 garbage on the page, recompression after the - reorganize could fail, in theory. So, let us guarantee that - merging a buffered insert to a compressed page will always - succeed without reorganizing or recompressing the page, just - by using the page modification log. */ - max_ins_size = page_get_max_insert_size( - buf_block_get_frame(block), 1); - - page_zip = buf_block_get_page_zip(block); - zip_max_ins = page_zip_max_ins_size(page_zip, - FALSE/* not clustered */); - - if (zip_max_ins < 0) { - return(0); - } else if (max_ins_size > (ulint) zip_max_ins) { - max_ins_size = (ulint) zip_max_ins; - } - - return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size)); -} - -/*********************************************************************//** -Translates the free space on a page to a value in the ibuf bitmap. 
-@return value for ibuf bitmap bits */ -UNIV_INLINE -ulint -ibuf_index_page_calc_free( -/*======================*/ - ulint zip_size,/*!< in: compressed page size in bytes; - 0 for uncompressed pages */ - const buf_block_t* block) /*!< in: buffer block */ -{ - ut_ad(zip_size == buf_block_get_zip_size(block)); - - if (!zip_size) { - ulint max_ins_size; - - max_ins_size = page_get_max_insert_size_after_reorganize( - buf_block_get_frame(block), 1); - - return(ibuf_index_page_calc_free_bits(0, max_ins_size)); - } else { - return(ibuf_index_page_calc_free_zip(zip_size, block)); - } -} - -/************************************************************************//** -Updates the free bits of an uncompressed page in the ibuf bitmap if -there is not enough free on the page any more. This is done in a -separate mini-transaction, hence this operation does not restrict -further work to only ibuf bitmap operations, which would result if the -latch to the bitmap page were kept. NOTE: The free bits in the insert -buffer bitmap must never exceed the free space on a page. It is -unsafe to increment the bits in a separately committed -mini-transaction, because in crash recovery, the free bits could -momentarily be set too high. It is only safe to use this function for -decrementing the free bits. Should more free space become available, -we must not update the free bits here, because that would break crash -recovery. 
*/ -UNIV_INLINE -void -ibuf_update_free_bits_if_full( -/*==========================*/ - buf_block_t* block, /*!< in: index page to which we have added new - records; the free bits are updated if the - index is non-clustered and non-unique and - the page level is 0, and the page becomes - fuller */ - ulint max_ins_size,/*!< in: value of maximum insert size with - reorganize before the latest operation - performed to the page */ - ulint increase)/*!< in: upper limit for the additional space - used in the latest operation, if known, or - ULINT_UNDEFINED */ -{ - ulint before; - ulint after; - - ut_ad(!buf_block_get_page_zip(block)); - - before = ibuf_index_page_calc_free_bits(0, max_ins_size); - - if (max_ins_size >= increase) { -#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX -# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX" -#endif - after = ibuf_index_page_calc_free_bits(0, max_ins_size - - increase); -#ifdef UNIV_IBUF_DEBUG - ut_a(after <= ibuf_index_page_calc_free(0, block)); -#endif - } else { - after = ibuf_index_page_calc_free(0, block); - } - - if (after == 0) { - /* We move the page to the front of the buffer pool LRU list: - the purpose of this is to prevent those pages to which we - cannot make inserts using the insert buffer from slipping - out of the buffer pool */ - - buf_page_make_young(&block->page); - } - - if (before > after) { - ibuf_set_free_bits(block, after, before); - } -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/ibuf0types.h b/storage/xtradb/include/ibuf0types.h deleted file mode 100644 index 3fdbf078b0b..00000000000 --- a/storage/xtradb/include/ibuf0types.h +++ /dev/null @@ -1,31 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ibuf0types.h -Insert buffer global types - -Created 7/29/1997 Heikki Tuuri -*******************************************************/ - -#ifndef ibuf0types_h -#define ibuf0types_h - -struct ibuf_t; - -#endif diff --git a/storage/xtradb/include/lock0iter.h b/storage/xtradb/include/lock0iter.h deleted file mode 100644 index 0054850b526..00000000000 --- a/storage/xtradb/include/lock0iter.h +++ /dev/null @@ -1,69 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/lock0iter.h -Lock queue iterator type and function prototypes. - -Created July 16, 2007 Vasil Dimov -*******************************************************/ - -#ifndef lock0iter_h -#define lock0iter_h - -#include "univ.i" -#include "lock0types.h" - -struct lock_queue_iterator_t { - const lock_t* current_lock; - /* In case this is a record lock queue (not table lock queue) - then bit_no is the record number within the heap in which the - record is stored. */ - ulint bit_no; -}; - -/*******************************************************************//** -Initialize lock queue iterator so that it starts to iterate from -"lock". bit_no specifies the record number within the heap where the -record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: -1. If the lock is a table lock, thus we have a table lock queue; -2. If the lock is a record lock and it is a wait lock. In this case - bit_no is calculated in this function by using - lock_rec_find_set_bit(). There is exactly one bit set in the bitmap - of a wait lock. */ -UNIV_INTERN -void -lock_queue_iterator_reset( -/*======================*/ - lock_queue_iterator_t* iter, /*!< out: iterator */ - const lock_t* lock, /*!< in: lock to start from */ - ulint bit_no);/*!< in: record number in the - heap */ - -/*******************************************************************//** -Gets the previous lock in the lock queue, returns NULL if there are no -more locks (i.e. the current lock is the first one). The iterator is -receded (if not-NULL is returned). 
-@return previous lock or NULL */ - -const lock_t* -lock_queue_iterator_get_prev( -/*=========================*/ - lock_queue_iterator_t* iter); /*!< in/out: iterator */ - -#endif /* lock0iter_h */ diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h deleted file mode 100644 index 923c463aa22..00000000000 --- a/storage/xtradb/include/lock0lock.h +++ /dev/null @@ -1,1036 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/lock0lock.h -The transaction lock system - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#ifndef lock0lock_h -#define lock0lock_h - -#include "univ.i" -#include "buf0types.h" -#include "trx0types.h" -#include "mtr0types.h" -#include "rem0types.h" -#include "dict0types.h" -#include "que0types.h" -#include "lock0types.h" -#include "read0types.h" -#include "hash0hash.h" -#include "srv0srv.h" -#include "ut0vec.h" - -#include <string> - -#ifdef UNIV_DEBUG -extern ibool lock_print_waits; -#endif /* UNIV_DEBUG */ - -/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by - setting innodb_lock_schedule_algorithm. */ -enum innodb_lock_schedule_algorithm_t { - INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */ - INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */ -}; - -extern ulong innodb_lock_schedule_algorithm; - -extern ulint srv_n_lock_deadlock_count; - -/*********************************************************************//** -Gets the size of a lock struct. -@return size in bytes */ -UNIV_INTERN -ulint -lock_get_size(void); -/*===============*/ -/*********************************************************************//** -Creates the lock system at database start. */ -UNIV_INTERN -void -lock_sys_create( -/*============*/ - ulint n_cells); /*!< in: number of slots in lock hash table */ -/*********************************************************************//** -Closes the lock system at database shutdown. 
*/ -UNIV_INTERN -void -lock_sys_close(void); -/*================*/ -/*********************************************************************//** -Gets the heap_no of the smallest user record on a page. -@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */ -UNIV_INLINE -ulint -lock_get_min_heap_no( -/*=================*/ - const buf_block_t* block); /*!< in: buffer block */ -/*************************************************************//** -Updates the lock table when we have reorganized a page. NOTE: we copy -also the locks set on the infimum of the page; the infimum may carry -locks if an update of a record is occurring on the page, and its locks -were temporarily stored on the infimum. */ -UNIV_INTERN -void -lock_move_reorganize_page( -/*======================*/ - const buf_block_t* block, /*!< in: old index page, now - reorganized */ - const buf_block_t* oblock);/*!< in: copy of the old, not - reorganized page */ -/*************************************************************//** -Moves the explicit locks on user records to another page if a record -list end is moved to another page. */ -UNIV_INTERN -void -lock_move_rec_list_end( -/*===================*/ - const buf_block_t* new_block, /*!< in: index page to move to */ - const buf_block_t* block, /*!< in: index page */ - const rec_t* rec); /*!< in: record on page: this - is the first record moved */ -/*************************************************************//** -Moves the explicit locks on user records to another page if a record -list start is moved to another page. 
*/ -UNIV_INTERN -void -lock_move_rec_list_start( -/*=====================*/ - const buf_block_t* new_block, /*!< in: index page to move to */ - const buf_block_t* block, /*!< in: index page */ - const rec_t* rec, /*!< in: record on page: - this is the first - record NOT copied */ - const rec_t* old_end); /*!< in: old - previous-to-last - record on new_page - before the records - were copied */ -/*************************************************************//** -Updates the lock table when a page is split to the right. */ -UNIV_INTERN -void -lock_update_split_right( -/*====================*/ - const buf_block_t* right_block, /*!< in: right page */ - const buf_block_t* left_block); /*!< in: left page */ -/*************************************************************//** -Updates the lock table when a page is merged to the right. */ -UNIV_INTERN -void -lock_update_merge_right( -/*====================*/ - const buf_block_t* right_block, /*!< in: right page to - which merged */ - const rec_t* orig_succ, /*!< in: original - successor of infimum - on the right page - before merge */ - const buf_block_t* left_block); /*!< in: merged index - page which will be - discarded */ -/*************************************************************//** -Updates the lock table when the root page is copied to another in -btr_root_raise_and_insert. Note that we leave lock structs on the -root page, even though they do not make sense on other than leaf -pages: the reason is that in a pessimistic update the infimum record -of the root page will act as a dummy carrier of the locks of the record -to be updated. 
*/ -UNIV_INTERN -void -lock_update_root_raise( -/*===================*/ - const buf_block_t* block, /*!< in: index page to which copied */ - const buf_block_t* root); /*!< in: root page */ -/*************************************************************//** -Updates the lock table when a page is copied to another and the original page -is removed from the chain of leaf pages, except if page is the root! */ -UNIV_INTERN -void -lock_update_copy_and_discard( -/*=========================*/ - const buf_block_t* new_block, /*!< in: index page to - which copied */ - const buf_block_t* block); /*!< in: index page; - NOT the root! */ -/*************************************************************//** -Updates the lock table when a page is split to the left. */ -UNIV_INTERN -void -lock_update_split_left( -/*===================*/ - const buf_block_t* right_block, /*!< in: right page */ - const buf_block_t* left_block); /*!< in: left page */ -/*************************************************************//** -Updates the lock table when a page is merged to the left. */ -UNIV_INTERN -void -lock_update_merge_left( -/*===================*/ - const buf_block_t* left_block, /*!< in: left page to - which merged */ - const rec_t* orig_pred, /*!< in: original predecessor - of supremum on the left page - before merge */ - const buf_block_t* right_block); /*!< in: merged index page - which will be discarded */ -/*************************************************************//** -Updates the lock table when a page is splited and merged to -two pages. 
*/ -UNIV_INTERN -void -lock_update_split_and_merge( - const buf_block_t* left_block, /*!< in: left page to which merged */ - const rec_t* orig_pred, /*!< in: original predecessor of - supremum on the left page before merge*/ - const buf_block_t* right_block);/*!< in: right page from which merged */ -/*************************************************************//** -Resets the original locks on heir and replaces them with gap type locks -inherited from rec. */ -UNIV_INTERN -void -lock_rec_reset_and_inherit_gap_locks( -/*=================================*/ - const buf_block_t* heir_block, /*!< in: block containing the - record which inherits */ - const buf_block_t* block, /*!< in: block containing the - record from which inherited; - does NOT reset the locks on - this record */ - ulint heir_heap_no, /*!< in: heap_no of the - inheriting record */ - ulint heap_no); /*!< in: heap_no of the - donating record */ -/*************************************************************//** -Updates the lock table when a page is discarded. */ -UNIV_INTERN -void -lock_update_discard( -/*================*/ - const buf_block_t* heir_block, /*!< in: index page - which will inherit the locks */ - ulint heir_heap_no, /*!< in: heap_no of the record - which will inherit the locks */ - const buf_block_t* block); /*!< in: index page - which will be discarded */ -/*************************************************************//** -Updates the lock table when a new user record is inserted. */ -UNIV_INTERN -void -lock_update_insert( -/*===============*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec); /*!< in: the inserted record */ -/*************************************************************//** -Updates the lock table when a record is removed. 
*/ -UNIV_INTERN -void -lock_update_delete( -/*===============*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec); /*!< in: the record to be removed */ -/*********************************************************************//** -Stores on the page infimum record the explicit locks of another record. -This function is used to store the lock state of a record when it is -updated and the size of the record changes in the update. The record -is in such an update moved, perhaps to another page. The infimum record -acts as a dummy carrier record, taking care of lock releases while the -actual record is being moved. */ -UNIV_INTERN -void -lock_rec_store_on_page_infimum( -/*===========================*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec); /*!< in: record whose lock state - is stored on the infimum - record of the same page; lock - bits are reset on the - record */ -/*********************************************************************//** -Restores the state of explicit lock requests on a single record, where the -state was stored on the infimum of the page. */ -UNIV_INTERN -void -lock_rec_restore_from_page_infimum( -/*===============================*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec, /*!< in: record whose lock state - is restored */ - const buf_block_t* donator);/*!< in: page (rec is not - necessarily on this page) - whose infimum stored the lock - state; lock bits are reset on - the infimum */ -/*********************************************************************//** -Determines if there are explicit record locks on a page. 
-@return an explicit record lock on the page, or NULL if there are none */ -UNIV_INTERN -lock_t* -lock_rec_expl_exist_on_page( -/*========================*/ - ulint space, /*!< in: space id */ - ulint page_no)/*!< in: page number */ - MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************************//** -Checks if locks of other transactions prevent an immediate insert of -a record. If they do, first tests if the query thread should anyway -be suspended for some reason; if not, then puts the transaction and -the query thread to the lock wait state and inserts a waiting request -for a gap x-lock to the lock queue. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_rec_insert_check_and_lock( -/*===========================*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is - set, does nothing */ - const rec_t* rec, /*!< in: record after which to insert */ - buf_block_t* block, /*!< in/out: buffer block of rec */ - dict_index_t* index, /*!< in: index */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - ibool* inherit)/*!< out: set to TRUE if the new - inserted record maybe should inherit - LOCK_GAP type locks from the successor - record */ - MY_ATTRIBUTE((nonnull(2,3,4,6,7), warn_unused_result)); -/*********************************************************************//** -Checks if locks of other transactions prevent an immediate modify (update, -delete mark, or delete unmark) of a clustered index record. If they do, -first tests if the query thread should anyway be suspended for some -reason; if not, then puts the transaction and the query thread to the -lock wait state and inserts a waiting request for a record x-lock to the -lock queue. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_clust_rec_modify_check_and_lock( -/*=================================*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG - bit is set, does nothing */ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: record which should be - modified */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -/*********************************************************************//** -Checks if locks of other transactions prevent an immediate modify -(delete mark or delete unmark) of a secondary index record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_sec_rec_modify_check_and_lock( -/*===============================*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG - bit is set, does nothing */ - buf_block_t* block, /*!< in/out: buffer block of rec */ - const rec_t* rec, /*!< in: record which should be - modified; NOTE: as this is a secondary - index, we always have to modify the - clustered index record first: see the - comment below */ - dict_index_t* index, /*!< in: secondary index */ - que_thr_t* thr, /*!< in: query thread - (can be NULL if BTR_NO_LOCKING_FLAG) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,4,6))); -/*********************************************************************//** -Like lock_clust_rec_read_check_and_lock(), but reads a -secondary index record. 
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, -or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_sec_rec_read_check_and_lock( -/*=============================*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG - bit is set, does nothing */ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: user record or page - supremum record which should - be read or passed over by a - read cursor */ - dict_index_t* index, /*!< in: secondary index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - enum lock_mode mode, /*!< in: mode of the lock which - the read cursor should set on - records: LOCK_S or LOCK_X; the - latter is possible in - SELECT FOR UPDATE */ - ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr); /*!< in: query thread */ -/*********************************************************************//** -Checks if locks of other transactions prevent an immediate read, or passing -over by a read cursor, of a clustered index record. If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. 
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, -or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_clust_rec_read_check_and_lock( -/*===============================*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG - bit is set, does nothing */ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: user record or page - supremum record which should - be read or passed over by a - read cursor */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - enum lock_mode mode, /*!< in: mode of the lock which - the read cursor should set on - records: LOCK_S or LOCK_X; the - latter is possible in - SELECT FOR UPDATE */ - ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr); /*!< in: query thread */ -/*********************************************************************//** -Checks if locks of other transactions prevent an immediate read, or passing -over by a read cursor, of a clustered index record. If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. This is an alternative version of -lock_clust_rec_read_check_and_lock() that does not require the parameter -"offsets". 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_clust_rec_read_check_and_lock_alt( -/*===================================*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG - bit is set, does nothing */ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: user record or page - supremum record which should - be read or passed over by a - read cursor */ - dict_index_t* index, /*!< in: clustered index */ - enum lock_mode mode, /*!< in: mode of the lock which - the read cursor should set on - records: LOCK_S or LOCK_X; the - latter is possible in - SELECT FOR UPDATE */ - ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Checks that a record is seen in a consistent read. -@return true if sees, or false if an earlier version of the record -should be retrieved */ -UNIV_INTERN -bool -lock_clust_rec_cons_read_sees( -/*==========================*/ - const rec_t* rec, /*!< in: user record which should be read or - passed over by a read cursor */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - read_view_t* view); /*!< in: consistent read view */ -/*********************************************************************//** -Checks that a non-clustered index record is seen in a consistent read. - -NOTE that a non-clustered index page contains so little information on -its modifications that also in the case false, the present version of -rec may be the right, but we must check this from the clustered index -record. 
- -@return true if certainly sees, or false if an earlier version of the -clustered index record might be needed */ -UNIV_INTERN -bool -lock_sec_rec_cons_read_sees( -/*========================*/ - const rec_t* rec, /*!< in: user record which - should be read or passed over - by a read cursor */ - const read_view_t* view) /*!< in: consistent read view */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Locks the specified database table in the mode given. If the lock cannot -be granted immediately, the query thread is put to wait. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_table( -/*=======*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - dict_table_t* table, /*!< in/out: database table - in dictionary cache */ - enum lock_mode mode, /*!< in: lock mode */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Creates a table IX lock object for a resurrected transaction. */ -UNIV_INTERN -void -lock_table_ix_resurrect( -/*====================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx); /*!< in/out: transaction */ -/*************************************************************//** -Removes a granted record lock of a transaction from the queue and grants -locks to other transactions waiting in the queue if they now are entitled -to a lock. 
*/ -UNIV_INTERN -void -lock_rec_unlock( -/*============*/ - trx_t* trx, /*!< in/out: transaction that has - set a record lock */ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec, /*!< in: record */ - enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */ -/*********************************************************************//** -Releases a transaction's locks, and releases possible other transactions -waiting because of these locks. Change the state of the transaction to -TRX_STATE_COMMITTED_IN_MEMORY. */ -UNIV_INTERN -void -lock_trx_release_locks( -/*===================*/ - trx_t* trx); /*!< in/out: transaction */ - -/*********************************************************************//** -Cancels a waiting lock request and releases possible other transactions -waiting behind it. */ -UNIV_INTERN -void -lock_cancel_waiting_and_release( -/*============================*/ - lock_t* lock); /*!< in/out: waiting lock request */ - -/*********************************************************************//** -Removes locks on a table to be dropped or truncated. -If remove_also_table_sx_locks is TRUE then table-level S and X locks are -also removed in addition to other table-level and record-level locks. -No lock, that is going to be removed, is allowed to be a wait lock. */ -UNIV_INTERN -void -lock_remove_all_on_table( -/*=====================*/ - dict_table_t* table, /*!< in: table to be dropped - or truncated */ - ibool remove_also_table_sx_locks);/*!< in: also removes - table S and X locks */ - -/*********************************************************************//** -Calculates the fold value of a page file address: used in inserting or -searching for a lock in the hash table. 
-@return folded value */ -UNIV_INLINE -ulint -lock_rec_fold( -/*==========*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ - MY_ATTRIBUTE((const)); -/*********************************************************************//** -Calculates the hash value of a page file address: used in inserting or -searching for a lock in the hash table. -@return hashed value */ -UNIV_INLINE -ulint -lock_rec_hash( -/*==========*/ - ulint space, /*!< in: space */ - ulint page_no);/*!< in: page number */ - -/**********************************************************************//** -Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, -if none found. -@return bit index == heap number of the record, or ULINT_UNDEFINED if -none found */ -UNIV_INTERN -ulint -lock_rec_find_set_bit( -/*==================*/ - const lock_t* lock); /*!< in: record lock with at least one - bit set */ - -/*********************************************************************//** -Gets the source table of an ALTER TABLE transaction. The table must be -covered by an IX or IS table lock. -@return the source table of transaction, if it is covered by an IX or -IS table lock; dest if there is no source table, and NULL if the -transaction is locking more than two tables or an inconsistency is -found */ -UNIV_INTERN -dict_table_t* -lock_get_src_table( -/*===============*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* dest, /*!< in: destination of ALTER TABLE */ - enum lock_mode* mode); /*!< out: lock mode of the source table */ -/*********************************************************************//** -Determine if the given table is exclusively "owned" by the given -transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC -on the table. 
-@return TRUE if table is only locked by trx, with LOCK_IX, and -possibly LOCK_AUTO_INC */ -UNIV_INTERN -ibool -lock_is_table_exclusive( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - const trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Checks if a lock request lock1 has to wait for request lock2. -@return TRUE if lock1 has to wait for lock2 to be removed */ -UNIV_INTERN -ibool -lock_has_to_wait( -/*=============*/ - const lock_t* lock1, /*!< in: waiting lock */ - const lock_t* lock2); /*!< in: another lock; NOTE that it is - assumed that this has a lock bit set - on the same record as in lock1 if the - locks are record locks */ -/*********************************************************************//** -Reports that a transaction id is insensible, i.e., in the future. */ -UNIV_INTERN -void -lock_report_trx_id_insanity( -/*========================*/ - trx_id_t trx_id, /*!< in: trx id */ - const rec_t* rec, /*!< in: user record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Prints info of a table lock. */ -UNIV_INTERN -void -lock_table_print( -/*=============*/ - FILE* file, /*!< in: file where to print */ - const lock_t* lock); /*!< in: table type lock */ -/*********************************************************************//** -Prints info of a record lock. */ -UNIV_INTERN -void -lock_rec_print( -/*===========*/ - FILE* file, /*!< in: file where to print */ - const lock_t* lock); /*!< in: record type lock */ -/*********************************************************************//** -Prints info of locks for all transactions. 
-@return FALSE if not able to obtain lock mutex and exits without -printing info */ -UNIV_INTERN -ibool -lock_print_info_summary( -/*====================*/ - FILE* file, /*!< in: file where to print */ - ibool nowait) /*!< in: whether to wait for the lock mutex */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Prints info of locks for each transaction. This function assumes that the -caller holds the lock mutex and more importantly it will release the lock -mutex on behalf of the caller. (This should be fixed in the future). */ -UNIV_INTERN -void -lock_print_info_all_transactions( -/*=============================*/ - FILE* file); /*!< in: file where to print */ -/*********************************************************************//** -Return approximate number or record locks (bits set in the bitmap) for -this transaction. Since delete-marked records may be removed, the -record count will not be precise. -The caller must be holding lock_sys->mutex. */ -UNIV_INTERN -ulint -lock_number_of_rows_locked( -/*=======================*/ - const trx_lock_t* trx_lock) /*!< in: transaction locks */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*******************************************************************//** -Gets the type of a lock. Non-inline version for using outside of the -lock module. -@return LOCK_TABLE or LOCK_REC */ -UNIV_INTERN -ulint -lock_get_type( -/*==========*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -Gets the trx of the lock. Non-inline version for using outside of the -lock module. -@return trx_t* */ -UNIV_INTERN -trx_t* -lock_get_trx( -/*=========*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -Gets the id of the transaction owning a lock. 
-@return transaction id */ -UNIV_INTERN -trx_id_t -lock_get_trx_id( -/*============*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -Gets the mode of a lock in a human readable string. -The string should not be free()'d or modified. -@return lock mode */ -UNIV_INTERN -const char* -lock_get_mode_str( -/*==============*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -Gets the type of a lock in a human readable string. -The string should not be free()'d or modified. -@return lock type */ -UNIV_INTERN -const char* -lock_get_type_str( -/*==============*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -Gets the id of the table on which the lock is. -@return id of the table */ -UNIV_INTERN -table_id_t -lock_get_table_id( -/*==============*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -Gets the name of the table on which the lock is. -The string should not be free()'d or modified. -@return name of the table */ -UNIV_INTERN -const char* -lock_get_table_name( -/*================*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -For a record lock, gets the index on which the lock is. -@return index */ -UNIV_INTERN -const dict_index_t* -lock_rec_get_index( -/*===============*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -For a record lock, gets the name of the index on which the lock is. -The string should not be free()'d or modified. 
-@return name of the index */ -UNIV_INTERN -const char* -lock_rec_get_index_name( -/*====================*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -For a record lock, gets the tablespace number on which the lock is. -@return tablespace number */ -UNIV_INTERN -ulint -lock_rec_get_space_id( -/*==================*/ - const lock_t* lock); /*!< in: lock */ - -/*******************************************************************//** -For a record lock, gets the page number on which the lock is. -@return page number */ -UNIV_INTERN -ulint -lock_rec_get_page_no( -/*=================*/ - const lock_t* lock); /*!< in: lock */ -/*******************************************************************//** -Check if there are any locks (table or rec) against table. -@return TRUE if locks exist */ -UNIV_INTERN -ibool -lock_table_has_locks( -/*=================*/ - const dict_table_t* table); /*!< in: check if there are any locks - held on records in this table or on the - table itself */ - -/*********************************************************************//** -A thread which wakes up threads whose lock wait may have lasted too long. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(lock_wait_timeout_thread)( -/*=====================================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ - -/********************************************************************//** -Releases a user OS thread waiting for a lock to be released, if the -thread is already suspended. */ -UNIV_INTERN -void -lock_wait_release_thread_if_suspended( -/*==================================*/ - que_thr_t* thr); /*!< in: query thread associated with the - user OS thread */ - -/***************************************************************//** -Puts a user OS thread to wait for a lock to be released. 
If an error -occurs during the wait trx->error_state associated with thr is -!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK -are possible errors. DB_DEADLOCK is returned if selective deadlock -resolution chose this transaction as a victim. */ -UNIV_INTERN -void -lock_wait_suspend_thread( -/*=====================*/ - que_thr_t* thr); /*!< in: query thread associated with the - user OS thread */ -/*********************************************************************//** -Unlocks AUTO_INC type locks that were possibly reserved by a trx. This -function should be called at the the end of an SQL statement, by the -connection thread that owns the transaction (trx->mysql_thd). */ -UNIV_INTERN -void -lock_unlock_table_autoinc( -/*======================*/ - trx_t* trx); /*!< in/out: transaction */ -/*********************************************************************//** -Check whether the transaction has already been rolled back because it -was selected as a deadlock victim, or if it has to wait then cancel -the wait lock. -@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ -UNIV_INTERN -dberr_t -lock_trx_handle_wait( -/*=================*/ - trx_t* trx) /*!< in/out: trx lock state */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Get the number of locks on a table. -@return number of locks */ -UNIV_INTERN -ulint -lock_table_get_n_locks( -/*===================*/ - const dict_table_t* table) /*!< in: table */ - MY_ATTRIBUTE((nonnull)); -#ifdef UNIV_DEBUG -/*********************************************************************//** -Checks that a transaction id is sensible, i.e., not in the future. 
-@return true if ok */ -UNIV_INTERN -bool -lock_check_trx_id_sanity( -/*=====================*/ - trx_id_t trx_id, /*!< in: trx id */ - const rec_t* rec, /*!< in: user record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Check if the transaction holds any locks on the sys tables -or its records. -@return the strongest lock found on any sys table or 0 for none */ -UNIV_INTERN -const lock_t* -lock_trx_has_sys_table_locks( -/*=========================*/ - const trx_t* trx) /*!< in: transaction to check */ - MY_ATTRIBUTE((warn_unused_result)); - -/*******************************************************************//** -Check if the transaction holds an exclusive lock on a record. -@return whether the locks are held */ -UNIV_INTERN -bool -lock_trx_has_rec_x_lock( -/*====================*/ - const trx_t* trx, /*!< in: transaction to check */ - const dict_table_t* table, /*!< in: table to check */ - const buf_block_t* block, /*!< in: buffer block of the record */ - ulint heap_no)/*!< in: record heap number */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ - -/** Lock modes and types */ -/* @{ */ -#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the - type_mode field in a lock */ -/** Lock types */ -/* @{ */ -#define LOCK_TABLE 16 /*!< table lock */ -#define LOCK_REC 32 /*!< record lock */ -#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the - type_mode field in a lock */ -#if LOCK_MODE_MASK & LOCK_TYPE_MASK -# error "LOCK_MODE_MASK & LOCK_TYPE_MASK" -#endif - -#define LOCK_WAIT 256 /*!< Waiting lock flag; when set, it - means that the lock has not yet been - granted, it is just waiting for its - turn in the wait queue */ -/* Precise modes */ -#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary - next-key lock in contrast 
to LOCK_GAP - or LOCK_REC_NOT_GAP */ -#define LOCK_GAP 512 /*!< when this bit is set, it means that the - lock holds only on the gap before the record; - for instance, an x-lock on the gap does not - give permission to modify the record on which - the bit is set; locks of this type are created - when records are removed from the index chain - of records */ -#define LOCK_REC_NOT_GAP 1024 /*!< this bit means that the lock is only on - the index record and does NOT block inserts - to the gap before the index record; this is - used in the case when we retrieve a record - with a unique key, and is also used in - locking plain SELECTs (not part of UPDATE - or DELETE) when the user has set the READ - COMMITTED isolation level */ -#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting - gap type record lock request in order to let - an insert of an index record to wait until - there are no conflicting locks by other - transactions on the gap; note that this flag - remains set when the waiting lock is granted, - or if the lock is inherited to a neighboring - record */ - -#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK -# error -#endif -#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK -# error -#endif -/* @} */ - -/** Lock operation struct */ -struct lock_op_t{ - dict_table_t* table; /*!< table to be locked */ - enum lock_mode mode; /*!< lock mode */ -}; - -/** The lock system struct */ -struct lock_sys_t{ - ib_mutex_t mutex; /*!< Mutex protecting the - locks */ - hash_table_t* rec_hash; /*!< hash table of the record - locks */ - ulint rec_num; - ib_mutex_t wait_mutex; /*!< Mutex protecting the - next two fields */ - srv_slot_t* waiting_threads; /*!< Array of user threads - suspended while waiting for - locks within InnoDB, protected - by the lock_sys->wait_mutex; - os_event_set() and - os_event_reset() on - waiting_threads[]->event - are protected by - trx_t::mutex */ - srv_slot_t* 
last_slot; /*!< highest slot ever used - in the waiting_threads array, - protected by - lock_sys->wait_mutex */ - ibool rollback_complete; - /*!< TRUE if rollback of all - recovered transactions is - complete. Protected by - lock_sys->mutex */ - - ulint n_lock_max_wait_time; /*!< Max wait time */ - - os_event_t timeout_event; /*!< An event waited for by - lock_wait_timeout_thread. - Not protected by a mutex, - but the waits are timed. - Signaled on shutdown only. */ - - bool timeout_thread_active; /*!< True if the timeout thread - is running */ -}; - -/** The lock system */ -extern lock_sys_t* lock_sys; - -/** Test if lock_sys->mutex can be acquired without waiting. */ -#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex) - -/** Test if lock_sys->mutex is owned. */ -#define lock_mutex_own() mutex_own(&lock_sys->mutex) - -/** Acquire the lock_sys->mutex. */ -#define lock_mutex_enter() do { \ - mutex_enter(&lock_sys->mutex); \ -} while (0) - -/** Release the lock_sys->mutex. */ -#define lock_mutex_exit() do { \ - mutex_exit(&lock_sys->mutex); \ -} while (0) - -/** Test if lock_sys->wait_mutex is owned. */ -#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex) - -/** Acquire the lock_sys->wait_mutex. */ -#define lock_wait_mutex_enter() do { \ - mutex_enter(&lock_sys->wait_mutex); \ -} while (0) - -/** Release the lock_sys->wait_mutex. 
*/ -#define lock_wait_mutex_exit() do { \ - mutex_exit(&lock_sys->wait_mutex); \ -} while (0) - -/*******************************************************************//** -Get lock mode and table/index name -@return string containing lock info */ -std::string -lock_get_info( - const lock_t*); - -#ifndef UNIV_NONINL -#include "lock0lock.ic" -#endif - -#endif diff --git a/storage/xtradb/include/lock0lock.ic b/storage/xtradb/include/lock0lock.ic deleted file mode 100644 index 736936954cb..00000000000 --- a/storage/xtradb/include/lock0lock.ic +++ /dev/null @@ -1,92 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/lock0lock.ic -The transaction lock system - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#include "sync0sync.h" -#include "srv0srv.h" -#include "dict0dict.h" -#include "row0row.h" -#include "trx0sys.h" -#include "trx0trx.h" -#include "buf0buf.h" -#include "page0page.h" -#include "page0cur.h" -#include "row0vers.h" -#include "que0que.h" -#include "btr0cur.h" -#include "read0read.h" -#include "log0recv.h" - -/*********************************************************************//** -Calculates the fold value of a page file address: used in inserting or -searching for a lock in the hash table. -@return folded value */ -UNIV_INLINE -ulint -lock_rec_fold( -/*==========*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ -{ - return(ut_fold_ulint_pair(space, page_no)); -} - -/*********************************************************************//** -Calculates the hash value of a page file address: used in inserting or -searching for a lock in the hash table. -@return hashed value */ -UNIV_INLINE -ulint -lock_rec_hash( -/*==========*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ -{ - return(hash_calc_hash(lock_rec_fold(space, page_no), - lock_sys->rec_hash)); -} - -/*********************************************************************//** -Gets the heap_no of the smallest user record on a page. 
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */ -UNIV_INLINE -ulint -lock_get_min_heap_no( -/*=================*/ - const buf_block_t* block) /*!< in: buffer block */ -{ - const page_t* page = block->frame; - - if (page_is_comp(page)) { - return(rec_get_heap_no_new( - page - + rec_get_next_offs(page + PAGE_NEW_INFIMUM, - TRUE))); - } else { - return(rec_get_heap_no_old( - page - + rec_get_next_offs(page + PAGE_OLD_INFIMUM, - FALSE))); - } -} diff --git a/storage/xtradb/include/lock0priv.h b/storage/xtradb/include/lock0priv.h deleted file mode 100644 index 7a74cbdc2e3..00000000000 --- a/storage/xtradb/include/lock0priv.h +++ /dev/null @@ -1,124 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, MariaDB Corporation - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/lock0priv.h -Lock module internal structures and methods. 
- -Created July 12, 2007 Vasil Dimov -*******************************************************/ - -#ifndef lock0priv_h -#define lock0priv_h - -#ifndef LOCK_MODULE_IMPLEMENTATION -/* If you need to access members of the structures defined in this -file, please write appropriate functions that retrieve them and put -those functions in lock/ */ -#error Do not include lock0priv.h outside of the lock/ module -#endif - -#include "univ.i" -#include "dict0types.h" -#include "hash0hash.h" -#include "trx0types.h" -#include "ut0lst.h" - -/** A table lock */ -struct lock_table_t { - dict_table_t* table; /*!< database table in dictionary - cache */ - UT_LIST_NODE_T(lock_t) - locks; /*!< list of locks on the same - table */ -}; - -/** Record lock for a page */ -struct lock_rec_t { - ulint space; /*!< space id */ - ulint page_no; /*!< page number */ - ulint n_bits; /*!< number of bits in the lock - bitmap; NOTE: the lock bitmap is - placed immediately after the - lock struct */ -}; - -/** Lock struct; protected by lock_sys->mutex */ -struct lock_t { - trx_t* trx; /*!< transaction owning the - lock */ - UT_LIST_NODE_T(lock_t) - trx_locks; /*!< list of the locks of the - transaction */ - ulint type_mode; /*!< lock type, mode, LOCK_GAP or - LOCK_REC_NOT_GAP, - LOCK_INSERT_INTENTION, - wait flag, ORed */ - hash_node_t hash; /*!< hash chain node for a record - lock */ - dict_index_t* index; /*!< index for a record lock */ - - /* Statistics for how long lock has been held and time - how long this lock had to be waited before it was granted */ - time_t requested_time; /*!< Lock request time */ - ulint wait_time; /*!< Time waited this lock or 0 */ - - union { - lock_table_t tab_lock;/*!< table lock */ - lock_rec_t rec_lock;/*!< record lock */ - } un_member; /*!< lock details */ -}; - -/*********************************************************************//** -Gets the type of a lock. 
-@return LOCK_TABLE or LOCK_REC */ -UNIV_INLINE -ulint -lock_get_type_low( -/*==============*/ - const lock_t* lock); /*!< in: lock */ - -/*********************************************************************//** -Gets the previous record lock set on a record. -@return previous lock on the same record, NULL if none exists */ -UNIV_INTERN -const lock_t* -lock_rec_get_prev( -/*==============*/ - const lock_t* in_lock,/*!< in: record lock */ - ulint heap_no);/*!< in: heap number of the record */ - -/*********************************************************************//** -Checks if some transaction has an implicit x-lock on a record in a clustered -index. -@return transaction id of the transaction which has the x-lock, or 0 */ -UNIV_INLINE -trx_id_t -lock_clust_rec_some_has_impl( -/*=========================*/ - const rec_t* rec, /*!< in: user record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#ifndef UNIV_NONINL -#include "lock0priv.ic" -#endif - -#endif /* lock0priv_h */ diff --git a/storage/xtradb/include/lock0priv.ic b/storage/xtradb/include/lock0priv.ic deleted file mode 100644 index 6b70dc33d3c..00000000000 --- a/storage/xtradb/include/lock0priv.ic +++ /dev/null @@ -1,67 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/lock0priv.ic -Lock module internal inline methods. - -Created July 16, 2007 Vasil Dimov -*******************************************************/ - -/* This file contains only methods which are used in -lock/lock0* files, other than lock/lock0lock.cc. -I.e. lock/lock0lock.cc contains more internal inline -methods but they are used only in that file. */ - -#ifndef LOCK_MODULE_IMPLEMENTATION -#error Do not include lock0priv.ic outside of the lock/ module -#endif - -/*********************************************************************//** -Gets the type of a lock. -@return LOCK_TABLE or LOCK_REC */ -UNIV_INLINE -ulint -lock_get_type_low( -/*==============*/ - const lock_t* lock) /*!< in: lock */ -{ - ut_ad(lock); - - return(lock->type_mode & LOCK_TYPE_MASK); -} - -/*********************************************************************//** -Checks if some transaction has an implicit x-lock on a record in a clustered -index. 
-@return transaction id of the transaction which has the x-lock, or 0 */ -UNIV_INLINE -trx_id_t -lock_clust_rec_some_has_impl( -/*=========================*/ - const rec_t* rec, /*!< in: user record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - ut_ad(dict_index_is_clust(index)); - ut_ad(page_rec_is_user_rec(rec)); - - return(row_get_rec_trx_id(rec, index, offsets)); -} - -/* vim: set filetype=c: */ diff --git a/storage/xtradb/include/lock0types.h b/storage/xtradb/include/lock0types.h deleted file mode 100644 index cf32e72f864..00000000000 --- a/storage/xtradb/include/lock0types.h +++ /dev/null @@ -1,47 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/lock0types.h -The transaction lock system global types - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#ifndef lock0types_h -#define lock0types_h - -#define lock_t ib_lock_t -struct lock_t; -struct lock_sys_t; - -/* Basic lock modes */ -enum lock_mode { - LOCK_IS = 0, /* intention shared */ - LOCK_IX, /* intention exclusive */ - LOCK_S, /* shared */ - LOCK_X, /* exclusive */ - LOCK_AUTO_INC, /* locks the auto-inc counter of a table - in an exclusive mode */ - LOCK_NONE, /* this is used elsewhere to note consistent read */ - LOCK_NUM = LOCK_NONE, /* number of lock modes */ - LOCK_NONE_UNSET = 255 -}; - - -#endif diff --git a/storage/xtradb/include/log0crypt.h b/storage/xtradb/include/log0crypt.h deleted file mode 100644 index 6b164e90d6e..00000000000 --- a/storage/xtradb/include/log0crypt.h +++ /dev/null @@ -1,128 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (C) 2014, 2016, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ -/**************************************************//** -@file include/log0crypt.h -Innodb log encrypt/decrypt - -Created 11/25/2013 Minli Zhu -Modified Jan Lindström jan.lindstrom@mariadb.com -*******************************************************/ -#ifndef log0crypt_h -#define log0crypt_h - -#include "univ.i" -#include "ut0byte.h" -#include "my_crypt.h" - -typedef int Crypt_result; - -/* If true, enable redo log encryption. */ -extern my_bool srv_encrypt_log; - -/*********************************************************************** -Set next checkpoint's key version to latest one, and generate new key */ -UNIV_INTERN -void -log_crypt_set_ver_and_key( -/*======================*/ - ib_uint64_t next_checkpoint_no);/*!< in: next checkpoint no */ - - -/*********************************************************************//** -Writes the crypto (version, msg and iv) info, which has been used for -log blocks with lsn <= this checkpoint's lsn, to a log header's -checkpoint buf. */ -UNIV_INTERN -void -log_crypt_write_checkpoint_buf( -/*===========================*/ - byte* buf); /*!< in/out: checkpoint buffer */ - -/*********************************************************************//** -Read the crypto (version, msg and iv) info, which has been used for -log blocks with lsn <= this checkpoint's lsn, from a log header's -checkpoint buf. 
*/ -UNIV_INTERN -bool -log_crypt_read_checkpoint_buf( -/*===========================*/ - const byte* buf); /*!< in: checkpoint buffer */ - -/******************************************************** -Encrypt one or more log block before it is flushed to disk */ -UNIV_INTERN -void -log_encrypt_before_write( -/*=====================*/ - ib_uint64_t next_checkpoint_no, /*!< in: log group to be flushed */ - byte* block, /*!< in/out: pointer to a log block */ - const ulint size); /*!< in: size of log blocks */ - -/******************************************************** -Decrypt a specified log segment after they are read from a log file to a buffer. -*/ -UNIV_INTERN -void -log_decrypt_after_read( -/*===================*/ - byte* frame, /*!< in/out: log segment */ - const ulint size); /*!< in: log segment size */ - -/* Error codes for crypt info */ -typedef enum { - LOG_UNENCRYPTED = 0, - LOG_CRYPT_KEY_NOT_FOUND = 1, - LOG_DECRYPT_MAYBE_FAILED = 2 -} log_crypt_err_t; - -/******************************************************** -Check is the checkpoint information encrypted. This check -is based on fact has log group crypt info and based -on this crypt info was the key version different from -unencrypted key version. There is no realible way to -distinguish encrypted log block from corrupted log block, -but if log block corruption is found this function is -used to find out if log block is maybe encrypted but -encryption key, key management plugin or encryption -algorithm does not match. 
-@return TRUE, if log block may be encrypted */ -UNIV_INTERN -ibool -log_crypt_block_maybe_encrypted( -/*============================*/ - const byte* log_block, /*!< in: log block */ - log_crypt_err_t* err_info); /*!< out: error info */ - -/******************************************************** -Print crypt error message to error log */ -UNIV_INTERN -void -log_crypt_print_error( -/*==================*/ - log_crypt_err_t err_info); /*!< out: error info */ - -/*********************************************************************//** -Print checkpoint no from log block and all encryption keys from -checkpoints if they are present. Used for problem analysis. */ -void -log_crypt_print_checkpoint_keys( -/*============================*/ - const byte* log_block); - -#endif // log0crypt.h diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h deleted file mode 100644 index a55c1ea818c..00000000000 --- a/storage/xtradb/include/log0log.h +++ /dev/null @@ -1,1077 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2009, Google Inc. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/log0log.h -Database log - -Created 12/9/1995 Heikki Tuuri -*******************************************************/ - -#ifndef log0log_h -#define log0log_h - -#include "univ.i" -#include "ut0byte.h" -#include "ut0lst.h" -#ifndef UNIV_HOTBACKUP -#include "sync0sync.h" -#include "sync0rw.h" -#endif /* !UNIV_HOTBACKUP */ -#include "log0crypt.h" - -/* Type used for all log sequence number storage and arithmetics */ -typedef ib_uint64_t lsn_t; -#define LSN_MAX IB_UINT64_MAX - -#define LSN_PF UINT64PF - -/** Redo log buffer */ -struct log_t; -/** Redo log group */ -struct log_group_t; - -#ifdef UNIV_DEBUG -/** Flag: write to log file? */ -extern ibool log_do_write; -/** Flag: enable debug output when writing to the log? */ -extern ibool log_debug_writes; -#else /* UNIV_DEBUG */ -/** Write to log */ -# define log_do_write TRUE -#endif /* UNIV_DEBUG */ - -/** Magic value to use instead of log checksums when they are disabled */ -#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL - -typedef ulint (*log_checksum_func_t)(const byte* log_block); - -/** Pointer to the log checksum calculation function. Protected with -log_sys->mutex. 
*/ -extern log_checksum_func_t log_checksum_algorithm_ptr; - -/** Wait modes for log_write_up_to @{ */ -#define LOG_NO_WAIT 91 -#define LOG_WAIT_ONE_GROUP 92 -#define LOG_WAIT_ALL_GROUPS 93 -/* @} */ -/** Maximum number of log groups in log_group_t::checkpoint_buf */ -#define LOG_MAX_N_GROUPS 32 - -#define IB_ARCHIVED_LOGS_PREFIX "ib_log_archive_" -#define IB_ARCHIVED_LOGS_PREFIX_LEN (sizeof(IB_ARCHIVED_LOGS_PREFIX) - 1) -#define IB_ARCHIVED_LOGS_SERIAL_LEN 20 - -/*******************************************************************//** -Calculates where in log files we find a specified lsn. -@return log file number */ -UNIV_INTERN -ulint -log_calc_where_lsn_is( -/*==================*/ - ib_int64_t* log_file_offset, /*!< out: offset in that file - (including the header) */ - ib_uint64_t first_header_lsn, /*!< in: first log file start - lsn */ - ib_uint64_t lsn, /*!< in: lsn whose position to - determine */ - ulint n_log_files, /*!< in: total number of log - files */ - ib_int64_t log_file_size); /*!< in: log file size - (including the header) */ -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Writes to the log the string given. The log must be released with -log_release. -@return end lsn of the log record, zero if did not succeed */ -UNIV_INLINE -lsn_t -log_reserve_and_write_fast( -/*=======================*/ - const void* str, /*!< in: string */ - ulint len, /*!< in: string length */ - lsn_t* start_lsn);/*!< out: start lsn of the log record */ -/***********************************************************************//** -Releases the log mutex. */ -UNIV_INLINE -void -log_release(void); -/*=============*/ -/***********************************************************************//** -Checks if there is need for a log buffer flush or a new checkpoint, and does -this if yes. Any database operation should call this when it has modified -more than about 4 pages. 
NOTE that this function may only be called when the -OS thread owns no synchronization objects except the dictionary mutex. */ -UNIV_INLINE -void -log_free_check(void); -/*================*/ -/**************************************************************************//** -Locks the log mutex and opens the log for log_write_low. The log must be closed -with log_close and released with log_release. -@return start lsn of the log record */ -UNIV_INLINE -lsn_t -log_reserve_and_open( -/*=================*/ - ulint len); /*!< in: length of data to be catenated */ -/************************************************************//** -Opens the log for log_write_low. The log must be closed with log_close. -@return start lsn of the log record */ -UNIV_INTERN -lsn_t -log_open( -/*=====*/ - ulint len); /*!< in: length of data to be catenated */ -/************************************************************//** -Writes to the log the string given. It is assumed that the caller holds the -log mutex. */ -UNIV_INTERN -void -log_write_low( -/*==========*/ - byte* str, /*!< in: string */ - ulint str_len); /*!< in: string length */ -/************************************************************//** -Closes the log. -@return lsn */ -UNIV_INTERN -lsn_t -log_close(void); -/*===========*/ -/************************************************************//** -Gets the current lsn. -@return current lsn */ -UNIV_INLINE -lsn_t -log_get_lsn(void); -/*=============*/ -/************************************************************//** -Gets the current lsn. -@return current lsn */ -UNIV_INLINE -lsn_t -log_get_lsn_nowait(void); -/*=============*/ -/************************************************************//** -Gets the last lsn that is fully flushed to disk. -@return last flushed lsn */ -UNIV_INLINE -ib_uint64_t -log_get_flush_lsn(void); -/*=============*/ -/**************************************************************** -Gets the log group capacity. 
It is OK to read the value without -holding log_sys->mutex because it is constant. -@return log group capacity */ -UNIV_INLINE -lsn_t -log_get_capacity(void); -/*==================*/ -/**************************************************************** -Get log_sys::max_modified_age_async. It is OK to read the value without -holding log_sys::mutex because it is constant. -@return max_modified_age_async */ -UNIV_INLINE -lsn_t -log_get_max_modified_age_async(void); -/*================================*/ -/******************************************************//** -Initializes the log. */ -UNIV_INTERN -void -log_init(void); -/*==========*/ -/******************************************************************//** -Inits a log group to the log system. */ -UNIV_INTERN -void -log_group_init( -/*===========*/ - ulint id, /*!< in: group id */ - ulint n_files, /*!< in: number of log files */ - lsn_t file_size, /*!< in: log file size in bytes */ - ulint space_id, /*!< in: space id of the file space - which contains the log files of this - group */ - ulint archive_space_id); /*!< in: space id of the file space - which contains some archived log - files for this group; currently, only - for the first log group this is - used */ -/******************************************************//** -Completes an i/o to a log file. */ -UNIV_INTERN -void -log_io_complete( -/*============*/ - log_group_t* group); /*!< in: log group */ -/******************************************************//** -This function is called, e.g., when a transaction wants to commit. It checks -that the log has been written to the log file up to the last log entry written -by the transaction. If there is a flush running, it waits and checks if the -flush flushed enough. If not, starts a new flush. 
*/ -UNIV_INTERN -void -log_write_up_to( -/*============*/ - lsn_t lsn, /*!< in: log sequence number up to which - the log should be written, LSN_MAX if not specified */ - ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, - or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk); - /*!< in: TRUE if we want the written log - also to be flushed to disk */ -/****************************************************************//** -Does a syncronous flush of the log buffer to disk. */ -UNIV_INTERN -void -log_buffer_flush_to_disk(void); -/*==========================*/ -/****************************************************************//** -This functions writes the log buffer to the log file and if 'flush' -is set it forces a flush of the log file as well. This is meant to be -called from background master thread only as it does not wait for -the write (+ possible flush) to finish. */ -UNIV_INTERN -void -log_buffer_sync_in_background( -/*==========================*/ - ibool flush); /*<! in: flush the logs to disk */ -/******************************************************//** -Makes a checkpoint. Note that this function does not flush dirty -blocks from the buffer pool: it only checks what is lsn of the oldest -modification in the pool, and writes information about the lsn in -log files. Use log_make_checkpoint_at to flush also the pool. 
-@return TRUE if success, FALSE if a checkpoint write was already running */ -UNIV_INTERN -ibool -log_checkpoint( -/*===========*/ - ibool sync, /*!< in: TRUE if synchronous operation is - desired */ - ibool write_always, /*!< in: the function normally checks if the - the new checkpoint would have a greater - lsn than the previous one: if not, then no - physical write is done; by setting this - parameter TRUE, a physical write will always be - made to log files */ - ibool safe_to_ignore);/*!< in: TRUE if checkpoint can be ignored in - the case checkpoint's are disabled */ - -/****************************************************************//** -Makes a checkpoint at a given lsn or later. */ -UNIV_INTERN -void -log_make_checkpoint_at( -/*===================*/ - lsn_t lsn, /*!< in: make a checkpoint at this or a - later lsn, if LSN_MAX, makes - a checkpoint at the latest lsn */ - ibool write_always); /*!< in: the function normally checks if - the new checkpoint would have a - greater lsn than the previous one: if - not, then no physical write is done; - by setting this parameter TRUE, a - physical write will always be made to - log files */ -/****************************************************************//** -Disable checkpoints. This is used when doing a volume snapshot -to ensure that we don't get checkpoint between snapshoting two -different volumes */ -UNIV_INTERN -ibool log_disable_checkpoint(); - -/****************************************************************//** -Enable checkpoints that was disabled with log_disable_checkpoint() */ -UNIV_INTERN -void log_enable_checkpoint(); - -/****************************************************************//** -Makes a checkpoint at the latest lsn and writes it to first page of each -data file in the database, so that we know that the file spaces contain -all modifications up to that lsn. This can only be called at database -shutdown. This function also writes all log in log files to the log archive. 
*/ -UNIV_INTERN -void -logs_empty_and_mark_files_at_shutdown(void); -/*=======================================*/ -/******************************************************//** -Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */ -UNIV_INTERN -void -log_group_read_checkpoint_info( -/*===========================*/ - log_group_t* group, /*!< in: log group */ - ulint field); /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ -/*******************************************************************//** -Gets info from a checkpoint about a log group. */ -UNIV_INTERN -void -log_checkpoint_get_nth_group_info( -/*==============================*/ - const byte* buf, /*!< in: buffer containing checkpoint info */ - ulint n, /*!< in: nth slot */ - lsn_t* file_no);/*!< out: archived file number */ -/******************************************************//** -Writes checkpoint info to groups. */ -UNIV_INTERN -void -log_groups_write_checkpoint_info(void); -/*==================================*/ -/********************************************************************//** -Starts an archiving operation. -@return TRUE if succeed, FALSE if an archiving operation was already running */ -UNIV_INTERN -ibool -log_archive_do( -/*===========*/ - ibool sync, /*!< in: TRUE if synchronous operation is desired */ - ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to - archive */ -/****************************************************************//** -Starts again archiving which has been stopped. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_start(void); -/*===================*/ -/****************************************************************//** -Stop archiving the log so that a gap may occur in the archived log files. 
-@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_noarchivelog(void); -/*==========================*/ -/****************************************************************//** -Start archiving the log so that a gap may occur in the archived log files. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_archivelog(void); -/*========================*/ -/******************************************************//** -Generates an archived log file name. */ -UNIV_INTERN -void -log_archived_file_name_gen( -/*=======================*/ - char* buf, /*!< in: buffer where to write */ - ulint buf_len,/*!< in: buffer length */ - ulint id, /*!< in: group id */ - lsn_t file_no);/*!< in: file number */ - -UNIV_INTERN -void -log_archived_get_offset( -/*====================*/ - log_group_t* group, /*!< in: log group */ - lsn_t file_no, /*!< in: archive log file number */ - lsn_t archived_lsn, /*!< in: last archived LSN */ - lsn_t* offset); /*!< out: offset within archived file */ -#else /* !UNIV_HOTBACKUP */ -/******************************************************//** -Writes info to a buffer of a log group when log files are created in -backup restoration. */ -UNIV_INTERN -void -log_reset_first_header_and_checkpoint( -/*==================================*/ - byte* hdr_buf,/*!< in: buffer which will be written to the - start of the first log file */ - ib_uint64_t start); /*!< in: lsn of the start of the first log file; - we pretend that there is a checkpoint at - start + LOG_BLOCK_HDR_SIZE */ -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************//** -Checks that there is enough free space in the log to start a new query step. -Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this -function may only be called if the calling thread owns no synchronization -objects! 
*/ -UNIV_INTERN -void -log_check_margins(void); -/*===================*/ -#ifndef UNIV_HOTBACKUP -/******************************************************//** -Reads a specified log segment to a buffer. */ -UNIV_INTERN -void -log_group_read_log_seg( -/*===================*/ - ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */ - byte* buf, /*!< in: buffer where to read */ - log_group_t* group, /*!< in: log group */ - lsn_t start_lsn, /*!< in: read area start */ - lsn_t end_lsn, /*!< in: read area end */ - ibool release_mutex); /*!< in: whether the log_sys->mutex - should be released before the read */ -/******************************************************//** -Writes a buffer to a log file group. */ -UNIV_INTERN -void -log_group_write_buf( -/*================*/ - log_group_t* group, /*!< in: log group */ - byte* buf, /*!< in: buffer */ - ulint len, /*!< in: buffer len; must be divisible - by OS_FILE_LOG_BLOCK_SIZE */ - lsn_t start_lsn, /*!< in: start lsn of the buffer; must - be divisible by - OS_FILE_LOG_BLOCK_SIZE */ - ulint new_data_offset);/*!< in: start offset of new data in - buf: this parameter is used to decide - if we have to write a new log file - header */ -/********************************************************//** -Sets the field values in group to correspond to a given lsn. For this function -to work, the values must already be correctly initialized to correspond to -some lsn, for instance, a checkpoint lsn. */ -UNIV_INTERN -void -log_group_set_fields( -/*=================*/ - log_group_t* group, /*!< in/out: group */ - lsn_t lsn); /*!< in: lsn for which the values should be - set */ -/******************************************************//** -Calculates the data capacity of a log group, when the log file headers are not -included. 
-@return capacity in bytes */ -UNIV_INTERN -lsn_t -log_group_get_capacity( -/*===================*/ - const log_group_t* group); /*!< in: log group */ -#endif /* !UNIV_HOTBACKUP */ -/************************************************************//** -Gets a log block flush bit. -@return TRUE if this block was the first to be written in a log flush */ -UNIV_INLINE -ibool -log_block_get_flush_bit( -/*====================*/ - const byte* log_block); /*!< in: log block */ -/************************************************************//** -Gets a log block number stored in the header. -@return log block number stored in the block header */ -UNIV_INLINE -ulint -log_block_get_hdr_no( -/*=================*/ - const byte* log_block); /*!< in: log block */ -/************************************************************//** -Gets a log block data length. -@return log block data length measured as a byte offset from the block start */ -UNIV_INLINE -ulint -log_block_get_data_len( -/*===================*/ - const byte* log_block); /*!< in: log block */ -/************************************************************//** -Sets the log block data length. */ -UNIV_INLINE -void -log_block_set_data_len( -/*===================*/ - byte* log_block, /*!< in/out: log block */ - ulint len); /*!< in: data length */ -/************************************************************//** -Calculates the checksum for a log block. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum( -/*====================*/ - const byte* block); /*!< in: log block */ -/************************************************************//** -Gets a log block checksum field value. -@return checksum */ -UNIV_INLINE -ulint -log_block_get_checksum( -/*===================*/ - const byte* log_block); /*!< in: log block */ -/************************************************************//** -Sets a log block checksum field value. 
*/ -UNIV_INLINE -void -log_block_set_checksum( -/*===================*/ - byte* log_block, /*!< in/out: log block */ - ulint checksum); /*!< in: checksum */ -/************************************************************//** -Gets a log block first mtr log record group offset. -@return first mtr log record group byte offset from the block start, 0 -if none */ -UNIV_INLINE -ulint -log_block_get_first_rec_group( -/*==========================*/ - const byte* log_block); /*!< in: log block */ -/************************************************************//** -Sets the log block first mtr log record group offset. */ -UNIV_INLINE -void -log_block_set_first_rec_group( -/*==========================*/ - byte* log_block, /*!< in/out: log block */ - ulint offset); /*!< in: offset, 0 if none */ -/************************************************************//** -Gets a log block checkpoint number field (4 lowest bytes). -@return checkpoint no (4 lowest bytes) */ -UNIV_INLINE -ulint -log_block_get_checkpoint_no( -/*========================*/ - const byte* log_block); /*!< in: log block */ -/************************************************************//** -Initializes a log block in the log buffer. */ -UNIV_INLINE -void -log_block_init( -/*===========*/ - byte* log_block, /*!< in: pointer to the log buffer */ - lsn_t lsn); /*!< in: lsn within the log block */ -/************************************************************//** -Initializes a log block in the log buffer in the old, < 3.23.52 format, where -there was no checksum yet. */ -UNIV_INLINE -void -log_block_init_in_old_format( -/*=========================*/ - byte* log_block, /*!< in: pointer to the log buffer */ - lsn_t lsn); /*!< in: lsn within the log block */ -/************************************************************//** -Converts a lsn to a log block number. 
-@return log block number, it is > 0 and <= 1G */ -UNIV_INLINE -ulint -log_block_convert_lsn_to_no( -/*========================*/ - lsn_t lsn); /*!< in: lsn of a byte within the block */ -/******************************************************//** -Prints info of the log. */ -UNIV_INTERN -void -log_print( -/*======*/ - FILE* file); /*!< in: file where to print */ -/******************************************************//** -Peeks the current lsn. -@return TRUE if success, FALSE if could not get the log system mutex */ -UNIV_INTERN -ibool -log_peek_lsn( -/*=========*/ - lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */ -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN -void -log_refresh_stats(void); -/*===================*/ -/********************************************************//** -Closes all log groups. */ -UNIV_INTERN -void -log_group_close_all(void); -/*=====================*/ -/********************************************************//** -Shutdown the log system but do not release all the memory. */ -UNIV_INTERN -void -log_shutdown(void); -/*==============*/ -/********************************************************//** -Free the log system data structures. */ -UNIV_INTERN -void -log_mem_free(void); -/*==============*/ - -/****************************************************************//** -Safely reads the log_sys->tracked_lsn value. The writer counterpart function -is log_set_tracked_lsn() in log0online.c. - -@return log_sys->tracked_lsn value. 
*/ -UNIV_INLINE -lsn_t -log_get_tracked_lsn(void); -/*=====================*/ - -extern log_t* log_sys; - -/* Values used as flags */ -#define LOG_FLUSH 7652559 -#define LOG_CHECKPOINT 78656949 -#ifdef UNIV_LOG_ARCHIVE -# define LOG_ARCHIVE 11122331 -#endif /* UNIV_LOG_ARCHIVE */ -#define LOG_RECOVER 98887331 - -/* The counting of lsn's starts from this value: this must be non-zero */ -#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) - -#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE) -#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4) - -/* Offsets of a log block header */ -#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and - is allowed to wrap around at 2G; the - highest bit is set to 1 if this is the - first log block in a log flush write - segment */ -#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL - /* mask used to get the highest bit in - the preceding field */ -#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to - this block */ -#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an - mtr log record group in this log block, - 0 if none; if the value is the same - as LOG_BLOCK_HDR_DATA_LEN, it means - that the first rec group has not yet - been catenated to this log block, but - if it will, it will start at this - offset; an archive recovery can - start parsing the log records starting - from this offset in this log block, - if value not 0 */ -#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of - log_sys->next_checkpoint_no when the - log block was last written to: if the - block has not yet been written full, - this value is only updated before a - log buffer flush */ -#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in - bytes */ - -/* Offsets of a log block trailer from the end of the block */ -#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block - contents; in InnoDB versions - < 3.23.52 this did not contain the - checksum 
but the same value as - .._HDR_NO */ -#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */ - -/* Offsets for a checkpoint field */ -#define LOG_CHECKPOINT_NO 0 -#define LOG_CHECKPOINT_LSN 8 -#define LOG_CHECKPOINT_OFFSET_LOW32 16 -#define LOG_CHECKPOINT_LOG_BUF_SIZE 20 -#define LOG_CHECKPOINT_ARCHIVED_LSN 24 -#define LOG_CHECKPOINT_GROUP_ARRAY 32 - -/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */ - -#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0 -#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4 - -#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\ - + LOG_MAX_N_GROUPS * 8) -#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END -#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END) -#if 0 -#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END) - /*!< Not used (0); - This used to contain the - current fsp free limit in - tablespace 0, in units of one - megabyte. - - This information might have been used - since mysqlbackup version 0.35 but - before 1.41 to decide if unused ends of - non-auto-extending data files - in space 0 can be truncated. - - This information was made obsolete - by mysqlbackup --compress. 
*/ -#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END) - /*!< Not used (0); - This magic number tells if the - checkpoint contains the above field: - the field was added to - InnoDB-3.23.50 and - removed from MySQL 5.6 */ -#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243 - /*!< if LOG_CHECKPOINT_FSP_MAGIC_N - contains this value, then - LOG_CHECKPOINT_FSP_FREE_LIMIT - is valid */ -#endif -#define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END) -#define LOG_CRYPT_VER (20 + LOG_CHECKPOINT_ARRAY_END) - -#define LOG_CRYPT_MAX_ENTRIES (5) -#define LOG_CRYPT_ENTRY_SIZE (4 + 4 + 2 * MY_AES_BLOCK_SIZE) -#define LOG_CRYPT_SIZE (1 + 1 + \ - (LOG_CRYPT_MAX_ENTRIES * \ - LOG_CRYPT_ENTRY_SIZE)) - -#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END + \ - LOG_CRYPT_SIZE) - -/* Offsets of a log file header */ -#define LOG_GROUP_ID 0 /* log group number */ -#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this - log file */ -#define LOG_FILE_NO 12 /* 4-byte archived log file number; - this field is only defined in an - archived log file */ -#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16 - /* a 32-byte field which contains - the string 'ibbackup' and the - creation time if the log file was - created by mysqlbackup --restore; - when mysqld is first time started - on the restored database, it can - print helpful info for the user */ -#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64 - /* extend to record log_block_size - of XtraDB. 
0 means default 512 */ -#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE - /* this 4-byte field is TRUE when - the writing of an archived log file - has been completed; this field is - only defined in an archived log file */ -#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4) - /* lsn where the archived log file - at least extends: actually the - archived log file may extend to a - later lsn, as long as it is within the - same log block as this lsn; this field - is defined only when an archived log - file has been completely written */ -#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE - /* first checkpoint field in the log - header; we write alternately to the - checkpoint fields when we make new - checkpoints; this field is only defined - in the first log file of a log group */ -#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE) - /* second checkpoint field in the log - header */ -#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE) - -#define LOG_GROUP_OK 301 -#define LOG_GROUP_CORRUPTED 302 - -/** Log group consists of a number of log files, each of the same size; a log -group is implemented as a space in the sense of the module fil0fil. 
*/ -struct log_group_t{ - /* The following fields are protected by log_sys->mutex */ - ulint id; /*!< log group id */ - ulint n_files; /*!< number of files in the group */ - lsn_t file_size; /*!< individual log file size in bytes, - including the log file header */ - ulint space_id; /*!< file space which implements the log - group */ - ulint state; /*!< LOG_GROUP_OK or - LOG_GROUP_CORRUPTED */ - lsn_t lsn; /*!< lsn used to fix coordinates within - the log group */ - lsn_t lsn_offset; /*!< the offset of the above lsn */ - ulint n_pending_writes;/*!< number of currently pending flush - writes for this log group */ - byte** file_header_bufs_ptr;/*!< unaligned buffers */ - byte** file_header_bufs;/*!< buffers for each file - header in the group */ -#ifdef UNIV_LOG_ARCHIVE - /*-----------------------------*/ - byte** archive_file_header_bufs_ptr;/*!< unaligned buffers */ - byte** archive_file_header_bufs;/*!< buffers for each file - header in the group */ - ulint archive_space_id;/*!< file space which - implements the log group - archive */ - lsn_t archived_file_no;/*!< file number corresponding to - log_sys->archived_lsn */ - lsn_t archived_offset;/*!< file offset corresponding to - log_sys->archived_lsn, 0 if we have - not yet written to the archive file - number archived_file_no */ - lsn_t next_archived_file_no;/*!< during an archive write, - until the write is completed, we - store the next value for - archived_file_no here: the write - completion function then sets the new - value to ..._file_no */ - lsn_t next_archived_offset; /*!< like the preceding field */ -#endif /* UNIV_LOG_ARCHIVE */ - /*-----------------------------*/ - lsn_t scanned_lsn; /*!< used only in recovery: recovery scan - succeeded up to this lsn in this log - group */ - byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */ - byte* checkpoint_buf; /*!< checkpoint header is written from - this buffer to the group */ - UT_LIST_NODE_T(log_group_t) - log_groups; /*!< list of log groups */ -}; - 
-/** Redo log buffer */ -struct log_t{ - byte pad[CACHE_LINE_SIZE]; /*!< padding to prevent other memory - update hotspots from residing on the - same memory cache line */ - lsn_t lsn; /*!< log sequence number */ - ulint buf_free; /*!< first free offset within the log - buffer */ -#ifndef UNIV_HOTBACKUP - ib_prio_mutex_t mutex; /*!< mutex protecting the log */ - - ib_mutex_t log_flush_order_mutex;/*!< mutex to serialize access to - the flush list when we are putting - dirty blocks in the list. The idea - behind this mutex is to be able - to release log_sys->mutex during - mtr_commit and still ensure that - insertions in the flush_list happen - in the LSN order. */ -#endif /* !UNIV_HOTBACKUP */ - byte* buf_ptr; /* unaligned log buffer */ - byte* buf; /*!< log buffer */ - ulint buf_size; /*!< log buffer size in bytes */ - ulint max_buf_free; /*!< recommended maximum value of - buf_free, after which the buffer is - flushed */ - #ifdef UNIV_LOG_DEBUG - ulint old_buf_free; /*!< value of buf free when log was - last time opened; only in the debug - version */ - ib_uint64_t old_lsn; /*!< value of lsn when log was - last time opened; only in the - debug version */ -#endif /* UNIV_LOG_DEBUG */ - ibool check_flush_or_checkpoint; - /*!< this is set to TRUE when there may - be need to flush the log buffer, or - preflush buffer pool pages, or make - a checkpoint; this MUST be TRUE when - lsn - last_checkpoint_lsn > - max_checkpoint_age; this flag is - peeked at by log_free_check(), which - does not reserve the log mutex */ - UT_LIST_BASE_NODE_T(log_group_t) - log_groups; /*!< log groups */ - -#ifndef UNIV_HOTBACKUP - /** The fields involved in the log buffer flush @{ */ - - ulint buf_next_to_write;/*!< first offset in the log buffer - where the byte content may not exist - written to file, e.g., the start - offset of a log record catenated - later; this is advanced when a flush - operation is completed to all the log - groups */ - volatile bool is_extending; /*!< this is set to 
true during extend - the log buffer size */ - lsn_t written_to_some_lsn; - /*!< first log sequence number not yet - written to any log group; for this to - be advanced, it is enough that the - write i/o has been completed for any - one log group */ - lsn_t written_to_all_lsn; - /*!< first log sequence number not yet - written to some log group; for this to - be advanced, it is enough that the - write i/o has been completed for all - log groups. - Note that since InnoDB currently - has only one log group therefore - this value is redundant. Also it - is possible that this value - falls behind the - flushed_to_disk_lsn transiently. - It is appropriate to use either - flushed_to_disk_lsn or - write_lsn which are always - up-to-date and accurate. */ - lsn_t write_lsn; /*!< end lsn for the current running - write */ - ulint write_end_offset;/*!< the data in buffer has - been written up to this offset - when the current write ends: - this field will then be copied - to buf_next_to_write */ - lsn_t current_flush_lsn;/*!< end lsn for the current running - write + flush operation */ - lsn_t flushed_to_disk_lsn; - /*!< how far we have written the log - AND flushed to disk */ - ulint n_pending_writes;/*!< number of currently - pending flushes or writes */ - /* NOTE on the 'flush' in names of the fields below: starting from - 4.0.14, we separate the write of the log file and the actual fsync() - or other method to flush it to disk. The names below should really - be 'flush_or_write'! 
*/ - os_event_t no_flush_event; /*!< this event is in the reset state - when a flush or a write is running; - os_event_set() and os_event_reset() - are protected by log_sys_t::mutex */ - ibool one_flushed; /*!< during a flush, this is - first FALSE and becomes TRUE - when one log group has been - written or flushed */ - os_event_t one_flushed_event;/*!< this event is reset when the - flush or write has not yet completed - for any log group; e.g., this means - that a transaction has been committed - when this is set; - os_event_set() and os_event_reset() - are protected by log_sys_t::mutex */ - ulint n_log_ios; /*!< number of log i/os initiated thus - far */ - ulint n_log_ios_old; /*!< number of log i/o's at the - previous printout */ - time_t last_printout_time;/*!< when log_print was last time - called */ - /* @} */ - - /** Fields involved in checkpoints @{ */ - lsn_t log_group_capacity; /*!< capacity of the log group; if - the checkpoint age exceeds this, it is - a serious error because it is possible - we will then overwrite log and spoil - crash recovery */ - lsn_t max_modified_age_async; - /*!< when this recommended - value for lsn - - buf_pool_get_oldest_modification() - is exceeded, we start an - asynchronous preflush of pool pages */ - lsn_t max_modified_age_sync; - /*!< when this recommended - value for lsn - - buf_pool_get_oldest_modification() - is exceeded, we start a - synchronous preflush of pool pages */ - lsn_t max_checkpoint_age_async; - /*!< when this checkpoint age - is exceeded we start an - asynchronous writing of a new - checkpoint */ - lsn_t max_checkpoint_age; - /*!< this is the maximum allowed value - for lsn - last_checkpoint_lsn when a - new query step is started */ - ib_uint64_t next_checkpoint_no; - /*!< next checkpoint number */ - lsn_t last_checkpoint_lsn; - /*!< latest checkpoint lsn */ - lsn_t next_checkpoint_lsn; - /*!< next checkpoint lsn */ - ulint n_pending_checkpoint_writes; - /*!< number of currently pending - checkpoint 
writes */ - rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a - checkpoint write is running; a thread - should wait for this without owning - the log mutex */ -#endif /* !UNIV_HOTBACKUP */ - byte* checkpoint_buf_ptr;/* unaligned checkpoint header */ - byte* checkpoint_buf; /*!< checkpoint header is read to this - buffer */ - /* @} */ -#ifdef UNIV_LOG_ARCHIVE - /** Fields involved in archiving @{ */ - ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING - LOG_ARCH_STOPPED, LOG_ARCH_OFF */ - lsn_t archived_lsn; /*!< archiving has advanced to this - lsn */ - lsn_t max_archived_lsn_age_async; - /*!< recommended maximum age of - archived_lsn, before we start - asynchronous copying to the archive */ - lsn_t max_archived_lsn_age; - /*!< maximum allowed age for - archived_lsn */ - lsn_t next_archived_lsn;/*!< during an archive write, - until the write is completed, we - store the next value for - archived_lsn here: the write - completion function then sets the new - value to archived_lsn */ - ulint archiving_phase;/*!< LOG_ARCHIVE_READ or - LOG_ARCHIVE_WRITE */ - ulint n_pending_archive_ios; - /*!< number of currently pending reads - or writes in archiving */ - rw_lock_t archive_lock; /*!< this latch is x-locked when an - archive write is running; a thread - should wait for this without owning - the log mutex */ - ulint archive_buf_size;/*!< size of archive_buf */ - byte* archive_buf_ptr;/*!< unaligned archived_buf */ - byte* archive_buf; /*!< log segment is written to the - archive from this buffer */ - os_event_t archiving_on; /*!< if archiving has been stopped; - os_event_set() and os_event_reset() - are protected by log_sys_t::mutex */ - /* @} */ -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t tracked_lsn; /*!< log tracking has advanced to this - lsn. Field accessed atomically where - 64-bit atomic ops are supported, - protected by the log sys mutex - otherwise. */ -}; - -/** Test if flush order mutex is owned. 
*/ -#define log_flush_order_mutex_own() \ - mutex_own(&log_sys->log_flush_order_mutex) - -/** Acquire the flush order mutex. */ -#define log_flush_order_mutex_enter() do { \ - mutex_enter(&log_sys->log_flush_order_mutex); \ -} while (0) -/** Release the flush order mutex. */ -# define log_flush_order_mutex_exit() do { \ - mutex_exit(&log_sys->log_flush_order_mutex); \ -} while (0) - -#ifdef UNIV_LOG_ARCHIVE -/** Archiving state @{ */ -#define LOG_ARCH_ON 71 -#define LOG_ARCH_STOPPING 72 -#define LOG_ARCH_STOPPING2 73 -#define LOG_ARCH_STOPPED 74 -#define LOG_ARCH_OFF 75 -/* @} */ -#endif /* UNIV_LOG_ARCHIVE */ - -/* log scrubbing speed, in bytes/sec */ -extern ulonglong innodb_scrub_log_speed; - -#ifndef UNIV_NONINL -#include "log0log.ic" -#endif - -#endif diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic deleted file mode 100644 index 70458fa546b..00000000000 --- a/storage/xtradb/include/log0log.ic +++ /dev/null @@ -1,567 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/log0log.ic -Database log - -Created 12/9/1995 Heikki Tuuri -*******************************************************/ - -#include "os0file.h" -#include "mach0data.h" -#include "mtr0mtr.h" -#include "srv0mon.h" -#include "srv0srv.h" -#include "ut0crc32.h" - -#ifdef UNIV_LOG_DEBUG -/******************************************************//** -Checks by parsing that the catenated log segment for a single mtr is -consistent. */ -UNIV_INTERN -ibool -log_check_log_recs( -/*===============*/ - const byte* buf, /*!< in: pointer to the start of - the log segment in the - log_sys->buf log buffer */ - ulint len, /*!< in: segment length in bytes */ - ib_uint64_t buf_start_lsn); /*!< in: buffer start lsn */ -#endif /* UNIV_LOG_DEBUG */ - -/************************************************************//** -Gets a log block flush bit. -@return TRUE if this block was the first to be written in a log flush */ -UNIV_INLINE -ibool -log_block_get_flush_bit( -/*====================*/ - const byte* log_block) /*!< in: log block */ -{ - if (LOG_BLOCK_FLUSH_BIT_MASK - & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************//** -Sets the log block flush bit. 
*/ -UNIV_INLINE -void -log_block_set_flush_bit( -/*====================*/ - byte* log_block, /*!< in/out: log block */ - ibool val) /*!< in: value to set */ -{ - ulint field; - - field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO); - - if (val) { - field = field | LOG_BLOCK_FLUSH_BIT_MASK; - } else { - field = field & ~LOG_BLOCK_FLUSH_BIT_MASK; - } - - mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field); -} - -/************************************************************//** -Gets a log block number stored in the header. -@return log block number stored in the block header */ -UNIV_INLINE -ulint -log_block_get_hdr_no( -/*=================*/ - const byte* log_block) /*!< in: log block */ -{ - return(~LOG_BLOCK_FLUSH_BIT_MASK - & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)); -} - -/************************************************************//** -Sets the log block number stored in the header; NOTE that this must be set -before the flush bit! */ -UNIV_INLINE -void -log_block_set_hdr_no( -/*=================*/ - byte* log_block, /*!< in/out: log block */ - ulint n) /*!< in: log block number: must be > 0 and - < LOG_BLOCK_FLUSH_BIT_MASK */ -{ - ut_ad(n > 0); - ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK); - - mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n); -} - -/************************************************************//** -Gets a log block data length. -@return log block data length measured as a byte offset from the block start */ -UNIV_INLINE -ulint -log_block_get_data_len( -/*===================*/ - const byte* log_block) /*!< in: log block */ -{ - return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN)); -} - -/************************************************************//** -Sets the log block data length. 
*/ -UNIV_INLINE -void -log_block_set_data_len( -/*===================*/ - byte* log_block, /*!< in/out: log block */ - ulint len) /*!< in: data length */ -{ - mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len); -} - -/************************************************************//** -Gets a log block first mtr log record group offset. -@return first mtr log record group byte offset from the block start, 0 -if none */ -UNIV_INLINE -ulint -log_block_get_first_rec_group( -/*==========================*/ - const byte* log_block) /*!< in: log block */ -{ - return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP)); -} - -/************************************************************//** -Sets the log block first mtr log record group offset. */ -UNIV_INLINE -void -log_block_set_first_rec_group( -/*==========================*/ - byte* log_block, /*!< in/out: log block */ - ulint offset) /*!< in: offset, 0 if none */ -{ - mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset); -} - -/************************************************************//** -Gets a log block checkpoint number field (4 lowest bytes). -@return checkpoint no (4 lowest bytes) */ -UNIV_INLINE -ulint -log_block_get_checkpoint_no( -/*========================*/ - const byte* log_block) /*!< in: log block */ -{ - return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO)); -} - -/************************************************************//** -Sets a log block checkpoint number field (4 lowest bytes). */ -UNIV_INLINE -void -log_block_set_checkpoint_no( -/*========================*/ - byte* log_block, /*!< in/out: log block */ - ib_uint64_t no) /*!< in: checkpoint no */ -{ - mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, (ulint) no); -} - -/************************************************************//** -Converts a lsn to a log block number. 
-@return log block number, it is > 0 and <= 1G */ -UNIV_INLINE -ulint -log_block_convert_lsn_to_no( -/*========================*/ - lsn_t lsn) /*!< in: lsn of a byte within the block */ -{ - return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1); -} - -/************************************************************//** -Calculates the checksum for a log block using the current algorithm. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum( -/*====================*/ - const byte* block) /*!< in: log block */ -{ - return(log_checksum_algorithm_ptr(block)); -} -/************************************************************//** -Calculates the checksum for a log block using the default InnoDB algorithm. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum_innodb( -/*===========================*/ - const byte* block) /*!< in: log block */ -{ - ulint sum; - ulint sh; - ulint i; - - sum = 1; - sh = 0; - - for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) { - ulint b = (ulint) block[i]; - sum &= 0x7FFFFFFFUL; - sum += b; - sum += b << sh; - sh++; - if (sh > 24) { - sh = 0; - } - } - - return(sum); -} - -/************************************************************//** -Calculates the checksum for a log block using the CRC32 algorithm. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum_crc32( -/*==========================*/ - const byte* block) /*!< in: log block */ -{ - return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE)); -} - -/************************************************************//** -Calculates the checksum for a log block using the "no-op" algorithm. -@return checksum */ -UNIV_INLINE -ulint -log_block_calc_checksum_none( -/*=========================*/ - const byte* block) /*!< in: log block */ -{ - return(LOG_NO_CHECKSUM_MAGIC); -} - -/************************************************************//** -Gets a log block checksum field value. 
-@return checksum */ -UNIV_INLINE -ulint -log_block_get_checksum( -/*===================*/ - const byte* log_block) /*!< in: log block */ -{ - return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE - - LOG_BLOCK_CHECKSUM)); -} - -/************************************************************//** -Sets a log block checksum field value. */ -UNIV_INLINE -void -log_block_set_checksum( -/*===================*/ - byte* log_block, /*!< in/out: log block */ - ulint checksum) /*!< in: checksum */ -{ - mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - - LOG_BLOCK_CHECKSUM, - checksum); -} - -/************************************************************//** -Initializes a log block in the log buffer. */ -UNIV_INLINE -void -log_block_init( -/*===========*/ - byte* log_block, /*!< in: pointer to the log buffer */ - lsn_t lsn) /*!< in: lsn within the log block */ -{ - ulint no; - - ut_ad(mutex_own(&(log_sys->mutex))); - - no = log_block_convert_lsn_to_no(lsn); - - log_block_set_hdr_no(log_block, no); - - log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE); - log_block_set_first_rec_group(log_block, 0); -} - -/************************************************************//** -Initializes a log block in the log buffer in the old format, where there -was no checksum yet. */ -UNIV_INLINE -void -log_block_init_in_old_format( -/*=========================*/ - byte* log_block, /*!< in: pointer to the log buffer */ - lsn_t lsn) /*!< in: lsn within the log block */ -{ - ulint no; - - ut_ad(mutex_own(&(log_sys->mutex))); - - no = log_block_convert_lsn_to_no(lsn); - - log_block_set_hdr_no(log_block, no); - mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - - LOG_BLOCK_CHECKSUM, no); - log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE); - log_block_set_first_rec_group(log_block, 0); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Writes to the log the string given. The log must be released with -log_release. 
-@return end lsn of the log record, zero if did not succeed */ -UNIV_INLINE -lsn_t -log_reserve_and_write_fast( -/*=======================*/ - const void* str, /*!< in: string */ - ulint len, /*!< in: string length */ - lsn_t* start_lsn)/*!< out: start lsn of the log record */ -{ - ulint data_len; -#ifdef UNIV_LOG_LSN_DEBUG - /* length of the LSN pseudo-record */ - ulint lsn_len; -#endif /* UNIV_LOG_LSN_DEBUG */ - - mutex_enter(&log_sys->mutex); -#ifdef UNIV_LOG_LSN_DEBUG - lsn_len = 1 - + mach_get_compressed_size(log_sys->lsn >> 32) - + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL); -#endif /* UNIV_LOG_LSN_DEBUG */ - - data_len = len -#ifdef UNIV_LOG_LSN_DEBUG - + lsn_len -#endif /* UNIV_LOG_LSN_DEBUG */ - + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE; - - if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - - /* The string does not fit within the current log block or the - log block would become full. Do not release the log mutex, - because it has to be reacquired immediately for the "slow" write - procedure via log_write_low(). */ - - return(0); - } - - *start_lsn = log_sys->lsn; - -#ifdef UNIV_LOG_LSN_DEBUG - { - /* Write the LSN pseudo-record. */ - byte* b = &log_sys->buf[log_sys->buf_free]; - *b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str); - /* Write the LSN in two parts, - as a pseudo page number and space id. 
*/ - b += mach_write_compressed(b, log_sys->lsn >> 32); - b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL); - ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]); - - memcpy(b, str, len); - len += lsn_len; - } -#else /* UNIV_LOG_LSN_DEBUG */ - memcpy(log_sys->buf + log_sys->buf_free, str, len); -#endif /* UNIV_LOG_LSN_DEBUG */ - - log_block_set_data_len((byte*) ut_align_down(log_sys->buf - + log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE), - data_len); -#ifdef UNIV_LOG_DEBUG - log_sys->old_buf_free = log_sys->buf_free; - log_sys->old_lsn = log_sys->lsn; -#endif - log_sys->buf_free += len; - - ut_ad(log_sys->buf_free <= log_sys->buf_size); - - log_sys->lsn += len; - - MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, - log_sys->lsn - log_sys->last_checkpoint_lsn); - -#ifdef UNIV_LOG_DEBUG - log_check_log_recs(log_sys->buf + log_sys->old_buf_free, - log_sys->buf_free - log_sys->old_buf_free, - log_sys->old_lsn); -#endif - return(log_sys->lsn); -} - -/**************************************************************************//** -Locks the log mutex and opens the log for log_write_low. The log must be closed -with log_close and released with log_release. -@return start lsn of the log record */ -UNIV_INLINE -ib_uint64_t -log_reserve_and_open( -/*=================*/ - ulint len) /*!< in: length of data to be catenated */ -{ - mutex_enter(&(log_sys->mutex)); - - return log_open(len); -} - -/***********************************************************************//** -Releases the log mutex. */ -UNIV_INLINE -void -log_release(void) -/*=============*/ -{ - mutex_exit(&(log_sys->mutex)); -} - -/************************************************************//** -Gets the current lsn. 
-@return current lsn */ -UNIV_INLINE -lsn_t -log_get_lsn(void) -/*=============*/ -{ - lsn_t lsn; - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - return(lsn); -} - -/************************************************************//** -Gets the last lsn that is fully flushed to disk. -@return last flushed lsn */ -UNIV_INLINE -ib_uint64_t -log_get_flush_lsn(void) -/*=============*/ -{ - ib_uint64_t lsn; - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->flushed_to_disk_lsn; - - mutex_exit(&(log_sys->mutex)); - - return(lsn); -} - -/************************************************************//** -Gets the current lsn with a trylock -@return current lsn or 0 if false*/ -UNIV_INLINE -lsn_t -log_get_lsn_nowait(void) -/*=============*/ -{ - lsn_t lsn=0; - - if (!mutex_enter_nowait(&(log_sys->mutex))) { - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - } - - return(lsn); -} - -/**************************************************************** -Gets the log group capacity. It is OK to read the value without -holding log_sys->mutex because it is constant. -@return log group capacity */ -UNIV_INLINE -lsn_t -log_get_capacity(void) -/*==================*/ -{ - return(log_sys->log_group_capacity); -} - -/**************************************************************** -Get log_sys::max_modified_age_async. It is OK to read the value without -holding log_sys::mutex because it is constant. -@return max_modified_age_async */ -UNIV_INLINE -lsn_t -log_get_max_modified_age_async(void) -/*================================*/ -{ - return(log_sys->max_modified_age_async); -} - -/***********************************************************************//** -Checks if there is need for a log buffer flush or a new checkpoint, and does -this if yes. Any database operation should call this when it has modified -more than about 4 pages. 
NOTE that this function may only be called when the -OS thread owns no synchronization objects except the dictionary mutex. */ -UNIV_INLINE -void -log_free_check(void) -/*================*/ -{ - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_except_dict()); -#endif /* UNIV_SYNC_DEBUG */ - - if (log_sys->check_flush_or_checkpoint) { - - log_check_margins(); - } -} -#endif /* !UNIV_HOTBACKUP */ - -/****************************************************************//** -Safely reads the log_sys->tracked_lsn value. The writer counterpart function -is log_set_tracked_lsn() in log0online.c. - -@return log_sys->tracked_lsn value. */ -UNIV_INLINE -lsn_t -log_get_tracked_lsn(void) -/*=====================*/ -{ - os_rmb; - return log_sys->tracked_lsn; -} diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h deleted file mode 100644 index 5c3e7d07fd9..00000000000 --- a/storage/xtradb/include/log0online.h +++ /dev/null @@ -1,187 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011-2012, Percona Inc. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 51 Franklin -Street, Fifth Floor, Boston, MA 02110-1301, USA - -*****************************************************************************/ - -/**************************************************//** -@file include/log0online.h -Online database log parsing for changed page tracking -*******************************************************/ - -#ifndef log0online_h -#define log0online_h - -#include "univ.i" -#include "os0file.h" -#include "log0log.h" - -/** Single bitmap file information */ -typedef struct log_online_bitmap_file_struct log_online_bitmap_file_t; - -/** A set of bitmap files containing some LSN range */ -typedef struct log_online_bitmap_file_range_struct -log_online_bitmap_file_range_t; - -/** An iterator over changed page info */ -typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t; - -/** Initialize the constant part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_init(void); - -/** Initialize the dynamic part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_read_init(void); - -/** Shut down the dynamic part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_read_shutdown(void); - -/** Shut down the constant part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_shutdown(void); - -/*********************************************************************//** -Reads and parses the redo log up to last checkpoint LSN to build the changed -page bitmap which is then written to disk. - -@return TRUE if log tracking succeeded, FALSE if bitmap write I/O error */ -UNIV_INTERN -ibool -log_online_follow_redo_log(void); -/*=============================*/ - -/************************************************************//** -Delete all the bitmap files for data less than the specified LSN. -If called with lsn == 0 (i.e. 
set by RESET request) or -IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise -continue it. - -@return FALSE to indicate success, TRUE for failure. */ -UNIV_INTERN -ibool -log_online_purge_changed_page_bitmaps( -/*==================================*/ - lsn_t lsn); /*!<in: LSN to purge files up to */ - -#define LOG_BITMAP_ITERATOR_START_LSN(i) \ - ((i).start_lsn) -#define LOG_BITMAP_ITERATOR_END_LSN(i) \ - ((i).end_lsn) -#define LOG_BITMAP_ITERATOR_SPACE_ID(i) \ - ((i).space_id) -#define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \ - ((i).first_page_id + (i).bit_offset) -#define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \ - ((i).changed) - -/*********************************************************************//** -Initializes log bitmap iterator. The minimum LSN is used for finding the -correct starting file with records and it there may be records returned by -the iterator that have LSN less than start_lsn. - -@return TRUE if the iterator is initialized OK, FALSE otherwise. */ -UNIV_INTERN -ibool -log_online_bitmap_iterator_init( -/*============================*/ - log_bitmap_iterator_t *i, /*!<in/out: iterator */ - lsn_t min_lsn, /*!<in: start LSN for the - iterator */ - lsn_t max_lsn); /*!<in: end LSN for the - iterator */ - -/*********************************************************************//** -Releases log bitmap iterator. */ -UNIV_INTERN -void -log_online_bitmap_iterator_release( -/*===============================*/ - log_bitmap_iterator_t *i); /*!<in/out: iterator */ - -/*********************************************************************//** -Iterates through bits of saved bitmap blocks. -Sequentially reads blocks from bitmap file(s) and interates through -their bits. Ignores blocks with wrong checksum. -@return TRUE if iteration is successful, FALSE if all bits are iterated. 
*/ -UNIV_INTERN -ibool -log_online_bitmap_iterator_next( -/*============================*/ - log_bitmap_iterator_t *i); /*!<in/out: iterator */ - -/** Struct for single bitmap file information */ -struct log_online_bitmap_file_struct { - char name[FN_REFLEN]; /*!< Name with full path */ - pfs_os_file_t file; /*!< Handle to opened file */ - ib_uint64_t size; /*!< Size of the file */ - os_offset_t offset; /*!< Offset of the next read, - or count of already-read bytes - */ -}; - -/** Struct for a set of bitmap files containing some LSN range */ -struct log_online_bitmap_file_range_struct { - size_t count; /*!< Number of files */ - /*!< Dynamically-allocated array of info about individual files */ - struct files_t { - char name[FN_REFLEN]; /*!< Name of a file */ - lsn_t start_lsn; /*!< Starting LSN of data in - this file */ - ulong seq_num; /*!< Sequence number of this - file */ - } *files; -}; - -/** Struct for an iterator through all bits of changed pages bitmap blocks */ -struct log_bitmap_iterator_struct -{ - lsn_t max_lsn; /*!< End LSN of the - range */ - ibool failed; /*!< Has the iteration - stopped prematurely */ - log_online_bitmap_file_range_t in_files; /*!< The bitmap files - for this iterator */ - size_t in_i; /*!< Currently read - file index in in_files - */ - log_online_bitmap_file_t in; /*!< Currently read - file */ - ib_uint32_t bit_offset; /*!< bit offset inside - the current bitmap - block */ - lsn_t start_lsn; /*!< Start LSN of the - current bitmap block */ - lsn_t end_lsn; /*!< End LSN of the - current bitmap block */ - ib_uint32_t space_id; /*!< Current block - space id */ - ib_uint32_t first_page_id; /*!< Id of the first - page in the current - block */ - ibool last_page_in_run;/*!< "Last page in - run" flag value for the - current block */ - ibool changed; /*!< true if current - page was changed */ - byte* page; /*!< Bitmap block */ -}; - -#endif diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h deleted file mode 
100644 index 73d53d2ddab..00000000000 --- a/storage/xtradb/include/log0recv.h +++ /dev/null @@ -1,517 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/log0recv.h -Recovery - -Created 9/20/1997 Heikki Tuuri -*******************************************************/ - -#ifndef log0recv_h -#define log0recv_h - -#include "univ.i" -#include "ut0byte.h" -#include "buf0types.h" -#include "hash0hash.h" -#include "log0log.h" -#include <list> - -/******************************************************//** -Checks the 4-byte checksum to the trailer checksum field of a log -block. We also accept a log block in the old format before -InnoDB-3.23.52 where the checksum field contains the log block number. -@return TRUE if ok, or if the log block may be in the format of InnoDB -version predating 3.23.52 */ -UNIV_INTERN -ibool -log_block_checksum_is_ok_or_old_format( -/*===================================*/ - const byte* block, /*!< in: pointer to a log block */ - bool print_err); /*!< in print error ? 
*/ - -/*******************************************************//** -Calculates the new value for lsn when more data is added to the log. */ -UNIV_INTERN -ib_uint64_t -recv_calc_lsn_on_data_add( -/*======================*/ - lsn_t lsn, /*!< in: old lsn */ - ib_uint64_t len); /*!< in: this many bytes of data is - added, log block headers not included */ - -#ifdef UNIV_HOTBACKUP -extern ibool recv_replay_file_ops; - -/*******************************************************************//** -Reads the checkpoint info needed in hot backup. -@return TRUE if success */ -UNIV_INTERN -ibool -recv_read_checkpoint_info_for_backup( -/*=================================*/ - const byte* hdr, /*!< in: buffer containing the log group - header */ - lsn_t* lsn, /*!< out: checkpoint lsn */ - lsn_t* offset, /*!< out: checkpoint offset in the log group */ - lsn_t* cp_no, /*!< out: checkpoint number */ - lsn_t* first_header_lsn) - /*!< out: lsn of of the start of the - first log file */ - MY_ATTRIBUTE((nonnull)); -/*******************************************************************//** -Scans the log segment and n_bytes_scanned is set to the length of valid -log scanned. */ -UNIV_INTERN -void -recv_scan_log_seg_for_backup( -/*=========================*/ - byte* buf, /*!< in: buffer containing log data */ - ulint buf_len, /*!< in: data length in that buffer */ - lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start, - we return scanned lsn */ - ulint* scanned_checkpoint_no, - /*!< in/out: 4 lowest bytes of the - highest scanned checkpoint number so - far */ - ulint* n_bytes_scanned);/*!< out: how much we were able to - scan, smaller than buf_len if log - data ended here */ -#endif /* UNIV_HOTBACKUP */ -/*******************************************************************//** -Returns TRUE if recovery is currently running. 
-@return recv_recovery_on */ -UNIV_INLINE -ibool -recv_recovery_is_on(void); -/*=====================*/ -/************************************************************************//** -Applies the hashed log records to the page, if the page lsn is less than the -lsn of a log record. This can be called when a buffer page has just been -read in, or also for a page already in the buffer pool. */ -UNIV_INTERN -void -recv_recover_page_func( -/*===================*/ -#ifndef UNIV_HOTBACKUP - ibool just_read_in, - /*!< in: TRUE if the i/o handler calls - this for a freshly read page */ -#endif /* !UNIV_HOTBACKUP */ - buf_block_t* block); /*!< in/out: buffer block */ -#ifndef UNIV_HOTBACKUP -/** Wrapper for recv_recover_page_func(). -Applies the hashed log records to the page, if the page lsn is less than the -lsn of a log record. This can be called when a buffer page has just been -read in, or also for a page already in the buffer pool. -@param jri in: TRUE if just read in (the i/o handler calls this for -a freshly read page) -@param block in/out: the buffer block -*/ -# define recv_recover_page(jri, block) recv_recover_page_func(jri, block) -#else /* !UNIV_HOTBACKUP */ -/** Wrapper for recv_recover_page_func(). -Applies the hashed log records to the page, if the page lsn is less than the -lsn of a log record. This can be called when a buffer page has just been -read in, or also for a page already in the buffer pool. -@param jri in: TRUE if just read in (the i/o handler calls this for -a freshly read page) -@param block in/out: the buffer block -*/ -# define recv_recover_page(jri, block) recv_recover_page_func(block) -#endif /* !UNIV_HOTBACKUP */ - -/** Recovers from a checkpoint. When this function returns, the database is able -to start processing of new user transactions, but the function -recv_recovery_from_checkpoint_finish should be called later to complete -the recovery and free the resources used in it. 
-@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE -@param[in] limit_lsn recover up to this lsn if possible -@param[in] flushed_lsn flushed lsn from first data file -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -recv_recovery_from_checkpoint_start_func( -#ifdef UNIV_LOG_ARCHIVE - ulint type, - lsn_t limit_lsn, -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t flushed_lsn) - MY_ATTRIBUTE((warn_unused_result)); - -#ifdef UNIV_LOG_ARCHIVE -/** Wrapper for recv_recovery_from_checkpoint_start_func(). -Recovers from a checkpoint. When this function returns, the database is able -to start processing of new user transactions, but the function -recv_recovery_from_checkpoint_finish should be called later to complete -the recovery and free the resources used in it. -@param type in: LOG_CHECKPOINT or LOG_ARCHIVE -@param lim in: recover up to this log sequence number if possible -@param lsn in: flushed log sequence number from first data file -@return error code or DB_SUCCESS */ -# define recv_recovery_from_checkpoint_start(type,lim,lsn) \ - recv_recovery_from_checkpoint_start_func(type,lim,lsn) -#else /* UNIV_LOG_ARCHIVE */ -/** Wrapper for recv_recovery_from_checkpoint_start_func(). -Recovers from a checkpoint. When this function returns, the database is able -to start processing of new user transactions, but the function -recv_recovery_from_checkpoint_finish should be called later to complete -the recovery and free the resources used in it. -@param type ignored: LOG_CHECKPOINT or LOG_ARCHIVE -@param lim ignored: recover up to this log sequence number if possible -@param lsn in: flushed log sequence number from first data file -@return error code or DB_SUCCESS */ -# define recv_recovery_from_checkpoint_start(type,lim,lsn) \ - recv_recovery_from_checkpoint_start_func(lsn) -#endif /* UNIV_LOG_ARCHIVE */ - -/********************************************************//** -Completes recovery from a checkpoint. 
*/ -UNIV_INTERN -void -recv_recovery_from_checkpoint_finish(void); -/*======================================*/ -/********************************************************//** -Initiates the rollback of active transactions. */ -UNIV_INTERN -void -recv_recovery_rollback_active(void); -/*===============================*/ - -/*******************************************************************//** -Tries to parse a single log record and returns its length. -@return length of the record, or 0 if the record was not complete */ -UNIV_INTERN -ulint -recv_parse_log_rec( -/*===============*/ - byte* ptr, /*!< in: pointer to a buffer */ - byte* end_ptr,/*!< in: pointer to the buffer end */ - byte* type, /*!< out: type */ - ulint* space, /*!< out: space id */ - ulint* page_no,/*!< out: page number */ - byte** body); /*!< out: log record body start */ - -/*******************************************************//** -Scans log from a buffer and stores new log data to the parsing buffer. -Parses and hashes the log records if new data found. Unless -UNIV_HOTBACKUP is defined, this function will apply log records -automatically when the hash table becomes full. 
-@return TRUE if limit_lsn has been reached, or not able to scan any -more in this log group */ -UNIV_INTERN -ibool -recv_scan_log_recs( -/*===============*/ - ulint available_memory,/*!< in: we let the hash table of recs - to grow to this size, at the maximum */ - ibool store_to_hash, /*!< in: TRUE if the records should be - stored to the hash table; this is set - to FALSE if just debug checking is - needed */ - const byte* buf, /*!< in: buffer containing a log - segment or garbage */ - ulint len, /*!< in: buffer length */ - lsn_t start_lsn, /*!< in: buffer start lsn */ - lsn_t* contiguous_lsn, /*!< in/out: it is known that all log - groups contain contiguous log data up - to this lsn */ - lsn_t* group_scanned_lsn);/*!< out: scanning succeeded up to - this lsn */ -/******************************************************//** -Resets the logs. The contents of log files will be lost! */ -UNIV_INTERN -void -recv_reset_logs( -/*============*/ -#ifdef UNIV_LOG_ARCHIVE - ulint arch_log_no, /*!< in: next archived log file number */ - ibool new_logs_created,/*!< in: TRUE if resetting logs - is done at the log creation; - FALSE if it is done after - archive recovery */ -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t lsn); /*!< in: reset to this lsn - rounded up to be divisible by - OS_FILE_LOG_BLOCK_SIZE, after - which we add - LOG_BLOCK_HDR_SIZE */ -#ifdef UNIV_HOTBACKUP -/******************************************************//** -Creates new log files after a backup has been restored. */ -UNIV_INTERN -void -recv_reset_log_files_for_backup( -/*============================*/ - const char* log_dir, /*!< in: log file directory path */ - ulint n_log_files, /*!< in: number of log files */ - lsn_t log_file_size, /*!< in: log file size */ - lsn_t lsn); /*!< in: new start lsn, must be - divisible by OS_FILE_LOG_BLOCK_SIZE */ -#endif /* UNIV_HOTBACKUP */ -/********************************************************//** -Creates the recovery system. 
*/ -UNIV_INTERN -void -recv_sys_create(void); -/*=================*/ -/**********************************************************//** -Release recovery system mutexes. */ -UNIV_INTERN -void -recv_sys_close(void); -/*================*/ -/********************************************************//** -Frees the recovery system memory. */ -UNIV_INTERN -void -recv_sys_mem_free(void); -/*===================*/ -/********************************************************//** -Inits the recovery system for a recovery operation. */ -UNIV_INTERN -void -recv_sys_init( -/*==========*/ - ulint available_memory); /*!< in: available memory in bytes */ -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Reset the state of the recovery system variables. */ -UNIV_INTERN -void -recv_sys_var_init(void); -/*===================*/ -#endif /* !UNIV_HOTBACKUP */ -/** Apply the hash table of stored log records to persistent data pages. -@param[in] last_batch whether the change buffer merge will be - performed as part of the operation */ -UNIV_INTERN -void -recv_apply_hashed_log_recs(bool last_batch); -#ifdef UNIV_HOTBACKUP -/*******************************************************************//** -Applies log records in the hash table to a backup. 
*/ -UNIV_INTERN -void -recv_apply_log_recs_for_backup(void); -/*================================*/ -#endif - -/** Block of log record data */ -struct recv_data_t{ - recv_data_t* next; /*!< pointer to the next block or NULL */ - /*!< the log record data is stored physically - immediately after this struct, max amount - RECV_DATA_BLOCK_SIZE bytes of it */ -}; - -/** Stored log record struct */ -struct recv_t{ - byte type; /*!< log record type */ - ulint len; /*!< log record body length in bytes */ - recv_data_t* data; /*!< chain of blocks containing the log record - body */ - lsn_t start_lsn;/*!< start lsn of the log segment written by - the mtr which generated this log record: NOTE - that this is not necessarily the start lsn of - this log record */ - lsn_t end_lsn;/*!< end lsn of the log segment written by - the mtr which generated this log record: NOTE - that this is not necessarily the end lsn of - this log record */ - UT_LIST_NODE_T(recv_t) - rec_list;/*!< list of log records for this page */ -}; - -/** States of recv_addr_t */ -enum recv_addr_state { - /** not yet processed */ - RECV_NOT_PROCESSED, - /** page is being read */ - RECV_BEING_READ, - /** log records are being applied on the page */ - RECV_BEING_PROCESSED, - /** log records have been applied on the page, or they have - been discarded because the tablespace does not exist */ - RECV_PROCESSED -}; - -/** Hashed page file address struct */ -struct recv_addr_t{ - enum recv_addr_state state; - /*!< recovery state of the page */ - unsigned space:32;/*!< space id */ - unsigned page_no:32;/*!< page number */ - UT_LIST_BASE_NODE_T(recv_t) - rec_list;/*!< list of log records for this page */ - hash_node_t addr_hash;/*!< hash node in the hash bucket chain */ -}; - -struct recv_dblwr_t { - void add(byte* page); - - byte* find_page(ulint space_id, ulint page_no); - - std::list<byte *> pages; /* Pages from double write buffer */ - - void operator() () { - pages.clear(); - } -}; - -/** Recovery system data 
structure */ -struct recv_sys_t{ -#ifndef UNIV_HOTBACKUP - ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs, - n_addrs, and the state field in each recv_addr - struct */ - ib_mutex_t writer_mutex;/*!< mutex coordinating - flushing between recv_writer_thread and - the recovery thread. */ -#endif /* !UNIV_HOTBACKUP */ - ibool apply_log_recs; - /*!< this is TRUE when log rec application to - pages is allowed; this flag tells the - i/o-handler if it should do log record - application */ - ibool apply_batch_on; - /*!< this is TRUE when a log rec application - batch is running */ - lsn_t lsn; /*!< log sequence number */ - ulint last_log_buf_size; - /*!< size of the log buffer when the database - last time wrote to the log */ - byte* last_block; - /*!< possible incomplete last recovered log - block */ - byte* last_block_buf_start; - /*!< the nonaligned start address of the - preceding buffer */ - byte* buf; /*!< buffer for parsing log records */ - ulint len; /*!< amount of data in buf */ - lsn_t parse_start_lsn; - /*!< this is the lsn from which we were able to - start parsing log records and adding them to - the hash table; zero if a suitable - start point not found yet */ - lsn_t scanned_lsn; - /*!< the log data has been scanned up to this - lsn */ - ulint scanned_checkpoint_no; - /*!< the log data has been scanned up to this - checkpoint number (lowest 4 bytes) */ - ulint recovered_offset; - /*!< start offset of non-parsed log records in - buf */ - lsn_t recovered_lsn; - /*!< the log records have been parsed up to - this lsn */ - lsn_t limit_lsn;/*!< recovery should be made at most - up to this lsn */ - ibool found_corrupt_log; - /*!< this is set to TRUE if we during log - scan find a corrupt log block, or a corrupt - log record, or there is a log parsing - buffer overflow */ - /** the time when progress was last reported */ - ib_time_t progress_time; -#ifdef UNIV_LOG_ARCHIVE - log_group_t* archive_group; - /*!< in archive recovery: the log group whose 
- archive is read */ -#endif /* !UNIV_LOG_ARCHIVE */ - mem_heap_t* heap; /*!< memory heap of log records and file - addresses*/ - hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ - ulint n_addrs;/*!< number of not processed hashed file - addresses in the hash table */ - - recv_dblwr_t dblwr; - - /** Determine whether redo log recovery progress should be reported. - @param[in] time the current time - @return whether progress should be reported - (the last report was at least 15 seconds ago) */ - bool report(ib_time_t time) - { - if (time - progress_time < 15) { - return false; - } - - progress_time = time; - return true; - } -}; - -/** The recovery system */ -extern recv_sys_t* recv_sys; - -/** TRUE when applying redo log records during crash recovery; FALSE -otherwise. Note that this is FALSE while a background thread is -rolling back incomplete transactions. */ -extern ibool recv_recovery_on; -/** If the following is TRUE, the buffer pool file pages must be invalidated -after recovery and no ibuf operations are allowed; this becomes TRUE if -the log record hash table becomes too full, and log records must be merged -to file pages already before the recovery is finished: in this case no -ibuf operations are allowed, as they could modify the pages read in the -buffer pool before the pages have been recovered to the up-to-date state. - -TRUE means that recovery is running and no operations on the log files -are allowed yet: the variable name is misleading. */ -extern ibool recv_no_ibuf_operations; -/** TRUE when recv_init_crash_recovery() has been called. */ -extern ibool recv_needed_recovery; -#ifdef UNIV_DEBUG -/** TRUE if writing to the redo log (mtr_commit) is forbidden. -Protected by log_sys->mutex. */ -extern ibool recv_no_log_write; -#endif /* UNIV_DEBUG */ - -/** TRUE if buf_page_is_corrupted() should check if the log sequence -number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by -recv_recovery_from_checkpoint_start_func(). 
*/ -extern ibool recv_lsn_checks_on; -#ifdef UNIV_HOTBACKUP -/** TRUE when the redo log is being backed up */ -extern ibool recv_is_making_a_backup; -#endif /* UNIV_HOTBACKUP */ -/** Maximum page number encountered in the redo log */ -extern ulint recv_max_parsed_page_no; - -/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many -times! */ -#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024) - -/** Size of block reads when the log groups are scanned forward to do a -roll-forward */ -#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) - -/** This many frames must be left free in the buffer pool when we scan -the log and store the scanned log records in the buffer pool: we will -use these free frames to read in pages when we start applying the -log records to the database. */ -extern ulint recv_n_pool_free_frames; - -#ifndef UNIV_NONINL -#include "log0recv.ic" -#endif - -#endif diff --git a/storage/xtradb/include/log0recv.ic b/storage/xtradb/include/log0recv.ic deleted file mode 100644 index b29272f4672..00000000000 --- a/storage/xtradb/include/log0recv.ic +++ /dev/null @@ -1,37 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/log0recv.ic -Recovery - -Created 9/20/1997 Heikki Tuuri -*******************************************************/ - -#include "univ.i" - -/*******************************************************************//** -Returns TRUE if recovery is currently running. -@return recv_recovery_on */ -UNIV_INLINE -ibool -recv_recovery_is_on(void) -/*=====================*/ -{ - return(recv_recovery_on); -} diff --git a/storage/xtradb/include/mach0data.h b/storage/xtradb/include/mach0data.h deleted file mode 100644 index 2e16634a6c2..00000000000 --- a/storage/xtradb/include/mach0data.h +++ /dev/null @@ -1,418 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/mach0data.h -Utilities for converting data from the database file -to the machine format. - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef mach0data_h -#define mach0data_h - -#ifndef UNIV_INNOCHECKSUM - -#include "univ.i" -#include "ut0byte.h" - -/* The data and all fields are always stored in a database file -in the same format: ascii, big-endian, ... . -All data in the files MUST be accessed using the functions in this -module. */ - -/*******************************************************//** -The following function is used to store data in one byte. */ -UNIV_INLINE -void -mach_write_to_1( -/*============*/ - byte* b, /*!< in: pointer to byte where to store */ - ulint n); /*!< in: ulint integer to be stored, >= 0, < 256 */ -/********************************************************//** -The following function is used to fetch data from one byte. -@return ulint integer, >= 0, < 256 */ -UNIV_INLINE -ulint -mach_read_from_1( -/*=============*/ - const byte* b) /*!< in: pointer to byte */ - MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************//** -The following function is used to store data in two consecutive -bytes. We store the most significant byte to the lower address. */ -UNIV_INLINE -void -mach_write_to_2( -/*============*/ - byte* b, /*!< in: pointer to two bytes where to store */ - ulint n); /*!< in: ulint integer to be stored, >= 0, < 64k */ -/********************************************************//** -The following function is used to fetch data from two consecutive -bytes. 
The most significant byte is at the lowest address. -@return ulint integer, >= 0, < 64k */ -UNIV_INLINE -ulint -mach_read_from_2( -/*=============*/ - const byte* b) /*!< in: pointer to two bytes */ - MY_ATTRIBUTE((nonnull, pure)); - -/********************************************************//** -The following function is used to convert a 16-bit data item -to the canonical format, for fast bytewise equality test -against memory. -@return 16-bit integer in canonical format */ -UNIV_INLINE -uint16 -mach_encode_2( -/*==========*/ - ulint n) /*!< in: integer in machine-dependent format */ - MY_ATTRIBUTE((const)); -/********************************************************//** -The following function is used to convert a 16-bit data item -from the canonical format, for fast bytewise equality test -against memory. -@return integer in machine-dependent format */ -UNIV_INLINE -ulint -mach_decode_2( -/*==========*/ - uint16 n) /*!< in: 16-bit integer in canonical format */ - MY_ATTRIBUTE((const)); -/*******************************************************//** -The following function is used to store data in 3 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_3( -/*============*/ - byte* b, /*!< in: pointer to 3 bytes where to store */ - ulint n); /*!< in: ulint integer to be stored */ -/********************************************************//** -The following function is used to fetch data from 3 consecutive -bytes. The most significant byte is at the lowest address. -@return ulint integer */ -UNIV_INLINE -ulint -mach_read_from_3( -/*=============*/ - const byte* b) /*!< in: pointer to 3 bytes */ - MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************//** -The following function is used to store data in four consecutive -bytes. We store the most significant byte to the lowest address. 
*/ -UNIV_INLINE -void -mach_write_to_4( -/*============*/ - byte* b, /*!< in: pointer to four bytes where to store */ - ulint n); /*!< in: ulint integer to be stored */ -/********************************************************//** -The following function is used to fetch data from 4 consecutive -bytes. The most significant byte is at the lowest address. -@return ulint integer */ -UNIV_INLINE -ulint -mach_read_from_4( -/*=============*/ - const byte* b) /*!< in: pointer to four bytes */ - MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************//** -Writes a ulint in a compressed form (1..5 bytes). -@return stored size in bytes */ -UNIV_INLINE -ulint -mach_write_compressed( -/*==================*/ - byte* b, /*!< in: pointer to memory where to store */ - ulint n); /*!< in: ulint integer to be stored */ -/*********************************************************//** -Returns the size of an ulint when written in the compressed form. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_get_compressed_size( -/*=====================*/ - ulint n) /*!< in: ulint integer to be stored */ - MY_ATTRIBUTE((const)); -/*********************************************************//** -Reads a ulint in a compressed form. -@return read integer */ -UNIV_INLINE -ulint -mach_read_compressed( -/*=================*/ - const byte* b) /*!< in: pointer to memory from where to read */ - MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************//** -The following function is used to store data in 6 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_6( -/*============*/ - byte* b, /*!< in: pointer to 6 bytes where to store */ - ib_uint64_t id); /*!< in: 48-bit integer */ -/********************************************************//** -The following function is used to fetch data from 6 consecutive -bytes. 
The most significant byte is at the lowest address. -@return 48-bit integer */ -UNIV_INLINE -ib_uint64_t -mach_read_from_6( -/*=============*/ - const byte* b) /*!< in: pointer to 6 bytes */ - MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************//** -The following function is used to store data in 7 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_7( -/*============*/ - byte* b, /*!< in: pointer to 7 bytes where to store */ - ib_uint64_t n); /*!< in: 56-bit integer */ -/********************************************************//** -The following function is used to fetch data from 7 consecutive -bytes. The most significant byte is at the lowest address. -@return 56-bit integer */ -UNIV_INLINE -ib_uint64_t -mach_read_from_7( -/*=============*/ - const byte* b) /*!< in: pointer to 7 bytes */ - MY_ATTRIBUTE((warn_unused_result)); -/*******************************************************//** -The following function is used to store data in 8 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_8( -/*============*/ - void* b, /*!< in: pointer to 8 bytes where to store */ - ib_uint64_t n); /*!< in: 64-bit integer to be stored */ -/********************************************************//** -The following function is used to fetch data from 8 consecutive -bytes. The most significant byte is at the lowest address. -@return 64-bit integer */ -UNIV_INLINE -ib_uint64_t -mach_read_from_8( -/*=============*/ - const byte* b) /*!< in: pointer to 8 bytes */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************//** -Writes a 64-bit integer in a compressed form (5..9 bytes). 
-@return size in bytes */ -UNIV_INLINE -ulint -mach_ull_write_compressed( -/*======================*/ - byte* b, /*!< in: pointer to memory where to store */ - ib_uint64_t n); /*!< in: 64-bit integer to be stored */ -/*********************************************************//** -Returns the size of a 64-bit integer when written in the compressed form. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_ull_get_compressed_size( -/*=========================*/ - ib_uint64_t n); /*!< in: 64-bit integer to be stored */ -/*********************************************************//** -Reads a 64-bit integer in a compressed form. -@return the value read */ -UNIV_INLINE -ib_uint64_t -mach_ull_read_compressed( -/*=====================*/ - const byte* b) /*!< in: pointer to memory from where to read */ - MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************//** -Writes a 64-bit integer in a compressed form (1..11 bytes). -@return size in bytes */ -UNIV_INLINE -ulint -mach_ull_write_much_compressed( -/*===========================*/ - byte* b, /*!< in: pointer to memory where to store */ - ib_uint64_t n); /*!< in: 64-bit integer to be stored */ -/*********************************************************//** -Returns the size of a 64-bit integer when written in the compressed form. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_ull_get_much_compressed_size( -/*==============================*/ - ib_uint64_t n) /*!< in: 64-bit integer to be stored */ - MY_ATTRIBUTE((const)); -/*********************************************************//** -Reads a 64-bit integer in a compressed form. 
-@return the value read */ -UNIV_INLINE -ib_uint64_t -mach_ull_read_much_compressed( -/*==========================*/ - const byte* b) /*!< in: pointer to memory from where to read */ - MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************//** -Reads a ulint in a compressed form if the log record fully contains it. -@return pointer to end of the stored field, NULL if not complete */ -UNIV_INTERN -byte* -mach_parse_compressed( -/*==================*/ - byte* ptr, /*!< in: pointer to buffer from where to read */ - byte* end_ptr,/*!< in: pointer to end of the buffer */ - ulint* val); /*!< out: read value */ -/*********************************************************//** -Reads a 64-bit integer in a compressed form -if the log record fully contains it. -@return pointer to end of the stored field, NULL if not complete */ -UNIV_INLINE -byte* -mach_ull_parse_compressed( -/*======================*/ - byte* ptr, /*!< in: pointer to buffer from where to read */ - byte* end_ptr,/*!< in: pointer to end of the buffer */ - ib_uint64_t* val); /*!< out: read value */ -#ifndef UNIV_HOTBACKUP -/*********************************************************//** -Reads a double. It is stored in a little-endian format. -@return double read */ -UNIV_INLINE -double -mach_double_read( -/*=============*/ - const byte* b) /*!< in: pointer to memory from where to read */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************//** -Writes a double. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_double_write( -/*==============*/ - byte* b, /*!< in: pointer to memory where to write */ - double d); /*!< in: double */ -/*********************************************************//** -Reads a float. It is stored in a little-endian format. 
-@return float read */ -UNIV_INLINE -float -mach_float_read( -/*============*/ - const byte* b) /*!< in: pointer to memory from where to read */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************//** -Writes a float. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_float_write( -/*=============*/ - byte* b, /*!< in: pointer to memory where to write */ - float d); /*!< in: float */ -/*********************************************************//** -Reads a ulint stored in the little-endian format. -@return unsigned long int */ -UNIV_INLINE -ulint -mach_read_from_n_little_endian( -/*===========================*/ - const byte* buf, /*!< in: from where to read */ - ulint buf_size) /*!< in: from how many bytes to read */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************//** -Writes a ulint in the little-endian format. */ -UNIV_INLINE -void -mach_write_to_n_little_endian( -/*==========================*/ - byte* dest, /*!< in: where to write */ - ulint dest_size, /*!< in: into how many bytes to write */ - ulint n); /*!< in: unsigned long int to write */ -/*********************************************************//** -Reads a ulint stored in the little-endian format. -@return unsigned long int */ -UNIV_INLINE -ulint -mach_read_from_2_little_endian( -/*===========================*/ - const byte* buf) /*!< in: from where to read */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************//** -Writes a ulint in the little-endian format. */ -UNIV_INLINE -void -mach_write_to_2_little_endian( -/*==========================*/ - byte* dest, /*!< in: where to write */ - ulint n); /*!< in: unsigned long int to write */ -/*********************************************************//** -Convert integral type from storage byte order (big endian) to -host byte order. 
-@return integer value */ -UNIV_INLINE -ib_uint64_t -mach_read_int_type( -/*===============*/ - const byte* src, /*!< in: where to read from */ - ulint len, /*!< in: length of src */ - ibool unsigned_type); /*!< in: signed or unsigned flag */ -/***********************************************************//** -Convert integral type from host byte order to (big-endian) storage -byte order. */ -UNIV_INLINE -void -mach_write_int_type( -/*================*/ - byte* dest, /*!< in: where to write*/ - const byte* src, /*!< in: where to read from */ - ulint len, /*!< in: length of src */ - bool usign); /*!< in: signed or unsigned flag */ - -/************************************************************* -Convert a ulonglong integer from host byte order to (big-endian) -storage byte order. */ -UNIV_INLINE -void -mach_write_ulonglong( -/*=================*/ - byte* dest, /*!< in: where to write */ - ulonglong src, /*!< in: where to read from */ - ulint len, /*!< in: length of dest */ - bool usign); /*!< in: signed or unsigned flag */ - -/********************************************************//** -Reads 1 - 4 bytes from a file page buffered in the buffer pool. -@return value read */ -UNIV_INLINE -ulint -mach_read_ulint( -/*============*/ - const byte* ptr, /*!< in: pointer from where to read */ - ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - -#endif /* !UNIV_HOTBACKUP */ -#endif /* !UNIV_INNOCHECKSUM */ - -#ifndef UNIV_NONINL -#include "mach0data.ic" -#endif - -#endif diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic deleted file mode 100644 index 3b1cf9c0378..00000000000 --- a/storage/xtradb/include/mach0data.ic +++ /dev/null @@ -1,869 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/mach0data.ic -Utilities for converting data from the database file -to the machine format. - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef UNIV_INNOCHECKSUM - -#include "ut0mem.h" - -/*******************************************************//** -The following function is used to store data in one byte. */ -UNIV_INLINE -void -mach_write_to_1( -/*============*/ - byte* b, /*!< in: pointer to byte where to store */ - ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */ -{ - ut_ad(b); - ut_ad((n & ~0xFFUL) == 0); - - b[0] = (byte) n; -} - -/********************************************************//** -The following function is used to fetch data from one byte. -@return ulint integer, >= 0, < 256 */ -UNIV_INLINE -ulint -mach_read_from_1( -/*=============*/ - const byte* b) /*!< in: pointer to byte */ -{ - return((ulint)(b[0])); -} - -/*******************************************************//** -The following function is used to store data in two consecutive -bytes. We store the most significant byte to the lowest address. 
*/ -UNIV_INLINE -void -mach_write_to_2( -/*============*/ - byte* b, /*!< in: pointer to two bytes where to store */ - ulint n) /*!< in: ulint integer to be stored */ -{ - ut_ad(b); - ut_ad((n & ~0xFFFFUL) == 0); - - b[0] = (byte)(n >> 8); - b[1] = (byte)(n); -} - -/********************************************************//** -The following function is used to convert a 16-bit data item -to the canonical format, for fast bytewise equality test -against memory. -@return 16-bit integer in canonical format */ -UNIV_INLINE -uint16 -mach_encode_2( -/*==========*/ - ulint n) /*!< in: integer in machine-dependent format */ -{ - uint16 ret; - ut_ad(2 == sizeof ret); - mach_write_to_2((byte*) &ret, n); - return(ret); -} -/********************************************************//** -The following function is used to convert a 16-bit data item -from the canonical format, for fast bytewise equality test -against memory. -@return integer in machine-dependent format */ -UNIV_INLINE -ulint -mach_decode_2( -/*==========*/ - uint16 n) /*!< in: 16-bit integer in canonical format */ -{ - ut_ad(2 == sizeof n); - return(mach_read_from_2((const byte*) &n)); -} - -/*******************************************************//** -The following function is used to store data in 3 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_3( -/*============*/ - byte* b, /*!< in: pointer to 3 bytes where to store */ - ulint n) /*!< in: ulint integer to be stored */ -{ - ut_ad(b); - ut_ad((n & ~0xFFFFFFUL) == 0); - - b[0] = (byte)(n >> 16); - b[1] = (byte)(n >> 8); - b[2] = (byte)(n); -} - -/********************************************************//** -The following function is used to fetch data from 3 consecutive -bytes. The most significant byte is at the lowest address. 
-@return ulint integer */ -UNIV_INLINE -ulint -mach_read_from_3( -/*=============*/ - const byte* b) /*!< in: pointer to 3 bytes */ -{ - return( ((ulint)(b[0]) << 16) - | ((ulint)(b[1]) << 8) - | (ulint)(b[2]) - ); -} - -/*******************************************************//** -The following function is used to store data in four consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_4( -/*============*/ - byte* b, /*!< in: pointer to four bytes where to store */ - ulint n) /*!< in: ulint integer to be stored */ -{ - ut_ad(b); - - b[0] = (byte)(n >> 24); - b[1] = (byte)(n >> 16); - b[2] = (byte)(n >> 8); - b[3] = (byte) n; -} - -#endif /* !UNIV_INNOCHECKSUM */ - -/********************************************************//** -The following function is used to fetch data from 2 consecutive -bytes. The most significant byte is at the lowest address. -@return ulint integer */ -UNIV_INLINE -ulint -mach_read_from_2( -/*=============*/ - const byte* b) /*!< in: pointer to 2 bytes */ -{ - return(((ulint)(b[0]) << 8) | (ulint)(b[1])); -} - -/********************************************************//** -The following function is used to fetch data from 4 consecutive -bytes. The most significant byte is at the lowest address. -@return ulint integer */ -UNIV_INLINE -ulint -mach_read_from_4( -/*=============*/ - const byte* b) /*!< in: pointer to four bytes */ -{ - return( ((ulint)(b[0]) << 24) - | ((ulint)(b[1]) << 16) - | ((ulint)(b[2]) << 8) - | (ulint)(b[3]) - ); -} - -#ifndef UNIV_INNOCHECKSUM - -/*********************************************************//** -Writes a ulint in a compressed form where the first byte codes the -length of the stored ulint. We look at the most significant bits of -the byte. 
If the most significant bit is zero, it means 1-byte storage, -else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0, -it means 3-byte storage, else if 4th is 0, it means 4-byte storage, -else the storage is 5-byte. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_write_compressed( -/*==================*/ - byte* b, /*!< in: pointer to memory where to store */ - ulint n) /*!< in: ulint integer (< 2^32) to be stored */ -{ - ut_ad(b); - - if (n < 0x80UL) { - mach_write_to_1(b, n); - return(1); - } else if (n < 0x4000UL) { - mach_write_to_2(b, n | 0x8000UL); - return(2); - } else if (n < 0x200000UL) { - mach_write_to_3(b, n | 0xC00000UL); - return(3); - } else if (n < 0x10000000UL) { - mach_write_to_4(b, n | 0xE0000000UL); - return(4); - } else { - mach_write_to_1(b, 0xF0UL); - mach_write_to_4(b + 1, n); - return(5); - } -} - -/*********************************************************//** -Returns the size of a ulint when written in the compressed form. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_get_compressed_size( -/*=====================*/ - ulint n) /*!< in: ulint integer (< 2^32) to be stored */ -{ - if (n < 0x80UL) { - return(1); - } else if (n < 0x4000UL) { - return(2); - } else if (n < 0x200000UL) { - return(3); - } else if (n < 0x10000000UL) { - return(4); - } else { - return(5); - } -} - -/*********************************************************//** -Reads a ulint in a compressed form. 
-@return read integer (< 2^32) */ -UNIV_INLINE -ulint -mach_read_compressed( -/*=================*/ - const byte* b) /*!< in: pointer to memory from where to read */ -{ - ulint flag; - - flag = mach_read_from_1(b); - - if (flag < 0x80UL) { - return(flag); - } else if (flag < 0xC0UL) { - return(mach_read_from_2(b) & 0x7FFFUL); - } else if (flag < 0xE0UL) { - return(mach_read_from_3(b) & 0x3FFFFFUL); - } else if (flag < 0xF0UL) { - return(mach_read_from_4(b) & 0x1FFFFFFFUL); - } else { - ut_ad(flag == 0xF0UL); - return(mach_read_from_4(b + 1)); - } -} - -/*******************************************************//** -The following function is used to store data in 8 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_8( -/*============*/ - void* b, /*!< in: pointer to 8 bytes where to store */ - ib_uint64_t n) /*!< in: 64-bit integer to be stored */ -{ - ut_ad(b); - - mach_write_to_4(static_cast<byte*>(b), (ulint) (n >> 32)); - mach_write_to_4(static_cast<byte*>(b) + 4, (ulint) n); -} - -/********************************************************//** -The following function is used to fetch data from 8 consecutive -bytes. The most significant byte is at the lowest address. -@return 64-bit integer */ -UNIV_INLINE -ib_uint64_t -mach_read_from_8( -/*=============*/ - const byte* b) /*!< in: pointer to 8 bytes */ -{ - ib_uint64_t ull; - - ull = ((ib_uint64_t) mach_read_from_4(b)) << 32; - ull |= (ib_uint64_t) mach_read_from_4(b + 4); - - return(ull); -} - -/*******************************************************//** -The following function is used to store data in 7 consecutive -bytes. We store the most significant byte to the lowest address. 
*/ -UNIV_INLINE -void -mach_write_to_7( -/*============*/ - byte* b, /*!< in: pointer to 7 bytes where to store */ - ib_uint64_t n) /*!< in: 56-bit integer */ -{ - ut_ad(b); - - mach_write_to_3(b, (ulint) (n >> 32)); - mach_write_to_4(b + 3, (ulint) n); -} - -/********************************************************//** -The following function is used to fetch data from 7 consecutive -bytes. The most significant byte is at the lowest address. -@return 56-bit integer */ -UNIV_INLINE -ib_uint64_t -mach_read_from_7( -/*=============*/ - const byte* b) /*!< in: pointer to 7 bytes */ -{ - return(ut_ull_create(mach_read_from_3(b), mach_read_from_4(b + 3))); -} - -/*******************************************************//** -The following function is used to store data in 6 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_6( -/*============*/ - byte* b, /*!< in: pointer to 6 bytes where to store */ - ib_uint64_t n) /*!< in: 48-bit integer */ -{ - ut_ad(b); - - mach_write_to_2(b, (ulint) (n >> 32)); - mach_write_to_4(b + 2, (ulint) n); -} - -/********************************************************//** -The following function is used to fetch data from 6 consecutive -bytes. The most significant byte is at the lowest address. -@return 48-bit integer */ -UNIV_INLINE -ib_uint64_t -mach_read_from_6( -/*=============*/ - const byte* b) /*!< in: pointer to 6 bytes */ -{ - return(ut_ull_create(mach_read_from_2(b), mach_read_from_4(b + 2))); -} - -/*********************************************************//** -Writes a 64-bit integer in a compressed form (5..9 bytes). 
-@return size in bytes */ -UNIV_INLINE -ulint -mach_ull_write_compressed( -/*======================*/ - byte* b, /*!< in: pointer to memory where to store */ - ib_uint64_t n) /*!< in: 64-bit integer to be stored */ -{ - ulint size; - - ut_ad(b); - - size = mach_write_compressed(b, (ulint) (n >> 32)); - mach_write_to_4(b + size, (ulint) n); - - return(size + 4); -} - -/*********************************************************//** -Returns the size of a 64-bit integer when written in the compressed form. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_ull_get_compressed_size( -/*=========================*/ - ib_uint64_t n) /*!< in: 64-bit integer to be stored */ -{ - return(4 + mach_get_compressed_size((ulint) (n >> 32))); -} - -/*********************************************************//** -Reads a 64-bit integer in a compressed form. -@return the value read */ -UNIV_INLINE -ib_uint64_t -mach_ull_read_compressed( -/*=====================*/ - const byte* b) /*!< in: pointer to memory from where to read */ -{ - ib_uint64_t n; - ulint size; - - n = (ib_uint64_t) mach_read_compressed(b); - - size = mach_get_compressed_size((ulint) n); - - n <<= 32; - n |= (ib_uint64_t) mach_read_from_4(b + size); - - return(n); -} - -/*********************************************************//** -Writes a 64-bit integer in a compressed form (1..11 bytes). 
-@return size in bytes */ -UNIV_INLINE -ulint -mach_ull_write_much_compressed( -/*===========================*/ - byte* b, /*!< in: pointer to memory where to store */ - ib_uint64_t n) /*!< in: 64-bit integer to be stored */ -{ - ulint size; - - ut_ad(b); - - if (!(n >> 32)) { - return(mach_write_compressed(b, (ulint) n)); - } - - *b = (byte)0xFF; - size = 1 + mach_write_compressed(b + 1, (ulint) (n >> 32)); - - size += mach_write_compressed(b + size, (ulint) n & 0xFFFFFFFF); - - return(size); -} - -/*********************************************************//** -Returns the size of a 64-bit integer when written in the compressed form. -@return compressed size in bytes */ -UNIV_INLINE -ulint -mach_ull_get_much_compressed_size( -/*==============================*/ - ib_uint64_t n) /*!< in: 64-bit integer to be stored */ -{ - if (!(n >> 32)) { - return(mach_get_compressed_size((ulint) n)); - } - - return(1 + mach_get_compressed_size((ulint) (n >> 32)) - + mach_get_compressed_size((ulint) n & ULINT32_MASK)); -} - -/*********************************************************//** -Reads a 64-bit integer in a compressed form. -@return the value read */ -UNIV_INLINE -ib_uint64_t -mach_ull_read_much_compressed( -/*==========================*/ - const byte* b) /*!< in: pointer to memory from where to read */ -{ - ib_uint64_t n; - ulint size; - - if (*b != (byte)0xFF) { - n = 0; - size = 0; - } else { - n = (ib_uint64_t) mach_read_compressed(b + 1); - - size = 1 + mach_get_compressed_size((ulint) n); - n <<= 32; - } - - n |= mach_read_compressed(b + size); - - return(n); -} - -/*********************************************************//** -Reads a 64-bit integer in a compressed form -if the log record fully contains it. 
-@return pointer to end of the stored field, NULL if not complete */ -UNIV_INLINE -byte* -mach_ull_parse_compressed( -/*======================*/ - byte* ptr, /* in: pointer to buffer from where to read */ - byte* end_ptr,/* in: pointer to end of the buffer */ - ib_uint64_t* val) /* out: read value */ -{ - ulint size; - - ut_ad(ptr); - ut_ad(end_ptr); - ut_ad(val); - - if (end_ptr < ptr + 5) { - - return(NULL); - } - - *val = mach_read_compressed(ptr); - - size = mach_get_compressed_size((ulint) *val); - - ptr += size; - - if (end_ptr < ptr + 4) { - - return(NULL); - } - - *val <<= 32; - *val |= mach_read_from_4(ptr); - - return(ptr + 4); -} -#ifndef UNIV_HOTBACKUP -/*********************************************************//** -Reads a double. It is stored in a little-endian format. -@return double read */ -UNIV_INLINE -double -mach_double_read( -/*=============*/ - const byte* b) /*!< in: pointer to memory from where to read */ -{ - double d; - ulint i; - byte* ptr; - - ptr = (byte*) &d; - - for (i = 0; i < sizeof(double); i++) { -#ifdef WORDS_BIGENDIAN - ptr[sizeof(double) - i - 1] = b[i]; -#else - ptr[i] = b[i]; -#endif - } - - return(d); -} - -/*********************************************************//** -Writes a double. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_double_write( -/*==============*/ - byte* b, /*!< in: pointer to memory where to write */ - double d) /*!< in: double */ -{ - ulint i; - byte* ptr; - - ptr = (byte*) &d; - - for (i = 0; i < sizeof(double); i++) { -#ifdef WORDS_BIGENDIAN - b[i] = ptr[sizeof(double) - i - 1]; -#else - b[i] = ptr[i]; -#endif - } -} - -/*********************************************************//** -Reads a float. It is stored in a little-endian format. 
-@return float read */ -UNIV_INLINE -float -mach_float_read( -/*============*/ - const byte* b) /*!< in: pointer to memory from where to read */ -{ - float d; - ulint i; - byte* ptr; - - ptr = (byte*) &d; - - for (i = 0; i < sizeof(float); i++) { -#ifdef WORDS_BIGENDIAN - ptr[sizeof(float) - i - 1] = b[i]; -#else - ptr[i] = b[i]; -#endif - } - - return(d); -} - -/*********************************************************//** -Writes a float. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_float_write( -/*=============*/ - byte* b, /*!< in: pointer to memory where to write */ - float d) /*!< in: float */ -{ - ulint i; - byte* ptr; - - ptr = (byte*) &d; - - for (i = 0; i < sizeof(float); i++) { -#ifdef WORDS_BIGENDIAN - b[i] = ptr[sizeof(float) - i - 1]; -#else - b[i] = ptr[i]; -#endif - } -} - -/*********************************************************//** -Reads a ulint stored in the little-endian format. -@return unsigned long int */ -UNIV_INLINE -ulint -mach_read_from_n_little_endian( -/*===========================*/ - const byte* buf, /*!< in: from where to read */ - ulint buf_size) /*!< in: from how many bytes to read */ -{ - ulint n = 0; - const byte* ptr; - - ut_ad(buf_size > 0); - - ptr = buf + buf_size; - - for (;;) { - ptr--; - - n = n << 8; - - n += (ulint)(*ptr); - - if (ptr == buf) { - break; - } - } - - return(n); -} - -/*********************************************************//** -Writes a ulint in the little-endian format. 
*/ -UNIV_INLINE -void -mach_write_to_n_little_endian( -/*==========================*/ - byte* dest, /*!< in: where to write */ - ulint dest_size, /*!< in: into how many bytes to write */ - ulint n) /*!< in: unsigned long int to write */ -{ - byte* end; - - ut_ad(dest_size <= sizeof(ulint)); - ut_ad(dest_size > 0); - - end = dest + dest_size; - - for (;;) { - *dest = (byte)(n & 0xFF); - - n = n >> 8; - - dest++; - - if (dest == end) { - break; - } - } - - ut_ad(n == 0); -} - -/*********************************************************//** -Reads a ulint stored in the little-endian format. -@return unsigned long int */ -UNIV_INLINE -ulint -mach_read_from_2_little_endian( -/*===========================*/ - const byte* buf) /*!< in: from where to read */ -{ - return((ulint)(buf[0]) | ((ulint)(buf[1]) << 8)); -} - -/*********************************************************//** -Writes a ulint in the little-endian format. */ -UNIV_INLINE -void -mach_write_to_2_little_endian( -/*==========================*/ - byte* dest, /*!< in: where to write */ - ulint n) /*!< in: unsigned long int to write */ -{ - ut_ad(n < 256 * 256); - - *dest = (byte)(n & 0xFFUL); - - n = n >> 8; - dest++; - - *dest = (byte)(n & 0xFFUL); -} - -/*********************************************************//** -Convert integral type from storage byte order (big endian) to -host byte order. 
-@return integer value */ -UNIV_INLINE -ib_uint64_t -mach_read_int_type( -/*===============*/ - const byte* src, /*!< in: where to read from */ - ulint len, /*!< in: length of src */ - ibool unsigned_type) /*!< in: signed or unsigned flag */ -{ - /* XXX this can be optimized on big-endian machines */ - - ullint ret; - uint i; - - if (unsigned_type || (src[0] & 0x80)) { - - ret = 0x0000000000000000ULL; - } else { - - ret = 0xFFFFFFFFFFFFFF00ULL; - } - - if (unsigned_type) { - - ret |= src[0]; - } else { - - ret |= src[0] ^ 0x80; - } - - for (i = 1; i < len; i++) { - ret <<= 8; - ret |= src[i]; - } - - return(ret); -} -/*********************************************************//** -Swap byte ordering. */ -UNIV_INLINE -void -mach_swap_byte_order( -/*=================*/ - byte* dest, /*!< out: where to write */ - const byte* from, /*!< in: where to read from */ - ulint len) /*!< in: length of src */ -{ - ut_ad(len > 0); - ut_ad(len <= 8); - - dest += len; - - switch (len & 0x7) { - case 0: *--dest = *from++; /* fall through */ - case 7: *--dest = *from++; /* fall through */ - case 6: *--dest = *from++; /* fall through */ - case 5: *--dest = *from++; /* fall through */ - case 4: *--dest = *from++; /* fall through */ - case 3: *--dest = *from++; /* fall through */ - case 2: *--dest = *from++; /* fall through */ - case 1: *--dest = *from; - } -} - -/************************************************************* -Convert integral type from host byte order (big-endian) storage -byte order. 
*/ -UNIV_INLINE -void -mach_write_int_type( -/*================*/ - byte* dest, /*!< in: where to write */ - const byte* src, /*!< in: where to read from */ - ulint len, /*!< in: length of src */ - bool usign) /*!< in: signed or unsigned flag */ -{ -#ifdef WORDS_BIGENDIAN - memcpy(dest, src, len); -#else - mach_swap_byte_order(dest, src, len); -#endif /* WORDS_BIGENDIAN */ - - if (!usign) { - *dest ^= 0x80; - } -} - -/************************************************************* -Convert a ulonglong integer from host byte order to (big-endian) -storage byte order. */ -UNIV_INLINE -void -mach_write_ulonglong( -/*=================*/ - byte* dest, /*!< in: where to write */ - ulonglong src, /*!< in: where to read from */ - ulint len, /*!< in: length of dest */ - bool usign) /*!< in: signed or unsigned flag */ -{ - byte* ptr = reinterpret_cast<byte*>(&src); - - ut_ad(len <= sizeof(ulonglong)); - -#ifdef WORDS_BIGENDIAN - memcpy(dest, ptr + (sizeof(src) - len), len); -#else - mach_swap_byte_order(dest, reinterpret_cast<byte*>(ptr), len); -#endif /* WORDS_BIGENDIAN */ - - if (!usign) { - *dest ^= 0x80; - } -} - -/********************************************************//** -Reads 1 - 4 bytes from a file page buffered in the buffer pool. 
-@return value read */ -UNIV_INLINE -ulint -mach_read_ulint( -/*============*/ - const byte* ptr, /*!< in: pointer from where to read */ - ulint type) /*!< in: 1,2 or 4 bytes */ -{ - switch (type) { - case 1: - return(mach_read_from_1(ptr)); - case 2: - return(mach_read_from_2(ptr)); - case 4: - return(mach_read_from_4(ptr)); - default: - ut_error; - } - - return(0); -} - -#endif /* !UNIV_HOTBACKUP */ -#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/xtradb/include/mem0dbg.h b/storage/xtradb/include/mem0dbg.h deleted file mode 100644 index cc339b82910..00000000000 --- a/storage/xtradb/include/mem0dbg.h +++ /dev/null @@ -1,150 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mem0dbg.h -The memory management: the debug code. This is not a compilation module, -but is included in mem0mem.* ! 
- -Created 6/9/1994 Heikki Tuuri -*******************************************************/ - -/* In the debug version each allocated field is surrounded with -check fields whose sizes are given below */ - -#ifdef UNIV_MEM_DEBUG -# ifndef UNIV_HOTBACKUP -/* The mutex which protects in the debug version the hash table -containing the list of live memory heaps, and also the global -variables in mem0dbg.cc. */ -extern ib_mutex_t mem_hash_mutex; -# endif /* !UNIV_HOTBACKUP */ - -#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\ - UNIV_MEM_ALIGNMENT) -#define MEM_FIELD_TRAILER_SIZE sizeof(ulint) -#else -#define MEM_FIELD_HEADER_SIZE 0 -#endif - - -/* Space needed when allocating for a user a field of -length N. The space is allocated only in multiples of -UNIV_MEM_ALIGNMENT. In the debug version there are also -check fields at the both ends of the field. */ -#ifdef UNIV_MEM_DEBUG -#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\ - + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT) -#else -#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT) -#endif - -#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG -/***************************************************************//** -Checks a memory heap for consistency and prints the contents if requested. -Outputs the sum of sizes of buffers given to the user (only in -the debug version), the physical size of the heap and the number of -blocks in the heap. In case of error returns 0 as sizes and number -of blocks. 
*/ -UNIV_INTERN -void -mem_heap_validate_or_print( -/*=======================*/ - mem_heap_t* heap, /*!< in: memory heap */ - byte* top, /*!< in: calculate and validate only until - this top pointer in the heap is reached, - if this pointer is NULL, ignored */ - ibool print, /*!< in: if TRUE, prints the contents - of the heap; works only in - the debug version */ - ibool* error, /*!< out: TRUE if error */ - ulint* us_size,/*!< out: allocated memory - (for the user) in the heap, - if a NULL pointer is passed as this - argument, it is ignored; in the - non-debug version this is always -1 */ - ulint* ph_size,/*!< out: physical size of the heap, - if a NULL pointer is passed as this - argument, it is ignored */ - ulint* n_blocks); /*!< out: number of blocks in the heap, - if a NULL pointer is passed as this - argument, it is ignored */ -/**************************************************************//** -Validates the contents of a memory heap. -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_heap_validate( -/*==============*/ - mem_heap_t* heap); /*!< in: memory heap */ -#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */ -#ifdef UNIV_DEBUG -/**************************************************************//** -Checks that an object is a memory heap (or a block of it) -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_heap_check( -/*===========*/ - mem_heap_t* heap); /*!< in: memory heap */ -#endif /* UNIV_DEBUG */ -#ifdef UNIV_MEM_DEBUG -/*****************************************************************//** -TRUE if no memory is currently allocated. 
-@return TRUE if no heaps exist */ -UNIV_INTERN -ibool -mem_all_freed(void); -/*===============*/ -/*****************************************************************//** -Validates the dynamic memory -@return TRUE if error */ -UNIV_INTERN -ibool -mem_validate_no_assert(void); -/*=========================*/ -/************************************************************//** -Validates the dynamic memory -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_validate(void); -/*===============*/ -#endif /* UNIV_MEM_DEBUG */ -/************************************************************//** -Tries to find neigboring memory allocation blocks and dumps to stderr -the neighborhood of a given pointer. */ -UNIV_INTERN -void -mem_analyze_corruption( -/*===================*/ - void* ptr); /*!< in: pointer to place of possible corruption */ -/*****************************************************************//** -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers. Can only be used in the debug version. */ -UNIV_INTERN -void -mem_print_info(void); -/*================*/ -/*****************************************************************//** -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers since the last ..._print_info or..._print_new_info. */ -UNIV_INTERN -void -mem_print_new_info(void); -/*====================*/ diff --git a/storage/xtradb/include/mem0dbg.ic b/storage/xtradb/include/mem0dbg.ic deleted file mode 100644 index ec60ed35337..00000000000 --- a/storage/xtradb/include/mem0dbg.ic +++ /dev/null @@ -1,109 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/mem0dbg.ic -The memory management: the debug code. This is not an independent -compilation module but is included in mem0mem.*. - -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ - -#ifdef UNIV_MEM_DEBUG -extern ulint mem_current_allocated_memory; - -/******************************************************************//** -Initializes an allocated memory field in the debug version. */ -UNIV_INTERN -void -mem_field_init( -/*===========*/ - byte* buf, /*!< in: memory field */ - ulint n); /*!< in: how many bytes the user requested */ -/******************************************************************//** -Erases an allocated memory field in the debug version. */ -UNIV_INTERN -void -mem_field_erase( -/*============*/ - byte* buf, /*!< in: memory field */ - ulint n); /*!< in: how many bytes the user requested */ -/***************************************************************//** -Initializes a buffer to a random combination of hex BA and BE. -Used to initialize allocated memory. */ -UNIV_INTERN -void -mem_init_buf( -/*=========*/ - byte* buf, /*!< in: pointer to buffer */ - ulint n); /*!< in: length of buffer */ -/***************************************************************//** -Initializes a buffer to a random combination of hex DE and AD. -Used to erase freed memory. 
*/ -UNIV_INTERN -void -mem_erase_buf( -/*==========*/ - byte* buf, /*!< in: pointer to buffer */ - ulint n); /*!< in: length of buffer */ -/***************************************************************//** -Inserts a created memory heap to the hash table of -current allocated memory heaps. -Initializes the hash table when first called. */ -UNIV_INTERN -void -mem_hash_insert( -/*============*/ - mem_heap_t* heap, /*!< in: the created heap */ - const char* file_name, /*!< in: file name of creation */ - ulint line); /*!< in: line where created */ -/***************************************************************//** -Removes a memory heap (which is going to be freed by the caller) -from the list of live memory heaps. Returns the size of the heap -in terms of how much memory in bytes was allocated for the user of -the heap (not the total space occupied by the heap). -Also validates the heap. -NOTE: This function does not free the storage occupied by the -heap itself, only the node in the list of heaps. */ -UNIV_INTERN -void -mem_hash_remove( -/*============*/ - mem_heap_t* heap, /*!< in: the heap to be freed */ - const char* file_name, /*!< in: file name of freeing */ - ulint line); /*!< in: line where freed */ - - -void -mem_field_header_set_len(byte* field, ulint len); - -ulint -mem_field_header_get_len(byte* field); - -void -mem_field_header_set_check(byte* field, ulint check); - -ulint -mem_field_header_get_check(byte* field); - -void -mem_field_trailer_set_check(byte* field, ulint check); - -ulint -mem_field_trailer_get_check(byte* field); -#endif /* UNIV_MEM_DEBUG */ diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h deleted file mode 100644 index de9b8b29fd9..00000000000 --- a/storage/xtradb/include/mem0mem.h +++ /dev/null @@ -1,425 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mem0mem.h -The memory management - -Created 6/9/1994 Heikki Tuuri -*******************************************************/ - -#ifndef mem0mem_h -#define mem0mem_h - -#include "univ.i" -#include "ut0mem.h" -#include "ut0byte.h" -#include "ut0rnd.h" -#ifndef UNIV_HOTBACKUP -# include "sync0sync.h" -#endif /* UNIV_HOTBACKUP */ -#include "ut0lst.h" -#include "mach0data.h" - -/* -------------------- MEMORY HEAPS ----------------------------- */ - -/* A block of a memory heap consists of the info structure -followed by an area of memory */ -typedef struct mem_block_info_t mem_block_t; - -/* A memory heap is a nonempty linear list of memory blocks */ -typedef mem_block_t mem_heap_t; - -/* Types of allocation for memory heaps: DYNAMIC means allocation from the -dynamic memory pool of the C compiler, BUFFER means allocation from the -buffer pool; the latter method is used for very big heaps */ - -#define MEM_HEAP_DYNAMIC 0 /* the most common type */ -#define MEM_HEAP_BUFFER 1 -#define MEM_HEAP_BTR_SEARCH 2 /* this flag can optionally be - ORed to MEM_HEAP_BUFFER, in which - case heap->free_block is used in - some cases for memory allocations, - and if it's NULL, the memory - allocation 
functions can return - NULL. */ - -/* Different type of heaps in terms of which datastructure is using them */ -#define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER) -#define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC) -#define MEM_HEAP_FOR_RECV_SYS (MEM_HEAP_BUFFER) -#define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER) - -/* The following start size is used for the first block in the memory heap if -the size is not specified, i.e., 0 is given as the parameter in the call of -create. The standard size is the maximum (payload) size of the blocks used for -allocations of small buffers. */ - -#define MEM_BLOCK_START_SIZE 64 -#define MEM_BLOCK_STANDARD_SIZE \ - (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF) - -/* If a memory heap is allowed to grow into the buffer pool, the following -is the maximum size for a single allocated buffer: */ -#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200) - -/******************************************************************//** -Initializes the memory system. */ -UNIV_INTERN -void -mem_init( -/*=====*/ - ulint size); /*!< in: common pool size in bytes */ -/******************************************************************//** -Closes the memory system. */ -UNIV_INTERN -void -mem_close(void); -/*===========*/ - -#ifdef UNIV_DEBUG -/**************************************************************//** -Use this macro instead of the corresponding function! Macro for memory -heap creation. */ - -# define mem_heap_create(N) mem_heap_create_func( \ - (N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC) -/**************************************************************//** -Use this macro instead of the corresponding function! Macro for memory -heap creation. */ - -# define mem_heap_create_typed(N, T) mem_heap_create_func( \ - (N), __FILE__, __LINE__, (T)) - -#else /* UNIV_DEBUG */ -/**************************************************************//** -Use this macro instead of the corresponding function! 
Macro for memory -heap creation. */ - -# define mem_heap_create(N) mem_heap_create_func( \ - (N), MEM_HEAP_DYNAMIC) -/**************************************************************//** -Use this macro instead of the corresponding function! Macro for memory -heap creation. */ - -# define mem_heap_create_typed(N, T) mem_heap_create_func( \ - (N), (T)) - -#endif /* UNIV_DEBUG */ -/**************************************************************//** -Use this macro instead of the corresponding function! Macro for memory -heap freeing. */ - -#define mem_heap_free(heap) mem_heap_free_func(\ - (heap), __FILE__, __LINE__) -/*****************************************************************//** -NOTE: Use the corresponding macros instead of this function. Creates a -memory heap. For debugging purposes, takes also the file name and line as -arguments. -@return own: memory heap, NULL if did not succeed (only possible for -MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INLINE -mem_heap_t* -mem_heap_create_func( -/*=================*/ - ulint n, /*!< in: desired start block size, - this means that a single user buffer - of size n will fit in the block, - 0 creates a default size block */ -#ifdef UNIV_DEBUG - const char* file_name, /*!< in: file name where created */ - ulint line, /*!< in: line where created */ -#endif /* UNIV_DEBUG */ - ulint type); /*!< in: heap type */ -/*****************************************************************//** -NOTE: Use the corresponding macro instead of this function. Frees the space -occupied by a memory heap. In the debug version erases the heap memory -blocks. */ -UNIV_INLINE -void -mem_heap_free_func( -/*===============*/ - mem_heap_t* heap, /*!< in, own: heap to be freed */ - const char* file_name, /*!< in: file name where freed */ - ulint line); /*!< in: line where freed */ -/***************************************************************//** -Allocates and zero-fills n bytes of memory from a memory heap. 
-@return allocated, zero-filled storage */ -UNIV_INLINE -void* -mem_heap_zalloc( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n); /*!< in: number of bytes; if the heap is allowed - to grow into the buffer pool, this must be - <= MEM_MAX_ALLOC_IN_BUF */ -/***************************************************************//** -Allocates n bytes of memory from a memory heap. -@return allocated storage, NULL if did not succeed (only possible for -MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INLINE -void* -mem_heap_alloc( -/*===========*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n); /*!< in: number of bytes; if the heap is allowed - to grow into the buffer pool, this must be - <= MEM_MAX_ALLOC_IN_BUF */ -/*****************************************************************//** -Returns a pointer to the heap top. -@return pointer to the heap top */ -UNIV_INLINE -byte* -mem_heap_get_heap_top( -/*==================*/ - mem_heap_t* heap); /*!< in: memory heap */ -/*****************************************************************//** -Frees the space in a memory heap exceeding the pointer given. The -pointer must have been acquired from mem_heap_get_heap_top. The first -memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_free_heap_top( -/*===================*/ - mem_heap_t* heap, /*!< in: heap from which to free */ - byte* old_top);/*!< in: pointer to old top of heap */ -/*****************************************************************//** -Empties a memory heap. The first memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_empty( -/*===========*/ - mem_heap_t* heap); /*!< in: heap to empty */ -/*****************************************************************//** -Returns a pointer to the topmost element in a memory heap. -The size of the element must be given. 
-@return pointer to the topmost element */ -UNIV_INLINE -void* -mem_heap_get_top( -/*=============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n); /*!< in: size of the topmost element */ -/*****************************************************************//** -Frees the topmost element in a memory heap. -The size of the element must be given. */ -UNIV_INLINE -void -mem_heap_free_top( -/*==============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n); /*!< in: size of the topmost element */ -/*****************************************************************//** -Returns the space in bytes occupied by a memory heap. */ -UNIV_INLINE -ulint -mem_heap_get_size( -/*==============*/ - mem_heap_t* heap); /*!< in: heap */ -/**************************************************************//** -Use this macro instead of the corresponding function! -Macro for memory buffer allocation */ - -#define mem_zalloc(N) memset(mem_alloc(N), 0, (N)) - -#ifdef UNIV_DEBUG -#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__, NULL) -#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S)) -#else /* UNIV_DEBUG */ -#define mem_alloc(N) mem_alloc_func((N), NULL) -#define mem_alloc2(N,S) mem_alloc_func((N), (S)) -#endif /* UNIV_DEBUG */ - -/***************************************************************//** -NOTE: Use the corresponding macro instead of this function. -Allocates a single buffer of memory from the dynamic memory of -the C compiler. Is like malloc of C. The buffer must be freed -with mem_free. 
-@return own: free storage */ -UNIV_INLINE -void* -mem_alloc_func( -/*===========*/ - ulint n, /*!< in: requested size in bytes */ -#ifdef UNIV_DEBUG - const char* file_name, /*!< in: file name where created */ - ulint line, /*!< in: line where created */ -#endif /* UNIV_DEBUG */ - ulint* size); /*!< out: allocated size in bytes, - or NULL */ - -/**************************************************************//** -Use this macro instead of the corresponding function! -Macro for memory buffer freeing */ - -#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__) -/***************************************************************//** -NOTE: Use the corresponding macro instead of this function. -Frees a single buffer of storage from -the dynamic memory of C compiler. Similar to free of C. */ -UNIV_INLINE -void -mem_free_func( -/*==========*/ - void* ptr, /*!< in, own: buffer to be freed */ - const char* file_name, /*!< in: file name where created */ - ulint line); /*!< in: line where created */ - -/**********************************************************************//** -Duplicates a NUL-terminated string. -@return own: a copy of the string, must be deallocated with mem_free */ -UNIV_INLINE -char* -mem_strdup( -/*=======*/ - const char* str); /*!< in: string to be copied */ -/**********************************************************************//** -Makes a NUL-terminated copy of a nonterminated string. -@return own: a copy of the string, must be deallocated with mem_free */ -UNIV_INLINE -char* -mem_strdupl( -/*========*/ - const char* str, /*!< in: string to be copied */ - ulint len); /*!< in: length of str, in bytes */ - -/**********************************************************************//** -Duplicates a NUL-terminated string, allocated from a memory heap. 
-@return own: a copy of the string */ -UNIV_INTERN -char* -mem_heap_strdup( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap where string is allocated */ - const char* str); /*!< in: string to be copied */ -/**********************************************************************//** -Makes a NUL-terminated copy of a nonterminated string, -allocated from a memory heap. -@return own: a copy of the string */ -UNIV_INLINE -char* -mem_heap_strdupl( -/*=============*/ - mem_heap_t* heap, /*!< in: memory heap where string is allocated */ - const char* str, /*!< in: string to be copied */ - ulint len); /*!< in: length of str, in bytes */ - -/**********************************************************************//** -Concatenate two strings and return the result, using a memory heap. -@return own: the result */ -UNIV_INTERN -char* -mem_heap_strcat( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap where string is allocated */ - const char* s1, /*!< in: string 1 */ - const char* s2); /*!< in: string 2 */ - -/**********************************************************************//** -Duplicate a block of data, allocated from a memory heap. -@return own: a copy of the data */ -UNIV_INTERN -void* -mem_heap_dup( -/*=========*/ - mem_heap_t* heap, /*!< in: memory heap where copy is allocated */ - const void* data, /*!< in: data to be copied */ - ulint len); /*!< in: length of data, in bytes */ - -/****************************************************************//** -A simple sprintf replacement that dynamically allocates the space for the -formatted string from the given heap. This supports a very limited set of -the printf syntax: types 's' and 'u' and length modifier 'l' (which is -required for the 'u' type). -@return heap-allocated formatted string */ -UNIV_INTERN -char* -mem_heap_printf( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap */ - const char* format, /*!< in: format string */ - ...) 
MY_ATTRIBUTE ((format (printf, 2, 3))); - -#ifdef MEM_PERIODIC_CHECK -/******************************************************************//** -Goes through the list of all allocated mem blocks, checks their magic -numbers, and reports possible corruption. */ -UNIV_INTERN -void -mem_validate_all_blocks(void); -/*=========================*/ -#endif - -/*#######################################################################*/ - -/** The info structure stored at the beginning of a heap block */ -struct mem_block_info_t { - ulint magic_n;/* magic number for debugging */ -#ifdef UNIV_DEBUG - char file_name[8];/* file name where the mem heap was created */ - ulint line; /*!< line number where the mem heap was created */ -#endif /* UNIV_DEBUG */ - UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the - the list this is the base node of the list of blocks; - in subsequent blocks this is undefined */ - UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next - and prev in the list. The first block allocated - to the heap is also the first block in this list, - though it also contains the base node of the list. */ - ulint len; /*!< physical length of this block in bytes */ - ulint total_size; /*!< physical length in bytes of all blocks - in the heap. This is defined only in the base - node and is set to ULINT_UNDEFINED in others. 
*/ - ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or - MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */ - ulint free; /*!< offset in bytes of the first free position for - user data in the block */ - ulint start; /*!< the value of the struct field 'free' at the - creation of the block */ -#ifndef UNIV_HOTBACKUP - void* free_block; - /* if the MEM_HEAP_BTR_SEARCH bit is set in type, - and this is the heap root, this can contain an - allocated buffer frame, which can be appended as a - free block to the heap, if we need more space; - otherwise, this is NULL */ - void* buf_block; - /* if this block has been allocated from the buffer - pool, this contains the buf_block_t handle; - otherwise, this is NULL */ -#endif /* !UNIV_HOTBACKUP */ -#ifdef MEM_PERIODIC_CHECK - UT_LIST_NODE_T(mem_block_t) mem_block_list; - /* List of all mem blocks allocated; protected - by the mem_comm_pool mutex */ -#endif -}; - -#define MEM_BLOCK_MAGIC_N 764741555 -#define MEM_FREED_BLOCK_MAGIC_N 547711122 - -/* Header size for a memory heap block */ -#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\ - UNIV_MEM_ALIGNMENT) -#include "mem0dbg.h" - -#ifndef UNIV_NONINL -#include "mem0mem.ic" -#endif - -#endif diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic deleted file mode 100644 index 63e68150b61..00000000000 --- a/storage/xtradb/include/mem0mem.ic +++ /dev/null @@ -1,649 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/mem0mem.ic -The memory management - -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ - -#include "mem0dbg.ic" -#ifndef UNIV_HOTBACKUP -# include "mem0pool.h" -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -# define mem_heap_create_block(heap, n, type, file_name, line) \ - mem_heap_create_block_func(heap, n, file_name, line, type) -# define mem_heap_create_at(N, file_name, line) \ - mem_heap_create_func(N, file_name, line, MEM_HEAP_DYNAMIC) -#else /* UNIV_DEBUG */ -# define mem_heap_create_block(heap, n, type, file_name, line) \ - mem_heap_create_block_func(heap, n, type) -# define mem_heap_create_at(N, file_name, line) \ - mem_heap_create_func(N, MEM_HEAP_DYNAMIC) -#endif /* UNIV_DEBUG */ -/***************************************************************//** -Creates a memory heap block where data can be allocated. 
-@return own: memory heap block, NULL if did not succeed (only possible -for MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INTERN -mem_block_t* -mem_heap_create_block_func( -/*=======================*/ - mem_heap_t* heap, /*!< in: memory heap or NULL if first block - should be created */ - ulint n, /*!< in: number of bytes needed for user data */ -#ifdef UNIV_DEBUG - const char* file_name,/*!< in: file name where created */ - ulint line, /*!< in: line where created */ -#endif /* UNIV_DEBUG */ - ulint type); /*!< in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ -/******************************************************************//** -Frees a block from a memory heap. */ -UNIV_INTERN -void -mem_heap_block_free( -/*================*/ - mem_heap_t* heap, /*!< in: heap */ - mem_block_t* block); /*!< in: block to free */ -#ifndef UNIV_HOTBACKUP -/******************************************************************//** -Frees the free_block field from a memory heap. */ -UNIV_INTERN -void -mem_heap_free_block_free( -/*=====================*/ - mem_heap_t* heap); /*!< in: heap */ -#endif /* !UNIV_HOTBACKUP */ -/***************************************************************//** -Adds a new block to a memory heap. 
-@return created block, NULL if did not succeed (only possible for -MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INTERN -mem_block_t* -mem_heap_add_block( -/*===============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n); /*!< in: number of bytes user needs */ - -UNIV_INLINE -void -mem_block_set_len(mem_block_t* block, ulint len) -{ - ut_ad(len > 0); - - block->len = len; -} - -UNIV_INLINE -ulint -mem_block_get_len(mem_block_t* block) -{ - return(block->len); -} - -UNIV_INLINE -void -mem_block_set_type(mem_block_t* block, ulint type) -{ - ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER) - || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH)); - - block->type = type; -} - -UNIV_INLINE -ulint -mem_block_get_type(mem_block_t* block) -{ - return(block->type); -} - -UNIV_INLINE -void -mem_block_set_free(mem_block_t* block, ulint free) -{ - ut_ad(free > 0); - ut_ad(free <= mem_block_get_len(block)); - - block->free = free; -} - -UNIV_INLINE -ulint -mem_block_get_free(mem_block_t* block) -{ - return(block->free); -} - -UNIV_INLINE -void -mem_block_set_start(mem_block_t* block, ulint start) -{ - ut_ad(start > 0); - - block->start = start; -} - -UNIV_INLINE -ulint -mem_block_get_start(mem_block_t* block) -{ - return(block->start); -} - -/***************************************************************//** -Allocates and zero-fills n bytes of memory from a memory heap. -@return allocated, zero-filled storage */ -UNIV_INLINE -void* -mem_heap_zalloc( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n) /*!< in: number of bytes; if the heap is allowed - to grow into the buffer pool, this must be - <= MEM_MAX_ALLOC_IN_BUF */ -{ - ut_ad(heap); - ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH)); - return(memset(mem_heap_alloc(heap, n), 0, n)); -} - -/***************************************************************//** -Allocates n bytes of memory from a memory heap. 
-@return allocated storage, NULL if did not succeed (only possible for -MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INLINE -void* -mem_heap_alloc( -/*===========*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n) /*!< in: number of bytes; if the heap is allowed - to grow into the buffer pool, this must be - <= MEM_MAX_ALLOC_IN_BUF */ -{ - mem_block_t* block; - void* buf; - ulint free; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF)); - - /* Check if there is enough space in block. If not, create a new - block to the heap */ - - if (mem_block_get_len(block) - < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) { - - block = mem_heap_add_block(heap, n); - - if (block == NULL) { - - return(NULL); - } - } - - free = mem_block_get_free(block); - - buf = (byte*) block + free; - - mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); - -#ifdef UNIV_MEM_DEBUG - UNIV_MEM_ALLOC(buf, - n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE); - - /* In the debug version write debugging info to the field */ - mem_field_init((byte*) buf, n); - - /* Advance buf to point at the storage which will be given to the - caller */ - buf = (byte*) buf + MEM_FIELD_HEADER_SIZE; - -#endif - UNIV_MEM_ALLOC(buf, n); - return(buf); -} - -/*****************************************************************//** -Returns a pointer to the heap top. -@return pointer to the heap top */ -UNIV_INLINE -byte* -mem_heap_get_heap_top( -/*==================*/ - mem_heap_t* heap) /*!< in: memory heap */ -{ - mem_block_t* block; - byte* buf; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - buf = (byte*) block + mem_block_get_free(block); - - return(buf); -} - -/*****************************************************************//** -Frees the space in a memory heap exceeding the pointer given. The -pointer must have been acquired from mem_heap_get_heap_top. 
The first -memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_free_heap_top( -/*===================*/ - mem_heap_t* heap, /*!< in: heap from which to free */ - byte* old_top)/*!< in: pointer to old top of heap */ -{ - mem_block_t* block; - mem_block_t* prev_block; -#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG - ibool error; - ulint total_size; - ulint size; - - ut_ad(mem_heap_check(heap)); - - /* Validate the heap and get its total allocated size */ - mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size, - NULL, NULL); - ut_a(!error); - - /* Get the size below top pointer */ - mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL, - NULL); - ut_a(!error); - -#endif - - block = UT_LIST_GET_LAST(heap->base); - - while (block != NULL) { - if (((byte*) block + mem_block_get_free(block) >= old_top) - && ((byte*) block <= old_top)) { - /* Found the right block */ - - break; - } - - /* Store prev_block value before freeing the current block - (the current block will be erased in freeing) */ - - prev_block = UT_LIST_GET_PREV(list, block); - - mem_heap_block_free(heap, block); - - block = prev_block; - } - - ut_ad(block); - - /* Set the free field of block */ - mem_block_set_free(block, old_top - (byte*) block); - - ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); - UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top); -#if defined UNIV_MEM_DEBUG - /* In the debug version erase block from top up */ - mem_erase_buf(old_top, (byte*) block + block->len - old_top); - - /* Update allocated memory count */ - mutex_enter(&mem_hash_mutex); - mem_current_allocated_memory -= (total_size - size); - mutex_exit(&mem_hash_mutex); -#endif /* UNIV_MEM_DEBUG */ - UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top); - - /* If free == start, we may free the block if it is not the first - one */ - - if ((heap != block) && (mem_block_get_free(block) - == mem_block_get_start(block))) { - 
mem_heap_block_free(heap, block); - } -} - -/*****************************************************************//** -Empties a memory heap. The first memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_empty( -/*===========*/ - mem_heap_t* heap) /*!< in: heap to empty */ -{ - mem_heap_free_heap_top(heap, (byte*) heap + mem_block_get_start(heap)); -#ifndef UNIV_HOTBACKUP - if (heap->free_block) { - mem_heap_free_block_free(heap); - } -#endif /* !UNIV_HOTBACKUP */ -} - -/*****************************************************************//** -Returns a pointer to the topmost element in a memory heap. The size of the -element must be given. -@return pointer to the topmost element */ -UNIV_INLINE -void* -mem_heap_get_top( -/*=============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n) /*!< in: size of the topmost element */ -{ - mem_block_t* block; - byte* buf; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - buf = (byte*) block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n); - -#ifdef UNIV_MEM_DEBUG - ut_ad(mem_block_get_start(block) <= (ulint) (buf - (byte*) block)); - - /* In the debug version, advance buf to point at the storage which - was given to the caller in the allocation*/ - - buf += MEM_FIELD_HEADER_SIZE; - - /* Check that the field lengths agree */ - ut_ad(n == mem_field_header_get_len(buf)); -#endif - - return((void*) buf); -} - -/*****************************************************************//** -Frees the topmost element in a memory heap. The size of the element must be -given. 
*/ -UNIV_INLINE -void -mem_heap_free_top( -/*==============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n) /*!< in: size of the topmost element */ -{ - mem_block_t* block; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - /* Subtract the free field of block */ - mem_block_set_free(block, mem_block_get_free(block) - - MEM_SPACE_NEEDED(n)); - UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n); -#ifdef UNIV_MEM_DEBUG - - ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); - - /* In the debug version check the consistency, and erase field */ - mem_field_erase((byte*) block + mem_block_get_free(block), n); -#endif - - /* If free == start, we may free the block if it is not the first - one */ - - if ((heap != block) && (mem_block_get_free(block) - == mem_block_get_start(block))) { - mem_heap_block_free(heap, block); - } else { - /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a - subsequent invocation of mem_heap_free_top(). - Originally, this was UNIV_MEM_FREE(), to catch writes - to freed memory. */ - UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n); - } -} - -/*****************************************************************//** -NOTE: Use the corresponding macros instead of this function. Creates a -memory heap. For debugging purposes, takes also the file name and line as -argument. 
-@return own: memory heap, NULL if did not succeed (only possible for -MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INLINE -mem_heap_t* -mem_heap_create_func( -/*=================*/ - ulint n, /*!< in: desired start block size, - this means that a single user buffer - of size n will fit in the block, - 0 creates a default size block */ -#ifdef UNIV_DEBUG - const char* file_name, /*!< in: file name where created */ - ulint line, /*!< in: line where created */ -#endif /* UNIV_DEBUG */ - ulint type) /*!< in: heap type */ -{ - mem_block_t* block; - - if (!n) { - n = MEM_BLOCK_START_SIZE; - } - - block = mem_heap_create_block(NULL, n, type, file_name, line); - - if (block == NULL) { - - return(NULL); - } - - UT_LIST_INIT(block->base); - - /* Add the created block itself as the first block in the list */ - UT_LIST_ADD_FIRST(list, block->base, block); - -#ifdef UNIV_MEM_DEBUG - - mem_hash_insert(block, file_name, line); - -#endif - - return(block); -} - -/*****************************************************************//** -NOTE: Use the corresponding macro instead of this function. Frees the space -occupied by a memory heap. In the debug version erases the heap memory -blocks. 
*/ -UNIV_INLINE -void -mem_heap_free_func( -/*===============*/ - mem_heap_t* heap, /*!< in, own: heap to be freed */ - const char* file_name MY_ATTRIBUTE((unused)), - /*!< in: file name where freed */ - ulint line MY_ATTRIBUTE((unused))) /*!< in: line where freed */ -{ - mem_block_t* block; - mem_block_t* prev_block; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - -#ifdef UNIV_MEM_DEBUG - - /* In the debug version remove the heap from the hash table of heaps - and check its consistency */ - - mem_hash_remove(heap, file_name, line); - -#endif -#ifndef UNIV_HOTBACKUP - if (heap->free_block) { - mem_heap_free_block_free(heap); - } -#endif /* !UNIV_HOTBACKUP */ - - while (block != NULL) { - /* Walk from the last block towards the first. Fetch the - previous block before freeing the current one (its contents - are erased in freeing) */ - - prev_block = UT_LIST_GET_PREV(list, block); - - mem_heap_block_free(heap, block); - - block = prev_block; - } -} - -/***************************************************************//** -NOTE: Use the corresponding macro instead of this function. -Allocates a single buffer of memory from the dynamic memory of -the C compiler. Is like malloc of C. The buffer must be freed -with mem_free. -@return own: free storage */ -UNIV_INLINE -void* -mem_alloc_func( -/*===========*/ - ulint n, /*!< in: desired number of bytes */ -#ifdef UNIV_DEBUG - const char* file_name, /*!< in: file name where created */ - ulint line, /*!< in: line where created */ -#endif /* UNIV_DEBUG */ - ulint* size) /*!< out: allocated size in bytes, - or NULL */ -{ - mem_heap_t* heap; - void* buf; - - heap = mem_heap_create_at(n, file_name, line); - - /* Note that as we created the first block in the heap big enough - for the buffer requested by the caller, the buffer will be in the - first block and thus we can calculate the pointer to the heap from - the pointer to the buffer when we free the memory buffer. */ - - if (size) { - /* Adjust the allocation to the actual size of the - memory block.
*/ - ulint m = mem_block_get_len(heap) - - mem_block_get_free(heap); -#ifdef UNIV_MEM_DEBUG - m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE; -#endif /* UNIV_MEM_DEBUG */ - ut_ad(m >= n); - n = m; - *size = m; - } - - buf = mem_heap_alloc(heap, n); - - ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE - - MEM_FIELD_HEADER_SIZE); - return(buf); -} - -/***************************************************************//** -NOTE: Use the corresponding macro instead of this function. Frees a single -buffer of storage from the dynamic memory of the C compiler. Similar to the -free of C. */ -UNIV_INLINE -void -mem_free_func( -/*==========*/ - void* ptr, /*!< in, own: buffer to be freed */ - const char* file_name, /*!< in: file name where freed */ - ulint line) /*!< in: line where freed */ -{ - mem_heap_t* heap; - - /* Inverse of the layout asserted in mem_alloc_func: the heap starts - MEM_BLOCK_HEADER_SIZE + MEM_FIELD_HEADER_SIZE bytes before the buffer */ - heap = (mem_heap_t*)((byte*) ptr - MEM_BLOCK_HEADER_SIZE - - MEM_FIELD_HEADER_SIZE); - mem_heap_free_func(heap, file_name, line); -} - -/*****************************************************************//** -Returns the space in bytes occupied by a memory heap. -@return heap->total_size, plus UNIV_PAGE_SIZE if the heap holds a -free_block (the latter only when UNIV_HOTBACKUP is not defined) */ -UNIV_INLINE -ulint -mem_heap_get_size( -/*==============*/ - mem_heap_t* heap) /*!< in: heap */ -{ - ulint size = 0; - - ut_ad(mem_heap_check(heap)); - - size = heap->total_size; - -#ifndef UNIV_HOTBACKUP - if (heap->free_block) { - size += UNIV_PAGE_SIZE; - } -#endif /* !UNIV_HOTBACKUP */ - - return(size); -} - -/**********************************************************************//** -Duplicates a NUL-terminated string. -@return own: a copy of the string, must be deallocated with mem_free */ -UNIV_INLINE -char* -mem_strdup( -/*=======*/ - const char* str) /*!< in: string to be copied */ -{ - ulint len = strlen(str) + 1; /* includes the terminating NUL */ - return((char*) memcpy(mem_alloc(len), str, len)); -} - -/**********************************************************************//** -Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string (len bytes of str plus a terminating NUL), -must be deallocated with mem_free */ -UNIV_INLINE -char* -mem_strdupl( -/*========*/ - const char* str, /*!< in: string to be copied */ - ulint len) /*!< in: length of str, in bytes */ -{ - char* s = (char*) mem_alloc(len + 1); - s[len] = 0; /* terminate first: memcpy() below returns s */ - return((char*) memcpy(s, str, len)); -} - -/**********************************************************************//** -Makes a NUL-terminated copy of a nonterminated string, -allocated from a memory heap. -@return own: a copy of the string (len bytes of str plus a terminating NUL), -allocated with mem_heap_alloc() from heap */ -UNIV_INLINE -char* -mem_heap_strdupl( -/*=============*/ - mem_heap_t* heap, /*!< in: memory heap where string is allocated */ - const char* str, /*!< in: string to be copied */ - ulint len) /*!< in: length of str, in bytes */ -{ - char* s = (char*) mem_heap_alloc(heap, len + 1); - s[len] = 0; /* terminate first: memcpy() below returns s */ - return((char*) memcpy(s, str, len)); -} diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h deleted file mode 100644 index a65ba50fdf9..00000000000 --- a/storage/xtradb/include/mem0pool.h +++ /dev/null @@ -1,121 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mem0pool.h -The lowest-level memory management - -Created 6/9/1994 Heikki Tuuri -*******************************************************/ - -#ifndef mem0pool_h -#define mem0pool_h - -#include "univ.i" -#include "os0file.h" -#include "ut0lst.h" - -/** Memory pool */ -struct mem_pool_t; - -/** The common memory pool */ -extern mem_pool_t* mem_comm_pool; - -/** Memory area header */ -struct mem_area_t{ - ulint size_and_free; /*!< memory area size is obtained by - anding with ~MEM_AREA_FREE; area in - a free list if ANDing with - MEM_AREA_FREE results in nonzero */ - UT_LIST_NODE_T(mem_area_t) - free_list; /*!< free list node */ -}; - -/** Each memory area takes this many extra bytes for control information */ -#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_t),\ - UNIV_MEM_ALIGNMENT)) - -/********************************************************************//** -Creates a memory pool. -@return memory pool */ -UNIV_INTERN -mem_pool_t* -mem_pool_create( -/*============*/ - ulint size); /*!< in: pool size in bytes */ -/********************************************************************//** -Frees a memory pool. */ -UNIV_INTERN -void -mem_pool_free( -/*==========*/ - mem_pool_t* pool); /*!< in, own: memory pool */ -/********************************************************************//** -Allocates memory from a pool. NOTE: This low-level function should only be -used in mem0mem.*! 
-@return own: allocated memory buffer */ -UNIV_INTERN -void* -mem_area_alloc( -/*===========*/ - ulint* psize, /*!< in: requested size in bytes; for optimum - space usage, the size should be a power of 2 - minus MEM_AREA_EXTRA_SIZE; - out: allocated size in bytes (greater than - or equal to the requested size) */ - mem_pool_t* pool); /*!< in: memory pool */ -/********************************************************************//** -Frees memory to a pool. */ -UNIV_INTERN -void -mem_area_free( -/*==========*/ - void* ptr, /*!< in, own: pointer to allocated memory - buffer */ - mem_pool_t* pool); /*!< in: memory pool */ -/********************************************************************//** -Returns the amount of reserved memory. -@return reserved memory in bytes */ -UNIV_INTERN -ulint -mem_pool_get_reserved( -/*==================*/ - mem_pool_t* pool); /*!< in: memory pool */ -/********************************************************************//** -Validates a memory pool. -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_pool_validate( -/*==============*/ - mem_pool_t* pool); /*!< in: memory pool */ -/********************************************************************//** -Prints info of a memory pool. */ -UNIV_INTERN -void -mem_pool_print_info( -/*================*/ - FILE* outfile,/*!< in: output file to write to */ - mem_pool_t* pool); /*!< in: memory pool */ - - -#ifndef UNIV_NONINL -#include "mem0pool.ic" -#endif - -#endif diff --git a/storage/xtradb/include/mem0pool.ic b/storage/xtradb/include/mem0pool.ic deleted file mode 100644 index f4bafb8ba63..00000000000 --- a/storage/xtradb/include/mem0pool.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/mem0pool.ic -The lowest-level memory management - -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ diff --git a/storage/xtradb/include/mtr0log.h b/storage/xtradb/include/mtr0log.h deleted file mode 100644 index 18a345d050f..00000000000 --- a/storage/xtradb/include/mtr0log.h +++ /dev/null @@ -1,251 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mtr0log.h -Mini-transaction logging routines - -Created 12/7/1995 Heikki Tuuri -*******************************************************/ - -#ifndef mtr0log_h -#define mtr0log_h - -#include "univ.i" -#include "mtr0mtr.h" -#include "dict0types.h" - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log -record to the mini-transaction log if mtr is not NULL. */ -UNIV_INTERN -void -mlog_write_ulint( -/*=============*/ - byte* ptr, /*!< in: pointer where to write */ - ulint val, /*!< in: value to write */ - byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************//** -Writes 8 bytes to a file page. Writes the corresponding log -record to the mini-transaction log, only if mtr is not NULL */ -UNIV_INTERN -void -mlog_write_ull( -/*===========*/ - byte* ptr, /*!< in: pointer where to write */ - ib_uint64_t val, /*!< in: value to write */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************//** -Writes a string to a file page buffered in the buffer pool. Writes the -corresponding log record to the mini-transaction log. 
*/ -UNIV_INTERN -void -mlog_write_string( -/*==============*/ - byte* ptr, /*!< in: pointer where to write */ - const byte* str, /*!< in: string to write */ - ulint len, /*!< in: string length */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************//** -Logs a write of a string to a file page buffered in the buffer pool. -Writes the corresponding log record to the mini-transaction log. */ -UNIV_INTERN -void -mlog_log_string( -/*============*/ - byte* ptr, /*!< in: pointer written to */ - ulint len, /*!< in: string length */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************//** -Writes initial part of a log record consisting of one-byte item -type and four-byte space and page numbers. */ -UNIV_INTERN -void -mlog_write_initial_log_record( -/*==========================*/ - const byte* ptr, /*!< in: pointer to (inside) a buffer - frame holding the file page where - modification is made */ - byte type, /*!< in: log item type: MLOG_1BYTE, ... */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -/********************************************************//** -Writes a log record about an .ibd file create/delete/rename. -@return new value of log_ptr */ -UNIV_INLINE -byte* -mlog_write_initial_log_record_for_file_op( -/*======================================*/ - ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id,/*!< in: space id, if applicable */ - ulint page_no,/*!< in: page number (not relevant currently) */ - byte* log_ptr,/*!< in: pointer to mtr log which has been opened */ - mtr_t* mtr); /*!< in: mtr */ -/********************************************************//** -Catenates 1 - 4 bytes to the mtr log. 
*/ -UNIV_INLINE -void -mlog_catenate_ulint( -/*================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint val, /*!< in: value to write */ - ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ -/********************************************************//** -Catenates n bytes to the mtr log. */ -UNIV_INTERN -void -mlog_catenate_string( -/*=================*/ - mtr_t* mtr, /*!< in: mtr */ - const byte* str, /*!< in: string to write */ - ulint len); /*!< in: string length */ -/********************************************************//** -Catenates a compressed ulint to mlog. */ -UNIV_INLINE -void -mlog_catenate_ulint_compressed( -/*===========================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint val); /*!< in: value to write */ -/********************************************************//** -Catenates a compressed 64-bit integer to mlog. */ -UNIV_INLINE -void -mlog_catenate_ull_compressed( -/*=========================*/ - mtr_t* mtr, /*!< in: mtr */ - ib_uint64_t val); /*!< in: value to write */ -/********************************************************//** -Opens a buffer to mlog. It must be closed with mlog_close. -@return buffer, NULL if log mode MTR_LOG_NONE */ -UNIV_INLINE -byte* -mlog_open( -/*======*/ - mtr_t* mtr, /*!< in: mtr */ - ulint size); /*!< in: buffer size in bytes; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -/********************************************************//** -Closes a buffer opened to mlog. */ -UNIV_INLINE -void -mlog_close( -/*=======*/ - mtr_t* mtr, /*!< in: mtr */ - byte* ptr); /*!< in: buffer space from ptr up was not used */ -/********************************************************//** -Writes the initial part of a log record (3..11 bytes). -If the implementation of this function is changed, all -size parameters to mlog_open() should be adjusted accordingly! 
-@return new value of log_ptr */ -UNIV_INLINE -byte* -mlog_write_initial_log_record_fast( -/*===============================*/ - const byte* ptr, /*!< in: pointer to (inside) a buffer - frame holding the file page where - modification is made */ - byte type, /*!< in: log item type: MLOG_1BYTE, ... */ - byte* log_ptr,/*!< in: pointer to mtr log which has - been opened */ - mtr_t* mtr); /*!< in: mtr */ -#else /* !UNIV_HOTBACKUP */ -# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0) -# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte*) 0) -#endif /* !UNIV_HOTBACKUP */ -/********************************************************//** -Parses an initial log record written by mlog_write_initial_log_record. -@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_initial_log_record( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - byte* type, /*!< out: log record type: MLOG_1BYTE, ... */ - ulint* space, /*!< out: space id */ - ulint* page_no);/*!< out: page number */ -/********************************************************//** -Parses a log record written by mlog_write_ulint or mlog_write_ull. -@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_nbytes( -/*==============*/ - ulint type, /*!< in: log record type: MLOG_1BYTE, ... */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - byte* page, /*!< in: page where to apply the log record, or NULL */ - void* page_zip);/*!< in/out: compressed page, or NULL */ -/********************************************************//** -Parses a log record written by mlog_write_string. 
-@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_string( -/*==============*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - byte* page, /*!< in: page where to apply the log record, or NULL */ - void* page_zip);/*!< in/out: compressed page, or NULL */ - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Opens a buffer for mlog, writes the initial log record and, -if needed, the field lengths of an index. Reserves space -for further log entries. The log entry must be closed with -mlog_close(). -@return buffer, NULL if log mode MTR_LOG_NONE */ -UNIV_INTERN -byte* -mlog_open_and_write_index( -/*======================*/ - mtr_t* mtr, /*!< in: mtr */ - const byte* rec, /*!< in: index record or page */ - const dict_index_t* index, /*!< in: record descriptor */ - byte type, /*!< in: log item type */ - ulint size); /*!< in: requested buffer size in bytes - (if 0, calls mlog_close() and - returns NULL) */ -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_index( -/*=============*/ - byte* ptr, /*!< in: buffer */ - const byte* end_ptr,/*!< in: buffer end */ - ibool comp, /*!< in: TRUE=compact record format */ - dict_index_t** index); /*!< out, own: dummy index */ - -#ifndef UNIV_HOTBACKUP -/* Insert, update, and maybe other functions may use this value to define an -extra mlog buffer size for variable size data */ -#define MLOG_BUF_MARGIN 256 -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_NONINL -#include "mtr0log.ic" -#endif - -#endif diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic deleted file mode 100644 index d508d30fafe..00000000000 --- a/storage/xtradb/include/mtr0log.ic +++ /dev/null @@ -1,277 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mtr0log.ic -Mini-transaction logging routines - -Created 12/7/1995 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" -#include "ut0lst.h" -#include "buf0buf.h" -#include "buf0dblwr.h" -#include "fsp0types.h" -#include "btr0types.h" -#include "trx0sys.h" - -/********************************************************//** -Opens a buffer to mlog. It must be closed with mlog_close. -Marks the mtr as having modifications in every log mode. -@return buffer, NULL if log mode MTR_LOG_NONE */ -UNIV_INLINE -byte* -mlog_open( -/*======*/ - mtr_t* mtr, /*!< in: mtr */ - ulint size) /*!< in: buffer size in bytes; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -{ - dyn_array_t* mlog; - - /* Set before the log mode check, i.e. also when we return NULL below */ - mtr->modifications = TRUE; - - if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { - - return(NULL); - } - - mlog = &(mtr->log); - - return(dyn_array_open(mlog, size)); -} - -/********************************************************//** -Closes a buffer opened to mlog. Must not be called in log mode -MTR_LOG_NONE (mlog_open returns NULL in that mode); this is only -checked in debug builds. */ -UNIV_INLINE -void -mlog_close( -/*=======*/ - mtr_t* mtr, /*!< in: mtr */ - byte* ptr) /*!< in: buffer space from ptr up was not used */ -{ - dyn_array_t* mlog; - - ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE); - - mlog = &(mtr->log); - - dyn_array_close(mlog, ptr); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Catenates 1 - 4 bytes to the mtr log. The value is not compressed.
*/ -UNIV_INLINE -void -mlog_catenate_ulint( -/*================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint val, /*!< in: value to write */ - ulint type) /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ -{ - dyn_array_t* mlog; - byte* ptr; - - if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { - - return; - } - - mlog = &(mtr->log); /* The #if checks below fail the build unless each MLOG_nBYTES equals n: type is handed to dyn_array_push() unchanged, presumably as the number of bytes to reserve */ - -#if MLOG_1BYTE != 1 -# error "MLOG_1BYTE != 1" -#endif -#if MLOG_2BYTES != 2 -# error "MLOG_2BYTES != 2" -#endif -#if MLOG_4BYTES != 4 -# error "MLOG_4BYTES != 4" -#endif -#if MLOG_8BYTES != 8 -# error "MLOG_8BYTES != 8" -#endif - ptr = (byte*) dyn_array_push(mlog, type); - - if (type == MLOG_4BYTES) { - mach_write_to_4(ptr, val); - } else if (type == MLOG_2BYTES) { - mach_write_to_2(ptr, val); - } else { /* every other type lands here and is written as one byte; only debug builds assert that it really is MLOG_1BYTE */ - ut_ad(type == MLOG_1BYTE); - mach_write_to_1(ptr, val); - } -} - -/********************************************************//** -Catenates a compressed ulint to mlog. Does nothing if mlog_open() -returns NULL. */ -UNIV_INLINE -void -mlog_catenate_ulint_compressed( -/*===========================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint val) /*!< in: value to write */ -{ - byte* log_ptr; - - log_ptr = mlog_open(mtr, 10); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - /* Advance by the number of bytes actually written, so that - mlog_close() gets the first unused byte */ - log_ptr += mach_write_compressed(log_ptr, val); - - mlog_close(mtr, log_ptr); -} - -/********************************************************//** -Catenates a compressed 64-bit integer to mlog. Does nothing if -mlog_open() returns NULL. */ -UNIV_INLINE -void -mlog_catenate_ull_compressed( -/*=========================*/ - mtr_t* mtr, /*!< in: mtr */ - ib_uint64_t val) /*!< in: value to write */ -{ - byte* log_ptr; - - log_ptr = mlog_open(mtr, 15); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - /* Advance by the number of bytes actually written, so that - mlog_close() gets the first unused byte */ - log_ptr += mach_ull_write_compressed(log_ptr, val); - - mlog_close(mtr, log_ptr); -} - -/********************************************************//** -Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all -size parameters to mlog_open() should be adjusted accordingly! -@return new value of log_ptr */ -UNIV_INLINE -byte* -mlog_write_initial_log_record_fast( -/*===============================*/ - const byte* ptr, /*!< in: pointer to (inside) a buffer - frame holding the file page where - modification is made */ - byte type, /*!< in: log item type: MLOG_1BYTE, ... */ - byte* log_ptr,/*!< in: pointer to mtr log which has - been opened */ - mtr_t* mtr) /*!< in: mtr */ -{ -#ifdef UNIV_DEBUG - buf_block_t* block; -#endif - const byte* page; - ulint space; - ulint offset; - - ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX)); - ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); - ut_ad(ptr && log_ptr); - - page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE); - space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - offset = mach_read_from_4(page + FIL_PAGE_OFFSET); - - /* check whether the page is in the doublewrite buffer; - the doublewrite buffer is located in pages - FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the - system tablespace */ - if (space == TRX_SYS_SPACE - && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) { - if (buf_dblwr_being_created) { - /* Do nothing: we only come to this branch in an - InnoDB database creation. We do not redo log - anything for the doublewrite buffer pages. 
*/ - return(log_ptr); - } else { - fprintf(stderr, - "Error: trying to redo log a record of type " - "%d on page %lu of space %lu in the " - "doublewrite buffer, continuing anyway.\n" - "Please post a bug report to " - "bugs.mysql.com.\n", - type, offset, space); - ut_ad(0); - } - } - - mach_write_to_1(log_ptr, type); - log_ptr++; - log_ptr += mach_write_compressed(log_ptr, space); - log_ptr += mach_write_compressed(log_ptr, offset); - - mtr->n_log_recs++; - -#ifdef UNIV_LOG_DEBUG - fprintf(stderr, - "Adding to mtr log record type %lu space %lu page no %lu\n", - (ulong) type, space, offset); -#endif - -#ifdef UNIV_DEBUG - /* We now assume that all x-latched pages have been modified! */ - block = (buf_block_t*) buf_block_align(ptr); - - if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) { - - mtr_memo_push(mtr, block, MTR_MEMO_MODIFY); - } -#endif - return(log_ptr); -} - -/********************************************************//** -Writes a log record about an .ibd file create/delete/rename. 
-@return new value of log_ptr */ -UNIV_INLINE -byte* -mlog_write_initial_log_record_for_file_op( -/*======================================*/ - ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id,/*!< in: space id, if applicable */ - ulint page_no,/*!< in: page number (not relevant currently) */ - byte* log_ptr,/*!< in: pointer to mtr log which has been opened */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(log_ptr); - - mach_write_to_1(log_ptr, type); - log_ptr++; - - /* Write the caller-supplied space id and page number in - compressed form, in the same order as - mlog_write_initial_log_record_fast() does */ - log_ptr += mach_write_compressed(log_ptr, space_id); - log_ptr += mach_write_compressed(log_ptr, page_no); - - mtr->n_log_recs++; - - return(log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h deleted file mode 100644 index ef6cd61719d..00000000000 --- a/storage/xtradb/include/mtr0mtr.h +++ /dev/null @@ -1,453 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mtr0mtr.h -Mini-transaction buffer - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#ifndef mtr0mtr_h -#define mtr0mtr_h - -#include "univ.i" -#include "mem0mem.h" -#include "dyn0dyn.h" -#include "buf0types.h" -#include "sync0rw.h" -#include "ut0byte.h" -#include "mtr0types.h" -#include "page0types.h" -#include "trx0types.h" - -/* Logging modes for a mini-transaction */ -#define MTR_LOG_ALL 21 /* default mode: log all operations - modifying disk-based data */ -#define MTR_LOG_NONE 22 /* log no operations */ -#define MTR_LOG_NO_REDO 23 /* Don't generate REDO */ -/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying - file space page allocation data - (operations in fsp0fsp.* ) */ -#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter - form */ - -/* Types for the mlock objects to store in the mtr memo; NOTE that the -first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ -#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH -#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH -#define MTR_MEMO_BUF_FIX RW_NO_LATCH -#ifdef UNIV_DEBUG -# define MTR_MEMO_MODIFY 54 -#endif /* UNIV_DEBUG */ -#define MTR_MEMO_S_LOCK 55 -#define MTR_MEMO_X_LOCK 56 - -/** @name Log item types -The log items are declared 'byte' so that the compiler can warn if val -and type parameters are switched in a call to mlog_write_ulint. NOTE! -For 1 - 8 bytes, the flag value must give the length also! 
@{ */ -#define MLOG_SINGLE_REC_FLAG 128 /*!< if the mtr contains only - one log record for one page, - i.e., write_initial_log_record - has been called only once, - this flag is ORed to the type - of that first log record */ -#define MLOG_1BYTE (1) /*!< one byte is written */ -#define MLOG_2BYTES (2) /*!< 2 bytes ... */ -#define MLOG_4BYTES (4) /*!< 4 bytes ... */ -#define MLOG_8BYTES (8) /*!< 8 bytes ... */ -#define MLOG_REC_INSERT ((byte)9) /*!< record insert */ -#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /*!< mark clustered index record - deleted */ -#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /*!< mark secondary index record - deleted */ -#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /*!< update of a record, - preserves record field sizes */ -#define MLOG_REC_DELETE ((byte)14) /*!< delete a record from a - page */ -#define MLOG_LIST_END_DELETE ((byte)15) /*!< delete record list end on - index page */ -#define MLOG_LIST_START_DELETE ((byte)16) /*!< delete record list start on - index page */ -#define MLOG_LIST_END_COPY_CREATED ((byte)17) /*!< copy record list end to a - new created index page */ -#define MLOG_PAGE_REORGANIZE ((byte)18) /*!< reorganize an - index page in - ROW_FORMAT=REDUNDANT */ -#define MLOG_PAGE_CREATE ((byte)19) /*!< create an index page */ -#define MLOG_UNDO_INSERT ((byte)20) /*!< insert entry in an undo - log */ -#define MLOG_UNDO_ERASE_END ((byte)21) /*!< erase an undo log - page end */ -#define MLOG_UNDO_INIT ((byte)22) /*!< initialize a page in an - undo log */ -#define MLOG_UNDO_HDR_DISCARD ((byte)23) /*!< discard an update undo log - header */ -#define MLOG_UNDO_HDR_REUSE ((byte)24) /*!< reuse an insert undo log - header */ -#define MLOG_UNDO_HDR_CREATE ((byte)25) /*!< create an undo - log header */ -#define MLOG_REC_MIN_MARK ((byte)26) /*!< mark an index - record as the - predefined minimum - record */ -#define MLOG_IBUF_BITMAP_INIT ((byte)27) /*!< initialize an - ibuf bitmap page */ -/*#define MLOG_FULL_PAGE ((byte)28) full contents 
of a page */ -#ifdef UNIV_LOG_LSN_DEBUG -# define MLOG_LSN ((byte)28) /* current LSN */ -#endif -#define MLOG_INIT_FILE_PAGE ((byte)29) /*!< this means that a - file page is taken - into use and the prior - contents of the page - should be ignored: in - recovery we must not - trust the lsn values - stored to the file - page */ -#define MLOG_WRITE_STRING ((byte)30) /*!< write a string to - a page */ -#define MLOG_MULTI_REC_END ((byte)31) /*!< if a single mtr writes - several log records, - this log record ends the - sequence of these records */ -#define MLOG_DUMMY_RECORD ((byte)32) /*!< dummy log record used to - pad a log block full */ -#define MLOG_FILE_CREATE ((byte)33) /*!< log record about an .ibd - file creation */ -#define MLOG_FILE_RENAME ((byte)34) /*!< log record about an .ibd - file rename */ -#define MLOG_FILE_DELETE ((byte)35) /*!< log record about an .ibd - file deletion */ -#define MLOG_COMP_REC_MIN_MARK ((byte)36) /*!< mark a compact - index record as the - predefined minimum - record */ -#define MLOG_COMP_PAGE_CREATE ((byte)37) /*!< create a compact - index page */ -#define MLOG_COMP_REC_INSERT ((byte)38) /*!< compact record insert */ -#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39) - /*!< mark compact - clustered index record - deleted */ -#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact - secondary index record - deleted; this log - record type is - redundant, as - MLOG_REC_SEC_DELETE_MARK - is independent of the - record format. 
*/ -#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a - compact record, - preserves record field - sizes */ -#define MLOG_COMP_REC_DELETE ((byte)42) /*!< delete a compact record - from a page */ -#define MLOG_COMP_LIST_END_DELETE ((byte)43) /*!< delete compact record list - end on index page */ -#define MLOG_COMP_LIST_START_DELETE ((byte)44) /*!< delete compact record list - start on index page */ -#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45) - /*!< copy compact - record list end to a - newly created index - page */ -#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /*!< reorganize an index page */ -#define MLOG_FILE_CREATE2 ((byte)47) /*!< log record about creating - an .ibd file, with format */ -#define MLOG_ZIP_WRITE_NODE_PTR ((byte)48) /*!< write the node pointer of - a record on a compressed - non-leaf B-tree page */ -#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)49) /*!< write the BLOB pointer - of an externally stored column - on a compressed page */ -#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page - header */ -#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */ -#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA ((byte)52)/*!< compress an index page - without logging its image */ -#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53) /*!< reorganize a compressed - page */ -#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value of the types - listed above (used in - assertions); - MLOG_FILE_WRITE_CRYPT_DATA - below is larger and is - tested separately with - EXTRA_CHECK_MLOG_NUMBER() */ - -#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100) /*!< log record for - writing/updating crypt data of - a tablespace */ - -#define EXTRA_CHECK_MLOG_NUMBER(x) \ - ((x) == MLOG_FILE_WRITE_CRYPT_DATA) - -/* @} */ - -/** @name Flags for MLOG_FILE operations -(stored in the page number parameter, called log_flags in the -functions). The page number parameter was originally written as 0.
@{ */ -#define MLOG_FILE_FLAG_TEMP 1 /*!< identifies TEMPORARY TABLE in - MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */ -/* @} */ - -/* included here because it needs MLOG_LSN defined */ -#include "log0log.h" - -/***************************************************************//** -Starts a mini-transaction. */ -UNIV_INLINE -void -mtr_start_trx( -/*======*/ - mtr_t* mtr, /*!< out: mini-transaction */ - trx_t* trx) /*!< in: transaction */ - __attribute__((nonnull (1))); -/***************************************************************//** -Starts a mini-transaction. */ -UNIV_INLINE -void -mtr_start( -/*======*/ - mtr_t* mtr) /*!< out: mini-transaction */ -{ - mtr_start_trx(mtr, NULL); -} - MY_ATTRIBUTE((nonnull)) -/***************************************************************//** -Commits a mini-transaction. */ -UNIV_INTERN -void -mtr_commit( -/*=======*/ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/**********************************************************//** -Sets and returns a savepoint in mtr. -@return savepoint */ -UNIV_INLINE -ulint -mtr_set_savepoint( -/*==============*/ - mtr_t* mtr); /*!< in: mtr */ -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Releases the (index tree) s-latch stored in an mtr memo after a -savepoint. */ -UNIV_INLINE -void -mtr_release_s_latch_at_savepoint( -/*=============================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint savepoint, /*!< in: savepoint */ - prio_rw_lock_t* lock); /*!< in: latch to release */ -#else /* !UNIV_HOTBACKUP */ -# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************//** -Releases a buf_page stored in an mtr memo after a -savepoint. 
*/ -UNIV_INTERN -void -mtr_release_buf_page_at_savepoint( -/*=============================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint savepoint, /*!< in: savepoint */ - buf_block_t* block); /*!< in: block to release */ - -/***************************************************************//** -Gets the logging mode of a mini-transaction. -@return logging mode: MTR_LOG_NONE, ... */ -UNIV_INLINE -ulint -mtr_get_log_mode( -/*=============*/ - mtr_t* mtr); /*!< in: mtr */ -/***************************************************************//** -Changes the logging mode of a mini-transaction. -@return old mode */ -UNIV_INLINE -ulint -mtr_set_log_mode( -/*=============*/ - mtr_t* mtr, /*!< in: mtr */ - ulint mode); /*!< in: logging mode: MTR_LOG_NONE, ... */ -/********************************************************//** -Reads 1 - 4 bytes from a file page buffered in the buffer pool. -@return value read */ -UNIV_INTERN -ulint -mtr_read_ulint( -/*===========*/ - const byte* ptr, /*!< in: pointer from where to read */ - ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -This macro locks an rw-lock in s-mode. */ -#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\ - (MTR)) -/*********************************************************************//** -This macro locks an rw-lock in x-mode. */ -#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\ - (MTR)) -/*********************************************************************//** -NOTE! Use the macro above! -Locks a lock in s-mode. */ -UNIV_INLINE -void -mtr_s_lock_func( -/*============*/ - prio_rw_lock_t* lock, /*!< in: rw-lock */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line number */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************************//** -NOTE! Use the macro above! 
-Locks a lock in x-mode. */ -UNIV_INLINE -void -mtr_x_lock_func( -/*============*/ - prio_rw_lock_t* lock, /*!< in: rw-lock */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line number */ - mtr_t* mtr); /*!< in: mtr */ -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************//** -Releases an object in the memo stack. -@return true if released */ -UNIV_INTERN -bool -mtr_memo_release( -/*=============*/ - mtr_t* mtr, /*!< in/out: mini-transaction */ - void* object, /*!< in: object */ - ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */ - MY_ATTRIBUTE((nonnull)); -#ifdef UNIV_DEBUG -# ifndef UNIV_HOTBACKUP -/**********************************************************//** -Checks if memo contains the given item. -@return TRUE if contains */ -UNIV_INLINE -bool -mtr_memo_contains( -/*==============*/ - mtr_t* mtr, /*!< in: mtr */ - const void* object, /*!< in: object to search */ - ulint type) /*!< in: type of object */ - MY_ATTRIBUTE((warn_unused_result)); - -/**********************************************************//** -Checks if memo contains the given page. -@return TRUE if contains */ -UNIV_INTERN -ibool -mtr_memo_contains_page( -/*===================*/ - mtr_t* mtr, /*!< in: mtr */ - const byte* ptr, /*!< in: pointer to buffer frame */ - ulint type); /*!< in: type of object */ -/*********************************************************//** -Prints info of an mtr handle. */ -UNIV_INTERN -void -mtr_print( -/*======*/ - mtr_t* mtr); /*!< in: mtr */ -# else /* !UNIV_HOTBACKUP */ -# define mtr_memo_contains(mtr, object, type) TRUE -# define mtr_memo_contains_page(mtr, ptr, type) TRUE -# endif /* !UNIV_HOTBACKUP */ -#endif /* UNIV_DEBUG */ -/*######################################################################*/ - -#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */ - -/***************************************************************//** -Returns the log object of a mini-transaction buffer. 
-@return log */ -UNIV_INLINE -dyn_array_t* -mtr_get_log( -/*========*/ - mtr_t* mtr); /*!< in: mini-transaction */ -/***************************************************//** -Pushes an object to an mtr memo stack. */ -UNIV_INLINE -void -mtr_memo_push( -/*==========*/ - mtr_t* mtr, /*!< in: mtr */ - void* object, /*!< in: object */ - ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */ - -/** Mini-transaction memo stack slot. */ -struct mtr_memo_slot_t{ - ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */ - void* object; /*!< pointer to the object */ -}; - -/* Mini-transaction handle and buffer */ -struct mtr_t{ -#ifdef UNIV_DEBUG - ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */ -#endif - dyn_array_t memo; /*!< memo stack for locks etc. */ - dyn_array_t log; /*!< mini-transaction log */ - unsigned inside_ibuf:1; - /*!< TRUE if inside ibuf changes */ - unsigned modifications:1; - /*!< TRUE if the mini-transaction - modified buffer pool pages */ - unsigned made_dirty:1; - /*!< TRUE if mtr has made at least - one buffer pool page dirty */ - ulint n_log_recs; - /* count of how many page initial log records - have been written to the mtr log */ - ulint n_freed_pages; - /* number of pages that have been freed in - this mini-transaction */ - ulint log_mode; /* specifies which operations should be - logged; default value MTR_LOG_ALL */ - lsn_t start_lsn;/* start lsn of the possible log entry for - this mtr */ - lsn_t end_lsn;/* end lsn of the possible log entry for - this mtr */ -#ifdef UNIV_DEBUG - ulint magic_n; -#endif /* UNIV_DEBUG */ - trx_t* trx; /*!< transaction */ -}; - -#ifdef UNIV_DEBUG -# define MTR_MAGIC_N 54551 -#endif /* UNIV_DEBUG */ - -#define MTR_ACTIVE 12231 -#define MTR_COMMITTING 56456 -#define MTR_COMMITTED 34676 - -#ifndef UNIV_NONINL -#include "mtr0mtr.ic" -#endif - -#endif diff --git a/storage/xtradb/include/mtr0mtr.ic b/storage/xtradb/include/mtr0mtr.ic deleted file mode 100644 index a6d9df09925..00000000000 
--- a/storage/xtradb/include/mtr0mtr.ic +++ /dev/null @@ -1,298 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mtr0mtr.ic -Mini-transaction buffer - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#ifndef UNIV_HOTBACKUP -# include "sync0sync.h" -# include "sync0rw.h" -#endif /* !UNIV_HOTBACKUP */ -#include "mach0data.h" - -/***************************************************//** -Checks if a mini-transaction is dirtying a clean page. -@return TRUE if the mtr is dirtying a clean page. */ -UNIV_INTERN -ibool -mtr_block_dirtied( -/*==============*/ - const buf_block_t* block) /*!< in: block being x-fixed */ - MY_ATTRIBUTE((nonnull,warn_unused_result)); - -/***************************************************************//** -Starts a mini-transaction. 
*/ -UNIV_INLINE -void -mtr_start_trx( -/*======*/ - mtr_t* mtr, /*!< out: mini-transaction */ - trx_t* trx) /*!< in: transaction */ -{ - UNIV_MEM_INVALID(mtr, sizeof *mtr); - - dyn_array_create(&(mtr->memo)); - dyn_array_create(&(mtr->log)); - - mtr->log_mode = MTR_LOG_ALL; - mtr->inside_ibuf = FALSE; - mtr->modifications = FALSE; - mtr->made_dirty = FALSE; - mtr->n_log_recs = 0; - mtr->n_freed_pages = 0; - mtr->trx = trx; - - ut_d(mtr->state = MTR_ACTIVE); - ut_d(mtr->magic_n = MTR_MAGIC_N); -} - -/***************************************************//** -Pushes an object to an mtr memo stack. */ -UNIV_INLINE -void -mtr_memo_push( -/*==========*/ - mtr_t* mtr, /*!< in: mtr */ - void* object, /*!< in: object */ - ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */ -{ - dyn_array_t* memo; - mtr_memo_slot_t* slot; - - ut_ad(object); - ut_ad(type >= MTR_MEMO_PAGE_S_FIX); - ut_ad(type <= MTR_MEMO_X_LOCK); - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - /* If this mtr has x-fixed a clean page then we set - the made_dirty flag. This tells us if we need to - grab log_flush_order_mutex at mtr_commit so that we - can insert the dirtied page to the flush list. */ - if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) { - mtr->made_dirty = - mtr_block_dirtied((const buf_block_t*) object); - } - - memo = &(mtr->memo); - - slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot); - - slot->object = object; - slot->type = type; -} - -/**********************************************************//** -Sets and returns a savepoint in mtr. 
-@return savepoint */ -UNIV_INLINE -ulint -mtr_set_savepoint( -/*==============*/ - mtr_t* mtr) /*!< in: mtr */ -{ - dyn_array_t* memo; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - memo = &(mtr->memo); - - return(dyn_array_get_data_size(memo)); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Releases the (index tree) s-latch stored in an mtr memo after a -savepoint. */ -UNIV_INLINE -void -mtr_release_s_latch_at_savepoint( -/*=============================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint savepoint, /*!< in: savepoint */ - prio_rw_lock_t* lock) /*!< in: latch to release */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - memo = &(mtr->memo); - - ut_ad(dyn_array_get_data_size(memo) > savepoint); - - slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint); - - ut_ad(slot->object == lock); - ut_ad(slot->type == MTR_MEMO_S_LOCK); - - rw_lock_s_unlock(lock); - - slot->object = NULL; -} - -# ifdef UNIV_DEBUG -/**********************************************************//** -Checks if memo contains the given item. 
-@return TRUE if contains */ -UNIV_INLINE -bool -mtr_memo_contains( -/*==============*/ - mtr_t* mtr, /*!< in: mtr */ - const void* object, /*!< in: object to search */ - ulint type) /*!< in: type of object */ -{ - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING); - - for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo); - block; - block = dyn_array_get_prev_block(&mtr->memo, block)) { - const mtr_memo_slot_t* start - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block)); - mtr_memo_slot_t* slot - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block) - + dyn_block_get_used(block)); - - ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t))); - - while (slot-- != start) { - if (object == slot->object && type == slot->type) { - return(true); - } - } - } - - return(false); -} -# endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************************//** -Returns the log object of a mini-transaction buffer. -@return log */ -UNIV_INLINE -dyn_array_t* -mtr_get_log( -/*========*/ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - - return(&(mtr->log)); -} - -/***************************************************************//** -Gets the logging mode of a mini-transaction. -@return logging mode: MTR_LOG_NONE, ... */ -UNIV_INLINE -ulint -mtr_get_log_mode( -/*=============*/ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(mtr); - ut_ad(mtr->log_mode >= MTR_LOG_ALL); - ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS); - - return(mtr->log_mode); -} - -/***************************************************************//** -Changes the logging mode of a mini-transaction. -@return old mode */ -UNIV_INLINE -ulint -mtr_set_log_mode( -/*=============*/ - mtr_t* mtr, /*!< in: mtr */ - ulint mode) /*!< in: logging mode: MTR_LOG_NONE, ... 
*/ -{ - ulint old_mode; - - ut_ad(mtr); - ut_ad(mode >= MTR_LOG_ALL); - ut_ad(mode <= MTR_LOG_SHORT_INSERTS); - - old_mode = mtr->log_mode; - - if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) { - /* Do nothing */ - } else { - mtr->log_mode = mode; - } - - ut_ad(old_mode >= MTR_LOG_ALL); - ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS); - - return(old_mode); -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Locks a lock in s-mode. */ -UNIV_INLINE -void -mtr_s_lock_func( -/*============*/ - prio_rw_lock_t* lock, /*!< in: rw-lock */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line number */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(mtr); - ut_ad(lock); - - rw_lock_s_lock_inline(lock, 0, file, line); - - mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); -} - -/*********************************************************************//** -Locks a lock in x-mode. */ -UNIV_INLINE -void -mtr_x_lock_func( -/*============*/ - prio_rw_lock_t* lock, /*!< in: rw-lock */ - const char* file, /*!< in: file name */ - ulint line, /*!< in: line number */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(mtr); - ut_ad(lock); - - rw_lock_x_lock_inline(lock, 0, file, line); - - mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/mtr0types.h b/storage/xtradb/include/mtr0types.h deleted file mode 100644 index 43368c0b726..00000000000 --- a/storage/xtradb/include/mtr0types.h +++ /dev/null @@ -1,31 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/mtr0types.h -Mini-transaction buffer global types - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#ifndef mtr0types_h -#define mtr0types_h - -struct mtr_t; - -#endif diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h deleted file mode 100644 index b17e09cf0fa..00000000000 --- a/storage/xtradb/include/os0file.h +++ /dev/null @@ -1,1565 +0,0 @@ -/*********************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General -Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -***********************************************************************/ - -/**************************************************//** -@file include/os0file.h -The interface to the operating system file io - -Created 10/21/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0file_h -#define os0file_h - -#include "univ.i" -#include "trx0types.h" - -#ifndef __WIN__ -#include <dirent.h> -#include <sys/stat.h> -#include <time.h> -#endif - -/** File node of a tablespace or the log data space */ -struct fil_node_t; - -extern ibool os_has_said_disk_full; -/** Flag: enable debug printout for asynchronous i/o */ -extern ibool os_aio_print_debug; - -#ifdef __WIN__ - -/** We define always WIN_ASYNC_IO, and check at run-time whether - the OS actually supports it: Win 95 does not, NT does. 
*/ -#define WIN_ASYNC_IO - -/** Use unbuffered I/O */ -#define UNIV_NON_BUFFERED_IO - -#endif - -/** File offset in bytes */ -typedef ib_uint64_t os_offset_t; -#ifdef _WIN32 -# define SRV_PATH_SEPARATOR '\\' -/** File handle */ -typedef HANDLE os_file_t; -/** Convert a C file descriptor to a native file handle -@param fd file descriptor -@return native file handle */ -# define OS_FILE_FROM_FD(fd) reinterpret_cast<HANDLE>(_get_osfhandle(fd)) -#else -# define SRV_PATH_SEPARATOR '/' -/** File handle */ -typedef int os_file_t; -/** Convert a C file descriptor to a native file handle -@param fd file descriptor -@return native file handle */ -# define OS_FILE_FROM_FD(fd) fd -#endif - -/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */ -struct pfs_os_file_t -{ - /** Default constructor */ - pfs_os_file_t() : m_file( -#ifdef _WIN32 - INVALID_HANDLE_VALUE -#else - -1 -#endif - ) -#ifdef UNIV_PFS_IO - , m_psi(NULL) -#endif - {} - - /** The wrapped file handle */ - os_file_t m_file; -#ifdef UNIV_PFS_IO - /** PERFORMANCE_SCHEMA descriptor */ - struct PSI_file *m_psi; -#endif - /** Implicit type conversion. - @return the wrapped file handle */ - operator os_file_t() const { return m_file; } - /** Assignment operator. - @param[in] file file handle to be assigned */ - void operator=(os_file_t file) { m_file = file; } -}; - -/** Umask for creating files */ -extern ulint os_innodb_umask; - -/** The next value should be smaller or equal to the smallest sector size used -on any disk. A log block is required to be a portion of disk which is written -so that if the start and the end of a block get written to disk, then the -whole block gets written. This should be true even in most cases of a crash: -if this fails for a log block, then it is equivalent to a media failure in the -log. 
*/ - -#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size - -/** Options for os_file_create_func @{ */ -enum os_file_create_t { - OS_FILE_OPEN = 51, /*!< to open an existing file (if - doesn't exist, error) */ - OS_FILE_CREATE, /*!< to create new file (if - exists, error) */ - OS_FILE_OVERWRITE, /*!< to create a new file, if exists - the overwrite old file */ - OS_FILE_OPEN_RAW, /*!< to open a raw device or disk - partition */ - OS_FILE_CREATE_PATH, /*!< to create the directories */ - OS_FILE_OPEN_RETRY, /*!< open with retry */ - - /** Flags that can be combined with the above values. Please ensure - that the above values stay below 128. */ - - OS_FILE_ON_ERROR_NO_EXIT = 128, /*!< do not exit on unknown errors */ - OS_FILE_ON_ERROR_SILENT = 256 /*!< don't print diagnostic messages to - the log unless it is a fatal error, - this flag is only used if - ON_ERROR_NO_EXIT is set */ -}; - -/** Options for os_file_advise_func @{ */ -enum os_file_advise_t { - OS_FILE_ADVISE_NORMAL = 1, /*!< no advice on access pattern - (default) */ - OS_FILE_ADVISE_RANDOM = 2, /*!< access in random order */ - OS_FILE_ADVISE_SEQUENTIAL = 4, /*!< access the specified data - sequentially (with lower offsets read - before higher ones) */ - OS_FILE_ADVISE_WILLNEED = 8, /*!< specified data will be accessed - in the near future */ - OS_FILE_ADVISE_DONTNEED = 16, /*!< specified data will not be - accessed in the near future */ - OS_FILE_ADVISE_NOREUSE = 32 /*!< access only once */ -}; - -#define OS_FILE_READ_ONLY 333 -#define OS_FILE_READ_WRITE 444 -#define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */ -#define OS_FILE_READ_WRITE_CACHED 666 /* OS_FILE_READ_WRITE but never - O_DIRECT. Only for - os_file_create_simple_no_error_handling - currently. 
*/ - -/* Options for file_create */ -#define OS_FILE_AIO 61 -#define OS_FILE_NORMAL 62 -/* @} */ - -/** Types for file create @{ */ -#define OS_DATA_FILE 100 -#define OS_LOG_FILE 101 -/* @} */ - -/** Error codes from os_file_get_last_error @{ */ -#define OS_FILE_NAME_TOO_LONG 36 -#define OS_FILE_NOT_FOUND 71 -#define OS_FILE_DISK_FULL 72 -#define OS_FILE_ALREADY_EXISTS 73 -#define OS_FILE_PATH_ERROR 74 -#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources - to become available again */ -#define OS_FILE_SHARING_VIOLATION 76 -#define OS_FILE_ERROR_NOT_SPECIFIED 77 -#define OS_FILE_INSUFFICIENT_RESOURCE 78 -#define OS_FILE_AIO_INTERRUPTED 79 -#define OS_FILE_OPERATION_ABORTED 80 -#define OS_FILE_ACCESS_VIOLATION 81 -#define OS_FILE_OPERATION_NOT_SUPPORTED 125 -#define OS_FILE_ERROR_MAX 200 -/* @} */ - -/** Types for aio operations @{ */ -#define OS_FILE_READ 10 -#define OS_FILE_WRITE 11 - -#define OS_FILE_LOG 256 /* This can be ORed to type */ -/* @} */ - -#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more - than 64 */ - -/** Modes for aio operations @{ */ -#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf - pages or ibuf bitmap pages */ -#define OS_AIO_IBUF 22 /*!< Asynchronous i/o for ibuf pages or ibuf - bitmap pages */ -#define OS_AIO_LOG 23 /*!< Asynchronous i/o for the log */ -#define OS_AIO_SYNC 24 /*!< Asynchronous i/o where the calling thread - will itself wait for the i/o to complete, - doing also the job of the i/o-handler thread; - can be used for any pages, ibuf or non-ibuf. - This is used to save CPU time, as we can do - with fewer thread switches. Plain synchronous - i/o is not as good, because it must serialize - the file seek and read or write, causing a - bottleneck for parallelism. 
*/ - -#define OS_AIO_SIMULATED_WAKE_LATER 512 /*!< This can be ORed to mode - in the call of os_aio(...), - if the caller wants to post several i/o - requests in a batch, and only after that - wake the i/o-handler thread; this has - effect only in simulated aio */ -/* @} */ - -#define OS_WIN31 1 /*!< Microsoft Windows 3.x */ -#define OS_WIN95 2 /*!< Microsoft Windows 95 */ -#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */ -#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */ -#define OS_WINXP 5 /*!< Microsoft Windows XP - or Windows Server 2003 */ -#define OS_WINVISTA 6 /*!< Microsoft Windows Vista - or Windows Server 2008 */ -#define OS_WIN7 7 /*!< Microsoft Windows 7 - or Windows Server 2008 R2 */ - - -extern ulint os_n_file_reads; -extern ulint os_n_file_writes; -extern ulint os_n_fsyncs; - -#define OS_MIN_LOG_BLOCK_SIZE 512 - -extern ulint srv_log_block_size; - -#ifdef UNIV_PFS_IO -/* Keys to register InnoDB I/O with performance schema */ -extern mysql_pfs_key_t innodb_file_data_key; -extern mysql_pfs_key_t innodb_file_log_key; -extern mysql_pfs_key_t innodb_file_temp_key; -extern mysql_pfs_key_t innodb_file_bmp_key; - -/* Following four macros are instumentations to register -various file I/O operations with performance schema. -1) register_pfs_file_open_begin() and register_pfs_file_open_end() are -used to register file creation, opening, closing and renaming. 
-2) register_pfs_file_rename_begin() and register_pfs_file_rename_end() -are used to register file renaming -2) register_pfs_file_io_begin() and register_pfs_file_io_end() are -used to register actual file read, write and flush -3) register_pfs_file_close_begin() and register_pfs_file_close_end() -are used to register file deletion operations*/ -# define register_pfs_file_open_begin(state, locker, key, op, name, \ - src_file, src_line) \ -do { \ - locker = PSI_FILE_CALL(get_thread_file_name_locker)( \ - state, key, op, name, &locker); \ - if (locker != NULL) { \ - PSI_FILE_CALL(start_file_open_wait)( \ - locker, src_file, src_line); \ - } \ -} while (0) - -# define register_pfs_file_open_end(locker, file, result) \ -do { \ - if (locker != NULL) { \ - file.m_psi = PSI_FILE_CALL( \ - end_file_open_wait)( \ - locker, result); \ - } \ -} while (0) - -# define register_pfs_file_rename_begin(state, locker, key, op, name, \ - src_file, src_line) \ - register_pfs_file_open_begin(state, locker, key, op, name, \ - src_file, src_line) \ - -# define register_pfs_file_rename_end(locker, result) \ -do { \ - if (locker != NULL) { \ - PSI_FILE_CALL(end_file_open_wait)(locker, result); \ - } \ -} while (0) - -# define register_pfs_file_close_begin(state, locker, key, op, name, \ - src_file, src_line) \ -do { \ - locker = PSI_FILE_CALL(get_thread_file_name_locker)( \ - state, key, op, name, &locker); \ - if (UNIV_LIKELY(locker != NULL)) { \ - PSI_FILE_CALL(start_file_close_wait)( \ - locker, src_file, src_line); \ - } \ -} while (0) - -# define register_pfs_file_close_end(locker, result) \ -do { \ - if (UNIV_LIKELY(locker != NULL)) { \ - PSI_FILE_CALL(end_file_close_wait)( \ - locker, result); \ - } \ -} while (0) - -# define register_pfs_file_io_begin(state, locker, file, count, op, \ - src_file, src_line) \ -do { \ - locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \ - state, file.m_psi, op); \ - if (locker != NULL) { \ - PSI_FILE_CALL(start_file_wait)( \ - locker, count, 
src_file, src_line); \ - } \ -} while (0) - -# define register_pfs_file_io_end(locker, count) \ -do { \ - if (locker != NULL) { \ - PSI_FILE_CALL(end_file_wait)(locker, count); \ - } \ -} while (0) -#endif /* UNIV_PFS_IO */ - -/* Following macros/functions are file I/O APIs that would be performance -schema instrumented if "UNIV_PFS_IO" is defined. They would point to -wrapper functions with performance schema instrumentation in such case. - -os_file_create -os_file_create_simple -os_file_create_simple_no_error_handling -os_file_close -os_file_close_no_error_handling -os_file_rename -os_aio -os_file_read -os_file_read_no_error_handling -os_file_read_no_error_handling_int_fd -os_file_write -os_file_write_int_fd -os_file_set_eof_at -os_file_allocate - -The wrapper functions have the prefix of "innodb_". */ - -#ifdef UNIV_PFS_IO -# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ - pfs_os_file_create_func(key, name, create, purpose, type, \ - success, atomic_writes, __FILE__, __LINE__) - -# define os_file_create_simple(key, name, create, access, success) \ - pfs_os_file_create_simple_func(key, name, create, access, \ - success, __FILE__, __LINE__) - -# define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success, atomic_writes) \ - pfs_os_file_create_simple_no_error_handling_func( \ - key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__) - -# define os_file_close(file) \ - pfs_os_file_close_func(file, __FILE__, __LINE__) - -# define os_file_close_no_error_handling(file) \ - pfs_os_file_close_no_error_handling_func(file, __FILE__, __LINE__) - -# define os_aio(type, is_log, mode, name, file, buf, offset, \ - n, page_size, message1, message2, space_id, \ - trx, write_size) \ - pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \ - n, page_size, message1, message2, space_id, trx, write_size, \ - __FILE__, __LINE__) - -# define os_file_read(file, buf, offset, n) \ - 
pfs_os_file_read_func(file, buf, offset, n, NULL, \ - __FILE__, __LINE__) - -# define os_file_read_trx(file, buf, offset, n, trx) \ - pfs_os_file_read_func(file, buf, offset, n, trx, \ - __FILE__, __LINE__) - -# define os_file_read_no_error_handling(file, buf, offset, n) \ - pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ - __FILE__, __LINE__) - -# define os_file_read_no_error_handling_int_fd( \ - file, buf, offset, n) \ - pfs_os_file_read_no_error_handling_int_fd_func( \ - file, buf, offset, n, __FILE__, __LINE__) - -# define os_file_write(name, file, buf, offset, n) \ - pfs_os_file_write_func(name, file, buf, offset, \ - n, __FILE__, __LINE__) - -# define os_file_write_int_fd(name, file, buf, offset, n) \ - pfs_os_file_write_int_fd_func(name, file, buf, offset, \ - n, __FILE__, __LINE__) - -# define os_file_flush(file) \ - pfs_os_file_flush_func(file, __FILE__, __LINE__) - -# define os_file_rename(key, oldpath, newpath) \ - pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__) - -# define os_file_delete(key, name) \ - pfs_os_file_delete_func(key, name, __FILE__, __LINE__) - -# define os_file_delete_if_exists(key, name) \ - pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__) - -# define os_file_set_eof_at(file, new_len) \ - pfs_os_file_set_eof_at_func(file, new_len, __FILE__, __LINE__) - -# ifdef HAVE_POSIX_FALLOCATE -# define os_file_allocate(file, offset, len) \ - pfs_os_file_allocate_func(file, offset, len, __FILE__, __LINE__) -# endif - -#else /* UNIV_PFS_IO */ - -/* If UNIV_PFS_IO is not defined, these I/O APIs point -to original un-instrumented file I/O APIs */ -# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ - os_file_create_func(name, create, purpose, type, success, atomic_writes) - -# define os_file_create_simple(key, name, create_mode, access, success) \ - os_file_create_simple_func(name, create_mode, access, success) - -# define os_file_create_simple_no_error_handling( \ - 
key, name, create_mode, access, success, atomic_writes) \ - os_file_create_simple_no_error_handling_func( \ - name, create_mode, access, success, atomic_writes) - -# define os_file_close(file) \ - os_file_close_func(file) - -# define os_file_close_no_error_handling(file) \ - os_file_close_no_error_handling_func(file) - -# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \ - message2, space_id, trx, write_size) \ - os_aio_func(type, is_log, mode, name, file, buf, offset, n, \ - page_size, message1, message2, space_id, trx, write_size) - -# define os_file_read(file, buf, offset, n) \ - os_file_read_func(file, buf, offset, n, NULL) - -# define os_file_read_trx(file, buf, offset, n, trx) \ - os_file_read_func(file, buf, offset, n, trx) - -# define os_file_read_no_error_handling(file, buf, offset, n) \ - os_file_read_no_error_handling_func(file, buf, offset, n) -# define os_file_read_no_error_handling_int_fd( \ - file, buf, offset, n) \ - os_file_read_no_error_handling_func(file, buf, offset, n) - -# define os_file_write_int_fd(name, file, buf, offset, n) \ - os_file_write_func(name, file, buf, offset, n) -# define os_file_write(name, file, buf, offset, n) \ - os_file_write_func(name, file, buf, offset, n) - - -# define os_file_flush(file) os_file_flush_func(file) - -# define os_file_rename(key, oldpath, newpath) \ - os_file_rename_func(oldpath, newpath) - -# define os_file_delete(key, name) os_file_delete_func(name) - -# define os_file_delete_if_exists(key, name) \ - os_file_delete_if_exists_func(name) - -# define os_file_set_eof_at(file, new_len) \ - os_file_set_eof_at_func(file, new_len) - -#endif /* UNIV_PFS_IO */ - -/* File types for directory entry data type */ - -enum os_file_type_t { - OS_FILE_TYPE_UNKNOWN = 0, - OS_FILE_TYPE_FILE, /* regular file - (or a character/block device) */ - OS_FILE_TYPE_DIR, /* directory */ - OS_FILE_TYPE_LINK /* symbolic link */ -}; - -/* Maximum path string length in bytes when referring to tables 
with in the -'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers -of this size from the thread stack; that is why this should not be made much -bigger than 4000 bytes */ -#define OS_FILE_MAX_PATH 4000 - -/** Struct used in fetching information of a file in a directory */ -struct os_file_stat_t { - char name[OS_FILE_MAX_PATH]; /*!< path to a file */ - os_file_type_t type; /*!< file type */ - ib_int64_t size; /*!< file size */ - time_t ctime; /*!< creation time */ - time_t mtime; /*!< modification time */ - time_t atime; /*!< access time */ - bool rw_perm; /*!< true if can be opened - in read-write mode. Only valid - if type == OS_FILE_TYPE_FILE */ -}; - -#ifdef __WIN__ -typedef HANDLE os_file_dir_t; /*!< directory stream */ -#else -typedef DIR* os_file_dir_t; /*!< directory stream */ -#endif - -#ifdef __WIN__ -/***********************************************************************//** -Gets the operating system version. Currently works only on Windows. -@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA, -OS_WIN7. */ -UNIV_INTERN -ulint -os_get_os_version(void); -/*===================*/ -#endif /* __WIN__ */ -#ifndef UNIV_HOTBACKUP - - -/** Create a temporary file. This function is like tmpfile(3), but -the temporary file is created in the given parameter path. If the path -is null then it will create the file in the mysql server configuration -parameter (--tmpdir). -@param[in] path location for creating temporary file -@return temporary file handle, or NULL on error */ -UNIV_INTERN -FILE* -os_file_create_tmpfile( - const char* path); - -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************************//** -The os_file_opendir() function opens a directory stream corresponding to the -directory named by the dirname argument. The directory stream is positioned -at the first entry. In both Unix and Windows we automatically skip the '.' -and '..' 
items at the start of the directory listing. -@return directory stream, NULL if error */ -UNIV_INTERN -os_file_dir_t -os_file_opendir( -/*============*/ - const char* dirname, /*!< in: directory name; it must not - contain a trailing '\' or '/' */ - ibool error_is_fatal);/*!< in: TRUE if we should treat an - error as a fatal error; if we try to - open symlinks then we do not wish a - fatal error if it happens not to be - a directory */ -/***********************************************************************//** -Closes a directory stream. -@return 0 if success, -1 if failure */ -UNIV_INTERN -int -os_file_closedir( -/*=============*/ - os_file_dir_t dir); /*!< in: directory stream */ -/***********************************************************************//** -This function returns information of the next file in the directory. We jump -over the '.' and '..' entries in the directory. -@return 0 if ok, -1 if error, 1 if at the end of the directory */ -UNIV_INTERN -int -os_file_readdir_next_file( -/*======================*/ - const char* dirname,/*!< in: directory name or path */ - os_file_dir_t dir, /*!< in: directory stream */ - os_file_stat_t* info); /*!< in/out: buffer where the info is returned */ -/*****************************************************************//** -This function attempts to create a directory named pathname. The new directory -gets default permissions. On Unix, the permissions are (0770 & ~umask). If the -directory exists already, nothing is done and the call succeeds, unless the -fail_if_exists arguments is true. -@return TRUE if call succeeds, FALSE on error */ -UNIV_INTERN -ibool -os_file_create_directory( -/*=====================*/ - const char* pathname, /*!< in: directory name as - null-terminated string */ - ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory - is treated as an error. */ -/****************************************************************//** -NOTE! 
Use the corresponding macro os_file_create_simple(), not directly -this function! -A simple function to open or create a file. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INTERN -os_file_t -os_file_create_simple_func( -/*=======================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY or - OS_FILE_READ_WRITE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes);/*!< in: atomic writes table option - value */ -/****************************************************************//** -NOTE! Use the corresponding macro -os_file_create_simple_no_error_handling(), not directly this function! -A simple function to open or create a file. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INTERN -pfs_os_file_t -os_file_create_simple_no_error_handling_func( -/*=========================================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, - OS_FILE_READ_ALLOW_DELETE (used by a backup - program reading the file), or - OS_FILE_READ_WRITE_CACHED (disable O_DIRECT - if it would be enabled otherwise) */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes)/*!< in: atomic writes table option - value */ - __attribute__((nonnull, warn_unused_result)); -/****************************************************************//** -Tries to disable OS caching on an opened file descriptor. 
-@return true if operation is success and false otherwise */ -UNIV_INTERN -bool -os_file_set_nocache( -/*================*/ - os_file_t fd, /*!< in: file descriptor to alter */ - const char* file_name, /*!< in: file name, used in the - diagnostic message */ - const char* operation_name);/*!< in: "open" or "create"; used in the - diagnostic message */ -/****************************************************************//** -NOTE! Use the corresponding macro os_file_create(), not directly -this function! -Opens an existing file or creates a new. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INTERN -pfs_os_file_t -os_file_create_func( -/*================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, - non-buffered i/o is desired, - OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. - and srv_.. variables whether we really use - async i/o or unbuffered i/o: look in the - function source code for the exact rules */ - ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes) /*!< in: atomic writes table option - value */ - __attribute__((nonnull, warn_unused_result)); -/***********************************************************************//** -Deletes a file. The file has to be closed before calling this. -@return TRUE if success */ -UNIV_INTERN -bool -os_file_delete_func( -/*================*/ - const char* name); /*!< in: file path as a null-terminated - string */ - -/***********************************************************************//** -Deletes a file if it exists. The file has to be closed before calling this. 
-@return TRUE if success */ -UNIV_INTERN -bool -os_file_delete_if_exists_func( -/*==========================*/ - const char* name); /*!< in: file path as a null-terminated - string */ -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_rename(), not directly -this function! -Renames a file (can also move it to another directory). It is safest that the -file is closed before calling this function. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_rename_func( -/*================*/ - const char* oldpath, /*!< in: old file path as a - null-terminated string */ - const char* newpath); /*!< in: new file path */ -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_close(), not directly this -function! -Closes a file handle. In case of error, error number can be retrieved with -os_file_get_last_error. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_close_func( -/*===============*/ - os_file_t file); /*!< in, own: handle to a file */ -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_close_no_error_handling(), not -directly this function! -Closes a file handle. In case of error, error number can be retrieved with -os_file_get_last_error. -@return TRUE if success */ -UNIV_INTERN -bool -os_file_close_no_error_handling_func( -/*=================================*/ - os_file_t file); /*!< in, own: handle to a file */ - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_set_eof_at(), not -directly this function! -Truncates a file at the specified position. 
-@return TRUE if success */ -UNIV_INTERN -bool -os_file_set_eof_at_func( - os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len);/*!< in: new file length */ - -#ifdef HAVE_POSIX_FALLOCATE -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_allocate(), not -directly this function! -Ensures that disk space is allocated for the file. -@return TRUE if success */ -UNIV_INTERN -bool -os_file_allocate_func( - os_file_t file, /*!< in, own: handle to a file */ - os_offset_t offset, /*!< in: file region offset */ - os_offset_t len); /*!< in: file region length */ -#endif - -#ifdef UNIV_PFS_IO -/****************************************************************//** -NOTE! Please use the corresponding macro os_file_create_simple(), -not directly this function! -A performance schema instrumented wrapper function for -os_file_create_simple() which opens or creates a file. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INLINE -pfs_os_file_t -pfs_os_file_create_simple_func( -/*===========================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY or - OS_FILE_READ_WRITE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes,/*!< in: atomic writes table option - value */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/****************************************************************//** -NOTE! Please use the corresponding macro -os_file_create_simple_no_error_handling(), not directly this function! 
-A performance schema instrumented wrapper function for -os_file_create_simple_no_error_handling(). Add instrumentation to -monitor file creation/open. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INLINE -pfs_os_file_t -pfs_os_file_create_simple_no_error_handling_func( -/*=============================================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode, /*!< in: file create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, or - OS_FILE_READ_ALLOW_DELETE; the last option is - used by a backup program reading the file */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes,/*!< in: atomic writes table option - value*/ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/****************************************************************//** -NOTE! Please use the corresponding macro os_file_create(), not directly -this function! -A performance schema wrapper function for os_file_create(). -Add instrumentation to monitor file creation/open. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INLINE -pfs_os_file_t -pfs_os_file_create_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: file create mode */ - ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, - non-buffered i/o is desired, - OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. - and srv_.. 
variables whether we really use - async i/o or unbuffered i/o: look in the - function source code for the exact rules */ - ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes,/*!< in: atomic writes table option - value */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_close(), not directly -this function! -A performance schema instrumented wrapper function for os_file_close(). -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_os_file_close_func( -/*===================*/ - pfs_os_file_t file, /*!< in, own: handle to a file */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_close_no_error_handling(), -not directly this function! -A performance schema instrumented wrapper function for -os_file_close_no_error_handling(). -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_close_no_error_handling_func( -/*===================*/ - pfs_os_file_t file, /*!< in, own: handle to a file */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ -/*******************************************************************//** -NOTE! Please use the corresponding macro os_file_read(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_read() which requests a synchronous read operation. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_read_func( -/*==================*/ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - trx_t* trx, /*!< in: trx */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -/*******************************************************************//** -NOTE! Please use the corresponding macro os_file_read_no_error_handling(), -not directly this function! -This is the performance schema instrumented wrapper function for -os_file_read_no_error_handling_func() which requests a synchronous -read operation. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_read_no_error_handling_func( -/*====================================*/ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -/*******************************************************************//** -NOTE! Please use the corresponding macro os_aio(), not directly this -function! -Performance schema wrapper function of os_aio() which requests -an asynchronous i/o operation. -@return TRUE if request was queued successfully, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_aio_func( -/*============*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ - ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */ - ulint mode, /*!< in: OS_AIO_NORMAL etc. 
I/O mode */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read or from which - to write */ - os_offset_t offset, /*!< in: file offset where to read or write */ - ulint n, /*!< in: number of bytes to read or write */ - ulint page_size,/*!< in: page size in bytes */ - fil_node_t* message1,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - void* message2,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - ulint space_id, /*!< in: space id; NOTE(review): not - documented in the original header, - presumably the tablespace id - confirm */ - trx_t* trx, /*!< in: trx */ - ulint* write_size,/*!< in/out: Actual write size initialized - after first successful trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ -/*******************************************************************//** -NOTE! Please use the corresponding macro os_file_write(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_write() which requests a synchronous write operation. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_write_func( -/*===================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - pfs_os_file_t file, /*!< in: handle to a file */ - const void* buf, /*!< in: buffer from which to write */ - os_offset_t offset, /*!< in: file offset where to write */ - ulint n, /*!< in: number of bytes to write */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ -/***********************************************************************//** -NOTE! 
Please use the corresponding macro os_file_flush(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_flush() which flushes the write buffers of a given file to the disk. -Flushes the write buffers of a given file to the disk. -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_os_file_flush_func( -/*===================*/ - pfs_os_file_t file, /*!< in, own: handle to a file */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_rename(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_rename() -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_os_file_rename_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* oldpath,/*!< in: old file path as a null-terminated - string */ - const char* newpath,/*!< in: new file path */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_delete(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_delete() -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_delete_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: path of the file to delete, as a - null-terminated string */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -/***********************************************************************//** -NOTE! 
Please use the corresponding macro os_file_delete_if_exists(), not -directly this function! -This is the performance schema instrumented wrapper function for -os_file_delete_if_exists() -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_delete_if_exists_func( -/*==============================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: path of the file to delete, as a - null-terminated string */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_set_eof_at(), not -directly this function! -This is the performance schema instrumented wrapper function for -os_file_set_eof_at() -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_set_eof_at_func( - pfs_os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len,/*!< in: new file length */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ - -#ifdef HAVE_POSIX_FALLOCATE -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_allocate(), not -directly this function! -This is the performance schema instrumented wrapper function for -os_file_allocate(). Ensures that disk space is allocated for the file. -Only declared when HAVE_POSIX_FALLOCATE is defined. -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_allocate_func( - pfs_os_file_t file, /*!< in, own: handle to a file */ - os_offset_t offset, /*!< in: file region offset */ - os_offset_t len, /*!< in: file region length */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ -#endif - -#endif /* UNIV_PFS_IO */ - -/***********************************************************************//** -Checks if the file is marked as invalid. 
-@return TRUE if invalid */ -UNIV_INTERN -bool -os_file_is_invalid( - pfs_os_file_t file); /*!< in, own: handle to a file */ - -/***********************************************************************//** -Marks the file as invalid. */ -UNIV_INTERN -void -os_file_mark_invalid( - pfs_os_file_t* file); /*!< out: pointer to a handle to a file */ - -/***********************************************************************//** -Announces an intention to access file data in a specific pattern in the -future. -@return TRUE if success */ -UNIV_INTERN -bool -os_file_advise( - pfs_os_file_t file, /*!< in, own: handle to a file */ - os_offset_t offset, /*!< in: file region offset */ - os_offset_t len, /*!< in: file region length */ - ulint advice);/*!< in: advice for access pattern */ - -/***********************************************************************//** -Gets a file size. -@return file size, or (os_offset_t) -1 on failure */ -UNIV_INTERN -os_offset_t -os_file_get_size( -/*=============*/ - pfs_os_file_t file) /*!< in: handle to a file */ - MY_ATTRIBUTE((warn_unused_result)); -/** Set the size of a newly created file. -@param[in] name file name -@param[in] file file handle -@param[in] size desired file size -@param[in] is_sparse whether to create a sparse file (no preallocating); -defaults to false -@return whether the operation succeeded */ -UNIV_INTERN -bool -os_file_set_size( - const char* name, - pfs_os_file_t file, - os_offset_t size, - bool is_sparse = false) - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************************//** -Truncates a file at its current position. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_set_eof( -/*============*/ - FILE* file); /*!< in: file to be truncated */ -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_flush(), not directly this function! -Flushes the write buffers of a given file to the disk. 
-@return TRUE if success */ -UNIV_INTERN -ibool -os_file_flush_func( -/*===============*/ - os_file_t file); /*!< in, own: handle to a file */ -/***********************************************************************//** -Retrieves the last error number if an error occurs in a file io function. -The number should be retrieved before any other OS calls (because they may -overwrite the error number). If the number is not known to this program, -the OS error number + 100 is returned. -@return error number, or OS error number + 100 */ -UNIV_INTERN -ulint -os_file_get_last_error( -/*===================*/ - bool report_all_errors); /*!< in: TRUE if we want an error message - printed of all errors */ -/*******************************************************************//** -NOTE! Use the corresponding macro os_file_read(), not directly this function! -Requests a synchronous read operation. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -os_file_read_func( -/*==============*/ - os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - trx_t* trx); /*!< in: trx */ -/*******************************************************************//** -Rewind file to its start, read at most size - 1 bytes from it to str, and -NUL-terminate str. All errors are silently ignored. This function is -mostly meant to be used with temporary files. */ -UNIV_INTERN -void -os_file_read_string( -/*================*/ - FILE* file, /*!< in: file to read from */ - char* str, /*!< in: buffer where to read */ - ulint size); /*!< in: size of buffer */ -/*******************************************************************//** -NOTE! Use the corresponding macro os_file_read_no_error_handling(), -not directly this function! -Requests a synchronous positioned read operation. This function does not do -any error handling. 
In case of error it returns FALSE. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -os_file_read_no_error_handling_func( -/*================================*/ - os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n); /*!< in: number of bytes to read */ - -/*******************************************************************//** -NOTE! Use the corresponding macro os_file_write(), not directly this -function! -Requests a synchronous write operation. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -os_file_write_func( -/*===============*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - os_file_t file, /*!< in: handle to a file */ - const void* buf, /*!< in: buffer from which to write */ - os_offset_t offset, /*!< in: file offset where to write */ - ulint n); /*!< in: number of bytes to write */ - -/*******************************************************************//** -Check the existence and type of the given file. -@return TRUE if call succeeded */ -UNIV_INTERN -ibool -os_file_status( -/*===========*/ - const char* path, /*!< in: pathname of the file */ - ibool* exists, /*!< out: TRUE if file exists */ - os_file_type_t* type); /*!< out: type of the file (if it exists) */ -/****************************************************************//** -The function os_file_dirname returns a directory component of a -null-terminated pathname string. In the usual case, dirname returns -the string up to, but not including, the final '/', and basename -is the component following the final '/'. Trailing '/' characters -are not counted as part of the pathname. - -If path does not contain a slash, dirname returns the string ".". - -Concatenating the string returned by dirname, a "/", and the basename -yields a complete pathname. 
- -The return value is a copy of the directory component of the pathname. -The copy is allocated from heap. It is the caller responsibility -to free it after it is no longer needed. - -The following list of examples (taken from SUSv2) shows the strings -returned by dirname and basename for different paths: - - path dirname basename - "/usr/lib" "/usr" "lib" - "/usr/" "/" "usr" - "usr" "." "usr" - "/" "/" "/" - "." "." "." - ".." "." ".." - -@return own: directory component of the pathname */ -UNIV_INTERN -char* -os_file_dirname( -/*============*/ - const char* path); /*!< in: pathname */ -/****************************************************************//** -This function returns a new path name after replacing the basename -in an old path with a new basename. The old_path is a full path -name including the extension. The tablename is in the normal -form "databasename/tablename". The new base name is found after -the forward slash. Both input strings are null terminated. - -This function allocates memory to be returned. It is the callers -responsibility to free the return value after it is no longer needed. - -@return own: new full pathname */ -UNIV_INTERN -char* -os_file_make_new_pathname( -/*======================*/ - const char* old_path, /*!< in: pathname */ - const char* new_name); /*!< in: new file name */ -/****************************************************************//** -This function returns a remote path name by combining a data directory -path provided in a DATA DIRECTORY clause with the tablename which is -in the form 'database/tablename'. It strips the file basename (which -is the tablename) found after the last directory in the path provided. -The full filepath created will include the database name as a directory -under the path provided. The filename is the tablename with the '.ibd' -extension. All input and output strings are null-terminated. - -This function allocates memory to be returned. 
It is the callers -responsibility to free the return value after it is no longer needed. - -@return own: A full pathname; data_dir_path/databasename/tablename.ibd */ -UNIV_INTERN -char* -os_file_make_remote_pathname( -/*=========================*/ - const char* data_dir_path, /*!< in: pathname */ - const char* tablename, /*!< in: tablename */ - const char* extention); /*!< in: file extention; ibd,cfg*/ -/****************************************************************//** -This function reduces a null-terminated full remote path name into -the path that is sent by MySQL for DATA DIRECTORY clause. It replaces -the 'databasename/tablename.ibd' found at the end of the path with just -'tablename'. - -Since the result is always smaller than the path sent in, no new memory -is allocated. The caller should allocate memory for the path sent in. -This function manipulates that path in place. - -If the path format is not as expected, just return. The result is used -to inform a SHOW CREATE TABLE command. */ -UNIV_INTERN -void -os_file_make_data_dir_path( -/*========================*/ - char* data_dir_path); /*!< in/out: full path/data_dir_path */ -/****************************************************************//** -Creates all missing subdirectories along the given path. -@return TRUE if call succeeded FALSE otherwise */ -UNIV_INTERN -ibool -os_file_create_subdirs_if_needed( -/*=============================*/ - const char* path); /*!< in: path name */ -/*********************************************************************** -Initializes the asynchronous io system. Creates one array each for ibuf -and log i/o. Also creates one array each for read and write where each -array is divided logically into n_read_segs and n_write_segs -respectively. The caller must create an i/o handler thread for each -segment in these arrays. This function also creates the sync array. 
-No i/o handler thread needs to be created for that */ -UNIV_INTERN -ibool -os_aio_init( -/*========*/ - ulint n_per_seg, /*!< in: maximum number of pending aio - operations allowed per segment */ - ulint n_read_segs, /*!< in: number of reader threads */ - ulint n_write_segs, /*!< in: number of writer threads */ - ulint n_slots_sync); /*!< in: number of slots in the sync aio - array */ -/*********************************************************************** -Frees the asynchronous io system. */ -UNIV_INTERN -void -os_aio_free(void); -/*=============*/ - -/*******************************************************************//** -NOTE! Use the corresponding macro os_aio(), not this function directly! -Requests an asynchronous i/o operation. -@return TRUE if request was queued successfully, FALSE if fail */ -UNIV_INTERN -ibool -os_aio_func( -/*========*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ - ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */ - ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed - to OS_AIO_SIMULATED_WAKE_LATER: the - last flag advises this function not to wake - i/o-handler threads, but the caller will - do the waking explicitly later, in this - way the caller can post several requests in - a batch; NOTE that the batch must not be - so big that it exhausts the slots in aio - arrays! NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! 
*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read or from which - to write */ - os_offset_t offset, /*!< in: file offset where to read or write */ - ulint n, /*!< in: number of bytes to read or write */ - ulint page_size, /*!< in: page size in bytes */ - fil_node_t* message1,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - void* message2,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - ulint space_id, /*!< in: NOTE(review): undocumented in the - original; presumably the tablespace id of - the file -- confirm */ - trx_t* trx, /*!< in: NOTE(review): undocumented in the - original; presumably the transaction that - issues the request -- confirm */ - ulint* write_size);/*!< in/out: Actual write size initialized - after first successful trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ -/************************************************************************//** -Wakes up all async i/o threads so that they know to exit themselves in -shutdown. */ -UNIV_INTERN -void -os_aio_wake_all_threads_at_shutdown(void); -/*=====================================*/ -/************************************************************************//** -Waits until there are no pending writes in os_aio_write_array. There can -be other, synchronous, pending writes. */ -UNIV_INTERN -void -os_aio_wait_until_no_pending_writes(void); -/*=====================================*/ -/**********************************************************************//** -Wakes up simulated aio i/o-handler threads if they have something to do. */ -UNIV_INTERN -void -os_aio_simulated_wake_handler_threads(void); -/*=======================================*/ -#ifdef _WIN32 -/**********************************************************************//** -This function can be called if one wants to post a batch of reads and -prefers an i/o-handler thread to handle them all at once later. 
You must -call os_aio_simulated_wake_handler_threads later to ensure the threads -are not left sleeping! */ -UNIV_INTERN -void -os_aio_simulated_put_read_threads_to_sleep(); -#else /* _WIN32 */ -# define os_aio_simulated_put_read_threads_to_sleep() -#endif /* _WIN32 */ - -#ifdef WIN_ASYNC_IO -/**********************************************************************//** -This function is only used in Windows asynchronous i/o. -Waits for an aio operation to complete. This function is used to wait -for completed requests. The aio array of pending requests is divided -into segments. The thread specifies which segment or slot it wants to wait -for. NOTE: this function will also take care of freeing the aio slot, -therefore no other thread is allowed to do the freeing! -@return TRUE if the aio operation succeeded */ -UNIV_INTERN -ibool -os_aio_windows_handle( -/*==================*/ - ulint segment, /*!< in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads; if - this is ULINT_UNDEFINED, then it means that - sync aio is used, and this parameter is - ignored */ - ulint pos, /*!< in: this parameter is used only in sync - aio: wait for the aio slot at this position */ - fil_node_t**message1, /*!< out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, /*!< out: second of the messages passed with - the aio request; see message1 */ - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id); /*!< out: NOTE(review): undocumented in the - original; presumably the tablespace id of - the completed request -- confirm */ - -#endif /* WIN_ASYNC_IO */ - -/**********************************************************************//** -Does simulated aio. This function should be called by an i/o-handler -thread. 
-@return TRUE if the aio operation succeeded */ -UNIV_INTERN -ibool -os_aio_simulated_handle( -/*====================*/ - ulint segment, /*!< in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads */ - fil_node_t**message1, /*!< out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, /*!< out: second of the messages passed with - the aio request; see message1 */ - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id); /*!< out: NOTE(review): undocumented in the - original; presumably the tablespace id of - the completed request -- confirm */ -/**********************************************************************//** -Validates the consistency of the aio system. -@return TRUE if ok */ -UNIV_INTERN -ibool -os_aio_validate(void); -/*=================*/ -/**********************************************************************//** -Prints info of the aio arrays. */ -UNIV_INTERN -void -os_aio_print( -/*=========*/ - FILE* file); /*!< in: file where to print */ -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN -void -os_aio_refresh_stats(void); -/*======================*/ - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Checks that all slots in the system have been freed, that is, there are -no pending io operations. 
*/ -UNIV_INTERN -ibool -os_aio_all_slots_free(void); -/*=======================*/ -#endif /* UNIV_DEBUG */ - -/*******************************************************************//** -This function returns information about the specified file. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -os_file_get_status( -/*===============*/ - const char* path, /*!< in: pathname of the file */ - os_file_stat_t* stat_info, /*!< out: information of a file in a - directory */ - bool check_rw_perm); /*!< in: for testing whether the - file can be opened in RW mode */ - -#if !defined(UNIV_HOTBACKUP) -/** Create a temporary file in the location specified by the parameter -path. If the path is null, then it will be created in tmpdir. -@param[in] path location for creating temporary file -@return temporary file descriptor, or < 0 on error */ -UNIV_INTERN -int -innobase_mysql_tmpfile( - const char* path); -#endif /* !UNIV_HOTBACKUP */ - - -#if defined(LINUX_NATIVE_AIO) -/************************************************************************** -This function is only used in Linux native asynchronous i/o. -Waits for an aio operation to complete. This function is used to wait -for completed requests. The aio array of pending requests is divided -into segments. The thread specifies which segment or slot it wants to wait -for. NOTE: this function will also take care of freeing the aio slot, -therefore no other thread is allowed to do the freeing! -@return TRUE if the IO was successful */ -UNIV_INTERN -ibool -os_aio_linux_handle( -/*================*/ - ulint global_seg, /*!< in: segment number in the aio array - to wait for; segment 0 is the ibuf - i/o thread, segment 1 is log i/o thread, - then follow the non-ibuf read threads, - and the last are the non-ibuf write - threads. 
*/ - fil_node_t**message1, /*!< out: the messages passed with the */ - void** message2, /*!< aio request; note that in case the - aio operation failed, these output - parameters are valid and can be used to - restart the operation. */ - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id); /*!< out: NOTE(review): undocumented in the - original; presumably the tablespace id of - the completed request -- confirm */ -#endif /* LINUX_NATIVE_AIO */ - -/****************************************************************//** -Does error handling when a file operation fails. -@return TRUE if we should retry the operation */ -ibool -os_file_handle_error_no_exit( -/*=========================*/ - const char* name, /*!< in: name of a file or NULL */ - const char* operation, /*!< in: operation */ - ibool on_error_silent);/*!< in: if TRUE then don't print - any message to the log. */ - - -/***********************************************************************//** -Tries to get the number of bytes per sector from the file system. -@return file block size */ -UNIV_INTERN -ulint -os_file_get_block_size( -/*===================*/ - os_file_t file, /*!< in: handle to a file */ - const char* name); /*!< in: file name */ - -#ifndef UNIV_NONINL -#include "os0file.ic" -#endif /* !UNIV_NONINL */ - -#endif diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic deleted file mode 100644 index 72ac9d9dd6a..00000000000 --- a/storage/xtradb/include/os0file.ic +++ /dev/null @@ -1,629 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0file.ic -The interface to the operating system file io - -Created 2/20/2010 Jimmy Yang -*******************************************************/ - -#include "univ.i" - -#ifdef UNIV_PFS_IO -/****************************************************************//** -NOTE! Please use the corresponding macro os_file_create_simple(), -not directly this function! -A performance schema instrumented wrapper function for -os_file_create_simple() which opens or creates a file. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INLINE -pfs_os_file_t -pfs_os_file_create_simple_func( -/*===========================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY or - OS_FILE_READ_WRITE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes,/*!< in: atomic writes table option - value */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - pfs_os_file_t file; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - /* register a file open or creation depending on "create_mode" */ - register_pfs_file_open_begin(&state, locker, key, - ((create_mode == OS_FILE_CREATE) - ? 
PSI_FILE_CREATE - : PSI_FILE_OPEN), - name, src_file, src_line); - - file = os_file_create_simple_func(name, create_mode, - access_type, success, atomic_writes); - - /* Register psi value for the file */ - register_pfs_file_open_end(locker, file, - (*success == TRUE ? success : 0)); - - return(file); -} - -/****************************************************************//** -NOTE! Please use the corresponding macro -os_file_create_simple_no_error_handling(), not directly this function! -A performance schema instrumented wrapper function for -os_file_create_simple_no_error_handling(). Add instrumentation to -monitor file creation/open. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INLINE -pfs_os_file_t -pfs_os_file_create_simple_no_error_handling_func( -/*=============================================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode, /*!< in: file create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, or - OS_FILE_READ_ALLOW_DELETE; the last option is - used by a backup program reading the file */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes,/*!< in: atomic writes table option - value */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - pfs_os_file_t file; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - /* register a file open or creation depending on "create_mode" */ - register_pfs_file_open_begin(&state, locker, key, - ((create_mode == OS_FILE_CREATE) - ? 
PSI_FILE_CREATE - : PSI_FILE_OPEN), - name, src_file, src_line); - - file = os_file_create_simple_no_error_handling_func( - name, create_mode, access_type, success, atomic_writes); - - register_pfs_file_open_end(locker, file, - (*success == TRUE ? success : 0)); - - return(file); -} - -/****************************************************************//** -NOTE! Please use the corresponding macro os_file_create(), not directly -this function! -A performance schema wrapper function for os_file_create(). -Add instrumentation to monitor file creation/open. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INLINE -pfs_os_file_t -pfs_os_file_create_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: file create mode */ - ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, - non-buffered i/o is desired, - OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. - and srv_.. variables whether we really use - async i/o or unbuffered i/o: look in the - function source code for the exact rules */ - ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes, /*!< in: atomic writes table option - value */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - pfs_os_file_t file; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - /* register a file open or creation depending on "create_mode" */ - register_pfs_file_open_begin(&state, locker, key, - ((create_mode == OS_FILE_CREATE) - ? 
PSI_FILE_CREATE - : PSI_FILE_OPEN), - name, src_file, src_line); - - file = os_file_create_func(name, create_mode, purpose, type, - success, atomic_writes); - - register_pfs_file_open_end(locker, file, - (*success == TRUE ? success : 0)); - - return(file); -} - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_close(), not directly -this function! -A performance schema instrumented wrapper function for os_file_close(). -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_os_file_close_func( -/*===================*/ - pfs_os_file_t file, /*!< in, own: handle to a file */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - /* register the file close */ - register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE, - src_file, src_line); - - result = os_file_close_func(file); - - register_pfs_file_io_end(locker, 0); - - return(result); -} -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_close_no_error_handling(), -not directly this function! -A performance schema instrumented wrapper function for -os_file_close_no_error_handling(). 
-@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_close_no_error_handling_func( -/*===================*/ - pfs_os_file_t file, /*!< in, own: handle to a file */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - bool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - /* register the file close */ - register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE, - src_file, src_line); - - result = os_file_close_no_error_handling_func(file); - - register_pfs_file_io_end(locker, 0); - - return(result); -} - -/*******************************************************************//** -NOTE! Please use the corresponding macro os_aio(), not directly this -function! -Performance schema instrumented wrapper function of os_aio() which -requests an asynchronous i/o operation. -@return TRUE if request was queued successfully, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_aio_func( -/*============*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ - ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */ - ulint mode, /*!< in: OS_AIO_NORMAL etc. 
I/O mode */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read or from which - to write */ - os_offset_t offset, /*!< in: file offset where to read or write */ - ulint n, /*!< in: number of bytes to read or write */ - ulint page_size, /*!< in: page size in bytes */ - fil_node_t* message1,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - void* message2,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - ulint space_id, - trx_t* trx, - ulint* write_size,/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - /* Register the read or write I/O depending on "type" */ - register_pfs_file_io_begin(&state, locker, file, n, - (type == OS_FILE_WRITE) - ? PSI_FILE_WRITE - : PSI_FILE_READ, - src_file, src_line); - - result = os_aio_func(type, is_log, mode, name, file, buf, offset, - n, page_size, message1, message2, space_id, trx, - write_size); - - register_pfs_file_io_end(locker, n); - - return(result); -} - -/*******************************************************************//** -NOTE! Please use the corresponding macro os_file_read(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_read() which requests a synchronous read operation. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_read_func( -/*==================*/ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - trx_t* trx, - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, - src_file, src_line); - - result = os_file_read_func(file, buf, offset, n, trx); - - register_pfs_file_io_end(locker, n); - - return(result); -} - -/*******************************************************************//** -NOTE! Please use the corresponding macro -os_file_read_no_error_handling(), not directly this function! -This is the performance schema instrumented wrapper function for -os_file_read_no_error_handling() which requests a synchronous -positioned read operation. This function does not do any error -handling. In case of error it returns FALSE. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_read_no_error_handling_func( -/*====================================*/ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, - src_file, src_line); - - result = os_file_read_no_error_handling_func(file, buf, offset, n); - - register_pfs_file_io_end(locker, n); - - return(result); -} - -/** NOTE! Please use the corresponding macro -os_file_read_no_error_handling_int_fd(), not directly this function! -This is the performance schema instrumented wrapper function for -os_file_read_no_error_handling_int_fd_func() which requests a -synchronous read operation. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_read_no_error_handling_int_fd_func( - int file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - PSI_file_locker_state state; - struct PSI_file_locker* locker; - - locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( - &state, file, PSI_FILE_READ); - if (locker != NULL) { - PSI_FILE_CALL(start_file_wait)( - locker, n, - __FILE__, __LINE__); - } - ibool result = os_file_read_no_error_handling_func( - OS_FILE_FROM_FD(file), buf, offset, n); - - if (locker != NULL) { - PSI_FILE_CALL(end_file_wait)(locker, n); - } - - return(result); -} - -/*******************************************************************//** -NOTE! Please use the corresponding macro os_file_write(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_write() which requests a synchronous write operation. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_write_func( -/*===================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - pfs_os_file_t file, /*!< in: handle to a file */ - const void* buf, /*!< in: buffer from which to write */ - os_offset_t offset, /*!< in: file offset where to write */ - ulint n, /*!< in: number of bytes to write */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE, - src_file, src_line); - - result = os_file_write_func(name, file, buf, offset, n); - - register_pfs_file_io_end(locker, n); - - return(result); -} - -/** NOTE! Please use the corresponding macro os_file_write(), not -directly this function! -This is the performance schema instrumented wrapper function for -os_file_write() which requests a synchronous write operation. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INLINE -ibool -pfs_os_file_write_int_fd_func( - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - int file, /*!< in: handle to a file */ - const void* buf, /*!< in: buffer from which to write */ - os_offset_t offset, /*!< in: file offset where to write */ - ulint n, /*!< in: number of bytes to write */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - PSI_file_locker_state state; - struct PSI_file_locker* locker = NULL; - - locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( - &state, file, PSI_FILE_WRITE); - if (locker != NULL) { - PSI_FILE_CALL(start_file_wait)( - locker, n, - __FILE__, __LINE__); - } - ibool result = os_file_write_func( - name, OS_FILE_FROM_FD(file), buf, offset, n); - - if (locker != NULL) { - PSI_FILE_CALL(end_file_wait)(locker, n); - } - - return(result); -} - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_flush(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_flush() which flushes the write buffers of a given file to the disk. -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_os_file_flush_func( -/*===================*/ - pfs_os_file_t file, /*!< in, own: handle to a file */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC, - src_file, src_line); - result = os_file_flush_func(file); - - register_pfs_file_io_end(locker, 0); - - return(result); -} - -/***********************************************************************//** -NOTE! 
Please use the corresponding macro os_file_rename(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_rename() -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_os_file_rename_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* oldpath,/*!< in: old file path as a null-terminated - string */ - const char* newpath,/*!< in: new file path */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - ibool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_rename_begin(&state, locker, key, PSI_FILE_RENAME, newpath, - src_file, src_line); - - result = os_file_rename_func(oldpath, newpath); - - register_pfs_file_rename_end(locker, 0); - - return(result); -} - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_delete(), not directly -this function! -This is the performance schema instrumented wrapper function for -os_file_delete() -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_delete_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: file path as a null-terminated - string */ - const char* src_file, /*!< in: file name where func invoked */ - ulint src_line) /*!< in: line where the func invoked */ -{ - bool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE, - name, src_file, src_line); - - result = os_file_delete_func(name); - - register_pfs_file_close_end(locker, 0); - - return(result); -} - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_delete_if_exists(), not -directly this function! 
-This is the performance schema instrumented wrapper function for -os_file_delete_if_exists() -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_delete_if_exists_func( -/*==============================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema Key */ - const char* name, /*!< in: file path as a null-terminated - string */ - const char* src_file, /*!< in: file name where func invoked */ - ulint src_line) /*!< in: line where the func invoked */ -{ - bool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE, - name, src_file, src_line); - - result = os_file_delete_if_exists_func(name); - - register_pfs_file_close_end(locker, 0); - - return(result); -} - -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_set_eof_at(), not -directly this function! -This is the performance schema instrumented wrapper function for -os_file_set_eof_at() -@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_set_eof_at_func( - pfs_os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len,/*!< in: new file length */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - bool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, - src_file, src_line); - result = os_file_set_eof_at_func(file, new_len); - - register_pfs_file_io_end(locker, 0); - - return(result); -} - -#ifdef HAVE_POSIX_FALLOCATE -/***********************************************************************//** -NOTE! Please use the corresponding macro os_file_allocate(), not -directly this function! -Ensures that disk space is allocated for the file. 
-@return TRUE if success */ -UNIV_INLINE -bool -pfs_os_file_allocate_func( - pfs_os_file_t file, /*!< in, own: handle to a file */ - os_offset_t offset, /*!< in: file region offset */ - os_offset_t len, /*!< in: file region length */ - const char* src_file,/*!< in: file name where func invoked */ - ulint src_line)/*!< in: line where the func invoked */ -{ - bool result; - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - - register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, - src_file, src_line); - result = os_file_allocate_func(file, offset, len); - - register_pfs_file_io_end(locker, 0); - - return(result); -} -#endif - -#endif /* UNIV_PFS_IO */ diff --git a/storage/xtradb/include/os0once.h b/storage/xtradb/include/os0once.h deleted file mode 100644 index a8bbaf1d2d4..00000000000 --- a/storage/xtradb/include/os0once.h +++ /dev/null @@ -1,125 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0once.h -A class that aids executing a given function exactly once in a multi-threaded -environment. 
- -Created Feb 20, 2014 Vasil Dimov -*******************************************************/ - -#ifndef os0once_h -#define os0once_h - -#include "univ.i" - -#include "os0sync.h" -#include "ut0ut.h" - -/** Execute a given function exactly once in a multi-threaded environment -or wait for the function to be executed by another thread. - -Example usage: -First the user must create a control variable of type os_once::state_t and -assign it os_once::NEVER_DONE. -Then the user must pass this variable, together with a function to be -executed to os_once::do_or_wait_for_done(). - -Multiple threads can call os_once::do_or_wait_for_done() simultaneously with -the same (os_once::state_t) control variable. The provided function will be -called exactly once and when os_once::do_or_wait_for_done() returns then this -function has completed execution, by this or another thread. In other words -os_once::do_or_wait_for_done() will either execute the provided function or -will wait for its execution to complete if it is already called by another -thread or will do nothing if the function has already completed its execution -earlier. - -This mimics pthread_once(3), but unfortunatelly pthread_once(3) does not -support passing arguments to the init_routine() function. We should use -std::call_once() when we start compiling with C++11 enabled. */ -class os_once { -public: - /** Control variables' state type */ - typedef ib_uint32_t state_t; - - /** Not yet executed. */ - static const state_t NEVER_DONE = 0; - - /** Currently being executed by this or another thread. */ - static const state_t IN_PROGRESS = 1; - - /** Finished execution. */ - static const state_t DONE = 2; - -#ifdef HAVE_ATOMIC_BUILTINS - /** Call a given function or wait its execution to complete if it is - already called by another thread. - @param[in,out] state control variable - @param[in] do_func function to call - @param[in,out] do_func_arg an argument to pass to do_func(). 
*/ - static - void - do_or_wait_for_done( - volatile state_t* state, - void (*do_func)(void*), - void* do_func_arg) - { - /* Avoid calling os_compare_and_swap_uint32() in the most - common case. */ - if (*state == DONE) { - return; - } - - if (os_compare_and_swap_uint32(state, - NEVER_DONE, IN_PROGRESS)) { - /* We are the first. Call the function. */ - - do_func(do_func_arg); - - const bool swapped = os_compare_and_swap_uint32( - state, IN_PROGRESS, DONE); - - ut_a(swapped); - } else { - /* The state is not NEVER_DONE, so either it is - IN_PROGRESS (somebody is calling the function right - now or DONE (it has already been called and completed). - Wait for it to become DONE. */ - for (;;) { - const state_t s = *state; - - switch (s) { - case DONE: - return; - case IN_PROGRESS: - break; - case NEVER_DONE: - /* fall through */ - default: - ut_error; - } - - UT_RELAX_CPU(); - } - } - } -#endif /* HAVE_ATOMIC_BUILTINS */ -}; - -#endif /* os0once_h */ diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h deleted file mode 100644 index 613e3bd6947..00000000000 --- a/storage/xtradb/include/os0proc.h +++ /dev/null @@ -1,77 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0proc.h -The interface to the operating system -process control primitives - -Created 9/30/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0proc_h -#define os0proc_h - -#include "univ.i" - -#ifdef UNIV_LINUX -#include <sys/ipc.h> -#include <sys/shm.h> -#endif - -typedef void* os_process_t; -typedef unsigned long int os_process_id_t; - -extern ibool os_use_large_pages; -/* Large page size. This may be a boot-time option on some platforms */ -extern ulint os_large_page_size; - -/****************************************************************//** -Converts the current process id to a number. It is not guaranteed that the -number is unique. In Linux returns the 'process number' of the current -thread. That number is the same as one sees in 'top', for example. In Linux -the thread id is not the same as one sees in 'top'. -@return process id as a number */ -UNIV_INTERN -ulint -os_proc_get_number(void); -/*====================*/ -/****************************************************************//** -Allocates large pages memory. -@return allocated memory */ -UNIV_INTERN -void* -os_mem_alloc_large( -/*===============*/ - ulint* n); /*!< in/out: number of bytes */ -/****************************************************************//** -Frees large pages memory. 
*/ -UNIV_INTERN -void -os_mem_free_large( -/*==============*/ - void *ptr, /*!< in: pointer returned by - os_mem_alloc_large() */ - ulint size); /*!< in: size returned by - os_mem_alloc_large() */ - -#ifndef UNIV_NONINL -#include "os0proc.ic" -#endif - -#endif diff --git a/storage/xtradb/include/os0proc.ic b/storage/xtradb/include/os0proc.ic deleted file mode 100644 index 506f4f8ce0c..00000000000 --- a/storage/xtradb/include/os0proc.ic +++ /dev/null @@ -1,27 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0proc.ic -The interface to the operating system -process control primitives - -Created 9/30/1995 Heikki Tuuri -*******************************************************/ - - diff --git a/storage/xtradb/include/os0stacktrace.h b/storage/xtradb/include/os0stacktrace.h deleted file mode 100644 index e79347c6189..00000000000 --- a/storage/xtradb/include/os0stacktrace.h +++ /dev/null @@ -1,44 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013 SkySQL Ab. All Rights Reserved. 
- - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -#ifndef os0stacktrace_h -#define os0stacktrace_h - -#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS -#if HAVE_EXECINFO_H -#include <execinfo.h> -#endif -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -/***************************************************************//** -Prints stacktrace for this thread. -*/ -void -os_stacktrace_print( -/*================*/ - int sig_num, /*!< in: signal number */ - siginfo_t* info, /*!< in: signal information */ - void* ucontext);/*!< in: signal context */ - -#endif /* defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS */ -#endif /* os0stacktrace.h */ diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h deleted file mode 100644 index ce03f6a2124..00000000000 --- a/storage/xtradb/include/os0sync.h +++ /dev/null @@ -1,999 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. 
Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0sync.h -The interface to the operating system -synchronization primitives. 
- -Created 9/6/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0sync_h -#define os0sync_h - -#include "univ.i" -#include "ut0lst.h" -#include "sync0types.h" - -#ifdef CPU_LEVEL1_DCACHE_LINESIZE -# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE -#else -# error CPU_LEVEL1_DCACHE_LINESIZE is undefined -#endif /* CPU_LEVEL1_DCACHE_LINESIZE */ - -#ifdef HAVE_WINDOWS_ATOMICS -typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates - on LONG variable */ -#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) -typedef ulint lock_word_t; -#else - -#define IB_LOCK_WORD_IS_BYTE - -typedef byte lock_word_t; - -#endif /* HAVE_WINDOWS_ATOMICS */ - -#ifdef __WIN__ -/** Native event (slow)*/ -typedef HANDLE os_native_event_t; -/** Native mutex */ -typedef CRITICAL_SECTION fast_mutex_t; -/** Native condition variable. */ -typedef CONDITION_VARIABLE os_cond_t; -#else -/** Native mutex */ -typedef pthread_mutex_t fast_mutex_t; -/** Native condition variable */ -typedef pthread_cond_t os_cond_t; -#endif - -/** Structure that includes Performance Schema Probe pfs_psi -in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */ -struct os_fast_mutex_t { - fast_mutex_t mutex; /*!< os_fast_mutex */ -#ifdef UNIV_PFS_MUTEX - struct PSI_mutex* pfs_psi;/*!< The performance schema - instrumentation hook */ -#endif -}; - -/** Operating system event handle */ -typedef struct os_event* os_event_t; - -/** An asynchronous signal sent between threads */ -struct os_event { -#ifdef __WIN__ - HANDLE handle; /*!< kernel event object, slow, - used on older Windows */ -#endif - os_fast_mutex_t os_mutex; /*!< this mutex protects the next - fields */ -private: - /** Masks for the event signal count and set flag in the count_and_set - field */ - static const ib_uint64_t count_mask = 0x7fffffffffffffffULL; - static const ib_uint64_t set_mask = 0x8000000000000000ULL; - - /** The MSB is set whenever when the event is in the 
signaled state, - i.e. a thread does not stop if it tries to wait for this event. Lower - bits are incremented each time the event becomes signaled. */ - ib_uint64_t count_and_set; -public: - os_cond_t cond_var; /*!< condition variable is used in - waiting for the event */ - - /** Initialise count_and_set field */ - void init_count_and_set(void) - { - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. */ - count_and_set = 1; - } - - /** Mark this event as set */ - void set(void) - { - count_and_set |= set_mask; - } - - /** Unmark this event as set */ - void reset(void) - { - count_and_set &= count_mask; - } - - /** Return true if this event is set */ - bool is_set(void) const - { - return count_and_set & set_mask; - } - - /** Bump signal count for this event */ - void inc_signal_count(void) - { - ut_ad(static_cast<ib_uint64_t>(signal_count()) < count_mask); - count_and_set++; - } - - /** Return how many times this event has been signalled */ - ib_int64_t signal_count(void) const - { - return (count_and_set & count_mask); - } -}; - -/** Denotes an infinite delay for os_event_wait_time() */ -#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED - -/** Return value of os_event_wait_time() when the time is exceeded */ -#define OS_SYNC_TIME_EXCEEDED 1 - -/** Operating system mutex handle */ -typedef struct os_mutex_t* os_ib_mutex_t; - -// All the os_*_count variables are accessed atomically - -/** This is incremented by 1 in os_thread_create and decremented by 1 in -os_thread_exit */ -extern ulint os_thread_count; - -extern ulint os_event_count; -extern ulint os_mutex_count; -extern ulint os_fast_mutex_count; - -/*********************************************************//** -Initializes global event and OS 'slow' 
mutex lists. */ -UNIV_INTERN -void -os_sync_init(void); -/*==============*/ - -/** Create an event semaphore, i.e., a semaphore which may just have two -states: signaled and nonsignaled. The created event is manual reset: it must be -reset explicitly by calling sync_os_reset_event. -@param[in,out] event memory block where to create the event */ -UNIV_INTERN -void -os_event_create(os_event_t event); - -/*********************************************************//** -Creates an event semaphore, i.e., a semaphore which may just have two states: -signaled and nonsignaled. The created event is manual reset: it must be reset -explicitly by calling sync_os_reset_event. -@return the event handle */ -UNIV_INTERN -os_event_t -os_event_create(void); -/*==================*/ -/**********************************************************//** -Sets an event semaphore to the signaled state: lets waiting threads -proceed. */ -UNIV_INTERN -void -os_event_set( -/*=========*/ - os_event_t event); /*!< in: event to set */ -/**********************************************************//** -Resets an event semaphore to the nonsignaled state. Waiting threads will -stop to wait for the event. -The return value should be passed to os_even_wait_low() if it is desired -that this thread should not wait in case of an intervening call to -os_event_set() between this os_event_reset() and the -os_event_wait_low() call. See comments for os_event_wait_low(). */ -UNIV_INTERN -ib_int64_t -os_event_reset( -/*===========*/ - os_event_t event); /*!< in: event to reset */ -/**********************************************************//** -Frees an event object. */ -UNIV_INTERN -void -os_event_free( -/*==========*/ - os_event_t event, /*!< in: event to free */ - bool free_memory = true); - /*!< in: if true, deallocate the event memory - block too */ - -/**********************************************************//** -Waits for an event object until it is in the signaled state. 
- -Typically, if the event has been signalled after the os_event_reset() -we'll return immediately because event->is_set == TRUE. -There are, however, situations (e.g.: sync_array code) where we may -lose this information. For example: - -thread A calls os_event_reset() -thread B calls os_event_set() [event->is_set == TRUE] -thread C calls os_event_reset() [event->is_set == FALSE] -thread A calls os_event_wait() [infinite wait!] -thread C calls os_event_wait() [infinite wait!] - -Where such a scenario is possible, to avoid infinite wait, the -value returned by os_event_reset() should be passed in as -reset_sig_count. */ -UNIV_INTERN -void -os_event_wait_low( -/*==============*/ - os_event_t event, /*!< in: event to wait */ - ib_int64_t reset_sig_count);/*!< in: zero or the value - returned by previous call of - os_event_reset(). */ - -#define os_event_wait(event) os_event_wait_low(event, 0) -#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0) - -/**********************************************************//** -Waits for an event object until it is in the signaled state or -a timeout is exceeded. -@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ -UNIV_INTERN -ulint -os_event_wait_time_low( -/*===================*/ - os_event_t event, /*!< in: event to wait */ - ulint time_in_usec, /*!< in: timeout in - microseconds, or - OS_SYNC_INFINITE_TIME */ - ib_int64_t reset_sig_count); /*!< in: zero or the value - returned by previous call of - os_event_reset(). */ -/*********************************************************//** -Creates an operating system mutex semaphore. Because these are slow, the -mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible. -@return the mutex handle */ -UNIV_INTERN -os_ib_mutex_t -os_mutex_create(void); -/*=================*/ -/**********************************************************//** -Acquires ownership of a mutex semaphore. 
*/ -UNIV_INTERN -void -os_mutex_enter( -/*===========*/ - os_ib_mutex_t mutex); /*!< in: mutex to acquire */ -/**********************************************************//** -Releases ownership of a mutex. */ -UNIV_INTERN -void -os_mutex_exit( -/*==========*/ - os_ib_mutex_t mutex); /*!< in: mutex to release */ -/**********************************************************//** -Frees an mutex object. */ -UNIV_INTERN -void -os_mutex_free( -/*==========*/ - os_ib_mutex_t mutex); /*!< in: mutex to free */ -/**********************************************************//** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! -@return 0 if success, != 0 if was reserved by another thread */ -UNIV_INLINE -ulint -os_fast_mutex_trylock( -/*==================*/ - os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */ - -/********************************************************************** -Following os_fast_ mutex APIs would be performance schema instrumented: - -os_fast_mutex_init -os_fast_mutex_lock -os_fast_mutex_unlock -os_fast_mutex_free - -These mutex APIs will point to corresponding wrapper functions that contain -the performance schema instrumentation. - -NOTE! The following macro should be used in mutex operation, not the -corresponding function. */ - -#ifdef UNIV_PFS_MUTEX -# define os_fast_mutex_init(K, M) \ - pfs_os_fast_mutex_init(K, M) - -# define os_fast_mutex_lock(M) \ - pfs_os_fast_mutex_lock(M, __FILE__, __LINE__) - -# define os_fast_mutex_unlock(M) pfs_os_fast_mutex_unlock(M) - -# define os_fast_mutex_free(M) pfs_os_fast_mutex_free(M) - -/*********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly -this function! -A wrapper function for os_fast_mutex_init_func(). Initializes an operating -system fast mutex semaphore. 
*/ -UNIV_INLINE -void -pfs_os_fast_mutex_init( -/*===================*/ - PSI_mutex_key key, /*!< in: Performance Schema - key */ - os_fast_mutex_t* fast_mutex); /*!< out: fast mutex */ -/**********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly -this function! -Wrapper function for pfs_os_fast_mutex_free(). Also destroys the performance -schema probes when freeing the mutex */ -UNIV_INLINE -void -pfs_os_fast_mutex_free( -/*===================*/ - os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to free */ -/**********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly -this function! -Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */ -UNIV_INLINE -void -pfs_os_fast_mutex_lock( -/*===================*/ - os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */ - const char* file_name, /*!< in: file name where - locked */ - ulint line); /*!< in: line where locked */ -/**********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly -this function! -Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. */ -UNIV_INLINE -void -pfs_os_fast_mutex_unlock( -/*=====================*/ - os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to release */ - -#else /* UNIV_PFS_MUTEX */ - -# define os_fast_mutex_init(K, M) \ - os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex) - -# define os_fast_mutex_lock(M) \ - os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex) - -# define os_fast_mutex_unlock(M) \ - os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex) - -# define os_fast_mutex_free(M) \ - os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex) -#endif /* UNIV_PFS_MUTEX */ - -/**********************************************************//** -Acquires ownership of a fast mutex. 
Implies a full memory barrier even on -platforms such as PowerPC where this is not normally required. -@return 0 if success, != 0 if was reserved by another thread */ -UNIV_INLINE -ulint -os_fast_mutex_trylock_full_barrier( -/*==================*/ - os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */ -/**********************************************************//** -Releases ownership of a fast mutex. */ -UNIV_INTERN -void -os_fast_mutex_unlock_func( -/*======================*/ - fast_mutex_t* fast_mutex); /*!< in: mutex to release */ -/**********************************************************//** -Releases ownership of a fast mutex. Implies a full memory barrier even on -platforms such as PowerPC where this is not normally required. */ -UNIV_INTERN -void -os_fast_mutex_unlock_full_barrier( -/*=================*/ - os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */ -/*********************************************************//** -Initializes an operating system fast mutex semaphore. */ -UNIV_INTERN -void -os_fast_mutex_init_func( -/*====================*/ - fast_mutex_t* fast_mutex); /*!< in: fast mutex */ -/**********************************************************//** -Acquires ownership of a fast mutex. */ -UNIV_INTERN -void -os_fast_mutex_lock_func( -/*====================*/ - fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */ -/**********************************************************//** -Frees an mutex object. */ -UNIV_INTERN -void -os_fast_mutex_free_func( -/*====================*/ - fast_mutex_t* fast_mutex); /*!< in: mutex to free */ - -/**********************************************************//** -Atomic compare-and-swap and increment for InnoDB. 
*/ - -#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS) - -# define HAVE_ATOMIC_BUILTINS - -# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE -# define HAVE_ATOMIC_BUILTINS_BYTE -# endif - -# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64 -# define HAVE_ATOMIC_BUILTINS_64 -# endif - -/**********************************************************//** -Returns true if swapped, ptr is pointer to target, old_val is value to -compare to, new_val is the value to swap in. */ - -# define os_compare_and_swap(ptr, old_val, new_val) \ - __sync_bool_compare_and_swap(ptr, old_val, new_val) - -# define os_compare_and_swap_ulint(ptr, old_val, new_val) \ - os_compare_and_swap(ptr, old_val, new_val) - -# define os_compare_and_swap_lint(ptr, old_val, new_val) \ - os_compare_and_swap(ptr, old_val, new_val) - -# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ - os_compare_and_swap(ptr, old_val, new_val) - -# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC -# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - os_compare_and_swap(ptr, old_val, new_val) -# define INNODB_RW_LOCKS_USE_ATOMICS -# define IB_ATOMICS_STARTUP_MSG \ - "Mutexes and rw_locks use GCC atomic builtins" -# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */ -# define IB_ATOMICS_STARTUP_MSG \ - "Mutexes use GCC atomic builtins, rw_locks do not" -# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */ - -/**********************************************************//** -Returns the resulting value, ptr is pointer to target, amount is the -amount of increment. 
*/ - -# define os_atomic_increment(ptr, amount) \ - __sync_add_and_fetch(ptr, amount) - -# define os_atomic_increment_lint(ptr, amount) \ - os_atomic_increment(ptr, amount) - -# define os_atomic_increment_uint32(ptr, amount ) \ - os_atomic_increment(ptr, amount) - -# define os_atomic_increment_ulint(ptr, amount) \ - os_atomic_increment(ptr, amount) - -# define os_atomic_increment_uint64(ptr, amount) \ - os_atomic_increment(ptr, amount) - -/* Returns the resulting value, ptr is pointer to target, amount is the -amount to decrement. */ - -# define os_atomic_decrement(ptr, amount) \ - __sync_sub_and_fetch(ptr, amount) - -# define os_atomic_decrement_uint32(ptr, amount) \ - os_atomic_decrement(ptr, amount) - -# define os_atomic_decrement_lint(ptr, amount) \ - os_atomic_decrement(ptr, amount) - -# define os_atomic_decrement_ulint(ptr, amount) \ - os_atomic_decrement(ptr, amount) - -# define os_atomic_decrement_uint64(ptr, amount) \ - os_atomic_decrement(ptr, amount) - -# if defined(HAVE_ATOMIC_BUILTINS) - -/** Do an atomic test and set. -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(__sync_lock_test_and_set(ptr, 1)); -} - -/** Do an atomic release. -@param[in,out] ptr Memory location to write to -@return the previous value */ -inline -void -os_atomic_clear(volatile lock_word_t* ptr) -{ - __sync_lock_release(ptr); -} - -# elif defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) - -/** Do an atomic test-and-set. -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); -} - -/** Do an atomic clear. 
-@param[in,out] ptr Memory location to set to zero */ -inline -void -os_atomic_clear(volatile lock_word_t* ptr) -{ - __atomic_clear(ptr, __ATOMIC_RELEASE); -} - -# else - -# error "Unsupported platform" - -# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */ - -#if defined(__powerpc__) || defined(__aarch64__) -/* - os_atomic_test_and_set_byte_release() should imply a release barrier before - setting, and a full barrier after. But __sync_lock_test_and_set() is only - documented as an aquire barrier. So on PowerPC we need to add the full - barrier explicitly. */ -# define os_atomic_test_and_set_byte_release(ptr, new_val) \ - do { __sync_lock_release(ptr); \ - __sync_synchronize(); } while (0) -#else -/* - On x86, __sync_lock_test_and_set() happens to be full barrier, due to - LOCK prefix. -*/ -# define os_atomic_test_and_set_byte_release(ptr, new_val) \ - __sync_lock_test_and_set(ptr, (byte) new_val) -#endif -/* - os_atomic_test_and_set_byte_acquire() is a full memory barrier on x86. But - in general, just an aquire barrier should be sufficient. */ -# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ - __sync_lock_test_and_set(ptr, (byte) new_val) - -#elif defined(HAVE_IB_SOLARIS_ATOMICS) - -# define HAVE_ATOMIC_BUILTINS -# define HAVE_ATOMIC_BUILTINS_BYTE -# define HAVE_ATOMIC_BUILTINS_64 - -/* If not compiling with GCC or GCC doesn't support the atomic -intrinsics and running on Solaris >= 10 use Solaris atomics */ - -# include <atomic.h> - -/**********************************************************//** -Returns true if swapped, ptr is pointer to target, old_val is value to -compare to, new_val is the value to swap in. 
*/ - -# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ - (atomic_cas_32(ptr, old_val, new_val) == old_val) - -# define os_compare_and_swap_ulint(ptr, old_val, new_val) \ - (atomic_cas_ulong(ptr, old_val, new_val) == old_val) - -# define os_compare_and_swap_lint(ptr, old_val, new_val) \ - ((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val) - -# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS -# if SIZEOF_PTHREAD_T == 4 -# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - ((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val) -# elif SIZEOF_PTHREAD_T == 8 -# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - ((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val) -# else -# error "SIZEOF_PTHREAD_T != 4 or 8" -# endif /* SIZEOF_PTHREAD_T CHECK */ -# define INNODB_RW_LOCKS_USE_ATOMICS -# define IB_ATOMICS_STARTUP_MSG \ - "Mutexes and rw_locks use Solaris atomic functions" -# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */ -# define IB_ATOMICS_STARTUP_MSG \ - "Mutexes use Solaris atomic functions, rw_locks do not" -# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */ - -/**********************************************************//** -Returns the resulting value, ptr is pointer to target, amount is the -amount of increment. */ - -# define os_atomic_increment_uint32(ptr, amount) \ - atomic_add_32_nv(ptr, amount) - -# define os_atomic_increment_ulint(ptr, amount) \ - atomic_add_long_nv(ptr, amount) - -# define os_atomic_increment_lint(ptr, amount) \ - os_atomic_increment_ulint((ulong_t*) ptr, amount) - -# define os_atomic_increment_uint64(ptr, amount) \ - atomic_add_64_nv((uint64_t *) ptr, amount) - -/* Returns the resulting value, ptr is pointer to target, amount is the -amount to decrement. 
*/ - -# define os_atomic_decrement_uint32(ptr, amount) \ - os_atomic_increment_uint32(ptr, -(amount)) - -# define os_atomic_decrement_lint(ptr, amount) \ - os_atomic_increment_ulint((ulong_t*) ptr, -(amount)) - -# define os_atomic_decrement_ulint(ptr, amount) \ - os_atomic_increment_ulint(ptr, -(amount)) - -# define os_atomic_decrement_uint64(ptr, amount) \ - os_atomic_increment_uint64(ptr, -(amount)) - -# ifdef IB_LOCK_WORD_IS_BYTE - -/** Do an atomic xchg and set to non-zero. -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(atomic_swap_uchar(ptr, 1)); -} - -/** Do an atomic xchg and set to zero. -@param[in,out] ptr Memory location to set to zero -@return the previous value */ -inline -lock_word_t -os_atomic_clear(volatile lock_word_t* ptr) -{ - return(atomic_swap_uchar(ptr, 0)); -} - -# else - -/** Do an atomic xchg and set to non-zero. -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(atomic_swap_ulong(ptr, 1)); -} - -/** Do an atomic xchg and set to zero. -@param[in,out] ptr Memory location to set to zero -@return the previous value */ -inline -lock_word_t -os_atomic_clear(volatile lock_word_t* ptr) -{ - return(atomic_swap_ulong(ptr, 0)); -} - -# endif /* IB_LOCK_WORD_IS_BYTE */ - -# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ - atomic_swap_uchar(ptr, new_val) - -# define os_atomic_test_and_set_byte_release(ptr, new_val) \ - atomic_swap_uchar(ptr, new_val) - -#elif defined(HAVE_WINDOWS_ATOMICS) - -# define HAVE_ATOMIC_BUILTINS -# define HAVE_ATOMIC_BUILTINS_BYTE -# define HAVE_ATOMIC_BUILTINS_64 - -/**********************************************************//** -Atomic compare and exchange of signed integers (both 32 and 64 bit). -@return value found before the exchange. 
-If it is not equal to old_value the exchange did not happen. */ -UNIV_INLINE -lint -win_cmp_and_xchg_lint( -/*==================*/ - volatile lint* ptr, /*!< in/out: source/destination */ - lint new_val, /*!< in: exchange value */ - lint old_val); /*!< in: value to compare to */ - -/**********************************************************//** -Atomic addition of signed integers. -@return Initial value of the variable pointed to by ptr */ -UNIV_INLINE -lint -win_xchg_and_add( -/*=============*/ - volatile lint* ptr, /*!< in/out: address of destination */ - lint val); /*!< in: number to be added */ - -/**********************************************************//** -Atomic compare and exchange of unsigned integers. -@return value found before the exchange. -If it is not equal to old_value the exchange did not happen. */ -UNIV_INLINE -ulint -win_cmp_and_xchg_ulint( -/*===================*/ - volatile ulint* ptr, /*!< in/out: source/destination */ - ulint new_val, /*!< in: exchange value */ - ulint old_val); /*!< in: value to compare to */ - -/**********************************************************//** -Atomic compare and exchange of 32 bit unsigned integers. -@return value found before the exchange. -If it is not equal to old_value the exchange did not happen. */ -UNIV_INLINE -DWORD -win_cmp_and_xchg_dword( -/*===================*/ - volatile DWORD* ptr, /*!< in/out: source/destination */ - DWORD new_val, /*!< in: exchange value */ - DWORD old_val); /*!< in: value to compare to */ - -/**********************************************************//** -Returns true if swapped, ptr is pointer to target, old_val is value to -compare to, new_val is the value to swap in. 
*/ - -# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ - (InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \ - new_val, old_val) == old_val) - -# define os_compare_and_swap_ulint(ptr, old_val, new_val) \ - (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val) - -# define os_compare_and_swap_lint(ptr, old_val, new_val) \ - (win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val) - -/* windows thread objects can always be passed to windows atomic functions */ -# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val) - -# define INNODB_RW_LOCKS_USE_ATOMICS -# define IB_ATOMICS_STARTUP_MSG \ - "Mutexes and rw_locks use Windows interlocked functions" - -/**********************************************************//** -Returns the resulting value, ptr is pointer to target, amount is the -amount of increment. */ - -# define os_atomic_increment_lint(ptr, amount) \ - (win_xchg_and_add(ptr, amount) + amount) - -# define os_atomic_increment_uint32(ptr, amount) \ - ((ulint) InterlockedExchangeAdd((long*) ptr, amount)) - -# define os_atomic_increment_ulint(ptr, amount) \ - ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount)) - -# define os_atomic_increment_uint64(ptr, amount) \ - ((ib_uint64_t) (InterlockedExchangeAdd64( \ - (ib_int64_t*) ptr, \ - (ib_int64_t) amount) + amount)) - -/**********************************************************//** -Returns the resulting value, ptr is pointer to target, amount is the -amount to decrement. 
There is no atomic substract function on Windows */ - -# define os_atomic_decrement_uint32(ptr, amount) \ - ((ulint) InterlockedExchangeAdd((long*) ptr, (-amount))) - -# define os_atomic_decrement_lint(ptr, amount) \ - (win_xchg_and_add(ptr, -(lint) amount) - amount) - -# define os_atomic_decrement_ulint(ptr, amount) \ - ((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount)) - -# define os_atomic_decrement_uint64(ptr, amount) \ - ((ib_uint64_t) (InterlockedExchangeAdd64( \ - (ib_int64_t*) ptr, \ - -(ib_int64_t) amount) - amount)) - -/** Do an atomic test and set. -InterlockedExchange() operates on LONG, and the LONG will be clobbered -@param[in,out] ptr Memory location to set to non-zero -@return the previous value */ -inline -lock_word_t -os_atomic_test_and_set(volatile lock_word_t* ptr) -{ - return(InterlockedExchange(ptr, 1)); -} - -/** Do an atomic release. -InterlockedExchange() operates on LONG, and the LONG will be clobbered -@param[in,out] ptr Memory location to set to zero -@return the previous value */ -inline -lock_word_t -os_atomic_clear(volatile lock_word_t* ptr) -{ - return(InterlockedExchange(ptr, 0)); -} - -# define os_atomic_lock_release_byte(ptr) \ - (void) InterlockedExchange(ptr, 0) - -#else -# define IB_ATOMICS_STARTUP_MSG \ - "Mutexes and rw_locks use InnoDB's own implementation" -#endif -#ifdef HAVE_ATOMIC_BUILTINS -#define os_atomic_inc_ulint(m,v,d) os_atomic_increment_ulint(v, d) -#define os_atomic_dec_ulint(m,v,d) os_atomic_decrement_ulint(v, d) -#else -#define os_atomic_inc_ulint(m,v,d) os_atomic_inc_ulint_func(m, v, d) -#define os_atomic_dec_ulint(m,v,d) os_atomic_dec_ulint_func(m, v, d) -#endif /* HAVE_ATOMIC_BUILTINS */ - -/**********************************************************//** -Following macros are used to update specified counter atomically -if HAVE_ATOMIC_BUILTINS defined. 
Otherwise, use mutex passed in -for synchronization */ -#ifdef HAVE_ATOMIC_BUILTINS -#define os_increment_counter_by_amount(mutex, counter, amount) \ - (void) os_atomic_increment_ulint(&counter, amount) - -#define os_decrement_counter_by_amount(mutex, counter, amount) \ - (void) os_atomic_increment_ulint(&counter, (-((lint) amount))) -#else -#define os_increment_counter_by_amount(mutex, counter, amount) \ - do { \ - mutex_enter(&(mutex)); \ - (counter) += (amount); \ - mutex_exit(&(mutex)); \ - } while (0) - -#define os_decrement_counter_by_amount(mutex, counter, amount) \ - do { \ - ut_a(counter >= amount); \ - mutex_enter(&(mutex)); \ - (counter) -= (amount); \ - mutex_exit(&(mutex)); \ - } while (0) -#endif /* HAVE_ATOMIC_BUILTINS */ - -#define os_inc_counter(mutex, counter) \ - os_increment_counter_by_amount(mutex, counter, 1) - -#define os_dec_counter(mutex, counter) \ - do { \ - os_decrement_counter_by_amount(mutex, counter, 1);\ - } while (0); - -/** barrier definitions for memory ordering */ -#if defined(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) -# define HAVE_MEMORY_BARRIER -# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) -# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE) -# define os_mb __atomic_thread_fence(__ATOMIC_SEQ_CST) - -# define IB_MEMORY_BARRIER_STARTUP_MSG \ - "GCC builtin __atomic_thread_fence() is used for memory barrier" - -#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE) -# define HAVE_MEMORY_BARRIER -# define os_rmb __sync_synchronize() -# define os_wmb __sync_synchronize() -# define os_mb __sync_synchronize() -# define IB_MEMORY_BARRIER_STARTUP_MSG \ - "GCC builtin __sync_synchronize() is used for memory barrier" - -#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS) -# define HAVE_MEMORY_BARRIER -# include <mbarrier.h> -# define os_rmb __machine_r_barrier() -# define os_wmb __machine_w_barrier() -# define os_mb __machine_rw_barrier() -# define IB_MEMORY_BARRIER_STARTUP_MSG \ - "Solaris memory ordering functions are used for memory barrier" - 
-#elif defined(HAVE_WINDOWS_MM_FENCE) -# define HAVE_MEMORY_BARRIER -# include <intrin.h> -# define os_rmb _mm_lfence() -# define os_wmb _mm_sfence() -# define os_mb _mm_mfence() -# define IB_MEMORY_BARRIER_STARTUP_MSG \ - "_mm_lfence() and _mm_sfence() are used for memory barrier" - -#else -# define os_rmb do { } while(0) -# define os_wmb do { } while(0) -# define os_mb do { } while(0) -# define IB_MEMORY_BARRIER_STARTUP_MSG \ - "Memory barrier is not used" -#endif - - -/** Simple counter aligned to CACHE_LINE_SIZE -@tparam Type the integer type of the counter -@tparam atomic whether to use atomic memory access */ -template <typename Type = ulint, bool atomic = false> -struct MY_ALIGNED(CACHE_LINE_SIZE) simple_counter -{ - /** Increment the counter */ - Type inc() { return add(1); } - /** Decrement the counter */ - Type dec() { return sub(1); } - - /** Add to the counter - @param[in] i amount to be added - @return the value of the counter after adding */ - Type add(Type i) - { - compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint)); - if (atomic) { - /* GCC would perform a type check in this code - also in case the template is instantiated with - simple_counter<Type=not_ulint, atomic=false>. - On Solaris, os_atomic_increment_ulint() maps - to atomic_add_long_nv(), which expects the - parameter to be correctly typed. */ - return os_atomic_increment_ulint( - reinterpret_cast<ulint*>(&m_counter), i); - } else { - return m_counter += i; - } - } - /** Subtract from the counter - @param[in] i amount to be subtracted - @return the value of the counter after adding */ - Type sub(Type i) - { - compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint)); - if (atomic) { - return os_atomic_decrement_ulint(&m_counter, i); - } else { - return m_counter -= i; - } - } - - /** @return the value of the counter (non-atomic access)! 
*/ - operator Type() const { return m_counter; } - -private: - /** The counter */ - Type m_counter; -}; - -#ifndef UNIV_NONINL -#include "os0sync.ic" -#endif - -#endif diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic deleted file mode 100644 index 5f4b0d24089..00000000000 --- a/storage/xtradb/include/os0sync.ic +++ /dev/null @@ -1,265 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0sync.ic -The interface to the operating system synchronization primitives. - -Created 9/6/1995 Heikki Tuuri -*******************************************************/ - -#ifdef __WIN__ -#include <winbase.h> -#endif - -/**********************************************************//** -Acquires ownership of a fast mutex. 
-@return 0 if success, != 0 if was reserved by another thread */ -UNIV_INLINE -ulint -os_fast_mutex_trylock( -/*==================*/ - os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ -{ - fast_mutex_t* mutex = &fast_mutex->mutex; - -#ifdef __WIN__ - return(!TryEnterCriticalSection(mutex)); -#else - /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock - so that it returns 0 on success. In the operating system - libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and - returns 1 on success (but MySQL remaps that to 0), while Linux, - FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */ - - return((ulint) pthread_mutex_trylock(mutex)); -#endif -} - -#ifdef UNIV_PFS_MUTEX -/*********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly -this function! -A wrapper function for os_fast_mutex_init_func(). Initializes an operating -system fast mutex semaphore. */ -UNIV_INLINE -void -pfs_os_fast_mutex_init( -/*===================*/ - PSI_mutex_key key, /*!< in: Performance Schema - key */ - os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */ -{ -#ifdef HAVE_PSI_MUTEX_INTERFACE - fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex); -#else - fast_mutex->pfs_psi = NULL; -#endif - - os_fast_mutex_init_func(&fast_mutex->mutex); -} -/******************************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly -this function! -Wrapper function for pfs_os_fast_mutex_free(). 
Also destroys the performance -schema probes when freeing the mutex */ -UNIV_INLINE -void -pfs_os_fast_mutex_free( -/*===================*/ - os_fast_mutex_t* fast_mutex) /*!< in/out: mutex */ -{ -#ifdef HAVE_PSI_MUTEX_INTERFACE - if (fast_mutex->pfs_psi != NULL) - PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi); -#endif - fast_mutex->pfs_psi = NULL; - - os_fast_mutex_free_func(&fast_mutex->mutex); -} -/**********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly -this function! -Wrapper function of os_fast_mutex_lock_func. Acquires ownership of a fast -mutex. */ -UNIV_INLINE -void -pfs_os_fast_mutex_lock( -/*===================*/ - os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */ - const char* file_name, /*!< in: file name where - locked */ - ulint line) /*!< in: line where locked */ -{ -#ifdef HAVE_PSI_MUTEX_INTERFACE - if (fast_mutex->pfs_psi != NULL) - { - PSI_mutex_locker* locker; - PSI_mutex_locker_state state; - - locker = PSI_MUTEX_CALL(start_mutex_wait)( - &state, fast_mutex->pfs_psi, - PSI_MUTEX_LOCK, file_name, - static_cast<uint>(line)); - - os_fast_mutex_lock_func(&fast_mutex->mutex); - - if (locker != NULL) - PSI_MUTEX_CALL(end_mutex_wait)(locker, 0); - } - else -#endif - { - os_fast_mutex_lock_func(&fast_mutex->mutex); - } - - return; -} -/**********************************************************//** -NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly -this function! -Wrapper function of os_fast_mutex_unlock_func. Releases ownership of a -fast mutex. 
*/ -UNIV_INLINE -void -pfs_os_fast_mutex_unlock( -/*=====================*/ - os_fast_mutex_t* fast_mutex) /*!< in/out: mutex to release */ -{ -#ifdef HAVE_PSI_MUTEX_INTERFACE - if (fast_mutex->pfs_psi != NULL) - PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi); -#endif - - os_fast_mutex_unlock_func(&fast_mutex->mutex); -} -#endif /* UNIV_PFS_MUTEX */ - -#ifdef HAVE_WINDOWS_ATOMICS - -/* Use inline functions to make 64 and 32 bit versions of windows atomic -functions so that typecasts are evaluated at compile time. Take advantage -that lint is either __int64 or long int and windows atomic functions work -on __int64 and LONG */ - -/**********************************************************//** -Atomic compare and exchange of unsigned integers. -@return value found before the exchange. -If it is not equal to old_value the exchange did not happen. */ -UNIV_INLINE -lint -win_cmp_and_xchg_lint( -/*==================*/ - volatile lint* ptr, /*!< in/out: source/destination */ - lint new_val, /*!< in: exchange value */ - lint old_val) /*!< in: value to compare to */ -{ -# ifdef _WIN64 - return(InterlockedCompareExchange64(ptr, new_val, old_val)); -# else - return(InterlockedCompareExchange(ptr, new_val, old_val)); -# endif -} - -/**********************************************************//** -Atomic addition of signed integers. -@return Initial value of the variable pointed to by ptr */ -UNIV_INLINE -lint -win_xchg_and_add( -/*=============*/ - volatile lint* ptr, /*!< in/out: address of destination */ - lint val) /*!< in: number to be added */ -{ -#ifdef _WIN64 - return(InterlockedExchangeAdd64(ptr, val)); -#else - return(InterlockedExchangeAdd(ptr, val)); -#endif -} - -/**********************************************************//** -Atomic compare and exchange of unsigned integers. -@return value found before the exchange. -If it is not equal to old_value the exchange did not happen. 
*/ -UNIV_INLINE -ulint -win_cmp_and_xchg_ulint( -/*===================*/ - volatile ulint* ptr, /*!< in/out: source/destination */ - ulint new_val, /*!< in: exchange value */ - ulint old_val) /*!< in: value to compare to */ -{ - return((ulint) win_cmp_and_xchg_lint( - (volatile lint*) ptr, - (lint) new_val, - (lint) old_val)); -} - -/**********************************************************//** -Atomic compare and exchange of 32-bit unsigned integers. -@return value found before the exchange. -If it is not equal to old_value the exchange did not happen. */ -UNIV_INLINE -DWORD -win_cmp_and_xchg_dword( -/*===================*/ - volatile DWORD* ptr, /*!< in/out: source/destination */ - DWORD new_val, /*!< in: exchange value */ - DWORD old_val) /*!< in: value to compare to */ -{ - ut_ad(sizeof(DWORD) == sizeof(LONG)); /* We assume this. */ - return(InterlockedCompareExchange( - (volatile LONG*) ptr, - (LONG) new_val, - (LONG) old_val)); -} - -#endif /* HAVE_WINDOWS_ATOMICS */ - -/**********************************************************//** -Acquires ownership of a fast mutex. Implies a full memory barrier even on -platforms such as PowerPC where this is not normally required. -@return 0 if success, != 0 if was reserved by another thread */ -UNIV_INLINE -ulint -os_fast_mutex_trylock_full_barrier( -/*==================*/ - os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ -{ -#ifdef __WIN__ - if (TryEnterCriticalSection(&fast_mutex->mutex)) { - - return(0); - } else { - - return(1); - } -#else - /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock - so that it returns 0 on success. In the operating system - libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and - returns 1 on success (but MySQL remaps that to 0), while Linux, - FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. 
*/ - -#ifdef __powerpc__ - os_mb; -#endif - return((ulint) pthread_mutex_trylock(&fast_mutex->mutex)); -#endif -} diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h deleted file mode 100644 index 7865358b0f7..00000000000 --- a/storage/xtradb/include/os0thread.h +++ /dev/null @@ -1,211 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0thread.h -The interface to the operating system -process and thread control primitives - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0thread_h -#define os0thread_h - -#include "univ.i" - -#ifdef UNIV_LINUX -#include <sys/types.h> -#endif - -/* Maximum number of threads which can be created in the program; -this is also the size of the wait slot array for MySQL threads which -can wait inside InnoDB */ - -#define OS_THREAD_MAX_N srv_max_n_threads - -/* Possible fixed priorities for threads */ -#define OS_THREAD_PRIORITY_NONE 100 -#define OS_THREAD_PRIORITY_BACKGROUND 1 -#define OS_THREAD_PRIORITY_NORMAL 2 -#define OS_THREAD_PRIORITY_ABOVE_NORMAL 
3 - -#ifdef __WIN__ -typedef void* os_thread_t; -typedef DWORD os_thread_id_t; /*!< In Windows the thread id - is an unsigned long int */ -typedef os_thread_id_t os_tid_t; -extern "C" { -typedef LPTHREAD_START_ROUTINE os_thread_func_t; -} - -/** Macro for specifying a Windows thread start function. */ -#define DECLARE_THREAD(func) WINAPI func - -/** Required to get around a build error on Windows. Even though our functions -are defined/declared as WINAPI f(LPVOID a); the compiler complains that they -are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions -don't access the arguments and don't return any value, we should be safe. */ -#define os_thread_create(f,a,i) \ - os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i) - -#else - -typedef pthread_t os_thread_t; -typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread - handle itself as the id of - the thread */ -#ifdef UNIV_LINUX -typedef pid_t os_tid_t; /*!< An alias for pid_t on - Linux, where setpriority() - accepts thread id of this type - and not pthread_t */ -#else -typedef os_thread_id_t os_tid_t; -#endif - -extern "C" { typedef void* (*os_thread_func_t)(void*); } - -/** Macro for specifying a POSIX thread start function. */ -#define DECLARE_THREAD(func) func -#define os_thread_create(f,a,i) os_thread_create_func(f, a, i) - -#endif /* __WIN__ */ - -/* Define a function pointer type to use in a typecast */ -typedef void* (*os_posix_f_t) (void*); - -#ifdef HAVE_PSI_INTERFACE -/* Define for performance schema registration key */ -typedef unsigned int mysql_pfs_key_t; -#endif - -/***************************************************************//** -Compares two thread ids for equality. 
-@return TRUE if equal */ -UNIV_INTERN -ibool -os_thread_eq( -/*=========*/ - os_thread_id_t a, /*!< in: OS thread or thread id */ - os_thread_id_t b); /*!< in: OS thread or thread id */ -/****************************************************************//** -Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is -unique for the thread though! -@return thread identifier as a number */ -UNIV_INTERN -ulint -os_thread_pf( -/*=========*/ - os_thread_id_t a); /*!< in: OS thread identifier */ -/****************************************************************//** -Creates a new thread of execution. The execution starts from -the function given. The start function takes a void* parameter -and returns a ulint. -NOTE: We count the number of threads in os_thread_exit(). A created -thread should always use that to exit and not use return() to exit. -@return handle to the thread */ -UNIV_INTERN -os_thread_t -os_thread_create_func( -/*==================*/ - os_thread_func_t func, /*!< in: pointer to function - from which to start */ - void* arg, /*!< in: argument to start - function */ - os_thread_id_t* thread_id); /*!< out: id of the created - thread, or NULL */ - -/** Waits until the specified thread completes and joins it. -Its return value is ignored. -@param[in,out] thread thread to join */ -UNIV_INTERN -void -os_thread_join( - os_thread_t thread); - -/*****************************************************************//** -Exits the current thread. */ -UNIV_INTERN -void -os_thread_exit( -/*===========*/ - void* exit_value, /*!< in: exit value; in Windows this void* - is cast as a DWORD */ - bool detach = true) /*!< in: if true, the thread will be detached - right before exiting. If false, another thread - is responsible for joining this thread. */ - UNIV_COLD MY_ATTRIBUTE((noreturn)); -/*****************************************************************//** -Returns the thread identifier of current thread. 
-@return current thread identifier */ -UNIV_INTERN -os_thread_id_t -os_thread_get_curr_id(void); -/*========================*/ -/*****************************************************************//** -Returns the system-specific thread identifier of current thread. On Linux, -returns tid. On other systems currently returns os_thread_get_curr_id(). - -@return current thread identifier */ -UNIV_INTERN -os_tid_t -os_thread_get_tid(void); -/*=====================*/ -/*****************************************************************//** -Advises the os to give up remainder of the thread's time slice. */ -UNIV_INTERN -void -os_thread_yield(void); -/*=================*/ -/*****************************************************************//** -The thread sleeps at least the time given in microseconds. */ -UNIV_INTERN -void -os_thread_sleep( -/*============*/ - ulint tm); /*!< in: time in microseconds */ -/*****************************************************************//** -Set relative scheduling priority for a given thread on Linux. Currently a -no-op on other systems. - -@return An actual thread priority after the update */ -UNIV_INTERN -ulint -os_thread_set_priority( -/*===================*/ - os_tid_t thread_id, /*!< in: thread id */ - ulint relative_priority); /*!< in: system-specific - priority value */ - -/*****************************************************************//** -Get priority for a given thread on Linux. Currently a -no-op on other systems. 
- -@return An actual thread priority */ -UNIV_INTERN -ulint -os_thread_get_priority( -/*===================*/ - os_tid_t thread_id); /*!< in: thread id */ - -#ifndef UNIV_NONINL -#include "os0thread.ic" -#endif - -#endif diff --git a/storage/xtradb/include/os0thread.ic b/storage/xtradb/include/os0thread.ic deleted file mode 100644 index 0622d22f2dc..00000000000 --- a/storage/xtradb/include/os0thread.ic +++ /dev/null @@ -1,25 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0thread.ic -The interface to the operating system -process and thread control primitives - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/page0cur.h b/storage/xtradb/include/page0cur.h deleted file mode 100644 index f04667ff29c..00000000000 --- a/storage/xtradb/include/page0cur.h +++ /dev/null @@ -1,387 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/page0cur.h -The page cursor - -Created 10/4/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef page0cur_h -#define page0cur_h - -#include "univ.i" - -#include "buf0types.h" -#include "page0page.h" -#include "rem0rec.h" -#include "data0data.h" -#include "mtr0mtr.h" - - -#define PAGE_CUR_ADAPT - -/* Page cursor search modes; the values must be in this order! */ - -#define PAGE_CUR_UNSUPP 0 -#define PAGE_CUR_G 1 -#define PAGE_CUR_GE 2 -#define PAGE_CUR_L 3 -#define PAGE_CUR_LE 4 -/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in - "column LIKE 'abc%' ORDER BY column DESC"; - we have to find strings which are <= 'abc' or - which extend it */ -#ifdef UNIV_SEARCH_DEBUG -# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */ -#endif /* UNIV_SEARCH_DEBUG */ - -#ifdef UNIV_DEBUG -/*********************************************************//** -Gets pointer to the page frame where the cursor is positioned. 
-@return page */ -UNIV_INLINE -page_t* -page_cur_get_page( -/*==============*/ - page_cur_t* cur); /*!< in: page cursor */ -/*********************************************************//** -Gets pointer to the buffer block where the cursor is positioned. -@return buffer block */ -UNIV_INLINE -buf_block_t* -page_cur_get_block( -/*===============*/ - page_cur_t* cur); /*!< in: page cursor */ -/*********************************************************//** -Gets pointer to the compressed page descriptor of the block where the cursor is positioned. -@return compressed page descriptor */ -UNIV_INLINE -page_zip_des_t* -page_cur_get_page_zip( -/*==================*/ - page_cur_t* cur); /*!< in: page cursor */ -/*********************************************************//** -Gets the record where the cursor is positioned. -@return record */ -UNIV_INLINE -rec_t* -page_cur_get_rec( -/*=============*/ - page_cur_t* cur); /*!< in: page cursor */ -#else /* UNIV_DEBUG */ -# define page_cur_get_page(cur) page_align((cur)->rec) -# define page_cur_get_block(cur) (cur)->block -# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block) -# define page_cur_get_rec(cur) (cur)->rec -#endif /* UNIV_DEBUG */ -/*********************************************************//** -Sets the cursor object to point before the first user record -on the page. */ -UNIV_INLINE -void -page_cur_set_before_first( -/*======================*/ - const buf_block_t* block, /*!< in: index page */ - page_cur_t* cur); /*!< in: cursor */ -/*********************************************************//** -Sets the cursor object to point after the last user record on -the page. */ -UNIV_INLINE -void -page_cur_set_after_last( -/*====================*/ - const buf_block_t* block, /*!< in: index page */ - page_cur_t* cur); /*!< in: cursor */ -/*********************************************************//** -Returns TRUE if the cursor is before first user record on page. 
-@return TRUE if at start */ -UNIV_INLINE -ibool -page_cur_is_before_first( -/*=====================*/ - const page_cur_t* cur); /*!< in: cursor */ -/*********************************************************//** -Returns TRUE if the cursor is after last user record. -@return TRUE if at end */ -UNIV_INLINE -ibool -page_cur_is_after_last( -/*===================*/ - const page_cur_t* cur); /*!< in: cursor */ -/**********************************************************//** -Positions the cursor on the given record. */ -UNIV_INLINE -void -page_cur_position( -/*==============*/ - const rec_t* rec, /*!< in: record on a page */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - page_cur_t* cur); /*!< out: page cursor */ -/**********************************************************//** -Invalidates a page cursor by setting the record pointer NULL. */ -UNIV_INLINE -void -page_cur_invalidate( -/*================*/ - page_cur_t* cur); /*!< out: page cursor */ -/**********************************************************//** -Moves the cursor to the next record on page. */ -UNIV_INLINE -void -page_cur_move_to_next( -/*==================*/ - page_cur_t* cur); /*!< in/out: cursor; must not be after last */ -/**********************************************************//** -Moves the cursor to the previous record on page. */ -UNIV_INLINE -void -page_cur_move_to_prev( -/*==================*/ - page_cur_t* cur); /*!< in/out: cursor; not before first */ -#ifndef UNIV_HOTBACKUP -/***********************************************************//** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same logical position, but the physical position may change if it is -pointing to a compressed page that was reorganized. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. 
-This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return pointer to record if succeed, NULL otherwise */ -UNIV_INLINE -rec_t* -page_cur_tuple_insert( -/*==================*/ - page_cur_t* cursor, /*!< in/out: a page cursor */ - const dtuple_t* tuple, /*!< in: pointer to a data tuple */ - dict_index_t* index, /*!< in: record descriptor */ - ulint** offsets,/*!< in/out: offsets on *rec; the old *offsets is - passed to rec_get_offsets() and replaced by - its result */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap; must not be - NULL itself, but *heap may be NULL, in which - case a heap is created */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************//** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same logical position, but the physical position may change if it is -pointing to a compressed page that was reorganized. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return pointer to record if succeed, NULL otherwise */ -UNIV_INLINE -rec_t* -page_cur_rec_insert( -/*================*/ - page_cur_t* cursor, /*!< in/out: a page cursor */ - const rec_t* rec, /*!< in: record to insert */ - dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */ -/***********************************************************//** -Inserts a record next to page cursor on an uncompressed page. -Returns pointer to inserted record if succeed, i.e., enough -space available, NULL otherwise. The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */ -UNIV_INTERN -rec_t* -page_cur_insert_rec_low( -/*====================*/ - rec_t* current_rec,/*!< in: pointer to current record after - which the new record is inserted */ - dict_index_t* index, /*!< in: record descriptor */ - const rec_t* rec, /*!< in: pointer to a physical record */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result)); -/***********************************************************//** -Inserts a record next to page cursor on a compressed and uncompressed -page. Returns pointer to inserted record if succeed, i.e., -enough space available, NULL otherwise. -The cursor stays at the same position. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return pointer to record if succeed, NULL otherwise */ -UNIV_INTERN -rec_t* -page_cur_insert_rec_zip( -/*====================*/ - page_cur_t* cursor, /*!< in/out: page cursor */ - dict_index_t* index, /*!< in: record descriptor */ - const rec_t* rec, /*!< in: pointer to a physical record */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result)); -/*************************************************************//** -Copies records from page to a newly created page, from a given record onward, -including that record. Infimum and supremum records are not copied. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
*/ -UNIV_INTERN -void -page_copy_rec_list_end_to_created_page( -/*===================================*/ - page_t* new_page, /*!< in/out: index page to copy to */ - rec_t* rec, /*!< in: first record to copy */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr */ -/***********************************************************//** -Deletes a record at the page cursor. The cursor is moved to the -next record after the deleted one. */ -UNIV_INTERN -void -page_cur_delete_rec( -/*================*/ - page_cur_t* cursor, /*!< in/out: a page cursor */ - const dict_index_t* index, /*!< in: record descriptor */ - const ulint* offsets,/*!< in: rec_get_offsets( - cursor->rec, index) */ - mtr_t* mtr); /*!< in: mini-transaction handle */ -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Searches the right position for a page cursor. -@return number of matched fields on the left */ -UNIV_INLINE -ulint -page_cur_search( -/*============*/ - const buf_block_t* block, /*!< in: buffer block */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, - PAGE_CUR_LE, PAGE_CUR_G, or - PAGE_CUR_GE */ - page_cur_t* cursor);/*!< out: page cursor */ -/****************************************************************//** -Searches the right position for a page cursor. 
*/ -UNIV_INTERN -void -page_cur_search_with_match( -/*=======================*/ - const buf_block_t* block, /*!< in: buffer block */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, - PAGE_CUR_LE, PAGE_CUR_G, or - PAGE_CUR_GE */ - ulint* iup_matched_fields, - /*!< in/out: already matched - fields in upper limit record */ - ulint* iup_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - ulint* ilow_matched_fields, - /*!< in/out: already matched - fields in lower limit record */ - ulint* ilow_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - page_cur_t* cursor);/*!< out: page cursor */ -/***********************************************************//** -Positions a page cursor on a randomly chosen user record on a page. If there -are no user records, sets the cursor on the infimum record. */ -UNIV_INTERN -void -page_cur_open_on_rnd_user_rec( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor);/*!< out: page cursor */ -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************//** -Parses a log record of a record insert on a page. -@return end of log record or NULL */ -UNIV_INTERN -byte* -page_cur_parse_insert_rec( -/*======================*/ - ibool is_short,/*!< in: TRUE if short inserts */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: page or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/**********************************************************//** -Parses a log record of copying a record list end to a new created page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_parse_copy_rec_list_to_created_page( -/*=====================================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: page or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/***********************************************************//** -Parses log record of a record delete on a page. -@return pointer to record end or NULL */ -UNIV_INTERN -byte* -page_cur_parse_delete_rec( -/*======================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: page or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/*******************************************************//** -Removes the record from a leaf page. This function does not log -any changes. It is used by the IMPORT tablespace functions. -@return true if success, i.e., the page did not become too empty */ -UNIV_INTERN -bool -page_delete_rec( -/*============*/ - const dict_index_t* index, /*!< in: The index that the record - belongs to */ - page_cur_t* pcur, /*!< in/out: page cursor on record - to delete */ - page_zip_des_t* page_zip,/*!< in: compressed page descriptor */ - const ulint* offsets);/*!< in: offsets for record */ - -/** Index page cursor */ - -struct page_cur_t{ - byte* rec; /*!< pointer to a record on page */ - buf_block_t* block; /*!< pointer to the block containing rec */ -}; - -#ifndef UNIV_NONINL -#include "page0cur.ic" -#endif - -#endif diff --git a/storage/xtradb/include/page0cur.ic b/storage/xtradb/include/page0cur.ic deleted file mode 100644 index 6e068d9f739..00000000000 --- a/storage/xtradb/include/page0cur.ic +++ /dev/null @@ -1,328 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. 
-Copyright (c) 2015, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/page0cur.ic -The page cursor - -Created 10/4/1994 Heikki Tuuri -*************************************************************************/ - -#include "page0page.h" -#include "buf0types.h" - -#ifdef UNIV_DEBUG -# include "rem0cmp.h" - -/*********************************************************//** -Gets pointer to the page frame where the cursor is positioned. -In this debug-only function form, a positioned cursor is additionally -checked to have its record inside the frame of its block. -@return page */ -UNIV_INLINE -page_t* -page_cur_get_page( -/*==============*/ - page_cur_t* cur) /*!< in: page cursor */ -{ - ut_ad(cur); - - if (cur->rec) { - ut_ad(page_align(cur->rec) == cur->block->frame); - } - - return(page_align(cur->rec)); -} - -/*********************************************************//** -Gets pointer to the buffer block where the cursor is positioned. -@return buffer block */ -UNIV_INLINE -buf_block_t* -page_cur_get_block( -/*===============*/ - page_cur_t* cur) /*!< in: page cursor */ -{ - ut_ad(cur); - - if (cur->rec) { - ut_ad(page_align(cur->rec) == cur->block->frame); - } - - return(cur->block); -} - -/*********************************************************//** -Gets pointer to the page frame where the cursor is positioned.
-@return page */ -UNIV_INLINE -page_zip_des_t* -page_cur_get_page_zip( -/*==================*/ - page_cur_t* cur) /*!< in: page cursor */ -{ - return(buf_block_get_page_zip(page_cur_get_block(cur))); -} - -/*********************************************************//** -Gets the record where the cursor is positioned. -@return record */ -UNIV_INLINE -rec_t* -page_cur_get_rec( -/*=============*/ - page_cur_t* cur) /*!< in: page cursor */ -{ - ut_ad(cur); - - if (cur->rec) { - ut_ad(page_align(cur->rec) == cur->block->frame); - } - - return(cur->rec); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************//** -Sets the cursor object to point before the first user record -on the page, i.e., on the page infimum record. */ -UNIV_INLINE -void -page_cur_set_before_first( -/*======================*/ - const buf_block_t* block, /*!< in: index page */ - page_cur_t* cur) /*!< out: cursor */ -{ - cur->block = (buf_block_t*) block; - cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block)); -} - -/*********************************************************//** -Sets the cursor object to point after the last user record on -the page, i.e., on the page supremum record. */ -UNIV_INLINE -void -page_cur_set_after_last( -/*====================*/ - const buf_block_t* block, /*!< in: index page */ - page_cur_t* cur) /*!< out: cursor */ -{ - cur->block = (buf_block_t*) block; - cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block)); -} - -/*********************************************************//** -Returns TRUE if the cursor is before first user record on page. -@return TRUE if at start */ -UNIV_INLINE -ibool -page_cur_is_before_first( -/*=====================*/ - const page_cur_t* cur) /*!< in: cursor */ -{ - ut_ad(cur); - ut_ad(page_align(cur->rec) == cur->block->frame); - return(page_rec_is_infimum(cur->rec)); -} - -/*********************************************************//** -Returns TRUE if the cursor is after last user record.
-@return TRUE if at end */ -UNIV_INLINE -ibool -page_cur_is_after_last( -/*===================*/ - const page_cur_t* cur) /*!< in: cursor */ -{ - ut_ad(cur); - ut_ad(page_align(cur->rec) == cur->block->frame); - return(page_rec_is_supremum(cur->rec)); -} - -/**********************************************************//** -Positions the cursor on the given record. */ -UNIV_INLINE -void -page_cur_position( -/*==============*/ - const rec_t* rec, /*!< in: record on a page */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - page_cur_t* cur) /*!< out: page cursor */ -{ - ut_ad(rec && block && cur); - ut_ad(page_align(rec) == block->frame); - - cur->rec = (rec_t*) rec; - cur->block = (buf_block_t*) block; -} - -/**********************************************************//** -Invalidates a page cursor by setting the record pointer NULL. */ -UNIV_INLINE -void -page_cur_invalidate( -/*================*/ - page_cur_t* cur) /*!< out: page cursor */ -{ - ut_ad(cur); - - cur->rec = NULL; - cur->block = NULL; -} - -/**********************************************************//** -Moves the cursor to the next record on page. */ -UNIV_INLINE -void -page_cur_move_to_next( -/*==================*/ - page_cur_t* cur) /*!< in/out: cursor; must not be after last */ -{ - ut_ad(!page_cur_is_after_last(cur)); - - cur->rec = page_rec_get_next(cur->rec); -} - -/**********************************************************//** -Moves the cursor to the previous record on page. */ -UNIV_INLINE -void -page_cur_move_to_prev( -/*==================*/ - page_cur_t* cur) /*!< in/out: page cursor, not before first */ -{ - ut_ad(!page_cur_is_before_first(cur)); - - cur->rec = page_rec_get_prev(cur->rec); -} - -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Searches the right position for a page cursor. 
-@return number of matched fields on the left */ -UNIV_INLINE -ulint -page_cur_search( -/*============*/ - const buf_block_t* block, /*!< in: buffer block */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, - PAGE_CUR_LE, PAGE_CUR_G, or - PAGE_CUR_GE */ - page_cur_t* cursor) /*!< out: page cursor */ -{ - ulint low_matched_fields = 0; - ulint low_matched_bytes = 0; - ulint up_matched_fields = 0; - ulint up_matched_bytes = 0; - - ut_ad(dtuple_check_typed(tuple)); - - page_cur_search_with_match(block, index, tuple, mode, - &up_matched_fields, - &up_matched_bytes, - &low_matched_fields, - &low_matched_bytes, - cursor); - return(low_matched_fields); -} - -/***********************************************************//** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same logical position, but the physical position may change if it is -pointing to a compressed page that was reorganized. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
- -@return pointer to record if succeed, NULL otherwise */ -UNIV_INLINE -rec_t* -page_cur_tuple_insert( -/*==================*/ - page_cur_t* cursor, /*!< in/out: a page cursor */ - const dtuple_t* tuple, /*!< in: pointer to a data tuple */ - dict_index_t* index, /*!< in: record descriptor */ - ulint** offsets,/*!< in/out: offsets on *rec; the old *offsets is - passed to rec_get_offsets() and replaced by - its result */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap; must not be - NULL itself, but *heap may be NULL, in which - case a heap is created */ - ulint n_ext, /*!< in: number of externally stored columns */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ -{ - ulint size - = rec_get_converted_size(index, tuple, n_ext); - rec_t* rec; - - /* Create the heap on demand, sized for the converted record - plus the offsets array. */ - if (!*heap) { - *heap = mem_heap_create(size - + (4 + REC_OFFS_HEADER_SIZE - + dtuple_get_n_fields(tuple)) - * sizeof **offsets); - } - - rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size), - index, tuple, n_ext); - *offsets = rec_get_offsets( - rec, index, *offsets, ULINT_UNDEFINED, heap); - - /* Dispatch on whether the block has a compressed page - descriptor. */ - if (buf_block_get_page_zip(cursor->block)) { - rec = page_cur_insert_rec_zip( - cursor, index, rec, *offsets, mtr); - } else { - rec = page_cur_insert_rec_low(cursor->rec, - index, rec, *offsets, mtr); - } - - ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets)); - return(rec); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same logical position, but the physical position may change if it is -pointing to a compressed page that was reorganized. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit().
- -@return pointer to record if succeed, NULL otherwise */ -UNIV_INLINE -rec_t* -page_cur_rec_insert( -/*================*/ - page_cur_t* cursor, /*!< in/out: a page cursor */ - const rec_t* rec, /*!< in: record to insert */ - dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ -{ - if (buf_block_get_page_zip(cursor->block)) { - return(page_cur_insert_rec_zip( - cursor, index, rec, offsets, mtr)); - } else { - return(page_cur_insert_rec_low(cursor->rec, - index, rec, offsets, mtr)); - } -} diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h deleted file mode 100644 index eefa0fa4c5b..00000000000 --- a/storage/xtradb/include/page0page.h +++ /dev/null @@ -1,1140 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/page0page.h -Index page routines - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#ifndef page0page_h -#define page0page_h - -#include "univ.i" - -#include "buf0types.h" - -#ifndef UNIV_INNOCHECKSUM - -#include "page0types.h" -#include "fil0fil.h" -#include "buf0buf.h" -#include "data0data.h" -#include "dict0dict.h" -#include "rem0rec.h" -#include "fsp0fsp.h" -#include "mtr0mtr.h" - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE -#endif - -/* PAGE HEADER - =========== - -Index page header starts at the first offset left free by the FIL-module */ - -typedef byte page_header_t; - -#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this - offset */ -/*-----------------------------*/ -#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */ -#define PAGE_HEAP_TOP 2 /* pointer to record heap top */ -#define PAGE_N_HEAP 4 /* number of records in the heap, - bit 15=flag: new-style compact page format */ -#define PAGE_FREE 6 /* pointer to start of page free record list */ -#define PAGE_GARBAGE 8 /* number of bytes in deleted records */ -#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or - NULL if this info has been reset by a delete, - for example */ -#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... 
*/ -#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same - direction */ -#define PAGE_N_RECS 16 /* number of user records on the page */ -#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified - a record on the page; trx_id_t; defined only - in secondary indexes and in the insert buffer - tree */ -#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page - header which are set in a page create */ -/*----*/ -#define PAGE_LEVEL 26 /* level of the node in an index tree; the - leaf level is the level 0. This field should - not be written to after page creation. */ -#define PAGE_INDEX_ID 28 /* index id where the page belongs. - This field should not be written to after - page creation. */ -#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in - a B-tree: defined only on the root page of a - B-tree, but not in the root of an ibuf tree */ -#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF -#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF - /* in the place of PAGE_BTR_SEG_LEAF and _TOP - there is a free list base node if the page is - the root page of an ibuf tree, and at the same - place is the free list node if the page is in - a free list */ -#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE) - /* file segment header for the non-leaf pages - in a B-tree: defined only on the root page of - a B-tree, but not in the root of an ibuf - tree */ -/*----*/ -#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE) - /* start of data on the page */ - -#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES) - /* offset of the page infimum record on an - old-style page */ -#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8) - /* offset of the page supremum record on an - old-style page */ -#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9) - /* offset of the page supremum record end on - an old-style page */ -#define PAGE_NEW_INFIMUM (PAGE_DATA + 
REC_N_NEW_EXTRA_BYTES) - /* offset of the page infimum record on a - new-style compact page */ -#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8) - /* offset of the page supremum record on a - new-style compact page */ -#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8) - /* offset of the page supremum record end on - a new-style compact page */ -/*-----------------------------*/ - -/* Heap numbers */ -#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */ -#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */ -#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in - creation (insertion) order, - not necessarily collation order; - this record may have been deleted */ - -/* Directions of cursor movement */ -#define PAGE_LEFT 1 -#define PAGE_RIGHT 2 -#define PAGE_SAME_REC 3 -#define PAGE_SAME_PAGE 4 -#define PAGE_NO_DIRECTION 5 - -/* PAGE DIRECTORY - ============== -*/ - -typedef byte page_dir_slot_t; -typedef page_dir_slot_t page_dir_t; - -/* Offset of the directory start down from the page end. We call the -slot with the highest file address directory start, as it points to -the first record in the list of records. */ -#define PAGE_DIR FIL_PAGE_DATA_END - -/* We define a slot in the page directory as two bytes */ -#define PAGE_DIR_SLOT_SIZE 2 - -/* The offset of the physically lower end of the directory, counted from -page end, when the page is empty */ -#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE) - -/* The maximum and minimum number of records owned by a directory slot. The -number may drop below the minimum in the first and the last slot in the -directory. */ -#define PAGE_DIR_SLOT_MAX_N_OWNED 8 -#define PAGE_DIR_SLOT_MIN_N_OWNED 4 - -/************************************************************//** -Gets the start of a page. 
-@return start of the page */ -UNIV_INLINE -page_t* -page_align( -/*=======*/ - const void* ptr) /*!< in: pointer to page frame */ - MY_ATTRIBUTE((const)); -/************************************************************//** -Gets the offset within a page. -@return offset from the start of the page */ -UNIV_INLINE -ulint -page_offset( -/*========*/ - const void* ptr) /*!< in: pointer to page frame */ - MY_ATTRIBUTE((const)); -/*************************************************************//** -Returns the max trx id field value. */ -UNIV_INLINE -trx_id_t -page_get_max_trx_id( -/*================*/ - const page_t* page); /*!< in: page */ -/*************************************************************//** -Sets the max trx id field value. */ -UNIV_INTERN -void -page_set_max_trx_id( -/*================*/ - buf_block_t* block, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr); /*!< in/out: mini-transaction, or NULL */ -/*************************************************************//** -Sets the max trx id field value if trx_id is bigger than the previous -value. */ -UNIV_INLINE -void -page_update_max_trx_id( -/*===================*/ - buf_block_t* block, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr); /*!< in/out: mini-transaction */ -/*************************************************************//** -Reads the given header field. */ -UNIV_INLINE -ulint -page_header_get_field( -/*==================*/ - const page_t* page, /*!< in: page */ - ulint field); /*!< in: PAGE_N_DIR_SLOTS, ... */ -/*************************************************************//** -Sets the given header field. 
*/ -UNIV_INLINE -void -page_header_set_field( -/*==================*/ - page_t* page, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */ - ulint val); /*!< in: value */ -/*************************************************************//** -Returns the offset stored in the given header field. -@return offset from the start of the page, or 0 */ -UNIV_INLINE -ulint -page_header_get_offs( -/*=================*/ - const page_t* page, /*!< in: page */ - ulint field); /*!< in: PAGE_FREE, ... */ - -/*************************************************************//** -Returns the pointer stored in the given header field, or NULL. -This is a macro: the page argument is parenthesized so that any pointer -expression can be passed, but both arguments are evaluated more than once, -so they must not have side effects. */ -#define page_header_get_ptr(page, field) \ - (page_header_get_offs(page, field) \ - ? (page) + page_header_get_offs(page, field) : NULL) -/*************************************************************//** -Sets the pointer stored in the given header field. */ -UNIV_INLINE -void -page_header_set_ptr( -/*================*/ - page_t* page, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - ulint field, /*!< in: PAGE_FREE, ... */ - const byte* ptr); /*!< in: pointer or NULL */ -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Resets the last insert info field in the page header. Writes to mlog -about this operation. */ -UNIV_INLINE -void -page_header_reset_last_insert( -/*==========================*/ - page_t* page, /*!< in: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - mtr_t* mtr); /*!< in: mtr */ -#endif /* !UNIV_HOTBACKUP */ -/************************************************************//** -Gets the offset of the first record on the page.
-@return offset of the first record in record list, relative from page */ -UNIV_INLINE -ulint -page_get_infimum_offset( -/*====================*/ - const page_t* page); /*!< in: page which must have record(s) */ -/************************************************************//** -Gets the offset of the last record on the page. -@return offset of the last record in record list, relative from page */ -UNIV_INLINE -ulint -page_get_supremum_offset( -/*=====================*/ - const page_t* page); /*!< in: page which must have record(s) */ -#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page)) -#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page)) - -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INTERN -const rec_t* -page_rec_get_nth_const( -/*===================*/ - const page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INLINE -rec_t* -page_rec_get_nth( -/*=============*/ - page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. -@return middle record */ -UNIV_INLINE -rec_t* -page_get_middle_rec( -/*================*/ - page_t* page) /*!< in: page */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*************************************************************//** -Compares a data tuple to a physical record.
Differs from the function -cmp_dtuple_rec_with_match in the way that the record must reside on an -index page, and also page infimum and supremum records can be given in -the parameter rec. These are considered as the negative infinity and -the positive infinity in the alphabetical order. -@return 1, 0, -1, if dtuple is greater, equal, less than rec, -respectively, when only the common first fields are compared */ -UNIV_INLINE -int -page_cmp_dtuple_rec_with_match( -/*===========================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record on a page; may also - be page infimum or supremum, in which case - matched-parameter values below are not - affected */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint* matched_fields, /*!< in/out: number of already completely - matched fields; when function returns - contains the value for current comparison */ - ulint* matched_bytes); /*!< in/out: number of already matched - bytes within the first field not completely - matched; when function returns contains the - value for current comparison */ -#endif /* !UNIV_HOTBACKUP */ -/*************************************************************//** -Gets the page number. -@return page number */ -UNIV_INLINE -ulint -page_get_page_no( -/*=============*/ - const page_t* page); /*!< in: page */ -/*************************************************************//** -Gets the tablespace identifier. -@return space id */ -UNIV_INLINE -ulint -page_get_space_id( -/*==============*/ - const page_t* page); /*!< in: page */ -/*************************************************************//** -Gets the number of user records on page (the infimum and supremum records -are not user records). 
-@return number of user records */ -UNIV_INLINE -ulint -page_get_n_recs( -/*============*/ - const page_t* page); /*!< in: index page */ -/***************************************************************//** -Returns the number of records before the given record in chain. -The number includes infimum and supremum records. -This is the inverse function of page_rec_get_nth(). -@return number of records */ -UNIV_INTERN -ulint -page_rec_get_n_recs_before( -/*=======================*/ - const rec_t* rec); /*!< in: the physical record */ -/*************************************************************//** -Gets the number of records in the heap. -@return number of user records */ -UNIV_INLINE -ulint -page_dir_get_n_heap( -/*================*/ - const page_t* page); /*!< in: index page */ -/*************************************************************//** -Sets the number of records in the heap. */ -UNIV_INLINE -void -page_dir_set_n_heap( -/*================*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL. - Note that the size of the dense page directory - in the compressed page trailer is - n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */ - ulint n_heap);/*!< in: number of records */ -/*************************************************************//** -Gets the number of dir slots in directory. -@return number of slots */ -UNIV_INLINE -ulint -page_dir_get_n_slots( -/*=================*/ - const page_t* page); /*!< in: index page */ -/*************************************************************//** -Sets the number of dir slots in directory. 
*/ -UNIV_INLINE -void -page_dir_set_n_slots( -/*=================*/ - page_t* page, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - ulint n_slots);/*!< in: number of slots */ -#ifdef UNIV_DEBUG -/*************************************************************//** -Gets pointer to nth directory slot. -@return pointer to dir slot */ -UNIV_INLINE -page_dir_slot_t* -page_dir_get_nth_slot( -/*==================*/ - const page_t* page, /*!< in: index page */ - ulint n); /*!< in: position */ -#else /* UNIV_DEBUG */ -# define page_dir_get_nth_slot(page, n) \ - ((page) + UNIV_PAGE_SIZE - PAGE_DIR \ - - (n + 1) * PAGE_DIR_SLOT_SIZE) -#endif /* UNIV_DEBUG */ -/**************************************************************//** -Used to check the consistency of a record on a page. -@return TRUE if succeed */ -UNIV_INLINE -ibool -page_rec_check( -/*===========*/ - const rec_t* rec); /*!< in: record */ -/***************************************************************//** -Gets the record pointed to by a directory slot. -@return pointer to record */ -UNIV_INLINE -const rec_t* -page_dir_slot_get_rec( -/*==================*/ - const page_dir_slot_t* slot); /*!< in: directory slot */ -/***************************************************************//** -This is used to set the record offset in a directory slot. */ -UNIV_INLINE -void -page_dir_slot_set_rec( -/*==================*/ - page_dir_slot_t* slot, /*!< in: directory slot */ - rec_t* rec); /*!< in: record on the page */ -/***************************************************************//** -Gets the number of records owned by a directory slot. -@return number of records */ -UNIV_INLINE -ulint -page_dir_slot_get_n_owned( -/*======================*/ - const page_dir_slot_t* slot); /*!< in: page directory slot */ -/***************************************************************//** -This is used to set the owned records field of a directory slot. 
*/ -UNIV_INLINE -void -page_dir_slot_set_n_owned( -/*======================*/ - page_dir_slot_t*slot, /*!< in/out: directory slot */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint n); /*!< in: number of records owned by the slot */ -/************************************************************//** -Calculates the space reserved for directory slots of a given -number of records. The exact value is a fraction number -n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is -rounded upwards to an integer. */ -UNIV_INLINE -ulint -page_dir_calc_reserved_space( -/*=========================*/ - ulint n_recs); /*!< in: number of records */ -/***************************************************************//** -Looks for the directory slot which owns the given record. -@return the directory slot number */ -UNIV_INTERN -ulint -page_dir_find_owner_slot( -/*=====================*/ - const rec_t* rec); /*!< in: the physical record */ -/************************************************************//** -Determine whether the page is in new-style compact format. -@return nonzero if the page is in compact format, zero if it is in -old-style format */ -UNIV_INLINE -ulint -page_is_comp( -/*=========*/ - const page_t* page); /*!< in: index page */ -/************************************************************//** -TRUE if the record is on a page in compact format. -@return nonzero if in compact format */ -UNIV_INLINE -ulint -page_rec_is_comp( -/*=============*/ - const rec_t* rec); /*!< in: record */ -/***************************************************************//** -Returns the heap number of a record. -@return heap number */ -UNIV_INLINE -ulint -page_rec_get_heap_no( -/*=================*/ - const rec_t* rec); /*!< in: the physical record */ -/************************************************************//** -Determine whether the page is a B-tree leaf. 
-@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */ -UNIV_INLINE -bool -page_is_leaf( -/*=========*/ - const page_t* page) /*!< in: page */ - MY_ATTRIBUTE((warn_unused_result)); -/************************************************************//** -Determine whether the page is empty. -@return true if the page is empty (PAGE_N_RECS = 0) */ -UNIV_INLINE -bool -page_is_empty( -/*==========*/ - const page_t* page) /*!< in: page */ - MY_ATTRIBUTE((nonnull, pure)); -/************************************************************//** -Determine whether the page contains garbage. -@return true if the page contains garbage (PAGE_GARBAGE is not 0) */ -UNIV_INLINE -bool -page_has_garbage( -/*=============*/ - const page_t* page) /*!< in: page */ - MY_ATTRIBUTE((nonnull, pure)); -/************************************************************//** -Gets the pointer to the next record on the page. -@return pointer to next record */ -UNIV_INLINE -const rec_t* -page_rec_get_next_low( -/*==================*/ - const rec_t* rec, /*!< in: pointer to record */ - ulint comp); /*!< in: nonzero=compact page layout */ -/************************************************************//** -Gets the pointer to the next record on the page. -@return pointer to next record */ -UNIV_INLINE -rec_t* -page_rec_get_next( -/*==============*/ - const rec_t* rec); /*!< in: pointer to record */ -/************************************************************//** -Gets the pointer to the next record on the page. -@return pointer to next record */ -UNIV_INLINE -const rec_t* -page_rec_get_next_const( -/*====================*/ - const rec_t* rec); /*!< in: pointer to record */ -/************************************************************//** -Gets the pointer to the next non delete-marked record on the page. -If all subsequent records are delete-marked, then this function -will return the supremum record. 
-@return pointer to next non delete-marked record or pointer to supremum */ -UNIV_INLINE -const rec_t* -page_rec_get_next_non_del_marked( -/*=============================*/ - const rec_t* rec); /*!< in: pointer to record */ -/************************************************************//** -Sets the pointer to the next record on the page. */ -UNIV_INLINE -void -page_rec_set_next( -/*==============*/ - rec_t* rec, /*!< in: pointer to record, - must not be page supremum */ - const rec_t* next); /*!< in: pointer to next record, - must not be page infimum */ -/************************************************************//** -Gets the pointer to the previous record. -@return pointer to previous record */ -UNIV_INLINE -const rec_t* -page_rec_get_prev_const( -/*====================*/ - const rec_t* rec); /*!< in: pointer to record, must not be page - infimum */ -/************************************************************//** -Gets the pointer to the previous record. -@return pointer to previous record */ -UNIV_INLINE -rec_t* -page_rec_get_prev( -/*==============*/ - rec_t* rec); /*!< in: pointer to record, - must not be page infimum */ -/************************************************************//** -TRUE if the record is a user record on the page. -@return TRUE if a user record */ -UNIV_INLINE -ibool -page_rec_is_user_rec_low( -/*=====================*/ - ulint offset) /*!< in: record offset on page */ - MY_ATTRIBUTE((const)); -/************************************************************//** -TRUE if the record is the supremum record on a page. -@return TRUE if the supremum record */ -UNIV_INLINE -ibool -page_rec_is_supremum_low( -/*=====================*/ - ulint offset) /*!< in: record offset on page */ - MY_ATTRIBUTE((const)); -/************************************************************//** -TRUE if the record is the infimum record on a page. 
-@return TRUE if the infimum record */ -UNIV_INLINE -ibool -page_rec_is_infimum_low( -/*====================*/ - ulint offset) /*!< in: record offset on page */ - MY_ATTRIBUTE((const)); - -/************************************************************//** -TRUE if the record is a user record on the page. -@return TRUE if a user record */ -UNIV_INLINE -ibool -page_rec_is_user_rec( -/*=================*/ - const rec_t* rec) /*!< in: record */ - MY_ATTRIBUTE((const)); -/************************************************************//** -TRUE if the record is the supremum record on a page. -@return TRUE if the supremum record */ -UNIV_INLINE -ibool -page_rec_is_supremum( -/*=================*/ - const rec_t* rec) /*!< in: record */ - MY_ATTRIBUTE((const)); - -/************************************************************//** -TRUE if the record is the infimum record on a page. -@return TRUE if the infimum record */ -UNIV_INLINE -ibool -page_rec_is_infimum( -/*================*/ - const rec_t* rec) /*!< in: record */ - MY_ATTRIBUTE((const)); -/***************************************************************//** -Looks for the record which owns the given record. -@return the owner record */ -UNIV_INLINE -rec_t* -page_rec_find_owner_rec( -/*====================*/ - rec_t* rec); /*!< in: the physical record */ -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Write a 32-bit field in a data dictionary record. */ -UNIV_INLINE -void -page_rec_write_field( -/*=================*/ - rec_t* rec, /*!< in/out: record to update */ - ulint i, /*!< in: index of the field to update */ - ulint val, /*!< in: value to write */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/************************************************************//** -Returns the maximum combined size of records which can be inserted on top -of record heap. 
-@return maximum combined size for inserted records */ -UNIV_INLINE -ulint -page_get_max_insert_size( -/*=====================*/ - const page_t* page, /*!< in: index page */ - ulint n_recs);/*!< in: number of records */ -/************************************************************//** -Returns the maximum combined size of records which can be inserted on top -of record heap if page is first reorganized. -@return maximum combined size for inserted records */ -UNIV_INLINE -ulint -page_get_max_insert_size_after_reorganize( -/*======================================*/ - const page_t* page, /*!< in: index page */ - ulint n_recs);/*!< in: number of records */ -/*************************************************************//** -Calculates free space if a page is emptied. -@return free space */ -UNIV_INLINE -ulint -page_get_free_space_of_empty( -/*=========================*/ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((const)); -/**********************************************************//** -Returns the base extra size of a physical record. This is the -size of the fixed header, independent of the record size. -@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */ -UNIV_INLINE -ulint -page_rec_get_base_extra_size( -/*=========================*/ - const rec_t* rec); /*!< in: physical record */ -/************************************************************//** -Returns the sum of the sizes of the records in the record list -excluding the infimum and supremum records. -@return data in bytes */ -UNIV_INLINE -ulint -page_get_data_size( -/*===============*/ - const page_t* page); /*!< in: index page */ -/************************************************************//** -Allocates a block of memory from the head of the free list -of an index page. 
*/ -UNIV_INLINE -void -page_mem_alloc_free( -/*================*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page with enough - space available for inserting the record, - or NULL */ - rec_t* next_rec,/*!< in: pointer to the new head of the - free record list */ - ulint need); /*!< in: number of bytes allocated */ -/************************************************************//** -Allocates a block of memory from the heap of an index page. -@return pointer to start of allocated buffer, or NULL if allocation fails */ -UNIV_INTERN -byte* -page_mem_alloc_heap( -/*================*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page with enough - space available for inserting the record, - or NULL */ - ulint need, /*!< in: total number of bytes needed */ - ulint* heap_no);/*!< out: this contains the heap number - of the allocated record - if allocation succeeds */ -/************************************************************//** -Puts a record to free list. */ -UNIV_INLINE -void -page_mem_free( -/*==========*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, - or NULL */ - rec_t* rec, /*!< in: pointer to the (origin of) - record */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets);/*!< in: array returned by - rec_get_offsets() */ -/**********************************************************//** -Create an uncompressed B-tree index page. -@return pointer to the page */ -UNIV_INTERN -page_t* -page_create( -/*========*/ - buf_block_t* block, /*!< in: a buffer block where the - page is created */ - mtr_t* mtr, /*!< in: mini-transaction handle */ - ulint comp); /*!< in: nonzero=compact page format */ -/**********************************************************//** -Create a compressed B-tree index page. 
-@return pointer to the page */ -UNIV_INTERN -page_t* -page_create_zip( -/*============*/ - buf_block_t* block, /*!< in/out: a buffer frame where the - page is created */ - dict_index_t* index, /*!< in: the index of the page */ - ulint level, /*!< in: the B-tree level of the page */ - trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************//** -Empty a previously created B-tree index page. */ -UNIV_INTERN -void -page_create_empty( -/*==============*/ - buf_block_t* block, /*!< in/out: B-tree block */ - dict_index_t* index, /*!< in: the index of the page */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - MY_ATTRIBUTE((nonnull(1,2))); -/*************************************************************//** -Differs from page_copy_rec_list_end, because this function does not -touch the lock table and max trx id on page or compress the page. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). */ -UNIV_INTERN -void -page_copy_rec_list_end_no_locks( -/*============================*/ - buf_block_t* new_block, /*!< in: index page to copy to */ - buf_block_t* block, /*!< in: index page of rec */ - rec_t* rec, /*!< in: record on page */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr */ -/*************************************************************//** -Copies records from page to new_page, from the given record onward, -including that record. Infimum and supremum records are not copied. -The records are copied to the start of the record list on new_page. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. 
-This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return pointer to the original successor of the infimum record on -new_page, or NULL on zip overflow (new_block will be decompressed) */ -UNIV_INTERN -rec_t* -page_copy_rec_list_end( -/*===================*/ - buf_block_t* new_block, /*!< in/out: index page to copy to */ - buf_block_t* block, /*!< in: index page containing rec */ - rec_t* rec, /*!< in: record on page */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr */ -/*************************************************************//** -Copies records from page to new_page, up to the given record, NOT -including that record. Infimum and supremum records are not copied. -The records are copied to the end of the record list on new_page. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return pointer to the original predecessor of the supremum record on -new_page, or NULL on zip overflow (new_block will be decompressed) */ -UNIV_INTERN -rec_t* -page_copy_rec_list_start( -/*=====================*/ - buf_block_t* new_block, /*!< in/out: index page to copy to */ - buf_block_t* block, /*!< in: index page containing rec */ - rec_t* rec, /*!< in: record on page */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr */ -/*************************************************************//** -Deletes records from a page from a given record onward, including that record. -The infimum and supremum records are not deleted. 
*/ -UNIV_INTERN -void -page_delete_rec_list_end( -/*=====================*/ - rec_t* rec, /*!< in: pointer to record on page */ - buf_block_t* block, /*!< in: buffer block of the page */ - dict_index_t* index, /*!< in: record descriptor */ - ulint n_recs, /*!< in: number of records to delete, - or ULINT_UNDEFINED if not known */ - ulint size, /*!< in: the sum of the sizes of the - records in the end of the chain to - delete, or ULINT_UNDEFINED if not known */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Deletes records from page, up to the given record, NOT including -that record. Infimum and supremum records are not deleted. */ -UNIV_INTERN -void -page_delete_rec_list_start( -/*=======================*/ - rec_t* rec, /*!< in: record on page */ - buf_block_t* block, /*!< in: buffer block of the page */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull)); -/*************************************************************//** -Moves record list end to another page. Moved records include -split_rec. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return TRUE on success; FALSE on compression failure (new_block will -be decompressed) */ -UNIV_INTERN -ibool -page_move_rec_list_end( -/*===================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in: index page from where to move */ - rec_t* split_rec, /*!< in: first record to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr */ -/*************************************************************//** -Moves record list start to another page. Moved records do not include -split_rec. 
- -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if new_block is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return TRUE on success; FALSE on compression failure */ -UNIV_INTERN -ibool -page_move_rec_list_start( -/*=====================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in/out: page containing split_rec */ - rec_t* split_rec, /*!< in: first record not to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull(1, 2, 4, 5))); -/****************************************************************//** -Splits a directory slot which owns too many records. */ -UNIV_INTERN -void -page_dir_split_slot( -/*================*/ - page_t* page, /*!< in: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be written, or NULL */ - ulint slot_no);/*!< in: the directory slot */ -/*************************************************************//** -Tries to balance the given directory slot with too few records -with the upper neighbor, so that there are at least the minimum number -of records owned by the slot; this may result in the merging of -two slots. */ -UNIV_INTERN -void -page_dir_balance_slot( -/*==================*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint slot_no);/*!< in: the directory slot */ -/**********************************************************//** -Parses a log record of a record list end or start deletion. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_parse_delete_rec_list( -/*=======================*/ - byte type, /*!< in: MLOG_LIST_END_DELETE, - MLOG_LIST_START_DELETE, - MLOG_COMP_LIST_END_DELETE or - MLOG_COMP_LIST_START_DELETE */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in/out: buffer block or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/***********************************************************//** -Parses a redo log record of creating a page. -@return end of log record or NULL */ -UNIV_INTERN -byte* -page_parse_create( -/*==============*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - ulint comp, /*!< in: nonzero=compact page format */ - buf_block_t* block, /*!< in: block or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Prints record contents including the data relevant only in -the index page context. */ -UNIV_INTERN -void -page_rec_print( -/*===========*/ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets);/*!< in: record descriptor */ -# ifdef UNIV_BTR_PRINT -/***************************************************************//** -This is used to print the contents of the directory for -debugging purposes. */ -UNIV_INTERN -void -page_dir_print( -/*===========*/ - page_t* page, /*!< in: index page */ - ulint pr_n); /*!< in: print n first and n last entries */ -/***************************************************************//** -This is used to print the contents of the page record list for -debugging purposes. 
*/ -UNIV_INTERN -void -page_print_list( -/*============*/ - buf_block_t* block, /*!< in: index page */ - dict_index_t* index, /*!< in: dictionary index of the page */ - ulint pr_n); /*!< in: print n first and n last entries */ -/***************************************************************//** -Prints the info in a page header. */ -UNIV_INTERN -void -page_header_print( -/*==============*/ - const page_t* page); /*!< in: index page */ -/***************************************************************//** -This is used to print the contents of the page for -debugging purposes. */ -UNIV_INTERN -void -page_print( -/*=======*/ - buf_block_t* block, /*!< in: index page */ - dict_index_t* index, /*!< in: dictionary index of the page */ - ulint dn, /*!< in: print dn first and last entries - in directory */ - ulint rn); /*!< in: print rn first and last records - in directory */ -# endif /* UNIV_BTR_PRINT */ -#endif /* !UNIV_HOTBACKUP */ -/***************************************************************//** -The following is used to validate a record on a page. This function -differs from rec_validate as it can also check the n_owned field and -the heap_no field. -@return TRUE if ok */ -UNIV_INTERN -ibool -page_rec_validate( -/*==============*/ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ -/***************************************************************//** -Checks that the first directory slot points to the infimum record and -the last to the supremum. This function is intended to track if the -bug fixed in 4.0.14 has caused corruption to users' databases. */ -UNIV_INTERN -void -page_check_dir( -/*===========*/ - const page_t* page); /*!< in: index page */ -/***************************************************************//** -This function checks the consistency of an index page when we do not -know the index. This is also resilient so that this should never crash -even if the page is total garbage. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -page_simple_validate_old( -/*=====================*/ - const page_t* page); /*!< in: index page in ROW_FORMAT=REDUNDANT */ -/***************************************************************//** -This function checks the consistency of an index page when we do not -know the index. This is also resilient so that this should never crash -even if the page is total garbage. -@return TRUE if ok */ -UNIV_INTERN -ibool -page_simple_validate_new( -/*=====================*/ - const page_t* page); /*!< in: index page in ROW_FORMAT!=REDUNDANT */ -/***************************************************************//** -This function checks the consistency of an index page. -@return TRUE if ok */ -UNIV_INTERN -ibool -page_validate( -/*==========*/ - const page_t* page, /*!< in: index page */ - dict_index_t* index); /*!< in: data dictionary index containing - the page record type definition */ -/***************************************************************//** -Looks in the page record list for a record with the given heap number. -@return record, NULL if not found */ - -const rec_t* -page_find_rec_with_heap_no( -/*=======================*/ - const page_t* page, /*!< in: index page */ - ulint heap_no);/*!< in: heap number */ -/** Get the last non-delete-marked record on a page. -@param[in] page index tree leaf page -@return the last record, not delete-marked -@retval infimum record if all records are delete-marked */ - -const rec_t* -page_find_rec_max_not_deleted( - const page_t* page); - -#endif /* #ifndef UNIV_INNOCHECKSUM */ - -/** Issue a warning when the checksum that is stored in the page is valid, -but different than the global setting innodb_checksum_algorithm. 
-@param[in] current_algo current checksum algorithm -@param[in] page_checksum page valid checksum -@param[in] space_id tablespace id -@param[in] page_no page number */ -void -page_warn_strict_checksum( - srv_checksum_algorithm_t curr_algo, - srv_checksum_algorithm_t page_checksum, - ulint space_id, - ulint page_no); - -#ifndef UNIV_INNOCHECKSUM - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif - -#ifndef UNIV_NONINL -#include "page0page.ic" -#endif - -#endif /* #ifndef UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic deleted file mode 100644 index 364536b86f8..00000000000 --- a/storage/xtradb/include/page0page.ic +++ /dev/null @@ -1,1184 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/page0page.ic -Index page routines - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" -#ifdef UNIV_DEBUG -# include "log0recv.h" -#endif /* !UNIV_DEBUG */ -#ifndef UNIV_HOTBACKUP -# include "rem0cmp.h" -#endif /* !UNIV_HOTBACKUP */ -#include "mtr0log.h" -#include "page0zip.h" - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE -#endif - -extern my_bool srv_immediate_scrub_data_uncompressed; - -/************************************************************//** -Gets the start of a page. -@return start of the page */ -UNIV_INLINE -page_t* -page_align( -/*=======*/ - const void* ptr) /*!< in: pointer to page frame */ -{ - return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE)); -} -/************************************************************//** -Gets the offset within a page. -@return offset from the start of the page */ -UNIV_INLINE -ulint -page_offset( -/*========*/ - const void* ptr) /*!< in: pointer to page frame */ -{ - return(ut_align_offset(ptr, UNIV_PAGE_SIZE)); -} -/*************************************************************//** -Returns the max trx id field value. */ -UNIV_INLINE -trx_id_t -page_get_max_trx_id( -/*================*/ - const page_t* page) /*!< in: page */ -{ - ut_ad(page); - - return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID)); -} - -/*************************************************************//** -Sets the max trx id field value if trx_id is bigger than the previous -value. 
*/ -UNIV_INLINE -void -page_update_max_trx_id( -/*===================*/ - buf_block_t* block, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(block); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - /* During crash recovery, this function may be called on - something else than a leaf page of a secondary index or the - insert buffer index tree (dict_index_is_sec_or_ibuf() returns - TRUE for the dummy indexes constructed during redo log - application). In that case, PAGE_MAX_TRX_ID is unused, - and trx_id is usually zero. */ - ut_ad(trx_id || recv_recovery_is_on()); - ut_ad(page_is_leaf(buf_block_get_frame(block))); - - if (page_get_max_trx_id(buf_block_get_frame(block)) < trx_id) { - - page_set_max_trx_id(block, page_zip, trx_id, mtr); - } -} - -/*************************************************************//** -Reads the given header field. */ -UNIV_INLINE -ulint -page_header_get_field( -/*==================*/ - const page_t* page, /*!< in: page */ - ulint field) /*!< in: PAGE_LEVEL, ... */ -{ - ut_ad(page); - ut_ad(field <= PAGE_INDEX_ID); - - return(mach_read_from_2(page + PAGE_HEADER + field)); -} - -/*************************************************************//** -Sets the given header field. */ -UNIV_INLINE -void -page_header_set_field( -/*==================*/ - page_t* page, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... 
*/ - ulint val) /*!< in: value */ -{ - ut_ad(page); - ut_ad(field <= PAGE_N_RECS); - ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE); - ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); - - mach_write_to_2(page + PAGE_HEADER + field, val); - if (page_zip) { - page_zip_write_header(page_zip, - page + PAGE_HEADER + field, 2, NULL); - } -} - -/*************************************************************//** -Returns the offset stored in the given header field. -@return offset from the start of the page, or 0 */ -UNIV_INLINE -ulint -page_header_get_offs( -/*=================*/ - const page_t* page, /*!< in: page */ - ulint field) /*!< in: PAGE_FREE, ... */ -{ - ulint offs; - - ut_ad((field == PAGE_FREE) - || (field == PAGE_LAST_INSERT) - || (field == PAGE_HEAP_TOP)); - - offs = page_header_get_field(page, field); - - ut_ad((field != PAGE_HEAP_TOP) || offs); - - return(offs); -} - -/*************************************************************//** -Sets the pointer stored in the given header field. */ -UNIV_INLINE -void -page_header_set_ptr( -/*================*/ - page_t* page, /*!< in: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - ulint field, /*!< in: PAGE_FREE, ... */ - const byte* ptr) /*!< in: pointer or NULL*/ -{ - ulint offs; - - ut_ad(page); - ut_ad((field == PAGE_FREE) - || (field == PAGE_LAST_INSERT) - || (field == PAGE_HEAP_TOP)); - - if (ptr == NULL) { - offs = 0; - } else { - offs = ptr - page; - } - - ut_ad((field != PAGE_HEAP_TOP) || offs); - - page_header_set_field(page, page_zip, field, offs); -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Resets the last insert info field in the page header. Writes to mlog -about this operation. 
*/ -UNIV_INLINE -void -page_header_reset_last_insert( -/*==========================*/ - page_t* page, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(page && mtr); - - if (page_zip) { - mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0); - page_zip_write_header(page_zip, - page + (PAGE_HEADER + PAGE_LAST_INSERT), - 2, mtr); - } else { - mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0, - MLOG_2BYTES, mtr); - } -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************//** -Determine whether the page is in new-style compact format. -@return nonzero if the page is in compact format, zero if it is in -old-style format */ -UNIV_INLINE -ulint -page_is_comp( -/*=========*/ - const page_t* page) /*!< in: index page */ -{ - return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000); -} - -/************************************************************//** -TRUE if the record is on a page in compact format. -@return nonzero if in compact format */ -UNIV_INLINE -ulint -page_rec_is_comp( -/*=============*/ - const rec_t* rec) /*!< in: record */ -{ - return(page_is_comp(page_align(rec))); -} - -/***************************************************************//** -Returns the heap number of a record. -@return heap number */ -UNIV_INLINE -ulint -page_rec_get_heap_no( -/*=================*/ - const rec_t* rec) /*!< in: the physical record */ -{ - if (page_rec_is_comp(rec)) { - return(rec_get_heap_no_new(rec)); - } else { - return(rec_get_heap_no_old(rec)); - } -} - -/************************************************************//** -Determine whether the page is a B-tree leaf. 
-@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */ -UNIV_INLINE -bool -page_is_leaf( -/*=========*/ - const page_t* page) /*!< in: page */ -{ - return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL))); -} - -/************************************************************//** -Determine whether the page is empty. -@return true if the page is empty (PAGE_N_RECS = 0) */ -UNIV_INLINE -bool -page_is_empty( -/*==========*/ - const page_t* page) /*!< in: page */ -{ - return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS))); -} - -/************************************************************//** -Determine whether the page contains garbage. -@return true if the page contains garbage (PAGE_GARBAGE is not 0) */ -UNIV_INLINE -bool -page_has_garbage( -/*=============*/ - const page_t* page) /*!< in: page */ -{ - return(!!*(const uint16*) (page + (PAGE_HEADER + PAGE_GARBAGE))); -} - -/************************************************************//** -Gets the offset of the first record on the page. -@return offset of the first record in record list, relative from page */ -UNIV_INLINE -ulint -page_get_infimum_offset( -/*====================*/ - const page_t* page) /*!< in: page which must have record(s) */ -{ - ut_ad(page); - ut_ad(!page_offset(page)); - - if (page_is_comp(page)) { - return(PAGE_NEW_INFIMUM); - } else { - return(PAGE_OLD_INFIMUM); - } -} - -/************************************************************//** -Gets the offset of the last record on the page. -@return offset of the last record in record list, relative from page */ -UNIV_INLINE -ulint -page_get_supremum_offset( -/*=====================*/ - const page_t* page) /*!< in: page which must have record(s) */ -{ - ut_ad(page); - ut_ad(!page_offset(page)); - - if (page_is_comp(page)) { - return(PAGE_NEW_SUPREMUM); - } else { - return(PAGE_OLD_SUPREMUM); - } -} - -/************************************************************//** -TRUE if the record is a user record on the page. 
-@return TRUE if a user record */ -UNIV_INLINE -ibool -page_rec_is_user_rec_low( -/*=====================*/ - ulint offset) /*!< in: record offset on page */ -{ - ut_ad(offset >= PAGE_NEW_INFIMUM); -#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM -# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM" -#endif -#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM -# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM" -#endif -#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM -# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM" -#endif -#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM -# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM" -#endif -#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END -# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END" -#endif -#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END -# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END" -#endif - ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - - return(offset != PAGE_NEW_SUPREMUM - && offset != PAGE_NEW_INFIMUM - && offset != PAGE_OLD_INFIMUM - && offset != PAGE_OLD_SUPREMUM); -} - -/************************************************************//** -TRUE if the record is the supremum record on a page. -@return TRUE if the supremum record */ -UNIV_INLINE -ibool -page_rec_is_supremum_low( -/*=====================*/ - ulint offset) /*!< in: record offset on page */ -{ - ut_ad(offset >= PAGE_NEW_INFIMUM); - ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - - return(offset == PAGE_NEW_SUPREMUM - || offset == PAGE_OLD_SUPREMUM); -} - -/************************************************************//** -TRUE if the record is the infimum record on a page. 
-@return TRUE if the infimum record */ -UNIV_INLINE -ibool -page_rec_is_infimum_low( -/*====================*/ - ulint offset) /*!< in: record offset on page */ -{ - ut_ad(offset >= PAGE_NEW_INFIMUM); - ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - - return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM); -} - -/************************************************************//** -TRUE if the record is a user record on the page. -@return TRUE if a user record */ -UNIV_INLINE -ibool -page_rec_is_user_rec( -/*=================*/ - const rec_t* rec) /*!< in: record */ -{ - ut_ad(page_rec_check(rec)); - - return(page_rec_is_user_rec_low(page_offset(rec))); -} - -/************************************************************//** -TRUE if the record is the supremum record on a page. -@return TRUE if the supremum record */ -UNIV_INLINE -ibool -page_rec_is_supremum( -/*=================*/ - const rec_t* rec) /*!< in: record */ -{ - ut_ad(page_rec_check(rec)); - - return(page_rec_is_supremum_low(page_offset(rec))); -} - -/************************************************************//** -TRUE if the record is the infimum record on a page. -@return TRUE if the infimum record */ -UNIV_INLINE -ibool -page_rec_is_infimum( -/*================*/ - const rec_t* rec) /*!< in: record */ -{ - ut_ad(page_rec_check(rec)); - - return(page_rec_is_infimum_low(page_offset(rec))); -} - -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INLINE -rec_t* -page_rec_get_nth( -/*=============*/ - page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ -{ - return((rec_t*) page_rec_get_nth_const(page, nth)); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Returns the middle record of the records on the page. 
If there is an -even number of records in the list, returns the first record of the -upper half-list. -@return middle record */ -UNIV_INLINE -rec_t* -page_get_middle_rec( -/*================*/ - page_t* page) /*!< in: page */ -{ - ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; - - return(page_rec_get_nth(page, middle)); -} - -/*************************************************************//** -Compares a data tuple to a physical record. Differs from the function -cmp_dtuple_rec_with_match in the way that the record must reside on an -index page, and also page infimum and supremum records can be given in -the parameter rec. These are considered as the negative infinity and -the positive infinity in the alphabetical order. -@return 1, 0, -1, if dtuple is greater, equal, less than rec, -respectively, when only the common first fields are compared */ -UNIV_INLINE -int -page_cmp_dtuple_rec_with_match( -/*===========================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record on a page; may also - be page infimum or supremum, in which case - matched-parameter values below are not - affected */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint* matched_fields, /*!< in/out: number of already completely - matched fields; when function returns - contains the value for current comparison */ - ulint* matched_bytes) /*!< in/out: number of already matched - bytes within the first field not completely - matched; when function returns contains the - value for current comparison */ -{ - ulint rec_offset; - - ut_ad(dtuple_check_typed(dtuple)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec)); - - rec_offset = page_offset(rec); - - if (rec_offset == PAGE_NEW_INFIMUM - || rec_offset == PAGE_OLD_INFIMUM) { - - return(1); - - } else if (rec_offset == PAGE_NEW_SUPREMUM - || rec_offset == PAGE_OLD_SUPREMUM) { - - return(-1); - } - - 
return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, - matched_fields, - matched_bytes)); -} -#endif /* !UNIV_HOTBACKUP */ - -/*************************************************************//** -Gets the page number. -@return page number */ -UNIV_INLINE -ulint -page_get_page_no( -/*=============*/ - const page_t* page) /*!< in: page */ -{ - ut_ad(page == page_align((page_t*) page)); - return(mach_read_from_4(page + FIL_PAGE_OFFSET)); -} - -/*************************************************************//** -Gets the tablespace identifier. -@return space id */ -UNIV_INLINE -ulint -page_get_space_id( -/*==============*/ - const page_t* page) /*!< in: page */ -{ - ut_ad(page == page_align((page_t*) page)); - return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); -} - -/*************************************************************//** -Gets the number of user records on page (infimum and supremum records -are not user records). -@return number of user records */ -UNIV_INLINE -ulint -page_get_n_recs( -/*============*/ - const page_t* page) /*!< in: index page */ -{ - return(page_header_get_field(page, PAGE_N_RECS)); -} - -/*************************************************************//** -Gets the number of dir slots in directory. -@return number of slots */ -UNIV_INLINE -ulint -page_dir_get_n_slots( -/*=================*/ - const page_t* page) /*!< in: index page */ -{ - return(page_header_get_field(page, PAGE_N_DIR_SLOTS)); -} -/*************************************************************//** -Sets the number of dir slots in directory. 
*/ -UNIV_INLINE -void -page_dir_set_n_slots( -/*=================*/ - page_t* page, /*!< in/out: page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - ulint n_slots)/*!< in: number of slots */ -{ - page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots); -} - -/*************************************************************//** -Gets the number of records in the heap. -@return number of user records */ -UNIV_INLINE -ulint -page_dir_get_n_heap( -/*================*/ - const page_t* page) /*!< in: index page */ -{ - return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff); -} - -/*************************************************************//** -Sets the number of records in the heap. */ -UNIV_INLINE -void -page_dir_set_n_heap( -/*================*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL. - Note that the size of the dense page directory - in the compressed page trailer is - n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */ - ulint n_heap) /*!< in: number of records */ -{ - ut_ad(n_heap < 0x8000); - ut_ad(!page_zip || n_heap - == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1); - - page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap - | (0x8000 - & page_header_get_field(page, PAGE_N_HEAP))); -} - -#ifdef UNIV_DEBUG -/*************************************************************//** -Gets pointer to nth directory slot. 
-@return pointer to dir slot */ -UNIV_INLINE -page_dir_slot_t* -page_dir_get_nth_slot( -/*==================*/ - const page_t* page, /*!< in: index page */ - ulint n) /*!< in: position */ -{ - ut_ad(page_dir_get_n_slots(page) > n); - - return((page_dir_slot_t*) - page + UNIV_PAGE_SIZE - PAGE_DIR - - (n + 1) * PAGE_DIR_SLOT_SIZE); -} -#endif /* UNIV_DEBUG */ - -/**************************************************************//** -Used to check the consistency of a record on a page. -@return TRUE if succeed */ -UNIV_INLINE -ibool -page_rec_check( -/*===========*/ - const rec_t* rec) /*!< in: record */ -{ - const page_t* page = page_align(rec); - - ut_a(rec); - - ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP)); - ut_a(page_offset(rec) >= PAGE_DATA); - - return(TRUE); -} - -/***************************************************************//** -Gets the record pointed to by a directory slot. -@return pointer to record */ -UNIV_INLINE -const rec_t* -page_dir_slot_get_rec( -/*==================*/ - const page_dir_slot_t* slot) /*!< in: directory slot */ -{ - return(page_align(slot) + mach_read_from_2(slot)); -} - -/***************************************************************//** -This is used to set the record offset in a directory slot. */ -UNIV_INLINE -void -page_dir_slot_set_rec( -/*==================*/ - page_dir_slot_t* slot, /*!< in: directory slot */ - rec_t* rec) /*!< in: record on the page */ -{ - ut_ad(page_rec_check(rec)); - - mach_write_to_2(slot, page_offset(rec)); -} - -/***************************************************************//** -Gets the number of records owned by a directory slot. 
-@return number of records */ -UNIV_INLINE -ulint -page_dir_slot_get_n_owned( -/*======================*/ - const page_dir_slot_t* slot) /*!< in: page directory slot */ -{ - const rec_t* rec = page_dir_slot_get_rec(slot); - if (page_rec_is_comp(slot)) { - return(rec_get_n_owned_new(rec)); - } else { - return(rec_get_n_owned_old(rec)); - } -} - -/***************************************************************//** -This is used to set the owned records field of a directory slot. */ -UNIV_INLINE -void -page_dir_slot_set_n_owned( -/*======================*/ - page_dir_slot_t*slot, /*!< in/out: directory slot */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint n) /*!< in: number of records owned by the slot */ -{ - rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot); - if (page_rec_is_comp(slot)) { - rec_set_n_owned_new(rec, page_zip, n); - } else { - ut_ad(!page_zip); - rec_set_n_owned_old(rec, n); - } -} - -/************************************************************//** -Calculates the space reserved for directory slots of a given number of -records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE / -PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */ -UNIV_INLINE -ulint -page_dir_calc_reserved_space( -/*=========================*/ - ulint n_recs) /*!< in: number of records */ -{ - return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1) - / PAGE_DIR_SLOT_MIN_N_OWNED); -} - -/************************************************************//** -Gets the pointer to the next record on the page. 
-@return pointer to next record */ -UNIV_INLINE -const rec_t* -page_rec_get_next_low( -/*==================*/ - const rec_t* rec, /*!< in: pointer to record */ - ulint comp) /*!< in: nonzero=compact page layout */ -{ - ulint offs; - const page_t* page; - - ut_ad(page_rec_check(rec)); - - page = page_align(rec); - - offs = rec_get_next_offs(rec, comp); - - if (offs >= UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Next record offset is nonsensical %lu" - " in record at offset %lu\n" - "InnoDB: rec address %p, space id %lu, page %lu\n", - (ulong) offs, (ulong) page_offset(rec), - (void*) rec, - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page)); - buf_page_print(page, 0, 0); - - ut_error; - } else if (offs == 0) { - - return(NULL); - } - - return(page + offs); -} - -/************************************************************//** -Gets the pointer to the next record on the page. -@return pointer to next record */ -UNIV_INLINE -rec_t* -page_rec_get_next( -/*==============*/ - const rec_t* rec) /*!< in: pointer to record */ -{ - return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec))); -} - -/************************************************************//** -Gets the pointer to the next record on the page. -@return pointer to next record */ -UNIV_INLINE -const rec_t* -page_rec_get_next_const( -/*====================*/ - const rec_t* rec) /*!< in: pointer to record */ -{ - return(page_rec_get_next_low(rec, page_rec_is_comp(rec))); -} - -/************************************************************//** -Gets the pointer to the next non delete-marked record on the page. -If all subsequent records are delete-marked, then this function -will return the supremum record. 
-@return pointer to next non delete-marked record or pointer to supremum */ -UNIV_INLINE -const rec_t* -page_rec_get_next_non_del_marked( -/*=============================*/ - const rec_t* rec) /*!< in: pointer to record */ -{ - const rec_t* r; - ulint page_is_compact = page_rec_is_comp(rec); - - for (r = page_rec_get_next_const(rec); - !page_rec_is_supremum(r) - && rec_get_deleted_flag(r, page_is_compact); - r = page_rec_get_next_const(r)) { - /* noop */ - } - - return(r); -} - -/************************************************************//** -Sets the pointer to the next record on the page. */ -UNIV_INLINE -void -page_rec_set_next( -/*==============*/ - rec_t* rec, /*!< in: pointer to record, - must not be page supremum */ - const rec_t* next) /*!< in: pointer to next record, - must not be page infimum */ -{ - ulint offs; - - ut_ad(page_rec_check(rec)); - ut_ad(!page_rec_is_supremum(rec)); - ut_ad(rec != next); - - ut_ad(!next || !page_rec_is_infimum(next)); - ut_ad(!next || page_align(rec) == page_align(next)); - - offs = next != NULL ? page_offset(next) : 0; - - if (page_rec_is_comp(rec)) { - rec_set_next_offs_new(rec, offs); - } else { - rec_set_next_offs_old(rec, offs); - } -} - -/************************************************************//** -Gets the pointer to the previous record. 
-@return pointer to previous record */ -UNIV_INLINE -const rec_t* -page_rec_get_prev_const( -/*====================*/ - const rec_t* rec) /*!< in: pointer to record, must not be page - infimum */ -{ - const page_dir_slot_t* slot; - ulint slot_no; - const rec_t* rec2; - const rec_t* prev_rec = NULL; - const page_t* page; - - ut_ad(page_rec_check(rec)); - - page = page_align(rec); - - ut_ad(!page_rec_is_infimum(rec)); - - slot_no = page_dir_find_owner_slot(rec); - - ut_a(slot_no != 0); - - slot = page_dir_get_nth_slot(page, slot_no - 1); - - rec2 = page_dir_slot_get_rec(slot); - - if (page_is_comp(page)) { - while (rec != rec2) { - prev_rec = rec2; - rec2 = page_rec_get_next_low(rec2, TRUE); - } - } else { - while (rec != rec2) { - prev_rec = rec2; - rec2 = page_rec_get_next_low(rec2, FALSE); - } - } - - ut_a(prev_rec); - - return(prev_rec); -} - -/************************************************************//** -Gets the pointer to the previous record. -@return pointer to previous record */ -UNIV_INLINE -rec_t* -page_rec_get_prev( -/*==============*/ - rec_t* rec) /*!< in: pointer to record, must not be page - infimum */ -{ - return((rec_t*) page_rec_get_prev_const(rec)); -} - -/***************************************************************//** -Looks for the record which owns the given record. -@return the owner record */ -UNIV_INLINE -rec_t* -page_rec_find_owner_rec( -/*====================*/ - rec_t* rec) /*!< in: the physical record */ -{ - ut_ad(page_rec_check(rec)); - - if (page_rec_is_comp(rec)) { - while (rec_get_n_owned_new(rec) == 0) { - rec = page_rec_get_next(rec); - } - } else { - while (rec_get_n_owned_old(rec) == 0) { - rec = page_rec_get_next(rec); - } - } - - return(rec); -} - -/**********************************************************//** -Returns the base extra size of a physical record. This is the -size of the fixed header, independent of the record size. 
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */ -UNIV_INLINE -ulint -page_rec_get_base_extra_size( -/*=========================*/ - const rec_t* rec) /*!< in: physical record */ -{ -#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES -# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES" -#endif - return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec)); -} - -/************************************************************//** -Returns the sum of the sizes of the records in the record list, excluding -the infimum and supremum records. -@return data in bytes */ -UNIV_INLINE -ulint -page_get_data_size( -/*===============*/ - const page_t* page) /*!< in: index page */ -{ - ulint ret; - - ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP) - - (page_is_comp(page) - ? PAGE_NEW_SUPREMUM_END - : PAGE_OLD_SUPREMUM_END) - - page_header_get_field(page, PAGE_GARBAGE)); - - ut_ad(ret < UNIV_PAGE_SIZE); - - return(ret); -} - - -/************************************************************//** -Allocates a block of memory from the free list of an index page. */ -UNIV_INLINE -void -page_mem_alloc_free( -/*================*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page with enough - space available for inserting the record, - or NULL */ - rec_t* next_rec,/*!< in: pointer to the new head of the - free record list */ - ulint need) /*!< in: number of bytes allocated */ -{ - ulint garbage; - -#ifdef UNIV_DEBUG - const rec_t* old_rec = page_header_get_ptr(page, PAGE_FREE); - ulint next_offs; - - ut_ad(old_rec); - next_offs = rec_get_next_offs(old_rec, page_is_comp(page)); - ut_ad(next_rec == (next_offs ? 
page + next_offs : NULL)); -#endif - - page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec); - - garbage = page_header_get_field(page, PAGE_GARBAGE); - ut_ad(garbage >= need); - - page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need); -} - -/*************************************************************//** -Calculates free space if a page is emptied. -@return free space */ -UNIV_INLINE -ulint -page_get_free_space_of_empty( -/*=========================*/ - ulint comp) /*!< in: nonzero=compact page layout */ -{ - if (comp) { - return((ulint)(UNIV_PAGE_SIZE - - PAGE_NEW_SUPREMUM_END - - PAGE_DIR - - 2 * PAGE_DIR_SLOT_SIZE)); - } - - return((ulint)(UNIV_PAGE_SIZE - - PAGE_OLD_SUPREMUM_END - - PAGE_DIR - - 2 * PAGE_DIR_SLOT_SIZE)); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Write a 32-bit field in a data dictionary record. */ -UNIV_INLINE -void -page_rec_write_field( -/*=================*/ - rec_t* rec, /*!< in/out: record to update */ - ulint i, /*!< in: index of the field to update */ - ulint val, /*!< in: value to write */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - byte* data; - ulint len; - - data = rec_get_nth_field_old(rec, i, &len); - - ut_ad(len == 4); - - mlog_write_ulint(data, val, MLOG_4BYTES, mtr); -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************//** -Each user record on a page, and also the deleted user records in the heap -takes its size plus the fraction of the dir cell size / -PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the -value of page_get_free_space_of_empty, the insert is impossible, otherwise -it is allowed. This function returns the maximum combined size of records -which can be inserted on top of the record heap. 
-@return maximum combined size for inserted records */ -UNIV_INLINE -ulint -page_get_max_insert_size( -/*=====================*/ - const page_t* page, /*!< in: index page */ - ulint n_recs) /*!< in: number of records */ -{ - ulint occupied; - ulint free_space; - - if (page_is_comp(page)) { - occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_NEW_SUPREMUM_END - + page_dir_calc_reserved_space( - n_recs + page_dir_get_n_heap(page) - 2); - - free_space = page_get_free_space_of_empty(TRUE); - } else { - occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_OLD_SUPREMUM_END - + page_dir_calc_reserved_space( - n_recs + page_dir_get_n_heap(page) - 2); - - free_space = page_get_free_space_of_empty(FALSE); - } - - /* Above the 'n_recs +' part reserves directory space for the new - inserted records; the '- 2' excludes page infimum and supremum - records */ - - if (occupied > free_space) { - - return(0); - } - - return(free_space - occupied); -} - -/************************************************************//** -Returns the maximum combined size of records which can be inserted on top -of the record heap if a page is first reorganized. -@return maximum combined size for inserted records */ -UNIV_INLINE -ulint -page_get_max_insert_size_after_reorganize( -/*======================================*/ - const page_t* page, /*!< in: index page */ - ulint n_recs) /*!< in: number of records */ -{ - ulint occupied; - ulint free_space; - - occupied = page_get_data_size(page) - + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page)); - - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - if (occupied > free_space) { - - return(0); - } - - return(free_space - occupied); -} - -/************************************************************//** -Puts a record to free list. 
*/ -UNIV_INLINE -void -page_mem_free( -/*==========*/ - page_t* page, /*!< in/out: index page */ - page_zip_des_t* page_zip, /*!< in/out: compressed page, - or NULL */ - rec_t* rec, /*!< in: pointer to the - (origin of) record */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets) /*!< in: array returned by - rec_get_offsets() */ -{ - rec_t* free; - ulint garbage; - - ut_ad(rec_offs_validate(rec, index, offsets)); - free = page_header_get_ptr(page, PAGE_FREE); - - bool scrub = srv_immediate_scrub_data_uncompressed; - if (scrub) { - /* scrub record */ - uint size = rec_offs_data_size(offsets); - memset(rec, 0, size); - } - - page_rec_set_next(rec, free); - page_header_set_ptr(page, page_zip, PAGE_FREE, rec); - - garbage = page_header_get_field(page, PAGE_GARBAGE); - - page_header_set_field(page, page_zip, PAGE_GARBAGE, - garbage + rec_offs_size(offsets)); - - if (page_zip) { - page_zip_dir_delete(page_zip, rec, index, offsets, free); - } else { - page_header_set_field(page, page_zip, PAGE_N_RECS, - page_get_n_recs(page) - 1); - } -} - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif diff --git a/storage/xtradb/include/page0types.h b/storage/xtradb/include/page0types.h deleted file mode 100644 index 3b53de6cc2b..00000000000 --- a/storage/xtradb/include/page0types.h +++ /dev/null @@ -1,173 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/page0types.h -Index page routines - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#ifndef page0types_h -#define page0types_h - -using namespace std; - -#include <map> - -#include "univ.i" -#include "dict0types.h" -#include "mtr0types.h" - -/** Eliminates a name collision on HP-UX */ -#define page_t ib_page_t -/** Type of the index page */ -typedef byte page_t; -/** Index page cursor */ -struct page_cur_t; - -/** Compressed index page */ -typedef byte page_zip_t; - -/* The following definitions would better belong to page0zip.h, -but we cannot include page0zip.h from rem0rec.ic, because -page0*.h includes rem0rec.h and may include rem0rec.ic. */ - -/** Number of bits needed for representing different compressed page sizes */ -#define PAGE_ZIP_SSIZE_BITS 3 - -/** Maximum compressed page shift size */ -#define PAGE_ZIP_SSIZE_MAX \ - (UNIV_ZIP_SIZE_SHIFT_MAX - UNIV_ZIP_SIZE_SHIFT_MIN + 1) - -/* Make sure there are enough bits available to store the maximum zip -ssize, which is the number of shifts from 512. 
*/ -#if PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS) -# error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)" -#endif - -/** Compressed page descriptor */ -struct page_zip_des_t -{ - page_zip_t* data; /*!< compressed page data */ - -#ifdef UNIV_DEBUG - unsigned m_start:16; /*!< start offset of modification log */ - bool m_external; /*!< Allocated externally, not from the - buffer pool */ -#endif /* UNIV_DEBUG */ - unsigned m_end:16; /*!< end offset of modification log */ - unsigned m_nonempty:1; /*!< TRUE if the modification log - is not empty */ - unsigned n_blobs:12; /*!< number of externally stored - columns on the page; the maximum - is 744 on a 16 KiB page */ - unsigned ssize:PAGE_ZIP_SSIZE_BITS; - /*!< 0 or compressed page shift size; - the size in bytes is - (UNIV_ZIP_SIZE_MIN >> 1) << ssize. */ -}; - -/** Compression statistics for a given page size */ -struct page_zip_stat_t { - /** Number of page compressions */ - ulint compressed; - /** Number of successful page compressions */ - ulint compressed_ok; - /** Number of page decompressions */ - ulint decompressed; - /** Duration of page compressions in microseconds */ - ib_uint64_t compressed_usec; - /** Duration of page decompressions in microseconds */ - ib_uint64_t decompressed_usec; - page_zip_stat_t() : - /* Initialize members to 0 so that when we do - stlmap[key].compressed++ and element with "key" does not - exist it gets inserted with zeroed members. 
*/ - compressed(0), - compressed_ok(0), - decompressed(0), - compressed_usec(0), - decompressed_usec(0) - { } -}; - -#ifndef UNIV_INNOCHECKSUM - -/** Compression statistics types */ -typedef map<index_id_t, page_zip_stat_t> page_zip_stat_per_index_t; - -/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */ -extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX]; -/** Statistics on compression, indexed by dict_index_t::id */ -extern page_zip_stat_per_index_t page_zip_stat_per_index; -extern ib_mutex_t page_zip_stat_per_index_mutex; -#ifdef HAVE_PSI_INTERFACE -extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key; -#endif /* HAVE_PSI_INTERFACE */ - -#endif /* !UNIV_INNOCHECKSUM */ - -/**********************************************************************//** -Write the "deleted" flag of a record on a compressed page. The flag must -already have been written on the uncompressed page. */ -UNIV_INTERN -void -page_zip_rec_set_deleted( -/*=====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record on the uncompressed page */ - ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Write the "owned" flag of a record on a compressed page. The n_owned field -must already have been written on the uncompressed page. */ -UNIV_INTERN -void -page_zip_rec_set_owned( -/*===================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record on the uncompressed page */ - ulint flag) /*!< in: the owned flag (nonzero=TRUE) */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Shift the dense page directory when a record is deleted. 
*/ -UNIV_INTERN -void -page_zip_dir_delete( -/*================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in: deleted record */ - const dict_index_t* index, /*!< in: index of rec; const-qualified - to match the declaration in page0zip.h */ - const ulint* offsets,/*!< in: rec_get_offsets(rec) */ - const byte* free) /*!< in: previous start of the free list */ - MY_ATTRIBUTE((nonnull(1,2,3,4))); - -/**********************************************************************//** -Add a slot to the dense page directory. */ -UNIV_INTERN -void -page_zip_dir_add_slot( -/*==================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - ulint is_clustered) /*!< in: nonzero for clustered index, - zero for others */ - MY_ATTRIBUTE((nonnull)); -#endif diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h deleted file mode 100644 index adafaa6d8b6..00000000000 --- a/storage/xtradb/include/page0zip.h +++ /dev/null @@ -1,554 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/page0zip.h -Compressed page interface - -Created June 2005 by Marko Makela -*******************************************************/ - -#ifndef page0zip_h -#define page0zip_h - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE -#endif - -#ifndef UNIV_INNOCHECKSUM -#include "page0types.h" -#include "mtr0types.h" -#include "dict0types.h" -#include "srv0srv.h" -#include "trx0types.h" -#include "mem0mem.h" -#else -#include "univ.i" -#endif /* !UNIV_INNOCHECKSUM */ -#include "buf0types.h" - -/* Compression level to be used by zlib. Settable by user. */ -extern uint page_zip_level; - -/* Default compression level. */ -#define DEFAULT_COMPRESSION_LEVEL 6 - -/* Whether or not to log compressed page images to avoid possible -compression algorithm changes in zlib. */ -extern my_bool page_zip_log_pages; - -#ifndef UNIV_INNOCHECKSUM -/**********************************************************************//** -Determine the size of a compressed page in bytes. -@return size in bytes */ -UNIV_INLINE -ulint -page_zip_get_size( -/*==============*/ - const page_zip_des_t* page_zip) /*!< in: compressed page */ - MY_ATTRIBUTE((nonnull, pure)); -/**********************************************************************//** -Set the size of a compressed page in bytes. */ -UNIV_INLINE -void -page_zip_set_size( -/*==============*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - ulint size); /*!< in: size in bytes */ - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Determine if a record is so big that it needs to be stored externally. 
-@return FALSE if the entire record can be stored locally on the page */ -UNIV_INLINE -ibool -page_zip_rec_needs_ext( -/*===================*/ - ulint rec_size, /*!< in: length of the record in bytes */ - ulint comp, /*!< in: nonzero=compact format */ - ulint n_fields, /*!< in: number of fields in the record; - ignored if zip_size == 0 */ - ulint zip_size) /*!< in: compressed page size in bytes, or 0 */ - MY_ATTRIBUTE((const)); - -/**********************************************************************//** -Determine the guaranteed free space on an empty page. -@return minimum payload size on the page */ -UNIV_INTERN -ulint -page_zip_empty_size( -/*================*/ - ulint n_fields, /*!< in: number of columns in the index */ - ulint zip_size) /*!< in: compressed page size in bytes */ - MY_ATTRIBUTE((const)); -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Initialize a compressed page descriptor. */ -UNIV_INLINE -void -page_zip_des_init( -/*==============*/ - page_zip_des_t* page_zip); /*!< in/out: compressed page - descriptor */ - -/**********************************************************************//** -Configure the zlib allocator to use the given memory heap. */ -UNIV_INTERN -void -page_zip_set_alloc( -/*===============*/ - void* stream, /*!< in/out: zlib stream */ - mem_heap_t* heap); /*!< in: memory heap to use */ - -/**********************************************************************//** -Compress a page. -@return TRUE on success, FALSE on failure; page_zip will be left -intact on failure. 
*/ -UNIV_INTERN -ibool -page_zip_compress( -/*==============*/ - page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs, - m_start, m_end, m_nonempty */ - const page_t* page, /*!< in: uncompressed page */ - dict_index_t* index, /*!< in: index of the B-tree node */ - ulint level, /*!< in: compression level */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - MY_ATTRIBUTE((warn_unused_result)); - -/**********************************************************************//** -Decompress a page. This function should tolerate errors on the compressed -page. Instead of letting assertions fail, it will return FALSE if an -inconsistency is detected. -@return TRUE on success, FALSE on failure */ -UNIV_INTERN -ibool -page_zip_decompress( -/*================*/ - page_zip_des_t* page_zip,/*!< in: data, ssize; - out: m_start, m_end, m_nonempty, n_blobs */ - page_t* page, /*!< out: uncompressed page, may be trashed */ - ibool all) /*!< in: TRUE=decompress the whole page; - FALSE=verify but do not copy some - page header fields that should not change - after page creation */ - MY_ATTRIBUTE((nonnull(1,2))); - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Validate a compressed page descriptor. -@return TRUE if ok */ -UNIV_INLINE -ibool -page_zip_simple_validate( -/*=====================*/ - const page_zip_des_t* page_zip); /*!< in: compressed page - descriptor */ -#endif /* UNIV_DEBUG */ - -#endif /* !UNIV_INNOCHECKSUM */ - -#ifdef UNIV_ZIP_DEBUG -/**********************************************************************//** -Check that the compressed and decompressed pages match. 
-@return TRUE if valid, FALSE if not */ -UNIV_INTERN -ibool -page_zip_validate_low( -/*==================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - const page_t* page, /*!< in: uncompressed page */ - const dict_index_t* index, /*!< in: index of the page, if known */ - ibool sloppy) /*!< in: FALSE=strict, - TRUE=ignore the MIN_REC_FLAG */ - MY_ATTRIBUTE((nonnull(1,2))); -/**********************************************************************//** -Check that the compressed and decompressed pages match. */ -UNIV_INTERN -ibool -page_zip_validate( -/*==============*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - const page_t* page, /*!< in: uncompressed page */ - const dict_index_t* index) /*!< in: index of the page, if known */ - MY_ATTRIBUTE((nonnull(1,2))); -#endif /* UNIV_ZIP_DEBUG */ - -#ifndef UNIV_INNOCHECKSUM -/**********************************************************************//** -Determine how big record can be inserted without recompressing the page. -@return a positive number indicating the maximum size of a record -whose insertion is guaranteed to succeed, or zero or negative */ -UNIV_INLINE -lint -page_zip_max_ins_size( -/*==================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - ibool is_clust)/*!< in: TRUE if clustered index */ - MY_ATTRIBUTE((nonnull, pure)); - -/**********************************************************************//** -Determine if enough space is available in the modification log. 
-@return TRUE if page_zip_write_rec() will succeed */ -UNIV_INLINE -ibool -page_zip_available( -/*===============*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - ibool is_clust,/*!< in: TRUE if clustered index */ - ulint length, /*!< in: combined size of the record */ - ulint create) /*!< in: nonzero=add the record to - the heap */ - MY_ATTRIBUTE((nonnull, pure)); - -/**********************************************************************//** -Write data to the uncompressed header portion of a page. The data must -already have been written to the uncompressed page. */ -UNIV_INLINE -void -page_zip_write_header( -/*==================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* str, /*!< in: address on the uncompressed page */ - ulint length, /*!< in: length of the data */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - MY_ATTRIBUTE((nonnull(1,2))); - -/**********************************************************************//** -Write an entire record on the compressed page. The data must already -have been written to the uncompressed page. */ -UNIV_INTERN -void -page_zip_write_rec( -/*===============*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record being written */ - dict_index_t* index, /*!< in: the index the record belongs to */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint create) /*!< in: nonzero=insert, zero=update */ - MY_ATTRIBUTE((nonnull)); - -/***********************************************************//** -Parses a log record of writing a BLOB pointer of a record. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_write_blob_ptr( -/*==========================*/ - byte* ptr, /*!< in: redo log buffer */ - byte* end_ptr,/*!< in: redo log buffer end */ - page_t* page, /*!< in/out: uncompressed page */ - page_zip_des_t* page_zip);/*!< in/out: compressed page */ - -/**********************************************************************//** -Write a BLOB pointer of a record on the leaf page of a clustered index. -The information must already have been updated on the uncompressed page. */ -UNIV_INTERN -void -page_zip_write_blob_ptr( -/*====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in/out: record whose data is being - written */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint n, /*!< in: column index */ - mtr_t* mtr) /*!< in: mini-transaction handle, - or NULL if no logging is needed */ - MY_ATTRIBUTE((nonnull(1,2,3,4))); - -/***********************************************************//** -Parses a log record of writing the node pointer of a record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_write_node_ptr( -/*==========================*/ - byte* ptr, /*!< in: redo log buffer */ - byte* end_ptr,/*!< in: redo log buffer end */ - page_t* page, /*!< in/out: uncompressed page */ - page_zip_des_t* page_zip);/*!< in/out: compressed page */ - -/**********************************************************************//** -Write the node pointer of a record on a non-leaf compressed page. 
*/ -UNIV_INTERN -void -page_zip_write_node_ptr( -/*====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in/out: record */ - ulint size, /*!< in: data size of rec */ - ulint ptr, /*!< in: node pointer */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - MY_ATTRIBUTE((nonnull(1,2))); - -/**********************************************************************//** -Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ -UNIV_INTERN -void -page_zip_write_trx_id_and_roll_ptr( -/*===============================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in/out: record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint trx_id_col,/*!< in: column number of TRX_ID in rec */ - trx_id_t trx_id, /*!< in: transaction identifier */ - roll_ptr_t roll_ptr)/*!< in: roll_ptr */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Write the "deleted" flag of a record on a compressed page. The flag must -already have been written on the uncompressed page. */ -UNIV_INTERN -void -page_zip_rec_set_deleted( -/*=====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record on the uncompressed page */ - ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Write the "owned" flag of a record on a compressed page. The n_owned field -must already have been written on the uncompressed page. 
*/ -UNIV_INTERN -void -page_zip_rec_set_owned( -/*===================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record on the uncompressed page */ - ulint flag) /*!< in: the owned flag (nonzero=TRUE) */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Insert a record to the dense page directory. */ -UNIV_INTERN -void -page_zip_dir_insert( -/*================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* prev_rec,/*!< in: record after which to insert */ - const byte* free_rec,/*!< in: record from which rec was - allocated, or NULL */ - byte* rec); /*!< in: record to insert */ - -/**********************************************************************//** -Shift the dense page directory and the array of BLOB pointers -when a record is deleted. */ -UNIV_INTERN -void -page_zip_dir_delete( -/*================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - byte* rec, /*!< in: deleted record */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets, /*!< in: rec_get_offsets(rec) */ - const byte* free) /*!< in: previous start of - the free list */ - MY_ATTRIBUTE((nonnull(1,2,3,4))); - -/**********************************************************************//** -Add a slot to the dense page directory. */ -UNIV_INTERN -void -page_zip_dir_add_slot( -/*==================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - ulint is_clustered) /*!< in: nonzero for clustered index, - zero for others */ - MY_ATTRIBUTE((nonnull)); - -/***********************************************************//** -Parses a log record of writing to the header of a page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_write_header( -/*========================*/ - byte* ptr, /*!< in: redo log buffer */ - byte* end_ptr,/*!< in: redo log buffer end */ - page_t* page, /*!< in/out: uncompressed page */ - page_zip_des_t* page_zip);/*!< in/out: compressed page */ - -/**********************************************************************//** -Write data to the uncompressed header portion of a page. The data must -already have been written to the uncompressed page. -However, the data portion of the uncompressed page may differ from -the compressed page when a record is being inserted in -page_cur_insert_rec_zip(). -NOTE: this repeats the declaration of page_zip_write_header() that appears -earlier in this header. */ -UNIV_INLINE -void -page_zip_write_header( -/*==================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* str, /*!< in: address on the uncompressed page */ - ulint length, /*!< in: length of the data */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ - MY_ATTRIBUTE((nonnull(1,2))); - -/**********************************************************************//** -Reorganize and compress a page. This is a low-level operation for -compressed pages, to be used when page_zip_compress() fails. -On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written. -The function btr_page_reorganize() should be preferred whenever possible. -IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a -non-clustered index, the caller must update the insert buffer free -bits in the same mini-transaction in such a way that the modification -will be redo-logged. -@return TRUE on success, FALSE on failure; page_zip will be left -intact on failure, but page will be overwritten.
*/ -UNIV_INTERN -ibool -page_zip_reorganize( -/*================*/ - buf_block_t* block, /*!< in/out: page with compressed page; - on the compressed page, in: size; - out: data, n_blobs, - m_start, m_end, m_nonempty */ - dict_index_t* index, /*!< in: index of the B-tree node */ - mtr_t* mtr); /*!< in: mini-transaction */ -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Copy the records of a page byte for byte. Do not copy the page header -or trailer, except those B-tree header fields that are directly -related to the storage of records. Also copy PAGE_MAX_TRX_ID. -NOTE: The caller must update the lock table and the adaptive hash index. */ -UNIV_INTERN -void -page_zip_copy_recs( -/*===============*/ - page_zip_des_t* page_zip, /*!< out: copy of src_zip - (n_blobs, m_start, m_end, - m_nonempty, data[0..size-1]) */ - page_t* page, /*!< out: copy of src */ - const page_zip_des_t* src_zip, /*!< in: compressed page */ - const page_t* src, /*!< in: page */ - dict_index_t* index, /*!< in: index of the B-tree */ - mtr_t* mtr) /*!< in: mini-transaction */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Parses a log record of compressing an index page. -@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_compress( -/*====================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< out: uncompressed page */ - page_zip_des_t* page_zip)/*!< out: compressed page */ - MY_ATTRIBUTE((warn_unused_result)); - -#endif /* !UNIV_INNOCHECKSUM */ - -/**********************************************************************//** -Calculate the compressed page checksum. 
-@return page checksum */ -UNIV_INTERN -ulint -page_zip_calc_checksum( -/*===================*/ - const void* data, /*!< in: compressed page */ - ulint size, /*!< in: size of compressed page */ - srv_checksum_algorithm_t algo) /*!< in: algorithm to use */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Verify a compressed page's checksum. -@return TRUE if the stored checksum is valid according to the value of -innodb_checksum_algorithm */ -UNIV_INTERN -ibool -page_zip_verify_checksum( -/*=====================*/ - const void* data, /*!< in: compressed page */ - ulint size); /*!< in: size of compressed page */ - -#ifndef UNIV_INNOCHECKSUM - -/**********************************************************************//** -Write a log record of compressing an index page without the data on the page. */ -UNIV_INLINE -void -page_zip_compress_write_log_no_data( -/*================================*/ - ulint level, /*!< in: compression level */ - const page_t* page, /*!< in: page that is compressed */ - dict_index_t* index, /*!< in: index */ - mtr_t* mtr); /*!< in: mtr */ -/**********************************************************************//** -Parses a log record of compressing an index page without the data. -@return end of log record or NULL */ -UNIV_INLINE -byte* -page_zip_parse_compress_no_data( -/*============================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr, /*!< in: buffer end */ - page_t* page, /*!< in: uncompressed page */ - page_zip_des_t* page_zip, /*!< out: compressed page */ - dict_index_t* index) /*!< in: index */ - MY_ATTRIBUTE((nonnull(1,2))); - -/**********************************************************************//** -Reset the counters used for filling -INFORMATION_SCHEMA.innodb_cmp_per_index. 
*/ -UNIV_INLINE -void -page_zip_reset_stat_per_index(); -/*===========================*/ - -#endif /* !UNIV_INNOCHECKSUM */ - -#ifndef UNIV_HOTBACKUP -/** Check if a pointer to an uncompressed page matches a compressed page. -When we IMPORT a tablespace the blocks and accompanying frames are allocated -from outside the buffer pool. -NOTE: this variant reads page_zip_des_t::m_external, which is declared only -in UNIV_DEBUG builds, so it can only be expanded where that member exists. -@param ptr pointer to an uncompressed page frame -@param page_zip compressed page descriptor -@return TRUE if ptr and page_zip refer to the same block */ -# define PAGE_ZIP_MATCH(ptr, page_zip) \ - (((page_zip)->m_external \ - && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)) \ - || buf_frame_get_page_zip(ptr) == (page_zip)) -#else /* !UNIV_HOTBACKUP */ -/** Check if a pointer to an uncompressed page matches a compressed page. -@param ptr pointer to an uncompressed page frame -@param page_zip compressed page descriptor -@return TRUE if ptr and page_zip refer to the same block */ -# define PAGE_ZIP_MATCH(ptr, page_zip) \ - (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data) -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif - -#ifndef UNIV_INNOCHECKSUM -#ifndef UNIV_NONINL -# include "page0zip.ic" -#endif -#endif /* !UNIV_INNOCHECKSUM */ - -#endif /* page0zip_h */ diff --git a/storage/xtradb/include/page0zip.ic b/storage/xtradb/include/page0zip.ic deleted file mode 100644 index 9a583086925..00000000000 --- a/storage/xtradb/include/page0zip.ic +++ /dev/null @@ -1,458 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License.
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/page0zip.ic -Compressed page interface - -Created June 2005 by Marko Makela -*******************************************************/ - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE -#endif - -#include "page0zip.h" -#include "mtr0log.h" -#include "page0page.h" - -/* The format of compressed pages is as follows. - -The header and trailer of the uncompressed pages, excluding the page -directory in the trailer, are copied as is to the header and trailer -of the compressed page. - -At the end of the compressed page, there is a dense page directory -pointing to every user record contained on the page, including deleted -records on the free list. The dense directory is indexed in the -collation order, i.e., in the order in which the record list is -linked on the uncompressed page. The infimum and supremum records are -excluded. The two most significant bits of the entries are allocated -for the delete-mark and an n_owned flag indicating the last record in -a chain of records pointed to from the sparse page directory on the -uncompressed page. - -The data between PAGE_ZIP_START and the last page directory entry will -be written in compressed format, starting at offset PAGE_DATA. -Infimum and supremum records are not stored. We exclude the -REC_N_NEW_EXTRA_BYTES in every record header. 
These can be recovered -from the dense page directory stored at the end of the compressed -page. - -The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and -roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of -externally stored columns are stored separately, in ascending order of -heap_no and column index, starting backwards from the dense page -directory. - -The compressed data stream may be followed by a modification log -covering the compressed portion of the page, as follows. - -MODIFICATION LOG ENTRY FORMAT -- write record: - - (heap_no - 1) << 1 (1..2 bytes) - - extra bytes backwards - - data bytes -- clear record: - - (heap_no - 1) << 1 | 1 (1..2 bytes) - -The integer values are stored in a variable-length format: -- 0xxxxxxx: 0..127 -- 1xxxxxxx xxxxxxxx: 0..32767 - -The end of the modification log is marked by a 0 byte. - -In summary, the compressed page looks like this: - -(1) Uncompressed page header (PAGE_DATA bytes) -(2) Compressed index information -(3) Compressed page data -(4) Page modification log (page_zip->m_start..page_zip->m_end) -(5) Empty zero-filled space -(6) BLOB pointers (on leaf pages) - - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column - - in descending collation order -(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, - - indexed by heap_no - - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes - - REC_NODE_PTR_SIZE for non-leaf pages - - 0 otherwise -(8) dense page directory, stored backwards - - n_dense = n_heap - 2 - - existing records in ascending collation order - - deleted records (free list) in link order -*/ - -/** Start offset of the area that will be compressed */ -#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END -/** Size of a compressed page directory entry, in bytes */ -#define PAGE_ZIP_DIR_SLOT_SIZE 2 -/** Mask of record offsets (the low 14 bits of a directory entry) */ -#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff -/** 'owned' flag (bit 14 of a directory entry) */ -#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000 -/** 'deleted' flag (bit 15 of a directory entry) */
-#define PAGE_ZIP_DIR_SLOT_DEL 0x8000 - -/**********************************************************************//** -Determine the size of a compressed page in bytes. -@return size in bytes, or 0 if page_zip->ssize is 0 */ -UNIV_INLINE -ulint -page_zip_get_size( -/*==============*/ - const page_zip_des_t* page_zip) /*!< in: compressed page */ -{ - ulint size; - - if (!page_zip->ssize) { - return(0); - } - - size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize; - - ut_ad(size >= UNIV_ZIP_SIZE_MIN); - ut_ad(size <= UNIV_PAGE_SIZE); - - return(size); -} -/**********************************************************************//** -Set the size of a compressed page in bytes. The size is stored as the -shift count ssize for which (UNIV_ZIP_SIZE_MIN >> 1) << ssize == size, -or as ssize = 0 when size is 0. */ -UNIV_INLINE -void -page_zip_set_size( -/*==============*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - ulint size) /*!< in: size in bytes; 0 or a power of 2 */ -{ - if (size) { - int ssize; - - ut_ad(ut_is_2pow(size)); - - /* Use the same base as page_zip_get_size() instead of a - literal 512, and shift in ulint rather than in int. */ - for (ssize = 1; - size > ((ulint) (UNIV_ZIP_SIZE_MIN >> 1) << ssize); - ssize++) { - } - - page_zip->ssize = ssize; - } else { - page_zip->ssize = 0; - } - - ut_ad(page_zip_get_size(page_zip) == size); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Determine if a record is so big that it needs to be stored externally. -@return FALSE if the entire record can be stored locally on the page */ -UNIV_INLINE -ibool -page_zip_rec_needs_ext( -/*===================*/ - ulint rec_size, /*!< in: length of the record in bytes */ - ulint comp, /*!< in: nonzero=compact format */ - ulint n_fields, /*!< in: number of fields in the record; - ignored if zip_size == 0 */ - ulint zip_size) /*!< in: compressed page size in bytes, or 0 */ -{ - ut_ad(rec_size - > (comp ?
REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES)); - ut_ad(ut_is_2pow(zip_size)); - ut_ad(comp || !zip_size); - -#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE - if (rec_size >= REC_MAX_DATA_SIZE) { - return(TRUE); - } -#endif - - if (zip_size) { - ut_ad(comp); - /* On a compressed page, there is a two-byte entry in - the dense page directory for every record. But there - is no record header. There should be enough room for - one record on an empty leaf page. Subtract 1 byte for - the encoded heap number. Check also the available space - on the uncompressed page. */ - return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1) - >= page_zip_empty_size(n_fields, zip_size) - || rec_size >= page_get_free_space_of_empty(TRUE) / 2); - } - - return(rec_size >= page_get_free_space_of_empty(comp) / 2); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Validate a compressed page descriptor. -@return TRUE if ok */ -UNIV_INLINE -ibool -page_zip_simple_validate( -/*=====================*/ - const page_zip_des_t* page_zip)/*!< in: compressed page descriptor */ -{ - ut_ad(page_zip); - ut_ad(page_zip->data); - ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX); - ut_ad(page_zip_get_size(page_zip) - > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); - ut_ad(page_zip->m_start <= page_zip->m_end); - ut_ad(page_zip->m_end < page_zip_get_size(page_zip)); - ut_ad(page_zip->n_blobs - < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE); - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/**********************************************************************//** -Determine the length of the page trailer.
-@return length of the page trailer, in bytes, not including the -terminating zero byte of the modification log */ -UNIV_INLINE -ulint -page_zip_get_trailer_len( -/*=====================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - ibool is_clust)/*!< in: TRUE if clustered index */ -{ - ulint uncompressed_size; - - ut_ad(page_zip_simple_validate(page_zip)); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - /* Per-record trailer bytes: one dense directory slot plus the - columns that are stored uncompressed for this kind of page. */ - if (!page_is_leaf(page_zip->data)) { - uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE - + REC_NODE_PTR_SIZE; - ut_ad(!page_zip->n_blobs); - } else if (is_clust) { - uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; - } else { - uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; - ut_ad(!page_zip->n_blobs); - } - - return((page_dir_get_n_heap(page_zip->data) - 2) - * uncompressed_size - + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); -} - -/**********************************************************************//** -Determine how big a record can be inserted without recompressing the page. -@return a positive number indicating the maximum size of a record -whose insertion is guaranteed to succeed, or zero or negative */ -UNIV_INLINE -lint -page_zip_max_ins_size( -/*==================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - ibool is_clust)/*!< in: TRUE if clustered index */ -{ - ulint trailer_len; - - trailer_len = page_zip_get_trailer_len(page_zip, is_clust); - - /* When a record is created, a pointer may be added to - the dense directory. - Likewise, space for the columns that will not be - compressed will be allocated from the page trailer. - Also the BLOB pointers will be allocated from there, but - we may as well count them in the length of the record.
*/ - - trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; - - return((lint) page_zip_get_size(page_zip) - - trailer_len - page_zip->m_end - - (REC_N_NEW_EXTRA_BYTES - 2)); -} - -/**********************************************************************//** -Determine if enough space is available in the modification log. -@return TRUE if enough space is available */ -UNIV_INLINE -ibool -page_zip_available( -/*===============*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - ibool is_clust,/*!< in: TRUE if clustered index */ - ulint length, /*!< in: combined size of the record */ - ulint create) /*!< in: nonzero=add the record to - the heap */ -{ - ulint trailer_len; - - ut_ad(length > REC_N_NEW_EXTRA_BYTES); - - trailer_len = page_zip_get_trailer_len(page_zip, is_clust); - - /* Subtract the fixed extra bytes and add the maximum - space needed for identifying the record (encoded heap_no). */ - length -= REC_N_NEW_EXTRA_BYTES - 2; - - if (create > 0) { - /* When a record is created, a pointer may be added to - the dense directory. - Likewise, space for the columns that will not be - compressed will be allocated from the page trailer. - Also the BLOB pointers will be allocated from there, but - we may as well count them in the length of the record. */ - - trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; - } - - return(length + trailer_len + page_zip->m_end - < page_zip_get_size(page_zip)); -} - -/**********************************************************************//** -Initialize a compressed page descriptor. */ -UNIV_INLINE -void -page_zip_des_init( -/*==============*/ - page_zip_des_t* page_zip) /*!< in/out: compressed page - descriptor */ -{ - memset(page_zip, 0, sizeof *page_zip); -} - -/**********************************************************************//** -Write a log record of writing to the uncompressed header portion of a page.
*/ -UNIV_INTERN -void -page_zip_write_header_log( -/*======================*/ - const byte* data,/*!< in: data on the uncompressed page */ - ulint length, /*!< in: length of the data */ - mtr_t* mtr); /*!< in: mini-transaction */ - -/**********************************************************************//** -Write data to the uncompressed header portion of a page. The data must -already have been written to the uncompressed page. -However, the data portion of the uncompressed page may differ from -the compressed page when a record is being inserted in -page_cur_insert_rec_zip(). */ -UNIV_INLINE -void -page_zip_write_header( -/*==================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* str, /*!< in: address on the uncompressed page */ - ulint length, /*!< in: length of the data */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ -{ - ulint pos; - - ut_ad(PAGE_ZIP_MATCH(str, page_zip)); - ut_ad(page_zip_simple_validate(page_zip)); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - pos = page_offset(str); - - ut_ad(pos < PAGE_DATA); - - memcpy(page_zip->data + pos, str, length); - - /* The following would fail in page_cur_insert_rec_zip(). */ - /* ut_ad(page_zip_validate(page_zip, str - pos)); */ - - if (mtr) { -#ifndef UNIV_HOTBACKUP - page_zip_write_header_log(str, length, mtr); -#endif /* !UNIV_HOTBACKUP */ - } -} - -/**********************************************************************//** -Write a log record of compressing an index page without the data on the page. 
*/ -UNIV_INLINE -void -page_zip_compress_write_log_no_data( -/*================================*/ - ulint level, /*!< in: compression level */ - const page_t* page, /*!< in: page that is compressed */ - dict_index_t* index, /*!< in: index */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* log_ptr = mlog_open_and_write_index( - mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1); - - if (log_ptr) { - mach_write_to_1(log_ptr, level); - mlog_close(mtr, log_ptr + 1); - } -} - -/**********************************************************************//** -Parses a log record of compressing an index page without the data. -@return end of log record or NULL */ -UNIV_INLINE -byte* -page_zip_parse_compress_no_data( -/*============================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr, /*!< in: buffer end */ - page_t* page, /*!< in: uncompressed page */ - page_zip_des_t* page_zip, /*!< out: compressed page */ - dict_index_t* index) /*!< in: index */ -{ - ulint level; - if (end_ptr == ptr) { - return(NULL); - } - - level = mach_read_from_1(ptr); - - /* If page compression fails then there must be something wrong - because a compress log record is logged only if the compression - was successful. Crash in this case. */ - - if (page - && !page_zip_compress(page_zip, page, index, level, NULL)) { - ut_error; - } - - return(ptr + 1); -} - -/**********************************************************************//** -Reset the counters used for filling -INFORMATION_SCHEMA.innodb_cmp_per_index. 
*/ -UNIV_INLINE -void -page_zip_reset_stat_per_index() -/*===========================*/ -{ - mutex_enter(&page_zip_stat_per_index_mutex); - - page_zip_stat_per_index.erase( - page_zip_stat_per_index.begin(), - page_zip_stat_per_index.end()); - - mutex_exit(&page_zip_stat_per_index_mutex); -} - -#ifdef UNIV_MATERIALIZE -# undef UNIV_INLINE -# define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif diff --git a/storage/xtradb/include/pars0grm.h b/storage/xtradb/include/pars0grm.h deleted file mode 100644 index 8e725fe9545..00000000000 --- a/storage/xtradb/include/pars0grm.h +++ /dev/null @@ -1,261 +0,0 @@ -/* A Bison parser, made by GNU Bison 2.3. */ - -/* Skeleton interface for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. 
Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - PARS_INT_LIT = 258, - PARS_FLOAT_LIT = 259, - PARS_STR_LIT = 260, - PARS_FIXBINARY_LIT = 261, - PARS_BLOB_LIT = 262, - PARS_NULL_LIT = 263, - PARS_ID_TOKEN = 264, - PARS_AND_TOKEN = 265, - PARS_OR_TOKEN = 266, - PARS_NOT_TOKEN = 267, - PARS_GE_TOKEN = 268, - PARS_LE_TOKEN = 269, - PARS_NE_TOKEN = 270, - PARS_PROCEDURE_TOKEN = 271, - PARS_IN_TOKEN = 272, - PARS_OUT_TOKEN = 273, - PARS_BINARY_TOKEN = 274, - PARS_BLOB_TOKEN = 275, - PARS_INT_TOKEN = 276, - PARS_INTEGER_TOKEN = 277, - PARS_FLOAT_TOKEN = 278, - PARS_CHAR_TOKEN = 279, - PARS_IS_TOKEN = 280, - PARS_BEGIN_TOKEN = 281, - PARS_END_TOKEN = 282, - PARS_IF_TOKEN = 283, - PARS_THEN_TOKEN = 284, - PARS_ELSE_TOKEN = 285, - PARS_ELSIF_TOKEN = 286, - PARS_LOOP_TOKEN = 287, - PARS_WHILE_TOKEN = 288, - PARS_RETURN_TOKEN = 289, - PARS_SELECT_TOKEN = 290, - PARS_SUM_TOKEN = 291, - PARS_COUNT_TOKEN = 292, - PARS_DISTINCT_TOKEN = 293, - PARS_FROM_TOKEN = 294, - PARS_WHERE_TOKEN = 295, - PARS_FOR_TOKEN = 296, - PARS_DDOT_TOKEN = 297, - PARS_READ_TOKEN = 298, - PARS_ORDER_TOKEN = 299, - PARS_BY_TOKEN = 300, - PARS_ASC_TOKEN = 301, - PARS_DESC_TOKEN = 302, - PARS_INSERT_TOKEN = 303, - PARS_INTO_TOKEN = 304, - PARS_VALUES_TOKEN = 305, - PARS_UPDATE_TOKEN = 306, - PARS_SET_TOKEN = 307, - PARS_DELETE_TOKEN = 308, - PARS_CURRENT_TOKEN = 309, - PARS_OF_TOKEN = 310, - PARS_CREATE_TOKEN = 311, - PARS_TABLE_TOKEN = 312, - PARS_INDEX_TOKEN = 313, - PARS_UNIQUE_TOKEN = 314, - 
PARS_CLUSTERED_TOKEN = 315, - PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316, - PARS_ON_TOKEN = 317, - PARS_ASSIGN_TOKEN = 318, - PARS_DECLARE_TOKEN = 319, - PARS_CURSOR_TOKEN = 320, - PARS_SQL_TOKEN = 321, - PARS_OPEN_TOKEN = 322, - PARS_FETCH_TOKEN = 323, - PARS_CLOSE_TOKEN = 324, - PARS_NOTFOUND_TOKEN = 325, - PARS_TO_CHAR_TOKEN = 326, - PARS_TO_NUMBER_TOKEN = 327, - PARS_TO_BINARY_TOKEN = 328, - PARS_BINARY_TO_NUMBER_TOKEN = 329, - PARS_SUBSTR_TOKEN = 330, - PARS_REPLSTR_TOKEN = 331, - PARS_CONCAT_TOKEN = 332, - PARS_INSTR_TOKEN = 333, - PARS_LENGTH_TOKEN = 334, - PARS_SYSDATE_TOKEN = 335, - PARS_PRINTF_TOKEN = 336, - PARS_ASSERT_TOKEN = 337, - PARS_RND_TOKEN = 338, - PARS_RND_STR_TOKEN = 339, - PARS_ROW_PRINTF_TOKEN = 340, - PARS_COMMIT_TOKEN = 341, - PARS_ROLLBACK_TOKEN = 342, - PARS_WORK_TOKEN = 343, - PARS_UNSIGNED_TOKEN = 344, - PARS_EXIT_TOKEN = 345, - PARS_FUNCTION_TOKEN = 346, - PARS_LOCK_TOKEN = 347, - PARS_SHARE_TOKEN = 348, - PARS_MODE_TOKEN = 349, - PARS_LIKE_TOKEN = 350, - PARS_LIKE_TOKEN_EXACT = 351, - PARS_LIKE_TOKEN_PREFIX = 352, - PARS_LIKE_TOKEN_SUFFIX = 353, - PARS_LIKE_TOKEN_SUBSTR = 354, - PARS_TABLE_NAME_TOKEN = 355, - PARS_COMPACT_TOKEN = 356, - PARS_BLOCK_SIZE_TOKEN = 357, - PARS_BIGINT_TOKEN = 358, - NEG = 359 - }; -#endif -/* Tokens. 
*/ -#define PARS_INT_LIT 258 -#define PARS_FLOAT_LIT 259 -#define PARS_STR_LIT 260 -#define PARS_FIXBINARY_LIT 261 -#define PARS_BLOB_LIT 262 -#define PARS_NULL_LIT 263 -#define PARS_ID_TOKEN 264 -#define PARS_AND_TOKEN 265 -#define PARS_OR_TOKEN 266 -#define PARS_NOT_TOKEN 267 -#define PARS_GE_TOKEN 268 -#define PARS_LE_TOKEN 269 -#define PARS_NE_TOKEN 270 -#define PARS_PROCEDURE_TOKEN 271 -#define PARS_IN_TOKEN 272 -#define PARS_OUT_TOKEN 273 -#define PARS_BINARY_TOKEN 274 -#define PARS_BLOB_TOKEN 275 -#define PARS_INT_TOKEN 276 -#define PARS_INTEGER_TOKEN 277 -#define PARS_FLOAT_TOKEN 278 -#define PARS_CHAR_TOKEN 279 -#define PARS_IS_TOKEN 280 -#define PARS_BEGIN_TOKEN 281 -#define PARS_END_TOKEN 282 -#define PARS_IF_TOKEN 283 -#define PARS_THEN_TOKEN 284 -#define PARS_ELSE_TOKEN 285 -#define PARS_ELSIF_TOKEN 286 -#define PARS_LOOP_TOKEN 287 -#define PARS_WHILE_TOKEN 288 -#define PARS_RETURN_TOKEN 289 -#define PARS_SELECT_TOKEN 290 -#define PARS_SUM_TOKEN 291 -#define PARS_COUNT_TOKEN 292 -#define PARS_DISTINCT_TOKEN 293 -#define PARS_FROM_TOKEN 294 -#define PARS_WHERE_TOKEN 295 -#define PARS_FOR_TOKEN 296 -#define PARS_DDOT_TOKEN 297 -#define PARS_READ_TOKEN 298 -#define PARS_ORDER_TOKEN 299 -#define PARS_BY_TOKEN 300 -#define PARS_ASC_TOKEN 301 -#define PARS_DESC_TOKEN 302 -#define PARS_INSERT_TOKEN 303 -#define PARS_INTO_TOKEN 304 -#define PARS_VALUES_TOKEN 305 -#define PARS_UPDATE_TOKEN 306 -#define PARS_SET_TOKEN 307 -#define PARS_DELETE_TOKEN 308 -#define PARS_CURRENT_TOKEN 309 -#define PARS_OF_TOKEN 310 -#define PARS_CREATE_TOKEN 311 -#define PARS_TABLE_TOKEN 312 -#define PARS_INDEX_TOKEN 313 -#define PARS_UNIQUE_TOKEN 314 -#define PARS_CLUSTERED_TOKEN 315 -#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316 -#define PARS_ON_TOKEN 317 -#define PARS_ASSIGN_TOKEN 318 -#define PARS_DECLARE_TOKEN 319 -#define PARS_CURSOR_TOKEN 320 -#define PARS_SQL_TOKEN 321 -#define PARS_OPEN_TOKEN 322 -#define PARS_FETCH_TOKEN 323 -#define PARS_CLOSE_TOKEN 324 -#define 
PARS_NOTFOUND_TOKEN 325 -#define PARS_TO_CHAR_TOKEN 326 -#define PARS_TO_NUMBER_TOKEN 327 -#define PARS_TO_BINARY_TOKEN 328 -#define PARS_BINARY_TO_NUMBER_TOKEN 329 -#define PARS_SUBSTR_TOKEN 330 -#define PARS_REPLSTR_TOKEN 331 -#define PARS_CONCAT_TOKEN 332 -#define PARS_INSTR_TOKEN 333 -#define PARS_LENGTH_TOKEN 334 -#define PARS_SYSDATE_TOKEN 335 -#define PARS_PRINTF_TOKEN 336 -#define PARS_ASSERT_TOKEN 337 -#define PARS_RND_TOKEN 338 -#define PARS_RND_STR_TOKEN 339 -#define PARS_ROW_PRINTF_TOKEN 340 -#define PARS_COMMIT_TOKEN 341 -#define PARS_ROLLBACK_TOKEN 342 -#define PARS_WORK_TOKEN 343 -#define PARS_UNSIGNED_TOKEN 344 -#define PARS_EXIT_TOKEN 345 -#define PARS_FUNCTION_TOKEN 346 -#define PARS_LOCK_TOKEN 347 -#define PARS_SHARE_TOKEN 348 -#define PARS_MODE_TOKEN 349 -#define PARS_LIKE_TOKEN 350 -#define PARS_LIKE_TOKEN_EXACT 351 -#define PARS_LIKE_TOKEN_PREFIX 352 -#define PARS_LIKE_TOKEN_SUFFIX 353 -#define PARS_LIKE_TOKEN_SUBSTR 354 -#define PARS_TABLE_NAME_TOKEN 355 -#define PARS_COMPACT_TOKEN 356 -#define PARS_BLOCK_SIZE_TOKEN 357 -#define PARS_BIGINT_TOKEN 358 -#define NEG 359 - - - - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef int YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - -extern YYSTYPE yylval; - diff --git a/storage/xtradb/include/pars0opt.h b/storage/xtradb/include/pars0opt.h deleted file mode 100644 index 1084d644c90..00000000000 --- a/storage/xtradb/include/pars0opt.h +++ /dev/null @@ -1,75 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0opt.h -Simple SQL optimizer - -Created 12/21/1997 Heikki Tuuri -*******************************************************/ - -#ifndef pars0opt_h -#define pars0opt_h - -#include "univ.i" -#include "que0types.h" -#include "usr0types.h" -#include "pars0sym.h" -#include "dict0types.h" -#include "row0sel.h" - -/*******************************************************************//** -Optimizes a select. Decides which indexes to tables to use. The tables -are accessed in the order that they were written to the FROM part in the -select statement. */ -UNIV_INTERN -void -opt_search_plan( -/*============*/ - sel_node_t* sel_node); /*!< in: parsed select node */ -/*******************************************************************//** -Looks for occurrences of the columns of the table in the query subgraph and -adds them to the list of columns if an occurrence of the same column does not -already exist in the list. If the column is already in the list, puts a value -indirection to point to the occurrence in the column list, except if the -column occurrence we are looking at is in the column list, in which case -nothing is done. 
*/ -UNIV_INTERN -void -opt_find_all_cols( -/*==============*/ - ibool copy_val, /*!< in: if TRUE, new found columns are - added as columns to copy */ - dict_index_t* index, /*!< in: index to use */ - sym_node_list_t* col_list, /*!< in: base node of a list where - to add new found columns */ - plan_t* plan, /*!< in: plan or NULL */ - que_node_t* exp); /*!< in: expression or condition */ -/********************************************************************//** -Prints info of a query plan. */ -UNIV_INTERN -void -opt_print_query_plan( -/*=================*/ - sel_node_t* sel_node); /*!< in: select node */ - -#ifndef UNIV_NONINL -#include "pars0opt.ic" -#endif - -#endif diff --git a/storage/xtradb/include/pars0opt.ic b/storage/xtradb/include/pars0opt.ic deleted file mode 100644 index 786d911ca3d..00000000000 --- a/storage/xtradb/include/pars0opt.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0opt.ic -Simple SQL optimizer - -Created 12/21/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/pars0pars.h b/storage/xtradb/include/pars0pars.h deleted file mode 100644 index 73585c78a6a..00000000000 --- a/storage/xtradb/include/pars0pars.h +++ /dev/null @@ -1,826 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0pars.h -SQL parser - -Created 11/19/1996 Heikki Tuuri -*******************************************************/ - -#ifndef pars0pars_h -#define pars0pars_h - -#include "univ.i" -#include "que0types.h" -#include "usr0types.h" -#include "pars0types.h" -#include "row0types.h" -#include "trx0types.h" -#include "ut0vec.h" - -/** Type of the user functions. The first argument is always InnoDB-supplied -and varies in type, while 'user_arg' is a user-supplied argument. The -meaning of the return type also varies. See the individual use cases, e.g. -the FETCH statement, for details on them. */ -typedef ibool (*pars_user_func_cb_t)(void* arg, void* user_arg); - -/** If the following is set TRUE, the parser will emit debugging -information */ -extern int yydebug; - -#ifdef UNIV_SQL_DEBUG -/** If the following is set TRUE, the lexer will print the SQL string -as it tokenizes it */ -extern ibool pars_print_lexed; -#endif /* UNIV_SQL_DEBUG */ - -/* Global variable used while parsing a single procedure or query : the code is -NOT re-entrant */ -extern sym_tab_t* pars_sym_tab_global; - -extern pars_res_word_t pars_to_char_token; -extern pars_res_word_t pars_to_number_token; -extern pars_res_word_t pars_to_binary_token; -extern pars_res_word_t pars_binary_to_number_token; -extern pars_res_word_t pars_substr_token; -extern pars_res_word_t pars_replstr_token; -extern pars_res_word_t pars_concat_token; -extern pars_res_word_t pars_length_token; -extern pars_res_word_t pars_instr_token; -extern pars_res_word_t pars_sysdate_token; -extern pars_res_word_t pars_printf_token; -extern pars_res_word_t pars_assert_token; -extern 
pars_res_word_t pars_rnd_token; -extern pars_res_word_t pars_rnd_str_token; -extern pars_res_word_t pars_count_token; -extern pars_res_word_t pars_sum_token; -extern pars_res_word_t pars_distinct_token; -extern pars_res_word_t pars_binary_token; -extern pars_res_word_t pars_blob_token; -extern pars_res_word_t pars_int_token; -extern pars_res_word_t pars_bigint_token; -extern pars_res_word_t pars_char_token; -extern pars_res_word_t pars_float_token; -extern pars_res_word_t pars_update_token; -extern pars_res_word_t pars_asc_token; -extern pars_res_word_t pars_desc_token; -extern pars_res_word_t pars_open_token; -extern pars_res_word_t pars_close_token; -extern pars_res_word_t pars_share_token; -extern pars_res_word_t pars_unique_token; -extern pars_res_word_t pars_clustered_token; - -extern ulint pars_star_denoter; - -/* Procedure parameter types */ -#define PARS_INPUT 0 -#define PARS_OUTPUT 1 -#define PARS_NOT_PARAM 2 - -int -yyparse(void); - -/*************************************************************//** -Parses an SQL string returning the query graph. -@return own: the query graph */ -UNIV_INTERN -que_t* -pars_sql( -/*=====*/ - pars_info_t* info, /*!< in: extra information, or NULL */ - const char* str); /*!< in: SQL string */ -/*************************************************************//** -Retrieves characters to the lexical analyzer. -@return number of characters copied or 0 on EOF */ -UNIV_INTERN -int -pars_get_lex_chars( -/*===============*/ - char* buf, /*!< in/out: buffer where to copy */ - int max_size); /*!< in: maximum number of characters which fit - in the buffer */ -/*************************************************************//** -Called by yyparse on error. */ -UNIV_INTERN -void -yyerror( -/*====*/ - const char* s); /*!< in: error message string */ -/*********************************************************************//** -Parses a variable declaration. 
-@return own: symbol table node of type SYM_VAR */ -UNIV_INTERN -sym_node_t* -pars_variable_declaration( -/*======================*/ - sym_node_t* node, /*!< in: symbol table node allocated for the - id of the variable */ - pars_res_word_t* type); /*!< in: pointer to a type token */ -/*********************************************************************//** -Parses a function expression. -@return own: function node in a query tree */ -UNIV_INTERN -func_node_t* -pars_func( -/*======*/ - que_node_t* res_word,/*!< in: function name reserved word */ - que_node_t* arg); /*!< in: first argument in the argument list */ -/************************************************************************* -Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded -within the search string. -@return own: function node in a query tree */ -UNIV_INTERN -int -pars_like_rebind( -/*=============*/ - sym_node_t* node, /* in: The search string node.*/ - const byte* ptr, /* in: literal to (re) bind */ - ulint len); /* in: length of literal to (re) bind*/ -/*********************************************************************//** -Parses an operator expression. -@return own: function node in a query tree */ -UNIV_INTERN -func_node_t* -pars_op( -/*====*/ - int func, /*!< in: operator token code */ - que_node_t* arg1, /*!< in: first argument */ - que_node_t* arg2); /*!< in: second argument or NULL for an unary - operator */ -/*********************************************************************//** -Parses an ORDER BY clause. Order by a single column only is supported. -@return own: order-by node in a query tree */ -UNIV_INTERN -order_node_t* -pars_order_by( -/*==========*/ - sym_node_t* column, /*!< in: column name */ - pars_res_word_t* asc); /*!< in: &pars_asc_token or pars_desc_token */ -/*********************************************************************//** -Parses a select list; creates a query graph node for the whole SELECT -statement. 
-@return own: select node in a query tree */ -UNIV_INTERN -sel_node_t* -pars_select_list( -/*=============*/ - que_node_t* select_list, /*!< in: select list */ - sym_node_t* into_list); /*!< in: variables list or NULL */ -/*********************************************************************//** -Parses a cursor declaration. -@return sym_node */ -UNIV_INTERN -que_node_t* -pars_cursor_declaration( -/*====================*/ - sym_node_t* sym_node, /*!< in: cursor id node in the symbol - table */ - sel_node_t* select_node); /*!< in: select node */ -/*********************************************************************//** -Parses a function declaration. -@return sym_node */ -UNIV_INTERN -que_node_t* -pars_function_declaration( -/*======================*/ - sym_node_t* sym_node); /*!< in: function id node in the symbol - table */ -/*********************************************************************//** -Parses a select statement. -@return own: select node in a query tree */ -UNIV_INTERN -sel_node_t* -pars_select_statement( -/*==================*/ - sel_node_t* select_node, /*!< in: select node already containing - the select list */ - sym_node_t* table_list, /*!< in: table list */ - que_node_t* search_cond, /*!< in: search condition or NULL */ - pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */ - pars_res_word_t* consistent_read,/*!< in: NULL or - &pars_consistent_token */ - order_node_t* order_by); /*!< in: NULL or an order-by node */ -/*********************************************************************//** -Parses a column assignment in an update. -@return column assignment node */ -UNIV_INTERN -col_assign_node_t* -pars_column_assignment( -/*===================*/ - sym_node_t* column, /*!< in: column to assign */ - que_node_t* exp); /*!< in: value to assign */ -/*********************************************************************//** -Parses a delete or update statement start. 
-@return own: update node in a query tree */ -UNIV_INTERN -upd_node_t* -pars_update_statement_start( -/*========================*/ - ibool is_delete, /*!< in: TRUE if delete */ - sym_node_t* table_sym, /*!< in: table name node */ - col_assign_node_t* col_assign_list);/*!< in: column assignment list, NULL - if delete */ -/*********************************************************************//** -Parses an update or delete statement. -@return own: update node in a query tree */ -UNIV_INTERN -upd_node_t* -pars_update_statement( -/*==================*/ - upd_node_t* node, /*!< in: update node */ - sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in - the symbol table or NULL */ - que_node_t* search_cond); /*!< in: search condition or NULL */ -/*********************************************************************//** -Parses an insert statement. -@return own: update node in a query tree */ -UNIV_INTERN -ins_node_t* -pars_insert_statement( -/*==================*/ - sym_node_t* table_sym, /*!< in: table name node */ - que_node_t* values_list, /*!< in: value expression list or NULL */ - sel_node_t* select); /*!< in: select condition or NULL */ -/*********************************************************************//** -Parses a procedure parameter declaration. -@return own: symbol table node of type SYM_VAR */ -UNIV_INTERN -sym_node_t* -pars_parameter_declaration( -/*=======================*/ - sym_node_t* node, /*!< in: symbol table node allocated for the - id of the parameter */ - ulint param_type, - /*!< in: PARS_INPUT or PARS_OUTPUT */ - pars_res_word_t* type); /*!< in: pointer to a type token */ -/*********************************************************************//** -Parses an elsif element. 
-@return elsif node */ -UNIV_INTERN -elsif_node_t* -pars_elsif_element( -/*===============*/ - que_node_t* cond, /*!< in: if-condition */ - que_node_t* stat_list); /*!< in: statement list */ -/*********************************************************************//** -Parses an if-statement. -@return if-statement node */ -UNIV_INTERN -if_node_t* -pars_if_statement( -/*==============*/ - que_node_t* cond, /*!< in: if-condition */ - que_node_t* stat_list, /*!< in: statement list */ - que_node_t* else_part); /*!< in: else-part statement list */ -/*********************************************************************//** -Parses a for-loop-statement. -@return for-statement node */ -UNIV_INTERN -for_node_t* -pars_for_statement( -/*===============*/ - sym_node_t* loop_var, /*!< in: loop variable */ - que_node_t* loop_start_limit,/*!< in: loop start expression */ - que_node_t* loop_end_limit, /*!< in: loop end expression */ - que_node_t* stat_list); /*!< in: statement list */ -/*********************************************************************//** -Parses a while-statement. -@return while-statement node */ -UNIV_INTERN -while_node_t* -pars_while_statement( -/*=================*/ - que_node_t* cond, /*!< in: while-condition */ - que_node_t* stat_list); /*!< in: statement list */ -/*********************************************************************//** -Parses an exit statement. -@return exit statement node */ -UNIV_INTERN -exit_node_t* -pars_exit_statement(void); -/*=====================*/ -/*********************************************************************//** -Parses a return-statement. -@return return-statement node */ -UNIV_INTERN -return_node_t* -pars_return_statement(void); -/*=======================*/ -/*********************************************************************//** -Parses a procedure call. 
-@return function node */ -UNIV_INTERN -func_node_t* -pars_procedure_call( -/*================*/ - que_node_t* res_word,/*!< in: procedure name reserved word */ - que_node_t* args); /*!< in: argument list */ -/*********************************************************************//** -Parses an assignment statement. -@return assignment statement node */ -UNIV_INTERN -assign_node_t* -pars_assignment_statement( -/*======================*/ - sym_node_t* var, /*!< in: variable to assign */ - que_node_t* val); /*!< in: value to assign */ -/*********************************************************************//** -Parses a fetch statement. into_list or user_func (but not both) must be -non-NULL. -@return fetch statement node */ -UNIV_INTERN -fetch_node_t* -pars_fetch_statement( -/*=================*/ - sym_node_t* cursor, /*!< in: cursor node */ - sym_node_t* into_list, /*!< in: variables to set, or NULL */ - sym_node_t* user_func); /*!< in: user function name, or NULL */ -/*********************************************************************//** -Parses an open or close cursor statement. -@return fetch statement node */ -UNIV_INTERN -open_node_t* -pars_open_statement( -/*================*/ - ulint type, /*!< in: ROW_SEL_OPEN_CURSOR - or ROW_SEL_CLOSE_CURSOR */ - sym_node_t* cursor); /*!< in: cursor node */ -/*********************************************************************//** -Parses a row_printf-statement. -@return row_printf-statement node */ -UNIV_INTERN -row_printf_node_t* -pars_row_printf_statement( -/*======================*/ - sel_node_t* sel_node); /*!< in: select node */ -/*********************************************************************//** -Parses a commit statement. -@return own: commit node struct */ -UNIV_INTERN -commit_node_t* -pars_commit_statement(void); -/*=======================*/ -/*********************************************************************//** -Parses a rollback statement. 
-@return own: rollback node struct */ -UNIV_INTERN -roll_node_t* -pars_rollback_statement(void); -/*=========================*/ -/*********************************************************************//** -Parses a column definition at a table creation. -@return column sym table node */ -UNIV_INTERN -sym_node_t* -pars_column_def( -/*============*/ - sym_node_t* sym_node, /*!< in: column node in the - symbol table */ - pars_res_word_t* type, /*!< in: data type */ - sym_node_t* len, /*!< in: length of column, or - NULL */ - void* is_unsigned, /*!< in: if not NULL, column - is of type UNSIGNED. */ - void* is_not_null); /*!< in: if not NULL, column - is of type NOT NULL. */ -/*********************************************************************//** -Parses a table creation operation. -@return table create subgraph */ -UNIV_INTERN -tab_node_t* -pars_create_table( -/*==============*/ - sym_node_t* table_sym, /*!< in: table name node in the symbol - table */ - sym_node_t* column_defs, /*!< in: list of column names */ - sym_node_t* compact, /* in: non-NULL if COMPACT table. */ - sym_node_t* block_size, /* in: block size (can be NULL) */ - void* not_fit_in_memory); - /*!< in: a non-NULL pointer means that - this is a table which in simulations - should be simulated as not fitting - in memory; thread is put to sleep - to simulate disk accesses; NOTE that - this flag is not stored to the data - dictionary on disk, and the database - will forget about non-NULL value if - it has to reload the table definition - from disk */ -/*********************************************************************//** -Parses an index creation operation. 
-@return index create subgraph */ -UNIV_INTERN -ind_node_t* -pars_create_index( -/*==============*/ - pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */ - pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */ - sym_node_t* index_sym, /*!< in: index name node in the symbol - table */ - sym_node_t* table_sym, /*!< in: table name node in the symbol - table */ - sym_node_t* column_list); /*!< in: list of column names */ -/*********************************************************************//** -Parses a procedure definition. -@return query fork node */ -UNIV_INTERN -que_fork_t* -pars_procedure_definition( -/*======================*/ - sym_node_t* sym_node, /*!< in: procedure id node in the symbol - table */ - sym_node_t* param_list, /*!< in: parameter declaration list */ - que_node_t* stat_list); /*!< in: statement list */ - -/*************************************************************//** -Parses a stored procedure call, when this is not within another stored -procedure, that is, the client issues a procedure call directly. -In MySQL/InnoDB, stored InnoDB procedures are invoked via the -parsed procedure tree, not via InnoDB SQL, so this function is not used. -@return query graph */ -UNIV_INTERN -que_fork_t* -pars_stored_procedure_call( -/*=======================*/ - sym_node_t* sym_node); /*!< in: stored procedure name */ -/******************************************************************//** -Completes a query graph by adding query thread and fork nodes -above it and prepares the graph for running. The fork created is of -type QUE_FORK_MYSQL_INTERFACE. 
-@return query thread node to run */ -UNIV_INTERN -que_thr_t* -pars_complete_graph_for_exec( -/*=========================*/ - que_node_t* node, /*!< in: root node for an incomplete - query graph, or NULL for dummy graph */ - trx_t* trx, /*!< in: transaction handle */ - mem_heap_t* heap) /*!< in: memory heap from which allocated */ - MY_ATTRIBUTE((nonnull(2,3), warn_unused_result)); - -/****************************************************************//** -Create parser info struct. -@return own: info struct */ -UNIV_INTERN -pars_info_t* -pars_info_create(void); -/*==================*/ - -/****************************************************************//** -Free info struct and everything it contains. */ -UNIV_INTERN -void -pars_info_free( -/*===========*/ - pars_info_t* info); /*!< in, own: info struct */ - -/****************************************************************//** -Add bound literal. */ -UNIV_INTERN -void -pars_info_add_literal( -/*==================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const void* address, /*!< in: address */ - ulint length, /*!< in: length of data */ - ulint type, /*!< in: type, e.g. DATA_FIXBINARY */ - ulint prtype); /*!< in: precise type, e.g. - DATA_UNSIGNED */ - -/****************************************************************//** -Equivalent to pars_info_add_literal(info, name, str, strlen(str), -DATA_VARCHAR, DATA_ENGLISH). 
*/ -UNIV_INTERN -void -pars_info_add_str_literal( -/*======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const char* str); /*!< in: string */ -/******************************************************************** -If the literal value already exists then it rebinds otherwise it -creates a new entry.*/ -UNIV_INTERN -void -pars_info_bind_literal( -/*===================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const void* address, /* in: address */ - ulint length, /* in: length of data */ - ulint type, /* in: type, e.g. DATA_FIXBINARY */ - ulint prtype); /* in: precise type, e.g. */ -/******************************************************************** -If the literal value already exists then it rebinds otherwise it -creates a new entry.*/ -UNIV_INTERN -void -pars_info_bind_varchar_literal( -/*===========================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const byte* str, /*!< in: string */ - ulint str_len); /*!< in: string length */ -/****************************************************************//** -Equivalent to: - -char buf[4]; -mach_write_to_4(buf, val); -pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. */ -UNIV_INTERN -void -pars_info_bind_int4_literal( -/*=======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const ib_uint32_t* val); /*!< in: value */ -/******************************************************************** -If the literal value already exists then it rebinds otherwise it -creates a new entry. 
*/ -UNIV_INTERN -void -pars_info_bind_int8_literal( -/*=======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const ib_uint64_t* val); /*!< in: value */ -/****************************************************************//** -Add user function. */ -UNIV_INTERN -void -pars_info_bind_function( -/*===================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: function name */ - pars_user_func_cb_t func, /*!< in: function address */ - void* arg); /*!< in: user-supplied argument */ -/****************************************************************//** -Add bound id. */ -UNIV_INTERN -void -pars_info_bind_id( -/*=============*/ - pars_info_t* info, /*!< in: info struct */ - ibool copy_name,/* in: make a copy of name if TRUE */ - const char* name, /*!< in: name */ - const char* id); /*!< in: id */ -/****************************************************************//** -Equivalent to: - -char buf[4]; -mach_write_to_4(buf, val); -pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. */ -UNIV_INTERN -void -pars_info_add_int4_literal( -/*=======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - lint val); /*!< in: value */ - -/****************************************************************//** -Equivalent to: - -char buf[8]; -mach_write_to_8(buf, val); -pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. */ -UNIV_INTERN -void -pars_info_add_ull_literal( -/*======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - ib_uint64_t val); /*!< in: value */ - -/****************************************************************//** -If the literal value already exists then it rebinds otherwise it -creates a new entry. 
*/ -UNIV_INTERN -void -pars_info_bind_ull_literal( -/*=======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const ib_uint64_t* val) /*!< in: value */ - MY_ATTRIBUTE((nonnull)); - -/****************************************************************//** -Add bound id. */ -UNIV_INTERN -void -pars_info_add_id( -/*=============*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const char* id); /*!< in: id */ - -/****************************************************************//** -Get bound literal with the given name. -@return bound literal, or NULL if not found */ -UNIV_INTERN -pars_bound_lit_t* -pars_info_get_bound_lit( -/*====================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name); /*!< in: bound literal name to find */ - -/****************************************************************//** -Get bound id with the given name. -@return bound id, or NULL if not found */ -UNIV_INTERN -pars_bound_id_t* -pars_info_get_bound_id( -/*===================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name); /*!< in: bound id name to find */ - -/******************************************************************//** -Release any resources used by the lexer. */ -UNIV_INTERN -void -pars_lexer_close(void); -/*==================*/ - -/** Extra information supplied for pars_sql(). */ -struct pars_info_t { - mem_heap_t* heap; /*!< our own memory heap */ - - ib_vector_t* funcs; /*!< user functions, or NUll - (pars_user_func_t*) */ - ib_vector_t* bound_lits; /*!< bound literals, or NULL - (pars_bound_lit_t*) */ - ib_vector_t* bound_ids; /*!< bound ids, or NULL - (pars_bound_id_t*) */ - - ibool graph_owns_us; /*!< if TRUE (which is the default), - que_graph_free() will free us */ -}; - -/** User-supplied function and argument. 
*/ -struct pars_user_func_t { - const char* name; /*!< function name */ - pars_user_func_cb_t func; /*!< function address */ - void* arg; /*!< user-supplied argument */ -}; - -/** Bound literal. */ -struct pars_bound_lit_t { - const char* name; /*!< name */ - const void* address; /*!< address */ - ulint length; /*!< length of data */ - ulint type; /*!< type, e.g. DATA_FIXBINARY */ - ulint prtype; /*!< precise type, e.g. DATA_UNSIGNED */ - sym_node_t* node; /*!< symbol node */ -}; - -/** Bound identifier. */ -struct pars_bound_id_t { - const char* name; /*!< name */ - const char* id; /*!< identifier */ -}; - -/** Struct used to denote a reserved word in a parsing tree */ -struct pars_res_word_t{ - int code; /*!< the token code for the reserved word from - pars0grm.h */ -}; - -/** A predefined function or operator node in a parsing tree; this construct -is also used for some non-functions like the assignment ':=' */ -struct func_node_t{ - que_common_t common; /*!< type: QUE_NODE_FUNC */ - int func; /*!< token code of the function name */ - ulint fclass; /*!< class of the function */ - que_node_t* args; /*!< argument(s) of the function */ - UT_LIST_NODE_T(func_node_t) cond_list; - /*!< list of comparison conditions; defined - only for comparison operator nodes except, - presently, for OPT_SCROLL_TYPE ones */ - UT_LIST_NODE_T(func_node_t) func_node_list; - /*!< list of function nodes in a parsed - query graph */ -}; - -/** An order-by node in a select */ -struct order_node_t{ - que_common_t common; /*!< type: QUE_NODE_ORDER */ - sym_node_t* column; /*!< order-by column */ - ibool asc; /*!< TRUE if ascending, FALSE if descending */ -}; - -/** Procedure definition node */ -struct proc_node_t{ - que_common_t common; /*!< type: QUE_NODE_PROC */ - sym_node_t* proc_id; /*!< procedure name symbol in the symbol - table of this same procedure */ - sym_node_t* param_list; /*!< input and output parameters */ - que_node_t* stat_list; /*!< statement list */ - sym_tab_t* sym_tab; 
/*!< symbol table of this procedure */ -}; - -/** elsif-element node */ -struct elsif_node_t{ - que_common_t common; /*!< type: QUE_NODE_ELSIF */ - que_node_t* cond; /*!< if condition */ - que_node_t* stat_list; /*!< statement list */ -}; - -/** if-statement node */ -struct if_node_t{ - que_common_t common; /*!< type: QUE_NODE_IF */ - que_node_t* cond; /*!< if condition */ - que_node_t* stat_list; /*!< statement list */ - que_node_t* else_part; /*!< else-part statement list */ - elsif_node_t* elsif_list; /*!< elsif element list */ -}; - -/** while-statement node */ -struct while_node_t{ - que_common_t common; /*!< type: QUE_NODE_WHILE */ - que_node_t* cond; /*!< while condition */ - que_node_t* stat_list; /*!< statement list */ -}; - -/** for-loop-statement node */ -struct for_node_t{ - que_common_t common; /*!< type: QUE_NODE_FOR */ - sym_node_t* loop_var; /*!< loop variable: this is the - dereferenced symbol from the - variable declarations, not the - symbol occurrence in the for loop - definition */ - que_node_t* loop_start_limit;/*!< initial value of loop variable */ - que_node_t* loop_end_limit; /*!< end value of loop variable */ - lint loop_end_value; /*!< evaluated value for the end value: - it is calculated only when the loop - is entered, and will not change within - the loop */ - que_node_t* stat_list; /*!< statement list */ -}; - -/** exit statement node */ -struct exit_node_t{ - que_common_t common; /*!< type: QUE_NODE_EXIT */ -}; - -/** return-statement node */ -struct return_node_t{ - que_common_t common; /*!< type: QUE_NODE_RETURN */ -}; - -/** Assignment statement node */ -struct assign_node_t{ - que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */ - sym_node_t* var; /*!< variable to set */ - que_node_t* val; /*!< value to assign */ -}; - -/** Column assignment node */ -struct col_assign_node_t{ - que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */ - sym_node_t* col; /*!< column to set */ - que_node_t* val; /*!< value to assign */ -}; - -/** 
Classes of functions */ -/* @{ */ -#define PARS_FUNC_ARITH 1 /*!< +, -, *, / */ -#define PARS_FUNC_LOGICAL 2 /*!< AND, OR, NOT */ -#define PARS_FUNC_CMP 3 /*!< comparison operators */ -#define PARS_FUNC_PREDEFINED 4 /*!< TO_NUMBER, SUBSTR, ... */ -#define PARS_FUNC_AGGREGATE 5 /*!< COUNT, DISTINCT, SUM */ -#define PARS_FUNC_OTHER 6 /*!< these are not real functions, - e.g., := */ -/* @} */ - -#ifndef UNIV_NONINL -#include "pars0pars.ic" -#endif - -#endif diff --git a/storage/xtradb/include/pars0pars.ic b/storage/xtradb/include/pars0pars.ic deleted file mode 100644 index 4c88337a265..00000000000 --- a/storage/xtradb/include/pars0pars.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0pars.ic -SQL parser - -Created 11/19/1996 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/pars0sym.h b/storage/xtradb/include/pars0sym.h deleted file mode 100644 index bcf73639228..00000000000 --- a/storage/xtradb/include/pars0sym.h +++ /dev/null @@ -1,258 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0sym.h -SQL parser symbol table - -Created 12/15/1997 Heikki Tuuri -*******************************************************/ - -#ifndef pars0sym_h -#define pars0sym_h - -#include "univ.i" -#include "que0types.h" -#include "usr0types.h" -#include "dict0types.h" -#include "pars0types.h" -#include "row0types.h" - -/******************************************************************//** -Creates a symbol table for a single stored procedure or query. -@return own: symbol table */ -UNIV_INTERN -sym_tab_t* -sym_tab_create( -/*===========*/ - mem_heap_t* heap); /*!< in: memory heap where to create */ -/******************************************************************//** -Frees the memory allocated dynamically AFTER parsing phase for variables -etc. in the symbol table. Does not free the mem heap where the table was -originally created. Frees also SQL explicit cursor definitions. */ -UNIV_INTERN -void -sym_tab_free_private( -/*=================*/ - sym_tab_t* sym_tab); /*!< in, own: symbol table */ -/******************************************************************//** -Adds an integer literal to a symbol table. -@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_int_lit( -/*================*/ - sym_tab_t* sym_tab, /*!< in: symbol table */ - ulint val); /*!< in: integer value */ -/******************************************************************//** -Adds an string literal to a symbol table. 
-@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_str_lit( -/*================*/ - sym_tab_t* sym_tab, /*!< in: symbol table */ - const byte* str, /*!< in: string with no quotes around - it */ - ulint len); /*!< in: string length */ -/******************************************************************//** -Add a bound literal to a symbol table. -@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_bound_lit( -/*==================*/ - sym_tab_t* sym_tab, /*!< in: symbol table */ - const char* name, /*!< in: name of bound literal */ - ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */ -/********************************************************************** -Rebind literal to a node in the symbol table. */ - -sym_node_t* -sym_tab_rebind_lit( -/*===============*/ - /* out: symbol table node */ - sym_node_t* node, /* in: node that is bound to literal*/ - const void* address, /* in: pointer to data */ - ulint length); /* in: length of data */ -/******************************************************************//** -Adds an SQL null literal to a symbol table. -@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_null_lit( -/*=================*/ - sym_tab_t* sym_tab); /*!< in: symbol table */ -/******************************************************************//** -Adds an identifier to a symbol table. -@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_id( -/*===========*/ - sym_tab_t* sym_tab, /*!< in: symbol table */ - byte* name, /*!< in: identifier name */ - ulint len); /*!< in: identifier length */ - -/******************************************************************//** -Add a bound identifier to a symbol table. 
-@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_bound_id( -/*===========*/ - sym_tab_t* sym_tab, /*!< in: symbol table */ - const char* name); /*!< in: name of bound id */ - -/** Index of sym_node_t::field_nos corresponding to the clustered index */ -#define SYM_CLUST_FIELD_NO 0 -/** Index of sym_node_t::field_nos corresponding to a secondary index */ -#define SYM_SEC_FIELD_NO 1 - -/** Types of a symbol table node */ -enum sym_tab_entry { - SYM_UNSET, /*!< Unset entry. */ - SYM_VAR = 91, /*!< declared parameter or local - variable of a procedure */ - SYM_IMPLICIT_VAR, /*!< storage for a intermediate result - of a calculation */ - SYM_LIT, /*!< literal */ - SYM_TABLE_REF_COUNTED, /*!< database table name, ref counted. Must - be closed explicitly. */ - SYM_TABLE, /*!< database table name */ - SYM_COLUMN, /*!< database table name */ - SYM_CURSOR, /*!< named cursor */ - SYM_PROCEDURE_NAME, /*!< stored procedure name */ - SYM_INDEX, /*!< database index name */ - SYM_FUNCTION /*!< user function name */ -}; - -/** Symbol table node */ -struct sym_node_t{ - que_common_t common; /*!< node type: - QUE_NODE_SYMBOL */ - /* NOTE: if the data field in 'common.val' is not NULL and the symbol - table node is not for a temporary column, the memory for the value has - been allocated from dynamic memory and it should be freed when the - symbol table is discarded */ - - /* 'alias' and 'indirection' are almost the same, but not quite. - 'alias' always points to the primary instance of the variable, while - 'indirection' does the same only if we should use the primary - instance's values for the node's data. This is usually the case, but - when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM - t WHERE id = x;"), we copy the values from the primary instance to - the cursor's instance so that they are fixed for the duration of the - cursor, and set 'indirection' to NULL. 
If we did not, the value of - 'x' could change between fetches and things would break horribly. - - TODO: It would be cleaner to make 'indirection' a boolean field and - always use 'alias' to refer to the primary node. */ - - sym_node_t* indirection; /*!< pointer to - another symbol table - node which contains - the value for this - node, NULL otherwise */ - sym_node_t* alias; /*!< pointer to - another symbol table - node for which this - node is an alias, - NULL otherwise */ - UT_LIST_NODE_T(sym_node_t) col_var_list; /*!< list of table - columns or a list of - input variables for an - explicit cursor */ - ibool copy_val; /*!< TRUE if a column - and its value should - be copied to dynamic - memory when fetched */ - ulint field_nos[2]; /*!< if a column, in - the position - SYM_CLUST_FIELD_NO is - the field number in the - clustered index; in - the position - SYM_SEC_FIELD_NO - the field number in the - non-clustered index to - use first; if not found - from the index, then - ULINT_UNDEFINED */ - ibool resolved; /*!< TRUE if the - meaning of a variable - or a column has been - resolved; for literals - this is always TRUE */ - enum sym_tab_entry token_type; /*!< type of the - parsed token */ - const char* name; /*!< name of an id */ - ulint name_len; /*!< id name length */ - dict_table_t* table; /*!< table definition - if a table id or a - column id */ - ulint col_no; /*!< column number if a - column */ - sel_buf_t* prefetch_buf; /*!< NULL, or a buffer - for cached column - values for prefetched - rows */ - sel_node_t* cursor_def; /*!< cursor definition - select node if a - named cursor */ - ulint param_type; /*!< PARS_INPUT, - PARS_OUTPUT, or - PARS_NOT_PARAM if not a - procedure parameter */ - sym_tab_t* sym_table; /*!< back pointer to - the symbol table */ - UT_LIST_NODE_T(sym_node_t) sym_list; /*!< list of symbol - nodes */ - sym_node_t* like_node; /* LIKE operator node*/ -}; - -/** Symbol table */ -struct sym_tab_t{ - que_t* query_graph; - /*!< query graph 
generated by the - parser */ - const char* sql_string; - /*!< SQL string to parse */ - size_t string_len; - /*!< SQL string length */ - int next_char_pos; - /*!< position of the next character in - sql_string to give to the lexical - analyzer */ - pars_info_t* info; /*!< extra information, or NULL */ - sym_node_list_t sym_list; - /*!< list of symbol nodes in the symbol - table */ - UT_LIST_BASE_NODE_T(func_node_t) - func_node_list; - /*!< list of function nodes in the - parsed query graph */ - mem_heap_t* heap; /*!< memory heap from which we can - allocate space */ -}; - -#ifndef UNIV_NONINL -#include "pars0sym.ic" -#endif - -#endif diff --git a/storage/xtradb/include/pars0sym.ic b/storage/xtradb/include/pars0sym.ic deleted file mode 100644 index 266c1a6310d..00000000000 --- a/storage/xtradb/include/pars0sym.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0sym.ic -SQL parser symbol table - -Created 12/15/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/pars0types.h b/storage/xtradb/include/pars0types.h deleted file mode 100644 index 47f4b432d20..00000000000 --- a/storage/xtradb/include/pars0types.h +++ /dev/null @@ -1,50 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/pars0types.h -SQL parser global types - -Created 1/11/1998 Heikki Tuuri -*******************************************************/ - -#ifndef pars0types_h -#define pars0types_h - -struct pars_info_t; -struct pars_user_func_t; -struct pars_bound_lit_t; -struct pars_bound_id_t; -struct sym_node_t; -struct sym_tab_t; -struct pars_res_word_t; -struct func_node_t; -struct order_node_t; -struct proc_node_t; -struct elsif_node_t; -struct if_node_t; -struct while_node_t; -struct for_node_t; -struct exit_node_t; -struct return_node_t; -struct assign_node_t; -struct col_assign_node_t; - -typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t; - -#endif diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h deleted file mode 100644 index e5b2a1ba3fc..00000000000 --- a/storage/xtradb/include/que0que.h +++ /dev/null @@ -1,531 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/que0que.h -Query graph - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef que0que_h -#define que0que_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "btr0sea.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "srv0srv.h" -#include "usr0types.h" -#include "que0types.h" -#include "row0types.h" -#include "pars0types.h" - -/* If the following flag is set TRUE, the module will print trace info -of SQL execution in the UNIV_SQL_DEBUG version */ -extern ibool que_trace_on; - -/** Mutex protecting the query threads. */ -extern ib_mutex_t que_thr_mutex; - -/***********************************************************************//** -Creates a query graph fork node. -@return own: fork node */ -UNIV_INTERN -que_fork_t* -que_fork_create( -/*============*/ - que_t* graph, /*!< in: graph, if NULL then this - fork node is assumed to be the - graph root */ - que_node_t* parent, /*!< in: parent node */ - ulint fork_type, /*!< in: fork type */ - mem_heap_t* heap); /*!< in: memory heap where created */ -/***********************************************************************//** -Gets the first thr in a fork. */ -UNIV_INLINE -que_thr_t* -que_fork_get_first_thr( -/*===================*/ - que_fork_t* fork); /*!< in: query fork */ -/***********************************************************************//** -Gets the child node of the first thr in a fork. 
*/ -UNIV_INLINE -que_node_t* -que_fork_get_child( -/*===============*/ - que_fork_t* fork); /*!< in: query fork */ -/***********************************************************************//** -Sets the parent of a graph node. */ -UNIV_INLINE -void -que_node_set_parent( -/*================*/ - que_node_t* node, /*!< in: graph node */ - que_node_t* parent);/*!< in: parent */ -/***********************************************************************//** -Creates a query graph thread node. -@return own: query thread node */ -UNIV_INTERN -que_thr_t* -que_thr_create( -/*===========*/ - que_fork_t* parent, /*!< in: parent node, i.e., a fork node */ - mem_heap_t* heap); /*!< in: memory heap where created */ -/**********************************************************************//** -Frees a query graph, but not the heap where it was created. Does not free -explicit cursor declarations, they are freed in que_graph_free. */ -UNIV_INTERN -void -que_graph_free_recursive( -/*=====================*/ - que_node_t* node); /*!< in: query graph node */ -/**********************************************************************//** -Frees a query graph. */ -UNIV_INTERN -void -que_graph_free( -/*===========*/ - que_t* graph); /*!< in: query graph; we assume that the memory - heap where this graph was created is private - to this graph: if not, then use - que_graph_free_recursive and free the heap - afterwards! */ -/**********************************************************************//** -Stops a query thread if graph or trx is in a state requiring it. The -conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex -has to be reserved. -@return TRUE if stopped */ -UNIV_INTERN -ibool -que_thr_stop( -/*=========*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction. 
*/ -UNIV_INTERN -void -que_thr_move_to_run_state_for_mysql( -/*================================*/ - que_thr_t* thr, /*!< in: an query thread */ - trx_t* trx); /*!< in: transaction */ -/**********************************************************************//** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL -select, when there is no error or lock wait. */ -UNIV_INTERN -void -que_thr_stop_for_mysql_no_error( -/*============================*/ - que_thr_t* thr, /*!< in: query thread */ - trx_t* trx); /*!< in: transaction */ -/**********************************************************************//** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The -query thread is stopped and made inactive, except in the case where -it was put to the lock wait state in lock0lock.cc, but the lock has already -been granted or the transaction chosen as a victim in deadlock resolution. */ -UNIV_INTERN -void -que_thr_stop_for_mysql( -/*===================*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Run a query thread. Handles lock waits. */ -UNIV_INTERN -void -que_run_threads( -/*============*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Moves a suspended query thread to the QUE_THR_RUNNING state and release -a worker thread to execute it. This function should be used to end -the wait state of a query thread waiting for a lock or a stored procedure -completion. -@return query thread instance of thread to wakeup or NULL */ -UNIV_INTERN -que_thr_t* -que_thr_end_lock_wait( -/*==================*/ - trx_t* trx); /*!< in: transaction in the - QUE_THR_LOCK_WAIT state */ -/**********************************************************************//** -Starts execution of a command in a query fork. Picks a query thread which -is not in the QUE_THR_RUNNING state and moves it to that state. 
If none -can be chosen, a situation which may arise in parallelized fetches, NULL -is returned. -@return a query thread of the graph moved to QUE_THR_RUNNING state, or -NULL; the query thread should be executed by que_run_threads by the -caller */ -UNIV_INTERN -que_thr_t* -que_fork_start_command( -/*===================*/ - que_fork_t* fork); /*!< in: a query fork */ -/***********************************************************************//** -Gets the trx of a query thread. */ -UNIV_INLINE -trx_t* -thr_get_trx( -/*========*/ - que_thr_t* thr); /*!< in: query thread */ -/*******************************************************************//** -Determines if this thread is rolling back an incomplete transaction -in crash recovery. -@return TRUE if thr is rolling back an incomplete transaction in crash -recovery */ -UNIV_INLINE -ibool -thr_is_recv( -/*========*/ - const que_thr_t* thr); /*!< in: query thread */ -/***********************************************************************//** -Gets the type of a graph node. */ -UNIV_INLINE -ulint -que_node_get_type( -/*==============*/ - que_node_t* node); /*!< in: graph node */ -/***********************************************************************//** -Gets pointer to the value data type field of a graph node. */ -UNIV_INLINE -dtype_t* -que_node_get_data_type( -/*===================*/ - que_node_t* node); /*!< in: graph node */ -/***********************************************************************//** -Gets pointer to the value dfield of a graph node. */ -UNIV_INLINE -dfield_t* -que_node_get_val( -/*=============*/ - que_node_t* node); /*!< in: graph node */ -/***********************************************************************//** -Gets the value buffer size of a graph node. 
-@return val buffer size, not defined if val.data == NULL in node */ -UNIV_INLINE -ulint -que_node_get_val_buf_size( -/*======================*/ - que_node_t* node); /*!< in: graph node */ -/***********************************************************************//** -Sets the value buffer size of a graph node. */ -UNIV_INLINE -void -que_node_set_val_buf_size( -/*======================*/ - que_node_t* node, /*!< in: graph node */ - ulint size); /*!< in: size */ -/*********************************************************************//** -Gets the next list node in a list of query graph nodes. */ -UNIV_INLINE -que_node_t* -que_node_get_next( -/*==============*/ - que_node_t* node); /*!< in: node in a list */ -/*********************************************************************//** -Gets the parent node of a query graph node. -@return parent node or NULL */ -UNIV_INLINE -que_node_t* -que_node_get_parent( -/*================*/ - que_node_t* node); /*!< in: node */ -/****************************************************************//** -Get the first containing loop node (e.g. while_node_t or for_node_t) for the -given node, or NULL if the node is not within a loop. -@return containing loop node, or NULL. */ -UNIV_INTERN -que_node_t* -que_node_get_containing_loop_node( -/*==============================*/ - que_node_t* node); /*!< in: node */ -/*********************************************************************//** -Catenates a query graph node to a list of them, possible empty list. 
-@return one-way list of nodes */ -UNIV_INLINE -que_node_t* -que_node_list_add_last( -/*===================*/ - que_node_t* node_list, /*!< in: node list, or NULL */ - que_node_t* node); /*!< in: node */ -/************************************************************************* -Get the last node from the list.*/ -UNIV_INLINE -que_node_t* -que_node_list_get_last( -/*===================*/ - /* out: node last node from list.*/ - que_node_t* node_list); /* in: node list, or NULL */ -/*********************************************************************//** -Gets a query graph node list length. -@return length, for NULL list 0 */ -UNIV_INLINE -ulint -que_node_list_get_len( -/*==================*/ - que_node_t* node_list); /*!< in: node list, or NULL */ -/**********************************************************************//** -Checks if graph, trx, or session is in a state where the query thread should -be stopped. -@return TRUE if should be stopped; NOTE that if the peek is made -without reserving the trx_t::mutex, then another peek with the mutex -reserved is necessary before deciding the actual stopping */ -UNIV_INLINE -ibool -que_thr_peek_stop( -/*==============*/ - que_thr_t* thr); /*!< in: query thread */ -/***********************************************************************//** -Returns TRUE if the query graph is for a SELECT statement. -@return TRUE if a select */ -UNIV_INLINE -ibool -que_graph_is_select( -/*================*/ - que_t* graph); /*!< in: graph */ -/**********************************************************************//** -Prints info of an SQL query graph node. 
*/ -UNIV_INTERN -void -que_node_print_info( -/*================*/ - que_node_t* node); /*!< in: query graph node */ -/*********************************************************************//** -Evaluate the given SQL -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -que_eval_sql( -/*=========*/ - pars_info_t* info, /*!< in: info struct, or NULL */ - const char* sql, /*!< in: SQL string */ - ibool reserve_dict_mutex, - /*!< in: if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. */ - trx_t* trx); /*!< in: trx */ - -/**********************************************************************//** -Round robin scheduler. -@return a query thread of the graph moved to QUE_THR_RUNNING state, or -NULL; the query thread should be executed by que_run_threads by the -caller */ -UNIV_INTERN -que_thr_t* -que_fork_scheduler_round_robin( -/*===========================*/ - que_fork_t* fork, /*!< in: a query fork */ - que_thr_t* thr); /*!< in: current pos */ - -/*********************************************************************//** -Initialise the query sub-system. */ -UNIV_INTERN -void -que_init(void); -/*==========*/ - -/*********************************************************************//** -Close the query sub-system. 
*/ -UNIV_INTERN -void -que_close(void); -/*===========*/ - -/* Query graph query thread node: the fields are protected by the -trx_t::mutex with the exceptions named below */ - -struct que_thr_t{ - que_common_t common; /*!< type: QUE_NODE_THR */ - ulint magic_n; /*!< magic number to catch memory - corruption */ - que_node_t* child; /*!< graph child node */ - que_t* graph; /*!< graph where this node belongs */ - ulint state; /*!< state of the query thread */ - ibool is_active; /*!< TRUE if the thread has been set - to the run state in - que_thr_move_to_run_state, but not - deactivated in - que_thr_dec_reference_count */ - /*------------------------------*/ - /* The following fields are private to the OS thread executing the - query thread, and are not protected by any mutex: */ - - que_node_t* run_node; /*!< pointer to the node where the - subgraph down from this node is - currently executed */ - que_node_t* prev_node; /*!< pointer to the node from which - the control came */ - ulint resource; /*!< resource usage of the query thread - thus far */ - ulint lock_state; /*!< lock state of thread (table or - row) */ - struct srv_slot_t* - slot; /* The thread slot in the wait - array in srv_sys_t */ - /*------------------------------*/ - /* The following fields are links for the various lists that - this type can be on. 
*/ - UT_LIST_NODE_T(que_thr_t) - thrs; /*!< list of thread nodes of the fork - node */ - UT_LIST_NODE_T(que_thr_t) - trx_thrs; /*!< lists of threads in wait list of - the trx */ - UT_LIST_NODE_T(que_thr_t) - queue; /*!< list of runnable thread nodes in - the server task queue */ - ulint fk_cascade_depth; /*!< maximum cascading call depth - supported for foreign key constraint - related delete/updates */ -}; - -#define QUE_THR_MAGIC_N 8476583 -#define QUE_THR_MAGIC_FREED 123461526 - -/* Query graph fork node: its fields are protected by the query thread mutex */ -struct que_fork_t{ - que_common_t common; /*!< type: QUE_NODE_FORK */ - que_t* graph; /*!< query graph of this node */ - ulint fork_type; /*!< fork type */ - ulint n_active_thrs; /*!< if this is the root of a graph, the - number query threads that have been - started in que_thr_move_to_run_state - but for which que_thr_dec_refer_count - has not yet been called */ - trx_t* trx; /*!< transaction: this is set only in - the root node */ - ulint state; /*!< state of the fork node */ - que_thr_t* caller; /*!< pointer to a possible calling query - thread */ - UT_LIST_BASE_NODE_T(que_thr_t) - thrs; /*!< list of query threads */ - /*------------------------------*/ - /* The fields in this section are defined only in the root node */ - sym_tab_t* sym_tab; /*!< symbol table of the query, - generated by the parser, or NULL - if the graph was created 'by hand' */ - pars_info_t* info; /*!< info struct, or NULL */ - /* The following cur_... 
fields are relevant only in a select graph */ - - ulint cur_end; /*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START, - QUE_CUR_END */ - ulint cur_pos; /*!< if there are n rows in the result - set, values 0 and n + 1 mean before - first row, or after last row, depending - on cur_end; values 1...n mean a row - index */ - ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e., - it is not before the first row or - after the last row */ - sel_node_t* last_sel_node; /*!< last executed select node, or NULL - if none */ - UT_LIST_NODE_T(que_fork_t) - graphs; /*!< list of query graphs of a session - or a stored procedure */ - /*------------------------------*/ - mem_heap_t* heap; /*!< memory heap where the fork was - created */ - -}; - -/* Query fork (or graph) types */ -#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */ -#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */ -#define QUE_FORK_INSERT 3 -#define QUE_FORK_UPDATE 4 -#define QUE_FORK_ROLLBACK 5 - /* This is really the undo graph used in rollback, - no signal-sending roll_node in this graph */ -#define QUE_FORK_PURGE 6 -#define QUE_FORK_EXECUTE 7 -#define QUE_FORK_PROCEDURE 8 -#define QUE_FORK_PROCEDURE_CALL 9 -#define QUE_FORK_MYSQL_INTERFACE 10 -#define QUE_FORK_RECOVERY 11 - -/* Query fork (or graph) states */ -#define QUE_FORK_ACTIVE 1 -#define QUE_FORK_COMMAND_WAIT 2 -#define QUE_FORK_INVALID 3 -#define QUE_FORK_BEING_FREED 4 - -/* Flag which is ORed to control structure statement node types */ -#define QUE_NODE_CONTROL_STAT 1024 - -/* Query graph node types */ -#define QUE_NODE_LOCK 1 -#define QUE_NODE_INSERT 2 -#define QUE_NODE_UPDATE 4 -#define QUE_NODE_CURSOR 5 -#define QUE_NODE_SELECT 6 -#define QUE_NODE_AGGREGATE 7 -#define QUE_NODE_FORK 8 -#define QUE_NODE_THR 9 -#define QUE_NODE_UNDO 10 -#define QUE_NODE_COMMIT 11 -#define QUE_NODE_ROLLBACK 12 -#define QUE_NODE_PURGE 13 -#define QUE_NODE_CREATE_TABLE 14 -#define QUE_NODE_CREATE_INDEX 15 -#define QUE_NODE_SYMBOL 16 -#define QUE_NODE_RES_WORD 17 
-#define QUE_NODE_FUNC 18 -#define QUE_NODE_ORDER 19 -#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_ASSIGNMENT 23 -#define QUE_NODE_FETCH 24 -#define QUE_NODE_OPEN 25 -#define QUE_NODE_COL_ASSIGNMENT 26 -#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_RETURN 28 -#define QUE_NODE_ROW_PRINTF 29 -#define QUE_NODE_ELSIF 30 -#define QUE_NODE_CALL 31 -#define QUE_NODE_EXIT 32 - -/* Query thread states */ -#define QUE_THR_RUNNING 1 -#define QUE_THR_PROCEDURE_WAIT 2 -#define QUE_THR_COMPLETED 3 /* in selects this means that the - thread is at the end of its result set - (or start, in case of a scroll cursor); - in other statements, this means the - thread has done its task */ -#define QUE_THR_COMMAND_WAIT 4 -#define QUE_THR_LOCK_WAIT 5 -#define QUE_THR_SUSPENDED 7 -#define QUE_THR_ERROR 8 - -/* Query thread lock states */ -#define QUE_THR_LOCK_NOLOCK 0 -#define QUE_THR_LOCK_ROW 1 -#define QUE_THR_LOCK_TABLE 2 - -/* From where the cursor position is counted */ -#define QUE_CUR_NOT_DEFINED 1 -#define QUE_CUR_START 2 -#define QUE_CUR_END 3 - -#ifndef UNIV_NONINL -#include "que0que.ic" -#endif - -#endif diff --git a/storage/xtradb/include/que0que.ic b/storage/xtradb/include/que0que.ic deleted file mode 100644 index eff5a86d958..00000000000 --- a/storage/xtradb/include/que0que.ic +++ /dev/null @@ -1,309 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/que0que.ic -Query graph - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#include "usr0sess.h" - -/***********************************************************************//** -Gets the trx of a query thread. */ -UNIV_INLINE -trx_t* -thr_get_trx( -/*========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - ut_ad(thr); - - return(thr->graph->trx); -} - -/*******************************************************************//** -Determines if this thread is rolling back an incomplete transaction -in crash recovery. -@return TRUE if thr is rolling back an incomplete transaction in crash -recovery */ -UNIV_INLINE -ibool -thr_is_recv( -/*========*/ - const que_thr_t* thr) /*!< in: query thread */ -{ - return(trx_is_recv(thr->graph->trx)); -} - -/***********************************************************************//** -Gets the first thr in a fork. */ -UNIV_INLINE -que_thr_t* -que_fork_get_first_thr( -/*===================*/ - que_fork_t* fork) /*!< in: query fork */ -{ - return(UT_LIST_GET_FIRST(fork->thrs)); -} - -/***********************************************************************//** -Gets the child node of the first thr in a fork. 
*/ -UNIV_INLINE -que_node_t* -que_fork_get_child( -/*===============*/ - que_fork_t* fork) /*!< in: query fork */ -{ - que_thr_t* thr; - - thr = UT_LIST_GET_FIRST(fork->thrs); - - return(thr->child); -} - -/***********************************************************************//** -Gets the type of a graph node. */ -UNIV_INLINE -ulint -que_node_get_type( -/*==============*/ - que_node_t* node) /*!< in: graph node */ -{ - ut_ad(node); - - return(((que_common_t*) node)->type); -} - -/***********************************************************************//** -Gets pointer to the value dfield of a graph node. */ -UNIV_INLINE -dfield_t* -que_node_get_val( -/*=============*/ - que_node_t* node) /*!< in: graph node */ -{ - ut_ad(node); - - return(&(((que_common_t*) node)->val)); -} - -/***********************************************************************//** -Gets the value buffer size of a graph node. -@return val buffer size, not defined if val.data == NULL in node */ -UNIV_INLINE -ulint -que_node_get_val_buf_size( -/*======================*/ - que_node_t* node) /*!< in: graph node */ -{ - ut_ad(node); - - return(((que_common_t*) node)->val_buf_size); -} - -/***********************************************************************//** -Sets the value buffer size of a graph node. */ -UNIV_INLINE -void -que_node_set_val_buf_size( -/*======================*/ - que_node_t* node, /*!< in: graph node */ - ulint size) /*!< in: size */ -{ - ut_ad(node); - - ((que_common_t*) node)->val_buf_size = size; -} - -/***********************************************************************//** -Sets the parent of a graph node. */ -UNIV_INLINE -void -que_node_set_parent( -/*================*/ - que_node_t* node, /*!< in: graph node */ - que_node_t* parent) /*!< in: parent */ -{ - ut_ad(node); - - ((que_common_t*) node)->parent = parent; -} - -/***********************************************************************//** -Gets pointer to the value data type field of a graph node. 
*/ -UNIV_INLINE -dtype_t* -que_node_get_data_type( -/*===================*/ - que_node_t* node) /*!< in: graph node */ -{ - ut_ad(node); - - return(dfield_get_type(&((que_common_t*) node)->val)); -} - -/*********************************************************************//** -Catenates a query graph node to a list of them, possible empty list. -@return one-way list of nodes */ -UNIV_INLINE -que_node_t* -que_node_list_add_last( -/*===================*/ - que_node_t* node_list, /*!< in: node list, or NULL */ - que_node_t* node) /*!< in: node */ -{ - que_common_t* cnode; - que_common_t* cnode2; - - cnode = (que_common_t*) node; - - cnode->brother = NULL; - - if (node_list == NULL) { - - return(node); - } - - cnode2 = (que_common_t*) node_list; - - while (cnode2->brother != NULL) { - cnode2 = (que_common_t*) cnode2->brother; - } - - cnode2->brother = node; - - return(node_list); -} - -/************************************************************************* -Removes a query graph node from the list.*/ -UNIV_INLINE -que_node_t* -que_node_list_get_last( -/*===================*/ - /* out: last node in list.*/ - que_node_t* node_list) /* in: node list */ -{ - que_common_t* node; - - ut_a(node_list != NULL); - - node = (que_common_t*) node_list; - - /* We need the last element */ - while (node->brother != NULL) { - node = (que_common_t*) node->brother; - } - - return(node); -} -/*********************************************************************//** -Gets the next list node in a list of query graph nodes. -@return next node in a list of nodes */ -UNIV_INLINE -que_node_t* -que_node_get_next( -/*==============*/ - que_node_t* node) /*!< in: node in a list */ -{ - return(((que_common_t*) node)->brother); -} - -/*********************************************************************//** -Gets a query graph node list length. 
-@return length, for NULL list 0 */ -UNIV_INLINE -ulint -que_node_list_get_len( -/*==================*/ - que_node_t* node_list) /*!< in: node list, or NULL */ -{ - const que_common_t* cnode; - ulint len; - - cnode = (const que_common_t*) node_list; - len = 0; - - while (cnode != NULL) { - len++; - cnode = (const que_common_t*) cnode->brother; - } - - return(len); -} - -/*********************************************************************//** -Gets the parent node of a query graph node. -@return parent node or NULL */ -UNIV_INLINE -que_node_t* -que_node_get_parent( -/*================*/ - que_node_t* node) /*!< in: node */ -{ - return(((que_common_t*) node)->parent); -} - -/**********************************************************************//** -Checks if graph, trx, or session is in a state where the query thread should -be stopped. -@return TRUE if should be stopped; NOTE that if the peek is made -without reserving the trx mutex, then another peek with the mutex -reserved is necessary before deciding the actual stopping */ -UNIV_INLINE -ibool -que_thr_peek_stop( -/*==============*/ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; - que_t* graph; - - graph = thr->graph; - trx = graph->trx; - - if (graph->state != QUE_FORK_ACTIVE - || trx->lock.que_state == TRX_QUE_LOCK_WAIT - || (trx->lock.que_state != TRX_QUE_ROLLING_BACK - && trx->lock.que_state != TRX_QUE_RUNNING)) { - - return(TRUE); - } - - return(FALSE); -} - -/***********************************************************************//** -Returns TRUE if the query graph is for a SELECT statement. 
-@return TRUE if a select */ -UNIV_INLINE -ibool -que_graph_is_select( -/*================*/ - que_t* graph) /*!< in: graph */ -{ - if (graph->fork_type == QUE_FORK_SELECT_SCROLL - || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) { - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/xtradb/include/que0types.h b/storage/xtradb/include/que0types.h deleted file mode 100644 index 0f11cad301a..00000000000 --- a/storage/xtradb/include/que0types.h +++ /dev/null @@ -1,57 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/que0types.h -Query graph global types - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef que0types_h -#define que0types_h - -#include "data0data.h" -#include "dict0types.h" - -/* Pseudotype for all graph nodes */ -typedef void que_node_t; - -/* Query graph root is a fork node */ -typedef struct que_fork_t que_t; - -struct que_thr_t; - -/* Common struct at the beginning of each query graph node; the name of this -substruct must be 'common' */ - -struct que_common_t{ - ulint type; /*!< query node type */ - que_node_t* parent; /*!< back pointer to parent node, or NULL */ - que_node_t* brother;/* pointer to a possible brother node */ - dfield_t val; /*!< evaluated value for an expression */ - ulint val_buf_size; - /* buffer size for the evaluated value data, - if the buffer has been allocated dynamically: - if this field is != 0, and the node is a - symbol node or a function node, then we - have to free the data field in val - explicitly */ -}; - -#endif diff --git a/storage/xtradb/include/read0i_s.h b/storage/xtradb/include/read0i_s.h deleted file mode 100644 index 11b63affe09..00000000000 --- a/storage/xtradb/include/read0i_s.h +++ /dev/null @@ -1,54 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2010-2012, Percona Inc. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -#ifndef read0i_s_h -#define read0i_s_h - -#include <trx0types.h> - -struct i_s_xtradb_read_view_struct { - undo_no_t undo_no;/*!< 0 or if type is - VIEW_HIGH_GRANULARITY - transaction undo_no when this high-granularity - consistent read view was created */ - trx_id_t low_limit_no; - /*!< The view does not need to see the undo - logs for transactions whose transaction number - is strictly smaller (<) than this value: they - can be removed in purge if not needed by other - views */ - trx_id_t low_limit_id; - /*!< The read should not see any transaction - with trx id >= this value. In other words, - this is the "high water mark". */ - trx_id_t up_limit_id; - /*!< The read should see all trx ids which - are strictly smaller (<) than this value. - In other words, - this is the "low water mark". 
*/ -}; - -typedef struct i_s_xtradb_read_view_struct i_s_xtradb_read_view_t; - -UNIV_INTERN -i_s_xtradb_read_view_t* -read_fill_i_s_xtradb_read_view(i_s_xtradb_read_view_t *rv); - - -#endif /* read0i_s_h */ diff --git a/storage/xtradb/include/read0read.h b/storage/xtradb/include/read0read.h deleted file mode 100644 index 2d6885884f7..00000000000 --- a/storage/xtradb/include/read0read.h +++ /dev/null @@ -1,232 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/read0read.h -Cursor read - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#ifndef read0read_h -#define read0read_h - -#include "univ.i" - - -#include "ut0byte.h" -#include "ut0lst.h" -#include "btr0types.h" -#include "trx0trx.h" -#include "trx0sys.h" -#include "read0types.h" - -/*********************************************************************//** -Opens a read view where exactly the transactions serialized before this -point in time are seen in the view. 
-@return own: read view struct */ -UNIV_INTERN -read_view_t* -read_view_open_now( -/*===============*/ - trx_id_t cr_trx_id, /*!< in: trx_id of creating - transaction, or 0 used in purge */ - read_view_t*& view); /*!< in,out: pre-allocated view array or - NULL if a new one needs to be created */ - -/*********************************************************************//** -Clones a read view object. This function will allocate space for two read -views contiguously, one identical in size and content as @param view (starting -at returned pointer) and another view immediately following the trx_ids array. -The second view will have space for an extra trx_id_t element. -@return read view struct */ -UNIV_INTERN -read_view_t* -read_view_clone( -/*============*/ - const read_view_t* view, /*!< in: view to clone */ - read_view_t*& prebuilt_clone);/*!< in,out: prebuilt view or - NULL */ -/*********************************************************************//** -Insert the view in the proper order into the trx_sys->view_list. The -read view list is ordered by read_view_t::low_limit_no in descending order. */ -UNIV_INTERN -void -read_view_add( -/*==========*/ - read_view_t* view); /*!< in: view to add to */ -/*********************************************************************//** -Makes a copy of the oldest existing read view, or opens a new. The view -must be closed with ..._close. -@return own: read view struct */ -UNIV_INTERN -read_view_t* -read_view_purge_open( -/*=================*/ - read_view_t*& clone_view, /*!< in,out: pre-allocated view that - will be used to clone the oldest view if - exists */ - read_view_t*& view); /*!< in,out: pre-allocated view array or - NULL if a new one needs to be created */ -/*********************************************************************//** -Remove a read view from the trx_sys->view_list. 
*/ -UNIV_INLINE -void -read_view_remove( -/*=============*/ - read_view_t* view, /*!< in: read view, can be 0 */ - bool own_mutex); /*!< in: true if caller owns the - trx_sys_t::mutex */ -/*********************************************************************//** -Frees memory allocated by a read view. */ -UNIV_INTERN -void -read_view_free( -/*===========*/ - read_view_t*& view); /*< in,out: read view */ -/*********************************************************************//** -Closes a consistent read view for MySQL. This function is called at an SQL -statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */ -UNIV_INTERN -void -read_view_close_for_mysql( -/*======================*/ - trx_t* trx); /*!< in: trx which has a read view */ -/*********************************************************************//** -Checks if a read view sees the specified transaction. -@return true if sees */ -UNIV_INLINE -bool -read_view_sees_trx_id( -/*==================*/ - const read_view_t* view, /*!< in: read view */ - trx_id_t trx_id) /*!< in: trx id */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Prints a read view to file. */ -UNIV_INTERN -void -read_view_print( -/*============*/ - FILE* file, /*!< in: file to print to */ - const read_view_t* view); /*!< in: read view */ -/*********************************************************************//** -Create a consistent cursor view for mysql to be used in cursors. In this -consistent read view modifications done by the creating transaction or future -transactions are not visible. */ -UNIV_INTERN -cursor_view_t* -read_cursor_view_create_for_mysql( -/*==============================*/ - trx_t* cr_trx);/*!< in: trx where cursor view is created */ -/*********************************************************************//** -Close a given consistent cursor view for mysql and restore global read view -back to a transaction read view. 
*/ -UNIV_INTERN -void -read_cursor_view_close_for_mysql( -/*=============================*/ - trx_t* trx, /*!< in: trx */ - cursor_view_t* curview); /*!< in: cursor view to be closed */ -/*********************************************************************//** -This function sets a given consistent cursor view to a transaction -read view if given consistent cursor view is not NULL. Otherwise, function -restores a global read view to a transaction read view. */ -UNIV_INTERN -void -read_cursor_set_for_mysql( -/*======================*/ - trx_t* trx, /*!< in: transaction where cursor is set */ - cursor_view_t* curview);/*!< in: consistent cursor view to be set */ - -/** Read view lists the trx ids of those transactions for which a consistent -read should not see the modifications to the database. */ - -struct read_view_t{ - ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */ - undo_no_t undo_no;/*!< 0 or if type is - VIEW_HIGH_GRANULARITY - transaction undo_no when this high-granularity - consistent read view was created */ - trx_id_t low_limit_no; - /*!< The view does not need to see the undo - logs for transactions whose transaction number - is strictly smaller (<) than this value: they - can be removed in purge if not needed by other - views */ - trx_id_t low_limit_id; - /*!< The read should not see any transaction - with trx id >= this value. In other words, - this is the "high water mark". */ - trx_id_t up_limit_id; - /*!< The read should see all trx ids which - are strictly smaller (<) than this value. - In other words, - this is the "low water mark". 
*/ - ulint n_descr; - /*!< Number of cells in the trx_ids array */ - ulint max_descr; - /*!< Maximum number of cells in the trx_ids - array */ - trx_id_t* descriptors; - /*!< Additional trx ids which the read should - not see: typically, these are the read-write - active transactions at the time when the read - is serialized, except the reading transaction - itself; the trx ids in this array are in a - ascending order. These trx_ids should be - between the "low" and "high" water marks, - that is, up_limit_id and low_limit_id. */ - trx_id_t creator_trx_id; - /*!< trx id of creating transaction, or - 0 used in purge */ - UT_LIST_NODE_T(read_view_t) view_list; - /*!< List of read views in trx_sys */ -}; - -/** Read view types @{ */ -#define VIEW_NORMAL 1 /*!< Normal consistent read view - where transaction does not see changes - made by active transactions except - creating transaction. */ -#define VIEW_HIGH_GRANULARITY 2 /*!< High-granularity read view where - transaction does not see changes - made by active transactions and own - changes after a point in time when this - read view was created. */ -/* @} */ - -/** Implement InnoDB framework to support consistent read views in -cursors. This struct holds both heap where consistent read view -is allocated and pointer to a read view. */ - -struct cursor_view_t{ - mem_heap_t* heap; - /*!< Memory heap for the cursor view */ - read_view_t* read_view; - /*!< Consistent read view of the cursor*/ - ulint n_mysql_tables_in_use; - /*!< number of Innobase tables used in the - processing of this cursor */ -}; - -#ifndef UNIV_NONINL -#include "read0read.ic" -#endif - -#endif diff --git a/storage/xtradb/include/read0read.ic b/storage/xtradb/include/read0read.ic deleted file mode 100644 index 66bef8866c9..00000000000 --- a/storage/xtradb/include/read0read.ic +++ /dev/null @@ -1,131 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2012, Oracle and/or its affiliates. 
All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/read0read.ic -Cursor read - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#include "trx0sys.h" - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Validates a read view object. */ -static -bool -read_view_validate( -/*===============*/ - const read_view_t* view) /*!< in: view to validate */ -{ - ut_ad(mutex_own(&trx_sys->mutex)); - ut_ad(view->max_descr >= view->n_descr); - ut_ad(view->descriptors == NULL || view->max_descr > 0); - - /* Check that the view->descriptors array is in ascending order. */ - for (ulint i = 1; i < view->n_descr; ++i) { - - ut_a(view->descriptors[i] > view->descriptors[i - 1]); - } - - return(true); -} - -/** Functor to validate the view list. */ -struct ViewCheck { - - ViewCheck() : m_prev_view(0) { } - - void operator()(const read_view_t* view) - { - ut_a(m_prev_view == NULL - || m_prev_view->low_limit_no >= view->low_limit_no); - - m_prev_view = view; - } - - const read_view_t* m_prev_view; -}; - -/*********************************************************************//** -Validates a read view list. 
*/ -static -bool -read_view_list_validate(void) -/*=========================*/ -{ - ut_ad(mutex_own(&trx_sys->mutex)); - - ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck()); - - return(true); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Checks if a read view sees the specified transaction. -@return true if sees */ -UNIV_INLINE -bool -read_view_sees_trx_id( -/*==================*/ - const read_view_t* view, /*!< in: read view */ - trx_id_t trx_id) /*!< in: trx id */ -{ - if (trx_id < view->up_limit_id) { - - return(true); - } else if (trx_id >= view->low_limit_id) { - - return(false); - } - - /* Do a binary search over this view's descriptors array */ - - return(trx_find_descriptor(view->descriptors, view->n_descr, - trx_id) == NULL); -} - -/*********************************************************************//** -Remove a read view from the trx_sys->view_list. */ -UNIV_INLINE -void -read_view_remove( -/*=============*/ - read_view_t* view, /*!< in: read view, can be 0 */ - bool own_mutex) /*!< in: true if caller owns the - trx_sys_t::mutex */ -{ - if (view != 0) { - if (!own_mutex) { - mutex_enter(&trx_sys->mutex); - } - - ut_ad(read_view_validate(view)); - - UT_LIST_REMOVE(view_list, trx_sys->view_list, view); - - ut_ad(read_view_list_validate()); - - if (!own_mutex) { - mutex_exit(&trx_sys->mutex); - } - } -} - diff --git a/storage/xtradb/include/read0types.h b/storage/xtradb/include/read0types.h deleted file mode 100644 index 969f4ebb637..00000000000 --- a/storage/xtradb/include/read0types.h +++ /dev/null @@ -1,32 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/read0types.h -Cursor read - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#ifndef read0types_h -#define read0types_h - -struct read_view_t; -struct cursor_view_t; - -#endif diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h deleted file mode 100644 index 65116229fdc..00000000000 --- a/storage/xtradb/include/rem0cmp.h +++ /dev/null @@ -1,301 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/rem0cmp.h -Comparison services for records - -Created 7/1/1994 Heikki Tuuri -************************************************************************/ - -#ifndef rem0cmp_h -#define rem0cmp_h - -#include "univ.i" -#include "data0data.h" -#include "data0type.h" -#include "dict0dict.h" -#include "rem0rec.h" - -/*************************************************************//** -Returns TRUE if two columns are equal for comparison purposes. -@return TRUE if the columns are considered equal in comparisons */ -UNIV_INTERN -ibool -cmp_cols_are_equal( -/*===============*/ - const dict_col_t* col1, /*!< in: column 1 */ - const dict_col_t* col2, /*!< in: column 2 */ - ibool check_charsets); - /*!< in: whether to check charsets */ -/*************************************************************//** -This function is used to compare two data fields for which we know the -data type. -@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ -UNIV_INLINE -int -cmp_data_data( -/*==========*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - const byte* data1, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len1, /*!< in: data field length or UNIV_SQL_NULL */ - const byte* data2, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len2); /*!< in: data field length or UNIV_SQL_NULL */ -/*************************************************************//** -This function is used to compare two data fields for which we know the -data type. 
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ -UNIV_INTERN -int -cmp_data_data_slow( -/*===============*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - const byte* data1, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len1, /*!< in: data field length or UNIV_SQL_NULL */ - const byte* data2, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len2); /*!< in: data field length or UNIV_SQL_NULL */ - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type to be VARCHAR. -@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */ -UNIV_INTERN -int -cmp_data_data_slow_varchar( -/*=======================*/ - const byte* lhs, /* in: data field (== a pointer to a memory - buffer) */ - ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */ - const byte* rhs, /* in: data field (== a pointer to a memory - buffer) */ - ulint rhs_len);/* in: data field length or UNIV_SQL_NULL */ -/***************************************************************** -This function is used to compare two varchar/char fields. The comparison -is for the LIKE operator. -@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */ -UNIV_INTERN -int -cmp_data_data_slow_like_prefix( -/*===========================*/ - const byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - const byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2); /* in: data field length or UNIV_SQL_NULL */ -/***************************************************************** -This function is used to compare two varchar/char fields. The comparison -is for the LIKE operator. 
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ -UNIV_INTERN -int -cmp_data_data_slow_like_suffix( -/*===========================*/ - const byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - const byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2); /* in: data field length or UNIV_SQL_NULL */ -/***************************************************************** -This function is used to compare two varchar/char fields. The comparison -is for the LIKE operator. -@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ -UNIV_INTERN -int -cmp_data_data_slow_like_substr( -/*===========================*/ - const byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - const byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2); /* in: data field length or UNIV_SQL_NULL */ -/*************************************************************//** -This function is used to compare two dfields where at least the first -has its data type field set. -@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2, -respectively */ -UNIV_INLINE -int -cmp_dfield_dfield( -/*==============*/ - const dfield_t* dfield1,/*!< in: data field; must have type field set */ - const dfield_t* dfield2);/*!< in: data field */ -/*************************************************************//** -This function is used to compare a data tuple to a physical record. -Only dtuple->n_fields_cmp first fields are taken into account for -the data tuple! If we denote by n = n_fields_cmp, then rec must -have either m >= n fields, or it must differ from dtuple in some of -the m fields rec has. If rec has an externally stored field we do not -compare it but return with value 0 if such a comparison should be -made. 
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, -respectively, when only the common first fields are compared, or until -the first externally stored field in rec */ -UNIV_INTERN -int -cmp_dtuple_rec_with_match_low( -/*==========================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n_cmp, /*!< in: number of fields to compare */ - ulint* matched_fields, - /*!< in/out: number of already completely - matched fields; when function returns, - contains the value for current comparison */ - ulint* matched_bytes) - /*!< in/out: number of already matched - bytes within the first field not completely - matched; when function returns, contains the - value for current comparison */ - MY_ATTRIBUTE((nonnull)); -#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes) \ - cmp_dtuple_rec_with_match_low( \ - tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes) -/**************************************************************//** -Compares a data tuple to a physical record. -@see cmp_dtuple_rec_with_match -@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */ -UNIV_INTERN -int -cmp_dtuple_rec( -/*===========*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ -/**************************************************************//** -Checks if a dtuple is a prefix of a record. The last field in dtuple -is allowed to be a prefix of the corresponding field in the record. 
-@return TRUE if prefix */ -UNIV_INTERN -ibool -cmp_dtuple_is_prefix_of_rec( -/*========================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ -/*************************************************************//** -Compare two physical records that contain the same number of columns, -none of which are stored externally. -@retval 1 if rec1 (including non-ordering columns) is greater than rec2 -@retval -1 if rec1 (including non-ordering columns) is less than rec2 -@retval 0 if rec1 is a duplicate of rec2 */ -UNIV_INTERN -int -cmp_rec_rec_simple( -/*===============*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index, /*!< in: data dictionary index */ - struct TABLE* table) /*!< in: MySQL table, for reporting - duplicate key value if applicable, - or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result)); -/*************************************************************//** -This function is used to compare two physical records. Only the common -first fields are compared, and if an externally stored field is -encountered, then 0 is returned. 
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */ -UNIV_INTERN -int -cmp_rec_rec_with_match( -/*===================*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ - dict_index_t* index, /*!< in: data dictionary index */ - ibool nulls_unequal, - /* in: TRUE if this is for index statistics - cardinality estimation, and innodb_stats_method - is "nulls_unequal" or "nulls_ignored" */ - ulint* matched_fields, /*!< in/out: number of already completely - matched fields; when the function returns, - contains the value the for current - comparison */ - ulint* matched_bytes);/*!< in/out: number of already matched - bytes within the first field not completely - matched; when the function returns, contains - the value for the current comparison */ -/*************************************************************//** -This function is used to compare two physical records. Only the common -first fields are compared. -@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than -rec2; only the common first fields are compared */ -UNIV_INLINE -int -cmp_rec_rec( -/*========*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ - dict_index_t* index); /*!< in: data dictionary index */ - -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. 
*/ -UNIV_INTERN -int -cmp_dfield_dfield_like_prefix( -/*==========================*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2);/* in: data field */ -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. */ -UNIV_INLINE -int -cmp_dfield_dfield_like_substr( -/*==========================*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2);/* in: data field */ -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. */ -UNIV_INLINE -int -cmp_dfield_dfield_like_suffix( -/*==========================*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2);/* in: data field */ - -#ifndef UNIV_NONINL -#include "rem0cmp.ic" -#endif - -#endif diff --git a/storage/xtradb/include/rem0cmp.ic b/storage/xtradb/include/rem0cmp.ic deleted file mode 100644 index 67a2dcacba1..00000000000 --- a/storage/xtradb/include/rem0cmp.ic +++ /dev/null @@ -1,186 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/rem0cmp.ic -Comparison services for records - -Created 7/1/1994 Heikki Tuuri -************************************************************************/ - -/*************************************************************//** -This function is used to compare two data fields for which we know the -data type. -@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ -UNIV_INLINE -int -cmp_data_data( -/*==========*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - const byte* data1, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len1, /*!< in: data field length or UNIV_SQL_NULL */ - const byte* data2, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len2) /*!< in: data field length or UNIV_SQL_NULL */ -{ - return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2)); -} - -/***************************************************************** -This function is used to compare two (CHAR) data fields for the LIKE -operator. 
*/ -UNIV_INLINE -int -cmp_data_data_like_prefix( -/*======================*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2) /* in: data field length or UNIV_SQL_NULL */ -{ - return(cmp_data_data_slow_like_prefix(data1, len1, data2, len2)); -} -/***************************************************************** -This function is used to compare two (CHAR) data fields for the LIKE -operator. */ -UNIV_INLINE -int -cmp_data_data_like_suffix( -/*======================*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2) /* in: data field length or UNIV_SQL_NULL */ -{ - return(cmp_data_data_slow_like_suffix(data1, len1, data2, len2)); -} -/***************************************************************** -This function is used to compare two (CHAR) data fields for the LIKE -operator. */ -UNIV_INLINE -int -cmp_data_data_like_substr( -/*======================*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2) /* in: data field length or UNIV_SQL_NULL */ -{ - return(cmp_data_data_slow_like_substr(data1, len1, data2, len2)); -} -/*************************************************************//** -This function is used to compare two dfields where at least the first -has its data type field set. 
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2, -respectively */ -UNIV_INLINE -int -cmp_dfield_dfield( -/*==============*/ - const dfield_t* dfield1,/*!< in: data field; must have type field set */ - const dfield_t* dfield2)/*!< in: data field */ -{ - const dtype_t* type; - - ut_ad(dfield_check_typed(dfield1)); - - type = dfield_get_type(dfield1); - - return(cmp_data_data(type->mtype, type->prtype, - (const byte*) dfield_get_data(dfield1), - dfield_get_len(dfield1), - (const byte*) dfield_get_data(dfield2), - dfield_get_len(dfield2))); -} - -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. */ -UNIV_INLINE -int -cmp_dfield_dfield_like_suffix( -/*==========================*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2)/* in: data field */ -{ - ut_ad(dfield_check_typed(dfield1)); - - return(cmp_data_data_like_suffix( - (byte*) dfield_get_data(dfield1), - dfield_get_len(dfield1), - (byte*) dfield_get_data(dfield2), - dfield_get_len(dfield2))); -} - -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. 
*/ -UNIV_INLINE -int -cmp_dfield_dfield_like_substr( -/*==========================*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2)/* in: data field */ -{ - ut_ad(dfield_check_typed(dfield1)); - - return(cmp_data_data_like_substr( - (byte*) dfield_get_data(dfield1), - dfield_get_len(dfield1), - (byte*) dfield_get_data(dfield2), - dfield_get_len(dfield2))); -} -/*************************************************************//** -This function is used to compare two physical records. Only the common -first fields are compared. -@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than -rec2; only the common first fields are compared */ -UNIV_INLINE -int -cmp_rec_rec( -/*========*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ - dict_index_t* index) /*!< in: data dictionary index */ -{ - ulint match_f = 0; - ulint match_b = 0; - - return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - FALSE, &match_f, &match_b)); -} diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h deleted file mode 100644 index 9baf0ab380a..00000000000 --- a/storage/xtradb/include/rem0rec.h +++ /dev/null @@ -1,996 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/rem0rec.h -Record manager - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef rem0rec_h -#define rem0rec_h - -#include "univ.i" -#include "data0data.h" -#include "rem0types.h" -#include "mtr0types.h" -#include "page0types.h" - -/* Info bit denoting the predefined minimum record: this bit is set -if and only if the record is the first user record on a non-leaf -B-tree page that is the leftmost page on its level -(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */ -#define REC_INFO_MIN_REC_FLAG 0x10UL -/* The deleted flag in info bits */ -#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the - record has been delete marked */ - -/* Number of extra bytes in an old-style record, -in addition to the data and the offsets */ -#define REC_N_OLD_EXTRA_BYTES 6 -/* Number of extra bytes in a new-style record, -in addition to the data and the offsets */ -#define REC_N_NEW_EXTRA_BYTES 5 - -/* Record status values */ -#define REC_STATUS_ORDINARY 0 -#define REC_STATUS_NODE_PTR 1 -#define REC_STATUS_INFIMUM 2 -#define REC_STATUS_SUPREMUM 3 - -/* The following four constants are needed in page0zip.cc in order to -efficiently compress and decompress pages. */ - -/* The offset of heap_no in a compact record */ -#define REC_NEW_HEAP_NO 4 -/* The shift of heap_no in a compact record. 
-The status is stored in the low-order bits. */ -#define REC_HEAP_NO_SHIFT 3 - -/* Length of a B-tree node pointer, in bytes */ -#define REC_NODE_PTR_SIZE 4 - -/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */ -#define REC_1BYTE_SQL_NULL_MASK 0x80UL -/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */ -#define REC_2BYTE_SQL_NULL_MASK 0x8000UL - -/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most -significant bit denotes that the tail of a field is stored off-page. */ -#define REC_2BYTE_EXTERN_MASK 0x4000UL - -#ifdef UNIV_DEBUG -/* Length of the rec_get_offsets() header */ -# define REC_OFFS_HEADER_SIZE 4 -#else /* UNIV_DEBUG */ -/* Length of the rec_get_offsets() header */ -# define REC_OFFS_HEADER_SIZE 2 -#endif /* UNIV_DEBUG */ - -/* Number of elements that should be initially allocated for the -offsets[] array, first passed to rec_get_offsets() */ -#define REC_OFFS_NORMAL_SIZE 100 -#define REC_OFFS_SMALL_SIZE 10 - -/******************************************************//** -The following function is used to get the pointer of the next chained record -on the same page. -@return pointer to the next chained record, or NULL if none */ -UNIV_INLINE -const rec_t* -rec_get_next_ptr_const( -/*===================*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to get the pointer of the next chained record -on the same page. 
-@return pointer to the next chained record, or NULL if none */ -UNIV_INLINE -rec_t* -rec_get_next_ptr( -/*=============*/ - rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to get the offset of the -next chained record on the same page. -@return the page offset of the next chained record, or 0 if none */ -UNIV_INLINE -ulint -rec_get_next_offs( -/*==============*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the next record offset field -of an old-style record. */ -UNIV_INLINE -void -rec_set_next_offs_old( -/*==================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint next) /*!< in: offset of the next record */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to set the next record offset field -of a new-style record. */ -UNIV_INLINE -void -rec_set_next_offs_new( -/*==================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - ulint next) /*!< in: offset of the next record */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to get the number of fields -in an old-style record. -@return number of data fields */ -UNIV_INLINE -ulint -rec_get_n_fields_old( -/*=================*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to get the number of fields -in a record. 
-@return number of data fields */ -UNIV_INLINE -ulint -rec_get_n_fields( -/*=============*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index) /*!< in: record descriptor */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to get the number of records owned by the -previous directory record. -@return number of owned records */ -UNIV_INLINE -ulint -rec_get_n_owned_old( -/*================*/ - const rec_t* rec) /*!< in: old-style physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the number of owned records. */ -UNIV_INLINE -void -rec_set_n_owned_old( -/*================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint n_owned) /*!< in: the number of owned */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to get the number of records owned by the -previous directory record. -@return number of owned records */ -UNIV_INLINE -ulint -rec_get_n_owned_new( -/*================*/ - const rec_t* rec) /*!< in: new-style physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the number of owned records. */ -UNIV_INLINE -void -rec_set_n_owned_new( -/*================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint n_owned)/*!< in: the number of owned */ - MY_ATTRIBUTE((nonnull(1))); -/******************************************************//** -The following function is used to retrieve the info bits of -a record. 
-@return info bits */ -UNIV_INLINE -ulint -rec_get_info_bits( -/*==============*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the info bits of a record. */ -UNIV_INLINE -void -rec_set_info_bits_old( -/*==================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint bits) /*!< in: info bits */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to set the info bits of a record. */ -UNIV_INLINE -void -rec_set_info_bits_new( -/*==================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - ulint bits) /*!< in: info bits */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function retrieves the status bits of a new-style record. -@return status bits */ -UNIV_INLINE -ulint -rec_get_status( -/*===========*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/******************************************************//** -The following function is used to set the status bits of a new-style record. */ -UNIV_INLINE -void -rec_set_status( -/*===========*/ - rec_t* rec, /*!< in/out: physical record */ - ulint bits) /*!< in: info bits */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -The following function is used to retrieve the info and status -bits of a record. (Only compact records have status bits.) 
-@return info bits */ -UNIV_INLINE -ulint -rec_get_info_and_status_bits( -/*=========================*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the info and status -bits of a record. (Only compact records have status bits.) */ -UNIV_INLINE -void -rec_set_info_and_status_bits( -/*=========================*/ - rec_t* rec, /*!< in/out: compact physical record */ - ulint bits) /*!< in: info bits */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -The following function tells if record is delete marked. -@return nonzero if delete marked */ -UNIV_INLINE -ulint -rec_get_deleted_flag( -/*=================*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the deleted bit. */ -UNIV_INLINE -void -rec_set_deleted_flag_old( -/*=====================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint flag) /*!< in: nonzero if delete marked */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to set the deleted bit. */ -UNIV_INLINE -void -rec_set_deleted_flag_new( -/*=====================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint flag) /*!< in: nonzero if delete marked */ - MY_ATTRIBUTE((nonnull(1))); -/******************************************************//** -The following function tells if a new-style record is a node pointer. 
-@return TRUE if node pointer */ -UNIV_INLINE -ibool -rec_get_node_ptr_flag( -/*==================*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to get the order number -of an old-style record in the heap of the index page. -@return heap order number */ -UNIV_INLINE -ulint -rec_get_heap_no_old( -/*================*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the heap number -field in an old-style record. */ -UNIV_INLINE -void -rec_set_heap_no_old( -/*================*/ - rec_t* rec, /*!< in: physical record */ - ulint heap_no)/*!< in: the heap number */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to get the order number -of a new-style record in the heap of the index page. -@return heap order number */ -UNIV_INLINE -ulint -rec_get_heap_no_new( -/*================*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -The following function is used to set the heap number -field in a new-style record. */ -UNIV_INLINE -void -rec_set_heap_no_new( -/*================*/ - rec_t* rec, /*!< in/out: physical record */ - ulint heap_no)/*!< in: the heap number */ - MY_ATTRIBUTE((nonnull)); -/******************************************************//** -The following function is used to test whether the data offsets -in the record are stored in one-byte or two-byte format. 
-@return TRUE if 1-byte form */ -UNIV_INLINE -ibool -rec_get_1byte_offs_flag( -/*====================*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/******************************************************//** -The following function is used to set the 1-byte offsets flag. */ -UNIV_INLINE -void -rec_set_1byte_offs_flag( -/*====================*/ - rec_t* rec, /*!< in: physical record */ - ibool flag) /*!< in: TRUE if 1byte form */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -Returns the offset of nth field end if the record is stored in the 1-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. -@return offset of the end of the field, SQL null flag ORed */ -UNIV_INLINE -ulint -rec_1_get_field_end_info( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/******************************************************//** -Returns the offset of nth field end if the record is stored in the 2-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. -@return offset of the end of the field, SQL null flag and extern -storage flag ORed */ -UNIV_INLINE -ulint -rec_2_get_field_end_info( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/******************************************************//** -Returns nonzero if the field is stored off-page. 
-@retval 0 if the field is stored in-page -@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */ -UNIV_INLINE -ulint -rec_2_is_field_extern( -/*==================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/******************************************************//** -Determine how many of the first n columns in a compact -physical record are stored externally. -@return number of externally stored columns */ -UNIV_INTERN -ulint -rec_get_n_extern_new( -/*=================*/ - const rec_t* rec, /*!< in: compact physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint n) /*!< in: number of columns to scan */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/******************************************************//** -The following function determines the offsets to each field -in the record. It can reuse a previously allocated array. -@return the new offsets */ -UNIV_INTERN -ulint* -rec_get_offsets_func( -/*=================*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets,/*!< in/out: array consisting of - offsets[0] allocated elements, - or an array from rec_get_offsets(), - or NULL */ - ulint n_fields,/*!< in: maximum number of - initialized fields - (ULINT_UNDEFINED if all fields) */ -#ifdef UNIV_DEBUG - const char* file, /*!< in: file name where called */ - ulint line, /*!< in: line number where called */ -#endif /* UNIV_DEBUG */ - mem_heap_t** heap) /*!< in/out: memory heap */ -#ifdef UNIV_DEBUG - MY_ATTRIBUTE((nonnull(1,2,5,7),warn_unused_result)); -#else /* UNIV_DEBUG */ - MY_ATTRIBUTE((nonnull(1,2,5),warn_unused_result)); -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_DEBUG -# define rec_get_offsets(rec,index,offsets,n,heap) \ - rec_get_offsets_func(rec,index,offsets,n,__FILE__,__LINE__,heap) -#else /* UNIV_DEBUG */ -# define rec_get_offsets(rec, index, offsets, 
n, heap) \ - rec_get_offsets_func(rec, index, offsets, n, heap) -#endif /* UNIV_DEBUG */ - -/******************************************************//** -The following function determines the offsets to each field -in the record. It can reuse a previously allocated array. */ -UNIV_INTERN -void -rec_get_offsets_reverse( -/*====================*/ - const byte* extra, /*!< in: the extra bytes of a - compact record in reverse order, - excluding the fixed-size - REC_N_NEW_EXTRA_BYTES */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint node_ptr,/*!< in: nonzero=node pointer, - 0=leaf node */ - ulint* offsets)/*!< in/out: array consisting of - offsets[0] allocated elements */ - MY_ATTRIBUTE((nonnull)); -#ifdef UNIV_DEBUG -/************************************************************//** -Validates offsets returned by rec_get_offsets(). -@return TRUE if valid */ -UNIV_INLINE -ibool -rec_offs_validate( -/*==============*/ - const rec_t* rec, /*!< in: record or NULL */ - const dict_index_t* index, /*!< in: record descriptor or NULL */ - const ulint* offsets)/*!< in: array returned by - rec_get_offsets() */ - MY_ATTRIBUTE((nonnull(3), warn_unused_result)); -/************************************************************//** -Updates debug data in offsets, in order to avoid bogus -rec_offs_validate() failures. */ -UNIV_INLINE -void -rec_offs_make_valid( -/*================*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets)/*!< in: array returned by - rec_get_offsets() */ - MY_ATTRIBUTE((nonnull)); -#else -# define rec_offs_make_valid(rec, index, offsets) ((void) 0) -#endif /* UNIV_DEBUG */ - -/************************************************************//** -The following function is used to get the offset to the nth -data field in an old-style record. 
-@return offset to the field */ -UNIV_INTERN -ulint -rec_get_nth_field_offs_old( -/*=======================*/ - const rec_t* rec, /*!< in: record */ - ulint n, /*!< in: index of the field */ - ulint* len) /*!< out: length of the field; UNIV_SQL_NULL - if SQL null */ - MY_ATTRIBUTE((nonnull)); -#define rec_get_nth_field_old(rec, n, len) \ -((rec) + rec_get_nth_field_offs_old(rec, n, len)) -/************************************************************//** -Gets the physical size of an old-style field. -Also an SQL null may have a field of size > 0, -if the data type is of a fixed size. -@return field size in bytes */ -UNIV_INLINE -ulint -rec_get_nth_field_size( -/*===================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: index of the field */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/************************************************************//** -The following function is used to get an offset to the nth -data field in a record. -@return offset from the origin of rec */ -UNIV_INLINE -ulint -rec_get_nth_field_offs( -/*===================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n, /*!< in: index of the field */ - ulint* len) /*!< out: length of the field; UNIV_SQL_NULL - if SQL null */ - MY_ATTRIBUTE((nonnull)); -#define rec_get_nth_field(rec, offsets, n, len) \ -((rec) + rec_get_nth_field_offs(offsets, n, len)) -/******************************************************//** -Determine if the offsets are for a record in the new -compact format. -@return nonzero if compact format */ -UNIV_INLINE -ulint -rec_offs_comp( -/*==========*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -Determine if the offsets are for a record containing -externally stored columns. 
-@return nonzero if externally stored */ -UNIV_INLINE -ulint -rec_offs_any_extern( -/*================*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -Determine if the offsets are for a record containing null BLOB pointers. -@return first field containing a null BLOB pointer, or NULL if none found */ -UNIV_INLINE -const byte* -rec_offs_any_null_extern( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - const ulint* offsets) /*!< in: rec_get_offsets(rec) */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -Returns nonzero if the extern bit is set in nth field of rec. -@return nonzero if externally stored */ -UNIV_INLINE -ulint -rec_offs_nth_extern( -/*================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: nth field */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -Returns nonzero if the SQL NULL bit is set in nth field of rec. -@return nonzero if SQL NULL */ -UNIV_INLINE -ulint -rec_offs_nth_sql_null( -/*==================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: nth field */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/******************************************************//** -Gets the physical size of a field. -@return length of field */ -UNIV_INLINE -ulint -rec_offs_nth_size( -/*==============*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: nth field */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); - -/******************************************************//** -Returns the number of extern bits set in a record. 
-@return number of externally stored fields */ -UNIV_INLINE -ulint -rec_offs_n_extern( -/*==============*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/***********************************************************//** -This is used to modify the value of an already existing field in a record. -The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null. -For records in ROW_FORMAT=COMPACT (new-style records), len must not be -UNIV_SQL_NULL unless the field already is SQL null. */ -UNIV_INLINE -void -rec_set_nth_field( -/*==============*/ - rec_t* rec, /*!< in: record */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n, /*!< in: index number of the field */ - const void* data, /*!< in: pointer to the data if not SQL null */ - ulint len) /*!< in: length of the data or UNIV_SQL_NULL. - If not SQL null, must have the same - length as the previous value. - If SQL null, previous value must be - SQL null. */ - MY_ATTRIBUTE((nonnull(1,2))); -/**********************************************************//** -The following function returns the data size of an old-style physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. -@return size */ -UNIV_INLINE -ulint -rec_get_data_size_old( -/*==================*/ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/**********************************************************//** -The following function returns the number of allocated elements -for an array of offsets. 
-@return number of elements */ -UNIV_INLINE -ulint -rec_offs_get_n_alloc( -/*=================*/ - const ulint* offsets)/*!< in: array for rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/**********************************************************//** -The following function sets the number of allocated elements -for an array of offsets. */ -UNIV_INLINE -void -rec_offs_set_n_alloc( -/*=================*/ - ulint* offsets, /*!< out: array for rec_get_offsets(), - must be allocated */ - ulint n_alloc) /*!< in: number of elements */ - MY_ATTRIBUTE((nonnull)); -#define rec_offs_init(offsets) \ - rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets) -/**********************************************************//** -The following function returns the number of fields in a record. -@return number of fields */ -UNIV_INLINE -ulint -rec_offs_n_fields( -/*==============*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/**********************************************************//** -The following function returns the data size of a physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. -@return size */ -UNIV_INLINE -ulint -rec_offs_data_size( -/*===============*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/**********************************************************//** -Returns the total size of record minus data size of record. -The value returned by the function is the distance from record -start to record origin in bytes. 
-@return size */ -UNIV_INLINE -ulint -rec_offs_extra_size( -/*================*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/**********************************************************//** -Returns the total size of a physical record. -@return size */ -UNIV_INLINE -ulint -rec_offs_size( -/*==========*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -#ifdef UNIV_DEBUG -/**********************************************************//** -Returns a pointer to the start of the record. -@return pointer to start */ -UNIV_INLINE -byte* -rec_get_start( -/*==========*/ - const rec_t* rec, /*!< in: pointer to record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -/**********************************************************//** -Returns a pointer to the end of the record. -@return pointer to end */ -UNIV_INLINE -byte* -rec_get_end( -/*========*/ - const rec_t* rec, /*!< in: pointer to record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -#else /* UNIV_DEBUG */ -# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets)) -# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets)) -#endif /* UNIV_DEBUG */ -/***************************************************************//** -Copies a physical record to a buffer. -@return pointer to the origin of the copy */ -UNIV_INLINE -rec_t* -rec_copy( -/*=====*/ - void* buf, /*!< in: buffer */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Determines the size of a data tuple prefix in a temporary file. 
-@return total size */ -UNIV_INTERN -ulint -rec_get_converted_size_temp( -/*========================*/ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); - -/******************************************************//** -Determine the offset to each field in temporary file. -@see rec_convert_dtuple_to_temp() */ -UNIV_INTERN -void -rec_init_offsets_temp( -/*==================*/ - const rec_t* rec, /*!< in: temporary file record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets)/*!< in/out: array of offsets; - in: n=rec_offs_n_fields(offsets) */ - MY_ATTRIBUTE((nonnull)); - -/*********************************************************//** -Builds a temporary file record out of a data tuple. -@see rec_init_offsets_temp() */ -UNIV_INTERN -void -rec_convert_dtuple_to_temp( -/*=======================*/ - rec_t* rec, /*!< out: record */ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields) /*!< in: number of fields */ - MY_ATTRIBUTE((nonnull)); - -/**************************************************************//** -Copies the first n fields of a physical record to a new physical record in -a buffer. -@return own: copied record */ -UNIV_INTERN -rec_t* -rec_copy_prefix_to_buf( -/*===================*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint n_fields, /*!< in: number of fields - to copy */ - byte** buf, /*!< in/out: memory buffer - for the copied prefix, - or NULL */ - ulint* buf_size) /*!< in/out: buffer size */ - MY_ATTRIBUTE((nonnull)); -/************************************************************//** -Folds a prefix of a physical record to a ulint. 
-@return the folded value */ -UNIV_INLINE -ulint -rec_fold( -/*=====*/ - const rec_t* rec, /*!< in: the physical record */ - const ulint* offsets, /*!< in: array returned by - rec_get_offsets() */ - ulint n_fields, /*!< in: number of complete - fields to fold */ - ulint n_bytes, /*!< in: number of bytes to fold - in an incomplete last field */ - index_id_t tree_id) /*!< in: index tree id */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************//** -Builds a physical record out of a data tuple and -stores it into the given buffer. -@return pointer to the origin of physical record */ -UNIV_INTERN -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - byte* buf, /*!< in: start address of the - physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* dtuple, /*!< in: data tuple */ - ulint n_ext) /*!< in: number of - externally stored columns */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/**********************************************************//** -Returns the extra size of an old-style physical record if we know its -data size and number of fields. -@return extra size */ -UNIV_INLINE -ulint -rec_get_converted_extra_size( -/*=========================*/ - ulint data_size, /*!< in: data size */ - ulint n_fields, /*!< in: number of fields */ - ulint n_ext) /*!< in: number of externally stored columns */ - MY_ATTRIBUTE((const)); -/**********************************************************//** -Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT. 
-@return total size */ -UNIV_INTERN -ulint -rec_get_converted_size_comp_prefix( -/*===============================*/ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ - MY_ATTRIBUTE((warn_unused_result, nonnull(1,2))); -/**********************************************************//** -Determines the size of a data tuple in ROW_FORMAT=COMPACT. -@return total size */ -UNIV_INTERN -ulint -rec_get_converted_size_comp( -/*========================*/ - const dict_index_t* index, /*!< in: record descriptor; - dict_table_is_comp() is - assumed to hold, even if - it does not */ - ulint status, /*!< in: status bits of the record */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ - MY_ATTRIBUTE((nonnull(1,3))); -/**********************************************************//** -The following function returns the size of a data tuple when converted to -a physical record. -@return size */ -UNIV_INLINE -ulint -rec_get_converted_size( -/*===================*/ - dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* dtuple, /*!< in: data tuple */ - ulint n_ext) /*!< in: number of externally stored columns */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -#ifndef UNIV_HOTBACKUP -/**************************************************************//** -Copies the first n fields of a physical record to a data tuple. -The fields are copied to the memory heap. 
*/ -UNIV_INTERN -void -rec_copy_prefix_to_dtuple( -/*======================*/ - dtuple_t* tuple, /*!< out: data tuple */ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint n_fields, /*!< in: number of fields - to copy */ - mem_heap_t* heap) /*!< in: memory heap */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/***************************************************************//** -Validates the consistency of a physical record. -@return TRUE if ok */ -UNIV_INTERN -ibool -rec_validate( -/*=========*/ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull)); -/***************************************************************//** -Prints an old-style physical record. */ -UNIV_INTERN -void -rec_print_old( -/*==========*/ - FILE* file, /*!< in: file where to print */ - const rec_t* rec) /*!< in: physical record */ - MY_ATTRIBUTE((nonnull)); -#ifndef UNIV_HOTBACKUP -/***************************************************************//** -Prints a physical record in ROW_FORMAT=COMPACT. Ignores the -record header. */ -UNIV_INTERN -void -rec_print_comp( -/*===========*/ - FILE* file, /*!< in: file where to print */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull)); -/***************************************************************//** -Prints a physical record. */ -UNIV_INTERN -void -rec_print_new( -/*==========*/ - FILE* file, /*!< in: file where to print */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ - MY_ATTRIBUTE((nonnull)); -/***************************************************************//** -Prints a physical record. 
*/ -UNIV_INTERN -void -rec_print( -/*======*/ - FILE* file, /*!< in: file where to print */ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index) /*!< in: record descriptor */ - MY_ATTRIBUTE((nonnull)); - -# ifdef UNIV_DEBUG -/************************************************************//** -Reads the DB_TRX_ID of a clustered index record. -@return the value of DB_TRX_ID */ -UNIV_INTERN -trx_id_t -rec_get_trx_id( -/*===========*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index) /*!< in: clustered index */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -# endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - -/* Maximum lengths for the data in a physical record if the offsets -are given in one byte (resp. two byte) format. */ -#define REC_1BYTE_OFFS_LIMIT 0x7FUL -#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL - -/* The data size of record must be smaller than this because we reserve -two upmost bits in a two byte offset for special purposes */ -#define REC_MAX_DATA_SIZE (16384) - -#ifdef WITH_WSREP -int wsrep_rec_get_foreign_key( - byte *buf, /* out: extracted key */ - ulint *buf_len, /* in/out: length of buf */ - const rec_t* rec, /* in: physical record */ - dict_index_t* index_for, /* in: index for foreign table */ - dict_index_t* index_ref, /* in: index for referenced table */ - ibool new_protocol); /* in: protocol > 1 */ -#endif /* WITH_WSREP */ -#ifndef UNIV_NONINL -#include "rem0rec.ic" -#endif - -#endif diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic deleted file mode 100644 index 5811a77a48b..00000000000 --- a/storage/xtradb/include/rem0rec.ic +++ /dev/null @@ -1,1719 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/rem0rec.ic -Record manager - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "mach0data.h" -#include "ut0byte.h" -#include "dict0dict.h" -#include "btr0types.h" - -/* Compact flag ORed to the extra size returned by rec_get_offsets() */ -#define REC_OFFS_COMPACT ((ulint) 1 << 31) -/* SQL NULL flag in offsets returned by rec_get_offsets() */ -#define REC_OFFS_SQL_NULL ((ulint) 1 << 31) -/* External flag in offsets returned by rec_get_offsets() */ -#define REC_OFFS_EXTERNAL ((ulint) 1 << 30) -/* Mask for offsets returned by rec_get_offsets() */ -#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1) - -/* Offsets of the bit-fields in an old-style record. NOTE! In the table the -most significant bytes and bits are written below less significant. - - (1) byte offset (2) bit usage within byte - downward from - origin -> 1 8 bits pointer to next record - 2 8 bits pointer to next record - 3 1 bit short flag - 7 bits number of fields - 4 3 bits number of fields - 5 bits heap number - 5 8 bits heap number - 6 4 bits n_owned - 4 bits info bits -*/ - -/* Offsets of the bit-fields in a new-style record. NOTE! 
In the table the -most significant bytes and bits are written below less significant. - - (1) byte offset (2) bit usage within byte - downward from - origin -> 1 8 bits relative offset of next record - 2 8 bits relative offset of next record - the relative offset is an unsigned 16-bit - integer: - (offset_of_next_record - - offset_of_this_record) mod 64Ki, - where mod is the modulo as a non-negative - number; - we can calculate the offset of the next - record with the formula: - relative_offset + offset_of_this_record - mod UNIV_PAGE_SIZE - 3 3 bits status: - 000=conventional record - 001=node pointer record (inside B-tree) - 010=infimum record - 011=supremum record - 1xx=reserved - 5 bits heap number - 4 8 bits heap number - 5 4 bits n_owned - 4 bits info bits -*/ - -/* We list the byte offsets from the origin of the record, the mask, -and the shift needed to obtain each bit-field of the record. */ - -#define REC_NEXT 2 -#define REC_NEXT_MASK 0xFFFFUL -#define REC_NEXT_SHIFT 0 - -#define REC_OLD_SHORT 3 /* This is single byte bit-field */ -#define REC_OLD_SHORT_MASK 0x1UL -#define REC_OLD_SHORT_SHIFT 0 - -#define REC_OLD_N_FIELDS 4 -#define REC_OLD_N_FIELDS_MASK 0x7FEUL -#define REC_OLD_N_FIELDS_SHIFT 1 - -#define REC_NEW_STATUS 3 /* This is single byte bit-field */ -#define REC_NEW_STATUS_MASK 0x7UL -#define REC_NEW_STATUS_SHIFT 0 - -#define REC_OLD_HEAP_NO 5 -#define REC_HEAP_NO_MASK 0xFFF8UL -#if 0 /* defined in rem0rec.h for use of page0zip.cc */ -#define REC_NEW_HEAP_NO 4 -#define REC_HEAP_NO_SHIFT 3 -#endif - -#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */ -#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */ -#define REC_N_OWNED_MASK 0xFUL -#define REC_N_OWNED_SHIFT 0 - -#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */ -#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */ -#define REC_INFO_BITS_MASK 0xF0UL -#define REC_INFO_BITS_SHIFT 0 - -#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \ - ^ 
REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \ - ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \ - ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \ - ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \ - ^ 0xFFFFFFFFUL -# error "sum of old-style masks != 0xFFFFFFFFUL" -#endif -#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \ - ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \ - ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \ - ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \ - ^ 0xFFFFFFUL -# error "sum of new-style masks != 0xFFFFFFUL" -#endif - -/***********************************************************//** -Sets the value of the ith field SQL null bit of an old-style record. */ -UNIV_INTERN -void -rec_set_nth_field_null_bit( -/*=======================*/ - rec_t* rec, /*!< in: record */ - ulint i, /*!< in: ith field */ - ibool val); /*!< in: value to set */ -/***********************************************************//** -Sets an old-style record field to SQL null. -The physical size of the field is not changed. */ -UNIV_INTERN -void -rec_set_nth_field_sql_null( -/*=======================*/ - rec_t* rec, /*!< in: record */ - ulint n); /*!< in: index of the field */ - -/******************************************************//** -Gets a bit field from within 1 byte. */ -UNIV_INLINE -ulint -rec_get_bit_field_1( -/*================*/ - const rec_t* rec, /*!< in: pointer to record origin */ - ulint offs, /*!< in: offset from the origin down */ - ulint mask, /*!< in: mask used to filter bits */ - ulint shift) /*!< in: shift right applied after masking */ -{ - ut_ad(rec); - - return((mach_read_from_1(rec - offs) & mask) >> shift); -} - -/******************************************************//** -Sets a bit field within 1 byte. 
*/ -UNIV_INLINE -void -rec_set_bit_field_1( -/*================*/ - rec_t* rec, /*!< in: pointer to record origin */ - ulint val, /*!< in: value to set */ - ulint offs, /*!< in: offset from the origin down */ - ulint mask, /*!< in: mask used to filter bits */ - ulint shift) /*!< in: shift right applied after masking */ -{ - ut_ad(rec); - ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); - ut_ad(mask); - ut_ad(mask <= 0xFFUL); - ut_ad(((mask >> shift) << shift) == mask); - ut_ad(((val << shift) & mask) == (val << shift)); - - mach_write_to_1(rec - offs, - (mach_read_from_1(rec - offs) & ~mask) - | (val << shift)); -} - -/******************************************************//** -Gets a bit field from within 2 bytes. */ -UNIV_INLINE -ulint -rec_get_bit_field_2( -/*================*/ - const rec_t* rec, /*!< in: pointer to record origin */ - ulint offs, /*!< in: offset from the origin down */ - ulint mask, /*!< in: mask used to filter bits */ - ulint shift) /*!< in: shift right applied after masking */ -{ - ut_ad(rec); - - return((mach_read_from_2(rec - offs) & mask) >> shift); -} - -/******************************************************//** -Sets a bit field within 2 bytes. 
*/ -UNIV_INLINE -void -rec_set_bit_field_2( -/*================*/ - rec_t* rec, /*!< in: pointer to record origin */ - ulint val, /*!< in: value to set */ - ulint offs, /*!< in: offset from the origin down */ - ulint mask, /*!< in: mask used to filter bits */ - ulint shift) /*!< in: shift right applied after masking */ -{ - ut_ad(rec); - ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); - ut_ad(mask > 0xFFUL); - ut_ad(mask <= 0xFFFFUL); - ut_ad((mask >> shift) & 1); - ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1))); - ut_ad(((mask >> shift) << shift) == mask); - ut_ad(((val << shift) & mask) == (val << shift)); - - mach_write_to_2(rec - offs, - (mach_read_from_2(rec - offs) & ~mask) - | (val << shift)); -} - -/******************************************************//** -The following function is used to get the pointer of the next chained record -on the same page. -@return pointer to the next chained record, or NULL if none */ -UNIV_INLINE -const rec_t* -rec_get_next_ptr_const( -/*===================*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ -{ - ulint field_value; - - ut_ad(REC_NEXT_MASK == 0xFFFFUL); - ut_ad(REC_NEXT_SHIFT == 0); - - field_value = mach_read_from_2(rec - REC_NEXT); - - if (field_value == 0) { - - return(NULL); - } - - if (comp) { -#if UNIV_PAGE_SIZE_MAX <= 32768 - /* Note that for 64 KiB pages, field_value can 'wrap around' - and the debug assertion is not valid */ - - /* In the following assertion, field_value is interpreted - as signed 16-bit integer in 2's complement arithmetics. - If all platforms defined int16_t in the standard headers, - the expression could be written simpler as - (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE - */ - ut_ad((field_value >= 32768 - ? field_value - 65536 - : field_value) - + ut_align_offset(rec, UNIV_PAGE_SIZE) - < UNIV_PAGE_SIZE); -#endif - /* There must be at least REC_N_NEW_EXTRA_BYTES + 1 - between each record. 
*/ - ut_ad((field_value > REC_N_NEW_EXTRA_BYTES - && field_value < 32768) - || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES); - - return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE) - + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); - } else { - ut_ad(field_value < UNIV_PAGE_SIZE); - - return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE) - + field_value); - } -} - -/******************************************************//** -The following function is used to get the pointer of the next chained record -on the same page. -@return pointer to the next chained record, or NULL if none */ -UNIV_INLINE -rec_t* -rec_get_next_ptr( -/*=============*/ - rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ -{ - return(const_cast<rec_t*>(rec_get_next_ptr_const(rec, comp))); -} - -/******************************************************//** -The following function is used to get the offset of the next chained record -on the same page. -@return the page offset of the next chained record, or 0 if none */ -UNIV_INLINE -ulint -rec_get_next_offs( -/*==============*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ -{ - ulint field_value; -#if REC_NEXT_MASK != 0xFFFFUL -# error "REC_NEXT_MASK != 0xFFFFUL" -#endif -#if REC_NEXT_SHIFT -# error "REC_NEXT_SHIFT != 0" -#endif - - field_value = mach_read_from_2(rec - REC_NEXT); - - if (comp) { -#if UNIV_PAGE_SIZE_MAX <= 32768 - /* Note that for 64 KiB pages, field_value can 'wrap around' - and the debug assertion is not valid */ - - /* In the following assertion, field_value is interpreted - as signed 16-bit integer in 2's complement arithmetics. - If all platforms defined int16_t in the standard headers, - the expression could be written simpler as - (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE - */ - ut_ad((field_value >= 32768 - ? 
field_value - 65536 - : field_value) - + ut_align_offset(rec, UNIV_PAGE_SIZE) - < UNIV_PAGE_SIZE); -#endif - if (field_value == 0) { - - return(0); - } - - /* There must be at least REC_N_NEW_EXTRA_BYTES + 1 - between each record. */ - ut_ad((field_value > REC_N_NEW_EXTRA_BYTES - && field_value < 32768) - || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES); - - return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); - } else { - ut_ad(field_value < UNIV_PAGE_SIZE); - - return(field_value); - } -} - -/******************************************************//** -The following function is used to set the next record offset field -of an old-style record. */ -UNIV_INLINE -void -rec_set_next_offs_old( -/*==================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint next) /*!< in: offset of the next record */ -{ - ut_ad(rec); - ut_ad(UNIV_PAGE_SIZE > next); -#if REC_NEXT_MASK != 0xFFFFUL -# error "REC_NEXT_MASK != 0xFFFFUL" -#endif -#if REC_NEXT_SHIFT -# error "REC_NEXT_SHIFT != 0" -#endif - - mach_write_to_2(rec - REC_NEXT, next); -} - -/******************************************************//** -The following function is used to set the next record offset field -of a new-style record. */ -UNIV_INLINE -void -rec_set_next_offs_new( -/*==================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - ulint next) /*!< in: offset of the next record */ -{ - ulint field_value; - - ut_ad(rec); - ut_ad(UNIV_PAGE_SIZE > next); - - if (!next) { - field_value = 0; - } else { - /* The following two statements calculate - next - offset_of_rec mod 64Ki, where mod is the modulo - as a non-negative number */ - - field_value = (ulint) - ((lint) next - - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE)); - field_value &= REC_NEXT_MASK; - } - - mach_write_to_2(rec - REC_NEXT, field_value); -} - -/******************************************************//** -The following function is used to get the number of fields -in an old-style record. 
-@return number of data fields */ -UNIV_INLINE -ulint -rec_get_n_fields_old( -/*=================*/ - const rec_t* rec) /*!< in: physical record */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS, - REC_OLD_N_FIELDS_MASK, - REC_OLD_N_FIELDS_SHIFT); - ut_ad(ret <= REC_MAX_N_FIELDS); - ut_ad(ret > 0); - - return(ret); -} - -/******************************************************//** -The following function is used to set the number of fields -in an old-style record. */ -UNIV_INLINE -void -rec_set_n_fields_old( -/*=================*/ - rec_t* rec, /*!< in: physical record */ - ulint n_fields) /*!< in: the number of fields */ -{ - ut_ad(rec); - ut_ad(n_fields <= REC_MAX_N_FIELDS); - ut_ad(n_fields > 0); - - rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS, - REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); -} - -/******************************************************//** -The following function retrieves the status bits of a new-style record. -@return status bits */ -UNIV_INLINE -ulint -rec_get_status( -/*===========*/ - const rec_t* rec) /*!< in: physical record */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_1(rec, REC_NEW_STATUS, - REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); - ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0); - - return(ret); -} - -/******************************************************//** -The following function is used to get the number of fields -in a record. 
-@return number of data fields */ -UNIV_INLINE -ulint -rec_get_n_fields( -/*=============*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index) /*!< in: record descriptor */ -{ - ut_ad(rec); - ut_ad(index); - - if (!dict_table_is_comp(index->table)) { - return(rec_get_n_fields_old(rec)); - } - - switch (rec_get_status(rec)) { - case REC_STATUS_ORDINARY: - return(dict_index_get_n_fields(index)); - case REC_STATUS_NODE_PTR: - return(dict_index_get_n_unique_in_tree(index) + 1); - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - return(1); - default: - ut_error; - return(ULINT_UNDEFINED); - } -} - -/******************************************************//** -The following function is used to get the number of records owned by the -previous directory record. -@return number of owned records */ -UNIV_INLINE -ulint -rec_get_n_owned_old( -/*================*/ - const rec_t* rec) /*!< in: old-style physical record */ -{ - return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED, - REC_N_OWNED_MASK, REC_N_OWNED_SHIFT)); -} - -/******************************************************//** -The following function is used to set the number of owned records. */ -UNIV_INLINE -void -rec_set_n_owned_old( -/*================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint n_owned) /*!< in: the number of owned */ -{ - rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED, - REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); -} - -/******************************************************//** -The following function is used to get the number of records owned by the -previous directory record. -@return number of owned records */ -UNIV_INLINE -ulint -rec_get_n_owned_new( -/*================*/ - const rec_t* rec) /*!< in: new-style physical record */ -{ - return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED, - REC_N_OWNED_MASK, REC_N_OWNED_SHIFT)); -} - -/******************************************************//** -The following function is used to set the number of owned records. 
*/ -UNIV_INLINE -void -rec_set_n_owned_new( -/*================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint n_owned)/*!< in: the number of owned */ -{ - rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED, - REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); - if (page_zip && rec_get_status(rec) != REC_STATUS_SUPREMUM) { - page_zip_rec_set_owned(page_zip, rec, n_owned); - } -} - -/******************************************************//** -The following function is used to retrieve the info bits of a record. -@return info bits */ -UNIV_INLINE -ulint -rec_get_info_bits( -/*==============*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ -{ - return(rec_get_bit_field_1( - rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, - REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT)); -} - -/******************************************************//** -The following function is used to set the info bits of a record. */ -UNIV_INLINE -void -rec_set_info_bits_old( -/*==================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint bits) /*!< in: info bits */ -{ - rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS, - REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); -} -/******************************************************//** -The following function is used to set the info bits of a record. */ -UNIV_INLINE -void -rec_set_info_bits_new( -/*==================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - ulint bits) /*!< in: info bits */ -{ - rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS, - REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); -} - -/******************************************************//** -The following function is used to set the status bits of a new-style record. 
*/ -UNIV_INLINE -void -rec_set_status( -/*===========*/ - rec_t* rec, /*!< in/out: physical record */ - ulint bits) /*!< in: info bits */ -{ - rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, - REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); -} - -/******************************************************//** -The following function is used to retrieve the info and status -bits of a record. (Only compact records have status bits.) -@return info bits */ -UNIV_INLINE -ulint -rec_get_info_and_status_bits( -/*=========================*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ -{ - ulint bits; -#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ -& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) -# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" -#endif - if (comp) { - bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec); - } else { - bits = rec_get_info_bits(rec, FALSE); - ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT))); - } - return(bits); -} -/******************************************************//** -The following function is used to set the info and status -bits of a record. (Only compact records have status bits.) */ -UNIV_INLINE -void -rec_set_info_and_status_bits( -/*=========================*/ - rec_t* rec, /*!< in/out: physical record */ - ulint bits) /*!< in: info bits */ -{ -#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ -& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) -# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" -#endif - rec_set_status(rec, bits & REC_NEW_STATUS_MASK); - rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK); -} - -/******************************************************//** -The following function tells if record is delete marked. 
-@return nonzero if delete marked */ -UNIV_INLINE -ulint -rec_get_deleted_flag( -/*=================*/ - const rec_t* rec, /*!< in: physical record */ - ulint comp) /*!< in: nonzero=compact page format */ -{ - if (comp) { - return(rec_get_bit_field_1(rec, REC_NEW_INFO_BITS, - REC_INFO_DELETED_FLAG, - REC_INFO_BITS_SHIFT)); - } else { - return(rec_get_bit_field_1(rec, REC_OLD_INFO_BITS, - REC_INFO_DELETED_FLAG, - REC_INFO_BITS_SHIFT)); - } -} - -/******************************************************//** -The following function is used to set the deleted bit. */ -UNIV_INLINE -void -rec_set_deleted_flag_old( -/*=====================*/ - rec_t* rec, /*!< in: old-style physical record */ - ulint flag) /*!< in: nonzero if delete marked */ -{ - ulint val; - - val = rec_get_info_bits(rec, FALSE); - - if (flag) { - val |= REC_INFO_DELETED_FLAG; - } else { - val &= ~REC_INFO_DELETED_FLAG; - } - - rec_set_info_bits_old(rec, val); -} - -/******************************************************//** -The following function is used to set the deleted bit. */ -UNIV_INLINE -void -rec_set_deleted_flag_new( -/*=====================*/ - rec_t* rec, /*!< in/out: new-style physical record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint flag) /*!< in: nonzero if delete marked */ -{ - ulint val; - - val = rec_get_info_bits(rec, TRUE); - - if (flag) { - val |= REC_INFO_DELETED_FLAG; - } else { - val &= ~REC_INFO_DELETED_FLAG; - } - - rec_set_info_bits_new(rec, val); - - if (page_zip) { - page_zip_rec_set_deleted(page_zip, rec, flag); - } -} - -/******************************************************//** -The following function tells if a new-style record is a node pointer. 
-@return TRUE if node pointer */ -UNIV_INLINE -ibool -rec_get_node_ptr_flag( -/*==================*/ - const rec_t* rec) /*!< in: physical record */ -{ - return(REC_STATUS_NODE_PTR == rec_get_status(rec)); -} - -/******************************************************//** -The following function is used to get the order number -of an old-style record in the heap of the index page. -@return heap order number */ -UNIV_INLINE -ulint -rec_get_heap_no_old( -/*================*/ - const rec_t* rec) /*!< in: physical record */ -{ - return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO, - REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT)); -} - -/******************************************************//** -The following function is used to set the heap number -field in an old-style record. */ -UNIV_INLINE -void -rec_set_heap_no_old( -/*================*/ - rec_t* rec, /*!< in: physical record */ - ulint heap_no)/*!< in: the heap number */ -{ - rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO, - REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); -} - -/******************************************************//** -The following function is used to get the order number -of a new-style record in the heap of the index page. -@return heap order number */ -UNIV_INLINE -ulint -rec_get_heap_no_new( -/*================*/ - const rec_t* rec) /*!< in: physical record */ -{ - return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO, - REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT)); -} - -/******************************************************//** -The following function is used to set the heap number -field in a new-style record. 
*/ -UNIV_INLINE -void -rec_set_heap_no_new( -/*================*/ - rec_t* rec, /*!< in/out: physical record */ - ulint heap_no)/*!< in: the heap number */ -{ - rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO, - REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); -} - -/******************************************************//** -The following function is used to test whether the data offsets in the record -are stored in one-byte or two-byte format. -@return TRUE if 1-byte form */ -UNIV_INLINE -ibool -rec_get_1byte_offs_flag( -/*====================*/ - const rec_t* rec) /*!< in: physical record */ -{ -#if TRUE != 1 -#error "TRUE != 1" -#endif - - return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK, - REC_OLD_SHORT_SHIFT)); -} - -/******************************************************//** -The following function is used to set the 1-byte offsets flag. */ -UNIV_INLINE -void -rec_set_1byte_offs_flag( -/*====================*/ - rec_t* rec, /*!< in: physical record */ - ibool flag) /*!< in: TRUE if 1byte form */ -{ -#if TRUE != 1 -#error "TRUE != 1" -#endif - ut_ad(flag <= TRUE); - - rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK, - REC_OLD_SHORT_SHIFT); -} - -/******************************************************//** -Returns the offset of nth field end if the record is stored in the 1-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. -@return offset of the start of the field, SQL null flag ORed */ -UNIV_INLINE -ulint -rec_1_get_field_end_info( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1))); -} - -/******************************************************//** -Returns the offset of nth field end if the record is stored in the 2-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. 
-@return offset of the start of the field, SQL null flag and extern -storage flag ORed */ -UNIV_INLINE -ulint -rec_2_get_field_end_info( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2))); -} - -/******************************************************//** -Returns nonzero if the field is stored off-page. -@retval 0 if the field is stored in-page -@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */ -UNIV_INLINE -ulint -rec_2_is_field_extern( -/*==================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK); -} - -/* Get the base address of offsets. The extra_size is stored at -this position, and following positions hold the end offsets of -the fields. */ -#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE) - -/**********************************************************//** -The following function returns the number of allocated elements -for an array of offsets. -@return number of elements */ -UNIV_INLINE -ulint -rec_offs_get_n_alloc( -/*=================*/ - const ulint* offsets)/*!< in: array for rec_get_offsets() */ -{ - ulint n_alloc; - ut_ad(offsets); - n_alloc = offsets[0]; - ut_ad(n_alloc > REC_OFFS_HEADER_SIZE); - UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets); - return(n_alloc); -} - -/**********************************************************//** -The following function sets the number of allocated elements -for an array of offsets. 
*/ -UNIV_INLINE -void -rec_offs_set_n_alloc( -/*=================*/ - ulint* offsets, /*!< out: array for rec_get_offsets(), - must be allocated */ - ulint n_alloc) /*!< in: number of elements */ -{ - ut_ad(offsets); - ut_ad(n_alloc > REC_OFFS_HEADER_SIZE); - UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets); - offsets[0] = n_alloc; -} - -/**********************************************************//** -The following function returns the number of fields in a record. -@return number of fields */ -UNIV_INLINE -ulint -rec_offs_n_fields( -/*==============*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint n_fields; - ut_ad(offsets); - n_fields = offsets[1]; - ut_ad(n_fields > 0); - ut_ad(n_fields <= REC_MAX_N_FIELDS); - ut_ad(n_fields + REC_OFFS_HEADER_SIZE - <= rec_offs_get_n_alloc(offsets)); - return(n_fields); -} - -/************************************************************//** -Validates offsets returned by rec_get_offsets(). -@return TRUE if valid */ -UNIV_INLINE -ibool -rec_offs_validate( -/*==============*/ - const rec_t* rec, /*!< in: record or NULL */ - const dict_index_t* index, /*!< in: record descriptor or NULL */ - const ulint* offsets)/*!< in: array returned by - rec_get_offsets() */ -{ - ulint i = rec_offs_n_fields(offsets); - ulint last = ULINT_MAX; - ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT; - - if (rec) { - ut_ad((ulint) rec == offsets[2]); - if (!comp) { - ut_a(rec_get_n_fields_old(rec) >= i); - } - } - if (index) { - ulint max_n_fields; - ut_ad((ulint) index == offsets[3]); - max_n_fields = ut_max( - dict_index_get_n_fields(index), - dict_index_get_n_unique_in_tree(index) + 1); - if (comp && rec) { - switch (rec_get_status(rec)) { - case REC_STATUS_ORDINARY: - break; - case REC_STATUS_NODE_PTR: - max_n_fields = dict_index_get_n_unique_in_tree( - index) + 1; - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - max_n_fields = 1; - break; - default: - ut_error; - } - } - /* 
index->n_def == 0 for dummy indexes if !comp */ - ut_a(!comp || index->n_def); - ut_a(!index->n_def || i <= max_n_fields); - } - while (i--) { - ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; - ut_a(curr <= last); - last = curr; - } - return(TRUE); -} -#ifdef UNIV_DEBUG -/************************************************************//** -Updates debug data in offsets, in order to avoid bogus -rec_offs_validate() failures. */ -UNIV_INLINE -void -rec_offs_make_valid( -/*================*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets)/*!< in: array returned by - rec_get_offsets() */ -{ - ut_ad(rec); - ut_ad(index); - ut_ad(offsets); - ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets)); - offsets[2] = (ulint) rec; - offsets[3] = (ulint) index; -} -#endif /* UNIV_DEBUG */ - -/************************************************************//** -The following function is used to get an offset to the nth -data field in a record. -@return offset from the origin of rec */ -UNIV_INLINE -ulint -rec_get_nth_field_offs( -/*===================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n, /*!< in: index of the field */ - ulint* len) /*!< out: length of the field; UNIV_SQL_NULL - if SQL null */ -{ - ulint offs; - ulint length; - ut_ad(n < rec_offs_n_fields(offsets)); - ut_ad(len); - - if (n == 0) { - offs = 0; - } else { - offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK; - } - - length = rec_offs_base(offsets)[1 + n]; - - if (length & REC_OFFS_SQL_NULL) { - length = UNIV_SQL_NULL; - } else { - length &= REC_OFFS_MASK; - length -= offs; - } - - *len = length; - return(offs); -} - -/******************************************************//** -Determine if the offsets are for a record in the new -compact format. 
-@return nonzero if compact format */ -UNIV_INLINE -ulint -rec_offs_comp( -/*==========*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - return(*rec_offs_base(offsets) & REC_OFFS_COMPACT); -} - -/******************************************************//** -Determine if the offsets are for a record containing -externally stored columns. -@return nonzero if externally stored */ -UNIV_INLINE -ulint -rec_offs_any_extern( -/*================*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL); -} - -/******************************************************//** -Determine if the offsets are for a record containing null BLOB pointers. -@return first field containing a null BLOB pointer, or NULL if none found */ -UNIV_INLINE -const byte* -rec_offs_any_null_extern( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - const ulint* offsets) /*!< in: rec_get_offsets(rec) */ -{ - ulint i; - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - if (!rec_offs_any_extern(offsets)) { - return(NULL); - } - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (rec_offs_nth_extern(offsets, i)) { - ulint len; - const byte* field - = rec_get_nth_field(rec, offsets, i, &len); - - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - if (!memcmp(field + len - - BTR_EXTERN_FIELD_REF_SIZE, - field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)) { - return(field); - } - } - } - - return(NULL); -} - -/******************************************************//** -Returns nonzero if the extern bit is set in nth field of rec. 
-@return nonzero if externally stored */ -UNIV_INLINE -ulint -rec_offs_nth_extern( -/*================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: nth field */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL); -} - -/******************************************************//** -Returns nonzero if the SQL NULL bit is set in nth field of rec. -@return nonzero if SQL NULL */ -UNIV_INLINE -ulint -rec_offs_nth_sql_null( -/*==================*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: nth field */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL); -} - -/******************************************************//** -Gets the physical size of a field. -@return length of field */ -UNIV_INLINE -ulint -rec_offs_nth_size( -/*==============*/ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: nth field */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - if (!n) { - return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK); - } - return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n]) - & REC_OFFS_MASK); -} - -/******************************************************//** -Returns the number of extern bits set in a record. 
-@return number of externally stored fields */ -UNIV_INLINE -ulint -rec_offs_n_extern( -/*==============*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint n = 0; - - if (rec_offs_any_extern(offsets)) { - ulint i; - - for (i = rec_offs_n_fields(offsets); i--; ) { - if (rec_offs_nth_extern(offsets, i)) { - n++; - } - } - } - - return(n); -} - -/******************************************************//** -Returns the offset of n - 1th field end if the record is stored in the 1-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. This function and the 2-byte counterpart are defined here because the -C-compiler was not able to sum negative and positive constant offsets, and -warned of constant arithmetic overflow within the compiler. -@return offset of the start of the PREVIOUS field, SQL null flag ORed */ -UNIV_INLINE -ulint -rec_1_get_prev_field_end_info( -/*==========================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n))); -} - -/******************************************************//** -Returns the offset of n - 1th field end if the record is stored in the 2-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. -@return offset of the start of the PREVIOUS field, SQL null flag ORed */ -UNIV_INLINE -ulint -rec_2_get_prev_field_end_info( -/*==========================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n))); -} - -/******************************************************//** -Sets the field end info for the nth field if the record is stored in the -1-byte format. 
*/ -UNIV_INLINE -void -rec_1_set_field_end_info( -/*=====================*/ - rec_t* rec, /*!< in: record */ - ulint n, /*!< in: field index */ - ulint info) /*!< in: value to set */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info); -} - -/******************************************************//** -Sets the field end info for the nth field if the record is stored in the -2-byte format. */ -UNIV_INLINE -void -rec_2_set_field_end_info( -/*=====================*/ - rec_t* rec, /*!< in: record */ - ulint n, /*!< in: field index */ - ulint info) /*!< in: value to set */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info); -} - -/******************************************************//** -Returns the offset of nth field start if the record is stored in the 1-byte -offsets form. -@return offset of the start of the field */ -UNIV_INLINE -ulint -rec_1_get_field_start_offs( -/*=======================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - if (n == 0) { - - return(0); - } - - return(rec_1_get_prev_field_end_info(rec, n) - & ~REC_1BYTE_SQL_NULL_MASK); -} - -/******************************************************//** -Returns the offset of nth field start if the record is stored in the 2-byte -offsets form. 
-@return offset of the start of the field */ -UNIV_INLINE -ulint -rec_2_get_field_start_offs( -/*=======================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - if (n == 0) { - - return(0); - } - - return(rec_2_get_prev_field_end_info(rec, n) - & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK)); -} - -/******************************************************//** -The following function is used to read the offset of the start of a data field -in the record. The start of an SQL null field is the end offset of the -previous non-null field, or 0, if none exists. If n is the number of the last -field + 1, then the end offset of the last field is returned. -@return offset of the start of the field */ -UNIV_INLINE -ulint -rec_get_field_start_offs( -/*=====================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: field index */ -{ - ut_ad(rec); - ut_ad(n <= rec_get_n_fields_old(rec)); - - if (n == 0) { - - return(0); - } - - if (rec_get_1byte_offs_flag(rec)) { - - return(rec_1_get_field_start_offs(rec, n)); - } - - return(rec_2_get_field_start_offs(rec, n)); -} - -/************************************************************//** -Gets the physical size of an old-style field. -Also an SQL null may have a field of size > 0, -if the data type is of a fixed size. -@return field size in bytes */ -UNIV_INLINE -ulint -rec_get_nth_field_size( -/*===================*/ - const rec_t* rec, /*!< in: record */ - ulint n) /*!< in: index of the field */ -{ - ulint os; - ulint next_os; - - os = rec_get_field_start_offs(rec, n); - next_os = rec_get_field_start_offs(rec, n + 1); - - ut_ad(next_os - os < UNIV_PAGE_SIZE); - - return(next_os - os); -} - -/***********************************************************//** -This is used to modify the value of an already existing field in a record. 
-The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null. -For records in ROW_FORMAT=COMPACT (new-style records), len must not be -UNIV_SQL_NULL unless the field already is SQL null. */ -UNIV_INLINE -void -rec_set_nth_field( -/*==============*/ - rec_t* rec, /*!< in: record */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n, /*!< in: index number of the field */ - const void* data, /*!< in: pointer to the data - if not SQL null */ - ulint len) /*!< in: length of the data or UNIV_SQL_NULL */ -{ - byte* data2; - ulint len2; - - ut_ad(rec); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - if (len == UNIV_SQL_NULL) { - if (!rec_offs_nth_sql_null(offsets, n)) { - ut_a(!rec_offs_comp(offsets)); - rec_set_nth_field_sql_null(rec, n); - } - - return; - } - - data2 = rec_get_nth_field(rec, offsets, n, &len2); - if (len2 == UNIV_SQL_NULL) { - ut_ad(!rec_offs_comp(offsets)); - rec_set_nth_field_null_bit(rec, n, FALSE); - ut_ad(len == rec_get_nth_field_size(rec, n)); - } else { - ut_ad(len2 == len); - } - - ut_memcpy(data2, data, len); -} - -/**********************************************************//** -The following function returns the data size of an old-style physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. -@return size */ -UNIV_INLINE -ulint -rec_get_data_size_old( -/*==================*/ - const rec_t* rec) /*!< in: physical record */ -{ - ut_ad(rec); - - return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec))); -} - -/**********************************************************//** -The following function sets the number of fields in offsets. 
*/ -UNIV_INLINE -void -rec_offs_set_n_fields( -/*==================*/ - ulint* offsets, /*!< in/out: array returned by - rec_get_offsets() */ - ulint n_fields) /*!< in: number of fields */ -{ - ut_ad(offsets); - ut_ad(n_fields > 0); - ut_ad(n_fields <= REC_MAX_N_FIELDS); - ut_ad(n_fields + REC_OFFS_HEADER_SIZE - <= rec_offs_get_n_alloc(offsets)); - offsets[1] = n_fields; -} - -/**********************************************************//** -The following function returns the data size of a physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. -@return size */ -UNIV_INLINE -ulint -rec_offs_data_size( -/*===============*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint size; - - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] - & REC_OFFS_MASK; - ut_ad(size < UNIV_PAGE_SIZE); - return(size); -} - -/**********************************************************//** -Returns the total size of record minus data size of record. The value -returned by the function is the distance from record start to record origin -in bytes. -@return size */ -UNIV_INLINE -ulint -rec_offs_extra_size( -/*================*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint size; - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL); - ut_ad(size < UNIV_PAGE_SIZE); - return(size); -} - -/**********************************************************//** -Returns the total size of a physical record. 
-@return size */ -UNIV_INLINE -ulint -rec_offs_size( -/*==========*/ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets)); -} - -#ifdef UNIV_DEBUG -/**********************************************************//** -Returns a pointer to the end of the record. -@return pointer to end */ -UNIV_INLINE -byte* -rec_get_end( -/*========*/ - const rec_t* rec, /*!< in: pointer to record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ut_ad(rec_offs_validate(rec, NULL, offsets)); - return(const_cast<rec_t*>(rec + rec_offs_data_size(offsets))); -} - -/**********************************************************//** -Returns a pointer to the start of the record. -@return pointer to start */ -UNIV_INLINE -byte* -rec_get_start( -/*==========*/ - const rec_t* rec, /*!< in: pointer to record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ut_ad(rec_offs_validate(rec, NULL, offsets)); - return(const_cast<rec_t*>(rec - rec_offs_extra_size(offsets))); -} -#endif /* UNIV_DEBUG */ - -/***************************************************************//** -Copies a physical record to a buffer. 
-@return pointer to the origin of the copy */ -UNIV_INLINE -rec_t* -rec_copy( -/*=====*/ - void* buf, /*!< in: buffer */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint extra_len; - ulint data_len; - - ut_ad(rec != NULL); - ut_ad(buf != NULL); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(rec_validate(rec, offsets)); - - extra_len = rec_offs_extra_size(offsets); - data_len = rec_offs_data_size(offsets); - - ut_memcpy(buf, rec - extra_len, extra_len + data_len); - - return((byte*) buf + extra_len); -} - -/**********************************************************//** -Returns the extra size of an old-style physical record if we know its -data size and number of fields. -@return extra size */ -UNIV_INLINE -ulint -rec_get_converted_extra_size( -/*=========================*/ - ulint data_size, /*!< in: data size */ - ulint n_fields, /*!< in: number of fields */ - ulint n_ext) /*!< in: number of externally stored columns */ -{ - if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) { - - return(REC_N_OLD_EXTRA_BYTES + n_fields); - } - - return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields); -} - -/**********************************************************//** -The following function returns the size of a data tuple when converted to -a physical record. -@return size */ -UNIV_INLINE -ulint -rec_get_converted_size( -/*===================*/ - dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* dtuple, /*!< in: data tuple */ - ulint n_ext) /*!< in: number of externally stored columns */ -{ - ulint data_size; - ulint extra_size; - - ut_ad(index); - ut_ad(dtuple); - ut_ad(dtuple_check_typed(dtuple)); - - ut_ad(dict_index_is_univ(index) - || dtuple_get_n_fields(dtuple) - == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) - == REC_STATUS_NODE_PTR) - ? 
dict_index_get_n_unique_in_tree(index) + 1 - : dict_index_get_n_fields(index))); - - if (dict_table_is_comp(index->table)) { - return(rec_get_converted_size_comp(index, - dtuple_get_info_bits(dtuple) - & REC_NEW_STATUS_MASK, - dtuple->fields, - dtuple->n_fields, NULL)); - } - - data_size = dtuple_get_data_size(dtuple, 0); - - extra_size = rec_get_converted_extra_size( - data_size, dtuple_get_n_fields(dtuple), n_ext); - -#if 0 - /* This code is inactive since it may be the wrong place to add - in the size of node pointers used in parent pages AND it is not - currently needed since ha_innobase::max_supported_key_length() - ensures that the key size limit for each page size is well below - the actual limit ((free space on page / 4) - record overhead). - But those limits will need to be raised when InnoDB can - support multiple page sizes. At that time, we will need - to consider the node pointer on these universal btrees. */ - - if (dict_index_is_univ(index)) { - /* This is for the insert buffer B-tree. - All fields in the leaf tuple ascend to the - parent node plus the child page pointer. */ - - /* ibuf cannot contain externally stored fields */ - ut_ad(n_ext == 0); - - /* Add the data pointer and recompute extra_size - based on one more field. */ - data_size += REC_NODE_PTR_SIZE; - extra_size = rec_get_converted_extra_size( - data_size, - dtuple_get_n_fields(dtuple) + 1, - 0); - - /* Be sure dtuple->n_fields has this node ptr - accounted for. This function should correspond to - what rec_convert_dtuple_to_rec() needs in storage. - In optimistic insert or update-not-in-place, we will - have to ensure that if the record is converted to a - node pointer, it will not become too large.*/ - } -#endif - - return(data_size + extra_size); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Folds a prefix of a physical record to a ulint. Folds only existing fields, -that is, checks that we do not run out of the record. 
-@return the folded value */ -UNIV_INLINE -ulint -rec_fold( -/*=====*/ - const rec_t* rec, /*!< in: the physical record */ - const ulint* offsets, /*!< in: array returned by - rec_get_offsets() */ - ulint n_fields, /*!< in: number of complete - fields to fold */ - ulint n_bytes, /*!< in: number of bytes to fold - in an incomplete last field */ - index_id_t tree_id) /*!< in: index tree id */ -{ - ulint i; - const byte* data; - ulint len; - ulint fold; - ulint n_fields_rec; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(rec_validate(rec, offsets)); - ut_ad(n_fields + n_bytes > 0); - - n_fields_rec = rec_offs_n_fields(offsets); - ut_ad(n_fields <= n_fields_rec); - ut_ad(n_fields < n_fields_rec || n_bytes == 0); - - if (n_fields > n_fields_rec) { - n_fields = n_fields_rec; - } - - if (n_fields == n_fields_rec) { - n_bytes = 0; - } - - fold = ut_fold_ull(tree_id); - - for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, offsets, i, &len); - - if (len != UNIV_SQL_NULL) { - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - if (n_bytes > 0) { - data = rec_get_nth_field(rec, offsets, i, &len); - - if (len != UNIV_SQL_NULL) { - if (len > n_bytes) { - len = n_bytes; - } - - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - return(fold); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h deleted file mode 100644 index 5da96066f88..00000000000 --- a/storage/xtradb/include/rem0types.h +++ /dev/null @@ -1,77 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/rem0types.h -Record manager global types - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef rem0types_h -#define rem0types_h - -/* We define the physical record simply as an array of bytes */ -typedef byte rec_t; - -/* Maximum values for various fields (for non-blob tuples) */ -#define REC_MAX_N_FIELDS (1024 - 1) -#define REC_MAX_HEAP_NO (2 * 8192 - 1) -#define REC_MAX_N_OWNED (16 - 1) - -/* Maximum number of user defined fields/columns. The reserved columns -are the ones InnoDB adds internally: DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR. -We need "* 2" because mlog_parse_index() creates a dummy table object -possibly, with some of the system columns in it, and then adds the 3 -system columns (again) using dict_table_add_system_columns(). The problem -is that mlog_parse_index() cannot recognize the system columns by -just having n_fields, n_uniq and the lengths of the columns. */ -#define REC_MAX_N_USER_FIELDS (REC_MAX_N_FIELDS - DATA_N_SYS_COLS * 2) - -/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum -indexed field length (or indexed prefix length) for indexes on tables of -ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format. -Before we support UTF-8 encodings with mbmaxlen = 4, a UTF-8 character -may take at most 3 bytes. 
So the limit was set to 3*256, so that one -can create a column prefix index on 256 characters of a TEXT or VARCHAR -column also in the UTF-8 charset. -This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data -files would be at risk! */ -#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768 - -/** Maximum indexed field length for table format UNIV_FORMAT_B and -beyond. -This (3072) is the maximum index row length allowed, so we cannot create index -prefix column longer than that. */ -#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072 - -/** Innodb row types are a subset of the MySQL global enum row_type. -They are made into their own enum so that switch statements can account -for each of them. */ -enum rec_format_enum { - REC_FORMAT_REDUNDANT = 0, /*!< REDUNDANT row format */ - REC_FORMAT_COMPACT = 1, /*!< COMPACT row format */ - REC_FORMAT_COMPRESSED = 2, /*!< COMPRESSED row format */ - REC_FORMAT_DYNAMIC = 3 /*!< DYNAMIC row format */ -}; -typedef enum rec_format_enum rec_format_t; - -/** Compressed field header size in bytes */ -#define ZIP_COLUMN_HEADER_LENGTH 2 - -#endif diff --git a/storage/xtradb/include/row0ext.h b/storage/xtradb/include/row0ext.h deleted file mode 100644 index a098e2f9b29..00000000000 --- a/storage/xtradb/include/row0ext.h +++ /dev/null @@ -1,102 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0ext.h -Caching of externally stored column prefixes - -Created September 2006 Marko Makela -*******************************************************/ - -#ifndef row0ext_h -#define row0ext_h - -#include "univ.i" -#include "row0types.h" -#include "data0types.h" -#include "mem0mem.h" -#include "dict0types.h" - -/********************************************************************//** -Creates a cache of column prefixes of externally stored columns. -@return own: column prefix cache */ -UNIV_INTERN -row_ext_t* -row_ext_create( -/*===========*/ - ulint n_ext, /*!< in: number of externally stored columns */ - const ulint* ext, /*!< in: col_no's of externally stored columns - in the InnoDB table object, as reported by - dict_col_get_no(); NOT relative to the records - in the clustered index */ - ulint flags, /*!< in: table->flags */ - const dtuple_t* tuple, /*!< in: data tuple containing the field - references of the externally stored - columns; must be indexed by col_no; - the clustered index record must be - covered by a lock or a page latch - to prevent deletion (rollback or purge). */ - mem_heap_t* heap); /*!< in: heap where created */ - -/********************************************************************//** -Looks up a column prefix of an externally stored column.
-@return column prefix, or pointer to field_ref_zero if the prefix is not -cached (*len is then 0); the inline definition never returns NULL */ -UNIV_INLINE -const byte* -row_ext_lookup_ith( -/*===============*/ - const row_ext_t* ext, /*!< in: column prefix cache */ - ulint i, /*!< in: index of ext->ext[] */ - ulint* len); /*!< out: length of prefix, in bytes, - at most the length determined by - DICT_MAX_FIELD_LEN_BY_FORMAT() */ -/********************************************************************//** -Looks up a column prefix of an externally stored column. -@return column prefix, or NULL if the column is not stored externally, -or pointer to field_ref_zero if the BLOB pointer is unset */ -UNIV_INLINE -const byte* -row_ext_lookup( -/*===========*/ - const row_ext_t* ext, /*!< in: column prefix cache */ - ulint col, /*!< in: column number in the InnoDB - table object, as reported by - dict_col_get_no(); NOT relative to the - records in the clustered index */ - ulint* len); /*!< out: length of prefix, in bytes, - at most the length determined by - DICT_MAX_FIELD_LEN_BY_FORMAT() */ - -/** Prefixes of externally stored columns */ -struct row_ext_t{ - ulint n_ext; /*!< number of externally stored columns */ - const ulint* ext; /*!< col_no's of externally stored columns */ - byte* buf; /*!< backing store of the column prefix cache */ - ulint max_len;/*!< maximum prefix length, it could be - REC_ANTELOPE_MAX_INDEX_COL_LEN or - REC_VERSION_56_MAX_INDEX_COL_LEN depending - on row format */ - ulint len[1]; /*!< prefix lengths; 0 if not cached */ -}; - -#ifndef UNIV_NONINL -#include "row0ext.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0ext.ic b/storage/xtradb/include/row0ext.ic deleted file mode 100644 index 39e150d91d5..00000000000 --- a/storage/xtradb/include/row0ext.ic +++ /dev/null @@ -1,87 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0ext.ic -Caching of externally stored column prefixes - -Created September 2006 Marko Makela -*******************************************************/ - -#include "rem0types.h" -#include "btr0types.h" - -/********************************************************************//** -Looks up a column prefix of an externally stored column. -@return column prefix, or pointer to field_ref_zero if the prefix is not -cached (*len is then 0); this function never returns NULL */ -UNIV_INLINE -const byte* -row_ext_lookup_ith( -/*===============*/ - const row_ext_t* ext, /*!< in: column prefix cache */ - ulint i, /*!< in: index of ext->ext[] */ - ulint* len) /*!< out: length of prefix, in bytes, - at most ext->max_len */ -{ - ut_ad(ext); - ut_ad(len); - ut_ad(i < ext->n_ext); - - *len = ext->len[i]; - - ut_ad(*len <= ext->max_len); - ut_ad(ext->max_len > 0); - - if (*len == 0) { - /* The BLOB could not be fetched to the cache. */ - return(field_ref_zero); - } else { - return(ext->buf + i * ext->max_len); - } -} - -/********************************************************************//** -Looks up a column prefix of an externally stored column.
-@return column prefix, or NULL if the column is not stored externally, -or pointer to field_ref_zero if the BLOB pointer is unset */ -UNIV_INLINE -const byte* -row_ext_lookup( -/*===========*/ - const row_ext_t* ext, /*!< in: column prefix cache */ - ulint col, /*!< in: column number in the InnoDB - table object, as reported by - dict_col_get_no(); NOT relative to the - records in the clustered index */ - ulint* len) /*!< out: length of prefix, in bytes, - at most ext->max_len */ -{ - ulint i; - - ut_ad(ext); - ut_ad(len); - - for (i = 0; i < ext->n_ext; i++) { - if (col == ext->ext[i]) { - return(row_ext_lookup_ith(ext, i, len)); - } - } - - return(NULL); -} diff --git a/storage/xtradb/include/row0ftsort.h b/storage/xtradb/include/row0ftsort.h deleted file mode 100644 index 7c9ed23645c..00000000000 --- a/storage/xtradb/include/row0ftsort.h +++ /dev/null @@ -1,285 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0ftsort.h -Create Full Text Index with (parallel) merge sort - -Created 10/13/2010 Jimmy Yang -*******************************************************/ - -#ifndef row0ftsort_h -#define row0ftsort_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "row0mysql.h" -#include "fts0fts.h" -#include "fts0types.h" -#include "fts0priv.h" -#include "row0merge.h" - -/** This structure defines the information the scan thread will fetch -and put on the linked list for parallel tokenization/sort threads -to process */ -typedef struct fts_doc_item fts_doc_item_t; - -/** One document (string field plus Doc ID) linked into the list that is -handed to the parallel tokenization/sort threads */ -struct fts_doc_item { - dfield_t* field; /*!< field contains document string */ - doc_id_t doc_id; /*!< document ID */ - UT_LIST_NODE_T(fts_doc_item_t) doc_list; - /*!< list of doc items */ -}; - -/** This defines the list type that scan thread would feed the parallel -tokenization threads and sort threads.
*/ -typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t; - -#define FTS_NUM_AUX_INDEX 6 -#define FTS_PLL_MERGE 1 - -/** Sort information passed to each individual parallel sort thread */ -struct fts_psort_t; - -/** Common info passed to each parallel sort thread */ -struct fts_psort_common_t { - row_merge_dup_t* dup; /*!< descriptor of FTS index */ - dict_table_t* new_table; /*!< source table */ - trx_t* trx; /*!< transaction */ - fts_psort_t* all_info; /*!< all parallel sort info */ - os_event_t sort_event; /*!< sort event */ - os_event_t merge_event; /*!< merge event */ - ibool opt_doc_id_size;/*!< whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort, if - Doc ID will not be big enough - to use 8 bytes value */ - fil_space_crypt_t* crypt_data; /*!< crypt data or NULL */ -}; - -struct fts_psort_t { - ulint psort_id; /*!< Parallel sort ID */ - row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX]; - /*!< sort buffer */ - merge_file_t* merge_file[FTS_NUM_AUX_INDEX]; - /*!< sort file */ - row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX]; - /*!< buffer to write to file */ - row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX]; - /*!< buffer to allocated */ - row_merge_block_t* crypt_block[FTS_NUM_AUX_INDEX]; - /*!< buffer to crypt data */ - row_merge_block_t* crypt_alloc[FTS_NUM_AUX_INDEX]; - /*!< buffer to allocated */ - ulint child_status; /*!< child thread status */ - ulint state; /*!< parent thread state */ - fts_doc_list_t fts_doc_list; /*!< doc list to process */ - fts_psort_common_t* psort_common; /*!< ptr to all psort info */ - os_thread_t thread_hdl; /*!< thread handler */ - dberr_t error; /*!< db error during psort */ - ulint memory_used; /*!< memory used by fts_doc_list */ - ib_mutex_t mutex; /*!< mutex for fts_doc_list */ -}; - -/** Structure stores information from string tokenization operation */ -struct fts_tokenize_ctx { - ulint processed_len; /*!< processed string length */ - ulint init_pos; /*!< doc start position */ - ulint
buf_used; /*!< the sort buffer (ID) when - tokenization stops, which - could be due to sort buffer full */ - ulint rows_added[FTS_NUM_AUX_INDEX]; - /*!< number of rows added for - each FTS index partition */ - ib_rbt_t* cached_stopword;/*!< in: stopword list */ - dfield_t sort_field[FTS_NUM_FIELDS_SORT]; - /*!< in: sort field */ -}; - -typedef struct fts_tokenize_ctx fts_tokenize_ctx_t; - -/** Structure stores information needed for the insertion phase of FTS -parallel sort. */ -struct fts_psort_insert { - trx_t* trx; /*!< Transaction used for insertion */ - que_t** ins_graph; /*!< insert graph */ - fts_table_t fts_table; /*!< auxiliary table */ - CHARSET_INFO* charset; /*!< charset info */ - mem_heap_t* heap; /*!< heap */ - ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes) - integer for Doc ID */ -}; - -typedef struct fts_psort_insert fts_psort_insert_t; - - -/** status bit used for communication between parent and child thread */ -#define FTS_PARENT_COMPLETE 1 -#define FTS_PARENT_EXITING 2 -#define FTS_CHILD_COMPLETE 1 -#define FTS_CHILD_EXITING 2 - -/** Print some debug information */ -#define FTSORT_PRINT - -#ifdef FTSORT_PRINT -/** Print a timestamp followed by str to stderr. str is passed as data to a -fixed "%s" format, never as the format string itself, so a '%' inside the -message cannot be interpreted as a conversion specification. */ -#define DEBUG_FTS_SORT_PRINT(str) \ - do { \ - ut_print_timestamp(stderr); \ - fprintf(stderr, "%s", str); \ - } while (0) -#else -#define DEBUG_FTS_SORT_PRINT(str) -#endif /* FTSORT_PRINT */ - -/*************************************************************//** -Create a temporary "fts sort index" used to merge sort the -tokenized doc string. The index has three "fields": - -1) Tokenized word, -2) Doc ID -3) Word's position in original 'doc'.
-@return dict_index_t structure for the fts sort index */ -UNIV_INTERN -dict_index_t* -row_merge_create_fts_sort_index( -/*============================*/ - dict_index_t* index, /*!< in: Original FTS index - based on which this sort index - is created */ - const dict_table_t* table, /*!< in: table that FTS index - is being created on */ - ibool* opt_doc_id_size); - /*!< out: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort */ - -/********************************************************************//** -Initialize FTS parallel sort structures. -@return TRUE if all successful */ -UNIV_INTERN -ibool -row_fts_psort_info_init( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - row_merge_dup_t* dup, /*!< in,own: descriptor of - FTS index being created */ - const dict_table_t* new_table,/*!< in: table where indexes are - created */ - ibool opt_doc_id_size, - /*!< in: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort */ - fts_psort_t** psort, /*!< out: parallel sort info to be - instantiated */ - fts_psort_t** merge) /*!< out: parallel merge info - to be instantiated */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Clean up and deallocate FTS parallel sort structures, and close -temporary merge sort files */ -UNIV_INTERN -void -row_fts_psort_info_destroy( -/*=======================*/ - fts_psort_t* psort_info, /*!< parallel sort info */ - fts_psort_t* merge_info); /*!< parallel merge info */ -/********************************************************************//** -Free up merge buffers when merge sort is done */ -UNIV_INTERN -void -row_fts_free_pll_merge_buf( -/*=======================*/ - fts_psort_t* psort_info); /*!< in: parallel sort info */ - -/*********************************************************************//** -Function performs parallel tokenization of the incoming doc strings.
-@return OS_THREAD_DUMMY_RETURN */ -UNIV_INTERN -os_thread_ret_t -fts_parallel_tokenization( -/*======================*/ - void* arg); /*!< in: psort_info for the thread */ -/*********************************************************************//** -Start the parallel tokenization and parallel merge sort */ -UNIV_INTERN -void -row_fts_start_psort( -/*================*/ - fts_psort_t* psort_info); /*!< in: parallel sort info */ -/*********************************************************************//** -Function performs the merge and insertion of the sorted records. -@return OS_THREAD_DUMMY_RETURN */ -UNIV_INTERN -os_thread_ret_t -fts_parallel_merge( -/*===============*/ - void* arg); /*!< in: parallel merge info */ -/*********************************************************************//** -Kick off the parallel merge and insert thread */ -UNIV_INTERN -void -row_fts_start_parallel_merge( -/*=========================*/ - fts_psort_t* merge_info); /*!< in: parallel sort info */ -/********************************************************************//** -Read sorted FTS data files and insert data tuples to auxillary tables. -@return DB_SUCCESS or error number */ -UNIV_INTERN -void -row_fts_insert_tuple( -/*=================*/ - fts_psort_insert_t* - ins_ctx, /*!< in: insert context */ - fts_tokenizer_word_t* word, /*!< in: last processed - tokenized word */ - ib_vector_t* positions, /*!< in: word position */ - doc_id_t* in_doc_id, /*!< in: last item doc id */ - dtuple_t* dtuple); /*!< in: entry to insert */ -/********************************************************************//** -Propagate a newly added record up one level in the selection tree -@return parent where this value propagated to */ -UNIV_INTERN -int -row_merge_fts_sel_propagate( -/*========================*/ - int propogated, /*<! in: tree node propagated */ - int* sel_tree, /*<! in: selection tree */ - ulint level, /*<! in: selection tree level */ - const mrec_t** mrec, /*<! 
in: sort record */ - ulint** offsets, /*!< in: record offsets */ - dict_index_t* index); /*!< in: FTS index */ -/********************************************************************//** -Read sorted file containing index data tuples and insert these data -tuples to the index -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -row_fts_merge_insert( -/*=================*/ - dict_index_t* index, /*!< in: index */ - dict_table_t* table, /*!< in: new table */ - fts_psort_t* psort_info, /*!< in: parallel sort info */ - ulint id) /*!< in: which auxiliary table's data - to insert to */ - MY_ATTRIBUTE((nonnull)); -#endif /* row0ftsort_h */ diff --git a/storage/xtradb/include/row0import.h b/storage/xtradb/include/row0import.h deleted file mode 100644 index a821c230a3b..00000000000 --- a/storage/xtradb/include/row0import.h +++ /dev/null @@ -1,91 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0import.h -Header file for import tablespace functions. 
- -Created 2012-02-08 by Sunny Bains -*******************************************************/ - -#ifndef row0import_h -#define row0import_h - -#include "univ.i" -#include "db0err.h" -#include "dict0types.h" - -// Forward declarations -struct trx_t; -struct dict_table_t; -struct row_prebuilt_t; - -/*****************************************************************//** -Imports a tablespace. The space id in the .ibd file must match the space id -of the table in the data dictionary. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_import_for_mysql( -/*=================*/ - dict_table_t* table, /*!< in/out: table */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct - in MySQL */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*****************************************************************//** -Update the DICT_TF2_DISCARDED flag in SYS_TABLES. -@return DB_SUCCESS or error code. */ -UNIV_INTERN -dberr_t -row_import_update_discarded_flag( -/*=============================*/ - trx_t* trx, /*!< in/out: transaction that - covers the update */ - table_id_t table_id, /*!< in: Table for which we want - to set the root table->flags2 */ - bool discarded, /*!< in: set MIX_LEN column bit - to discarded, if true */ - bool dict_locked) /*!< in: Set to true if the - caller already owns the - dict_sys_t::mutex. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*****************************************************************//** -Update the (space, root page) of a table's indexes from the values -in the data dictionary. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_import_update_index_root( -/*=========================*/ - trx_t* trx, /*!< in/out: transaction that - covers the update */ - const dict_table_t* table, /*!< in: Table for which we want - to set the root page_no */ - bool reset, /*!< in: if true then set to - FIL_NULL */ - bool dict_locked) /*!< in: Set to true if the - caller already owns the - dict_sys_t::mutex. 
*/ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifndef UNIV_NONINL -#include "row0import.ic" -#endif - -#endif /* row0import_h */ diff --git a/storage/xtradb/include/row0import.ic b/storage/xtradb/include/row0import.ic deleted file mode 100644 index c5bbab49f6f..00000000000 --- a/storage/xtradb/include/row0import.ic +++ /dev/null @@ -1,25 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0import.ic - -Import tablespace inline functions. - -Created 2012-02-08 Sunny Bains -*******************************************************/ diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h deleted file mode 100644 index 71ee39070ef..00000000000 --- a/storage/xtradb/include/row0ins.h +++ /dev/null @@ -1,240 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0ins.h -Insert into a table - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0ins_h -#define row0ins_h - -#include "univ.i" -#include "data0data.h" -#include "que0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "row0types.h" - -/***************************************************************//** -Checks if foreign key constraint fails for an index entry. Sets shared locks -which lock either the success or the failure of the constraint. NOTE that -the caller must have a shared latch on dict_foreign_key_check_lock. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or -DB_ROW_IS_REFERENCED */ -UNIV_INTERN -dberr_t -row_ins_check_foreign_constraint( -/*=============================*/ - ibool check_ref,/*!< in: TRUE If we want to check that - the referenced table is ok, FALSE if we - want to check the foreign key table */ - dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the - tables mentioned in it must be in the - dictionary cache if they exist at all */ - dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign - table, else the referenced table */ - dtuple_t* entry, /*!< in: index entry for index */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Creates an insert node struct. -@return own: insert node struct */ -UNIV_INTERN -ins_node_t* -ins_node_create( -/*============*/ - ulint ins_type, /*!< in: INS_VALUES, ... */ - dict_table_t* table, /*!< in: table where to insert */ - mem_heap_t* heap); /*!< in: mem heap where created */ -/*********************************************************************//** -Sets a new row to insert for an INS_DIRECT node. This function is only used -if we have constructed the row separately, which is a rare case; this -function is quite slow. */ -UNIV_INTERN -void -ins_node_set_new_row( -/*=================*/ - ins_node_t* node, /*!< in: insert node */ - dtuple_t* row); /*!< in: new row (or first row) for the node */ -/***************************************************************//** -Tries to insert an entry into a clustered index, ignoring foreign key -constraints. If a record with the same unique key is found, the other -record is necessarily marked deleted by a committed transaction, or a -unique key violation error occurs. The delete marked record is then -updated to an existing record, and we must write an undo log record on -the delete marked record. 
-@retval DB_SUCCESS on success -@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG) -@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed -@return error code */ -UNIV_INTERN -dberr_t -row_ins_clust_index_entry_low( -/*==========================*/ - ulint flags, /*!< in: undo logging and locking flags */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /*!< in: clustered index */ - ulint n_uniq, /*!< in: 0 or index->n_uniq */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr) /*!< in: query thread or NULL */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***************************************************************//** -Tries to insert an entry into a secondary index. If a record with exactly the -same fields is found, the other record is necessarily marked deleted. -It is then unmarked. Otherwise, the entry is just inserted to the index. 
-@retval DB_SUCCESS on success -@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG) -@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed -@return error code */ -UNIV_INTERN -dberr_t -row_ins_sec_index_entry_low( -/*========================*/ - ulint flags, /*!< in: undo logging and locking flags */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /*!< in: secondary index */ - mem_heap_t* offsets_heap, - /*!< in/out: memory heap that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during - row_log_table_apply(), or 0 */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***************************************************************//** -Tries to insert the externally stored fields (off-page columns) -of a clustered index entry. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -UNIV_INTERN -dberr_t -row_ins_index_entry_big_rec_func( -/*=============================*/ - const dtuple_t* entry, /*!< in/out: index entry to insert */ - const big_rec_t* big_rec,/*!< in: externally stored fields */ - ulint* offsets,/*!< in/out: rec offsets */ - mem_heap_t** heap, /*!< in/out: memory heap */ - dict_index_t* index, /*!< in: index */ - const char* file, /*!< in: file name of caller */ -#ifndef DBUG_OFF - const void* thd, /*!< in: connection, or NULL */ -#endif /* DBUG_OFF */ - ulint line) /*!< in: line number of caller */ - MY_ATTRIBUTE((nonnull(1,2,3,4,5,6), warn_unused_result)); -#ifdef DBUG_OFF -# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \ - row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line) -#else /* DBUG_OFF */ -# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \ - row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line) -#endif /* DBUG_OFF */ -/***************************************************************//** -Inserts an entry into a clustered index. Tries first optimistic, -then pessimistic descent down the tree. If the entry matches enough -to a delete marked record, performs the insert by updating or delete -unmarking the delete marked record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ -UNIV_INTERN -dberr_t -row_ins_clust_index_entry( -/*======================*/ - dict_index_t* index, /*!< in: clustered index */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - que_thr_t* thr, /*!< in: query thread */ - ulint n_ext) /*!< in: number of externally stored columns */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***************************************************************//** -Inserts an entry into a secondary index. Tries first optimistic, -then pessimistic descent down the tree. 
If the entry matches enough -to a delete marked record, performs the insert by updating or delete -unmarking the delete marked record. -@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ -UNIV_INTERN -dberr_t -row_ins_sec_index_entry( -/*====================*/ - dict_index_t* index, /*!< in: secondary index */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************//** -Inserts a row to a table. This is a high-level function used in -SQL execution graphs. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_ins_step( -/*=========*/ - que_thr_t* thr); /*!< in: query thread */ - -/* Insert node structure */ - -struct ins_node_t{ - que_common_t common; /*!< node type: QUE_NODE_INSERT */ - ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */ - dtuple_t* row; /*!< row to insert */ - dict_table_t* table; /*!< table where to insert */ - sel_node_t* select; /*!< select in searched insert */ - que_node_t* values_list;/* list of expressions to evaluate and - insert in an INS_VALUES insert */ - ulint state; /*!< node execution state */ - dict_index_t* index; /*!< NULL, or the next index where the index - entry should be inserted */ - dtuple_t* entry; /*!< NULL, or entry to insert in the index; - after a successful insert of the entry, - this should be reset to NULL */ - UT_LIST_BASE_NODE_T(dtuple_t) - entry_list;/* list of entries, one for each index */ - byte* row_id_buf;/* buffer for the row id sys field in row */ - trx_id_t trx_id; /*!< trx id or the last trx which executed the - node */ - byte* trx_id_buf;/* buffer for the trx id sys field in row */ - mem_heap_t* entry_sys_heap; - /* memory heap used as auxiliary storage; - entry_list and sys fields are stored here; - if this is NULL, entry list should be created - and buffers for sys fields in row allocated */ - 
ulint magic_n; -}; - -#define INS_NODE_MAGIC_N 15849075 - -/* Insert node types */ -#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */ -#define INS_VALUES 1 /* INSERT INTO ... VALUES ... */ -#define INS_DIRECT 2 /* this is for internal use in dict0crea: - insert the row directly */ - -/* Node execution states */ -#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */ -#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */ -#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and - inserted */ - -#ifndef UNIV_NONINL -#include "row0ins.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0ins.ic b/storage/xtradb/include/row0ins.ic deleted file mode 100644 index 9c191d869a2..00000000000 --- a/storage/xtradb/include/row0ins.ic +++ /dev/null @@ -1,26 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0ins.ic -Insert into a table - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - - diff --git a/storage/xtradb/include/row0log.h b/storage/xtradb/include/row0log.h deleted file mode 100644 index 5ff148ff045..00000000000 --- a/storage/xtradb/include/row0log.h +++ /dev/null @@ -1,244 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0log.h -Modification log for online index creation and online table rebuild - -Created 2011-05-26 Marko Makela -*******************************************************/ - -#ifndef row0log_h -#define row0log_h - -#include "univ.i" -#include "mtr0types.h" -#include "row0types.h" -#include "rem0types.h" -#include "data0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" - -extern ulint onlineddl_rowlog_rows; -extern ulint onlineddl_rowlog_pct_used; -extern ulint onlineddl_pct_progress; - -/******************************************************//** -Allocate the row log for an index and flag the index -for online creation. -@retval true if success, false if not */ -UNIV_INTERN -bool -row_log_allocate( -/*=============*/ - dict_index_t* index, /*!< in/out: index */ - dict_table_t* table, /*!< in/out: new table being rebuilt, - or NULL when creating a secondary index */ - bool same_pk,/*!< in: whether the definition of the - PRIMARY KEY has remained the same */ - const dtuple_t* add_cols, - /*!< in: default values of - added columns, or NULL */ - const ulint* col_map,/*!< in: mapping of old column - numbers to new ones, or NULL if !table */ - const char* path) /*!< in: where to create temporary file */ - MY_ATTRIBUTE((nonnull(1), warn_unused_result)); - -/******************************************************//** -Free the row log for an index that was being created online. 
*/ -UNIV_INTERN -void -row_log_free( -/*=========*/ - row_log_t*& log) /*!< in,own: row log */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -Free the row log for an index on which online creation was aborted. */ -UNIV_INLINE -void -row_log_abort_sec( -/*==============*/ - dict_index_t* index) /*!< in/out: index (x-latched) */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -Try to log an operation to a secondary index that is -(or was) being created. -@retval true if the operation was logged or can be ignored -@retval false if online index creation is not taking place */ -UNIV_INLINE -bool -row_log_online_op_try( -/*==================*/ - dict_index_t* index, /*!< in/out: index, S or X latched */ - const dtuple_t* tuple, /*!< in: index tuple */ - trx_id_t trx_id) /*!< in: transaction ID for insert, - or 0 for delete */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************//** -Logs an operation to a secondary index that is (or was) being created. */ -UNIV_INTERN -void -row_log_online_op( -/*==============*/ - dict_index_t* index, /*!< in/out: index, S or X latched */ - const dtuple_t* tuple, /*!< in: index tuple */ - trx_id_t trx_id) /*!< in: transaction ID for insert, - or 0 for delete */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -Gets the error status of the online index rebuild log. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_log_table_get_error( -/*====================*/ - const dict_index_t* index) /*!< in: clustered index of a table - that is being rebuilt online */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/******************************************************//** -Logs a delete operation to a table that is being rebuilt. -This will be merged in row_log_table_apply_delete(). 
*/ -UNIV_INTERN -void -row_log_table_delete( -/*=================*/ - const rec_t* rec, /*!< in: clustered index leaf page record, - page X-latched */ - dict_index_t* index, /*!< in/out: clustered index, S-latched - or X-latched */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ - const byte* sys) /*!< in: DB_TRX_ID,DB_ROLL_PTR that should - be logged, or NULL to use those in rec */ - UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3))); - -/******************************************************//** -Logs an update operation to a table that is being rebuilt. -This will be merged in row_log_table_apply_update(). */ -UNIV_INTERN -void -row_log_table_update( -/*=================*/ - const rec_t* rec, /*!< in: clustered index leaf page record, - page X-latched */ - dict_index_t* index, /*!< in/out: clustered index, S-latched - or X-latched */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ - const dtuple_t* old_pk) /*!< in: row_log_table_get_pk() - before the update */ - UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3))); - -/******************************************************//** -Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR -of a table that is being rebuilt. -@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table, -or NULL if the PRIMARY KEY definition does not change */ -UNIV_INTERN -const dtuple_t* -row_log_table_get_pk( -/*=================*/ - const rec_t* rec, /*!< in: clustered index leaf page record, - page X-latched */ - dict_index_t* index, /*!< in/out: clustered index, S-latched - or X-latched */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index), - or NULL */ - byte* sys, /*!< out: DB_TRX_ID,DB_ROLL_PTR for - row_log_table_delete(), or NULL */ - mem_heap_t** heap) /*!< in/out: memory heap where allocated */ - UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,5), warn_unused_result)); - -/******************************************************//** -Logs an insert to a table that is being rebuilt. 
-This will be merged in row_log_table_apply_insert(). */ -UNIV_INTERN -void -row_log_table_insert( -/*=================*/ - const rec_t* rec, /*!< in: clustered index leaf page record, - page X-latched */ - dict_index_t* index, /*!< in/out: clustered index, S-latched - or X-latched */ - const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); -/******************************************************//** -Notes that a BLOB is being freed during online ALTER TABLE. */ -UNIV_INTERN -void -row_log_table_blob_free( -/*====================*/ - dict_index_t* index, /*!< in/out: clustered index, X-latched */ - ulint page_no)/*!< in: starting page number of the BLOB */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); -/******************************************************//** -Notes that a BLOB is being allocated during online ALTER TABLE. */ -UNIV_INTERN -void -row_log_table_blob_alloc( -/*=====================*/ - dict_index_t* index, /*!< in/out: clustered index, X-latched */ - ulint page_no)/*!< in: starting page number of the BLOB */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); -/******************************************************//** -Apply the row_log_table log to a table upon completing rebuild. -@return DB_SUCCESS, or error code on failure */ -UNIV_INTERN -dberr_t -row_log_table_apply( -/*================*/ - que_thr_t* thr, /*!< in: query graph */ - dict_table_t* old_table, - /*!< in: old table */ - struct TABLE* table) /*!< in/out: MySQL table - (for reporting duplicates) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/******************************************************//** -Get the latest transaction ID that has invoked row_log_online_op() -during online creation. 
-@return latest transaction ID, or 0 if nothing was logged */ -UNIV_INTERN -trx_id_t -row_log_get_max_trx( -/*================*/ - dict_index_t* index) /*!< in: index, must be locked */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/******************************************************//** -Merge the row log to the index upon completing index creation. -@return DB_SUCCESS, or error code on failure */ -UNIV_INTERN -dberr_t -row_log_apply( -/*==========*/ - trx_t* trx, /*!< in: transaction (for checking if - the operation was interrupted) */ - dict_index_t* index, /*!< in/out: secondary index */ - struct TABLE* table) /*!< in/out: MySQL table - (for reporting duplicates) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#ifndef UNIV_NONINL -#include "row0log.ic" -#endif - -#endif /* row0log.h */ diff --git a/storage/xtradb/include/row0log.ic b/storage/xtradb/include/row0log.ic deleted file mode 100644 index b0f37dbd8e7..00000000000 --- a/storage/xtradb/include/row0log.ic +++ /dev/null @@ -1,84 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0log.ic -Modification log for online index creation and online table rebuild - -Created 2012-10-18 Marko Makela -*******************************************************/ - -#include "dict0dict.h" - -/******************************************************//** -Free the row log for an index on which online creation was aborted. */ -UNIV_INLINE -void -row_log_abort_sec( -/*===============*/ - dict_index_t* index) /*!< in/out: index (x-latched) */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(!dict_index_is_clust(index)); - dict_index_set_online_status(index, ONLINE_INDEX_ABORTED); - row_log_free(index->online_log); -} - -/******************************************************//** -Try to log an operation to a secondary index that is -(or was) being created. -@retval true if the operation was logged or can be ignored -@retval false if online index creation is not taking place */ -UNIV_INLINE -bool -row_log_online_op_try( -/*==================*/ - dict_index_t* index, /*!< in/out: index, S or X latched */ - const dtuple_t* tuple, /*!< in: index tuple */ - trx_id_t trx_id) /*!< in: transaction ID for insert, - or 0 for delete */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED) - || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_COMPLETE: - /* This is a normal index. Do not log anything. - The caller must perform the operation on the - index tree directly. 
*/ - return(false); - case ONLINE_INDEX_CREATION: - /* The index is being created online. Log the - operation. */ - row_log_online_op(index, tuple, trx_id); - break; - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - /* The index was created online, but the operation was - aborted. Do not log the operation and tell the caller - to skip the operation. */ - break; - } - - return(true); -} diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h deleted file mode 100644 index 04d4010ad48..00000000000 --- a/storage/xtradb/include/row0merge.h +++ /dev/null @@ -1,469 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0merge.h -Index build routines using a merge sort - -Created 13/06/2005 Jan Lindstrom -*******************************************************/ - -#ifndef row0merge_h -#define row0merge_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "mtr0mtr.h" -#include "rem0types.h" -#include "rem0rec.h" -#include "read0types.h" -#include "btr0types.h" -#include "row0mysql.h" -#include "lock0types.h" -#include "srv0srv.h" - -/* Reserve free space from every block for key_version */ -#define ROW_MERGE_RESERVE_SIZE 4 - -/* Cluster index read task is mandatory */ -#define COST_READ_CLUSTERED_INDEX 1.0 - -/* Basic fixed cost to build all type of index */ -#define COST_BUILD_INDEX_STATIC 0.5 -/* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */ -#define COST_BUILD_INDEX_DYNAMIC 0.5 - -/* Sum of below two must be 1.0 */ -#define PCT_COST_MERGESORT_INDEX 0.4 -#define PCT_COST_INSERT_INDEX 0.6 - -// Forward declaration -struct ib_sequence_t; - -/** @brief Block size for I/O operations in merge sort. - -The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty() -rounded to a power of 2. - -When not creating a PRIMARY KEY that contains column prefixes, this -can be set as small as UNIV_PAGE_SIZE / 2. */ -typedef byte row_merge_block_t; - -/** @brief Secondary buffer for I/O operations of merge records. - -This buffer is used for writing or reading a record that spans two -row_merge_block_t. 
Thus, it must be able to hold one merge record, -whose maximum size is the same as the minimum size of -row_merge_block_t. */ -typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX]; - -/** @brief Merge record in row_merge_block_t. - -The format is the same as a record in ROW_FORMAT=COMPACT with the -exception that the REC_N_NEW_EXTRA_BYTES are omitted. */ -typedef byte mrec_t; - -/** Merge record in row_merge_buf_t */ -struct mtuple_t { - dfield_t* fields; /*!< data fields */ -}; - -/** Buffer for sorting in main memory. */ -struct row_merge_buf_t { - mem_heap_t* heap; /*!< memory heap where allocated */ - dict_index_t* index; /*!< the index the tuples belong to */ - ulint total_size; /*!< total amount of data bytes */ - ulint n_tuples; /*!< number of data tuples */ - ulint max_tuples; /*!< maximum number of data tuples */ - mtuple_t* tuples; /*!< array of data tuples */ - mtuple_t* tmp_tuples; /*!< temporary copy of tuples, - for sorting */ -}; - -/** Information about temporary files used in merge sort */ -struct merge_file_t { - int fd; /*!< file descriptor */ - ulint offset; /*!< file offset (end of file) */ - ib_uint64_t n_rec; /*!< number of records in the file */ -}; - -/** Index field definition */ -struct index_field_t { - ulint col_no; /*!< column offset */ - ulint prefix_len; /*!< column prefix length, or 0 - if indexing the whole column */ - const char* col_name; /*!< column name or NULL */ -}; - -/** Definition of an index being created */ -struct index_def_t { - const char* name; /*!< index name */ - ulint ind_type; /*!< 0, DICT_UNIQUE, - or DICT_CLUSTERED */ - ulint key_number; /*!< MySQL key number, - or ULINT_UNDEFINED if none */ - ulint n_fields; /*!< number of fields in index */ - index_field_t* fields; /*!< field definitions */ -}; - -/** Structure for reporting duplicate records. 
*/ -struct row_merge_dup_t { - dict_index_t* index; /*!< index being sorted */ - struct TABLE* table; /*!< MySQL table object */ - const ulint* col_map;/*!< mapping of column numbers - in table to the rebuilt table - (index->table), or NULL if not - rebuilding table */ - ulint n_dup; /*!< number of duplicates */ -}; - -/*************************************************************//** -Report a duplicate key. */ -UNIV_INTERN -void -row_merge_dup_report( -/*=================*/ - row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ - const dfield_t* entry) /*!< in: duplicate index entry */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Sets an exclusive lock on a table, for the duration of creating indexes. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_merge_lock_table( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Drop indexes that were created before an error occurred. -The data dictionary must have been locked exclusively by the caller, -because the transaction will not be committed. */ -UNIV_INTERN -void -row_merge_drop_indexes_dict( -/*========================*/ - trx_t* trx, /*!< in/out: dictionary transaction */ - table_id_t table_id)/*!< in: table identifier */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Drop those indexes which were created before an error occurred. -The data dictionary must have been locked exclusively by the caller, -because the transaction will not be committed. 
*/ -UNIV_INTERN -void -row_merge_drop_indexes( -/*===================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in/out: table containing the indexes */ - ibool locked) /*!< in: TRUE=table locked, - FALSE=may need to do a lazy drop */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Drop all partially created indexes during crash recovery. */ -UNIV_INTERN -void -row_merge_drop_temp_indexes(void); -/*=============================*/ - -/** Create temporary merge files in the given parameter path, and if -UNIV_PFS_IO is defined, register the file descriptor with Performance Schema. -@param[in] path location for creating temporary merge files. -@return File descriptor */ -UNIV_INTERN -int -row_merge_file_create_low( - const char* path) - MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************************//** -Destroy a merge file. And de-register the file from Performance Schema -if UNIV_PFS_IO is defined. */ -UNIV_INTERN -void -row_merge_file_destroy_low( -/*=======================*/ - int fd); /*!< in: merge file descriptor */ - -/*********************************************************************//** -Provide a new pathname for a table that is being renamed if it belongs to -a file-per-table tablespace. The caller is responsible for freeing the -memory allocated for the return value. -@return new pathname of tablespace file, or NULL if space = 0 */ -UNIV_INTERN -char* -row_make_new_pathname( -/*==================*/ - dict_table_t* table, /*!< in: table to be renamed */ - const char* new_name); /*!< in: new name */ -/*********************************************************************//** -Rename the tables in the data dictionary. The data dictionary must -have been locked exclusively by the caller, because the transaction -will not be committed.
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_merge_rename_tables_dict( -/*=========================*/ - dict_table_t* old_table, /*!< in/out: old table, renamed to - tmp_name */ - dict_table_t* new_table, /*!< in/out: new table, renamed to - old_table->name */ - const char* tmp_name, /*!< in: new name for old_table */ - trx_t* trx) /*!< in/out: dictionary transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Rename an index in the dictionary that was created. The data -dictionary must have been locked exclusively by the caller, because -the transaction will not be committed. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -row_merge_rename_index_to_add( -/*==========================*/ - trx_t* trx, /*!< in/out: transaction */ - table_id_t table_id, /*!< in: table identifier */ - index_id_t index_id) /*!< in: index identifier */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Rename an index in the dictionary that is to be dropped. The data -dictionary must have been locked exclusively by the caller, because -the transaction will not be committed. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -row_merge_rename_index_to_drop( -/*===========================*/ - trx_t* trx, /*!< in/out: transaction */ - table_id_t table_id, /*!< in: table identifier */ - index_id_t index_id) /*!< in: index identifier */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Create the index and load in to the dictionary. -@return index, or NULL on error */ -UNIV_INTERN -dict_index_t* -row_merge_create_index( -/*===================*/ - trx_t* trx, /*!< in/out: trx (sets error_state) */ - dict_table_t* table, /*!< in: the index is on this table */ - const index_def_t* index_def, - /*!< in: the index definition */ - const char** col_names); - /*! 
in: column names if columns are - renamed or NULL */ -/*********************************************************************//** -Check if a transaction can use an index. -@return TRUE if index can be used by the transaction else FALSE */ -UNIV_INTERN -ibool -row_merge_is_index_usable( -/*======================*/ - const trx_t* trx, /*!< in: transaction */ - const dict_index_t* index); /*!< in: index to check */ -/*********************************************************************//** -Drop a table. The caller must have ensured that the background stats -thread is not processing the table. This can be done by calling -dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and -before calling this function. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_merge_drop_table( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table instance to drop */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Build indexes on a table by reading a clustered index, -creating a temporary file containing index entries, merge sorting -these index entries and inserting sorted index entries to indexes. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_merge_build_indexes( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* old_table, /*!< in: table where rows are - read from */ - dict_table_t* new_table, /*!< in: table where indexes are - created; identical to old_table - unless creating a PRIMARY KEY */ - bool online, /*!< in: true if creating indexes - online */ - dict_index_t** indexes, /*!< in: indexes to be created */ - const ulint* key_numbers, /*!< in: MySQL key numbers */ - ulint n_indexes, /*!< in: size of indexes[] */ - struct TABLE* table, /*!< in/out: MySQL table, for - reporting erroneous key value - if applicable */ - const dtuple_t* add_cols, /*!< in: default values of - added columns, or NULL */ - const ulint* col_map, /*!< in: mapping of old column - numbers to new ones, or NULL - if old_table == new_table */ - ulint add_autoinc, /*!< in: number of added - AUTO_INCREMENT column, or - ULINT_UNDEFINED if none is added */ - ib_sequence_t& sequence) /*!< in/out: autoinc sequence */ - MY_ATTRIBUTE((nonnull(1,2,3,5,6,8), warn_unused_result)); -/********************************************************************//** -Write a buffer to a block. */ -UNIV_INTERN -void -row_merge_buf_write( -/*================*/ - const row_merge_buf_t* buf, /*!< in: sorted buffer */ - const merge_file_t* of, /*!< in: output file */ - row_merge_block_t* block) /*!< out: buffer for writing to file */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Sort a buffer. */ -UNIV_INTERN -void -row_merge_buf_sort( -/*===============*/ - row_merge_buf_t* buf, /*!< in/out: sort buffer */ - row_merge_dup_t* dup) /*!< in/out: reporter of duplicates - (NULL if non-unique index) */ - MY_ATTRIBUTE((nonnull(1))); -/********************************************************************//** -Write a merge block to the file system. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -row_merge_write( -/*============*/ - int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to write, - in number of row_merge_block_t elements */ - const void* buf, /*!< in: data */ - fil_space_crypt_t* crypt_data, /*!< in: table crypt data */ - void* crypt_buf, /*!< in: crypt buf or NULL */ - ulint space); /*!< in: space id */ - -/********************************************************************//** -Empty a sort buffer. -@return sort buffer */ -UNIV_INTERN -row_merge_buf_t* -row_merge_buf_empty( -/*================*/ - row_merge_buf_t* buf) /*!< in,own: sort buffer */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); - -/** Create a merge file in the given location. -@param[out] merge_file merge file structure -@param[in] path location for creating temporary file -@return file descriptor, or -1 on failure */ -UNIV_INTERN -int -row_merge_file_create( - merge_file_t* merge_file, - const char* path); - -/*********************************************************************//** -Merge disk files. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_merge_sort( -/*===========*/ - trx_t* trx, /*!< in: transaction */ - const row_merge_dup_t* dup, /*!< in: descriptor of - index being created */ - merge_file_t* file, /*!< in/out: file containing - index entries */ - row_merge_block_t* block, /*!< in/out: 3 buffers */ - int* tmpfd, /*!< in/out: temporary file handle */ - const bool update_progress, /*!< in: update progress status variable or not */ - const float pct_progress, /*!< in: total progress percent until now */ - const float pct_cost, /*!< in: current progress percent */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ - __attribute__((nonnull(1,2,3,4,5))); -/*********************************************************************//** -Allocate a sort buffer. -@return own: sort buffer */ -UNIV_INTERN -row_merge_buf_t* -row_merge_buf_create( -/*=================*/ - dict_index_t* index) /*!< in: secondary index */ - MY_ATTRIBUTE((warn_unused_result, nonnull, malloc)); -/*********************************************************************//** -Deallocate a sort buffer. */ -UNIV_INTERN -void -row_merge_buf_free( -/*===============*/ - row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Destroy a merge file. */ -UNIV_INTERN -void -row_merge_file_destroy( -/*===================*/ - merge_file_t* merge_file) /*!< in/out: merge file structure */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Read a merge block from the file system. 
-@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -row_merge_read( -/*===========*/ - int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to read - in number of row_merge_block_t - elements */ - row_merge_block_t* buf, /*!< out: data */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_buf, /*!< in: crypt buf or NULL */ - ulint space); /*!< in: space id */ - -/********************************************************************//** -Read a merge record. -@return pointer to next record, or NULL on I/O error or end of list */ -UNIV_INTERN -const byte* -row_merge_read_rec( -/*===============*/ - row_merge_block_t* block, /*!< in/out: file buffer */ - mrec_buf_t* buf, /*!< in/out: secondary buffer */ - const byte* b, /*!< in: pointer to record */ - const dict_index_t* index, /*!< in: index of the record */ - int fd, /*!< in: file descriptor */ - ulint* foffs, /*!< in/out: file offset */ - const mrec_t** mrec, /*!< out: pointer to merge record, - or NULL on end of list - (non-NULL on I/O error) */ - ulint* offsets,/*!< out: offsets of mrec */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ - __attribute__((nonnull(1,2,3,4,6,7,8), warn_unused_result)); -#endif /* row0merge.h */ diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h deleted file mode 100644 index a8503a5cfda..00000000000 --- a/storage/xtradb/include/row0mysql.h +++ /dev/null @@ -1,932 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0mysql.h -Interface between Innobase row operations and MySQL. -Contains also create table and other data dictionary operations. - -Created 9/17/2000 Heikki Tuuri -*******************************************************/ - -#ifndef row0mysql_h -#define row0mysql_h - -#include "univ.i" -#include "data0data.h" -#include "que0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "row0types.h" -#include "btr0pcur.h" -#include "trx0types.h" -#include "fil0crypt.h" - -// Forward declaration -struct SysIndexCallback; - -extern ibool row_rollback_on_timeout; - -struct row_prebuilt_t; - -/*******************************************************************//** -Frees the blob heap in prebuilt when no longer needed. */ -UNIV_INTERN -void -row_mysql_prebuilt_free_blob_heap( -/*==============================*/ - row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a - ha_innobase:: table handle */ - -/*******************************************************************//** -Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row -format. 
-@return pointer to the data, we skip the 1 or 2 bytes at the start -that are used to store the len */ -UNIV_INTERN -byte* -row_mysql_store_true_var_len( -/*=========================*/ - byte* dest, /*!< in: where to store */ - ulint len, /*!< in: length, must fit in two bytes */ - ulint lenlen);/*!< in: storage length of len: either 1 or 2 bytes */ -/*******************************************************************//** -Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and -returns a pointer to the data. -@return pointer to the data, we skip the 1 or 2 bytes at the start -that are used to store the len */ -UNIV_INTERN -const byte* -row_mysql_read_true_varchar( -/*========================*/ - ulint* len, /*!< out: variable-length field length */ - const byte* field, /*!< in: field in the MySQL format */ - ulint lenlen);/*!< in: storage length of len: either 1 - or 2 bytes */ -/*******************************************************************//** -Stores a reference to a BLOB in the MySQL format. */ -UNIV_INTERN -void -row_mysql_store_blob_ref( -/*=====================*/ - byte* dest, /*!< in: where to store */ - ulint col_len,/*!< in: dest buffer size: determines into - how many bytes the BLOB length is stored, - the space for the length may vary from 1 - to 4 bytes */ - const void* data, /*!< in: BLOB data; if the value to store - is SQL NULL this should be NULL pointer */ - ulint len); /*!< in: BLOB length; if the value to store - is SQL NULL this should be 0; remember - also to set the NULL bit in the MySQL record - header! */ -/*******************************************************************//** -Reads a reference to a BLOB in the MySQL format. 
-@return pointer to BLOB data */ -UNIV_INTERN -const byte* -row_mysql_read_blob_ref( -/*====================*/ - ulint* len, /*!< out: BLOB length */ - const byte* ref, /*!< in: BLOB reference in the - MySQL format */ - ulint col_len); /*!< in: BLOB reference length - (not BLOB length) */ -/**************************************************************//** -Pad a column with spaces. */ -UNIV_INTERN -void -row_mysql_pad_col( -/*==============*/ - ulint mbminlen, /*!< in: minimum size of a character, - in bytes */ - byte* pad, /*!< out: padded buffer */ - ulint len); /*!< in: number of bytes to pad */ - -/**************************************************************//** -Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format. -The counterpart of this function is row_sel_field_store_in_mysql_format() in -row0sel.cc. -@return up to which byte we used buf in the conversion */ -UNIV_INTERN -byte* -row_mysql_store_col_in_innobase_format( -/*===================================*/ - dfield_t* dfield, /*!< in/out: dfield where dtype - information must be already set when - this function is called! */ - byte* buf, /*!< in/out: buffer for a converted - integer value; this must be at least - col_len long then! NOTE that dfield - may also get a pointer to 'buf', - therefore do not discard this as long - as dfield is used! */ - ibool row_format_col, /*!< TRUE if the mysql_data is from - a MySQL row, FALSE if from a MySQL - key value; - in MySQL, a true VARCHAR storage - format differs in a row and in a - key value: in a key value the length - is always stored in 2 bytes! */ - const byte* mysql_data, /*!< in: MySQL column value, not - SQL NULL; NOTE that dfield may also - get a pointer to mysql_data, - therefore do not discard this as long - as dfield is used! 
*/ - ulint col_len, /*!< in: MySQL column length; NOTE that - this is the storage length of the - column in the MySQL format row, not - necessarily the length of the actual - payload data; if the column is a true - VARCHAR then this is irrelevant */ - ulint comp); /*!< in: nonzero=compact format */ -/****************************************************************//** -Handles user errors and lock waits detected by the database engine. -@return true if it was a lock wait and we should continue running the -query thread */ -UNIV_INTERN -bool -row_mysql_handle_errors( -/*====================*/ - dberr_t* new_err,/*!< out: possible new error encountered in - rollback, or the old error which was - during the function entry */ - trx_t* trx, /*!< in: transaction */ - que_thr_t* thr, /*!< in: query thread, or NULL */ - trx_savept_t* savept) /*!< in: savepoint, or NULL */ - MY_ATTRIBUTE((nonnull(1,2))); -/********************************************************************//** -Create a prebuilt struct for a MySQL table handle. -@return own: a prebuilt struct */ -UNIV_INTERN -row_prebuilt_t* -row_create_prebuilt( -/*================*/ - dict_table_t* table, /*!< in: Innobase table handle */ - ulint mysql_row_len); /*!< in: length in bytes of a row in - the MySQL format */ -/********************************************************************//** -Free a prebuilt struct for a MySQL table handle. */ -UNIV_INTERN -void -row_prebuilt_free( -/*==============*/ - row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */ - ibool dict_locked); /*!< in: TRUE=data dictionary locked */ -/*********************************************************************//** -Updates the transaction pointers in query graphs stored in the prebuilt -struct. 
*/ -UNIV_INTERN -void -row_update_prebuilt_trx( -/*====================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct - in MySQL handle */ - trx_t* trx); /*!< in: transaction handle */ -/*********************************************************************//** -Sets an AUTO_INC type lock on the table mentioned in prebuilt. The -AUTO_INC lock gives exclusive access to the auto-inc counter of the -table. The lock is reserved only for the duration of an SQL statement. -It is not compatible with another AUTO_INC or exclusive lock on the -table. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_lock_table_autoinc_for_mysql( -/*=============================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL - table handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Sets a table lock on the table mentioned in prebuilt. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_lock_table_for_mysql( -/*=====================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL - table handle */ - dict_table_t* table, /*!< in: table to lock, or NULL - if prebuilt->table should be - locked as - prebuilt->select_lock_type */ - ulint mode) /*!< in: lock mode of table - (ignored if table==NULL) */ - MY_ATTRIBUTE((nonnull(1))); -/*********************************************************************//** -Does an insert for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_insert_for_mysql( -/*=================*/ - byte* mysql_rec, /*!< in: row in the MySQL format */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Builds a dummy query graph used in selects. 
*/ -UNIV_INTERN -void -row_prebuild_sel_graph( -/*===================*/ - row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL - handle */ -/*********************************************************************//** -Gets pointer to a prebuilt update vector used in updates. If the update -graph has not yet been built in the prebuilt struct, then this function -first builds it. -@return prebuilt update vector */ -UNIV_INTERN -upd_t* -row_get_prebuilt_update_vector( -/*===========================*/ - row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL - handle */ -/*********************************************************************//** -Checks if a table is such that we automatically created a clustered -index on it (on row id). -@return TRUE if the clustered index was generated automatically */ -UNIV_INTERN -ibool -row_table_got_default_clust_index( -/*==============================*/ - const dict_table_t* table); /*!< in: table */ -/*********************************************************************//** -Does an update or delete of a row for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_update_for_mysql( -/*=================*/ - byte* mysql_rec, /*!< in: the row to be updated, in - the MySQL format */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or this -session is using a READ COMMITTED or READ UNCOMMITTED isolation level. -Before calling this function row_search_for_mysql() must have -initialized prebuilt->new_rec_locks to store the information which new -record locks really were set. This function removes a newly set -clustered index record lock under prebuilt->pcur or -prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that -releases the latest clustered index record lock we set. 
*/ -UNIV_INTERN -void -row_unlock_for_mysql( -/*=================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL - handle */ - ibool has_latches_on_recs)/*!< in: TRUE if called - so that we have the latches on - the records under pcur and - clust_pcur, and we do not need - to reposition the cursors. */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Checks if a table name contains the string "/#sql" which denotes temporary -tables in MySQL. -@return true if temporary table */ -UNIV_INTERN -bool -row_is_mysql_tmp_table_name( -/*========================*/ - const char* name) MY_ATTRIBUTE((warn_unused_result)); - /*!< in: table name in the form - 'database/tablename' */ - -/*********************************************************************//** -Creates a query graph node of 'update' type to be used in the MySQL -interface. -@return own: update node */ -UNIV_INTERN -upd_node_t* -row_create_update_node_for_mysql( -/*=============================*/ - dict_table_t* table, /*!< in: table to update */ - mem_heap_t* heap); /*!< in: mem heap from which allocated */ -/**********************************************************************//** -Does a cascaded delete or set null in a foreign key operation. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_update_cascade_for_mysql( -/*=========================*/ - que_thr_t* thr, /*!< in: query thread */ - upd_node_t* node, /*!< in: update node used in the cascade - or set null operation */ - dict_table_t* table) /*!< in: table where we do the operation */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Locks the data dictionary exclusively for performing a table create or other -data dictionary modification operation.
*/ -UNIV_INTERN -void -row_mysql_lock_data_dictionary_func( -/*================================*/ - trx_t* trx, /*!< in/out: transaction */ - const char* file, /*!< in: file name */ - ulint line); /*!< in: line number */ -#define row_mysql_lock_data_dictionary(trx) \ - row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__) -/*********************************************************************//** -Unlocks the data dictionary exclusive lock. */ -UNIV_INTERN -void -row_mysql_unlock_data_dictionary( -/*=============================*/ - trx_t* trx); /*!< in/out: transaction */ -/*********************************************************************//** -Locks the data dictionary in shared mode from modifications, for performing -foreign key check, rollback, or other operation invisible to MySQL. */ -UNIV_INTERN -void -row_mysql_freeze_data_dictionary_func( -/*==================================*/ - trx_t* trx, /*!< in/out: transaction */ - const char* file, /*!< in: file name */ - ulint line); /*!< in: line number */ -#define row_mysql_freeze_data_dictionary(trx) \ - row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__) -/*********************************************************************//** -Unlocks the data dictionary shared lock. */ -UNIV_INTERN -void -row_mysql_unfreeze_data_dictionary( -/*===============================*/ - trx_t* trx); /*!< in/out: transaction */ -/*********************************************************************//** -Creates a table for MySQL. If the name of the table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also start the printing of monitor -output by the master thread. If the table name ends in "innodb_mem_validate", -InnoDB will try to invoke mem_validate(). On failure the transaction will -be rolled back. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_create_table_for_mysql( -/*=======================*/ - dict_table_t* table, /*!< in, own: table definition - (will be freed, or on DB_SUCCESS - added to the data dictionary cache) */ - trx_t* trx, /*!< in/out: transaction */ - bool commit, /*!< in: if true, commit the transaction */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ - __attribute__((nonnull, warn_unused_result)); -/*********************************************************************//** -Does an index creation operation for MySQL. TODO: currently failure -to create an index results in dropping the whole table! This is no problem -currently as all indexes must be created at the same time as the table. -@return error number or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_create_index_for_mysql( -/*=======================*/ - dict_index_t* index, /*!< in, own: index definition - (will be freed) */ - trx_t* trx, /*!< in: transaction handle */ - const ulint* field_lengths) /*!< in: if not NULL, must contain - dict_index_get_n_fields(index) - actual field lengths for the - index columns, which are - then checked for not being too - large. */ - MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary -the foreign key constraints declared in the string. This function -should be called after the indexes for a table have been created. -Each foreign key constraint must be accompanied with indexes in -bot participating tables. The indexes are allowed to contain more -fields than mentioned in the constraint. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_table_add_foreign_constraints( -/*==============================*/ - trx_t* trx, /*!< in: transaction */ - const char* sql_string, /*!< in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the - database name before it: test.table2 */ - size_t sql_length, /*!< in: length of sql_string */ - const char* name, /*!< in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /*!< in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -The master thread in srv0srv.cc calls this regularly to drop tables which -we must drop in background after queries to them have ended. Such lazy -dropping of tables is needed in ALTER TABLE on Unix. -@return how many tables dropped + remaining tables in list */ -UNIV_INTERN -ulint -row_drop_tables_for_mysql_in_background(void); -/*=========================================*/ -/*********************************************************************//** -Get the background drop list length. NOTE: the caller must own the kernel -mutex! -@return how many tables in list */ -UNIV_INTERN -ulint -row_get_background_drop_list_len_low(void); -/*======================================*/ -/*********************************************************************//** -Sets an exclusive lock on a table. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_mysql_lock_table( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */ - const char* op_info) /*!< in: string for trx->op_info */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Truncates a table for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_truncate_table_for_mysql( -/*=========================*/ - dict_table_t* table, /*!< in: table handle */ - trx_t* trx) /*!< in: transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Drops a table for MySQL. If the name of the dropped table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also stop the printing of monitor -output by the master thread. If the data dictionary was not already locked -by the transaction, the transaction will be committed. Otherwise, the -data dictionary will remain locked. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_drop_table_for_mysql( -/*=====================*/ - const char* name, /*!< in: table name */ - trx_t* trx, /*!< in: dictionary transaction handle */ - bool drop_db,/*!< in: true=dropping whole database */ - ibool create_failed,/*!<in: TRUE=create table failed - because e.g. foreign key column - type mismatch. */ - bool nonatomic = true) - /*!< in: whether it is permitted - to release and reacquire dict_operation_lock */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Drop all temporary tables during crash recovery. 
*/ -UNIV_INTERN -void -row_mysql_drop_temp_tables(void); -/*============================*/ - -/*********************************************************************//** -Discards the tablespace of a table which is stored in an .ibd file. Discarding -means that this function deletes the .ibd file and assigns a new table id for -the table. Also the flag table->ibd_file_missing is set TRUE. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_discard_tablespace_for_mysql( -/*=============================*/ - const char* name, /*!< in: table name */ - trx_t* trx) /*!< in: transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*****************************************************************//** -Imports a tablespace. The space id in the .ibd file must match the space id -of the table in the data dictionary. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_import_tablespace_for_mysql( -/*============================*/ - dict_table_t* table, /*!< in/out: table */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Drops a database for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_drop_database_for_mysql( -/*========================*/ - const char* name, /*!< in: database name which ends in '/' */ - trx_t* trx) /*!< in: transaction handle */ - MY_ATTRIBUTE((nonnull)); -/*********************************************************************//** -Renames a table for MySQL.
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_rename_table_for_mysql( -/*=======================*/ - const char* old_name, /*!< in: old table name */ - const char* new_name, /*!< in: new table name */ - trx_t* trx, /*!< in/out: transaction */ - bool commit) /*!< in: whether to commit trx */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Checks that the index contains entries in an ascending order, unique -constraint is not broken, and calculates the number of index entries -in the read view of the current transaction. -@return true if ok */ -UNIV_INTERN -bool -row_check_index_for_mysql( -/*======================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct - in MySQL handle */ - const dict_index_t* index, /*!< in: index */ - ulint* n_rows) /*!< out: number of entries - seen in the consistent read */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Determines if a table is a magic monitor table. -@return true if monitor table */ -UNIV_INTERN -bool -row_is_magic_monitor_table( -/*=======================*/ - const char* table_name) /*!< in: name of the table, in the - form database/table_name */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Initialize this module */ -UNIV_INTERN -void -row_mysql_init(void); -/*================*/ - -/*********************************************************************//** -Close this module */ -UNIV_INTERN -void -row_mysql_close(void); -/*=================*/ - -/*********************************************************************//** -Reassigns the table identifier of a table. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_mysql_table_id_reassign( -/*========================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx, /*!< in/out: transaction */ - table_id_t* new_id) /*!< out: new table id */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/* A struct describing a place for an individual column in the MySQL -row format which is presented to the table handler in ha_innobase. -This template struct is used to speed up row transformations between -Innobase and MySQL. */ - -struct mysql_row_templ_t { - ulint col_no; /*!< column number of the column */ - ulint rec_field_no; /*!< field number of the column in an - Innobase record in the current index; - not defined if template_type is - ROW_MYSQL_WHOLE_ROW */ - bool rec_field_is_prefix; /* is this field in a prefix index? */ - ulint rec_prefix_field_no; /* record field, even if just a - prefix; same as rec_field_no when not a - prefix, otherwise rec_field_no is - ULINT_UNDEFINED but this is the true - field number*/ - ulint clust_rec_field_no; /*!< field number of the column in an - Innobase record in the clustered index; - not defined if template_type is - ROW_MYSQL_WHOLE_ROW */ - ulint icp_rec_field_no; /*!< field number of the column in an - Innobase record in the current index; - not defined unless - index condition pushdown is used */ - ulint mysql_col_offset; /*!< offset of the column in the MySQL - row format */ - ulint mysql_col_len; /*!< length of the column in the MySQL - row format */ - ulint mysql_null_byte_offset; /*!< MySQL NULL bit byte offset in a - MySQL record */ - ulint mysql_null_bit_mask; /*!< bit mask to get the NULL bit, - zero if column cannot be NULL */ - ulint type; /*!< column type in Innobase mtype - numbers DATA_CHAR... 
*/ - ulint mysql_type; /*!< MySQL type code; this is always - < 256 */ - ulint mysql_length_bytes; /*!< if mysql_type - == DATA_MYSQL_TRUE_VARCHAR, this tells - whether we should use 1 or 2 bytes to - store the MySQL true VARCHAR data - length at the start of row in the MySQL - format (NOTE that the MySQL key value - format always uses 2 bytes for the data - len) */ - ulint charset; /*!< MySQL charset-collation code - of the column, or zero */ - ulint mbminlen; /*!< minimum length of a char, in bytes, - or zero if not a char type */ - ulint mbmaxlen; /*!< maximum length of a char, in bytes, - or zero if not a char type */ - ulint is_unsigned; /*!< if a column type is an integer - type and this field is != 0, then - it is an unsigned integer type */ -}; - -#define MYSQL_FETCH_CACHE_SIZE 8 -/* After fetching this many rows, we start caching them in fetch_cache */ -#define MYSQL_FETCH_CACHE_THRESHOLD 4 - -#define ROW_PREBUILT_ALLOCATED 78540783 -#define ROW_PREBUILT_FREED 26423527 - -/** A struct for (sometimes lazily) prebuilt structures in an Innobase table - -handle used within MySQL; these are used to save CPU time. */ - -struct row_prebuilt_t { - ulint magic_n; /*!< this magic number is set to - ROW_PREBUILT_ALLOCATED when created, - or ROW_PREBUILT_FREED when the - struct has been freed */ - dict_table_t* table; /*!< Innobase table handle */ - dict_index_t* index; /*!< current index for a search, if - any */ - trx_t* trx; /*!< current transaction handle */ - unsigned sql_stat_start:1;/*!< TRUE when we start processing of - an SQL statement: we may have to set - an intention lock on the table, - create a consistent read view etc. 
*/ - unsigned mysql_has_locked:1;/*!< this is set TRUE when MySQL - calls external_lock on this handle - with a lock flag, and set FALSE when - with the F_UNLOCK flag */ - unsigned clust_index_was_generated:1; - /*!< if the user did not define a - primary key in MySQL, then Innobase - automatically generated a clustered - index where the ordering column is - the row id: in this case this flag - is set to TRUE */ - unsigned index_usable:1; /*!< caches the value of - row_merge_is_index_usable(trx,index) */ - unsigned read_just_key:1;/*!< set to 1 when MySQL calls - ha_innobase::extra with the - argument HA_EXTRA_KEYREAD; it is enough - to read just columns defined in - the index (i.e., no read of the - clustered index record necessary) */ - unsigned used_in_HANDLER:1;/*!< TRUE if we have been using this - handle in a MySQL HANDLER low level - index cursor command: then we must - store the pcur position even in a - unique search from a clustered index, - because HANDLER allows NEXT and PREV - in such a situation */ - unsigned template_type:2;/*!< ROW_MYSQL_WHOLE_ROW, - ROW_MYSQL_REC_FIELDS, - ROW_MYSQL_DUMMY_TEMPLATE, or - ROW_MYSQL_NO_TEMPLATE */ - unsigned n_template:10; /*!< number of elements in the - template */ - unsigned null_bitmap_len:10;/*!< number of bytes in the SQL NULL - bitmap at the start of a row in the - MySQL format */ - unsigned need_to_access_clustered:1; /*!< if we are fetching - columns through a secondary index - and at least one column is not in - the secondary index, then this is - set to TRUE; note that sometimes this - is set but we later optimize out the - clustered index lookup */ - unsigned templ_contains_blob:1;/*!< TRUE if the template contains - a column with DATA_BLOB == - get_innobase_type_from_mysql_type(); - not to be confused with InnoDB - externally stored columns - (VARCHAR can be off-page too) */ - mysql_row_templ_t* mysql_template;/*!< template used to transform - rows fast between MySQL and Innobase - formats; memory for 
this template - is not allocated from 'heap' */ - mem_heap_t* heap; /*!< memory heap from which - these auxiliary structures are - allocated when needed */ - ins_node_t* ins_node; /*!< Innobase SQL insert node - used to perform inserts - to the table */ - byte* ins_upd_rec_buff;/*!< buffer for storing data converted - to the Innobase format from the MySQL - format */ - const byte* default_rec; /*!< the default values of all columns - (a "default row") in MySQL format */ - ulint hint_need_to_fetch_extra_cols; - /*!< normally this is set to 0; if this - is set to ROW_RETRIEVE_PRIMARY_KEY, - then we should at least retrieve all - columns in the primary key; if this - is set to ROW_RETRIEVE_ALL_COLS, then - we must retrieve all columns in the - key (if read_just_key == 1), or all - columns in the table */ - upd_node_t* upd_node; /*!< Innobase SQL update node used - to perform updates and deletes */ - trx_id_t trx_id; /*!< The table->def_trx_id when - ins_graph was built */ - que_fork_t* ins_graph; /*!< Innobase SQL query graph used - in inserts. Will be rebuilt on - trx_id or n_indexes mismatch. */ - que_fork_t* upd_graph; /*!< Innobase SQL query graph used - in updates or deletes */ - btr_pcur_t pcur; /*!< persistent cursor used in selects - and updates */ - btr_pcur_t clust_pcur; /*!< persistent cursor used in - some selects and updates */ - que_fork_t* sel_graph; /*!< dummy query graph used in - selects */ - dtuple_t* search_tuple; /*!< prebuilt dtuple used in selects */ - byte row_id[DATA_ROW_ID_LEN]; - /*!< if the clustered index was - generated, the row id of the - last row fetched is stored - here */ - doc_id_t fts_doc_id; /* if the table has an FTS index on - it then we fetch the doc_id. 
- FTS-FIXME: Currently we fetch it always - but in the future we must only fetch - it when FTS columns are being - updated */ - dtuple_t* clust_ref; /*!< prebuilt dtuple used in - sel/upd/del */ - ulint select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */ - ulint stored_select_lock_type;/*!< this field is used to - remember the original select_lock_type - that was decided in ha_innodb.cc, - ::store_lock(), ::external_lock(), - etc. */ - ulint row_read_type; /*!< ROW_READ_WITH_LOCKS if row locks - should be the obtained for records - under an UPDATE or DELETE cursor. - If innodb_locks_unsafe_for_binlog - is TRUE, this can be set to - ROW_READ_TRY_SEMI_CONSISTENT, so that - if the row under an UPDATE or DELETE - cursor was locked by another - transaction, InnoDB will resort - to reading the last committed value - ('semi-consistent read'). Then, - this field will be set to - ROW_READ_DID_SEMI_CONSISTENT to - indicate that. If the row does not - match the WHERE condition, MySQL will - invoke handler::unlock_row() to - clear the flag back to - ROW_READ_TRY_SEMI_CONSISTENT and - to simply skip the row. If - the row matches, the next call to - row_search_for_mysql() will lock - the row. - This eliminates lock waits in some - cases; note that this breaks - serializability. 
*/ - ulint new_rec_locks; /*!< normally 0; if - srv_locks_unsafe_for_binlog is - TRUE or session is using READ - COMMITTED or READ UNCOMMITTED - isolation level, set in - row_search_for_mysql() if we set a new - record lock on the secondary - or clustered index; this is - used in row_unlock_for_mysql() - when releasing the lock under - the cursor if we determine - after retrieving the row that - it does not need to be locked - ('mini-rollback') */ - ulint mysql_prefix_len;/*!< byte offset of the end of - the last requested column */ - ulint mysql_row_len; /*!< length in bytes of a row in the - MySQL format */ - ulint n_rows_fetched; /*!< number of rows fetched after - positioning the current cursor */ - ulint fetch_direction;/*!< ROW_SEL_NEXT or ROW_SEL_PREV */ - byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE]; - /*!< a cache for fetched rows if we - fetch many rows from the same cursor: - it saves CPU time to fetch them in a - batch; we reserve mysql_row_len - bytes for each such row; these - pointers point 4 bytes past the - allocated mem buf start, because - there is a 4 byte magic number at the - start and at the end */ - ibool keep_other_fields_on_keyread; /*!< when using fetch - cache with HA_EXTRA_KEYREAD, don't - overwrite other fields in the MySQL - row buffer. */ - ulint fetch_cache_first;/*!< position of the first not yet - fetched row in fetch_cache */ - ulint n_fetch_cached; /*!< number of not yet fetched rows - in fetch_cache */ - mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied - to this heap */ - mem_heap_t* old_vers_heap; /*!< memory heap where a previous - version is built in consistent read */ - bool in_fts_query; /*!< Whether we are in an FTS query */ - /*----------------------*/ - ulonglong autoinc_last_value; - /*!< last value of AUTO-INC interval */ - ulonglong autoinc_increment;/*!< The increment step of the auto - increment column. Value must be - greater than or equal to 1.
Required to - calculate the next value */ - ulonglong autoinc_offset; /*!< The offset passed to - get_auto_increment() by MySQL. Required - to calculate the next value */ - dberr_t autoinc_error; /*!< The actual error code encountered - while trying to init or read the - autoinc value from the table. We - store it here so that we can return - it to MySQL */ - /*----------------------*/ - void* idx_cond; /*!< In ICP, pointer to a ha_innobase, - passed to innobase_index_cond(). - NULL if index condition pushdown is - not used. */ - ulint idx_cond_n_cols;/*!< Number of fields in idx_cond_cols. - 0 if and only if idx_cond == NULL. */ - /*----------------------*/ - ulint magic_n2; /*!< this should be the same as - magic_n */ - /*----------------------*/ - unsigned innodb_api:1; /*!< whether this is a InnoDB API - query */ - const rec_t* innodb_api_rec; /*!< InnoDB API search result */ - byte* srch_key_val1; /*!< buffer used in converting - search key values from MySQL format - to InnoDB format.*/ - byte* srch_key_val2; /*!< buffer used in converting - search key values from MySQL format - to InnoDB format.*/ - uint srch_key_val_len; /*!< Size of search key */ - -}; - -/** Callback for row_mysql_sys_index_iterate() */ -struct SysIndexCallback { - virtual ~SysIndexCallback() { } - - /** Callback method - @param mtr - current mini transaction - @param pcur - persistent cursor. 
*/ - virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0; -}; - -#define ROW_PREBUILT_FETCH_MAGIC_N 465765687 - -#define ROW_MYSQL_WHOLE_ROW 0 -#define ROW_MYSQL_REC_FIELDS 1 -#define ROW_MYSQL_NO_TEMPLATE 2 -#define ROW_MYSQL_DUMMY_TEMPLATE 3 /* dummy template used in - row_scan_and_check_index */ - -/* Values for hint_need_to_fetch_extra_cols */ -#define ROW_RETRIEVE_PRIMARY_KEY 1 -#define ROW_RETRIEVE_ALL_COLS 2 - -/* Values for row_read_type */ -#define ROW_READ_WITH_LOCKS 0 -#define ROW_READ_TRY_SEMI_CONSISTENT 1 -#define ROW_READ_DID_SEMI_CONSISTENT 2 - -#ifndef UNIV_NONINL -#include "row0mysql.ic" -#endif - -#endif /* row0mysql.h */ diff --git a/storage/xtradb/include/row0mysql.ic b/storage/xtradb/include/row0mysql.ic deleted file mode 100644 index 2eb60898c46..00000000000 --- a/storage/xtradb/include/row0mysql.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2001, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0mysql.ic -MySQL interface for Innobase - -Created 1/23/2001 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/row0purge.h b/storage/xtradb/include/row0purge.h deleted file mode 100644 index 5df899bc399..00000000000 --- a/storage/xtradb/include/row0purge.h +++ /dev/null @@ -1,138 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0purge.h -Purge obsolete records - -Created 3/14/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0purge_h -#define row0purge_h - -#include "univ.i" -#include "data0data.h" -#include "btr0types.h" -#include "btr0pcur.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" -#include "row0purge.h" -#include "ut0vec.h" - -/********************************************************************//** -Creates a purge node to a query graph. -@return own: purge node */ -UNIV_INTERN -purge_node_t* -row_purge_node_create( -/*==================*/ - que_thr_t* parent, /*!< in: parent node, i.e., a - thr node */ - mem_heap_t* heap) /*!< in: memory heap where created */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************//** -Determines if it is possible to remove a secondary index entry. -Removal is possible if the secondary index entry does not refer to any -not delete marked version of a clustered index record where DB_TRX_ID -is newer than the purge view. - -NOTE: This function should only be called by the purge thread, only -while holding a latch on the leaf page of the secondary index entry -(or keeping the buffer pool watch on the page). It is possible that -this function first returns true and then false, if a user transaction -inserts a record that the secondary index entry would refer to. -However, in that case, the user transaction would also re-insert the -secondary index entry after purge has removed it and released the leaf -page latch. 
-@return true if the secondary index record can be purged */ -UNIV_INTERN -bool -row_purge_poss_sec( -/*===============*/ - purge_node_t* node, /*!< in/out: row purge node */ - dict_index_t* index, /*!< in: secondary index */ - const dtuple_t* entry) /*!< in: secondary index entry */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************//** -Does the purge operation for a single undo log record. This is a high-level -function used in an SQL execution graph. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_purge_step( -/*===========*/ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** Purge node structure */ - -struct purge_node_t{ - que_common_t common; /*!< node type: QUE_NODE_PURGE */ - /*----------------------*/ - /* Local storage for this graph node */ - roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */ - ib_vector_t* undo_recs;/*!< Undo recs to purge */ - - undo_no_t undo_no;/*!< undo number of the record */ - - ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, - ...
*/ - dict_table_t* table; /*!< table where purge is done */ - - ulint cmpl_info;/*!< compiler analysis info of an update */ - - upd_t* update; /*!< update vector for a clustered index - record */ - dtuple_t* ref; /*!< NULL, or row reference to the next row to - handle */ - dtuple_t* row; /*!< NULL, or a copy (also fields copied to - heap) of the indexed fields of the row to - handle */ - dict_index_t* index; /*!< NULL, or the next index whose record should - be handled */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage for - row; this must be emptied after a successful - purge of a row */ - ibool found_clust;/*!< TRUE if the clustered index record - determined by ref was found in the clustered - index, and we were able to position pcur on - it */ - btr_pcur_t pcur; /*!< persistent cursor used in searching the - clustered index record */ - ibool done; /*!< Debug flag */ - -#ifdef UNIV_DEBUG - /***********************************************************//** - Validate the persistent cursor. The purge node has two references - to the clustered index record - one via the ref member, and the - other via the persistent cursor. These two references must match - each other if the found_clust flag is set. - @return true if the persistent cursor is consistent with - the ref member. */ - bool validate_pcur(); -#endif -}; - -#ifndef UNIV_NONINL -#include "row0purge.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0purge.ic b/storage/xtradb/include/row0purge.ic deleted file mode 100644 index 700106d1048..00000000000 --- a/storage/xtradb/include/row0purge.ic +++ /dev/null @@ -1,25 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License.
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - - -/**************************************************//** -@file include/row0purge.ic -Purge obsolete records - -Created 3/14/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/row0quiesce.h b/storage/xtradb/include/row0quiesce.h deleted file mode 100644 index 35d8184d33c..00000000000 --- a/storage/xtradb/include/row0quiesce.h +++ /dev/null @@ -1,74 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0quiesce.h - -Header file for tablespace quiesce functions. 
- -Created 2012-02-08 by Sunny Bains -*******************************************************/ - -#ifndef row0quiesce_h -#define row0quiesce_h - -#include "univ.i" -#include "dict0types.h" - -struct trx_t; - -/** The version number of the export meta-data text file. */ -#define IB_EXPORT_CFG_VERSION_V1 0x1UL - -/*********************************************************************//** -Quiesce the tablespace that the table resides in. */ -UNIV_INTERN -void -row_quiesce_table_start( -/*====================*/ - dict_table_t* table, /*!< in: quiesce this table */ - trx_t* trx) /*!< in/out: transaction/session */ - MY_ATTRIBUTE((nonnull)); - -/*********************************************************************//** -Set a table's quiesce state. -@return DB_SUCCESS or error code. */ -UNIV_INTERN -dberr_t -row_quiesce_set_state( -/*==================*/ - dict_table_t* table, /*!< in: quiesce this table */ - ib_quiesce_t state, /*!< in: quiesce state to set */ - trx_t* trx) /*!< in/out: transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*********************************************************************//** -Cleanup after table quiesce. */ -UNIV_INTERN -void -row_quiesce_table_complete( -/*=======================*/ - dict_table_t* table, /*!< in: quiesce this table */ - trx_t* trx) /*!< in/out: transaction/session */ - MY_ATTRIBUTE((nonnull)); - -#ifndef UNIV_NONINL -#include "row0quiesce.ic" -#endif - -#endif /* row0quiesce_h */ diff --git a/storage/xtradb/include/row0quiesce.ic b/storage/xtradb/include/row0quiesce.ic deleted file mode 100644 index f570a6aed05..00000000000 --- a/storage/xtradb/include/row0quiesce.ic +++ /dev/null @@ -1,26 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0quiesce.ic - -Quiesce a tablespace. - -Created 2012-02-08 Sunny Bains -*******************************************************/ - diff --git a/storage/xtradb/include/row0row.h b/storage/xtradb/include/row0row.h deleted file mode 100644 index b04068c5a5d..00000000000 --- a/storage/xtradb/include/row0row.h +++ /dev/null @@ -1,343 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0row.h -General row routines - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0row_h -#define row0row_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "mtr0mtr.h" -#include "rem0types.h" -#include "read0types.h" -#include "row0types.h" -#include "btr0types.h" - -/*********************************************************************//** -Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of -a clustered index record. -@return offset of DATA_TRX_ID */ -UNIV_INLINE -ulint -row_get_trx_id_offset( -/*==================*/ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: record offsets */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Reads the trx id field from a clustered index record. -@return value of the field */ -UNIV_INLINE -trx_id_t -row_get_rec_trx_id( -/*===============*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Reads the roll pointer field from a clustered index record. 
-@return value of the field */ -UNIV_INLINE -roll_ptr_t -row_get_rec_roll_ptr( -/*=================*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*****************************************************************//** -When an insert or purge to a table is performed, this function builds -the entry to be inserted into or purged from an index on the table. -@return index entry which should be inserted or purged -@retval NULL if the externally stored columns in the clustered index record -are unavailable and ext != NULL, or row is missing some needed columns. */ -UNIV_INTERN -dtuple_t* -row_build_index_entry_low( -/*======================*/ - const dtuple_t* row, /*!< in: row which should be - inserted or purged */ - const row_ext_t* ext, /*!< in: externally stored column - prefixes, or NULL */ - dict_index_t* index, /*!< in: index on the table */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory for the index entry - is allocated */ - MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4))); -/*****************************************************************//** -When an insert or purge to a table is performed, this function builds -the entry to be inserted into or purged from an index on the table. 
-@return index entry which should be inserted or purged, or NULL if the -externally stored columns in the clustered index record are -unavailable and ext != NULL */ -UNIV_INLINE -dtuple_t* -row_build_index_entry( -/*==================*/ - const dtuple_t* row, /*!< in: row which should be - inserted or purged */ - const row_ext_t* ext, /*!< in: externally stored column - prefixes, or NULL */ - dict_index_t* index, /*!< in: index on the table */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory for the index entry - is allocated */ - MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4))); -/*******************************************************************//** -An inverse function to row_build_index_entry. Builds a row from a -record in a clustered index. -@return own: row built; see the NOTE below! */ -UNIV_INTERN -dtuple_t* -row_build( -/*======*/ - ulint type, /*!< in: ROW_COPY_POINTERS or - ROW_COPY_DATA; the latter - copies also the data fields to - heap while the first only - places pointers to data fields - on the index page, and thus is - more efficient */ - const dict_index_t* index, /*!< in: clustered index */ - const rec_t* rec, /*!< in: record in the clustered - index; NOTE: in the case - ROW_COPY_POINTERS the data - fields in the row will point - directly into this record, - therefore, the buffer page of - this record must be at least - s-latched and the latch held - as long as the row dtuple is used! 
*/ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) - or NULL, in which case this function - will invoke rec_get_offsets() */ - const dict_table_t* col_table, - /*!< in: table, to check which - externally stored columns - occur in the ordering columns - of an index, or NULL if - index->table should be - consulted instead; the user - columns in this table should be - the same columns as in index->table */ - const dtuple_t* add_cols, - /*!< in: default values of - added columns, or NULL */ - const ulint* col_map,/*!< in: mapping of old column - numbers to new ones, or NULL */ - row_ext_t** ext, /*!< out, own: cache of - externally stored column - prefixes, or NULL */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory needed is allocated */ - MY_ATTRIBUTE((nonnull(2,3,9))); -/*******************************************************************//** -Converts an index record to a typed data tuple. -@return index entry built; does not set info_bits, and the data fields -in the entry will point directly to rec */ -UNIV_INTERN -dtuple_t* -row_rec_to_index_entry_low( -/*=======================*/ - const rec_t* rec, /*!< in: record in the index */ - const dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint* n_ext, /*!< out: number of externally - stored columns */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory needed is allocated */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Converts an index record to a typed data tuple. NOTE that externally -stored (often big) fields are NOT copied to heap. 
-@return own: index entry built */ -UNIV_INTERN -dtuple_t* -row_rec_to_index_entry( -/*===================*/ - const rec_t* rec, /*!< in: record in the index */ - const dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in/out: rec_get_offsets(rec) */ - ulint* n_ext, /*!< out: number of externally - stored columns */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory needed is allocated */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Builds from a secondary index record a row reference with which we can -search the clustered index record. -@return own: row reference built; see the NOTE below! */ -UNIV_INTERN -dtuple_t* -row_build_row_ref( -/*==============*/ - ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap, whereas the latter only places pointers - to data fields on the index page */ - dict_index_t* index, /*!< in: secondary index */ - const rec_t* rec, /*!< in: record in the index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row reference is used! */ - mem_heap_t* heap) /*!< in: memory heap from which the memory - needed is allocated */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ -UNIV_INTERN -void -row_build_row_ref_in_tuple( -/*=======================*/ - dtuple_t* ref, /*!< in/out: row reference built; - see the NOTE below! 
*/ - const rec_t* rec, /*!< in: record in the index; - NOTE: the data fields in ref - will point directly into this - record, therefore, the buffer - page of this record must be at - least s-latched and the latch - held as long as the row - reference is used! */ - const dict_index_t* index, /*!< in: secondary index */ - ulint* offsets,/*!< in: rec_get_offsets(rec, index) - or NULL */ - trx_t* trx) /*!< in: transaction or NULL */ - MY_ATTRIBUTE((nonnull(1,2,3))); -/*******************************************************************//** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ -UNIV_INLINE -void -row_build_row_ref_fast( -/*===================*/ - dtuple_t* ref, /*!< in/out: typed data tuple where the - reference is built */ - const ulint* map, /*!< in: array of field numbers in rec - telling how ref should be built from - the fields of rec */ - const rec_t* rec, /*!< in: record in the index; must be - preserved while ref is used, as we do - not copy field values to heap */ - const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ -/***************************************************************//** -Searches the clustered index record for a row, if we have the row -reference. -@return TRUE if found */ -UNIV_INTERN -ibool -row_search_on_row_ref( -/*==================*/ - btr_pcur_t* pcur, /*!< out: persistent cursor, which must - be closed by the caller */ - ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */ - const dict_table_t* table, /*!< in: table */ - const dtuple_t* ref, /*!< in: row reference */ - mtr_t* mtr) /*!< in/out: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Fetches the clustered index record for a secondary index record. The latches -on the secondary index record are preserved. 
-@return record or NULL, if no record found */ -UNIV_INTERN -rec_t* -row_get_clust_rec( -/*==============*/ - ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */ - const rec_t* rec, /*!< in: record in a secondary index */ - dict_index_t* index, /*!< in: secondary index */ - dict_index_t** clust_index,/*!< out: clustered index */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** Result of row_search_index_entry */ -enum row_search_result { - ROW_FOUND = 0, /*!< the record was found */ - ROW_NOT_FOUND, /*!< record not found */ - ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or - BTR_DELETE_MARK was specified, the - secondary index leaf page was not in - the buffer pool, and the operation was - enqueued in the insert/delete buffer */ - ROW_NOT_DELETED_REF /*!< BTR_DELETE was specified, and - row_purge_poss_sec() failed */ -}; - -/***************************************************************//** -Searches an index record. -@return whether the record was found or buffered */ -UNIV_INTERN -enum row_search_result -row_search_index_entry( -/*===================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry, /*!< in: index entry */ - ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */ - btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must - be closed by the caller */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#define ROW_COPY_DATA 1 -#define ROW_COPY_POINTERS 2 - -/* The allowed latching order of index records is the following: -(1) a secondary index record -> -(2) the clustered index record -> -(3) rollback segment data for the clustered index record. */ - -/*******************************************************************//** -Formats the raw data in "data" (in InnoDB on-disk format) using -"dict_field" and writes the result to "buf". -Not more than "buf_size" bytes are written to "buf". 
-The result is always NUL-terminated (provided buf_size is positive) and the -number of bytes that were written to "buf" is returned (including the -terminating NUL). -@return number of bytes that were written */ -UNIV_INTERN -ulint -row_raw_format( -/*===========*/ - const char* data, /*!< in: raw data */ - ulint data_len, /*!< in: raw data length - in bytes */ - const dict_field_t* dict_field, /*!< in: index field */ - char* buf, /*!< out: output buffer */ - ulint buf_size) /*!< in: output buffer size - in bytes */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#ifndef UNIV_NONINL -#include "row0row.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0row.ic b/storage/xtradb/include/row0row.ic deleted file mode 100644 index ac62422be1f..00000000000 --- a/storage/xtradb/include/row0row.ic +++ /dev/null @@ -1,174 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0row.ic -General row routines - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0dict.h" -#include "rem0rec.h" -#include "trx0undo.h" - -/*********************************************************************//** -Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of -a clustered index record. -@return offset of DATA_TRX_ID */ -UNIV_INLINE -ulint -row_get_trx_id_offset( -/*==================*/ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: record offsets */ -{ - ulint pos; - ulint offset; - ulint len; - - ut_ad(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(NULL, index, offsets)); - - pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - - offset = rec_get_nth_field_offs(offsets, pos, &len); - - ut_ad(len == DATA_TRX_ID_LEN); - - return(offset); -} - -/*********************************************************************//** -Reads the trx id field from a clustered index record. 
-@return value of the field */ -UNIV_INLINE -trx_id_t -row_get_rec_trx_id( -/*===============*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - ulint offset; - - ut_ad(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - offset = index->trx_id_offset; - - if (!offset) { - offset = row_get_trx_id_offset(index, offsets); - } - - return(trx_read_trx_id(rec + offset)); -} - -/*********************************************************************//** -Reads the roll pointer field from a clustered index record. -@return value of the field */ -UNIV_INLINE -roll_ptr_t -row_get_rec_roll_ptr( -/*=================*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - ulint offset; - - ut_ad(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - offset = index->trx_id_offset; - - if (!offset) { - offset = row_get_trx_id_offset(index, offsets); - } - - return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); -} - -/*****************************************************************//** -When an insert or purge to a table is performed, this function builds -the entry to be inserted into or purged from an index on the table. 
-@return index entry which should be inserted or purged, or NULL if the -externally stored columns in the clustered index record are -unavailable and ext != NULL */ -UNIV_INLINE -dtuple_t* -row_build_index_entry( -/*==================*/ - const dtuple_t* row, /*!< in: row which should be - inserted or purged */ - const row_ext_t* ext, /*!< in: externally stored column - prefixes, or NULL */ - dict_index_t* index, /*!< in: index on the table */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory for the index entry - is allocated */ -{ - dtuple_t* entry; - - ut_ad(dtuple_check_typed(row)); - entry = row_build_index_entry_low(row, ext, index, heap); - ut_ad(!entry || dtuple_check_typed(entry)); - return(entry); -} - -/*******************************************************************//** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ -UNIV_INLINE -void -row_build_row_ref_fast( -/*===================*/ - dtuple_t* ref, /*!< in/out: typed data tuple where the - reference is built */ - const ulint* map, /*!< in: array of field numbers in rec - telling how ref should be built from - the fields of rec */ - const rec_t* rec, /*!< in: record in the index; must be - preserved while ref is used, as we do - not copy field values to heap */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - dfield_t* dfield; - const byte* field; - ulint len; - ulint ref_len; - ulint field_no; - ulint i; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_offs_any_extern(offsets)); - ref_len = dtuple_get_n_fields(ref); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(ref, i); - - field_no = *(map + i); - - if (field_no != ULINT_UNDEFINED) { - - field = rec_get_nth_field(rec, offsets, - field_no, &len); - dfield_set_data(dfield, field, len); - } - } -} diff --git a/storage/xtradb/include/row0sel.h b/storage/xtradb/include/row0sel.h deleted file mode 100644 index 
fd5bc755a22..00000000000 --- a/storage/xtradb/include/row0sel.h +++ /dev/null @@ -1,409 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0sel.h -Select - -Created 12/19/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0sel_h -#define row0sel_h - -#include "univ.i" -#include "data0data.h" -#include "que0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "row0types.h" -#include "que0types.h" -#include "pars0sym.h" -#include "btr0pcur.h" -#include "read0read.h" -#include "row0mysql.h" - -/*********************************************************************//** -Creates a select node struct. -@return own: select node struct */ -UNIV_INTERN -sel_node_t* -sel_node_create( -/*============*/ - mem_heap_t* heap); /*!< in: memory heap where created */ -/*********************************************************************//** -Frees the memory private to a select node when a query graph is freed, -does not free the heap where the node was originally created. 
*/ -UNIV_INTERN -void -sel_node_free_private( -/*==================*/ - sel_node_t* node); /*!< in: select node struct */ -/*********************************************************************//** -Frees a prefetch buffer for a column, including the dynamically allocated -memory for data stored there. */ -UNIV_INTERN -void -sel_col_prefetch_buf_free( -/*======================*/ - sel_buf_t* prefetch_buf); /*!< in, own: prefetch buffer */ -/*********************************************************************//** -Gets the plan node for the nth table in a join. -@return plan node */ -UNIV_INLINE -plan_t* -sel_node_get_nth_plan( -/*==================*/ - sel_node_t* node, /*!< in: select node */ - ulint i); /*!< in: get ith plan node */ -/**********************************************************************//** -Performs a select step. This is a high-level function used in SQL execution -graphs. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_sel_step( -/*=========*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs an execution step of an open or close cursor statement node. -@return query thread to run next or NULL */ -UNIV_INLINE -que_thr_t* -open_step( -/*======*/ - que_thr_t* thr); /*!< in: query thread */ -/**********************************************************************//** -Performs a fetch for a cursor. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -fetch_step( -/*=======*/ - que_thr_t* thr); /*!< in: query thread */ -/****************************************************************//** -Sample callback function for fetch that prints each row. -@return always returns non-NULL */ -UNIV_INTERN -void* -row_fetch_print( -/*============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg); /*!< in: not used */ -/***********************************************************//** -Prints a row in a select result. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_printf_step( -/*============*/ - que_thr_t* thr); /*!< in: query thread */ -/****************************************************************//** -Converts a key value stored in MySQL format to an Innobase dtuple. The last -field of the key value may be just a prefix of a fixed length field: hence -the parameter key_len. But currently we do not allow search keys where the -last field is only a prefix of the full key field len and print a warning if -such appears. */ -UNIV_INTERN -void -row_sel_convert_mysql_key_to_innobase( -/*==================================*/ - dtuple_t* tuple, /*!< in/out: tuple where to build; - NOTE: we assume that the type info - in the tuple is already according - to index! */ - byte* buf, /*!< in: buffer to use in field - conversions; NOTE that dtuple->data - may end up pointing inside buf so - do not discard that buffer while - the tuple is being used. See - row_mysql_store_col_in_innobase_format() - in the case of DATA_INT */ - ulint buf_len, /*!< in: buffer length */ - dict_index_t* index, /*!< in: index of the key value */ - const byte* key_ptr, /*!< in: MySQL key value */ - ulint key_len, /*!< in: MySQL key value length */ - trx_t* trx); /*!< in: transaction */ -/********************************************************************//** -Searches for rows in the database. This is used in the interface to -MySQL. This function opens a cursor, and also implements fetch next -and fetch prev. NOTE that if we do a search with a full key value -from a unique index (ROW_SEL_EXACT), then we will not store the cursor -position and fetch next or fetch prev must not be tried to the cursor! 
-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK, -DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */ -UNIV_INTERN -dberr_t -row_search_for_mysql( -/*=================*/ - byte* buf, /*!< in/out: buffer for the fetched - row in the MySQL format */ - ulint mode, /*!< in: search mode PAGE_CUR_L, ... */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the - table handle; this contains the info - of search_tuple, index; if search - tuple contains 0 fields then we - position the cursor at the start or - the end of the index, depending on - 'mode' */ - ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or - ROW_SEL_EXACT_PREFIX */ - ulint direction) /*!< in: 0 or ROW_SEL_NEXT or - ROW_SEL_PREV; NOTE: if this is != 0, - then prebuilt must have a pcur - with stored position! In opening of a - cursor 'direction' should be 0. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Checks if MySQL at the moment is allowed for this table to retrieve a -consistent read result, or store it to the query cache. -@return TRUE if storing or retrieving from the query cache is permitted */ -UNIV_INTERN -ibool -row_search_check_if_query_cache_permitted( -/*======================================*/ - trx_t* trx, /*!< in: transaction object */ - const char* norm_name); /*!< in: concatenation of database name, - '/' char, table name */ -/*******************************************************************//** -Read the max AUTOINC value from an index. 
-@return DB_SUCCESS if all OK else error code */ -UNIV_INTERN -dberr_t -row_search_max_autoinc( -/*===================*/ - dict_index_t* index, /*!< in: index to search */ - const char* col_name, /*!< in: autoinc column name */ - ib_uint64_t* value) /*!< out: AUTOINC value read */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/** A structure for caching column values for prefetched rows */ -struct sel_buf_t{ - byte* data; /*!< data, or NULL; if not NULL, this field - has allocated memory which must be explicitly - freed; can be != NULL even when len is - UNIV_SQL_NULL */ - ulint len; /*!< data length or UNIV_SQL_NULL */ - ulint val_buf_size; - /*!< size of memory buffer allocated for data: - this can be more than len; this is defined - when data != NULL */ -}; - -/** Query plan */ -struct plan_t{ - dict_table_t* table; /*!< table struct in the dictionary - cache */ - dict_index_t* index; /*!< table index used in the search */ - btr_pcur_t pcur; /*!< persistent cursor used to search - the index */ - ibool asc; /*!< TRUE if cursor traveling upwards */ - ibool pcur_is_open; /*!< TRUE if pcur has been positioned - and we can try to fetch new rows */ - ibool cursor_at_end; /*!< TRUE if the cursor is open but - we know that there are no more - qualifying rows left to retrieve from - the index tree; NOTE though, that - there may still be unprocessed rows in - the prefetch stack; always FALSE when - pcur_is_open is FALSE */ - ibool stored_cursor_rec_processed; - /*!< TRUE if the pcur position has been - stored and the record it is positioned - on has already been processed */ - que_node_t** tuple_exps; /*!< array of expressions - which are used to calculate - the field values in the search - tuple: there is one expression - for each field in the search - tuple */ - dtuple_t* tuple; /*!< search tuple */ - ulint mode; /*!< search mode: PAGE_CUR_G, ... 
*/ - ulint n_exact_match; /*!< number of first fields in - the search tuple which must be - exactly matched */ - ibool unique_search; /*!< TRUE if we are searching an - index record with a unique key */ - ulint n_rows_fetched; /*!< number of rows fetched using pcur - after it was opened */ - ulint n_rows_prefetched;/*!< number of prefetched rows cached - for fetch: fetching several rows in - the same mtr saves CPU time */ - ulint first_prefetched;/*!< index of the first cached row in - select buffer arrays for each column */ - ibool no_prefetch; /*!< no prefetch for this table */ - sym_node_list_t columns; /*!< symbol table nodes for the columns - to retrieve from the table */ - UT_LIST_BASE_NODE_T(func_node_t) - end_conds; /*!< conditions which determine the - fetch limit of the index segment we - have to look at: when one of these - fails, the result set has been - exhausted for the cursor in this - index; these conditions are normalized - so that in a comparison the column - for this table is the first argument */ - UT_LIST_BASE_NODE_T(func_node_t) - other_conds; /*!< the rest of search conditions we can - test at this table in a join */ - ibool must_get_clust; /*!< TRUE if index is a non-clustered - index and we must also fetch the - clustered index record; this is the - case if the non-clustered record does - not contain all the needed columns, or - if this is a single-table explicit - cursor, or a searched update or - delete */ - ulint* clust_map; /*!< map telling how clust_ref is built - from the fields of a non-clustered - record */ - dtuple_t* clust_ref; /*!< the reference to the clustered - index entry is built here if index is - a non-clustered index */ - btr_pcur_t clust_pcur; /*!< if index is non-clustered, we use - this pcur to search the clustered - index */ - mem_heap_t* old_vers_heap; /*!< memory heap used in building an old - version of a row, or NULL */ -}; - -/** Select node states */ -enum sel_node_state { - SEL_NODE_CLOSED, /*!< it is a 
declared cursor which is not - currently open */ - SEL_NODE_OPEN, /*!< intention locks not yet set on tables */ - SEL_NODE_FETCH, /*!< intention locks have been set */ - SEL_NODE_NO_MORE_ROWS /*!< cursor has reached the result set end */ -}; - -/** Select statement node */ -struct sel_node_t{ - que_common_t common; /*!< node type: QUE_NODE_SELECT */ - enum sel_node_state - state; /*!< node state */ - que_node_t* select_list; /*!< select list */ - sym_node_t* into_list; /*!< variables list or NULL */ - sym_node_t* table_list; /*!< table list */ - ibool asc; /*!< TRUE if the rows should be fetched - in an ascending order */ - ibool set_x_locks; /*!< TRUE if the cursor is for update or - delete, which means that a row x-lock - should be placed on the cursor row */ - ulint row_lock_mode; /*!< LOCK_X or LOCK_S */ - ulint n_tables; /*!< number of tables */ - ulint fetch_table; /*!< number of the next table to access - in the join */ - plan_t* plans; /*!< array of n_tables many plan nodes - containing the search plan and the - search data structures */ - que_node_t* search_cond; /*!< search condition */ - read_view_t* read_view; /*!< if the query is a non-locking - consistent read, its read view is - placed here, otherwise NULL */ - ibool consistent_read;/*!< TRUE if the select is a consistent, - non-locking read */ - order_node_t* order_by; /*!< order by column definition, or - NULL */ - ibool is_aggregate; /*!< TRUE if the select list consists of - aggregate functions */ - ibool aggregate_already_fetched; - /*!< TRUE if the aggregate row has - already been fetched for the current - cursor */ - ibool can_get_updated;/*!< this is TRUE if the select - is in a single-table explicit - cursor which can get updated - within the stored procedure, - or in a searched update or - delete; NOTE that to determine - of an explicit cursor if it - can get updated, the parser - checks from a stored procedure - if it contains positioned - update or delete statements */ - sym_node_t* 
explicit_cursor;/*!< not NULL if an explicit cursor */ - UT_LIST_BASE_NODE_T(sym_node_t) - copy_variables; /*!< variables whose values we have to - copy when an explicit cursor is opened, - so that they do not change between - fetches */ -}; - -/** Fetch statement node */ -struct fetch_node_t{ - que_common_t common; /*!< type: QUE_NODE_FETCH */ - sel_node_t* cursor_def; /*!< cursor definition */ - sym_node_t* into_list; /*!< variables to set */ - - pars_user_func_t* - func; /*!< User callback function or NULL. - The first argument to the function - is a sel_node_t*, containing the - results of the SELECT operation for - one row. If the function returns - NULL, it is not interested in - further rows and the cursor is - modified so (cursor % NOTFOUND) is - true. If it returns not-NULL, - continue normally. See - row_fetch_print() for an example - (and a useful debugging tool). */ -}; - -/** Open or close cursor operation type */ -enum open_node_op { - ROW_SEL_OPEN_CURSOR, /*!< open cursor */ - ROW_SEL_CLOSE_CURSOR /*!< close cursor */ -}; - -/** Open or close cursor statement node */ -struct open_node_t{ - que_common_t common; /*!< type: QUE_NODE_OPEN */ - enum open_node_op - op_type; /*!< operation type: open or - close cursor */ - sel_node_t* cursor_def; /*!< cursor definition */ -}; - -/** Row printf statement node */ -struct row_printf_node_t{ - que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */ - sel_node_t* sel_node; /*!< select */ -}; - -/** Search direction for the MySQL interface */ -enum row_sel_direction { - ROW_SEL_NEXT = 1, /*!< ascending direction */ - ROW_SEL_PREV = 2 /*!< descending direction */ -}; - -/** Match mode for the MySQL interface */ -enum row_sel_match_mode { - ROW_SEL_EXACT = 1, /*!< search using a complete key value */ - ROW_SEL_EXACT_PREFIX /*!< search using a key prefix which - must match rows: the prefix may - contain an incomplete field (the last - field in prefix may be just a prefix - of a fixed length column) */ -}; - -#ifndef 
UNIV_NONINL -#include "row0sel.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0sel.ic b/storage/xtradb/include/row0sel.ic deleted file mode 100644 index d83a3448832..00000000000 --- a/storage/xtradb/include/row0sel.ic +++ /dev/null @@ -1,105 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0sel.ic -Select - -Created 12/19/1997 Heikki Tuuri -*******************************************************/ - -#include "que0que.h" - -/*********************************************************************//** -Gets the plan node for the nth table in a join. 
-@return plan node */ -UNIV_INLINE -plan_t* -sel_node_get_nth_plan( -/*==================*/ - sel_node_t* node, /*!< in: select node */ - ulint i) /*!< in: get ith plan node */ -{ - ut_ad(i < node->n_tables); - - return(node->plans + i); -} - -/*********************************************************************//** -Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means -that it will start fetching from the start of the result set again, regardless -of where it was before, and it will set intention locks on the tables. */ -UNIV_INLINE -void -sel_node_reset_cursor( -/*==================*/ - sel_node_t* node) /*!< in: select node */ -{ - node->state = SEL_NODE_OPEN; -} - -/**********************************************************************//** -Performs an execution step of an open or close cursor statement node. -@return query thread to run next or NULL */ -UNIV_INLINE -que_thr_t* -open_step( -/*======*/ - que_thr_t* thr) /*!< in: query thread */ -{ - sel_node_t* sel_node; - open_node_t* node; - ulint err; - - ut_ad(thr); - - node = (open_node_t*) thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_OPEN); - - sel_node = node->cursor_def; - - err = DB_SUCCESS; - - if (node->op_type == ROW_SEL_OPEN_CURSOR) { - - /* if (sel_node->state == SEL_NODE_CLOSED) { */ - - sel_node_reset_cursor(sel_node); - /* } else { - err = DB_ERROR; - } */ - } else { - if (sel_node->state != SEL_NODE_CLOSED) { - - sel_node->state = SEL_NODE_CLOSED; - } else { - err = DB_ERROR; - } - } - - if (err != DB_SUCCESS) { - /* SQL error detected */ - fprintf(stderr, "SQL error %lu\n", (ulong) err); - - ut_error; - } - - thr->run_node = que_node_get_parent(node); - - return(thr); -} diff --git a/storage/xtradb/include/row0types.h b/storage/xtradb/include/row0types.h deleted file mode 100644 index 52c89cb01fa..00000000000 --- a/storage/xtradb/include/row0types.h +++ /dev/null @@ -1,55 +0,0 @@ 
-/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0types.h -Row operation global types - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0types_h -#define row0types_h - -struct plan_t; - -struct upd_t; -struct upd_field_t; -struct upd_node_t; -struct del_node_t; -struct ins_node_t; -struct sel_node_t; -struct open_node_t; -struct fetch_node_t; - -struct row_printf_node_t; -struct sel_buf_t; - -struct undo_node_t; - -struct purge_node_t; - -struct row_ext_t; - -/** Buffer for logging modifications during online index creation */ -struct row_log_t; - -/* MySQL data types */ -struct TABLE; - -#endif diff --git a/storage/xtradb/include/row0uins.h b/storage/xtradb/include/row0uins.h deleted file mode 100644 index 89e334e5433..00000000000 --- a/storage/xtradb/include/row0uins.h +++ /dev/null @@ -1,54 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0uins.h -Fresh insert undo - -Created 2/25/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0uins_h -#define row0uins_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" -#include "mtr0mtr.h" - -/***********************************************************//** -Undoes a fresh insert of a row to a table. A fresh insert means that -the same clustered index unique key did not have any record, even delete -marked, at the time of the insert. InnoDB is eager in a rollback: -if it figures out that an index record will be removed in the purge -anyway, it will remove it in the rollback. 
-@return DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_undo_ins( -/*=========*/ - undo_node_t* node) /*!< in: row undo node */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#ifndef UNIV_NONINL -#include "row0uins.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0uins.ic b/storage/xtradb/include/row0uins.ic deleted file mode 100644 index 54da2e49874..00000000000 --- a/storage/xtradb/include/row0uins.ic +++ /dev/null @@ -1,25 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0uins.ic -Fresh insert undo - -Created 2/25/1997 Heikki Tuuri -*******************************************************/ - diff --git a/storage/xtradb/include/row0umod.h b/storage/xtradb/include/row0umod.h deleted file mode 100644 index 4f1d8e1f66c..00000000000 --- a/storage/xtradb/include/row0umod.h +++ /dev/null @@ -1,52 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0umod.h -Undo modify of a row - -Created 2/27/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0umod_h -#define row0umod_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" -#include "mtr0mtr.h" - -/***********************************************************//** -Undoes a modify operation on a row of a table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_undo_mod( -/*=========*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -#ifndef UNIV_NONINL -#include "row0umod.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0umod.ic b/storage/xtradb/include/row0umod.ic deleted file mode 100644 index 00a8cd86e01..00000000000 --- a/storage/xtradb/include/row0umod.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0umod.ic -Undo modify of a row - -Created 2/27/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/row0undo.h b/storage/xtradb/include/row0undo.h deleted file mode 100644 index 5dddfb4eae1..00000000000 --- a/storage/xtradb/include/row0undo.h +++ /dev/null @@ -1,135 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0undo.h -Row undo - -Created 1/8/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0undo_h -#define row0undo_h - -#include "univ.i" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "btr0types.h" -#include "btr0pcur.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" - -/********************************************************************//** -Creates a row undo node to a query graph. -@return own: undo node */ -UNIV_INTERN -undo_node_t* -row_undo_node_create( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - que_thr_t* parent, /*!< in: parent node, i.e., a thr node */ - mem_heap_t* heap); /*!< in: memory heap where created */ -/***********************************************************//** -Looks for the clustered index record when node has the row reference. -The pcur in node is used in the search. If found, stores the row to node, -and stores the position of pcur, and detaches it. The pcur must be closed -by the caller in any case. -@return TRUE if found; NOTE the node->pcur must be closed by the -caller, regardless of the return value */ -UNIV_INTERN -ibool -row_undo_search_clust_to_pcur( -/*==========================*/ - undo_node_t* node); /*!< in: row undo node */ -/***********************************************************//** -Undoes a row operation in a table. This is a high-level function used -in SQL execution graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_undo_step( -/*==========*/ - que_thr_t* thr); /*!< in: query thread */ - -/* A single query thread will try to perform the undo for all successive -versions of a clustered index record, if the transaction has modified it -several times during the execution which is rolled back. It may happen -that the task is transferred to another query thread, if the other thread -is assigned to handle an undo log record in the chain of different versions -of the record, and the other thread happens to get the x-latch to the -clustered index record at the right time. - If a query thread notices that the clustered index record it is looking -for is missing, or the roll ptr field in the record doed not point to the -undo log record the thread was assigned to handle, then it gives up the undo -task for that undo log record, and fetches the next. This situation can occur -just in the case where the transaction modified the same record several times -and another thread is currently doing the undo for successive versions of -that index record. */ - -/** Execution state of an undo node */ -enum undo_exec { - UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next - undo log record */ - UNDO_NODE_INSERT, /*!< undo a fresh insert of a - row to a table */ - UNDO_NODE_MODIFY /*!< undo a modify operation - (DELETE or UPDATE) on a row - of a table */ -}; - -/** Undo node structure */ -struct undo_node_t{ - que_common_t common; /*!< node type: QUE_NODE_UNDO */ - enum undo_exec state; /*!< node execution state */ - trx_t* trx; /*!< trx for which undo is done */ - roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */ - trx_undo_rec_t* undo_rec;/*!< undo log record */ - undo_no_t undo_no;/*!< undo number of the record */ - ulint rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC, - ... 
*/ - trx_id_t new_trx_id; /*!< trx id to restore to clustered index - record */ - btr_pcur_t pcur; /*!< persistent cursor used in searching the - clustered index record */ - dict_table_t* table; /*!< table where undo is done */ - ulint cmpl_info;/*!< compiler analysis of an update */ - upd_t* update; /*!< update vector for a clustered index - record */ - dtuple_t* ref; /*!< row reference to the next row to handle */ - dtuple_t* row; /*!< a copy (also fields copied to heap) of the - row to handle */ - row_ext_t* ext; /*!< NULL, or prefixes of the externally - stored columns of the row */ - dtuple_t* undo_row;/*!< NULL, or the row after undo */ - row_ext_t* undo_ext;/*!< NULL, or prefixes of the externally - stored columns of undo_row */ - dict_index_t* index; /*!< the next index whose record should be - handled */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage for - row; this must be emptied after undo is tried - on a row */ -}; - - -#ifndef UNIV_NONINL -#include "row0undo.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0undo.ic b/storage/xtradb/include/row0undo.ic deleted file mode 100644 index b97ffca590e..00000000000 --- a/storage/xtradb/include/row0undo.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0undo.ic -Row undo - -Created 1/8/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h deleted file mode 100644 index 4312fcf7339..00000000000 --- a/storage/xtradb/include/row0upd.h +++ /dev/null @@ -1,539 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0upd.h -Update of a row - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0upd_h -#define row0upd_h - -#include "univ.i" -#include "data0data.h" -#include "row0types.h" -#include "btr0types.h" -#include "dict0types.h" -#include "trx0types.h" - -#ifndef UNIV_HOTBACKUP -# include "btr0pcur.h" -# include "que0types.h" -# include "pars0types.h" -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Creates an update vector object. -@return own: update vector object */ -UNIV_INLINE -upd_t* -upd_create( -/*=======*/ - ulint n, /*!< in: number of fields */ - mem_heap_t* heap); /*!< in: heap from which memory allocated */ -/*********************************************************************//** -Returns the number of fields in the update vector == number of columns -to be updated by an update vector. -@return number of fields */ -UNIV_INLINE -ulint -upd_get_n_fields( -/*=============*/ - const upd_t* update); /*!< in: update vector */ -#ifdef UNIV_DEBUG -/*********************************************************************//** -Returns the nth field of an update vector. 
-@return update vector field */ -UNIV_INLINE -upd_field_t* -upd_get_nth_field( -/*==============*/ - const upd_t* update, /*!< in: update vector */ - ulint n); /*!< in: field position in update vector */ -#else -# define upd_get_nth_field(update, n) ((update)->fields + (n)) -#endif -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Sets an index field number to be updated by an update vector field. */ -UNIV_INLINE -void -upd_field_set_field_no( -/*===================*/ - upd_field_t* upd_field, /*!< in: update vector field */ - ulint field_no, /*!< in: field number in a clustered - index */ - dict_index_t* index, /*!< in: index */ - trx_t* trx); /*!< in: transaction */ -/*********************************************************************//** -Returns a field of an update vector by field_no. -@return update vector field, or NULL */ -UNIV_INLINE -const upd_field_t* -upd_get_field_by_field_no( -/*======================*/ - const upd_t* update, /*!< in: update vector */ - ulint no) /*!< in: field_no */ - MY_ATTRIBUTE((nonnull, pure)); -/*********************************************************************//** -Writes into the redo log the values of trx id and roll ptr and enough info -to determine their positions within a clustered index record. -@return new pointer to mlog */ -UNIV_INTERN -byte* -row_upd_write_sys_vals_to_log( -/*==========================*/ - dict_index_t* index, /*!< in: clustered index */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */ - byte* log_ptr,/*!< pointer to a buffer of size > 20 opened - in mlog */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************************//** -Updates the trx id and roll ptr field in a clustered index record when -a row is updated or marked deleted. 
*/ -UNIV_INLINE -void -row_upd_rec_sys_fields( -/*===================*/ - rec_t* rec, /*!< in/out: record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const trx_t* trx, /*!< in: transaction */ - roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record, - can be 0 during IMPORT */ -/*********************************************************************//** -Sets the trx id or roll ptr field of a clustered index entry. */ -UNIV_INTERN -void -row_upd_index_entry_sys_field( -/*==========================*/ - dtuple_t* entry, /*!< in/out: index entry, where the memory - buffers for sys fields are already allocated: - the function just copies the new values to - them */ - dict_index_t* index, /*!< in: clustered index */ - ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */ - ib_uint64_t val); /*!< in: value to write */ -/*********************************************************************//** -Creates an update node for a query graph. -@return own: update node */ -UNIV_INTERN -upd_node_t* -upd_node_create( -/*============*/ - mem_heap_t* heap); /*!< in: mem heap where created */ -/***********************************************************//** -Writes to the redo log the new values of the fields occurring in the index. */ -UNIV_INTERN -void -row_upd_index_write_log( -/*====================*/ - const upd_t* update, /*!< in: update vector */ - byte* log_ptr,/*!< in: pointer to mlog buffer: must - contain at least MLOG_BUF_MARGIN bytes - of free space; the buffer is closed - within this function */ - mtr_t* mtr); /*!< in: mtr into whose log to write */ -/***********************************************************//** -Returns TRUE if row update changes size of some field in index or if some -field to be updated is stored externally in rec or update. 
-@return TRUE if the update changes the size of some field in index or -the field is external in rec or update */ -UNIV_INTERN -ibool -row_upd_changes_field_size_or_external( -/*===================================*/ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const upd_t* update);/*!< in: update vector */ -/***********************************************************//** -Returns true if row update contains disowned external fields. -@return true if the update contains disowned external fields. */ -UNIV_INTERN -bool -row_upd_changes_disowned_external( -/*==============================*/ - const upd_t* update) /*!< in: update vector */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************//** -Replaces the new column values stored in the update vector to the -record given. No field size changes are allowed. This function is -usually invoked on a clustered index. The only use case for a -secondary index is row_ins_sec_index_entry_by_modify() or its -counterpart in ibuf_insert_to_index_page(). */ -UNIV_INTERN -void -row_upd_rec_in_place( -/*=================*/ - rec_t* rec, /*!< in/out: record where replaced */ - dict_index_t* index, /*!< in: the index the record belongs to */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - const upd_t* update, /*!< in: update vector */ - page_zip_des_t* page_zip);/*!< in: compressed page with enough space - available, or NULL */ -#ifndef UNIV_HOTBACKUP -/***************************************************************//** -Builds an update vector from those fields which in a secondary index entry -differ from a record that has the equal ordering fields. NOTE: we compare -the fields as binary strings! 
-@return own: update vector of differing fields */ -UNIV_INTERN -upd_t* -row_upd_build_sec_rec_difference_binary( -/*====================================*/ - const rec_t* rec, /*!< in: secondary index record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const dtuple_t* entry, /*!< in: entry to insert */ - mem_heap_t* heap) /*!< in: memory heap from which allocated */ - MY_ATTRIBUTE((warn_unused_result, nonnull)); -/***************************************************************//** -Builds an update vector from those fields, excluding the roll ptr and -trx id fields, which in an index entry differ from a record that has -the equal ordering fields. NOTE: we compare the fields as binary strings! -@return own: update vector of differing fields, excluding roll ptr and -trx id */ -UNIV_INTERN -const upd_t* -row_upd_build_difference_binary( -/*============================*/ - dict_index_t* index, /*!< in: clustered index */ - const dtuple_t* entry, /*!< in: entry to insert */ - const rec_t* rec, /*!< in: clustered index record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */ - bool no_sys, /*!< in: skip the system columns - DB_TRX_ID and DB_ROLL_PTR */ - trx_t* trx, /*!< in: transaction (for diagnostics), - or NULL */ - mem_heap_t* heap) /*!< in: memory heap from which allocated */ - MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result)); -/***********************************************************//** -Replaces the new column values stored in the update vector to the index entry -given. 
*/ -UNIV_INTERN -void -row_upd_index_replace_new_col_vals_index_pos( -/*=========================================*/ - dtuple_t* entry, /*!< in/out: index entry where replaced; - the clustered index record must be - covered by a lock or a page latch to - prevent deletion (rollback or purge) */ - dict_index_t* index, /*!< in: index; NOTE that this may also be a - non-clustered index */ - const upd_t* update, /*!< in: an update vector built for the index so - that the field number in an upd_field is the - index position */ - ibool order_only, - /*!< in: if TRUE, limit the replacement to - ordering fields of index; note that this - does not work for non-clustered indexes. */ - mem_heap_t* heap); /*!< in: memory heap for allocating and - copying the new values */ -/***********************************************************//** -Replaces the new column values stored in the update vector to the index entry -given. */ -UNIV_INTERN -void -row_upd_index_replace_new_col_vals( -/*===============================*/ - dtuple_t* entry, /*!< in/out: index entry where replaced; - the clustered index record must be - covered by a lock or a page latch to - prevent deletion (rollback or purge) */ - dict_index_t* index, /*!< in: index; NOTE that this may also be a - non-clustered index */ - const upd_t* update, /*!< in: an update vector built for the - CLUSTERED index so that the field number in - an upd_field is the clustered index position */ - mem_heap_t* heap) /*!< in: memory heap for allocating and - copying the new values */ - MY_ATTRIBUTE((nonnull)); -/***********************************************************//** -Replaces the new column values stored in the update vector. 
*/ -UNIV_INTERN -void -row_upd_replace( -/*============*/ - dtuple_t* row, /*!< in/out: row where replaced, - indexed by col_no; - the clustered index record must be - covered by a lock or a page latch to - prevent deletion (rollback or purge) */ - row_ext_t** ext, /*!< out, own: NULL, or externally - stored column prefixes */ - const dict_index_t* index, /*!< in: clustered index */ - const upd_t* update, /*!< in: an update vector built for the - clustered index */ - mem_heap_t* heap); /*!< in: memory heap */ -/***********************************************************//** -Checks if an update vector changes an ordering field of an index record. - -This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. -NOTE: we compare the fields as binary strings! -@return TRUE if update vector changes an ordering field in the index record */ -UNIV_INTERN -ibool -row_upd_changes_ord_field_binary_func( -/*==================================*/ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update, /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! 
*/ -#ifdef UNIV_DEBUG - const que_thr_t*thr, /*!< in: query thread */ -#endif /* UNIV_DEBUG */ - const dtuple_t* row, /*!< in: old value of row, or NULL if the - row and the data values in update are not - known when this function is called, e.g., at - compile time */ - const row_ext_t*ext) /*!< NULL, or prefixes of the externally - stored columns in the old row */ - MY_ATTRIBUTE((warn_unused_result)); -#ifdef UNIV_DEBUG -# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ - row_upd_changes_ord_field_binary_func(index,update,thr,row,ext) -#else /* UNIV_DEBUG */ -# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \ - row_upd_changes_ord_field_binary_func(index,update,row,ext) -#endif /* UNIV_DEBUG */ -/***********************************************************//** -Checks if an FTS indexed column is affected by an UPDATE. -@return offset within fts_t::indexes if FTS indexed column updated else -ULINT_UNDEFINED */ -UNIV_INTERN -ulint -row_upd_changes_fts_column( -/*=======================*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* upd_field); /*!< in: field to check */ -/***********************************************************//** -Checks if an FTS Doc ID column is affected by an UPDATE. -@return whether Doc ID column is affected */ -UNIV_INTERN -bool -row_upd_changes_doc_id( -/*===================*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* upd_field) /*!< in: field to check */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************//** -Checks if an update vector changes an ordering field of an index record. -This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. -NOTE: we compare the fields as binary strings! 
-@return TRUE if update vector may change an ordering field in an index -record */ -UNIV_INTERN -ibool -row_upd_changes_some_index_ord_field_binary( -/*========================================*/ - const dict_table_t* table, /*!< in: table */ - const upd_t* update);/*!< in: update vector for the row */ -/***********************************************************//** -Updates a row in a table. This is a high-level function used -in SQL execution graphs. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_upd_step( -/*=========*/ - que_thr_t* thr); /*!< in: query thread */ -#endif /* !UNIV_HOTBACKUP */ -/*********************************************************************//** -Parses the log data of system field values. -@return log data end or NULL */ -UNIV_INTERN -byte* -row_upd_parse_sys_vals( -/*===================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - ulint* pos, /*!< out: TRX_ID position in record */ - trx_id_t* trx_id, /*!< out: trx id */ - roll_ptr_t* roll_ptr);/*!< out: roll ptr */ -/*********************************************************************//** -Updates the trx id and roll ptr field in a clustered index record in database -recovery. */ -UNIV_INTERN -void -row_upd_rec_sys_fields_in_recovery( -/*===============================*/ - rec_t* rec, /*!< in/out: record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint pos, /*!< in: TRX_ID position in rec */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */ -/*********************************************************************//** -Parses the log data written by row_upd_index_write_log. 
-@return log data end or NULL */ -UNIV_INTERN -byte* -row_upd_index_parse( -/*================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - mem_heap_t* heap, /*!< in: memory heap where update vector is - built */ - upd_t** update_out);/*!< out: update vector */ - - -/* Update vector field */ -struct upd_field_t{ - unsigned field_no:16; /*!< field number in an index, usually - the clustered index, but in updating - a secondary index record in btr0cur.cc - this is the position in the secondary - index */ -#ifndef UNIV_HOTBACKUP - unsigned orig_len:16; /*!< original length of the locally - stored part of an externally stored - column, or 0 */ - que_node_t* exp; /*!< expression for calculating a new - value: it refers to column values and - constants in the symbol table of the - query graph */ -#endif /* !UNIV_HOTBACKUP */ - dfield_t new_val; /*!< new value for the column */ -}; - -/* Update vector structure */ -struct upd_t{ - ulint info_bits; /*!< new value of info bits to record; - default is 0 */ - ulint n_fields; /*!< number of update fields */ - upd_field_t* fields; /*!< array of update fields */ -}; - -#ifndef UNIV_HOTBACKUP -/* Update node structure which also implements the delete operation -of a row */ - -struct upd_node_t{ - que_common_t common; /*!< node type: QUE_NODE_UPDATE */ - ibool is_delete;/* TRUE if delete, FALSE if update */ - ibool searched_update; - /* TRUE if searched update, FALSE if - positioned */ - ibool in_mysql_interface; - /* TRUE if the update node was created - for the MySQL interface */ - dict_foreign_t* foreign;/* NULL or pointer to a foreign key - constraint if this update node is used in - doing an ON DELETE or ON UPDATE operation */ - upd_node_t* cascade_node;/* NULL or an update node template which - is used to implement ON DELETE/UPDATE CASCADE - or ... 
SET NULL for foreign keys */ - mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade - node is created */ - sel_node_t* select; /*!< query graph subtree implementing a base - table cursor: the rows returned will be - updated */ - btr_pcur_t* pcur; /*!< persistent cursor placed on the clustered - index record which should be updated or - deleted; the cursor is stored in the graph - of 'select' field above, except in the case - of the MySQL interface */ - dict_table_t* table; /*!< table where updated */ - upd_t* update; /*!< update vector for the row */ - ulint update_n_fields; - /* when this struct is used to implement - a cascade operation for foreign keys, we store - here the size of the buffer allocated for use - as the update vector */ - sym_node_list_t columns;/* symbol table nodes for the columns - to retrieve from the table */ - ibool has_clust_rec_x_lock; - /* TRUE if the select which retrieves the - records to update already sets an x-lock on - the clustered record; note that it must always - set at least an s-lock */ - ulint cmpl_info;/* information extracted during query - compilation; speeds up execution: - UPD_NODE_NO_ORD_CHANGE and - UPD_NODE_NO_SIZE_CHANGE, ORed */ - /*----------------------*/ - /* Local storage for this graph node */ - ulint state; /*!< node execution state */ - dict_index_t* index; /*!< NULL, or the next index whose record should - be updated */ - dtuple_t* row; /*!< NULL, or a copy (also fields copied to - heap) of the row to update; this must be reset - to NULL after a successful update */ - row_ext_t* ext; /*!< NULL, or prefixes of the externally - stored columns in the old row */ - dtuple_t* upd_row;/* NULL, or a copy of the updated row */ - row_ext_t* upd_ext;/* NULL, or prefixes of the externally - stored columns in upd_row */ - mem_heap_t* heap; /*!< memory heap used as auxiliary storage; - this must be emptied after a successful - update */ - /*----------------------*/ - sym_node_t* table_sym;/* table node in 
symbol table */ - que_node_t* col_assign_list; - /* column assignment list */ - ulint magic_n; -}; - -#define UPD_NODE_MAGIC_N 1579975 - -/* Node execution states */ -#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from - a node above and if the field - has_clust_rec_x_lock is FALSE, we - should set an intention x-lock on - the table */ -#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be - updated */ -#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be - inserted, old record is already delete - marked */ -#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be - inserted, old record is already - delete-marked; non-updated BLOBs - should be inherited by the new record - and disowned by the old record */ -#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered - index record was changed, or this is - a delete operation: should update - all the secondary index records */ -#define UPD_NODE_UPDATE_SOME_SEC 6 /* secondary index entries should be - looked at and updated if an ordering - field changed */ - -/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */ -#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be - changed in the update and no ordering - field of the clustered index */ -#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be - changed in the update */ - -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_NONINL -#include "row0upd.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic deleted file mode 100644 index 618a77fa4bf..00000000000 --- a/storage/xtradb/include/row0upd.ic +++ /dev/null @@ -1,188 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0upd.ic -Update of a row - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#include "mtr0log.h" -#ifndef UNIV_HOTBACKUP -# include "trx0trx.h" -# include "trx0undo.h" -# include "row0row.h" -# include "lock0lock.h" -#endif /* !UNIV_HOTBACKUP */ -#include "page0zip.h" - -/*********************************************************************//** -Creates an update vector object. -@return own: update vector object */ -UNIV_INLINE -upd_t* -upd_create( -/*=======*/ - ulint n, /*!< in: number of fields */ - mem_heap_t* heap) /*!< in: heap from which memory allocated */ -{ - upd_t* update; - - update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t)); - - update->n_fields = n; - update->fields = (upd_field_t*) - mem_heap_zalloc(heap, sizeof(upd_field_t) * n); - - return(update); -} - -/*********************************************************************//** -Returns the number of fields in the update vector == number of columns -to be updated by an update vector. 
-@return number of fields */ -UNIV_INLINE -ulint -upd_get_n_fields( -/*=============*/ - const upd_t* update) /*!< in: update vector */ -{ - ut_ad(update); - - return(update->n_fields); -} - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Returns the nth field of an update vector. -@return update vector field */ -UNIV_INLINE -upd_field_t* -upd_get_nth_field( -/*==============*/ - const upd_t* update, /*!< in: update vector */ - ulint n) /*!< in: field position in update vector */ -{ - ut_ad(update); - ut_ad(n < update->n_fields); - - return((upd_field_t*) update->fields + n); -} -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Sets an index field number to be updated by an update vector field. */ -UNIV_INLINE -void -upd_field_set_field_no( -/*===================*/ - upd_field_t* upd_field, /*!< in: update vector field */ - ulint field_no, /*!< in: field number in a clustered - index */ - dict_index_t* index, /*!< in: index */ - trx_t* trx) /*!< in: transaction */ -{ - upd_field->field_no = field_no; - upd_field->orig_len = 0; - - if (field_no >= dict_index_get_n_fields(index)) { - fprintf(stderr, - "InnoDB: Error: trying to access field %lu in ", - (ulong) field_no); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, "\n" - "InnoDB: but index only has %lu fields\n", - (ulong) dict_index_get_n_fields(index)); - ut_ad(0); - } - - dict_col_copy_type(dict_index_get_nth_col(index, field_no), - dfield_get_type(&upd_field->new_val)); -} - -/*********************************************************************//** -Returns a field of an update vector by field_no. 
-@return update vector field, or NULL */ -UNIV_INLINE -const upd_field_t* -upd_get_field_by_field_no( -/*======================*/ - const upd_t* update, /*!< in: update vector */ - ulint no) /*!< in: field_no */ -{ - ulint i; - for (i = 0; i < upd_get_n_fields(update); i++) { - const upd_field_t* uf = upd_get_nth_field(update, i); - - if (uf->field_no == no) { - - return(uf); - } - } - - return(NULL); -} - -/*********************************************************************//** -Updates the trx id and roll ptr field in a clustered index record when -a row is updated or marked deleted. */ -UNIV_INLINE -void -row_upd_rec_sys_fields( -/*===================*/ - rec_t* rec, /*!< in/out: record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page whose - uncompressed part will be updated, or NULL */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const trx_t* trx, /*!< in: transaction */ - roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record, - can be 0 during IMPORT */ -{ - ut_ad(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (page_zip) { - ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets, - pos, trx->id, roll_ptr); - } else { - ulint offset = index->trx_id_offset; - - if (!offset) { - offset = row_get_trx_id_offset(index, offsets); - } - -#if DATA_TRX_ID + 1 != DATA_ROLL_PTR -# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR" -#endif - /* During IMPORT the trx id in the record can be in the - future, if the .ibd file is being imported from another - instance. During IMPORT roll_ptr will be 0. 
*/ - ut_ad(roll_ptr == 0 - || lock_check_trx_id_sanity( - trx_read_trx_id(rec + offset), - rec, index, offsets)); - - trx_write_trx_id(rec + offset, trx->id); - trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); - } -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/row0vers.h b/storage/xtradb/include/row0vers.h deleted file mode 100644 index 7b850215701..00000000000 --- a/storage/xtradb/include/row0vers.h +++ /dev/null @@ -1,146 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0vers.h -Row versions - -Created 2/6/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0vers_h -#define row0vers_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "rem0types.h" -#include "mtr0mtr.h" -#include "read0types.h" - -/*****************************************************************//** -Finds out if an active transaction has inserted or modified a secondary -index record. 
-@return 0 if committed, else the active transaction id; -NOTE that this function can return false positives but never false -negatives. The caller must confirm all positive results by calling -trx_is_active() while holding lock_sys->mutex. */ -UNIV_INTERN -trx_id_t -row_vers_impl_x_locked( -/*===================*/ - const rec_t* rec, /*!< in: record in a secondary index */ - dict_index_t* index, /*!< in: the secondary index */ - const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */ -/*****************************************************************//** -Finds out if we must preserve a delete marked earlier version of a clustered -index record, because it is >= the purge view. -@return TRUE if earlier version should be preserved */ -UNIV_INTERN -ibool -row_vers_must_preserve_del_marked( -/*==============================*/ - trx_id_t trx_id, /*!< in: transaction id in the version */ - mtr_t* mtr); /*!< in: mtr holding the latch on the - clustered index record; it will also - hold the latch on purge_view */ -/*****************************************************************//** -Finds out if a version of the record, where the version >= the current -purge view, should have ientry as its secondary index entry. We check -if there is any not delete marked version of the record where the trx -id >= purge view, and the secondary index entry == ientry; exactly in -this case we return TRUE. 
-@return TRUE if earlier version should have */ -UNIV_INTERN -ibool -row_vers_old_has_index_entry( -/*=========================*/ - ibool also_curr,/*!< in: TRUE if also rec is included in the - versions to search; otherwise only versions - prior to it are searched */ - const rec_t* rec, /*!< in: record in the clustered index; the - caller must have a latch on the page */ - mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will - also hold the latch on purge_view */ - dict_index_t* index, /*!< in: the secondary index */ - const dtuple_t* ientry);/*!< in: the secondary index entry */ -/*****************************************************************//** -Constructs the version of a clustered index record which a consistent -read should see. We assume that the trx id stored in rec is such that -the consistent read should not see rec in its present version. -@return DB_SUCCESS or DB_MISSING_HISTORY */ -UNIV_INTERN -dberr_t -row_vers_build_for_consistent_read( -/*===============================*/ - const rec_t* rec, /*!< in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this record */ - mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will - also hold the latch on purge_view */ - dict_index_t* index, /*!< in: the clustered index */ - ulint** offsets,/*!< in/out: offsets returned by - rec_get_offsets(rec, index) */ - read_view_t* view, /*!< in: the consistent read view */ - mem_heap_t** offset_heap,/*!< in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/*!< in: memory heap from which the memory for - *old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - rec_t** old_vers)/*!< out, own: old version, or NULL - if the history is missing or the record - does not exist in the view, that is, - it was freshly inserted afterwards */ - MY_ATTRIBUTE((nonnull(1,2,3,4,5,6,7))); - 
-/*****************************************************************//** -Constructs the last committed version of a clustered index record, -which should be seen by a semi-consistent read. */ -UNIV_INTERN -void -row_vers_build_for_semi_consistent_read( -/*====================================*/ - const rec_t* rec, /*!< in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this record */ - mtr_t* mtr, /*!< in: mtr holding the latch on rec */ - dict_index_t* index, /*!< in: the clustered index */ - ulint** offsets,/*!< in/out: offsets returned by - rec_get_offsets(rec, index) */ - mem_heap_t** offset_heap,/*!< in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/*!< in: memory heap from which the memory for - *old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - const rec_t** old_vers)/*!< out: rec, old version, or NULL if the - record does not exist in the view, that is, - it was freshly inserted afterwards */ - MY_ATTRIBUTE((nonnull(1,2,3,4,5))); - - -#ifndef UNIV_NONINL -#include "row0vers.ic" -#endif - -#endif diff --git a/storage/xtradb/include/row0vers.ic b/storage/xtradb/include/row0vers.ic deleted file mode 100644 index ef43a55bf70..00000000000 --- a/storage/xtradb/include/row0vers.ic +++ /dev/null @@ -1,30 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0vers.ic -Row versions - -Created 2/6/1997 Heikki Tuuri -*******************************************************/ - -#include "row0row.h" -#include "dict0dict.h" -#include "read0read.h" -#include "page0page.h" -#include "log0recv.h" diff --git a/storage/xtradb/include/srv0conc.h b/storage/xtradb/include/srv0conc.h deleted file mode 100644 index cf61ef5528d..00000000000 --- a/storage/xtradb/include/srv0conc.h +++ /dev/null @@ -1,111 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/srv0conc.h - -InnoDB concurrency manager header file - -Created 2011/04/18 Sunny Bains -*******************************************************/ - -#ifndef srv_conc_h -#define srv_conc_h - -/** We are prepared for a situation that we have this many threads waiting for -a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the -value. */ - -extern ulint srv_max_n_threads; - -/** The following controls how many threads we let inside InnoDB concurrently: -threads waiting for locks are not counted into the number because otherwise -we could get a deadlock. Value of 0 will disable the concurrency check. */ - -extern ulong srv_thread_concurrency; - -/*********************************************************************//** -Initialise the concurrency management data structures */ -void -srv_conc_init(void); -/*===============*/ - -/*********************************************************************//** -Free the concurrency management data structures */ -void -srv_conc_free(void); -/*===============*/ - -/*********************************************************************//** -Puts an OS thread to wait if there are too many concurrent threads -(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. 
*/ -UNIV_INTERN -void -srv_conc_enter_innodb( -/*==================*/ - trx_t* trx); /*!< in: transaction object associated - with the thread */ - -/*********************************************************************//** -This lets a thread enter InnoDB regardless of the number of threads inside -InnoDB. This must be called when a thread ends a lock wait. */ -UNIV_INTERN -void -srv_conc_force_enter_innodb( -/*========================*/ - trx_t* trx); /*!< in: transaction object associated with - the thread */ - -/*********************************************************************//** -This must be called when a thread exits InnoDB in a lock wait or at the -end of an SQL statement. */ -UNIV_INTERN -void -srv_conc_force_exit_innodb( -/*=======================*/ - trx_t* trx); /*!< in: transaction object associated with - the thread */ - -/*********************************************************************//** -Get the count of threads waiting inside InnoDB. */ -UNIV_INTERN -ulint -srv_conc_get_waiting_threads(void); -/*==============================*/ - -/*********************************************************************//** -Get the count of threads active inside InnoDB. */ -UNIV_INTERN -ulint -srv_conc_get_active_threads(void); -/*==============================*/ - -#endif /* srv_conc_h */ diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h deleted file mode 100644 index 63fd449ee18..00000000000 --- a/storage/xtradb/include/srv0mon.h +++ /dev/null @@ -1,961 +0,0 @@ -/*********************************************************************** - -Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -***********************************************************************/ - -/**************************************************//** -@file include/srv0mon.h -Server monitor counter related defines - -Created 12/15/2009 Jimmy Yang -*******************************************************/ - -#ifndef srv0mon_h -#define srv0mon_h - -#include "univ.i" -#ifndef UNIV_HOTBACKUP - - -/** Possible status values for "mon_status" in "struct monitor_value" */ -enum monitor_running_status { - MONITOR_STARTED = 1, /*!< Monitor has been turned on */ - MONITOR_STOPPED = 2 /*!< Monitor has been turned off */ -}; - -typedef enum monitor_running_status monitor_running_t; - -/** Monitor counter value type */ -typedef ib_int64_t mon_type_t; - -/** Two monitor structures are defined in this file. One is -"monitor_value_t" which contains dynamic counter values for each -counter. The other is "monitor_info_t", which contains -static information (counter name, desc etc.) for each counter. -In addition, an enum datatype "monitor_id_t" is also defined, -it identifies each monitor with an internally used symbol, whose -integer value indexes into above two structure for its dynamic -and static information. -Developer who intend to add new counters would require to -fill in counter information as described in "monitor_info_t" and -create the internal counter ID in "monitor_id_t". */ - -/** Structure containing the actual values of a monitor counter. 
*/ -struct monitor_value_t { - ib_time_t mon_start_time; /*!< Start time of monitoring */ - ib_time_t mon_stop_time; /*!< Stop time of monitoring */ - ib_time_t mon_reset_time; /*!< Time the counter was reset */ - mon_type_t mon_value; /*!< Current counter Value */ - mon_type_t mon_max_value; /*!< Current Max value */ - mon_type_t mon_min_value; /*!< Current Min value */ - mon_type_t mon_value_reset;/*!< value at last reset */ - mon_type_t mon_max_value_start; /*!< Max value since start */ - mon_type_t mon_min_value_start; /*!< Min value since start */ - mon_type_t mon_start_value;/*!< Value at the start time */ - mon_type_t mon_last_value; /*!< Last set of values */ - monitor_running_t mon_status; /* whether monitor still running */ -}; - -/** Following defines are possible values for "monitor_type" field in -"struct monitor_info" */ -enum monitor_type_t { - MONITOR_NONE = 0, /*!< No monitoring */ - MONITOR_MODULE = 1, /*!< This is a monitor module type, - not a counter */ - MONITOR_EXISTING = 2, /*!< The monitor carries information from - an existing system status variable */ - MONITOR_NO_AVERAGE = 4, /*!< Set this status if we don't want to - calculate the average value for the counter */ - MONITOR_DISPLAY_CURRENT = 8, /*!< Display current value of the - counter, rather than incremental value - over the period. 
Mostly for counters - displaying current resource usage */ - MONITOR_GROUP_MODULE = 16, /*!< Monitor can be turned on/off - only as a module, but not individually */ - MONITOR_DEFAULT_ON = 32,/*!< Monitor will be turned on by default at - server start up */ - MONITOR_SET_OWNER = 64, /*!< Owner of "monitor set", a set of - monitor counters */ - MONITOR_SET_MEMBER = 128,/*!< Being part of a "monitor set" */ - MONITOR_HIDDEN = 256 /*!< Do not display this monitor in the - metrics table */ -}; - -/** Counter minimum value is initialized to be max value of - mon_type_t (ib_int64_t) */ -#define MIN_RESERVED ((mon_type_t) (IB_UINT64_MAX >> 1)) -#define MAX_RESERVED (~MIN_RESERVED) - -/** This enumeration defines internal monitor identifier used internally -to identify each particular counter. Its value indexes into two arrays, -one is the "innodb_counter_value" array which records actual monitor -counter values, the other is "innodb_counter_info" array which describes -each counter's basic information (name, desc etc.). 
A couple of -naming rules here: -1) If the monitor defines a module, it starts with MONITOR_MODULE -2) If the monitor uses existing counters from "status variable", its ID -name shall start with MONITOR_OVLD - -Please refer to "innodb_counter_info" in srv/srv0mon.cc for detailed -information for each monitor counter */ - -enum monitor_id_t { - /* This is to identify the default value set by the metrics - control global variables */ - MONITOR_DEFAULT_START = 0, - - /* Start of Metadata counter */ - MONITOR_MODULE_METADATA, - MONITOR_TABLE_OPEN, - MONITOR_TABLE_CLOSE, - MONITOR_TABLE_REFERENCE, - MONITOR_OVLD_META_MEM_POOL, - - /* Lock manager related counters */ - MONITOR_MODULE_LOCK, - MONITOR_DEADLOCK, - MONITOR_TIMEOUT, - MONITOR_LOCKREC_WAIT, - MONITOR_TABLELOCK_WAIT, - MONITOR_NUM_RECLOCK_REQ, - MONITOR_RECLOCK_CREATED, - MONITOR_RECLOCK_REMOVED, - MONITOR_NUM_RECLOCK, - MONITOR_TABLELOCK_CREATED, - MONITOR_TABLELOCK_REMOVED, - MONITOR_NUM_TABLELOCK, - MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT, - MONITOR_OVLD_LOCK_WAIT_TIME, - MONITOR_OVLD_LOCK_MAX_WAIT_TIME, - MONITOR_OVLD_ROW_LOCK_WAIT, - MONITOR_OVLD_LOCK_AVG_WAIT_TIME, - - /* Buffer and I/O related counters. 
*/ - MONITOR_MODULE_BUFFER, - MONITOR_OVLD_BUFFER_POOL_SIZE, - MONITOR_OVLD_BUF_POOL_READS, - MONITOR_OVLD_BUF_POOL_READ_REQUESTS, - MONITOR_OVLD_BUF_POOL_WRITE_REQUEST, - MONITOR_OVLD_BUF_POOL_WAIT_FREE, - MONITOR_OVLD_BUF_POOL_READ_AHEAD, - MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED, - MONITOR_OVLD_BUF_POOL_PAGE_TOTAL, - MONITOR_OVLD_BUF_POOL_PAGE_MISC, - MONITOR_OVLD_BUF_POOL_PAGES_DATA, - MONITOR_OVLD_BUF_POOL_BYTES_DATA, - MONITOR_OVLD_BUF_POOL_PAGES_DIRTY, - MONITOR_OVLD_BUF_POOL_BYTES_DIRTY, - MONITOR_OVLD_BUF_POOL_PAGES_FREE, - MONITOR_OVLD_PAGE_CREATED, - MONITOR_OVLD_PAGES_WRITTEN, - MONITOR_OVLD_INDEX_PAGES_WRITTEN, - MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN, - MONITOR_OVLD_PAGES_READ, - MONITOR_OVLD_PAGES0_READ, - MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS, - MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED, - MONITOR_OVLD_BYTE_READ, - MONITOR_OVLD_BYTE_WRITTEN, - MONITOR_FLUSH_BATCH_SCANNED, - MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, - MONITOR_FLUSH_BATCH_SCANNED_PER_CALL, - MONITOR_FLUSH_HP_RESCAN, - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, - MONITOR_FLUSH_NEIGHBOR_COUNT, - MONITOR_FLUSH_NEIGHBOR_PAGES, - MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, - MONITOR_FLUSH_AVG_PAGE_RATE, - MONITOR_FLUSH_LSN_AVG_RATE, - MONITOR_FLUSH_PCT_FOR_DIRTY, - MONITOR_FLUSH_PCT_FOR_LSN, - MONITOR_FLUSH_SYNC_WAITS, - MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, - MONITOR_FLUSH_ADAPTIVE_COUNT, - MONITOR_FLUSH_ADAPTIVE_PAGES, - MONITOR_FLUSH_SYNC_TOTAL_PAGE, - MONITOR_FLUSH_SYNC_COUNT, - MONITOR_FLUSH_SYNC_PAGES, - MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, - MONITOR_FLUSH_BACKGROUND_COUNT, - MONITOR_FLUSH_BACKGROUND_PAGES, - MONITOR_LRU_BATCH_SCANNED, - MONITOR_LRU_BATCH_SCANNED_NUM_CALL, - MONITOR_LRU_BATCH_SCANNED_PER_CALL, - MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_COUNT, - MONITOR_LRU_BATCH_FLUSH_PAGES, - MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT, - 
MONITOR_LRU_BATCH_EVICT_PAGES, - MONITOR_LRU_SINGLE_FLUSH_SCANNED, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL, - MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT, - MONITOR_LRU_GET_FREE_SEARCH, - MONITOR_LRU_SEARCH_SCANNED, - MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, - MONITOR_LRU_SEARCH_SCANNED_PER_CALL, - MONITOR_LRU_UNZIP_SEARCH_SCANNED, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL, - - /* Buffer Page I/O specific counters. */ - MONITOR_MODULE_BUF_PAGE, - MONITOR_INDEX_LEAF_PAGE_READ, - MONITOR_INDEX_NON_LEAF_PAGE_READ, - MONITOR_INDEX_IBUF_LEAF_PAGE_READ, - MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ, - MONITOR_UNDO_LOG_PAGE_READ, - MONITOR_INODE_PAGE_READ, - MONITOR_IBUF_FREELIST_PAGE_READ, - MONITOR_IBUF_BITMAP_PAGE_READ, - MONITOR_SYSTEM_PAGE_READ, - MONITOR_TRX_SYSTEM_PAGE_READ, - MONITOR_FSP_HDR_PAGE_READ, - MONITOR_XDES_PAGE_READ, - MONITOR_BLOB_PAGE_READ, - MONITOR_ZBLOB_PAGE_READ, - MONITOR_ZBLOB2_PAGE_READ, - MONITOR_OTHER_PAGE_READ, - MONITOR_INDEX_LEAF_PAGE_WRITTEN, - MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN, - MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN, - MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN, - MONITOR_UNDO_LOG_PAGE_WRITTEN, - MONITOR_INODE_PAGE_WRITTEN, - MONITOR_IBUF_FREELIST_PAGE_WRITTEN, - MONITOR_IBUF_BITMAP_PAGE_WRITTEN, - MONITOR_SYSTEM_PAGE_WRITTEN, - MONITOR_TRX_SYSTEM_PAGE_WRITTEN, - MONITOR_FSP_HDR_PAGE_WRITTEN, - MONITOR_XDES_PAGE_WRITTEN, - MONITOR_BLOB_PAGE_WRITTEN, - MONITOR_ZBLOB_PAGE_WRITTEN, - MONITOR_ZBLOB2_PAGE_WRITTEN, - MONITOR_OTHER_PAGE_WRITTEN, - - /* OS level counters (I/O) */ - MONITOR_MODULE_OS, - MONITOR_OVLD_OS_FILE_READ, - MONITOR_OVLD_OS_FILE_WRITE, - MONITOR_OVLD_OS_FSYNC, - MONITOR_OS_PENDING_READS, - MONITOR_OS_PENDING_WRITES, - MONITOR_OVLD_OS_LOG_WRITTEN, - MONITOR_OVLD_OS_LOG_FSYNC, - MONITOR_OVLD_OS_LOG_PENDING_FSYNC, - MONITOR_OVLD_OS_LOG_PENDING_WRITES, - - /* Transaction related counters */ - MONITOR_MODULE_TRX, - MONITOR_TRX_RW_COMMIT, - 
MONITOR_TRX_RO_COMMIT, - MONITOR_TRX_NL_RO_COMMIT, - MONITOR_TRX_COMMIT_UNDO, - MONITOR_TRX_ROLLBACK, - MONITOR_TRX_ROLLBACK_SAVEPOINT, - MONITOR_TRX_ROLLBACK_ACTIVE, - MONITOR_TRX_ACTIVE, - MONITOR_RSEG_HISTORY_LEN, - MONITOR_NUM_UNDO_SLOT_USED, - MONITOR_NUM_UNDO_SLOT_CACHED, - MONITOR_RSEG_CUR_SIZE, - - /* Purge related counters */ - MONITOR_MODULE_PURGE, - MONITOR_N_DEL_ROW_PURGE, - MONITOR_N_UPD_EXIST_EXTERN, - MONITOR_PURGE_INVOKED, - MONITOR_PURGE_N_PAGE_HANDLED, - MONITOR_DML_PURGE_DELAY, - MONITOR_PURGE_STOP_COUNT, - MONITOR_PURGE_RESUME_COUNT, - - /* Recovery related counters */ - MONITOR_MODULE_RECOVERY, - MONITOR_NUM_CHECKPOINT, - MONITOR_OVLD_LSN_FLUSHDISK, - MONITOR_OVLD_LSN_CHECKPOINT, - MONITOR_OVLD_LSN_CURRENT, - MONITOR_LSN_CHECKPOINT_AGE, - MONITOR_OVLD_BUF_OLDEST_LSN, - MONITOR_OVLD_MAX_AGE_ASYNC, - MONITOR_OVLD_MAX_AGE_SYNC, - MONITOR_PENDING_LOG_WRITE, - MONITOR_PENDING_CHECKPOINT_WRITE, - MONITOR_LOG_IO, - MONITOR_OVLD_LOG_WAITS, - MONITOR_OVLD_LOG_WRITE_REQUEST, - MONITOR_OVLD_LOG_WRITES, - - /* Page Manager related counters */ - MONITOR_MODULE_PAGE, - MONITOR_PAGE_COMPRESS, - MONITOR_PAGE_DECOMPRESS, - MONITOR_PAD_INCREMENTS, - MONITOR_PAD_DECREMENTS, - - /* New monitor variables for page compression */ - MONITOR_OVLD_PAGE_COMPRESS_SAVED, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384, - MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768, - MONITOR_OVLD_PAGES_PAGE_COMPRESSED, - MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP, - MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED, - MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, - MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, - - /* New monitor variables for page encryption */ - MONITOR_OVLD_PAGES_ENCRYPTED, - MONITOR_OVLD_PAGES_DECRYPTED, - - /* Index related counters */ - MONITOR_MODULE_INDEX, - MONITOR_INDEX_SPLIT, - 
MONITOR_INDEX_MERGE_ATTEMPTS, - MONITOR_INDEX_MERGE_SUCCESSFUL, - MONITOR_INDEX_REORG_ATTEMPTS, - MONITOR_INDEX_REORG_SUCCESSFUL, - MONITOR_INDEX_DISCARD, - - /* Adaptive Hash Index related counters */ - MONITOR_MODULE_ADAPTIVE_HASH, - MONITOR_OVLD_ADAPTIVE_HASH_SEARCH, - MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE, - MONITOR_ADAPTIVE_HASH_PAGE_ADDED, - MONITOR_ADAPTIVE_HASH_PAGE_REMOVED, - MONITOR_ADAPTIVE_HASH_ROW_ADDED, - MONITOR_ADAPTIVE_HASH_ROW_REMOVED, - MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND, - MONITOR_ADAPTIVE_HASH_ROW_UPDATED, - - /* Tablespace related counters */ - MONITOR_MODULE_FIL_SYSTEM, - MONITOR_OVLD_N_FILE_OPENED, - - /* InnoDB Change Buffer related counters */ - MONITOR_MODULE_IBUF_SYSTEM, - MONITOR_OVLD_IBUF_MERGE_INSERT, - MONITOR_OVLD_IBUF_MERGE_DELETE, - MONITOR_OVLD_IBUF_MERGE_PURGE, - MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT, - MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE, - MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE, - MONITOR_OVLD_IBUF_MERGES, - MONITOR_OVLD_IBUF_SIZE, - - /* Counters for server operations */ - MONITOR_MODULE_SERVER, - MONITOR_MASTER_THREAD_SLEEP, - MONITOR_OVLD_SERVER_ACTIVITY, - MONITOR_MASTER_ACTIVE_LOOPS, - MONITOR_MASTER_IDLE_LOOPS, - MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, - MONITOR_SRV_IBUF_MERGE_MICROSECOND, - MONITOR_SRV_LOG_FLUSH_MICROSECOND, - MONITOR_SRV_MEM_VALIDATE_MICROSECOND, - MONITOR_SRV_PURGE_MICROSECOND, - MONITOR_SRV_DICT_LRU_MICROSECOND, - MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, - MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, - MONITOR_SRV_CHECKPOINT_MICROSECOND, - MONITOR_OVLD_SRV_DBLWR_WRITES, - MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN, - MONITOR_OVLD_SRV_PAGE_SIZE, - MONITOR_OVLD_RWLOCK_S_SPIN_WAITS, - MONITOR_OVLD_RWLOCK_X_SPIN_WAITS, - MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS, - MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS, - MONITOR_OVLD_RWLOCK_S_OS_WAITS, - MONITOR_OVLD_RWLOCK_X_OS_WAITS, - - /* Data DML related counters */ - MONITOR_MODULE_DML_STATS, - MONITOR_OLVD_ROW_READ, - MONITOR_OLVD_ROW_INSERTED, - 
MONITOR_OLVD_ROW_DELETED, - MONITOR_OLVD_ROW_UPDTATED, - MONITOR_OLVD_SYSTEM_ROW_READ, - MONITOR_OLVD_SYSTEM_ROW_INSERTED, - MONITOR_OLVD_SYSTEM_ROW_DELETED, - MONITOR_OLVD_SYSTEM_ROW_UPDATED, - - /* Data DDL related counters */ - MONITOR_MODULE_DDL_STATS, - MONITOR_BACKGROUND_DROP_INDEX, - MONITOR_BACKGROUND_DROP_TABLE, - MONITOR_ONLINE_CREATE_INDEX, - MONITOR_PENDING_ALTER_TABLE, - - MONITOR_MODULE_ICP, - MONITOR_ICP_ATTEMPTS, - MONITOR_ICP_NO_MATCH, - MONITOR_ICP_OUT_OF_RANGE, - MONITOR_ICP_MATCH, - - /* This is used only for control system to turn - on/off and reset all monitor counters */ - MONITOR_ALL_COUNTER, - - /* This must be the last member */ - NUM_MONITOR -}; - -/** This informs the monitor control system to turn -on/off and reset monitor counters through wild card match */ -#define MONITOR_WILDCARD_MATCH (NUM_MONITOR + 1) - -/** Cannot find monitor counter with a specified name */ -#define MONITOR_NO_MATCH (NUM_MONITOR + 2) - -/** struct monitor_info describes the basic/static information -about each monitor counter. */ -struct monitor_info_t { - const char* monitor_name; /*!< Monitor name */ - const char* monitor_module; /*!< Sub Module the monitor - belongs to */ - const char* monitor_desc; /*!< Brief desc of monitor counter */ - monitor_type_t monitor_type; /*!< Type of Monitor Info */ - monitor_id_t monitor_related_id;/*!< Monitor ID of counter that - related to this monitor. This is - set when the monitor belongs to - a "monitor set" */ - monitor_id_t monitor_id; /*!< Monitor ID as defined in enum - monitor_id_t */ -}; - -/** Following are the "set_option" values allowed for -srv_mon_process_existing_counter() and srv_mon_process_existing_counter() -functions. To turn on/off/reset the monitor counters. 
*/ -enum mon_option_t { - MONITOR_TURN_ON = 1, /*!< Turn on the counter */ - MONITOR_TURN_OFF, /*!< Turn off the counter */ - MONITOR_RESET_VALUE, /*!< Reset current values */ - MONITOR_RESET_ALL_VALUE, /*!< Reset all values */ - MONITOR_GET_VALUE /*!< Option for - srv_mon_process_existing_counter() - function */ -}; - -/** Number of bit in a ulint datatype */ -#define NUM_BITS_ULINT (sizeof(ulint) * CHAR_BIT) - -/** This "monitor_set_tbl" is a bitmap records whether a particular monitor -counter has been turned on or off */ -extern ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) / - NUM_BITS_ULINT]; - -/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor -counter option. */ -#define MONITOR_ON(monitor) \ - (monitor_set_tbl[monitor / NUM_BITS_ULINT] |= \ - ((ulint)1 << (monitor % NUM_BITS_ULINT))) - -#define MONITOR_OFF(monitor) \ - (monitor_set_tbl[monitor / NUM_BITS_ULINT] &= \ - ~((ulint)1 << (monitor % NUM_BITS_ULINT))) - -/** Check whether the requested monitor is turned on/off */ -#define MONITOR_IS_ON(monitor) \ - (monitor_set_tbl[monitor / NUM_BITS_ULINT] & \ - ((ulint)1 << (monitor % NUM_BITS_ULINT))) - -/** The actual monitor counter array that records each monintor counter -value */ -extern monitor_value_t innodb_counter_value[NUM_MONITOR]; - -/** Following are macro defines for basic montior counter manipulations. -Please note we do not provide any synchronization for these monitor -operations due to performance consideration. Most counters can -be placed under existing mutex protections in respective code -module. 
*/ - -/** Macros to access various fields of a monitor counters */ -#define MONITOR_FIELD(monitor, field) \ - (innodb_counter_value[monitor].field) - -#define MONITOR_VALUE(monitor) \ - MONITOR_FIELD(monitor, mon_value) - -#define MONITOR_MAX_VALUE(monitor) \ - MONITOR_FIELD(monitor, mon_max_value) - -#define MONITOR_MIN_VALUE(monitor) \ - MONITOR_FIELD(monitor, mon_min_value) - -#define MONITOR_VALUE_RESET(monitor) \ - MONITOR_FIELD(monitor, mon_value_reset) - -#define MONITOR_MAX_VALUE_START(monitor) \ - MONITOR_FIELD(monitor, mon_max_value_start) - -#define MONITOR_MIN_VALUE_START(monitor) \ - MONITOR_FIELD(monitor, mon_min_value_start) - -#define MONITOR_LAST_VALUE(monitor) \ - MONITOR_FIELD(monitor, mon_last_value) - -#define MONITOR_START_VALUE(monitor) \ - MONITOR_FIELD(monitor, mon_start_value) - -#define MONITOR_VALUE_SINCE_START(monitor) \ - (MONITOR_VALUE(monitor) + MONITOR_VALUE_RESET(monitor)) - -#define MONITOR_STATUS(monitor) \ - MONITOR_FIELD(monitor, mon_status) - -#define MONITOR_SET_START(monitor) \ - do { \ - MONITOR_STATUS(monitor) = MONITOR_STARTED; \ - MONITOR_FIELD((monitor), mon_start_time) = time(NULL); \ - } while (0) - -#define MONITOR_SET_OFF(monitor) \ - do { \ - MONITOR_STATUS(monitor) = MONITOR_STOPPED; \ - MONITOR_FIELD((monitor), mon_stop_time) = time(NULL); \ - } while (0) - -#define MONITOR_INIT_ZERO_VALUE 0 - -/** Max and min values are initialized when we first turn on the monitor -counter, and set the MONITOR_STATUS. 
*/ -#define MONITOR_MAX_MIN_NOT_INIT(monitor) \ - (MONITOR_STATUS(monitor) == MONITOR_INIT_ZERO_VALUE \ - && MONITOR_MIN_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE \ - && MONITOR_MAX_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE) - -#define MONITOR_INIT(monitor) \ - if (MONITOR_MAX_MIN_NOT_INIT(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \ - MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \ - MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \ - MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \ - } - -/** Macros to increment/decrement the counters. The normal -monitor counter operation expects appropriate synchronization -already exists. No additional mutex is necessary when operating -on the counters */ -#define MONITOR_INC(monitor) \ - if (MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor)++; \ - if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -/** Increment a monitor counter under mutex protection. -Use MONITOR_INC if appropriate mutex protection already exists. -@param mutex mutex to acquire and release -@param monitor monitor to be incremented by 1 -@param enabled whether the monitor is enabled */ -#define MONITOR_MUTEX_INC_LOW(mutex, monitor, enabled) \ - ut_ad(!mutex_own(mutex)); \ - if (enabled) { \ - mutex_enter(mutex); \ - if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \ - } \ - mutex_exit(mutex); \ - } -/** Increment a monitor counter under mutex protection. -Use MONITOR_INC if appropriate mutex protection already exists. -@param mutex mutex to acquire and release -@param monitor monitor to be incremented by 1 */ -#define MONITOR_MUTEX_INC(mutex, monitor) \ - MONITOR_MUTEX_INC_LOW(mutex, monitor, MONITOR_IS_ON(monitor)) -/** Decrement a monitor counter under mutex protection. -Use MONITOR_DEC if appropriate mutex protection already exists. 
-@param mutex mutex to acquire and release -@param monitor monitor to be decremented by 1 -@param enabled whether the monitor is enabled */ -#define MONITOR_MUTEX_DEC_LOW(mutex, monitor, enabled) \ - ut_ad(!mutex_own(mutex)); \ - if (MONITOR_IS_ON(monitor)) { \ - mutex_enter(mutex); \ - if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \ - } \ - mutex_exit(mutex); \ - } -/** Decrement a monitor counter under mutex protection. -Use MONITOR_DEC if appropriate mutex protection already exists. -@param mutex mutex to acquire and release -@param monitor monitor to be decremented by 1 */ -#define MONITOR_MUTEX_DEC(mutex, monitor) \ - MONITOR_MUTEX_DEC_LOW(mutex, monitor, MONITOR_IS_ON(monitor)) - -#if defined HAVE_ATOMIC_BUILTINS_64 -/** Atomically increment a monitor counter. -Use MONITOR_INC if appropriate mutex protection exists. -@param monitor monitor to be incremented by 1 -@param enabled whether the monitor is enabled */ -# define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \ - if (enabled) { \ - ib_uint64_t value; \ - value = os_atomic_increment_uint64( \ - (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \ - /* Note: This is not 100% accurate because of the \ - inherent race, we ignore it due to performance. */ \ - if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = value; \ - } \ - } - -/** Atomically decrement a monitor counter. -Use MONITOR_DEC if appropriate mutex protection exists. -@param monitor monitor to be decremented by 1 -@param enabled whether the monitor is enabled */ -# define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \ - if (enabled) { \ - ib_uint64_t value; \ - value = os_atomic_decrement_uint64( \ - (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \ - /* Note: This is not 100% accurate because of the \ - inherent race, we ignore it due to performance. 
*/ \ - if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = value; \ - } \ - } -# define srv_mon_create() ((void) 0) -# define srv_mon_free() ((void) 0) -#else /* HAVE_ATOMIC_BUILTINS_64 */ -/** Mutex protecting atomic operations on platforms that lack -built-in operations for atomic memory access */ -extern ib_mutex_t monitor_mutex; -/****************************************************************//** -Initialize the monitor subsystem. */ -UNIV_INTERN -void -srv_mon_create(void); -/*================*/ -/****************************************************************//** -Close the monitor subsystem. */ -UNIV_INTERN -void -srv_mon_free(void); -/*==============*/ - -/** Atomically increment a monitor counter. -Use MONITOR_INC if appropriate mutex protection exists. -@param monitor monitor to be incremented by 1 -@param enabled whether the monitor is enabled */ -# define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \ - MONITOR_MUTEX_INC_LOW(&monitor_mutex, monitor, enabled) -/** Atomically decrement a monitor counter. -Use MONITOR_DEC if appropriate mutex protection exists. -@param monitor monitor to be decremented by 1 -@param enabled whether the monitor is enabled */ -# define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \ - MONITOR_MUTEX_DEC_LOW(&monitor_mutex, monitor, enabled) -#endif /* HAVE_ATOMIC_BUILTINS_64 */ - -/** Atomically increment a monitor counter if it is enabled. -Use MONITOR_INC if appropriate mutex protection exists. -@param monitor monitor to be incremented by 1 */ -#define MONITOR_ATOMIC_INC(monitor) \ - MONITOR_ATOMIC_INC_LOW(monitor, MONITOR_IS_ON(monitor)) -/** Atomically decrement a monitor counter if it is enabled. -Use MONITOR_DEC if appropriate mutex protection exists. 
-@param monitor monitor to be decremented by 1 */ -#define MONITOR_ATOMIC_DEC(monitor) \ - MONITOR_ATOMIC_DEC_LOW(monitor, MONITOR_IS_ON(monitor)) - -#define MONITOR_DEC(monitor) \ - if (MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor)--; \ - if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -#ifdef UNIV_DEBUG_VALGRIND -# define MONITOR_CHECK_DEFINED(value) do { \ - mon_type_t m = value; \ - UNIV_MEM_ASSERT_RW(&m, sizeof m); \ -} while (0) -#else /* UNIV_DEBUG_VALGRIND */ -# define MONITOR_CHECK_DEFINED(value) (void) 0 -#endif /* UNIV_DEBUG_VALGRIND */ - -#define MONITOR_INC_VALUE(monitor, value) \ - MONITOR_CHECK_DEFINED(value); \ - if (MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor) += (mon_type_t) (value); \ - if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -#define MONITOR_DEC_VALUE(monitor, value) \ - MONITOR_CHECK_DEFINED(value); \ - if (MONITOR_IS_ON(monitor)) { \ - ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value); \ - MONITOR_VALUE(monitor) -= (mon_type_t) (value); \ - if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -/* Increment/decrement counter without check the monitor on/off bit, which -could already be checked as a module group */ -#define MONITOR_INC_NOCHECK(monitor) \ - do { \ - MONITOR_VALUE(monitor)++; \ - if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } while (0) \ - -#define MONITOR_DEC_NOCHECK(monitor) \ - do { \ - MONITOR_VALUE(monitor)--; \ - if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } while (0) - -/** Directly set a monitor counter's value */ -#define MONITOR_SET(monitor, value) \ - MONITOR_CHECK_DEFINED(value); \ - if 
(MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor) = (mon_type_t) (value); \ - if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ - MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -/** Add time difference between now and input "value" (in seconds) to the -monitor counter -@param monitor monitor to update for the time difference -@param value the start time value */ -#define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \ - MONITOR_CHECK_DEFINED(value); \ - if (MONITOR_IS_ON(monitor)) { \ - ullint old_time = (value); \ - value = ut_time_us(NULL); \ - MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\ - } - -/** This macro updates 3 counters in one call. However, it only checks the -main/first monitor counter 'monitor', to see it is on or off to decide -whether to do the update. -@param monitor the main monitor counter to update. It accounts for - the accumulative value for the counter. 
-@param monitor_n_calls counter that counts number of times this macro is - called -@param monitor_per_call counter that records the current and max value of - each incremental value -@param value incremental value to record this time */ -#define MONITOR_INC_VALUE_CUMULATIVE( \ - monitor, monitor_n_calls, monitor_per_call, value) \ - MONITOR_CHECK_DEFINED(value); \ - if (MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor_n_calls)++; \ - MONITOR_VALUE(monitor_per_call) = (mon_type_t) (value); \ - if (MONITOR_VALUE(monitor_per_call) \ - > MONITOR_MAX_VALUE(monitor_per_call)) { \ - MONITOR_MAX_VALUE(monitor_per_call) = \ - (mon_type_t) (value); \ - } \ - MONITOR_VALUE(monitor) += (mon_type_t) (value); \ - if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -/** Directly set a monitor counter's value, and if the value -is monotonically increasing, only max value needs to be updated */ -#define MONITOR_SET_UPD_MAX_ONLY(monitor, value) \ - MONITOR_CHECK_DEFINED(value); \ - if (MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor) = (mon_type_t) (value); \ - if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ - MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ - } \ - } - -/** Some values such as log sequence number are montomically increasing -number, do not need to record max/min values */ -#define MONITOR_SET_SIMPLE(monitor, value) \ - MONITOR_CHECK_DEFINED(value); \ - if (MONITOR_IS_ON(monitor)) { \ - MONITOR_VALUE(monitor) = (mon_type_t) (value); \ - } - -/** Reset the monitor value and max/min value to zero. 
The reset -operation would only be conducted when the counter is turned off */ -#define MONITOR_RESET_ALL(monitor) \ - do { \ - MONITOR_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \ - MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \ - MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \ - MONITOR_VALUE_RESET(monitor) = MONITOR_INIT_ZERO_VALUE; \ - MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \ - MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \ - MONITOR_LAST_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \ - MONITOR_FIELD(monitor, mon_start_time) = \ - MONITOR_INIT_ZERO_VALUE; \ - MONITOR_FIELD(monitor, mon_stop_time) = \ - MONITOR_INIT_ZERO_VALUE; \ - MONITOR_FIELD(monitor, mon_reset_time) = \ - MONITOR_INIT_ZERO_VALUE; \ - } while (0) - -/** Following four macros defines necessary operations to fetch and -consolidate information from existing system status variables. */ - -/** Save the passed-in value to mon_start_value field of monitor -counters */ -#define MONITOR_SAVE_START(monitor, value) do { \ - MONITOR_CHECK_DEFINED(value); \ - (MONITOR_START_VALUE(monitor) = \ - (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor)); \ - } while (0) - -/** Save the passed-in value to mon_last_value field of monitor -counters */ -#define MONITOR_SAVE_LAST(monitor) \ - do { \ - MONITOR_LAST_VALUE(monitor) = MONITOR_VALUE(monitor); \ - MONITOR_START_VALUE(monitor) += MONITOR_VALUE(monitor); \ - } while (0) - -/** Set monitor value to the difference of value and mon_start_value -compensated by mon_last_value if accumulated value is required. 
*/ -#define MONITOR_SET_DIFF(monitor, value) \ - MONITOR_SET_UPD_MAX_ONLY(monitor, ((value) \ - - MONITOR_VALUE_RESET(monitor) \ - - MONITOR_FIELD(monitor, mon_start_value) \ - + MONITOR_FIELD(monitor, mon_last_value))) - -/****************************************************************//** -Get monitor's monitor_info_t by its monitor id (index into the -innodb_counter_info array -@return Point to corresponding monitor_info_t, or NULL if no such -monitor */ -UNIV_INTERN -monitor_info_t* -srv_mon_get_info( -/*=============*/ - monitor_id_t monitor_id); /*!< id index into the - innodb_counter_info array */ -/****************************************************************//** -Get monitor's name by its monitor id (index into the -innodb_counter_info array -@return corresponding monitor name, or NULL if no such -monitor */ -UNIV_INTERN -const char* -srv_mon_get_name( -/*=============*/ - monitor_id_t monitor_id); /*!< id index into the - innodb_counter_info array */ - -/****************************************************************//** -Turn on/off/reset monitor counters in a module. If module_value -is NUM_MONITOR then turn on all monitor counters. -@return 0 if successful, or the first monitor that cannot be -turned on because it is already turned on. */ -UNIV_INTERN -void -srv_mon_set_module_control( -/*=======================*/ - monitor_id_t module_id, /*!< in: Module ID as in - monitor_counter_id. If it is - set to NUM_MONITOR, this means - we shall turn on all the counters */ - mon_option_t set_option); /*!< in: Turn on/off reset the - counter */ -/****************************************************************//** -This function consolidates some existing server counters used -by "system status variables". 
These existing system variables do not have -mechanism to start/stop and reset the counters, so we simulate these -controls by remembering the corresponding counter values when the -corresponding monitors are turned on/off/reset, and do appropriate -mathematics to deduct the actual value. */ -UNIV_INTERN -void -srv_mon_process_existing_counter( -/*=============================*/ - monitor_id_t monitor_id, /*!< in: the monitor's ID as in - monitor_counter_id */ - mon_option_t set_option); /*!< in: Turn on/off reset the - counter */ -/*************************************************************//** -This function is used to calculate the maximum counter value -since the start of monitor counter -@return max counter value since start. */ -UNIV_INLINE -mon_type_t -srv_mon_calc_max_since_start( -/*=========================*/ - monitor_id_t monitor); /*!< in: monitor id */ -/*************************************************************//** -This function is used to calculate the minimum counter value -since the start of monitor counter -@return min counter value since start. */ -UNIV_INLINE -mon_type_t -srv_mon_calc_min_since_start( -/*=========================*/ - monitor_id_t monitor); /*!< in: monitor id*/ -/*************************************************************//** -Reset a monitor, create a new base line with the current monitor -value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */ -UNIV_INTERN -void -srv_mon_reset( -/*==========*/ - monitor_id_t monitor); /*!< in: monitor id*/ -/*************************************************************//** -This function resets all values of a monitor counter */ -UNIV_INLINE -void -srv_mon_reset_all( -/*==============*/ - monitor_id_t monitor); /*!< in: monitor id*/ -/*************************************************************//** -Turn on monitor counters that are marked as default ON. 
*/ -UNIV_INTERN -void -srv_mon_default_on(void); -/*====================*/ - -#ifndef UNIV_NONINL -#include "srv0mon.ic" -#endif -#else /* !UNIV_HOTBACKUP */ -# define MONITOR_INC(x) ((void) 0) -# define MONITOR_DEC(x) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -#endif diff --git a/storage/xtradb/include/srv0mon.ic b/storage/xtradb/include/srv0mon.ic deleted file mode 100644 index 225390c6b6f..00000000000 --- a/storage/xtradb/include/srv0mon.ic +++ /dev/null @@ -1,113 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/srv0mon.ic -Server monitoring system - -Created 1/20/2010 Jimmy Yang -************************************************************************/ - -/*************************************************************//** -This function is used to calculate the maximum counter value -since the start of monitor counter -@return max counter value since start. 
*/
UNIV_INLINE
mon_type_t
srv_mon_calc_max_since_start(
/*=========================*/
	monitor_id_t	monitor)	/*!< in: monitor id */
{
	if (MONITOR_MAX_VALUE_START(monitor) == MAX_RESERVED) {

		/* The since-start maximum still holds its sentinel:
		adopt the maximum recorded in MONITOR_MAX_VALUE. */
		MONITOR_MAX_VALUE_START(monitor) =
			MONITOR_MAX_VALUE(monitor);

		return(MONITOR_MAX_VALUE_START(monitor));
	}

	if (MONITOR_MAX_VALUE(monitor) == MAX_RESERVED) {

		/* No maximum has been recorded since the last reset,
		so there is nothing that could raise the result. */
		return(MONITOR_MAX_VALUE_START(monitor));
	}

	/* The maximum since the last reset, rebased by the value saved
	at that reset, replaces the since-start maximum when larger. */
	if (MONITOR_MAX_VALUE(monitor) + MONITOR_VALUE_RESET(monitor)
	    > MONITOR_MAX_VALUE_START(monitor)) {

		MONITOR_MAX_VALUE_START(monitor) =
			MONITOR_MAX_VALUE(monitor)
			+ MONITOR_VALUE_RESET(monitor);
	}

	return(MONITOR_MAX_VALUE_START(monitor));
}

/*************************************************************//**
This function is used to calculate the minimum counter value
since the start of monitor counter
@return min counter value since start.
*/ -UNIV_INLINE -mon_type_t -srv_mon_calc_min_since_start( -/*=========================*/ - monitor_id_t monitor) /*!< in: monitor id */ -{ - if (MONITOR_MIN_VALUE_START(monitor) == MIN_RESERVED) { - - /* MONITOR_MIN_VALUE_START has not yet been - initialized, the min value since start is the - min count in MONITOR_MIN_VALUE */ - MONITOR_MIN_VALUE_START(monitor) = - MONITOR_MIN_VALUE(monitor); - - } else if (MONITOR_MIN_VALUE(monitor) != MIN_RESERVED - && (MONITOR_MIN_VALUE(monitor) - + MONITOR_VALUE_RESET(monitor) - < MONITOR_MIN_VALUE_START(monitor))) { - - /* If the min value since reset (as specified - in MONITOR_MIN_VALUE) plus the reset value is - less than MONITOR_MIN_VALUE_START, reset - MONITOR_MIN_VALUE_START to this new min value */ - MONITOR_MIN_VALUE_START(monitor) = - MONITOR_MIN_VALUE(monitor) - + MONITOR_VALUE_RESET(monitor); - } - - return(MONITOR_MIN_VALUE_START(monitor)); -} - -/*************************************************************//** -This function resets all values of a monitor counter */ -UNIV_INLINE -void -srv_mon_reset_all( -/*==============*/ - monitor_id_t monitor) /*!< in: monitor id */ -{ - /* Do not reset all counter values if monitor is still on. */ - if (MONITOR_IS_ON(monitor)) { - fprintf(stderr, "InnoDB: Cannot reset all values for " - "monitor counter %s while it is on. Please " - "turn it off and retry. \n", - srv_mon_get_name(monitor)); - } else { - MONITOR_RESET_ALL(monitor); - } -} diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h deleted file mode 100644 index 4e98ce0f1cb..00000000000 --- a/storage/xtradb/include/srv0srv.h +++ /dev/null @@ -1,1351 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2008, 2009, Google Inc. -Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. 
- -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/srv0srv.h -The server main program - -Created 10/10/1995 Heikki Tuuri -*******************************************************/ - -#ifndef srv0srv_h -#define srv0srv_h - -#include "univ.i" -#ifndef UNIV_HOTBACKUP -#include "log0log.h" -#include "sync0sync.h" -#include "os0sync.h" -#include "que0types.h" -#include "trx0types.h" -#include "srv0conc.h" -#include "buf0checksum.h" -#include "ut0counter.h" - -/* Global counters used inside InnoDB. 
*/
-struct srv_stats_t {	/* the global instance is declared later in this header as srv_stats */
-	typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;	/* NOTE(review): 64 is presumably the slot count of ib_counter_t; confirm in ut0counter.h */
-	typedef simple_counter<lsn_t> lsn_ctr_1_t;
-	typedef simple_counter<ulint> ulint_ctr_1_t;
-	typedef simple_counter<ib_int64_t> ib_int64_ctr_1_t;
-
-	/** Total amount of data written (in bytes) */
-	ulint_ctr_1_t data_written;
-
-	/** Number of log write requests done */
-	ulint_ctr_1_t log_write_requests;
-
-	/** Number of physical writes to the log performed */
-	ulint_ctr_1_t log_writes;
-
-	/** Amount of data written to the log files in bytes */
-	lsn_ctr_1_t os_log_written;
-
-	/** Number of writes being done to the log files.
-	Protected by log_sys->write_mutex. */
-	ulint_ctr_1_t os_log_pending_writes;
-
-	/** Incremented when there is not enough space in the log buffer
-	and it has to be flushed */
-	ulint_ctr_1_t log_waits;
-
-	/** Number of times the doublewrite buffer was flushed */
-	ulint_ctr_1_t dblwr_writes;
-
-	/** Number of pages that have been flushed to the
-	doublewrite buffer */
-	ulint_ctr_1_t dblwr_pages_written;
-
-	/** Number of write requests issued */
-	ulint_ctr_1_t buf_pool_write_requests;
-
-	/** Number of times we had to wait for a free page
-	in the buffer pool. It happens when the buffer pool is full and we
-	need to make a flush, in order to be able to read or create a page. */
-	ulint_ctr_1_t buf_pool_wait_free;
-
-	/** Number of pages that were written from the buffer
-	pool to the disk */
-	ulint_ctr_1_t buf_pool_flushed;
-
-	/** Number of buffer pool reads that led to the reading of
-	a disk page */
-	ulint_ctr_1_t buf_pool_reads;
-
-	/** Number of bytes saved by page compression */
-	ulint_ctr_64_t page_compression_saved;
-	/** Number of 512-byte TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect512;
-	/** Number of 1K TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect1024;
-	/** Number of 2K TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect2048;
-	/** Number of 4K TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect4096;
-	/** Number of 8K TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect8192;
-	/** Number of 16K TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect16384;
-	/** Number of 32K TRIMs by page compression */
-	ulint_ctr_64_t page_compression_trim_sect32768;
-	/** Number of index pages written */
-	ulint_ctr_64_t index_pages_written;
-	/** Number of non-index pages written */
-	ulint_ctr_64_t non_index_pages_written;
-	/** Number of pages compressed with page compression */
-	ulint_ctr_64_t pages_page_compressed;
-	/** Number of TRIM operations induced by page compression */
-	ulint_ctr_64_t page_compressed_trim_op;
-	/** Number of TRIM operations saved by using actual write size knowledge */
-	ulint_ctr_64_t page_compressed_trim_op_saved;
-	/** Number of pages decompressed with page compression */
-	ulint_ctr_64_t pages_page_decompressed;
-	/** Number of page compression errors */
-	ulint_ctr_64_t pages_page_compression_error;
-	/** Number of pages encrypted */
-	ulint_ctr_64_t pages_encrypted;
-	/** Number of pages decrypted */
-	ulint_ctr_64_t pages_decrypted;
-
-	/** Total amount of data read (in bytes) */
-	ulint_ctr_1_t data_read;
-
-	/** Wait time of database locks */
-	ib_int64_ctr_1_t n_lock_wait_time;
-
-	/** Number of database lock waits */
-	ulint_ctr_1_t n_lock_wait_count;
-
-	/** Number of threads currently waiting on database locks.
-	NOTE(review): the only counter declared as simple_counter<ulint, true>;
-	the meaning of the second template argument is not visible here --
-	confirm in ut0counter.h */
-	simple_counter<ulint, true> n_lock_wait_current_count;
-
-	/** Number of rows read */
-	ulint_ctr_64_t n_rows_read;
-
-	/** Number of rows updated */
-	ulint_ctr_64_t n_rows_updated;
-
-	/** Number of rows deleted */
-	ulint_ctr_64_t n_rows_deleted;
-
-	/** Number of rows inserted */
-	ulint_ctr_64_t n_rows_inserted;
-
-	/** Number of system rows read */
-	ulint_ctr_64_t n_system_rows_read;
-
-	/** Number of system rows updated */
-	ulint_ctr_64_t n_system_rows_updated;
-
-	/** Number of system rows deleted */
-	ulint_ctr_64_t n_system_rows_deleted;
-
-	/** Number of system rows inserted */
-	ulint_ctr_64_t n_system_rows_inserted;
-
-	/** Number of times a secondary index lookup triggered a cluster lookup */
-	ulint_ctr_64_t n_sec_rec_cluster_reads;
-
-	/** Number of times the prefix optimization avoided triggering a cluster lookup */
-	ulint_ctr_64_t n_sec_rec_cluster_reads_avoided;
-
-	/** Number of lock deadlocks */
-	ulint_ctr_1_t lock_deadlock_count;
-
-	/** Number of lock waits that lasted up to the maximum wait time
-	(i.e. lock wait timeouts) */
-	ulint_ctr_1_t n_lock_max_wait_time;
-
-	/** Number of times page 0 is read from a tablespace */
-	ulint_ctr_64_t page0_read;
-
-	/** Number of encryption_get_latest_key_version calls */
-	ulint_ctr_64_t n_key_requests;
-
-	/** Number of spaces in the key rotation list */
-	ulint_ctr_64_t key_rotation_list_length;
-};
-
-extern const char* srv_main_thread_op_info;
-
-/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-extern const char srv_mysql50_table_name_prefix[10];
-
-/** Event to signal srv_monitor_thread. Not protected by a mutex.
-Set after setting srv_print_innodb_monitor. */
-extern os_event_t srv_monitor_event;
-
-/** Event to signal the shutdown of srv_error_monitor_thread.
-Not protected by a mutex. 
*/
-extern os_event_t srv_error_event;
-
-/** Event for waking up buf_dump_thread. Not protected by a mutex.
-Set on shutdown or by buf_dump_start() or buf_load_start(). */
-extern os_event_t srv_buf_dump_event;
-
-/** The default name of the buffer pool dump/load file, followed by the
-variable holding the file name in use */
-#define SRV_BUF_DUMP_FILENAME_DEFAULT "ib_buffer_pool"
-extern char* srv_buf_dump_filename;
-
-/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
-and/or load it during startup. */
-extern char srv_buffer_pool_dump_at_shutdown;
-extern char srv_buffer_pool_load_at_startup;
-
-/* Whether to disable the file system cache for sort files, if it is defined */
-extern char srv_disable_sort_file_cache;
-
-/* This event is set on checkpoint completion to wake the redo log parser
-thread */
-extern os_event_t srv_checkpoint_completed_event;
-
-/* This event is set on the online redo log following thread after a successful
-log tracking iteration */
-extern os_event_t srv_redo_log_tracked_event;
-
-/** Whether the redo log tracker thread has been started. Does not take into
-account whether the tracking is currently enabled (see srv_track_changed_pages
-for that) */
-extern bool srv_redo_log_thread_started;
-
-/* If the last data file is auto-extended, we add this many pages to it
-at a time. The macro multiplies srv_auto_extend_increment by the number of
-pages per megabyte, (1024 * 1024) / UNIV_PAGE_SIZE, so the variable is
-presumably expressed in megabytes -- confirm against its definition. */
-#define SRV_AUTO_EXTEND_INCREMENT \
-	(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-
-/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
-extern ib_mutex_t srv_monitor_file_mutex;
-
-/* Prototypes for new functions added to ha_innodb.cc */
-ibool innobase_get_slow_log();
-
-/* Temporary file for InnoDB monitor output */
-extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern ib_mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-extern ib_mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellaneous diagnostic output */
-extern FILE* srv_misc_tmpfile;
-
-/* Server parameters which are read from the initfile */
-
-extern char* srv_data_home;
-
-#ifdef UNIV_LOG_ARCHIVE
-extern char* srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** Set if InnoDB must operate in read-only mode. We don't do any
-recovery and open all tables in RO mode instead of RW mode. We don't
-sync the max trx id to disk either. */
-extern my_bool srv_read_only_mode;
-/** Set if InnoDB operates in read-only mode or innodb-force-recovery
-is greater than SRV_FORCE_NO_TRX_UNDO. */
-extern my_bool high_level_read_only;
-/** Store each table created by a user in its own file; data
-dictionary tables are in the system tablespace 0 */
-extern my_bool srv_file_per_table;
-/** Sleep delay for threads waiting to enter InnoDB, in microseconds. */
-extern ulong srv_thread_sleep_delay;	/* NOTE(review): declared a second time further down in this header */
-#if defined(HAVE_ATOMIC_BUILTINS)
-/** Maximum sleep delay (in microseconds); a value of 0 disables it. */
-extern ulong srv_adaptive_max_sleep_delay;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/** The file format to use on new *.ibd files. */
-extern ulint srv_file_format;
-/** Whether to check file format during startup. A value of
-UNIV_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to
-set it to the highest format we support. */
-extern ulint srv_max_file_format_at_startup;
-/** Place locks to records only i.e. 
do not use next-key locking except
-on duplicate key checking and foreign key checking */
-extern ibool srv_locks_unsafe_for_binlog;
-
-/** Sort buffer size in index creation */
-extern ulong srv_sort_buf_size;
-/** Maximum modification log file size for online index creation */
-extern unsigned long long srv_online_max_size;
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads.
-Currently we support native aio on Windows and Linux */
-extern my_bool srv_use_native_aio;
-extern my_bool srv_numa_interleave;
-#endif /* !UNIV_HOTBACKUP */
-
-/* Use the trim operation */
-extern my_bool srv_use_trim;
-
-/* Use posix fallocate */
-extern my_bool srv_use_posix_fallocate;
-
-/* Use atomic writes, i.e. disable the doublewrite buffer */
-extern my_bool srv_use_atomic_writes;
-
-/* Compression algorithm */
-extern ulong innodb_compression_algorithm;
-
-/* Maximum and default number of flush worker threads */
-#define MTFLUSH_MAX_WORKER 64
-#define MTFLUSH_DEFAULT_WORKER 8
-
-/* Number of threads used for multi-threaded flush */
-extern long srv_mtflush_threads;
-
-/* If this flag is TRUE, then we will use multi-threaded flush. */
-extern my_bool srv_use_mtflush;
-
-/** Server undo tablespaces directory, can be an absolute path. */
-extern char* srv_undo_dir;
-
-/** Number of undo tablespaces to use. */
-extern ulong srv_undo_tablespaces;
-
-/** The number of UNDO tablespaces that are open and ready to use. */
-extern ulint srv_undo_tablespaces_open;
-
-/* The number of undo segments to use */
-extern ulong srv_undo_logs;
-
-extern ulint srv_n_data_files;
-extern char** srv_data_file_names;
-extern ulint* srv_data_file_sizes;
-extern ulint* srv_data_file_is_raw_partition;
-
-/** Whether the redo log tracking is currently enabled. Note that it is
-possible for the log tracker thread to be running while the tracking is
-disabled */
-extern my_bool srv_track_changed_pages;
-extern ulonglong srv_max_bitmap_file_size;
-
-extern
-ulonglong srv_max_changed_pages;
-
-extern uint srv_n_fil_crypt_threads;
-extern uint srv_n_fil_crypt_threads_started;
-
-extern ibool srv_auto_extend_last_data_file;
-extern ulint srv_last_file_size_max;
-extern char* srv_log_group_home_dir;
-#ifndef UNIV_HOTBACKUP
-extern ulong srv_auto_extend_increment;	/* used by the SRV_AUTO_EXTEND_INCREMENT macro in this header */
-
-extern ibool srv_created_new_raw;
-
-/* Optimize prefix index queries to skip the cluster index lookup when possible */
-/* Enables or disables this prefix optimization. Disabled by default. */
-extern my_bool srv_prefix_index_cluster_optimization;
-
-/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
-#define SRV_N_LOG_FILES_MAX 100
-extern ulong srv_n_log_files;
-extern ib_uint64_t srv_log_file_size;
-extern ib_uint64_t srv_log_file_size_requested;
-extern ulint srv_log_buffer_size;
-extern uint srv_flush_log_at_timeout;
-extern char srv_use_global_flush_log_at_trx_commit;
-extern char srv_adaptive_flushing;
-
-#ifdef WITH_INNODB_DISALLOW_WRITES
-/* When this event is reset we do not allow any file writes to take place. */
-extern os_event_t srv_allow_writes_event;
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
-even if they are marked as "corrupted". 
Mostly it is for DBA to process
-corrupted index and table */
-extern my_bool srv_load_corrupted;
-
-extern ulong srv_show_locks_held;
-extern ulong srv_show_verbose_locks;
-
-/* The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-extern const byte* srv_latin1_ordering;
-#ifndef UNIV_HOTBACKUP
-extern my_bool srv_use_sys_malloc;
-#else	/* NOTE(review): this sits inside the "#ifndef UNIV_HOTBACKUP" opened before srv_auto_extend_increment, so this branch looks unreachable -- confirm */
-extern ibool srv_use_sys_malloc;
-#endif /* UNIV_HOTBACKUP */
-extern ulint srv_buf_pool_size; /*!< requested size in bytes */
-extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */
-extern ulong srv_n_page_hash_locks; /*!< number of locks to
-	protect buf_pool->page_hash */
-extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU
-	flush batch */
-extern ulong srv_flush_neighbors; /*!< whether or not to flush
-	neighbors of a block */
-extern ulint srv_buf_pool_old_size; /*!< previously requested size */
-extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
-extern ulong srv_buf_pool_dump_pct; /*!< dump that many % of each buffer
-	pool during BP dump */
-extern ulint srv_mem_pool_size;	/* NOTE(review): declared a second time further down in this header */
-extern ulint srv_lock_table_size;	/* NOTE(review): declared a second time further down in this header */
-
-extern ulong srv_foreground_preflush;/*!< Query thread preflush algorithm */
-
-extern ulint srv_cleaner_max_lru_time;/*!< the maximum time limit for a
-	single LRU tail flush iteration by the
-	page cleaner thread */
-
-extern ulint srv_cleaner_max_flush_time;/*!< the maximum time limit for a
-	single flush list flush iteration by
-	the page cleaner thread */
-
-extern ulint srv_cleaner_flush_chunk_size;
-	/*!< page cleaner flush list flush
-	batches are further divided into chunks
-	of this size */
-
-extern ulint srv_cleaner_lru_chunk_size;
-	/*!< page cleaner LRU list flush
-	batches are further divided into chunks
-	of this size */
-
-extern ulint srv_cleaner_free_list_lwm;/*!< if the free list length is lower
-	than this percentage of
-	srv_LRU_scan_depth, page cleaner LRU
-	flushes will issue flush batches to the
-	same instance in a row */
-
-extern my_bool srv_cleaner_eviction_factor;
-	/*!< if TRUE, the page cleaner
-	uses evicted instead of flushed page
-	counts for its heuristics */
-
-extern ulong srv_cleaner_lsn_age_factor;
-	/*!< page cleaner LSN age factor
-	formula option */
-
-extern ulong srv_empty_free_list_algorithm;
-	/*!< option selecting how a query thread
-	handles an empty free list */
-
-extern ulint srv_n_file_io_threads;
-extern my_bool srv_random_read_ahead;
-extern ulong srv_read_ahead_threshold;
-extern ulint srv_n_read_io_threads;
-extern ulint srv_n_write_io_threads;
-/* Defragmentation. Originally the Facebook default value was 100, but that is too high */
-#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
-extern my_bool srv_defragment;
-extern uint srv_defragment_n_pages;
-extern uint srv_defragment_stats_accuracy;
-extern uint srv_defragment_fill_factor_n_recs;
-extern double srv_defragment_fill_factor;
-extern uint srv_defragment_frequency;
-extern ulonglong srv_defragment_interval;
-
-extern ulong srv_idle_flush_pct;
-
-/* Number of IO operations per second the server can do */
-extern ulong srv_io_capacity;
-
-/* We use this dummy default value at startup for max_io_capacity.
-The real value is set based on the value of io_capacity. */
-#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (~0UL)
-#define SRV_MAX_IO_CAPACITY_LIMIT (~0UL)
-extern ulong srv_max_io_capacity;
-/* Returns the number of IO operations that is p percent of the
-capacity. PCT_IO(5) -> returns the number of IO operations that
-is 5% of the max where max is srv_io_capacity. The product is computed
-in double and then converted to ulong. */
-#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) (p) / 100.0)))
-
-/* The "innodb_stats_method" setting, decides how InnoDB is going
-to treat NULL value when collecting statistics. It is not defined
-as enum type because the configure option takes unsigned integer type. */
-extern ulong srv_innodb_stats_method;
-
-#ifdef UNIV_LOG_ARCHIVE
-extern bool srv_log_archive_on;
-extern bool srv_archive_recovery;
-extern ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern char* srv_file_flush_method_str;
-extern ulint srv_unix_file_flush_method;
-extern ulint srv_win_file_flush_method;
-
-extern ulint srv_max_n_open_files;
-
-extern double srv_max_dirty_pages_pct;
-extern double srv_max_dirty_pages_pct_lwm;
-
-extern double srv_adaptive_flushing_lwm;
-extern ulong srv_flushing_avg_loops;
-
-extern ulong srv_force_recovery;
-
-extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
-	purge and index buffer merge.
-	If this is 2, do not even flush the
-	buffer pool to data files at the
-	shutdown: we effectively 'crash'
-	InnoDB (but lose no committed
-	transactions). */
-extern ibool srv_innodb_status;
-
-extern unsigned long long srv_stats_transient_sample_pages;
-extern my_bool srv_stats_persistent;
-extern unsigned long long srv_stats_persistent_sample_pages;
-extern my_bool srv_stats_auto_recalc;
-extern my_bool srv_stats_include_delete_marked;
-extern unsigned long long srv_stats_modified_counter;
-extern my_bool srv_stats_sample_traditional;
-
-extern ibool srv_use_doublewrite_buf;
-extern ulong srv_doublewrite_batch_size;
-
-extern ulong srv_log_arch_expire_sec;
-
-extern double srv_max_buf_pool_modified_pct;
-extern ulong srv_max_purge_lag;
-extern ulong srv_max_purge_lag_delay;
-
-extern ulong srv_replication_delay;
-
-extern my_bool srv_use_stacktrace;
-
-extern ulong srv_pass_corrupt_table;
-
-extern ulong srv_log_checksum_algorithm;
-
-extern bool srv_apply_log_only;
-
-extern bool srv_backup_mode;
-extern bool srv_close_files;
-extern bool srv_xtrabackup;
-
-#define IS_XTRABACKUP() (srv_xtrabackup)
-
-extern my_bool srv_force_primary_key;
-
-/* Helper macro to support srv_pass_corrupt_table checks. 
If 'cond' is FALSE,
-execute 'code' if srv_pass_corrupt_table is non-zero, or trigger a fatal error
-otherwise. The break statement in 'code' will obviously not work as
-expected. */
-
-#define SRV_CORRUPT_TABLE_CHECK(cond,code) \
-	do { \
-		if (UNIV_UNLIKELY(!(cond))) { \
-			if (srv_pass_corrupt_table) { \
-				code \
-			} else { \
-				ut_error; \
-			} \
-		} \
-	} while(0)
-
-/*-------------------------------------------*/
-
-extern ulint srv_read_views_memory;
-extern ulint srv_descriptors_memory;
-
-extern my_bool srv_print_innodb_monitor;
-extern my_bool srv_print_innodb_lock_monitor;
-extern ibool srv_print_innodb_tablespace_monitor;
-extern ibool srv_print_verbose_log;
-#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \
-	"Using innodb_table_monitor is deprecated and it may be removed " \
-	"in future releases. Please use the InnoDB INFORMATION_SCHEMA " \
-	"tables instead, see " REFMAN "innodb-i_s-tables.html"
-extern ibool srv_print_innodb_table_monitor;
-
-extern bool srv_monitor_active;
-extern bool srv_error_monitor_active;
-
-/* TRUE during the lifetime of the buffer pool dump/load thread */
-extern bool srv_buf_dump_thread_active;
-
-/* TRUE during the lifetime of the stats thread */
-extern bool srv_dict_stats_thread_active;
-
-/* TRUE if log scrubbing is enabled */
-extern my_bool srv_scrub_log;
-
-extern ulong srv_n_spin_wait_rounds;
-extern ulong srv_n_free_tickets_to_enter;
-extern ulong srv_thread_sleep_delay;	/* NOTE(review): repeats an earlier declaration in this header */
-extern ulong srv_spin_wait_delay;
-extern ibool srv_priority_boost;
-
-extern ulint srv_truncated_status_writes;
-extern ulint srv_available_undo_logs;
-
-extern ulint srv_column_compressed;
-extern ulint srv_column_decompressed;
-
-extern ulint srv_mem_pool_size;	/* NOTE(review): repeats an earlier declaration in this header */
-extern ulint srv_lock_table_size;	/* NOTE(review): repeats an earlier declaration in this header */
-
-#ifdef UNIV_DEBUG
-extern ibool srv_print_thread_releases;
-extern ibool srv_print_lock_waits;
-extern ibool srv_print_buf_io;
-extern ibool srv_print_log_io;
-extern ibool srv_print_latch_waits;
-#else /* UNIV_DEBUG: without it the print flags are compile-time FALSE constants */
-# define srv_print_thread_releases FALSE
-# define srv_print_lock_waits FALSE
-# define srv_print_buf_io FALSE
-# define srv_print_log_io FALSE
-# define srv_print_latch_waits FALSE
-#endif /* UNIV_DEBUG */
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-extern my_bool srv_ibuf_disable_background_merge;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-#ifdef UNIV_DEBUG
-extern my_bool srv_purge_view_update_only_debug;
-extern uint srv_sys_space_size_debug;
-#endif /* UNIV_DEBUG */
-
-#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
-extern ulint srv_dml_needed_delay;
-extern long long srv_kill_idle_transaction;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/** Mutex protecting some server global variables. */
-extern ib_mutex_t server_mutex;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-#define SRV_MAX_N_IO_THREADS 130
-
-#define SRV_MAX_N_PURGE_THREADS 32
-
-/* Arrays of English strings describing the current state and the function
-of an i/o handler thread */
-extern const char* srv_io_thread_op_info[];
-extern const char* srv_io_thread_function[];
-
-/* The tid of the cleaner thread */
-extern os_tid_t srv_cleaner_tid;
-
-/* The tid of the LRU manager thread */
-extern os_tid_t srv_lru_manager_tid;
-
-/* The tids of the purge threads */
-extern os_tid_t srv_purge_tids[];
-
-/* The tids of the I/O threads */
-extern os_tid_t srv_io_tids[];
-
-/* The tid of the master thread */
-extern os_tid_t srv_master_tid;
-
-/* The relative scheduling priority of the cleaner and LRU manager threads */
-extern ulint srv_sched_priority_cleaner;
-
-/* The relative scheduling priority of the purge threads */
-extern ulint srv_sched_priority_purge;
-
-/* The relative scheduling priority of the I/O threads */
-extern ulint srv_sched_priority_io;
-
-/* The relative scheduling priority of the master thread */
-extern ulint srv_sched_priority_master;
-
-/* The relative priority of the purge coordinator and worker threads. */
-extern my_bool srv_purge_thread_priority;
-
-/* The relative priority of the I/O threads. 
*/
-extern my_bool srv_io_thread_priority;
-
-/* The relative priority of the cleaner thread. */
-extern my_bool srv_cleaner_thread_priority;
-
-/* The relative priority of the master thread. */
-extern my_bool srv_master_thread_priority;
-
-/* The number of purge threads to use from the worker pool (currently 0 or 1).
-NOTE(review): SRV_MAX_N_PURGE_THREADS is defined as 32 in this header, so
-"0 or 1" looks outdated -- confirm. */
-extern ulong srv_n_purge_threads;
-
-/* The number of pages to purge in one batch */
-extern ulong srv_purge_batch_size;
-
-/* The number of sync wait arrays */
-extern ulong srv_sync_array_size;
-
-/* Print all user-level transaction deadlocks to mysqld stderr */
-extern my_bool srv_print_all_deadlocks;
-
-extern my_bool srv_cmp_per_index_enabled;
-
-/* Is encryption enabled. NOTE(review): declared as ulong rather than my_bool,
-so presumably it can hold more than an on/off value -- confirm. */
-extern ulong srv_encrypt_tables;
-
-/** Status variables to be passed to MySQL */
-extern struct export_var_t export_vars;
-
-/** Global counters */
-extern srv_stats_t srv_stats;
-
-/** When TRUE, fake change transactions take S rather than X row locks.
-When FALSE, row locks are not taken at all. */
-extern my_bool srv_fake_changes_locks;
-
-/** Simulate compression failures. 
*/
-extern uint srv_simulate_comp_failures;
-
-/** Fatal semaphore wait threshold = maximum number of seconds
-that a semaphore wait may last before it times out in InnoDB */
-#define DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT 600
-extern ulong srv_fatal_semaphore_wait_threshold;
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-/** Buffer pool dump status frequency in percentages */
-extern ulong srv_buf_dump_status_frequency;
-
-# ifdef UNIV_PFS_THREAD
-/* Keys to register InnoDB threads with performance schema */
-extern mysql_pfs_key_t buf_page_cleaner_thread_key;
-extern mysql_pfs_key_t buf_lru_manager_thread_key;
-extern mysql_pfs_key_t trx_rollback_clean_thread_key;
-extern mysql_pfs_key_t io_handler_thread_key;
-extern mysql_pfs_key_t srv_lock_timeout_thread_key;
-extern mysql_pfs_key_t srv_error_monitor_thread_key;
-extern mysql_pfs_key_t srv_monitor_thread_key;
-extern mysql_pfs_key_t srv_master_thread_key;
-extern mysql_pfs_key_t srv_purge_thread_key;
-extern mysql_pfs_key_t recv_writer_thread_key;
-extern mysql_pfs_key_t srv_log_tracking_thread_key;
-
-/* This macro registers the current thread and its key with performance
-schema: it calls new_thread with the key and passes the result to
-set_thread. It declares a local named 'psi' inside its own do/while block. */
-# define pfs_register_thread(key) \
-do { \
-	struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
-	PSI_THREAD_CALL(set_thread)(psi); \
-} while (0)
-
-/* This macro delists the current thread from performance schema */
-# define pfs_delete_thread() \
-do { \
-	PSI_THREAD_CALL(delete_current_thread)(); \
-} while (0)
-# endif /* UNIV_PFS_THREAD */
-
-#endif /* !UNIV_HOTBACKUP */
-
-/** Types of raw partitions in innodb_data_file_path */
-enum {
-	SRV_NOT_RAW = 0, /*!< Not a raw partition */
-	SRV_NEW_RAW, /*!< A 'newraw' partition, only to be
-	initialized */
-	SRV_OLD_RAW /*!< An initialized raw partition */
-};
-
-/** Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
-enum {
-	SRV_UNIX_FSYNC = 1, /*!< fsync, the default */
-	SRV_UNIX_O_DSYNC, /*!< open log files in O_SYNC mode.
-	NOTE(review): the enumerator says O_DSYNC
-	but this text says O_SYNC; kept as
-	written -- confirm against the file
-	opening code */
-	SRV_UNIX_LITTLESYNC, /*!< do not call os_file_flush()
-	when writing data files, but do flush
-	after writing to log files */
-	SRV_UNIX_NOSYNC, /*!< do not flush after writing */
-	SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
-	data files. This implies using
-	non-buffered IO but still using fsync,
-	the reason for which is that some FS
-	do not flush meta-data when
-	unbuffered IO happens */
-	SRV_UNIX_O_DIRECT_NO_FSYNC,
-	/*!< do not use fsync() when using
-	direct IO, i.e. it can be set to avoid
-	the fsync() call that we make when
-	using SRV_UNIX_O_DIRECT. However, in
-	this case the user/DBA should be sure about
-	the integrity of the meta-data */
-	SRV_UNIX_ALL_O_DIRECT /*!< similar to O_DIRECT, invokes
-	os_file_set_nocache() on data and log files.
-	This implies using non-buffered IO but still
-	using fsync for data but not log files. */
-};
-
-/** Alternatives for file i/o in Windows */
-enum {
-	SRV_WIN_IO_NORMAL = 1, /*!< buffered I/O */
-	SRV_WIN_IO_UNBUFFERED /*!< unbuffered I/O; this is the default */
-};
-
-/** Alternatives for srv_force_recovery. Non-zero values are intended
-to help the user get a damaged database up so that he can dump intact
-tables and rows with SELECT INTO OUTFILE. The database must not otherwise
-be used with these options! A bigger number below means that all precautions
-of lower numbers are included. 
*/
-enum {
-	SRV_FORCE_IGNORE_CORRUPT = 1, /*!< let the server run even if it
-	detects a corrupt page */
-	SRV_FORCE_NO_BACKGROUND = 2, /*!< prevent the main thread from
-	running: if a crash would occur
-	in purge, this prevents it */
-	SRV_FORCE_NO_TRX_UNDO = 3, /*!< do not run trx rollback after
-	recovery */
-	SRV_FORCE_NO_IBUF_MERGE = 4, /*!< also prevent ibuf operations:
-	if they would cause a crash, better
-	not do them */
-	SRV_FORCE_NO_UNDO_LOG_SCAN = 5, /*!< do not look at undo logs when
-	starting the database: InnoDB will
-	treat even incomplete transactions
-	as committed */
-	SRV_FORCE_NO_LOG_REDO = 6 /*!< do not do the log roll-forward
-	in connection with recovery */
-};
-
-/* Alternatives for srv_innodb_stats_method, which can be changed by
-setting innodb_stats_method */
-enum srv_stats_method_name_enum {
-	SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as
-	equal. This is the default setting
-	for innodb_stats_method */
-	SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as
-	NOT equal. */
-	SRV_STATS_NULLS_IGNORED /* NULL values are ignored */
-};
-
-typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
-
-#ifndef UNIV_HOTBACKUP
-/** Types of threads existing in the system. The enumerators are in
-ascending order and SRV_MASTER is listed last. */
-enum srv_thread_type {
-	SRV_NONE, /*!< None */
-	SRV_WORKER, /*!< threads serving parallelized
-	queries and queries released from
-	lock wait */
-	SRV_PURGE, /*!< Purge coordinator thread */
-	SRV_MASTER /*!< the master thread (whose type
-	number must be the biggest) */
-};
-
-/*********************************************************************//**
-Boots the Innobase server. */
-UNIV_INTERN
-void
-srv_boot(void);
-/*==========*/
-/*********************************************************************//**
-Initializes the server. */
-UNIV_INTERN
-void
-srv_init(void);
-/*==========*/
-/*********************************************************************//**
-Frees the data structures created in srv_init(). 
*/
-UNIV_INTERN
-void
-srv_free(void);
-/*==========*/
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-UNIV_INTERN
-void
-srv_general_init(void);
-/*==================*/
-/*********************************************************************//**
-Sets the info describing the current state of an i/o thread. */
-UNIV_INTERN
-void
-srv_set_io_thread_op_info(
-/*======================*/
-	ulint i, /*!< in: the 'segment' of the i/o thread */
-	const char* str); /*!< in: constant char string describing the
-	state */
-/*********************************************************************//**
-Resets the info describing the current state of the i/o threads. */
-UNIV_INTERN
-void
-srv_reset_io_thread_op_info();
-/*=========================*/
-/*******************************************************************//**
-Tells the purge thread that there has been activity in the database
-and wakes up the purge thread if it is suspended (not sleeping). Note
-that there is a small chance that the purge thread stays suspended
-(we do not protect our operation with the srv_sys_t:mutex, for
-performance reasons). */
-UNIV_INTERN
-void
-srv_wake_purge_thread_if_not_active(void);
-/*=====================================*/
-/*******************************************************************//**
-Tells the Innobase server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performance reasons). */
-UNIV_INTERN
-void
-srv_active_wake_master_thread(void);
-/*===============================*/
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended. 
*/
-UNIV_INTERN
-void
-srv_wake_master_thread(void);
-/*========================*/
-/******************************************************************//**
-A thread which follows the redo log and outputs the changed page bitmap.
-@return a dummy value */
-extern "C"
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_redo_log_follow_thread)(
-/*=======================*/
-	void* arg); /*!< in: a dummy parameter required by
-	os_thread_create */
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor.
-@return FALSE if not all information was printed
-due to failure to obtain a necessary mutex */
-UNIV_INTERN
-ibool
-srv_printf_innodb_monitor(
-/*======================*/
-	FILE* file, /*!< in: output stream */
-	ibool nowait, /*!< in: whether to wait for the
-	lock_sys_t::mutex.
-	NOTE(review): given the name, TRUE
-	presumably means "do not wait" --
-	confirm in the definition */
-	ulint* trx_start, /*!< out: file position of the start of
-	the list of active transactions */
-	ulint* trx_end); /*!< out: file position of the end of
-	the list of active transactions */
-
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
-void
-srv_export_innodb_status(void);
-/*==========================*/
-/*************************************************************//**
-Removes old archived transaction log files.
-Both parameters cannot be provided at the same time.
-@return DB_SUCCESS on success, otherwise DB_ERROR */
-UNIV_INTERN
-dberr_t
-purge_archived_logs(
-	time_t before_date, /*!< in: all files modified
-	before this timestamp should be removed */
-	lsn_t before_lsn); /*!< in: files with this lsn in name
-	and earlier should be removed */
-/*==========================*/
-/*******************************************************************//**
-Get current server activity count. We don't hold srv_sys::mutex while
-reading this value as it is only used in heuristics.
-@return activity count. 
*/
-UNIV_INTERN
-ulint
-srv_get_activity_count(void);
-/*========================*/
-/*******************************************************************//**
-Check if there has been any activity. Considers background change buffer
-merge as regular server activity unless a non-default
-old_ibuf_merge_activity_count value is passed, in which case the merge will be
-treated as keeping the server idle.
-@return FALSE if no change in activity counter. */
-UNIV_INTERN
-ibool
-srv_check_activity(
-/*===============*/
-	ulint old_activity_count, /*!< in: old activity count */
-	/*!< in: old change buffer merge
-	activity count, or
-	ULINT_UNDEFINED (the default) */
-	ulint old_ibuf_merge_activity_count = ULINT_UNDEFINED);
-/******************************************************************//**
-Increment the server activity counter. */
-UNIV_INTERN
-void
-srv_inc_activity_count(
-/*===================*/
-	bool ibuf_merge_activity = false); /*!< in: whether this activity bump
-	is caused by the background
-	change buffer merge */
-
-/**********************************************************************//**
-Enqueues a task to the server task queue and releases a worker thread, if there
-is a suspended one. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
-	que_thr_t* thr); /*!< in: query thread */
-
-/**********************************************************************//**
-Check whether any background thread is active. If so, return the thread
-type.
-@return SRV_NONE if all are suspended or have exited, thread
-type if any are still active. */
-UNIV_INTERN
-enum srv_thread_type
-srv_get_active_thread_type(void);
-/*============================*/
-
-extern "C" {
-
-/*********************************************************************//**
-A thread which prints the info output by various InnoDB monitors. 
-@return a dummy parameter */ -UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_monitor_thread)( -/*===============================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ - -/*********************************************************************//** -The master thread controlling the server. -@return a dummy parameter */ -UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_master_thread)( -/*==============================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ - -/************************************************************************* -A thread which prints warnings about semaphore waits which have lasted -too long. These can be used to track bugs which cause hangs. -@return a dummy parameter */ -UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_error_monitor_thread)( -/*=====================================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ - -/*********************************************************************//** -Purge coordinator thread that schedules the purge tasks. -@return a dummy parameter */ -UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_purge_coordinator_thread)( -/*=========================================*/ - void* arg MY_ATTRIBUTE((unused))); /*!< in: a dummy parameter - required by os_thread_create */ - -/*********************************************************************//** -Worker thread that reads tasks from the work queue and executes them. -@return a dummy parameter */ -UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_worker_thread)( -/*==============================*/ - void* arg MY_ATTRIBUTE((unused))); /*!< in: a dummy parameter - required by os_thread_create */ -} /* extern "C" */ - -/**********************************************************************//** -Get count of tasks in the queue. 
-@return number of tasks in queue */ -UNIV_INTERN -ulint -srv_get_task_queue_length(void); -/*===========================*/ - -/** Ensure that a given number of threads of the type given are running -(or are already terminated). -@param[in] type thread type -@param[in] n number of threads that have to run */ -void -srv_release_threads(enum srv_thread_type type, ulint n); - -/** Wake up the purge threads. */ -UNIV_INTERN -void -srv_purge_wakeup(); - -/** Check whether given space id is undo tablespace id -@param[in] space_id space id to check -@return true if it is undo tablespace else false. */ -bool -srv_is_undo_tablespace( - ulint space_id); - -/** Status variables to be passed to MySQL */ -struct export_var_t{ - ulint innodb_adaptive_hash_hash_searches; - ulint innodb_adaptive_hash_non_hash_searches; - ulint innodb_background_log_sync; - ulint innodb_data_pending_reads; /*!< Pending reads */ - ulint innodb_data_pending_writes; /*!< Pending writes */ - ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */ - ulint innodb_data_fsyncs; /*!< Number of fsyncs so far */ - ulint innodb_data_read; /*!< Data bytes read */ - ulint innodb_data_writes; /*!< I/O write requests */ - ulint innodb_data_written; /*!< Data bytes written */ - ulint innodb_data_reads; /*!< I/O read requests */ - char innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */ - char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */ - ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */ - ulint innodb_buffer_pool_pages_data; /*!< Data pages */ - ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */ - ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */ - ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */ - ulint innodb_buffer_pool_pages_misc; /*!< Miscellanous pages */ - ulint innodb_buffer_pool_pages_free; /*!< Free pages */ -#ifdef UNIV_DEBUG - ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */ -#endif /* UNIV_DEBUG */ - 
ulint innodb_buffer_pool_pages_made_not_young; - ulint innodb_buffer_pool_pages_made_young; - ulint innodb_buffer_pool_pages_old; - ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */ - ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */ - ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */ - ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */ - ulint innodb_buffer_pool_pages_LRU_flushed; /*!< buf_lru_flush_page_count */ - ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */ - ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */ - ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ - ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ - ulint innodb_checkpoint_age; - ulint innodb_checkpoint_max_age; - ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */ - ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */ - ulint innodb_deadlocks; - ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */ - ulint innodb_history_list_length; - ulint innodb_ibuf_size; - ulint innodb_ibuf_free_list; - ulint innodb_ibuf_segment_size; - ulint innodb_ibuf_merges; - ulint innodb_ibuf_merged_inserts; - ulint innodb_ibuf_merged_delete_marks; - ulint innodb_ibuf_merged_deletes; - ulint innodb_ibuf_discarded_inserts; - ulint innodb_ibuf_discarded_delete_marks; - ulint innodb_ibuf_discarded_deletes; - ulint innodb_log_waits; /*!< srv_log_waits */ - ulint innodb_log_write_requests; /*!< srv_log_write_requests */ - ulint innodb_log_writes; /*!< srv_log_writes */ - lsn_t innodb_os_log_written; /*!< srv_os_log_written */ - lsn_t innodb_lsn_current; - lsn_t innodb_lsn_flushed; - lsn_t innodb_lsn_last_checkpoint; - ulint innodb_master_thread_active_loops;/*!< srv_main_active_loops */ - ulint innodb_master_thread_idle_loops; /*!< srv_main_idle_loops */ - ib_int64_t innodb_max_trx_id; - ulint innodb_mem_adaptive_hash; - ulint innodb_mem_dictionary; - 
ulint innodb_mem_total; - ib_int64_t innodb_mutex_os_waits; - ib_int64_t innodb_mutex_spin_rounds; - ib_int64_t innodb_mutex_spin_waits; - ib_int64_t innodb_oldest_view_low_limit_trx_id; - ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */ - ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */ - ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */ - ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */ - ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */ - ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read*/ - ulint innodb_page0_read; /*!< srv_stats.page0_read */ - ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */ - ib_int64_t innodb_purge_trx_id; - ib_int64_t innodb_purge_undo_no; - ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */ - ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */ - ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time - / 1000 */ - ulint innodb_row_lock_time_avg; /*!< srv_n_lock_wait_time - / 1000 - / srv_n_lock_wait_count */ - ulint innodb_row_lock_time_max; /*!< srv_n_lock_max_wait_time - / 1000 */ - ulint innodb_current_row_locks; - ulint innodb_rows_read; /*!< srv_n_rows_read */ - ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */ - ulint innodb_rows_updated; /*!< srv_n_rows_updated */ - ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */ - ulint innodb_system_rows_read; /*!< srv_n_system_rows_read */ - ulint innodb_system_rows_inserted; /*!< srv_n_system_rows_inserted */ - ulint innodb_system_rows_updated; /*!< srv_n_system_rows_updated */ - ulint innodb_system_rows_deleted; /*!< srv_n_system_rows_deleted*/ - ulint innodb_num_open_files; /*!< fil_n_file_opened */ - ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */ - ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */ - ulint innodb_read_views_memory; /*!< srv_read_views_memory */ - ulint innodb_descriptors_memory; /*!< 
srv_descriptors_memory */ - ib_int64_t innodb_s_lock_os_waits; - ib_int64_t innodb_s_lock_spin_rounds; - ib_int64_t innodb_s_lock_spin_waits; - ib_int64_t innodb_x_lock_os_waits; - ib_int64_t innodb_x_lock_spin_rounds; - ib_int64_t innodb_x_lock_spin_waits; - - ulint innodb_defragment_compression_failures; /*!< Number of - defragment re-compression - failures */ - - ulint innodb_defragment_failures; /*!< Number of defragment - failures*/ - ulint innodb_defragment_count; /*!< Number of defragment - operations*/ - - ulint innodb_onlineddl_rowlog_rows; /*!< Online alter rows */ - ulint innodb_onlineddl_rowlog_pct_used; /*!< Online alter percentage - of used row log buffer */ - ulint innodb_onlineddl_pct_progress; /*!< Online alter progress - */ - -#ifdef UNIV_DEBUG - ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */ - ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id - - purged view's min trx_id */ -#endif /* UNIV_DEBUG */ - ulint innodb_column_compressed; /*!< srv_column_compressed */ - ulint innodb_column_decompressed; /*!< srv_column_decompressed */ - - ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved - by page compression */ - ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM - by page compression */ - ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM - by page compression */ - ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM - by page compression */ - ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM - by page compression */ - ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM - by page compression */ - ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM - by page compression */ - ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM - by page compression */ - ib_int64_t innodb_index_pages_written; /*!< Number of index pages - written */ - ib_int64_t 
innodb_non_index_pages_written; /*!< Number of non index pages - written */ - ib_int64_t innodb_pages_page_compressed;/*!< Number of pages - compressed by page compression */ - ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations - induced by page compression */ - ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations - saved by page compression */ - ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages - decompressed by page - compression */ - ib_int64_t innodb_pages_page_compression_error;/*!< Number of page - compression errors */ - ib_int64_t innodb_pages_encrypted; /*!< Number of pages - encrypted */ - ib_int64_t innodb_pages_decrypted; /*!< Number of pages - decrypted */ - - ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */ - ulint innodb_sec_rec_cluster_reads_avoided;/*!< srv_sec_rec_cluster_reads_avoided */ - - ulint innodb_encryption_rotation_pages_read_from_cache; - ulint innodb_encryption_rotation_pages_read_from_disk; - ulint innodb_encryption_rotation_pages_modified; - ulint innodb_encryption_rotation_pages_flushed; - ulint innodb_encryption_rotation_estimated_iops; - ib_int64_t innodb_encryption_key_requests; - ib_int64_t innodb_key_rotation_list_length; - - ulint innodb_scrub_page_reorganizations; - ulint innodb_scrub_page_splits; - ulint innodb_scrub_page_split_failures_underflow; - ulint innodb_scrub_page_split_failures_out_of_filespace; - ulint innodb_scrub_page_split_failures_missing_index; - ulint innodb_scrub_page_split_failures_unknown; -}; - -/** Thread slot in the thread table. */ -struct srv_slot_t{ - srv_thread_type type; /*!< thread type: user, - utility etc. */ - ibool in_use; /*!< TRUE if this slot - is in use */ - ibool suspended; /*!< TRUE if the thread is - waiting for the event of this - slot */ - ib_time_t suspend_time; /*!< time when the thread was - suspended. 
Initialized by - lock_wait_table_reserve_slot() - for lock wait */ - ulong wait_timeout; /*!< wait time that if exceeded - the thread will be timed out. - Initialized by - lock_wait_table_reserve_slot() - for lock wait */ - os_event_t event; /*!< event used in suspending - the thread when it has nothing - to do */ - que_thr_t* thr; /*!< suspended query thread - (only used for user threads) */ -}; - -#else /* !UNIV_HOTBACKUP */ -# define srv_use_adaptive_hash_indexes FALSE -# define srv_use_native_aio FALSE -# define srv_numa_interleave FALSE -# define srv_force_recovery 0UL -# define srv_set_io_thread_op_info(t,info) ((void) 0) -# define srv_reset_io_thread_op_info() ((void) 0) -# define srv_is_being_started 0 -# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED -# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC -# define srv_start_raw_disk_in_use 0 -# define srv_file_per_table 1 -#endif /* !UNIV_HOTBACKUP */ - -#ifdef WITH_WSREP -UNIV_INTERN -void -wsrep_srv_conc_cancel_wait( -/*==================*/ - trx_t* trx); /*!< in: transaction object associated with the - thread */ -#endif /* WITH_WSREP */ - -#ifndef DBUG_OFF -/** false before InnoDB monitor has been printed at least once, true -afterwards */ -extern bool srv_debug_monitor_printed; -#else -#define srv_debug_monitor_printed false -#endif - -#endif diff --git a/storage/xtradb/include/srv0srv.ic b/storage/xtradb/include/srv0srv.ic deleted file mode 100644 index 53405c06f97..00000000000 --- a/storage/xtradb/include/srv0srv.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/srv0srv.ic -Server main program - -Created 10/4/1995 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h deleted file mode 100644 index b055a9d834f..00000000000 --- a/storage/xtradb/include/srv0start.h +++ /dev/null @@ -1,163 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/srv0start.h -Starts the Innobase database server - -Created 10/10/1995 Heikki Tuuri -*******************************************************/ - -#ifndef srv0start_h -#define srv0start_h - -#include "univ.i" -#include "log0log.h" -#include "ut0byte.h" - -#ifdef __WIN__ -#define SRV_PATH_SEPARATOR '\\' -#else -#define SRV_PATH_SEPARATOR '/' -#endif - -/*********************************************************************//** -Normalizes a directory path for Windows: converts slashes to backslashes. */ -UNIV_INTERN -void -srv_normalize_path_for_win( -/*=======================*/ - char* str); /*!< in/out: null-terminated character string */ -/*********************************************************************//** -Reads the data files and their sizes from a character string given in -the .cnf file. -@return TRUE if ok, FALSE on parse error */ -UNIV_INTERN -ibool -srv_parse_data_file_paths_and_sizes( -/*================================*/ - char* str); /*!< in/out: the data file path string */ -/*********************************************************************//** -Frees the memory allocated by srv_parse_data_file_paths_and_sizes() -and srv_parse_log_group_home_dirs(). */ -UNIV_INTERN -void -srv_free_paths_and_sizes(void); -/*==========================*/ -/*********************************************************************//** -Adds a slash or a backslash to the end of a string if it is missing -and the string is not empty. 
-@return string which has the separator if the string is not empty */ -UNIV_INTERN -char* -srv_add_path_separator_if_needed( -/*=============================*/ - char* str); /*!< in: null-terminated character string */ -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Starts Innobase and creates a new database if database files -are not found and the user wants. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -innobase_start_or_create_for_mysql(); - -/** Shut down InnoDB. */ -UNIV_INTERN -void -innodb_shutdown(); - -/*************************************************************//** -Copy the file path component of the physical file to parameter. It will -copy up to and including the terminating path separator. -@return number of bytes copied or ULINT_UNDEFINED if destination buffer - is smaller than the path to be copied. */ -UNIV_INTERN -ulint -srv_path_copy( -/*==========*/ - char* dest, /*!< out: destination buffer */ - ulint dest_len, /*!< in: max bytes to copy */ - const char* basedir, /*!< in: base directory */ - const char* table_name) /*!< in: source table name */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); - -/*****************************************************************//** -Get the meta-data filename from the table name. 
*/ -UNIV_INTERN -void -srv_get_meta_data_filename( -/*======================*/ - dict_table_t* table, /*!< in: table */ - char* filename, /*!< out: filename */ - ulint max_len) /*!< in: filename max length */ - MY_ATTRIBUTE((nonnull)); - -/** Log sequence number at shutdown */ -extern lsn_t srv_shutdown_lsn; -/** Log sequence number immediately after startup */ -extern lsn_t srv_start_lsn; - -#ifdef HAVE_DARWIN_THREADS -/** TRUE if the F_FULLFSYNC option is available */ -extern ibool srv_have_fullfsync; -#endif - -/** TRUE if the server is being started */ -extern ibool srv_is_being_started; -/** TRUE if the server was successfully started */ -extern ibool srv_was_started; -/** TRUE if the server is being started, before rolling back any -incomplete transactions */ -extern ibool srv_startup_is_before_trx_rollback_phase; - -/** TRUE if a raw partition is in use */ -extern ibool srv_start_raw_disk_in_use; - -/** Undo tablespaces starts with space_id. */ -extern ulint srv_undo_space_id_start; - -/** Shutdown state */ -enum srv_shutdown_state { - SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */ - SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in - logs_empty_and_mark_files_at_shutdown() */ - SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the - purge threads must have completed their - work. 
Once we enter this phase the - page_cleaner can clean up the buffer - pool and exit */ - SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that - the buffer pool can be freed: flush - all file spaces and close all files */ - SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */ -}; - -/** Whether any undo log records can be generated */ -extern bool srv_undo_sources; - -/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to -SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ -extern enum srv_shutdown_state srv_shutdown_state; -#endif /* !UNIV_HOTBACKUP */ - -/** Log 'spaces' have id's >= this */ -#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL - -#endif diff --git a/storage/xtradb/include/sync0arr.h b/storage/xtradb/include/sync0arr.h deleted file mode 100644 index 9292026ff13..00000000000 --- a/storage/xtradb/include/sync0arr.h +++ /dev/null @@ -1,171 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0arr.h -The wait array used in synchronization primitives - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0arr_h -#define sync0arr_h - -#include "univ.i" -#include "ut0lst.h" -#include "ut0mem.h" -#include "os0thread.h" - -/** Synchonization cell */ -struct sync_cell_t; -/** Synchronization wait array */ -struct sync_array_t; - -/******************************************************************//** -Get an instance of the sync wait array and reserve a wait array cell -in the instance for waiting for an object. The event of the cell is -reset to nonsignalled state. -If reserving cell of the instance fails, try to get another new -instance until we can reserve an empty cell of it. -@return the instance found, never NULL. */ -UNIV_INLINE -sync_array_t* -sync_array_get_and_reserve_cell( -/*============================*/ - void* object, /*!< in: pointer to the object to wait for */ - ulint type, /*!< in: lock request type */ - const char* file, /*!< in: file where requested */ - ulint line, /*!< in: line where requested */ - ulint* index); /*!< out: index of the reserved cell */ -/******************************************************************//** -Reserves a wait array cell for waiting for an object. -The event of the cell is reset to nonsignalled state. 
-@return true if free cell is found, otherwise false */ -UNIV_INTERN -bool -sync_array_reserve_cell( -/*====================*/ - sync_array_t* arr, /*!< in: wait array */ - void* object, /*!< in: pointer to the object to wait for */ - ulint type, /*!< in: lock request type */ - const char* file, /*!< in: file where requested */ - ulint line, /*!< in: line where requested */ - ulint* index); /*!< out: index of the reserved cell */ -/******************************************************************//** -This function should be called when a thread starts to wait on -a wait array cell. In the debug version this function checks -if the wait for a semaphore will result in a deadlock, in which -case prints info and asserts. */ -UNIV_INTERN -void -sync_array_wait_event( -/*==================*/ - sync_array_t* arr, /*!< in: wait array */ - ulint index); /*!< in: index of the reserved cell */ -/******************************************************************//** -Frees the cell. NOTE! sync_array_wait_event frees the cell -automatically! */ -UNIV_INTERN -void -sync_array_free_cell( -/*=================*/ - sync_array_t* arr, /*!< in: wait array */ - ulint index); /*!< in: index of the cell in array */ -/**********************************************************************//** -Note that one of the wait objects was signalled. */ -UNIV_INTERN -void -sync_array_object_signalled(void); -/*=============================*/ - -/**********************************************************************//** -If the wakeup algorithm does not work perfectly at semaphore relases, -this function will do the waking (see the comment in mutex_exit). This -function should be called about every 1 second in the server. */ -UNIV_INTERN -void -sync_arr_wake_threads_if_sema_free(void); -/*====================================*/ -/**********************************************************************//** -Prints warnings of long semaphore waits to stderr. 
-@return TRUE if fatal semaphore wait threshold was exceeded */ -UNIV_INTERN -ibool -sync_array_print_long_waits( -/*========================*/ - os_thread_id_t* waiter, /*!< out: longest waiting thread */ - const void** sema) /*!< out: longest-waited-for semaphore */ - MY_ATTRIBUTE((nonnull)); -/********************************************************************//** -Validates the integrity of the wait array. Checks -that the number of reserved cells equals the count variable. */ -UNIV_INTERN -void -sync_array_validate( -/*================*/ - sync_array_t* arr); /*!< in: sync wait array */ -/**********************************************************************//** -Prints info of the wait array. */ -UNIV_INTERN -void -sync_array_print( -/*=============*/ - FILE* file); /*!< in: file where to print */ - -/**********************************************************************//** -Create the primary system wait array(s), they are protected by an OS mutex */ -UNIV_INTERN -void -sync_array_init( -/*============*/ - ulint n_threads); /*!< in: Number of slots to create */ -/**********************************************************************//** -Close sync array wait sub-system. */ -UNIV_INTERN -void -sync_array_close(void); -/*==================*/ - -/**********************************************************************//** -Get an instance of the sync wait array. */ -UNIV_INTERN -sync_array_t* -sync_array_get(void); -/*================*/ - -/**********************************************************************//** -Prints info of the wait array without using any mutexes/semaphores. */ -UNIV_INTERN -void -sync_array_print_xtradb(void); - -/*****************************************************************//** -Gets the nth cell in array. 
-@return cell */ -UNIV_INTERN -sync_cell_t* -sync_array_get_nth_cell( -/*====================*/ - sync_array_t* arr, /*!< in: sync array */ - ulint n); /*!< in: index */ - -#ifndef UNIV_NONINL -#include "sync0arr.ic" -#endif - -#endif diff --git a/storage/xtradb/include/sync0arr.ic b/storage/xtradb/include/sync0arr.ic deleted file mode 100644 index 18a46dd0a41..00000000000 --- a/storage/xtradb/include/sync0arr.ic +++ /dev/null @@ -1,64 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0arr.ic -The wait array for synchronization primitives - -Inline code - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -/** User configured sync array size */ -extern ulong srv_sync_array_size; - -/******************************************************************//** -Get an instance of the sync wait array and reserve a wait array cell -in the instance for waiting for an object. The event of the cell is -reset to nonsignalled state. 
-If reserving cell of the instance fails, try to get another new -instance until we can reserve an empty cell of it. -@return the instance found, never NULL. */ -UNIV_INLINE -sync_array_t* -sync_array_get_and_reserve_cell( -/*============================*/ - void* object, /*!< in: pointer to the object to wait for */ - ulint type, /*!< in: lock request type */ - const char* file, /*!< in: file where requested */ - ulint line, /*!< in: line where requested */ - ulint* index) /*!< out: index of the reserved cell */ -{ - sync_array_t* sync_arr; - bool reserved = false; - - for (ulint i = 0; i < srv_sync_array_size && !reserved; ++i) { - sync_arr = sync_array_get(); - reserved = sync_array_reserve_cell(sync_arr, object, type, - file, line, index); - } - - /* This won't be true every time, for the loop above may execute - more than srv_sync_array_size times to reserve a cell. - But an assertion here makes the code more solid. */ - ut_a(reserved); - - return sync_arr; -} - diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h deleted file mode 100644 index 95d38d3be92..00000000000 --- a/storage/xtradb/include/sync0rw.h +++ /dev/null @@ -1,1094 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0rw.h -The read-write lock (for threads, not for database transactions) - -Created 9/11/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0rw_h -#define sync0rw_h - -#include "univ.i" -#ifndef UNIV_HOTBACKUP -#include "ut0lst.h" -#include "ut0counter.h" -#include "sync0sync.h" -#include "os0sync.h" - -/** Enable semaphore request instrumentation */ -extern my_bool srv_instrument_semaphores; - -/* The following undef is to prevent a name conflict with a macro -in MySQL: */ -#undef rw_lock_t -#endif /* !UNIV_HOTBACKUP */ - -/** Counters for RW locks. 
*/ -struct rw_lock_stats_t { - typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t; - - /** number of spin waits on rw-latches, - resulted during shared (read) locks */ - ib_int64_counter_t rw_s_spin_wait_count; - - /** number of spin loop rounds on rw-latches, - resulted during shared (read) locks */ - ib_int64_counter_t rw_s_spin_round_count; - - /** number of OS waits on rw-latches, - resulted during shared (read) locks */ - ib_int64_counter_t rw_s_os_wait_count; - - /** number of unlocks (that unlock shared locks), - set only when UNIV_SYNC_PERF_STAT is defined */ - ib_int64_counter_t rw_s_exit_count; - - /** number of spin waits on rw-latches, - resulted during exclusive (write) locks */ - ib_int64_counter_t rw_x_spin_wait_count; - - /** number of spin loop rounds on rw-latches, - resulted during exclusive (write) locks */ - ib_int64_counter_t rw_x_spin_round_count; - - /** number of OS waits on rw-latches, - resulted during exclusive (write) locks */ - ib_int64_counter_t rw_x_os_wait_count; - - /** number of unlocks (that unlock exclusive locks), - set only when UNIV_SYNC_PERF_STAT is defined */ - ib_int64_counter_t rw_x_exit_count; -}; - -/* Latch types; these are used also in btr0btr.h: keep the numerical values -smaller than 30 and the order of the numerical values like below! */ -#define RW_S_LATCH 1 -#define RW_X_LATCH 2 -#define RW_NO_LATCH 3 - -#ifndef UNIV_HOTBACKUP -/* We decrement lock_word by this amount for each x_lock. It is also the -start value for the lock_word, meaning that it limits the maximum number -of concurrent read locks before the rw_lock breaks. 
The current value of -0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/ -#define X_LOCK_DECR 0x00100000 - -struct rw_lock_t; -struct prio_rw_lock_t; -#ifdef UNIV_SYNC_DEBUG -struct rw_lock_debug_t; -#endif /* UNIV_SYNC_DEBUG */ - -typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t; - -extern rw_lock_list_t rw_lock_list; -extern ib_mutex_t rw_lock_list_mutex; - -#ifdef UNIV_SYNC_DEBUG -/* The global mutex which protects debug info lists of all rw-locks. -To modify the debug info list of an rw-lock, this mutex has to be -acquired in addition to the mutex protecting the lock. */ -extern os_fast_mutex_t rw_lock_debug_mutex; -#endif /* UNIV_SYNC_DEBUG */ - -/** Counters for RW locks. */ -extern rw_lock_stats_t rw_lock_stats; - -#ifdef UNIV_PFS_RWLOCK -/* Following are rwlock keys used to register with MySQL -performance schema */ -# ifdef UNIV_LOG_ARCHIVE -extern mysql_pfs_key_t archive_lock_key; -# endif /* UNIV_LOG_ARCHIVE */ -extern mysql_pfs_key_t btr_search_latch_key; -extern mysql_pfs_key_t buf_block_lock_key; -# ifdef UNIV_SYNC_DEBUG -extern mysql_pfs_key_t buf_block_debug_latch_key; -# endif /* UNIV_SYNC_DEBUG */ -extern mysql_pfs_key_t dict_operation_lock_key; -extern mysql_pfs_key_t checkpoint_lock_key; -extern mysql_pfs_key_t fil_space_latch_key; -extern mysql_pfs_key_t fts_cache_rw_lock_key; -extern mysql_pfs_key_t fts_cache_init_rw_lock_key; -extern mysql_pfs_key_t trx_i_s_cache_lock_key; -extern mysql_pfs_key_t trx_purge_latch_key; -extern mysql_pfs_key_t index_tree_rw_lock_key; -extern mysql_pfs_key_t index_online_log_key; -extern mysql_pfs_key_t dict_table_stats_key; -extern mysql_pfs_key_t trx_sys_rw_lock_key; -extern mysql_pfs_key_t hash_table_rw_lock_key; -#endif /* UNIV_PFS_RWLOCK */ - - -#ifndef UNIV_PFS_RWLOCK -/******************************************************************//** -Creates, or rather, initializes an rw-lock object in a specified memory -location (which must be appropriately aligned). 
The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. -if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is -defined, the rwlock are instrumented with performance schema probes. */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_create(K, L, level) \ - rw_lock_create_func((L), (level), #L, __FILE__, __LINE__) -# else /* UNIV_SYNC_DEBUG */ -# define rw_lock_create(K, L, level) \ - rw_lock_create_func((L), #L, __FILE__, __LINE__) -# endif/* UNIV_SYNC_DEBUG */ -# else /* UNIV_DEBUG */ -# define rw_lock_create(K, L, level) \ - rw_lock_create_func((L), #L, __FILE__, __LINE__) -# endif /* UNIV_DEBUG */ - -/**************************************************************//** -NOTE! The following macros should be used in rw locking and -unlocking, not the corresponding function. */ - -# define rw_lock_s_lock(M) \ - rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) - -# define rw_lock_s_lock_inline(M, P, F, L) \ - rw_lock_s_lock_func((M), (P), (F), (L)) - -# define rw_lock_s_lock_gen(M, P) \ - rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) - -# define rw_lock_s_lock_gen_nowait(M, P) \ - rw_lock_s_lock_low((M), (P), __FILE__, __LINE__) - -# define rw_lock_s_lock_nowait(M, F, L) \ - rw_lock_s_lock_low((M), 0, (F), (L)) - -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L) -# else -# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L) -# endif - - -# define rw_lock_x_lock(M) \ - rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) - -# define rw_lock_x_lock_inline(M, P, F, L) \ - rw_lock_x_lock_func((M), (P), (F), (L)) - -# define rw_lock_x_lock_gen(M, P) \ - rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) - -# define rw_lock_x_lock_nowait(M) \ - rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) - -# define rw_lock_x_lock_func_nowait_inline(M, F, L) \ - rw_lock_x_lock_func_nowait((M), (F), (L)) - -# 
ifdef UNIV_SYNC_DEBUG -# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L) -# else -# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L) -# endif - -# define rw_lock_free(M) rw_lock_free_func(M) - -#else /* !UNIV_PFS_RWLOCK */ - -/* Following macros point to Performance Schema instrumented functions. */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_create(K, L, level) \ - pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__) -# else /* UNIV_SYNC_DEBUG */ -# define rw_lock_create(K, L, level) \ - pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__) -# endif/* UNIV_SYNC_DEBUG */ -# else /* UNIV_DEBUG */ -# define rw_lock_create(K, L, level) \ - pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__) -# endif /* UNIV_DEBUG */ - -/****************************************************************** -NOTE! The following macros should be used in rw locking and -unlocking, not the corresponding function. */ - -# define rw_lock_s_lock(M) \ - pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) - -# define rw_lock_s_lock_inline(M, P, F, L) \ - pfs_rw_lock_s_lock_func((M), (P), (F), (L)) - -# define rw_lock_s_lock_gen(M, P) \ - pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) - -# define rw_lock_s_lock_gen_nowait(M, P) \ - pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__) - -# define rw_lock_s_lock_nowait(M, F, L) \ - pfs_rw_lock_s_lock_low((M), 0, (F), (L)) - -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(P, L) -# else -# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(L) -# endif - -# define rw_lock_x_lock(M) \ - pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) - -# define rw_lock_x_lock_inline(M, P, F, L) \ - pfs_rw_lock_x_lock_func((M), (P), (F), (L)) - -# define rw_lock_x_lock_gen(M, P) \ - pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) - -# define rw_lock_x_lock_nowait(M) \ - pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) - -# define 
rw_lock_x_lock_func_nowait_inline(M, F, L) \ - pfs_rw_lock_x_lock_func_nowait((M), (F), (L)) - -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L) -# else -# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(L) -# endif - -# define rw_lock_free(M) pfs_rw_lock_free_func(M) - -#endif /* UNIV_PFS_RWLOCK */ - -#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0) -#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0) - -/******************************************************************//** -Creates, or rather, initializes an rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. */ -UNIV_INTERN -void -rw_lock_create_func( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ -/******************************************************************//** -Creates, or rather, initializes a priority rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. 
*/ -UNIV_INTERN -void -rw_lock_create_func( -/*================*/ - prio_rw_lock_t* lock, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ -/******************************************************************//** -Calling this function is obligatory only if the memory buffer containing -the rw-lock is freed. Removes an rw-lock object from the global list. The -rw-lock is checked to be in the non-locked state. */ -UNIV_INTERN -void -rw_lock_free_func( -/*==============*/ - rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Calling this function is obligatory only if the memory buffer containing -the priority rw-lock is freed. Removes an rw-lock object from the global list. -The rw-lock is checked to be in the non-locked state. */ -UNIV_INTERN -void -rw_lock_free_func( -/*==============*/ - prio_rw_lock_t* lock); /*!< in: rw-lock */ -#ifdef UNIV_DEBUG -/******************************************************************//** -Checks that the rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. -@return TRUE */ -UNIV_INTERN -ibool -rw_lock_validate( -/*=============*/ - rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Checks that the priority rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. -@return TRUE */ -UNIV_INTERN -ibool -rw_lock_validate( -/*=============*/ - prio_rw_lock_t* lock); /*!< in: rw-lock */ -#endif /* UNIV_DEBUG */ -/******************************************************************//** -Low-level function which tries to lock an rw-lock in s-mode. Performs no -spinning. 
-@return TRUE if success */ -UNIV_INLINE -ibool -rw_lock_s_lock_low( -/*===============*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass MY_ATTRIBUTE((unused)), - /*!< in: pass value; != 0, if the lock will be - passed to another thread to unlock */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function, except if -you supply the file name and line number. Lock an rw-lock in shared mode -for the current thread. If the rw-lock is locked in exclusive mode, or -there is an exclusive lock request waiting, the function spins a preset -time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before -suspending the thread. */ -UNIV_INLINE -void -rw_lock_s_lock_func( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function, except if -you supply the file name and line number. Lock a priority rw-lock in shared -mode for the current thread, using the relative thread priority. If the -rw-lock is locked in exclusive mode, or there is an exclusive lock request -waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), -waiting for the lock, before suspending the thread. 
*/ -UNIV_INLINE -void -rw_lock_s_lock_func( -/*================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread if the lock can be -obtained immediately. -@return TRUE if success */ -UNIV_INLINE -ibool -rw_lock_x_lock_func_nowait( -/*=======================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Releases a shared mode lock. */ -UNIV_INLINE -void -rw_lock_s_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - rw_lock_t* lock); /*!< in/out: rw-lock */ - -/******************************************************************//** -Releases a shared mode priority lock. */ -UNIV_INLINE -void -rw_lock_s_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - prio_rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread. If the rw-lock is locked -in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock, before suspending the thread. 
If the same thread has an x-lock -on the rw-lock, locking succeed, with the following exception: if pass != 0, -only a single x-lock may be taken on the lock. NOTE: If the same thread has -an s-lock, locking does not succeed! */ -UNIV_INTERN -void -rw_lock_x_lock_func( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line, /*!< in: line where requested */ - bool priority_lock = false, - /*!< in: whether the lock is a priority lock */ - bool high_priority = false); - /*!< in: whether we are acquiring a priority - lock with high priority */ -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock a priority -rw-lock in exclusive mode for the current thread. If the rw-lock is locked -in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock, before suspending the thread. If the same thread has an x-lock -on the rw-lock, locking succeed, with the following exception: if pass != 0, -only a single x-lock may be taken on the lock. NOTE: If the same thread has -an s-lock, locking does not succeed! */ -UNIV_INTERN -void -rw_lock_x_lock_func( -/*================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Releases an exclusive mode lock. 
*/ -UNIV_INLINE -void -rw_lock_x_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** -Releases an exclusive mode priority lock. */ -UNIV_INLINE -void -rw_lock_x_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - prio_rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** -This function is used in the insert buffer to move the ownership of an -x-latch on a buffer frame to the current thread. The x-latch was set by -the buffer read operation and it protected the buffer frame while the -read was done. The ownership is moved because we want that the current -thread is able to acquire a second x-latch which is stored in an mtr. -This, in turn, is needed to pass the debug checks of index page -operations. */ -UNIV_INTERN -void -rw_lock_x_lock_move_ownership( -/*==========================*/ - rw_lock_t* lock); /*!< in: lock which was x-locked in the - buffer read */ -/******************************************************************//** -Returns the value of writer_count for the lock. Does not reserve the lock -mutex, so the caller must be sure it is not changed during the call. -@return value of writer_count */ -UNIV_INLINE -ulint -rw_lock_get_x_lock_count( -/*=====================*/ - const rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Returns the value of writer_count for the priority lock. Does not reserve the -lock mutex, so the caller must be sure it is not changed during the call. 
-@return value of writer_count */ -UNIV_INLINE -ulint -rw_lock_get_x_lock_count( -/*=====================*/ - const prio_rw_lock_t* lock); /*!< in: rw-lock */ -/********************************************************************//** -Check if there are threads waiting for the rw-lock. -@return 1 if waiters, 0 otherwise */ -UNIV_INLINE -ulint -rw_lock_get_waiters( -/*================*/ - const rw_lock_t* lock); /*!< in: rw-lock */ -/********************************************************************//** -Check if there are threads waiting for the priority rw-lock. -@return 1 if waiters, 0 otherwise */ -UNIV_INLINE -ulint -rw_lock_get_waiters( -/*================*/ - const prio_rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Returns the write-status of the lock - this function made more sense -with the old rw_lock implementation. -@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */ -UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ - const rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Returns the write-status of the priority lock - this function made more sense -with the old rw_lock implementation. -@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */ -UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ - const prio_rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Returns the number of readers. -@return number of readers */ -UNIV_INLINE -ulint -rw_lock_get_reader_count( -/*=====================*/ - const rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Returns the number of readers. 
-@return number of readers */ -UNIV_INLINE -ulint -rw_lock_get_reader_count( -/*=====================*/ - const prio_rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Decrements lock_word the specified amount if it is greater than 0. -This is used by both s_lock and x_lock operations. -@return TRUE if decr occurs */ -UNIV_INLINE -ibool -rw_lock_lock_word_decr( -/*===================*/ - rw_lock_t* lock, /*!< in/out: rw-lock */ - ulint amount); /*!< in: amount to decrement */ -/******************************************************************//** -Increments lock_word the specified amount and returns new value. -@return lock->lock_word after increment */ -UNIV_INLINE -lint -rw_lock_lock_word_incr( -/*===================*/ - rw_lock_t* lock, /*!< in/out: rw-lock */ - ulint amount); /*!< in: amount to increment */ -/******************************************************************//** -This function sets the lock->writer_thread and lock->recursive fields. -For platforms where we are using atomic builtins instead of lock->mutex -it sets the lock->writer_thread field using atomics to ensure memory -ordering. Note that it is assumed that the caller of this function -effectively owns the lock i.e.: nobody else is allowed to modify -lock->writer_thread at this point in time. -The protocol is that lock->writer_thread MUST be updated BEFORE the -lock->recursive flag is set. */ -UNIV_INLINE -void -rw_lock_set_writer_id_and_recursion_flag( -/*=====================================*/ - rw_lock_t* lock, /*!< in/out: lock to work on */ - ibool recursive); /*!< in: TRUE if recursion - allowed */ -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Checks if the thread has locked the rw-lock in the specified mode, with -the pass value == 0. 
*/ -UNIV_INTERN -ibool -rw_lock_own( -/*========*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ - MY_ATTRIBUTE((warn_unused_result)); -/******************************************************************//** -Checks if the thread has locked the priority rw-lock in the specified mode, -with the pass value == 0. */ -UNIV_INTERN -ibool -rw_lock_own( -/*========*/ - prio_rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ - MY_ATTRIBUTE((warn_unused_result)); -#endif /* UNIV_SYNC_DEBUG */ -/******************************************************************//** -Checks if somebody has locked the rw-lock in the specified mode. */ -UNIV_INTERN -ibool -rw_lock_is_locked( -/*==============*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -#ifdef UNIV_SYNC_DEBUG -/***************************************************************//** -Prints debug info of an rw-lock. */ -UNIV_INTERN -void -rw_lock_print( -/*==========*/ - rw_lock_t* lock); /*!< in: rw-lock */ -/***************************************************************//** -Prints debug info of currently locked rw-locks. */ -UNIV_INTERN -void -rw_lock_list_print_info( -/*====================*/ - FILE* file); /*!< in: file where to print */ -/***************************************************************//** -Returns the number of currently locked rw-locks. -Works only in the debug version. -@return number of locked rw-locks */ -UNIV_INTERN -ulint -rw_lock_n_locked(void); -/*==================*/ - -/*#####################################################################*/ - -/******************************************************************//** -Acquires the debug mutex. 
We cannot use the mutex defined in sync0sync, -because the debug mutex is also acquired in sync0arr while holding the OS -mutex protecting the sync array, and the ordinary mutex_enter might -recursively call routines in sync0arr, leading to a deadlock on the OS -mutex. */ -UNIV_INTERN -void -rw_lock_debug_mutex_enter(void); -/*===========================*/ -/******************************************************************//** -Releases the debug mutex. */ -UNIV_INTERN -void -rw_lock_debug_mutex_exit(void); -/*==========================*/ -/*********************************************************************//** -Prints info of a debug struct. */ -UNIV_INTERN -void -rw_lock_debug_print( -/*================*/ - FILE* f, /*!< in: output stream */ - rw_lock_debug_t* info); /*!< in: debug struct */ -#endif /* UNIV_SYNC_DEBUG */ - -/* NOTE! The structure appears here only for the compiler to know its size. -Do not use its fields directly! */ - -/** The structure used in the spin lock implementation of a read-write -lock. Several threads may have a shared lock simultaneously in this -lock, but only one writer may have an exclusive lock, in which case no -shared locks are allowed. To prevent starving of a writer blocked by -readers, a writer may queue for x-lock by decrementing lock_word: no -new readers will be let in while the thread waits for readers to -exit. */ -struct rw_lock_t { - volatile lint lock_word; - /*!< Holds the state of the lock. */ - volatile ulint waiters;/*!< 1: there are waiters */ - volatile ibool recursive;/*!< Default value FALSE which means the lock - is non-recursive. The value is typically set - to TRUE making normal rw_locks recursive. In - case of asynchronous IO, when a non-zero - value of 'pass' is passed then we keep the - lock non-recursive. - This flag also tells us about the state of - writer_thread field. If this flag is set - then writer_thread MUST contain the thread - id of the current x-holder or wait-x thread. 
- This flag must be reset in x_unlock - functions before incrementing the lock_word */ - volatile os_thread_id_t writer_thread; - /*!< Thread id of writer thread. Is only - guaranteed to have sane and non-stale - value iff recursive flag is set. */ - struct os_event event; /*!< Used by sync0arr.cc for thread queueing */ - struct os_event wait_ex_event; - /*!< Event for next-writer to wait on. A thread - must decrement lock_word before waiting. */ -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - ib_mutex_t mutex; /*!< The mutex protecting rw_lock_t */ -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ - - UT_LIST_NODE_T(rw_lock_t) list; - /*!< All allocated rw locks are put into a - list */ -#ifdef UNIV_SYNC_DEBUG - UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list; - /*!< In the debug version: pointer to the debug - info list of the lock */ - ulint level; /*!< Level in the global latching order. */ -#endif /* UNIV_SYNC_DEBUG */ -#ifdef UNIV_PFS_RWLOCK - struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */ -#endif - ulint count_os_wait; /*!< Count of os_waits. 
May not be accurate */ - const char* cfile_name;/*!< File name where lock created */ - const char* lock_name;/*!< lock name */ - os_thread_id_t thread_id;/*!< thread id */ - const char* file_name;/*!< File name where the lock was obtained */ - ulint line; /*!< Line where the rw-lock was locked */ - /* last s-lock file/line is not guaranteed to be correct */ - const char* last_s_file_name;/*!< File name where last s-locked */ - const char* last_x_file_name;/*!< File name where last x-locked */ - ibool writer_is_wait_ex; - /*!< This is TRUE if the writer field is - RW_LOCK_WAIT_EX; this field is located far - from the memory update hotspot fields which - are at the start of this struct, thus we can - peek this field without causing much memory - bus traffic */ - unsigned cline:14; /*!< Line where created */ - unsigned last_s_line:14; /*!< Line number where last time s-locked */ - unsigned last_x_line:14; /*!< Line number where last time x-locked */ -#ifdef UNIV_DEBUG - ulint magic_n; /*!< RW_LOCK_MAGIC_N */ -/** Value of rw_lock_t::magic_n */ -#define RW_LOCK_MAGIC_N 22643 -#endif /* UNIV_DEBUG */ -}; - -/** The structure implementing a priority rw lock. */ -struct prio_rw_lock_t { - struct rw_lock_t base_lock; /* The regular rw latch - provides the lock word etc. for - the priority rw lock */ - volatile ulint high_priority_s_waiters; - /* Number of high priority S - waiters */ - struct os_event high_priority_s_event; /* High priority wait - array event for S waiters */ - volatile ulint high_priority_x_waiters; - /* Number of high priority X - waiters */ - struct os_event high_priority_x_event; - /* High priority wait arraay - event for X waiters */ - volatile ulint high_priority_wait_ex_waiter; - /* If 1, a waiting next-writer - exists and is high-priority */ -}; - -#ifdef UNIV_SYNC_DEBUG -/** The structure for storing debug info of an rw-lock. All access to this -structure must be protected by rw_lock_debug_mutex_enter(). 
*/ -struct rw_lock_debug_t { - - os_thread_id_t thread_id; /*!< The thread id of the thread which - locked the rw-lock */ - ulint pass; /*!< Pass value given in the lock operation */ - ulint lock_type; /*!< Type of the lock: RW_LOCK_EX, - RW_LOCK_SHARED, RW_LOCK_WAIT_EX */ - const char* file_name;/*!< File name where the lock was obtained */ - ulint line; /*!< Line where the rw-lock was locked */ - UT_LIST_NODE_T(rw_lock_debug_t) list; - /*!< Debug structs are linked in a two-way - list */ -}; -#endif /* UNIV_SYNC_DEBUG */ - -/* For performance schema instrumentation, a new set of rwlock -wrap functions are created if "UNIV_PFS_RWLOCK" is defined. -The instrumentations are not planted directly into original -functions, so that we keep the underlying function as they -are. And in case, user wants to "take out" some rwlock from -instrumentation even if performance schema (UNIV_PFS_RWLOCK) -is defined, they can do so by reinstating APIs directly link to -original underlying functions. -The instrumented function names have prefix of "pfs_rw_lock_" vs. -original name prefix of "rw_lock_". Following are list of functions -that have been instrumented: - -rw_lock_create() -rw_lock_x_lock() -rw_lock_x_lock_gen() -rw_lock_x_lock_nowait() -rw_lock_x_unlock_gen() -rw_lock_s_lock() -rw_lock_s_lock_gen() -rw_lock_s_lock_nowait() -rw_lock_s_unlock_gen() -rw_lock_free() -*/ - -#ifdef UNIV_PFS_RWLOCK -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_create_func() -NOTE! Please use the corresponding macro rw_lock_create(), not -directly this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_create_func( -/*====================*/ - PSI_rwlock_key key, /*!< in: key registered with - performance schema */ - rw_lock_t* lock, /*!< in: rw lock */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_create_func() -NOTE! Please use the corresponding macro rw_lock_create(), not -directly this function! */ -UNIV_INLINE -void -pfs_rw_lock_create_func( -/*====================*/ - PSI_rwlock_key key, /*!< in: key registered with - performance schema */ - prio_rw_lock_t* lock, /*!< in: rw lock */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline); /*!< in: file line where created */ - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_lock_func() -NOTE! Please use the corresponding macro rw_lock_x_lock(), not -directly this function! */ -UNIV_INLINE -void -pfs_rw_lock_x_lock_func( -/*====================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_lock_func() -NOTE! 
Please use the corresponding macro rw_lock_x_lock(), not -directly this function! */ -UNIV_INLINE -void -pfs_rw_lock_x_lock_func( -/*====================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ - -/******************************************************************//** -Performance schema instrumented wrap function for -rw_lock_x_lock_func_nowait() -NOTE! Please use the corresponding macro, not directly this function! -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_rw_lock_x_lock_func_nowait( -/*===========================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_func() -NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_s_lock_func( -/*====================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_func() -NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_s_lock_func( -/*====================*/ - prio_rw_lock_t* lock, /*!< in: pointer to priority rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_low(), -the low-level s-lock attempt that performs no spinning (unlike -rw_lock_s_lock_func(), it reports success through its return value). -NOTE! Please use the corresponding macro, not directly this function! -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_rw_lock_s_lock_low( -/*===================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the - lock will be passed to another - thread to unlock */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Performance schema instrumented wrap function for the low-level, -non-spinning s-lock attempt, overload taking a priority rw-lock -(prio_rw_lock_t). -NOTE! Please use the corresponding macro, not directly this function! -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_rw_lock_s_lock_low( -/*===================*/ - prio_rw_lock_t* lock, /*!< in: pointer to priority rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the - lock will be passed to another - thread to unlock */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_lock_func() -NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_x_lock_func( -/*====================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_lock_func(), -overload taking a priority rw-lock (prio_rw_lock_t). This prototype -repeats a declaration that already appears earlier in this header. -NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_x_lock_func( -/*====================*/ - prio_rw_lock_t* lock, /*!< in: pointer to priority rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_unlock_func() -NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_s_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock; the parameter exists - only when UNIV_SYNC_DEBUG is defined */ -#endif - rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_unlock_func() -NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_s_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - prio_rw_lock_t* lock); /*!< in/out: priority rw-lock */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_unlock_func() -NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_x_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_unlock_func(), -overload taking a priority rw-lock (prio_rw_lock_t). -NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_x_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - prio_rw_lock_t* lock); /*!< in/out: priority rw-lock */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_free_func() -NOTE! Please use the corresponding macro rw_lock_free(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_free_func( -/*==================*/ - rw_lock_t* lock); /*!< in: rw-lock */ -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_free_func() -NOTE! Please use the corresponding macro rw_lock_free(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_free_func( -/*==================*/ - prio_rw_lock_t* lock); /*!< in: priority rw-lock */ -#endif /* UNIV_PFS_RWLOCK */ - - -#ifndef UNIV_NONINL -#include "sync0rw.ic" -#endif -#endif /* !UNIV_HOTBACKUP */ - -#endif diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic deleted file mode 100644 index 91d1e1b0cfc..00000000000 --- a/storage/xtradb/include/sync0rw.ic +++ /dev/null @@ -1,1275 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0rw.ic -The read-write lock (for threads) - -Created 9/11/1995 Heikki Tuuri -*******************************************************/ - -/******************************************************************//** -Lock a regular or priority rw-lock in shared mode for the current thread. If -the rw-lock is locked in exclusive mode, or there is an exclusive lock request -waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), -waiting for the lock before suspending the thread. */ -UNIV_INTERN -void -rw_lock_s_lock_spin( -/*================*/ - void* _lock, /*!< in: pointer to rw-lock; the inline callers in - this file pass a rw_lock_t* together with - priority_lock == false and a prio_rw_lock_t* - together with priority_lock == true */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - bool priority_lock, - /*!< in: whether the lock is a priority lock */ - bool high_priority, - /*!< in: whether we are acquiring a priority - lock with high priority */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line); /*!< in: line where requested */ -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Inserts the debug information for an rw-lock. Declared only when -UNIV_SYNC_DEBUG is defined. */ -UNIV_INTERN -void -rw_lock_add_debug_info( -/*===================*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint pass, /*!< in: pass value */ - ulint lock_type, /*!< in: lock type */ - const char* file_name, /*!< in: file where requested */ - ulint line); /*!< in: line where requested */ -/******************************************************************//** -Removes a debug information struct for an rw-lock. 
*/ -UNIV_INTERN -void -rw_lock_remove_debug_info( -/*======================*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint pass, /*!< in: pass value */ - ulint lock_type); /*!< in: lock type */ -#endif /* UNIV_SYNC_DEBUG */ - -/********************************************************************//** -Check if there are threads waiting for the rw-lock. -@return the current value of lock->waiters: non-zero if waiters, -0 otherwise */ -UNIV_INLINE -ulint -rw_lock_get_waiters( -/*================*/ - const rw_lock_t* lock) /*!< in: rw-lock */ -{ - return(lock->waiters); -} - -/********************************************************************//** -Check if there are threads waiting for the priority rw-lock. -@return 1 if the base lock has waiters or if there are high priority -S or X waiters, 0 otherwise */ -UNIV_INLINE -ulint -rw_lock_get_waiters( -/*================*/ - const prio_rw_lock_t* lock) /*!< in: priority rw-lock */ -{ - return rw_lock_get_waiters(&lock->base_lock) - || lock->high_priority_s_waiters - || lock->high_priority_x_waiters; -} - -/********************************************************************//** -Sets lock->waiters to 1. It is not an error if lock->waiters is already -1. With INNODB_RW_LOCKS_USE_ATOMICS the flag is set with a compare-and-swap -(0 -> 1) whose result is ignored; otherwise it is stored directly and the -store is followed by os_wmb. */ -UNIV_INLINE -void -rw_lock_set_waiter_flag( -/*====================*/ - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ -#ifdef INNODB_RW_LOCKS_USE_ATOMICS - (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1); -#else /* INNODB_RW_LOCKS_USE_ATOMICS */ - lock->waiters = 1; - os_wmb; -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -} - -/********************************************************************//** -Resets lock->waiters to 0. It is not an error if lock->waiters is already -0. On platforms where ATOMIC builtins are used this function enforces a -memory barrier. 
*/ -UNIV_INLINE -void -rw_lock_reset_waiter_flag( -/*======================*/ - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ -#ifdef INNODB_RW_LOCKS_USE_ATOMICS - (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0); -#else /* INNODB_RW_LOCKS_USE_ATOMICS */ - lock->waiters = 0; - os_wmb; -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -} - -/******************************************************************//** -Returns the write-status of the lock - this function made more sense -with the old rw_lock implementation. The status is derived from a single -read of lock->lock_word: a positive value gives RW_LOCK_NOT_LOCKED, 0 or a -value <= -X_LOCK_DECR gives RW_LOCK_EX, and any other (negative) value gives -RW_LOCK_WAIT_EX. -@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */ -UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ - const rw_lock_t* lock) /*!< in: rw-lock */ -{ - lint lock_word = lock->lock_word; - if (lock_word > 0) { - /* return NOT_LOCKED in s-lock state, like the writer - member of the old lock implementation. */ - return(RW_LOCK_NOT_LOCKED); - } else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) { - return(RW_LOCK_EX); - } else { - ut_ad(lock_word > -X_LOCK_DECR); - return(RW_LOCK_WAIT_EX); - } -} - -/******************************************************************//** -Returns the write-status of the priority lock - this function made more sense -with the old rw_lock implementation. The answer is taken from the base lock. -@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */ -UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ - const prio_rw_lock_t* lock) /*!< in: priority rw-lock */ -{ - return(rw_lock_get_writer(&lock->base_lock)); -} - -/******************************************************************//** -Returns the number of readers. 
-@return number of readers */ -UNIV_INLINE -ulint -rw_lock_get_reader_count( -/*=====================*/ - const rw_lock_t* lock) /*!< in: rw-lock */ -{ - lint lock_word = lock->lock_word; - if (lock_word > 0) { - /* s-locked, no x-waiters; yields 0 when lock_word is - exactly X_LOCK_DECR */ - return(X_LOCK_DECR - lock_word); - } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) { - /* s-locked, with x-waiters */ - return((ulint)(-lock_word)); - } - return(0); -} - -/******************************************************************//** -Returns the number of readers of the priority lock, as reported by its -base lock. -@return number of readers */ -UNIV_INLINE -ulint -rw_lock_get_reader_count( -/*=====================*/ - const prio_rw_lock_t* lock) /*!< in: priority rw-lock */ -{ - return(rw_lock_get_reader_count(&lock->base_lock)); -} - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS /* Returns a pointer to lock->mutex; compiled only when INNODB_RW_LOCKS_USE_ATOMICS is not defined. */ -UNIV_INLINE -ib_mutex_t* -rw_lock_get_mutex( -/*==============*/ - rw_lock_t* lock) /*!< in: rw-lock */ -{ - return(&(lock->mutex)); -} -#endif - -/******************************************************************//** -Returns the value of writer_count for the lock. Does not reserve the lock -mutex, so the caller must be sure it is not changed during the call. -The count is derived from lock_word alone. -@return value of writer_count */ -UNIV_INLINE -ulint -rw_lock_get_x_lock_count( -/*=====================*/ - const rw_lock_t* lock) /*!< in: rw-lock */ -{ - lint lock_copy = lock->lock_word; - if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) { - return(0); /* lock_word does not encode an x-lock */ - } - /* 0 yields 1, -X_LOCK_DECR yields 2, and every further step below -X_LOCK_DECR adds one more */ return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR))); -} - -/******************************************************************//** -Returns the value of writer_count for the priority lock. Does not reserve the -lock mutex, so the caller must be sure it is not changed during the call. 
-@return value of writer_count */ -UNIV_INLINE -ulint -rw_lock_get_x_lock_count( -/*=====================*/ - const prio_rw_lock_t* lock) /*!< in: priority rw-lock */ -{ - return(rw_lock_get_x_lock_count(&lock->base_lock)); -} - -/******************************************************************//** -Two different implementations for decrementing the lock_word of a rw_lock: -one for systems supporting atomic operations, one for others. This does -not support recursive x-locks: they should be handled by the caller and -need not be atomic since they are performed by the current lock holder. -The decrement is only attempted while lock_word is positive. -@return TRUE if the decrement was made, FALSE if lock_word was not -positive */ -UNIV_INLINE -ibool -rw_lock_lock_word_decr( -/*===================*/ - rw_lock_t* lock, /*!< in/out: rw-lock */ - ulint amount) /*!< in: amount to decrement */ -{ -#ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word; - - os_rmb; - local_lock_word = lock->lock_word; - while (local_lock_word > 0) { - if (os_compare_and_swap_lint(&lock->lock_word, - local_lock_word, - local_lock_word - amount)) { - return(TRUE); - } - /* the compare-and-swap failed: re-read lock_word and retry while it is still positive */ local_lock_word = lock->lock_word; - } - return(FALSE); -#else /* INNODB_RW_LOCKS_USE_ATOMICS */ - ibool success = FALSE; - mutex_enter(&(lock->mutex)); - if (lock->lock_word > 0) { - lock->lock_word -= amount; - success = TRUE; - } - mutex_exit(&(lock->mutex)); - return(success); -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -} - -/******************************************************************//** -Increments lock_word the specified amount and returns new value. 
-@return lock->lock_word after increment */ -UNIV_INLINE -lint -rw_lock_lock_word_incr( -/*===================*/ - rw_lock_t* lock, /*!< in/out: rw-lock */ - ulint amount) /*!< in: amount of increment */ -{ -#ifdef INNODB_RW_LOCKS_USE_ATOMICS - return(os_atomic_increment_lint(&lock->lock_word, amount)); -#else /* INNODB_RW_LOCKS_USE_ATOMICS */ - lint local_lock_word; - - mutex_enter(&(lock->mutex)); /* without atomics, lock->mutex protects the update and the read-back */ - - lock->lock_word += amount; - local_lock_word = lock->lock_word; - - mutex_exit(&(lock->mutex)); - - return(local_lock_word); -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -} - -/******************************************************************//** -This function sets the lock->writer_thread and lock->recursive fields. -For platforms where we are using atomic builtins instead of lock->mutex -it sets the lock->writer_thread field using atomics to ensure memory -ordering. Note that it is assumed that the caller of this function -effectively owns the lock i.e.: nobody else is allowed to modify -lock->writer_thread at this point in time. -The protocol is that lock->writer_thread MUST be updated BEFORE the -lock->recursive flag is set. */ -UNIV_INLINE -void -rw_lock_set_writer_id_and_recursion_flag( -/*=====================================*/ - rw_lock_t* lock, /*!< in/out: lock to work on */ - ibool recursive) /*!< in: TRUE if recursion - allowed */ -{ - os_thread_id_t curr_thread = os_thread_get_curr_id(); - -#ifdef INNODB_RW_LOCKS_USE_ATOMICS - os_thread_id_t local_thread; - ibool success; - - /* Prevent Valgrind warnings about writer_thread being - uninitialized. It does not matter if writer_thread is - uninitialized, because we are comparing writer_thread against - itself, and the operation should always succeed. 
*/ - UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread); - - local_thread = lock->writer_thread; - success = os_compare_and_swap_thread_id( - &lock->writer_thread, local_thread, curr_thread); - ut_a(success); - /* set only after writer_thread, as required by the protocol in the function comment */ lock->recursive = recursive; - -#else /* INNODB_RW_LOCKS_USE_ATOMICS */ - - mutex_enter(&lock->mutex); - lock->writer_thread = curr_thread; - lock->recursive = recursive; - mutex_exit(&lock->mutex); - -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ -} - -/******************************************************************//** -Low-level function which tries to lock an rw-lock in s-mode. Performs no -spinning. -@return TRUE if lock_word could be decremented by 1 (s-lock acquired), -FALSE otherwise */ -UNIV_INLINE -ibool -rw_lock_s_lock_low( -/*===============*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass MY_ATTRIBUTE((unused)), - /*!< in: pass value; != 0, if the lock will be - passed to another thread to unlock; only read - when UNIV_SYNC_DEBUG is defined */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - if (!rw_lock_lock_word_decr(lock, 1)) { - /* Locking did not succeed */ - return(FALSE); - } - -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line); -#endif - /* These debugging values are not set safely: they may be incorrect - or even refer to a line that is invalid for the file name. */ - lock->last_s_file_name = file_name; - lock->last_s_line = line; - - if (srv_instrument_semaphores) { - lock->thread_id = os_thread_get_curr_id(); - lock->file_name = file_name; - lock->line = line; - } - - return(TRUE); /* locking succeeded */ -} - -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in shared mode for the current thread. 
If the rw-lock is locked -in exclusive mode, or there is an exclusive lock request waiting, the -function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for -the lock, before suspending the thread. */ -UNIV_INLINE -void -rw_lock_s_lock_func( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - /* NOTE: As we do not know the thread ids for threads which have - s-locked a latch, and s-lockers will be served only after waiting - x-lock requests have been fulfilled, then if this thread already - owns an s-lock here, it may end up in a deadlock with another thread - which requests an x-lock here. Therefore, we will forbid recursive - s-locking of a latch: the following assert will warn the programmer - of the possibility of this kind of a deadlock. If we want to implement - safe recursive s-locking, we should keep in a list the thread ids of - the threads which have s-locked a latch. This would use some CPU - time. */ - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (rw_lock_s_lock_low(lock, pass, file_name, line)) { - - return; /* Success */ - } else { - /* Did not succeed, try spin wait; the two false arguments - mean: not a priority lock, not a high priority request */ - - rw_lock_s_lock_spin(lock, pass, false, false, file_name, line); - - return; - } -} - -/******************************************************************//** -Return true if waiters of higher priority than the current thread -exist. 
-@return true if waiters of higher priority exist; always false for a -regular (non-priority) lock and for a high priority request */ -UNIV_INLINE -bool -rw_lock_higher_prio_waiters_exist( -/*==============================*/ - bool priority_lock, /*!< in: whether the lock is a priority lock */ - bool high_priority, /*!< in: whether we are acquiring a priority - lock with high priority */ - void* lock) /*!< in: rw lock; cast to prio_rw_lock_t* when - priority_lock is true and high_priority is - false, not dereferenced otherwise */ -{ - if (high_priority || !priority_lock) { - ut_ad(!(!priority_lock && high_priority)); - return(false); - } - - ut_ad(priority_lock && !high_priority); - - prio_rw_lock_t *prio_rw_lock = (prio_rw_lock_t *) lock; - return prio_rw_lock->high_priority_wait_ex_waiter > 0 - || prio_rw_lock->high_priority_s_waiters > 0 - || prio_rw_lock->high_priority_x_waiters > 0; -} - -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function, except if -you supply the file name and line number. Lock a priority rw-lock in shared -mode for the current thread, using the relative thread priority. If the -rw-lock is locked in exclusive mode, or there is an exclusive lock request -waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), -waiting for the lock, before suspending the thread. 
*/ -UNIV_INLINE -void -rw_lock_s_lock_func( -/*================*/ - prio_rw_lock_t* lock, /*!< in: pointer to priority rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* recursive s-locking is forbidden; see the NOTE in the rw_lock_t overload of rw_lock_s_lock_func() */ - ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - bool high_priority = srv_current_thread_priority > 0; - - /* Do not attempt to acquire a low-priority S latch if there are - high-priority waiters even if such attempt would be successful. 
This - is to prevent a high priority X request from being starved by a - sequence of overlapping regular priority S requests. */ - - if (!rw_lock_higher_prio_waiters_exist(true, high_priority, lock) - && rw_lock_s_lock_low(&lock->base_lock, pass, file_name, line)) { - - return; /* Success */ - } else { - /* Did not succeed, try spin wait */ - rw_lock_s_lock_spin(lock, pass, true, high_priority, file_name, - line); - - return; - } -} - -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread if the lock can be -obtained immediately. A recursive relock by the thread that already holds -the x-lock also counts as success. -@return TRUE if success */ -UNIV_INLINE -ibool -rw_lock_x_lock_func_nowait( -/*=======================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - ibool success; - ibool local_recursive= lock->recursive; - -#ifdef INNODB_RW_LOCKS_USE_ATOMICS - success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0); -#else - - success = FALSE; - mutex_enter(&(lock->mutex)); - if (lock->lock_word == X_LOCK_DECR) { - lock->lock_word = 0; - success = TRUE; - } - mutex_exit(&(lock->mutex)); - -#endif - /* Note: recursive must be loaded before writer_thread see - comment for rw_lock_set_writer_id_and_recursion_flag(). - To achieve this we load it before os_compare_and_swap_lint(), - which implies full memory barrier in current implementation. */ - if (success) { - rw_lock_set_writer_id_and_recursion_flag(lock, TRUE); - - } else if (local_recursive - && os_thread_eq(lock->writer_thread, - os_thread_get_curr_id())) { - /* Relock: this lock_word modification is safe since no other - threads can modify (lock, unlock, or reserve) lock_word while - there is an exclusive writer and this is the writer thread. 
*/ - if (lock->lock_word == 0) { - lock->lock_word = -X_LOCK_DECR; - } else { - lock->lock_word--; - } - - /* Watch for too many recursive locks */ - ut_ad(lock->lock_word < 0); - - } else { - /* Failure */ - return(FALSE); - } -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); -#endif - - if (srv_instrument_semaphores) { - lock->thread_id = os_thread_get_curr_id(); - lock->file_name = file_name; - lock->line = line; - } - - lock->last_x_file_name = file_name; - lock->last_x_line = line; - - ut_ad(rw_lock_validate(lock)); - - return(TRUE); -} - -/******************************************************************//** -Releases a shared mode lock. */ -UNIV_INLINE -void -rw_lock_s_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - ut_ad(lock->lock_word > -X_LOCK_DECR); - ut_ad(lock->lock_word != 0); - ut_ad(lock->lock_word < X_LOCK_DECR); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); -#endif - - /* Increment lock_word to indicate 1 less reader */ - if (rw_lock_lock_word_incr(lock, 1) == 0) { - - /* wait_ex waiter exists. It may not be asleep, but we signal - anyway. We do not wake other waiters, because they can't - exist without wait_ex waiter and wait_ex waiter goes first.*/ - os_event_set(&lock->wait_ex_event); - sync_array_object_signalled(); - - } - - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_s_exit_count++; -#endif -} - -/******************************************************************//** -Releases a shared mode priority lock. 
*/ -UNIV_INLINE -void -rw_lock_s_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - prio_rw_lock_t* lock) /*!< in/out: priority rw-lock */ -{ - lint lock_word; - - ut_ad(lock->base_lock.lock_word > -X_LOCK_DECR); - ut_ad(lock->base_lock.lock_word != 0); - ut_ad(lock->base_lock.lock_word < X_LOCK_DECR); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(&lock->base_lock, pass, RW_LOCK_SHARED); -#endif - - /* Increment the base lock's lock_word to indicate 1 less reader; - the resulting value decides which event, if any, is signalled */ - lock_word = rw_lock_lock_word_incr(&lock->base_lock, 1); - if (lock_word == 0) { - - /* A waiting next-writer exists, either high priority or - regular, sharing the same wait event. */ - os_event_set(&lock->base_lock.wait_ex_event); - sync_array_object_signalled(); - - } else if (lock_word == X_LOCK_DECR) { - - /* lock_word is back at X_LOCK_DECR. - S-waiters may exist during an S unlock if a high-priority - thread released it, because low-priority threads are prevented - from acquiring S lock while high-priority thread holds it. */ - if (lock->base_lock.waiters) { - - rw_lock_reset_waiter_flag(&lock->base_lock); - os_event_set(&lock->base_lock.event); - sync_array_object_signalled(); - } - } - - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_s_exit_count++; -#endif -} - -/******************************************************************//** -Prepares an exclusive mode lock release: resets the recursion flag and removes -the debug information if needed and returns the required lock word increment -value. 
-@return lock word increment value to perform the unlock */ -UNIV_INLINE -ulint -rw_lock_x_prepare_unlock( -/*=====================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR); - - /* lock->recursive flag also indicates if lock->writer_thread is - valid or stale. If we are the last of the recursive callers - then we must unset lock->recursive flag to indicate that the - lock->writer_thread is now stale. - Note that since we still hold the x-lock we can safely read the - lock_word. */ - if (lock->lock_word == 0) { - /* Last caller in a possible recursive chain. */ - lock->recursive = FALSE; - } - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); -#endif - - /* The increment undoes one locking step: X_LOCK_DECR when lock_word is 0 or exactly -X_LOCK_DECR, 1 when it is below -X_LOCK_DECR. */ ulint x_lock_incr; - if (lock->lock_word == 0) { - x_lock_incr = X_LOCK_DECR; - } else if (lock->lock_word == -X_LOCK_DECR) { - x_lock_incr = X_LOCK_DECR; - } else { - ut_ad(lock->lock_word < -X_LOCK_DECR); - x_lock_incr = 1; - } - - return(x_lock_incr); -} - -/******************************************************************//** -Releases an exclusive mode lock. */ -UNIV_INLINE -void -rw_lock_x_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - ulint x_lock_incr = rw_lock_x_prepare_unlock( -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - lock); - - if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) { - /* Lock is now free. May have to signal read/write waiters. - We do not need to signal wait_ex waiters, since they cannot - exist when there is a writer. 
*/ - - if (lock->waiters) { - rw_lock_reset_waiter_flag(lock); - os_event_set(&lock->event); - sync_array_object_signalled(); - } - } - - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_x_exit_count++; -#endif -} - -/******************************************************************//** -Releases an exclusive mode priority lock. */ -UNIV_INLINE -void -rw_lock_x_unlock_func( -/*==================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - prio_rw_lock_t* lock) /*!< in/out: priority rw-lock */ -{ - ulint x_lock_incr = rw_lock_x_prepare_unlock( -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - &lock->base_lock); - - ut_ad(lock->high_priority_wait_ex_waiter == 0); - - if (rw_lock_lock_word_incr(&lock->base_lock, x_lock_incr) - == X_LOCK_DECR) { - - /* Priority lock is now free. Signal only the first non-empty - class of waiters, checked in this order: 1) high priority X - waiters; 2) high priority S waiters; 3) regular priority - waiters. The regular waiter flag is reset only in case 3. - We do not need to signal wait_ex waiters, since they cannot - exist when there is a writer. */ - - if (lock->high_priority_x_waiters) { - - os_event_set(&lock->high_priority_x_event); - sync_array_object_signalled(); - } else if (lock->high_priority_s_waiters) { - - os_event_set(&lock->high_priority_s_event); - sync_array_object_signalled(); - } else if (lock->base_lock.waiters) { - - rw_lock_reset_waiter_flag(&lock->base_lock); - os_event_set(&lock->base_lock.event); - sync_array_object_signalled(); - } - } - - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_x_exit_count++; -#endif -} - -#ifdef UNIV_PFS_RWLOCK - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_create_func(). -NOTE! Please use the corresponding macro rw_lock_create(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_create_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: key registered with - performance schema */ - rw_lock_t* lock, /*!< in/out: pointer to the rw-lock memory to - initialize */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level; the parameter exists only when - both UNIV_DEBUG and UNIV_SYNC_DEBUG are defined */ -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ -{ - /* Initialize the rwlock for performance schema and keep the - value returned by init_rwlock in lock->pfs_psi */ - lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock); - - /* The actual function to initialize an rwlock; level is forwarded - under the same preprocessor conditions as in the parameter list */ - rw_lock_create_func(lock, -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - level, -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - cmutex_name, - cfile_name, - cline); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_create_func(). -NOTE! Please use the corresponding macro rw_lock_create(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_create_func( -/*====================*/ - mysql_pfs_key_t key, /*!< in: key registered with - performance schema */ - prio_rw_lock_t* lock, /*!< in: pointer to memory */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ -{ - /* Initialize the rwlock for performance schema */ - lock->base_lock.pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock); - - /* The actual function to initialize an rwlock */ - rw_lock_create_func(lock, -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - level, -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - cmutex_name, - cfile_name, - cline); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_lock_func() -NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_x_lock_func( -/*====================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - if (lock->pfs_psi != NULL) - { - PSI_rwlock_locker* locker; - PSI_rwlock_locker_state state; - - /* Record the entry of rw x lock request in performance schema */ - locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)( - &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, - file_name, static_cast<uint>(line)); - - rw_lock_x_lock_func( - lock, pass, file_name, static_cast<uint>(line)); - - if (locker != NULL) { - PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0); - } - } - else - { - rw_lock_x_lock_func(lock, pass, file_name, line); - } -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_lock_func() -NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_x_lock_func( -/*====================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - if (lock->base_lock.pfs_psi != NULL) - { - PSI_rwlock_locker* locker; - PSI_rwlock_locker_state state; - - /* Record the entry of rw x lock request in performance schema */ - locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)( - &state, lock->base_lock.pfs_psi, PSI_RWLOCK_WRITELOCK, - file_name, line); - - rw_lock_x_lock_func(lock, pass, file_name, line); - - if (locker != NULL) { - PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0); - } - } - else - { - rw_lock_x_lock_func(lock, pass, file_name, line); - } -} - -/******************************************************************//** -Performance schema instrumented wrap function for -rw_lock_x_lock_func_nowait() -NOTE! Please use the corresponding macro rw_lock_x_lock_func(), -not directly this function! 
-@return TRUE if success */ -UNIV_INLINE -ibool -pfs_rw_lock_x_lock_func_nowait( -/*===========================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - const char* file_name,/*!< in: file name where lock - requested */ - ulint line) /*!< in: line where requested */ -{ - ibool ret; - - if (lock->pfs_psi != NULL) - { - PSI_rwlock_locker* locker; - PSI_rwlock_locker_state state; - - /* Record the entry of rw x lock request in performance schema */ - locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)( - &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, - file_name, static_cast<uint>(line)); - - ret = rw_lock_x_lock_func_nowait(lock, file_name, line); - - if (locker != NULL) { - PSI_RWLOCK_CALL(end_rwlock_wrwait)( - locker, static_cast<int>(ret)); - } - } - else - { - ret = rw_lock_x_lock_func_nowait(lock, file_name, line); - } - - return(ret); -} -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_free_func() -NOTE! Please use the corresponding macro rw_lock_free(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_free_func( -/*==================*/ - rw_lock_t* lock) /*!< in: pointer to rw-lock */ -{ - if (lock->pfs_psi != NULL) - { - PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi); - lock->pfs_psi = NULL; - } - - rw_lock_free_func(lock); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_free_func() -NOTE! Please use the corresponding macro rw_lock_free(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_free_func( -/*==================*/ - prio_rw_lock_t* lock) /*!< in: pointer to rw-lock */ -{ - if (lock->base_lock.pfs_psi != NULL) - { - PSI_RWLOCK_CALL(destroy_rwlock)(lock->base_lock.pfs_psi); - lock->base_lock.pfs_psi = NULL; - } - - rw_lock_free_func(lock); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_func() -NOTE! Please use the corresponding macro rw_lock_s_lock(), not -directly this function! */ -UNIV_INLINE -void -pfs_rw_lock_s_lock_func( -/*====================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the - lock will be passed to another - thread to unlock */ - const char* file_name,/*!< in: file name where lock - requested */ - ulint line) /*!< in: line where requested */ -{ - if (lock->pfs_psi != NULL) - { - PSI_rwlock_locker* locker; - PSI_rwlock_locker_state state; - - /* Instrumented to inform we are aquiring a shared rwlock */ - locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)( - &state, lock->pfs_psi, PSI_RWLOCK_READLOCK, - file_name, static_cast<uint>(line)); - - rw_lock_s_lock_func(lock, pass, file_name, line); - - if (locker != NULL) { - PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0); - } - } - else - { - rw_lock_s_lock_func(lock, pass, file_name, line); - } - - return; -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_func() -NOTE! Please use the corresponding macro rw_lock_s_lock(), not -directly this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_s_lock_func( -/*====================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the - lock will be passed to another - thread to unlock */ - const char* file_name,/*!< in: file name where lock - requested */ - ulint line) /*!< in: line where requested */ -{ - if (lock->base_lock.pfs_psi != NULL) - { - PSI_rwlock_locker* locker; - PSI_rwlock_locker_state state; - - /* Instrumented to inform we are aquiring a shared rwlock */ - locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)( - &state, lock->base_lock.pfs_psi, PSI_RWLOCK_READLOCK, - file_name, line); - - rw_lock_s_lock_func(lock, pass, file_name, line); - - if (locker != NULL) { - PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0); - } - } - else - { - rw_lock_s_lock_func(lock, pass, file_name, line); - } - - return; -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_func() -NOTE! Please use the corresponding macro rw_lock_s_lock(), not -directly this function! 
-@return TRUE if success */ -UNIV_INLINE -ibool -pfs_rw_lock_s_lock_low( -/*===================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the - lock will be passed to another - thread to unlock */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - ibool ret; - - if (lock->pfs_psi != NULL) - { - PSI_rwlock_locker* locker; - PSI_rwlock_locker_state state; - - /* Instrumented to inform we are aquiring a shared rwlock */ - locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)( - &state, lock->pfs_psi, PSI_RWLOCK_READLOCK, - file_name, static_cast<uint>(line)); - - ret = rw_lock_s_lock_low(lock, pass, file_name, line); - - if (locker != NULL) { - PSI_RWLOCK_CALL(end_rwlock_rdwait)( - locker, static_cast<int>(ret)); - } - } - else - { - ret = rw_lock_s_lock_low(lock, pass, file_name, line); - } - - return(ret); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_lock_func() -NOTE! Please use the corresponding macro rw_lock_s_lock(), not -directly this function! -@return TRUE if success */ -UNIV_INLINE -ibool -pfs_rw_lock_s_lock_low( -/*===================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the - lock will be passed to another - thread to unlock */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - return(pfs_rw_lock_s_lock_low(&lock->base_lock, pass, - file_name, line)); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_unlock_func() -NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly -this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_x_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - /* Inform performance schema we are unlocking the lock */ - if (lock->pfs_psi != NULL) - PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi); - - rw_lock_x_unlock_func( -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - lock); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_x_unlock_func() -NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly -this function! */ -UNIV_INLINE -void -pfs_rw_lock_x_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - prio_rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - /* Inform performance schema we are unlocking the lock */ - if (lock->base_lock.pfs_psi != NULL) - PSI_RWLOCK_CALL(unlock_rwlock)(lock->base_lock.pfs_psi); - - rw_lock_x_unlock_func( -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - lock); -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_unlock_func() -NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not -directly this function! 
*/ -UNIV_INLINE -void -pfs_rw_lock_s_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - /* Inform performance schema we are unlocking the lock */ - if (lock->pfs_psi != NULL) - PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi); - - rw_lock_s_unlock_func( -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - lock); - -} - -/******************************************************************//** -Performance schema instrumented wrap function for rw_lock_s_unlock_func() -NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not -directly this function! */ -UNIV_INLINE -void -pfs_rw_lock_s_unlock_func( -/*======================*/ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the - lock may have been passed to another - thread to unlock */ -#endif - prio_rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - /* Inform performance schema we are unlocking the lock */ - if (lock->base_lock.pfs_psi != NULL) - PSI_RWLOCK_CALL(unlock_rwlock)(lock->base_lock.pfs_psi); - - rw_lock_s_unlock_func( -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - lock); - -} - -#endif /* UNIV_PFS_RWLOCK */ diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h deleted file mode 100644 index af445aeb84c..00000000000 --- a/storage/xtradb/include/sync0sync.h +++ /dev/null @@ -1,1050 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2012, Facebook Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. 
The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0sync.h -Mutex, the basic synchronization primitive - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0sync_h -#define sync0sync_h - -#include "univ.i" -#include "sync0types.h" -#include "ut0lst.h" -#include "ut0mem.h" -#include "os0thread.h" -#include "os0sync.h" -#include "sync0arr.h" -#include "ut0counter.h" - -/** Enable semaphore request instrumentation */ -extern my_bool srv_instrument_semaphores; - -#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP) -extern "C" my_bool timed_mutexes; -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - -#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK - -/* By default, buffer mutexes and rwlocks will be excluded from -instrumentation due to their large number of instances. 
*/ -# define PFS_SKIP_BUFFER_MUTEX_RWLOCK - -/* By default, event->mutex will also be excluded from instrumentation */ -# define PFS_SKIP_EVENT_MUTEX - -#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -/* Key defines to register InnoDB mutexes with performance schema */ -extern mysql_pfs_key_t autoinc_mutex_key; -extern mysql_pfs_key_t buffer_block_mutex_key; -extern mysql_pfs_key_t buf_pool_zip_mutex_key; -extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key; -extern mysql_pfs_key_t buf_pool_free_list_mutex_key; -extern mysql_pfs_key_t buf_pool_zip_free_mutex_key; -extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key; -extern mysql_pfs_key_t buf_pool_flush_state_mutex_key; -extern mysql_pfs_key_t cache_last_read_mutex_key; -extern mysql_pfs_key_t dict_foreign_err_mutex_key; -extern mysql_pfs_key_t dict_sys_mutex_key; -extern mysql_pfs_key_t file_format_max_mutex_key; -extern mysql_pfs_key_t fil_system_mutex_key; -extern mysql_pfs_key_t flush_list_mutex_key; -extern mysql_pfs_key_t fts_bg_threads_mutex_key; -extern mysql_pfs_key_t fts_delete_mutex_key; -extern mysql_pfs_key_t fts_optimize_mutex_key; -extern mysql_pfs_key_t fts_doc_id_mutex_key; -extern mysql_pfs_key_t fts_pll_tokenize_mutex_key; -extern mysql_pfs_key_t hash_table_mutex_key; -extern mysql_pfs_key_t ibuf_bitmap_mutex_key; -extern mysql_pfs_key_t ibuf_mutex_key; -extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; -extern mysql_pfs_key_t log_bmp_sys_mutex_key; -extern mysql_pfs_key_t log_sys_mutex_key; -extern mysql_pfs_key_t log_flush_order_mutex_key; -# ifndef HAVE_ATOMIC_BUILTINS -extern mysql_pfs_key_t server_mutex_key; -# endif /* !HAVE_ATOMIC_BUILTINS */ -# ifdef UNIV_MEM_DEBUG -extern mysql_pfs_key_t mem_hash_mutex_key; -# endif /* UNIV_MEM_DEBUG */ -extern mysql_pfs_key_t mem_pool_mutex_key; -extern mysql_pfs_key_t mutex_list_mutex_key; -extern mysql_pfs_key_t purge_sys_bh_mutex_key; -extern mysql_pfs_key_t recv_sys_mutex_key; -extern mysql_pfs_key_t 
recv_writer_mutex_key; -extern mysql_pfs_key_t rseg_mutex_key; -# ifdef UNIV_SYNC_DEBUG -extern mysql_pfs_key_t rw_lock_debug_mutex_key; -# endif /* UNIV_SYNC_DEBUG */ -extern mysql_pfs_key_t rw_lock_list_mutex_key; -extern mysql_pfs_key_t rw_lock_mutex_key; -extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key; -extern mysql_pfs_key_t srv_innodb_monitor_mutex_key; -extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key; -extern mysql_pfs_key_t srv_threads_mutex_key; -extern mysql_pfs_key_t srv_monitor_file_mutex_key; -# ifdef UNIV_SYNC_DEBUG -extern mysql_pfs_key_t sync_thread_mutex_key; -# endif /* UNIV_SYNC_DEBUG */ -extern mysql_pfs_key_t buf_dblwr_mutex_key; -extern mysql_pfs_key_t trx_undo_mutex_key; -extern mysql_pfs_key_t trx_mutex_key; -extern mysql_pfs_key_t lock_sys_mutex_key; -extern mysql_pfs_key_t lock_sys_wait_mutex_key; -extern mysql_pfs_key_t trx_sys_mutex_key; -extern mysql_pfs_key_t srv_sys_mutex_key; -extern mysql_pfs_key_t srv_sys_tasks_mutex_key; -#ifndef HAVE_ATOMIC_BUILTINS -extern mysql_pfs_key_t srv_conc_mutex_key; -#endif /* !HAVE_ATOMIC_BUILTINS */ -#ifndef HAVE_ATOMIC_BUILTINS_64 -extern mysql_pfs_key_t monitor_mutex_key; -#endif /* !HAVE_ATOMIC_BUILTINS_64 */ -extern mysql_pfs_key_t event_os_mutex_key; -extern mysql_pfs_key_t ut_list_mutex_key; -extern mysql_pfs_key_t os_mutex_key; -extern mysql_pfs_key_t zip_pad_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/******************************************************************//** -Initializes the synchronization data structures. */ -UNIV_INTERN -void -sync_init(void); -/*===========*/ -/******************************************************************//** -Frees the resources in synchronization data structures. 
*/ -UNIV_INTERN -void -sync_close(void); -/*===========*/ - -#undef mutex_free /* Fix for MacOS X */ - -#ifdef UNIV_PFS_MUTEX -/********************************************************************** -Following mutex APIs would be performance schema instrumented -if "UNIV_PFS_MUTEX" is defined: - -mutex_create -mutex_enter -mutex_enter_first -mutex_enter_last -mutex_exit -mutex_enter_nowait -mutex_free - -These mutex APIs will point to corresponding wrapper functions that contain -the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined. -The instrumented wrapper functions have the prefix of "innodb_". - -NOTE! The following macro should be used in mutex operation, not the -corresponding function. */ - -/******************************************************************//** -Creates, or rather, initializes a mutex object to a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. 
*/ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define mutex_create(K, M, level) \ - pfs_mutex_create_func((K), (M), (level), __FILE__, __LINE__, #M) -# else -# define mutex_create(K, M, level) \ - pfs_mutex_create_func((K), (M), __FILE__, __LINE__, #M) -# endif/* UNIV_SYNC_DEBUG */ -# else -# define mutex_create(K, M, level) \ - pfs_mutex_create_func((K), (M), __FILE__, __LINE__, #M) -# endif /* UNIV_DEBUG */ - -# define mutex_enter(M) \ - pfs_mutex_enter_func((M), __FILE__, __LINE__) - -# define mutex_enter_nowait(M) \ - pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__) - -# define mutex_enter_first(M) \ - pfs_mutex_enter_func((M), __FILE__, __LINE__, IB_HIGH_PRIO) - -# define mutex_enter_last(M) \ - pfs_mutex_enter_func((M), __FILE__, __LINE__, IB_LOW_PRIO) - -# define mutex_exit(M) pfs_mutex_exit_func(M) - -# define mutex_free(M) pfs_mutex_free_func(M) - -#else /* UNIV_PFS_MUTEX */ - -/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to -original non-instrumented functions */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define mutex_create(K, M, level) \ - mutex_create_func((M), (level), __FILE__, __LINE__, #M) -# else /* UNIV_SYNC_DEBUG */ -# define mutex_create(K, M, level) \ - mutex_create_func((M), __FILE__, __LINE__, #M) -# endif /* UNIV_SYNC_DEBUG */ -# else /* UNIV_DEBUG */ -# define mutex_create(K, M, level) \ - mutex_create_func((M), __FILE__, __LINE__, #M) -# endif /* UNIV_DEBUG */ - -# define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__) - -# define mutex_enter_nowait(M) \ - mutex_enter_nowait_func((M), __FILE__, __LINE__) - -# define mutex_enter_first(M) \ - mutex_enter_func((M), __FILE__, __LINE__, IB_HIGH_PRIO) - -# define mutex_enter_last(M) \ - mutex_enter_func((M), __FILE__, __LINE__, IB_LOW_PRIO) - -# define mutex_exit(M) mutex_exit_func(M) - -# define mutex_free(M) mutex_free_func(M) - -#endif /* UNIV_PFS_MUTEX */ - -/******************************************************************//** -Creates, or rather, 
initializes a mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. */ -UNIV_INTERN -void -mutex_create_func( -/*==============*/ - ib_mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline, /*!< in: file line where created */ - const char* cmutex_name); /*!< in: mutex name */ - -/******************************************************************//** -Creates, or rather, initializes a priority mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. */ -UNIV_INTERN -void -mutex_create_func( -/*==============*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where - created */ - ulint cline, /*!< in: file line where - created */ - const char* cmutex_name); /*!< in: mutex name */ -/******************************************************************//** -NOTE! Use the corresponding macro mutex_free(), not directly this function! -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a mutex object from the mutex list. The mutex -is checked to be in the reset state. */ -UNIV_INTERN -void -mutex_free_func( -/*============*/ - ib_mutex_t* mutex); /*!< in: mutex */ -/******************************************************************//** -NOTE! 
Use the corresponding macro mutex_free(), not directly this function! -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a priority mutex object from the mutex list. The -mutex is checked to be in the reset state. */ -UNIV_INTERN -void -mutex_free_func( -/*============*/ - ib_prio_mutex_t* mutex); /*!< in: mutex */ -/**************************************************************//** -NOTE! The following macro should be used in mutex locking, not the -corresponding function. */ - -/* NOTE! currently same as mutex_enter! */ - -#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__) -/******************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Locks a mutex for the current thread. If the mutex is reserved -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting -for the mutex before suspending the thread. */ -UNIV_INLINE -void -mutex_enter_func( -/*=============*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where locked */ - ulint line); /*!< in: line where locked */ -/******************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Locks a priority mutex for the current thread. If the mutex is -reserved the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) -waiting for the mutex before suspending the thread. If the thread is suspended, -the priority argument value determines the relative order for its wake up. Any -IB_HIGH_PRIO waiters will be woken up before any IB_LOW_PRIO waiters. In case of -IB_DEFAULT_PRIO, the relative priority will be set according to -srv_current_thread_priority. 
*/ -UNIV_INLINE -void -mutex_enter_func( -/*=============*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where - locked */ - ulint line, /*!< in: line where locked */ - enum ib_sync_priority priority = IB_DEFAULT_PRIO); - /*!<in: mutex acquisition - priority */ -/********************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. If the lock is not -acquired immediately, returns with return value 1. -@return 0 if succeed, 1 if not */ -UNIV_INTERN -ulint -mutex_enter_nowait_func( -/*====================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line); /*!< in: line where requested */ -/********************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. If the lock is not -acquired immediately, returns with return value 1. -@return 0 if succeed, 1 if not */ -UNIV_INLINE -ulint -mutex_enter_nowait_func( -/*====================*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line); /*!< in: line where - requested */ -/******************************************************************//** -NOTE! Use the corresponding macro mutex_exit(), not directly this function! -Unlocks a mutex owned by the current thread. */ -UNIV_INLINE -void -mutex_exit_func( -/*============*/ - ib_mutex_t* mutex); /*!< in: pointer to mutex */ -/******************************************************************//** -NOTE! Use the corresponding macro mutex_exit(), not directly this function! -Unlocks a priority mutex owned by the current thread. 
*/ -UNIV_INLINE -void -mutex_exit_func( -/*============*/ - ib_prio_mutex_t* mutex); /*!< in: pointer to mutex */ - - -#ifdef UNIV_PFS_MUTEX -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_create(), not directly -this function! -A wrapper function for mutex_create_func(), registers the mutex -with peformance schema if "UNIV_PFS_MUTEX" is defined when -creating the mutex */ -UNIV_INLINE -void -pfs_mutex_create_func( -/*==================*/ - PSI_mutex_key key, /*!< in: Performance Schema key */ - ib_mutex_t* mutex, /*!< in: pointer to memory */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline, /*!< in: file line where created */ - const char* cmutex_name); -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_create(), not directly -this function! -A wrapper function for mutex_create_func(), registers the mutex -with peformance schema if "UNIV_PFS_MUTEX" is defined when -creating the performance mutex */ -UNIV_INLINE -void -pfs_mutex_create_func( -/*==================*/ - PSI_mutex_key key, /*!< in: Performance Schema - key */ - ib_prio_mutex_t* mutex, /*!< in: pointer to memory */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where - created */ - ulint cline, /*!< in: file line where - created */ - const char* cmutex_name); -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_enter(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_func(). 
*/ -UNIV_INLINE -void -pfs_mutex_enter_func( -/*=================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where locked */ - ulint line); /*!< in: line where locked */ -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_enter(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_func(). */ -UNIV_INLINE -void -pfs_mutex_enter_func( -/*=================*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where - locked */ - ulint line, /*!< in: line where locked */ - enum ib_sync_priority priority = IB_DEFAULT_PRIO); - /*!<in: mutex acquisition - priority */ -/********************************************************************//** -NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_nowait_func. -@return 0 if succeed, 1 if not */ -UNIV_INLINE -ulint -pfs_mutex_enter_nowait_func( -/*========================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line); /*!< in: line where requested */ -/********************************************************************//** -NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_nowait_func. -@return 0 if succeed, 1 if not */ -UNIV_INLINE -ulint -pfs_mutex_enter_nowait_func( -/*========================*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line); /*!< in: line where - requested */ -/******************************************************************//** -NOTE! 
Please use the corresponding macro mutex_exit(), not directly -this function! -A wrap function of mutex_exit_func() with peformance schema instrumentation. -Unlocks a mutex owned by the current thread. */ -UNIV_INLINE -void -pfs_mutex_exit_func( -/*================*/ - ib_mutex_t* mutex); /*!< in: pointer to mutex */ -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_exit(), not directly -this function! -A wrap function of mutex_exit_func() with peformance schema instrumentation. -Unlocks a priority mutex owned by the current thread. */ -UNIV_INLINE -void -pfs_mutex_exit_func( -/*================*/ - ib_prio_mutex_t* mutex); /*!< in: pointer to mutex */ - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_free(), not directly -this function! -Wrapper function for mutex_free_func(). Also destroys the performance -schema probes when freeing the mutex */ -UNIV_INLINE -void -pfs_mutex_free_func( -/*================*/ - ib_mutex_t* mutex); /*!< in: mutex */ -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_free(), not directly -this function! -Wrapper function for mutex_free_func(). Also destroys the performance -schema probes when freeing the priority mutex */ -UNIV_INLINE -void -pfs_mutex_free_func( -/*================*/ - ib_prio_mutex_t* mutex); /*!< in: mutex */ - -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Returns TRUE if no mutex or rw-lock is currently locked. -Works only in the debug version. 
-@return TRUE if no mutexes and rw-locks reserved */ -UNIV_INTERN -ibool -sync_all_freed(void); -/*================*/ -#endif /* UNIV_SYNC_DEBUG */ -/*##################################################################### -FUNCTION PROTOTYPES FOR DEBUGGING */ -/*******************************************************************//** -Prints wait info of the sync system. */ -UNIV_INTERN -void -sync_print_wait_info( -/*=================*/ - FILE* file); /*!< in: file where to print */ -/*******************************************************************//** -Prints info of the sync system. */ -UNIV_INTERN -void -sync_print( -/*=======*/ - FILE* file); /*!< in: file where to print */ -#ifdef UNIV_DEBUG -/******************************************************************//** -Checks that the mutex has been initialized. -@return TRUE */ -UNIV_INTERN -ibool -mutex_validate( -/*===========*/ - const ib_mutex_t* mutex); /*!< in: mutex */ -/******************************************************************//** -Checks that the current thread owns the mutex. Works only -in the debug version. -@return TRUE if owns */ -UNIV_INTERN -ibool -mutex_own( -/*======*/ - const ib_mutex_t* mutex) /*!< in: mutex */ - MY_ATTRIBUTE((warn_unused_result)); -/******************************************************************//** -Checks that the current thread owns the priority mutex. Works only -in the debug version. -@return TRUE if owns */ -UNIV_INTERN -ibool -mutex_own( -/*======*/ - const ib_prio_mutex_t* mutex) /*!< in: priority mutex */ - MY_ATTRIBUTE((warn_unused_result)); -#endif /* UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Adds a latch and its level in the thread level array. Allocates the memory -for the array if called first time for this OS thread. Makes the checks -against other latch levels stored in the array for this thread. 
*/ -UNIV_INTERN -void -sync_thread_add_level( -/*==================*/ - void* latch, /*!< in: pointer to a mutex or an rw-lock */ - ulint level, /*!< in: level in the latching order; if - SYNC_LEVEL_VARYING, nothing is done */ - ibool relock) /*!< in: TRUE if re-entering an x-lock */ - MY_ATTRIBUTE((nonnull)); -/******************************************************************//** -Removes a latch from the thread level array if it is found there. -@return TRUE if found in the array; it is no error if the latch is -not found, as we presently are not able to determine the level for -every latch reservation the program does */ -UNIV_INTERN -ibool -sync_thread_reset_level( -/*====================*/ - void* latch); /*!< in: pointer to a mutex or an rw-lock */ -/******************************************************************//** -Checks if the level array for the current thread contains a -mutex or rw-latch at the specified level. -@return a matching latch, or NULL if not found */ -UNIV_INTERN -void* -sync_thread_levels_contains( -/*========================*/ - ulint level); /*!< in: latching order level - (SYNC_DICT, ...)*/ -/******************************************************************//** -Checks that the level array for the current thread is empty. -@return a latch, or NULL if empty except the exceptions specified below */ -UNIV_INTERN -void* -sync_thread_levels_nonempty_gen( -/*============================*/ - ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is - allowed to be owned by the thread */ - MY_ATTRIBUTE((warn_unused_result)); -/******************************************************************//** -Checks if the level array for the current thread is empty, -except for data dictionary latches. 
@return nonzero if sync_thread_levels_nonempty_gen(TRUE) returns NULL */ -#define sync_thread_levels_empty_except_dict() \ - (!sync_thread_levels_nonempty_gen(TRUE)) -/******************************************************************//** -Checks if the level array for the current thread is empty, -except for the btr_search_latch. -@return a latch, or NULL if empty except the exceptions specified below */ -UNIV_INTERN -void* -sync_thread_levels_nonempty_trx( -/*============================*/ - ibool has_search_latch) - /*!< in: TRUE if and only if the thread - is supposed to hold btr_search_latch */ - MY_ATTRIBUTE((warn_unused_result)); - -/******************************************************************//** -Gets the debug information for a reserved mutex. */ -UNIV_INTERN -void -mutex_get_debug_info( -/*=================*/ - ib_mutex_t* mutex, /*!< in: mutex */ - const char** file_name, /*!< out: file where requested */ - ulint* line, /*!< out: line where requested */ - os_thread_id_t* thread_id); /*!< out: id of the thread which owns - the mutex */ -/******************************************************************//** -Counts currently reserved mutexes. Works only in the debug version. -@return number of reserved mutexes */ -UNIV_INTERN -ulint -mutex_n_reserved(void); -/*==================*/ -#endif /* UNIV_SYNC_DEBUG */ -/******************************************************************//** -NOT to be used outside this module except in debugging! Gets the value -of the lock word. -@return the current value of the lock_word field of the mutex */ -UNIV_INLINE -lock_word_t -mutex_get_lock_word( -/*================*/ - const ib_mutex_t* mutex); /*!< in: mutex */ -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -NOT to be used outside this module except in debugging! Gets the waiters -field in a mutex.
-@return value to set */ -UNIV_INLINE -ulint -mutex_get_waiters( -/*==============*/ - const ib_mutex_t* mutex); /*!< in: mutex */ -#endif /* UNIV_SYNC_DEBUG */ - -/* - LATCHING ORDER WITHIN THE DATABASE - ================================== - -The mutex or latch in the central memory object, for instance, a rollback -segment object, must be acquired before acquiring the latch or latches to -the corresponding file data structure. In the latching order below, these -file page object latches are placed immediately below the corresponding -central memory object latch or mutex. - -Synchronization object Notes ----------------------- ----- - -Dictionary mutex If we have a pointer to a dictionary -| object, e.g., a table, it can be -| accessed without reserving the -| dictionary mutex. We must have a -| reservation, a memoryfix, to the -| appropriate table object in this case, -| and the table must be explicitly -| released later. -V -Dictionary header -| -V -Secondary index tree latch The tree latch protects also all -| the B-tree non-leaf pages. These -V can be read with the page only -Secondary index non-leaf bufferfixed to save CPU time, -| no s-latch is needed on the page. -| Modification of a page requires an -| x-latch on the page, however. If a -| thread owns an x-latch to the tree, -| it is allowed to latch non-leaf pages -| even after it has acquired the fsp -| latch. -V -Secondary index leaf The latch on the secondary index leaf -| can be kept while accessing the -| clustered index, to save CPU time. -V -Clustered index tree latch To increase concurrency, the tree -| latch is usually released when the -| leaf page latch has been acquired. -V -Clustered index non-leaf -| -V -Clustered index leaf -| -V -Transaction system header -| -V -Transaction undo mutex The undo log entry must be written -| before any index page is modified. -| Transaction undo mutex is for the undo -| logs the analogue of the tree latch -| for a B-tree. 
If a thread has the -| trx undo mutex reserved, it is allowed -| to latch the undo log pages in any -| order, and also after it has acquired -| the fsp latch. -V -Rollback segment mutex The rollback segment mutex must be -| reserved, if, e.g., a new page must -| be added to an undo log. The rollback -| segment and the undo logs in its -| history list can be seen as an -| analogue of a B-tree, and the latches -| reserved similarly, using a version of -| lock-coupling. If an undo log must be -| extended by a page when inserting an -| undo log record, this corresponds to -| a pessimistic insert in a B-tree. -V -Rollback segment header -| -V -Purge system latch -| -V -Undo log pages If a thread owns the trx undo mutex, -| or for a log in the history list, the -| rseg mutex, it is allowed to latch -| undo log pages in any order, and even -| after it has acquired the fsp latch. -| If a thread does not have the -| appropriate mutex, it is allowed to -| latch only a single undo log page in -| a mini-transaction. -V -File space management latch If a mini-transaction must allocate -| several file pages, it can do that, -| because it keeps the x-latch to the -| file space management in its memo. -V -File system pages -| -V -lock_sys_wait_mutex Mutex protecting lock timeout data -| -V -lock_sys_mutex Mutex protecting lock_sys_t -| -V -trx_sys->mutex Mutex protecting trx_sys_t -| -V -Threads mutex Background thread scheduling mutex -| -V -query_thr_mutex Mutex protecting query threads -| -V -trx_mutex Mutex protecting trx_t fields -| -V -Search system mutex -| -V -Buffer pool mutexes -| -V -Log mutex -| -Any other latch -| -V -Memory pool mutex */ - -/* Latching order levels. If you modify these, you have to also update -sync_thread_add_level(). */ - -/* User transaction locks are higher than any of the latch levels below: -no latches are allowed when a thread goes to wait for a normal table -or row lock! 
*/ -#define SYNC_USER_TRX_LOCK 9999 -#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress - latching order checking */ -#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with - buffer pool page locks, which do not - have a fixed level, but instead have - their level set after the page is - locked; see e.g. - ibuf_bitmap_get_map_page(). */ -#define SYNC_TRX_I_S_RWLOCK 1910 /* Used for - trx_i_s_cache_t::rw_lock */ -#define SYNC_TRX_I_S_LAST_READ 1900 /* Used for - trx_i_s_cache_t::last_read_mutex */ -#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the - file format tag */ -#define SYNC_DICT_OPERATION 1010 /* table create, drop, etc. reserve - this in X-mode; implicit or background - operations (purge, rollback, foreign - key checks) reserve this in S-mode */ -#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */ -#define SYNC_DICT 1000 -#define SYNC_DICT_AUTOINC_MUTEX 999 -#define SYNC_STATS_AUTO_RECALC 997 -#define SYNC_DICT_HEADER 995 -#define SYNC_IBUF_HEADER 914 -#define SYNC_IBUF_PESS_INSERT_MUTEX 912 -/*-------------------------------*/ -#define SYNC_INDEX_TREE 900 -#define SYNC_TREE_NODE_NEW 892 -#define SYNC_TREE_NODE_FROM_HASH 891 -#define SYNC_TREE_NODE 890 -#define SYNC_PURGE_LATCH 800 -#define SYNC_TRX_UNDO 700 -#define SYNC_RSEG 600 -#define SYNC_RSEG_HEADER_NEW 591 -#define SYNC_RSEG_HEADER 590 -#define SYNC_TRX_UNDO_PAGE 570 -#define SYNC_EXTERN_STORAGE 500 -#define SYNC_FSP 400 -#define SYNC_FSP_PAGE 395 -#define SYNC_STATS_DEFRAG 390 -/*------------------------------------- Change buffer headers */ -#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */ -/*------------------------------------- Change buffer tree */ -#define SYNC_IBUF_INDEX_TREE 360 -#define SYNC_IBUF_TREE_NODE_NEW 359 -#define SYNC_IBUF_TREE_NODE 358 -#define SYNC_IBUF_BITMAP_MUTEX 351 -#define SYNC_IBUF_BITMAP 350 -/*------------------------------------- Change log for online create index */ -#define SYNC_INDEX_ONLINE_LOG 340
-/*------------------------------------- MySQL query cache mutex */ -/*------------------------------------- MySQL binlog mutex */ -/*-------------------------------*/ -#define SYNC_LOCK_WAIT_SYS 300 -#define SYNC_LOCK_SYS 299 -#define SYNC_TRX_SYS 298 -#define SYNC_TRX 297 -#define SYNC_THREADS 295 -#define SYNC_REC_LOCK 294 -#define SYNC_TRX_SYS_HEADER 290 -#define SYNC_PURGE_QUEUE 200 -#define SYNC_LOG_ONLINE 175 -#define SYNC_LOG 170 -#define SYNC_LOG_FLUSH_ORDER 147 /* NOTE: listed out of numeric order; by - value this level falls between - SYNC_BUF_PAGE_HASH (149) and - SYNC_BUF_BLOCK (146) */ -#define SYNC_RECV 168 -#define SYNC_FTS_TOKENIZE 167 -#define SYNC_FTS_CACHE_INIT 166 /* Used for FTS cache initialization */ -#define SYNC_FTS_BG_THREADS 165 -#define SYNC_FTS_OPTIMIZE 164 /* FIXME: confirm that this is the correct - level number; needs testing */ -#define SYNC_WORK_QUEUE 162 -#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory - heap that can be extended to the - buffer pool, its logical level is - SYNC_SEARCH_SYS, as memory allocation - can call routines there! Otherwise - the level is SYNC_MEM_HASH. */ -#define SYNC_BUF_LRU_LIST 151 -#define SYNC_BUF_PAGE_HASH 149 /* buf_pool->page_hash rw_lock */ -#define SYNC_BUF_BLOCK 146 /* Block mutex */ -#define SYNC_BUF_FREE_LIST 145 -#define SYNC_BUF_ZIP_FREE 144 -#define SYNC_BUF_ZIP_HASH 143 -#define SYNC_BUF_FLUSH_STATE 142 -#define SYNC_BUF_FLUSH_LIST 141 /* Buffer flush list mutex */ -#define SYNC_DOUBLEWRITE 139 -#define SYNC_ANY_LATCH 135 -#define SYNC_MEM_HASH 131 -#define SYNC_MEM_POOL 130 - -/* Codes used to designate lock operations */ -#define RW_LOCK_NOT_LOCKED 350 -#define RW_LOCK_EX 351 -#define RW_LOCK_EXCLUSIVE 351 /* same value as RW_LOCK_EX */ -#define RW_LOCK_SHARED 352 -#define RW_LOCK_WAIT_EX 353 -#define SYNC_MUTEX 354 -#define SYNC_PRIO_MUTEX 355 -#define PRIO_RW_LOCK_EX 356 -#define PRIO_RW_LOCK_SHARED 357 - -/* NOTE! The structure appears here only for the compiler to know its size. -Do not use its fields directly! The structure used in the spin lock -implementation of a mutual exclusion semaphore.
*/ - -/** InnoDB mutex */ -struct ib_mutex_t { - struct os_event event; /*!< Used by sync0arr.cc for the wait queue */ - volatile lock_word_t lock_word; /*!< lock_word is the target - of the atomic test-and-set instruction when - atomic operations are enabled. */ - -#if !defined(HAVE_ATOMIC_BUILTINS) - os_fast_mutex_t - os_fast_mutex; /*!< We use this OS mutex in place of lock_word - when atomic operations are not enabled */ -#endif - ulint waiters; /*!< This ulint is set to 1 if there are (or - may be) threads waiting in the global wait - array for this mutex to be released. - Otherwise, this is 0. */ - UT_LIST_NODE_T(ib_mutex_t) list; /*!< All allocated mutexes are put into - a list. Pointers to the next and previous - mutex in that list. */ - -#ifdef UNIV_SYNC_DEBUG - ulint level; /*!< Level in the global latching order */ -#endif /* UNIV_SYNC_DEBUG */ - - const char* file_name; /*!< File where the mutex was locked */ - ulint line; /*!< Line where the mutex was locked */ - const char* cfile_name; /*!< File name where mutex created */ - ulint cline; /*!< Line where created */ - ulong count_os_wait; /*!< count of os_wait */ - const char* cmutex_name; /*!< mutex name */ - os_thread_id_t thread_id; /*!< The thread id of the thread - which locked the mutex. */ - -#ifdef UNIV_DEBUG - -/** Value of ib_mutex_t::magic_n */ -# define MUTEX_MAGIC_N 979585UL - ulint magic_n; /*!< MUTEX_MAGIC_N */ - ulint ib_mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */ -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_PFS_MUTEX - struct PSI_mutex* pfs_psi; /*!< The performance schema - instrumentation hook */ -#endif -}; - -/** XtraDB priority mutex */ -struct ib_prio_mutex_t { - ib_mutex_t base_mutex; /* The regular mutex provides the lock - word etc.
for the priority mutex */ - struct os_event high_priority_event; /* High priority wait array - event */ - volatile ulint high_priority_waiters; /* Number of threads that asked - for this mutex to be acquired with high - priority in the global wait array - waiting for this mutex to be - released. */ -}; - -/** Constant determining how long spin wait is continued before suspending -the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond -to 20 microseconds. The value is read from srv_n_spin_wait_rounds. */ - -#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds - -/** The number of iterations in the mutex_spin_wait() spin loop. -Intended for performance monitoring. */ -extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count; -/** The number of mutex_spin_wait() calls. Intended for -performance monitoring. */ -extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count; -/** The number of OS waits in mutex_spin_wait(). Intended for -performance monitoring. */ -extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count; - -/** The number of mutex_exit calls. Intended for performance monitoring. */ -extern ib_int64_t mutex_exit_count; - -#ifdef UNIV_SYNC_DEBUG -/** Latching order checks start when this is set TRUE */ -extern ibool sync_order_checks_on; -#endif /* UNIV_SYNC_DEBUG */ - -/** This variable is set to TRUE when sync_init is called */ -extern ibool sync_initialized; - -/** Type of the base node of the global list of database mutexes. */ -typedef UT_LIST_BASE_NODE_T(ib_mutex_t) ut_list_base_node_t; -/** Global list of database mutexes (not OS mutexes) created.
*/ -extern ut_list_base_node_t mutex_list; - -/** Mutex protecting the mutex_list variable */ -extern ib_mutex_t mutex_list_mutex; - -#ifndef HAVE_ATOMIC_BUILTINS -/**********************************************************//** -Function that uses a mutex to decrement a variable atomically */ -UNIV_INLINE -void -os_atomic_dec_ulint_func( -/*=====================*/ - ib_mutex_t* mutex, /*!< in: mutex guarding the - decrement */ - volatile ulint* var, /*!< in/out: variable to - decrement */ - ulint delta); /*!< in: delta to decrement */ -/**********************************************************//** -Function that uses a mutex to increment a variable atomically */ -UNIV_INLINE -void -os_atomic_inc_ulint_func( -/*=====================*/ - ib_mutex_t* mutex, /*!< in: mutex guarding the - increment */ - volatile ulint* var, /*!< in/out: variable to - increment */ - ulint delta); /*!< in: delta to increment */ -#endif /* !HAVE_ATOMIC_BUILTINS */ - -#ifndef UNIV_NONINL -#include "sync0sync.ic" -#endif - -#endif diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic deleted file mode 100644 index fb24c0ec244..00000000000 --- a/storage/xtradb/include/sync0sync.ic +++ /dev/null @@ -1,665 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0sync.ic -Mutex, the basic synchronization primitive - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -/******************************************************************//** -Sets the waiters field in a mutex. */ -UNIV_INTERN -void -mutex_set_waiters( -/*==============*/ - ib_mutex_t* mutex, /*!< in: mutex */ - ulint n); /*!< in: value to set */ -/******************************************************************//** -Reserves a mutex or a priority mutex for the current thread. If the mutex is -reserved, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) -waiting for the mutex before suspending the thread. */ -UNIV_INTERN -void -mutex_spin_wait( -/*============*/ - void* _mutex, /*!< in: pointer to mutex */ - bool high_priority, /*!< in: whether the mutex is a - priority mutex with high priority - specified */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line); /*!< in: line where requested */ -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Sets the debug information for a reserved mutex. 
*/ -UNIV_INTERN -void -mutex_set_debug_info( -/*=================*/ - ib_mutex_t* mutex, /*!< in: mutex */ - const char* file_name, /*!< in: file where requested */ - ulint line); /*!< in: line where requested */ -#endif /* UNIV_SYNC_DEBUG */ -/******************************************************************//** -Releases the threads waiting in the primary wait array for this mutex. */ -UNIV_INTERN -void -mutex_signal_object( -/*================*/ - ib_mutex_t* mutex); /*!< in: mutex */ - -/******************************************************************//** -Performs an atomic test-and-set instruction to the lock_word field of a -mutex. -@return the previous value of lock_word: 0 or 1 */ -UNIV_INLINE -lock_word_t -ib_mutex_test_and_set( -/*==================*/ - ib_mutex_t* mutex) /*!< in: mutex */ -{ -#if defined(HAVE_ATOMIC_BUILTINS) - return(os_atomic_test_and_set(&mutex->lock_word)); -#else - ibool ret; - - ret = os_fast_mutex_trylock_full_barrier(&(mutex->os_fast_mutex)); - - if (ret == 0) { - /* We check that os_fast_mutex_trylock does not leak - and allow race conditions */ - ut_a(mutex->lock_word == 0); - - mutex->lock_word = 1; - } - - return((byte) ret); -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/******************************************************************//** -Performs a reset instruction to the lock_word field of a mutex. This -instruction also serializes memory operations to the program order. */ -UNIV_INLINE -void -mutex_reset_lock_word( -/*==================*/ - ib_mutex_t* mutex) /*!< in: mutex */ -{ -#if defined(HAVE_ATOMIC_BUILTINS) - os_atomic_clear(&mutex->lock_word); -#else - mutex->lock_word = 0; - - os_fast_mutex_unlock(&(mutex->os_fast_mutex)); -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/******************************************************************//** -Gets the value of the lock word. 
*/ -UNIV_INLINE -lock_word_t -mutex_get_lock_word( -/*================*/ - const ib_mutex_t* mutex) /*!< in: mutex */ -{ - ut_ad(mutex); - - return(mutex->lock_word); -} - -/******************************************************************//** -Gets the waiters field in a mutex. -@return value to set */ -UNIV_INLINE -ulint -mutex_get_waiters( -/*==============*/ - const ib_mutex_t* mutex) /*!< in: mutex */ -{ - const volatile ulint* ptr; /*!< declared volatile to ensure that - the value is read from memory */ - ut_ad(mutex); - - ptr = &(mutex->waiters); - - return(*ptr); /* Here we assume that the read of a single - word from memory is atomic */ -} - -/******************************************************************//** -NOTE! Use the corresponding macro mutex_exit(), not directly this function! -Unlocks a mutex owned by the current thread. */ -UNIV_INLINE -void -mutex_exit_func( -/*============*/ - ib_mutex_t* mutex) /*!< in: pointer to mutex */ -{ - ut_ad(mutex_own(mutex)); - - mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED; - -#ifdef UNIV_SYNC_DEBUG - sync_thread_reset_level(mutex); -#endif - mutex_reset_lock_word(mutex); - - /* A problem: we assume that mutex_reset_lock word - is a memory barrier, that is when we read the waiters - field next, the read must be serialized in memory - after the reset. A speculative processor might - perform the read first, which could leave a waiting - thread hanging indefinitely. - - Our current solution call every second - sync_arr_wake_threads_if_sema_free() - to wake up possible hanging threads if - they are missed in mutex_signal_object. */ - - /* We add a memory barrier to prevent reading of the - number of waiters before releasing the lock. */ - - os_mb; - - if (mutex_get_waiters(mutex) != 0) { - - mutex_signal_object(mutex); - } - -#ifdef UNIV_SYNC_PERF_STAT - mutex_exit_count++; -#endif -} - -/******************************************************************//** -NOTE! 
Use the corresponding macro mutex_exit(), not directly this function! -Unlocks a priority mutex owned by the current thread. */ -UNIV_INLINE -void -mutex_exit_func( -/*============*/ - ib_prio_mutex_t* mutex) /*!< in: pointer to mutex */ -{ - ut_ad(mutex_own(mutex)); - - mutex->base_mutex.thread_id = (os_thread_id_t) ULINT_UNDEFINED; - -#ifdef UNIV_SYNC_DEBUG - sync_thread_reset_level(&mutex->base_mutex); -#endif - mutex_reset_lock_word(&mutex->base_mutex); - - /* A problem: we assume that mutex_reset_lock word - is a memory barrier, that is when we read the waiters - field next, the read must be serialized in memory - after the reset. A speculative processor might - perform the read first, which could leave a waiting - thread hanging indefinitely. - - Our current solution call every second - sync_arr_wake_threads_if_sema_free() - to wake up possible hanging threads if - they are missed in mutex_signal_object. */ - - /* Wake up any high priority waiters first. */ - if (mutex->high_priority_waiters != 0) { - - os_event_set(&mutex->high_priority_event); - sync_array_object_signalled(); - - } else if (mutex_get_waiters(&mutex->base_mutex) != 0) { - - mutex_signal_object(&mutex->base_mutex); - } - -#ifdef UNIV_SYNC_PERF_STAT - mutex_exit_count++; -#endif - -} - - -/******************************************************************//** -Locks a mutex for the current thread. If the mutex is reserved, the function -spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex -before suspending the thread. 
*/ -UNIV_INLINE -void -mutex_enter_func( -/*=============*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where locked */ - ulint line) /*!< in: line where locked */ -{ - ut_ad(mutex_validate(mutex)); -#ifndef WITH_WSREP - /* this cannot be be granted when BF trx kills a trx in lock wait state */ - ut_ad(!mutex_own(mutex)); -#endif /* WITH_WSREP */ - - /* Note that we do not peek at the value of lock_word before trying - the atomic test_and_set; we could peek, and possibly save time. */ - - if (!ib_mutex_test_and_set(mutex)) { - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } - - return; /* Succeeded! */ - } - - mutex_spin_wait(mutex, false, file_name, line); -} - -/******************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Locks a priority mutex for the current thread. If the mutex is -reserved the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) -waiting for the mutex before suspending the thread. If the thread is suspended, -the priority argument value determines the relative order for its wake up. Any -IB_HIGH_PRIO waiters will be woken up before any IB_LOW_PRIO waiters. In case -of IB_DEFAULT_PRIO, the relative priority will be set according to -srv_current_thread_priority. 
*/ -UNIV_INLINE -void -mutex_enter_func( -/*=============*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where - locked */ - ulint line, /*!< in: line where locked */ - enum ib_sync_priority priority) - /*!<in: mutex acquisition - priority */ -{ - bool high_priority; - - ut_ad(mutex_validate(&mutex->base_mutex)); - ut_ad(!mutex_own(mutex)); - - /* Note that we do not peek at the value of lock_word before trying - the atomic test_and_set; we could peek, and possibly save time. */ - - if (!ib_mutex_test_and_set(&mutex->base_mutex)) { - mutex->base_mutex.thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(&mutex->base_mutex, file_name, line); -#endif - if(srv_instrument_semaphores) { - mutex->base_mutex.file_name = file_name; - mutex->base_mutex.line = line; - } - - return; /* Succeeded! */ - } - - if (UNIV_LIKELY(priority == IB_DEFAULT_PRIO)) { - high_priority = srv_current_thread_priority; - } else { - high_priority = (priority == IB_HIGH_PRIO); - } - mutex_spin_wait(mutex, high_priority, file_name, line); -} - -/********************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. If the lock is not -acquired immediately, returns with return value 1. -@return 0 if succeed, 1 if not */ -UNIV_INLINE -ulint -mutex_enter_nowait_func( -/*====================*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line) /*!< in: line where - requested */ -{ - return mutex_enter_nowait_func(&mutex->base_mutex, file_name, line); -} - -#ifdef UNIV_PFS_MUTEX -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_enter(), not directly -this function! 
-This is a performance schema instrumented wrapper function for -mutex_enter_func(). */ -UNIV_INLINE -void -pfs_mutex_enter_func( -/*=================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where locked */ - ulint line) /*!< in: line where locked */ -{ - if (mutex->pfs_psi != NULL) { - PSI_mutex_locker* locker; - PSI_mutex_locker_state state; - - locker = PSI_MUTEX_CALL(start_mutex_wait)( - &state, mutex->pfs_psi, - PSI_MUTEX_LOCK, file_name, - static_cast<uint>(line)); - - mutex_enter_func(mutex, file_name, line); - - if (locker != NULL) { - PSI_MUTEX_CALL(end_mutex_wait)(locker, 0); - } - } else { - mutex_enter_func(mutex, file_name, line); - } -} - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_enter(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_func(). */ -UNIV_INLINE -void -pfs_mutex_enter_func( -/*=================*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where - locked */ - ulint line, /*!< in: line where locked */ - enum ib_sync_priority priority) /*!<in: mutex acquisition - priority */ -{ - if (mutex->base_mutex.pfs_psi != NULL) { - PSI_mutex_locker* locker; - PSI_mutex_locker_state state; - - locker = PSI_MUTEX_CALL(start_mutex_wait)( - &state, mutex->base_mutex.pfs_psi, - PSI_MUTEX_LOCK, file_name, line); - - mutex_enter_func(mutex, file_name, line, priority); - - if (locker != NULL) { - PSI_MUTEX_CALL(end_mutex_wait)(locker, 0); - } - } else { - mutex_enter_func(mutex, file_name, line, priority); - } -} - -/********************************************************************//** -NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_nowait_func. 
-@return 0 if succeed, 1 if not */ -UNIV_INLINE -ulint -pfs_mutex_enter_nowait_func( -/*========================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line) /*!< in: line where requested */ -{ - ulint ret; - - if (mutex->pfs_psi != NULL) { - PSI_mutex_locker* locker; - PSI_mutex_locker_state state; - - locker = PSI_MUTEX_CALL(start_mutex_wait)( - &state, mutex->pfs_psi, - PSI_MUTEX_TRYLOCK, file_name, - static_cast<uint>(line)); - - ret = mutex_enter_nowait_func(mutex, file_name, line); - - if (locker != NULL) { - PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret); - } - } else { - ret = mutex_enter_nowait_func(mutex, file_name, line); - } - - return(ret); -} - -/********************************************************************//** -NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly -this function! -This is a performance schema instrumented wrapper function for -mutex_enter_nowait_func. -@return 0 if succeed, 1 if not */ -UNIV_INLINE -ulint -pfs_mutex_enter_nowait_func( -/*========================*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line) /*!< in: line where - requested */ -{ - return pfs_mutex_enter_nowait_func(&mutex->base_mutex, file_name, - line); -} - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_exit(), not directly -this function! -A wrap function of mutex_exit_func() with performance schema instrumentation. -Unlocks a mutex owned by the current thread. */ -UNIV_INLINE -void -pfs_mutex_exit_func( -/*================*/ - ib_mutex_t* mutex) /*!< in: pointer to mutex */ -{ - if (mutex->pfs_psi != NULL) { - PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi); - } - - mutex_exit_func(mutex); -} - -/******************************************************************//** -NOTE! 
Please use the corresponding macro mutex_exit(), not directly -this function! -A wrap function of mutex_exit_func() with peformance schema instrumentation. -Unlocks a priority mutex owned by the current thread. */ -UNIV_INLINE -void -pfs_mutex_exit_func( -/*================*/ - ib_prio_mutex_t* mutex) /*!< in: pointer to mutex */ -{ - if (mutex->base_mutex.pfs_psi != NULL) { - PSI_MUTEX_CALL(unlock_mutex)(mutex->base_mutex.pfs_psi); - } - - mutex_exit_func(mutex); -} - - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_create(), not directly -this function! -A wrapper function for mutex_create_func(), registers the mutex -with performance schema if "UNIV_PFS_MUTEX" is defined when -creating the mutex */ -UNIV_INLINE -void -pfs_mutex_create_func( -/*==================*/ - mysql_pfs_key_t key, /*!< in: Performance Schema key */ - ib_mutex_t* mutex, /*!< in: pointer to memory */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline, /*!< in: file line where created */ - const char* cmutex_name) /*!< in: mutex name */ -{ - mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex); - - mutex_create_func(mutex, -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - level, -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - cfile_name, - cline, - cmutex_name); -} - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_create(), not directly -this function! 
-A wrapper function for mutex_create_func(), registers the mutex -with peformance schema if "UNIV_PFS_MUTEX" is defined when -creating the performance mutex */ -UNIV_INLINE -void -pfs_mutex_create_func( -/*==================*/ - PSI_mutex_key key, /*!< in: Performance Schema - key */ - ib_prio_mutex_t* mutex, /*!< in: pointer to memory */ -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where - created */ - ulint cline, /*!< in: file line where - created */ - const char* cmutex_name) -{ - mutex->base_mutex.pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex); - - mutex_create_func(mutex, -# ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - level, -# endif /* UNIV_SYNC_DEBUG */ -# endif /* UNIV_DEBUG */ - cfile_name, - cline, - cmutex_name); -} - - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_free(), not directly -this function! -Wrapper function for mutex_free_func(). Also destroys the performance -schema probes when freeing the mutex */ -UNIV_INLINE -void -pfs_mutex_free_func( -/*================*/ - ib_mutex_t* mutex) /*!< in: mutex */ -{ - if (mutex->pfs_psi != NULL) { - PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi); - mutex->pfs_psi = NULL; - } - - mutex_free_func(mutex); -} - -/******************************************************************//** -NOTE! Please use the corresponding macro mutex_free(), not directly -this function! -Wrapper function for mutex_free_func(). 
Also destroys the performance -schema probes when freeing the priority mutex */ -UNIV_INLINE -void -pfs_mutex_free_func( -/*================*/ - ib_prio_mutex_t* mutex) /*!< in: mutex */ -{ - if (mutex->base_mutex.pfs_psi != NULL) { - PSI_MUTEX_CALL(destroy_mutex)(mutex->base_mutex.pfs_psi); - mutex->base_mutex.pfs_psi = NULL; - } - - mutex_free_func(mutex); -} - - -#endif /* UNIV_PFS_MUTEX */ - -#ifndef HAVE_ATOMIC_BUILTINS -/**********************************************************//** -Function that uses a mutex to decrement a variable atomically */ -UNIV_INLINE -void -os_atomic_dec_ulint_func( -/*=====================*/ - ib_mutex_t* mutex, /*!< in: mutex guarding the dec */ - volatile ulint* var, /*!< in/out: variable to decrement */ - ulint delta) /*!< in: delta to decrement */ -{ - mutex_enter(mutex); - - /* I don't think we will encounter a situation where - this check will not be required. */ - ut_ad(*var >= delta); - - *var -= delta; - - mutex_exit(mutex); -} - -/**********************************************************//** -Function that uses a mutex to increment a variable atomically */ -UNIV_INLINE -void -os_atomic_inc_ulint_func( -/*=====================*/ - ib_mutex_t* mutex, /*!< in: mutex guarding the increment */ - volatile ulint* var, /*!< in/out: variable to increment */ - ulint delta) /*!< in: delta to increment */ -{ - mutex_enter(mutex); - - *var += delta; - - mutex_exit(mutex); -} -#endif /* !HAVE_ATOMIC_BUILTINS */ diff --git a/storage/xtradb/include/sync0types.h b/storage/xtradb/include/sync0types.h deleted file mode 100644 index 04baaa0339d..00000000000 --- a/storage/xtradb/include/sync0types.h +++ /dev/null @@ -1,44 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/sync0types.h -Global types for sync - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0types_h -#define sync0types_h - -struct ib_mutex_t; - -/* The relative priority of the current thread. If 0, low priority; if 1, high -priority. */ -extern UNIV_THREAD_LOCAL ulint srv_current_thread_priority; - -struct ib_prio_mutex_t; - -/** Priority mutex and rwlatch acquisition priorities */ -enum ib_sync_priority { - IB_DEFAULT_PRIO, - IB_LOW_PRIO, - IB_HIGH_PRIO -}; - -#endif diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h deleted file mode 100644 index ac5e00c6834..00000000000 --- a/storage/xtradb/include/trx0i_s.h +++ /dev/null @@ -1,315 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0i_s.h -INFORMATION SCHEMA innodb_trx, innodb_locks and -innodb_lock_waits tables cache structures and public -functions. - -Created July 17, 2007 Vasil Dimov -*******************************************************/ - -#ifndef trx0i_s_h -#define trx0i_s_h - -#include "univ.i" -#include "trx0types.h" -#include "dict0types.h" -#include "ut0ut.h" - -/** The maximum amount of memory that can be consumed by innodb_trx, -innodb_locks and innodb_lock_waits information schema tables. 
*/ -#define TRX_I_S_MEM_LIMIT 16777216 /* 16 MiB */ - -/** The maximum length of a string that can be stored in -i_s_locks_row_t::lock_data */ -#define TRX_I_S_LOCK_DATA_MAX_LEN 8192 - -/** The maximum length of a string that can be stored in -i_s_trx_row_t::trx_query */ -#define TRX_I_S_TRX_QUERY_MAX_LEN 1024 - -/** The maximum length of a string that can be stored in -i_s_trx_row_t::trx_operation_state */ -#define TRX_I_S_TRX_OP_STATE_MAX_LEN 64 - -/** The maximum length of a string that can be stored in -i_s_trx_row_t::trx_foreign_key_error */ -#define TRX_I_S_TRX_FK_ERROR_MAX_LEN 256 - -/** The maximum length of a string that can be stored in -i_s_trx_row_t::trx_isolation_level */ -#define TRX_I_S_TRX_ISOLATION_LEVEL_MAX_LEN 16 - -/** Safely copy strings in to the INNODB_TRX table's -string based columns */ -#define TRX_I_S_STRING_COPY(data, field, constraint, tcache) \ -do { \ - if (strlen(data) > constraint) { \ - char buff[constraint + 1]; \ - strncpy(buff, data, constraint); \ - buff[constraint] = '\0'; \ - \ - field = static_cast<const char*>( \ - ha_storage_put_memlim( \ - (tcache)->storage, buff, constraint + 1,\ - MAX_ALLOWED_FOR_STORAGE(tcache))); \ - } else { \ - field = static_cast<const char*>( \ - ha_storage_put_str_memlim( \ - (tcache)->storage, data, \ - MAX_ALLOWED_FOR_STORAGE(tcache))); \ - } \ -} while (0) - -/** A row of INFORMATION_SCHEMA.innodb_locks */ -struct i_s_locks_row_t; - -/** Objects of trx_i_s_cache_t::locks_hash */ -struct i_s_hash_chain_t; - -/** Objects of this type are added to the hash table -trx_i_s_cache_t::locks_hash */ -struct i_s_hash_chain_t { - i_s_locks_row_t* value; /*!< row of - INFORMATION_SCHEMA.innodb_locks*/ - i_s_hash_chain_t* next; /*!< next item in the hash chain */ -}; - -/** This structure represents INFORMATION_SCHEMA.innodb_locks row */ -struct i_s_locks_row_t { - trx_id_t lock_trx_id; /*!< transaction identifier */ - const char* lock_mode; /*!< lock mode from - lock_get_mode_str() */ - const char* 
lock_type; /*!< lock type from - lock_get_type_str() */ - const char* lock_table; /*!< table name from - lock_get_table_name() */ - const char* lock_index; /*!< index name from - lock_rec_get_index_name() */ - /** Information for record locks. All these are - ULINT_UNDEFINED for table locks. */ - /* @{ */ - ulint lock_space; /*!< tablespace identifier */ - ulint lock_page; /*!< page number within the_space */ - ulint lock_rec; /*!< heap number of the record - on the page */ - const char* lock_data; /*!< (some) content of the record */ - /* @} */ - - /** The following are auxiliary and not included in the table */ - /* @{ */ - table_id_t lock_table_id; - /*!< table identifier from - lock_get_table_id */ - i_s_hash_chain_t hash_chain; /*!< hash table chain node for - trx_i_s_cache_t::locks_hash */ - /* @} */ -}; - -/** This structure represents INFORMATION_SCHEMA.innodb_trx row */ -struct i_s_trx_row_t { - trx_id_t trx_id; /*!< transaction identifier */ - const char* trx_state; /*!< transaction state from - trx_get_que_state_str() */ - ib_time_t trx_started; /*!< trx_t::start_time */ - const i_s_locks_row_t* requested_lock_row; - /*!< pointer to a row - in innodb_locks if trx - is waiting, or NULL */ - ib_time_t trx_wait_started; /*!< trx_t::wait_started */ - ullint trx_weight; /*!< TRX_WEIGHT() */ - ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */ - const char* trx_query; /*!< MySQL statement being - executed in the transaction */ - struct charset_info_st* trx_query_cs; - /*!< charset encode the MySQL - statement */ - const char* trx_operation_state; /*!< trx_t::op_info */ - ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in - trx_t */ - ulint trx_tables_locked; - /*!< mysql_n_tables_locked in - trx_t */ - ulint trx_lock_structs;/*!< list len of trx_locks in - trx_t */ - ulint trx_lock_memory_bytes; - /*!< mem_heap_get_size( - trx->lock_heap) */ - ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */ - ullint trx_rows_modified;/*!< trx_t::undo_no */ - 
ulint trx_concurrency_tickets; - /*!< n_tickets_to_enter_innodb in - trx_t */ - const char* trx_isolation_level; - /*!< isolation_level in trx_t */ - ibool trx_unique_checks; - /*!< check_unique_secondary in trx_t*/ - ibool trx_foreign_key_checks; - /*!< check_foreigns in trx_t */ - const char* trx_foreign_key_error; - /*!< detailed_error in trx_t */ - ibool trx_has_search_latch; - /*!< has_search_latch in trx_t */ - ulint trx_search_latch_timeout; - /*!< search_latch_timeout in trx_t */ - ulint trx_is_read_only; - /*!< trx_t::read_only */ - ulint trx_is_autocommit_non_locking; - /*!< trx_is_autocommit_non_locking(trx) - */ -}; - -/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */ -struct i_s_lock_waits_row_t { - const i_s_locks_row_t* requested_lock_row; /*!< requested lock */ - const i_s_locks_row_t* blocking_lock_row; /*!< blocking lock */ -}; - -/** Cache of INFORMATION_SCHEMA table data */ -struct trx_i_s_cache_t; - -/** Auxiliary enum used by functions that need to select one of the -INFORMATION_SCHEMA tables */ -enum i_s_table { - I_S_INNODB_TRX, /*!< INFORMATION_SCHEMA.innodb_trx */ - I_S_INNODB_LOCKS, /*!< INFORMATION_SCHEMA.innodb_locks */ - I_S_INNODB_LOCK_WAITS /*!< INFORMATION_SCHEMA.innodb_lock_waits */ -}; - -/** This is the intermediate buffer where data needed to fill the -INFORMATION SCHEMA tables is fetched and later retrieved by the C++ -code in handler/i_s.cc. */ -extern trx_i_s_cache_t* trx_i_s_cache; - -/*******************************************************************//** -Initialize INFORMATION SCHEMA trx related cache. */ -UNIV_INTERN -void -trx_i_s_cache_init( -/*===============*/ - trx_i_s_cache_t* cache); /*!< out: cache to init */ -/*******************************************************************//** -Free the INFORMATION SCHEMA trx related cache. 
*/ -UNIV_INTERN -void -trx_i_s_cache_free( -/*===============*/ - trx_i_s_cache_t* cache); /*!< in/out: cache to free */ - -/*******************************************************************//** -Issue a shared/read lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_start_read( -/*=====================*/ - trx_i_s_cache_t* cache); /*!< in: cache */ - -/*******************************************************************//** -Release a shared/read lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_end_read( -/*===================*/ - trx_i_s_cache_t* cache); /*!< in: cache */ - -/*******************************************************************//** -Issue an exclusive/write lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_start_write( -/*======================*/ - trx_i_s_cache_t* cache); /*!< in: cache */ - -/*******************************************************************//** -Release an exclusive/write lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_end_write( -/*====================*/ - trx_i_s_cache_t* cache); /*!< in: cache */ - - -/*******************************************************************//** -Retrieves the number of used rows in the cache for a given -INFORMATION SCHEMA table. -@return number of rows */ -UNIV_INTERN -ulint -trx_i_s_cache_get_rows_used( -/*========================*/ - trx_i_s_cache_t* cache, /*!< in: cache */ - enum i_s_table table); /*!< in: which table */ - -/*******************************************************************//** -Retrieves the nth row in the cache for a given INFORMATION SCHEMA -table. -@return row */ -UNIV_INTERN -void* -trx_i_s_cache_get_nth_row( -/*======================*/ - trx_i_s_cache_t* cache, /*!< in: cache */ - enum i_s_table table, /*!< in: which table */ - ulint n); /*!< in: row number */ - -/*******************************************************************//** -Update the transactions cache if it has not been read for some time. 
-@return 0 - fetched, 1 - not */ -UNIV_INTERN -int -trx_i_s_possibly_fetch_data_into_cache( -/*===================================*/ - trx_i_s_cache_t* cache); /*!< in/out: cache */ - -/*******************************************************************//** -Returns TRUE if the data in the cache is truncated due to the memory -limit posed by TRX_I_S_MEM_LIMIT. -@return TRUE if truncated */ -UNIV_INTERN -ibool -trx_i_s_cache_is_truncated( -/*=======================*/ - trx_i_s_cache_t* cache); /*!< in: cache */ - -/** The maximum length of a resulting lock_id_size in -trx_i_s_create_lock_id(), not including the terminating NUL. -":%lu:%lu:%lu" -> 63 chars */ -#define TRX_I_S_LOCK_ID_MAX_LEN (TRX_ID_MAX_LEN + 63) - -/*******************************************************************//** -Crafts a lock id string from a i_s_locks_row_t object. Returns its -second argument. This function aborts if there is not enough space in -lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you -want to be 100% sure that it will not abort. -@return resulting lock id */ -UNIV_INTERN -char* -trx_i_s_create_lock_id( -/*===================*/ - const i_s_locks_row_t* row, /*!< in: innodb_locks row */ - char* lock_id,/*!< out: resulting lock_id */ - ulint lock_id_size);/*!< in: size of the lock id - buffer */ - -UNIV_INTERN -void -trx_i_s_get_lock_sys_memory_usage(ulint *constant, ulint *variable); - -#endif /* trx0i_s_h */ diff --git a/storage/xtradb/include/trx0purge.h b/storage/xtradb/include/trx0purge.h deleted file mode 100644 index 7b9b5dc49cd..00000000000 --- a/storage/xtradb/include/trx0purge.h +++ /dev/null @@ -1,226 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0purge.h -Purge old versions - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0purge_h -#define trx0purge_h - -#include "univ.i" -#include "trx0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "que0types.h" -#include "page0page.h" -#include "usr0sess.h" -#include "fil0fil.h" - -/** The global data structure coordinating a purge */ -extern trx_purge_t* purge_sys; - -/** A dummy undo record used as a return value when we have a whole undo log -which needs no purge */ -extern trx_undo_rec_t trx_purge_dummy_rec; - -/********************************************************************//** -Calculates the file address of an undo log header when we have the file -address of its history list node. -@return file address of the log */ -UNIV_INLINE -fil_addr_t -trx_purge_get_log_from_hist( -/*========================*/ - fil_addr_t node_addr); /*!< in: file address of the history - list node of the log */ -/********************************************************************//** -Creates the global purge system control structure and inits the history -mutex. 
*/ -UNIV_INTERN -void -trx_purge_sys_create( -/*=================*/ - ulint n_purge_threads,/*!< in: number of purge threads */ - ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/ -/********************************************************************//** -Frees the global purge system control structure. */ -UNIV_INTERN -void -trx_purge_sys_close(void); -/*======================*/ -/************************************************************************ -Adds the update undo log as the first log in the history list. Removes the -update undo log segment from the rseg slot if it is too big for reuse. */ -UNIV_INTERN -void -trx_purge_add_update_undo_to_history( -/*=================================*/ - trx_t* trx, /*!< in: transaction */ - page_t* undo_page, /*!< in: update undo log header page, - x-latched */ - mtr_t* mtr); /*!< in: mtr */ -/*******************************************************************//** -This function runs a purge batch. -@return number of undo log pages handled in the batch */ -UNIV_INTERN -ulint -trx_purge( -/*======*/ - ulint n_purge_threads, /*!< in: number of purge tasks to - submit to task queue. */ - ulint limit, /*!< in: the maximum number of - records to purge in one batch */ - bool truncate); /*!< in: truncate history if true */ -/*******************************************************************//** -Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */ -UNIV_INTERN -void -trx_purge_stop(void); -/*================*/ -/*******************************************************************//** -Resume purge, move to PURGE_STATE_RUN. 
*/ -UNIV_INTERN -void -trx_purge_run(void); -/*================*/ - -/** Purge states */ -enum purge_state_t { - PURGE_STATE_INIT, /*!< Purge instance created */ - PURGE_STATE_RUN, /*!< Purge should be running */ - PURGE_STATE_STOP, /*!< Purge should be stopped */ - PURGE_STATE_EXIT, /*!< Purge has been shutdown */ - PURGE_STATE_DISABLED /*!< Purge was never started */ -}; - -/*******************************************************************//** -Get the purge state. -@return purge state. */ -UNIV_INTERN -purge_state_t -trx_purge_state(void); -/*=================*/ - -/** This is the purge pointer/iterator. We need both the undo no and the -transaction no up to which purge has parsed and applied the records. */ -struct purge_iter_t { - trx_id_t trx_no; /*!< Purge has advanced past all - transactions whose number is less - than this */ - undo_no_t undo_no; /*!< Purge has advanced past all records - whose undo number is less than this */ -}; - -/** The control structure used in the purge operation */ -struct trx_purge_t{ - sess_t* sess; /*!< System session running the purge - query */ - trx_t* trx; /*!< System transaction running the - purge query: this trx is not in the - trx list of the trx system and it - never ends */ - prio_rw_lock_t latch; /*!< The latch protecting the purge - view. A purge operation must acquire an - x-latch here for the instant at which - it changes the purge view: an undo - log operation can prevent this by - obtaining an s-latch here. It also - protects state and running */ - os_event_t event; /*!< State signal event; - os_event_set() and os_event_reset() - are protected by trx_purge_t::latch - X-lock */ - ulint n_stop; /*!< Counter to track number stops */ - volatile bool running; /*!< true, if purge is active, - we check this without the latch too */ - volatile purge_state_t state; /*!< Purge coordinator thread states, - we check this in several places - without holding the latch. 
*/ - que_t* query; /*!< The query graph which will do the - parallelized purge operation */ - read_view_t* view; /*!< The purge will not remove undo logs - which are >= this view (purge view) */ - read_view_t* prebuilt_clone; /*!< Pre-built view which is used as a - temporary clone of the oldest view in - read_view_purge_open() */ - read_view_t* prebuilt_view; /*!< Pre-built view array */ - volatile ulint n_submitted; /*!< Count of total tasks submitted - to the task queue */ - volatile ulint n_completed; /*!< Count of total tasks completed */ - - /*------------------------------*/ - /* The following two fields form the 'purge pointer' which advances - during a purge, and which is used in history list truncation */ - - purge_iter_t iter; /* Limit up to which we have read and - parsed the UNDO log records. Not - necessarily purged from the indexes. - Note that this can never be less than - the limit below, we check for this - invariant in trx0purge.cc */ - purge_iter_t limit; /* The 'purge pointer' which advances - during a purge, and which is used in - history list truncation */ -#ifdef UNIV_DEBUG - purge_iter_t done; /* Indicate 'purge pointer' which have - purged already accurately. 
*/ -#endif /* UNIV_DEBUG */ - /*-----------------------------*/ - ibool next_stored; /*!< TRUE if the info of the next record - to purge is stored below: if yes, then - the transaction number and the undo - number of the record are stored in - purge_trx_no and purge_undo_no above */ - trx_rseg_t* rseg; /*!< Rollback segment for the next undo - record to purge */ - ulint page_no; /*!< Page number for the next undo - record to purge, page number of the - log header, if dummy record */ - ulint offset; /*!< Page offset for the next undo - record to purge, 0 if the dummy - record */ - ulint hdr_page_no; /*!< Header page of the undo log where - the next record to purge belongs */ - ulint hdr_offset; /*!< Header byte offset on the page */ - /*-----------------------------*/ - mem_heap_t* heap; /*!< Temporary storage used during a - purge: can be emptied after purge - completes */ - /*-----------------------------*/ - ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on - rseg_queue_t::trx_no. It is protected - by the bh_mutex */ - ib_mutex_t bh_mutex; /*!< Mutex protecting ib_bh */ -}; - -/** Info required to purge a record */ -struct trx_purge_rec_t { - trx_undo_rec_t* undo_rec; /*!< Record to purge */ - roll_ptr_t roll_ptr; /*!< File pointr to UNDO record */ -}; - -#ifndef UNIV_NONINL -#include "trx0purge.ic" -#endif - -#endif diff --git a/storage/xtradb/include/trx0purge.ic b/storage/xtradb/include/trx0purge.ic deleted file mode 100644 index ca9cc1fb894..00000000000 --- a/storage/xtradb/include/trx0purge.ic +++ /dev/null @@ -1,62 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0purge.ic -Purge old versions - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0undo.h" - -/********************************************************************//** -Calculates the file address of an undo log header when we have the file -address of its history list node. -@return file address of the log */ -UNIV_INLINE -fil_addr_t -trx_purge_get_log_from_hist( -/*========================*/ - fil_addr_t node_addr) /*!< in: file address of the history - list node of the log */ -{ - node_addr.boffset -= TRX_UNDO_HISTORY_NODE; - - return(node_addr); -} - -#ifdef UNIV_DEBUG -/********************************************************************//** -address of its history list node. 
-@return TRUE if purge_sys_t::limit <= purge_sys_t::iter*/ -UNIV_INLINE -ibool -trx_purge_check_limit(void) -/*=======================*/ -{ - ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no); - - if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) { - ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no); - } - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h deleted file mode 100644 index a6e202d04e4..00000000000 --- a/storage/xtradb/include/trx0rec.h +++ /dev/null @@ -1,319 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0rec.h -Transaction undo log record - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0rec_h -#define trx0rec_h - -#include "univ.i" -#include "trx0types.h" -#include "row0types.h" -#include "mtr0mtr.h" -#include "dict0types.h" -#include "data0data.h" -#include "rem0types.h" - -#ifndef UNIV_HOTBACKUP -# include "que0types.h" - -/***********************************************************************//** -Copies the undo record to the heap. -@return own: copy of undo log record */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_rec_copy( -/*==============*/ - const trx_undo_rec_t* undo_rec, /*!< in: undo log record */ - mem_heap_t* heap); /*!< in: heap where copied */ -/**********************************************************************//** -Reads the undo log record type. -@return record type */ -UNIV_INLINE -ulint -trx_undo_rec_get_type( -/*==================*/ - const trx_undo_rec_t* undo_rec); /*!< in: undo log record */ -/**********************************************************************//** -Reads from an undo log record the record compiler info. -@return compiler info */ -UNIV_INLINE -ulint -trx_undo_rec_get_cmpl_info( -/*=======================*/ - const trx_undo_rec_t* undo_rec); /*!< in: undo log record */ -/**********************************************************************//** -Returns TRUE if an undo log record contains an extern storage field. 
-@return TRUE if extern */ -UNIV_INLINE -ibool -trx_undo_rec_get_extern_storage( -/*============================*/ - const trx_undo_rec_t* undo_rec); /*!< in: undo log record */ -/**********************************************************************//** -Reads the undo log record number. -@return undo no */ -UNIV_INLINE -undo_no_t -trx_undo_rec_get_undo_no( -/*=====================*/ - const trx_undo_rec_t* undo_rec); /*!< in: undo log record */ -/**********************************************************************//** -Returns the start of the undo record data area. -@return offset to the data area */ -UNIV_INLINE -ulint -trx_undo_rec_get_offset( -/*====================*/ - undo_no_t undo_no) /*!< in: undo no read from node */ - MY_ATTRIBUTE((const)); - -/**********************************************************************//** -Returns the start of the undo record data area. */ -#define trx_undo_rec_get_ptr(undo_rec, undo_no) \ - ((undo_rec) + trx_undo_rec_get_offset(undo_no)) - -/**********************************************************************//** -Reads from an undo log record the general parameters. -@return remaining part of undo log record after reading these values */ -UNIV_INTERN -byte* -trx_undo_rec_get_pars( -/*==================*/ - trx_undo_rec_t* undo_rec, /*!< in: undo log record */ - ulint* type, /*!< out: undo record type: - TRX_UNDO_INSERT_REC, ... */ - ulint* cmpl_info, /*!< out: compiler info, relevant only - for update type records */ - bool* updated_extern, /*!< out: true if we updated an - externally stored fild */ - undo_no_t* undo_no, /*!< out: undo log record number */ - table_id_t* table_id) /*!< out: table id */ - MY_ATTRIBUTE((nonnull)); -/*******************************************************************//** -Builds a row reference from an undo log record. 
-@return pointer to remaining part of undo record */ -UNIV_INTERN -byte* -trx_undo_rec_get_row_ref( -/*=====================*/ - byte* ptr, /*!< in: remaining part of a copy of an undo log - record, at the start of the row reference; - NOTE that this copy of the undo log record must - be preserved as long as the row reference is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /*!< in: clustered index */ - dtuple_t** ref, /*!< out, own: row reference */ - mem_heap_t* heap); /*!< in: memory heap from which the memory - needed is allocated */ -/*******************************************************************//** -Skips a row reference from an undo log record. -@return pointer to remaining part of undo record */ -UNIV_INTERN -byte* -trx_undo_rec_skip_row_ref( -/*======================*/ - byte* ptr, /*!< in: remaining part in update undo log - record, at the start of the row reference */ - dict_index_t* index); /*!< in: clustered index */ -/**********************************************************************//** -Reads from an undo log update record the system field values of the old -version. -@return remaining part of undo log record after reading these values */ -UNIV_INTERN -byte* -trx_undo_update_rec_get_sys_cols( -/*=============================*/ - byte* ptr, /*!< in: remaining part of undo - log record after reading - general parameters */ - trx_id_t* trx_id, /*!< out: trx id */ - roll_ptr_t* roll_ptr, /*!< out: roll ptr */ - ulint* info_bits); /*!< out: info bits state */ -/*******************************************************************//** -Builds an update vector based on a remaining part of an undo log record. 
-@return remaining part of the record, NULL if an error detected, which -means that the record is corrupted */ -UNIV_INTERN -byte* -trx_undo_update_rec_get_update( -/*===========================*/ - byte* ptr, /*!< in: remaining part in update undo log - record, after reading the row reference - NOTE that this copy of the undo log record must - be preserved as long as the update vector is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /*!< in: clustered index */ - ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC, - TRX_UNDO_UPD_DEL_REC, or - TRX_UNDO_DEL_MARK_REC; in the last case, - only trx id and roll ptr fields are added to - the update vector */ - trx_id_t trx_id, /*!< in: transaction id from this undorecord */ - roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */ - ulint info_bits,/*!< in: info bits from this undo record */ - trx_t* trx, /*!< in: transaction */ - mem_heap_t* heap, /*!< in: memory heap from which the memory - needed is allocated */ - upd_t** upd); /*!< out, own: update vector */ -/*******************************************************************//** -Builds a partial row from an update undo log record, for purge. -It contains the columns which occur as ordering in any index of the table. -Any missing columns are indicated by col->mtype == DATA_MISSING. -@return pointer to remaining part of undo record */ -UNIV_INTERN -byte* -trx_undo_rec_get_partial_row( -/*=========================*/ - byte* ptr, /*!< in: remaining part in update undo log - record of a suitable type, at the start of - the stored index columns; - NOTE that this copy of the undo log record must - be preserved as long as the partial row is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /*!< in: clustered index */ - dtuple_t** row, /*!< out, own: partial row */ - ibool ignore_prefix, /*!< in: flag to indicate if we - expect blob prefixes in undo. Used - only in the assertion. 
*/ - mem_heap_t* heap) /*!< in: memory heap from which the memory - needed is allocated */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/***********************************************************************//** -Writes information to an undo log about an insert, update, or a delete marking -of a clustered index record. This information is used in a rollback of the -transaction and in consistent reads that must look to the history of this -transaction. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -trx_undo_report_row_operation( -/*==========================*/ - que_thr_t* thr, /*!< in: query thread */ - dict_index_t* index, /*!< in: clustered index */ - const dtuple_t* clust_entry, /*!< in: in the case of an insert, - index entry to insert into the - clustered index, otherwise NULL */ - const upd_t* update, /*!< in: in the case of an update, - the update vector, otherwise NULL */ - ulint cmpl_info, /*!< in: compiler info on secondary - index updates */ - const rec_t* rec, /*!< in: case of an update or delete - marking, the record in the clustered - index, otherwise NULL */ - const ulint* offsets, /*!< in: rec_get_offsets(rec) */ - roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the - inserted undo log record, - 0 if BTR_NO_UNDO_LOG - flag was specified */ - MY_ATTRIBUTE((nonnull(1,2,8), warn_unused_result)); -/******************************************************************//** -Copies an undo record to heap. This function can be called if we know that -the undo log record exists. -@return own: copy of the record */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_undo_rec_low( -/*======================*/ - roll_ptr_t roll_ptr, /*!< in: roll pointer to record */ - mem_heap_t* heap) /*!< in: memory heap where copied */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Build a previous version of a clustered index record. 
The caller must -hold a latch on the index page of the clustered index record. -@retval true if previous version was built, or if it was an insert -or the table has been rebuilt -@retval false if the previous version is earlier than purge_view, -which means that it may have been removed */ -UNIV_INTERN -bool -trx_undo_prev_version_build( -/*========================*/ - const rec_t* index_rec,/*!< in: clustered index record in the - index tree */ - mtr_t* index_mtr,/*!< in: mtr which contains the latch to - index_rec page and purge_view */ - const rec_t* rec, /*!< in: version of a clustered index record */ - dict_index_t* index, /*!< in: clustered index */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mem_heap_t* heap, /*!< in: memory heap from which the memory - needed is allocated */ - rec_t** old_vers)/*!< out, own: previous version, or NULL if - rec is the first inserted version, or if - history data has been deleted */ - MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************//** -Parses a redo log record of adding an undo log record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_add_undo_rec( -/*========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page); /*!< in: page or NULL */ -/***********************************************************//** -Parses a redo log record of erasing of an undo page end. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_erase_page_end( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ - -#ifndef UNIV_HOTBACKUP - -/* Types of an undo log record: these have to be smaller than 16, as the -compilation info multiplied by 16 is ORed to this value in an undo log -record */ - -#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */ -#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked - record */ -#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to - a not delete marked record; also the - fields of the record can change */ -#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields - do not change */ -#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by - this and ORed to the type above */ -#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl - to denote that we updated external - storage fields: used by purge to - free the external storage */ - -#ifndef UNIV_NONINL -#include "trx0rec.ic" -#endif - -#endif /* !UNIV_HOTBACKUP */ - -#endif /* trx0rec_h */ diff --git a/storage/xtradb/include/trx0rec.ic b/storage/xtradb/include/trx0rec.ic deleted file mode 100644 index 08704f6b821..00000000000 --- a/storage/xtradb/include/trx0rec.ic +++ /dev/null @@ -1,113 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0rec.ic -Transaction undo log record - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Reads from an undo log record the record type. -@return record type */ -UNIV_INLINE -ulint -trx_undo_rec_get_type( -/*==================*/ - const trx_undo_rec_t* undo_rec) /*!< in: undo log record */ -{ - return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1)); -} - -/**********************************************************************//** -Reads from an undo log record the record compiler info. -@return compiler info */ -UNIV_INLINE -ulint -trx_undo_rec_get_cmpl_info( -/*=======================*/ - const trx_undo_rec_t* undo_rec) /*!< in: undo log record */ -{ - return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT); -} - -/**********************************************************************//** -Returns TRUE if an undo log record contains an extern storage field. 
-@return TRUE if extern */ -UNIV_INLINE -ibool -trx_undo_rec_get_extern_storage( -/*============================*/ - const trx_undo_rec_t* undo_rec) /*!< in: undo log record */ -{ - if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) { - - return(TRUE); - } - - return(FALSE); -} - -/**********************************************************************//** -Reads the undo log record number. -@return undo no */ -UNIV_INLINE -undo_no_t -trx_undo_rec_get_undo_no( -/*=====================*/ - const trx_undo_rec_t* undo_rec) /*!< in: undo log record */ -{ - const byte* ptr; - - ptr = undo_rec + 3; - - return(mach_ull_read_much_compressed(ptr)); -} - -/**********************************************************************//** -Returns the start of the undo record data area. -@return offset to the data area */ -UNIV_INLINE -ulint -trx_undo_rec_get_offset( -/*====================*/ - undo_no_t undo_no) /*!< in: undo no read from node */ -{ - return(3 + mach_ull_get_much_compressed_size(undo_no)); -} - -/***********************************************************************//** -Copies the undo record to the heap. -@return own: copy of undo log record */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_rec_copy( -/*==============*/ - const trx_undo_rec_t* undo_rec, /*!< in: undo log record */ - mem_heap_t* heap) /*!< in: heap where copied */ -{ - ulint len; - - len = mach_read_from_2(undo_rec) - - ut_align_offset(undo_rec, UNIV_PAGE_SIZE); - ut_ad(len < UNIV_PAGE_SIZE); - return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len)); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/trx0roll.h b/storage/xtradb/include/trx0roll.h deleted file mode 100644 index b2e9d8a077f..00000000000 --- a/storage/xtradb/include/trx0roll.h +++ /dev/null @@ -1,298 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0roll.h -Transaction rollback - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0roll_h -#define trx0roll_h - -#include "univ.i" -#include "btr0types.h" -#include "trx0trx.h" -#include "trx0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" - -extern bool trx_rollback_or_clean_is_active; - -/*******************************************************************//** -Determines if this transaction is rolling back an incomplete transaction -in crash recovery. -@return TRUE if trx is an incomplete transaction that is being rolled -back in crash recovery */ -UNIV_INTERN -ibool -trx_is_recv( -/*========*/ - const trx_t* trx); /*!< in: transaction */ -/*******************************************************************//** -Returns a transaction savepoint taken at this point in time. -@return savepoint */ -UNIV_INTERN -trx_savept_t -trx_savept_take( -/*============*/ - trx_t* trx); /*!< in: transaction */ -/*******************************************************************//** -Frees an undo number array. 
*/ -UNIV_INTERN -void -trx_undo_arr_free( -/*==============*/ - trx_undo_arr_t* arr); /*!< in: undo number array */ -/*******************************************************************//** -Returns pointer to nth element in an undo number array. -@return pointer to the nth element */ -UNIV_INLINE -trx_undo_inf_t* -trx_undo_arr_get_nth_info( -/*======================*/ - trx_undo_arr_t* arr, /*!< in: undo number array */ - ulint n); /*!< in: position */ -/********************************************************************//** -Pops the topmost record when the two undo logs of a transaction are seen -as a single stack of records ordered by their undo numbers. Inserts the -undo number of the popped undo record to the array of currently processed -undo numbers in the transaction. When the query thread finishes processing -of this undo record, it must be released with trx_undo_rec_release. -@return undo log record copied to heap, NULL if none left, or if the -undo number of the top record would be less than the limit */ -UNIV_INTERN -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - undo_no_t limit, /*!< in: least undo number we need */ - roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */ - mem_heap_t* heap); /*!< in: memory heap where copied */ -/********************************************************************//** -Reserves an undo log record for a query thread to undo. This should be -called if the query thread gets the undo log record not using the pop -function above. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -trx_undo_rec_reserve( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - undo_no_t undo_no);/*!< in: undo number of the record */ -/*******************************************************************//** -Releases a reserved undo record. 
*/ -UNIV_INTERN -void -trx_undo_rec_release( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - undo_no_t undo_no);/*!< in: undo number */ -/*******************************************************************//** -Rollback or clean up any incomplete transactions which were -encountered in crash recovery. If the transaction already was -committed, then we clean up a possible insert undo log. If the -transaction was not yet committed, then we roll it back. */ -UNIV_INTERN -void -trx_rollback_or_clean_recovered( -/*============================*/ - ibool all); /*!< in: FALSE=roll back dictionary transactions; - TRUE=roll back all non-PREPARED transactions */ -/*******************************************************************//** -Rollback or clean up any incomplete transactions which were -encountered in crash recovery. If the transaction already was -committed, then we clean up a possible insert undo log. If the -transaction was not yet committed, then we roll it back. -Note: this is done in a background thread. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( -/*================================================*/ - void* arg MY_ATTRIBUTE((unused))); - /*!< in: a dummy parameter required by - os_thread_create */ -/*********************************************************************//** -Creates a rollback command node struct. -@return own: rollback node struct */ -UNIV_INTERN -roll_node_t* -roll_node_create( -/*=============*/ - mem_heap_t* heap); /*!< in: mem heap where created */ -/***********************************************************//** -Performs an execution step for a rollback command node in a query graph. 
-@return query thread to run next, or NULL */ -UNIV_INTERN -que_thr_t* -trx_rollback_step( -/*==============*/ - que_thr_t* thr); /*!< in: query thread */ -/*******************************************************************//** -Rollback a transaction used in MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_for_mysql( -/*===================*/ - trx_t* trx) /*!< in/out: transaction */ - MY_ATTRIBUTE((nonnull)); -/*******************************************************************//** -Rollback the latest SQL statement for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_last_sql_stat_for_mysql( -/*=================================*/ - trx_t* trx) /*!< in/out: transaction */ - MY_ATTRIBUTE((nonnull)); -/*******************************************************************//** -Rollback a transaction to a given savepoint or do a complete rollback. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_to_savepoint( -/*======================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if - partial rollback requested, or NULL for - complete rollback */ - MY_ATTRIBUTE((nonnull(1))); -/*******************************************************************//** -Rolls back a transaction back to a named savepoint. Modifications after the -savepoint are undone but InnoDB does NOT release the corresponding locks -which are stored in memory. If a lock is 'implicit', that is, a new inserted -row holds a lock where the lock information is carried by the trx id stored in -the row, these locks are naturally released in the rollback. Savepoints which -were set after this savepoint are deleted. 
-@return if no savepoint of the name found then DB_NO_SAVEPOINT, -otherwise DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_to_savepoint_for_mysql( -/*================================*/ - trx_t* trx, /*!< in: transaction handle */ - const char* savepoint_name, /*!< in: savepoint name */ - ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache - position corresponding to this - savepoint; MySQL needs this - information to remove the - binlog entries of the queries - executed after the savepoint */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Creates a named savepoint. If the transaction is not yet started, starts it. -If there is already a savepoint of the same name, this call erases that old -savepoint and replaces it with a new. Savepoints are deleted in a transaction -commit or rollback. -@return always DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_savepoint_for_mysql( -/*====================*/ - trx_t* trx, /*!< in: transaction handle */ - const char* savepoint_name, /*!< in: savepoint name */ - ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache - position corresponding to this - connection at the time of the - savepoint */ - MY_ATTRIBUTE((nonnull)); -/*******************************************************************//** -Releases a named savepoint. Savepoints which -were set after this savepoint are deleted. -@return if no savepoint of the name found then DB_NO_SAVEPOINT, -otherwise DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_release_savepoint_for_mysql( -/*============================*/ - trx_t* trx, /*!< in: transaction handle */ - const char* savepoint_name) /*!< in: savepoint name */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*******************************************************************//** -Frees savepoint structs starting from savep. 
*/ -UNIV_INTERN -void -trx_roll_savepoints_free( -/*=====================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_named_savept_t* savep); /*!< in: free all savepoints > this one; - if this is NULL, free all savepoints - of trx */ - -/** A cell of trx_undo_arr_t; used during a rollback and a purge */ -struct trx_undo_inf_t{ - ibool in_use; /*!< true if cell is being used */ - trx_id_t trx_no; /*!< transaction number: not defined during - a rollback */ - undo_no_t undo_no;/*!< undo number of an undo record */ -}; - -/** During a rollback and a purge, undo numbers of undo records currently being -processed are stored in this array */ - -struct trx_undo_arr_t{ - ulint n_cells; /*!< number of cells in the array */ - ulint n_used; /*!< number of cells in use */ - trx_undo_inf_t* infos; /*!< the array of undo infos */ - mem_heap_t* heap; /*!< memory heap from which allocated */ -}; - -/** Rollback node states */ -enum roll_node_state { - ROLL_NODE_NONE = 0, /*!< Unknown state */ - ROLL_NODE_SEND, /*!< about to send a rollback signal to - the transaction */ - ROLL_NODE_WAIT /*!< rollback signal sent to the - transaction, waiting for completion */ -}; - -/** Rollback command node in a query graph */ -struct roll_node_t{ - que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */ - enum roll_node_state state; /*!< node execution state */ - ibool partial;/*!< TRUE if we want a partial - rollback */ - trx_savept_t savept; /*!< savepoint to which to - roll back, in the case of a - partial rollback */ - que_thr_t* undo_thr;/*!< undo query graph */ -}; - -/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ -struct trx_named_savept_t{ - char* name; /*!< savepoint name */ - trx_savept_t savept; /*!< the undo number corresponding to - the savepoint */ - ib_int64_t mysql_binlog_cache_pos; - /*!< the MySQL binlog cache position - corresponding to this savepoint, not - defined if the MySQL binlogging is not - enabled */ - UT_LIST_NODE_T(trx_named_savept_t) - 
trx_savepoints; /*!< the list of savepoints of a - transaction */ -}; - -#ifndef UNIV_NONINL -#include "trx0roll.ic" -#endif - -#endif diff --git a/storage/xtradb/include/trx0roll.ic b/storage/xtradb/include/trx0roll.ic deleted file mode 100644 index 178e9bb730a..00000000000 --- a/storage/xtradb/include/trx0roll.ic +++ /dev/null @@ -1,40 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0roll.ic -Transaction rollback - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -/*******************************************************************//** -Returns pointer to nth element in an undo number array. 
-@return pointer to the nth element */ -UNIV_INLINE -trx_undo_inf_t* -trx_undo_arr_get_nth_info( -/*======================*/ - trx_undo_arr_t* arr, /*!< in: undo number array */ - ulint n) /*!< in: position */ -{ - ut_ad(arr); - ut_ad(n < arr->n_cells); - - return(arr->infos + n); -} diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h deleted file mode 100644 index e2853df7045..00000000000 --- a/storage/xtradb/include/trx0rseg.h +++ /dev/null @@ -1,231 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0rseg.h -Rollback segment - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0rseg_h -#define trx0rseg_h - -#include "univ.i" -#include "trx0types.h" -#include "trx0sys.h" -#include "ut0bh.h" - -/******************************************************************//** -Gets a rollback segment header. 
-@return rollback segment header, page x-latched */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get( -/*==========*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the header */ - mtr_t* mtr); /*!< in: mtr */ -/******************************************************************//** -Gets a newly created rollback segment header. -@return rollback segment header, page x-latched */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get_new( -/*==============*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the header */ - mtr_t* mtr); /*!< in: mtr */ -/***************************************************************//** -Gets the file page number of the nth undo log slot. -@return page number of the undo log segment */ -UNIV_INLINE -ulint -trx_rsegf_get_nth_undo( -/*===================*/ - trx_rsegf_t* rsegf, /*!< in: rollback segment header */ - ulint n, /*!< in: index of slot */ - mtr_t* mtr); /*!< in: mtr */ -/***************************************************************//** -Sets the file page number of the nth undo log slot. */ -UNIV_INLINE -void -trx_rsegf_set_nth_undo( -/*===================*/ - trx_rsegf_t* rsegf, /*!< in: rollback segment header */ - ulint n, /*!< in: index of slot */ - ulint page_no,/*!< in: page number of the undo log segment */ - mtr_t* mtr); /*!< in: mtr */ -/****************************************************************//** -Looks for a free slot for an undo log segment. 
-@return slot index or ULINT_UNDEFINED if not found */ -UNIV_INLINE -ulint -trx_rsegf_undo_find_free( -/*=====================*/ - trx_rsegf_t* rsegf, /*!< in: rollback segment header */ - mtr_t* mtr); /*!< in: mtr */ -/******************************************************************//** -Looks for a rollback segment, based on the rollback segment id. -@return rollback segment */ -UNIV_INLINE -trx_rseg_t* -trx_rseg_get_on_id( -/*===============*/ - ulint id); /*!< in: rollback segment id */ -/****************************************************************//** -Creates a rollback segment header. This function is called only when -a new rollback segment is created in the database. -@return page number of the created segment, FIL_NULL if fail */ -UNIV_INTERN -ulint -trx_rseg_header_create( -/*===================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint max_size, /*!< in: max size in pages */ - ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */ - mtr_t* mtr); /*!< in: mtr */ -/*********************************************************************//** -Creates the memory copies for rollback segments and initializes the -rseg array in trx_sys at a database startup. */ -UNIV_INTERN -void -trx_rseg_array_init( -/*================*/ - trx_sysf_t* sys_header, /*!< in/out: trx system header */ - ib_bh_t* ib_bh, /*!< in: rseg queue */ - mtr_t* mtr); /*!< in/out: mtr */ -/*************************************************************************** -Free's an instance of the rollback segment in memory. */ -UNIV_INTERN -void -trx_rseg_mem_free( -/*==============*/ - trx_rseg_t* rseg); /*!< in, own: instance to free */ - -/** Create a rollback segment. 
-@param[in] space undo tablespace ID -@return pointer to new rollback segment -@retval NULL on failure */ -UNIV_INTERN -trx_rseg_t* -trx_rseg_create(ulint space); - -/******************************************************************** -Get the number of unique rollback tablespaces in use except space id 0. -The last space id will be the sentinel value ULINT_UNDEFINED. The array -will be sorted on space id. Note: space_ids should have have space for -TRX_SYS_N_RSEGS + 1 elements. -@return number of unique rollback tablespaces in use. */ -UNIV_INTERN -ulint -trx_rseg_get_n_undo_tablespaces( -/*============================*/ - ulint* space_ids); /*!< out: array of space ids of - UNDO tablespaces */ -/* Number of undo log slots in a rollback segment file copy */ -#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16) - -/* Maximum number of transactions supported by a single rollback segment */ -#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2) - -/* The rollback segment memory object */ -struct trx_rseg_t{ - /*--------------------------------------------------------*/ - ulint id; /*!< rollback segment id == the index of - its slot in the trx system file copy */ - ib_prio_mutex_t mutex; /*!< mutex protecting the fields in this - struct except id, which is constant */ - ulint space; /*!< space where the rollback segment is - header is placed */ - ulint zip_size;/* compressed page size of space - in bytes, or 0 for uncompressed spaces */ - ulint page_no;/* page number of the rollback segment - header */ - ulint max_size;/* maximum allowed size in pages */ - ulint curr_size;/* current size in pages */ - /*--------------------------------------------------------*/ - /* Fields for update undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list; - /* List of update undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached; - /* List of update undo log segments - cached for fast reuse */ - /*--------------------------------------------------------*/ - /* Fields for 
insert undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list; - /* List of insert undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached; - /* List of insert undo log segments - cached for fast reuse */ - /*--------------------------------------------------------*/ - ulint last_page_no; /*!< Page number of the last not yet - purged log header in the history list; - FIL_NULL if all list purged */ - ulint last_offset; /*!< Byte offset of the last not yet - purged log header */ - trx_id_t last_trx_no; /*!< Transaction number of the last not - yet purged log */ - ibool last_del_marks; /*!< TRUE if the last not yet purged log - needs purging */ -}; - -/** For prioritising the rollback segments for purge. */ -struct rseg_queue_t { - trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */ - trx_rseg_t* rseg; /*!< Rollback segment */ -}; - -/* Undo log segment slot in a rollback segment header */ -/*-------------------------------------------------------------*/ -#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of - an undo log segment */ -/*-------------------------------------------------------------*/ -/* Slot size */ -#define TRX_RSEG_SLOT_SIZE 4 - -/* The offset of the rollback segment header on its page */ -#define TRX_RSEG FSEG_PAGE_DATA - -/* Transaction rollback segment header */ -/*-------------------------------------------------------------*/ -#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback - segment in pages */ -#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied - by the logs in the history list */ -#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed - transactions */ -#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE) - /* Header for the file segment where - this page is placed */ -#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE) - /* Undo log segment slots */ -/*-------------------------------------------------------------*/ - -#ifndef UNIV_NONINL -#include 
"trx0rseg.ic" -#endif - -#endif diff --git a/storage/xtradb/include/trx0rseg.ic b/storage/xtradb/include/trx0rseg.ic deleted file mode 100644 index 30743da9b8c..00000000000 --- a/storage/xtradb/include/trx0rseg.ic +++ /dev/null @@ -1,167 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0rseg.ic -Rollback segment - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "srv0srv.h" -#include "mtr0log.h" -#include "trx0sys.h" - -/******************************************************************//** -Gets a rollback segment header. 
-@return rollback segment header, page x-latched */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get( -/*==========*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the header */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - trx_rsegf_t* header; - - block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); /* fetch the header page, requesting RW_X_LATCH */ - buf_block_dbg_add_level(block, SYNC_RSEG_HEADER); - - header = TRX_RSEG + buf_block_get_frame(block); /* the header starts TRX_RSEG bytes into the page frame */ - - return(header); -} - -/******************************************************************//** -Gets a newly created rollback segment header. -Performs the same page fetch as trx_rsegf_get(); the only difference in the -body is that the block is registered with SYNC_RSEG_HEADER_NEW instead of -SYNC_RSEG_HEADER. -@return rollback segment header, page x-latched */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get_new( -/*==============*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the header */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - trx_rsegf_t* header; - - block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); /* fetch the header page, requesting RW_X_LATCH */ - buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW); - - header = TRX_RSEG + buf_block_get_frame(block); /* the header starts TRX_RSEG bytes into the page frame */ - - return(header); -} - -/***************************************************************//** -Gets the file page number of the nth undo log slot. 
-@return page number of the undo log segment */ -UNIV_INLINE -ulint -trx_rsegf_get_nth_undo( -/*===================*/ - trx_rsegf_t* rsegf, /*!< in: rollback segment header */ - ulint n, /*!< in: index of slot; must be less than TRX_RSEG_N_SLOTS */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (n >= TRX_RSEG_N_SLOTS) { /* slot index out of range: report it, then ut_error */ - fprintf(stderr, - "InnoDB: Error: trying to get slot %lu of rseg\n", - (ulong) n); - ut_error; - } - - return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS - + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr)); /* slot n lies n * TRX_RSEG_SLOT_SIZE bytes past TRX_RSEG_UNDO_SLOTS */ -} - -/***************************************************************//** -Sets the file page number of the nth undo log slot. -The value is written with mlog_write_ulint() as an MLOG_4BYTES field at -offset TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE from the header. */ -UNIV_INLINE -void -trx_rsegf_set_nth_undo( -/*===================*/ - trx_rsegf_t* rsegf, /*!< in: rollback segment header */ - ulint n, /*!< in: index of slot; must be less than TRX_RSEG_N_SLOTS */ - ulint page_no,/*!< in: page number of the undo log segment */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (n >= TRX_RSEG_N_SLOTS) { /* slot index out of range: report it, then ut_error */ - fprintf(stderr, - "InnoDB: Error: trying to set slot %lu of rseg\n", - (ulong) n); - ut_error; - } - - mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, - page_no, MLOG_4BYTES, mtr); -} - -/****************************************************************//** -Looks for a free slot for an undo log segment. -Scans the slots in increasing index order and returns the first one whose -stored page number equals FIL_NULL. When UNIV_DEBUG is defined, a nonzero -trx_rseg_n_slots_debug replaces TRX_RSEG_N_SLOTS as the scan limit. -@return slot index or ULINT_UNDEFINED if not found */ -UNIV_INLINE -ulint -trx_rsegf_undo_find_free( -/*=====================*/ - trx_rsegf_t* rsegf, /*!< in: rollback segment header */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint i; - ulint page_no; - - for (i = 0; -#ifndef UNIV_DEBUG - i < TRX_RSEG_N_SLOTS; -#else - i < (trx_rseg_n_slots_debug ? trx_rseg_n_slots_debug : TRX_RSEG_N_SLOTS); -#endif - i++) { - - page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr); - - if (page_no == FIL_NULL) { /* first slot holding FIL_NULL is reported as free */ - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/******************************************************************//** -Looks for a rollback segment, based on the rollback segment id. 
-@return rollback segment */ -UNIV_INLINE -trx_rseg_t* -trx_rseg_get_on_id( -/*===============*/ - ulint id) /*!< in: rollback segment id */ -{ - ut_a(id < TRX_SYS_N_RSEGS); - - return(trx_sys->rseg_array[id]); -} - diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h deleted file mode 100644 index 9bfffd09532..00000000000 --- a/storage/xtradb/include/trx0sys.h +++ /dev/null @@ -1,756 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0sys.h -Transaction system - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0sys_h -#define trx0sys_h - -#include "univ.i" - -#include "trx0types.h" -#include "fsp0types.h" -#include "fil0fil.h" -#include "buf0buf.h" -#ifndef UNIV_HOTBACKUP -#include "mtr0mtr.h" -#include "ut0byte.h" -#include "mem0mem.h" -#include "sync0sync.h" -#include "ut0lst.h" -#include "ut0bh.h" -#include "read0types.h" -#include "page0types.h" -#include "ut0bh.h" -#ifdef WITH_WSREP -#include "trx0xa.h" -#endif /* WITH_WSREP */ - -typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t; - -/** In a MySQL replication slave, in crash recovery we store the master log -file name and position here. */ -/* @{ */ -/** Master binlog file name */ -extern char trx_sys_mysql_master_log_name[]; -/** Master binlog file position. We have successfully got the updates -up to this position. -1 means that no crash recovery was needed, or -there was no master log position info inside InnoDB.*/ -extern ib_int64_t trx_sys_mysql_master_log_pos; -/* @} */ - -/** If this MySQL server uses binary logging, after InnoDB has been inited -and if it has done a crash recovery, we store the binlog file name and position -here. */ -/* @{ */ -/** Binlog file name */ -extern char trx_sys_mysql_bin_log_name[]; -/** Binlog file position, or -1 if unknown */ -extern ib_int64_t trx_sys_mysql_bin_log_pos; -/* @} */ - -/** The transaction system */ -extern trx_sys_t* trx_sys; - -/***************************************************************//** -Checks if a page address is the trx sys header page. 
-@return TRUE if trx sys header page */ -UNIV_INLINE -ibool -trx_sys_hdr_page( -/*=============*/ - ulint space, /*!< in: space */ - ulint page_no);/*!< in: page number */ -/*****************************************************************//** -Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. -@return min binary heap of rsegs to purge */ -UNIV_INTERN -ib_bh_t* -trx_sys_init_at_db_start(void); -/*==========================*/ -/*****************************************************************//** -Creates the trx_sys instance and initializes ib_bh and mutex. */ -UNIV_INTERN -void -trx_sys_create(void); -/*================*/ -/*****************************************************************//** -Creates and initializes the transaction system at the database creation. */ -UNIV_INTERN -void -trx_sys_create_sys_pages(void); -/*==========================*/ -/****************************************************************//** -Looks for a free slot for a rollback segment in the trx system file copy. -@return slot index or ULINT_UNDEFINED if not found */ -UNIV_INTERN -ulint -trx_sysf_rseg_find_free( -/*====================*/ - mtr_t* mtr); /*!< in: mtr */ -/***************************************************************//** -Gets the pointer in the nth slot of the rseg array. -@return pointer to rseg object, NULL if slot not in use */ -UNIV_INLINE -trx_rseg_t* -trx_sys_get_nth_rseg( -/*=================*/ - trx_sys_t* sys, /*!< in: trx system */ - ulint n); /*!< in: index of slot */ -/**********************************************************************//** -Gets a pointer to the transaction system file copy and x-locks its page. 
-@return pointer to system file copy, page x-locked */ -UNIV_INLINE -trx_sysf_t* -trx_sysf_get( -/*=========*/ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Gets the space of the nth rollback segment slot in the trx system -file copy. -@return space id */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_space( -/*====================*/ - trx_sysf_t* sys_header, /*!< in: trx sys file copy */ - ulint i, /*!< in: slot index == rseg id */ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Gets the page number of the nth rollback segment slot in the trx system -file copy. -@return page number, FIL_NULL if slot unused */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_page_no( -/*======================*/ - trx_sysf_t* sys_header, /*!< in: trx sys file copy */ - ulint i, /*!< in: slot index == rseg id */ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Sets the space id of the nth rollback segment slot in the trx system -file copy. */ -UNIV_INLINE -void -trx_sysf_rseg_set_space( -/*====================*/ - trx_sysf_t* sys_header, /*!< in: trx sys file copy */ - ulint i, /*!< in: slot index == rseg id */ - ulint space, /*!< in: space id */ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Sets the page number of the nth rollback segment slot in the trx system -file copy. */ -UNIV_INLINE -void -trx_sysf_rseg_set_page_no( -/*======================*/ - trx_sysf_t* sys_header, /*!< in: trx sys file copy */ - ulint i, /*!< in: slot index == rseg id */ - ulint page_no, /*!< in: page number, FIL_NULL if - the slot is reset to unused */ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Allocates a new transaction id. 
-@return new, allocated trx id */ -UNIV_INLINE -trx_id_t -trx_sys_get_new_trx_id(void); -/*========================*/ -/*****************************************************************//** -Determines the maximum transaction id. -@return maximum currently allocated trx id; will be stale after the -next call to trx_sys_get_new_trx_id() */ -UNIV_INLINE -trx_id_t -trx_sys_get_max_trx_id(void); -/*========================*/ - -/*************************************************************//** -Find a slot for a given trx ID in a descriptors array. -@return slot pointer */ -UNIV_INLINE -trx_id_t* -trx_find_descriptor( -/*================*/ - const trx_id_t* descriptors, /*!< in: descriptors array */ - ulint n_descr, /*!< in: array size */ - trx_id_t trx_id); /*!< in: trx id to look for */ - -#ifdef UNIV_DEBUG -/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */ -extern uint trx_rseg_n_slots_debug; -#endif - -/*****************************************************************//** -Writes a trx id to an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_trx_id( -/*=============*/ - byte* ptr, /*!< in: pointer to memory where written */ - trx_id_t id); /*!< in: id */ -/*****************************************************************//** -Reads a trx id from an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_read_... -@return id */ -UNIV_INLINE -trx_id_t -trx_read_trx_id( -/*============*/ - const byte* ptr); /*!< in: pointer to memory from where to read */ -/****************************************************************//** -Looks for the trx instance with the given id in the rw trx_list. -The caller must be holding trx_sys->mutex. 
-@return the trx handle or NULL if not found; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ -UNIV_INLINE -trx_t* -trx_get_rw_trx_by_id( -/*=================*/ - trx_id_t trx_id);/*!< in: trx id to search for */ -/****************************************************************//** -Returns the minimum trx id in rw trx list. This is the smallest id for which -the trx can possibly be active. (But, you must look at the trx->state to -find out if the minimum trx id transaction itself is active, or already -committed.) -@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ -UNIV_INLINE -trx_id_t -trx_rw_min_trx_id(void); -/*===================*/ -/****************************************************************//** -Returns pointer to a transaction instance if a rw transaction with the given id -is active. Caller must hold trx_sys->mutex. If the caller is not holding -lock_sys->mutex, the transaction may already have been committed. -@return transaction instance if active, or NULL; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ -UNIV_INLINE -trx_t* -trx_rw_get_active_trx_by_id( -/*========================*/ - trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt); /*!< in: NULL or pointer to a flag - that will be set if corrupt */ -/****************************************************************//** -Checks if a rw transaction with the given id is active. Caller must hold -trx_sys->mutex. If the caller is not holding lock_sys->mutex, the -transaction may already have been committed. -@return true if rw transaction it with a given id is active. 
*/ -UNIV_INLINE -bool -trx_rw_is_active_low( -/*=================*/ - trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt); /*!< in: NULL or pointer to a flag - that will be set if corrupt */ -/****************************************************************//** -Checks if a rw transaction with the given id is active. If the caller is -not holding lock_sys->mutex, the transaction may already have been -committed. -@return true if a rw transaction with the given id is active. */ -UNIV_INLINE -bool -trx_rw_is_active( -/*=============*/ - trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt); /*!< in: NULL or pointer to a flag - that will be set if corrupt */ -#ifdef UNIV_DEBUG -/****************************************************************//** -Checks whether a trx is in one of rw_trx_list or ro_trx_list. -@return TRUE if in_trx is in one of the lists */ -UNIV_INTERN -ibool -trx_in_trx_list( -/*============*/ - const trx_t* in_trx) /*!< in: transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG -/***********************************************************//** -Assert that a transaction has been recovered. -@return TRUE */ -UNIV_INLINE -ibool -trx_assert_recovered( -/*=================*/ - trx_id_t trx_id) /*!< in: transaction identifier */ - MY_ATTRIBUTE((warn_unused_result)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ -/*****************************************************************//** -Updates the offset information about the end of the MySQL binlog entry -which corresponds to the transaction just being committed. In a MySQL -replication slave updates the latest master binlog position up to which -replication has proceeded. 
*/ -UNIV_INTERN -void -trx_sys_update_mysql_binlog_offset( -/*===============================*/ - const char* file_name,/*!< in: MySQL log file name */ - ib_int64_t offset, /*!< in: position in that log file */ - ulint field, /*!< in: offset of the MySQL log info field in - the trx sys header */ -#ifdef WITH_WSREP - trx_sysf_t* sys_header, /*!< in: trx sys header */ -#endif /* WITH_WSREP */ - mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Prints to stderr the MySQL binlog offset info in the trx system header if -the magic number shows it valid. */ -UNIV_INTERN -void -trx_sys_print_mysql_binlog_offset(void); -/*===================================*/ -#ifdef WITH_WSREP -/** Update WSREP checkpoint XID in sys header. */ -void -trx_sys_update_wsrep_checkpoint( - const XID* xid, /*!< in: WSREP XID */ - trx_sysf_t* sys_header, /*!< in: sys_header */ - mtr_t* mtr); /*!< in: mtr */ - -/** Read WSREP checkpoint XID from sys header. -@return true on success, false on error. */ -bool -trx_sys_read_wsrep_checkpoint( - XID* xid); /*!< out: WSREP XID */ -#endif /* WITH_WSREP */ -/*****************************************************************//** -Prints to stderr the MySQL master log offset info in the trx system header if -the magic number shows it valid. */ -UNIV_INTERN -void -trx_sys_print_mysql_master_log_pos(void); -/*====================================*/ -/*****************************************************************//** -Initializes the tablespace tag system. */ -UNIV_INTERN -void -trx_sys_file_format_init(void); -/*==========================*/ -/*****************************************************************//** -Closes the tablespace tag system. */ -UNIV_INTERN -void -trx_sys_file_format_close(void); -/*===========================*/ -/********************************************************************//** -Tags the system table space with minimum format id if it has not been -tagged yet. 
-WARNING: This function is only called during the startup and AFTER the -redo log application during recovery has finished. */ -UNIV_INTERN -void -trx_sys_file_format_tag_init(void); -/*==============================*/ -/*****************************************************************//** -Shutdown/Close the transaction system. */ -UNIV_INTERN -void -trx_sys_close(void); -/*===============*/ -/*****************************************************************//** -Get the name representation of the file format from its id. -@return pointer to the name */ -UNIV_INTERN -const char* -trx_sys_file_format_id_to_name( -/*===========================*/ - const ulint id); /*!< in: id of the file format */ -/*****************************************************************//** -Set the file format id unconditionally except if it's already the -same value. -@return TRUE if value updated */ -UNIV_INTERN -ibool -trx_sys_file_format_max_set( -/*========================*/ - ulint format_id, /*!< in: file format id */ - const char** name); /*!< out: max file format name or - NULL if not needed. */ -/********************************************************************* -Creates the rollback segments -@return number of rollback segments that are active. */ -UNIV_INTERN -ulint -trx_sys_create_rsegs( -/*=================*/ - ulint n_spaces, /*!< number of tablespaces for UNDO logs */ - ulint n_rsegs); /*!< number of rollback segments to create */ -/*****************************************************************//** -Get the number of transaction in the system, independent of their state. -@return count of transactions in trx_sys_t::trx_list */ -UNIV_INLINE -ulint -trx_sys_get_n_rw_trx(void); -/*======================*/ - -/********************************************************************* -Check if there are any active (non-prepared) transactions. 
-@return total number of active transactions or 0 if none */ -UNIV_INTERN -ulint -trx_sys_any_active_transactions(void); -/*=================================*/ -#else /* !UNIV_HOTBACKUP */ -/*****************************************************************//** -Prints to stderr the MySQL binlog info in the system header if the -magic number shows it valid. */ -UNIV_INTERN -void -trx_sys_print_mysql_binlog_offset_from_page( -/*========================================*/ - const byte* page); /*!< in: buffer containing the trx - system header page, i.e., page number - TRX_SYS_PAGE_NO in the tablespace */ -/*****************************************************************//** -Reads the file format id from the first system table space file. -Even if the call succeeds and returns TRUE, the returned format id -may be ULINT_UNDEFINED signalling that the format id was not present -in the data file. -@return TRUE if call succeeds */ -UNIV_INTERN -ibool -trx_sys_read_file_format_id( -/*========================*/ - const char *pathname, /*!< in: pathname of the first system - table space file */ - ulint *format_id); /*!< out: file format of the system table - space */ -/*****************************************************************//** -Reads the file format id from the given per-table data file. -@return TRUE if call succeeds */ -UNIV_INTERN -ibool -trx_sys_read_pertable_file_format_id( -/*=================================*/ - const char *pathname, /*!< in: pathname of a per-table - datafile */ - ulint *format_id); /*!< out: file format of the per-table - data file */ -#endif /* !UNIV_HOTBACKUP */ -/*****************************************************************//** -Get the name representation of the file format from its id. 
-@return pointer to the max format name */ -UNIV_INTERN -const char* -trx_sys_file_format_max_get(void); -/*=============================*/ -/*****************************************************************//** -Check for the max file format tag stored on disk. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -trx_sys_file_format_max_check( -/*==========================*/ - ulint max_format_id); /*!< in: the max format id to check */ -/********************************************************************//** -Update the file format tag in the system tablespace only if the given -format id is greater than the known max id. -@return TRUE if format_id was bigger than the known max id */ -UNIV_INTERN -ibool -trx_sys_file_format_max_upgrade( -/*============================*/ - const char** name, /*!< out: max file format name */ - ulint format_id); /*!< in: file format identifier */ -/*****************************************************************//** -Get the name representation of the file format from its id. -@return pointer to the name */ -UNIV_INTERN -const char* -trx_sys_file_format_id_to_name( -/*===========================*/ - const ulint id); /*!< in: id of the file format */ - -#ifdef UNIV_DEBUG -/*************************************************************//** -Validate the trx_sys_t::trx_list. 
*/ -UNIV_INTERN -ibool -trx_sys_validate_trx_list(void); -/*===========================*/ -#endif /* UNIV_DEBUG */ - -/* The automatically created system rollback segment has this id */ -#define TRX_SYS_SYSTEM_RSEG_ID 0 - -/* Space id and page no where the trx system file copy resides */ -#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ -#include "fsp0fsp.h" -#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO - -/* The offset of the transaction system header on the page */ -#define TRX_SYS FSEG_PAGE_DATA - -/** Transaction system header */ -/*------------------------------------------------------------- @{ */ -#define TRX_SYS_TRX_ID_STORE 0 /*!< the maximum trx id or trx - number modulo - TRX_SYS_TRX_ID_UPDATE_MARGIN - written to a file page by any - transaction; the assignment of - transaction ids continues from - this number rounded up by - TRX_SYS_TRX_ID_UPDATE_MARGIN - plus - TRX_SYS_TRX_ID_UPDATE_MARGIN - when the database is - started */ -#define TRX_SYS_FSEG_HEADER 8 /*!< segment header for the - tablespace segment the trx - system is created into */ -#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE) - /*!< the start of the array of - rollback segment specification - slots */ -/*------------------------------------------------------------- @} */ - -/* Max number of rollback segments: the number of segment specification slots -in the transaction system array; rollback segment id must fit in one (signed) -byte, therefore 128; each slot is currently 8 bytes in size. If you want -to raise the level to 256 then you will need to fix some assertions that -impose the 7 bit restriction. e.g., mach_write_to_3() */ -#define TRX_SYS_N_RSEGS 128 -/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one -rollback segment. It initialized some arrays with this number of entries. -We must remember this limit in order to keep file compatibility. */ -#define TRX_SYS_OLD_N_RSEGS 256 - -/** Maximum length of MySQL binlog file name, in bytes. 
-@see trx_sys_mysql_master_log_name -@see trx_sys_mysql_bin_log_name */ -#define TRX_SYS_MYSQL_LOG_NAME_LEN 512 -/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */ -#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 - -#if UNIV_PAGE_SIZE_MIN < 4096 -# error "UNIV_PAGE_SIZE_MIN < 4096" -#endif -/** The offset of the MySQL replication info in the trx system header; -this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ -#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000) - -/** The offset of the MySQL binlog offset info in the trx system header */ -#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000) -#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is - TRX_SYS_MYSQL_LOG_MAGIC_N - if we have valid data in the - MySQL binlog info */ -#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /*!< high 4 bytes of the offset - within that file */ -#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /*!< low 4 bytes of the offset - within that file */ -#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */ - -#ifdef WITH_WSREP -/* The offset to WSREP XID headers */ -#define TRX_SYS_WSREP_XID_INFO (UNIV_PAGE_SIZE - 3500) -#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0 -#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265 - -/* XID field: formatID, gtrid_len, bqual_len, xid_data */ -#define TRX_SYS_WSREP_XID_LEN (4 + 4 + 4 + XIDDATASIZE) -#define TRX_SYS_WSREP_XID_FORMAT 4 -#define TRX_SYS_WSREP_XID_GTRID_LEN 8 -#define TRX_SYS_WSREP_XID_BQUAL_LEN 12 -#define TRX_SYS_WSREP_XID_DATA 16 -#endif /* WITH_WSREP*/ - -/** Doublewrite buffer */ -/* @{ */ -/** The offset of the doublewrite buffer header on the trx system header page */ -#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200) -/*-------------------------------------------------------------*/ -#define TRX_SYS_DOUBLEWRITE_FSEG 0 /*!< fseg header of the fseg - containing the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE - /*!< 4-byte magic number which - shows if we already have - created the doublewrite 
- buffer */ -#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE) - /*!< page number of the - first page in the first - sequence of 64 - (= FSP_EXTENT_SIZE) consecutive - pages in the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE) - /*!< page number of the - first page in the second - sequence of 64 consecutive - pages in the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /*!< we repeat - TRX_SYS_DOUBLEWRITE_MAGIC, - TRX_SYS_DOUBLEWRITE_BLOCK1, - TRX_SYS_DOUBLEWRITE_BLOCK2 - so that if the trx sys - header is half-written - to disk, we still may - be able to recover the - information */ -/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, -we must reset the doublewrite buffer, because starting from 4.1.x the -space id of a data page is stored into -FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */ -#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE) - -/*-------------------------------------------------------------*/ -/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */ -#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855 -/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */ -#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386 - -/** Size of the doublewrite block in pages */ -#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE -/* @} */ - -/** File format tag */ -/* @{ */ -/** The offset of the file format tag on the trx system header page -(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */ -#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16) - -/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format -identifier is added to this constant. */ -#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL -/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */ -#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL -/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format -identifier is added to this 64-bit constant. 
*/ -#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N \ - ((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32 \ - | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW) -/* @} */ - -#define TRX_DESCR_ARRAY_INITIAL_SIZE 1000 - -#ifndef UNIV_HOTBACKUP -/** The transaction system central memory data structure. */ -struct trx_sys_t{ - - ib_mutex_t mutex; /*!< mutex protecting most fields in - this structure except when noted - otherwise */ - ulint n_prepared_trx; /*!< Number of transactions currently - in the XA PREPARED state */ - ulint n_prepared_recovered_trx; /*!< Number of transactions - currently in XA PREPARED state that are - also recovered. Such transactions cannot - be added during runtime. They can only - occur after recovery if mysqld crashed - while there were XA PREPARED - transactions. We disable query cache - if such transactions exist. */ - trx_id_t max_trx_id; /*!< The smallest number not yet - assigned as a transaction id or - transaction number */ - char pad1[CACHE_LINE_SIZE]; /*!< Ensure max_trx_id does not share - cache line with other fields. */ - trx_id_t* descriptors; /*!< Array of trx descriptors */ - ulint descr_n_max; /*!< The current size of the descriptors - array. */ - char pad2[CACHE_LINE_SIZE]; /*!< Ensure static descriptor fields - do not share cache lines with - descr_n_used */ - ulint descr_n_used; /*!< Number of used elements in the - descriptors array. */ - char pad3[CACHE_LINE_SIZE]; /*!< Ensure descriptors do not share - cache line with other fields */ -#ifdef UNIV_DEBUG - trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions - which exist or existed */ -#endif - trx_list_t rw_trx_list; /*!< List of active and committed in - memory read-write transactions, sorted - on trx id, biggest first. Recovered - transactions are always on this list. 
*/ - char pad4[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not - share cache line with other fields */ - trx_list_t ro_trx_list; /*!< List of active and committed in - memory read-only transactions, sorted - on trx id, biggest first. NOTE: - The order for read-only transactions - is not necessary. We should exploit - this and increase concurrency during - add/remove. */ - char pad5[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not - share cache line with other fields */ - trx_list_t mysql_trx_list; /*!< List of transactions created - for MySQL. All transactions on - ro_trx_list are on mysql_trx_list. The - rw_trx_list can contain system - transactions and recovered transactions - that will not be in the mysql_trx_list. - There can be active non-locking - auto-commit read only transactions that - are on this list but not on ro_trx_list. - mysql_trx_list may additionally contain - transactions that have not yet been - started in InnoDB. */ - char pad6[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not - share cache line with other fields */ - trx_list_t trx_serial_list; - /*!< trx->no ordered List of - transactions in either TRX_PREPARED or - TRX_ACTIVE which have already been - assigned a serialization number */ - char pad7[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not - share cache line with other fields */ - trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS]; - /*!< Pointer array to rollback - segments; NULL if slot not in use; - created and destroyed in - single-threaded mode; not protected - by any mutex, because it is read-only - during multi-threaded operation */ - ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY - list (update undo logs for committed - transactions), protected by - rseg->mutex */ - UT_LIST_BASE_NODE_T(read_view_t) view_list; - /*!< List of read views sorted - on trx no, biggest first */ -}; - -/** When a trx id which is zero modulo this number (which must be a power of -two) is assigned, the field TRX_SYS_TRX_ID_STORE on 
the transaction system -page is updated */ -#define TRX_SYS_TRX_ID_WRITE_MARGIN 256 -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_NONINL -#include "trx0sys.ic" -#endif - -#endif diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic deleted file mode 100644 index 6024c1dc94e..00000000000 --- a/storage/xtradb/include/trx0sys.ic +++ /dev/null @@ -1,568 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0sys.ic -Transaction system - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0trx.h" -#include "data0type.h" -#ifndef UNIV_HOTBACKUP -# include "srv0srv.h" -# include "mtr0log.h" - -/* The typedef for rseg slot in the file copy */ -typedef byte trx_sysf_rseg_t; - -/* Rollback segment specification slot offsets */ -/*-------------------------------------------------------------*/ -#define TRX_SYS_RSEG_SPACE 0 /* space where the segment - header is placed; starting with - MySQL/InnoDB 5.1.7, this is - UNIV_UNDEFINED if the slot is unused */ -#define TRX_SYS_RSEG_PAGE_NO 4 /* page 
number where the segment - header is placed; this is FIL_NULL - if the slot is unused */ -/*-------------------------------------------------------------*/ -/* Size of a rollback segment specification slot */ -#define TRX_SYS_RSEG_SLOT_SIZE 8 - -/*****************************************************************//** -Writes the value of max_trx_id to the file based trx system header. */ -UNIV_INTERN -void -trx_sys_flush_max_trx_id(void); -/*==========================*/ - -/***************************************************************//** -Checks if a page address is the trx sys header page. -@return TRUE if trx sys header page */ -UNIV_INLINE -ibool -trx_sys_hdr_page( -/*=============*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ -{ - if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) { - - return(TRUE); - } - - return(FALSE); -} - -/***************************************************************//** -Gets the pointer in the nth slot of the rseg array. -@return pointer to rseg object, NULL if slot not in use */ -UNIV_INLINE -trx_rseg_t* -trx_sys_get_nth_rseg( -/*=================*/ - trx_sys_t* sys, /*!< in: trx system */ - ulint n) /*!< in: index of slot */ -{ - ut_ad(n < TRX_SYS_N_RSEGS); - - return(sys->rseg_array[n]); -} - -/**********************************************************************//** -Gets a pointer to the transaction system header and x-latches its page. -@return pointer to system header, page x-latched. 
*/ -UNIV_INLINE -trx_sysf_t* -trx_sysf_get( -/*=========*/ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block; - trx_sysf_t* header; - - ut_ad(mtr); - - block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, - RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER); - - header = TRX_SYS + buf_block_get_frame(block); - - return(header); -} - -/*****************************************************************//** -Gets the space of the nth rollback segment slot in the trx system -file copy. -@return space id */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_space( -/*====================*/ - trx_sysf_t* sys_header, /*!< in: trx sys header */ - ulint i, /*!< in: slot index == rseg id */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr)); -} - -/*****************************************************************//** -Gets the page number of the nth rollback segment slot in the trx system -header. -@return page number, FIL_NULL if slot unused */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_page_no( -/*======================*/ - trx_sysf_t* sys_header, /*!< in: trx system header */ - ulint i, /*!< in: slot index == rseg id */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr)); -} - -/*****************************************************************//** -Sets the space id of the nth rollback segment slot in the trx system -file copy. 
*/ -UNIV_INLINE -void -trx_sysf_rseg_set_space( -/*====================*/ - trx_sysf_t* sys_header, /*!< in: trx sys file copy */ - ulint i, /*!< in: slot index == rseg id */ - ulint space, /*!< in: space id */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - mlog_write_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE, - space, - MLOG_4BYTES, mtr); -} - -/*****************************************************************//** -Sets the page number of the nth rollback segment slot in the trx system -header. */ -UNIV_INLINE -void -trx_sysf_rseg_set_page_no( -/*======================*/ - trx_sysf_t* sys_header, /*!< in: trx sys header */ - ulint i, /*!< in: slot index == rseg id */ - ulint page_no, /*!< in: page number, FIL_NULL if the - slot is reset to unused */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - mlog_write_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_PAGE_NO, - page_no, - MLOG_4BYTES, mtr); -} -#endif /* !UNIV_HOTBACKUP */ - -/*****************************************************************//** -Writes a trx id to an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_trx_id( -/*=============*/ - byte* ptr, /*!< in: pointer to memory where written */ - trx_id_t id) /*!< in: id */ -{ -#if DATA_TRX_ID_LEN != 6 -# error "DATA_TRX_ID_LEN != 6" -#endif - mach_write_to_6(ptr, id); -} - -#ifndef UNIV_HOTBACKUP -/*****************************************************************//** -Reads a trx id from an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_read_... 
-@return id */ -UNIV_INLINE -trx_id_t -trx_read_trx_id( -/*============*/ - const byte* ptr) /*!< in: pointer to memory from where to read */ -{ -#if DATA_TRX_ID_LEN != 6 -# error "DATA_TRX_ID_LEN != 6" -#endif - return(mach_read_from_6(ptr)); -} - -/****************************************************************//** -Looks for the trx handle with the given id in rw_trx_list. -The caller must be holding trx_sys->mutex. -@return the trx handle or NULL if not found; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ -UNIV_INLINE -trx_t* -trx_get_rw_trx_by_id( -/*=================*/ - trx_id_t trx_id) /*!< in: trx id to search for */ -{ - trx_t* trx; - ulint len; - trx_t* first; - - ut_ad(mutex_own(&trx_sys->mutex)); - - len = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - - if (len == 0) { - return(NULL); - } - - /* Because the list is ordered on trx id in descending order, - we try to speed things up a bit. */ - - trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); - assert_trx_in_rw_list(trx); - - if (trx_id == trx->id) { - return(trx); - } else if (len == 1 || trx_id > trx->id) { - return(NULL); - } - - first = trx; - - trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list); - assert_trx_in_rw_list(trx); - - if (trx_id == trx->id) { - return(trx); - } else if (len == 2 || trx_id < trx->id) { - return(NULL); - } - - /* Search the list from the lower end (tail). */ - if (trx_id < (first->id + trx->id) >> 1) { - for (trx = UT_LIST_GET_PREV(trx_list, trx); - trx != NULL && trx_id > trx->id; - trx = UT_LIST_GET_PREV(trx_list, trx)) { - assert_trx_in_rw_list(trx); - } - } else { - for (trx = UT_LIST_GET_NEXT(trx_list, first); - trx != NULL && trx_id < trx->id; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - assert_trx_in_rw_list(trx); - } - } - - return((trx != NULL && trx->id == trx_id) ? 
trx : NULL); -} - -/****************************************************************//** -Returns the minimum trx id in trx list. This is the smallest id for which -the trx can possibly be active. (But, you must look at the trx->state -to find out if the minimum trx id transaction itself is active, or already -committed.). The caller must be holding the trx_sys_t::mutex in shared mode. -@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ -UNIV_INLINE -trx_id_t -trx_rw_min_trx_id_low(void) -/*=======================*/ -{ - trx_id_t id; - const trx_t* trx; - - ut_ad(mutex_own(&trx_sys->mutex)); - - trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list); - - if (trx == NULL) { - id = trx_sys->max_trx_id; - } else { - assert_trx_in_rw_list(trx); - id = trx->id; - } - - return(id); -} - -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG -/***********************************************************//** -Assert that a transaction has been recovered. -@return TRUE */ -UNIV_INLINE -ibool -trx_assert_recovered( -/*=================*/ - trx_id_t trx_id) /*!< in: transaction identifier */ -{ - const trx_t* trx; - - mutex_enter(&trx_sys->mutex); - - trx = trx_get_rw_trx_by_id(trx_id); - ut_a(trx->is_recovered); - - mutex_exit(&trx_sys->mutex); - - return(TRUE); -} -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - -/****************************************************************//** -Returns the minimum trx id in rw trx list. This is the smallest id for which -the rw trx can possibly be active. (But, you must look at the trx->state -to find out if the minimum trx id transaction itself is active, or already -committed.) 
-@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */ -UNIV_INLINE -trx_id_t -trx_rw_min_trx_id(void) -/*===================*/ -{ - trx_id_t id; - - mutex_enter(&trx_sys->mutex); - - id = trx_rw_min_trx_id_low(); - - mutex_exit(&trx_sys->mutex); - - return(id); -} - -/****************************************************************//** -Returns pointer to a transaction instance if a rw transaction with the given id -is active. Caller must hold trx_sys->mutex. If the caller is not holding -lock_sys->mutex, the transaction may already have been committed. -@return transaction instance if active, or NULL; -the pointer must not be dereferenced unless lock_sys->mutex was -acquired before calling this function and is still being held */ -UNIV_INLINE -trx_t* -trx_rw_get_active_trx_by_id( -/*========================*/ - trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt) /*!< in: NULL or pointer to a flag - that will be set if corrupt */ -{ - trx_t* trx; - - ut_ad(mutex_own(&trx_sys->mutex)); - - if (trx_id < trx_rw_min_trx_id_low()) { - - trx = NULL; - } else if (trx_id >= trx_sys->max_trx_id) { - - /* There must be corruption: we let the caller handle the - diagnostic prints in this case. */ - - trx = NULL; - if (corrupt != NULL) { - *corrupt = TRUE; - } - } else { - trx = trx_get_rw_trx_by_id(trx_id); - - if (trx != NULL - && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) { - - trx = NULL; - } - } - - return(trx); -} - -/****************************************************************//** -Checks if a rw transaction with the given id is active. Caller must hold -trx_sys->mutex. If the caller is not holding lock_sys->mutex, the -transaction may already have been committed. -@return true if rw transaction it with a given id is active. 
*/ -UNIV_INLINE -bool -trx_rw_is_active_low( -/*=================*/ - trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt) /*!< in: NULL or pointer to a flag - that is set to TRUE if trx_id is - not below trx_sys->max_trx_id */ -{ - ut_ad(mutex_own(&trx_sys->mutex)); - - if (UNIV_UNLIKELY(trx_id >= trx_sys->max_trx_id)) { - - /* There must be corruption: we let the caller handle the - diagnostic prints in this case. */ - - if (corrupt != NULL) { - *corrupt = TRUE; - } - - return(false); - } - - /* The id is reported as active exactly when it has a slot in - the trx_sys->descriptors array. */ - return(trx_find_descriptor(trx_sys->descriptors, trx_sys->descr_n_used, - trx_id) != NULL); -} - -/****************************************************************//** -Checks if a rw transaction with the given id is active. Acquires and -releases trx_sys->mutex around the check. If the caller is -not holding lock_sys->mutex, the transaction may already have been -committed. -@return true if the rw transaction with the given id is active */ -UNIV_INLINE -bool -trx_rw_is_active( -/*=============*/ - trx_id_t trx_id, /*!< in: trx id of the transaction */ - ibool* corrupt) /*!< in: NULL or pointer to a flag - that is set to TRUE if trx_id is - not below trx_sys->max_trx_id */ -{ - bool res; - - mutex_enter(&trx_sys->mutex); - - res = trx_rw_is_active_low(trx_id, corrupt); - - mutex_exit(&trx_sys->mutex); - - return(res); -} - -/*****************************************************************//** -Allocates a new transaction id. -@return new, allocated trx id */ -UNIV_INLINE -trx_id_t -trx_sys_get_new_trx_id(void) -/*========================*/ -{ -#ifndef WITH_WSREP - /* wsrep_fake_trx_id violates this assert */ - ut_ad(mutex_own(&trx_sys->mutex)); -#endif /* !WITH_WSREP */ - - /* VERY important: after the database is started, max_trx_id value is - divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if - will evaluate to TRUE when this function is first time called, - and the value for trx id will be written to disk-based header! - Thus trx id values will not overlap when the database is - repeatedly started! 
*/ - - if (!(trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN)) { - - trx_sys_flush_max_trx_id(); - } - - return(trx_sys->max_trx_id++); -} - -/*****************************************************************//** -Determines the maximum transaction id. -@return maximum currently allocated trx id; will be stale after the -next call to trx_sys_get_new_trx_id() */ -UNIV_INLINE -trx_id_t -trx_sys_get_max_trx_id(void) -/*========================*/ -{ -#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN - trx_id_t max_trx_id; -#endif - - ut_ad(!mutex_own(&trx_sys->mutex)); - -#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN - /* Avoid torn reads. */ - mutex_enter(&trx_sys->mutex); - max_trx_id = trx_sys->max_trx_id; - mutex_exit(&trx_sys->mutex); - return(max_trx_id); -#else - /* Perform a dirty read. Callers should be prepared for stale - values, and we know that the value fits in a machine word, so - that it will be read and written atomically. */ - return(trx_sys->max_trx_id); -#endif -} - -/*****************************************************************//** -Get the number of transaction in the system, independent of their state. -@return count of transactions in trx_sys_t::rw_trx_list */ -UNIV_INLINE -ulint -trx_sys_get_n_rw_trx(void) -/*======================*/ -{ - ulint n_trx; - - mutex_enter(&trx_sys->mutex); - - n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - - mutex_exit(&trx_sys->mutex); - - return(n_trx); -} - - -/*************************************************************//** -Find a slot for a given trx ID in a descriptors array. 
-@return: slot pointer */ -UNIV_INLINE -trx_id_t* -trx_find_descriptor( -/*================*/ - const trx_id_t* descriptors, /*!< in: descriptors array */ - ulint n_descr, /*!< in: array size */ - trx_id_t trx_id) /*!< in: trx id */ -{ - ut_ad(descriptors != trx_sys->descriptors || - mutex_own(&trx_sys->mutex)); - - if (UNIV_UNLIKELY(n_descr == 0)) { - - return(NULL); - } - - return((trx_id_t *) bsearch(&trx_id, descriptors, n_descr, - sizeof(trx_id_t), trx_descr_cmp)); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h deleted file mode 100644 index 1b490eca2af..00000000000 --- a/storage/xtradb/include/trx0trx.h +++ /dev/null @@ -1,1232 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0trx.h -The transaction - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0trx_h -#define trx0trx_h - -#include "univ.i" -#include "trx0types.h" -#include "dict0types.h" -#ifndef UNIV_HOTBACKUP -#include "lock0types.h" -#include "log0log.h" -#include "usr0types.h" -#include "que0types.h" -#include "mem0mem.h" -#include "read0types.h" -#include "trx0xa.h" -#include "ut0vec.h" -#include "fts0fts.h" - -/** Dummy session used currently in MySQL interface */ -extern sess_t* trx_dummy_sess; - -/********************************************************************//** -In XtraDB it is impossible for a transaction to own a search latch outside of -InnoDB code, so there is nothing to release on demand. We keep this function to -simplify maintenance.*/ -UNIV_INLINE -void -trx_search_latch_release_if_reserved( -/*=================================*/ - trx_t* trx MY_ATTRIBUTE((unused))); /*!< in: transaction */ -/******************************************************************//** -Set detailed error message for the transaction. */ -UNIV_INTERN -void -trx_set_detailed_error( -/*===================*/ - trx_t* trx, /*!< in: transaction struct */ - const char* msg); /*!< in: detailed error message */ -/*************************************************************//** -Set detailed error message for the transaction from a file. Note that the -file is rewinded before reading from it. 
*/ -UNIV_INTERN -void -trx_set_detailed_error_from_file( -/*=============================*/ - trx_t* trx, /*!< in: transaction struct */ - FILE* file); /*!< in: file to read message from */ -/****************************************************************//** -Retrieves the error_info field from a trx. -@return the error info */ -UNIV_INLINE -const dict_index_t* -trx_get_error_info( -/*===============*/ - const trx_t* trx); /*!< in: trx object */ -/********************************************************************//** -Creates a transaction object for MySQL. -@return own: transaction object */ -UNIV_INTERN -trx_t* -trx_allocate_for_mysql(void); -/*========================*/ -/********************************************************************//** -Creates a transaction object for background operations by the master thread. -@return own: transaction object */ -UNIV_INTERN -trx_t* -trx_allocate_for_background(void); -/*=============================*/ -/********************************************************************//** -Frees a transaction object of a background operation of the master thread. */ -UNIV_INTERN -void -trx_free_for_background( -/*====================*/ - trx_t* trx); /*!< in, own: trx object */ -/********************************************************************//** -At shutdown, frees a transaction object that is in the PREPARED state. */ -UNIV_INTERN -void -trx_free_prepared( -/*==============*/ - trx_t* trx) /*!< in, own: trx object */ - UNIV_COLD; -/********************************************************************//** -Frees a transaction object for MySQL. */ -UNIV_INTERN -void -trx_free_for_mysql( -/*===============*/ - trx_t* trx); /*!< in, own: trx object */ -/****************************************************************//** -Creates trx objects for transactions and initializes the trx list of -trx_sys at database start. 
Rollback segment and undo log lists must -already exist when this function is called, because the lists of -transactions to be rolled back or cleaned up are built based on the -undo log lists. */ -UNIV_INTERN -void -trx_lists_init_at_db_start(void); -/*============================*/ - -#ifdef UNIV_DEBUG -#define trx_start_if_not_started_xa(t) \ - { \ - (t)->start_line = __LINE__; \ - (t)->start_file = __FILE__; \ - trx_start_if_not_started_xa_low((t)); \ - } -#else -#define trx_start_if_not_started_xa(t) \ - trx_start_if_not_started_xa_low((t)) -#endif /* UNIV_DEBUG */ - -/*************************************************************//** -Starts the transaction if it is not yet started. */ -UNIV_INTERN -void -trx_start_if_not_started_xa_low( -/*============================*/ - trx_t* trx); /*!< in: transaction */ -/*************************************************************//** -Starts the transaction if it is not yet started. */ -UNIV_INTERN -void -trx_start_if_not_started_low( -/*=========================*/ - trx_t* trx); /*!< in: transaction */ - -#ifdef UNIV_DEBUG -#define trx_start_if_not_started(t) \ - { \ - (t)->start_line = __LINE__; \ - (t)->start_file = __FILE__; \ - trx_start_if_not_started_low((t)); \ - } -#else -#define trx_start_if_not_started(t) \ - trx_start_if_not_started_low((t)) -#endif /* UNIV_DEBUG */ - -/*************************************************************//** -Starts the transaction for a DDL operation. 
*/ -UNIV_INTERN -void -trx_start_for_ddl_low( -/*==================*/ - trx_t* trx, /*!< in/out: transaction */ - trx_dict_op_t op) /*!< in: dictionary operation type */ - MY_ATTRIBUTE((nonnull)); - -#ifdef UNIV_DEBUG -#define trx_start_for_ddl(t, o) \ - { \ - ut_ad((t)->start_file == 0); \ - (t)->start_line = __LINE__; \ - (t)->start_file = __FILE__; \ - trx_start_for_ddl_low((t), (o)); \ - } -#else -#define trx_start_for_ddl(t, o) \ - trx_start_for_ddl_low((t), (o)) -#endif /* UNIV_DEBUG */ - -/****************************************************************//** -Commits a transaction. */ -UNIV_INTERN -void -trx_commit( -/*=======*/ - trx_t* trx) /*!< in/out: transaction */ - MY_ATTRIBUTE((nonnull)); -/****************************************************************//** -Commits a transaction and a mini-transaction. */ -UNIV_INTERN -void -trx_commit_low( -/*===========*/ - trx_t* trx, /*!< in/out: transaction */ - mtr_t* mtr) /*!< in/out: mini-transaction (will be committed), - or NULL if trx made no modifications */ - MY_ATTRIBUTE((nonnull(1))); -/****************************************************************//** -Cleans up a transaction at database startup. The cleanup is needed if -the transaction already got to the middle of a commit when the database -crashed, and we cannot roll it back. */ -UNIV_INTERN -void -trx_cleanup_at_db_startup( -/*======================*/ - trx_t* trx); /*!< in: transaction */ -/**********************************************************************//** -Does the transaction commit for MySQL. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -trx_commit_for_mysql( -/*=================*/ - trx_t* trx); /*!< in/out: transaction */ -/**********************************************************************//** -Does the transaction prepare for MySQL. 
*/ -UNIV_INTERN -void -trx_prepare_for_mysql( -/*==================*/ - trx_t* trx); /*!< in/out: trx handle */ -/**********************************************************************//** -This function is used to find number of prepared transactions and -their transaction objects for a recovery. -@return number of prepared transactions */ -UNIV_INTERN -int -trx_recover_for_mysql( -/*==================*/ - XID* xid_list, /*!< in/out: prepared transactions */ - ulint len); /*!< in: number of slots in xid_list */ -/*******************************************************************//** -This function is used to find one X/Open XA distributed transaction -which is in the prepared state -@return trx or NULL; on match, the trx->xid will be invalidated; -note that the trx may have been committed, unless the caller is -holding lock_sys->mutex */ -UNIV_INTERN -trx_t * -trx_get_trx_by_xid( -/*===============*/ - const XID* xid); /*!< in: X/Open XA transaction identifier */ -/**********************************************************************//** -If required, flushes the log to disk if we called trx_commit_for_mysql() -with trx->flush_log_later == TRUE. */ -UNIV_INTERN -void -trx_commit_complete_for_mysql( -/*==========================*/ - trx_t* trx) /*!< in/out: transaction */ - MY_ATTRIBUTE((nonnull)); -/**********************************************************************//** -Marks the latest SQL statement ended. */ -UNIV_INTERN -void -trx_mark_sql_stat_end( -/*==================*/ - trx_t* trx); /*!< in: trx handle */ -/********************************************************************//** -Assigns a read view for a consistent read query. All the consistent reads -within the same transaction will get the same read view, which is created -when this function is first called for a new started transaction. 
-@return consistent read view */ -UNIV_INTERN -read_view_t* -trx_assign_read_view( -/*=================*/ - trx_t* trx); /*!< in: active transaction */ -/********************************************************************//** -Clones the read view from another transaction. All the consistent reads within -the receiver transaction will get the same read view as the donor transaction -@return read view clone */ -UNIV_INTERN -read_view_t* -trx_clone_read_view( -/*================*/ - trx_t* trx, /*!< in: receiver transaction */ - trx_t* from_trx) /*!< in: donor transaction */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/****************************************************************//** -Prepares a transaction for commit/rollback. */ -UNIV_INTERN -void -trx_commit_or_rollback_prepare( -/*===========================*/ - trx_t* trx); /*!< in/out: transaction */ -/*********************************************************************//** -Creates a commit command node struct. -@return own: commit node struct */ -UNIV_INTERN -commit_node_t* -trx_commit_node_create( -/*===================*/ - mem_heap_t* heap); /*!< in: mem heap where created */ -/***********************************************************//** -Performs an execution step for a commit type node in a query graph. -@return query thread to run next, or NULL */ -UNIV_INTERN -que_thr_t* -trx_commit_step( -/*============*/ - que_thr_t* thr); /*!< in: query thread */ - -/**********************************************************************//** -Prints info about a transaction. -Caller must hold trx_sys->mutex. 
*/ -UNIV_INTERN -void -trx_print_low( -/*==========*/ - FILE* f, - /*!< in: output stream */ - const trx_t* trx, - /*!< in: transaction */ - ulint max_query_len, - /*!< in: max query length to print, - or 0 to use the default max length */ - ulint n_rec_locks, - /*!< in: lock_number_of_rows_locked(&trx->lock) */ - ulint n_trx_locks, - /*!< in: length of trx->lock.trx_locks */ - ulint heap_size) - /*!< in: mem_heap_get_size(trx->lock.lock_heap) */ - MY_ATTRIBUTE((nonnull)); - -#ifdef WITH_WSREP -/**********************************************************************//** -Prints info about a transaction. -Transaction information may be retrieved without having trx_sys->mutex acquired -so it may not be completely accurate. The caller must own lock_sys->mutex -and the trx must have some locks to make sure that it does not escape -without locking lock_sys->mutex. */ -UNIV_INTERN -void -wsrep_trx_print_locking( -/*==============*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ - MY_ATTRIBUTE((nonnull)); -#endif /* WITH_WSREP */ - -/**********************************************************************//** -Prints info about a transaction. -The caller must hold lock_sys->mutex and trx_sys->mutex. -When possible, use trx_print() instead. */ -UNIV_INTERN -void -trx_print_latched( -/*==============*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ - MY_ATTRIBUTE((nonnull)); - -#ifdef WITH_WSREP -/**********************************************************************//** -Prints info about a transaction. -Transaction information may be retrieved without having trx_sys->mutex acquired -so it may not be completely accurate. 
The caller must own lock_sys->mutex -and the trx must have some locks to make sure that it does not escape -without locking lock_sys->mutex. */ -UNIV_INTERN -void -wsrep_trx_print_locking( -/*==============*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ - MY_ATTRIBUTE((nonnull)); -#endif /* WITH_WSREP */ -/**********************************************************************//** -Prints info about a transaction. -Acquires and releases lock_sys->mutex and trx_sys->mutex. */ -UNIV_INTERN -void -trx_print( -/*======*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ - MY_ATTRIBUTE((nonnull)); - -/**********************************************************************//** -Determine if a transaction is a dictionary operation. -@return dictionary operation mode */ -UNIV_INLINE -enum trx_dict_op_t -trx_get_dict_operation( -/*===================*/ - const trx_t* trx) /*!< in: transaction */ - MY_ATTRIBUTE((pure)); -/**********************************************************************//** -Flag a transaction a dictionary operation. */ -UNIV_INLINE -void -trx_set_dict_operation( -/*===================*/ - trx_t* trx, /*!< in/out: transaction */ - enum trx_dict_op_t op); /*!< in: operation, not - TRX_DICT_OP_NONE */ - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Determines if a transaction is in the given state. -The caller must hold trx_sys->mutex, or it must be the thread -that is serving a running transaction. -A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list -unless it is a non-locking autocommit read only transaction, which is only -in trx_sys->mysql_trx_list. 
-@return TRUE if trx->state == state */
-UNIV_INLINE
-ibool
-trx_state_eq(
-/*=========*/
-	const trx_t*	trx,	/*!< in: transaction */
-	trx_state_t	state,	/*!< in: state;
-				if state != TRX_STATE_NOT_STARTED
-				asserts that
-				trx->state != TRX_STATE_NOT_STARTED */
-	bool		relaxed = false)
-				/*!< in: whether to allow
-				trx->state == TRX_STATE_NOT_STARTED
-				after an error has been reported */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-# ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-Declared in debug (UNIV_DEBUG) builds only.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-UNIV_INTERN
-ibool
-trx_assert_started(
-/*===============*/
-	const trx_t*	trx)	/*!< in: transaction */
-	MY_ATTRIBUTE((nonnull, warn_unused_result));
-# endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Determines if the currently running transaction has been interrupted.
-Under UNIV_HOTBACKUP this is replaced by a macro that always yields FALSE.
-@return TRUE if interrupted */
-UNIV_INTERN
-ibool
-trx_is_interrupted(
-/*===============*/
-	const trx_t*	trx);	/*!< in: transaction */
-/**********************************************************************//**
-Determines if the currently running transaction is in strict mode.
-@return TRUE if strict */
-UNIV_INTERN
-ibool
-trx_is_strict(
-/*==========*/
-	trx_t*	trx);	/*!< in: transaction */
-#else /* !UNIV_HOTBACKUP */
-#define trx_is_interrupted(trx) FALSE
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Calculates the "weight" of a transaction. The weight of one transaction
-is estimated as the number of altered rows + the number of locked rows.
-The argument is evaluated twice, so it must be free of side effects.
-@param t transaction
-@return transaction weight */
-#define TRX_WEIGHT(t)	((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
-
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
-@return TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
-trx_weight_ge(
-/*==========*/
-	const trx_t*	a,	/*!< in: the first transaction to be compared */
-	const trx_t*	b);	/*!< in: the second transaction to be compared */
-
-/* Maximum length of a string that can be returned by
-trx_get_que_state_str(). The count excludes the terminating NUL:
-"ROLLING BACK" is 12 characters long. */
-#define TRX_QUE_STATE_STR_MAX_LEN	12 /* "ROLLING BACK" */
-
-/*******************************************************************//**
-Retrieves transaction's que state in a human readable string. The string
-should not be free()'d or modified.
-@return string in the data segment */
-UNIV_INLINE
-const char*
-trx_get_que_state_str(
-/*==================*/
-	const trx_t*	trx);	/*!< in: transaction */
-
-/****************************************************************//**
-Assign a read-only transaction a rollback-segment, if it is attempting
-to write to a TEMPORARY table. */
-UNIV_INTERN
-void
-trx_assign_rseg(
-/*============*/
-	trx_t*		trx);		/*!< A read-only transaction that
-					needs to be assigned a RBS. */
-
-/*************************************************************//**
-Callback function for trx_find_descriptor() to compare trx IDs.
-NOTE(review): the int result presumably follows the usual
-negative/zero/positive comparator convention - confirm against the
-definition. */
-UNIV_INTERN
-int
-trx_descr_cmp(
-/*==========*/
-	const void *a,	/*!< in: pointer to first comparison argument */
-	const void *b);	/*!< in: pointer to second comparison argument */
-
-/*************************************************************//**
-Release a slot for a given trx in the global descriptors array. */
-UNIV_INTERN
-void
-trx_release_descriptor(
-/*===================*/
-	trx_t* trx);	/*!< in: trx pointer */
-
-/*******************************************************************//**
-Transactions that aren't started by the MySQL server don't set
-the trx_t::mysql_thd field. 
For such transactions we set the lock
-wait timeout to 0 instead of the user configured value that comes
-from innodb_lock_wait_timeout via trx_t::mysql_thd.
-The argument is evaluated twice, so it must be free of side effects.
-@param trx transaction
-@return lock wait timeout in seconds */
-#define trx_lock_wait_timeout_get(trx)					\
-	((trx)->mysql_thd != NULL					\
-	 ? thd_lock_wait_timeout((trx)->mysql_thd)			\
-	 : 0)
-
-/*******************************************************************//**
-Determine if the transaction is a non-locking autocommit select
-(implied read-only): auto_commit is set and will_lock is still 0.
-@param t transaction
-@return true if non-locking autocommit select transaction. */
-#define trx_is_autocommit_non_locking(t)				\
-((t)->auto_commit && (t)->will_lock == 0)
-
-/*******************************************************************//**
-Determine if the transaction is a non-locking autocommit select
-with an explicit check for the read-only status.
-@param t transaction
-@return true if non-locking autocommit read-only transaction. */
-#define trx_is_ac_nl_ro(t)						\
-((t)->read_only && trx_is_autocommit_non_locking((t)))
-
-/*******************************************************************//**
-Assert that the transaction is in the trx_sys_t::rw_trx_list:
-it must not be read-only and must pass assert_trx_in_list(). */
-#define assert_trx_in_rw_list(t) do {					\
-	ut_ad(!(t)->read_only);						\
-	assert_trx_in_list(t);						\
-} while (0)
-
-/*******************************************************************//**
-Assert that the transaction is either in trx_sys->ro_trx_list or
-trx_sys->rw_trx_list but not both and it cannot be an autocommit
-non-locking select.
-Control flow: inside the switch, `continue` targets the enclosing
-do { } while (0) wrapper, so for the PREPARED, ACTIVE and
-COMMITTED_IN_MEMORY states the macro ends without reaching ut_error.
-`break` only leaves the switch, so TRX_STATE_NOT_STARTED (and any value
-not listed) falls through to ut_error. */
-#define assert_trx_in_list(t) do {					\
-	ut_ad((t)->in_ro_trx_list == (t)->read_only);			\
-	ut_ad((t)->in_rw_trx_list == !(t)->read_only);			\
-	ut_ad(!trx_is_autocommit_non_locking((t)));			\
-	switch ((t)->state) {						\
-	case TRX_STATE_PREPARED:					\
-		/* fall through */					\
-	case TRX_STATE_ACTIVE:						\
-	case TRX_STATE_COMMITTED_IN_MEMORY:				\
-		continue;						\
-	case TRX_STATE_NOT_STARTED:					\
-		break;							\
-	}								\
-	ut_error;							\
-} while (0)
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Assert that an autocommit non-locking select cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
-The transaction must be in the mysql_trx_list.
-Any other kind of transaction is checked with assert_trx_in_list(). */
-# define assert_trx_nonlocking_or_in_list(t)				\
-	do {								\
-		if (trx_is_autocommit_non_locking(t)) {			\
-			trx_state_t	t_state = (t)->state;		\
-			ut_ad((t)->read_only);				\
-			ut_ad(!(t)->is_recovered);			\
-			ut_ad(!(t)->in_ro_trx_list);			\
-			ut_ad(!(t)->in_rw_trx_list);			\
-			ut_ad((t)->in_mysql_trx_list);			\
-			ut_ad(t_state == TRX_STATE_NOT_STARTED		\
-			      || t_state == TRX_STATE_ACTIVE);		\
-		} else {						\
-			assert_trx_in_list(t);				\
-		}							\
-	} while (0)
-#else /* UNIV_DEBUG */
-/*******************************************************************//**
-Assert that an autocommit non-locking select cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
-The transaction must be in the mysql_trx_list.
-In non-debug builds this expands to a no-op. */
-# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
-captures the state of the query thread during the execution of a query.
-This is different from a transaction state. The query state of a transaction
-can be updated asynchronously by other threads. The other threads can be
-system threads, like the timeout monitor thread or user threads executing
-other queries. Another thing to be mindful of is that there is a delay between
-when a query thread is put into LOCK_WAIT state and before it actually starts
-waiting. Between these two events it is possible that the query thread is
-granted the lock it was waiting for, which implies that the state can be changed
-asynchronously.
-
-All these operations take place within the context of locking. 
Therefore state
-changes within the locking code must acquire both the lock mutex and the
-trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
-trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
-to only acquire the trx->mutex.
-To query the state either of the mutexes is sufficient within the locking
-code and no mutex is required when the query thread is no longer waiting. */
-
-/** The locks and state of an active transaction. Protected by
-lock_sys->mutex, trx->mutex or both.
-Embedded in trx_t as the member trx_t::lock. */
-struct trx_lock_t {
-	ulint		n_active_thrs;	/*!< number of active query threads */
-
-	trx_que_t	que_state;	/*!< valid when trx->state
-					== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
-					TRX_QUE_LOCK_WAIT, ... */
-
-	lock_t*		wait_lock;	/*!< if trx execution state is
-					TRX_QUE_LOCK_WAIT, this points to
-					the lock request, otherwise this is
-					NULL; set to non-NULL when holding
-					both trx->mutex and lock_sys->mutex;
-					set to NULL when holding
-					lock_sys->mutex; readers should
-					hold lock_sys->mutex, except when
-					they are holding trx->mutex and
-					wait_lock==NULL.
-					NOTE(review): the latching protocol
-					comment preceding this struct says
-					trx->mutex alone suffices when the
-					lock wait ends - confirm which rule
-					applies to resetting this field */
-	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
-					to and checked against lock_mark_counter
-					by lock_deadlock_recursive(). */
-	ibool		was_chosen_as_deadlock_victim;
-					/*!< when the transaction decides to
-					wait for a lock, it sets this to FALSE;
-					if another transaction chooses this
-					transaction as a victim in deadlock
-					resolution, it sets this to TRUE.
-					Protected by trx->mutex. */
-	time_t		wait_started;	/*!< lock wait started at this time,
-					protected only by lock_sys->mutex */
-
-	que_thr_t*	wait_thr;	/*!< query thread belonging to this
-					trx that is in QUE_THR_LOCK_WAIT
-					state. For threads suspended in a
-					lock wait, this is protected by
-					lock_sys->mutex. Otherwise, this may
-					only be modified by the thread that is
-					serving the running transaction. */
-
-	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
-					protected by lock_sys->mutex */
-
-	UT_LIST_BASE_NODE_T(lock_t)
-			trx_locks;	/*!< locks requested
-					by the transaction;
-					insertions are protected by trx->mutex
-					and lock_sys->mutex; removals are
-					protected by lock_sys->mutex */
-
-	ib_vector_t*	table_locks;	/*!< All table locks requested by this
-					transaction, including AUTOINC locks */
-
-	ibool		cancel;		/*!< TRUE if the transaction is being
-					rolled back either via deadlock
-					detection or due to lock timeout. The
-					caller has to acquire the trx_t::mutex
-					in order to cancel the locks. In
-					lock_trx_table_locks_remove() we
-					check for this cancel of a transaction's
-					locks and avoid reacquiring the trx
-					mutex to prevent recursive deadlocks.
-					Protected by both the lock sys mutex
-					and the trx_t::mutex. */
-};
-
-/* NOTE(review): presumably the sentinel value stored in trx_t::magic_n
-to detect corrupted or stale handles - confirm against the users of
-magic_n. */
-#define TRX_MAGIC_N	91118598
-
-/** The transaction handle
-
-Normally, there is a 1:1 relationship between a transaction handle
-(trx) and a session (client connection). One session is associated
-with exactly one user transaction. There are some exceptions to this:
-
-* For DDL operations, a subtransaction is allocated that modifies the
-data dictionary tables. Lock waits and deadlocks are prevented by
-acquiring the dict_operation_lock before starting the subtransaction
-and releasing it after committing the subtransaction.
-
-* The purge system uses a special transaction that is not associated
-with any session.
-
-* If the system crashed or it was quickly shut down while there were
-transactions in the ACTIVE or PREPARED state, these transactions would
-no longer be associated with a session when the server is restarted.
-
-A session may be served by at most one thread at a time. The serving
-thread of a session might change in some MySQL implementations.
-Therefore we do not have os_thread_get_curr_id() assertions in the code. 
-
-Normally, only the thread that is currently associated with a running
-transaction may access (read and modify) the trx object, and it may do
-so without holding any mutex. The following are exceptions to this:
-
-* trx_rollback_resurrected() may access resurrected (connectionless)
-transactions while the system is already processing new user
-transactions. The trx_sys->mutex prevents a race condition between it
-and lock_trx_release_locks() [invoked by trx_commit()].
-
-* trx_print_low() may access transactions not associated with the current
-thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
-
-* When a transaction handle is in the trx_sys->mysql_trx_list or
-trx_sys->trx_list, some of its fields must not be modified without
-holding trx_sys->mutex exclusively.
-
-* The locking code (in particular, lock_deadlock_recursive() and
-lock_rec_convert_impl_to_expl()) will access transactions associated
-to other connections. The locks of transactions are protected by
-lock_sys->mutex and sometimes by trx->mutex. */
-
-/** Who requested the abort of a transaction; stored in trx_t::abort_type. */
-typedef enum {
-	TRX_SERVER_ABORT = 0,
-	TRX_WSREP_ABORT = 1
-} trx_abort_t;
-
-struct trx_t{
-	ulint		magic_n;	/*!< NOTE(review): presumably holds
-					TRX_MAGIC_N while the handle is
-					valid - confirm */
-
-	ib_mutex_t	mutex;		/*!< Mutex protecting the fields
-					state and lock
-					(except some fields of lock, which
-					are protected by lock_sys->mutex) */
-
-	/** State of the trx from the point of view of concurrency control
-	and the valid state transitions.
-
-	Possible states:
-
-	TRX_STATE_NOT_STARTED
-	TRX_STATE_ACTIVE
-	TRX_STATE_PREPARED
-	TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
-
-	Valid state transitions are:
-
-	Regular transactions:
-	* NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
-
-	Auto-commit non-locking read-only:
-	* NOT_STARTED -> ACTIVE -> NOT_STARTED
-
-	XA (2PC):
-	* NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
-
-	Recovered XA:
-	* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
-
-	XA (2PC) (shutdown before ROLLBACK or COMMIT):
-	* NOT_STARTED -> PREPARED -> (freed)
-
-	Latching and various transaction lists membership rules:
-
-	XA (2PC) transactions are always treated as non-autocommit.
-
-	Transitions to ACTIVE or NOT_STARTED occur when
-	!in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
-
-	Autocommit non-locking read-only transactions move between states
-	without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list.
-
-	When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
-	it is a user transaction. It cannot be in ro_trx_list or rw_trx_list.
-
-	ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
-	The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
-
-	ACTIVE->COMMITTED is possible when the transaction is in
-	ro_trx_list or rw_trx_list.
-
-	Transitions to COMMITTED are protected by both lock_sys->mutex
-	and trx->mutex.
-
-	NOTE: Some of these state change constraints are an overkill,
-	currently only required for a consistent view for printing stats.
-	This unnecessarily adds a huge cost for the general case.
-
-	NOTE: In the future we should add read only transactions to the
-	ro_trx_list the first time they try to acquire a lock ie. by default
-	we treat all read-only transactions as non-locking.  */
-	trx_state_t	state;
-
-	trx_lock_t	lock;		/*!< Information about the transaction
-					locks and state. Protected by
-					trx->mutex or lock_sys->mutex
-					or both */
-	ulint		is_recovered;	/*!< 0=normal transaction,
-					1=recovered, must be rolled back,
-					protected by trx_sys->mutex when
-					trx->in_rw_trx_list holds */
-
-	/* These fields are not protected by any mutex. */
-	const char*	op_info;	/*!< English text describing the
-					current operation, or an empty
-					string */
-	ulint		isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
-	ulint		check_foreigns;	/*!< normally TRUE, but if the user
-					wants to suppress foreign key checks,
-					(in table imports, for example) we
-					set this FALSE */
-	/*------------------------------*/
-	/* MySQL has a transaction coordinator to coordinate two phase
-	commit between multiple storage engines and the binary log. When
-	an engine participates in a transaction, it's responsible for
-	registering itself using the trans_register_ha() API. */
-	unsigned	is_registered:1;/* This flag is set to 1 after the
-					transaction has been registered with
-					the coordinator using the XA API, and
-					is set to 0 after commit or rollback. */
-	unsigned	active_commit_ordered:1;/* 1 if owns prepare mutex, if
-					this is set to 1 then registered should
-					also be set to 1. This is used in the
-					XA code */
-	/*------------------------------*/
-	ulint		check_unique_secondary;
-					/*!< normally TRUE, but if the user
-					wants to speed up inserts by
-					suppressing unique key checks
-					for secondary indexes when we decide
-					if we can use the insert buffer for
-					them, we set this FALSE */
-	ulint		support_xa;	/*!< normally we do the XA two-phase
-					commit steps, but by setting this to
-					FALSE, one can save CPU time and about
-					150 bytes in the undo log size as then
-					we skip XA steps */
-	ulint		fake_changes;	/*!< NOTE(review): undocumented in
-					this header; presumably the XtraDB
-					"fake changes" mode flag - confirm */
-	ulint		flush_log_later;/* In 2PC, we hold the
-					prepare_commit mutex across
-					both phases. In that case, we
-					defer flush of the logs to disk
-					until after we release the
-					mutex. */
-	ulint		must_flush_log_later;/*!< this flag is set to TRUE in
-					trx_commit() if flush_log_later was
-					TRUE, and there were modifications by
-					the transaction; in that case we must
-					flush the log in
-					trx_commit_complete_for_mysql() */
-	ulint		duplicates;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
-	bool		has_search_latch;
-					/*!< true if this trx has latched any
-					search system latch in S-mode */
-	ulint		search_latch_timeout;
-					/*!< If we notice that someone is
-					waiting for our S-lock on the search
-					latch to be released, we wait in
-					row0sel.cc for BTR_SEA_TIMEOUT new
-					searches until we try to keep
-					the search latch again over
-					calls from MySQL; this is intended
-					to reduce contention on the search
-					latch */
-	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op_t */
-
-	/* Fields protected by the srv_conc_mutex. */
-	ulint		declared_to_be_inside_innodb;
-					/*!< this is TRUE if we have declared
-					this transaction in
-					srv_conc_enter_innodb to be inside the
-					InnoDB engine */
-	ulint		n_tickets_to_enter_innodb;
-					/*!< this can be > 0 only when
-					declared_to_... is TRUE; when we come
-					to srv_conc_innodb_enter, if the value
-					here is > 0, we decrement this by 1 */
-	ulint		dict_operation_lock_mode;
-					/*!< 0, RW_S_LATCH, or RW_X_LATCH:
-					the latch mode trx currently holds
-					on dict_operation_lock. Protected
-					by dict_operation_lock. */
-
-	trx_id_t	no;		/*!< transaction serialization number:
-					max trx id shortly before the
-					transaction is moved to
-					COMMITTED_IN_MEMORY state.
-					Protected by trx_sys_t::mutex
-					when trx->in_rw_trx_list. Initially
-					set to TRX_ID_MAX. */
-
-	time_t		start_time;	/*!< time the trx state last time became
-					TRX_STATE_ACTIVE */
-	ib_uint64_t	start_time_micro; /*!< start time of transaction in
-					microseconds */
-	trx_id_t	id;		/*!< transaction id */
-	XID		xid;		/*!< X/Open XA transaction
-					identification to identify a
-					transaction branch */
-	lsn_t		commit_lsn;	/*!< lsn at the time of the commit */
-	table_id_t	table_id;	/*!< Table to drop iff dict_operation
-					== TRX_DICT_OP_TABLE, or 0. */
-	/*------------------------------*/
-	THD*		mysql_thd;	/*!< MySQL thread handle corresponding
-					to this trx, or NULL */
-	trx_abort_t	abort_type;	/*!< Transaction abort type */
-
-	const char*	mysql_log_file_name;
-					/*!< if MySQL binlog is used, this field
-					contains a pointer to the latest file
-					name; this is NULL if binlog is not
-					used */
-	ib_int64_t	mysql_log_offset;
-					/*!< if MySQL binlog is used, this
-					field contains the end offset of the
-					binlog entry */
-	time_t		idle_start;	/*!< NOTE(review): undocumented in
-					this header; presumably when the trx
-					last became idle - confirm */
-	ib_int64_t	last_stmt_start;/*!< NOTE(review): undocumented in
-					this header; presumably the start
-					time of the last statement, unit
-					unknown - confirm */
-	/*------------------------------*/
-	ulint		n_mysql_tables_in_use; /*!< number of Innobase tables
-					used in the processing of the current
-					SQL statement in MySQL */
-	ulint		mysql_n_tables_locked;
-					/*!< how many tables the current SQL
-					statement uses, except those
-					in consistent read */
-	/*------------------------------*/
-	UT_LIST_NODE_T(trx_t)
-			trx_list;	/*!< list of transactions;
-					protected by trx_sys->mutex.
-					The same node is used for both
-					trx_sys_t::ro_trx_list and
-					trx_sys_t::rw_trx_list */
-#ifdef UNIV_DEBUG
-	/** The following two fields are mutually exclusive. */
-	/* @{ */
-
-	ibool		in_ro_trx_list;	/*!< TRUE if in trx_sys->ro_trx_list */
-	ibool		in_rw_trx_list;	/*!< TRUE if in trx_sys->rw_trx_list */
-	/* @} */
-#endif /* UNIV_DEBUG */
-	UT_LIST_NODE_T(trx_t)
-			mysql_trx_list;	/*!< list of transactions created for
-					MySQL; protected by trx_sys->mutex */
-#ifdef UNIV_DEBUG
-	ibool		in_mysql_trx_list;
-					/*!< TRUE if in
-					trx_sys->mysql_trx_list */
-#endif /* UNIV_DEBUG */
-	UT_LIST_NODE_T(trx_t)
-			trx_serial_list;/*!< list node for
-					trx_sys->trx_serial_list */
-	bool		in_trx_serial_list;
-					/* Set when transaction is in the
-					trx_serial_list */
-	/*------------------------------*/
-	dberr_t		error_state;	/*!< 0 if no error, otherwise error
-					number; NOTE That ONLY the thread
-					doing the transaction is allowed to
-					set this field: this is NOT protected
-					by any mutex */
-	const dict_index_t*error_info;	/*!< if the error number indicates a
-					duplicate key error, a pointer to
-					the problematic index is stored here */
-	ulint		error_key_num;	/*!< if the index creation fails to a
-					duplicate key error, a mysql key
-					number of that index is stored here */
-	sess_t*		sess;		/*!< session of the trx, NULL if none */
-	que_t*		graph;		/*!< query currently run in the session,
-					or NULL if none; NOTE that the query
-					belongs to the session, and it can
-					survive over a transaction commit, if
-					it is a stored procedure with a COMMIT
-					WORK statement, for instance */
-	read_view_t*	global_read_view;
-					/*!< consistent read view associated
-					to a transaction or NULL */
-	read_view_t*	read_view;	/*!< consistent read view used in the
-					transaction or NULL, this read view
-					if defined can be normal read view
-					associated to a transaction (i.e.
-					same as global_read_view) or read view
-					associated to a cursor */
-	read_view_t*	prebuilt_view;	/* pre-built view array */
-	/*------------------------------*/
-	UT_LIST_BASE_NODE_T(trx_named_savept_t)
-			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
-					oldest first */
-	/*------------------------------*/
-	ib_mutex_t	undo_mutex;	/*!< mutex protecting the fields in this
-					section (down to undo_no_arr), EXCEPT
-					last_sql_stat_start, which can be
-					accessed only when we know that there
-					cannot be any activity in the undo
-					logs! */
-	undo_no_t	undo_no;	/*!< next undo log record number to
-					assign; since the undo log is
-					private for a transaction, this
-					is a simple ascending sequence
-					with no gaps; thus it represents
-					the number of modified/inserted
-					rows in a transaction */
-	trx_savept_t	last_sql_stat_start;
-					/*!< undo_no when the last sql statement
-					was started: in case of an error, trx
-					is rolled back down to this undo
-					number; see note at undo_mutex! */
-	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
-					transaction, or NULL if not assigned
-					yet */
-	trx_undo_t*	insert_undo;	/*!< pointer to the insert undo log, or
-					NULL if no inserts performed yet */
-	trx_undo_t*	update_undo;	/*!< pointer to the update undo log, or
-					NULL if no update performed yet */
-	undo_no_t	roll_limit;	/*!< least undo number to undo during
-					a rollback */
-	ulint		pages_undone;	/*!< number of undo log pages undone
-					since the last undo log truncation */
-	trx_undo_arr_t*	undo_no_arr;	/*!< array of undo numbers of undo log
-					records which are currently processed
-					by a rollback operation */
-	/*------------------------------*/
-	ulint		n_autoinc_rows;	/*!< no. of AUTO-INC rows required for
-					an SQL statement. This is useful for
-					multi-row INSERTs */
-	ib_vector_t*    autoinc_locks;  /* AUTOINC locks held by this
-					transaction. Note that these are
-					also in the lock list trx_locks. This
-					vector needs to be freed explicitly
-					when the trx instance is destroyed.
-					Protected by lock_sys->mutex. */
-	/*------------------------------*/
-	ibool		read_only;	/*!< TRUE if transaction is flagged
-					as a READ-ONLY transaction.
-					if !auto_commit || will_lock > 0
-					then it will be added to the list
-					trx_sys_t::ro_trx_list. A read only
-					transaction will not be assigned an
-					UNDO log. Non-locking auto-commit
-					read-only transaction will not be on
-					either list. */
-	ibool		auto_commit;	/*!< TRUE if it is an autocommit */
-	ulint		will_lock;	/*!< Will acquire some locks. Increment
-					each time we determine that a lock will
-					be acquired by the MySQL layer. */
-	bool		ddl;		/*!< true if it is a transaction that
-					is being started for a DDL operation */
-	/*------------------------------*/
-	fts_trx_t*	fts_trx;	/*!< FTS information, or NULL if
-					transaction hasn't modified tables
-					with FTS indexes (yet). */
-	doc_id_t	fts_next_doc_id;/* The document id used for updates */
-	/*------------------------------*/
-	ulint		flush_tables;	/*!< if "covering" the FLUSH TABLES,
-					count of tables being flushed. */
-
-	/*------------------------------*/
-#ifdef UNIV_DEBUG
-	ulint		start_line;	/*!< Track where it was started from */
-	const char*	start_file;	/*!< Filename where it was started */
-#endif /* UNIV_DEBUG */
-	/*------------------------------*/
-	bool		api_trx;	/*!< trx started by InnoDB API */
-	bool		api_auto_commit;/*!< automatic commit */
-	bool		read_write;	/*!< if read and write operation */
-
-	/*------------------------------*/
-	char detailed_error[256];	/*!< detailed error message for last
-					error, or empty. */
-#ifdef WITH_WSREP
-	os_event_t	wsrep_event;	/* event waited for in srv_conc_slot */
-#endif /* WITH_WSREP */
-	/*------------------------------*/
-	/* NOTE(review): the fields from io_reads to take_stats are
-	undocumented in this header; they look like per-transaction I/O and
-	wait accounting whose collection is presumably gated by take_stats -
-	confirm against their users. */
-	ulint		io_reads;
-	ib_uint64_t	io_read;
-	ulint		io_reads_wait_timer;
-	ib_uint64_t	lock_que_wait_ustarted;
-	ulint           lock_que_wait_timer;
-	ulint           innodb_que_wait_timer;
-	ulint           distinct_page_access;
-#define	DPAH_SIZE	8192
-	byte*		distinct_page_access_hash;
-	ibool		take_stats;
-
-	/* Lock wait statistics */
-	ulint		n_rec_lock_waits;
-					/*!< Number of record lock waits,
-					might not be exactly correct. */
-	ulint		n_table_lock_waits;
-					/*!< Number of table lock waits,
-					might not be exactly correct. */
-	ulint		total_rec_lock_wait_time;
-					/*!< Total rec lock wait time up
-					to this moment. */
-	ulint		total_table_lock_wait_time;
-					/*!< Total table lock wait time
-					up to this moment. */
-};
-
-/* Transaction isolation levels (trx->isolation_level) */
-#define TRX_ISO_READ_UNCOMMITTED	0	/* dirty read: non-locking
-						SELECTs are performed so that
-						we do not look at a possible
-						earlier version of a record;
-						thus they are not 'consistent'
-						reads under this isolation
-						level; otherwise like level
-						2 */
-
-#define TRX_ISO_READ_COMMITTED		1	/* somewhat Oracle-like
-						isolation, except that in
-						range UPDATE and DELETE we
-						must block phantom rows
-						with next-key locks;
-						SELECT ... FOR UPDATE and ...
-						LOCK IN SHARE MODE only lock
-						the index records, NOT the
-						gaps before them, and thus
-						allow free inserting;
-						each consistent read reads its
-						own snapshot */
-
-#define TRX_ISO_REPEATABLE_READ		2	/* this is the default;
-						all consistent reads in the
-						same trx read the same
-						snapshot;
-						full next-key locking used
-						in locking reads to block
-						insertions into gaps */
-
-#define TRX_ISO_SERIALIZABLE		3	/* all plain SELECTs are
-						converted to LOCK IN SHARE
-						MODE reads */
-
-/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
-Multiple flags can be combined with bitwise OR. 
*/ -#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */ -#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */ - - -/* Types of a trx signal */ -#define TRX_SIG_NO_SIGNAL 0 -#define TRX_SIG_TOTAL_ROLLBACK 1 -#define TRX_SIG_ROLLBACK_TO_SAVEPT 2 -#define TRX_SIG_COMMIT 3 -#define TRX_SIG_BREAK_EXECUTION 5 - -/* Sender types of a signal */ -#define TRX_SIG_SELF 0 /* sent by the session itself, or - by an error occurring within this - session */ -#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which - must hold rights to this) */ - -/* Flag bits for trx_struct.active_flag */ -#define TRX_ACTIVE_IN_MYSQL (1<<0) -#define TRX_ACTIVE_COMMIT_ORDERED (1<<1) - -/** Commit node states */ -enum commit_node_state { - COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to - the transaction */ - COMMIT_NODE_WAIT /*!< commit signal sent to the transaction, - waiting for completion */ -}; - -/** Commit command node in a query graph */ -struct commit_node_t{ - que_common_t common; /*!< node type: QUE_NODE_COMMIT */ - enum commit_node_state - state; /*!< node execution state */ -}; - - -/** Test if trx->mutex is owned. */ -#define trx_mutex_own(t) mutex_own(&t->mutex) - -/** Acquire the trx->mutex. */ -#define trx_mutex_enter(t) do { \ - mutex_enter(&t->mutex); \ -} while (0) - -/** Release the trx->mutex. */ -#define trx_mutex_exit(t) do { \ - mutex_exit(&t->mutex); \ -} while (0) - -/** @brief The latch protecting the adaptive search system - -This latch protects the -(1) hash index; -(2) columns of a record to which we have a pointer in the hash index; - -but does NOT protect: - -(3) next record offset field in a record; -(4) next or previous records on the same page. - -Bear in mind (3) and (4) when using the hash index. 
-*/ -extern prio_rw_lock_t* btr_search_latch_arr; - -#ifndef UNIV_NONINL -#include "trx0trx.ic" -#endif -#endif /* !UNIV_HOTBACKUP */ - -#endif diff --git a/storage/xtradb/include/trx0trx.ic b/storage/xtradb/include/trx0trx.ic deleted file mode 100644 index eb7d62d9cad..00000000000 --- a/storage/xtradb/include/trx0trx.ic +++ /dev/null @@ -1,184 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0trx.ic -The transaction - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -/**********************************************************************//** -Determines if a transaction is in the given state. -The caller must hold trx_sys->mutex, or it must be the thread -that is serving a running transaction. -A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list -unless it is a non-locking autocommit read only transaction, which is only -in trx_sys->mysql_trx_list. 
-@return TRUE if trx->state == state */ -UNIV_INLINE -ibool -trx_state_eq( -/*=========*/ - const trx_t* trx, /*!< in: transaction */ - trx_state_t state, /*!< in: state; - if state != TRX_STATE_NOT_STARTED - asserts that - trx->state != TRX_STATE_NOT_STARTED */ - bool relaxed) - /*!< in: whether to allow - trx->state == TRX_STATE_NOT_STARTED - after an error has been reported */ -{ -#ifdef UNIV_DEBUG - switch (trx->state) { - case TRX_STATE_PREPARED: - ut_ad(!trx_is_autocommit_non_locking(trx)); - return(trx->state == state); - - case TRX_STATE_ACTIVE: - assert_trx_nonlocking_or_in_list(trx); - return(state == trx->state); - - case TRX_STATE_COMMITTED_IN_MEMORY: - assert_trx_in_list(trx); - return(state == trx->state); - - case TRX_STATE_NOT_STARTED: - /* This state is not allowed for running transactions. */ - ut_a(state == TRX_STATE_NOT_STARTED - || (relaxed - && thd_get_error_number(trx->mysql_thd))); - ut_ad(!trx->in_rw_trx_list); - ut_ad(!trx->in_ro_trx_list); - return(state == trx->state); - } - ut_error; -#endif /* UNIV_DEBUG */ - return(trx->state == state); -} - -/****************************************************************//** -Retrieves the error_info field from a trx. -@return the error info */ -UNIV_INLINE -const dict_index_t* -trx_get_error_info( -/*===============*/ - const trx_t* trx) /*!< in: trx object */ -{ - return(trx->error_info); -} - -/*******************************************************************//** -Retrieves transaction's que state in a human readable string. The string -should not be free()'d or modified. 
-@return string in the data segment */ -UNIV_INLINE -const char* -trx_get_que_state_str( -/*==================*/ - const trx_t* trx) /*!< in: transaction */ -{ - /* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */ - switch (trx->lock.que_state) { - case TRX_QUE_RUNNING: - return("RUNNING"); - case TRX_QUE_LOCK_WAIT: - return("LOCK WAIT"); - case TRX_QUE_ROLLING_BACK: - return("ROLLING BACK"); - case TRX_QUE_COMMITTING: - return("COMMITTING"); - default: - return("UNKNOWN"); - } -} - -/**********************************************************************//** -Determine if a transaction is a dictionary operation. -@return dictionary operation mode */ -UNIV_INLINE -enum trx_dict_op_t -trx_get_dict_operation( -/*===================*/ - const trx_t* trx) /*!< in: transaction */ -{ - trx_dict_op_t op = static_cast<trx_dict_op_t>(trx->dict_operation); - -#ifdef UNIV_DEBUG - switch (op) { - case TRX_DICT_OP_NONE: - case TRX_DICT_OP_TABLE: - case TRX_DICT_OP_INDEX: - return(op); - } - ut_error; -#endif /* UNIV_DEBUG */ - return(op); -} -/**********************************************************************//** -Flag a transaction a dictionary operation. 
*/ -UNIV_INLINE -void -trx_set_dict_operation( -/*===================*/ - trx_t* trx, /*!< in/out: transaction */ - enum trx_dict_op_t op) /*!< in: operation, not - TRX_DICT_OP_NONE */ -{ -#ifdef UNIV_DEBUG - enum trx_dict_op_t old_op = trx_get_dict_operation(trx); - - switch (op) { - case TRX_DICT_OP_NONE: - ut_error; - break; - case TRX_DICT_OP_TABLE: - switch (old_op) { - case TRX_DICT_OP_NONE: - case TRX_DICT_OP_INDEX: - case TRX_DICT_OP_TABLE: - goto ok; - } - ut_error; - break; - case TRX_DICT_OP_INDEX: - ut_ad(old_op == TRX_DICT_OP_NONE); - break; - } -ok: -#endif /* UNIV_DEBUG */ - - trx->ddl = true; - trx->dict_operation = op; -} - -/********************************************************************//** -In XtraDB it is impossible for a transaction to own a search latch outside of -InnoDB code, so there is nothing to release on demand. We keep this function to -simplify maintenance.*/ -UNIV_INLINE -void -trx_search_latch_release_if_reserved( -/*=================================*/ - trx_t* trx MY_ATTRIBUTE((unused))) /*!< in: transaction */ -{ - ut_ad(!trx->has_search_latch); -} diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h deleted file mode 100644 index 7ca95131328..00000000000 --- a/storage/xtradb/include/trx0types.h +++ /dev/null @@ -1,147 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0types.h -Transaction system global type definitions - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0types_h -#define trx0types_h - -#include "ut0byte.h" - -/** printf(3) format used for printing DB_TRX_ID and other system fields */ -#define TRX_ID_FMT IB_ID_FMT - -/** maximum length that a formatted trx_t::id could take, not including -the terminating NUL character. */ -#define TRX_ID_MAX_LEN 17 - -/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */ -enum trx_que_t { - TRX_QUE_RUNNING, /*!< transaction is running */ - TRX_QUE_LOCK_WAIT, /*!< transaction is waiting for - a lock */ - TRX_QUE_ROLLING_BACK, /*!< transaction is rolling back */ - TRX_QUE_COMMITTING /*!< transaction is committing */ -}; - -/** Transaction states (trx_t::state) */ -enum trx_state_t { - TRX_STATE_NOT_STARTED, - TRX_STATE_ACTIVE, - TRX_STATE_PREPARED, /* Support for 2PC/XA */ - TRX_STATE_COMMITTED_IN_MEMORY -}; - -/** Type of data dictionary operation */ -enum trx_dict_op_t { - /** The transaction is not modifying the data dictionary. */ - TRX_DICT_OP_NONE = 0, - /** The transaction is creating a table or an index, or - dropping a table. The table must be dropped in crash - recovery. This and TRX_DICT_OP_NONE are the only possible - operation modes in crash recovery. */ - TRX_DICT_OP_TABLE = 1, - /** The transaction is creating or dropping an index in an - existing table. In crash recovery, the data dictionary - must be locked, but the table must not be dropped. 
*/ - TRX_DICT_OP_INDEX = 2 -}; - -/** Memory objects */ -/* @{ */ -/** Transaction */ -struct trx_t; -/** The locks and state of an active transaction */ -struct trx_lock_t; -/** Transaction system */ -struct trx_sys_t; -/** Signal */ -struct trx_sig_t; -/** Rollback segment */ -struct trx_rseg_t; -/** Transaction undo log */ -struct trx_undo_t; -/** Array of undo numbers of undo records being rolled back or purged */ -struct trx_undo_arr_t; -/** A cell of trx_undo_arr_t */ -struct trx_undo_inf_t; -/** The control structure used in the purge operation */ -struct trx_purge_t; -/** Rollback command node in a query graph */ -struct roll_node_t; -/** Commit command node in a query graph */ -struct commit_node_t; -/** SAVEPOINT command node in a query graph */ -struct trx_named_savept_t; -/* @} */ - -/** Rollback contexts */ -enum trx_rb_ctx { - RB_NONE = 0, /*!< no rollback */ - RB_NORMAL, /*!< normal rollback */ - RB_RECOVERY_PURGE_REC, - /*!< rolling back an incomplete transaction, - in crash recovery, rolling back an - INSERT that was performed by updating a - delete-marked record; if the delete-marked record - no longer exists in an active read view, it will - be purged */ - RB_RECOVERY /*!< rolling back an incomplete transaction, - in crash recovery */ -}; - -/** Row identifier (DB_ROW_ID, DATA_ROW_ID) */ -typedef ib_id_t row_id_t; -/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */ -typedef ib_id_t trx_id_t; -/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */ -typedef ib_id_t roll_ptr_t; -/** Undo number */ -typedef ib_id_t undo_no_t; - -/** Maximum transaction identifier */ -#define TRX_ID_MAX IB_ID_MAX - -/** Transaction savepoint */ -struct trx_savept_t{ - undo_no_t least_undo_no; /*!< least undo number to undo */ -}; - -/** File objects */ -/* @{ */ -/** Transaction system header */ -typedef byte trx_sysf_t; -/** Rollback segment header */ -typedef byte trx_rsegf_t; -/** Undo segment header */ -typedef byte trx_usegf_t; -/** Undo log header */ -typedef 
byte trx_ulogf_t; -/** Undo log page header */ -typedef byte trx_upagef_t; - -/** Undo log record */ -typedef byte trx_undo_rec_t; -/* @} */ - -#endif diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h deleted file mode 100644 index 190308112ba..00000000000 --- a/storage/xtradb/include/trx0undo.h +++ /dev/null @@ -1,595 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0undo.h -Transaction undo log - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0undo_h -#define trx0undo_h - -#include "univ.i" -#include "trx0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "page0types.h" -#include "trx0xa.h" - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Builds a roll pointer. 
-@return roll pointer */ -UNIV_INLINE -roll_ptr_t -trx_undo_build_roll_ptr( -/*====================*/ - ibool is_insert, /*!< in: TRUE if insert undo log */ - ulint rseg_id, /*!< in: rollback segment id */ - ulint page_no, /*!< in: page number */ - ulint offset); /*!< in: offset of the undo entry within page */ -/***********************************************************************//** -Decodes a roll pointer. */ -UNIV_INLINE -void -trx_undo_decode_roll_ptr( -/*=====================*/ - roll_ptr_t roll_ptr, /*!< in: roll pointer */ - ibool* is_insert, /*!< out: TRUE if insert undo log */ - ulint* rseg_id, /*!< out: rollback segment id */ - ulint* page_no, /*!< out: page number */ - ulint* offset); /*!< out: offset of the undo - entry within page */ -/***********************************************************************//** -Returns TRUE if the roll pointer is of the insert type. -@return TRUE if insert undo log */ -UNIV_INLINE -ibool -trx_undo_roll_ptr_is_insert( -/*========================*/ - roll_ptr_t roll_ptr); /*!< in: roll pointer */ -/***********************************************************************//** -Returns true if the record is of the insert type. -@return true if the record was freshly inserted (not updated). */ -UNIV_INLINE -bool -trx_undo_trx_id_is_insert( -/*======================*/ - const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */ - MY_ATTRIBUTE((nonnull, pure, warn_unused_result)); -#endif /* !UNIV_HOTBACKUP */ -/*****************************************************************//** -Writes a roll ptr to an index page. In case that the size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_roll_ptr( -/*===============*/ - byte* ptr, /*!< in: pointer to memory where - written */ - roll_ptr_t roll_ptr); /*!< in: roll ptr */ -/*****************************************************************//** -Reads a roll ptr from an index page. 
In case that the roll ptr size -changes in some future version, this function should be used instead of -mach_read_... -@return roll ptr */ -UNIV_INLINE -roll_ptr_t -trx_read_roll_ptr( -/*==============*/ - const byte* ptr); /*!< in: pointer to memory from where to read */ -#ifndef UNIV_HOTBACKUP -/******************************************************************//** -Gets an undo log page and x-latches it. -@return pointer to page x-latched */ -UNIV_INLINE -page_t* -trx_undo_page_get( -/*==============*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - mtr_t* mtr); /*!< in: mtr */ -/******************************************************************//** -Gets an undo log page and s-latches it. -@return pointer to page s-latched */ -UNIV_INLINE -page_t* -trx_undo_page_get_s_latched( -/*========================*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - mtr_t* mtr); /*!< in: mtr */ -/******************************************************************//** -Returns the previous undo record on the page in the specified log, or -NULL if none exists. -@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_prev_rec( -/*=======================*/ - trx_undo_rec_t* rec, /*!< in: undo log record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset);/*!< in: undo log header offset on page */ -/******************************************************************//** -Returns the next undo log record on the page in the specified log, or -NULL if none exists. 
-@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_next_rec( -/*=======================*/ - trx_undo_rec_t* rec, /*!< in: undo log record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset);/*!< in: undo log header offset on page */ -/******************************************************************//** -Returns the last undo record on the page in the specified undo log, or -NULL if none exists. -@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_last_rec( -/*=======================*/ - page_t* undo_page,/*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset); /*!< in: undo log header offset on page */ -/******************************************************************//** -Returns the first undo record on the page in the specified undo log, or -NULL if none exists. -@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_first_rec( -/*========================*/ - page_t* undo_page,/*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset);/*!< in: undo log header offset on page */ -/***********************************************************************//** -Gets the previous record in an undo log. -@return undo log record, the page s-latched, NULL if none */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_prev_rec( -/*==================*/ - trx_undo_rec_t* rec, /*!< in: undo record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - bool shared, /*!< in: true=S-latch, false=X-latch */ - mtr_t* mtr); /*!< in: mtr */ -/***********************************************************************//** -Gets the next record in an undo log. 
-@return undo log record, the page s-latched, NULL if none */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_next_rec( -/*==================*/ - trx_undo_rec_t* rec, /*!< in: undo record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - mtr_t* mtr); /*!< in: mtr */ -/***********************************************************************//** -Gets the first record in an undo log. -@return undo log record, the page latched, NULL if none */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_first_rec( -/*===================*/ - ulint space, /*!< in: undo log header space */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr); /*!< in: mtr */ -/********************************************************************//** -Tries to add a page to the undo log segment where the undo log is placed. -@return X-latched block if success, else NULL */ -UNIV_INTERN -buf_block_t* -trx_undo_add_page( -/*==============*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log memory object */ - mtr_t* mtr) /*!< in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/********************************************************************//** -Frees the last undo log page. -The caller must hold the rollback segment mutex. 
*/ -UNIV_INTERN -void -trx_undo_free_last_page_func( -/*==========================*/ -#ifdef UNIV_DEBUG - const trx_t* trx, /*!< in: transaction */ -#endif /* UNIV_DEBUG */ - trx_undo_t* undo, /*!< in/out: undo log memory copy */ - mtr_t* mtr) /*!< in/out: mini-transaction which does not - have a latch to any undo log page or which - has allocated the undo log page */ - MY_ATTRIBUTE((nonnull)); -#ifdef UNIV_DEBUG -# define trx_undo_free_last_page(trx,undo,mtr) \ - trx_undo_free_last_page_func(trx,undo,mtr) -#else /* UNIV_DEBUG */ -# define trx_undo_free_last_page(trx,undo,mtr) \ - trx_undo_free_last_page_func(undo,mtr) -#endif /* UNIV_DEBUG */ - -/***********************************************************************//** -Truncates an undo log from the end. This function is used during a rollback -to free space from an undo log. */ -UNIV_INTERN -void -trx_undo_truncate_end( -/*=======================*/ - trx_t* trx, /*!< in: transaction whose undo log it is */ - trx_undo_t* undo, /*!< in/out: undo log */ - undo_no_t limit) /*!< in: all undo records with undo number - >= this value should be truncated */ - MY_ATTRIBUTE((nonnull)); - -/***********************************************************************//** -Truncates an undo log from the start. This function is used during a purge -operation. */ -UNIV_INTERN -void -trx_undo_truncate_start( -/*====================*/ - trx_rseg_t* rseg, /*!< in: rollback segment */ - ulint space, /*!< in: space id of the log */ - ulint hdr_page_no, /*!< in: header page number */ - ulint hdr_offset, /*!< in: header offset on the page */ - undo_no_t limit); /*!< in: all undo pages with - undo numbers < this value - should be truncated; NOTE that - the function only frees whole - pages; the header page is not - freed, but emptied, if all the - records there are < limit */ -/********************************************************************//** -Initializes the undo log lists for a rollback segment memory copy. 
-This function is only called when the database is started or a new -rollback segment created. -@return the combined size of undo log segments in pages */ -UNIV_INTERN -ulint -trx_undo_lists_init( -/*================*/ - trx_rseg_t* rseg); /*!< in: rollback segment memory object */ -/**********************************************************************//** -Assigns an undo log for a transaction. A new undo log is created or a cached -undo log reused. -@return DB_SUCCESS if undo log assign successful, possible error codes -are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY -DB_OUT_OF_MEMORY */ -UNIV_INTERN -dberr_t -trx_undo_assign_undo( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************************//** -Sets the state of the undo log segment at a transaction finish. -@return undo log segment header page, x-latched */ -UNIV_INTERN -page_t* -trx_undo_set_state_at_finish( -/*=========================*/ - trx_undo_t* undo, /*!< in: undo log memory copy */ - mtr_t* mtr); /*!< in: mtr */ -/******************************************************************//** -Sets the state of the undo log segment at a transaction prepare. -@return undo log segment header page, x-latched */ -UNIV_INTERN -page_t* -trx_undo_set_state_at_prepare( -/*==========================*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log memory copy */ - mtr_t* mtr); /*!< in: mtr */ - -/**********************************************************************//** -Adds the update undo log header as the first in the history list, and -frees the memory object, or puts it to the list of cached update undo log -segments. 
*/ -UNIV_INTERN -void -trx_undo_update_cleanup( -/*====================*/ - trx_t* trx, /*!< in: trx owning the update undo log */ - page_t* undo_page, /*!< in: update undo log header page, - x-latched */ - mtr_t* mtr); /*!< in: mtr */ -/******************************************************************//** -Frees or caches an insert undo log after a transaction commit or rollback. -Knowledge of inserts is not needed after a commit or rollback, therefore -the data can be discarded. */ -UNIV_INTERN -void -trx_undo_insert_cleanup( -/*====================*/ - trx_t* trx); /*!< in: transaction handle */ - -/********************************************************************//** -At shutdown, frees the undo logs of a PREPARED transaction. */ -UNIV_INTERN -void -trx_undo_free_prepared( -/*===================*/ - trx_t* trx) /*!< in/out: PREPARED transaction */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); -#endif /* !UNIV_HOTBACKUP */ -/***********************************************************//** -Parses the redo log entry of an undo log page initialization. -@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_page_init( -/*=====================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/***********************************************************//** -Parses the redo log entry of an undo log page header create or reuse. -@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_page_header( -/*=======================*/ - ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/***********************************************************//** -Parses the redo log entry of an undo log page header discard. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_discard_latest( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr); /*!< in: mtr or NULL */ -/************************************************************************ -Frees an undo log memory copy. */ -UNIV_INTERN -void -trx_undo_mem_free( -/*==============*/ - trx_undo_t* undo); /* in: the undo object to be freed */ - -/* Types of an undo log segment */ -#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */ -#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates - and delete markings: in short, - modifys (the name 'UPDATE' is a - historical relic) */ -/* States of an undo log segment */ -#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active - transaction */ -#define TRX_UNDO_CACHED 2 /* cached for quick reuse */ -#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */ -#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be - reused: it can be freed in purge when - all undo data in it is removed */ -#define TRX_UNDO_PREPARED 5 /* contains an undo log of an - prepared transaction */ - -#ifndef UNIV_HOTBACKUP -/** Transaction undo log memory object; this is protected by the undo_mutex -in the corresponding transaction object */ - -struct trx_undo_t{ - /*-----------------------------*/ - ulint id; /*!< undo log slot number within the - rollback segment */ - ulint type; /*!< TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - ulint state; /*!< state of the corresponding undo log - segment */ - ibool del_marks; /*!< relevant only in an update undo - log: this is TRUE if the transaction may - have delete marked records, because of - a delete of a row or an update of an - indexed field; purge is then - necessary; also TRUE if the transaction - has updated an externally stored - field */ - trx_id_t trx_id; /*!< id of the trx assigned to the undo - log */ - 
XID xid; /*!< X/Open XA transaction - identification */ - ibool dict_operation; /*!< TRUE if a dict operation trx */ - table_id_t table_id; /*!< if a dict operation, then the table - id */ - trx_rseg_t* rseg; /*!< rseg where the undo log belongs */ - /*-----------------------------*/ - ulint space; /*!< space id where the undo log - placed */ - ulint zip_size; /*!< compressed page size of space - in bytes, or 0 for uncompressed */ - ulint hdr_page_no; /*!< page number of the header page in - the undo log */ - ulint hdr_offset; /*!< header offset of the undo log on - the page */ - ulint last_page_no; /*!< page number of the last page in the - undo log; this may differ from - top_page_no during a rollback */ - ulint size; /*!< current size in pages */ - /*-----------------------------*/ - ulint empty; /*!< TRUE if the stack of undo log - records is currently empty */ - ulint top_page_no; /*!< page number where the latest undo - log record was catenated; during - rollback the page from which the latest - undo record was chosen */ - ulint top_offset; /*!< offset of the latest undo record, - i.e., the topmost element in the undo - log if we think of it as a stack */ - undo_no_t top_undo_no; /*!< undo number of the latest record */ - buf_block_t* guess_block; /*!< guess for the buffer block where - the top page might reside */ - /*-----------------------------*/ - UT_LIST_NODE_T(trx_undo_t) undo_list; - /*!< undo log objects in the rollback - segment are chained into lists */ -}; -#endif /* !UNIV_HOTBACKUP */ - -/** The offset of the undo log page header on pages of the undo log */ -#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA -/*-------------------------------------------------------------*/ -/** Transaction undo log page header offsets */ -/* @{ */ -#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ -#define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log - records for the LATEST transaction - start on this page (remember that - in an 
update undo log, the first page - can contain several undo logs) */ -#define TRX_UNDO_PAGE_FREE 4 /*!< On each page of the undo log this - field contains the byte offset of the - first free byte on the page */ -#define TRX_UNDO_PAGE_NODE 6 /*!< The file list node in the chain - of undo log pages */ -/*-------------------------------------------------------------*/ -#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE) - /*!< Size of the transaction undo - log page header, in bytes */ -/* @} */ - -/** An update undo segment with just one page can be reused if it has -at most this many bytes used; we must leave space at least for one new undo -log header on the page */ - -#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4) - -/* An update undo log segment may contain several undo logs on its first page -if the undo logs took so little space that the segment could be cached and -reused. All the undo log headers are then on the first page, and the last one -owns the undo log records on subsequent pages if the segment is bigger than -one page. If an undo log is stored in a segment, then on the first page it is -allowed to have zero undo records, but if the segment extends to several -pages, then all the rest of the pages must contain at least one undo log -record. */ - -/** The offset of the undo log segment header on the first page of the undo -log segment */ - -#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE) -/** Undo log segment header */ -/* @{ */ -/*-------------------------------------------------------------*/ -#define TRX_UNDO_STATE 0 /*!< TRX_UNDO_ACTIVE, ... 
*/ -#define TRX_UNDO_LAST_LOG 2 /*!< Offset of the last undo log header - on the segment header page, 0 if - none */ -#define TRX_UNDO_FSEG_HEADER 4 /*!< Header for the file segment which - the undo log segment occupies */ -#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE) - /*!< Base node for the list of pages in - the undo log segment; defined only on - the undo log segment's first page */ -/*-------------------------------------------------------------*/ -/** Size of the undo log segment header */ -#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE) -/* @} */ - - -/** The undo log header. There can be several undo log headers on the first -page of an update undo log segment. */ -/* @{ */ -/*-------------------------------------------------------------*/ -#define TRX_UNDO_TRX_ID 0 /*!< Transaction id */ -#define TRX_UNDO_TRX_NO 8 /*!< Transaction number of the - transaction; defined only if the log - is in a history list */ -#define TRX_UNDO_DEL_MARKS 16 /*!< Defined only in an update undo - log: TRUE if the transaction may have - done delete markings of records, and - thus purge is necessary */ -#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record - of this log on the header page; purge - may remove undo log record from the - log start, and therefore this is not - necessarily the same as this log - header end offset */ -#define TRX_UNDO_XID_EXISTS 20 /*!< TRUE if undo log header includes - X/Open XA transaction identification - XID */ -#define TRX_UNDO_DICT_TRANS 21 /*!< TRUE if the transaction is a table - create, index create, or drop - transaction: in recovery - the transaction cannot be rolled back - in the usual way: a 'rollback' rather - means dropping the created or dropped - table, if it still exists */ -#define TRX_UNDO_TABLE_ID 22 /*!< Id of the table if the preceding - field is TRUE */ -#define TRX_UNDO_NEXT_LOG 30 /*!< Offset of the next undo log header - on this page, 0 if none */ -#define TRX_UNDO_PREV_LOG 
32 /*!< Offset of the previous undo log - header on this page, 0 if none */ -#define TRX_UNDO_HISTORY_NODE 34 /*!< If the log is put to the history - list, the file list node is here */ -/*-------------------------------------------------------------*/ -/** Size of the undo log header without XID information */ -#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE) - -/* Note: the writing of the undo log old header is coded by a log record -MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the -header is logged separately. In this sense, the XID is not really a member -of the undo log header. TODO: do not append the XID to the log header if XA -is not needed by the user. The XID wastes about 150 bytes of space in every -undo log. In the history list we may have millions of undo logs, which means -quite a large overhead. */ - -/** X/Open XA Transaction Identification (XID) */ -/* @{ */ -/** xid_t::formatID */ -#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE) -/** xid_t::gtrid_length */ -#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4) -/** xid_t::bqual_length */ -#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4) -/** Distributed transaction identifier data */ -#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4) -/*--------------------------------------------------------------*/ -#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE) - /*!< Total size of the undo log header - with the XA XID */ -/* @} */ - -#ifndef UNIV_NONINL -#include "trx0undo.ic" -#endif - -#endif diff --git a/storage/xtradb/include/trx0undo.ic b/storage/xtradb/include/trx0undo.ic deleted file mode 100644 index 577759d6c3d..00000000000 --- a/storage/xtradb/include/trx0undo.ic +++ /dev/null @@ -1,363 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/trx0undo.ic -Transaction undo log - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "data0type.h" -#include "page0page.h" - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Builds a roll pointer. -@return roll pointer */ -UNIV_INLINE -roll_ptr_t -trx_undo_build_roll_ptr( -/*====================*/ - ibool is_insert, /*!< in: TRUE if insert undo log */ - ulint rseg_id, /*!< in: rollback segment id */ - ulint page_no, /*!< in: page number */ - ulint offset) /*!< in: offset of the undo entry within page */ -{ - roll_ptr_t roll_ptr; -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif - ut_ad(is_insert == 0 || is_insert == 1); - ut_ad(rseg_id < TRX_SYS_N_RSEGS); - ut_ad(offset < 65536); - - roll_ptr = (roll_ptr_t) is_insert << 55 - | (roll_ptr_t) rseg_id << 48 - | (roll_ptr_t) page_no << 16 - | offset; - return(roll_ptr); -} - -/***********************************************************************//** -Decodes a roll pointer. 
*/ -UNIV_INLINE -void -trx_undo_decode_roll_ptr( -/*=====================*/ - roll_ptr_t roll_ptr, /*!< in: roll pointer */ - ibool* is_insert, /*!< out: TRUE if insert undo log */ - ulint* rseg_id, /*!< out: rollback segment id */ - ulint* page_no, /*!< out: page number */ - ulint* offset) /*!< out: offset of the undo - entry within page */ -{ -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif -#if TRUE != 1 -# error "TRUE != 1" -#endif - ut_ad(roll_ptr < (1ULL << 56)); - *offset = (ulint) roll_ptr & 0xFFFF; - roll_ptr >>= 16; - *page_no = (ulint) roll_ptr & 0xFFFFFFFF; - roll_ptr >>= 32; - *rseg_id = (ulint) roll_ptr & 0x7F; - roll_ptr >>= 7; - *is_insert = (ibool) roll_ptr; /* TRUE==1 */ -} - -/***********************************************************************//** -Returns TRUE if the roll pointer is of the insert type. -@return TRUE if insert undo log */ -UNIV_INLINE -ibool -trx_undo_roll_ptr_is_insert( -/*========================*/ - roll_ptr_t roll_ptr) /*!< in: roll pointer */ -{ -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif -#if TRUE != 1 -# error "TRUE != 1" -#endif - ut_ad(roll_ptr < (1ULL << 56)); - return((ibool) (roll_ptr >> 55)); -} - -/***********************************************************************//** -Returns true if the record is of the insert type. -@return true if the record was freshly inserted (not updated). */ -UNIV_INLINE -bool -trx_undo_trx_id_is_insert( -/*======================*/ - const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */ -{ -#if DATA_TRX_ID + 1 != DATA_ROLL_PTR -# error -#endif - return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7)); -} -#endif /* !UNIV_HOTBACKUP */ - -/*****************************************************************//** -Writes a roll ptr to an index page. In case that the size changes in -some future version, this function should be used instead of -mach_write_... 
*/ -UNIV_INLINE -void -trx_write_roll_ptr( -/*===============*/ - byte* ptr, /*!< in: pointer to memory where - written */ - roll_ptr_t roll_ptr) /*!< in: roll ptr */ -{ -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif - mach_write_to_7(ptr, roll_ptr); -} - -/*****************************************************************//** -Reads a roll ptr from an index page. In case that the roll ptr size -changes in some future version, this function should be used instead of -mach_read_... -@return roll ptr */ -UNIV_INLINE -roll_ptr_t -trx_read_roll_ptr( -/*==============*/ - const byte* ptr) /*!< in: pointer to memory from where to read */ -{ -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif - return(mach_read_from_7(ptr)); -} - -#ifndef UNIV_HOTBACKUP -/******************************************************************//** -Gets an undo log page and x-latches it. -@return pointer to page x-latched */ -UNIV_INLINE -page_t* -trx_undo_page_get( -/*==============*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block = buf_page_get(space, zip_size, page_no, - RW_X_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); - - return(buf_block_get_frame(block)); -} - -/******************************************************************//** -Gets an undo log page and s-latches it. 
-@return pointer to page s-latched */ -UNIV_INLINE -page_t* -trx_undo_page_get_s_latched( -/*========================*/ - ulint space, /*!< in: space where placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number */ - mtr_t* mtr) /*!< in: mtr */ -{ - buf_block_t* block = buf_page_get(space, zip_size, page_no, - RW_S_LATCH, mtr); - buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); - - return(buf_block_get_frame(block)); -} - -/******************************************************************//** -Returns the start offset of the undo log records of the specified undo -log on the page. -@return start offset */ -UNIV_INLINE -ulint -trx_undo_page_get_start( -/*====================*/ - page_t* undo_page,/*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header offset on page */ -{ - ulint start; - - if (page_no == page_get_page_no(undo_page)) { - - start = mach_read_from_2(offset + undo_page - + TRX_UNDO_LOG_START); - } else { - start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE; - } - - return(start); -} - -/******************************************************************//** -Returns the end offset of the undo log records of the specified undo -log on the page. 
-@return end offset */ -UNIV_INLINE -ulint -trx_undo_page_get_end( -/*==================*/ - page_t* undo_page,/*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header offset on page */ -{ - trx_ulogf_t* log_hdr; - ulint end; - - if (page_no == page_get_page_no(undo_page)) { - - log_hdr = undo_page + offset; - - end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); - - if (end == 0) { - end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - } - } else { - end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - } - - return(end); -} - -/******************************************************************//** -Returns the previous undo record on the page in the specified log, or -NULL if none exists. -@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_prev_rec( -/*=======================*/ - trx_undo_rec_t* rec, /*!< in: undo log record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header offset on page */ -{ - page_t* undo_page; - ulint start; - - undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE); - - start = trx_undo_page_get_start(undo_page, page_no, offset); - - if (start + undo_page == rec) { - - return(NULL); - } - - return(undo_page + mach_read_from_2(rec - 2)); -} - -/******************************************************************//** -Returns the next undo log record on the page in the specified log, or -NULL if none exists. 
-@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_next_rec( -/*=======================*/ - trx_undo_rec_t* rec, /*!< in: undo log record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header offset on page */ -{ - page_t* undo_page; - ulint end; - ulint next; - - undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE); - - end = trx_undo_page_get_end(undo_page, page_no, offset); - - next = mach_read_from_2(rec); - - if (next == end) { - - return(NULL); - } - - return(undo_page + next); -} - -/******************************************************************//** -Returns the last undo record on the page in the specified undo log, or -NULL if none exists. -@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_last_rec( -/*=======================*/ - page_t* undo_page,/*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header offset on page */ -{ - ulint start; - ulint end; - - start = trx_undo_page_get_start(undo_page, page_no, offset); - end = trx_undo_page_get_end(undo_page, page_no, offset); - - if (start == end) { - - return(NULL); - } - - return(undo_page + mach_read_from_2(undo_page + end - 2)); -} - -/******************************************************************//** -Returns the first undo record on the page in the specified undo log, or -NULL if none exists. 
-@return pointer to record, NULL if none */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_first_rec( -/*========================*/ - page_t* undo_page,/*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header offset on page */ -{ - ulint start; - ulint end; - - start = trx_undo_page_get_start(undo_page, page_no, offset); - end = trx_undo_page_get_end(undo_page, page_no, offset); - - if (start == end) { - - return(NULL); - } - - return(undo_page + start); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h deleted file mode 100644 index 4d5adc68dcd..00000000000 --- a/storage/xtradb/include/trx0xa.h +++ /dev/null @@ -1,61 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/* - * Start of xa.h header - * - * Define a symbol to prevent multiple inclusions of this header file - */ -#ifndef XA_H -#define XA_H - -#include "handler.h" - -/* - * Transaction branch identification: XID and NULLXID: - */ -#ifndef XIDDATASIZE - -/** Sizes of transaction identifier */ -#define XIDDATASIZE 128 /*!< maximum size of a transaction - identifier, in bytes */ -#define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */ -#define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */ - -#endif -/** X/Open XA distributed transaction status codes */ -/* @{ */ -#define XA_OK 0 /*!< normal execution */ -#define XAER_ASYNC -2 /*!< asynchronous operation already - outstanding */ -#define XAER_RMERR -3 /*!< a resource manager error - occurred in the transaction - branch */ -#define XAER_NOTA -4 /*!< the XID is not valid */ -#define XAER_INVAL -5 /*!< invalid arguments were given */ -#define XAER_PROTO -6 /*!< routine invoked in an improper - context */ -#define XAER_RMFAIL -7 /*!< resource manager unavailable */ -#define XAER_DUPID -8 /*!< the XID already exists */ -#define XAER_OUTSIDE -9 /*!< resource manager doing - work outside transaction */ -/* @} */ -#endif /* ifndef XA_H */ -/* - * End of xa.h header - */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i deleted file mode 100644 index 23c8c0a659d..00000000000 --- a/storage/xtradb/include/univ.i +++ /dev/null @@ -1,706 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. -Copyright (c) 2008, Google Inc. 
- -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/***********************************************************************//** -@file include/univ.i -Version control for database, common definitions, and include files - -Created 1/20/1994 Heikki Tuuri -****************************************************************************/ - -#ifndef univ_i -#define univ_i - -#ifdef UNIV_HOTBACKUP -#include "hb_univ.i" -#endif /* UNIV_HOTBACKUP */ - -/* aux macros to convert M into "123" (string) if M is defined like -#define M 123 */ -#define _IB_TO_STR(s) #s -#define IB_TO_STR(s) _IB_TO_STR(s) - -#define INNODB_VERSION_MAJOR 5 -#define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 36 - -#ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 82.0 -#endif - -/* Enable UNIV_LOG_ARCHIVE in XtraDB */ -#define UNIV_LOG_ARCHIVE 1 - -/* The following is the InnoDB version as shown in -SELECT plugin_version FROM information_schema.plugins; -calculated in 
make_version_string() in sql/sql_show.cc like this: -"version >> 8" . "version & 0xff" -because the version is shown with only one dot, we skip the last -component, i.e. we show M.N.P as M.N */ -#define INNODB_VERSION_SHORT \ - (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) - -#define INNODB_VERSION_STR \ - IB_TO_STR(INNODB_VERSION_MAJOR) "." \ - IB_TO_STR(INNODB_VERSION_MINOR) "." \ - IB_TO_STR(INNODB_VERSION_BUGFIX) "-" \ - IB_TO_STR(PERCONA_INNODB_VERSION) - -#define REFMAN "http://dev.mysql.com/doc/refman/" \ - IB_TO_STR(INNODB_VERSION_MAJOR) "." \ - IB_TO_STR(INNODB_VERSION_MINOR) "/en/" - -#ifdef MYSQL_DYNAMIC_PLUGIN -/* In the dynamic plugin, redefine some externally visible symbols -in order not to conflict with the symbols of a builtin InnoDB. */ - -/* Rename all C++ classes that contain virtual functions, because we -have not figured out how to apply the visibility=hidden attribute to -the virtual method table (vtable) in GCC 3. */ -# define ha_innobase ha_innodb -#endif /* MYSQL_DYNAMIC_PLUGIN */ - -#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__) -# undef __WIN__ -# define __WIN__ - -# include <windows.h> - -# ifdef _NT_ -# define __NT__ -# endif - -#else -/* The defines used with MySQL */ - -/* Include two header files from MySQL to make the Unix flavor used -in compiling more Posix-compatible. These headers also define __WIN__ -if we are compiling on Windows. */ - -#ifndef UNIV_HOTBACKUP -# include <my_global.h> -# include <my_pthread.h> -#endif /* UNIV_HOTBACKUP */ - -/* Include <sys/stat.h> to get S_I... 
macros defined for os0file.cc */ -# include <sys/stat.h> -# if !defined(__WIN__) -# include <sys/mman.h> /* mmap() for os0proc.cc */ -# endif - -/* Include the header file generated by GNU autoconf */ -# ifndef __WIN__ -# ifndef UNIV_HOTBACKUP -# include "config.h" -# endif /* UNIV_HOTBACKUP */ -# endif - -# ifdef HAVE_SCHED_H -# include <sched.h> -# endif - -# ifdef HAVE_MALLOC_H -# include <malloc.h> -# endif - -/* We only try to do explicit inlining of functions with gcc and -Sun Studio */ - -# ifdef HAVE_PREAD -# define HAVE_PWRITE -# endif - -#endif /* #if (defined(WIN32) || ... */ - -#ifndef __WIN__ -#define __STDC_FORMAT_MACROS /* Enable C99 printf format macros */ -#include <inttypes.h> -#endif /* !__WIN__ */ - -/* Following defines are to enable performance schema -instrumentation in each of four InnoDB modules if -HAVE_PSI_INTERFACE is defined. */ -#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP -# define UNIV_PFS_MUTEX -# define UNIV_PFS_RWLOCK - -# define UNIV_PFS_IO -# define UNIV_PFS_THREAD - -/* There are mutexes/rwlocks that we want to exclude from -instrumentation even if their corresponding performance schema -define is set. And this PFS_NOT_INSTRUMENTED is used -as the key value to identify those objects that would -be excluded from instrumentation. */ -# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED - -# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED) - -#endif /* HAVE_PSI_INTERFACE */ - -#ifdef __WIN__ -# define YY_NO_UNISTD_H 1 -#endif /* __WIN__ */ - -/* DEBUG VERSION CONTROL - ===================== */ - -/* When this macro is defined then additional test functions will be -compiled. These functions live at the end of each relevant source file -and have "test_" prefix. These functions are not called from anywhere in -the code, they can be called from gdb after -innobase_start_or_create_for_mysql() has executed using the call -command. Not tested on Windows. 
*/ -/* -#define UNIV_COMPILE_TEST_FUNCS -*/ - -#if defined HAVE_valgrind && defined HAVE_VALGRIND -# define UNIV_DEBUG_VALGRIND -#endif -#if 0 -#define UNIV_DEBUG_VALGRIND /* Enable extra - Valgrind instrumentation */ -#define UNIV_DEBUG_PRINT /* Enable the compilation of - some debug print functions */ -#define UNIV_AHI_DEBUG /* Enable adaptive hash index - debugging without UNIV_DEBUG */ -#define UNIV_BUF_DEBUG /* Enable buffer pool - debugging without UNIV_DEBUG */ -#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column - debugging without UNIV_DEBUG */ -#define UNIV_DEBUG /* Enable ut_ad() assertions - and disable UNIV_INLINE */ -#define UNIV_DEBUG_LOCK_VALIDATE /* Enable - ut_ad(lock_rec_validate_page()) - assertions. */ -#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access - debugging without UNIV_DEBUG */ -#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */ -#define UNIV_HASH_DEBUG /* debug HASH_ macros */ -#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ -#define UNIV_LOG_LSN_DEBUG /* write LSN to the redo log; -this will break redo log file compatibility, but it may be useful when -debugging redo log application problems. */ -#define UNIV_MEM_DEBUG /* detect memory leaks etc */ -#define UNIV_IBUF_DEBUG /* debug the insert buffer */ -#define UNIV_BLOB_DEBUG /* track BLOB ownership; -assumes that no BLOBs survive server restart */ -#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer; -this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES, -and the insert buffer must be empty when the database is started */ -#define UNIV_PERF_DEBUG /* debug flag that enables - light weight performance - related stuff. 
*/ -#define UNIV_SYNC_DEBUG /* debug mutex and latch -operations (very slow); also UNIV_DEBUG must be defined */ -#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */ -#define UNIV_SYNC_PERF_STAT /* operation counts for - rw-locks and mutexes */ -#define UNIV_SEARCH_PERF_STAT /* statistics for the - adaptive hash index */ -#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output - in sync0sync.cc */ -#define UNIV_BTR_PRINT /* enable functions for - printing B-trees */ -#define UNIV_ZIP_DEBUG /* extensive consistency checks - for compressed pages */ -#define UNIV_ZIP_COPY /* call page_zip_copy_recs() - more often */ -#define UNIV_AIO_DEBUG /* prints info about - submitted and reaped AIO - requests to the log. */ -#define UNIV_STATS_DEBUG /* prints various stats - related debug info from - dict0stats.c */ -#define FTS_INTERNAL_DIAG_PRINT /* FTS internal debugging - info output */ -#endif - -#define UNIV_BTR_DEBUG /* check B-tree links */ -#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */ - -/* -#define UNIV_SQL_DEBUG -#define UNIV_LOG_DEBUG -*/ - /* the above option prevents forcing of log to disk - at a buffer page write: it should be tested with this - option off; also some ibuf tests are suppressed */ - -/* Linkage specifier for non-static InnoDB symbols (variables and functions) -that are only referenced from within InnoDB, not from MySQL. We disable the -GCC visibility directive on all Sun operating systems because there is no -easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. 
*/ -#define MY_ATTRIBUTE __attribute__ -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER) -# define UNIV_INTERN MY_ATTRIBUTE((visibility ("hidden"))) -#else -# define UNIV_INTERN -#endif -#if defined(INNODB_COMPILER_HINTS) \ - && defined __GNUC__ \ - && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3) -/** Starting with GCC 4.3, the "cold" attribute is used to inform the -compiler that a function is unlikely executed. The function is -optimized for size rather than speed and on many targets it is placed -into special subsection of the text section so all cold functions -appears close together improving code locality of non-cold parts of -program. The paths leading to call of cold functions within code are -marked as unlikely by the branch prediction mechanism. optimize a -rarely invoked function for size instead for speed. */ -# define UNIV_COLD MY_ATTRIBUTE((cold)) -#else -# define UNIV_COLD /* empty */ -#endif - -#ifdef UNIV_LINUX -# define UNIV_THREAD_LOCAL __thread -#else -/* FIXME: the TLS variables are silently broken on other platforms for now */ -# define UNIV_THREAD_LOCAL -#endif - -#ifndef UNIV_MUST_NOT_INLINE -/* Definition for inline version */ - -#define UNIV_INLINE static inline - -#else /* !UNIV_MUST_NOT_INLINE */ -/* If we want to compile a noninlined version we use the following macro -definitions: */ - -#define UNIV_NONINL -#define UNIV_INLINE UNIV_INTERN - -#endif /* !UNIV_MUST_NOT_INLINE */ - -#define UNIV_WORD_SIZE SIZEOF_SIZE_T - -/** The following alignment is used in memory allocations in memory heap -management to ensure correct alignment for doubles etc. */ -#define UNIV_MEM_ALIGNMENT 8 - -/* - DATABASE VERSION CONTROL - ======================== -*/ - -/** There are currently two InnoDB file formats which are used to group -features with similar restrictions and dependencies. Using an enum allows -switch statements to give a compiler warning when a new one is introduced. 
*/ -enum innodb_file_formats_enum { - /** Antelope File Format: InnoDB/MySQL up to 5.1. - This format includes REDUNDANT and COMPACT row formats */ - UNIV_FORMAT_A = 0, - - /** Barracuda File Format: Introduced in InnoDB plugin for 5.1: - This format includes COMPRESSED and DYNAMIC row formats. It - includes the ability to create secondary indexes from data that - is not on the clustered index page and the ability to store more - data off the clustered index page. */ - UNIV_FORMAT_B = 1 -}; - -typedef enum innodb_file_formats_enum innodb_file_formats_t; - -/** Minimum supported file format */ -#define UNIV_FORMAT_MIN UNIV_FORMAT_A - -/** Maximum supported file format */ -#define UNIV_FORMAT_MAX UNIV_FORMAT_B - -/** The 2-logarithm of UNIV_PAGE_SIZE: */ -#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift - -#ifdef HAVE_LZO -#define IF_LZO(A,B) A -#else -#define IF_LZO(A,B) B -#endif - -#ifdef HAVE_LZ4 -#define IF_LZ4(A,B) A -#else -#define IF_LZ4(A,B) B -#endif - -#ifdef HAVE_LZMA -#define IF_LZMA(A,B) A -#else -#define IF_LZMA(A,B) B -#endif - -#ifdef HAVE_BZIP2 -#define IF_BZIP2(A,B) A -#else -#define IF_BZIP2(A,B) B -#endif - -#ifdef HAVE_SNAPPY -#define IF_SNAPPY(A,B) A -#else -#define IF_SNAPPY(A,B) B -#endif - -/** The universal page size of the database */ -#define UNIV_PAGE_SIZE ((ulint) srv_page_size) - -/** log2 of smallest compressed page size (1<<10 == 1024 bytes) -Note: This must never change! */ -#define UNIV_ZIP_SIZE_SHIFT_MIN 10 - -/** log2 of largest compressed page size (1<<14 == 16384 bytes). -A compressed page directory entry reserves 14 bits for the start offset -and 2 bits for flags. This limits the uncompressed page size to 16k. -*/ -#define UNIV_ZIP_SIZE_SHIFT_MAX 14 - -/* Define the Min, Max, Default page sizes. */ -/** Minimum Page Size Shift (power of 2) */ -#define UNIV_PAGE_SIZE_SHIFT_MIN 12 -/** log2 of largest page size (1<<16 == 65536 bytes).
*/ -/** Maximum Page Size Shift (power of 2) */ -#define UNIV_PAGE_SIZE_SHIFT_MAX 16 -/** log2 of default page size (1<<14 == 16384 bytes). */ -/** Default Page Size Shift (power of 2) */ -#define UNIV_PAGE_SIZE_SHIFT_DEF 14 -/** Original 16k InnoDB Page Size Shift, in case the default changes */ -#define UNIV_PAGE_SIZE_SHIFT_ORIG 14 - -/** Minimum page size InnoDB currently supports. */ -#define UNIV_PAGE_SIZE_MIN (1 << UNIV_PAGE_SIZE_SHIFT_MIN) -/** Maximum page size InnoDB currently supports. */ -#define UNIV_PAGE_SIZE_MAX (1 << UNIV_PAGE_SIZE_SHIFT_MAX) -/** Default page size for InnoDB tablespaces. */ -#define UNIV_PAGE_SIZE_DEF (1 << UNIV_PAGE_SIZE_SHIFT_DEF) -/** Original 16k page size for InnoDB tablespaces. */ -#define UNIV_PAGE_SIZE_ORIG (1 << UNIV_PAGE_SIZE_SHIFT_ORIG) - -/** Smallest compressed page size */ -#define UNIV_ZIP_SIZE_MIN (1 << UNIV_ZIP_SIZE_SHIFT_MIN) - -/** Largest compressed page size */ -#define UNIV_ZIP_SIZE_MAX (1 << UNIV_ZIP_SIZE_SHIFT_MAX) - -/** Number of supported page sizes (The convention 'ssize' is used -for 'log2 minus 9' or the number of shifts starting with 512.) -This number varies depending on UNIV_PAGE_SIZE. */ -#define UNIV_PAGE_SSIZE_MAX \ - (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) - -/** Maximum number of parallel threads in a parallelized operation */ -#define UNIV_MAX_PARALLELISM 32 - -/** This is the "mbmaxlen" for my_charset_filename (defined in -strings/ctype-utf8.c), which is used to encode File and Database names. */ -#define FILENAME_CHARSET_MAXNAMLEN 5 - -/** The maximum length of an encoded table name in bytes. The max -table and database names are NAME_CHAR_LEN (64) characters. After the -encoding, the max length would be NAME_CHAR_LEN (64) * -FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a -terminating '\0'. InnoDB can handle longer names internally */ -#define MAX_TABLE_NAME_LEN 320 - -/** The maximum length of a database name.
Like MAX_TABLE_NAME_LEN this is -the MySQL's NAME_LEN, see check_and_convert_db_name(). */ -#define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN - -/** MAX_FULL_NAME_LEN defines the full name path including the -database name and table name. In addition, 14 bytes is added for: - 2 for surrounding quotes around table name - 1 for the separating dot (.) - 9 for the #mysql50# prefix */ -#define MAX_FULL_NAME_LEN \ - (MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14) - -/** The maximum length in bytes that a database name can occupy when stored in -UTF8, including the terminating '\0', see dict_fs2utf8(). You must include -mysql_com.h if you are to use this macro. */ -#define MAX_DB_UTF8_LEN (NAME_LEN + 1) - -/** The maximum length in bytes that a table name can occupy when stored in -UTF8, including the terminating '\0', see dict_fs2utf8(). You must include -mysql_com.h if you are to use this macro. */ -#define MAX_TABLE_UTF8_LEN (NAME_LEN + sizeof(srv_mysql50_table_name_prefix)) - -/* - UNIVERSAL TYPE DEFINITIONS - ========================== -*/ - -/* Note that inside MySQL 'byte' is defined as char on Linux! */ -#define byte unsigned char - -/* Another basic type we use is unsigned long integer which should be equal to -the word size of the machine, that is on a 32-bit platform 32 bits, and on a -64-bit platform 64 bits. We also give the printf format for the type as a -macro ULINTPF. */ - - -#ifdef _WIN32 -/* Use the integer types and formatting strings defined in Visual Studio. */ -# define UINT32PF "%u" -# define INT64PF "%lld" -# define UINT64PF "%llu" -# define UINT64PFx "%016llx" -typedef __int64 ib_int64_t; -typedef unsigned __int64 ib_uint64_t; -typedef unsigned __int32 ib_uint32_t; -#else -/* Use the integer types and formatting strings defined in the C99 standard. 
*/ -# define UINT32PF "%" PRIu32 -# define INT64PF "%" PRId64 -# define UINT64PF "%" PRIu64 -# define UINT64PFx "%016" PRIx64 -typedef int64_t ib_int64_t; -typedef uint64_t ib_uint64_t; -typedef uint32_t ib_uint32_t; -#endif - -#define IB_ID_FMT UINT64PF - -/* Type used for all log sequence number storage and arithmetics */ -typedef ib_uint64_t lsn_t; - -#ifdef _WIN64 -typedef unsigned __int64 ulint; -typedef __int64 lint; -# define ULINTPF UINT64PF -#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONGLONG -#else -typedef unsigned long int ulint; -typedef long int lint; -# define ULINTPF "%lu" -#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONG -#endif /* _WIN64 */ - -#ifndef UNIV_HOTBACKUP -typedef unsigned long long int ullint; -#endif /* UNIV_HOTBACKUP */ - -#ifndef __WIN__ -#if SIZEOF_LONG != SIZEOF_VOIDP -#error "Error: InnoDB's ulint must be of the same size as void*" -#endif -#endif - -/** The 'undefined' value for a ulint */ -#define ULINT_UNDEFINED ((ulint)(-1)) - -#define ULONG_UNDEFINED ((ulong)(-1)) - -/** The 'undefined' value for a ib_uint64_t */ -#define UINT64_UNDEFINED ((ib_uint64_t)(-1)) - -/** The bitmask of 32-bit unsigned integer */ -#define ULINT32_MASK 0xFFFFFFFF -/** The undefined 32-bit unsigned integer */ -#define ULINT32_UNDEFINED ULINT32_MASK - -/** Maximum value for a ulint */ -#define ULINT_MAX ((ulint)(-2)) - -/** Maximum value for ib_uint64_t */ -#define IB_UINT64_MAX ((ib_uint64_t) (~0ULL)) - -/** The generic InnoDB system object identifier data type */ -typedef ib_uint64_t ib_id_t; -#define IB_ID_MAX IB_UINT64_MAX - -/** The 'undefined' value for a ullint */ -#define ULLINT_UNDEFINED ((ullint)(-1)) - -/** This 'ibool' type is used within Innobase. Remember that different included -headers may define 'bool' differently. Do not assume that 'bool' is a ulint! 
*/ -#define ibool ulint - -#ifndef TRUE - -#define TRUE 1 -#define FALSE 0 - -#endif - -#define UNIV_NOTHROW - -/** The following number as the length of a logical field means that the field -has the SQL NULL as its value. NOTE that because we assume that the length -of a field is a 32-bit integer when we store it, for example, to an undo log -on disk, we must have also this number fit in 32 bits, also in 64-bit -computers! */ - -#define UNIV_SQL_NULL ULINT32_UNDEFINED - -/** Lengths which are not UNIV_SQL_NULL, but bigger than the following -number indicate that a field contains a reference to an externally -stored part of the field in the tablespace. The length field then -contains the sum of the following flag and the locally stored len. */ - -#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_DEF) - -#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) -#define HAVE_GCC_GT_2 -/* Tell the compiler that variable/function is unused. */ -# define UNIV_UNUSED MY_ATTRIBUTE ((unused)) -#else -# define UNIV_UNUSED -#endif /* CHECK FOR GCC VER_GT_2 */ - -/* Some macros to improve branch prediction and reduce cache misses */ -#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2) -/* Tell the compiler that 'expr' probably evaluates to 'constant'. */ -# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant) -/* Tell the compiler that a pointer is likely to be NULL */ -# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) ptr, 0) -/* Minimize cache-miss latency by moving data at addr into a cache before -it is read. */ -# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3) -/* Minimize cache-miss latency by moving data at addr into a cache before -it is read or written. 
*/ -# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3) - -/* Sun Studio includes sun_prefetch.h as of version 5.9 */ -#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \ - || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590) - -# include <sun_prefetch.h> - -#if __SUNPRO_C >= 0x550 -# undef UNIV_INTERN -# define UNIV_INTERN __hidden -#endif /* __SUNPRO_C >= 0x550 */ - -# define UNIV_EXPECT(expr,value) (expr) -# define UNIV_LIKELY_NULL(expr) (expr) - -# if defined(INNODB_COMPILER_HINTS) -//# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) -# define UNIV_PREFETCH_R(addr) ((void) 0) -# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) -# else -# define UNIV_PREFETCH_R(addr) ((void) 0) -# define UNIV_PREFETCH_RW(addr) ((void) 0) -# endif /* INNODB_COMPILER_HINTS */ - -#else -/* Dummy versions of the macros */ -# define UNIV_EXPECT(expr,value) (expr) -# define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) ((void) 0) -# define UNIV_PREFETCH_RW(addr) ((void) 0) -#endif - -/* Tell the compiler that cond is likely to hold */ -#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE) -/* Tell the compiler that cond is unlikely to hold */ -#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE) - -/* Compile-time constant of the given array's size. */ -#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0])) - -/* The return type from a thread's start function differs between Unix and -Windows, so define a typedef for it and a macro to use at the end of such -functions. 
*/ - -#ifdef __WIN__ -#define usleep(a) Sleep((a)/1000) -typedef DWORD os_thread_ret_t; -#define OS_THREAD_DUMMY_RETURN return(0) -#else -typedef void* os_thread_ret_t; -#define OS_THREAD_DUMMY_RETURN return(NULL) -#endif - -#include <stdio.h> -#include "ut0dbg.h" -#include "ut0ut.h" -#include "db0err.h" -#ifdef UNIV_DEBUG_VALGRIND -# include <valgrind/memcheck.h> -# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size) -# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) -# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size) -# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) -# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr) -# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b) -# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do { \ - const void* _p = (const void*) (ulint) \ - VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \ - if (UNIV_LIKELY_NULL(_p)) { \ - fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \ - __FILE__, __LINE__, \ - (const void*) (addr), (unsigned) (size), (long) \ - (((const char*) _p) - ((const char*) (addr)))); \ - if (should_abort) { \ - ut_error; \ - } \ - } \ -} while (0) -# define UNIV_MEM_ASSERT_RW(addr, size) \ - UNIV_MEM_ASSERT_RW_LOW(addr, size, false) -# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) \ - UNIV_MEM_ASSERT_RW_LOW(addr, size, true) -# define UNIV_MEM_ASSERT_W(addr, size) do { \ - const void* _p = (const void*) (ulint) \ - VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \ - if (UNIV_LIKELY_NULL(_p)) \ - fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n", \ - __FILE__, __LINE__, \ - (const void*) (addr), (unsigned) (size), (long) \ - (((const char*) _p) - ((const char*) (addr)))); \ - } while (0) -# define UNIV_MEM_TRASH(addr, c, size) do { \ - ut_d(memset(addr, c, size)); \ - UNIV_MEM_INVALID(addr, size); \ - } while (0) -#else -# define UNIV_MEM_VALID(addr, size) do {} while(0) -# define 
UNIV_MEM_INVALID(addr, size) do {} while(0) -# define UNIV_MEM_FREE(addr, size) do {} while(0) -# define UNIV_MEM_ALLOC(addr, size) do {} while(0) -# define UNIV_MEM_DESC(addr, size) do {} while(0) -# define UNIV_MEM_UNDESC(b) do {} while(0) -# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {} while(0) -# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0) -# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0) -# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0) -# define UNIV_MEM_TRASH(addr, c, size) do {} while(0) -#endif -#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \ - UNIV_MEM_ASSERT_W(addr, size); \ - UNIV_MEM_FREE(addr, size); \ -} while (0) -#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \ - UNIV_MEM_ASSERT_W(addr, size); \ - UNIV_MEM_ALLOC(addr, size); \ -} while (0) - -extern ulong srv_page_size_shift; -extern ulong srv_page_size; - -#endif diff --git a/storage/xtradb/include/usr0sess.h b/storage/xtradb/include/usr0sess.h deleted file mode 100644 index b5c80b97b43..00000000000 --- a/storage/xtradb/include/usr0sess.h +++ /dev/null @@ -1,77 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/usr0sess.h -Sessions - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ - -#ifndef usr0sess_h -#define usr0sess_h - -#include "univ.i" -#include "ut0byte.h" -#include "trx0types.h" -#include "srv0srv.h" -#include "trx0types.h" -#include "usr0types.h" -#include "que0types.h" -#include "data0data.h" -#include "rem0rec.h" - -/*********************************************************************//** -Opens a session. -@return own: session object */ -UNIV_INTERN -sess_t* -sess_open(void); -/*============*/ -/*********************************************************************//** -Closes a session, freeing the memory occupied by it. */ -UNIV_INTERN -void -sess_close( -/*=======*/ - sess_t* sess); /* in, own: session object */ - -/* The session handle. This data structure is only used by purge and is -not really necessary. We should get rid of it. 
*/ -struct sess_t{ - ulint state; /*!< state of the session */ - trx_t* trx; /*!< transaction object permanently - assigned for the session: the - transaction instance designated by the - trx id changes, but the memory - structure is preserved */ - UT_LIST_BASE_NODE_T(que_t) - graphs; /*!< query graphs belonging to this - session */ -}; - -/* Session states */ -#define SESS_ACTIVE 1 -#define SESS_ERROR 2 /* session contains an error message - which has not yet been communicated - to the client */ -#ifndef UNIV_NONINL -#include "usr0sess.ic" -#endif - -#endif diff --git a/storage/xtradb/include/usr0sess.ic b/storage/xtradb/include/usr0sess.ic deleted file mode 100644 index 284e59537fe..00000000000 --- a/storage/xtradb/include/usr0sess.ic +++ /dev/null @@ -1,24 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/usr0sess.ic -Sessions - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ diff --git a/storage/xtradb/include/usr0types.h b/storage/xtradb/include/usr0types.h deleted file mode 100644 index 6ba937cacc8..00000000000 --- a/storage/xtradb/include/usr0types.h +++ /dev/null @@ -1,31 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/usr0types.h -Users and sessions global types - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ - -#ifndef usr0types_h -#define usr0types_h - -struct sess_t; - -#endif diff --git a/storage/xtradb/include/ut0bh.h b/storage/xtradb/include/ut0bh.h deleted file mode 100644 index 1085736c7ab..00000000000 --- a/storage/xtradb/include/ut0bh.h +++ /dev/null @@ -1,152 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2011, 2013, Oracle Corpn. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0bh.h -Binary min-heap interface. - -Created 2010-05-28 by Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_UT0BH_H -#define INNOBASE_UT0BH_H - -#include "univ.i" - -/** Comparison function for objects in the binary heap. 
*/ -typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2); - -struct ib_bh_t; - -/**********************************************************************//** -Get the number of elements in the binary heap. -@return number of elements */ -UNIV_INLINE -ulint -ib_bh_size( -/*=======*/ - const ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Test if binary heap is empty. -@return TRUE if empty. */ -UNIV_INLINE -ibool -ib_bh_is_empty( -/*===========*/ - const ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Test if binary heap is full. -@return TRUE if full. */ -UNIV_INLINE -ibool -ib_bh_is_full( -/*===========*/ - const ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Get a pointer to the element. -@return pointer to element */ -UNIV_INLINE -void* -ib_bh_get( -/*=======*/ - ib_bh_t* ib_bh, /*!< in: instance */ - ulint i); /*!< in: index */ - -/**********************************************************************//** -Copy an element to the binary heap. -@return pointer to copied element */ -UNIV_INLINE -void* -ib_bh_set( -/*======*/ - ib_bh_t* ib_bh, /*!< in/out: instance */ - ulint i, /*!< in: index */ - const void* elem); /*!< in: element to add */ - -/**********************************************************************//** -Return the first element from the binary heap. -@return pointer to first element or NULL if empty. */ -UNIV_INLINE -void* -ib_bh_first( -/*========*/ - ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Return the last element from the binary heap. -@return pointer to last element or NULL if empty. 
*/ -UNIV_INLINE -void* -ib_bh_last( -/*========*/ - ib_bh_t* ib_bh); /*!< in/out: instance */ - -/**********************************************************************//** -Create a binary heap. -@return a new binary heap */ -UNIV_INTERN -ib_bh_t* -ib_bh_create( -/*=========*/ - ib_bh_cmp_t compare, /*!< in: comparator */ - ulint sizeof_elem, /*!< in: size of one element */ - ulint max_elems); /*!< in: max elements allowed */ - -/**********************************************************************//** -Free a binary heap. -@return a new binary heap */ -UNIV_INTERN -void -ib_bh_free( -/*=======*/ - ib_bh_t* ib_bh); /*!< in,own: instance */ - -/**********************************************************************//** -Add an element to the binary heap. Note: The element is copied. -@return pointer to added element or NULL if full. */ -UNIV_INTERN -void* -ib_bh_push( -/*=======*/ - ib_bh_t* ib_bh, /*!< in/out: instance */ - const void* elem); /*!< in: element to add */ - -/**********************************************************************//** -Remove the first element from the binary heap. */ -UNIV_INTERN -void -ib_bh_pop( -/*======*/ - ib_bh_t* ib_bh); /*!< in/out: instance */ - -/** Binary heap data structure */ -struct ib_bh_t { - ulint max_elems; /*!< max elements allowed */ - ulint n_elems; /*!< current size */ - ulint sizeof_elem; /*!< sizeof element */ - ib_bh_cmp_t compare; /*!< comparator */ -}; - -#ifndef UNIV_NONINL -#include "ut0bh.ic" -#endif - -#endif /* INNOBASE_UT0BH_H */ diff --git a/storage/xtradb/include/ut0bh.ic b/storage/xtradb/include/ut0bh.ic deleted file mode 100644 index b11de5b8b3e..00000000000 --- a/storage/xtradb/include/ut0bh.ic +++ /dev/null @@ -1,125 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0bh.ic -Binary min-heap implementation. - -Created 2011-01-15 by Sunny Bains -*******************************************************/ - -#include "ut0bh.h" -#include "ut0mem.h" /* For ut_memcpy() */ - -/**********************************************************************//** -Get the number of elements in the binary heap. -@return number of elements */ -UNIV_INLINE -ulint -ib_bh_size( -/*=======*/ - const ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh->n_elems); -} - -/**********************************************************************//** -Test if binary heap is empty. -@return TRUE if empty. */ -UNIV_INLINE -ibool -ib_bh_is_empty( -/*===========*/ - const ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh_size(ib_bh) == 0); -} - -/**********************************************************************//** -Test if binary heap is full. -@return TRUE if full. */ -UNIV_INLINE -ibool -ib_bh_is_full( -/*===========*/ - const ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh_size(ib_bh) >= ib_bh->max_elems); -} - -/**********************************************************************//** -Get a pointer to the element. 
-@return pointer to element */ -UNIV_INLINE -void* -ib_bh_get( -/*=======*/ - ib_bh_t* ib_bh, /*!< in: instance */ - ulint i) /*!< in: index */ -{ - byte* ptr = (byte*) (ib_bh + 1); - - ut_a(i < ib_bh_size(ib_bh)); - - return(ptr + (ib_bh->sizeof_elem * i)); -} - -/**********************************************************************//** -Copy an element to the binary heap. -@return pointer to copied element */ -UNIV_INLINE -void* -ib_bh_set( -/*======*/ - ib_bh_t* ib_bh, /*!< in/out: instance */ - ulint i, /*!< in: index */ - const void* elem) /*!< in: element to add */ -{ - void* ptr = ib_bh_get(ib_bh, i); - - ut_memcpy(ptr, elem, ib_bh->sizeof_elem); - - return(ptr); -} - -/**********************************************************************//** -Return the first element from the binary heap. -@return pointer to first element or NULL if empty. */ -UNIV_INLINE -void* -ib_bh_first( -/*========*/ - ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0)); -} - -/**********************************************************************//** -Return the last element from the binary heap. -@return pointer to last element or NULL if empty. */ -UNIV_INLINE -void* -ib_bh_last( -/*========*/ - ib_bh_t* ib_bh) /*!< in/out: instance */ -{ - return(ib_bh_is_empty(ib_bh) - ? NULL - : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1)); -} - diff --git a/storage/xtradb/include/ut0byte.h b/storage/xtradb/include/ut0byte.h deleted file mode 100644 index 4893ab9f9af..00000000000 --- a/storage/xtradb/include/ut0byte.h +++ /dev/null @@ -1,119 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0byte.h -Utilities for byte operations - -Created 1/20/1994 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0byte_h -#define ut0byte_h - - - -#include "univ.i" - -/*******************************************************//** -Creates a 64-bit integer out of two 32-bit integers. -@return created integer */ -UNIV_INLINE -ib_uint64_t -ut_ull_create( -/*==========*/ - ulint high, /*!< in: high-order 32 bits */ - ulint low) /*!< in: low-order 32 bits */ - MY_ATTRIBUTE((const)); - -/********************************************************//** -Rounds a 64-bit integer downward to a multiple of a power of 2. -@return rounded value */ -UNIV_INLINE -ib_uint64_t -ut_uint64_align_down( -/*=================*/ - ib_uint64_t n, /*!< in: number to be rounded */ - ulint align_no); /*!< in: align by this number - which must be a power of 2 */ -/********************************************************//** -Rounds ib_uint64_t upward to a multiple of a power of 2. -@return rounded value */ -UNIV_INLINE -ib_uint64_t -ut_uint64_align_up( -/*===============*/ - ib_uint64_t n, /*!< in: number to be rounded */ - ulint align_no); /*!< in: align by this number - which must be a power of 2 */ -/*********************************************************//** -The following function rounds up a pointer to the nearest aligned address. 
-@return aligned pointer */ -UNIV_INLINE -void* -ut_align( -/*=====*/ - const void* ptr, /*!< in: pointer */ - ulint align_no); /*!< in: align by this number */ -/*********************************************************//** -The following function rounds down a pointer to the nearest -aligned address. -@return aligned pointer */ -UNIV_INLINE -void* -ut_align_down( -/*==========*/ - const void* ptr, /*!< in: pointer */ - ulint align_no) /*!< in: align by this number */ - MY_ATTRIBUTE((const)); -/*********************************************************//** -The following function computes the offset of a pointer from the nearest -aligned address. -@return distance from aligned pointer */ -UNIV_INLINE -ulint -ut_align_offset( -/*============*/ - const void* ptr, /*!< in: pointer */ - ulint align_no) /*!< in: align by this number */ - MY_ATTRIBUTE((const)); -/*****************************************************************//** -Gets the nth bit of a ulint. -@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */ -UNIV_INLINE -ibool -ut_bit_get_nth( -/*===========*/ - ulint a, /*!< in: ulint */ - ulint n); /*!< in: nth bit requested */ -/*****************************************************************//** -Sets the nth bit of a ulint. -@return the ulint with the bit set as requested */ -UNIV_INLINE -ulint -ut_bit_set_nth( -/*===========*/ - ulint a, /*!< in: ulint */ - ulint n, /*!< in: nth bit requested */ - ibool val); /*!< in: value for the bit to set */ - -#ifndef UNIV_NONINL -#include "ut0byte.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic deleted file mode 100644 index 1a7af5ae33d..00000000000 --- a/storage/xtradb/include/ut0byte.ic +++ /dev/null @@ -1,173 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************************//** -@file include/ut0byte.ic -Utilities for byte operations - -Created 5/30/1994 Heikki Tuuri -*******************************************************************/ - -/*******************************************************//** -Creates a 64-bit integer out of two 32-bit integers. -@return created integer */ -UNIV_INLINE -ib_uint64_t -ut_ull_create( -/*==========*/ - ulint high, /*!< in: high-order 32 bits */ - ulint low) /*!< in: low-order 32 bits */ -{ - ut_ad(high <= ULINT32_MASK); - ut_ad(low <= ULINT32_MASK); - return(((ib_uint64_t) high) << 32 | low); -} - -/********************************************************//** -Rounds a 64-bit integer downward to a multiple of a power of 2. -@return rounded value */ -UNIV_INLINE -ib_uint64_t -ut_uint64_align_down( -/*=================*/ - ib_uint64_t n, /*!< in: number to be rounded */ - ulint align_no) /*!< in: align by this number - which must be a power of 2 */ -{ - ut_ad(align_no > 0); - ut_ad(ut_is_2pow(align_no)); - - return(n & ~((ib_uint64_t) align_no - 1)); -} - -/********************************************************//** -Rounds ib_uint64_t upward to a multiple of a power of 2. 
-@return rounded value */ -UNIV_INLINE -ib_uint64_t -ut_uint64_align_up( -/*===============*/ - ib_uint64_t n, /*!< in: number to be rounded */ - ulint align_no) /*!< in: align by this number - which must be a power of 2 */ -{ - ib_uint64_t align_1 = (ib_uint64_t) align_no - 1; - - ut_ad(align_no > 0); - ut_ad(ut_is_2pow(align_no)); - - return((n + align_1) & ~align_1); -} - -/*********************************************************//** -The following function rounds up a pointer to the nearest aligned address. -@return aligned pointer */ -UNIV_INLINE -void* -ut_align( -/*=====*/ - const void* ptr, /*!< in: pointer */ - ulint align_no) /*!< in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - ut_ad(ptr); - - ut_ad(sizeof(void*) == sizeof(ulint)); - - return((void*)((((ulint) ptr) + align_no - 1) & ~(align_no - 1))); -} - -/*********************************************************//** -The following function rounds down a pointer to the nearest -aligned address. -@return aligned pointer */ -UNIV_INLINE -void* -ut_align_down( -/*==========*/ - const void* ptr, /*!< in: pointer */ - ulint align_no) /*!< in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - ut_ad(ptr); - - ut_ad(sizeof(void*) == sizeof(ulint)); - - return((void*)(((ulint) ptr) & ~(align_no - 1))); -} - -/*********************************************************//** -The following function computes the offset of a pointer from the nearest -aligned address. -@return distance from aligned pointer */ -UNIV_INLINE -ulint -ut_align_offset( -/*============*/ - const void* ptr, /*!< in: pointer */ - ulint align_no) /*!< in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - ut_ad(ptr); - - ut_ad(sizeof(void*) == sizeof(ulint)); - - return(((ulint) ptr) & (align_no - 1)); -} - -/*****************************************************************//** -Gets the nth bit of a ulint. 
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */ -UNIV_INLINE -ibool -ut_bit_get_nth( -/*===========*/ - ulint a, /*!< in: ulint */ - ulint n) /*!< in: nth bit requested */ -{ - ut_ad(n < 8 * sizeof(ulint)); -#if TRUE != 1 -# error "TRUE != 1" -#endif - return(1 & (a >> n)); -} - -/*****************************************************************//** -Sets the nth bit of a ulint. -@return the ulint with the bit set as requested */ -UNIV_INLINE -ulint -ut_bit_set_nth( -/*===========*/ - ulint a, /*!< in: ulint */ - ulint n, /*!< in: nth bit requested */ - ibool val) /*!< in: value for the bit to set */ -{ - ut_ad(n < 8 * sizeof(ulint)); -#if TRUE != 1 -# error "TRUE != 1" -#endif - if (val) { - return(((ulint) 1 << n) | a); - } else { - return(~((ulint) 1 << n) & a); - } -} diff --git a/storage/xtradb/include/ut0counter.h b/storage/xtradb/include/ut0counter.h deleted file mode 100644 index 4f736428a17..00000000000 --- a/storage/xtradb/include/ut0counter.h +++ /dev/null @@ -1,159 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ut0counter.h - -Counter utility class - -Created 2012/04/12 by Sunny Bains -*******************************************************/ - -#ifndef UT0COUNTER_H -#define UT0COUNTER_H - -#include "univ.i" -#include <string.h> -#include "os0thread.h" - -/** CPU cache line size */ -#ifndef UNIV_HOTBACKUP -# ifdef CPU_LEVEL1_DCACHE_LINESIZE -# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE -# else -# error CPU_LEVEL1_DCACHE_LINESIZE is undefined -# endif /* CPU_LEVEL1_DCACHE_LINESIZE */ -#else -# define CACHE_LINE_SIZE 64 -#endif /* UNIV_HOTBACKUP */ - -/** Default number of slots to use in ib_counter_t */ -#define IB_N_SLOTS 64 - -/** Get the offset into the counter array. */ -template <typename Type, int N> -struct generic_indexer_t { - /** @return offset within m_counter */ - size_t offset(size_t index) const UNIV_NOTHROW { - return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type))); - } -}; - -#ifdef HAVE_SCHED_GETCPU -#include <utmpx.h> -/** Use the cpu id to index into the counter array. If it fails then -use the thread id. */ -template <typename Type, int N> -struct get_sched_indexer_t : public generic_indexer_t<Type, N> { - /* @return result from sched_getcpu(), the thread id if it fails. */ - size_t get_rnd_index() const UNIV_NOTHROW { - - size_t cpu = sched_getcpu(); - if (cpu == -1) { - cpu = (lint) os_thread_get_curr_id(); - } - - return(cpu); - } -}; -#endif /* HAVE_SCHED_GETCPU */ - -/** Use the thread id to index into the counter array. */ -template <typename Type, int N> -struct thread_id_indexer_t : public generic_indexer_t<Type, N> { - /* @return a random number, currently we use the thread id. 
Where - thread id is represented as a pointer, it may not work as - effectively. */ - size_t get_rnd_index() const UNIV_NOTHROW { - return((lint) os_thread_get_curr_id()); - } - - /** @return a random offset to the array */ - size_t get_rnd_offset() const UNIV_NOTHROW - { - return(generic_indexer_t<Type, N>::offset(get_rnd_index())); - } -}; - -/** Class for using fuzzy counters. The counter is not protected by any -mutex and the results are not guaranteed to be 100% accurate but close -enough. Creates an array of counters and separates each element by the -CACHE_LINE_SIZE bytes */ -template < - typename Type, - int N = IB_N_SLOTS, - template<typename, int> class Indexer = thread_id_indexer_t> -struct MY_ALIGNED(CACHE_LINE_SIZE) ib_counter_t -{ -#ifdef UNIV_DEBUG - ~ib_counter_t() - { - size_t n = (CACHE_LINE_SIZE / sizeof(Type)); - - /* Check that we aren't writing outside our defined bounds. */ - for (size_t i = 0; i < UT_ARR_SIZE(m_counter); i += n) { - for (size_t j = 1; j < n - 1; ++j) { - ut_ad(m_counter[i + j] == 0); - } - } - } -#endif /* UNIV_DEBUG */ - - /** Increment the counter by 1. */ - void inc() UNIV_NOTHROW { add(1); } - - /** Increment the counter by 1. - @param[in] index a reasonably thread-unique identifier */ - void inc(size_t index) UNIV_NOTHROW { add(index, 1); } - - /** Add to the counter. - @param[in] n amount to be added */ - void add(Type n) UNIV_NOTHROW { add(m_policy.get_rnd_offset(), n); } - - /** Add to the counter. - @param[in] index a reasonably thread-unique identifier - @param[in] n amount to be added */ - void add(size_t index, Type n) UNIV_NOTHROW { - size_t i = m_policy.offset(index); - - ut_ad(i < UT_ARR_SIZE(m_counter)); - - m_counter[i] += n; - } - - /* @return total value - not 100% accurate, since it is not atomic. 
*/ - operator Type() const UNIV_NOTHROW { - Type total = 0; - - for (size_t i = 0; i < N; ++i) { - total += m_counter[m_policy.offset(i)]; - } - - return(total); - } - -private: - /** Indexer into the array */ - Indexer<Type, N>m_policy; - - /** Slot 0 is unused. */ - Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))]; -}; - -#endif /* UT0COUNTER_H */ diff --git a/storage/xtradb/include/ut0crc32.h b/storage/xtradb/include/ut0crc32.h deleted file mode 100644 index d6dd376d9af..00000000000 --- a/storage/xtradb/include/ut0crc32.h +++ /dev/null @@ -1,51 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/ut0crc32.h -CRC32 implementation - -Created Aug 10, 2011 Vasil Dimov -*******************************************************/ - -#ifndef ut0crc32_h -#define ut0crc32_h - -#include "univ.i" - -/********************************************************************//** -Initializes the data structures used by ut_crc32(). Does not do any -allocations, would not hurt if called twice, but would be pointless. 
*/ -UNIV_INTERN -void -ut_crc32_init(); -/*===========*/ - -/********************************************************************//** -Calculates CRC32. -@param ptr - data over which to calculate CRC32. -@param len - data length in bytes. -@return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41, -or 0x1EDC6F41 without the high-order bit) */ -typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len); - -extern ib_ut_crc32_t ut_crc32; - -extern const char *ut_crc32_implementation; - -#endif /* ut0crc32_h */ diff --git a/storage/xtradb/include/ut0dbg.h b/storage/xtradb/include/ut0dbg.h deleted file mode 100644 index 3f5baef0a3c..00000000000 --- a/storage/xtradb/include/ut0dbg.h +++ /dev/null @@ -1,132 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*****************************************************************//** -@file include/ut0dbg.h -Debug utilities for Innobase - -Created 1/30/1994 Heikki Tuuri -**********************************************************************/ - -#ifndef ut0dbg_h -#define ut0dbg_h - -#ifdef UNIV_INNOCHECKSUM -#define ut_a assert -#define ut_ad assert -#define ut_error assert(0) -#else /* !UNIV_INNOCHECKSUM */ - -#include "univ.i" -#include <stdlib.h> -#include "os0thread.h" - -#if defined(__GNUC__) && (__GNUC__ > 2) -/** Test if an assertion fails. -@param EXPR assertion expression -@return nonzero if EXPR holds, zero if not */ -# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR))) -#else -/** This is used to eliminate compiler warnings */ -extern ulint ut_dbg_zero; -/** Test if an assertion fails. -@param EXPR assertion expression -@return nonzero if EXPR holds, zero if not */ -# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero) -#endif - -/*************************************************************//** -Report a failed assertion. */ -UNIV_INTERN -void -ut_dbg_assertion_failed( -/*====================*/ - const char* expr, /*!< in: the failed assertion */ - const char* file, /*!< in: source file containing the assertion */ - ulint line) /*!< in: line number of the assertion */ - UNIV_COLD MY_ATTRIBUTE((nonnull(2))); - -/** Abort the execution. */ -# define UT_DBG_PANIC abort() - -/** Abort execution if EXPR does not evaluate to nonzero. -@param EXPR assertion expression that should hold */ -#define ut_a(EXPR) do { \ - if (UT_DBG_FAIL(EXPR)) { \ - ut_dbg_assertion_failed(#EXPR, \ - __FILE__, (ulint) __LINE__); \ - UT_DBG_PANIC; \ - } \ -} while (0) - -/** Abort execution. 
*/ -#define ut_error do { \ - ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \ - UT_DBG_PANIC; \ -} while (0) - -#ifdef UNIV_DEBUG -/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */ -#define ut_ad(EXPR) ut_a(EXPR) -/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */ -#define ut_d(EXPR) do {EXPR;} while (0) -#else -/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */ -#define ut_ad(EXPR) -/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */ -#define ut_d(EXPR) -#endif - -/** Silence warnings about an unused variable by doing a null assignment. -@param A the unused variable */ -#define UT_NOT_USED(A) A = A - -#ifdef UNIV_COMPILE_TEST_FUNCS - -#include <sys/types.h> -#include <sys/time.h> -#include <sys/resource.h> - -/** structure used for recording usage statistics */ -struct speedo_t { - struct rusage ru; /*!< getrusage() result */ - struct timeval tv; /*!< gettimeofday() result */ -}; - -/*******************************************************************//** -Resets a speedo (records the current time in it). */ -UNIV_INTERN -void -speedo_reset( -/*=========*/ - speedo_t* speedo); /*!< out: speedo */ - -/*******************************************************************//** -Shows the time elapsed and usage statistics since the last reset of a -speedo. */ -UNIV_INTERN -void -speedo_show( -/*========*/ - const speedo_t* speedo); /*!< in: speedo */ - -#endif /* UNIV_COMPILE_TEST_FUNCS */ - -#endif /* !UNIV_INNOCHECKSUM */ - -#endif diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h deleted file mode 100644 index 796a272db59..00000000000 --- a/storage/xtradb/include/ut0list.h +++ /dev/null @@ -1,189 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0list.h -A double-linked list - -Created 4/26/2006 Osku Salerma -************************************************************************/ - -/*******************************************************************//** -A double-linked list. This differs from the one in ut0lst.h in that in this -one, each list node contains a pointer to the data, whereas the one in -ut0lst.h uses a strategy where the list pointers are embedded in the data -items themselves. - -Use this one when you need to store arbitrary data in the list where you -can't embed the list pointers in the data, if a data item needs to be -stored in multiple lists, etc. - -Note about the memory management: ib_list_t is a fixed-size struct whose -allocation/deallocation is done through ib_list_create/ib_list_free, but the -memory for the list nodes is allocated through a user-given memory heap, -which can either be the same for all nodes or vary per node. Most users will -probably want to create a memory heap to store the item-specific data, and -pass in this same heap to the list node creation functions, thus -automatically freeing the list node when the item's heap is freed. 
- -************************************************************************/ - -#ifndef IB_LIST_H -#define IB_LIST_H - -#include "mem0mem.h" - -struct ib_list_t; -struct ib_list_node_t; - -/****************************************************************//** -Create a new list using mem_alloc. Lists created with this function must be -freed with ib_list_free. -@return list */ -UNIV_INTERN -ib_list_t* -ib_list_create(void); -/*=================*/ - - -/****************************************************************//** -Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for -lists created with this function. -@return list */ -UNIV_INTERN -ib_list_t* -ib_list_create_heap( -/*================*/ - mem_heap_t* heap); /*!< in: memory heap to use */ - -/****************************************************************//** -Free a list. */ -UNIV_INTERN -void -ib_list_free( -/*=========*/ - ib_list_t* list); /*!< in: list */ - -/****************************************************************//** -Add the data to the start of the list. -@return new list node */ -UNIV_INTERN -ib_list_node_t* -ib_list_add_first( -/*==============*/ - ib_list_t* list, /*!< in: list */ - void* data, /*!< in: data */ - mem_heap_t* heap); /*!< in: memory heap to use */ - -/****************************************************************//** -Add the data to the end of the list. -@return new list node */ -UNIV_INTERN -ib_list_node_t* -ib_list_add_last( -/*=============*/ - ib_list_t* list, /*!< in: list */ - void* data, /*!< in: data */ - mem_heap_t* heap); /*!< in: memory heap to use */ - -/****************************************************************//** -Add the data after the indicated node. 
-@return new list node */ -UNIV_INTERN -ib_list_node_t* -ib_list_add_after( -/*==============*/ - ib_list_t* list, /*!< in: list */ - ib_list_node_t* prev_node, /*!< in: node preceding new node (can - be NULL) */ - void* data, /*!< in: data */ - mem_heap_t* heap); /*!< in: memory heap to use */ - -/****************************************************************//** -Remove the node from the list. */ -UNIV_INTERN -void -ib_list_remove( -/*===========*/ - ib_list_t* list, /*!< in: list */ - ib_list_node_t* node); /*!< in: node to remove */ - -/****************************************************************//** -Get the first node in the list. -@return first node, or NULL */ -UNIV_INLINE -ib_list_node_t* -ib_list_get_first( -/*==============*/ - ib_list_t* list); /*!< in: list */ - -/****************************************************************//** -Get the last node in the list. -@return last node, or NULL */ -UNIV_INLINE -ib_list_node_t* -ib_list_get_last( -/*=============*/ - ib_list_t* list); /*!< in: list */ - -/******************************************************************** -Check if list is empty. */ -UNIV_INLINE -ibool -ib_list_is_empty( -/*=============*/ - /* out: TRUE if empty else */ - const ib_list_t* list); /* in: list */ - -/******************************************************************** -Get number of items on list. -@return number of items on list */ -UNIV_INLINE -ulint -ib_list_len( -/*========*/ - const ib_list_t* list); /*<! in: list */ - -/* List. */ -struct ib_list_t { - ib_list_node_t* first; /*!< first node */ - ib_list_node_t* last; /*!< last node */ - ibool is_heap_list; /*!< TRUE if this list was - allocated through a heap */ -}; - -/* A list node. 
*/ -struct ib_list_node_t { - ib_list_node_t* prev; /*!< previous node */ - ib_list_node_t* next; /*!< next node */ - void* data; /*!< user data */ -}; - -/* Quite often, the only additional piece of data you need is the per-item -memory heap, so we have this generic struct available to use in those -cases. */ -struct ib_list_helper_t { - mem_heap_t* heap; /*!< memory heap */ - void* data; /*!< user data */ -}; - -#ifndef UNIV_NONINL -#include "ut0list.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic deleted file mode 100644 index 7a7f53adb2f..00000000000 --- a/storage/xtradb/include/ut0list.ic +++ /dev/null @@ -1,80 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0list.ic -A double-linked list - -Created 4/26/2006 Osku Salerma -************************************************************************/ - -/****************************************************************//** -Get the first node in the list. 
-@return first node, or NULL */ -UNIV_INLINE -ib_list_node_t* -ib_list_get_first( -/*==============*/ - ib_list_t* list) /*!< in: list */ -{ - return(list->first); -} - -/****************************************************************//** -Get the last node in the list. -@return last node, or NULL */ -UNIV_INLINE -ib_list_node_t* -ib_list_get_last( -/*=============*/ - ib_list_t* list) /*!< in: list */ -{ - return(list->last); -} - -/******************************************************************** -Check if list is empty. */ -UNIV_INLINE -ibool -ib_list_is_empty( -/*=============*/ - /* out: TRUE if empty else FALSE */ - const ib_list_t* list) /* in: list */ -{ - return(!(list->first || list->last)); -} - -/******************************************************************** -Get number of items on list. -@return number of items on list */ -UNIV_INLINE -ulint -ib_list_len( -/*========*/ - const ib_list_t* list) /*<! in: list */ -{ - ulint len = 0; - ib_list_node_t* node = list->first; - - while(node) { - len++; - node = node->next; - } - - return (len); -} diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h deleted file mode 100644 index b53e7ade4c1..00000000000 --- a/storage/xtradb/include/ut0lst.h +++ /dev/null @@ -1,408 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0lst.h -List utilities - -Created 9/10/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0lst_h -#define ut0lst_h - -#include "univ.i" - -/*******************************************************************//** -Return offset of F in POD T. -@param T - POD pointer -@param F - Field in T */ -#define IB_OFFSETOF(T, F) \ - (reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T)) - -/* This module implements the two-way linear list which should be used -if a list is used in the database. Note that a single struct may belong -to two or more lists, provided that the list are given different names. -An example of the usage of the lists can be found in fil0fil.cc. */ - -/*******************************************************************//** -This macro expands to the unnamed type definition of a struct which acts -as the two-way list base node. The base node contains pointers -to both ends of the list and a count of nodes in the list (excluding -the base node from the count). -@param TYPE the name of the list node data type */ -template <typename TYPE> -struct ut_list_base { - typedef TYPE elem_type; - - ulint count; /*!< count of nodes in list */ - TYPE* start; /*!< pointer to list start, NULL if empty */ - TYPE* end; /*!< pointer to list end, NULL if empty */ -}; - -#define UT_LIST_BASE_NODE_T(TYPE) ut_list_base<TYPE> - -/*******************************************************************//** -This macro expands to the unnamed type definition of a struct which -should be embedded in the nodes of the list, the node type must be a struct. 
-This struct contains the pointers to next and previous nodes in the list. -The name of the field in the node struct should be the name given -to the list. -@param TYPE the list node type name */ -/* Example: -struct LRU_node_t { - UT_LIST_NODE_T(LRU_node_t) LRU_list; - ... -} -The example implements an LRU list of name LRU_list. Its nodes are of type -LRU_node_t. */ - -template <typename TYPE> -struct ut_list_node { - TYPE* prev; /*!< pointer to the previous node, - NULL if start of list */ - TYPE* next; /*!< pointer to next node, NULL if end of list */ -}; - -#define UT_LIST_NODE_T(TYPE) ut_list_node<TYPE> - -/*******************************************************************//** -Get the list node at offset. -@param elem - list element -@param offset - offset within element. -@return reference to list node. */ -template <typename Type> -ut_list_node<Type>& -ut_elem_get_node(Type& elem, size_t offset) -{ - ut_a(offset < sizeof(elem)); - - return(*reinterpret_cast<ut_list_node<Type>*>( - reinterpret_cast<byte*>(&elem) + offset)); -} - -/*******************************************************************//** -Initializes the base node of a two-way list. -@param BASE the list base node -*/ -#define UT_LIST_INIT(BASE)\ -{\ - (BASE).count = 0;\ - (BASE).start = NULL;\ - (BASE).end = NULL;\ -}\ - -/*******************************************************************//** -Adds the node as the first element in a two-way linked list. -@param list the base node (not a pointer to it) -@param elem the element to add -@param offset offset of list node in elem. 
*/ -template <typename List, typename Type> -void -ut_list_prepend( - List& list, - Type& elem, - size_t offset) -{ - ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset); - - elem_node.prev = 0; - elem_node.next = list.start; - - if (list.start != 0) { - ut_list_node<Type>& base_node = - ut_elem_get_node(*list.start, offset); - - ut_ad(list.start != &elem); - - base_node.prev = &elem; - } - - list.start = &elem; - - if (list.end == 0) { - list.end = &elem; - } - - ++list.count; -} - -/*******************************************************************//** -Adds the node as the first element in a two-way linked list. -@param NAME list name -@param LIST the base node (not a pointer to it) -@param ELEM the element to add */ -#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM) \ - ut_list_prepend(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME)) - -/*******************************************************************//** -Adds the node as the last element in a two-way linked list. -@param list list -@param elem the element to add -@param offset offset of list node in elem */ -template <typename List, typename Type> -void -ut_list_append( - List& list, - Type& elem, - size_t offset) -{ - ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset); - - elem_node.next = 0; - elem_node.prev = list.end; - - if (list.end != 0) { - ut_list_node<Type>& base_node = - ut_elem_get_node(*list.end, offset); - - ut_ad(list.end != &elem); - - base_node.next = &elem; - } - - list.end = &elem; - - if (list.start == 0) { - list.start = &elem; - } - - ++list.count; -} - -/*******************************************************************//** -Adds the node as the last element in a two-way linked list. -@param NAME list name -@param LIST list -@param ELEM the element to add */ -#define UT_LIST_ADD_LAST(NAME, LIST, ELEM)\ - ut_list_append(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME)) - -/*******************************************************************//** -Inserts a ELEM2 after ELEM1 in a list. 
-@param list the base node -@param elem1 node after which ELEM2 is inserted -@param elem2 node being inserted after NODE1 -@param offset offset of list node in elem1 and elem2 */ -template <typename List, typename Type> -void -ut_list_insert( - List& list, - Type& elem1, - Type& elem2, - size_t offset) -{ - ut_ad(&elem1 != &elem2); - - ut_list_node<Type>& elem1_node = ut_elem_get_node(elem1, offset); - ut_list_node<Type>& elem2_node = ut_elem_get_node(elem2, offset); - - elem2_node.prev = &elem1; - elem2_node.next = elem1_node.next; - - if (elem1_node.next != NULL) { - ut_list_node<Type>& next_node = - ut_elem_get_node(*elem1_node.next, offset); - - next_node.prev = &elem2; - } - - elem1_node.next = &elem2; - - if (list.end == &elem1) { - list.end = &elem2; - } - - ++list.count; -} - -/*******************************************************************//** -Inserts a ELEM2 after ELEM1 in a list. -@param NAME list name -@param LIST the base node -@param ELEM1 node after which ELEM2 is inserted -@param ELEM2 node being inserted after ELEM1 */ -#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2)\ - ut_list_insert(LIST, *ELEM1, *ELEM2, IB_OFFSETOF(ELEM1, NAME)) - -#ifdef UNIV_LIST_DEBUG -/** Invalidate the pointers in a list node. -@param NAME list name -@param N pointer to the node that was removed */ -# define UT_LIST_REMOVE_CLEAR(N) \ - (N).next = (Type*) -1; \ - (N).prev = (N).next -#else -/** Invalidate the pointers in a list node. -@param NAME list name -@param N pointer to the node that was removed */ -# define UT_LIST_REMOVE_CLEAR(N) -#endif /* UNIV_LIST_DEBUG */ - -/*******************************************************************//** -Removes a node from a two-way linked list. 
-@param list the base node (not a pointer to it) -@param elem node to be removed from the list -@param offset offset of list node within elem */ -template <typename List, typename Type> -void -ut_list_remove( - List& list, - Type& elem, - size_t offset) -{ - ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset); - - ut_a(list.count > 0); - - if (elem_node.next != NULL) { - ut_list_node<Type>& next_node = - ut_elem_get_node(*elem_node.next, offset); - - next_node.prev = elem_node.prev; - } else { - list.end = elem_node.prev; - } - - if (elem_node.prev != NULL) { - ut_list_node<Type>& prev_node = - ut_elem_get_node(*elem_node.prev, offset); - - prev_node.next = elem_node.next; - } else { - list.start = elem_node.next; - } - - UT_LIST_REMOVE_CLEAR(elem_node); - - --list.count; -} - -/*******************************************************************//** -Removes a node from a two-way linked list. - aram NAME list name -@param LIST the base node (not a pointer to it) -@param ELEM node to be removed from the list */ -#define UT_LIST_REMOVE(NAME, LIST, ELEM) \ - ut_list_remove(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME)) - -/********************************************************************//** -Gets the next node in a two-way list. -@param NAME list name -@param N pointer to a node -@return the successor of N in NAME, or NULL */ -#define UT_LIST_GET_NEXT(NAME, N)\ - (((N)->NAME).next) - -/********************************************************************//** -Gets the previous node in a two-way list. -@param NAME list name -@param N pointer to a node -@return the predecessor of N in NAME, or NULL */ -#define UT_LIST_GET_PREV(NAME, N)\ - (((N)->NAME).prev) - -/********************************************************************//** -Alternative macro to get the number of nodes in a two-way list, i.e., -its length. -@param BASE the base node (not a pointer to it). 
-@return the number of nodes in the list */ -#define UT_LIST_GET_LEN(BASE)\ - (BASE).count - -/********************************************************************//** -Gets the first node in a two-way list. -@param BASE the base node (not a pointer to it) -@return first node, or NULL if the list is empty */ -#define UT_LIST_GET_FIRST(BASE)\ - (BASE).start - -/********************************************************************//** -Gets the last node in a two-way list. -@param BASE the base node (not a pointer to it) -@return last node, or NULL if the list is empty */ -#define UT_LIST_GET_LAST(BASE)\ - (BASE).end - -struct NullValidate { void operator()(const void* elem) { } }; - -/********************************************************************//** -Iterate over all the elements and call the functor for each element. -@param list base node (not a pointer to it) -@param functor Functor that is called for each element in the list -@parm node pointer to member node within list element */ -template <typename List, class Functor> -void -ut_list_map( - List& list, - ut_list_node<typename List::elem_type> - List::elem_type::*node, - Functor functor) -{ - ulint count = 0; - - for (typename List::elem_type* elem = list.start; - elem != 0; - elem = (elem->*node).next, ++count) { - - functor(elem); - } - - ut_a(count == list.count); -} - -/********************************************************************//** -Checks the consistency of a two-way list. 
-@param list base node (not a pointer to it) -@param functor Functor that is called for each element in the list -@parm node pointer to member node within list element */ -template <typename List, class Functor> -void -ut_list_validate( - List& list, - ut_list_node<typename List::elem_type> - List::elem_type::*node, - Functor functor = NullValidate()) -{ - ut_list_map(list, node, functor); - - ulint count = 0; - - for (typename List::elem_type* elem = list.end; - elem != 0; - elem = (elem->*node).prev, ++count) { - - functor(elem); - } - - ut_a(count == list.count); -} - -/********************************************************************//** -Checks the consistency of a two-way list. -@param NAME the name of the list -@param TYPE node type -@param LIST base node (not a pointer to it) -@param FUNCTOR called for each list element */ -#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR) \ - ut_list_validate(LIST, &TYPE::NAME, FUNCTOR) - -#define UT_LIST_CHECK(NAME, TYPE, LIST) \ - ut_list_validate(LIST, &TYPE::NAME, NullValidate()) - -#endif /* ut0lst.h */ diff --git a/storage/xtradb/include/ut0mem.h b/storage/xtradb/include/ut0mem.h deleted file mode 100644 index 81470358f2f..00000000000 --- a/storage/xtradb/include/ut0mem.h +++ /dev/null @@ -1,261 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0mem.h -Memory primitives - -Created 5/30/1994 Heikki Tuuri -************************************************************************/ - -#ifndef ut0mem_h -#define ut0mem_h - -#include "univ.i" -#include <string.h> -#ifndef UNIV_HOTBACKUP -# include "os0sync.h" - -/** The total amount of memory currently allocated from the operating -system with os_mem_alloc_large() or malloc(). Does not count malloc() -if srv_use_sys_malloc is set. Protected by ut_list_mutex. */ -extern ulint ut_total_allocated_memory; - -/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */ -extern os_fast_mutex_t ut_list_mutex; -#endif /* !UNIV_HOTBACKUP */ - -/** Wrapper for memcpy(3). Copy memory area when the source and -target are not overlapping. -* @param dest in: copy to -* @param sour in: copy from -* @param n in: number of bytes to copy -* @return dest */ -UNIV_INLINE -void* -ut_memcpy(void* dest, const void* sour, ulint n); - -/** Wrapper for memmove(3). Copy memory area when the source and -target are overlapping. -* @param dest in: copy to -* @param sour in: copy from -* @param n in: number of bytes to copy -* @return dest */ -UNIV_INLINE -void* -ut_memmove(void* dest, const void* sour, ulint n); - -/** Wrapper for memcmp(3). Compare memory areas. -* @param str1 in: first memory block to compare -* @param str2 in: second memory block to compare -* @param n in: number of bytes to compare -* @return negative, 0, or positive if str1 is smaller, equal, - or greater than str2, respectively. 
*/ -UNIV_INLINE -int -ut_memcmp(const void* str1, const void* str2, ulint n); - -/**********************************************************************//** -Initializes the mem block list at database startup. */ -UNIV_INTERN -void -ut_mem_init(void); -/*=============*/ - -/**********************************************************************//** -Allocates memory. -@return own: allocated memory */ -UNIV_INTERN -void* -ut_malloc_low( -/*==========*/ - ulint n, /*!< in: number of bytes to allocate */ - ibool assert_on_error) /*!< in: if TRUE, we crash mysqld if - the memory cannot be allocated */ - MY_ATTRIBUTE((malloc)); -/**********************************************************************//** -Allocates memory. */ -#define ut_malloc(n) ut_malloc_low(n, TRUE) -/**********************************************************************//** -Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is -a nop. */ -UNIV_INTERN -void -ut_free( -/*====*/ - void* ptr); /*!< in, own: memory block, can be NULL */ -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not -use this function because the allocation functions in mem0mem.h are the -recommended ones in InnoDB. - -man realloc in Linux, 2004: - - realloc() changes the size of the memory block pointed to - by ptr to size bytes. The contents will be unchanged to - the minimum of the old and new sizes; newly allocated mem - ory will be uninitialized. If ptr is NULL, the call is - equivalent to malloc(size); if size is equal to zero, the - call is equivalent to free(ptr). Unless ptr is NULL, it - must have been returned by an earlier call to malloc(), - calloc() or realloc(). - -RETURN VALUE - realloc() returns a pointer to the newly allocated memory, - which is suitably aligned for any kind of variable and may - be different from ptr, or NULL if the request fails. 
If - size was equal to 0, either NULL or a pointer suitable to - be passed to free() is returned. If realloc() fails the - original block is left untouched - it is not freed or - moved. -@return own: pointer to new mem block or NULL */ -UNIV_INTERN -void* -ut_realloc( -/*=======*/ - void* ptr, /*!< in: pointer to old block or NULL */ - ulint size); /*!< in: desired size */ -/**********************************************************************//** -Frees in shutdown all allocated memory not freed yet. */ -UNIV_INTERN -void -ut_free_all_mem(void); -/*=================*/ -#endif /* !UNIV_HOTBACKUP */ - -/** Wrapper for strcpy(3). Copy a NUL-terminated string. -* @param dest in: copy to -* @param sour in: copy from -* @return dest */ -UNIV_INLINE -char* -ut_strcpy(char* dest, const char* sour); - -/** Wrapper for strlen(3). Determine the length of a NUL-terminated string. -* @param str in: string -* @return length of the string in bytes, excluding the terminating NUL */ -UNIV_INLINE -ulint -ut_strlen(const char* str); - -/** Wrapper for strcmp(3). Compare NUL-terminated strings. -* @param str1 in: first string to compare -* @param str2 in: second string to compare -* @return negative, 0, or positive if str1 is smaller, equal, - or greater than str2, respectively. */ -UNIV_INLINE -int -ut_strcmp(const char* str1, const char* str2); - -/**********************************************************************//** -Copies up to size - 1 characters from the NUL-terminated string src to -dst, NUL-terminating the result. Returns strlen(src), so truncation -occurred if the return value >= size. 
-@return strlen(src) */ -UNIV_INTERN -ulint -ut_strlcpy( -/*=======*/ - char* dst, /*!< in: destination buffer */ - const char* src, /*!< in: source buffer */ - ulint size); /*!< in: size of destination buffer */ - -/**********************************************************************//** -Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last -(size - 1) bytes of src, not the first. -@return strlen(src) */ -UNIV_INTERN -ulint -ut_strlcpy_rev( -/*===========*/ - char* dst, /*!< in: destination buffer */ - const char* src, /*!< in: source buffer */ - ulint size); /*!< in: size of destination buffer */ - -/**********************************************************************//** -Return the number of times s2 occurs in s1. Overlapping instances of s2 -are only counted once. -@return the number of times s2 occurs in s1 */ -UNIV_INTERN -ulint -ut_strcount( -/*========*/ - const char* s1, /*!< in: string to search in */ - const char* s2); /*!< in: string to search for */ - -/**********************************************************************//** -Replace every occurrence of s1 in str with s2. Overlapping instances of s1 -are only replaced once. -@return own: modified string, must be freed with mem_free() */ -UNIV_INTERN -char* -ut_strreplace( -/*==========*/ - const char* str, /*!< in: string to operate on */ - const char* s1, /*!< in: string to replace */ - const char* s2); /*!< in: string to replace s1 with */ - -/******************************************************************** -Concatenate 3 strings.*/ - -char* -ut_str3cat( -/*=======*/ - /* out, own: concatenated string, must be - freed with mem_free() */ - const char* s1, /* in: string 1 */ - const char* s2, /* in: string 2 */ - const char* s3); /* in: string 3 */ - -/**********************************************************************//** -Converts a raw binary data to a NUL-terminated hex string. 
The output is -truncated if there is not enough space in "hex", make sure "hex_size" is at -least (2 * raw_size + 1) if you do not want this to happen. Returns the -actual number of characters written to "hex" (including the NUL). -@return number of chars written */ -UNIV_INLINE -ulint -ut_raw_to_hex( -/*==========*/ - const void* raw, /*!< in: raw data */ - ulint raw_size, /*!< in: "raw" length in bytes */ - char* hex, /*!< out: hex string */ - ulint hex_size); /*!< in: "hex" size in bytes */ - -/*******************************************************************//** -Adds single quotes to the start and end of string and escapes any quotes -by doubling them. Returns the number of bytes that were written to "buf" -(including the terminating NUL). If buf_size is too small then the -trailing bytes from "str" are discarded. -@return number of bytes that were written */ -UNIV_INLINE -ulint -ut_str_sql_format( -/*==============*/ - const char* str, /*!< in: string */ - ulint str_len, /*!< in: string length in bytes */ - char* buf, /*!< out: output buffer */ - ulint buf_size); /*!< in: output buffer size - in bytes */ - -#ifndef UNIV_NONINL -#include "ut0mem.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ut0mem.ic b/storage/xtradb/include/ut0mem.ic deleted file mode 100644 index 5c9071d52cc..00000000000 --- a/storage/xtradb/include/ut0mem.ic +++ /dev/null @@ -1,317 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0mem.ic -Memory primitives - -Created 5/30/1994 Heikki Tuuri -************************************************************************/ - -#include "ut0byte.h" -#include "mach0data.h" - -/** Wrapper for memcpy(3). Copy memory area when the source and -target are not overlapping. -* @param dest in: copy to -* @param sour in: copy from -* @param n in: number of bytes to copy -* @return dest */ -UNIV_INLINE -void* -ut_memcpy(void* dest, const void* sour, ulint n) -{ - return(memcpy(dest, sour, n)); -} - -/** Wrapper for memmove(3). Copy memory area when the source and -target are overlapping. -* @param dest in: copy to -* @param sour in: copy from -* @param n in: number of bytes to copy -* @return dest */ -UNIV_INLINE -void* -ut_memmove(void* dest, const void* sour, ulint n) -{ - return(memmove(dest, sour, n)); -} - -/** Wrapper for memcmp(3). Compare memory areas. -* @param str1 in: first memory block to compare -* @param str2 in: second memory block to compare -* @param n in: number of bytes to compare -* @return negative, 0, or positive if str1 is smaller, equal, - or greater than str2, respectively. */ -UNIV_INLINE -int -ut_memcmp(const void* str1, const void* str2, ulint n) -{ - return(memcmp(str1, str2, n)); -} - -/** Wrapper for strcpy(3). Copy a NUL-terminated string. -* @param dest in: copy to -* @param sour in: copy from -* @return dest */ -UNIV_INLINE -char* -ut_strcpy(char* dest, const char* sour) -{ - return(strcpy(dest, sour)); -} - -/** Wrapper for strlen(3). Determine the length of a NUL-terminated string. 
-* @param str in: string -* @return length of the string in bytes, excluding the terminating NUL */ -UNIV_INLINE -ulint -ut_strlen(const char* str) -{ - return(strlen(str)); -} - -/** Wrapper for strcmp(3). Compare NUL-terminated strings. -* @param str1 in: first string to compare -* @param str2 in: second string to compare -* @return negative, 0, or positive if str1 is smaller, equal, - or greater than str2, respectively. */ -UNIV_INLINE -int -ut_strcmp(const char* str1, const char* str2) -{ - return(strcmp(str1, str2)); -} - -/**********************************************************************//** -Converts a raw binary data to a NUL-terminated hex string. The output is -truncated if there is not enough space in "hex", make sure "hex_size" is at -least (2 * raw_size + 1) if you do not want this to happen. Returns the -actual number of characters written to "hex" (including the NUL). -@return number of chars written */ -UNIV_INLINE -ulint -ut_raw_to_hex( -/*==========*/ - const void* raw, /*!< in: raw data */ - ulint raw_size, /*!< in: "raw" length in bytes */ - char* hex, /*!< out: hex string */ - ulint hex_size) /*!< in: "hex" size in bytes */ -{ - -#ifdef WORDS_BIGENDIAN - -#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b)) - -#define UINT16_GET_A(u) ((unsigned char) ((u) >> 8)) -#define UINT16_GET_B(u) ((unsigned char) ((u) & 0xFF)) - -#else /* WORDS_BIGENDIAN */ - -#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a)) - -#define UINT16_GET_A(u) ((unsigned char) ((u) & 0xFF)) -#define UINT16_GET_B(u) ((unsigned char) ((u) >> 8)) - -#endif /* WORDS_BIGENDIAN */ - -#define MK_ALL_UINT16_WITH_A(a) \ - MK_UINT16(a, '0'), \ - MK_UINT16(a, '1'), \ - MK_UINT16(a, '2'), \ - MK_UINT16(a, '3'), \ - MK_UINT16(a, '4'), \ - MK_UINT16(a, '5'), \ - MK_UINT16(a, '6'), \ - MK_UINT16(a, '7'), \ - MK_UINT16(a, '8'), \ - MK_UINT16(a, '9'), \ - MK_UINT16(a, 'A'), \ - MK_UINT16(a, 'B'), \ - MK_UINT16(a, 'C'), \ - MK_UINT16(a, 'D'), \ - MK_UINT16(a, 'E'), \ - 
MK_UINT16(a, 'F') - - static const uint16 hex_map[256] = { - MK_ALL_UINT16_WITH_A('0'), - MK_ALL_UINT16_WITH_A('1'), - MK_ALL_UINT16_WITH_A('2'), - MK_ALL_UINT16_WITH_A('3'), - MK_ALL_UINT16_WITH_A('4'), - MK_ALL_UINT16_WITH_A('5'), - MK_ALL_UINT16_WITH_A('6'), - MK_ALL_UINT16_WITH_A('7'), - MK_ALL_UINT16_WITH_A('8'), - MK_ALL_UINT16_WITH_A('9'), - MK_ALL_UINT16_WITH_A('A'), - MK_ALL_UINT16_WITH_A('B'), - MK_ALL_UINT16_WITH_A('C'), - MK_ALL_UINT16_WITH_A('D'), - MK_ALL_UINT16_WITH_A('E'), - MK_ALL_UINT16_WITH_A('F') - }; - const unsigned char* rawc; - ulint read_bytes; - ulint write_bytes; - ulint i; - - rawc = (const unsigned char*) raw; - - if (hex_size == 0) { - - return(0); - } - - if (hex_size <= 2 * raw_size) { - - read_bytes = hex_size / 2; - write_bytes = hex_size; - } else { - - read_bytes = raw_size; - write_bytes = 2 * raw_size + 1; - } - -#define LOOP_READ_BYTES(ASSIGN) \ - for (i = 0; i < read_bytes; i++) { \ - ASSIGN; \ - hex += 2; \ - rawc++; \ - } - - if (ut_align_offset(hex, 2) == 0) { - - LOOP_READ_BYTES( - *(uint16*) hex = hex_map[*rawc] - ); - } else { - - LOOP_READ_BYTES( - *hex = UINT16_GET_A(hex_map[*rawc]); - *(hex + 1) = UINT16_GET_B(hex_map[*rawc]) - ); - } - - if (hex_size <= 2 * raw_size && hex_size % 2 == 0) { - - hex--; - } - - *hex = '\0'; - - return(write_bytes); -} - -/*******************************************************************//** -Adds single quotes to the start and end of string and escapes any quotes -by doubling them. Returns the number of bytes that were written to "buf" -(including the terminating NUL). If buf_size is too small then the -trailing bytes from "str" are discarded. 
-@return number of bytes that were written */ -UNIV_INLINE -ulint -ut_str_sql_format( -/*==============*/ - const char* str, /*!< in: string */ - ulint str_len, /*!< in: string length in bytes */ - char* buf, /*!< out: output buffer */ - ulint buf_size) /*!< in: output buffer size - in bytes */ -{ - ulint str_i; - ulint buf_i; - - buf_i = 0; - - switch (buf_size) { - case 3: - - if (str_len == 0) { - - buf[buf_i] = '\''; - buf_i++; - buf[buf_i] = '\''; - buf_i++; - } - /* FALLTHROUGH */ - case 2: - case 1: - - buf[buf_i] = '\0'; - buf_i++; - /* FALLTHROUGH */ - case 0: - - return(buf_i); - } - - /* buf_size >= 4 */ - - buf[0] = '\''; - buf_i = 1; - - for (str_i = 0; str_i < str_len; str_i++) { - - char ch; - - if (buf_size - buf_i == 2) { - - break; - } - - ch = str[str_i]; - - switch (ch) { - case '\0': - - if (buf_size - buf_i < 4) { - - goto func_exit; - } - buf[buf_i] = '\\'; - buf_i++; - buf[buf_i] = '0'; - buf_i++; - break; - case '\'': - case '\\': - - if (buf_size - buf_i < 4) { - - goto func_exit; - } - buf[buf_i] = ch; - buf_i++; - /* FALLTHROUGH */ - default: - - buf[buf_i] = ch; - buf_i++; - } - } - -func_exit: - - buf[buf_i] = '\''; - buf_i++; - buf[buf_i] = '\0'; - buf_i++; - - return(buf_i); -} diff --git a/storage/xtradb/include/ut0rbt.h b/storage/xtradb/include/ut0rbt.h deleted file mode 100644 index 5c25104b5d7..00000000000 --- a/storage/xtradb/include/ut0rbt.h +++ /dev/null @@ -1,346 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ -/******************************************************************//** -@file include/ut0rbt.h -Various utilities - -Created 2007-03-20 Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_UT0RBT_H -#define INNOBASE_UT0RBT_H - -#if !defined(IB_RBT_TESTING) -#include "univ.i" -#include "ut0mem.h" -#else -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> - -#define ut_malloc malloc -#define ut_free free -#define ulint unsigned long -#define ut_a(c) assert(c) -#define ut_error assert(0) -#define ibool unsigned int -#define TRUE 1 -#define FALSE 0 -#endif - -struct ib_rbt_node_t; -typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node); -typedef int (*ib_rbt_compare)(const void* p1, const void* p2); -typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2); - -/** Red black tree color types */ -enum ib_rbt_color_t { - IB_RBT_RED, - IB_RBT_BLACK -}; - -/** Red black tree node */ -struct ib_rbt_node_t { - ib_rbt_color_t color; /* color of this node */ - - ib_rbt_node_t* left; /* points left child */ - ib_rbt_node_t* right; /* points right child */ - ib_rbt_node_t* parent; /* points parent node */ - - char value[1]; /* Data value */ -}; - -/** Red black tree instance.*/ -struct ib_rbt_t { - ib_rbt_node_t* nil; /* Black colored node that is - used as a sentinel. This is - pre-allocated too.*/ - - ib_rbt_node_t* root; /* Root of the tree, this is - pre-allocated and the first - data node is the left child.*/ - - ulint n_nodes; /* Total number of data nodes */ - - ib_rbt_compare compare; /* Fn. 
to use for comparison */ - ib_rbt_arg_compare - compare_with_arg; /* Fn. to use for comparison - with argument */ - ulint sizeof_value; /* Sizeof the item in bytes */ - void* cmp_arg; /* Compare func argument */ -}; - -/** The result of searching for a key in the tree, this is useful for -a speedy lookup and insert if key doesn't exist.*/ -struct ib_rbt_bound_t { - const ib_rbt_node_t* - last; /* Last node visited */ - - int result; /* Result of comparing with - the last non-nil node that - was visited */ -}; - -/* Size in elements (t is an rb tree instance) */ -#define rbt_size(t) (t->n_nodes) - -/* Check whether the rb tree is empty (t is an rb tree instance) */ -#define rbt_empty(t) (rbt_size(t) == 0) - -/* Get data value (t is the data type, n is an rb tree node instance) */ -#define rbt_value(t, n) ((t*) &n->value[0]) - -/* Compare a key with the node value (t is tree, k is key, n is node)*/ -#define rbt_compare(t, k, n) (t->compare(k, n->value)) - -/* Node size. FIXME: name might clash, but currently it does not, so for easier - maintenance do not rename it for now. 
*/ -#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1) - -/**********************************************************************//** -Free an instance of a red black tree */ -UNIV_INTERN -void -rbt_free( -/*=====*/ - ib_rbt_t* tree); /*!< in: rb tree to free */ -/**********************************************************************//** -Create an instance of a red black tree -@return rb tree instance */ -UNIV_INTERN -ib_rbt_t* -rbt_create( -/*=======*/ - size_t sizeof_value, /*!< in: size in bytes */ - ib_rbt_compare compare); /*!< in: comparator */ -/**********************************************************************//** -Create an instance of a red black tree, whose comparison function takes -an argument -@return rb tree instance */ -UNIV_INTERN -ib_rbt_t* -rbt_create_arg_cmp( -/*===============*/ - size_t sizeof_value, /*!< in: size in bytes */ - ib_rbt_arg_compare - compare, /*!< in: comparator */ - void* cmp_arg); /*!< in: compare fn arg */ -/**********************************************************************//** -Delete a node from the red black tree, identified by key */ -UNIV_INTERN -ibool -rbt_delete( -/*=======*/ - /* in: TRUE on success */ - ib_rbt_t* tree, /* in: rb tree */ - const void* key); /* in: key to delete */ -/**********************************************************************//** -Remove a node from the red black tree, NOTE: This function will not delete -the node instance, THAT IS THE CALLERS RESPONSIBILITY. -@return the deleted node with the const. 
*/ -UNIV_INTERN -ib_rbt_node_t* -rbt_remove_node( -/*============*/ - ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* - node); /*!< in: node to delete, this - is a fudge and declared const - because the caller has access - only to const nodes.*/ -/**********************************************************************//** -Return a node from the red black tree, identified by -key, NULL if not found -@return node if found else return NULL */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_lookup( -/*=======*/ - const ib_rbt_t* tree, /*!< in: rb tree to search */ - const void* key); /*!< in: key to lookup */ -/**********************************************************************//** -Add data to the red black tree, identified by key (no dups yet!) -@return inserted node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_insert( -/*=======*/ - ib_rbt_t* tree, /*!< in: rb tree */ - const void* key, /*!< in: key for ordering */ - const void* value); /*!< in: data that will be - copied to the node.*/ -/**********************************************************************//** -Add a new node to the tree, useful for data that is pre-sorted. -@return appended node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_add_node( -/*=========*/ - ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in: parent */ - const void* value); /*!< in: this value is copied - to the node */ -/****************************************************************//** -Add a new caller-provided node to tree at the specified position. -The node must have its key fields initialized correctly. 
-@return added node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_add_preallocated_node( -/*======================*/ - ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in: parent */ - ib_rbt_node_t* node); /*!< in: node */ -/**********************************************************************//** -Return the left most data node in the tree -@return left most node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_first( -/*======*/ - const ib_rbt_t* tree); /*!< in: rb tree */ -/**********************************************************************//** -Return the right most data node in the tree -@return right most node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_last( -/*=====*/ - const ib_rbt_t* tree); /*!< in: rb tree */ -/**********************************************************************//** -Return the next node from current. -@return successor node to current that is passed in. */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_next( -/*=====*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* /* in: current node */ - current); -/**********************************************************************//** -Return the prev node from current. -@return precedessor node to current that is passed in */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_prev( -/*=====*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* /* in: current node */ - current); -/**********************************************************************//** -Find the node that has the lowest key that is >= key. -@return node that satisfies the lower bound constraint or NULL */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_lower_bound( -/*============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const void* key); /*!< in: key to search */ -/**********************************************************************//** -Find the node that has the greatest key that is <= key. 
-@return node that satisifies the upper bound constraint or NULL */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_upper_bound( -/*============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const void* key); /*!< in: key to search */ -/**********************************************************************//** -Search for the key, a node will be retuned in parent.last, whether it -was found or not. If not found then parent.last will contain the -parent node for the possibly new key otherwise the matching node. -@return result of last comparison */ -UNIV_INTERN -int -rbt_search( -/*=======*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in: search bounds */ - const void* key); /*!< in: key to search */ -/**********************************************************************//** -Search for the key, a node will be retuned in parent.last, whether it -was found or not. If not found then parent.last will contain the -parent node for the possibly new key otherwise the matching node. -@return result of last comparison */ -UNIV_INTERN -int -rbt_search_cmp( -/*===========*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in: search bounds */ - const void* key, /*!< in: key to search */ - ib_rbt_compare compare, /*!< in: comparator */ - ib_rbt_arg_compare - arg_compare); /*!< in: fn to compare items - with argument */ -/**********************************************************************//** -Clear the tree, deletes (and free's) all the nodes. */ -UNIV_INTERN -void -rbt_clear( -/*======*/ - ib_rbt_t* tree); /*!< in: rb tree */ -/****************************************************************//** -Clear the tree without deleting and freeing its nodes. */ -UNIV_INTERN -void -rbt_reset( -/*======*/ - ib_rbt_t* tree); /*!< in: rb tree */ -/**********************************************************************//** -Merge the node from dst into src. Return the number of nodes merged. -@return no. 
of recs merged */ -UNIV_INTERN -ulint -rbt_merge_uniq( -/*===========*/ - ib_rbt_t* dst, /*!< in: dst rb tree */ - const ib_rbt_t* src); /*!< in: src rb tree */ -/**********************************************************************//** -Merge the node from dst into src. Return the number of nodes merged. -Delete the nodes from src after copying node to dst. As a side effect -the duplicates will be left untouched in the src, since we don't support -duplicates (yet). NOTE: src and dst must be similar, the function doesn't -check for this condition (yet). -@return no. of recs merged */ -UNIV_INTERN -ulint -rbt_merge_uniq_destructive( -/*=======================*/ - ib_rbt_t* dst, /*!< in: dst rb tree */ - ib_rbt_t* src); /*!< in: src rb tree */ -/**********************************************************************//** -Verify the integrity of the RB tree. For debugging. 0 failure else height -of tree (in count of black nodes). -@return TRUE if OK FALSE if tree invalid. */ -UNIV_INTERN -ibool -rbt_validate( -/*=========*/ - const ib_rbt_t* tree); /*!< in: tree to validate */ -/**********************************************************************//** -Iterate over the tree in depth first order. */ -UNIV_INTERN -void -rbt_print( -/*======*/ - const ib_rbt_t* tree, /*!< in: tree to traverse */ - ib_rbt_print_node print); /*!< in: print function */ - -#endif /* INNOBASE_UT0RBT_H */ diff --git a/storage/xtradb/include/ut0rnd.h b/storage/xtradb/include/ut0rnd.h deleted file mode 100644 index 6ed3ee3b2e5..00000000000 --- a/storage/xtradb/include/ut0rnd.h +++ /dev/null @@ -1,148 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0rnd.h -Random numbers and hashing - -Created 1/20/1994 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0rnd_h -#define ut0rnd_h - -#include "univ.i" - -#ifndef UNIV_INNOCHECKSUM - -#include "ut0byte.h" - -/** The 'character code' for end of field or string (used -in folding records */ -#define UT_END_OF_FIELD 257 - -/********************************************************//** -This is used to set the random number seed. */ -UNIV_INLINE -void -ut_rnd_set_seed( -/*============*/ - ulint seed); /*!< in: seed */ -/********************************************************//** -The following function generates a series of 'random' ulint integers. -@return the next 'random' number */ -UNIV_INLINE -ulint -ut_rnd_gen_next_ulint( -/*==================*/ - ulint rnd); /*!< in: the previous random number value */ -/*********************************************************//** -The following function generates 'random' ulint integers which -enumerate the value space (let there be N of them) of ulint integers -in a pseudo-random fashion. Note that the same integer is repeated -always after N calls to the generator. 
-@return the 'random' number */ -UNIV_INLINE -ulint -ut_rnd_gen_ulint(void); -/*==================*/ -/********************************************************//** -Generates a random integer from a given interval. -@return the 'random' number */ -UNIV_INLINE -ulint -ut_rnd_interval( -/*============*/ - ulint low, /*!< in: low limit; can generate also this value */ - ulint high); /*!< in: high limit; can generate also this value */ -/*********************************************************//** -Generates a random iboolean value. -@return the random value */ -UNIV_INLINE -ibool -ut_rnd_gen_ibool(void); -/*=================*/ -/*******************************************************//** -The following function generates a hash value for a ulint integer -to a hash table of size table_size, which should be a prime or some -random number to work reliably. -@return hash value */ -UNIV_INLINE -ulint -ut_hash_ulint( -/*==========*/ - ulint key, /*!< in: value to be hashed */ - ulint table_size); /*!< in: hash table size */ -/*************************************************************//** -Folds a 64-bit integer. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_ull( -/*========*/ - ib_uint64_t d) /*!< in: 64-bit integer */ - MY_ATTRIBUTE((const)); -/*************************************************************//** -Folds a character string ending in the null character. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_string( -/*===========*/ - const char* str) /*!< in: null-terminated string */ - MY_ATTRIBUTE((pure)); -/***********************************************************//** -Looks for a prime number slightly greater than the given argument. -The prime is chosen so that it is not near any power of 2. 
-@return prime */ -UNIV_INTERN -ulint -ut_find_prime( -/*==========*/ - ulint n) /*!< in: positive number > 100 */ - MY_ATTRIBUTE((const)); - -#endif /* !UNIV_INNOCHECKSUM */ - -/*************************************************************//** -Folds a pair of ulints. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_ulint_pair( -/*===============*/ - ulint n1, /*!< in: ulint */ - ulint n2) /*!< in: ulint */ - MY_ATTRIBUTE((const)); -/*************************************************************//** -Folds a binary string. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_binary( -/*===========*/ - const byte* str, /*!< in: string of bytes */ - ulint len) /*!< in: length */ - MY_ATTRIBUTE((pure)); - - -#ifndef UNIV_NONINL -#include "ut0rnd.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic deleted file mode 100644 index 987dfac03c1..00000000000 --- a/storage/xtradb/include/ut0rnd.ic +++ /dev/null @@ -1,262 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************************//** -@file include/ut0rnd.ic -Random numbers and hashing - -Created 5/30/1994 Heikki Tuuri -*******************************************************************/ - -#define UT_HASH_RANDOM_MASK 1463735687 -#define UT_HASH_RANDOM_MASK2 1653893711 - -#ifndef UNIV_INNOCHECKSUM - -#define UT_RND1 151117737 -#define UT_RND2 119785373 -#define UT_RND3 85689495 -#define UT_RND4 76595339 -#define UT_SUM_RND2 98781234 -#define UT_SUM_RND3 126792457 -#define UT_SUM_RND4 63498502 -#define UT_XOR_RND1 187678878 -#define UT_XOR_RND2 143537923 - -/** Seed value of ut_rnd_gen_ulint() */ -extern ulint ut_rnd_ulint_counter; - -/********************************************************//** -This is used to set the random number seed. */ -UNIV_INLINE -void -ut_rnd_set_seed( -/*============*/ - ulint seed) /*!< in: seed */ -{ - ut_rnd_ulint_counter = seed; -} - -/********************************************************//** -The following function generates a series of 'random' ulint integers. 
-@return the next 'random' number */ -UNIV_INLINE -ulint -ut_rnd_gen_next_ulint( -/*==================*/ - ulint rnd) /*!< in: the previous random number value */ -{ - ulint n_bits; - - n_bits = 8 * sizeof(ulint); - - rnd = UT_RND2 * rnd + UT_SUM_RND3; - rnd = UT_XOR_RND1 ^ rnd; - rnd = (rnd << 20) + (rnd >> (n_bits - 20)); - rnd = UT_RND3 * rnd + UT_SUM_RND4; - rnd = UT_XOR_RND2 ^ rnd; - rnd = (rnd << 20) + (rnd >> (n_bits - 20)); - rnd = UT_RND1 * rnd + UT_SUM_RND2; - - return(rnd); -} - -/********************************************************//** -The following function generates 'random' ulint integers which -enumerate the value space of ulint integers in a pseudo random -fashion. Note that the same integer is repeated always after -2 to power 32 calls to the generator (if ulint is 32-bit). -@return the 'random' number */ -UNIV_INLINE -ulint -ut_rnd_gen_ulint(void) -/*==================*/ -{ - ulint rnd; - - ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2; - - rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter); - - return(rnd); -} - -/********************************************************//** -Generates a random integer from a given interval. -@return the 'random' number */ -UNIV_INLINE -ulint -ut_rnd_interval( -/*============*/ - ulint low, /*!< in: low limit; can generate also this value */ - ulint high) /*!< in: high limit; can generate also this value */ -{ - ulint rnd; - - ut_ad(high >= low); - - if (low == high) { - - return(low); - } - - rnd = ut_rnd_gen_ulint(); - - return(low + (rnd % (high - low))); -} - -/*********************************************************//** -Generates a random iboolean value. 
-@return the random value */ -UNIV_INLINE -ibool -ut_rnd_gen_ibool(void) -/*=================*/ -{ - ulint x; - - x = ut_rnd_gen_ulint(); - - if (((x >> 20) + (x >> 15)) & 1) { - - return(TRUE); - } - - return(FALSE); -} - -/*******************************************************//** -The following function generates a hash value for a ulint integer -to a hash table of size table_size, which should be a prime -or some random number for the hash table to work reliably. -@return hash value */ -UNIV_INLINE -ulint -ut_hash_ulint( -/*==========*/ - ulint key, /*!< in: value to be hashed */ - ulint table_size) /*!< in: hash table size */ -{ - ut_ad(table_size); - key = key ^ UT_HASH_RANDOM_MASK2; - - return(key % table_size); -} - -/*************************************************************//** -Folds a 64-bit integer. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_ull( -/*========*/ - ib_uint64_t d) /*!< in: 64-bit integer */ -{ - return(ut_fold_ulint_pair((ulint) d & ULINT32_MASK, - (ulint) (d >> 32))); -} - -/*************************************************************//** -Folds a character string ending in the null character. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_string( -/*===========*/ - const char* str) /*!< in: null-terminated string */ -{ - ulint fold = 0; - - ut_ad(str); - - while (*str != '\0') { - fold = ut_fold_ulint_pair(fold, (ulint)(*str)); - str++; - } - - return(fold); -} - -#endif /* !UNIV_INNOCHECKSUM */ - -/*************************************************************//** -Folds a pair of ulints. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_ulint_pair( -/*===============*/ - ulint n1, /*!< in: ulint */ - ulint n2) /*!< in: ulint */ -{ - return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) - ^ UT_HASH_RANDOM_MASK) + n2); -} - -/*************************************************************//** -Folds a binary string. 
-@return folded value */ -UNIV_INLINE -ulint -ut_fold_binary( -/*===========*/ - const byte* str, /*!< in: string of bytes */ - ulint len) /*!< in: length */ -{ - ulint fold = 0; - const byte* str_end = str + (len & 0xFFFFFFF8); - - ut_ad(str || !len); - - while (str < str_end) { - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - } - - switch (len & 0x7) { - case 7: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - /* fall through */ - case 6: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - /* fall through */ - case 5: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - /* fall through */ - case 4: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - /* fall through */ - case 3: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - /* fall through */ - case 2: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - /* fall through */ - case 1: - fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); - } - - return(fold); -} diff --git a/storage/xtradb/include/ut0sort.h b/storage/xtradb/include/ut0sort.h deleted file mode 100644 index 75648b5c317..00000000000 --- a/storage/xtradb/include/ut0sort.h +++ /dev/null @@ -1,106 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0sort.h -Sort utility - -Created 11/9/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0sort_h -#define ut0sort_h - -#include "univ.i" - -/* This module gives a macro definition of the body of -a standard sort function for an array of elements of any -type. The comparison function is given as a parameter to -the macro. The sort algorithm is mergesort, which has O(n log n) -worst-case running time. -*/ - -/*******************************************************************//** -This macro expands to the body of a standard sort function. -The sort function uses mergesort and must be defined separately -for each type of array. -Also the comparison function has to be defined individually -for each array cell type. SORT_FUN is the sort function name. -The function takes the array to be sorted (ARR), -the array of auxiliary space (AUX_ARR) of same size, -and the low (LOW), inclusive, and high (HIGH), noninclusive, -limits for the sort interval as arguments. -CMP_FUN is the comparison function name. It takes as arguments -two elements from the array and returns 1, if the first is bigger, -0 if equal, and -1 if the second bigger. 
*/ - -#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\ -{\ - ulint ut_sort_mid77;\ - ulint ut_sort_i77;\ - ulint ut_sort_low77;\ - ulint ut_sort_high77;\ -\ - ut_ad((LOW) < (HIGH));\ - ut_ad(ARR);\ - ut_ad(AUX_ARR);\ -\ - if ((LOW) == (HIGH) - 1) {\ - return;\ - } else if ((LOW) == (HIGH) - 2) {\ - if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\ - (AUX_ARR)[LOW] = (ARR)[LOW];\ - (ARR)[LOW] = (ARR)[(HIGH) - 1];\ - (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\ - }\ - return;\ - }\ -\ - ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\ -\ - SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\ - SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\ -\ - ut_sort_low77 = (LOW);\ - ut_sort_high77 = ut_sort_mid77;\ -\ - for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\ -\ - if (ut_sort_low77 >= ut_sort_mid77) {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\ - ut_sort_high77++;\ - } else if (ut_sort_high77 >= (HIGH)) {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\ - ut_sort_low77++;\ - } else if (CMP_FUN((ARR)[ut_sort_low77],\ - (ARR)[ut_sort_high77]) > 0) {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\ - ut_sort_high77++;\ - } else {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\ - ut_sort_low77++;\ - }\ - }\ -\ - memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\ - ((HIGH) - (LOW)) * sizeof *(ARR));\ -}\ - - -#endif - diff --git a/storage/xtradb/include/ut0timer.h b/storage/xtradb/include/ut0timer.h deleted file mode 100644 index f361ae79bf5..00000000000 --- a/storage/xtradb/include/ut0timer.h +++ /dev/null @@ -1,104 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, SkySQL Ab. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ut0timer.h -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ -#ifndef ut0timer_h -#define ut0timer_h - -#include "univ.i" -#include "data0type.h" -#include <my_rdtsc.h> - -/* Current timer stats */ -extern struct my_timer_unit_info ut_timer; - -/**************************************************************//** -Function pointer to point selected timer function. -@return timer current value */ -extern ulonglong (*ut_timer_now)(void); - -/**************************************************************//** -Sets up the data required for use of my_timer_* functions. -Selects the best timer by high frequency, and tight resolution. -Points my_timer_now() to the selected timer function. -Initializes my_timer struct to contain the info for selected timer.*/ -UNIV_INTERN -void ut_init_timer(void); - -/**************************************************************//** -Return time passed since time then, automatically adjusted -for the estimated timer overhead. 
-@return time passed since "then" */ -UNIV_INLINE -ulonglong -ut_timer_since( -/*===========*/ - ulonglong then); /*!< in: earlier timer value to measure from */ -/**************************************************************//** -Get time passed since "then", and update then to now -@return time passed since "then" */ -UNIV_INLINE -ulonglong -ut_timer_since_and_update( -/*======================*/ - ulonglong *then); /*!< in/out: earlier timer value to measure from; set to the current timer value on return */ -/**************************************************************//** -Convert native timer units in a ulonglong into seconds in a double -@return time in seconds */ -UNIV_INLINE -double -ut_timer_to_seconds( -/*=================*/ - ulonglong when); /*!< in: time in native timer units */ -/**************************************************************//** -Convert native timer units in a ulonglong into milliseconds in a double -@return time in milliseconds */ -UNIV_INLINE -double -ut_timer_to_milliseconds( -/*=====================*/ - ulonglong when); /*!< in: time in native timer units */ -/**************************************************************//** -Convert native timer units in a ulonglong into microseconds in a double -@return time in microseconds */ -UNIV_INLINE -double -ut_timer_to_microseconds( -/*=====================*/ - ulonglong when); /*!< in: time in native timer units */ -/**************************************************************//** -Convert microseconds in a ulonglong to native timer units in a ulonglong -@return time in native timer units */ -UNIV_INLINE -ulonglong -ut_microseconds_to_timer( -/*=====================*/ - ulonglong when); /*!< in: time in microseconds */ - -#ifndef UNIV_NONINL -#include "ut0timer.ic" -#endif - -#endif diff --git a/storage/xtradb/include/ut0timer.ic b/storage/xtradb/include/ut0timer.ic deleted file mode 100644 index 62e17a10fb1..00000000000 --- a/storage/xtradb/include/ut0timer.ic +++ /dev/null @@ -1,113 +0,0 @@ 
-/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, SkySQL Ab. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file include/ut0timer.ic -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ - -/**************************************************************//** -Return time passed since time then, automatically adjusted -for the estimated timer overhead. 
-@return time passed since "then"; NOTE(review): the operands appear to be unsigned, so the result would wrap around if less than ut_timer.overhead has elapsed - confirm against callers */ -UNIV_INLINE -ulonglong -ut_timer_since( -/*===========*/ - ulonglong then) /*!< in: earlier timer value to measure from */ -{ - return (ut_timer_now() - then) - ut_timer.overhead; -} - -/**************************************************************//** -Get time passed since "then", and update then to now -@return time passed since "then" */ -UNIV_INLINE -ulonglong -ut_timer_since_and_update( -/*======================*/ - ulonglong *then) /*!< in/out: earlier timer value to measure from; set to the current timer value on return */ -{ - ulonglong now = ut_timer_now(); - ulonglong ret = (now - (*then)) - ut_timer.overhead; - *then = now; - return ret; -} - -/**************************************************************//** -Convert native timer units in a ulonglong into seconds in a double -@return time in seconds */ -UNIV_INLINE -double -ut_timer_to_seconds( -/*=================*/ - ulonglong when) /*!< in: time in native timer units */ -{ - double ret = (double)(when); - ret /= (double)(ut_timer.frequency); - return ret; -} - -/**************************************************************//** -Convert native timer units in a ulonglong into milliseconds in a double -@return time in milliseconds */ -UNIV_INLINE -double -ut_timer_to_milliseconds( -/*=====================*/ - ulonglong when) /*!< in: time in native timer units */ -{ - double ret = (double)(when); - ret *= 1000.0; - ret /= (double)(ut_timer.frequency); - return ret; -} - -/**************************************************************//** -Convert native timer units in a ulonglong into microseconds in a double -@return time in microseconds */ -UNIV_INLINE -double -ut_timer_to_microseconds( -/*=====================*/ - ulonglong when) /*!< in: time in native timer units */ -{ - double ret = (double)(when); - ret *= 1000000.0; - ret /= (double)(ut_timer.frequency); - return ret; -} - -/**************************************************************//** -Convert microseconds in a ulonglong to native timer units in a ulonglong -@return time in 
native timer units */ -UNIV_INLINE -ulonglong -ut_microseconds_to_timer( -/*=====================*/ - ulonglong when) /*!< in: time in microseconds */ -{ - double ret = when; - ret *= (double)(ut_timer.frequency); - ret /= 1000000.0; - return (ulonglong)ret; -} diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h deleted file mode 100644 index ca4ce0d4ef9..00000000000 --- a/storage/xtradb/include/ut0ut.h +++ /dev/null @@ -1,526 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0ut.h -Various utilities - -Created 1/20/1994 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0ut_h -#define ut0ut_h - -#include "univ.i" - -#ifndef UNIV_INNOCHECKSUM - -#include "db0err.h" - -#ifndef UNIV_HOTBACKUP -# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ -#endif /* UNIV_HOTBACKUP */ - -#include <time.h> -#ifndef MYSQL_SERVER -#include <ctype.h> -#endif - -#include <stdarg.h> /* for va_list */ - -#include <string> - -/** Index name prefix in fast index creation */ -#define TEMP_INDEX_PREFIX '\377' -/** Index name prefix in fast index creation, as a string constant */ -#define TEMP_INDEX_PREFIX_STR "\377" - -/** Time stamp */ -typedef time_t ib_time_t; - -/* In order to call a piece of code, when a function returns or when the -scope ends, use this utility class. It will invoke the given function -object in its destructor. */ -template<typename F> -struct ut_when_dtor { - ut_when_dtor(F& p) : f(p) {} - ~ut_when_dtor() { - f(); - } -private: - F& f; -}; - -#ifndef UNIV_HOTBACKUP -# if defined(HAVE_PAUSE_INSTRUCTION) - /* According to the gcc info page, asm volatile means that the - instruction has important side-effects and must not be removed. - Also asm volatile may trigger a memory barrier (spilling all registers - to memory). 
*/ -# ifdef __SUNPRO_CC -# define UT_RELAX_CPU() asm ("pause" ) -# else -# define UT_RELAX_CPU() __asm__ __volatile__ ("pause") -# endif /* __SUNPRO_CC */ - -# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION) -# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop") -# elif defined(HAVE_WINDOWS_ATOMICS) - /* In the Win32 API, the x86 PAUSE instruction is executed by calling - the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- - independent way by using YieldProcessor. */ -# define UT_RELAX_CPU() YieldProcessor() -# elif defined(__powerpc__) && defined __GLIBC__ -#include <sys/platform/ppc.h> -# define UT_RELAX_CPU() __ppc_get_timebase() -# else -# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */ -# endif - -#if defined (__GNUC__) -# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory") -#elif defined (_MSC_VER) -# define UT_COMPILER_BARRIER() _ReadWriteBarrier() -#else -# define UT_COMPILER_BARRIER() -#endif - -# if defined(HAVE_HMT_PRIORITY_INSTRUCTION) -#include <sys/platform/ppc.h> -# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low() -# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med() -# else -# define UT_LOW_PRIORITY_CPU() ((void)0) -# define UT_RESUME_PRIORITY_CPU() ((void)0) -# endif - -/*********************************************************************//** -Delays execution for at most max_wait_us microseconds or returns earlier -if cond becomes true. -@param cond in: condition to wait for; evaluated every 2 ms -@param max_wait_us in: maximum delay to wait, in microseconds */ -#define UT_WAIT_FOR(cond, max_wait_us) \ -do { \ - ullint start_us; \ - start_us = ut_time_us(NULL); \ - while (!(cond) \ - && ut_time_us(NULL) - start_us < (max_wait_us)) {\ - \ - os_thread_sleep(2000 /* 2 ms */); \ - } \ -} while (0) -#endif /* !UNIV_HOTBACKUP */ - -template <class T> T ut_min(T a, T b) { return(a < b ? a : b); } -template <class T> T ut_max(T a, T b) { return(a > b ? 
a : b); } - -/******************************************************//** -Calculates the minimum of two ulints. -@return minimum */ -UNIV_INLINE -ulint -ut_min( -/*===*/ - ulint n1, /*!< in: first number */ - ulint n2); /*!< in: second number */ -/******************************************************//** -Calculates the maximum of two ulints. -@return maximum */ -UNIV_INLINE -ulint -ut_max( -/*===*/ - ulint n1, /*!< in: first number */ - ulint n2); /*!< in: second number */ -/****************************************************************//** -Calculates minimum of two ulint-pairs. */ -UNIV_INLINE -void -ut_pair_min( -/*========*/ - ulint* a, /*!< out: more significant part of minimum */ - ulint* b, /*!< out: less significant part of minimum */ - ulint a1, /*!< in: more significant part of first pair */ - ulint b1, /*!< in: less significant part of first pair */ - ulint a2, /*!< in: more significant part of second pair */ - ulint b2); /*!< in: less significant part of second pair */ -/******************************************************//** -Compares two ulints. -@return 1 if a > b, 0 if a == b, -1 if a < b */ -UNIV_INLINE -int -ut_ulint_cmp( -/*=========*/ - ulint a, /*!< in: ulint */ - ulint b); /*!< in: ulint */ -/*******************************************************//** -Compares two pairs of ulints. -@return -1 if a < b, 0 if a == b, 1 if a > b */ -UNIV_INLINE -int -ut_pair_cmp( -/*========*/ - ulint a1, /*!< in: more significant part of first pair */ - ulint a2, /*!< in: less significant part of first pair */ - ulint b1, /*!< in: more significant part of second pair */ - ulint b2); /*!< in: less significant part of second pair */ -/*************************************************************//** -Determines if a number is zero or a power of two. 
-@param n in: number -@return nonzero if n is zero or a power of two; zero otherwise */ -#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1))) -/*************************************************************//** -Calculates fast the remainder of n/m when m is a power of two. -@param n in: numerator -@param m in: denominator, must be a power of two -@return the remainder of n/m */ -#define ut_2pow_remainder(n, m) ((n) & ((m) - 1)) -/*************************************************************//** -Calculates the biggest multiple of m that is not bigger than n -when m is a power of two. In other words, rounds n down to m * k. -@param n in: number to round down -@param m in: alignment, must be a power of two -@return n rounded down to the biggest possible integer multiple of m */ -#define ut_2pow_round(n, m) ((n) & ~((m) - 1)) -/** Align a number down to a multiple of a power of two. -@param n in: number to round down -@param m in: alignment, must be a power of two -@return n rounded down to the biggest possible integer multiple of m */ -#define ut_calc_align_down(n, m) ut_2pow_round(n, m) -/********************************************************//** -Calculates the smallest multiple of m that is not smaller than n -when m is a power of two. In other words, rounds n up to m * k. -@param n in: number to round up -@param m in: alignment, must be a power of two -@return n rounded up to the smallest possible integer multiple of m */ -#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1)) -/*************************************************************//** -Calculates fast the 2-logarithm of a number, rounded upward to an -integer. -@return logarithm in the base 2, rounded upward */ -UNIV_INLINE -ulint -ut_2_log( -/*=====*/ - ulint n); /*!< in: number */ -/*************************************************************//** -Calculates 2 to power n. 
-@return 2 to power n */ -UNIV_INLINE -ulint -ut_2_exp( -/*=====*/ - ulint n); /*!< in: number */ -/*************************************************************//** -Calculates fast the number rounded up to the nearest power of 2. -@return first power of 2 which is >= n */ -UNIV_INTERN -ulint -ut_2_power_up( -/*==========*/ - ulint n) /*!< in: number != 0 */ - MY_ATTRIBUTE((const)); - -/** Determine how many bytes (groups of 8 bits) are needed to -store the given number of bits. -@param b in: bits -@return number of bytes (octets) needed to represent b */ -#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8) - -/**********************************************************//** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. -@return system time */ -UNIV_INTERN -ib_time_t -ut_time(void); -/*=========*/ -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. -@return 0 on success, -1 otherwise */ -UNIV_INTERN -int -ut_usectime( -/*========*/ - ulint* sec, /*!< out: seconds since the Epoch */ - ulint* ms); /*!< out: microseconds since the Epoch+*sec */ - -/**********************************************************//** -Returns the number of microseconds since epoch. Similar to -time(3), the return value is also stored in *tloc, provided -that tloc is non-NULL. -@return us since epoch */ -UNIV_INTERN -ullint -ut_time_us( -/*=======*/ - ullint* tloc); /*!< out: us since epoch, if non-NULL */ -/**********************************************************//** -Returns the number of milliseconds since some epoch. The -value may wrap around. It should only be used for heuristic -purposes. 
-@return ms since epoch */ -UNIV_INTERN -ulint -ut_time_ms(void); -/*============*/ -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************//** -Returns the number of milliseconds since some epoch. The -value may wrap around. It should only be used for heuristic -purposes. -@return ms since epoch */ -UNIV_INTERN -ulint -ut_time_ms(void); -/*============*/ - -/**********************************************************//** -Returns the difference of two times in seconds. -@return time2 - time1 expressed in seconds */ -UNIV_INTERN -double -ut_difftime( -/*========*/ - ib_time_t time2, /*!< in: time */ - ib_time_t time1); /*!< in: time */ - -#endif /* !UNIV_INNOCHECKSUM */ - -/**********************************************************//** -Prints a timestamp to a file. */ -UNIV_INTERN -void -ut_print_timestamp( -/*===============*/ - FILE* file) /*!< in: file where to print */ - UNIV_COLD MY_ATTRIBUTE((nonnull)); - -#ifndef UNIV_INNOCHECKSUM - -/**********************************************************//** -Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */ -UNIV_INTERN -void -ut_sprintf_timestamp( -/*=================*/ - char* buf); /*!< in: buffer where to sprintf */ -#ifdef UNIV_HOTBACKUP -/**********************************************************//** -Sprintfs a timestamp to a buffer with no spaces and with ':' characters -replaced by '_'. */ -UNIV_INTERN -void -ut_sprintf_timestamp_without_extra_chars( -/*=====================================*/ - char* buf); /*!< in: buffer where to sprintf */ -/**********************************************************//** -Returns current year, month, day. */ -UNIV_INTERN -void -ut_get_year_month_day( -/*==================*/ - ulint* year, /*!< out: current year */ - ulint* month, /*!< out: month */ - ulint* day); /*!< out: day */ -#else /* UNIV_HOTBACKUP */ -/*************************************************************//** -Runs an idle loop on CPU. 
The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -UNIV_INTERN -void -ut_delay( -/*=====*/ - ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */ -#endif /* UNIV_HOTBACKUP */ -/*************************************************************//** -Prints the contents of a memory buffer in hex and ascii. */ -UNIV_INTERN -void -ut_print_buf( -/*=========*/ - FILE* file, /*!< in: file where to print */ - const void* buf, /*!< in: memory buffer */ - ulint len); /*!< in: length of the buffer */ - -/**********************************************************************//** -Outputs a NUL-terminated file name, quoted with apostrophes. */ -UNIV_INTERN -void -ut_print_filename( -/*==============*/ - FILE* f, /*!< in: output stream */ - const char* name); /*!< in: name to print */ - -#ifndef UNIV_HOTBACKUP -/* Forward declaration of transaction handle */ -struct trx_t; - -/**********************************************************************//** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. */ -UNIV_INTERN -void -ut_print_name( -/*==========*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ibool table_id,/*!< in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name); /*!< in: name to print */ - -/**********************************************************************//** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. 
*/ -UNIV_INTERN -void -ut_print_namel( -/*===========*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction (NULL=no quotes) */ - ibool table_id,/*!< in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name, /*!< in: name to print */ - ulint namelen);/*!< in: length of name */ -/**********************************************************************//** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. */ -UNIV_INTERN -std::string -ut_get_name( -/*=========*/ - const trx_t* trx, /*!< in: transaction (NULL=no quotes) */ - ibool table_id,/*!< in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name); /*!< in: name to print */ -/**********************************************************************//** -Formats a table or index name, quoted as an SQL identifier. If the name -contains a slash '/', the result will contain two identifiers separated by -a period (.), as in SQL database_name.identifier. -@return pointer to 'formatted' */ -UNIV_INTERN -char* -ut_format_name( -/*===========*/ - const char* name, /*!< in: table or index name, must be - '\0'-terminated */ - ibool is_table, /*!< in: if TRUE then 'name' is a table - name */ - char* formatted, /*!< out: formatted result, will be - '\0'-terminated */ - ulint formatted_size);/*!< out: no more than this number of - bytes will be written to 'formatted' */ - -/**********************************************************************//** -Catenate files. 
*/ -UNIV_INTERN -void -ut_copy_file( -/*=========*/ - FILE* dest, /*!< in: output file */ - FILE* src); /*!< in: input file to be appended to output */ -#endif /* !UNIV_HOTBACKUP */ - -#ifdef __WIN__ -/**********************************************************************//** -A substitute for vsnprintf(3), formatted output conversion into -a limited buffer. Note: this function DOES NOT return the number of -characters that would have been printed if the buffer was unlimited because -VC's _vsnprintf() returns -1 in this case and we would need to call -_vscprintf() in addition to estimate that but we would need another copy -of "ap" for that and VC does not provide va_copy(). */ -UNIV_INTERN -void -ut_vsnprintf( -/*=========*/ - char* str, /*!< out: string */ - size_t size, /*!< in: str size */ - const char* fmt, /*!< in: format */ - va_list ap); /*!< in: format values */ - -/**********************************************************************//** -A substitute for snprintf(3), formatted output conversion into -a limited buffer. -@return number of characters that would have been printed if the size -were unlimited, not including the terminating '\0'. */ -UNIV_INTERN -int -ut_snprintf( -/*========*/ - char* str, /*!< out: string */ - size_t size, /*!< in: str size */ - const char* fmt, /*!< in: format */ - ...); /*!< in: format values */ -#else -/**********************************************************************//** -A wrapper for vsnprintf(3), formatted output conversion into -a limited buffer. Note: this function DOES NOT return the number of -characters that would have been printed if the buffer was unlimited because -VC's _vsnprintf() returns -1 in this case and we would need to call -_vscprintf() in addition to estimate that but we would need another copy -of "ap" for that and VC does not provide va_copy(). 
*/ -# define ut_vsnprintf(buf, size, fmt, ap) \ - ((void) vsnprintf(buf, size, fmt, ap)) -/**********************************************************************//** -A wrapper for snprintf(3), formatted output conversion into -a limited buffer. */ -# define ut_snprintf snprintf -#endif /* __WIN__ */ - -/*************************************************************//** -Convert an error number to a human readable text message. The -returned string is static and should not be freed or modified. -@return string, describing the error */ -UNIV_INTERN -const char* -ut_strerr( -/*======*/ - dberr_t num); /*!< in: error number */ - -/**************************************************************** -Sort function for ulint arrays. */ -UNIV_INTERN -void -ut_ulint_sort( -/*==========*/ - ulint* arr, /*!< in/out: array to sort */ - ulint* aux_arr, /*!< in/out: aux array to use in sort */ - ulint low, /*!< in: lower bound */ - ulint high) /*!< in: upper bound */ - MY_ATTRIBUTE((nonnull)); - -#ifndef UNIV_NONINL -#include "ut0ut.ic" -#endif - -#endif /* !UNIV_INNOCHECKSUM */ - -#endif - diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic deleted file mode 100644 index 4e0f76e1957..00000000000 --- a/storage/xtradb/include/ut0ut.ic +++ /dev/null @@ -1,162 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************************//** -@file include/ut0ut.ic -Various utilities - -Created 5/30/1994 Heikki Tuuri -*******************************************************************/ - -/******************************************************//** -Calculates the minimum of two ulints. -@return minimum */ -UNIV_INLINE -ulint -ut_min( -/*===*/ - ulint n1, /*!< in: first number */ - ulint n2) /*!< in: second number */ -{ - return((n1 <= n2) ? n1 : n2); -} - -/******************************************************//** -Calculates the maximum of two ulints. -@return maximum */ -UNIV_INLINE -ulint -ut_max( -/*===*/ - ulint n1, /*!< in: first number */ - ulint n2) /*!< in: second number */ -{ - return((n1 <= n2) ? n2 : n1); -} - -/****************************************************************//** -Calculates minimum of two ulint-pairs. */ -UNIV_INLINE -void -ut_pair_min( -/*========*/ - ulint* a, /*!< out: more significant part of minimum */ - ulint* b, /*!< out: less significant part of minimum */ - ulint a1, /*!< in: more significant part of first pair */ - ulint b1, /*!< in: less significant part of first pair */ - ulint a2, /*!< in: more significant part of second pair */ - ulint b2) /*!< in: less significant part of second pair */ -{ - if (a1 == a2) { - *a = a1; - *b = ut_min(b1, b2); - } else if (a1 < a2) { - *a = a1; - *b = b1; - } else { - *a = a2; - *b = b2; - } -} - -/******************************************************//** -Compares two ulints. 
-@return 1 if a > b, 0 if a == b, -1 if a < b */ -UNIV_INLINE -int -ut_ulint_cmp( -/*=========*/ - ulint a, /*!< in: ulint */ - ulint b) /*!< in: ulint */ -{ - if (a < b) { - return(-1); - } else if (a == b) { - return(0); - } else { - return(1); - } -} - -/*******************************************************//** -Compares two pairs of ulints. -@return -1 if a < b, 0 if a == b, 1 if a > b */ -UNIV_INLINE -int -ut_pair_cmp( -/*========*/ - ulint a1, /*!< in: more significant part of first pair */ - ulint a2, /*!< in: less significant part of first pair */ - ulint b1, /*!< in: more significant part of second pair */ - ulint b2) /*!< in: less significant part of second pair */ -{ - if (a1 > b1) { - return(1); - } else if (a1 < b1) { - return(-1); - } else if (a2 > b2) { - return(1); - } else if (a2 < b2) { - return(-1); - } else { - return(0); - } -} - -/*************************************************************//** -Calculates fast the 2-logarithm of a number, rounded upward to an -integer. -@return logarithm in the base 2, rounded upward */ -UNIV_INLINE -ulint -ut_2_log( -/*=====*/ - ulint n) /*!< in: number != 0 */ -{ - ulint res; - - res = 0; - - ut_ad(n > 0); - - n = n - 1; - - for (;;) { - n = n / 2; - - if (n == 0) { - break; - } - - res++; - } - - return(res + 1); -} - -/*************************************************************//** -Calculates 2 to power n. -@return 2 to power n */ -UNIV_INLINE -ulint -ut_2_exp( -/*=====*/ - ulint n) /*!< in: number */ -{ - return((ulint) 1 << n); -} diff --git a/storage/xtradb/include/ut0vec.h b/storage/xtradb/include/ut0vec.h deleted file mode 100644 index 432fb348a09..00000000000 --- a/storage/xtradb/include/ut0vec.h +++ /dev/null @@ -1,337 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0vec.h -A vector of pointers to data items - -Created 4/6/2006 Osku Salerma -************************************************************************/ - -#ifndef IB_VECTOR_H -#define IB_VECTOR_H - -#include "univ.i" -#include "mem0mem.h" - -struct ib_alloc_t; -struct ib_vector_t; - -typedef void* (*ib_mem_alloc_t)( - /* out: Pointer to allocated memory */ - ib_alloc_t* allocator, /* in: Pointer to allocator instance */ - ulint size); /* in: Number of bytes to allocate */ - -typedef void (*ib_mem_free_t)( - ib_alloc_t* allocator, /* in: Pointer to allocator instance */ - void* ptr); /* in: Memory to free */ - -typedef void* (*ib_mem_resize_t)( - /* out: Pointer to resized memory */ - ib_alloc_t* allocator, /* in: Pointer to allocator */ - void* ptr, /* in: Memory to resize */ - ulint old_size, /* in: Old memory size in bytes */ - ulint new_size); /* in: New size in bytes */ - -typedef int (*ib_compare_t)(const void*, const void*); - -/* An automatically resizing vector datatype with the following properties: - - -All memory allocation is done through an allocator, which is responsible for -freeing it when done with the vector. 
-*/ - -/* This is useful shorthand for elements of type void* */ -#define ib_vector_getp(v, n) (*(void**) ib_vector_get(v, n)) -#define ib_vector_getp_const(v, n) (*(void**) ib_vector_get_const(v, n)) - -#define ib_vector_allocator(v) (v->allocator) - -/******************************************************************** -Create a new vector with the given initial size. */ -UNIV_INTERN -ib_vector_t* -ib_vector_create( -/*=============*/ - /* out: vector */ - ib_alloc_t* alloc, /* in: Allocator */ - /* in: size of the data item */ - ulint sizeof_value, - ulint size); /* in: initial size */ - -/******************************************************************** -Destroy the vector. Make sure the vector owns the allocator, e.g., -the heap in the the heap allocator. */ -UNIV_INLINE -void -ib_vector_free( -/*===========*/ - ib_vector_t* vec); /* in/out: vector */ - -/******************************************************************** -Push a new element to the vector, increasing its size if necessary, -if elem is not NULL then elem is copied to the vector.*/ -UNIV_INLINE -void* -ib_vector_push( -/*===========*/ - /* out: pointer the "new" element */ - ib_vector_t* vec, /* in/out: vector */ - const void* elem); /* in: data element */ - -/******************************************************************** -Pop the last element from the vector.*/ -UNIV_INLINE -void* -ib_vector_pop( -/*==========*/ - /* out: pointer to the "new" element */ - ib_vector_t* vec); /* in/out: vector */ - -/*******************************************************************//** -Remove an element to the vector -@return pointer to the "removed" element */ -UNIV_INLINE -void* -ib_vector_remove( -/*=============*/ - ib_vector_t* vec, /*!< in: vector */ - const void* elem); /*!< in: value to remove */ - -/******************************************************************** -Get the number of elements in the vector. 
*/ -UNIV_INLINE -ulint -ib_vector_size( -/*===========*/ - /* out: number of elements in vector */ - const ib_vector_t* vec); /* in: vector */ - -/******************************************************************** -Increase the size of the vector. */ -UNIV_INTERN -void -ib_vector_resize( -/*=============*/ - /* out: number of elements in vector */ - ib_vector_t* vec); /* in/out: vector */ - -/******************************************************************** -Test whether a vector is empty or not. -@return TRUE if empty */ -UNIV_INLINE -ibool -ib_vector_is_empty( -/*===============*/ - const ib_vector_t* vec); /*!< in: vector */ - -/****************************************************************//** -Get the n'th element. -@return n'th element */ -UNIV_INLINE -void* -ib_vector_get( -/*==========*/ - ib_vector_t* vec, /*!< in: vector */ - ulint n); /*!< in: element index to get */ - -/******************************************************************** -Const version of the get n'th element. -@return n'th element */ -UNIV_INLINE -const void* -ib_vector_get_const( -/*================*/ - const ib_vector_t* vec, /* in: vector */ - ulint n); /* in: element index to get */ -/****************************************************************//** -Get last element. The vector must not be empty. -@return last element */ -UNIV_INLINE -void* -ib_vector_get_last( -/*===============*/ - ib_vector_t* vec); /*!< in: vector */ -/****************************************************************//** -Set the n'th element. */ -UNIV_INLINE -void -ib_vector_set( -/*==========*/ - ib_vector_t* vec, /*!< in/out: vector */ - ulint n, /*!< in: element index to set */ - void* elem); /*!< in: data element */ - -/******************************************************************** -Reset the vector size to 0 elements. 
*/ -UNIV_INLINE -void -ib_vector_reset( -/*============*/ - ib_vector_t* vec); /* in/out: vector */ - -/******************************************************************** -Get the last element of the vector. */ -UNIV_INLINE -void* -ib_vector_last( -/*===========*/ - /* out: pointer to last element */ - ib_vector_t* vec); /* in/out: vector */ - -/******************************************************************** -Get the last element of the vector. */ -UNIV_INLINE -const void* -ib_vector_last_const( -/*=================*/ - /* out: pointer to last element */ - const ib_vector_t* vec); /* in: vector */ - -/******************************************************************** -Sort the vector elements. */ -UNIV_INLINE -void -ib_vector_sort( -/*===========*/ - ib_vector_t* vec, /* in/out: vector */ - ib_compare_t compare); /* in: the comparator to use for sort */ - -/******************************************************************** -The default ib_vector_t heap free. Does nothing. */ -UNIV_INLINE -void -ib_heap_free( -/*=========*/ - ib_alloc_t* allocator, /* in: allocator */ - void* ptr); /* in: size in bytes */ - -/******************************************************************** -The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */ -UNIV_INLINE -void* -ib_heap_malloc( -/*===========*/ - /* out: pointer to allocated memory */ - ib_alloc_t* allocator, /* in: allocator */ - ulint size); /* in: size in bytes */ - -/******************************************************************** -The default ib_vector_t heap resize. Since we can't resize the heap -we have to copy the elements from the old ptr to the new ptr. -Uses mem_heap_alloc(). 
*/ -UNIV_INLINE -void* -ib_heap_resize( -/*===========*/ - /* out: pointer to reallocated - memory */ - ib_alloc_t* allocator, /* in: allocator */ - void* old_ptr, /* in: pointer to memory */ - ulint old_size, /* in: old size in bytes */ - ulint new_size); /* in: new size in bytes */ - -/******************************************************************** -Create a heap allocator that uses the passed in heap. */ -UNIV_INLINE -ib_alloc_t* -ib_heap_allocator_create( -/*=====================*/ - /* out: heap allocator instance */ - mem_heap_t* heap); /* in: heap to use */ - -/******************************************************************** -Free a heap allocator. */ -UNIV_INLINE -void -ib_heap_allocator_free( -/*===================*/ - ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */ - -/******************************************************************** -Wrapper for ut_free(). */ -UNIV_INLINE -void -ib_ut_free( -/*=======*/ - ib_alloc_t* allocator, /* in: allocator */ - void* ptr); /* in: size in bytes */ - -/******************************************************************** -Wrapper for ut_malloc(). */ -UNIV_INLINE -void* -ib_ut_malloc( -/*=========*/ - /* out: pointer to allocated memory */ - ib_alloc_t* allocator, /* in: allocator */ - ulint size); /* in: size in bytes */ - -/******************************************************************** -Wrapper for ut_realloc(). */ -UNIV_INLINE -void* -ib_ut_resize( -/*=========*/ - /* out: pointer to reallocated - memory */ - ib_alloc_t* allocator, /* in: allocator */ - void* old_ptr, /* in: pointer to memory */ - ulint old_size, /* in: old size in bytes */ - ulint new_size); /* in: new size in bytes */ - -/******************************************************************** -Create a heap allocator that uses the passed in heap. 
*/ -UNIV_INLINE -ib_alloc_t* -ib_ut_allocator_create(void); -/*=========================*/ - -/******************************************************************** -Create a heap allocator that uses the passed in heap. */ -UNIV_INLINE -void -ib_ut_allocator_free( -/*=================*/ - ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */ - -/* Allocator used by ib_vector_t. */ -struct ib_alloc_t { - ib_mem_alloc_t mem_malloc; /* For allocating memory */ - ib_mem_free_t mem_release; /* For freeing memory */ - ib_mem_resize_t mem_resize; /* For resizing memory */ - void* arg; /* Currently if not NULL then it - points to the heap instance */ -}; - -/* See comment at beginning of file. */ -struct ib_vector_t { - ib_alloc_t* allocator; /* Allocator, because one size - doesn't fit all */ - void* data; /* data elements */ - ulint used; /* number of elements currently used */ - ulint total; /* number of elements allocated */ - /* Size of a data item */ - ulint sizeof_value; -}; - -#ifndef UNIV_NONINL -#include "ut0vec.ic" -#endif - -#endif /* IB_VECTOR_H */ diff --git a/storage/xtradb/include/ut0vec.ic b/storage/xtradb/include/ut0vec.ic deleted file mode 100644 index f41a85e1d1d..00000000000 --- a/storage/xtradb/include/ut0vec.ic +++ /dev/null @@ -1,425 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0vec.ic -A vector of pointers to data items - -Created 4/6/2006 Osku Salerma -************************************************************************/ - -#define IB_VEC_OFFSET(v, i) (vec->sizeof_value * i) - -/******************************************************************** -The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */ -UNIV_INLINE -void* -ib_heap_malloc( -/*===========*/ - ib_alloc_t* allocator, /* in: allocator */ - ulint size) /* in: size in bytes */ -{ - mem_heap_t* heap = (mem_heap_t*) allocator->arg; - - return(mem_heap_alloc(heap, size)); -} - -/******************************************************************** -The default ib_vector_t heap free. Does nothing. */ -UNIV_INLINE -void -ib_heap_free( -/*=========*/ - ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ - void* ptr UNIV_UNUSED) /* in: size in bytes */ -{ - /* We can't free individual elements. */ -} - -/******************************************************************** -The default ib_vector_t heap resize. Since we can't resize the heap -we have to copy the elements from the old ptr to the new ptr. -Uses mem_heap_alloc(). 
*/ -UNIV_INLINE -void* -ib_heap_resize( -/*===========*/ - ib_alloc_t* allocator, /* in: allocator */ - void* old_ptr, /* in: pointer to memory */ - ulint old_size, /* in: old size in bytes */ - ulint new_size) /* in: new size in bytes */ -{ - void* new_ptr; - mem_heap_t* heap = (mem_heap_t*) allocator->arg; - - new_ptr = mem_heap_alloc(heap, new_size); - memcpy(new_ptr, old_ptr, old_size); - - return(new_ptr); -} - -/******************************************************************** -Create a heap allocator that uses the passed in heap. */ -UNIV_INLINE -ib_alloc_t* -ib_heap_allocator_create( -/*=====================*/ - mem_heap_t* heap) /* in: heap to use */ -{ - ib_alloc_t* heap_alloc; - - heap_alloc = (ib_alloc_t*) mem_heap_alloc(heap, sizeof(*heap_alloc)); - - heap_alloc->arg = heap; - heap_alloc->mem_release = ib_heap_free; - heap_alloc->mem_malloc = ib_heap_malloc; - heap_alloc->mem_resize = ib_heap_resize; - - return(heap_alloc); -} - -/******************************************************************** -Free a heap allocator. */ -UNIV_INLINE -void -ib_heap_allocator_free( -/*===================*/ - ib_alloc_t* ib_ut_alloc) /* in: alloc instace to free */ -{ - mem_heap_free((mem_heap_t*) ib_ut_alloc->arg); -} - -/******************************************************************** -Wrapper around ut_malloc(). */ -UNIV_INLINE -void* -ib_ut_malloc( -/*=========*/ - ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ - ulint size) /* in: size in bytes */ -{ - return(ut_malloc(size)); -} - -/******************************************************************** -Wrapper around ut_free(). */ -UNIV_INLINE -void -ib_ut_free( -/*=======*/ - ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ - void* ptr) /* in: size in bytes */ -{ - ut_free(ptr); -} - -/******************************************************************** -Wrapper aroung ut_realloc(). 
*/ -UNIV_INLINE -void* -ib_ut_resize( -/*=========*/ - ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ - void* old_ptr, /* in: pointer to memory */ - ulint old_size UNIV_UNUSED,/* in: old size in bytes */ - ulint new_size) /* in: new size in bytes */ -{ - return(ut_realloc(old_ptr, new_size)); -} - -/******************************************************************** -Create a ut allocator. */ -UNIV_INLINE -ib_alloc_t* -ib_ut_allocator_create(void) -/*========================*/ -{ - ib_alloc_t* ib_ut_alloc; - - ib_ut_alloc = (ib_alloc_t*) ut_malloc(sizeof(*ib_ut_alloc)); - - ib_ut_alloc->arg = NULL; - ib_ut_alloc->mem_release = ib_ut_free; - ib_ut_alloc->mem_malloc = ib_ut_malloc; - ib_ut_alloc->mem_resize = ib_ut_resize; - - return(ib_ut_alloc); -} - -/******************************************************************** -Free a ut allocator. */ -UNIV_INLINE -void -ib_ut_allocator_free( -/*=================*/ - ib_alloc_t* ib_ut_alloc) /* in: alloc instace to free */ -{ - ut_free(ib_ut_alloc); -} - -/******************************************************************** -Get number of elements in vector. */ -UNIV_INLINE -ulint -ib_vector_size( -/*===========*/ - /* out: number of elements in vector*/ - const ib_vector_t* vec) /* in: vector */ -{ - return(vec->used); -} - -/****************************************************************//** -Get n'th element. */ -UNIV_INLINE -void* -ib_vector_get( -/*==========*/ - ib_vector_t* vec, /*!< in: vector */ - ulint n) /*!< in: element index to get */ -{ - ut_a(n < vec->used); - - return((byte*) vec->data + IB_VEC_OFFSET(vec, n)); -} - -/******************************************************************** -Const version of the get n'th element. 
-@return n'th element */ -UNIV_INLINE -const void* -ib_vector_get_const( -/*================*/ - const ib_vector_t* vec, /* in: vector */ - ulint n) /* in: element index to get */ -{ - ut_a(n < vec->used); - - return((byte*) vec->data + IB_VEC_OFFSET(vec, n)); -} -/****************************************************************//** -Get last element. The vector must not be empty. -@return last element */ -UNIV_INLINE -void* -ib_vector_get_last( -/*===============*/ - ib_vector_t* vec) /*!< in: vector */ -{ - ut_a(vec->used > 0); - - return((byte*) ib_vector_get(vec, vec->used - 1)); -} - -/****************************************************************//** -Set the n'th element. */ -UNIV_INLINE -void -ib_vector_set( -/*==========*/ - ib_vector_t* vec, /*!< in/out: vector */ - ulint n, /*!< in: element index to set */ - void* elem) /*!< in: data element */ -{ - void* slot; - - ut_a(n < vec->used); - - slot = ((byte*) vec->data + IB_VEC_OFFSET(vec, n)); - memcpy(slot, elem, vec->sizeof_value); -} - -/******************************************************************** -Reset the vector size to 0 elements. */ -UNIV_INLINE -void -ib_vector_reset( -/*============*/ - /* out: void */ - ib_vector_t* vec) /* in: vector */ -{ - vec->used = 0; -} - -/******************************************************************** -Get the last element of the vector. */ -UNIV_INLINE -void* -ib_vector_last( -/*===========*/ - /* out: void */ - ib_vector_t* vec) /* in: vector */ -{ - ut_a(ib_vector_size(vec) > 0); - - return(ib_vector_get(vec, ib_vector_size(vec) - 1)); -} - -/******************************************************************** -Get the last element of the vector. 
*/ -UNIV_INLINE -const void* -ib_vector_last_const( -/*=================*/ - /* out: void */ - const ib_vector_t* vec) /* in: vector */ -{ - ut_a(ib_vector_size(vec) > 0); - - return(ib_vector_get_const(vec, ib_vector_size(vec) - 1)); -} - -/****************************************************************//** -Remove the last element from the vector. -@return last vector element */ -UNIV_INLINE -void* -ib_vector_pop( -/*==========*/ - /* out: pointer to element */ - ib_vector_t* vec) /* in: vector */ -{ - void* elem; - - ut_a(vec->used > 0); - - elem = ib_vector_last(vec); - --vec->used; - - return(elem); -} - -/******************************************************************** -Append an element to the vector, if elem != NULL then copy the data -from elem.*/ -UNIV_INLINE -void* -ib_vector_push( -/*===========*/ - /* out: pointer to the "new" element */ - ib_vector_t* vec, /* in: vector */ - const void* elem) /* in: element to add (can be NULL) */ -{ - void* last; - - if (vec->used >= vec->total) { - ib_vector_resize(vec); - } - - last = (byte*) vec->data + IB_VEC_OFFSET(vec, vec->used); - -#ifdef UNIV_DEBUG - memset(last, 0, vec->sizeof_value); -#endif - - if (elem) { - memcpy(last, elem, vec->sizeof_value); - } - - ++vec->used; - - return(last); -} - -/*******************************************************************//** -Remove an element to the vector -@return pointer to the "removed" element */ -UNIV_INLINE -void* -ib_vector_remove( -/*=============*/ - ib_vector_t* vec, /*!< in: vector */ - const void* elem) /*!< in: value to remove */ -{ - void* current = NULL; - void* next; - ulint i; - ulint old_used_count = vec->used; - - for (i = 0; i < vec->used; i++) { - current = ib_vector_get(vec, i); - - if (*(void**) current == elem) { - if (i == vec->used - 1) { - return(ib_vector_pop(vec)); - } - - next = ib_vector_get(vec, i + 1); - memmove(current, next, vec->sizeof_value - * (vec->used - i - 1)); - --vec->used; - break; - } - } - - return((old_used_count 
!= vec->used) ? current : NULL); -} - -/******************************************************************** -Sort the vector elements. */ -UNIV_INLINE -void -ib_vector_sort( -/*===========*/ - /* out: void */ - ib_vector_t* vec, /* in: vector */ - ib_compare_t compare)/* in: the comparator to use for sort */ -{ - qsort(vec->data, vec->used, vec->sizeof_value, compare); -} - -/******************************************************************** -Destroy the vector. Make sure the vector owns the allocator, e.g., -the heap in the the heap allocator. */ -UNIV_INLINE -void -ib_vector_free( -/*===========*/ - ib_vector_t* vec) /* in, own: vector */ -{ - /* Currently we only support two types of allocators, heap - and ut_malloc(), when the heap is freed all the elements are - freed too. With ut allocator, we need to free the elements, - the vector instance and the allocator separately. */ - - /* Only the heap allocator uses the arg field. */ - if (vec->allocator->arg) { - mem_heap_free((mem_heap_t*) vec->allocator->arg); - } else { - ib_alloc_t* allocator; - - allocator = vec->allocator; - - allocator->mem_release(allocator, vec->data); - allocator->mem_release(allocator, vec); - - ib_ut_allocator_free(allocator); - } -} - -/******************************************************************** -Test whether a vector is empty or not. -@return TRUE if empty */ -UNIV_INLINE -ibool -ib_vector_is_empty( -/*===============*/ - const ib_vector_t* vec) /*!< in: vector */ -{ - return(ib_vector_size(vec) == 0); -} diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h deleted file mode 100644 index d69363afe7b..00000000000 --- a/storage/xtradb/include/ut0wqueue.h +++ /dev/null @@ -1,125 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file include/ut0wqueue.h -A work queue - -Created 4/26/2006 Osku Salerma -************************************************************************/ - -/*******************************************************************//** -A Work queue. Threads can add work items to the queue and other threads can -wait for work items to be available and take them off the queue for -processing. -************************************************************************/ - -#ifndef IB_WORK_QUEUE_H -#define IB_WORK_QUEUE_H - -#include "ut0list.h" -#include "mem0mem.h" -#include "os0sync.h" -#include "sync0types.h" - -struct ib_wqueue_t; - -/****************************************************************//** -Create a new work queue. -@return work queue */ -UNIV_INTERN -ib_wqueue_t* -ib_wqueue_create(void); -/*===================*/ - -/****************************************************************//** -Free a work queue. */ -UNIV_INTERN -void -ib_wqueue_free( -/*===========*/ - ib_wqueue_t* wq); /*!< in: work queue */ - -/****************************************************************//** -Add a work item to the queue. 
*/ -UNIV_INTERN -void -ib_wqueue_add( -/*==========*/ - ib_wqueue_t* wq, /*!< in: work queue */ - void* item, /*!< in: work item */ - mem_heap_t* heap); /*!< in: memory heap to use for allocating the - list node */ - -/******************************************************************** -Check if queue is empty. */ - -ibool -ib_wqueue_is_empty( -/*===============*/ - /* out: TRUE if queue empty - else FALSE */ - const ib_wqueue_t* wq); /* in: work queue */ - -/****************************************************************//** -Wait for a work item to appear in the queue. -@return work item */ -UNIV_INTERN -void* -ib_wqueue_wait( -/*===========*/ - ib_wqueue_t* wq); /*!< in: work queue */ - -/******************************************************************** -Wait for a work item to appear in the queue for specified time. */ - -void* -ib_wqueue_timedwait( -/*================*/ - /* out: work item or NULL on timeout*/ - ib_wqueue_t* wq, /* in: work queue */ - ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ - -/******************************************************************** -Return first item on work queue or NULL if queue is empty -@return work item or NULL */ -void* -ib_wqueue_nowait( -/*=============*/ - ib_wqueue_t* wq); /*<! in: work queue */ - - -/******************************************************************** -Get number of items on queue. -@return number of items on queue */ -ulint -ib_wqueue_len( -/*==========*/ - ib_wqueue_t* wq); /*<! in: work queue */ - -/* Work queue. 
*/ -struct ib_wqueue_t { - ib_mutex_t mutex; /*!< mutex protecting everything */ - ib_list_t* items; /*!< work item list */ - os_event_t event; /*!< event we use to signal additions to list; - os_event_set() and os_event_reset() are - protected by ib_wqueue_t::mutex */ -}; - -#endif diff --git a/storage/xtradb/lock/lock0iter.cc b/storage/xtradb/lock/lock0iter.cc deleted file mode 100644 index b424d2fc757..00000000000 --- a/storage/xtradb/lock/lock0iter.cc +++ /dev/null @@ -1,111 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file lock/lock0iter.cc -Lock queue iterator. Can iterate over table and record -lock queues. - -Created July 16, 2007 Vasil Dimov -*******************************************************/ - -#define LOCK_MODULE_IMPLEMENTATION - -#include "univ.i" -#include "lock0iter.h" -#include "lock0lock.h" -#include "lock0priv.h" -#include "ut0dbg.h" -#include "ut0lst.h" - -/*******************************************************************//** -Initialize lock queue iterator so that it starts to iterate from -"lock". 
bit_no specifies the record number within the heap where the -record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: -1. If the lock is a table lock, thus we have a table lock queue; -2. If the lock is a record lock and it is a wait lock. In this case - bit_no is calculated in this function by using - lock_rec_find_set_bit(). There is exactly one bit set in the bitmap - of a wait lock. */ -UNIV_INTERN -void -lock_queue_iterator_reset( -/*======================*/ - lock_queue_iterator_t* iter, /*!< out: iterator */ - const lock_t* lock, /*!< in: lock to start from */ - ulint bit_no) /*!< in: record number in the - heap */ -{ - ut_ad(lock_mutex_own()); - - iter->current_lock = lock; - - if (bit_no != ULINT_UNDEFINED) { - - iter->bit_no = bit_no; - } else { - - switch (lock_get_type_low(lock)) { - case LOCK_TABLE: - iter->bit_no = ULINT_UNDEFINED; - break; - case LOCK_REC: - iter->bit_no = lock_rec_find_set_bit(lock); - ut_a(iter->bit_no != ULINT_UNDEFINED); - break; - default: - ut_error; - } - } -} - -/*******************************************************************//** -Gets the previous lock in the lock queue, returns NULL if there are no -more locks (i.e. the current lock is the first one). The iterator is -receded (if not-NULL is returned). 
-@return previous lock or NULL */ -UNIV_INTERN -const lock_t* -lock_queue_iterator_get_prev( -/*=========================*/ - lock_queue_iterator_t* iter) /*!< in/out: iterator; current_lock is - moved back only if a previous - lock exists */ -{ - const lock_t* prev_lock; - - ut_ad(lock_mutex_own()); - - switch (lock_get_type_low(iter->current_lock)) { - case LOCK_REC: /* record lock queue: looked up with iter->bit_no */ - prev_lock = lock_rec_get_prev( - iter->current_lock, iter->bit_no); - break; - case LOCK_TABLE: /* table lock queue: step back along un_member.tab_lock.locks */ - prev_lock = UT_LIST_GET_PREV( - un_member.tab_lock.locks, iter->current_lock); - break; - default: - ut_error; - } - - if (prev_lock != NULL) { - - iter->current_lock = prev_lock; /* otherwise the iterator is left untouched */ - } - - return(prev_lock); -} diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc deleted file mode 100644 index 71612f66fcd..00000000000 --- a/storage/xtradb/lock/lock0lock.cc +++ /dev/null @@ -1,8338 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file lock/lock0lock.cc -The transaction lock system - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#define LOCK_MODULE_IMPLEMENTATION - -#include "lock0lock.h" -#include "lock0priv.h" - -#ifdef UNIV_NONINL -#include "lock0lock.ic" -#include "lock0priv.ic" -#endif - -#include "ha_prototypes.h" -#include "usr0sess.h" -#include "trx0purge.h" -#include "dict0mem.h" -#include "dict0boot.h" -#include "trx0sys.h" -#include "pars0pars.h" /* pars_complete_graph_for_exec() */ -#include "que0que.h" /* que_node_get_parent() */ -#include "row0mysql.h" /* row_mysql_handle_errors() */ -#include "row0sel.h" /* sel_node_create(), sel_node_t */ -#include "row0types.h" /* sel_node_t */ -#include "srv0mon.h" -#include "ut0vec.h" -#include "btr0btr.h" -#include "dict0boot.h" -#include <set> -#include "mysql/plugin.h" - -#include <mysql/service_wsrep.h> - -#include <string> -#include <sstream> - -/* Restricts the length of search we will do in the waits-for -graph of transactions */ -#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 - -/* Restricts the search depth we will do in the waits-for graph of -transactions */ -#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200 - -/* When releasing transaction locks, this specifies how often we release -the lock mutex for a moment to give also others access to it */ - -#define LOCK_RELEASE_INTERVAL 1000 - -/* Safety margin when creating a new record lock: this many extra records -can be inserted to the page without need to create a lock with a bigger -bitmap */ - -#define LOCK_PAGE_BITMAP_MARGIN 64 - -/** Lock scheduling algorithm */ -ulong 
innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS; - -/* An explicit record lock affects both the record and the gap before it. -An implicit x-lock does not affect the gap, it only locks the index -record from read or update. - -If a transaction has modified or inserted an index record, then -it owns an implicit x-lock on the record. On a secondary index record, -a transaction has an implicit x-lock also if it has modified the -clustered index record, the max trx id of the page where the secondary -index record resides is >= trx id of the transaction (or database recovery -is running), and there are no explicit non-gap lock requests on the -secondary index record. - -This complicated definition for a secondary index comes from the -implementation: we want to be able to determine if a secondary index -record has an implicit x-lock, just by looking at the present clustered -index record, not at the historical versions of the record. The -complicated definition can be explained to the user so that there is -nondeterminism in the access path when a query is answered: we may, -or may not, access the clustered index record and thus may, or may not, -bump into an x-lock set there. - -Different transaction can have conflicting locks set on the gap at the -same time. The locks on the gap are purely inhibitive: an insert cannot -be made, or a select cursor may have to wait if a different transaction -has a conflicting lock on the gap. An x-lock on the gap does not give -the right to insert into the gap. - -An explicit lock can be placed on a user record or the supremum record of -a page. The locks on the supremum record are always thought to be of the gap -type, though the gap bit is not set. When we perform an update of a record -where the size of the record changes, we may temporarily store its explicit -locks on the infimum record of the page, though the infimum otherwise never -carries locks. - -A waiting record lock can also be of the gap type. 
A waiting lock request -can be granted when there is no conflicting mode lock request by another -transaction ahead of it in the explicit lock queue. - -In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP. -It only locks the record it is placed on, not the gap before the record. -This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation -level. - -------------------------------------------------------------------------- -RULE 1: If there is an implicit x-lock on a record, and there are non-gap -------- -lock requests waiting in the queue, then the transaction holding the implicit -x-lock also has an explicit non-gap record x-lock. Therefore, as locks are -released, we can grant locks to waiting lock requests purely by looking at -the explicit lock requests in the queue. - -RULE 3: Different transactions cannot have conflicting granted non-gap locks -------- -on a record at the same time. However, they can have conflicting granted gap -locks. -RULE 4: If there is a waiting lock request in a queue, no lock request, -------- -gap or not, can be inserted ahead of it in the queue. In record deletes -and page splits new gap type locks can be created by the database manager -for a transaction, and without rule 4, the waits-for graph of transactions -might become cyclic without the database noticing it, as the deadlock check -is only performed when a transaction itself requests a lock! -------------------------------------------------------------------------- - -An insert is allowed to a gap if there are no explicit lock requests by -other transactions on the next record. It does not matter if these lock -requests are granted or waiting, gap bit set or not, with the exception -that a gap type request set by another transaction to wait for -its turn to do an insert is ignored. 
On the other hand, an -implicit x-lock by another transaction does not prevent an insert, which -allows for more concurrency when using an Oracle-style sequence number -generator for the primary key with many transactions doing inserts -concurrently. - -A modify of a record is allowed if the transaction has an x-lock on the -record, or if other transactions do not have any non-gap lock requests on the -record. - -A read of a single user record with a cursor is allowed if the transaction -has a non-gap explicit, or an implicit lock on the record, or if the other -transactions have no x-lock requests on the record. At a page supremum a -read is always allowed. - -In summary, an implicit lock is seen as a granted x-lock only on the -record, not on the gap. An explicit lock with no gap bit set is a lock -both on the record and the gap. If the gap bit is set, the lock is only -on the gap. Different transaction cannot own conflicting locks on the -record at the same time, but they may own conflicting locks on the gap. -Granted locks on a record give an access right to the record, but gap type -locks just inhibit operations. - -NOTE: Finding out if some transaction has an implicit x-lock on a secondary -index record can be cumbersome. We may have to look at previous versions of -the corresponding clustered index record to find out if a delete marked -secondary index record was delete marked by an active transaction, not by -a committed one. - -FACT A: If a transaction has inserted a row, it can delete it any time -without need to wait for locks. - -PROOF: The transaction has an implicit x-lock on every index record inserted -for the row, and can thus modify each record without the need to wait. Q.E.D. - -FACT B: If a transaction has read some result set with a cursor, it can read -it again, and retrieves the same result set, if it has not modified the -result set in the meantime. Hence, there is no phantom problem. 
If the -biggest record, in the alphabetical order, touched by the cursor is removed, -a lock wait may occur, otherwise not. - -PROOF: When a read cursor proceeds, it sets an s-lock on each user record -it passes, and a gap type s-lock on each page supremum. The cursor must -wait until it has these locks granted. Then no other transaction can -have a granted x-lock on any of the user records, and therefore cannot -modify the user records. Neither can any other transaction insert into -the gaps which were passed over by the cursor. Page splits and merges, -and removal of obsolete versions of records do not affect this, because -when a user record or a page supremum is removed, the next record inherits -its locks as gap type locks, and therefore blocks inserts to the same gap. -Also, if a page supremum is inserted, it inherits its locks from the successor -record. When the cursor is positioned again at the start of the result set, -the records it will touch on its course are either records it touched -during the last pass or new inserted page supremums. It can immediately -access all these records, and when it arrives at the biggest record, it -notices that the result set is complete. If the biggest record was removed, -lock wait can occur because the next record only inherits a gap type lock, -and a wait may be needed. Q.E.D. */ - -/* If an index record should be changed or a new inserted, we must check -the lock on the record or the next. When a read cursor starts reading, -we will set a record level s-lock on each record it passes, except on the -initial record on which the cursor is positioned before we start to fetch -records. Our index tree search has the convention that the B-tree -cursor is positioned BEFORE the first possibly matching record in -the search. 
Optimizations are possible here: if the record is searched -on an equality condition to a unique key, we could actually set a special -lock on the record, a lock which would not prevent any insert before -this record. In the next key locking an x-lock set on a record also -prevents inserts just before that record. - There are special infimum and supremum records on each page. -A supremum record can be locked by a read cursor. This record cannot be -updated but the lock prevents insert of a user record to the end of -the page. - Next key locks will prevent the phantom problem where new rows -could appear to SELECT result sets after the select operation has been -performed. Prevention of phantoms ensures the serializability of -transactions. - What should we check if an insert of a new record is wanted? -Only the lock on the next record on the same page, because also the -supremum record can carry a lock. An s-lock prevents insertion, but -what about an x-lock? If it was set by a searched update, then there -is implicitly an s-lock, too, and the insert should be prevented. -What if our transaction owns an x-lock to the next record, but there is -a waiting s-lock request on the next record? If this s-lock was placed -by a read cursor moving in the ascending order in the index, we cannot -do the insert immediately, because when we finally commit our transaction, -the read cursor should see also the new inserted record. So we should -move the read cursor backward from the next record for it to pass over -the new inserted record. This move backward may be too cumbersome to -implement. If we in this situation just enqueue a second x-lock request -for our transaction on the next record, then the deadlock mechanism -notices a deadlock between our transaction and the s-lock request -transaction. This seems to be an ok solution. 
- We could have the convention that granted explicit record locks, -lock the corresponding records from changing, and also lock the gaps -before them from inserting. A waiting explicit lock request locks the gap -before from inserting. Implicit record x-locks, which we derive from the -transaction id in the clustered index record, only lock the record itself -from modification, not the gap before it from inserting. - How should we store update locks? If the search is done by a unique -key, we could just modify the record trx id. Otherwise, we could put a record -x-lock on the record. If the update changes ordering fields of the -clustered index record, the inserted new record needs no record lock in -lock table, the trx id is enough. The same holds for a secondary index -record. Searched delete is similar to update. - -PROBLEM: -What about waiting lock requests? If a transaction is waiting to make an -update to a record which another modified, how does the other transaction -know to send the end-lock-wait signal to the waiting transaction? If we have -the convention that a transaction may wait for just one lock at a time, how -do we preserve it if lock wait ends? - -PROBLEM: -Checking the trx id label of a secondary index record. In the case of a -modification, not an insert, is this necessary? A secondary index record -is modified only by setting or resetting its deleted flag. A secondary index -record contains fields to uniquely determine the corresponding clustered -index record. A secondary index record is therefore only modified if we -also modify the clustered index record, and the trx id checking is done -on the clustered index record, before we come to modify the secondary index -record. So, in the case of delete marking or unmarking a secondary index -record, we do not have to care about trx ids, only the locks in the lock -table must be checked. 
In the case of a select from a secondary index, the -trx id is relevant, and in this case we may have to search the clustered -index record. - -PROBLEM: How to update record locks when page is split or merged, or --------------------------------------------------------------------- -a record is deleted or updated? -If the size of fields in a record changes, we perform the update by -a delete followed by an insert. How can we retain the locks set or -waiting on the record? Because a record lock is indexed in the bitmap -by the heap number of the record, when we remove the record from the -record list, it is possible still to keep the lock bits. If the page -is reorganized, we could make a table of old and new heap numbers, -and permute the bitmaps in the locks accordingly. We can add to the -table a row telling where the updated record ended. If the update does -not require a reorganization of the page, we can simply move the lock -bits for the updated record to the position determined by its new heap -number (we may have to allocate a new lock, if we run out of the bitmap -in the old one). - A more complicated case is the one where the reinsertion of the -updated record is done pessimistically, because the structure of the -tree may change. - -PROBLEM: If a supremum record is removed in a page merge, or a record ---------------------------------------------------------------------- -removed in a purge, what to do to the waiting lock requests? In a split to -the right, we just move the lock requests to the new supremum. If a record -is removed, we could move the waiting lock request to its inheritor, the -next record in the index. But, the next record may already have lock -requests on its own queue. A new deadlock check should be made then. Maybe -it is easier just to release the waiting transactions. They can then enqueue -new lock requests on appropriate records. 
- -PROBLEM: When a record is inserted, what locks should it inherit from the -------------------------------------------------------------------------- -upper neighbor? An insert of a new supremum record in a page split is -always possible, but an insert of a new user record requires that the upper -neighbor does not have any lock requests by other transactions, granted or -waiting, in its lock queue. Solution: We can copy the locks as gap type -locks, so that also the waiting locks are transformed to granted gap type -locks on the inserted record. */ - /* Number of lock_stack_t elements allocated for lock_stack in lock_sys_create(). */ -#define LOCK_STACK_SIZE OS_THREAD_MAX_N - -/* LOCK COMPATIBILITY MATRIX (+ = compatible, - = conflicting) - * IS IX S X AI - * IS + + + - + - * IX + + - - + - * S + - + - - - * X - - - - - - * AI + + - - - - * - * Note that for rows, InnoDB only acquires S or X locks. - * For tables, InnoDB normally acquires IS or IX locks. - * S or X table locks are only acquired for LOCK TABLES. - * Auto-increment (AI) locks are needed because of - * statement-level MySQL binlog. - * See also lock_mode_compatible(). - */ -static const byte lock_compatibility_matrix[5][5] = { - /** IS IX S X AI */ - /* IS */ { TRUE, TRUE, TRUE, FALSE, TRUE}, - /* IX */ { TRUE, TRUE, FALSE, FALSE, TRUE}, - /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE}, - /* X */ { FALSE, FALSE, FALSE, FALSE, FALSE}, - /* AI */ { TRUE, TRUE, FALSE, FALSE, FALSE} -}; - -/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column; - * + = mode1 is stronger than or equal to mode2) - * IS IX S X AI - * IS + - - - - - * IX + + - - - - * S + - + - - - * X + + + + + - * AI - - - - + - * Indexed as lock_strength_matrix[mode1][mode2]. - * See lock_mode_stronger_or_eq(). - */ -static const byte lock_strength_matrix[5][5] = { - /** IS IX S X AI */ - /* IS */ { TRUE, FALSE, FALSE, FALSE, FALSE}, - /* IX */ { TRUE, TRUE, FALSE, FALSE, FALSE}, - /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE}, - /* X */ { TRUE, TRUE, TRUE, TRUE, TRUE}, - /* AI */ { FALSE, FALSE, FALSE, FALSE, TRUE} -}; - -/** Deadlock check context. 
*/ -struct lock_deadlock_ctx_t { - const trx_t* start; /*!< Joining transaction that is - requesting a lock in an incompatible - mode */ - - const lock_t* wait_lock; /*!< Lock that trx wants */ - - ib_uint64_t mark_start; /*!< Value of lock_mark_count at - the start of the deadlock check. */ - - ulint depth; /*!< Stack depth */ - - ulint cost; /*!< Calculation steps thus far */ - - ibool too_deep; /*!< TRUE if search was too deep and - was aborted */ -}; - -/** DFS visited node information used during deadlock checking. - One element of the lock_stack array. */ -struct lock_stack_t { - const lock_t* lock; /*!< Current lock */ - const lock_t* wait_lock; /*!< Waiting for lock */ - ulint heap_no; /*!< heap number if rec lock */ -}; - -/*********************************************************************//** -Checks if a waiting record lock request still has to wait in a queue. -Forward declaration only; the definition is not in this part of the file. -@return lock that is causing the wait */ -static -const lock_t* -lock_rec_has_to_wait_in_queue( -/*==========================*/ - const lock_t* wait_lock); /*!< in: waiting record lock */ - -/*************************************************************//** -Grants a lock to a waiting lock request and releases the waiting transaction. -The caller must hold lock_sys->mutex. -Forward declaration only; the definition is not in this part of the file. */ -static -void -lock_grant( -/*=======*/ - lock_t* lock, /*!< in/out: waiting lock request */ - bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */ - /* C-linkage hooks taking MYSQL_THD handles; only declared in this file. */ -extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd); -extern "C" int thd_need_wait_for(const MYSQL_THD thd); -extern "C" -int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd); - -extern "C" -int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); - -/** Stack to use during DFS search. Currently only a single stack is required -because there is no parallel deadlock check. This stack is protected by -the lock_sys_t::mutex. It is allocated with LOCK_STACK_SIZE elements in -lock_sys_create() and freed in lock_sys_close(). */ -static lock_stack_t* lock_stack; - -#ifdef UNIV_DEBUG -/** The count of the types of locks. 
*/ -static const ulint lock_types = UT_ARR_SIZE(lock_compatibility_matrix); -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_PFS_MUTEX -/* Key to register lock_sys->mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t lock_sys_mutex_key; -/* Key to register lock_sys->wait_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t lock_sys_wait_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/* Buffer to collect THDs to report waits for. Buffers are chained - through next; each one holds up to 64 transactions. */ -struct thd_wait_reports { - struct thd_wait_reports *next; /*!< List link */ - ulint used; /*!< How many elements in waitees[] */ - trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */ -}; - - -#ifdef UNIV_DEBUG -UNIV_INTERN ibool lock_print_waits = FALSE; - -/*********************************************************************//** -Validates the lock system. -@return true if ok */ -static -bool -lock_validate(); -/*============*/ - -/*********************************************************************//** -Validates the record lock queues on a page. -@return TRUE if ok */ -static -ibool -lock_rec_validate_page( -/*===================*/ - const buf_block_t* block) /*!< in: buffer block */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ - -/* The lock system; allocated in lock_sys_create() and reset to NULL in - lock_sys_close() */ -UNIV_INTERN lock_sys_t* lock_sys = NULL; - -/** We store info on the latest deadlock error to this buffer. InnoDB -Monitor will then fetch it and print */ -UNIV_INTERN ibool lock_deadlock_found = FALSE; -/** Temporary file opened by lock_sys_create(). -Only created if !srv_read_only_mode */ -static FILE* lock_latest_err_file; - -/********************************************************************//** -Checks if a joining lock request results in a deadlock. If a deadlock is -found this function will resolve the deadlock by choosing a victim transaction -and rolling it back. It will attempt to resolve all deadlocks. The returned -transaction id will be the joining transaction id or 0 if some other -transaction was chosen as a victim and rolled back or no deadlock found. 
- -@return id of transaction chosen as victim or 0 */ -static -trx_id_t -lock_deadlock_check_and_resolve( -/*===========================*/ - const lock_t* lock, /*!< in: lock the transaction is requesting */ - const trx_t* trx); /*!< in: transaction */ - -/*********************************************************************//** -Gets the nth bit of a record lock. -@return TRUE if the bit is set; FALSE if it is clear or if i is not below -the lock's n_bits (per the original note this covers i == ULINT_UNDEFINED) */ -UNIV_INLINE -ibool -lock_rec_get_nth_bit( -/*=================*/ - const lock_t* lock, /*!< in: record lock */ - ulint i) /*!< in: index of the bit */ -{ - const byte* b; - - ut_ad(lock); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - if (i >= lock->un_member.rec_lock.n_bits) { - - return(FALSE); - } - /* the bitmap is read from the memory directly after the lock struct */ - b = ((const byte*) &lock[1]) + (i / 8); - /* >> binds tighter than &, so this tests bit (i % 8) of the byte */ - return(1 & *b >> (i % 8)); -} - -/*********************************************************************//** -Reports that a transaction id is insensible, i.e., in the future. -Prints a timestamp, the record, the index name and both ids to stderr. */ -UNIV_INTERN -void -lock_report_trx_id_insanity( -/*========================*/ - trx_id_t trx_id, /*!< in: trx id */ - const rec_t* rec, /*!< in: user record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */ -{ - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: transaction id associated with record\n", - stderr); - rec_print_new(stderr, rec, offsets); - fputs("InnoDB: in ", stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, "\n" - "InnoDB: is " TRX_ID_FMT " which is higher than the" - " global trx id counter " TRX_ID_FMT "!\n" - "InnoDB: The table is corrupt. You have to do" - " dump + drop + reimport.\n", - trx_id, max_trx_id); -} - -/*********************************************************************//** -Checks that a transaction id is sensible, i.e., not in the future. 
-@return true if ok */ -#ifdef UNIV_DEBUG -UNIV_INTERN -#else -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -#endif -bool -lock_check_trx_id_sanity( -/*=====================*/ - trx_id_t trx_id, /*!< in: trx id to compare with the - current global maximum */ - const rec_t* rec, /*!< in: user record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ -{ - bool is_ok; - trx_id_t max_trx_id; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - max_trx_id = trx_sys_get_max_trx_id(); - is_ok = trx_id < max_trx_id; /* must be strictly below the maximum */ - - if (UNIV_UNLIKELY(!is_ok)) { /* report it; the caller still gets false */ - lock_report_trx_id_insanity(trx_id, - rec, index, offsets, max_trx_id); - } - - return(is_ok); -} - -/*********************************************************************//** -Checks that a record is seen in a consistent read. -@return true if sees, or false if an earlier version of the record -should be retrieved */ -UNIV_INTERN -bool -lock_clust_rec_cons_read_sees( -/*==========================*/ - const rec_t* rec, /*!< in: user record which should be read or - passed over by a read cursor */ - dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - read_view_t* view) /*!< in: consistent read view */ -{ - trx_id_t trx_id; - - ut_ad(dict_index_is_clust(index)); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - /* NOTE that we call this function while holding the search - system latch. */ - - trx_id = row_get_rec_trx_id(rec, index, offsets); - /* the result depends only on the view and the record's trx id */ - return(read_view_sees_trx_id(view, trx_id)); -} - -/*********************************************************************//** -Checks that a non-clustered index record is seen in a consistent read. - -NOTE that a non-clustered index page contains so little information on -its modifications that also in the case false, the present version of -rec may be the right, but we must check this from the clustered index -record. 
- -@return true if certainly sees, or false if an earlier version of the -clustered index record might be needed */ -UNIV_INTERN -bool -lock_sec_rec_cons_read_sees( -/*========================*/ - const rec_t* rec, /*!< in: user record which - should be read or passed over - by a read cursor */ - const read_view_t* view) /*!< in: consistent read view */ -{ - trx_id_t max_trx_id; - - ut_ad(page_rec_is_user_rec(rec)); - - /* NOTE that we might call this function while holding the search - system latch. */ - - if (recv_recovery_is_on()) { /* never report "certainly sees" while recovery is on */ - - return(false); - } - /* compare the max trx id stored on the record's page with the view's up_limit_id */ - max_trx_id = page_get_max_trx_id(page_align(rec)); - ut_ad(max_trx_id); - - return(max_trx_id < view->up_limit_id); -} - -/*********************************************************************//** -Creates the lock system at database start. The waiting_threads slot array -(OS_THREAD_MAX_N entries) is part of the same zero-filled allocation and -starts directly after the lock_sys_t struct. */ -UNIV_INTERN -void -lock_sys_create( -/*============*/ - ulint n_cells) /*!< in: number of slots in lock hash table */ -{ - ulint lock_sys_sz; - - lock_sys_sz = sizeof(*lock_sys) - + OS_THREAD_MAX_N * sizeof(srv_slot_t); - - lock_sys = static_cast<lock_sys_t*>(mem_zalloc(lock_sys_sz)); - - lock_stack = static_cast<lock_stack_t*>( - mem_zalloc(sizeof(*lock_stack) * LOCK_STACK_SIZE)); - /* the slot array lives in the tail of the lock_sys allocation */ - void* ptr = &lock_sys[1]; - - lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr); - - lock_sys->last_slot = lock_sys->waiting_threads; - - mutex_create(lock_sys_mutex_key, &lock_sys->mutex, SYNC_LOCK_SYS); - - mutex_create(lock_sys_wait_mutex_key, - &lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS); - - lock_sys->timeout_event = os_event_create(); - - lock_sys->rec_hash = hash_create(n_cells); - lock_sys->rec_num = 0; - - if (!srv_read_only_mode) { /* the temp file is skipped in read-only mode */ - lock_latest_err_file = os_file_create_tmpfile(NULL); - ut_a(lock_latest_err_file); - } -} - -/*********************************************************************//** -Closes the lock system at database shutdown. 
*/ -UNIV_INTERN -void -lock_sys_close(void) -/*================*/ -{ - if (lock_latest_err_file != NULL) { /* only created when !srv_read_only_mode */ - fclose(lock_latest_err_file); - lock_latest_err_file = NULL; - } - - hash_table_free(lock_sys->rec_hash); - - mutex_free(&lock_sys->mutex); - mutex_free(&lock_sys->wait_mutex); - - os_event_free(lock_sys->timeout_event); - /* walk all OS_THREAD_MAX_N waiting-thread slots and free any event still attached */ - for (srv_slot_t* slot = lock_sys->waiting_threads; - slot < lock_sys->waiting_threads + OS_THREAD_MAX_N; slot++) { - - ut_ad(!slot->in_use); - ut_ad(!slot->thr); - if (slot->event != NULL) - os_event_free(slot->event); - } - /* the slot array is part of the lock_sys allocation, so it is freed with it */ - mem_free(lock_stack); - mem_free(lock_sys); - - lock_sys = NULL; - lock_stack = NULL; -} - -/*********************************************************************//** -Gets the size of a lock struct. -@return sizeof(lock_t), in bytes */ -UNIV_INTERN -ulint -lock_get_size(void) -/*===============*/ -{ - return((ulint) sizeof(lock_t)); -} - -/*********************************************************************//** -Gets the mode of a lock. -@return the LOCK_MODE_MASK bits of type_mode, as enum lock_mode */ -UNIV_INLINE -enum lock_mode -lock_get_mode( -/*==========*/ - const lock_t* lock) /*!< in: lock */ -{ - ut_ad(lock); - - return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK)); -} - -/*********************************************************************//** -Gets the wait flag of a lock. -@return LOCK_WAIT if waiting, 0 if not */ -UNIV_INLINE -ulint -lock_get_wait( -/*==========*/ - const lock_t* lock) /*!< in: lock */ -{ - ut_ad(lock); - - return(lock->type_mode & LOCK_WAIT); -} - -/*********************************************************************//** -Gets the source table of an ALTER TABLE transaction. The table must be -covered by an IX or IS table lock. 
-@return the source table of transaction, if it is covered by an IX or -IS table lock; dest if there is no source table, and NULL if the -transaction is locking more than two tables or an inconsistency is -found */ -UNIV_INTERN -dict_table_t* -lock_get_src_table( -/*===============*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* dest, /*!< in: destination of ALTER TABLE */ - enum lock_mode* mode) /*!< out: lock mode of the source table; - set to LOCK_NONE on entry and updated only - when a LOCK_IX or LOCK_IS lock is seen */ -{ - dict_table_t* src; - lock_t* lock; - - ut_ad(!lock_mutex_own()); - - src = NULL; - *mode = LOCK_NONE; - - /* The trx mutex protects the trx_locks for our purposes. - Other transactions could want to convert one of our implicit - record locks to an explicit one. For that, they would need our - trx mutex. Waiting locks can be removed while only holding - lock_sys->mutex, but this is a running transaction and cannot - thus be holding any waiting locks. */ - trx_mutex_enter(trx); - - for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); - lock != NULL; - lock = UT_LIST_GET_NEXT(trx_locks, lock)) { - lock_table_t* tab_lock; - enum lock_mode lock_mode; - if (!(lock_get_type_low(lock) & LOCK_TABLE)) { - /* We are only interested in table locks. */ - continue; - } - tab_lock = &lock->un_member.tab_lock; - if (dest == tab_lock->table) { - /* We are not interested in the destination table. */ - continue; - } else if (!src) { - /* This presumably is the source table. */ - src = tab_lock->table; - if (UT_LIST_GET_LEN(src->locks) != 1 - || UT_LIST_GET_FIRST(src->locks) != lock) { - /* We only support the case when - there is only one lock on this table. */ - src = NULL; - goto func_exit; - } - } else if (src != tab_lock->table) { - /* The transaction is locking more than - two tables (src and dest): abort */ - src = NULL; - goto func_exit; - } - - /* Check that the source table is locked by - LOCK_IX or LOCK_IS. 
Locks in any other mode leave *mode unchanged. */ - lock_mode = lock_get_mode(lock); - if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) { - if (*mode != LOCK_NONE && *mode != lock_mode) { - /* There are multiple locks on src - with different modes. */ - src = NULL; - goto func_exit; - } - *mode = lock_mode; - } - } - - if (!src) { - /* No source table lock found: flag the situation to caller */ - src = dest; - } - -func_exit: - trx_mutex_exit(trx); - return(src); -} - -/*********************************************************************//** -Determine if the given table is exclusively "owned" by the given -transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC -on the table. The table's lock list is walked under the lock mutex. -@return TRUE if table is only locked by trx, with LOCK_IX, and -possibly LOCK_AUTO_INC */ -UNIV_INTERN -ibool -lock_is_table_exclusive( -/*====================*/ - const dict_table_t* table, /*!< in: table */ - const trx_t* trx) /*!< in: transaction */ -{ - const lock_t* lock; - ibool ok = FALSE; - - ut_ad(table); - ut_ad(trx); - - lock_mutex_enter(); - - for (lock = UT_LIST_GET_FIRST(table->locks); - lock != NULL; - lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) { - if (lock->trx != trx) { - /* A lock on the table is held - by some other transaction. - The label not_ok is inside the default: arm - of the switch below. */ - goto not_ok; - } - - if (!(lock_get_type_low(lock) & LOCK_TABLE)) { - /* We are interested in table locks only. */ - continue; - } - - switch (lock_get_mode(lock)) { - case LOCK_IX: - ok = TRUE; - break; - case LOCK_AUTO_INC: - /* It is allowed for trx to hold an - auto_increment lock. */ - break; - default: -not_ok: - /* Other table locks than LOCK_IX are not allowed. */ - ok = FALSE; - goto func_exit; - } - } - -func_exit: - lock_mutex_exit(); - - return(ok); -} - -/*********************************************************************//** -Sets the wait flag of a lock and the back pointer in trx to lock. 
*/ -UNIV_INLINE -void -lock_set_lock_and_trx_wait( -/*=======================*/ - lock_t* lock, /*!< in: lock */ - trx_t* trx) /*!< in/out: trx */ -{ - ut_ad(lock); - ut_ad(lock->trx == trx); - ut_ad(trx->lock.wait_lock == NULL); - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(trx)); - - trx->lock.wait_lock = lock; - lock->type_mode |= LOCK_WAIT; -} - -/**********************************************************************//** -The back pointer to a waiting lock request in the transaction is set to NULL -and the wait bit in lock type_mode is reset. */ -UNIV_INLINE -void -lock_reset_lock_and_trx_wait( -/*=========================*/ - lock_t* lock) /*!< in/out: record lock */ -{ - ut_ad(lock_get_wait(lock)); - ut_ad(lock_mutex_own()); - - if (lock->trx->lock.wait_lock && - lock->trx->lock.wait_lock != lock) { - const char* stmt=NULL; - const char* stmt2=NULL; - size_t stmt_len; - trx_id_t trx_id = 0; - stmt = lock->trx->mysql_thd - ? innobase_get_stmt(lock->trx->mysql_thd, &stmt_len) - : NULL; - - if (lock->trx->lock.wait_lock && - lock->trx->lock.wait_lock->trx) { - trx_id = lock->trx->lock.wait_lock->trx->id; - stmt2 = lock->trx->lock.wait_lock->trx->mysql_thd - ? innobase_get_stmt( - lock->trx->lock.wait_lock - ->trx->mysql_thd, &stmt_len) - : NULL; - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Trx id " TRX_ID_FMT - " is waiting a lock in statement %s" - " for this trx id " TRX_ID_FMT - " and statement %s wait_lock %p", - lock->trx->id, - stmt ? stmt : "NULL", - trx_id, - stmt2 ? stmt2 : "NULL", - lock->trx->lock.wait_lock); - ut_ad(lock->trx->lock.wait_lock == lock); - } - - lock->trx->lock.wait_lock = NULL; - lock->type_mode &= ~LOCK_WAIT; -} - -/*********************************************************************//** -Gets the gap flag of a record lock. 
-@return LOCK_GAP or 0 */ -UNIV_INLINE -ulint -lock_rec_get_gap( -/*=============*/ - const lock_t* lock) /*!< in: record lock */ -{ - ut_ad(lock); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - return(lock->type_mode & LOCK_GAP); -} - -/*********************************************************************//** -Gets the LOCK_REC_NOT_GAP flag of a record lock. -@return LOCK_REC_NOT_GAP or 0 */ -UNIV_INLINE -ulint -lock_rec_get_rec_not_gap( -/*=====================*/ - const lock_t* lock) /*!< in: record lock */ -{ - ut_ad(lock); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - return(lock->type_mode & LOCK_REC_NOT_GAP); -} - -/*********************************************************************//** -Gets the waiting insert flag of a record lock. -@return LOCK_INSERT_INTENTION or 0 */ -UNIV_INLINE -ulint -lock_rec_get_insert_intention( -/*==========================*/ - const lock_t* lock) /*!< in: record lock */ -{ - ut_ad(lock); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - return(lock->type_mode & LOCK_INSERT_INTENTION); -} - -/*********************************************************************//** -Calculates if lock mode 1 is stronger or equal to lock mode 2. -@return nonzero if mode1 stronger or equal to mode2 */ -UNIV_INLINE -ulint -lock_mode_stronger_or_eq( -/*=====================*/ - enum lock_mode mode1, /*!< in: lock mode */ - enum lock_mode mode2) /*!< in: lock mode */ -{ - ut_ad((ulint) mode1 < lock_types); - ut_ad((ulint) mode2 < lock_types); - - return(lock_strength_matrix[mode1][mode2]); -} - -/*********************************************************************//** -Calculates if lock mode 1 is compatible with lock mode 2. 
-@return nonzero if mode1 compatible with mode2 */ -UNIV_INLINE -ulint -lock_mode_compatible( -/*=================*/ - enum lock_mode mode1, /*!< in: lock mode */ - enum lock_mode mode2) /*!< in: lock mode */ -{ - ut_ad((ulint) mode1 < lock_types); - ut_ad((ulint) mode2 < lock_types); - - return(lock_compatibility_matrix[mode1][mode2]); -} - -/*********************************************************************//** -Checks if a lock request for a new lock has to wait for request lock2. -@return TRUE if new lock has to wait for lock2 to be removed */ -UNIV_INLINE -ibool -lock_rec_has_to_wait( -/*=================*/ -#ifdef WITH_WSREP - ibool for_locking, /*!< is caller locking or releasing */ -#endif /* WITH_WSREP */ - const trx_t* trx, /*!< in: trx of new lock */ - ulint type_mode,/*!< in: precise mode of the new lock - to set: LOCK_S or LOCK_X, possibly - ORed to LOCK_GAP or LOCK_REC_NOT_GAP, - LOCK_INSERT_INTENTION */ - const lock_t* lock2, /*!< in: another record lock; NOTE that - it is assumed that this has a lock bit - set on the same record as in the new - lock we are setting */ - ibool lock_is_on_supremum) /*!< in: TRUE if we are setting the - lock on the 'supremum' record of an - index page: we know then that the lock - request is really for a 'gap' type lock */ -{ - ut_ad(trx && lock2); - ut_ad(lock_get_type_low(lock2) == LOCK_REC); - - if (trx != lock2->trx - && !lock_mode_compatible(static_cast<enum lock_mode>( - LOCK_MODE_MASK & type_mode), - lock_get_mode(lock2))) { - - /* We have somewhat complex rules when gap type record locks - cause waits */ - - if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) - && !(type_mode & LOCK_INSERT_INTENTION)) { - - /* Gap type locks without LOCK_INSERT_INTENTION flag - do not need to wait for anything. This is because - different users can have conflicting lock types - on gaps. 
*/ - - return(FALSE); - } - - if (!(type_mode & LOCK_INSERT_INTENTION) - && lock_rec_get_gap(lock2)) { - - /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP - does not need to wait for a gap type lock */ - - return(FALSE); - } - - if ((type_mode & LOCK_GAP) - && lock_rec_get_rec_not_gap(lock2)) { - - /* Lock on gap does not need to wait for - a LOCK_REC_NOT_GAP type lock */ - - return(FALSE); - } - - if (lock_rec_get_insert_intention(lock2)) { - - /* No lock request needs to wait for an insert - intention lock to be removed. This is ok since our - rules allow conflicting locks on gaps. This eliminates - a spurious deadlock caused by a next-key lock waiting - for an insert intention lock; when the insert - intention lock was granted, the insert deadlocked on - the waiting next-key lock. - - Also, insert intention locks do not disturb each - other. */ - - return(FALSE); - } - - if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2)) && - !thd_need_ordering_with(trx->mysql_thd, - lock2->trx->mysql_thd)) { - /* If the upper server layer has already decided on the - commit order between the transaction requesting the - lock and the transaction owning the lock, we do not - need to wait for gap locks. Such ordeering by the upper - server layer happens in parallel replication, where the - commit order is fixed to match the original order on the - master. - - Such gap locks are mainly needed to get serialisability - between transactions so that they will be binlogged in - the correct order so that statement-based replication - will give the correct results. Since the right order - was already determined on the master, we do not need - to enforce it again here. - - Skipping the locks is not essential for correctness, - since in case of deadlock we will just kill the later - transaction and retry it. But it can save some - unnecessary rollbacks and retries. 
*/ - - return (FALSE); - } - -#ifdef WITH_WSREP - /* if BF thread is locking and has conflict with another BF - thread, we need to look at trx ordering and lock types */ - if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) && - wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) { - - if (wsrep_debug) { - fprintf(stderr, - "BF-BF lock conflict, locking: %lu\n", - for_locking); - lock_rec_print(stderr, lock2); - } - - if (wsrep_trx_order_before(trx->mysql_thd, - lock2->trx->mysql_thd) && - (type_mode & LOCK_MODE_MASK) == LOCK_X && - (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) - { - if (for_locking || wsrep_debug) { - /* exclusive lock conflicts are not - accepted */ - fprintf(stderr, - "BF-BF X lock conflict," - "mode: %lu supremum: %lu\n", - type_mode, lock_is_on_supremum); - fprintf(stderr, - "conflicts states: my %d locked %d\n", - wsrep_thd_conflict_state(trx->mysql_thd, FALSE), - wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE) ); - lock_rec_print(stderr, lock2); - if (for_locking) return FALSE; - //abort(); - } - } else { - /* if lock2->index->n_uniq <= - lock2->index->n_user_defined_cols - operation is on uniq index - */ - if (wsrep_debug) fprintf(stderr, - "BF conflict, modes: %lu %lu, " - "idx: %s-%s n_uniq %u n_user %u\n", - type_mode, lock2->type_mode, - lock2->index->name, - lock2->index->table_name, - lock2->index->n_uniq, - lock2->index->n_user_defined_cols); - return FALSE; - } - } -#endif /* WITH_WSREP */ - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************//** -Checks if a lock request lock1 has to wait for request lock2. 
-@return TRUE if lock1 has to wait for lock2 to be removed */ -UNIV_INTERN -ibool -lock_has_to_wait( -/*=============*/ - const lock_t* lock1, /*!< in: waiting lock */ - const lock_t* lock2) /*!< in: another lock; NOTE that it is - assumed that this has a lock bit set - on the same record as in lock1 if the - locks are record locks */ -{ - ut_ad(lock1 && lock2); - - if (lock1->trx != lock2->trx - && !lock_mode_compatible(lock_get_mode(lock1), - lock_get_mode(lock2))) { - if (lock_get_type_low(lock1) == LOCK_REC) { - ut_ad(lock_get_type_low(lock2) == LOCK_REC); - - /* If this lock request is for a supremum record - then the second bit on the lock bitmap is set */ - -#ifdef WITH_WSREP - return(lock_rec_has_to_wait(FALSE, lock1->trx, -#else - return(lock_rec_has_to_wait(lock1->trx, -#endif /* WITH_WSREP */ - lock1->type_mode, lock2, - lock_rec_get_nth_bit( - lock1, 1))); - } - - return(TRUE); - } - - return(FALSE); -} - -/*============== RECORD LOCK BASIC FUNCTIONS ============================*/ - -/*********************************************************************//** -Gets the number of bits in a record lock bitmap. -@return number of bits */ -UNIV_INLINE -ulint -lock_rec_get_n_bits( -/*================*/ - const lock_t* lock) /*!< in: record lock */ -{ - return(lock->un_member.rec_lock.n_bits); -} - -/**********************************************************************//** -Sets the nth bit of a record lock to TRUE. */ -UNIV_INLINE -void -lock_rec_set_nth_bit( -/*=================*/ - lock_t* lock, /*!< in: record lock */ - ulint i) /*!< in: index of the bit */ -{ - ulint byte_index; - ulint bit_index; - - ut_ad(lock); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - ut_ad(i < lock->un_member.rec_lock.n_bits); - - byte_index = i / 8; - bit_index = i % 8; - - ((byte*) &lock[1])[byte_index] |= 1 << bit_index; -} - -/**********************************************************************//** -Looks for a set bit in a record lock bitmap. 
Returns ULINT_UNDEFINED, -if none found. -@return bit index == heap number of the record, or ULINT_UNDEFINED if -none found */ -UNIV_INTERN -ulint -lock_rec_find_set_bit( -/*==================*/ - const lock_t* lock) /*!< in: record lock with at least one bit set */ -{ - ulint i; - - for (i = 0; i < lock_rec_get_n_bits(lock); i++) { - - if (lock_rec_get_nth_bit(lock, i)) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/**********************************************************************//** -Resets the nth bit of a record lock. */ -UNIV_INLINE -void -lock_rec_reset_nth_bit( -/*===================*/ - lock_t* lock, /*!< in: record lock */ - ulint i) /*!< in: index of the bit which must be set to TRUE - when this function is called */ -{ - ulint byte_index; - ulint bit_index; - - ut_ad(lock); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - ut_ad(i < lock->un_member.rec_lock.n_bits); - - byte_index = i / 8; - bit_index = i % 8; - - ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index); -} - -/*********************************************************************//** -Gets the first or next record lock on a page. -@return next lock, NULL if none exists */ -UNIV_INLINE -const lock_t* -lock_rec_get_next_on_page_const( -/*============================*/ - const lock_t* lock) /*!< in: a record lock */ -{ - ulint space; - ulint page_no; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - space = lock->un_member.rec_lock.space; - page_no = lock->un_member.rec_lock.page_no; - - for (;;) { - lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock)); - - if (!lock) { - - break; - } - - if ((lock->un_member.rec_lock.space == space) - && (lock->un_member.rec_lock.page_no == page_no)) { - - break; - } - } - - return(lock); -} - -/*********************************************************************//** -Gets the first or next record lock on a page. 
-@return next lock, NULL if none exists */ -UNIV_INLINE -lock_t* -lock_rec_get_next_on_page( -/*======================*/ - lock_t* lock) /*!< in: a record lock */ -{ - return((lock_t*) lock_rec_get_next_on_page_const(lock)); -} - -/*********************************************************************//** -Gets the first record lock on a page, where the page is identified by its -file address. -@return first lock, NULL if none exists */ -UNIV_INLINE -lock_t* -lock_rec_get_first_on_page_addr( -/*============================*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - - for (lock = static_cast<lock_t*>( - HASH_GET_FIRST(lock_sys->rec_hash, - lock_rec_hash(space, page_no))); - lock != NULL; - lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) { - - if (lock->un_member.rec_lock.space == space - && lock->un_member.rec_lock.page_no == page_no) { - - break; - } - } - - return(lock); -} - -/*********************************************************************//** -Determines if there are explicit record locks on a page. -@return an explicit record lock on the page, or NULL if there are none */ -UNIV_INTERN -lock_t* -lock_rec_expl_exist_on_page( -/*========================*/ - ulint space, /*!< in: space id */ - ulint page_no)/*!< in: page number */ -{ - lock_t* lock; - - lock_mutex_enter(); - lock = lock_rec_get_first_on_page_addr(space, page_no); - lock_mutex_exit(); - - return(lock); -} - -/*********************************************************************//** -Gets the first record lock on a page, where the page is identified by a -pointer to it. 
-@return first lock, NULL if none exists */ -UNIV_INLINE -lock_t* -lock_rec_get_first_on_page( -/*=======================*/ - const buf_block_t* block) /*!< in: buffer block */ -{ - ulint hash; - lock_t* lock; - ulint space = buf_block_get_space(block); - ulint page_no = buf_block_get_page_no(block); - - ut_ad(lock_mutex_own()); - - hash = buf_block_get_lock_hash_val(block); - - for (lock = static_cast<lock_t*>( - HASH_GET_FIRST( lock_sys->rec_hash, hash)); - lock != NULL; - lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) { - - if ((lock->un_member.rec_lock.space == space) - && (lock->un_member.rec_lock.page_no == page_no)) { - - break; - } - } - - return(lock); -} - -/*********************************************************************//** -Gets the next explicit lock request on a record. -@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ -UNIV_INLINE -lock_t* -lock_rec_get_next( -/*==============*/ - ulint heap_no,/*!< in: heap number of the record */ - lock_t* lock) /*!< in: lock */ -{ - ut_ad(lock_mutex_own()); - - do { - ut_ad(lock_get_type_low(lock) == LOCK_REC); - lock = lock_rec_get_next_on_page(lock); - } while (lock && !lock_rec_get_nth_bit(lock, heap_no)); - - return(lock); -} - -/*********************************************************************//** -Gets the next explicit lock request on a record. -@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */ -UNIV_INLINE -const lock_t* -lock_rec_get_next_const( -/*====================*/ - ulint heap_no,/*!< in: heap number of the record */ - const lock_t* lock) /*!< in: lock */ -{ - return(lock_rec_get_next(heap_no, (lock_t*) lock)); -} - -/*********************************************************************//** -Gets the first explicit lock request on a record. 
-@return first lock, NULL if none exists */ -UNIV_INLINE -lock_t* -lock_rec_get_first( -/*===============*/ - const buf_block_t* block, /*!< in: block containing the record */ - ulint heap_no)/*!< in: heap number of the record */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - - for (lock = lock_rec_get_first_on_page(block); lock; - lock = lock_rec_get_next_on_page(lock)) { - if (lock_rec_get_nth_bit(lock, heap_no)) { - break; - } - } - - return(lock); -} - -/*********************************************************************//** -Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock -pointer in the transaction! This function is used in lock object creation -and resetting. */ -static -void -lock_rec_bitmap_reset( -/*==================*/ - lock_t* lock) /*!< in: record lock */ -{ - ulint n_bytes; - - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - /* Reset to zero the bitmap which resides immediately after the lock - struct */ - - n_bytes = lock_rec_get_n_bits(lock) / 8; - - ut_ad((lock_rec_get_n_bits(lock) % 8) == 0); - - memset(&lock[1], 0, n_bytes); -} - -/*********************************************************************//** -Copies a record lock to heap. -@return copy of lock */ -static -lock_t* -lock_rec_copy( -/*==========*/ - const lock_t* lock, /*!< in: record lock */ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ulint size; - - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8; - - return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size))); -} - -/*********************************************************************//** -Gets the previous record lock set on a record. 
-@return previous lock on the same record, NULL if none exists */ -UNIV_INTERN -const lock_t* -lock_rec_get_prev( -/*==============*/ - const lock_t* in_lock,/*!< in: record lock */ - ulint heap_no)/*!< in: heap number of the record */ -{ - lock_t* lock; - ulint space; - ulint page_no; - lock_t* found_lock = NULL; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_type_low(in_lock) == LOCK_REC); - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - - for (lock = lock_rec_get_first_on_page_addr(space, page_no); - /* No op */; - lock = lock_rec_get_next_on_page(lock)) { - - ut_ad(lock); - - if (lock == in_lock) { - - return(found_lock); - } - - if (lock_rec_get_nth_bit(lock, heap_no)) { - - found_lock = lock; - } - } -} - -/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/ - -/*********************************************************************//** -Checks if a transaction has the specified table lock, or stronger. This -function should only be called by the thread that owns the transaction. 
-@return lock or NULL */ -UNIV_INLINE -const lock_t* -lock_table_has( -/*===========*/ - const trx_t* trx, /*!< in: transaction */ - const dict_table_t* table, /*!< in: table */ - enum lock_mode mode) /*!< in: lock mode */ -{ - lint i; - - if (ib_vector_is_empty(trx->lock.table_locks)) { - return(NULL); - } - - /* Look for stronger locks the same trx already has on the table */ - - for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { - const lock_t* lock; - enum lock_mode lock_mode; - - lock = *static_cast<const lock_t**>( - ib_vector_get(trx->lock.table_locks, i)); - - if (lock == NULL) { - continue; - } - - lock_mode = lock_get_mode(lock); - - ut_ad(trx == lock->trx); - ut_ad(lock_get_type_low(lock) & LOCK_TABLE); - ut_ad(lock->un_member.tab_lock.table != NULL); - - if (table == lock->un_member.tab_lock.table - && lock_mode_stronger_or_eq(lock_mode, mode)) { - - ut_ad(!lock_get_wait(lock)); - - return(lock); - } - } - - return(NULL); -} - -/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ - -/*********************************************************************//** -Checks if a transaction has a GRANTED explicit lock on rec stronger or equal -to precise_mode. 
-@return lock or NULL */ -UNIV_INLINE -lock_t* -lock_rec_has_expl( -/*==============*/ - ulint precise_mode,/*!< in: LOCK_S or LOCK_X - possibly ORed to LOCK_GAP or - LOCK_REC_NOT_GAP, for a - supremum record we regard this - always a gap type request */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - trx_id_t trx_id) /*!< in: transaction id */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S - || (precise_mode & LOCK_MODE_MASK) == LOCK_X); - ut_ad(!(precise_mode & LOCK_INSERT_INTENTION)); - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next(heap_no, lock)) { - - if (lock->trx->id == trx_id - && !lock_rec_get_insert_intention(lock) - && lock_mode_stronger_or_eq( - lock_get_mode(lock), - static_cast<enum lock_mode>( - precise_mode & LOCK_MODE_MASK)) - && !lock_get_wait(lock) - && (!lock_rec_get_rec_not_gap(lock) - || (precise_mode & LOCK_REC_NOT_GAP) - || heap_no == PAGE_HEAP_NO_SUPREMUM) - && (!lock_rec_get_gap(lock) - || (precise_mode & LOCK_GAP) - || heap_no == PAGE_HEAP_NO_SUPREMUM)) { - - return(lock); - } - } - - return(NULL); -} - -#ifdef WITH_WSREP -static -void -lock_rec_discard(lock_t* in_lock); -#endif -#ifdef UNIV_DEBUG -/*********************************************************************//** -Checks if some other transaction has a lock request in the queue. 
-@return lock or NULL */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -const lock_t* -lock_rec_other_has_expl_req( -/*========================*/ - enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */ - ulint gap, /*!< in: LOCK_GAP if also gap - locks are taken into account, - or 0 if not */ - ulint wait, /*!< in: LOCK_WAIT if also - waiting locks are taken into - account, or 0 if not */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - trx_id_t trx_id) /*!< in: transaction */ -{ - const lock_t* lock; - - ut_ad(lock_mutex_own()); - ut_ad(mode == LOCK_X || mode == LOCK_S); - ut_ad(gap == 0 || gap == LOCK_GAP); - ut_ad(wait == 0 || wait == LOCK_WAIT); - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next_const(heap_no, lock)) { - - if (lock->trx->id != trx_id - && (gap - || !(lock_rec_get_gap(lock) - || heap_no == PAGE_HEAP_NO_SUPREMUM)) - && (wait || !lock_get_wait(lock)) - && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) { - - return(lock); - } - } - - return(NULL); -} -#endif /* UNIV_DEBUG */ - -#ifdef WITH_WSREP -static -void -wsrep_kill_victim( - const trx_t * const trx, - const lock_t *lock) -{ - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(lock->trx)); - - /* quit for native mysql */ - if (!wsrep_on(trx->mysql_thd)) return; - - my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE); - my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE); - - if ((bf_this && !bf_other) || - (bf_this && bf_other && wsrep_trx_order_before( - trx->mysql_thd, lock->trx->mysql_thd))) { - - if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - if (wsrep_debug) { - fprintf(stderr, "WSREP: BF victim waiting\n"); - } - /* cannot release lock, until our lock - is in the queue*/ - } else if (lock->trx != trx) { - if (wsrep_log_conflicts) { - if (bf_this) { - fputs("\n*** Priority TRANSACTION:\n", - stderr); - } else { - fputs("\n*** Victim 
TRANSACTION:\n", - stderr); - } - - wsrep_trx_print_locking(stderr, trx, 3000); - - if (bf_other) { - fputs("\n*** Priority TRANSACTION:\n", - stderr); - } else { - fputs("\n*** Victim TRANSACTION:\n", - stderr); - } - wsrep_trx_print_locking(stderr, lock->trx, 3000); - - fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n", - stderr); - - if (lock_get_type(lock) == LOCK_REC) { - lock_rec_print(stderr, lock); - } else { - lock_table_print(stderr, lock); - } - } - - lock->trx->abort_type = TRX_WSREP_ABORT; - wsrep_innobase_kill_one_trx(trx->mysql_thd, - (const trx_t*) trx, lock->trx, TRUE); - lock->trx->abort_type = TRX_SERVER_ABORT; - } - } -} -#endif -/*********************************************************************//** -Checks if some other transaction has a conflicting explicit lock request -in the queue, so that we have to wait. -@return lock or NULL */ -static -const lock_t* -lock_rec_other_has_conflicting( -/*===========================*/ - enum lock_mode mode, /*!< in: LOCK_S or LOCK_X, - possibly ORed to LOCK_GAP or - LOC_REC_NOT_GAP, - LOCK_INSERT_INTENTION */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - const trx_t* trx) /*!< in: our transaction */ -{ - const lock_t* lock; - ibool is_supremum; - - ut_ad(lock_mutex_own()); - - is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM); - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next_const(heap_no, lock)) { - -#ifdef WITH_WSREP - if (lock_rec_has_to_wait(TRUE, trx, mode, lock, is_supremum)) { - if (wsrep_on(trx->mysql_thd)) { - trx_mutex_enter(lock->trx); - wsrep_kill_victim(trx, lock); - trx_mutex_exit(lock->trx); - } -#else - if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) { -#endif /* WITH_WSREP */ - - return(lock); - } - } - - return(NULL); -} - -/*********************************************************************//** -Looks for a suitable type record lock struct by 
the same trx on the same page. -This can be used to save space when a new record lock should be set on a page: -no new struct is needed, if a suitable old is found. -@return lock or NULL */ -UNIV_INLINE -lock_t* -lock_rec_find_similar_on_page( -/*==========================*/ - ulint type_mode, /*!< in: lock type_mode field */ - ulint heap_no, /*!< in: heap number of the record */ - lock_t* lock, /*!< in: lock_rec_get_first_on_page() */ - const trx_t* trx) /*!< in: transaction */ -{ - ut_ad(lock_mutex_own()); - - for (/* No op */; - lock != NULL; - lock = lock_rec_get_next_on_page(lock)) { - - if (lock->trx == trx - && lock->type_mode == type_mode - && lock_rec_get_n_bits(lock) > heap_no) { - - return(lock); - } - } - - return(NULL); -} - -/*********************************************************************//** -Checks if some transaction has an implicit x-lock on a record in a secondary -index. -@return transaction id of the transaction which has the x-lock, or 0; -NOTE that this function can return false positives but never false -negatives. The caller must confirm all positive results by calling -trx_is_active(). */ -static -trx_id_t -lock_sec_rec_some_has_impl( -/*=======================*/ - const rec_t* rec, /*!< in: user record */ - dict_index_t* index, /*!< in: secondary index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - trx_id_t trx_id; - trx_id_t max_trx_id; - const page_t* page = page_align(rec); - - ut_ad(!lock_mutex_own()); - ut_ad(!mutex_own(&trx_sys->mutex)); - ut_ad(!dict_index_is_clust(index)); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - max_trx_id = page_get_max_trx_id(page); - - /* Some transaction may have an implicit x-lock on the record only - if the max trx id for the page >= min trx id for the trx list, or - database recovery is running. We do not write the changes of a page - max trx id to the log, and therefore during recovery, this value - for a page may be incorrect. 
*/ - - if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) { - - trx_id = 0; - - } else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) { - - buf_page_print(page, 0, 0); - - /* The page is corrupt: try to avoid a crash by returning 0 */ - trx_id = 0; - - /* In this case it is possible that some transaction has an implicit - x-lock. We have to look in the clustered index. */ - - } else { - trx_id = row_vers_impl_x_locked(rec, index, offsets); - } - - return(trx_id); -} - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Checks if some transaction, other than given trx_id, has an explicit -lock on the given rec, in the given precise_mode. -@return the transaction, whose id is not equal to trx_id, that has an -explicit lock on the given rec, in the given precise_mode or NULL.*/ -static -trx_t* -lock_rec_other_trx_holds_expl( -/*==========================*/ - ulint precise_mode, /*!< in: LOCK_S or LOCK_X - possibly ORed to LOCK_GAP or - LOCK_REC_NOT_GAP. 
*/ - trx_id_t trx_id, /*!< in: trx holding implicit - lock on rec */ - const rec_t* rec, /*!< in: user record */ - const buf_block_t* block) /*!< in: buffer block - containing the record */ -{ - trx_t* holds = NULL; - - lock_mutex_enter(); - mutex_enter(&trx_sys->mutex); - - trx_id_t* impl_trx_desc = trx_find_descriptor(trx_sys->descriptors, - trx_sys->descr_n_used, - trx_id); - if (impl_trx_desc) { - ut_ad(trx_id == *impl_trx_desc); - ulint heap_no = page_rec_get_heap_no(rec); - ulint rw_trx_count = trx_sys->descr_n_used; - trx_id_t* rw_trx_snapshot = static_cast<trx_id_t *> - (ut_malloc(sizeof(trx_id_t) * rw_trx_count)); - memcpy(rw_trx_snapshot, trx_sys->descriptors, - sizeof(trx_id_t) * rw_trx_count); - - mutex_exit(&trx_sys->mutex); - - for (ulint i = 0; i < rw_trx_count; i++) { - - lock_t* expl_lock = lock_rec_has_expl(precise_mode, - block, heap_no, - rw_trx_snapshot[i]); - if (expl_lock && expl_lock->trx->id != trx_id) { - /* An explicit lock is held by trx other than - the trx holding the implicit lock. */ - holds = expl_lock->trx; - break; - } - } - - ut_free(rw_trx_snapshot); - - } else { - mutex_exit(&trx_sys->mutex); - } - - lock_mutex_exit(); - - return(holds); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Return approximate number or record locks (bits set in the bitmap) for -this transaction. Since delete-marked records may be removed, the -record count will not be precise. -The caller must be holding lock_sys->mutex. 
*/ -UNIV_INTERN -ulint -lock_number_of_rows_locked( -/*=======================*/ - const trx_lock_t* trx_lock) /*!< in: transaction locks */ -{ - const lock_t* lock; - ulint n_records = 0; - - ut_ad(lock_mutex_own()); - - for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks); - lock != NULL; - lock = UT_LIST_GET_NEXT(trx_locks, lock)) { - - if (lock_get_type_low(lock) == LOCK_REC) { - ulint n_bit; - ulint n_bits = lock_rec_get_n_bits(lock); - - for (n_bit = 0; n_bit < n_bits; n_bit++) { - if (lock_rec_get_nth_bit(lock, n_bit)) { - n_records++; - } - } - } - } - - return(n_records); -} - -/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/ - -#ifdef WITH_WSREP -static -void -wsrep_print_wait_locks( -/*============*/ - lock_t* c_lock) /* conflicting lock to print */ -{ - if (wsrep_debug && c_lock->trx->lock.wait_lock != c_lock) { - fprintf(stderr, "WSREP: c_lock != wait lock\n"); - if (lock_get_type_low(c_lock) & LOCK_TABLE) - lock_table_print(stderr, c_lock); - else - lock_rec_print(stderr, c_lock); - - if (lock_get_type_low(c_lock->trx->lock.wait_lock) & LOCK_TABLE) - lock_table_print(stderr, c_lock->trx->lock.wait_lock); - else - lock_rec_print(stderr, c_lock->trx->lock.wait_lock); - } -} -#endif /* WITH_WSREP */ - -/*********************************************************************//** -Check if lock1 has higher priority than lock2. -NULL has lowest priority. -If neither of them is wait lock, the first one has higher priority. -If only one of them is a wait lock, it has lower priority. -Otherwise, the one with an older transaction has higher priority. -@returns true if lock1 has higher priority, false otherwise. */ -bool -has_higher_priority( - lock_t *lock1, - lock_t *lock2) -{ - if (lock1 == NULL) { - return false; - } else if (lock2 == NULL) { - return true; - } - // No preference. Compre them by wait mode and trx age. 
- if (!lock_get_wait(lock1)) { - return true; - } else if (!lock_get_wait(lock2)) { - return false; - } - return lock1->trx->start_time_micro <= lock2->trx->start_time_micro; -} - -/*********************************************************************//** -Insert a lock to the hash list according to the mode (whether it is a wait -lock) and the age of the transaction the it is associated with. -If the lock is not a wait lock, insert it to the head of the hash list. -Otherwise, insert it to the middle of the wait locks according to the age of -the transaciton. */ -static -dberr_t -lock_rec_insert_by_trx_age( - lock_t *in_lock) /*!< in: lock to be insert */{ - ulint space; - ulint page_no; - ulint rec_fold; - lock_t* node; - lock_t* next; - hash_cell_t* cell; - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - rec_fold = lock_rec_fold(space, page_no); - cell = hash_get_nth_cell(lock_sys->rec_hash, - hash_calc_hash(rec_fold, lock_sys->rec_hash)); - - node = (lock_t *) cell->node; - // If in_lock is not a wait lock, we insert it to the head of the list. 
- if (node == NULL || !lock_get_wait(in_lock) || has_higher_priority(in_lock, node)) { - cell->node = in_lock; - in_lock->hash = node; - if (lock_get_wait(in_lock)) { - lock_grant(in_lock, true); - return DB_SUCCESS_LOCKED_REC; - } - return DB_SUCCESS; - } - while (node != NULL && has_higher_priority((lock_t *) node->hash, - in_lock)) { - node = (lock_t *) node->hash; - } - next = (lock_t *) node->hash; - node->hash = in_lock; - in_lock->hash = next; - - if (lock_get_wait(in_lock) && !lock_rec_has_to_wait_in_queue(in_lock)) { - lock_grant(in_lock, true); - if (cell->node != in_lock) { - // Move it to the front of the queue - node->hash = in_lock->hash; - next = (lock_t *) cell->node; - cell->node = in_lock; - in_lock->hash = next; - } - return DB_SUCCESS_LOCKED_REC; - } - - return DB_SUCCESS; -} - -static -bool -lock_queue_validate( - const lock_t *in_lock) /*!< in: lock whose hash list is to be validated */ -{ - ulint space; - ulint page_no; - ulint rec_fold; - hash_cell_t* cell; - lock_t* next; - bool wait_lock = false; - - if (in_lock == NULL) { - return true; - } - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - rec_fold = lock_rec_fold(space, page_no); - cell = hash_get_nth_cell(lock_sys->rec_hash, - hash_calc_hash(rec_fold, lock_sys->rec_hash)); - next = (lock_t *) cell->node; - while (next != NULL) { - // If this is a granted lock, check that there's no wait lock before it. 
- if (!lock_get_wait(next)) { - ut_ad(!wait_lock); - } else { - wait_lock = true; - } - next = (lock_t *) next->hash; - } - return true; -} - -static -void -lock_rec_insert_to_head( - lock_t *in_lock, /*!< in: lock to be insert */ - ulint rec_fold) /*!< in: rec_fold of the page */ -{ - hash_cell_t* cell; - lock_t* node; - - if (in_lock == NULL) { - return; - } - - cell = hash_get_nth_cell(lock_sys->rec_hash, - hash_calc_hash(rec_fold, lock_sys->rec_hash)); - node = (lock_t *) cell->node; - if (node != in_lock) { - cell->node = in_lock; - in_lock->hash = node; - } -} - -/*********************************************************************//** -Creates a new record lock and inserts it to the lock queue. Does NOT check -for deadlocks or lock compatibility! -@return created lock */ -static -lock_t* -lock_rec_create( -/*============*/ -#ifdef WITH_WSREP - lock_t* const c_lock, /* conflicting lock */ - que_thr_t* thr, -#endif - ulint type_mode,/*!< in: lock mode and wait - flag, type is ignored and - replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in/out: transaction */ - ibool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns - trx mutex */ -{ - lock_t* lock; - ulint page_no; - ulint space; - ulint rec_fold; - ulint n_bits; - ulint n_bytes; - bool wait_lock; - const page_t* page; - - ut_ad(lock_mutex_own()); - ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx)); - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); - - /* Non-locking autocommit read-only transactions should not set - any locks. 
*/ - assert_trx_in_list(trx); - - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - page = block->frame; - - btr_assert_not_corrupted(block, index); - - /* If rec is the supremum record, then we reset the gap and - LOCK_REC_NOT_GAP bits, as all locks on the supremum are - automatically of the gap type */ - - if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { - ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); - - type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); - } - - wait_lock = type_mode & LOCK_WAIT; - - /* Make lock bitmap bigger by a safety margin */ - n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN; - n_bytes = 1 + n_bits / 8; - - lock = static_cast<lock_t*>( - mem_heap_alloc(trx->lock.lock_heap, sizeof(lock_t) + n_bytes)); - - lock->trx = trx; - - lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC; - lock->index = index; - - lock->un_member.rec_lock.space = space; - lock->un_member.rec_lock.page_no = page_no; - lock->un_member.rec_lock.n_bits = n_bytes * 8; - rec_fold = lock_rec_fold(space, page_no); - - /* Reset to zero the bitmap which resides immediately after the - lock struct */ - - lock_rec_bitmap_reset(lock); - - /* Set the bit corresponding to rec */ - lock_rec_set_nth_bit(lock, heap_no); - - lock->requested_time = ut_time(); - lock->wait_time = 0; - - index->table->n_rec_locks++; - - ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted); - -#ifdef WITH_WSREP - if (c_lock && - wsrep_on(trx->mysql_thd) && - wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - lock_t *hash = (lock_t *)c_lock->hash; - lock_t *prev = NULL; - - while (hash && - wsrep_thd_is_BF(((lock_t *)hash)->trx->mysql_thd, TRUE) && - wsrep_trx_order_before( - ((lock_t *)hash)->trx->mysql_thd, - trx->mysql_thd)) { - prev = hash; - hash = (lock_t *)hash->hash; - } - lock->hash = hash; - if (prev) { - prev->hash = lock; - } else { - c_lock->hash = lock; - } - /* - * delayed conflict resolution '...kill_one_trx' was not called, - 
* if victim was waiting for some other lock - */ - trx_mutex_enter(c_lock->trx); - if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - - c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE; - - if (wsrep_debug) { - wsrep_print_wait_locks(c_lock); - } - - trx->lock.que_state = TRX_QUE_LOCK_WAIT; - lock_set_lock_and_trx_wait(lock, trx); - UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); - - ut_ad(thr != NULL); - trx->lock.wait_thr = thr; - thr->state = QUE_THR_LOCK_WAIT; - - /* have to release trx mutex for the duration of - victim lock release. This will eventually call - lock_grant, which wants to grant trx mutex again - */ - if (caller_owns_trx_mutex) { - trx_mutex_exit(trx); - } - lock_cancel_waiting_and_release( - c_lock->trx->lock.wait_lock); - - if (caller_owns_trx_mutex) { - trx_mutex_enter(trx); - } - - /* trx might not wait for c_lock, but some other lock - does not matter if wait_lock was released above - */ - if (c_lock->trx->lock.wait_lock == c_lock) { - lock_reset_lock_and_trx_wait(lock); - } - - trx_mutex_exit(c_lock->trx); - - if (wsrep_debug) { - fprintf( - stderr, - "WSREP: c_lock canceled %llu\n", - (ulonglong) c_lock->trx->id); - } - - /* have to bail out here to avoid lock_set_lock... 
*/ - return(lock); - } - trx_mutex_exit(c_lock->trx); - } else if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS - && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) { - if (wait_lock) { - HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock); - } else { - lock_rec_insert_to_head(lock, rec_fold); - } - } else { - HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock); - } -#else - if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS - && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) { - if (wait_lock) { - HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock); - } else { - lock_rec_insert_to_head(lock, rec_fold); - } - } else { - HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock); - } -#endif /* WITH_WSREP */ - - lock_sys->rec_num++; - - if (!caller_owns_trx_mutex) { - trx_mutex_enter(trx); - } - ut_ad(trx_mutex_own(trx)); - - if (type_mode & LOCK_WAIT) { - lock_set_lock_and_trx_wait(lock, trx); - } - - UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); - - if (!caller_owns_trx_mutex) { - trx_mutex_exit(trx); - } - - MONITOR_INC(MONITOR_RECLOCK_CREATED); - MONITOR_INC(MONITOR_NUM_RECLOCK); - return(lock); -} - -/*********************************************************************//** -Enqueues a waiting request for a lock which cannot be granted immediately. -Checks for deadlocks. 
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that -there was a deadlock, but another transaction was chosen as a victim, -and we got the lock immediately: no need to wait then */ -static -dberr_t -lock_rec_enqueue_waiting( -/*=====================*/ -#ifdef WITH_WSREP - lock_t* c_lock, /* conflicting lock */ -#endif - ulint type_mode,/*!< in: lock mode this - transaction is requesting: - LOCK_S or LOCK_X, possibly - ORed with LOCK_GAP or - LOCK_REC_NOT_GAP, ORed with - LOCK_INSERT_INTENTION if this - waiting lock request is set - when performing an insert of - an index record */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; - lock_t* lock; - trx_id_t victim_trx_id; - ulint sec; - ulint ms; - ulint space; - ulint page_no; - dberr_t err; - - - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); - - trx = thr_get_trx(thr); - - ut_ad(trx_mutex_own(trx)); - - /* Test if there already is some other reason to suspend thread: - we do not enqueue a lock request if the query thread should be - stopped anyway */ - - if (que_thr_stop(thr)) { - ut_error; - - return(DB_QUE_THR_SUSPENDED); - } - - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - break; - case TRX_DICT_OP_TABLE: - case TRX_DICT_OP_INDEX: - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: a record lock wait happens" - " in a dictionary operation!\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fputs(".\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - stderr); - ut_ad(0); - } - - /* Enqueue the lock request that will wait to be granted, note that - we already own the trx mutex. 
*/ - lock = lock_rec_create( -#ifdef WITH_WSREP - c_lock, thr, -#endif /* WITH_WSREP */ - type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE); - - /* Release the mutex to obey the latching order. - This is safe, because lock_deadlock_check_and_resolve() - is invoked when a lock wait is enqueued for the currently - running transaction. Because trx is a running transaction - (it is not currently suspended because of a lock wait), - its state can only be changed by this thread, which is - currently associated with the transaction. */ - - trx_mutex_exit(trx); - - victim_trx_id = lock_deadlock_check_and_resolve(lock, trx); - - trx_mutex_enter(trx); - - if (victim_trx_id != 0) { - - ut_ad(victim_trx_id == trx->id); - - lock_reset_lock_and_trx_wait(lock); - lock_rec_reset_nth_bit(lock, heap_no); - - return(DB_DEADLOCK); - - } else if (trx->lock.wait_lock == NULL) { - - /* If there was a deadlock but we chose another - transaction as a victim, it is possible that we - already have the lock now granted! */ - - err = DB_SUCCESS_LOCKED_REC; - } else { - trx->lock.que_state = TRX_QUE_LOCK_WAIT; - - trx->lock.was_chosen_as_deadlock_victim = FALSE; - trx->lock.wait_started = ut_time(); - - if (UNIV_UNLIKELY(trx->take_stats)) { - ut_usectime(&sec, &ms); - trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; - } - - ut_a(que_thr_stop(thr)); - -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ", - trx->id); - ut_print_name(stderr, trx, FALSE, index->name); - } -#endif /* UNIV_DEBUG */ - - MONITOR_INC(MONITOR_LOCKREC_WAIT); - - trx->n_rec_lock_waits++; - - err = DB_LOCK_WAIT; - } - - // Move it only when it does not cause a deadlock. 
- if (err != DB_DEADLOCK - && innodb_lock_schedule_algorithm - == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS - && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) { - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - HASH_DELETE(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), lock); - dberr_t res = lock_rec_insert_by_trx_age(lock); - if (res != DB_SUCCESS) { - return res; - } - } - - return err; -} - -/*********************************************************************//** -Adds a record lock request in the record queue. The request is normally -added as the last in the queue, but if there are no waiting lock requests -on the record, and the request to be added is not a waiting request, we -can reuse a suitable record lock object already existing on the same page, -just setting the appropriate bit in its bitmap. This is a low-level function -which does NOT check for deadlocks or lock compatibility! -@return lock where the bit was set */ -static -lock_t* -lock_rec_add_to_queue( -/*==================*/ - ulint type_mode,/*!< in: lock mode, wait, gap - etc. flags; type is ignored - and replaced by LOCK_REC */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of the record */ - dict_index_t* index, /*!< in: index of record */ - trx_t* trx, /*!< in/out: transaction */ - ibool caller_owns_trx_mutex) - /*!< in: TRUE if caller owns the - transaction mutex */ -{ - lock_t* lock; - lock_t* first_lock; - - ut_ad(lock_mutex_own()); - ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx)); - ut_ad(dict_index_is_clust(index) - || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION); -#ifdef UNIV_DEBUG - switch (type_mode & LOCK_MODE_MASK) { - case LOCK_X: - case LOCK_S: - break; - default: - ut_error; - } - - if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) { - enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S - ? 
LOCK_X - : LOCK_S; - const lock_t* other_lock - = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT, - block, heap_no, trx->id); -#ifdef WITH_WSREP - /* this can potentionally assert with wsrep */ - if (wsrep_thd_is_wsrep(trx->mysql_thd)) { - if (wsrep_debug && other_lock) { - fprintf(stderr, - "WSREP: InnoDB assert ignored\n"); - } - } else { - ut_a(!other_lock); - } -#else - ut_a(!other_lock); -#endif /* WITH_WSREP */ - } -#endif /* UNIV_DEBUG */ - - type_mode |= LOCK_REC; - - /* If rec is the supremum record, then we can reset the gap bit, as - all locks on the supremum are automatically of the gap type, and we - try to avoid unnecessary memory consumption of a new record lock - struct for a gap type lock */ - - if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) { - ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); - - /* There should never be LOCK_REC_NOT_GAP on a supremum - record, but let us play safe */ - - type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); - } - - /* Look for a waiting lock request on the same record or on a gap */ - - for (first_lock = lock = lock_rec_get_first_on_page(block); - lock != NULL; - lock = lock_rec_get_next_on_page(lock)) { - - if (lock_get_wait(lock) - && lock_rec_get_nth_bit(lock, heap_no)) { -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - if (wsrep_debug) { - fprintf(stderr, - "BF skipping wait: " - TRX_ID_FMT "\n", - trx->id); - lock_rec_print(stderr, lock); - } - } else -#endif - goto somebody_waits; - } - } - - if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) { - - /* Look for a similar record lock on the same page: - if one is found and there are no waiting lock requests, - we can just set the bit */ - - lock = lock_rec_find_similar_on_page( - type_mode, heap_no, first_lock, trx); - - if (lock) { - - lock_rec_set_nth_bit(lock, heap_no); - - return(lock); - } - } - -somebody_waits: -#ifdef WITH_WSREP - return(lock_rec_create(NULL, NULL, - type_mode, block, heap_no, index, trx, - caller_owns_trx_mutex)); -#else - 
return(lock_rec_create( - type_mode, block, heap_no, index, trx, - caller_owns_trx_mutex)); -#endif /* WITH_WSREP */ -} - -/** Record locking request status */ -enum lock_rec_req_status { - /** Failed to acquire a lock */ - LOCK_REC_FAIL, - /** Succeeded in acquiring a lock (implicit or already acquired) */ - LOCK_REC_SUCCESS, - /** Explicitly created a new lock */ - LOCK_REC_SUCCESS_CREATED -}; - -/*********************************************************************//** -This is a fast routine for locking a record in the most common cases: -there are no explicit locks on the page, or there is just one lock, owned -by this transaction, and of the right type_mode. This is a low-level function -which does NOT look at implicit locks! Checks lock compatibility within -explicit locks. This function sets a normal next-key lock, or in the case of -a page supremum record, a gap type lock. -@return whether the locking succeeded */ -UNIV_INLINE -enum lock_rec_req_status -lock_rec_lock_fast( -/*===============*/ - ibool impl, /*!< in: if TRUE, no lock is set - if no wait is necessary: we - assume that the caller will - set an implicit lock */ - ulint mode, /*!< in: lock mode: LOCK_X or - LOCK_S possibly ORed to either - LOCK_GAP or LOCK_REC_NOT_GAP */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of record */ - dict_index_t* index, /*!< in: index of record */ - que_thr_t* thr) /*!< in: query thread */ -{ - lock_t* lock; - trx_t* trx; - enum lock_rec_req_status status = LOCK_REC_SUCCESS; - - ut_ad(lock_mutex_own()); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad((LOCK_MODE_MASK & mode) == LOCK_S - || (LOCK_MODE_MASK & mode) == LOCK_X); - ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) == 0 - || mode - 
(LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); - - DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL);); - - lock = lock_rec_get_first_on_page(block); - - trx = thr_get_trx(thr); - - if (lock == NULL) { - if (!impl) { - /* Note that we don't own the trx mutex. */ -#ifdef WITH_WSREP - lock = lock_rec_create(NULL, thr, - mode, block, heap_no, index, trx, FALSE); -#else - lock = lock_rec_create( - mode, block, heap_no, index, trx, FALSE); -#endif - - } - status = LOCK_REC_SUCCESS_CREATED; - } else { - trx_mutex_enter(trx); - - if (lock_rec_get_next_on_page(lock) - || lock->trx != trx - || lock->type_mode != (mode | LOCK_REC) - || lock_rec_get_n_bits(lock) <= heap_no) { - - status = LOCK_REC_FAIL; - } else if (!impl) { - /* If the nth bit of the record lock is already set - then we do not set a new lock bit, otherwise we do - set */ - if (!lock_rec_get_nth_bit(lock, heap_no)) { - lock_rec_set_nth_bit(lock, heap_no); - status = LOCK_REC_SUCCESS_CREATED; - } - } - - trx_mutex_exit(trx); - } - - return(status); -} - -/*********************************************************************//** -This is the general, and slower, routine for locking a record. This is a -low-level function which does NOT look at implicit locks! Checks lock -compatibility within explicit locks. This function sets a normal next-key -lock, or in the case of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, -or DB_QUE_THR_SUSPENDED */ -static -dberr_t -lock_rec_lock_slow( -/*===============*/ - ibool impl, /*!< in: if TRUE, no lock is set - if no wait is necessary: we - assume that the caller will - set an implicit lock */ - ulint mode, /*!< in: lock mode: LOCK_X or - LOCK_S possibly ORed to either - LOCK_GAP or LOCK_REC_NOT_GAP */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of record */ - dict_index_t* index, /*!< in: index of record */ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; -#ifdef WITH_WSREP - lock_t* c_lock(NULL); -#endif - dberr_t err = DB_SUCCESS; - - ut_ad(lock_mutex_own()); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad((LOCK_MODE_MASK & mode) == LOCK_S - || (LOCK_MODE_MASK & mode) == LOCK_X); - ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) == 0 - || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); - - DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK);); - - trx = thr_get_trx(thr); - trx_mutex_enter(trx); - - if (lock_rec_has_expl(mode, block, heap_no, trx->id)) { - - /* The trx already has a strong enough lock on rec: do - nothing */ - -#ifdef WITH_WSREP - } else if ((c_lock = (lock_t *)lock_rec_other_has_conflicting( - static_cast<enum lock_mode>(mode), - block, heap_no, trx))) { -#else - } else if (lock_rec_other_has_conflicting( - static_cast<enum lock_mode>(mode), - block, heap_no, trx)) { -#endif /* WITH_WSREP */ - - /* If another transaction has a non-gap conflicting - request in the queue, as this transaction does not - have a lock strong enough already granted on the - record, we have to wait. 
*/ - -#ifdef WITH_WSREP - /* c_lock is NULL here if jump to enqueue_waiting happened - but it's ok because lock is not NULL in that case and c_lock - is not used. */ - err = lock_rec_enqueue_waiting(c_lock, - mode, block, heap_no, index, thr); -#else - err = lock_rec_enqueue_waiting( - mode, block, heap_no, index, thr); -#endif /* WITH_WSREP */ - - } else if (!impl) { - /* Set the requested lock on the record, note that - we already own the transaction mutex. */ - - lock_rec_add_to_queue( - LOCK_REC | mode, block, heap_no, index, trx, TRUE); - - err = DB_SUCCESS_LOCKED_REC; - } - - trx_mutex_exit(trx); - - return(err); -} - -/*********************************************************************//** -Tries to lock the specified record in the mode requested. If not immediately -possible, enqueues a waiting lock request. This is a low-level function -which does NOT look at implicit locks! Checks lock compatibility within -explicit locks. This function sets a normal next-key lock, or in the case -of a page supremum record, a gap type lock. 
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK, -or DB_QUE_THR_SUSPENDED */ -static -dberr_t -lock_rec_lock( -/*==========*/ - ibool impl, /*!< in: if TRUE, no lock is set - if no wait is necessary: we - assume that the caller will - set an implicit lock */ - ulint mode, /*!< in: lock mode: LOCK_X or - LOCK_S possibly ORed to either - LOCK_GAP or LOCK_REC_NOT_GAP */ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no,/*!< in: heap number of record */ - dict_index_t* index, /*!< in: index of record */ - que_thr_t* thr) /*!< in: query thread */ -{ - ut_ad(lock_mutex_own()); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad((LOCK_MODE_MASK & mode) == LOCK_S - || (LOCK_MODE_MASK & mode) == LOCK_X); - ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP - || mode - (LOCK_MODE_MASK & mode) == 0); - - ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index)); - - /* We try a simplified and faster subroutine for the most - common cases */ - switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) { - case LOCK_REC_SUCCESS: - return(DB_SUCCESS); - case LOCK_REC_SUCCESS_CREATED: - return(DB_SUCCESS_LOCKED_REC); - case LOCK_REC_FAIL: - return(lock_rec_lock_slow(impl, mode, block, - heap_no, index, thr)); - } - - ut_error; - return(DB_ERROR); -} - -/*********************************************************************//** -Checks if a waiting record lock request still has to wait in a queue. 
-@return lock that is causing the wait */ -static -const lock_t* -lock_rec_has_to_wait_in_queue( -/*==========================*/ - const lock_t* wait_lock) /*!< in: waiting record lock */ -{ - const lock_t* lock; - ulint space; - ulint page_no; - ulint heap_no; - ulint bit_mask; - ulint bit_offset; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_wait(wait_lock)); - ut_ad(lock_get_type_low(wait_lock) == LOCK_REC); - - space = wait_lock->un_member.rec_lock.space; - page_no = wait_lock->un_member.rec_lock.page_no; - heap_no = lock_rec_find_set_bit(wait_lock); - - bit_offset = heap_no / 8; - bit_mask = static_cast<ulint>(1 << (heap_no % 8)); - - for (lock = lock_rec_get_first_on_page_addr(space, page_no); - lock != wait_lock; - lock = lock_rec_get_next_on_page_const(lock)) { - - const byte* p = (const byte*) &lock[1]; - - if (heap_no < lock_rec_get_n_bits(lock) - && (p[bit_offset] & bit_mask) - && lock_has_to_wait(wait_lock, lock)) { -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) && - wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) { - /* don't wait for another BF lock */ - continue; - } -#endif - return(lock); - } - } - - return(NULL); -} - -/*************************************************************//** -Grants a lock to a waiting lock request and releases the waiting transaction. -The caller must hold lock_sys->mutex but not lock->trx->mutex. 
*/ -static -void -lock_grant( -/*=======*/ - lock_t* lock, /*!< in/out: waiting lock request */ - bool owns_trx_mutex) /*!< in: whether lock->trx->mutex is owned */ -{ - ut_ad(lock_mutex_own()); - - lock_reset_lock_and_trx_wait(lock); - - if (!owns_trx_mutex) { - trx_mutex_enter(lock->trx); - } - - if (lock_get_mode(lock) == LOCK_AUTO_INC) { - dict_table_t* table = lock->un_member.tab_lock.table; - - if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) { - fprintf(stderr, - "InnoDB: Error: trx already had" - " an AUTO-INC lock!\n"); - } else { - table->autoinc_trx = lock->trx; - - ib_vector_push(lock->trx->autoinc_locks, &lock); - } - } - -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " ends\n", - lock->trx->id); - } -#endif /* UNIV_DEBUG */ - - /* If we are resolving a deadlock by choosing another transaction - as a victim, then our original transaction may not be in the - TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait - for it */ - - if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - que_thr_t* thr; - - thr = que_thr_end_lock_wait(lock->trx); - - if (thr != NULL) { - lock_wait_release_thread_if_suspended(thr); - } - } - - /* Cumulate total lock wait time for statistics */ - if (lock_get_type_low(lock) & LOCK_TABLE) { - lock->trx->total_table_lock_wait_time += - (ulint)difftime(ut_time(), lock->trx->lock.wait_started); - } else { - lock->trx->total_rec_lock_wait_time += - (ulint)difftime(ut_time(), lock->trx->lock.wait_started); - } - - lock->wait_time = (ulint)difftime(ut_time(), lock->requested_time); - - if (!owns_trx_mutex) { - trx_mutex_exit(lock->trx); - } -} - -/*************************************************************//** -Cancels a waiting record lock request and releases the waiting transaction -that requested it. NOTE: does NOT check if waiting lock requests behind this -one can now be granted! 
*/ -static -void -lock_rec_cancel( -/*============*/ - lock_t* lock) /*!< in: waiting record lock request */ -{ - que_thr_t* thr; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_type_low(lock) == LOCK_REC); - - /* Reset the bit (there can be only one set bit) in the lock bitmap */ - lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); - - /* Reset the wait flag and the back pointer to lock in trx */ - - lock_reset_lock_and_trx_wait(lock); - - /* The following function releases the trx from lock wait */ - - trx_mutex_enter(lock->trx); - - thr = que_thr_end_lock_wait(lock->trx); - - if (thr != NULL) { - lock_wait_release_thread_if_suspended(thr); - } - - trx_mutex_exit(lock->trx); -} - -static -void -lock_grant_and_move_on_page( - ulint space, - ulint page_no) -{ - lock_t* lock; - lock_t* next; - lock_t* previous; - ulint rec_fold = lock_rec_fold(space, page_no); - - previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash, - hash_calc_hash(rec_fold, lock_sys->rec_hash))->node; - if (previous == NULL) { - return; - } - if (previous->un_member.rec_lock.space == space && - previous->un_member.rec_lock.page_no == page_no) { - lock = previous; - } - else { - next = (lock_t *) previous->hash; - while (next && - (next->un_member.rec_lock.space != space || - next->un_member.rec_lock.page_no != page_no)) { - previous = next; - next = (lock_t *) previous->hash; - } - lock = (lock_t *) previous->hash; - } - - ut_ad(previous->hash == lock || previous == lock); - /* Grant locks if there are no conflicting locks ahead. - Move granted locks to the head of the list. */ - for (;lock != NULL;) { - /* If the lock is a wait lock on this page, and it does not need to wait. */ - if ((lock->un_member.rec_lock.space == space) - && (lock->un_member.rec_lock.page_no == page_no) - && lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - - lock_grant(lock, false); - - if (previous != NULL) { - /* Move the lock to the head of the list. 
*/ - HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock); - lock_rec_insert_to_head(lock, rec_fold); - } else { - /* Already at the head of the list. */ - previous = lock; - } - /* Move on to the next lock. */ - lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous)); - } else { - previous = lock; - lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock)); - } - } -} - -/*************************************************************//** -Removes a record lock request, waiting or granted, from the queue and -grants locks to other transactions in the queue if they now are entitled -to a lock. NOTE: all record locks contained in in_lock are removed. */ -static -void -lock_rec_dequeue_from_page( -/*=======================*/ - lock_t* in_lock) /*!< in: record lock object: all - record locks which are contained in - this lock object are removed; - transactions waiting behind will - get their lock requests granted, - if they are now qualified to it */ -{ - ulint space; - ulint page_no; - lock_t* lock; - trx_lock_t* trx_lock; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_type_low(in_lock) == LOCK_REC); - /* We may or may not be holding in_lock->trx->mutex here. */ - - trx_lock = &in_lock->trx->lock; - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - - in_lock->index->table->n_rec_locks--; - - HASH_DELETE(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), in_lock); - lock_sys->rec_num--; - - UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock); - - MONITOR_INC(MONITOR_RECLOCK_REMOVED); - MONITOR_DEC(MONITOR_NUM_RECLOCK); - - if (innodb_lock_schedule_algorithm - == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS || - thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) { - /* Check if waiting locks in the queue can now be granted: grant - locks if there are no conflicting locks ahead. Stop at the first - X lock that is waiting or has been granted. 
*/ - - for (lock = lock_rec_get_first_on_page_addr(space, page_no); - lock != NULL; - lock = lock_rec_get_next_on_page(lock)) { - - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - - /* Grant the lock */ - ut_ad(lock->trx != in_lock->trx); - lock_grant(lock, false); - } - } - } else { - lock_grant_and_move_on_page(space, page_no); - } -} - -/*************************************************************//** -Removes a record lock request, waiting or granted, from the queue. */ -static -void -lock_rec_discard( -/*=============*/ - lock_t* in_lock) /*!< in: record lock object: all - record locks which are contained - in this lock object are removed */ -{ - ulint space; - ulint page_no; - trx_lock_t* trx_lock; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_type_low(in_lock) == LOCK_REC); - - trx_lock = &in_lock->trx->lock; - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - - in_lock->index->table->n_rec_locks--; - - HASH_DELETE(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), in_lock); - lock_sys->rec_num--; - - UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock); - - MONITOR_INC(MONITOR_RECLOCK_REMOVED); - MONITOR_DEC(MONITOR_NUM_RECLOCK); -} - -/*************************************************************//** -Removes record lock objects set on an index page which is discarded. This -function does not move locks, or check for waiting locks, therefore the -lock bitmaps must already be reset when this function is called. 
*/ -static -void -lock_rec_free_all_from_discard_page( -/*================================*/ - const buf_block_t* block) /*!< in: page to be discarded */ -{ - ulint space; - ulint page_no; - lock_t* lock; - lock_t* next_lock; - - ut_ad(lock_mutex_own()); - - space = buf_block_get_space(block); - page_no = buf_block_get_page_no(block); - - lock = lock_rec_get_first_on_page_addr(space, page_no); - - while (lock != NULL) { - ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED); - ut_ad(!lock_get_wait(lock)); - - next_lock = lock_rec_get_next_on_page(lock); - - lock_rec_discard(lock); - - lock = next_lock; - } -} - -/*============= RECORD LOCK MOVING AND INHERITING ===================*/ - -/*************************************************************//** -Resets the lock bits for a single record. Releases transactions waiting for -lock requests here. */ -static -void -lock_rec_reset_and_release_wait( -/*============================*/ - const buf_block_t* block, /*!< in: buffer block containing - the record */ - ulint heap_no)/*!< in: heap number of record */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next(heap_no, lock)) { - - if (lock_get_wait(lock)) { - lock_rec_cancel(lock); - } else { - lock_rec_reset_nth_bit(lock, heap_no); - } - } -} - -/*************************************************************//** -Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type) -of another record as gap type locks, but does not reset the lock bits of -the other record. Also waiting lock requests on rec are inherited as -GRANTED gap locks. 
*/ -static -void -lock_rec_inherit_to_gap( -/*====================*/ - const buf_block_t* heir_block, /*!< in: block containing the - record which inherits */ - const buf_block_t* block, /*!< in: block containing the - record from which inherited; - does NOT reset the locks on - this record */ - ulint heir_heap_no, /*!< in: heap_no of the - inheriting record */ - ulint heap_no) /*!< in: heap_no of the - donating record */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - - /* If srv_locks_unsafe_for_binlog is TRUE or session is using - READ COMMITTED isolation level, we do not want locks set - by an UPDATE or a DELETE to be inherited as gap type locks. But we - DO want S-locks/X-locks(taken for replace) set by a consistency - constraint to be inherited also then */ - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next(heap_no, lock)) { - - if (!lock_rec_get_insert_intention(lock) - && !((srv_locks_unsafe_for_binlog - || lock->trx->isolation_level - <= TRX_ISO_READ_COMMITTED) - && lock_get_mode(lock) == - (lock->trx->duplicates ? LOCK_S : LOCK_X))) { - - lock_rec_add_to_queue( - LOCK_REC | LOCK_GAP | lock_get_mode(lock), - heir_block, heir_heap_no, lock->index, - lock->trx, FALSE); - } - } -} - -/*************************************************************//** -Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type) -of another record as gap type locks, but does not reset the lock bits of the -other record. Also waiting lock requests are inherited as GRANTED gap locks. 
*/ -static -void -lock_rec_inherit_to_gap_if_gap_lock( -/*================================*/ - const buf_block_t* block, /*!< in: buffer block */ - ulint heir_heap_no, /*!< in: heap_no of - record which inherits */ - ulint heap_no) /*!< in: heap_no of record - from which inherited; - does NOT reset the locks - on this record */ -{ - lock_t* lock; - - lock_mutex_enter(); - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next(heap_no, lock)) { - - if (!lock_rec_get_insert_intention(lock) - && (heap_no == PAGE_HEAP_NO_SUPREMUM - || !lock_rec_get_rec_not_gap(lock))) { - - lock_rec_add_to_queue( - LOCK_REC | LOCK_GAP | lock_get_mode(lock), - block, heir_heap_no, lock->index, - lock->trx, FALSE); - } - } - - lock_mutex_exit(); -} - -/*************************************************************//** -Moves the locks of a record to another record and resets the lock bits of -the donating record. */ -static -void -lock_rec_move( -/*==========*/ - const buf_block_t* receiver, /*!< in: buffer block containing - the receiving record */ - const buf_block_t* donator, /*!< in: buffer block containing - the donating record */ - ulint receiver_heap_no,/*!< in: heap_no of the record - which gets the locks; there - must be no lock requests - on it! 
*/ - ulint donator_heap_no)/*!< in: heap_no of the record - which gives the locks */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - - ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL); - - for (lock = lock_rec_get_first(donator, donator_heap_no); - lock != NULL; - lock = lock_rec_get_next(donator_heap_no, lock)) { - - const ulint type_mode = lock->type_mode; - - lock_rec_reset_nth_bit(lock, donator_heap_no); - - if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { - lock_reset_lock_and_trx_wait(lock); - } - - /* Note that we FIRST reset the bit, and then set the lock: - the function works also if donator == receiver */ - - lock_rec_add_to_queue( - type_mode, receiver, receiver_heap_no, - lock->index, lock->trx, FALSE); - } - - ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL); -} - -/*************************************************************//** -Updates the lock table when we have reorganized a page. NOTE: we copy -also the locks set on the infimum of the page; the infimum may carry -locks if an update of a record is occurring on the page, and its locks -were temporarily stored on the infimum. */ -UNIV_INTERN -void -lock_move_reorganize_page( -/*======================*/ - const buf_block_t* block, /*!< in: old index page, now - reorganized */ - const buf_block_t* oblock) /*!< in: copy of the old, not - reorganized page */ -{ - lock_t* lock; - UT_LIST_BASE_NODE_T(lock_t) old_locks; - mem_heap_t* heap = NULL; - ulint comp; - - lock_mutex_enter(); - - lock = lock_rec_get_first_on_page(block); - - if (lock == NULL) { - lock_mutex_exit(); - - return; - } - - heap = mem_heap_create(256); - - /* Copy first all the locks on the page to heap and reset the - bitmaps in the original locks; chain the copies of the locks - using the trx_locks field in them. 
*/ - - UT_LIST_INIT(old_locks); - - do { - /* Make a copy of the lock */ - lock_t* old_lock = lock_rec_copy(lock, heap); - - UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock); - - /* Reset bitmap of lock */ - lock_rec_bitmap_reset(lock); - - if (lock_get_wait(lock)) { - - lock_reset_lock_and_trx_wait(lock); - } - - lock = lock_rec_get_next_on_page(lock); - } while (lock != NULL); - - comp = page_is_comp(block->frame); - ut_ad(comp == page_is_comp(oblock->frame)); - - for (lock = UT_LIST_GET_FIRST(old_locks); lock; - lock = UT_LIST_GET_NEXT(trx_locks, lock)) { - /* NOTE: we copy also the locks set on the infimum and - supremum of the page; the infimum may carry locks if an - update of a record is occurring on the page, and its locks - were temporarily stored on the infimum */ - page_cur_t cur1; - page_cur_t cur2; - - page_cur_set_before_first(block, &cur1); - page_cur_set_before_first(oblock, &cur2); - - /* Set locks according to old locks */ - for (;;) { - ulint old_heap_no; - ulint new_heap_no; - - ut_ad(comp || !memcmp(page_cur_get_rec(&cur1), - page_cur_get_rec(&cur2), - rec_get_data_size_old( - page_cur_get_rec( - &cur2)))); - if (UNIV_LIKELY(comp)) { - old_heap_no = rec_get_heap_no_new( - page_cur_get_rec(&cur2)); - new_heap_no = rec_get_heap_no_new( - page_cur_get_rec(&cur1)); - } else { - old_heap_no = rec_get_heap_no_old( - page_cur_get_rec(&cur2)); - new_heap_no = rec_get_heap_no_old( - page_cur_get_rec(&cur1)); - } - - if (lock_rec_get_nth_bit(lock, old_heap_no)) { - - /* Clear the bit in old_lock. */ - ut_d(lock_rec_reset_nth_bit(lock, - old_heap_no)); - - /* NOTE that the old lock bitmap could be too - small for the new heap number! 
*/ - - lock_rec_add_to_queue( - lock->type_mode, block, new_heap_no, - lock->index, lock->trx, FALSE); - - /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM - && lock_get_wait(lock)) { - fprintf(stderr, - "---\n--\n!!!Lock reorg: supr type %lu\n", - lock->type_mode); - } */ - } - - if (UNIV_UNLIKELY - (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) { - - ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM); - break; - } - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - -#ifdef UNIV_DEBUG - { - ulint i = lock_rec_find_set_bit(lock); - - /* Check that all locks were moved. */ - if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) { - fprintf(stderr, - "lock_move_reorganize_page():" - " %lu not moved in %p\n", - (ulong) i, (void*) lock); - ut_error; - } - } -#endif /* UNIV_DEBUG */ - } - - lock_mutex_exit(); - - mem_heap_free(heap); - -#ifdef UNIV_DEBUG_LOCK_VALIDATE - ut_ad(lock_rec_validate_page(block)); -#endif -} - -/*************************************************************//** -Moves the explicit locks on user records to another page if a record -list end is moved to another page. */ -UNIV_INTERN -void -lock_move_rec_list_end( -/*===================*/ - const buf_block_t* new_block, /*!< in: index page to move to */ - const buf_block_t* block, /*!< in: index page */ - const rec_t* rec) /*!< in: record on page: this - is the first record moved */ -{ - lock_t* lock; - const ulint comp = page_rec_is_comp(rec); - - lock_mutex_enter(); - - /* Note: when we move locks from record to record, waiting locks - and possible granted gap type locks behind them are enqueued in - the original order, because new elements are inserted to a hash - table to the end of the hash chain, and lock_rec_add_to_queue - does not reuse locks if there are waiters in the queue. 
*/ - - for (lock = lock_rec_get_first_on_page(block); lock; - lock = lock_rec_get_next_on_page(lock)) { - page_cur_t cur1; - page_cur_t cur2; - const ulint type_mode = lock->type_mode; - - page_cur_position(rec, block, &cur1); - - if (page_cur_is_before_first(&cur1)) { - page_cur_move_to_next(&cur1); - } - - page_cur_set_before_first(new_block, &cur2); - page_cur_move_to_next(&cur2); - - /* Copy lock requests on user records to new page and - reset the lock bits on the old */ - - while (!page_cur_is_after_last(&cur1)) { - ulint heap_no; - - if (comp) { - heap_no = rec_get_heap_no_new( - page_cur_get_rec(&cur1)); - } else { - heap_no = rec_get_heap_no_old( - page_cur_get_rec(&cur1)); - ut_ad(!memcmp(page_cur_get_rec(&cur1), - page_cur_get_rec(&cur2), - rec_get_data_size_old( - page_cur_get_rec(&cur2)))); - } - - if (lock_rec_get_nth_bit(lock, heap_no)) { - lock_rec_reset_nth_bit(lock, heap_no); - - if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { - lock_reset_lock_and_trx_wait(lock); - } - - if (comp) { - heap_no = rec_get_heap_no_new( - page_cur_get_rec(&cur2)); - } else { - heap_no = rec_get_heap_no_old( - page_cur_get_rec(&cur2)); - } - - lock_rec_add_to_queue( - type_mode, new_block, heap_no, - lock->index, lock->trx, FALSE); - } - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - } - - lock_mutex_exit(); - -#ifdef UNIV_DEBUG_LOCK_VALIDATE - ut_ad(lock_rec_validate_page(block)); - ut_ad(lock_rec_validate_page(new_block)); -#endif -} - -/*************************************************************//** -Moves the explicit locks on user records to another page if a record -list start is moved to another page. 
*/ -UNIV_INTERN -void -lock_move_rec_list_start( -/*=====================*/ - const buf_block_t* new_block, /*!< in: index page to - move to */ - const buf_block_t* block, /*!< in: index page */ - const rec_t* rec, /*!< in: record on page: - this is the first - record NOT copied */ - const rec_t* old_end) /*!< in: old - previous-to-last - record on new_page - before the records - were copied */ -{ - lock_t* lock; - const ulint comp = page_rec_is_comp(rec); - - ut_ad(block->frame == page_align(rec)); - ut_ad(new_block->frame == page_align(old_end)); - - lock_mutex_enter(); - - for (lock = lock_rec_get_first_on_page(block); lock; - lock = lock_rec_get_next_on_page(lock)) { - page_cur_t cur1; - page_cur_t cur2; - const ulint type_mode = lock->type_mode; - - page_cur_set_before_first(block, &cur1); - page_cur_move_to_next(&cur1); - - page_cur_position(old_end, new_block, &cur2); - page_cur_move_to_next(&cur2); - - /* Copy lock requests on user records to new page and - reset the lock bits on the old */ - - while (page_cur_get_rec(&cur1) != rec) { - ulint heap_no; - - if (comp) { - heap_no = rec_get_heap_no_new( - page_cur_get_rec(&cur1)); - } else { - heap_no = rec_get_heap_no_old( - page_cur_get_rec(&cur1)); - ut_ad(!memcmp(page_cur_get_rec(&cur1), - page_cur_get_rec(&cur2), - rec_get_data_size_old( - page_cur_get_rec( - &cur2)))); - } - - if (lock_rec_get_nth_bit(lock, heap_no)) { - lock_rec_reset_nth_bit(lock, heap_no); - - if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { - lock_reset_lock_and_trx_wait(lock); - } - - if (comp) { - heap_no = rec_get_heap_no_new( - page_cur_get_rec(&cur2)); - } else { - heap_no = rec_get_heap_no_old( - page_cur_get_rec(&cur2)); - } - - lock_rec_add_to_queue( - type_mode, new_block, heap_no, - lock->index, lock->trx, FALSE); - } - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - -#ifdef UNIV_DEBUG - if (page_rec_is_supremum(rec)) { - ulint i; - - for (i = PAGE_HEAP_NO_USER_LOW; - i < lock_rec_get_n_bits(lock); i++) { - 
if (UNIV_UNLIKELY - (lock_rec_get_nth_bit(lock, i))) { - - fprintf(stderr, - "lock_move_rec_list_start():" - " %lu not moved in %p\n", - (ulong) i, (void*) lock); - ut_error; - } - } - } -#endif /* UNIV_DEBUG */ - } - - lock_mutex_exit(); - -#ifdef UNIV_DEBUG_LOCK_VALIDATE - ut_ad(lock_rec_validate_page(block)); -#endif -} - -/*************************************************************//** -Updates the lock table when a page is split to the right. */ -UNIV_INTERN -void -lock_update_split_right( -/*====================*/ - const buf_block_t* right_block, /*!< in: right page */ - const buf_block_t* left_block) /*!< in: left page */ -{ - ulint heap_no = lock_get_min_heap_no(right_block); - - lock_mutex_enter(); - - /* Move the locks on the supremum of the left page to the supremum - of the right page */ - - lock_rec_move(right_block, left_block, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); - - /* Inherit the locks to the supremum of left page from the successor - of the infimum on right page */ - - lock_rec_inherit_to_gap(left_block, right_block, - PAGE_HEAP_NO_SUPREMUM, heap_no); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a page is merged to the right. 
*/ -UNIV_INTERN -void -lock_update_merge_right( -/*====================*/ - const buf_block_t* right_block, /*!< in: right page to - which merged */ - const rec_t* orig_succ, /*!< in: original - successor of infimum - on the right page - before merge */ - const buf_block_t* left_block) /*!< in: merged index - page which will be - discarded */ -{ - lock_mutex_enter(); - - /* Inherit the locks from the supremum of the left page to the - original successor of infimum on the right page, to which the left - page was merged */ - - lock_rec_inherit_to_gap(right_block, left_block, - page_rec_get_heap_no(orig_succ), - PAGE_HEAP_NO_SUPREMUM); - - /* Reset the locks on the supremum of the left page, releasing - waiting transactions */ - - lock_rec_reset_and_release_wait(left_block, - PAGE_HEAP_NO_SUPREMUM); - - lock_rec_free_all_from_discard_page(left_block); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when the root page is copied to another in -btr_root_raise_and_insert. Note that we leave lock structs on the -root page, even though they do not make sense on other than leaf -pages: the reason is that in a pessimistic update the infimum record -of the root page will act as a dummy carrier of the locks of the record -to be updated. */ -UNIV_INTERN -void -lock_update_root_raise( -/*===================*/ - const buf_block_t* block, /*!< in: index page to which copied */ - const buf_block_t* root) /*!< in: root page */ -{ - lock_mutex_enter(); - - /* Move the locks on the supremum of the root to the supremum - of block */ - - lock_rec_move(block, root, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a page is copied to another and the original page -is removed from the chain of leaf pages, except if page is the root! 
*/ -UNIV_INTERN -void -lock_update_copy_and_discard( -/*=========================*/ - const buf_block_t* new_block, /*!< in: index page to - which copied */ - const buf_block_t* block) /*!< in: index page; - NOT the root! */ -{ - lock_mutex_enter(); - - /* Move the locks on the supremum of the old page to the supremum - of new_page */ - - lock_rec_move(new_block, block, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); - lock_rec_free_all_from_discard_page(block); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a page is split to the left. */ -UNIV_INTERN -void -lock_update_split_left( -/*===================*/ - const buf_block_t* right_block, /*!< in: right page */ - const buf_block_t* left_block) /*!< in: left page */ -{ - ulint heap_no = lock_get_min_heap_no(right_block); - - lock_mutex_enter(); - - /* Inherit the locks to the supremum of the left page from the - successor of the infimum on the right page */ - - lock_rec_inherit_to_gap(left_block, right_block, - PAGE_HEAP_NO_SUPREMUM, heap_no); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a page is merged to the left. 
*/ -UNIV_INTERN -void -lock_update_merge_left( -/*===================*/ - const buf_block_t* left_block, /*!< in: left page to - which merged */ - const rec_t* orig_pred, /*!< in: original predecessor - of supremum on the left page - before merge */ - const buf_block_t* right_block) /*!< in: merged index page - which will be discarded */ -{ - const rec_t* left_next_rec; - - ut_ad(left_block->frame == page_align(orig_pred)); - - lock_mutex_enter(); - - left_next_rec = page_rec_get_next_const(orig_pred); - - if (!page_rec_is_supremum(left_next_rec)) { - - /* Inherit the locks on the supremum of the left page to the - first record which was moved from the right page */ - - lock_rec_inherit_to_gap(left_block, left_block, - page_rec_get_heap_no(left_next_rec), - PAGE_HEAP_NO_SUPREMUM); - - /* Reset the locks on the supremum of the left page, - releasing waiting transactions */ - - lock_rec_reset_and_release_wait(left_block, - PAGE_HEAP_NO_SUPREMUM); - } - - /* Move the locks from the supremum of right page to the supremum - of the left page */ - - lock_rec_move(left_block, right_block, - PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM); - - lock_rec_free_all_from_discard_page(right_block); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a page is split and merged to -two pages. 
*/ -UNIV_INTERN -void -lock_update_split_and_merge( - const buf_block_t* left_block, /*!< in: left page to which merged */ - const rec_t* orig_pred, /*!< in: original predecessor of - supremum on the left page before merge*/ - const buf_block_t* right_block) /*!< in: right page from which merged */ -{ - const rec_t* left_next_rec; - - ut_a(left_block && right_block); - ut_a(orig_pred); - - lock_mutex_enter(); - - left_next_rec = page_rec_get_next_const(orig_pred); - - /* Inherit the locks on the supremum of the left page to the - first record which was moved from the right page */ - lock_rec_inherit_to_gap( - left_block, left_block, - page_rec_get_heap_no(left_next_rec), - PAGE_HEAP_NO_SUPREMUM); - - /* Reset the locks on the supremum of the left page, - releasing waiting transactions */ - lock_rec_reset_and_release_wait(left_block, - PAGE_HEAP_NO_SUPREMUM); - - /* Inherit the locks to the supremum of the left page from the - successor of the infimum on the right page */ - lock_rec_inherit_to_gap(left_block, right_block, - PAGE_HEAP_NO_SUPREMUM, - lock_get_min_heap_no(right_block)); - - lock_mutex_exit(); -} - -/*************************************************************//** -Resets the original locks on heir and replaces them with gap type locks -inherited from rec. 
*/ -UNIV_INTERN -void -lock_rec_reset_and_inherit_gap_locks( -/*=================================*/ - const buf_block_t* heir_block, /*!< in: block containing the - record which inherits */ - const buf_block_t* block, /*!< in: block containing the - record from which inherited; - does NOT reset the locks on - this record */ - ulint heir_heap_no, /*!< in: heap_no of the - inheriting record */ - ulint heap_no) /*!< in: heap_no of the - donating record */ -{ - lock_mutex_enter(); - - lock_rec_reset_and_release_wait(heir_block, heir_heap_no); - - lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a page is discarded. */ -UNIV_INTERN -void -lock_update_discard( -/*================*/ - const buf_block_t* heir_block, /*!< in: index page - which will inherit the locks */ - ulint heir_heap_no, /*!< in: heap_no of the record - which will inherit the locks */ - const buf_block_t* block) /*!< in: index page - which will be discarded */ -{ - const page_t* page = block->frame; - const rec_t* rec; - ulint heap_no; - - lock_mutex_enter(); - - if (!lock_rec_get_first_on_page(block)) { - /* No locks exist on page, nothing to do */ - - lock_mutex_exit(); - - return; - } - - /* Inherit all the locks on the page to the record and reset all - the locks on the page */ - - if (page_is_comp(page)) { - rec = page + PAGE_NEW_INFIMUM; - - do { - heap_no = rec_get_heap_no_new(rec); - - lock_rec_inherit_to_gap(heir_block, block, - heir_heap_no, heap_no); - - lock_rec_reset_and_release_wait(block, heap_no); - - rec = page + rec_get_next_offs(rec, TRUE); - } while (heap_no != PAGE_HEAP_NO_SUPREMUM); - } else { - rec = page + PAGE_OLD_INFIMUM; - - do { - heap_no = rec_get_heap_no_old(rec); - - lock_rec_inherit_to_gap(heir_block, block, - heir_heap_no, heap_no); - - lock_rec_reset_and_release_wait(block, heap_no); - - rec = page + rec_get_next_offs(rec, 
FALSE); - } while (heap_no != PAGE_HEAP_NO_SUPREMUM); - } - - lock_rec_free_all_from_discard_page(block); - - lock_mutex_exit(); -} - -/*************************************************************//** -Updates the lock table when a new user record is inserted. */ -UNIV_INTERN -void -lock_update_insert( -/*===============*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec) /*!< in: the inserted record */ -{ - ulint receiver_heap_no; - ulint donator_heap_no; - - ut_ad(block->frame == page_align(rec)); - - /* Inherit the gap-locking locks for rec, in gap mode, from the next - record */ - - if (page_rec_is_comp(rec)) { - receiver_heap_no = rec_get_heap_no_new(rec); - donator_heap_no = rec_get_heap_no_new( - page_rec_get_next_low(rec, TRUE)); - } else { - receiver_heap_no = rec_get_heap_no_old(rec); - donator_heap_no = rec_get_heap_no_old( - page_rec_get_next_low(rec, FALSE)); - } - - lock_rec_inherit_to_gap_if_gap_lock( - block, receiver_heap_no, donator_heap_no); -} - -/*************************************************************//** -Updates the lock table when a record is removed. 
*/ -UNIV_INTERN -void -lock_update_delete( -/*===============*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec) /*!< in: the record to be removed */ -{ - const page_t* page = block->frame; - ulint heap_no; - ulint next_heap_no; - - ut_ad(page == page_align(rec)); - - if (page_is_comp(page)) { - heap_no = rec_get_heap_no_new(rec); - next_heap_no = rec_get_heap_no_new(page - + rec_get_next_offs(rec, - TRUE)); - } else { - heap_no = rec_get_heap_no_old(rec); - next_heap_no = rec_get_heap_no_old(page - + rec_get_next_offs(rec, - FALSE)); - } - - lock_mutex_enter(); - - /* Let the next record inherit the locks from rec, in gap mode */ - - lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no); - - /* Reset the lock bits on rec and release waiting transactions */ - - lock_rec_reset_and_release_wait(block, heap_no); - - lock_mutex_exit(); -} - -/*********************************************************************//** -Stores on the page infimum record the explicit locks of another record. -This function is used to store the lock state of a record when it is -updated and the size of the record changes in the update. The record -is moved in such an update, perhaps to another page. The infimum record -acts as a dummy carrier record, taking care of lock releases while the -actual record is being moved. 
*/ -UNIV_INTERN -void -lock_rec_store_on_page_infimum( -/*===========================*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec) /*!< in: record whose lock state - is stored on the infimum - record of the same page; lock - bits are reset on the - record */ -{ - ulint heap_no = page_rec_get_heap_no(rec); - - ut_ad(block->frame == page_align(rec)); - - lock_mutex_enter(); - - lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no); - - lock_mutex_exit(); -} - -/*********************************************************************//** -Restores the state of explicit lock requests on a single record, where the -state was stored on the infimum of the page. */ -UNIV_INTERN -void -lock_rec_restore_from_page_infimum( -/*===============================*/ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec, /*!< in: record whose lock state - is restored */ - const buf_block_t* donator)/*!< in: page (rec is not - necessarily on this page) - whose infimum stored the lock - state; lock bits are reset on - the infimum */ -{ - ulint heap_no = page_rec_get_heap_no(rec); - - lock_mutex_enter(); - - lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM); - - lock_mutex_exit(); -} - -/*=========== DEADLOCK CHECKING ======================================*/ - -/*********************************************************************//** -rewind(3) the file used for storing the latest detected deadlock and -print a heading message to stderr if printing of all deadlocks to stderr -is enabled. 
*/ -UNIV_INLINE -void -lock_deadlock_start_print() -/*=======================*/ -{ - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - - rewind(lock_latest_err_file); - ut_print_timestamp(lock_latest_err_file); - - if (srv_print_all_deadlocks) { - ut_print_timestamp(stderr); - fprintf(stderr, "InnoDB: transactions deadlock detected, " - "dumping detailed information.\n"); - ut_print_timestamp(stderr); - } -} - -/*********************************************************************//** -Print a message to the deadlock file and possibly to stderr. */ -UNIV_INLINE -void -lock_deadlock_fputs( -/*================*/ - const char* msg) /*!< in: message to print */ -{ - if (!srv_read_only_mode) { - fputs(msg, lock_latest_err_file); - - if (srv_print_all_deadlocks) { - fputs(msg, stderr); - } - } -} - -/*********************************************************************//** -Print transaction data to the deadlock file and possibly to stderr. */ -UNIV_INLINE -void -lock_deadlock_trx_print( -/*====================*/ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ -{ - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - - ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock); - ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); - ulint heap_size = mem_heap_get_size(trx->lock.lock_heap); - - mutex_enter(&trx_sys->mutex); - - trx_print_low(lock_latest_err_file, trx, max_query_len, - n_rec_locks, n_trx_locks, heap_size); - - if (srv_print_all_deadlocks) { - trx_print_low(stderr, trx, max_query_len, - n_rec_locks, n_trx_locks, heap_size); - } - - mutex_exit(&trx_sys->mutex); -} - -/*********************************************************************//** -Print lock data to the deadlock file and possibly to stderr. 
*/ -UNIV_INLINE -void -lock_deadlock_lock_print( -/*=====================*/ - const lock_t* lock) /*!< in: record or table type lock */ -{ - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - - if (lock_get_type_low(lock) == LOCK_REC) { - lock_rec_print(lock_latest_err_file, lock); - - if (srv_print_all_deadlocks) { - lock_rec_print(stderr, lock); - } - } else { - lock_table_print(lock_latest_err_file, lock); - - if (srv_print_all_deadlocks) { - lock_table_print(stderr, lock); - } - } -} - -/** Used in deadlock tracking. Protected by lock_sys->mutex. */ -static ib_uint64_t lock_mark_counter = 0; - -/** Check if the search is too deep. */ -#define lock_deadlock_too_deep(c) \ - (c->depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK \ - || c->cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK) - -/********************************************************************//** -Get the next lock in the queue that is owned by a transaction whose -sub-tree has not already been searched. -@return next lock or NULL if at end of queue */ -static -const lock_t* -lock_get_next_lock( -/*===============*/ - const lock_deadlock_ctx_t* - ctx, /*!< in: deadlock context */ - const lock_t* lock, /*!< in: lock in the queue */ - ulint heap_no)/*!< in: heap no if rec lock else - ULINT_UNDEFINED */ -{ - ut_ad(lock_mutex_own()); - - do { - if (lock_get_type_low(lock) == LOCK_REC) { - ut_ad(heap_no != ULINT_UNDEFINED); - lock = lock_rec_get_next_const(heap_no, lock); - } else { - ut_ad(heap_no == ULINT_UNDEFINED); - ut_ad(lock_get_type_low(lock) == LOCK_TABLE); - - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); - } - } while (lock != NULL - && lock->trx->lock.deadlock_mark > ctx->mark_start); - - ut_ad(lock == NULL - || lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock)); - - return(lock); -} - -/********************************************************************//** -Get the first lock to search. The search starts from the current -wait_lock. 
What we are really interested in is an edge from the -current wait_lock's owning transaction to another transaction that has -a lock ahead in the queue. We skip locks where the owning transaction's -sub-tree has already been searched. -@return first lock or NULL */ -static -const lock_t* -lock_get_first_lock( -/*================*/ - const lock_deadlock_ctx_t* - ctx, /*!< in: deadlock context */ - ulint* heap_no)/*!< out: heap no if rec lock, - else ULINT_UNDEFINED */ -{ - const lock_t* lock; - - ut_ad(lock_mutex_own()); - - lock = ctx->wait_lock; - - if (lock_get_type_low(lock) == LOCK_REC) { - - *heap_no = lock_rec_find_set_bit(lock); - ut_ad(*heap_no != ULINT_UNDEFINED); - - lock = lock_rec_get_first_on_page_addr( - lock->un_member.rec_lock.space, - lock->un_member.rec_lock.page_no); - - /* Position on the first lock on the physical record. */ - if (!lock_rec_get_nth_bit(lock, *heap_no)) { - lock = lock_rec_get_next_const(*heap_no, lock); - } - - } else { - *heap_no = ULINT_UNDEFINED; - ut_ad(lock_get_type_low(lock) == LOCK_TABLE); - dict_table_t* table = lock->un_member.tab_lock.table; - lock = UT_LIST_GET_FIRST(table->locks); - } - - ut_a(lock != NULL); - ut_a(lock != ctx->wait_lock || - innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS); - ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock)); - - return(lock); -} - -/********************************************************************//** -Notify that a deadlock has been detected and print the conflicting -transaction info. 
*/ -static -void -lock_deadlock_notify( -/*=================*/ - const lock_deadlock_ctx_t* ctx, /*!< in: deadlock context */ - const lock_t* lock) /*!< in: lock causing - deadlock */ -{ - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - - lock_deadlock_start_print(); - - lock_deadlock_fputs("\n*** (1) TRANSACTION:\n"); - - lock_deadlock_trx_print(ctx->wait_lock->trx, 3000); - - lock_deadlock_fputs("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n"); - - lock_deadlock_lock_print(ctx->wait_lock); - - lock_deadlock_fputs("*** (2) TRANSACTION:\n"); - - lock_deadlock_trx_print(lock->trx, 3000); - - lock_deadlock_fputs("*** (2) HOLDS THE LOCK(S):\n"); - - lock_deadlock_lock_print(lock); - - /* It is possible that the joining transaction was granted its - lock when we rolled back some other waiting transaction. */ - - if (ctx->start->lock.wait_lock != 0) { - lock_deadlock_fputs( - "*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n"); - - lock_deadlock_lock_print(ctx->start->lock.wait_lock); - } - -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fputs("Deadlock detected\n", stderr); - } -#endif /* UNIV_DEBUG */ -} - -/********************************************************************//** -Select the victim transaction that should be rolledback. -@return victim transaction */ -static -const trx_t* -lock_deadlock_select_victim( -/*========================*/ - const lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */ -{ - ut_ad(lock_mutex_own()); - ut_ad(ctx->start->lock.wait_lock != 0); - ut_ad(ctx->wait_lock->trx != ctx->start); - - if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) { - /* The joining transaction is 'smaller', - choose it as the victim and roll it back. 
*/ - -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) { - return(ctx->wait_lock->trx); - } - else -#endif /* WITH_WSREP */ - return(ctx->start); - } - -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(ctx->wait_lock->trx->mysql_thd, TRUE)) { - return(ctx->start); - } - else -#endif /* WITH_WSREP */ - return(ctx->wait_lock->trx); -} - -/********************************************************************//** -Pop the deadlock search state from the stack. -@return stack slot instance that was on top of the stack. */ -static -const lock_stack_t* -lock_deadlock_pop( -/*==============*/ - lock_deadlock_ctx_t* ctx) /*!< in/out: context */ -{ - ut_ad(lock_mutex_own()); - - ut_ad(ctx->depth > 0); - - return(&lock_stack[--ctx->depth]); -} - -/********************************************************************//** -Push the deadlock search state onto the stack. -@return slot that was used in the stack */ -static -lock_stack_t* -lock_deadlock_push( -/*===============*/ - lock_deadlock_ctx_t* ctx, /*!< in/out: context */ - const lock_t* lock, /*!< in: current lock */ - ulint heap_no) /*!< in: heap number */ -{ - ut_ad(lock_mutex_own()); - - /* Save current search state. */ - - if (LOCK_STACK_SIZE > ctx->depth) { - lock_stack_t* stack; - - stack = &lock_stack[ctx->depth++]; - - stack->lock = lock; - stack->heap_no = heap_no; - stack->wait_lock = ctx->wait_lock; - - return(stack); - } - - return(NULL); -} - -/********************************************************************//** -Looks iteratively for a deadlock. Note: the joining transaction may -have been granted its lock by the deadlock checks. 
-@return 0 if no deadlock else the victim transaction id.*/ -static -trx_id_t -lock_deadlock_search( -/*=================*/ - lock_deadlock_ctx_t* ctx, /*!< in/out: deadlock context */ - struct thd_wait_reports*waitee_ptr) /*!< in/out: list of waitees */ -{ - const lock_t* lock; - ulint heap_no; - - ut_ad(lock_mutex_own()); - ut_ad(!trx_mutex_own(ctx->start)); - - ut_ad(ctx->start != NULL); - ut_ad(ctx->wait_lock != NULL); - assert_trx_in_list(ctx->wait_lock->trx); - ut_ad(ctx->mark_start <= lock_mark_counter); - - /* Look at the locks ahead of wait_lock in the lock queue. */ - lock = lock_get_first_lock(ctx, &heap_no); - - for (;;) { - - /* We should never visit the same sub-tree more than once. */ - ut_ad(lock == NULL - || lock->trx->lock.deadlock_mark <= ctx->mark_start); - - while (ctx->depth > 0 && lock == NULL) { - const lock_stack_t* stack; - - /* Restore previous search state. */ - - stack = lock_deadlock_pop(ctx); - - lock = stack->lock; - heap_no = stack->heap_no; - ctx->wait_lock = stack->wait_lock; - - lock = lock_get_next_lock(ctx, lock, heap_no); - } - - if (lock == NULL) { - break; - } else if (lock == ctx->wait_lock) { - - /* We can mark this subtree as searched */ - ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start); - - lock->trx->lock.deadlock_mark = ++lock_mark_counter; - - /* We are not prepared for an overflow. This 64-bit - counter should never wrap around. At 10^9 increments - per second, it would take 10^3 years of uptime. */ - - ut_ad(lock_mark_counter > 0); - - lock = NULL; - - } else if (!lock_has_to_wait(ctx->wait_lock, lock)) { - - /* No conflict, next lock */ - lock = lock_get_next_lock(ctx, lock, heap_no); - - } else if (lock->trx == ctx->start) { - - /* Found a cycle. */ - - lock_deadlock_notify(ctx, lock); - - return(lock_deadlock_select_victim(ctx)->id); - - } else if (lock_deadlock_too_deep(ctx)) { - - /* Search too deep to continue. 
*/ - - ctx->too_deep = TRUE; - -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) { - return(ctx->wait_lock->trx->id); - } - else -#endif /* WITH_WSREP */ - /* Select the joining transaction as the victim. */ - return(ctx->start->id); - - } else { - /* We do not need to report autoinc locks to the upper - layer. These locks are released before commit, so they - can not cause deadlocks with binlog-fixed commit - order. */ - if (waitee_ptr && - (lock_get_type_low(lock) != LOCK_TABLE || - lock_get_mode(lock) != LOCK_AUTO_INC)) { - if (waitee_ptr->used == - sizeof(waitee_ptr->waitees) / - sizeof(waitee_ptr->waitees[0])) { - waitee_ptr->next = - (struct thd_wait_reports *) - mem_alloc(sizeof(*waitee_ptr)); - waitee_ptr = waitee_ptr->next; - if (!waitee_ptr) { - ctx->too_deep = TRUE; - return(ctx->start->id); - } - waitee_ptr->next = NULL; - waitee_ptr->used = 0; - } - waitee_ptr->waitees[waitee_ptr->used++] = lock->trx; - } - - if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - - /* Another trx ahead has requested a lock in an - incompatible mode, and is itself waiting for a lock. */ - - ++ctx->cost; - - /* Save current search state. */ - if (!lock_deadlock_push(ctx, lock, heap_no)) { - - /* Unable to save current search state, stack - size not big enough. */ - - ctx->too_deep = TRUE; - -#ifdef WITH_WSREP - if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) - return(lock->trx->id); - else -#endif /* WITH_WSREP */ - - return(ctx->start->id); - } - - ctx->wait_lock = lock->trx->lock.wait_lock; - lock = lock_get_first_lock(ctx, &heap_no); - - if (lock->trx->lock.deadlock_mark > ctx->mark_start) { - lock = lock_get_next_lock(ctx, lock, heap_no); - } - - } else { - lock = lock_get_next_lock(ctx, lock, heap_no); - } - } - } - - ut_a(lock == NULL && ctx->depth == 0); - - /* No deadlock found. */ - return(0); -} - -/********************************************************************//** -Print info about transaction that was rolled back. 
*/ -static -void -lock_deadlock_joining_trx_print( -/*============================*/ - const trx_t* trx, /*!< in: transaction rolled back */ - const lock_t* lock) /*!< in: lock trx wants */ -{ - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - - /* If the lock search exceeds the max step - or the max depth, the current trx will be - the victim. Print its information. */ - lock_deadlock_start_print(); - - lock_deadlock_fputs( - "TOO DEEP OR LONG SEARCH IN THE LOCK TABLE" - " WAITS-FOR GRAPH, WE WILL ROLL BACK" - " FOLLOWING TRANSACTION \n\n" - "*** TRANSACTION:\n"); - - lock_deadlock_trx_print(trx, 3000); - - lock_deadlock_fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n"); - - lock_deadlock_lock_print(lock); -} - -/********************************************************************//** -Rollback transaction selected as the victim. */ -static -void -lock_deadlock_trx_rollback( -/*=======================*/ - lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */ -{ - trx_t* trx; - - ut_ad(lock_mutex_own()); - - trx = ctx->wait_lock->trx; - - lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (1)\n"); - - trx_mutex_enter(trx); - - trx->lock.was_chosen_as_deadlock_victim = TRUE; - - lock_cancel_waiting_and_release(trx->lock.wait_lock); - - trx_mutex_exit(trx); -} - -static -void -lock_report_waiters_to_mysql( -/*=======================*/ - struct thd_wait_reports* waitee_buf_ptr, /*!< in: set of trxs */ - THD* mysql_thd, /*!< in: THD */ - trx_id_t victim_trx_id) /*!< in: Trx selected - as deadlock victim, if - any */ -{ - struct thd_wait_reports* p; - struct thd_wait_reports* q; - ulint i; - - p = waitee_buf_ptr; - while (p) { - i = 0; - while (i < p->used) { - trx_t *w_trx = p->waitees[i]; - /* There is no need to report waits to a trx already - selected as a victim. */ - if (w_trx->id != victim_trx_id) { - /* If thd_report_wait_for() decides to kill the - transaction, then we will get a call back into - innobase_kill_query. 
We mark this by setting - current_lock_mutex_owner, so we can avoid trying - to recursively take lock_sys->mutex. */ - w_trx->abort_type = TRX_REPLICATION_ABORT; - thd_report_wait_for(mysql_thd, w_trx->mysql_thd); - w_trx->abort_type = TRX_SERVER_ABORT; - } - ++i; - } - q = p->next; - if (p != waitee_buf_ptr) { - mem_free(p); - } - p = q; - } -} - - -/********************************************************************//** -Checks if a joining lock request results in a deadlock. If a deadlock is -found this function will resolve the dadlock by choosing a victim transaction -and rolling it back. It will attempt to resolve all deadlocks. The returned -transaction id will be the joining transaction id or 0 if some other -transaction was chosen as a victim and rolled back or no deadlock found. - -@return id of transaction chosen as victim or 0 */ -static -trx_id_t -lock_deadlock_check_and_resolve( -/*============================*/ - const lock_t* lock, /*!< in: lock the transaction is requesting */ - const trx_t* trx) /*!< in: transaction */ -{ - trx_id_t victim_trx_id; - struct thd_wait_reports waitee_buf; - struct thd_wait_reports*waitee_buf_ptr; - THD* start_mysql_thd; - - ut_ad(trx != NULL); - ut_ad(lock != NULL); - ut_ad(lock_mutex_own()); - assert_trx_in_list(trx); - - start_mysql_thd = trx->mysql_thd; - if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) { - waitee_buf_ptr = &waitee_buf; - } else { - waitee_buf_ptr = NULL; - } - - /* Try and resolve as many deadlocks as possible. */ - do { - lock_deadlock_ctx_t ctx; - - /* Reset the context. */ - ctx.cost = 0; - ctx.depth = 0; - ctx.start = trx; - ctx.too_deep = FALSE; - ctx.wait_lock = lock; - ctx.mark_start = lock_mark_counter; - - if (waitee_buf_ptr) { - waitee_buf_ptr->next = NULL; - waitee_buf_ptr->used = 0; - } - - victim_trx_id = lock_deadlock_search(&ctx, waitee_buf_ptr); - - /* Report waits to upper layer, as needed. 
*/ - if (waitee_buf_ptr) { - lock_report_waiters_to_mysql(waitee_buf_ptr, - start_mysql_thd, - victim_trx_id); - } - - /* Search too deep, we rollback the joining transaction. */ - if (ctx.too_deep) { - - ut_a(trx == ctx.start); - ut_a(victim_trx_id == trx->id); - -#ifdef WITH_WSREP - if (!wsrep_thd_is_BF(ctx.start->mysql_thd, TRUE)) - { -#endif /* WITH_WSREP */ - if (!srv_read_only_mode) { - lock_deadlock_joining_trx_print(trx, lock); - } -#ifdef WITH_WSREP - } else { - /* BF processor */; - } -#endif /* WITH_WSREP */ - - } else if (victim_trx_id != 0 && victim_trx_id != trx->id) { - - ut_ad(victim_trx_id == ctx.wait_lock->trx->id); - lock_deadlock_trx_rollback(&ctx); - - lock_deadlock_found = TRUE; - - MONITOR_INC(MONITOR_DEADLOCK); - srv_stats.lock_deadlock_count.inc(); - } - } while (victim_trx_id != 0 && victim_trx_id != trx->id); - - /* If the joining transaction was selected as the victim. */ - if (victim_trx_id != 0) { - ut_a(victim_trx_id == trx->id); - - MONITOR_INC(MONITOR_DEADLOCK); - srv_stats.lock_deadlock_count.inc(); - - lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n"); - - lock_deadlock_found = TRUE; - } - - return(victim_trx_id); -} - -/*========================= TABLE LOCKS ==============================*/ - -/*********************************************************************//** -Creates a table lock object and adds it as the last in the lock queue -of the table. Does NOT check for deadlocks or lock compatibility. -@return own: new lock object */ -UNIV_INLINE -lock_t* -lock_table_create( -/*==============*/ -#ifdef WITH_WSREP - lock_t* c_lock, /*!< in: conflicting lock */ -#endif - dict_table_t* table, /*!< in/out: database table - in dictionary cache */ - ulint type_mode,/*!< in: lock mode possibly ORed with - LOCK_WAIT */ - trx_t* trx) /*!< in: trx */ -{ - lock_t* lock; - - ut_ad(table && trx); - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(trx)); - - /* Non-locking autocommit read-only transactions should not set - any locks. 
*/ - assert_trx_in_list(trx); - - if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) { - ++table->n_waiting_or_granted_auto_inc_locks; - } - - /* For AUTOINC locking we reuse the lock instance only if - there is no wait involved else we allocate the waiting lock - from the transaction lock heap. */ - if (type_mode == LOCK_AUTO_INC) { - - lock = table->autoinc_lock; - - table->autoinc_trx = trx; - - ib_vector_push(trx->autoinc_locks, &lock); - } else { - lock = static_cast<lock_t*>( - mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock))); - } - - lock->type_mode = type_mode | LOCK_TABLE; - lock->trx = trx; - lock->requested_time = ut_time(); - lock->wait_time = 0; - - lock->un_member.tab_lock.table = table; - - ut_ad(table->n_ref_count > 0 || !table->can_be_evicted); - - UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock); - -#ifdef WITH_WSREP - if (wsrep_thd_is_wsrep(trx->mysql_thd)) { - if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - UT_LIST_INSERT_AFTER( - un_member.tab_lock.locks, table->locks, c_lock, lock); - } else { - UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); - } - - if (c_lock) { - trx_mutex_enter(c_lock->trx); - } - - if (c_lock && c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - - c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE; - - if (wsrep_debug) { - wsrep_print_wait_locks(c_lock); - wsrep_print_wait_locks(c_lock->trx->lock.wait_lock); - } - - /* have to release trx mutex for the duration of - victim lock release. 
This will eventually call - lock_grant, which wants to grant trx mutex again - */ - /* caller has trx_mutex, have to release for lock cancel */ - trx_mutex_exit(trx); - lock_cancel_waiting_and_release(c_lock->trx->lock.wait_lock); - trx_mutex_enter(trx); - - /* trx might not wait for c_lock, but some other lock - does not matter if wait_lock was released above - */ - if (c_lock->trx->lock.wait_lock == c_lock) { - lock_reset_lock_and_trx_wait(lock); - } - - if (wsrep_debug) { - fprintf(stderr, "WSREP: c_lock canceled %llu\n", - (ulonglong) c_lock->trx->id); - } - } - if (c_lock) { - trx_mutex_exit(c_lock->trx); - } - } else { -#endif /* WITH_WSREP */ - UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); -#ifdef WITH_WSREP - } -#endif /* WITH_WSREP */ - - if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) { - - lock_set_lock_and_trx_wait(lock, trx); - } - - ib_vector_push(lock->trx->lock.table_locks, &lock); - - MONITOR_INC(MONITOR_TABLELOCK_CREATED); - MONITOR_INC(MONITOR_NUM_TABLELOCK); - - return(lock); -} - -/*************************************************************//** -Pops autoinc lock requests from the transaction's autoinc_locks. We -handle the case where there are gaps in the array and they need to -be popped off the stack. */ -UNIV_INLINE -void -lock_table_pop_autoinc_locks( -/*=========================*/ - trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */ -{ - ut_ad(lock_mutex_own()); - ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); - - /* Skip any gaps, gaps are NULL lock entries in the - trx->autoinc_locks vector. */ - - do { - ib_vector_pop(trx->autoinc_locks); - - if (ib_vector_is_empty(trx->autoinc_locks)) { - return; - } - - } while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL); -} - -/*************************************************************//** -Removes an autoinc lock request from the transaction's autoinc_locks. 
*/ -UNIV_INLINE -void -lock_table_remove_autoinc_lock( -/*===========================*/ - lock_t* lock, /*!< in: table lock */ - trx_t* trx) /*!< in/out: transaction that owns the lock */ -{ - lock_t* autoinc_lock; - lint i = ib_vector_size(trx->autoinc_locks) - 1; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC); - ut_ad(lock_get_type_low(lock) & LOCK_TABLE); - ut_ad(!ib_vector_is_empty(trx->autoinc_locks)); - - /* With stored functions and procedures the user may drop - a table within the same "statement". This special case has - to be handled by deleting only those AUTOINC locks that were - held by the table being dropped. */ - - autoinc_lock = *static_cast<lock_t**>( - ib_vector_get(trx->autoinc_locks, i)); - - /* This is the default fast case. */ - - if (autoinc_lock == lock) { - lock_table_pop_autoinc_locks(trx); - } else { - /* The last element should never be NULL */ - ut_a(autoinc_lock != NULL); - - /* Handle freeing the locks from within the stack. */ - - while (--i >= 0) { - autoinc_lock = *static_cast<lock_t**>( - ib_vector_get(trx->autoinc_locks, i)); - - if (UNIV_LIKELY(autoinc_lock == lock)) { - void* null_var = NULL; - ib_vector_set(trx->autoinc_locks, i, &null_var); - return; - } - } - - /* Must find the autoinc lock. */ - ut_error; - } -} - -/*************************************************************//** -Removes a table lock request from the queue and the trx list of locks; -this is a low-level function which does NOT check if waiting requests -can now be granted. */ -UNIV_INLINE -void -lock_table_remove_low( -/*==================*/ - lock_t* lock) /*!< in/out: table lock */ -{ - trx_t* trx; - dict_table_t* table; - - ut_ad(lock_mutex_own()); - - trx = lock->trx; - table = lock->un_member.tab_lock.table; - - /* Remove the table from the transaction's AUTOINC vector, if - the lock that is being released is an AUTOINC lock. 
*/ - if (lock_get_mode(lock) == LOCK_AUTO_INC) { - - /* The table's AUTOINC lock can get transferred to - another transaction before we get here. */ - if (table->autoinc_trx == trx) { - table->autoinc_trx = NULL; - } - - /* The locks must be freed in the reverse order from - the one in which they were acquired. This is to avoid - traversing the AUTOINC lock vector unnecessarily. - - We only store locks that were granted in the - trx->autoinc_locks vector (see lock_table_create() - and lock_grant()). Therefore it can be empty and we - need to check for that. */ - - if (!lock_get_wait(lock) - && !ib_vector_is_empty(trx->autoinc_locks)) { - - lock_table_remove_autoinc_lock(lock, trx); - } - - ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); - table->n_waiting_or_granted_auto_inc_locks--; - } - - UT_LIST_REMOVE(trx_locks, trx->lock.trx_locks, lock); - UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); - - MONITOR_INC(MONITOR_TABLELOCK_REMOVED); - MONITOR_DEC(MONITOR_NUM_TABLELOCK); -} - -/*********************************************************************//** -Enqueues a waiting request for a table lock which cannot be granted -immediately. Checks for deadlocks. 
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or -DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another -transaction was chosen as a victim, and we got the lock immediately: -no need to wait then */ -static -dberr_t -lock_table_enqueue_waiting( -/*=======================*/ -#ifdef WITH_WSREP - lock_t* c_lock, /*!< in: conflicting lock */ -#endif - ulint mode, /*!< in: lock mode this transaction is - requesting */ - dict_table_t* table, /*!< in/out: table */ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; - lock_t* lock; - trx_id_t victim_trx_id; - ulint sec; - ulint ms; - - ut_ad(lock_mutex_own()); - ut_ad(!srv_read_only_mode); - - trx = thr_get_trx(thr); - ut_ad(trx_mutex_own(trx)); - - /* Test if there already is some other reason to suspend thread: - we do not enqueue a lock request if the query thread should be - stopped anyway */ - - if (que_thr_stop(thr)) { - ut_error; - - return(DB_QUE_THR_SUSPENDED); - } - - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - break; - case TRX_DICT_OP_TABLE: - case TRX_DICT_OP_INDEX: - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: a table lock wait happens" - " in a dictionary operation!\n" - "InnoDB: Table name ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(".\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - stderr); - ut_ad(0); - } - - /* Enqueue the lock request that will wait to be granted */ - -#ifdef WITH_WSREP - if (trx->lock.was_chosen_as_deadlock_victim) { - return(DB_DEADLOCK); - } - lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx); -#else - lock = lock_table_create(table, mode | LOCK_WAIT, trx); -#endif /* WITH_WSREP */ - - /* Release the mutex to obey the latching order. - This is safe, because lock_deadlock_check_and_resolve() - is invoked when a lock wait is enqueued for the currently - running transaction. 
Because trx is a running transaction - (it is not currently suspended because of a lock wait), - its state can only be changed by this thread, which is - currently associated with the transaction. */ - - trx_mutex_exit(trx); - - victim_trx_id = lock_deadlock_check_and_resolve(lock, trx); - - trx_mutex_enter(trx); - - if (victim_trx_id != 0) { - ut_ad(victim_trx_id == trx->id); - - /* The order here is important, we don't want to - lose the state of the lock before calling remove. */ - lock_table_remove_low(lock); - lock_reset_lock_and_trx_wait(lock); - - return(DB_DEADLOCK); - } else if (trx->lock.wait_lock == NULL) { - /* Deadlock resolution chose another transaction as a victim, - and we accidentally got our lock granted! */ - - return(DB_SUCCESS); - } - - trx->lock.que_state = TRX_QUE_LOCK_WAIT; - - trx->lock.wait_started = ut_time(); - trx->lock.was_chosen_as_deadlock_victim = FALSE; - trx->n_table_lock_waits++; - - if (UNIV_UNLIKELY(trx->take_stats)) { - ut_usectime(&sec, &ms); - trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms; - } - - ut_a(que_thr_stop(thr)); - - MONITOR_INC(MONITOR_TABLELOCK_WAIT); - - return(DB_LOCK_WAIT); -} - -/*********************************************************************//** -Checks if other transactions have an incompatible mode lock request in -the lock queue. 
-@return lock or NULL */ -UNIV_INLINE -const lock_t* -lock_table_other_has_incompatible( -/*==============================*/ - const trx_t* trx, /*!< in: transaction, or NULL if all - transactions should be included */ - ulint wait, /*!< in: LOCK_WAIT if also - waiting locks are taken into - account, or 0 if not */ - const dict_table_t* table, /*!< in: table */ - enum lock_mode mode) /*!< in: lock mode */ -{ - const lock_t* lock; - - ut_ad(lock_mutex_own()); - - for (lock = UT_LIST_GET_LAST(table->locks); - lock != NULL; - lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) { - - if (lock->trx != trx - && !lock_mode_compatible(lock_get_mode(lock), mode) - && (wait || !lock_get_wait(lock))) { - -#ifdef WITH_WSREP - if(wsrep_thd_is_wsrep(trx->mysql_thd)) { - if (wsrep_debug) { - fprintf(stderr, "WSREP: trx " - TRX_ID_FMT - " table lock abort\n", - trx->id); - } - trx_mutex_enter(lock->trx); - wsrep_kill_victim((trx_t *)trx, (lock_t *)lock); - trx_mutex_exit(lock->trx); - } -#endif - - return(lock); - } - } - - return(NULL); -} - -/*********************************************************************//** -Locks the specified database table in the mode given. If the lock cannot -be granted immediately, the query thread is put to wait. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ -UNIV_INTERN -dberr_t -lock_table( -/*=======*/ - ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - dict_table_t* table, /*!< in/out: database table - in dictionary cache */ - enum lock_mode mode, /*!< in: lock mode */ - que_thr_t* thr) /*!< in: query thread */ -{ -#ifdef WITH_WSREP - lock_t *c_lock = NULL; -#endif - trx_t* trx; - dberr_t err; - const lock_t* wait_for; - - ut_ad(table != NULL); - ut_ad(thr != NULL); - - if (flags & BTR_NO_LOCKING_FLAG) { - - return(DB_SUCCESS); - } - - ut_a(flags == 0); - - trx = thr_get_trx(thr); - - if (UNIV_UNLIKELY(trx->fake_changes && mode == LOCK_IX)) { - mode = LOCK_IS; - } - - /* Look for equal or stronger locks the same trx already - has on the table. No need to acquire the lock mutex here - because only this transacton can add/access table locks - to/from trx_t::table_locks. */ - - if (lock_table_has(trx, table, mode)) { - - return(DB_SUCCESS); - } - - lock_mutex_enter(); - - DBUG_EXECUTE_IF("fatal-semaphore-timeout", - { os_thread_sleep(3600000000); }); - - /* We have to check if the new lock is compatible with any locks - other transactions have in the table lock queue. 
*/ - -#ifdef WITH_WSREP - wait_for = lock_table_other_has_incompatible( - trx, LOCK_WAIT, table, mode); -#else - wait_for = lock_table_other_has_incompatible( - trx, LOCK_WAIT, table, mode); -#endif - - trx_mutex_enter(trx); - - /* Another trx has a request on the table in an incompatible - mode: this trx may have to wait */ - - if (wait_for != NULL) { -#ifdef WITH_WSREP - err = lock_table_enqueue_waiting((ib_lock_t*)wait_for, mode | flags, table, thr); -#else - err = lock_table_enqueue_waiting(mode | flags, table, thr); -#endif - } else { -#ifdef WITH_WSREP - lock_table_create(c_lock, table, mode | flags, trx); -#else - lock_table_create(table, mode | flags, trx); -#endif - - ut_a(!flags || mode == LOCK_S || mode == LOCK_X); - - err = DB_SUCCESS; - } - - lock_mutex_exit(); - - trx_mutex_exit(trx); - - return(err); -} - -/*********************************************************************//** -Creates a table IX lock object for a resurrected transaction. */ -UNIV_INTERN -void -lock_table_ix_resurrect( -/*====================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_ad(trx->is_recovered); - - if (lock_table_has(trx, table, LOCK_IX)) { - return; - } - - lock_mutex_enter(); - - /* We have to check if the new lock is compatible with any locks - other transactions have in the table lock queue. */ - - ut_ad(!lock_table_other_has_incompatible( - trx, LOCK_WAIT, table, LOCK_IX)); - - trx_mutex_enter(trx); -#ifdef WITH_WSREP - lock_table_create(NULL, table, LOCK_IX, trx); -#else - lock_table_create(table, LOCK_IX, trx); -#endif - lock_mutex_exit(); - trx_mutex_exit(trx); -} - -/*********************************************************************//** -Checks if a waiting table lock request still has to wait in a queue. 
-@return TRUE if still has to wait */ -static -ibool -lock_table_has_to_wait_in_queue( -/*============================*/ - const lock_t* wait_lock) /*!< in: waiting table lock */ -{ - const dict_table_t* table; - const lock_t* lock; - - ut_ad(lock_mutex_own()); - ut_ad(lock_get_wait(wait_lock)); - - table = wait_lock->un_member.tab_lock.table; - - for (lock = UT_LIST_GET_FIRST(table->locks); - lock != wait_lock; - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { - - if (lock_has_to_wait(wait_lock, lock)) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*************************************************************//** -Removes a table lock request, waiting or granted, from the queue and grants -locks to other transactions in the queue, if they now are entitled to a -lock. */ -static -void -lock_table_dequeue( -/*===============*/ - lock_t* in_lock)/*!< in/out: table lock object; transactions waiting - behind will get their lock requests granted, if - they are now qualified to it */ -{ - lock_t* lock; - - ut_ad(lock_mutex_own()); - ut_a(lock_get_type_low(in_lock) == LOCK_TABLE); - - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock); - - lock_table_remove_low(in_lock); - - /* Check if waiting locks in the queue can now be granted: grant - locks if there are no conflicting locks ahead. 
*/ - - for (/* No op */; - lock != NULL; - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) { - - if (lock_get_wait(lock) - && !lock_table_has_to_wait_in_queue(lock)) { - - /* Grant the lock */ - ut_ad(in_lock->trx != lock->trx); - lock_grant(lock, false); - } - } -} - -/*=========================== LOCK RELEASE ==============================*/ -static -void -lock_grant_and_move_on_rec( - lock_t* first_lock, - ulint heap_no) -{ - lock_t* lock; - lock_t* previous; - ulint space; - ulint page_no; - ulint rec_fold; - - space = first_lock->un_member.rec_lock.space; - page_no = first_lock->un_member.rec_lock.page_no; - rec_fold = lock_rec_fold(space, page_no); - - previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash, - hash_calc_hash(rec_fold, lock_sys->rec_hash))->node; - if (previous == NULL) { - return; - } - if (previous == first_lock) { - lock = previous; - } else { - while (previous->hash && - previous->hash != first_lock) { - previous = (lock_t *) previous->hash; - } - lock = (lock_t *) previous->hash; - } - /* Grant locks if there are no conflicting locks ahead. - Move granted locks to the head of the list. */ - for (;lock != NULL;) { - - /* If the lock is a wait lock on this page, and it does not need to wait. */ - if (lock->un_member.rec_lock.space == space - && lock->un_member.rec_lock.page_no == page_no - && lock_rec_get_nth_bit(lock, heap_no) - && lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - - lock_grant(lock, false); - - if (previous != NULL) { - /* Move the lock to the head of the list. */ - HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock); - lock_rec_insert_to_head(lock, rec_fold); - } else { - /* Already at the head of the list. */ - previous = lock; - } - /* Move on to the next lock. 
*/
-			lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
-		} else {
-			previous = lock;
-			lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
-		}
-	}
-}
-
-/*************************************************************//**
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock.
-
-The function acquires the lock_sys mutex and then trx->mutex itself. If the
-queue for the record holds no lock of the given mode owned by trx, both
-mutexes are released, an error that includes the current statement text is
-logged, and the function returns without touching the queue. */
-UNIV_INTERN
-void
-lock_rec_unlock(
-/*============*/
-	trx_t*			trx,	/*!< in/out: transaction that has
-					set a record lock */
-	const buf_block_t*	block,	/*!< in: buffer block containing rec */
-	const rec_t*		rec,	/*!< in: record */
-	enum lock_mode		lock_mode)/*!< in: LOCK_S or LOCK_X */
-{
-	lock_t*		first_lock;
-	lock_t*		lock;
-	ulint		heap_no;
-	const char*	stmt;
-	size_t		stmt_len;
-
-	ut_ad(trx);
-	ut_ad(rec);
-	ut_ad(block->frame == page_align(rec));
-	ut_ad(!trx->lock.wait_lock);
-	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-
-	heap_no = page_rec_get_heap_no(rec);
-
-	lock_mutex_enter();
-	trx_mutex_enter(trx);
-
-	first_lock = lock_rec_get_first(block, heap_no);
-
-	/* Find the first lock in the queue with the same lock_mode and
-	transaction on the record (the scan stops at the first match). */
-
-	for (lock = first_lock; lock != NULL;
-	     lock = lock_rec_get_next(heap_no, lock)) {
-		if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
-			goto released;
-		}
-	}
-
-	lock_mutex_exit();	/* not found: drop both mutexes before logging */
-	trx_mutex_exit(trx);
-
-	stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
-
-	ib_logf(IB_LOG_LEVEL_ERROR,
-		"unlock row could not find a %u mode lock on the record;"
-		" statement=%.*s",
-		lock_mode,
-		(int) stmt_len, stmt);
-
-	return;
-
-released:
-	ut_a(!lock_get_wait(lock));	/* only a granted lock may be released here */
-	lock_rec_reset_nth_bit(lock, heap_no);
-
-	if (innodb_lock_schedule_algorithm
-	    == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
-	    thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
-
-		/* Check if we can now grant waiting lock requests. The
-		whole queue is rescanned from first_lock. */
-
-		for (lock = first_lock; lock != NULL;
-		     lock = lock_rec_get_next(heap_no, lock)) {
-			if (lock_get_wait(lock)
-			    && !lock_rec_has_to_wait_in_queue(lock)) {
-
-				/* Grant the lock */
-				ut_ad(trx != lock->trx);
-				lock_grant(lock, false);
-			}
-		}
-	} else {
-		lock_grant_and_move_on_rec(first_lock, heap_no);	/* non-FCFS scheduling path */
-	}
-
-	lock_mutex_exit();
-	trx_mutex_exit(trx);
-}
-
-/*********************************************************************//**
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks.
-
-The caller must hold the lock_sys mutex and must not hold trx->mutex (both
-are asserted). Locks are taken from the tail of trx->lock.trx_locks one at a
-time until that list is empty; the lock_sys mutex is released and re-acquired
-periodically (see LOCK_RELEASE_INTERVAL below). */
-static
-void
-lock_release(
-/*=========*/
-	trx_t*	trx)	/*!< in/out: transaction */
-{
-	lock_t*		lock;
-	ulint		count = 0;
-	trx_id_t	max_trx_id;
-
-	ut_ad(lock_mutex_own());
-	ut_ad(!trx_mutex_own(trx));
-
-	max_trx_id = trx_sys_get_max_trx_id();
-
-	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
-	     lock != NULL;
-	     lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
-
-		if (lock_get_type_low(lock) == LOCK_REC) {
-
-#ifdef UNIV_DEBUG
-			/* Check if the transaction locked a record
-			in a system table in X mode. It should have set
-			the dict_op code correctly if it did. */
-			if (lock->index->table->id < DICT_HDR_FIRST_ID
-			    && lock_get_mode(lock) == LOCK_X) {
-
-				ut_ad(lock_get_mode(lock) != LOCK_IX);
-				ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
-			}
-#endif /* UNIV_DEBUG */
-
-			lock_rec_dequeue_from_page(lock);
-		} else {
-			dict_table_t*	table;
-
-			table = lock->un_member.tab_lock.table;
-#ifdef UNIV_DEBUG
-			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
-			/* Check if the transaction locked a system table
-			in X or IX mode. It should have set the dict_op code
-			correctly if it did. */
-			if (table->id < DICT_HDR_FIRST_ID
-			    && (lock_get_mode(lock) == LOCK_X
-				|| lock_get_mode(lock) == LOCK_IX)) {
-
-				ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
-			}
-#endif /* UNIV_DEBUG */
-
-			if (lock_get_mode(lock) != LOCK_IS
-			    && trx->undo_no != 0) {
-
-				/* The trx may have modified the table. We
-				block the use of the MySQL query cache for
-				all currently active transactions. */
-
-				table->query_cache_inv_trx_id = max_trx_id;
-			}
-
-			lock_table_dequeue(lock);
-		}
-
-		if (count == LOCK_RELEASE_INTERVAL) {
-			/* Release the mutex for a while, so that we
-			do not monopolize it */
-
-			lock_mutex_exit();
-
-			lock_mutex_enter();
-
-			count = 0;
-		}
-
-		++count;
-	}
-
-	/* We don't remove the locks one by one from the vector for
-	efficiency reasons. We simply reset it because we would have
-	released all the locks anyway. */
-
-	ib_vector_reset(trx->lock.table_locks);
-
-	ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
-	ut_a(ib_vector_is_empty(trx->autoinc_locks));
-	ut_a(ib_vector_is_empty(trx->lock.table_locks));
-
-	mem_heap_empty(trx->lock.lock_heap);
-}
-
-/* True if a lock mode is S or X */
-#define IS_LOCK_S_OR_X(lock) \
-	(lock_get_mode(lock) == LOCK_S \
-	 || lock_get_mode(lock) == LOCK_X)
-
-/*********************************************************************//**
-Removes table locks of the transaction on a table to be dropped.
*/
-static
-void
-lock_trx_table_locks_remove(
-/*========================*/
-	const lock_t*	lock_to_remove)		/*!< in: lock to remove */
-{
-	trx_t*	trx = lock_to_remove->trx;
-	lint	pos;
-	bool	removed = false;
-
-	ut_ad(lock_mutex_own());
-
-	/* trx->lock.cancel may be read here because the lock mutex is
-	held. When it is set the trx mutex is expected to be owned already
-	(asserted); otherwise it is taken for the duration of the scan. */
-	if (trx->lock.cancel) {
-		ut_ad(trx_mutex_own(trx));
-	} else {
-		trx_mutex_enter(trx);
-	}
-
-	/* Scan the vector of table locks from its last slot down to slot 0
-	and blank out the slot that holds lock_to_remove. */
-	pos = ib_vector_size(trx->lock.table_locks);
-
-	while (!removed && --pos >= 0) {
-		const lock_t*	entry = *static_cast<lock_t**>(
-			ib_vector_get(trx->lock.table_locks, pos));
-
-		if (entry != NULL) {
-			ut_a(trx == entry->trx);
-			ut_a(lock_get_type_low(entry) & LOCK_TABLE);
-			ut_a(entry->un_member.tab_lock.table != NULL);
-
-			if (entry == lock_to_remove) {
-				void*	null_var = NULL;
-
-				ib_vector_set(trx->lock.table_locks, pos,
-					      &null_var);
-				removed = true;
-			}
-		}
-	}
-
-	if (!trx->lock.cancel) {
-		trx_mutex_exit(trx);
-	}
-
-	if (!removed) {
-		/* Lock must exist in the vector. */
-		ut_error;
-	}
-}
-
-/*********************************************************************//**
-Removes locks of a transaction on a table to be dropped.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock that is going to be removed is allowed to be a wait lock.
*/
-static
-void
-lock_remove_all_on_table_for_trx(
-/*=============================*/
-	dict_table_t*	table,		/*!< in: table to be dropped */
-	trx_t*		trx,		/*!< in: a transaction */
-	ibool		remove_also_table_sx_locks)/*!< in: also removes
-						table S and X locks */
-{	/* The caller must hold the lock_sys mutex (asserted below). The
-	transaction's lock list is walked from its tail towards its head,
-	and the predecessor is read before the current lock is removed so
-	that the walk can continue after a removal. */
-	lock_t*		lock;
-	lock_t*		prev_lock;
-
-	ut_ad(lock_mutex_own());
-
-	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
-	     lock != NULL;
-	     lock = prev_lock) {
-
-		prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
-
-		if (lock_get_type_low(lock) == LOCK_REC
-		    && lock->index->table == table) {
-			ut_a(!lock_get_wait(lock));
-
-			lock_rec_discard(lock);
-		} else if (lock_get_type_low(lock) & LOCK_TABLE
-			   && lock->un_member.tab_lock.table == table
-			   && (remove_also_table_sx_locks
-			       || !IS_LOCK_S_OR_X(lock))) {
-
-			ut_a(!lock_get_wait(lock));
-
-			lock_trx_table_locks_remove(lock);
-			lock_table_remove_low(lock);
-		}
-	}
-}
-
-/*******************************************************************//**
-Remove any explicit record locks held by recovering transactions on
-the table. Table-level locks that such transactions hold on the table are
-removed as well (see the LOCK_TABLE case below).
-The caller must hold the lock_sys mutex; trx_sys->mutex is taken here.
-The returned count covers every transaction in rw_trx_list that has
-is_recovered set, whether or not it held a lock on the table.
-@return number of recovered transactions examined */
-static
-ulint
-lock_remove_recovered_trx_record_locks(
-/*===================================*/
-	dict_table_t*	table)	/*!< in: check if there are any locks
-				held on records in this table or on the
-				table itself */
-{
-	trx_t*		trx;
-	ulint		n_recovered_trx = 0;
-
-	ut_a(table != NULL);
-	ut_ad(lock_mutex_own());
-
-	mutex_enter(&trx_sys->mutex);
-
-	for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		lock_t*	lock;
-		lock_t*	next_lock;
-
-		assert_trx_in_rw_list(trx);
-
-		if (!trx->is_recovered) {
-			continue;
-		}
-
-		/* Because we are holding the lock_sys->mutex,
-		implicit locks cannot be converted to explicit ones
-		while we are scanning the explicit locks. */
-
-		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-		     lock != NULL;
-		     lock = next_lock) {
-
-			ut_a(lock->trx == trx);
-
-			/* Recovered transactions can't wait on a lock. */
-
-			ut_a(!lock_get_wait(lock));
-
-			next_lock = UT_LIST_GET_NEXT(trx_locks, lock);	/* read before lock may be removed */
-
-			switch (lock_get_type_low(lock)) {
-			default:	/* neither LOCK_TABLE nor LOCK_REC; ut_error presumably aborts - TODO confirm */
-				ut_error;
-			case LOCK_TABLE:
-				if (lock->un_member.tab_lock.table == table) {
-					lock_trx_table_locks_remove(lock);
-					lock_table_remove_low(lock);
-				}
-				break;
-			case LOCK_REC:
-				if (lock->index->table == table) {
-					lock_rec_discard(lock);
-				}
-			}
-		}
-
-		++n_recovered_trx;
-	}
-
-	mutex_exit(&trx_sys->mutex);
-
-	return(n_recovered_trx);
-}
-
-/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock.
-The lock_sys mutex is acquired and released inside this function. */
-UNIV_INTERN
-void
-lock_remove_all_on_table(
-/*=====================*/
-	dict_table_t*	table,	/*!< in: table to be dropped
-				or truncated */
-	ibool		remove_also_table_sx_locks)/*!< in: also removes
-				table S and X locks */
-{
-	lock_t*		lock;
-
-	lock_mutex_enter();
-
-	for (lock = UT_LIST_GET_FIRST(table->locks);
-	     lock != NULL;
-	     /* No op: the successor is chosen at the end of the body */) {
-
-		lock_t*	prev_lock;
-
-		prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
-
-		/* If we should remove all locks (remove_also_table_sx_locks
-		is TRUE), or if the lock is not table-level S or X lock,
-		then check we are not going to remove a wait lock. */
-		if (remove_also_table_sx_locks
-		    || !(lock_get_type(lock) == LOCK_TABLE
-			 && IS_LOCK_S_OR_X(lock))) {
-
-			ut_a(!lock_get_wait(lock));
-		}
-
-		lock_remove_all_on_table_for_trx(
-			table, lock->trx, remove_also_table_sx_locks);
-
-		if (prev_lock == NULL) {
-			if (lock == UT_LIST_GET_FIRST(table->locks)) {
-				/* lock was not removed, pick its successor */
-				lock = UT_LIST_GET_NEXT(
-					un_member.tab_lock.locks, lock);
-			} else {
-				/* lock was removed, pick the first one */
-				lock = UT_LIST_GET_FIRST(table->locks);
-			}
-		} else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
-					    prev_lock) != lock) {
-			/* If lock was removed by
-			lock_remove_all_on_table_for_trx() then pick the
-			successor of prev_lock ... */
-			lock = UT_LIST_GET_NEXT(
-				un_member.tab_lock.locks, prev_lock);
-		} else {
-			/* ... otherwise pick the successor of lock. */
-			lock = UT_LIST_GET_NEXT(
-				un_member.tab_lock.locks, lock);
-		}
-	}
-
-	/* Note: Recovered transactions don't have table level IX or IS locks
-	but can have implicit record locks that have been converted to explicit
-	record locks. Such record locks cannot be freed by traversing the
-	transaction lock list in dict_table_t (as above).
-	Once a call examines zero recovered transactions,
-	lock_sys->rollback_complete is set and later calls skip this scan. */
-
-	if (!lock_sys->rollback_complete
-	    && lock_remove_recovered_trx_record_locks(table) == 0) {
-
-		lock_sys->rollback_complete = TRUE;
-	}
-
-	lock_mutex_exit();
-}
-
-/*===================== VALIDATION AND DEBUGGING ====================*/
-
-/*********************************************************************//**
-Prints info of a table lock.
*/
-UNIV_INTERN
-void
-lock_table_print(
-/*=============*/
-	FILE*		file,	/*!< in: file where to print */
-	const lock_t*	lock)	/*!< in: table type lock */
-{
-	/* The caller must hold the lock_sys mutex, and the lock must be a
-	table lock; both are checked here. */
-	ut_ad(lock_mutex_own());
-	ut_a(lock_get_type_low(lock) == LOCK_TABLE);
-
-	fputs("TABLE LOCK table ", file);
-	ut_print_name(file, lock->trx, TRUE,
-		      lock->un_member.tab_lock.table->name);
-	fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
-
-	if (lock_get_mode(lock) == LOCK_S) {
-		fputs(" lock mode S", file);
-	} else if (lock_get_mode(lock) == LOCK_X) {
-		fputs(" lock mode X", file);
-	} else if (lock_get_mode(lock) == LOCK_IS) {
-		fputs(" lock mode IS", file);
-	} else if (lock_get_mode(lock) == LOCK_IX) {
-		fputs(" lock mode IX", file);
-	} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
-		fputs(" lock mode AUTO-INC", file);
-	} else {
-		fprintf(file, " unknown lock mode %lu",
-			(ulong) lock_get_mode(lock));
-	}
-
-	if (lock_get_wait(lock)) {
-		fputs(" waiting", file);
-	}
-
-	/* %lu expects an unsigned long. Cast both arguments to ulong, as
-	the "unknown lock mode" print above already does, so that the
-	argument width matches the conversion on every platform. */
-	fprintf(file, " lock hold time %lu wait time before grant %lu ",
-		(ulong) difftime(ut_time(), lock->requested_time),
-		(ulong) lock->wait_time);
-
-	putc('\n', file);
-}
-
-/*********************************************************************//**
-Prints info of a record lock.
*/
-UNIV_INTERN
-void
-lock_rec_print(
-/*===========*/
-	FILE*		file,	/*!< in: file where to print */
-	const lock_t*	lock)	/*!< in: record type lock */
-{	/* The caller must hold the lock_sys mutex and the lock must be a
-	record lock (both checked below). A one-line summary is always
-	printed. When srv_show_verbose_locks is set, every heap number whose
-	bit is set in the lock is listed as well, together with the record
-	contents if buf_page_try_get() hands back the page. */
-	const buf_block_t*	block;
-	ulint			space;
-	ulint			page_no;
-	ulint			i;
-	mtr_t			mtr;
-	mem_heap_t*		heap		= NULL;
-	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*			offsets		= offsets_;
-	rec_offs_init(offsets_);
-
-	ut_ad(lock_mutex_own());
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	space = lock->un_member.rec_lock.space;
-	page_no = lock->un_member.rec_lock.page_no;
-
-	fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
-		(ulong) space, (ulong) page_no,
-		(ulong) lock_rec_get_n_bits(lock));
-
-	dict_index_name_print(file, lock->trx, lock->index);
-
-	/* Print number of table locks.
-	NOTE(review): these two arguments are passed to %lu without the
-	(ulong) cast used elsewhere in this function - confirm the types. */
-	fprintf(file, " trx table locks %lu total table locks %lu ",
-		ib_vector_size(lock->trx->lock.table_locks),
-		UT_LIST_GET_LEN(lock->index->table->locks));
-
-	fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
-
-	if (lock_get_mode(lock) == LOCK_S) {
-		fputs(" lock mode S", file);
-	} else if (lock_get_mode(lock) == LOCK_X) {
-		fputs(" lock_mode X", file);	/* NOTE(review): underscore, unlike " lock mode S"; runtime output, left as is */
-	} else {
-		ut_error;
-	}
-
-	if (lock_rec_get_gap(lock)) {
-		fputs(" locks gap before rec", file);
-	}
-
-	if (lock_rec_get_rec_not_gap(lock)) {
-		fputs(" locks rec but not gap", file);
-	}
-
-	if (lock_rec_get_insert_intention(lock)) {
-		fputs(" insert intention", file);
-	}
-
-	if (lock_get_wait(lock)) {
-		fputs(" waiting", file);
-	}
-
-	mtr_start(&mtr);	/* started unconditionally; committed before returning */
-
-	fprintf(file, " lock hold time %lu wait time before grant %lu ",
-		(ulint)difftime(ut_time(), lock->requested_time),
-		lock->wait_time);
-
-	putc('\n', file);
-
-	if ( srv_show_verbose_locks ) {
-		block = buf_page_try_get(space, page_no, &mtr);
-
-		for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
-
-			if (!lock_rec_get_nth_bit(lock, i)) {
-				continue;
-			}
-
-			fprintf(file, "Record lock, heap no %lu", (ulong) i);
-
-			if (block) {	/* page available: print the record too */
-				const rec_t*	rec;
-
-				rec = page_find_rec_with_heap_no(
-					buf_block_get_frame(block), i);
-
-				offsets = rec_get_offsets(
-					rec, lock->index, offsets,
-					ULINT_UNDEFINED, &heap);
-
-				putc(' ', file);
-				rec_print_new(file, rec, offsets);
-			}
-
-			putc('\n', file);
-		}
-	}
-
-	mtr_commit(&mtr);
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-#ifdef UNIV_DEBUG
-/* Print the number of lock structs from lock_print_info_summary() only
-in non-production builds for performance reasons, see
-http://bugs.mysql.com/36942 */
-#define PRINT_NUM_OF_LOCK_STRUCTS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
-/*********************************************************************//**
-Calculates the number of record lock structs in the record lock hash table.
-Every cell of lock_sys->rec_hash is visited and every entry chained in it is
-counted. The caller must hold the lock_sys mutex (asserted).
-@return number of record locks */
-static
-ulint
-lock_get_n_rec_locks(void)
-/*======================*/
-{
-	ulint	n_locks	= 0;
-	ulint	i;
-
-	ut_ad(lock_mutex_own());
-
-	for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
-		const lock_t*	lock;
-
-		for (lock = static_cast<const lock_t*>(
-				HASH_GET_FIRST(lock_sys->rec_hash, i));
-		     lock != 0;
-		     lock = static_cast<const lock_t*>(
-				HASH_GET_NEXT(hash, lock))) {
-
-			n_locks++;
-		}
-	}
-
-	return(n_locks);
-}
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-
-/*********************************************************************//**
-Prints info of locks for all transactions.
-When TRUE is returned the lock mutex acquired here is still held: this
-function has no matching lock_mutex_exit() on that path.
-@return FALSE if not able to obtain lock mutex
-and exits without printing info */
-UNIV_INTERN
-ibool
-lock_print_info_summary(
-/*====================*/
-	FILE*	file,	/*!< in: file where to print */
-	ibool	nowait)	/*!< in: whether to wait for the lock mutex */
-{
-	/* if nowait is FALSE, wait on the lock mutex,
-	otherwise return immediately if fail to obtain the
-	mutex. */
-	if (!nowait) {
-		lock_mutex_enter();
-	} else if (lock_mutex_enter_nowait()) {
-		fputs("FAIL TO OBTAIN LOCK MUTEX, "
-		      "SKIP LOCK INFO PRINTING\n", file);
-		return(FALSE);
-	}
-
-	if (lock_deadlock_found) {
-		fputs("------------------------\n"
-		      "LATEST DETECTED DEADLOCK\n"
-		      "------------------------\n", file);
-
-		if (!srv_read_only_mode) {
-			ut_copy_file(file, lock_latest_err_file);
-		}
-	}
-
-	fputs("------------\n"
-	      "TRANSACTIONS\n"
-	      "------------\n", file);
-
-	fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
-		trx_sys_get_max_trx_id());
-
-	fprintf(file,
-		"Purge done for trx's n:o < " TRX_ID_FMT
-		" undo n:o < " TRX_ID_FMT " state: ",
-		purge_sys->iter.trx_no,
-		purge_sys->iter.undo_no);
-
-	/* Note: We are reading the state without the latch. One because it
-	will violate the latching order and two because we are merely querying
-	the state of the variable for display. */
-
-	switch (purge_sys->state){
-	case PURGE_STATE_INIT:
-		/* Should never be in this state while the system is running. */
-		ut_error;
-
-	case PURGE_STATE_EXIT:
-		fprintf(file, "exited");
-		break;
-
-	case PURGE_STATE_DISABLED:
-		fprintf(file, "disabled");
-		break;
-
-	case PURGE_STATE_RUN:
-		fprintf(file, "running");
-		/* Check if it is waiting for more data to arrive. */
-		if (!purge_sys->running) {
-			fprintf(file, " but idle");
-		}
-		break;
-
-	case PURGE_STATE_STOP:
-		fprintf(file, "stopped");
-		break;
-	}
-
-	fprintf(file, "\n");
-
-	fprintf(file,
-		"History list length %lu\n",
-		(ulong) trx_sys->rseg_history_len);
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
-	fprintf(file,
-		"Total number of lock structs in row lock hash table %lu\n",
-		(ulong) lock_get_n_rec_locks());
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Prints info of locks for each transaction. This function assumes that the
-caller holds the lock mutex and more importantly it will release the lock
-mutex on behalf of the caller.
(This should be fixed in the future). */
-UNIV_INTERN
-void
-lock_print_info_all_transactions(
-/*=============================*/
-	FILE*		file)	/*!< in: file where to print */
-{	/* Structure: after the not-started transactions are listed, the
-	code below "loop:" is re-entered once per printed lock. nth_trx and
-	nth_lock record the position reached so far, because the pointers
-	themselves may be stale after the mutexes were released to read a
-	page. rw_trx_list is processed first, then ro_trx_list. */
-	const lock_t*	lock;
-	ibool		load_page_first = TRUE;
-	ulint		nth_trx		= 0;
-	ulint		nth_lock	= 0;
-	ulint		i;
-	mtr_t		mtr;
-	const trx_t*	trx;
-	trx_list_t*	trx_list = &trx_sys->rw_trx_list;
-
-	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
-
-	ut_ad(lock_mutex_own());
-
-	mutex_enter(&trx_sys->mutex);
-
-	/* First print info on non-active transactions */
-
-	/* NOTE: information of auto-commit non-locking read-only
-	transactions will be omitted here. The information will be
-	available from INFORMATION_SCHEMA.INNODB_TRX. */
-
-	for (trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
-
-		ut_ad(trx->in_mysql_trx_list);
-
-		/* See state transitions and locking rules in trx0trx.h */
-
-		if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
-			fputs("---", file);
-			trx_print_latched(file, trx, 600);
-		}
-	}
-
-loop:
-	/* Since we temporarily release lock_sys->mutex and
-	trx_sys->mutex when reading a database page in below,
-	variable trx may be obsolete now and we must loop
-	through the trx list to get probably the same trx,
-	or some other trx. */
-
-	for (trx = UT_LIST_GET_FIRST(*trx_list), i = 0;
-	     trx && (i < nth_trx);
-	     trx = UT_LIST_GET_NEXT(trx_list, trx), i++) {
-
-		assert_trx_in_list(trx);
-		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-	}
-
-	ut_ad(trx == NULL
-	      || trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-
-	if (trx == NULL) {
-		/* Check the read-only transaction list next. */
-		if (trx_list == &trx_sys->rw_trx_list) {
-			trx_list = &trx_sys->ro_trx_list;
-			nth_trx = 0;
-			nth_lock = 0;
-			goto loop;
-		}
-
-		lock_mutex_exit();	/* both lists done: release the caller's lock mutex too */
-		mutex_exit(&trx_sys->mutex);
-
-		ut_ad(lock_validate());
-
-		return;
-	}
-
-	assert_trx_in_list(trx);
-
-	if (nth_lock == 0) {	/* first visit of this trx: print its header */
-		fputs("---", file);
-
-		trx_print_latched(file, trx, 600);
-
-		if (trx->read_view) {
-			fprintf(file,
-				"Trx read view will not see trx with"
-				" id >= " TRX_ID_FMT
-				", sees < " TRX_ID_FMT "\n",
-				trx->read_view->low_limit_id,
-				trx->read_view->up_limit_id);
-		}
-
-		/* Total trx lock waits and times */
-		fprintf(file, "Trx #rec lock waits %lu #table lock waits %lu\n",
-			trx->n_rec_lock_waits, trx->n_table_lock_waits);
-		fprintf(file, "Trx total rec lock wait time %lu SEC\n",
-			trx->total_rec_lock_wait_time);
-		fprintf(file, "Trx total table lock wait time %lu SEC\n",
-			trx->total_table_lock_wait_time);
-
-		if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
-			fprintf(file,
-				"------- TRX HAS BEEN WAITING %lu SEC"
-				" FOR THIS LOCK TO BE GRANTED:\n",
-				(ulong) difftime(ut_time(),
-						 trx->lock.wait_started));
-
-			if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
-				lock_rec_print(file, trx->lock.wait_lock);
-			} else {
-				lock_table_print(file, trx->lock.wait_lock);
-			}
-
-			fputs("------------------\n", file);
-		}
-	}
-
-	if (!srv_print_innodb_lock_monitor || !srv_show_locks_held) {
-		nth_trx++;	/* individual locks are not printed: next trx */
-		goto loop;
-	}
-
-	i = 0;
-
-	/* Look at the note about the trx loop above why we loop here:
-	lock may be an obsolete pointer now. */
-
-	lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-
-	while (lock && (i < nth_lock)) {
-		lock = UT_LIST_GET_NEXT(trx_locks, lock);
-		i++;
-	}
-
-	if (lock == NULL) {
-		nth_trx++;
-		nth_lock = 0;
-
-		goto loop;
-	}
-
-	if (lock_get_type_low(lock) == LOCK_REC) {
-		if (load_page_first) {
-			ulint	space_id = lock->un_member.rec_lock.space;
-			/* Check whether the space exists. Only when the
-			space is valid, try to get the page. */
-			fil_space_t*	space = fil_space_acquire(space_id);
-			ulint	page_no = lock->un_member.rec_lock.page_no;
-
-			if (!space) {
-
-				/* It is a single table tablespace and
-				the .ibd file is missing (TRUNCATE
-				TABLE probably stole the locks): just
-				print the lock without attempting to
-				load the page in the buffer pool. */
-
-				fprintf(file, "RECORD LOCKS on"
-					" non-existing space: " ULINTPF "\n",
-					space_id);
-				goto print_rec;
-			}
-
-			const ulint	zip_size = fsp_flags_get_zip_size(space->flags);
-
-			lock_mutex_exit();	/* both mutexes are dropped around the page read */
-			mutex_exit(&trx_sys->mutex);
-
-			if (srv_show_verbose_locks) {
-
-				DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
-
-				if (space) {	/* always true here: the NULL case jumped to print_rec above */
-					mtr_start(&mtr);
-
-					buf_page_get_gen(space_id, zip_size,
-							 page_no, RW_NO_LATCH,
-							 NULL,
-							 BUF_GET_POSSIBLY_FREED,
-							 __FILE__, __LINE__,
-							 &mtr);
-
-					mtr_commit(&mtr);
-
-				}
-			}
-
-			fil_space_release(space);
-
-			load_page_first = FALSE;	/* next pass prints this lock instead of loading again */
-
-			lock_mutex_enter();
-
-			mutex_enter(&trx_sys->mutex);
-
-			goto loop;
-		}
-
-print_rec:
-		lock_rec_print(file, lock);
-	} else {
-		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
-		lock_table_print(file, lock);
-	}
-
-	load_page_first = TRUE;
-
-	nth_lock++;
-
-	if (nth_lock >= srv_show_locks_held) {
-		fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:"
-		      " SUPPRESSING FURTHER PRINTS\n",
-		      file);
-
-		nth_trx++;
-		nth_lock = 0;
-	}
-
-	goto loop;
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Find the the lock in the trx_t::trx_lock_t::table_locks vector.
-@return TRUE if found */
-static
-ibool
-lock_trx_table_locks_find(
-/*======================*/
-	trx_t*		trx,		/*!< in: trx to validate */
-	const lock_t*	find_lock)	/*!< in: lock to find */
-{
-	ibool	found = FALSE;
-	lint	pos;
-
-	trx_mutex_enter(trx);
-
-	/* Visit every slot from the last one down to slot 0. Empty slots
-	are skipped; each remaining entry is sanity-checked whether or not
-	it is the lock being looked for. */
-	pos = ib_vector_size(trx->lock.table_locks);
-
-	while (--pos >= 0) {
-		const lock_t*	entry = *static_cast<const lock_t**>(
-			ib_vector_get(trx->lock.table_locks, pos));
-
-		if (entry == NULL) {
-			continue;
-		}
-
-		if (entry == find_lock) {
-			/* Can't be duplicates. */
-			ut_a(!found);
-			found = TRUE;
-		}
-
-		ut_a(trx == entry->trx);
-		ut_a(lock_get_type_low(entry) & LOCK_TABLE);
-		ut_a(entry->un_member.tab_lock.table != NULL);
-	}
-
-	trx_mutex_exit(trx);
-
-	return(found);
-}
-
-/*********************************************************************//**
-Validates the lock queue on a table.
-The caller must hold both the lock_sys mutex and trx_sys->mutex.
-@return TRUE if ok */
-static
-ibool
-lock_table_queue_validate(
-/*======================*/
-	const dict_table_t*	table)	/*!< in: table */
-{
-	const lock_t*	lock;
-
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	lock = UT_LIST_GET_FIRST(table->locks);
-
-	while (lock != NULL) {
-
-		/* lock->trx->state cannot change from or to NOT_STARTED
-		while we are holding the trx_sys->mutex. It may change
-		from ACTIVE to PREPARED, but it may not change to
-		COMMITTED, because we are holding the lock_sys->mutex. */
-		ut_ad(trx_assert_started(lock->trx));
-
-		if (lock_get_wait(lock)) {
-			/* A waiting request must still have a reason
-			to wait. */
-			ut_a(lock_table_has_to_wait_in_queue(lock));
-		} else {
-			/* A granted lock must not conflict with a lock
-			of another transaction. */
-			ut_a(!lock_table_other_has_incompatible(
-				     lock->trx, 0, table,
-				     lock_get_mode(lock)));
-		}
-
-		ut_a(lock_trx_table_locks_find(lock->trx, lock));
-
-		lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the lock queue on a single record.
-@return TRUE if ok */ -static -ibool -lock_rec_queue_validate( -/*====================*/ - ibool locked_lock_trx_sys, - /*!< in: if the caller holds - both the lock mutex and - trx_sys_t->lock. */ - const buf_block_t* block, /*!< in: buffer block containing rec */ - const rec_t* rec, /*!< in: record to look at */ - const dict_index_t* index, /*!< in: index, or NULL if not known */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - const lock_t* lock; - ulint heap_no; - - ut_a(rec); - ut_a(block->frame == page_align(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); - ut_ad(lock_mutex_own() == locked_lock_trx_sys); - ut_ad(!index || dict_index_is_clust(index) - || !dict_index_is_online_ddl(index)); - - heap_no = page_rec_get_heap_no(rec); - - if (!locked_lock_trx_sys) { - lock_mutex_enter(); - mutex_enter(&trx_sys->mutex); - } - - if (!page_rec_is_user_rec(rec)) { - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next_const(heap_no, lock)) { - - ut_a(trx_in_trx_list(lock->trx)); - - if (lock_get_wait(lock)) { - ut_a(lock_rec_has_to_wait_in_queue(lock)); - } - - if (index) { - ut_a(lock->index == index); - } - } - - goto func_exit; - } - - if (!index); - else if (dict_index_is_clust(index)) { - trx_id_t trx_id; - trx_id_t* trx_desc; - - /* Unlike the non-debug code, this invariant can only succeed - if the check and assertion are covered by the lock mutex. 
*/ - - trx_id = lock_clust_rec_some_has_impl(rec, index, offsets); - trx_desc = trx_find_descriptor(trx_sys->descriptors, - trx_sys->descr_n_used, - trx_id); - - ut_ad(lock_mutex_own()); - /* trx_id cannot be committed until lock_mutex_exit() - because lock_trx_release_locks() acquires lock_sys->mutex */ - - if (trx_desc != NULL - && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, - block, heap_no, trx_id)) { - - ut_ad(trx_id == *trx_desc); - ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, - block, heap_no, trx_id)); - } - } - - for (lock = lock_rec_get_first(block, heap_no); - lock != NULL; - lock = lock_rec_get_next_const(heap_no, lock)) { - - ut_a(trx_in_trx_list(lock->trx)); - - if (index) { - ut_a(lock->index == index); - } - - if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { - - enum lock_mode mode; - - - if (lock_get_mode(lock) == LOCK_S) { - mode = LOCK_X; - } else { - mode = LOCK_S; - } - - const lock_t* other_lock - = lock_rec_other_has_expl_req( - mode, 0, 0, block, heap_no, - lock->trx->id); -#ifdef WITH_WSREP - ut_a(!other_lock - || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) - || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE)); - -#else - ut_a(!other_lock); -#endif /* WITH_WSREP */ - - } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock) - && innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) { - // If using VATS, it's possible that a wait lock is inserted to a place in the list - // such that it does not need to wait. - ut_a(lock_rec_has_to_wait_in_queue(lock)); - } - } - - ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS || - lock_queue_validate(lock)); - -func_exit: - if (!locked_lock_trx_sys) { - lock_mutex_exit(); - mutex_exit(&trx_sys->mutex); - } - - return(TRUE); -} - -/*********************************************************************//** -Validates the record lock queues on a page. 
-@return TRUE if ok */
-static
-ibool
-lock_rec_validate_page(
-/*===================*/
-	const buf_block_t*	block)	/*!< in: buffer block */
-{	/* The caller must not hold the lock_sys mutex (asserted); it is
-	taken here together with trx_sys->mutex. nth_lock and nth_bit record
-	how far validation has progressed: after each validated record the
-	code jumps back to "loop" and looks the lock up again from the first
-	lock on the page. */
-	const lock_t*	lock;
-	const rec_t*	rec;
-	ulint		nth_lock	= 0;
-	ulint		nth_bit		= 0;
-	ulint		i;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	rec_offs_init(offsets_);
-
-	ut_ad(!lock_mutex_own());
-
-	lock_mutex_enter();
-	mutex_enter(&trx_sys->mutex);
-loop:
-	lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block),
-					       buf_block_get_page_no(block));
-
-	if (!lock) {
-		goto function_exit;
-	}
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	ut_a(!block->page.file_page_was_freed);
-#endif
-
-	for (i = 0; i < nth_lock; i++) {	/* skip the locks already validated */
-
-		lock = lock_rec_get_next_on_page_const(lock);
-
-		if (!lock) {
-			goto function_exit;
-		}
-	}
-
-	ut_a(trx_in_trx_list(lock->trx));
-
-# ifdef UNIV_SYNC_DEBUG
-	/* Only validate the record queues when this thread is not
-	holding a space->latch. Deadlocks are possible due to
-	latching order violation when UNIV_DEBUG is defined while
-	UNIV_SYNC_DEBUG is not.
-	Note that this "if" guards the whole "for" statement that follows
-	the #endif. */
-	if (!sync_thread_levels_contains(SYNC_FSP))
-# endif /* UNIV_SYNC_DEBUG */
-	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
-
-		if (i == 1 || lock_rec_get_nth_bit(lock, i)) {	/* heap no 1 is checked even when its bit is clear; presumably the supremum - TODO confirm */
-
-			rec = page_find_rec_with_heap_no(block->frame, i);
-			ut_a(rec);
-			offsets = rec_get_offsets(rec, lock->index, offsets,
-						  ULINT_UNDEFINED, &heap);
-#if 0
-			fprintf(stderr,
-				"Validating %u %u\n",
-				block->page.space, block->page.offset);
-#endif
-			/* If this thread is holding the file space
-			latch (fil_space_t::latch), the following
-			check WILL break the latching order and may
-			cause a deadlock of threads. */
-
-			lock_rec_queue_validate(
-				TRUE, block, rec, lock->index, offsets);
-
-			nth_bit = i + 1;
-
-			goto loop;
-		}
-	}
-
-	nth_bit = 0;	/* this lock is done: move on to the next lock on the page */
-	nth_lock++;
-
-	goto loop;
-
-function_exit:
-	lock_mutex_exit();
-	mutex_exit(&trx_sys->mutex);
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the table locks.
-The caller must hold the lock_sys mutex and trx_sys->mutex, and trx_list
-must be either trx_sys->rw_trx_list or trx_sys->ro_trx_list (all asserted).
-For every table lock of every transaction in the list, the lock queue of
-the locked table is validated with lock_table_queue_validate().
-@return TRUE if ok */
-static
-ibool
-lock_validate_table_locks(
-/*======================*/
-	const trx_list_t*	trx_list)	/*!< in: trx list */
-{
-	const trx_t*	trx;
-
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	ut_ad(trx_list == &trx_sys->rw_trx_list
-	      || trx_list == &trx_sys->ro_trx_list);
-
-	for (trx = UT_LIST_GET_FIRST(*trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		const lock_t*	lock;
-
-		assert_trx_in_list(trx);
-		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-
-		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-		     lock != NULL;
-		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
-
-			if (lock_get_type_low(lock) & LOCK_TABLE) {
-
-				lock_table_queue_validate(
-					lock->un_member.tab_lock.table);
-			}
-		}
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Validate record locks up to a limit.
-@return lock at limit or NULL if no more locks in the hash bucket */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const lock_t*
-lock_rec_validate(
-/*==============*/
-	ulint		start,		/*!< in: lock_sys->rec_hash
-					bucket */
-	ib_uint64_t*	limit)		/*!< in/out: upper limit of
-					(space, page_no) */
-{
-	const lock_t*	lock;
-
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	lock = static_cast<const lock_t*>(
-		HASH_GET_FIRST(lock_sys->rec_hash, start));
-
-	while (lock != NULL) {
-
-		ut_a(trx_in_trx_list(lock->trx));
-		ut_a(lock_get_type(lock) == LOCK_REC);
-
-		/* Combine (space, page_no) into one 64-bit value so that it
-		can be compared against *limit. */
-		const ib_uint64_t	page_addr = ut_ull_create(
-			lock->un_member.rec_lock.space,
-			lock->un_member.rec_lock.page_no);
-
-		/* NOTE(review): the comparison is strict and *limit is set
-		to page_addr + 1, so a later call does not return a lock
-		whose address equals that new limit - confirm this is
-		intended. */
-		if (page_addr > *limit) {
-			*limit = page_addr + 1;
-			return(lock);
-		}
-
-		lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock));
-	}
-
-	return(NULL);
-}
-
-/*********************************************************************//**
-Validate a record lock's block */
-static
-void
-lock_rec_block_validate(
-/*====================*/
-	ulint		space_id,
-	ulint		page_no)
-{
-	/* The lock and the block that it is referring to may be freed at
-	this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
-	If the lock exists in lock_rec_validate_page() we assert
-	!block->page.file_page_was_freed. */
-
-	buf_block_t*	block;
-	mtr_t		mtr;
-
-	/* Make sure that the tablespace is not deleted while we are
-	trying to access the page. Nothing is validated when the
-	tablespace cannot be acquired. */
-	fil_space_t*	space = fil_space_acquire(space_id);
-
-	if (!space) {
-		return;
-	}
-
-	mtr_start(&mtr);
-
-	block = buf_page_get_gen(
-		space_id, fsp_flags_get_zip_size(space->flags),
-		page_no, RW_X_LATCH, NULL,
-		BUF_GET_POSSIBLY_FREED,
-		__FILE__, __LINE__, &mtr);
-
-	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
-	ut_ad(lock_rec_validate_page(block));
-
-	mtr_commit(&mtr);
-
-	fil_space_release(space);
-}
-
-/*********************************************************************//**
-Validates the lock system.
-@return TRUE if ok */
-static
-bool
-lock_validate()
-/*===========*/
-{
-	typedef	std::pair<ulint, ulint>		page_addr_t;
-	typedef	std::set<page_addr_t>		page_addr_set;
-
-	page_addr_set	pages;
-
-	lock_mutex_enter();
-	mutex_enter(&trx_sys->mutex);
-
-	ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
-	ut_a(lock_validate_table_locks(&trx_sys->ro_trx_list));
-
-	/* Phase 1, under both mutexes: walk every cell of the record lock
-	hash and remember the (space, page_no) of each lock returned by
-	lock_rec_validate(). The pages themselves are checked only after the
-	mutexes have been released, so that lock_sys_t::mutex and
-	trx_sys_t::mutex are not hogged during the validation check. */
-
-	for (ulint cell = 0;
-	     cell < hash_get_n_cells(lock_sys->rec_hash);
-	     cell++) {
-
-		ib_uint64_t	limit = 0;
-
-		for (;;) {
-			const lock_t*	lock = lock_rec_validate(cell, &limit);
-
-			if (lock == NULL) {
-				break;
-			}
-
-			const ulint	space = lock->un_member.rec_lock.space;
-			const ulint	page_no = lock->un_member.rec_lock.page_no;
-
-			pages.insert(std::make_pair(space, page_no));
-		}
-	}
-
-	mutex_exit(&trx_sys->mutex);
-	lock_mutex_exit();
-
-	/* Phase 2, without the mutexes: validate each collected page. */
-
-	page_addr_set::const_iterator	it = pages.begin();
-
-	while (it != pages.end()) {
-		lock_rec_block_validate(it->first, it->second);
-		++it;
-	}
-
-	return(true);
-}
-#endif /* UNIV_DEBUG */
-/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set or
-when the transaction has fake_changes set.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_rec_insert_check_and_lock(
-/*===========================*/
-	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
-				set, does nothing */
-	const rec_t*	rec,	/*!< in: record after which to insert */
-	buf_block_t*	block,	/*!< in/out: buffer block of rec */
-	dict_index_t*	index,	/*!< in: index */
-	que_thr_t*	thr,	/*!< in: query thread */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	ibool*		inherit)/*!< in/out: the value on entry decides
-				whether the page max trx id of a
-				secondary index page is updated on
-				success; on return set to TRUE if the new
-				inserted record maybe should inherit
-				LOCK_GAP type locks from the successor
-				record */
-{
-	const rec_t*	next_rec;
-	trx_t*		trx;
-	lock_t*		lock;
-	dberr_t		err;
-	ulint		next_rec_heap_no;
-	ibool		inherit_in = *inherit; /* caller's value, read before *inherit is overwritten */
-#ifdef WITH_WSREP
-	lock_t*		c_lock=NULL;
-#endif
-
-	ut_ad(block->frame == page_align(rec));
-	ut_ad(!dict_index_is_online_ddl(index)
-	      || dict_index_is_clust(index)
-	      || (flags & BTR_CREATE_FLAG));
-	ut_ad((flags & BTR_NO_LOCKING_FLAG) || thr);
-
-	if (flags & BTR_NO_LOCKING_FLAG) {
-
-		return(DB_SUCCESS);
-	}
-
-	trx = thr_get_trx(thr);
-
-	if (UNIV_UNLIKELY(trx->fake_changes)) {
-		return(DB_SUCCESS);
-	}
-
-	next_rec = page_rec_get_next_const(rec);
-	next_rec_heap_no = page_rec_get_heap_no(next_rec);
-
-	lock_mutex_enter();
-	/* Because this code is invoked for a running transaction by
-	the thread that is serving the transaction, it is not necessary
-	to hold trx->mutex here. */
-
-	/* When inserting a record into an index, the table must be at
-	least IX-locked. When we are building an index, we would pass
-	BTR_NO_LOCKING_FLAG and skip the locking altogether. */
-	ut_ad(lock_table_has(trx, index->table, LOCK_IX));
-
-	lock = lock_rec_get_first(block, next_rec_heap_no);
-
-	if (UNIV_LIKELY(lock == NULL)) {
-		/* We optimize CPU time usage in the simplest case */
-
-		lock_mutex_exit();
-
-		if (inherit_in && !dict_index_is_clust(index)) {
-			/* Update the page max trx id field */
-			page_update_max_trx_id(block,
-					       buf_block_get_page_zip(block),
-					       trx->id, mtr);
-		}
-
-		*inherit = FALSE;
-
-		return(DB_SUCCESS);
-	}
-
-	*inherit = TRUE;
-
-	/* If another transaction has an explicit lock request which locks
-	the gap, waiting or granted, on the successor, the insert has to wait.
-
-	An exception is the case where the lock by another transaction
-	is a gap type lock which it placed to wait for its turn to insert. We
-	do not consider that kind of a lock conflicting with our insert. This
-	eliminates an unnecessary deadlock which resulted when 2 transactions
-	had to wait for their insert. Both had waiting gap type lock requests
-	on the successor, which produced an unnecessary deadlock. */
-
-#ifdef WITH_WSREP
-	if ((c_lock = (ib_lock_t*)lock_rec_other_has_conflicting(
-		static_cast<enum lock_mode>(
-			LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
-		block, next_rec_heap_no, trx))) {
-#else
-	if (lock_rec_other_has_conflicting(
-		    static_cast<enum lock_mode>(
-			    LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
-		    block, next_rec_heap_no, trx)) {
-#endif /* WITH_WSREP */
-
-		/* Note that we may get DB_SUCCESS also here! */
-		trx_mutex_enter(trx);
-
-#ifdef WITH_WSREP
-		err = lock_rec_enqueue_waiting(c_lock,
-			LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
-			block, next_rec_heap_no, index, thr);
-#else
-		err = lock_rec_enqueue_waiting(
-			LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
-			block, next_rec_heap_no, index, thr);
-#endif /* WITH_WSREP */
-
-		trx_mutex_exit(trx);
-	} else {
-		err = DB_SUCCESS;
-	}
-
-	lock_mutex_exit();
-
-	switch (err) {
-	case DB_SUCCESS_LOCKED_REC:
-		err = DB_SUCCESS;
-		/* fall through */
-	case DB_SUCCESS:
-		if (!inherit_in || dict_index_is_clust(index)) {
-			break;
-		}
-		/* Update the page max trx id field */
-		page_update_max_trx_id(block,
-				       buf_block_get_page_zip(block),
-				       trx->id, mtr); /* falls through to the break below */
-	default:
-		/* We only care about the two return values. */
-		break;
-	}
-
-#ifdef UNIV_DEBUG
-	{
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		const ulint*	offsets;
-		rec_offs_init(offsets_);
-
-		offsets = rec_get_offsets(next_rec, index, offsets_,
-					  ULINT_UNDEFINED, &heap);
-
-		ut_ad(lock_rec_queue_validate(
-				FALSE, block, next_rec, index, offsets));
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-#endif /* UNIV_DEBUG */
-
-	return(err);
-}
-
-/*********************************************************************//**
-If a transaction has an implicit x-lock on a record, but no explicit x-lock
-set on the record, sets one for it.
*/
-static
-void
-lock_rec_convert_impl_to_expl(
-/*==========================*/
-	const buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*		rec,	/*!< in: user record on page */
-	dict_index_t*		index,	/*!< in: index of record */
-	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
-{
-	trx_id_t	trx_id;
-
-	ut_ad(!lock_mutex_own());
-	ut_ad(page_rec_is_user_rec(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-
-	if (dict_index_is_clust(index)) {
-		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
-		/* The clustered index record was last modified by
-		this transaction. The transaction may have been
-		committed a long time ago. */
-	} else {
-		ut_ad(!dict_index_is_online_ddl(index));
-		trx_id = lock_sec_rec_some_has_impl(rec, index, offsets);
-		/* The transaction can be committed before the
-		trx_is_active(trx_id, NULL) check below, because we are not
-		holding lock_mutex. */
-
-		ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP,
-						     trx_id, rec, block));
-	}
-
-	if (trx_id != 0) {
-		trx_id_t*	impl_trx_desc;
-		ulint		heap_no = page_rec_get_heap_no(rec);
-
-		lock_mutex_enter();
-
-		/* If the transaction is still active and has no
-		explicit x-lock set on the record, set one for it */
-
-		mutex_enter(&trx_sys->mutex);
-		impl_trx_desc = trx_find_descriptor(trx_sys->descriptors,
-						    trx_sys->descr_n_used,
-						    trx_id);
-		mutex_exit(&trx_sys->mutex);
-
-		/* trx_id cannot be committed until lock_mutex_exit()
-		because lock_trx_release_locks() acquires lock_sys->mutex */
-
-		if (impl_trx_desc != NULL
-		    && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
-					  heap_no, trx_id)) {
-			ulint	type_mode = (LOCK_REC | LOCK_X
-					     | LOCK_REC_NOT_GAP);
-
-			mutex_enter(&trx_sys->mutex);
-			trx_t*	impl_trx = trx_rw_get_active_trx_by_id(trx_id,
-								       NULL);
-			mutex_exit(&trx_sys->mutex);
-			ut_ad(impl_trx != NULL); /* debug-only check; release builds pass impl_trx on unchecked */
-
-			lock_rec_add_to_queue(
-				type_mode, block, heap_no, index,
-				impl_trx, FALSE);
-		}
-
-		lock_mutex_exit();
-	}
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set or when
-the transaction has fake_changes set. DB_SUCCESS_LOCKED_REC from
-lock_rec_lock() is reported as DB_SUCCESS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
-	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
-					bit is set, does nothing */
-	const buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*		rec,	/*!< in: record which should be
-					modified */
-	dict_index_t*		index,	/*!< in: clustered index */
-	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
-	que_thr_t*		thr)	/*!< in: query thread */
-{
-	dberr_t	err;
-	ulint	heap_no;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(block->frame == page_align(rec));
-
-	if (flags & BTR_NO_LOCKING_FLAG) {
-
-		return(DB_SUCCESS);
-	}
-
-	if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
-		return(DB_SUCCESS);
-	}
-
-	heap_no = rec_offs_comp(offsets)
-		? rec_get_heap_no_new(rec)
-		: rec_get_heap_no_old(rec);
-
-	/* If a transaction has no explicit x-lock set on the record, set one
-	for it */
-
-	lock_rec_convert_impl_to_expl(block, rec, index, offsets);
-
-	lock_mutex_enter();
-	trx_t*		trx = thr_get_trx(thr); /* only read by the debug assertion below */
-
-	ut_ad(lock_table_has(trx, index->table, LOCK_IX));
-
-	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
-			    block, heap_no, index, thr);
-
-	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
-	lock_mutex_exit();
-
-	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
-
-	if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
-		err = DB_SUCCESS;
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (delete
-mark or delete unmark) of a secondary index record.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set or when
-the transaction has fake_changes set. On success the page max trx id is
-updated and DB_SUCCESS_LOCKED_REC is reported as DB_SUCCESS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
-	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
-				bit is set, does nothing */
-	buf_block_t*	block,	/*!< in/out: buffer block of rec */
-	const rec_t*	rec,	/*!< in: record which should be
-				modified; NOTE: as this is a secondary
-				index, we always have to modify the
-				clustered index record first: see the
-				comment below */
-	dict_index_t*	index,	/*!< in: secondary index */
-	que_thr_t*	thr,	/*!< in: query thread
-				(can be NULL if BTR_NO_LOCKING_FLAG) */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	dberr_t	err;
-	ulint	heap_no;
-
-	ut_ad(!dict_index_is_clust(index));
-	ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
-	ut_ad(block->frame == page_align(rec));
-
-	if (flags & BTR_NO_LOCKING_FLAG) {
-
-		return(DB_SUCCESS);
-	}
-
-	if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
-		return(DB_SUCCESS);
-	}
-
-	heap_no = page_rec_get_heap_no(rec);
-
-	/* Another transaction cannot have an implicit lock on the record,
-	because when we come here, we already have modified the clustered
-	index record, and this would not have been possible if another active
-	transaction had modified this secondary index record. */
-
-	trx_t*		trx = thr_get_trx(thr); /* only read by the debug assertion below */
-	lock_mutex_enter();
-
-	ut_ad(lock_table_has(trx, index->table, LOCK_IX));
-
-	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
-			    block, heap_no, index, thr);
-
-	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
-	lock_mutex_exit();
-
-#ifdef UNIV_DEBUG
-	{
-		mem_heap_t*	heap		= NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		const ulint*	offsets;
-		rec_offs_init(offsets_);
-
-		offsets = rec_get_offsets(rec, index, offsets_,
-					  ULINT_UNDEFINED, &heap);
-
-		ut_ad(lock_rec_queue_validate(
-			FALSE, block, rec, index, offsets));
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-#endif /* UNIV_DEBUG */
-
-	if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
-		/* Update the page max trx id field */
-		/* It might not be necessary to do this if
-		err == DB_SUCCESS (no new lock created),
-		but it should not cost too much performance. */
-		page_update_max_trx_id(block,
-				       buf_block_get_page_zip(block),
-				       thr_get_trx(thr)->id, mtr);
-		err = DB_SUCCESS;
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Like lock_clust_rec_read_check_and_lock(), but reads a
-secondary index record.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set, or
-when the transaction has fake_changes set and srv_fake_changes_locks is
-not set; a fake_changes transaction that does lock requests LOCK_S
-instead of LOCK_X.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
-	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
-					bit is set, does nothing */
-	const buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*		rec,	/*!< in: user record or page
-					supremum record which should
-					be read or passed over by a
-					read cursor */
-	dict_index_t*		index,	/*!< in: secondary index */
-	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
-					the read cursor should set on
-					records: LOCK_S or LOCK_X; the
-					latter is possible in
-					SELECT FOR UPDATE */
-	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
-					LOCK_REC_NOT_GAP */
-	que_thr_t*		thr)	/*!< in: query thread */
-{
-	dberr_t	err;
-	ulint	heap_no;
-
-	ut_ad(!dict_index_is_clust(index));
-	ut_ad(!dict_index_is_online_ddl(index));
-	ut_ad(block->frame == page_align(rec));
-	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mode == LOCK_X || mode == LOCK_S);
-
-	if (flags & BTR_NO_LOCKING_FLAG) {
-
-		return(DB_SUCCESS);
-	}
-
-	if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
-		if (!srv_fake_changes_locks) {
-			return(DB_SUCCESS);
-		}
-		if (mode == LOCK_X) {
-			mode = LOCK_S; /* fake changes never take an X lock here */
-		}
-	}
-
-	heap_no = page_rec_get_heap_no(rec);
-
-	/* Some transaction may have an implicit x-lock on the record only
-	if the max trx id for the page >= min trx id for the trx list or a
-	database recovery is running. */
-
-	if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
-	     || recv_recovery_is_on())
-	    && !page_rec_is_supremum(rec)) {
-
-		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
-	}
-
-	trx_t*		trx = thr_get_trx(thr); /* NOTE(review): thr is used unchecked here although the fake_changes test above allows for thr == NULL -- confirm thr cannot be NULL on this path */
-	lock_mutex_enter();
-
-	ut_ad(mode != LOCK_X
-	      || lock_table_has(trx, index->table, LOCK_IX));
-	ut_ad(mode != LOCK_S
-	      || lock_table_has(trx, index->table, LOCK_IS));
-
-	err = lock_rec_lock(FALSE, mode | gap_mode,
-			    block, heap_no, index, thr);
-
-	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
-	lock_mutex_exit();
-
-	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
-
-	return(err);
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set, or
-when the transaction has fake_changes set and srv_fake_changes_locks is
-not set; a fake_changes transaction that does lock requests LOCK_S
-instead of LOCK_X.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
-	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
-					bit is set, does nothing */
-	const buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*		rec,	/*!< in: user record or page
-					supremum record which should
-					be read or passed over by a
-					read cursor */
-	dict_index_t*		index,	/*!< in: clustered index */
-	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
-					the read cursor should set on
-					records: LOCK_S or LOCK_X; the
-					latter is possible in
-					SELECT FOR UPDATE */
-	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
-					LOCK_REC_NOT_GAP */
-	que_thr_t*		thr)	/*!< in: query thread */
-{
-	dberr_t	err;
-	ulint	heap_no;
-
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(block->frame == page_align(rec));
-	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
-	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
-	      || gap_mode == LOCK_REC_NOT_GAP);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (flags & BTR_NO_LOCKING_FLAG) {
-
-		return(DB_SUCCESS);
-	}
-
-	if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
-		if (!srv_fake_changes_locks) {
-			return(DB_SUCCESS);
-		}
-		if (mode == LOCK_X) {
-			mode = LOCK_S; /* fake changes never take an X lock here */
-		}
-	}
-
-	heap_no = page_rec_get_heap_no(rec);
-
-	if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
-
-		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
-	}
-
-	lock_mutex_enter();
-	trx_t*		trx = thr_get_trx(thr); /* NOTE(review): thr is used unchecked here although the fake_changes test above allows for thr == NULL -- confirm thr cannot be NULL on this path */
-
-	ut_ad(mode != LOCK_X
-	      || lock_table_has(trx, index->table, LOCK_IX));
-	ut_ad(mode != LOCK_S
-	      || lock_table_has(trx, index->table, LOCK_IS));
-
-	err = lock_rec_lock(FALSE, mode | gap_mode,
-			    block, heap_no, index, thr);
-
-	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
-	lock_mutex_exit();
-
-	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
-
-	return(err);
-}
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets". The offsets are computed here with rec_get_offsets(), and a
-DB_SUCCESS_LOCKED_REC result is reported as DB_SUCCESS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
-	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
-					bit is set, does nothing */
-	const buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*		rec,	/*!< in: user record or page
-					supremum record which should
-					be read or passed over by a
-					read cursor */
-	dict_index_t*		index,	/*!< in: clustered index */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
-					the read cursor should set on
-					records: LOCK_S or LOCK_X; the
-					latter is possible in
-					SELECT FOR UPDATE */
-	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
-					LOCK_REC_NOT_GAP */
-	que_thr_t*		thr)	/*!< in: query thread */
-{
-	mem_heap_t*	tmp_heap	= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	dberr_t		err;
-	rec_offs_init(offsets_);
-
-	offsets = rec_get_offsets(rec, index, offsets,
-				  ULINT_UNDEFINED, &tmp_heap);
-	err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
-						 offsets, mode, gap_mode, thr);
-	if (tmp_heap) {
-		mem_heap_free(tmp_heap); /* freed only if rec_get_offsets() set tmp_heap */
-	}
-
-	if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
-		err = DB_SUCCESS;
-	}
-
-	return(err);
-}
-
-/*******************************************************************//**
-Release the last lock from
the transaction's autoinc locks. The vector must not be empty. */
-UNIV_INLINE
-void
-lock_release_autoinc_last_lock(
-/*===========================*/
-	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
-{
-	ulint		last;
-	lock_t*		lock;
-
-	ut_ad(lock_mutex_own());
-	ut_a(!ib_vector_is_empty(autoinc_locks));
-
-	/* The lock to be released must be the last lock acquired. */
-	last = ib_vector_size(autoinc_locks) - 1;
-	lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
-
-	/* Should have only AUTOINC locks in the vector. */
-	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
-	ut_a(lock_get_type(lock) == LOCK_TABLE);
-
-	ut_a(lock->un_member.tab_lock.table != NULL);
-
-	/* This will remove the lock from the trx autoinc_locks too. */
-	lock_table_dequeue(lock);
-
-	/* Remove from the table vector too. */
-	lock_trx_table_locks_remove(lock);
-}
-
-/*******************************************************************//**
-Check if a transaction holds any autoinc locks.
-trx->autoinc_locks must not be NULL.
-@return TRUE if the transaction holds any AUTOINC locks. */
-static
-ibool
-lock_trx_holds_autoinc_locks(
-/*=========================*/
-	const trx_t*	trx)		/*!< in: transaction */
-{
-	ut_a(trx->autoinc_locks != NULL);
-
-	return(!ib_vector_is_empty(trx->autoinc_locks));
-}
-
-/*******************************************************************//**
-Release all the transaction's autoinc locks.
-The caller must hold the lock mutex; trx->autoinc_locks must not be NULL. */
-static
-void
-lock_release_autoinc_locks(
-/*=======================*/
-	trx_t*		trx)		/*!< in/out: transaction */
-{
-	ut_ad(lock_mutex_own());
-	/* If this is invoked for a running transaction by the thread
-	that is serving the transaction, then it is not necessary to
-	hold trx->mutex here. */
-
-	ut_a(trx->autoinc_locks != NULL);
-
-	/* We release the locks in the reverse order. This is to
-	avoid searching the vector for the element to delete at
-	the lower level. See (lock_table_remove_low()) for details. */
-	while (!ib_vector_is_empty(trx->autoinc_locks)) {
-
-		/* lock_table_remove_low() will also remove the lock from
-		the transaction's autoinc_locks vector. */
-		lock_release_autoinc_last_lock(trx->autoinc_locks);
-	}
-
-	/* Should release all locks. */
-	ut_a(ib_vector_is_empty(trx->autoinc_locks));
-}
-
-/*******************************************************************//**
-Gets the type of a lock. Non-inline version for using outside of the
-lock module. Forwards to lock_get_type_low().
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
-ulint
-lock_get_type(
-/*==========*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	return(lock_get_type_low(lock));
-}
-
-/*******************************************************************//**
-Gets the trx of the lock. Non-inline version for using outside of the
-lock module.
-@return trx_t* (the lock->trx field) */
-UNIV_INTERN
-trx_t*
-lock_get_trx(
-/*=========*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	return (lock->trx);
-}
-
-/*******************************************************************//**
-Gets the id of the transaction owning a lock.
-@return transaction id (lock->trx->id) */
-UNIV_INTERN
-trx_id_t
-lock_get_trx_id(
-/*============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	return(lock->trx->id);
-}
-
-/*******************************************************************//**
-Gets the mode of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock mode */
-UNIV_INTERN
-const char*
-lock_get_mode_str(
-/*==============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	/* Only a record lock with the gap flag set gets the ",GAP"
-	suffix. */
-	const ibool	gap = lock_get_type_low(lock) == LOCK_REC
-		&& lock_rec_get_gap(lock);
-
-	switch (lock_get_mode(lock)) {
-	case LOCK_S:
-		return(gap ? "S,GAP" : "S");
-	case LOCK_X:
-		return(gap ? "X,GAP" : "X");
-	case LOCK_IS:
-		return(gap ? "IS,GAP" : "IS");
-	case LOCK_IX:
-		return(gap ? "IX,GAP" : "IX");
-	case LOCK_AUTO_INC:
-		return("AUTO_INC");
-	default:
-		return("UNKNOWN");
-	}
-}
-
-/*******************************************************************//**
-Returns a human readable name for the type of a lock: "RECORD",
-"TABLE" or "UNKNOWN".
-The string should not be free()'d or modified.
-@return lock type */
-UNIV_INTERN
-const char*
-lock_get_type_str(
-/*==============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	const ulint	type = lock_get_type_low(lock);
-
-	if (type == LOCK_REC) {
-		return("RECORD");
-	}
-
-	if (type == LOCK_TABLE) {
-		return("TABLE");
-	}
-
-	return("UNKNOWN");
-}
-
-/*******************************************************************//**
-Returns the table a lock belongs to: the table of the index for a
-record lock, the locked table for a table lock. Any other lock type
-is a fatal error.
-@return table */
-UNIV_INLINE
-dict_table_t*
-lock_get_table(
-/*===========*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	const ulint	type = lock_get_type_low(lock);
-
-	if (type == LOCK_REC) {
-		ut_ad(dict_index_is_clust(lock->index)
-		      || !dict_index_is_online_ddl(lock->index));
-
-		return(lock->index->table);
-	}
-
-	if (type == LOCK_TABLE) {
-		return(lock->un_member.tab_lock.table);
-	}
-
-	ut_error;
-	return(NULL);
-}
-
-/*******************************************************************//**
-Gets the id of the table on which the lock is.
-@return id of the table */
-UNIV_INTERN
-table_id_t
-lock_get_table_id(
-/*==============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	/* lock_get_table() resolves both record and table locks. */
-	return(lock_get_table(lock)->id);
-}
-
-/*******************************************************************//**
-Returns the name of the table that the lock is on.
-The string should not be free()'d or modified.
-@return name of the table */
-UNIV_INTERN
-const char*
-lock_get_table_name(
-/*================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	return(lock_get_table(lock)->name);
-}
-
-/*******************************************************************//**
-Returns the index that a record lock is on. Calling this for a lock
-that is not a record lock is a fatal error.
-@return index */
-UNIV_INTERN
-const dict_index_t*
-lock_rec_get_index(
-/*===============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	const dict_index_t*	rec_index = lock->index;
-
-	ut_ad(dict_index_is_clust(rec_index)
-	      || !dict_index_is_online_ddl(rec_index));
-
-	return(rec_index);
-}
-
-/*******************************************************************//**
-Returns the name of the index that a record lock is on. Calling this
-for a lock that is not a record lock is a fatal error.
-The string should not be free()'d or modified.
-@return name of the index */
-UNIV_INTERN
-const char*
-lock_rec_get_index_name(
-/*====================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	const dict_index_t*	rec_index = lock->index;
-
-	ut_ad(dict_index_is_clust(rec_index)
-	      || !dict_index_is_online_ddl(rec_index));
-
-	return(rec_index->name);
-}
-
-/*******************************************************************//**
-For a record lock, gets the tablespace number on which the lock is.
-Calling this for a lock that is not a record lock is a fatal error.
-@return tablespace number */
-UNIV_INTERN
-ulint
-lock_rec_get_space_id(
-/*==================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	return(lock->un_member.rec_lock.space);
-}
-
-/*******************************************************************//**
-For a record lock, gets the page number on which the lock is.
-Calling this for a lock that is not a record lock is a fatal error.
-@return page number */
-UNIV_INTERN
-ulint
-lock_rec_get_page_no(
-/*=================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	return(lock->un_member.rec_lock.page_no);
-}
-
-/*********************************************************************//**
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it. The caller must hold both the lock mutex and the mutex
-of lock->trx. lock->trx->lock.cancel is TRUE for the duration of the call. */
-UNIV_INTERN
-void
-lock_cancel_waiting_and_release(
-/*============================*/
-	lock_t*	lock)	/*!< in/out: waiting lock request */
-{
-	que_thr_t*	thr;
-
-	ut_ad(lock_mutex_own());
-	ut_ad(trx_mutex_own(lock->trx));
-
-	lock->trx->lock.cancel = TRUE;
-
-	if (lock_get_type_low(lock) == LOCK_REC) {
-
-		lock_rec_dequeue_from_page(lock);
-	} else {
-		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
-		if (lock->trx->autoinc_locks != NULL) {
-			/* Release the transaction's AUTOINC locks. */
-			lock_release_autoinc_locks(lock->trx);
-		}
-
-		lock_table_dequeue(lock);
-	}
-
-	/* Reset the wait flag and the back pointer to lock in trx. */
-
-	lock_reset_lock_and_trx_wait(lock);
-
-	/* The following function releases the trx from lock wait. */
-
-	thr = que_thr_end_lock_wait(lock->trx);
-
-	if (thr != NULL) {
-		lock_wait_release_thread_if_suspended(thr);
-	}
-
-	lock->trx->lock.cancel = FALSE;
-}
-
-/*********************************************************************//**
-Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
-function should be called at the end of an SQL statement, by the
-connection thread that owns the transaction (trx->mysql_thd).
-The caller must hold neither the lock mutex nor trx->mutex; the lock
-mutex is taken here only if the transaction holds AUTOINC locks.
*/
-UNIV_INTERN
-void
-lock_unlock_table_autoinc(
-/*======================*/
-	trx_t*	trx)	/*!< in/out: transaction */
-{
-	ut_ad(!lock_mutex_own());
-	ut_ad(!trx_mutex_own(trx));
-	ut_ad(!trx->lock.wait_lock);
-	/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
-	but not COMMITTED transactions. */
-	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
-	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
-
-	/* This function is invoked for a running transaction by the
-	thread that is serving the transaction. Therefore it is not
-	necessary to hold trx->mutex here. */
-
-	if (lock_trx_holds_autoinc_locks(trx)) {
-		lock_mutex_enter();
-
-		lock_release_autoinc_locks(trx);
-
-		lock_mutex_exit();
-	}
-}
-
-/*********************************************************************//**
-Releases a transaction's locks, and releases possible other transactions
-waiting because of these locks. Change the state of the transaction to
-TRX_STATE_COMMITTED_IN_MEMORY.
-The transaction must be in state TRX_STATE_PREPARED or TRX_STATE_ACTIVE;
-for a PREPARED transaction the prepared-transaction counters in trx_sys
-are decremented first. */
-UNIV_INTERN
-void
-lock_trx_release_locks(
-/*===================*/
-	trx_t*	trx)	/*!< in/out: transaction */
-{
-	assert_trx_in_list(trx);
-
-	if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
-		mutex_enter(&trx_sys->mutex);
-		ut_a(trx_sys->n_prepared_trx > 0);
-		trx_sys->n_prepared_trx--;
-		if (trx->is_recovered) {
-			ut_a(trx_sys->n_prepared_recovered_trx > 0);
-			trx_sys->n_prepared_recovered_trx--;
-		}
-		mutex_exit(&trx_sys->mutex);
-	} else {
-		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-	}
-
-	/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
-	is protected by both the lock_sys->mutex and the trx->mutex.
-	We also lock trx_sys->mutex, because state transition to
-	TRX_STATE_COMMITTED_IN_MEMORY must be atomic with removing trx
-	from the descriptors array. */
-	lock_mutex_enter();
-	mutex_enter(&trx_sys->mutex);
-	trx_mutex_enter(trx);
-
-	/* The following assignment makes the transaction committed in memory
-	and makes its changes to data visible to other transactions.
-	NOTE that there is a small discrepancy from the strict formal
-	visibility rules here: a human user of the database can see
-	modifications made by another transaction T even before the necessary
-	log segment has been flushed to the disk. If the database happens to
-	crash before the flush, the user has seen modifications from T which
-	will never be a committed transaction. However, any transaction T2
-	which sees the modifications of the committing transaction T, and
-	which also itself makes modifications to the database, will get an lsn
-	larger than the committing transaction T. In the case where the log
-	flush fails, and T never gets committed, also T2 will never get
-	committed. */
-
-	/*--------------------------------------*/
-	trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
-	/* The following also removes trx from trx_serial_list */
-	trx_release_descriptor(trx);
-	/*--------------------------------------*/
-
-	/* If the background thread trx_rollback_or_clean_recovered()
-	is still active then there is a chance that the rollback
-	thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
-	to clean it up calling trx_cleanup_at_db_startup(). This can
-	happen in the case we are committing a trx here that is left
-	in PREPARED state during the crash. Note that commit of the
-	rollback of a PREPARED trx happens in the recovery thread
-	while the rollback of other transactions happen in the
-	background thread. To avoid this race we unconditionally unset
-	the is_recovered flag. */
-
-	trx->is_recovered = FALSE;
-
-	trx_mutex_exit(trx);
-
-	mutex_exit(&trx_sys->mutex);
-
-	lock_release(trx); /* still holding the lock mutex */
-
-	lock_mutex_exit();
-}
-
-/*********************************************************************//**
-Check whether the transaction has already been rolled back because it
-was selected as a deadlock victim, or if it has to wait then cancel
-the wait lock.
-The lock mutex and trx->mutex are both held while the state is examined.
-@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-lock_trx_handle_wait(
-/*=================*/
-	trx_t*	trx)	/*!< in/out: trx lock state */
-{
-	dberr_t	err;
-
-	lock_mutex_enter();
-
-	trx_mutex_enter(trx);
-
-	if (trx->lock.was_chosen_as_deadlock_victim) {
-		err = DB_DEADLOCK;
-	} else if (trx->lock.wait_lock != NULL) {
-		lock_cancel_waiting_and_release(trx->lock.wait_lock);
-		err = DB_LOCK_WAIT;
-	} else {
-		/* The lock was probably granted before we got here. */
-		err = DB_SUCCESS;
-	}
-
-	lock_mutex_exit(); /* released in acquisition order, not in reverse */
-	trx_mutex_exit(trx);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Get the number of locks on a table.
-The length of table->locks is read while holding the lock mutex.
-@return number of locks */
-UNIV_INTERN
-ulint
-lock_table_get_n_locks(
-/*===================*/
-	const dict_table_t*	table)	/*!< in: table */
-{
-	ulint		n_table_locks;
-
-	lock_mutex_enter();
-
-	n_table_locks = UT_LIST_GET_LEN(table->locks);
-
-	lock_mutex_exit();
-
-	return(n_table_locks);
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Do an exhaustive check for any locks (table or rec) against the table.
-Every lock of every transaction in trx_list is examined. The caller must
-hold the lock mutex and trx_sys->mutex.
-@return lock if found, NULL otherwise */
-static
-const lock_t*
-lock_table_locks_lookup(
-/*====================*/
-	const dict_table_t*	table,		/*!< in: check if there are
-						any locks held on records in
-						this table or on the table
-						itself */
-	const trx_list_t*	trx_list)	/*!< in: trx list to check */
-{
-	trx_t*			trx;
-
-	ut_a(table != NULL);
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	ut_ad(trx_list == &trx_sys->rw_trx_list
-	      || trx_list == &trx_sys->ro_trx_list);
-
-	for (trx = UT_LIST_GET_FIRST(*trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		const lock_t*	lock;
-
-		assert_trx_in_list(trx);
-		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-
-		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-		     lock != NULL;
-		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
-
-			ut_a(lock->trx == trx);
-
-			if (lock_get_type_low(lock) == LOCK_REC) {
-				ut_ad(!dict_index_is_online_ddl(lock->index)
-				      || dict_index_is_clust(lock->index));
-				if (lock->index->table == table) {
-					return(lock);
-				}
-			} else if (lock->un_member.tab_lock.table == table) {
-				return(lock);
-			}
-		}
-	}
-
-	return(NULL);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Check if there are any locks (table or rec) against table.
-@return TRUE if table has either table or record locks.
*/ -UNIV_INTERN -ibool -lock_table_has_locks( -/*=================*/ - const dict_table_t* table) /*!< in: check if there are any locks - held on records in this table or on the - table itself */ -{ - ibool has_locks; - - lock_mutex_enter(); - - has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0; - -#ifdef UNIV_DEBUG - if (!has_locks) { - mutex_enter(&trx_sys->mutex); - - ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list)); - ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list)); - - mutex_exit(&trx_sys->mutex); - } -#endif /* UNIV_DEBUG */ - - lock_mutex_exit(); - - return(has_locks); -} - -#ifdef UNIV_DEBUG -/*******************************************************************//** -Check if the transaction holds any locks on the sys tables -or its records. -@return the strongest lock found on any sys table or 0 for none */ -UNIV_INTERN -const lock_t* -lock_trx_has_sys_table_locks( -/*=========================*/ - const trx_t* trx) /*!< in: transaction to check */ -{ - lint i; - const lock_t* strongest_lock = 0; - lock_mode strongest = LOCK_NONE; - - lock_mutex_enter(); - - /* Find a valid mode. Note: ib_vector_size() can be 0. 
*/ - for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) { - const lock_t* lock; - - lock = *static_cast<const lock_t**>( - ib_vector_get(trx->lock.table_locks, i)); - - if (lock != NULL - && dict_is_sys_table(lock->un_member.tab_lock.table->id)) { - - strongest = lock_get_mode(lock); - ut_ad(strongest != LOCK_NONE); - strongest_lock = lock; - break; - } - } - - if (strongest == LOCK_NONE) { - lock_mutex_exit(); - return(NULL); - } - - for (/* No op */; i >= 0; --i) { - const lock_t* lock; - - lock = *static_cast<const lock_t**>( - ib_vector_get(trx->lock.table_locks, i)); - - if (lock == NULL) { - continue; - } - - ut_ad(trx == lock->trx); - ut_ad(lock_get_type_low(lock) & LOCK_TABLE); - ut_ad(lock->un_member.tab_lock.table != NULL); - - lock_mode mode = lock_get_mode(lock); - - if (dict_is_sys_table(lock->un_member.tab_lock.table->id) - && lock_mode_stronger_or_eq(mode, strongest)) { - - strongest = mode; - strongest_lock = lock; - } - } - - lock_mutex_exit(); - - return(strongest_lock); -} - -/*******************************************************************//** -Check if the transaction holds an exclusive lock on a record. 
-@return whether the locks are held */ -UNIV_INTERN -bool -lock_trx_has_rec_x_lock( -/*====================*/ - const trx_t* trx, /*!< in: transaction to check */ - const dict_table_t* table, /*!< in: table to check */ - const buf_block_t* block, /*!< in: buffer block of the record */ - ulint heap_no)/*!< in: record heap number */ -{ - enum lock_mode intention_lock; - enum lock_mode rec_lock; - ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM); - - if (UNIV_UNLIKELY(trx->fake_changes)) { - - intention_lock = LOCK_IS; - rec_lock = LOCK_S; - } else { - - intention_lock = LOCK_IX; - rec_lock = LOCK_X; - } - lock_mutex_enter(); - ut_a(lock_table_has(trx, table, intention_lock)); - if (UNIV_LIKELY(srv_fake_changes_locks)) { - - ut_a(lock_rec_has_expl(rec_lock | LOCK_REC_NOT_GAP, - block, heap_no, trx->id)); - } - lock_mutex_exit(); - return(true); -} -#endif /* UNIV_DEBUG */ - -/*******************************************************************//** -Get lock mode and table/index name -@return string containing lock info */ -std::string -lock_get_info( - const lock_t* lock) -{ - std::string info; - std::string mode("mode "); - std::string index("index "); - std::string table("table "); - std::string n_uniq(" n_uniq"); - std::string n_user(" n_user"); - std::string lock_mode((lock_get_mode_str(lock))); - std::string iname(lock->index->name); - std::string tname(lock->index->table_name); - -#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \ - ( std::ostringstream() << std::dec << x ) ).str() - - info = mode + lock_mode - + index + iname - + table + tname - + n_uniq + SSTR(lock->index->n_uniq) - + n_user + SSTR(lock->index->n_user_defined_cols); - - return info; -} diff --git a/storage/xtradb/lock/lock0wait.cc b/storage/xtradb/lock/lock0wait.cc deleted file mode 100644 index a447027e336..00000000000 --- a/storage/xtradb/lock/lock0wait.cc +++ /dev/null @@ -1,576 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 
2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file lock/lock0wait.cc -The transaction lock system - -Created 25/5/2010 Sunny Bains -*******************************************************/ - -#define LOCK_MODULE_IMPLEMENTATION - -#include "srv0mon.h" -#include "que0que.h" -#include "lock0lock.h" -#include "row0mysql.h" -#include "srv0start.h" -#include "ha_prototypes.h" -#include "lock0priv.h" - -#include <mysql/service_wsrep.h> - -/*********************************************************************//** -Print the contents of the lock_sys_t::waiting_threads array. 
*/ -static -void -lock_wait_table_print(void) -/*=======================*/ -{ - ulint i; - const srv_slot_t* slot; - - ut_ad(lock_wait_mutex_own()); - - slot = lock_sys->waiting_threads; - - for (i = 0; i < OS_THREAD_MAX_N; i++, ++slot) { - - fprintf(stderr, - "Slot %lu: thread type %lu," - " in use %lu, susp %lu, timeout %lu, time %lu\n", - (ulong) i, - (ulong) slot->type, - (ulong) slot->in_use, - (ulong) slot->suspended, - slot->wait_timeout, - (ulong) difftime(ut_time(), slot->suspend_time)); - } -} - -/*********************************************************************//** -Release a slot in the lock_sys_t::waiting_threads. Adjust the array last pointer -if there are empty slots towards the end of the table. */ -static -void -lock_wait_table_release_slot( -/*=========================*/ - srv_slot_t* slot) /*!< in: slot to release */ -{ -#ifdef UNIV_DEBUG - srv_slot_t* upper = lock_sys->waiting_threads + OS_THREAD_MAX_N; -#endif /* UNIV_DEBUG */ - - lock_wait_mutex_enter(); - - ut_ad(slot->in_use); - ut_ad(slot->thr != NULL); - ut_ad(slot->thr->slot != NULL); - ut_ad(slot->thr->slot == slot); - - /* Must be within the array boundaries. */ - ut_ad(slot >= lock_sys->waiting_threads); - ut_ad(slot < upper); - - /* Note: When we reserve the slot we use the trx_t::mutex to update - the slot values to change the state to reserved. Here we are using the - lock mutex to change the state of the slot to free. This is by design, - because when we query the slot state we always hold both the lock and - trx_t::mutex. To reduce contention on the lock mutex when reserving the - slot we avoid acquiring the lock mutex. */ - - lock_mutex_enter(); - - slot->thr->slot = NULL; - slot->thr = NULL; - slot->in_use = FALSE; - - lock_mutex_exit(); - - /* Scan backwards and adjust the last free slot pointer. 
*/ - for (slot = lock_sys->last_slot; - slot > lock_sys->waiting_threads && !slot->in_use; - --slot) { - /* No op */ - } - - /* Either the array is empty or the last scanned slot is in use. */ - ut_ad(slot->in_use || slot == lock_sys->waiting_threads); - - lock_sys->last_slot = slot + 1; - - /* The last slot is either outside of the array boundary or it's - on an empty slot. */ - ut_ad(lock_sys->last_slot == upper || !lock_sys->last_slot->in_use); - - ut_ad(lock_sys->last_slot >= lock_sys->waiting_threads); - ut_ad(lock_sys->last_slot <= upper); - - lock_wait_mutex_exit(); -} - -/*********************************************************************//** -Reserves a slot in the thread table for the current user OS thread. -@return reserved slot */ -static -srv_slot_t* -lock_wait_table_reserve_slot( -/*=========================*/ - que_thr_t* thr, /*!< in: query thread associated - with the user OS thread */ - ulong wait_timeout) /*!< in: lock wait timeout value */ -{ - ulint i; - srv_slot_t* slot; - - ut_ad(lock_wait_mutex_own()); - ut_ad(trx_mutex_own(thr_get_trx(thr))); - - slot = lock_sys->waiting_threads; - - for (i = OS_THREAD_MAX_N; i--; ++slot) { - if (!slot->in_use) { - slot->in_use = TRUE; - slot->thr = thr; - slot->thr->slot = slot; - - if (slot->event == NULL) { - slot->event = os_event_create(); - ut_a(slot->event); - } - - os_event_reset(slot->event); - slot->suspended = TRUE; - slot->suspend_time = ut_time(); - slot->wait_timeout = wait_timeout; - - if (slot == lock_sys->last_slot) { - ++lock_sys->last_slot; - } - - ut_ad(lock_sys->last_slot - <= lock_sys->waiting_threads + OS_THREAD_MAX_N); - - return(slot); - } - } - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: There appear to be %lu user" - " threads currently waiting\n" - "InnoDB: inside InnoDB, which is the" - " upper limit. 
Cannot continue operation.\n" - "InnoDB: As a last thing, we print" - " a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N); - - lock_wait_table_print(); - - ut_error; - return(NULL); -} - -#ifdef WITH_WSREP -/*********************************************************************//** -check if lock timeout was for priority thread, -as a side effect trigger lock monitor -@return false for regular lock timeout */ -static ibool -wsrep_is_BF_lock_timeout( -/*====================*/ - trx_t* trx) /* in: trx to check for lock priority */ -{ - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_BF(trx->mysql_thd, FALSE)) { - fprintf(stderr, "WSREP: BF lock wait long\n"); - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - os_event_set(srv_monitor_event); - return TRUE; - } - return FALSE; - } -#endif /* WITH_WSREP */ - -/***************************************************************//** -Puts a user OS thread to wait for a lock to be released. If an error -occurs during the wait trx->error_state associated with thr is -!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK -are possible errors. DB_DEADLOCK is returned if selective deadlock -resolution chose this transaction as a victim. */ -UNIV_INTERN -void -lock_wait_suspend_thread( -/*=====================*/ - que_thr_t* thr) /*!< in: query thread associated with the - user OS thread */ -{ - srv_slot_t* slot; - double wait_time; - trx_t* trx; - ulint had_dict_lock; - ibool was_declared_inside_innodb; - ib_int64_t start_time = 0; - ib_int64_t finish_time; - ulint sec; - ulint ms; - ulong lock_wait_timeout; - - trx = thr_get_trx(thr); - - if (trx->mysql_thd != 0) { - DEBUG_SYNC_C("lock_wait_suspend_thread_enter"); - } - - /* InnoDB system transactions (such as the purge, and - incomplete transactions that are being rolled back after crash - recovery) will use the global value of - innodb_lock_wait_timeout, because trx->mysql_thd == NULL. 
*/ - lock_wait_timeout = trx_lock_wait_timeout_get(trx); - - lock_wait_mutex_enter(); - - trx_mutex_enter(trx); - - trx->error_state = DB_SUCCESS; - - if (thr->state == QUE_THR_RUNNING) { - - ut_ad(thr->is_active); - - /* The lock has already been released or this transaction - was chosen as a deadlock victim: no need to suspend */ - - if (trx->lock.was_chosen_as_deadlock_victim) { - - trx->error_state = DB_DEADLOCK; - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } - - lock_wait_mutex_exit(); - trx_mutex_exit(trx); - return; - } - - ut_ad(!thr->is_active); - - slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout); - - lock_wait_mutex_exit(); - trx_mutex_exit(trx); - - if (thr->lock_state == QUE_THR_LOCK_ROW) { - srv_stats.n_lock_wait_count.inc(); - srv_stats.n_lock_wait_current_count.inc(); - - if (ut_usectime(&sec, &ms) == -1) { - start_time = -1; - } else { - start_time = (ib_int64_t) sec * 1000000 + ms; - } - } - - ulint lock_type = ULINT_UNDEFINED; - - /* The wait_lock can be cleared by another thread when the - lock is released. But the wait can only be initiated by the - current thread which owns the transaction. Only acquire the - mutex if the wait_lock is still active. */ - if (const lock_t* wait_lock = trx->lock.wait_lock) { - lock_mutex_enter(); - wait_lock = trx->lock.wait_lock; - if (wait_lock) { - lock_type = lock_get_type_low(wait_lock); - } - lock_mutex_exit(); - } - - had_dict_lock = trx->dict_operation_lock_mode; - - switch (had_dict_lock) { - case 0: - break; - case RW_S_LATCH: - /* Release foreign key check latch */ - row_mysql_unfreeze_data_dictionary(trx); - - DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep"); - break; - default: - /* There should never be a lock wait when the - dictionary latch is reserved in X mode. Dictionary - transactions should only acquire locks on dictionary - tables, not other tables. All access to dictionary - tables should be covered by dictionary - transactions. 
*/ - ut_error; - } - - ut_a(trx->dict_operation_lock_mode == 0); - - /* Suspend this thread and wait for the event. */ - - was_declared_inside_innodb = trx->declared_to_be_inside_innodb; - - if (was_declared_inside_innodb) { - /* We must declare this OS thread to exit InnoDB, since a - possible other thread holding a lock which this thread waits - for must be allowed to enter, sooner or later */ - - srv_conc_force_exit_innodb(trx); - } - - /* Unknown is also treated like a record lock */ - if (lock_type == ULINT_UNDEFINED || lock_type == LOCK_REC) { - thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK); - } else { - ut_ad(lock_type == LOCK_TABLE); - thd_wait_begin(trx->mysql_thd, THD_WAIT_TABLE_LOCK); - } - - os_event_wait(slot->event); - - thd_wait_end(trx->mysql_thd); - - /* After resuming, reacquire the data dictionary latch if - necessary. */ - - if (was_declared_inside_innodb) { - - /* Return back inside InnoDB */ - - srv_conc_force_enter_innodb(trx); - } - - if (had_dict_lock) { - - row_mysql_freeze_data_dictionary(trx); - } - - wait_time = ut_difftime(ut_time(), slot->suspend_time); - - /* Release the slot for others to use */ - - lock_wait_table_release_slot(slot); - - if (thr->lock_state == QUE_THR_LOCK_ROW) { - ulint diff_time; - - if (ut_usectime(&sec, &ms) == -1) { - finish_time = -1; - } else { - finish_time = (ib_int64_t) sec * 1000000 + ms; - } - - diff_time = (finish_time > start_time) ? - (ulint) (finish_time - start_time) : 0; - - srv_stats.n_lock_wait_current_count.dec(); - srv_stats.n_lock_wait_time.add(diff_time); - - /* Only update the variable if we successfully - retrieved the start and finish times. See Bug#36819. 
*/ - if (diff_time > lock_sys->n_lock_max_wait_time - && start_time != -1 - && finish_time != -1) { - - lock_sys->n_lock_max_wait_time = diff_time; - } - - /* Record the lock wait time for this thread */ - thd_set_lock_wait_time(trx->mysql_thd, diff_time); - - } - - if (lock_wait_timeout < 100000000 - && wait_time > (double) lock_wait_timeout) { -#ifdef WITH_WSREP - if (!wsrep_on(trx->mysql_thd) || - (!wsrep_is_BF_lock_timeout(trx) && - trx->error_state != DB_DEADLOCK)) { -#endif /* WITH_WSREP */ - - trx->error_state = DB_LOCK_WAIT_TIMEOUT; - -#ifdef WITH_WSREP - } -#endif /* WITH_WSREP */ - MONITOR_INC(MONITOR_TIMEOUT); - } - - if (trx_is_interrupted(trx)) { - - trx->error_state = DB_INTERRUPTED; - } -} - -/********************************************************************//** -Releases a user OS thread waiting for a lock to be released, if the -thread is already suspended. */ -UNIV_INTERN -void -lock_wait_release_thread_if_suspended( -/*==================================*/ - que_thr_t* thr) /*!< in: query thread associated with the - user OS thread */ -{ - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(thr_get_trx(thr))); - - /* We own both the lock mutex and the trx_t::mutex but not the - lock wait mutex. This is OK because other threads will see the state - of this slot as being in use and no other thread can change the state - of the slot to free unless that thread also owns the lock mutex. */ - - if (thr->slot != NULL && thr->slot->in_use && thr->slot->thr == thr) { - trx_t* trx = thr_get_trx(thr); - - if (trx->lock.was_chosen_as_deadlock_victim) { - - trx->error_state = DB_DEADLOCK; - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } - - os_event_set(thr->slot->event); - } -} - -/*********************************************************************//** -Check if the thread lock wait has timed out. Release its locks if the -wait has actually timed out. 
*/ -static -void -lock_wait_check_and_cancel( -/*=======================*/ - const srv_slot_t* slot) /*!< in: slot reserved by a user - thread when the wait started */ -{ - trx_t* trx; - double wait_time; - ib_time_t suspend_time = slot->suspend_time; - - ut_ad(lock_wait_mutex_own()); - - ut_ad(slot->in_use); - - ut_ad(slot->suspended); - - wait_time = ut_difftime(ut_time(), suspend_time); - - trx = thr_get_trx(slot->thr); - - if (trx_is_interrupted(trx) - || (slot->wait_timeout < 100000000 - && (wait_time > (double) slot->wait_timeout - || wait_time < 0))) { - - /* Timeout exceeded or a wrap-around in system - time counter: cancel the lock request queued - by the transaction and release possible - other transactions waiting behind; it is - possible that the lock has already been - granted: in that case do nothing */ - - lock_mutex_enter(); - - trx_mutex_enter(trx); - - if (trx->lock.wait_lock) { - - ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT); -#ifdef WITH_WSREP - if (!wsrep_is_BF_lock_timeout(trx)) { -#endif /* WITH_WSREP */ - lock_cancel_waiting_and_release(trx->lock.wait_lock); -#ifdef WITH_WSREP - } -#endif /* WITH_WSREP */ - } - - lock_mutex_exit(); - - trx_mutex_exit(trx); - } - -} - -/*********************************************************************//** -A thread which wakes up threads whose lock wait may have lasted too long. 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(lock_wait_timeout_thread)(void*) -{ - ib_int64_t sig_count = 0; - os_event_t event = lock_sys->timeout_event; - - ut_ad(!srv_read_only_mode); - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(srv_lock_timeout_thread_key); -#endif /* UNIV_PFS_THREAD */ - - do { - srv_slot_t* slot; - - /* When someone is waiting for a lock, we wake up every second - and check if a timeout has passed for a lock wait */ - - os_event_wait_time_low(event, 1000000, sig_count); - sig_count = os_event_reset(event); - - if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { - break; - } - - lock_wait_mutex_enter(); - - /* Check all slots for user threads that are waiting - on locks, and if they have exceeded the time limit. */ - - for (slot = lock_sys->waiting_threads; - slot < lock_sys->last_slot; - ++slot) { - - /* We are doing a read without the lock mutex - and/or the trx mutex. This is OK because a slot - can't be freed or reserved without the lock wait - mutex. */ - - if (slot->in_use) { - lock_wait_check_and_cancel(slot); - } - } - - sig_count = os_event_reset(event); - - lock_wait_mutex_exit(); - - } while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); - - lock_sys->timeout_thread_active = false; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} diff --git a/storage/xtradb/log/log0crypt.cc b/storage/xtradb/log/log0crypt.cc deleted file mode 100644 index f6c1416d81a..00000000000 --- a/storage/xtradb/log/log0crypt.cc +++ /dev/null @@ -1,638 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2013, 2015, Google Inc. All Rights Reserved. -Copyright (C) 2014, 2016, MariaDB Corporation. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ -/**************************************************//** -@file log0crypt.cc -Innodb log encrypt/decrypt - -Created 11/25/2013 Minli Zhu Google -Modified Jan Lindström jan.lindstrom@mariadb.com -*******************************************************/ -#include "m_string.h" -#include "log0crypt.h" -#include <mysql/service_my_crypt.h> - -#include "log0log.h" -#include "srv0start.h" // for srv_start_lsn -#include "log0recv.h" // for recv_sys - -#include "ha_prototypes.h" // IB_LOG_ - -/* Used for debugging */ -// #define DEBUG_CRYPT 1 -#define UNENCRYPTED_KEY_VER 0 - -/* If true, enable redo log encryption. */ -extern my_bool srv_encrypt_log; - - -#include <algorithm> // std::sort -#include <deque> - -/* If true, enable redo log encryption. */ -UNIV_INTERN my_bool srv_encrypt_log = FALSE; -/* - Sub system type for InnoDB redo log crypto. - Set and used to validate crypto msg. 
-*/ -static const byte redo_log_purpose_byte = 0x02; - -#define LOG_DEFAULT_ENCRYPTION_KEY 1 - -/* - Store this many keys into each checkpoint info -*/ -static const size_t kMaxSavedKeys = LOG_CRYPT_MAX_ENTRIES; - -struct crypt_info_t { - ib_uint64_t checkpoint_no; /*!< checkpoint no */ - uint key_version; /*!< mysqld key version */ - byte crypt_msg[MY_AES_BLOCK_SIZE]; - byte crypt_key[MY_AES_BLOCK_SIZE]; - byte crypt_nonce[MY_AES_BLOCK_SIZE]; -}; - -static std::deque<crypt_info_t> crypt_info; - -/*********************************************************************//** -Get a log block's start lsn. -@return a log block's start lsn */ -static inline -lsn_t -log_block_get_start_lsn( -/*====================*/ - lsn_t lsn, /*!< in: checkpoint lsn */ - ulint log_block_no) /*!< in: log block number */ -{ - lsn_t start_lsn = - (lsn & (lsn_t)0xffffffff00000000ULL) | - (((log_block_no - 1) & (lsn_t)0x3fffffff) << 9); - return start_lsn; -} - -/*********************************************************************//** -Get crypt info from checkpoint. -@return a crypt info or NULL if not present. */ -static -const crypt_info_t* -get_crypt_info( -/*===========*/ - ib_uint64_t checkpoint_no) -{ - /* so that no one is modifying array while we search */ - ut_ad(mutex_own(&(log_sys->mutex))); - size_t items = crypt_info.size(); - - /* a log block only stores 4-bytes of checkpoint no */ - checkpoint_no &= 0xFFFFFFFF; - for (size_t i = 0; i < items; i++) { - struct crypt_info_t* it = &crypt_info[i]; - - if (it->checkpoint_no == checkpoint_no) { - return it; - } - } - - /* If checkpoint contains more than one key and we did not - find the correct one use the first one. */ - if (items) { - return (&crypt_info[0]); - } - - return NULL; -} - -/*********************************************************************//** -Get crypt info from log block -@return a crypt info or NULL if not present. 
*/ -static -const crypt_info_t* -get_crypt_info( -/*===========*/ - const byte* log_block) -{ - ib_uint64_t checkpoint_no = log_block_get_checkpoint_no(log_block); - return get_crypt_info(checkpoint_no); -} - -/*********************************************************************//** -Print checkpoint no from log block and all encryption keys from -checkpoints if they are present. Used for problem analysis. */ -void -log_crypt_print_checkpoint_keys( -/*============================*/ - const byte* log_block) -{ - ib_uint64_t checkpoint_no = log_block_get_checkpoint_no(log_block); - - if (crypt_info.size()) { - fprintf(stderr, - "InnoDB: redo log checkpoint: " UINT64PF " [ chk key ]: ", - checkpoint_no); - for (size_t i = 0; i < crypt_info.size(); i++) { - struct crypt_info_t* it = &crypt_info[i]; - fprintf(stderr, "[ " UINT64PF " %u ] ", - it->checkpoint_no, - it->key_version); - } - fprintf(stderr, "\n"); - } -} - -/*********************************************************************//** -Call AES CTR to encrypt/decrypt log blocks. */ -static -Crypt_result -log_blocks_crypt( -/*=============*/ - const byte* block, /*!< in: blocks before encrypt/decrypt*/ - ulint size, /*!< in: size of block */ - byte* dst_block, /*!< out: blocks after encrypt/decrypt */ - int what, /*!< in: encrypt or decrypt*/ - const crypt_info_t* crypt_info) /*!< in: crypt info or NULL */ -{ - byte *log_block = (byte*)block; - Crypt_result rc = MY_AES_OK; - uint dst_len; - byte aes_ctr_counter[MY_AES_BLOCK_SIZE]; - byte is_encrypt= what == ENCRYPTION_FLAG_ENCRYPT; - lsn_t lsn = is_encrypt ? log_sys->lsn : srv_start_lsn; - - const uint src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE; - for (ulint i = 0; i < size ; i += OS_FILE_LOG_BLOCK_SIZE) { - ulint log_block_no = log_block_get_hdr_no(log_block); - lsn_t log_block_start_lsn = log_block_get_start_lsn( - lsn, log_block_no); - - const crypt_info_t* info = crypt_info == NULL ? 
get_crypt_info(log_block) : - crypt_info; -#ifdef DEBUG_CRYPT - fprintf(stderr, - "%s %lu chkpt: %lu key: %u lsn: %lu\n", - is_encrypt ? "crypt" : "decrypt", - log_block_no, - log_block_get_checkpoint_no(log_block), - info ? info->key_version : 0, - log_block_start_lsn); -#endif - /* If no key is found from checkpoint assume the log_block - to be unencrypted. If checkpoint contains the encryption key - compare log_block current checksum, if checksum matches, - block can't be encrypted. */ - if (info == NULL || - info->key_version == UNENCRYPTED_KEY_VER || - (log_block_checksum_is_ok_or_old_format(log_block, false) && - what == ENCRYPTION_FLAG_DECRYPT)) { - memcpy(dst_block, log_block, OS_FILE_LOG_BLOCK_SIZE); - goto next; - } - - ut_ad(what == ENCRYPTION_FLAG_DECRYPT ? !log_block_checksum_is_ok_or_old_format(log_block, false) : - log_block_checksum_is_ok_or_old_format(log_block, false)); - - // Assume log block header is not encrypted - memcpy(dst_block, log_block, LOG_BLOCK_HDR_SIZE); - - // aes_ctr_counter = nonce(3-byte) + start lsn to a log block - // (8-byte) + lbn (4-byte) + abn - // (1-byte, only 5 bits are used). "+" means concatenate. - bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE); - memcpy(aes_ctr_counter, info->crypt_nonce, 3); - mach_write_to_8(aes_ctr_counter + 3, log_block_start_lsn); - mach_write_to_4(aes_ctr_counter + 11, log_block_no); - bzero(aes_ctr_counter + 15, 1); - - int rc; - rc = encryption_crypt(log_block + LOG_BLOCK_HDR_SIZE, src_len, - dst_block + LOG_BLOCK_HDR_SIZE, &dst_len, - (unsigned char*)(info->crypt_key), 16, - aes_ctr_counter, MY_AES_BLOCK_SIZE, - what | ENCRYPTION_FLAG_NOPAD, - LOG_DEFAULT_ENCRYPTION_KEY, - info->key_version); - - ut_a(rc == MY_AES_OK); - ut_a(dst_len == src_len); -next: - log_block += OS_FILE_LOG_BLOCK_SIZE; - dst_block += OS_FILE_LOG_BLOCK_SIZE; - } - - return rc; -} - -/*********************************************************************//** -Generate crypt key from crypt msg. 
-@return true if successfull, false if not. */ -static -bool -init_crypt_key( -/*===========*/ - crypt_info_t* info) /*< in/out: crypt info */ -{ - if (info->key_version == UNENCRYPTED_KEY_VER) { - memset(info->crypt_key, 0, sizeof(info->crypt_key)); - memset(info->crypt_msg, 0, sizeof(info->crypt_msg)); - memset(info->crypt_nonce, 0, sizeof(info->crypt_nonce)); - return true; - } - - byte mysqld_key[MY_AES_MAX_KEY_LENGTH] = {0}; - uint keylen= sizeof(mysqld_key); - uint rc; - - rc = encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY, info->key_version, mysqld_key, &keylen); - - if (rc) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Redo log crypto: getting mysqld crypto key " - "from key version failed err = %u. Reason could be that requested" - " key_version %u is not found or required encryption " - " key management is not found.", rc, info->key_version); - return false; - } - - uint dst_len; - int err= my_aes_crypt(MY_AES_ECB, ENCRYPTION_FLAG_NOPAD|ENCRYPTION_FLAG_ENCRYPT, - info->crypt_msg, sizeof(info->crypt_msg), //src, srclen - info->crypt_key, &dst_len, //dst, &dstlen - (unsigned char*)&mysqld_key, sizeof(mysqld_key), - NULL, 0); - - if (err != MY_AES_OK || dst_len != MY_AES_BLOCK_SIZE) { - fprintf(stderr, - "\nInnodb redo log crypto: getting redo log crypto key " - "failed err = %d len = %u.\n", err, dst_len); - return false; - } - - return true; -} - -/*********************************************************************//** -Compare function for checkpoint numbers -@return true if first checkpoint is larger than second one */ -static -bool -mysort(const crypt_info_t& i, - const crypt_info_t& j) -{ - return i.checkpoint_no > j.checkpoint_no; -} - -/*********************************************************************//** -Add crypt info to set if it is not already present -@return true if successfull, false if not- */ -static -bool -add_crypt_info( -/*===========*/ - crypt_info_t* info, /*!< in: crypt info */ - bool checkpoint_read)/*!< in: do we read checkpoint */ -{ - 
const crypt_info_t* found=NULL; - /* so that no one is searching array while we modify it */ - ut_ad(mutex_own(&(log_sys->mutex))); - - found = get_crypt_info(info->checkpoint_no); - - /* If one crypt info is found then we add a new one only if we - are reading checkpoint from the log. New checkpoints will always - use the first created crypt info. */ - if (found != NULL && - ( found->checkpoint_no == info->checkpoint_no || !checkpoint_read)) { - // already present... - return true; - } - - if (!init_crypt_key(info)) { - return false; - } - - crypt_info.push_back(*info); - - /* a log block only stores 4-bytes of checkpoint no */ - crypt_info.back().checkpoint_no &= 0xFFFFFFFF; - - // keep keys sorted, assuming that last added key will be used most - std::sort(crypt_info.begin(), crypt_info.end(), mysort); - - return true; -} - -/*********************************************************************//** -Encrypt log blocks. */ -UNIV_INTERN -Crypt_result -log_blocks_encrypt( -/*===============*/ - const byte* block, /*!< in: blocks before encryption */ - const ulint size, /*!< in: size of blocks, must be multiple of a log block */ - byte* dst_block) /*!< out: blocks after encryption */ -{ - return log_blocks_crypt(block, size, dst_block, ENCRYPTION_FLAG_ENCRYPT, NULL); -} - -/*********************************************************************//** -Set next checkpoint's key version to latest one, and generate current -key. Key version 0 means no encryption. 
*/ -UNIV_INTERN -void -log_crypt_set_ver_and_key( -/*======================*/ - ib_uint64_t next_checkpoint_no) -{ - crypt_info_t info; - info.checkpoint_no = next_checkpoint_no; - - if (!srv_encrypt_log) { - info.key_version = UNENCRYPTED_KEY_VER; - } else { - info.key_version = encryption_key_get_latest_version(LOG_DEFAULT_ENCRYPTION_KEY); - } - - if (info.key_version == UNENCRYPTED_KEY_VER) { - memset(info.crypt_msg, 0, sizeof(info.crypt_msg)); - memset(info.crypt_nonce, 0, sizeof(info.crypt_nonce)); - } else { - if (my_random_bytes(info.crypt_msg, MY_AES_BLOCK_SIZE) != MY_AES_OK) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Redo log crypto: generate " - "%u-byte random number as crypto msg failed.", - MY_AES_BLOCK_SIZE); - ut_error; - } - - if (my_random_bytes(info.crypt_nonce, MY_AES_BLOCK_SIZE) != MY_AES_OK) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Redo log crypto: generate " - "%u-byte random number as AES_CTR nonce failed.", - MY_AES_BLOCK_SIZE); - ut_error; - } - - } - - add_crypt_info(&info, false); -} - -/******************************************************** -Encrypt one or more log block before it is flushed to disk */ -UNIV_INTERN -void -log_encrypt_before_write( -/*=====================*/ - ib_uint64_t next_checkpoint_no, /*!< in: log group to be flushed */ - byte* block, /*!< in/out: pointer to a log block */ - const ulint size) /*!< in: size of log blocks */ -{ - ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); - - const crypt_info_t* info = get_crypt_info(next_checkpoint_no); - if (info == NULL) { - return; - } - - /* If the key is not encrypted or user has requested not to - encrypt, do not change log block. 
*/ - if (info->key_version == UNENCRYPTED_KEY_VER || !srv_encrypt_log) { - return; - } - - byte* dst_frame = (byte*)malloc(size); - - //encrypt log blocks content - Crypt_result result = log_blocks_crypt(block, size, dst_frame, ENCRYPTION_FLAG_ENCRYPT, NULL); - - if (result == MY_AES_OK) { - ut_ad(block[0] == dst_frame[0]); - memcpy(block, dst_frame, size); - } - free(dst_frame); - - if (unlikely(result != MY_AES_OK)) { - ut_error; - } -} - -/******************************************************** -Decrypt a specified log segment after they are read from a log file to a buffer. -*/ -void -log_decrypt_after_read( -/*===================*/ - byte* frame, /*!< in/out: log segment */ - const ulint size) /*!< in: log segment size */ -{ - ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0); - byte* dst_frame = (byte*)malloc(size); - - // decrypt log blocks content - Crypt_result result = log_blocks_crypt(frame, size, dst_frame, ENCRYPTION_FLAG_DECRYPT, NULL); - - if (result == MY_AES_OK) { - memcpy(frame, dst_frame, size); - } - free(dst_frame); - - if (unlikely(result != MY_AES_OK)) { - ut_error; - } -} - -/*********************************************************************//** -Writes the crypto (version, msg and iv) info, which has been used for -log blocks with lsn <= this checkpoint's lsn, to a log header's -checkpoint buf. 
*/ -UNIV_INTERN -void -log_crypt_write_checkpoint_buf( -/*===========================*/ - byte* buf) /*!< in/out: checkpoint buffer */ -{ - byte *save = buf; - - // Only write kMaxSavedKeys (sort keys to remove oldest) - std::sort(crypt_info.begin(), crypt_info.end(), mysort); - while (crypt_info.size() > kMaxSavedKeys) { - crypt_info.pop_back(); - } - - bool encrypted = false; - for (size_t i = 0; i < crypt_info.size(); i++) { - const crypt_info_t & it = crypt_info[i]; - if (it.key_version != UNENCRYPTED_KEY_VER) { - encrypted = true; - break; - } - } - - if (encrypted == false) { - // if no encryption is inuse then zero out - // crypt data for upward/downward compability - memset(buf + LOG_CRYPT_VER, 0, LOG_CRYPT_SIZE); - return; - } - - ib_uint64_t checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO); - buf += LOG_CRYPT_VER; - - mach_write_to_1(buf + 0, redo_log_purpose_byte); - mach_write_to_1(buf + 1, crypt_info.size()); - buf += 2; - for (size_t i = 0; i < crypt_info.size(); i++) { - struct crypt_info_t* it = &crypt_info[i]; - mach_write_to_4(buf + 0, it->checkpoint_no); - mach_write_to_4(buf + 4, it->key_version); - memcpy(buf + 8, it->crypt_msg, MY_AES_BLOCK_SIZE); - memcpy(buf + 24, it->crypt_nonce, MY_AES_BLOCK_SIZE); - buf += LOG_CRYPT_ENTRY_SIZE; - } - -#ifdef DEBUG_CRYPT - fprintf(stderr, "write chk: %lu [ chk key ]: ", checkpoint_no); - for (size_t i = 0; i < crypt_info.size(); i++) { - struct crypt_info_t* it = &crypt_info[i]; - fprintf(stderr, "[ %lu %u ] ", - it->checkpoint_no, - it->key_version); - } - fprintf(stderr, "\n"); -#else - (void)checkpoint_no; // unused variable -#endif - ut_a((ulint)(buf - save) <= OS_FILE_LOG_BLOCK_SIZE); -} - -/*********************************************************************//** -Read the crypto (version, msg and iv) info, which has been used for -log blocks with lsn <= this checkpoint's lsn, from a log header's -checkpoint buf. 
*/ -UNIV_INTERN -bool -log_crypt_read_checkpoint_buf( -/*===========================*/ - const byte* buf) { /*!< in: checkpoint buffer */ - - buf += LOG_CRYPT_VER; - - byte scheme = buf[0]; - if (scheme != redo_log_purpose_byte) { - return true; - } - buf++; - size_t n = buf[0]; - buf++; - - for (size_t i = 0; i < n; i++) { - struct crypt_info_t info; - info.checkpoint_no = mach_read_from_4(buf + 0); - info.key_version = mach_read_from_4(buf + 4); - memcpy(info.crypt_msg, buf + 8, MY_AES_BLOCK_SIZE); - memcpy(info.crypt_nonce, buf + 24, MY_AES_BLOCK_SIZE); - - if (!add_crypt_info(&info, true)) { - return false; - } - buf += LOG_CRYPT_ENTRY_SIZE; - } - -#ifdef DEBUG_CRYPT - fprintf(stderr, "read [ chk key ]: "); - for (size_t i = 0; i < crypt_info.size(); i++) { - struct crypt_info_t* it = &crypt_info[i]; - fprintf(stderr, "[ %lu %u ] ", - it->checkpoint_no, - it->key_version); - } - fprintf(stderr, "\n"); -#endif - return true; -} - -/******************************************************** -Check is the checkpoint information encrypted. This check -is based on fact has log group crypt info and based -on this crypt info was the key version different from -unencrypted key version. There is no realible way to -distinguish encrypted log block from corrupted log block, -but if log block corruption is found this function is -used to find out if log block is maybe encrypted but -encryption key, key management plugin or encryption -algorithm does not match. 
-@return TRUE, if log block may be encrypted */ -UNIV_INTERN -ibool -log_crypt_block_maybe_encrypted( -/*============================*/ - const byte* log_block, /*!< in: log block */ - log_crypt_err_t* err_info) /*!< out: error info */ -{ - ibool maybe_encrypted = FALSE; - const crypt_info_t* crypt_info; - - *err_info = LOG_UNENCRYPTED; - crypt_info = get_crypt_info(log_block); - - if (crypt_info && - crypt_info->key_version != UNENCRYPTED_KEY_VER) { - byte mysqld_key[MY_AES_BLOCK_SIZE] = {0}; - uint keylen= sizeof(mysqld_key); - - /* Log block contains crypt info and based on key - version block could be encrypted. */ - *err_info = LOG_DECRYPT_MAYBE_FAILED; - maybe_encrypted = TRUE; - - if (encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY, - crypt_info->key_version, mysqld_key, &keylen)) { - *err_info = LOG_CRYPT_KEY_NOT_FOUND; - } - } - - return (maybe_encrypted); -} - -/******************************************************** -Print crypt error message to error log */ -UNIV_INTERN -void -log_crypt_print_error( -/*==================*/ - log_crypt_err_t err_info) /*!< out: error info */ -{ - switch(err_info) { - case LOG_CRYPT_KEY_NOT_FOUND: - ib_logf(IB_LOG_LEVEL_ERROR, - "Redo log crypto: getting mysqld crypto key " - "from key version failed. Reason could be that " - "requested key version is not found or required " - "encryption key management plugin is not found."); - break; - case LOG_DECRYPT_MAYBE_FAILED: - ib_logf(IB_LOG_LEVEL_ERROR, - "Redo log crypto: failed to decrypt log block. 
" - "Reason could be that requested key version is " - "not found, required encryption key management " - "plugin is not found or configured encryption " - "algorithm and/or method does not match."); - break; - default: - ut_error; /* Real bug */ - } -} diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc deleted file mode 100644 index 833f3240369..00000000000 --- a/storage/xtradb/log/log0log.cc +++ /dev/null @@ -1,4141 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2009, Google Inc. -Copyright (c) 2014, 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file log/log0log.cc -Database log - -Created 12/9/1995 Heikki Tuuri -*******************************************************/ - -#include "config.h" -#ifdef HAVE_ALLOCA_H -#include "alloca.h" -#elif defined(HAVE_MALLOC_H) -#include "malloc.h" -#endif - -/* Used for debugging */ -// #define DEBUG_CRYPT 1 - -#include "log0log.h" - -#ifdef UNIV_NONINL -#include "log0log.ic" -#endif - -#ifndef UNIV_HOTBACKUP -#if MYSQL_VERSION_ID < 100200 -# include <my_systemd.h> /* sd_notifyf() */ -#endif - -#include "mem0mem.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "lock0lock.h" -#include "log0recv.h" -#include "fil0fil.h" -#include "dict0boot.h" -#include "dict0stats_bg.h" -#include "dict0stats_bg.h" -#include "btr0defragment.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "trx0sys.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "srv0mon.h" - -/* -General philosophy of InnoDB redo-logs: - -1) Every change to a contents of a data page must be done -through mtr, which in mtr_commit() writes log records -to the InnoDB redo log. - -2) Normally these changes are performed using a mlog_write_ulint() -or similar function. - -3) In some page level operations only a code number of a -c-function and its parameters are written to the log to -reduce the size of the log. - - 3a) You should not add parameters to these kind of functions - (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse()) - - 3b) You should not add such functionality which either change - working when compared with the old or are dependent on data - outside of the page. 
These kind of functions should implement - self-contained page transformation and it should be unchanged - if you don't have very essential reasons to change log - semantics or format. - -*/ - -/* Global log system variable */ -UNIV_INTERN log_t* log_sys = NULL; - -/** Pointer to the log checksum calculation function */ -UNIV_INTERN log_checksum_func_t log_checksum_algorithm_ptr = - log_block_calc_checksum_innodb; - -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(log_scrub_thread)(void*); - -/* Next log block number to do dummy record filling if no log records written -for a while */ -static ulint next_lbn_to_pad = 0; - -#ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key; -# ifdef UNIV_LOG_ARCHIVE -UNIV_INTERN mysql_pfs_key_t archive_lock_key; -# endif -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t log_sys_mutex_key; -UNIV_INTERN mysql_pfs_key_t log_flush_order_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_DEBUG -UNIV_INTERN ibool log_do_write = TRUE; -#endif /* UNIV_DEBUG */ - -/* These control how often we print warnings if the last checkpoint is too -old */ -UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE; -UNIV_INTERN time_t log_last_warning_time; - -#ifdef UNIV_LOG_ARCHIVE -/* Pointer to this variable is used as the i/o-message when we do i/o to an -archive */ -UNIV_INTERN byte log_archive_io; -#endif /* UNIV_LOG_ARCHIVE */ - -UNIV_INTERN ulint log_disable_checkpoint_active= 0; - -/* A margin for free space in the log buffer before a log entry is catenated */ -#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE) - -/* Margins for free space in the log buffer after a log entry is catenated */ -#define LOG_BUF_FLUSH_RATIO 2 -#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE) - -/* Margin for the free space in the smallest log group, before a new query -step which modifies the database, is started */ - -#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * 
UNIV_PAGE_SIZE) -#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE) - -/* This parameter controls asynchronous making of a new checkpoint; the value -should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */ - -#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32 - -/* This parameter controls synchronous preflushing of modified buffer pages */ -#define LOG_POOL_PREFLUSH_RATIO_SYNC 16 - -/* The same ratio for asynchronous preflushing; this value should be less than -the previous */ -#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8 - -/* Extra margin, in addition to one log file, used in archiving */ -#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE) - -/* This parameter controls asynchronous writing to the archive */ -#define LOG_ARCHIVE_RATIO_ASYNC 16 - -/* Codes used in unlocking flush latches */ -#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1 -#define LOG_UNLOCK_FLUSH_LOCK 2 - -/* States of an archiving operation */ -#define LOG_ARCHIVE_READ 1 -#define LOG_ARCHIVE_WRITE 2 - -/** Event to wake up the log scrub thread */ -static os_event_t log_scrub_event; - -static bool log_scrub_thread_active; - -/******************************************************//** -Completes a checkpoint write i/o to a log file. */ -static -void -log_io_complete_checkpoint(void); -/*============================*/ -#ifdef UNIV_LOG_ARCHIVE -/******************************************************//** -Completes an archiving i/o. */ -static -void -log_io_complete_archive(void); -/*=========================*/ -#endif /* UNIV_LOG_ARCHIVE */ - -/****************************************************************//** -Returns the oldest modified block lsn in the pool, or log_sys->lsn if none -exists. 
-@return LSN of oldest modification */ -static -lsn_t -log_buf_pool_get_oldest_modification(void) -/*======================================*/ -{ - lsn_t lsn; - - ut_ad(mutex_own(&(log_sys->mutex))); - - lsn = buf_pool_get_oldest_modification(); - - if (!lsn) { - - lsn = log_sys->lsn; - } - - return(lsn); -} - -/****************************************************************//** -Returns the oldest modified block lsn in the pool, or log_sys->lsn if none -exists. -@return LSN of oldest modification */ -static -lsn_t -log_buf_pool_get_oldest_modification_peek(void) -/*===========================================*/ -{ - lsn_t lsn; - - lsn = buf_pool_get_oldest_modification_peek(); - - if (!lsn) { - - lsn = log_sys->lsn; - } - - return(lsn); -} - -/****************************************************************//** -Checks if the log groups have a big enough margin of free space in -so that a new log entry can be written without overwriting log data -that is not read by the changed page bitmap thread. -@return TRUE if there is not enough free space. */ -static -ibool -log_check_tracking_margin( - ulint lsn_advance) /*!< in: an upper limit on how much log data we - plan to write. If zero, the margin will be - checked for the already-written log. */ -{ - lsn_t tracked_lsn; - lsn_t tracked_lsn_age; - - if (!srv_track_changed_pages) { - return FALSE; - } - - ut_ad(mutex_own(&(log_sys->mutex))); - - tracked_lsn = log_get_tracked_lsn(); - tracked_lsn_age = log_sys->lsn - tracked_lsn; - - /* The overwrite would happen when log_sys->log_group_capacity is - exceeded, but we use max_checkpoint_age for an extra safety margin. */ - return tracked_lsn_age + lsn_advance > log_sys->max_checkpoint_age; -} - -/** Extends the log buffer. 
-@param[in] len requested minimum size in bytes */ -static -void -log_buffer_extend( - ulint len) -{ - ulint move_start; - ulint move_end; - byte* tmp_buf = reinterpret_cast<byte *>(alloca(OS_FILE_LOG_BLOCK_SIZE)); - - mutex_enter(&(log_sys->mutex)); - - while (log_sys->is_extending) { - /* Another thread is trying to extend already. - Needs to wait for. */ - mutex_exit(&(log_sys->mutex)); - - log_buffer_flush_to_disk(); - - mutex_enter(&(log_sys->mutex)); - - if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) { - /* Already extended enough by the others */ - mutex_exit(&(log_sys->mutex)); - return; - } - } - - log_sys->is_extending = true; - - while (log_sys->n_pending_writes != 0 - || ut_calc_align_down(log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE) - != ut_calc_align_down(log_sys->buf_next_to_write, - OS_FILE_LOG_BLOCK_SIZE)) { - /* Buffer might have >1 blocks to write still. */ - mutex_exit(&(log_sys->mutex)); - - log_buffer_flush_to_disk(); - - mutex_enter(&(log_sys->mutex)); - } - - move_start = ut_calc_align_down( - log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE); - move_end = log_sys->buf_free; - - /* store the last log block in buffer */ - ut_memcpy(tmp_buf, log_sys->buf + move_start, - move_end - move_start); - - log_sys->buf_free -= move_start; - log_sys->buf_next_to_write -= move_start; - - /* reallocate log buffer */ - srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1; - mem_free(log_sys->buf_ptr); - log_sys->buf_ptr = static_cast<byte*>( - mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - log_sys->buf = static_cast<byte*>( - ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); - log_sys->buf_size = LOG_BUFFER_SIZE; - log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO - - LOG_BUF_FLUSH_MARGIN; - - /* restore the last log block */ - ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start); - - ut_ad(log_sys->is_extending); - log_sys->is_extending = false; - - mutex_exit(&(log_sys->mutex)); - - ib_logf(IB_LOG_LEVEL_INFO, - "innodb_log_buffer_size 
was extended to %lu.", - LOG_BUFFER_SIZE); -} - -/************************************************************//** -Opens the log for log_write_low. The log must be closed with log_close. -@return start lsn of the log record */ -UNIV_INTERN -lsn_t -log_open( -/*=====*/ - ulint len) /*!< in: length of data to be catenated */ -{ - log_t* log = log_sys; - ulint len_upper_limit; -#ifdef UNIV_LOG_ARCHIVE - lsn_t archived_lsn_age; - ulint dummy; -#endif /* UNIV_LOG_ARCHIVE */ - ulint count = 0; - ulint tcount = 0; - - if (len >= log->buf_size / 2) { - DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash", - DBUG_SUICIDE();); - - /* log_buffer is too small. try to extend instead of crash. */ - ib_logf(IB_LOG_LEVEL_WARN, - "The transaction log size is too large" - " for innodb_log_buffer_size (%lu >= %lu / 2). " - "Trying to extend it.", - len, LOG_BUFFER_SIZE); - - log_buffer_extend((len + 1) * 2); - } -loop: - ut_ad(!recv_no_log_write); - - if (log->is_extending) { - - mutex_exit(&(log->mutex)); - - /* Log buffer size is extending. Writing up to the next block - should wait for the extending finished. 
*/ - - os_thread_sleep(100000); - - ut_ad(++count < 50); - - goto loop; - } - - /* Calculate an upper limit for the space the string may take in the - log buffer */ - - len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4; - - if (log->buf_free + len_upper_limit > log->buf_size) { - - mutex_exit(&(log->mutex)); - - /* Not enough free space, do a syncronous flush of the log - buffer */ - - log_buffer_flush_to_disk(); - - srv_stats.log_waits.inc(); - - ut_ad(++count < 50); - - mutex_enter(&(log->mutex)); - - goto loop; - } - -#ifdef UNIV_LOG_ARCHIVE - if (log->archiving_state != LOG_ARCH_OFF) { - - archived_lsn_age = log->lsn - log->archived_lsn; - if (archived_lsn_age + len_upper_limit - > log->max_archived_lsn_age) { - /* Not enough free archived space in log groups: do a - synchronous archive write batch: */ - - mutex_exit(&(log->mutex)); - - ut_ad(len_upper_limit <= log->max_archived_lsn_age); - - log_archive_do(TRUE, &dummy); - - ut_ad(++count < 50); - - mutex_enter(&(log->mutex)); - - goto loop; - } - } -#endif /* UNIV_LOG_ARCHIVE */ - - if (log_check_tracking_margin(len_upper_limit) && - (++tcount + count < 50)) { - - /* This log write would violate the untracked LSN free space - margin. Limit this to 50 retries as there might be situations - where we have no choice but to proceed anyway, i.e. if the log - is about to be overflown, log tracking or not. */ - mutex_exit(&(log->mutex)); - - os_thread_sleep(10000); - - mutex_enter(&(log->mutex)); - - goto loop; - } - -#ifdef UNIV_LOG_DEBUG - log->old_buf_free = log->buf_free; - log->old_lsn = log->lsn; -#endif - return(log->lsn); -} - -/************************************************************//** -Writes to the log the string given. It is assumed that the caller holds the -log mutex. 
*/ -UNIV_INTERN -void -log_write_low( -/*==========*/ - byte* str, /*!< in: string */ - ulint str_len) /*!< in: string length */ -{ - log_t* log = log_sys; - ulint len; - ulint data_len; - byte* log_block; - - ut_ad(mutex_own(&(log->mutex))); -part_loop: - ut_ad(!recv_no_log_write); - /* Calculate a part length */ - - data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len; - - if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - - /* The string fits within the current log block */ - - len = str_len; - } else { - data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; - - len = OS_FILE_LOG_BLOCK_SIZE - - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) - - LOG_BLOCK_TRL_SIZE; - } - - ut_memcpy(log->buf + log->buf_free, str, len); - - str_len -= len; - str = str + len; - - log_block = static_cast<byte*>( - ut_align_down( - log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE)); - - log_block_set_data_len(log_block, data_len); - - if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - /* This block became full */ - log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE); - log_block_set_checkpoint_no(log_block, - log_sys->next_checkpoint_no); - len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE; - - log->lsn += len; - - /* Initialize the next block header */ - log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn); - } else { - log->lsn += len; - } - - log->buf_free += len; - - ut_ad(log->buf_free <= log->buf_size); - - if (str_len > 0) { - goto part_loop; - } - - srv_stats.log_write_requests.inc(); -} - -/************************************************************//** -Closes the log. 
-@return lsn */ -UNIV_INTERN -lsn_t -log_close(void) -/*===========*/ -{ - byte* log_block; - ulint first_rec_group; - lsn_t oldest_lsn; - lsn_t lsn; - lsn_t tracked_lsn; - lsn_t tracked_lsn_age; - log_t* log = log_sys; - lsn_t checkpoint_age; - - ut_ad(mutex_own(&(log->mutex))); - ut_ad(!recv_no_log_write); - - lsn = log->lsn; - - log_block = static_cast<byte*>( - ut_align_down( - log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE)); - - first_rec_group = log_block_get_first_rec_group(log_block); - - if (first_rec_group == 0) { - /* We initialized a new log block which was not written - full by the current mtr: the next mtr log record group - will start within this block at the offset data_len */ - - log_block_set_first_rec_group( - log_block, log_block_get_data_len(log_block)); - } - - if (log->buf_free > log->max_buf_free) { - - log->check_flush_or_checkpoint = TRUE; - } - - if (srv_track_changed_pages) { - - tracked_lsn = log_get_tracked_lsn(); - tracked_lsn_age = lsn - tracked_lsn; - - if (tracked_lsn_age >= log->log_group_capacity) { - - fprintf(stderr, "InnoDB: Error: the age of the " - "oldest untracked record exceeds the log " - "group capacity!\n"); - fprintf(stderr, "InnoDB: Error: stopping the log " - "tracking thread at LSN " LSN_PF "\n", - tracked_lsn); - srv_track_changed_pages = FALSE; - } - } - - checkpoint_age = lsn - log->last_checkpoint_lsn; - - if (checkpoint_age >= log->log_group_capacity) { - /* TODO: split btr_store_big_rec_extern_fields() into small - steps so that we can release all latches in the middle, and - call log_free_check() to ensure we never write over log written - after the latest checkpoint. In principle, we should split all - big_rec operations, but other operations are smaller. 
*/ - - if (!log_has_printed_chkp_warning - || difftime(time(NULL), log_last_warning_time) > 15) { - - log_has_printed_chkp_warning = TRUE; - log_last_warning_time = time(NULL); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: the age of the last" - " checkpoint is " LSN_PF ",\n" - "InnoDB: which exceeds the log group" - " capacity " LSN_PF ".\n" - "InnoDB: If you are using big" - " BLOB or TEXT rows, you must set the\n" - "InnoDB: combined size of log files" - " at least 10 times bigger than the\n" - "InnoDB: largest such row.\n", - checkpoint_age, - log->log_group_capacity); - } - } - - if (checkpoint_age <= log->max_modified_age_sync) { - - goto function_exit; - } - - oldest_lsn = buf_pool_get_oldest_modification(); - - if (!oldest_lsn - || lsn - oldest_lsn > log->max_modified_age_sync - || checkpoint_age > log->max_checkpoint_age_async) { - - log->check_flush_or_checkpoint = TRUE; - } -function_exit: - -#ifdef UNIV_LOG_DEBUG - log_check_log_recs(log->buf + log->old_buf_free, - log->buf_free - log->old_buf_free, log->old_lsn); -#endif - - return(lsn); -} - -/******************************************************//** -Pads the current log block full with dummy log records. Used in producing -consistent archived log files and scrubbing redo log. 
*/ -static -void -log_pad_current_log_block(void) -/*===========================*/ -{ - byte b = MLOG_DUMMY_RECORD; - ulint pad_length; - ulint i; - lsn_t lsn; - - /* We retrieve lsn only because otherwise gcc crashed on HP-UX */ - lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE); - - pad_length = OS_FILE_LOG_BLOCK_SIZE - - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE) - - LOG_BLOCK_TRL_SIZE; - if (pad_length - == (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE)) { - - pad_length = 0; - } - - for (i = 0; i < pad_length; i++) { - log_write_low(&b, 1); - } - - lsn = log_sys->lsn; - - log_close(); - log_release(); - - ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE); -} - -/******************************************************//** -Calculates the data capacity of a log group, when the log file headers are not -included. -@return capacity in bytes */ -UNIV_INTERN -lsn_t -log_group_get_capacity( -/*===================*/ - const log_group_t* group) /*!< in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files); -} - -/******************************************************//** -Calculates the offset within a log group, when the log file headers are not -included. -@return size offset (<= offset) */ -UNIV_INLINE -lsn_t -log_group_calc_size_offset( -/*=======================*/ - lsn_t offset, /*!< in: real offset within the - log group */ - const log_group_t* group) /*!< in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size)); -} - -/******************************************************//** -Calculates the offset within a log group, when the log file headers are -included. 
-@return real offset (>= offset) */ -UNIV_INLINE -lsn_t -log_group_calc_real_offset( -/*=======================*/ - lsn_t offset, /*!< in: size offset within the - log group */ - const log_group_t* group) /*!< in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - return(offset + LOG_FILE_HDR_SIZE - * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE))); -} - -/******************************************************//** -Calculates the offset of an lsn within a log group. -@return offset within the log group */ -static -lsn_t -log_group_calc_lsn_offset( -/*======================*/ - lsn_t lsn, /*!< in: lsn */ - const log_group_t* group) /*!< in: log group */ -{ - lsn_t gr_lsn; - lsn_t gr_lsn_size_offset; - lsn_t difference; - lsn_t group_size; - lsn_t offset; - - ut_ad(mutex_own(&(log_sys->mutex))); - - gr_lsn = group->lsn; - - gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, group); - - group_size = log_group_get_capacity(group); - - if (lsn >= gr_lsn) { - - difference = lsn - gr_lsn; - } else { - difference = gr_lsn - lsn; - - difference = difference % group_size; - - difference = group_size - difference; - } - - offset = (gr_lsn_size_offset + difference) % group_size; - - /* fprintf(stderr, - "Offset is " LSN_PF " gr_lsn_offset is " LSN_PF - " difference is " LSN_PF "\n", - offset, gr_lsn_size_offset, difference); - */ - - return(log_group_calc_real_offset(offset, group)); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_DEBUG -UNIV_INTERN ibool log_debug_writes = FALSE; -#endif /* UNIV_DEBUG */ - -/*******************************************************************//** -Calculates where in log files we find a specified lsn. 
-@return log file number */ -UNIV_INTERN -ulint -log_calc_where_lsn_is( -/*==================*/ - ib_int64_t* log_file_offset, /*!< out: offset in that file - (including the header) */ - ib_uint64_t first_header_lsn, /*!< in: first log file start - lsn */ - ib_uint64_t lsn, /*!< in: lsn whose position to - determine */ - ulint n_log_files, /*!< in: total number of log - files */ - ib_int64_t log_file_size) /*!< in: log file size - (including the header) */ -{ - ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE; - ulint file_no; - ib_int64_t add_this_many; - - if (lsn < first_header_lsn) { - add_this_many = 1 + (first_header_lsn - lsn) - / (capacity * (ib_int64_t) n_log_files); - lsn += add_this_many - * capacity * (ib_int64_t) n_log_files; - } - - ut_a(lsn >= first_header_lsn); - - file_no = ((ulint)((lsn - first_header_lsn) / capacity)) - % n_log_files; - *log_file_offset = (lsn - first_header_lsn) % capacity; - - *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE; - - return(file_no); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Sets the field values in group to correspond to a given lsn. For this function -to work, the values must already be correctly initialized to correspond to -some lsn, for instance, a checkpoint lsn. */ -UNIV_INTERN -void -log_group_set_fields( -/*=================*/ - log_group_t* group, /*!< in/out: group */ - lsn_t lsn) /*!< in: lsn for which the values should be - set */ -{ - group->lsn_offset = log_group_calc_lsn_offset(lsn, group); - group->lsn = lsn; -} - -/*****************************************************************//** -Calculates the recommended highest values for lsn - last_checkpoint_lsn, -lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. 
-@return error value FALSE if the smallest log group is too small to -accommodate the number of OS threads in the database server */ -static -ibool -log_calc_max_ages(void) -/*===================*/ -{ - lsn_t margin; - ulint free; - - lsn_t smallest_capacity = ((srv_log_file_size_requested - << srv_page_size_shift) - - LOG_FILE_HDR_SIZE) - * srv_n_log_files; - - /* Add extra safety */ - smallest_capacity -= smallest_capacity / 10; - - /* For each OS thread we must reserve so much free space in the - smallest log group that it can accommodate the log entries produced - by single query steps: running out of free log space is a serious - system error which requires rebooting the database. */ - - free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency) - + LOG_CHECKPOINT_EXTRA_FREE; - if (free >= smallest_capacity / 2) { - ib_logf(IB_LOG_LEVEL_FATAL, - "The combined size of ib_logfiles" - " should be bigger than\n" - "InnoDB: 200 kB * innodb_thread_concurrency."); - } - margin = smallest_capacity - free; - margin = margin - margin / 10; /* Add still some extra safety */ - - mutex_enter(&log_sys->mutex); - - log_sys->log_group_capacity = smallest_capacity; - - log_sys->max_modified_age_async = margin - - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC; - log_sys->max_modified_age_sync = margin - - margin / LOG_POOL_PREFLUSH_RATIO_SYNC; - - log_sys->max_checkpoint_age_async = margin - margin - / LOG_POOL_CHECKPOINT_RATIO_ASYNC; - log_sys->max_checkpoint_age = margin; - -#ifdef UNIV_LOG_ARCHIVE - lsn_t archive_margin = smallest_capacity - - (srv_log_file_size_requested - LOG_FILE_HDR_SIZE) - - LOG_ARCHIVE_EXTRA_MARGIN; - log_sys->max_archived_lsn_age = archive_margin; - - log_sys->max_archived_lsn_age_async = archive_margin - - archive_margin / LOG_ARCHIVE_RATIO_ASYNC; -#endif /* UNIV_LOG_ARCHIVE */ - mutex_exit(&log_sys->mutex); - - return(true); -} - -/******************************************************//** -Initializes the log. 
*/ -UNIV_INTERN -void -log_init(void) -/*==========*/ -{ - log_sys = static_cast<log_t*>(mem_alloc(sizeof(log_t))); - - mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG); - - mutex_create(log_flush_order_mutex_key, - &log_sys->log_flush_order_mutex, - SYNC_LOG_FLUSH_ORDER); - - mutex_enter(&(log_sys->mutex)); - - /* Start the lsn from one log block from zero: this way every - log record has a start lsn != zero, a fact which we will use */ - - log_sys->lsn = LOG_START_LSN; - - ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE); - ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); - - log_sys->buf_ptr = static_cast<byte*>( - mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - - log_sys->buf = static_cast<byte*>( - ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); - - log_sys->buf_size = LOG_BUFFER_SIZE; - log_sys->is_extending = false; - - log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO - - LOG_BUF_FLUSH_MARGIN; - log_sys->check_flush_or_checkpoint = TRUE; - UT_LIST_INIT(log_sys->log_groups); - - log_sys->n_log_ios = 0; - - log_sys->n_log_ios_old = log_sys->n_log_ios; - log_sys->last_printout_time = time(NULL); - /*----------------------------*/ - - log_sys->buf_next_to_write = 0; - - log_sys->write_lsn = 0; - log_sys->current_flush_lsn = 0; - log_sys->flushed_to_disk_lsn = 0; - - log_sys->written_to_some_lsn = log_sys->lsn; - log_sys->written_to_all_lsn = log_sys->lsn; - - log_sys->n_pending_writes = 0; - - log_sys->no_flush_event = os_event_create(); - - os_event_set(log_sys->no_flush_event); - - log_sys->one_flushed_event = os_event_create(); - - os_event_set(log_sys->one_flushed_event); - - /*----------------------------*/ - - log_sys->next_checkpoint_no = 0; - log_sys->last_checkpoint_lsn = log_sys->lsn; - log_sys->next_checkpoint_lsn = log_sys->lsn; - log_sys->n_pending_checkpoint_writes = 0; - - - rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock, - SYNC_NO_ORDER_CHECK); - - log_sys->checkpoint_buf_ptr = 
static_cast<byte*>( - mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE)); - - log_sys->checkpoint_buf = static_cast<byte*>( - ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); - - /*----------------------------*/ - -#ifdef UNIV_LOG_ARCHIVE - /* Under MySQL, log archiving is always off */ - log_sys->archiving_state = LOG_ARCH_OFF; - log_sys->archived_lsn = log_sys->lsn; - log_sys->next_archived_lsn = 0; - - log_sys->n_pending_archive_ios = 0; - - rw_lock_create(archive_lock_key, &log_sys->archive_lock, - SYNC_NO_ORDER_CHECK); - - log_sys->archive_buf_ptr = static_cast<byte*>( - mem_zalloc(LOG_ARCHIVE_BUF_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - - log_sys->archive_buf = static_cast<byte*>( - ut_align(log_sys->archive_buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); - - log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE; - - log_sys->archiving_on = os_event_create(); -#endif /* UNIV_LOG_ARCHIVE */ - - log_sys->tracked_lsn = 0; - - /*----------------------------*/ - - log_block_init(log_sys->buf, log_sys->lsn); - log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); - - log_sys->buf_free = LOG_BLOCK_HDR_SIZE; - log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN? - - MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, - log_sys->lsn - log_sys->last_checkpoint_lsn); - - mutex_exit(&(log_sys->mutex)); - - log_scrub_thread_active = !srv_read_only_mode && srv_scrub_log; - if (log_scrub_thread_active) { - log_scrub_event = os_event_create(); - os_thread_create(log_scrub_thread, NULL, NULL); - } - -#ifdef UNIV_LOG_DEBUG - recv_sys_create(); - recv_sys_init(buf_pool_get_curr_size()); - - recv_sys->parse_start_lsn = log_sys->lsn; - recv_sys->scanned_lsn = log_sys->lsn; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = log_sys->lsn; - recv_sys->limit_lsn = LSN_MAX; -#endif -} - -/******************************************************************//** -Inits a log group to the log system. 
*/ -UNIV_INTERN -void -log_group_init( -/*===========*/ - ulint id, /*!< in: group id */ - ulint n_files, /*!< in: number of log files */ - lsn_t file_size, /*!< in: log file size in bytes */ - ulint space_id, /*!< in: space id of the file space - which contains the log files of this - group */ - ulint archive_space_id) /*!< in: space id of the file space - which contains some archived log - files for this group; currently, only - for the first log group this is - used */ -{ - ulint i; - - log_group_t* group; - - group = static_cast<log_group_t*>(mem_alloc(sizeof(log_group_t))); - - group->id = id; - group->n_files = n_files; - group->file_size = file_size; - group->space_id = space_id; - group->state = LOG_GROUP_OK; - group->lsn = LOG_START_LSN; - group->lsn_offset = LOG_FILE_HDR_SIZE; - group->n_pending_writes = 0; - - group->file_header_bufs_ptr = static_cast<byte**>( - mem_zalloc(sizeof(byte*) * n_files)); - - group->file_header_bufs = static_cast<byte**>( - mem_zalloc(sizeof(byte**) * n_files)); - -#ifdef UNIV_LOG_ARCHIVE - group->archive_file_header_bufs_ptr = static_cast<byte**>( - mem_zalloc( sizeof(byte*) * n_files)); - - group->archive_file_header_bufs = static_cast<byte**>( - mem_zalloc(sizeof(byte*) * n_files)); -#endif /* UNIV_LOG_ARCHIVE */ - - for (i = 0; i < n_files; i++) { - group->file_header_bufs_ptr[i] = static_cast<byte*>( - mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - - group->file_header_bufs[i] = static_cast<byte*>( - ut_align(group->file_header_bufs_ptr[i], - OS_FILE_LOG_BLOCK_SIZE)); - -#ifdef UNIV_LOG_ARCHIVE - group->archive_file_header_bufs_ptr[i] = static_cast<byte*>( - mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - - group->archive_file_header_bufs[i] = static_cast<byte*>( - ut_align(group->archive_file_header_bufs_ptr[i], - OS_FILE_LOG_BLOCK_SIZE)); -#endif /* UNIV_LOG_ARCHIVE */ - } - -#ifdef UNIV_LOG_ARCHIVE - group->archive_space_id = archive_space_id; - - group->archived_file_no = LOG_START_LSN; - 
group->archived_offset = 0; -#endif /* UNIV_LOG_ARCHIVE */ - - group->checkpoint_buf_ptr = static_cast<byte*>( - mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE)); - - group->checkpoint_buf = static_cast<byte*>( - ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE)); - - UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group); - - ut_a(log_calc_max_ages()); -} - -/******************************************************************//** -Does the unlockings needed in flush i/o completion. */ -UNIV_INLINE -void -log_flush_do_unlocks( -/*=================*/ - ulint code) /*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK - and LOG_UNLOCK_NONE_FLUSHED_LOCK */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - /* NOTE that we must own the log mutex when doing the setting of the - events: this is because transactions will wait for these events to - be set, and at that moment the log flush they were waiting for must - have ended. If the log mutex were not reserved here, the i/o-thread - calling this function might be preempted for a while, and when it - resumed execution, it might be that a new flush had been started, and - this function would erroneously signal the NEW flush as completed. - Thus, the changes in the state of these events are performed - atomically in conjunction with the changes in the state of - log_sys->n_pending_writes etc. */ - - if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) { - os_event_set(log_sys->one_flushed_event); - } - - if (code & LOG_UNLOCK_FLUSH_LOCK) { - os_event_set(log_sys->no_flush_event); - } -} - -/******************************************************************//** -Checks if a flush is completed for a log group and does the completion -routine if yes. 
-@return LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */ -UNIV_INLINE -ulint -log_group_check_flush_completion( -/*=============================*/ - log_group_t* group) /*!< in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - if (!log_sys->one_flushed && group->n_pending_writes == 0) { -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Log flushed first to group %lu\n", - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - log_sys->written_to_some_lsn = log_sys->write_lsn; - log_sys->one_flushed = TRUE; - - return(LOG_UNLOCK_NONE_FLUSHED_LOCK); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes && (group->n_pending_writes == 0)) { - - fprintf(stderr, "Log flushed to group %lu\n", - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - return(0); -} - -/******************************************************//** -Checks if a flush is completed and does the completion routine if yes. -@return LOG_UNLOCK_FLUSH_LOCK or 0 */ -static -ulint -log_sys_check_flush_completion(void) -/*================================*/ -{ - ulint move_start; - ulint move_end; - - ut_ad(mutex_own(&(log_sys->mutex))); - - if (log_sys->n_pending_writes == 0) { - - log_sys->written_to_all_lsn = log_sys->write_lsn; - log_sys->buf_next_to_write = log_sys->write_end_offset; - - if (log_sys->write_end_offset > log_sys->max_buf_free / 2) { - /* Move the log buffer content to the start of the - buffer */ - - move_start = ut_calc_align_down( - log_sys->write_end_offset, - OS_FILE_LOG_BLOCK_SIZE); - move_end = ut_calc_align(log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE); - - ut_memmove(log_sys->buf, log_sys->buf + move_start, - move_end - move_start); - log_sys->buf_free -= move_start; - - log_sys->buf_next_to_write -= move_start; - } - - return(LOG_UNLOCK_FLUSH_LOCK); - } - - return(0); -} - -/******************************************************//** -Completes an i/o to a log file. 
*/ -UNIV_INTERN -void -log_io_complete( -/*============*/ - log_group_t* group) /*!< in: log group or a dummy pointer */ -{ - ulint unlock; - -#ifdef UNIV_LOG_ARCHIVE - if ((byte*) group == &log_archive_io) { - /* It was an archive write */ - - log_io_complete_archive(); - - return; - } -#endif /* UNIV_LOG_ARCHIVE */ - - if ((ulint) group & 0x1UL) { - /* It was a checkpoint write */ - group = (log_group_t*)((ulint) group - 1); - - if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC - && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - - fil_flush(group->space_id); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Checkpoint info written to group %lu\n", - group->id); - } -#endif /* UNIV_DEBUG */ - log_io_complete_checkpoint(); - - return; - } - - ut_error; /*!< We currently use synchronous writing of the - logs and cannot end up here! */ - - if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC - && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC - && thd_flush_log_at_trx_commit(NULL) != 2) { - - fil_flush(group->space_id); - } - - mutex_enter(&(log_sys->mutex)); - ut_ad(!recv_no_log_write); - - ut_a(group->n_pending_writes > 0); - ut_a(log_sys->n_pending_writes > 0); - - group->n_pending_writes--; - log_sys->n_pending_writes--; - MONITOR_DEC(MONITOR_PENDING_LOG_WRITE); - - unlock = log_group_check_flush_completion(group); - unlock = unlock | log_sys_check_flush_completion(); - - log_flush_do_unlocks(unlock); - - mutex_exit(&(log_sys->mutex)); -} - -/******************************************************//** -Writes a log file header to a log file space. 
*/ -static -void -log_group_file_header_flush( -/*========================*/ - log_group_t* group, /*!< in: log group */ - ulint nth_file, /*!< in: header to the nth file in the - log file space */ - lsn_t start_lsn) /*!< in: log file data starts at this - lsn */ -{ - byte* buf; - lsn_t dest_offset; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(!recv_no_log_write); - ut_a(nth_file < group->n_files); - - buf = *(group->file_header_bufs + nth_file); - - mach_write_to_4(buf + LOG_GROUP_ID, group->id); - mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); - - /* Wipe over possible label of mysqlbackup --restore */ - memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4); - - mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE, - srv_log_block_size); - - dest_offset = nth_file * group->file_size; - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Writing log file header to group %lu file %lu\n", - (ulong) group->id, (ulong) nth_file); - } -#endif /* UNIV_DEBUG */ - if (log_do_write) { - log_sys->n_log_ios++; - - MONITOR_INC(MONITOR_LOG_IO); - - srv_stats.os_log_pending_writes.inc(); - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0, - (ulint) (dest_offset / UNIV_PAGE_SIZE), - (ulint) (dest_offset % UNIV_PAGE_SIZE), - OS_FILE_LOG_BLOCK_SIZE, - buf, group, 0); - - srv_stats.os_log_pending_writes.dec(); - } -} - -/******************************************************//** -Stores a 4-byte checksum to the trailer checksum field of a log block -before writing it to a log file. This checksum is used in recovery to -check the consistency of a log block. */ -void -log_block_store_checksum( -/*=====================*/ - byte* block) /*!< in/out: pointer to a log block */ -{ - log_block_set_checksum(block, log_block_calc_checksum(block)); -} - -/******************************************************//** -Writes a buffer to a log file group. 
*/ -UNIV_INTERN -void -log_group_write_buf( -/*================*/ - log_group_t* group, /*!< in: log group */ - byte* buf, /*!< in: buffer */ - ulint len, /*!< in: buffer len; must be divisible - by OS_FILE_LOG_BLOCK_SIZE */ - lsn_t start_lsn, /*!< in: start lsn of the buffer; must - be divisible by - OS_FILE_LOG_BLOCK_SIZE */ - ulint new_data_offset)/*!< in: start offset of new data in - buf: this parameter is used to decide - if we have to write a new log file - header */ -{ - ulint write_len; - ibool write_header; - lsn_t next_offset; - ulint i; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(!recv_no_log_write); - ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0); - - if (new_data_offset == 0) { - write_header = TRUE; - } else { - write_header = FALSE; - } -loop: - if (len == 0) { - - return; - } - - next_offset = log_group_calc_lsn_offset(start_lsn, group); - - if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE) - && write_header) { - /* We start to write a new log file instance in the group */ - - ut_a(next_offset / group->file_size <= ULINT_MAX); - - log_group_file_header_flush(group, (ulint) - (next_offset / group->file_size), - start_lsn); - srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE); - - srv_stats.log_writes.inc(); - } - - if ((next_offset % group->file_size) + len > group->file_size) { - - /* if the above condition holds, then the below expression - is < len which is ulint, so the typecast is ok */ - write_len = (ulint) - (group->file_size - (next_offset % group->file_size)); - } else { - write_len = len; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - - fprintf(stderr, - "Writing log file segment to group %lu" - " offset " LSN_PF " len %lu\n" - "start lsn " LSN_PF "\n" - "First block n:o %lu last block n:o %lu\n", - (ulong) group->id, next_offset, - write_len, - start_lsn, - (ulong) log_block_get_hdr_no(buf), - (ulong) log_block_get_hdr_no( - buf + write_len - OS_FILE_LOG_BLOCK_SIZE)); - 
ut_a(log_block_get_hdr_no(buf) - == log_block_convert_lsn_to_no(start_lsn)); - - for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { - - ut_a(log_block_get_hdr_no(buf) + i - == log_block_get_hdr_no( - buf + i * OS_FILE_LOG_BLOCK_SIZE)); - } - } -#endif /* UNIV_DEBUG */ - /* Calculate the checksums for each log block and write them to - the trailer fields of the log blocks */ - - for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { - log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE); - } - - if (log_do_write) { - log_sys->n_log_ios++; - - MONITOR_INC(MONITOR_LOG_IO); - - srv_stats.os_log_pending_writes.inc(); - - ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX); - - log_encrypt_before_write(log_sys->next_checkpoint_no, - buf, write_len); - -#ifdef DEBUG_CRYPT - fprintf(stderr, "WRITE: block: %lu checkpoint: %lu %.8lx %.8lx\n", - log_block_get_hdr_no(buf), - log_block_get_checkpoint_no(buf), - log_block_calc_checksum(buf), - log_block_get_checksum(buf)); -#endif - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0, - (ulint) (next_offset / UNIV_PAGE_SIZE), - (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group, 0); - - srv_stats.os_log_pending_writes.dec(); - - srv_stats.os_log_written.add(write_len); - srv_stats.log_writes.inc(); - } - - if (write_len < len) { - start_lsn += write_len; - len -= write_len; - buf += write_len; - - write_header = TRUE; - - goto loop; - } -} - -/******************************************************//** -This function is called, e.g., when a transaction wants to commit. It checks -that the log has been written to the log file up to the last log entry written -by the transaction. If there is a flush running, it waits and checks if the -flush flushed enough. If not, starts a new flush. 
*/ -UNIV_INTERN -void -log_write_up_to( -/*============*/ - lsn_t lsn, /*!< in: log sequence number up to which - the log should be written, - LSN_MAX if not specified */ - ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, - or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk) - /*!< in: TRUE if we want the written log - also to be flushed to disk */ -{ - log_group_t* group; - ulint start_offset; - ulint end_offset; - ulint area_start; - ulint area_end; -#ifdef UNIV_DEBUG - ulint loop_count = 0; -#endif /* UNIV_DEBUG */ - ulint unlock; - ib_uint64_t write_lsn; - ib_uint64_t flush_lsn; - - ut_ad(!srv_read_only_mode); - - if (recv_no_ibuf_operations) { - /* Recovery is running and no operations on the log files are - allowed yet (the variable name .._no_ibuf_.. is misleading) */ - - return; - } - -loop: - ut_ad(++loop_count < 100); - - mutex_enter(&(log_sys->mutex)); - ut_ad(!recv_no_log_write); - - if (flush_to_disk - && log_sys->flushed_to_disk_lsn >= lsn) { - - mutex_exit(&(log_sys->mutex)); - - return; - } - - if (!flush_to_disk - && (log_sys->written_to_all_lsn >= lsn - || (log_sys->written_to_some_lsn >= lsn - && wait != LOG_WAIT_ALL_GROUPS))) { - - mutex_exit(&(log_sys->mutex)); - - return; - } - - if (log_sys->n_pending_writes > 0) { - /* A write (+ possibly flush to disk) is running */ - - if (flush_to_disk - && log_sys->current_flush_lsn >= lsn) { - /* The write + flush will write enough: wait for it to - complete */ - - goto do_waits; - } - - if (!flush_to_disk - && log_sys->write_lsn >= lsn) { - /* The write will write enough: wait for it to - complete */ - - goto do_waits; - } - - mutex_exit(&(log_sys->mutex)); - - /* Wait for the write to complete and try to start a new - write */ - - os_event_wait(log_sys->no_flush_event); - - goto loop; - } - - if (!flush_to_disk - && log_sys->buf_free == log_sys->buf_next_to_write) { - /* Nothing to write and no flush to disk requested */ - - mutex_exit(&(log_sys->mutex)); - - return; - } - -#ifdef UNIV_DEBUG - if 
(log_debug_writes) { - fprintf(stderr, - "Writing log from " LSN_PF " up to lsn " LSN_PF "\n", - log_sys->written_to_all_lsn, - log_sys->lsn); - } -#endif /* UNIV_DEBUG */ - log_sys->n_pending_writes++; - MONITOR_INC(MONITOR_PENDING_LOG_WRITE); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - group->n_pending_writes++; /*!< We assume here that we have only - one log group! */ - - os_event_reset(log_sys->no_flush_event); - os_event_reset(log_sys->one_flushed_event); - - start_offset = log_sys->buf_next_to_write; - end_offset = log_sys->buf_free; - - area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE); - area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE); - - ut_ad(area_end - area_start > 0); - - log_sys->write_lsn = log_sys->lsn; - - if (flush_to_disk) { - log_sys->current_flush_lsn = log_sys->lsn; - } - - log_sys->one_flushed = FALSE; - - log_block_set_flush_bit(log_sys->buf + area_start, TRUE); - log_block_set_checkpoint_no( - log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, - log_sys->next_checkpoint_no); - - /* Copy the last, incompletely written, log block a log block length - up, so that when the flush operation writes from the log buffer, the - segment to write will not be changed by writers to the log */ - - ut_memcpy(log_sys->buf + area_end, - log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, - OS_FILE_LOG_BLOCK_SIZE); - - log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE; - log_sys->write_end_offset = log_sys->buf_free; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - /* Do the write to the log files */ - - while (group) { - log_group_write_buf( - group, log_sys->buf + area_start, - area_end - area_start, - ut_uint64_align_down(log_sys->written_to_all_lsn, - OS_FILE_LOG_BLOCK_SIZE), - start_offset - area_start); - - log_group_set_fields(group, log_sys->write_lsn); - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - mutex_exit(&(log_sys->mutex)); - - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC - || 
srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { - /* O_DSYNC or ALL_O_DIRECT means the OS did not buffer the log - file at all: so we have also flushed to disk what we have - written */ - - log_sys->flushed_to_disk_lsn = log_sys->write_lsn; - - } else if (flush_to_disk) { - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - fil_flush(group->space_id); - log_sys->flushed_to_disk_lsn = log_sys->write_lsn; - } - - mutex_enter(&(log_sys->mutex)); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - ut_a(group->n_pending_writes == 1); - ut_a(log_sys->n_pending_writes == 1); - - group->n_pending_writes--; - log_sys->n_pending_writes--; - MONITOR_DEC(MONITOR_PENDING_LOG_WRITE); - - unlock = log_group_check_flush_completion(group); - unlock = unlock | log_sys_check_flush_completion(); - - log_flush_do_unlocks(unlock); - - write_lsn = log_sys->write_lsn; - flush_lsn = log_sys->flushed_to_disk_lsn; - - mutex_exit(&(log_sys->mutex)); - - innobase_mysql_log_notify(write_lsn, flush_lsn); - - return; - -do_waits: - mutex_exit(&(log_sys->mutex)); - - switch (wait) { - case LOG_WAIT_ONE_GROUP: - os_event_wait(log_sys->one_flushed_event); - break; - case LOG_WAIT_ALL_GROUPS: - os_event_wait(log_sys->no_flush_event); - break; -#ifdef UNIV_DEBUG - case LOG_NO_WAIT: - break; - default: - ut_error; -#endif /* UNIV_DEBUG */ - } -} - -/****************************************************************//** -Does a syncronous flush of the log buffer to disk. */ -UNIV_INTERN -void -log_buffer_flush_to_disk(void) -/*==========================*/ -{ - lsn_t lsn; - - ut_ad(!srv_read_only_mode); - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); -} - -/****************************************************************//** -This functions writes the log buffer to the log file and if 'flush' -is set it forces a flush of the log file as well. 
This is meant to be -called from background master thread only as it does not wait for -the write (+ possible flush) to finish. */ -UNIV_INTERN -void -log_buffer_sync_in_background( -/*==========================*/ - ibool flush) /*!< in: flush the logs to disk */ -{ - lsn_t lsn; - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - log_write_up_to(lsn, LOG_NO_WAIT, flush); -} - -/******************************************************************** - -Tries to establish a big enough margin of free space in the log buffer, such -that a new log entry can be catenated without an immediate need for a flush. */ -static -void -log_flush_margin(void) -/*==================*/ -{ - log_t* log = log_sys; - lsn_t lsn = 0; - - mutex_enter(&(log->mutex)); - - if (log->buf_free > log->max_buf_free) { - - if (log->n_pending_writes > 0) { - /* A flush is running: hope that it will provide enough - free space */ - } else { - lsn = log->lsn; - } - } - - mutex_exit(&(log->mutex)); - - if (lsn) { - log_write_up_to(lsn, LOG_NO_WAIT, FALSE); - } -} - -/****************************************************************//** -Advances the smallest lsn for which there are unflushed dirty blocks in the -buffer pool. NOTE: this function may only be called if the calling thread owns -no synchronization objects! 
-@return false if there was a flush batch of the same type running, -which means that we could not start this flush batch */ -static -bool -log_preflush_pool_modified_pages( -/*=============================*/ - lsn_t new_oldest) /*!< in: try to advance oldest_modified_lsn - at least to this lsn */ -{ - lsn_t current_oldest; - ulint i; - - if (recv_recovery_on) { - /* If the recovery is running, we must first apply all - log records to their respective file pages to get the - right modify lsn values to these pages: otherwise, there - might be pages on disk which are not yet recovered to the - current lsn, and even after calling this function, we could - not know how up-to-date the disk version of the database is, - and we could not make a new checkpoint on the basis of the - info on the buffer pool only. */ - - recv_apply_hashed_log_recs(true); - } - - if (!buf_page_cleaner_is_active - || (srv_foreground_preflush - == SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH) - || (new_oldest == LSN_MAX)) { - - ulint n_pages; - - bool success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages); - - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - if (!success) { - MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); - } - - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_SYNC_TOTAL_PAGE, - MONITOR_FLUSH_SYNC_COUNT, - MONITOR_FLUSH_SYNC_PAGES, - n_pages); - - return(success); - } - - ut_ad(srv_foreground_preflush == SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF); - - current_oldest = buf_pool_get_oldest_modification(); - i = 0; - - while (current_oldest < new_oldest && current_oldest) { - - while (!buf_flush_flush_list_in_progress()) { - - /* If a flush list flush by the cleaner thread is not - running, backoff until one is started. 
*/ - os_thread_sleep(ut_rnd_interval(0, 1 << i)); - i++; - i %= 16; - } - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - current_oldest = buf_pool_get_oldest_modification(); - } - - return(current_oldest >= new_oldest || !current_oldest); -} - -/******************************************************//** -Completes a checkpoint. */ -static -void -log_complete_checkpoint(void) -/*=========================*/ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(log_sys->n_pending_checkpoint_writes == 0); - - log_sys->next_checkpoint_no++; - - ut_ad(log_sys->next_checkpoint_lsn >= log_sys->last_checkpoint_lsn); - log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn; - MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, - log_sys->lsn - log_sys->last_checkpoint_lsn); - - rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); -} - -/******************************************************//** -Completes an asynchronous checkpoint info write i/o to a log file. */ -static -void -log_io_complete_checkpoint(void) -/*============================*/ -{ - mutex_enter(&(log_sys->mutex)); - - ut_ad(log_sys->n_pending_checkpoint_writes > 0); - - log_sys->n_pending_checkpoint_writes--; - MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE); - - if (log_sys->n_pending_checkpoint_writes == 0) { - log_complete_checkpoint(); - } - - mutex_exit(&(log_sys->mutex)); - - /* Wake the redo log watching thread to parse the log up to this - checkpoint. */ - if (srv_track_changed_pages) { - os_event_reset(srv_redo_log_tracked_event); - os_event_set(srv_checkpoint_completed_event); - } -} - -/*******************************************************************//** -Writes info to a checkpoint about a log group. 
*/ -static -void -log_checkpoint_set_nth_group_info( -/*==============================*/ - byte* buf, /*!< in: buffer for checkpoint info */ - ulint n, /*!< in: nth slot */ - lsn_t file_no)/*!< in: archived file number */ -{ - ut_ad(n < LOG_MAX_N_GROUPS); - - mach_write_to_8(buf + LOG_CHECKPOINT_GROUP_ARRAY + - 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, - file_no); -} - -/*******************************************************************//** -Gets info from a checkpoint about a log group. */ -UNIV_INTERN -void -log_checkpoint_get_nth_group_info( -/*==============================*/ - const byte* buf, /*!< in: buffer containing checkpoint info */ - ulint n, /*!< in: nth slot */ - lsn_t* file_no)/*!< out: archived file number */ -{ - ut_ad(n < LOG_MAX_N_GROUPS); - - *file_no = mach_read_from_8(buf + LOG_CHECKPOINT_GROUP_ARRAY + - 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO); -} - -/******************************************************//** -Writes the checkpoint info to a log group header. */ -static -void -log_group_checkpoint( -/*=================*/ - log_group_t* group) /*!< in: log group */ -{ - log_group_t* group2; -#ifdef UNIV_LOG_ARCHIVE - ib_uint64_t archived_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t lsn_offset; - ulint write_offset; - ulint fold; - byte* buf; - ulint i; - - ut_ad(!srv_read_only_mode); - ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE); - ut_ad(mutex_own(&(log_sys->mutex))); - ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE); - - buf = group->checkpoint_buf; - -#ifdef UNIV_DEBUG - lsn_t old_next_checkpoint_lsn - = mach_read_from_8(buf + LOG_CHECKPOINT_LSN); - ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn); -#endif /* UNIV_DEBUG */ - mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); - mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn); - - log_crypt_write_checkpoint_buf(buf); - - lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn, - group); - mach_write_to_4(buf + 
LOG_CHECKPOINT_OFFSET_LOW32, - lsn_offset & 0xFFFFFFFFUL); - mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, - lsn_offset >> 32); - - mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size); - -#ifdef UNIV_LOG_ARCHIVE - if (log_sys->archiving_state == LOG_ARCH_OFF) { - archived_lsn = LSN_MAX; - } else { - archived_lsn = log_sys->archived_lsn; - } - - mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn); -#else /* UNIV_LOG_ARCHIVE */ - mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX); -#endif /* UNIV_LOG_ARCHIVE */ - - for (i = 0; i < LOG_MAX_N_GROUPS; i++) { - log_checkpoint_set_nth_group_info(buf, i, 0); - } - - group2 = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group2) { - log_checkpoint_set_nth_group_info(buf, group2->id, -#ifdef UNIV_LOG_ARCHIVE - group2->archived_file_no -#else /* UNIV_LOG_ARCHIVE */ - 0 -#endif /* UNIV_LOG_ARCHIVE */ - ); - - group2 = UT_LIST_GET_NEXT(log_groups, group2); - } - - fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); - mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold); - - fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, - LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); - mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold); - - /* We alternate the physical place of the checkpoint info in the first - log file */ - - if ((log_sys->next_checkpoint_no & 1) == 0) { - write_offset = LOG_CHECKPOINT_1; - } else { - write_offset = LOG_CHECKPOINT_2; - } - - if (log_do_write) { - if (log_sys->n_pending_checkpoint_writes == 0) { - - rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), - LOG_CHECKPOINT); - } - - log_sys->n_pending_checkpoint_writes++; - MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE); - - log_sys->n_log_ios++; - - MONITOR_INC(MONITOR_LOG_IO); - - /* We send as the last parameter the group machine address - added with 1, as we want to distinguish between a normal log - file write and a checkpoint field write */ - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, 
group->space_id, 0, - write_offset / UNIV_PAGE_SIZE, - write_offset % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*) group + 1), 0); - - ut_ad(((ulint) group & 0x1UL) == 0); - } -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_HOTBACKUP -/******************************************************//** -Writes info to a buffer of a log group when log files are created in -backup restoration. */ -UNIV_INTERN -void -log_reset_first_header_and_checkpoint( -/*==================================*/ - byte* hdr_buf,/*!< in: buffer which will be written to the - start of the first log file */ - ib_uint64_t start) /*!< in: lsn of the start of the first log file; - we pretend that there is a checkpoint at - start + LOG_BLOCK_HDR_SIZE */ -{ - ulint fold; - byte* buf; - ib_uint64_t lsn; - - mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0); - mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start); - - lsn = start + LOG_BLOCK_HDR_SIZE; - - /* Write the label of mysqlbackup --restore */ - strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, - "ibbackup "); - ut_sprintf_timestamp((char*) hdr_buf - + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP - + (sizeof "ibbackup ") - 1)); - buf = hdr_buf + LOG_CHECKPOINT_1; - - mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0); - mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn); - - log_crypt_write_checkpoint_buf(buf); - - mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32, - LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE); - mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0); - - mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024); - - mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX); - - fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); - mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold); - - fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, - LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); - mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold); - - /* Starting from InnoDB-3.23.50, we should also write info on - allocated 
size in the tablespace, but unfortunately we do not - know it here */ -} -#endif /* UNIV_HOTBACKUP */ - -#ifndef UNIV_HOTBACKUP -/******************************************************//** -Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */ -UNIV_INTERN -void -log_group_read_checkpoint_info( -/*===========================*/ - log_group_t* group, /*!< in: log group */ - ulint field) /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - log_sys->n_log_ios++; - - MONITOR_INC(MONITOR_LOG_IO); - - fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0, - field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0); -} - -/******************************************************//** -Writes checkpoint info to groups. */ -UNIV_INTERN -void -log_groups_write_checkpoint_info(void) -/*==================================*/ -{ - log_group_t* group; - - ut_ad(mutex_own(&(log_sys->mutex))); - - if (!srv_read_only_mode) { - for (group = UT_LIST_GET_FIRST(log_sys->log_groups); - group; - group = UT_LIST_GET_NEXT(log_groups, group)) { - - log_group_checkpoint(group); - } - } -} - -/******************************************************//** -Makes a checkpoint. Note that this function does not flush dirty -blocks from the buffer pool: it only checks what is lsn of the oldest -modification in the pool, and writes information about the lsn in -log files. Use log_make_checkpoint_at to flush also the pool. 
-@return TRUE if success, FALSE if a checkpoint write was already running */ -UNIV_INTERN -ibool -log_checkpoint( -/*===========*/ - ibool sync, /*!< in: TRUE if synchronous operation is - desired */ - ibool write_always, /*!< in: the function normally checks if the - the new checkpoint would have a greater - lsn than the previous one: if not, then no - physical write is done; by setting this - parameter TRUE, a physical write will always be - made to log files */ - ibool safe_to_ignore) /*!< in: TRUE if checkpoint can be ignored in - the case checkpoint's are disabled */ -{ - lsn_t oldest_lsn; - - ut_ad(!srv_read_only_mode); - - if (recv_recovery_is_on()) { - recv_apply_hashed_log_recs(true); - } - - if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC && - srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT) { - fil_flush_file_spaces(FIL_TABLESPACE); - } - - mutex_enter(&(log_sys->mutex)); - - ut_ad(!recv_no_log_write); - oldest_lsn = log_buf_pool_get_oldest_modification(); - - mutex_exit(&(log_sys->mutex)); - - /* Because log also contains headers and dummy log records, - if the buffer pool contains no dirty buffers, oldest_lsn - gets the value log_sys->lsn from the previous function, - and we must make sure that the log is flushed up to that - lsn. If there are dirty buffers in the buffer pool, then our - write-ahead-logging algorithm ensures that the log has been flushed - up to oldest_lsn. 
*/ - - log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); - - mutex_enter(&(log_sys->mutex)); - - /* Return if this is not a forced checkpoint and either there is no - need for a checkpoint or if checkpoints are disabled */ - if (!write_always - && (log_sys->last_checkpoint_lsn >= oldest_lsn || - (safe_to_ignore && log_disable_checkpoint_active))) - { - - mutex_exit(&(log_sys->mutex)); - - return(TRUE); - } - - if (log_disable_checkpoint_active) - { - /* Wait until we are allowed to do a checkpoint */ - mutex_exit(&(log_sys->mutex)); - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - mutex_enter(&(log_sys->mutex)); - } - - ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn); - - if (log_sys->n_pending_checkpoint_writes > 0) { - /* A checkpoint write is running */ - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - /* Wait for the checkpoint write to complete */ - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - } - - return(FALSE); - } - - ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn); - log_sys->next_checkpoint_lsn = oldest_lsn; -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, "Making checkpoint no " - LSN_PF " at lsn " LSN_PF "\n", - log_sys->next_checkpoint_no, - oldest_lsn); - } -#endif /* UNIV_DEBUG */ - - /* generate key version and key used to encrypt future blocks, - * - * NOTE: the +1 is as the next_checkpoint_no will be updated once - * the checkpoint info has been written and THEN blocks will be encrypted - * with new key - */ - if (srv_encrypt_log) { - log_crypt_set_ver_and_key(log_sys->next_checkpoint_no + 1); - } - - log_groups_write_checkpoint_info(); - - MONITOR_INC(MONITOR_NUM_CHECKPOINT); - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - /* Wait for the checkpoint write to complete */ - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - } - - return(TRUE); -} - 
-/****************************************************************//** -Makes a checkpoint at a given lsn or later. */ -UNIV_INTERN -void -log_make_checkpoint_at( -/*===================*/ - lsn_t lsn, /*!< in: make a checkpoint at this or a - later lsn, if LSN_MAX, makes - a checkpoint at the latest lsn */ - ibool write_always) /*!< in: the function normally checks if - the new checkpoint would have a - greater lsn than the previous one: if - not, then no physical write is done; - by setting this parameter TRUE, a - physical write will always be made to - log files */ -{ - /* Preflush pages synchronously */ - - while (!log_preflush_pool_modified_pages(lsn)) { - /* Flush as much as we can */ - } - - while (!log_checkpoint(TRUE, write_always, FALSE)) { - /* Force a checkpoint */ - } -} - -/****************************************************************//** -Disable checkpoints. This is used when doing a volumne snapshot -to ensure that we don't get checkpoint between snapshoting two -different volumes */ - -UNIV_INTERN -ibool log_disable_checkpoint() -{ - mutex_enter(&(log_sys->mutex)); - - /* - Wait if a checkpoint write is running. - This is the same code that is used in log_checkpoint() to ensure - that two checkpoints are not happening at the same time. - */ - while (log_sys->n_pending_checkpoint_writes > 0) - { - mutex_exit(&(log_sys->mutex)); - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - mutex_enter(&(log_sys->mutex)); - } - /* - The following should never be true; It's is here just in case of - wrong usage of this function. (Better safe than sorry). 
- */ - - if (log_disable_checkpoint_active) - { - mutex_exit(&(log_sys->mutex)); - return 1; /* Already disabled */ - } - /* - Take the checkpoint lock to ensure we will not get any checkpoints - running - */ - rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); - log_disable_checkpoint_active= 1; - mutex_exit(&(log_sys->mutex)); - return 0; -} - - -/****************************************************************//** -Enable checkpoints that was disabled with log_disable_checkpoint() -This lock is called by MariaDB and only when we have done call earlier -to log_disable_checkpoint(). - -Note: We can't take a log->mutex lock here running log_checkpoint() -which is waiting (log_sys->checkpoint_lock may already have it. -This is however safe to do without a mutex as log_disable_checkpoint -is protected by log_sys->checkpoint_lock. -*/ - -UNIV_INTERN -void log_enable_checkpoint() -{ - ut_ad(log_disable_checkpoint_active); - /* Test variable, mostly to protect against wrong usage */ - if (log_disable_checkpoint_active) - { - log_disable_checkpoint_active= 0; - rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); - } -} - -/****************************************************************//** -Tries to establish a big enough margin of free space in the log groups, such -that a new log entry can be catenated without an immediate need for a -checkpoint. NOTE: this function may only be called if the calling thread -owns no synchronization objects! 
*/
static
void
log_checkpoint_margin(void)
/*=======================*/
{
	log_t*	log = log_sys;

	/* Each round re-evaluates the log state under the log mutex and
	then does the preflush and/or checkpoint outside of it */
	for (;;) {
		ib_uint64_t	advance = 0;

		mutex_enter(&(log->mutex));
		ut_ad(!recv_no_log_write);

		if (log->check_flush_or_checkpoint == FALSE) {
			mutex_exit(&(log->mutex));

			return;
		}

		const lsn_t	oldest_lsn
			= log_buf_pool_get_oldest_modification();

		const lsn_t	age = log->lsn - oldest_lsn;

		if (age > log->max_modified_age_sync) {

			/* A flush is urgent: we have to do a synchronous
			preflush */
			advance = 2 * (age - log->max_modified_age_sync);
		}

		const lsn_t	checkpoint_age
			= log->lsn - log->last_checkpoint_lsn;

		/* Past max_checkpoint_age the checkpoint is urgent and is
		done synchronously; past max_checkpoint_age_async it is
		done asynchronously */
		const bool	checkpoint_sync
			= (checkpoint_age > log->max_checkpoint_age);
		const bool	do_checkpoint
			= checkpoint_sync
			|| (checkpoint_age > log->max_checkpoint_age_async);

		if (!checkpoint_sync) {
			log->check_flush_or_checkpoint = FALSE;
		}

		mutex_exit(&(log->mutex));

		if (advance) {
			/* If the flush succeeded, this thread has done its
			part and can proceed. If it did not succeed, there
			was another thread doing a flush at the same time. */
			if (!log_preflush_pool_modified_pages(
				    oldest_lsn + advance)) {

				mutex_enter(&(log->mutex));

				log->check_flush_or_checkpoint = TRUE;

				mutex_exit(&(log->mutex));

				continue;
			}
		}

		if (!do_checkpoint) {

			return;
		}

		log_checkpoint(checkpoint_sync, FALSE, FALSE);

		if (!checkpoint_sync) {

			return;
		}
	}
}

/******************************************************//**
Reads a specified log segment to a buffer. Optionally releases the log mutex
before the I/O.
*/ -UNIV_INTERN -void -log_group_read_log_seg( -/*===================*/ - ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */ - byte* buf, /*!< in: buffer where to read */ - log_group_t* group, /*!< in: log group */ - lsn_t start_lsn, /*!< in: read area start */ - lsn_t end_lsn, /*!< in: read area end */ - ibool release_mutex) /*!< in: whether the log_sys->mutex - should be released before the read */ -{ - ulint len; - lsn_t source_offset; - bool sync; - - ut_ad(mutex_own(&(log_sys->mutex))); - - sync = (type == LOG_RECOVER); -loop: - source_offset = log_group_calc_lsn_offset(start_lsn, group); - - ut_a(end_lsn - start_lsn <= ULINT_MAX); - len = (ulint) (end_lsn - start_lsn); - - ut_ad(len != 0); - - if ((source_offset % group->file_size) + len > group->file_size) { - - /* If the above condition is true then len (which is ulint) - is > the expression below, so the typecast is ok */ - len = (ulint) (group->file_size - - (source_offset % group->file_size)); - } - -#ifdef UNIV_LOG_ARCHIVE - if (type == LOG_ARCHIVE) { - - log_sys->n_pending_archive_ios++; - } -#endif /* UNIV_LOG_ARCHIVE */ - - log_sys->n_log_ios++; - - MONITOR_INC(MONITOR_LOG_IO); - - ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX); - - if (release_mutex) { - mutex_exit(&(log_sys->mutex)); - } - - fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0, - (ulint) (source_offset / UNIV_PAGE_SIZE), - (ulint) (source_offset % UNIV_PAGE_SIZE), - len, buf, (type == LOG_ARCHIVE) ? 
&log_archive_io : NULL, 0); - - if (release_mutex) { - mutex_enter(&log_sys->mutex); - } - -#ifdef DEBUG_CRYPT - fprintf(stderr, "BEFORE DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx offset %lu\n", - log_block_get_hdr_no(buf), - log_block_get_checkpoint_no(buf), - log_block_calc_checksum(buf), - log_block_get_checksum(buf), source_offset); -#endif - - log_decrypt_after_read(buf, len); - -#ifdef DEBUG_CRYPT - fprintf(stderr, "AFTER DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx\n", - log_block_get_hdr_no(buf), - log_block_get_checkpoint_no(buf), - log_block_calc_checksum(buf), - log_block_get_checksum(buf)); -#endif - - if (release_mutex) { - mutex_exit(&log_sys->mutex); - } - - start_lsn += len; - buf += len; - - if (recv_sys && recv_sys->report(ut_time())) { - ib_logf(IB_LOG_LEVEL_INFO, "Read redo log up to LSN=" LSN_PF, - start_lsn); - sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF, - start_lsn); - } - - if (start_lsn != end_lsn) { - - if (release_mutex) { - mutex_enter(&(log_sys->mutex)); - } - goto loop; - } -} - -#ifdef UNIV_LOG_ARCHIVE -/******************************************************//** -Generates an archived log file name. 
*/ -UNIV_INTERN -void -log_archived_file_name_gen( -/*=======================*/ - char* buf, /*!< in: buffer where to write */ - ulint buf_len,/*!< in: buffer length */ - ulint id MY_ATTRIBUTE((unused)), - /*!< in: group id; - currently we only archive the first group */ - lsn_t file_no)/*!< in: file number */ -{ - ulint dirnamelen; - - dirnamelen = strlen(srv_arch_dir); - - ut_a(buf_len > dirnamelen + - IB_ARCHIVED_LOGS_SERIAL_LEN + - IB_ARCHIVED_LOGS_PREFIX_LEN + 2); - - strcpy(buf, srv_arch_dir); - - if (buf[dirnamelen-1] != SRV_PATH_SEPARATOR) { - buf[dirnamelen++] = SRV_PATH_SEPARATOR; - } - sprintf(buf + dirnamelen, IB_ARCHIVED_LOGS_PREFIX - "%0" IB_TO_STR(IB_ARCHIVED_LOGS_SERIAL_LEN) "llu", - (unsigned long long)file_no); -} - -/******************************************************//** -Get offset within archived log file to continue to write -with. */ -UNIV_INTERN -void -log_archived_get_offset( -/*=====================*/ - log_group_t* group, /*!< in: log group */ - lsn_t file_no, /*!< in: archive log file number */ - lsn_t archived_lsn, /*!< in: last archived LSN */ - lsn_t* offset) /*!< out: offset within archived file */ -{ - char file_name[OS_FILE_MAX_PATH]; - ibool exists; - os_file_type_t type; - - log_archived_file_name_gen(file_name, - sizeof(file_name), group->id, file_no); - - ut_a(os_file_status(file_name, &exists, &type)); - - if (!exists) { - *offset = 0; - return; - } - - *offset = archived_lsn - file_no + LOG_FILE_HDR_SIZE; - - if (archived_lsn != LSN_MAX) { - *offset = archived_lsn - file_no + LOG_FILE_HDR_SIZE; - } else { - /* Archiving was OFF prior startup */ - *offset = 0; - } - - ut_a(group->file_size >= *offset + LOG_FILE_HDR_SIZE); - - return; -} - -/******************************************************//** -Writes a log file header to a log file space. 
*/
static
void
log_group_archive_file_header_write(
/*================================*/
	log_group_t*	group,		/*!< in: log group */
	ulint		nth_file,	/*!< in: header to the nth file in the
					archive log file space */
	lsn_t		file_no,	/*!< in: archived file number */
	ib_uint64_t	start_lsn)	/*!< in: log file data starts at this
					lsn */
{
	ut_ad(mutex_own(&(log_sys->mutex)));

	ut_a(nth_file < group->n_files);

	/* Each archive file has its own header buffer in the group */
	byte*	hdr = group->archive_file_header_bufs[nth_file];

	mach_write_to_4(hdr + LOG_GROUP_ID, group->id);
	mach_write_to_8(hdr + LOG_FILE_START_LSN, start_lsn);
	/* NOTE(review): file_no is an lsn_t but is stored in a 4-byte
	field here - confirm that truncation is intended */
	mach_write_to_4(hdr + LOG_FILE_NO, file_no);

	/* The file is marked as not yet completely archived */
	mach_write_to_4(hdr + LOG_FILE_ARCH_COMPLETED, FALSE);

	const ulint	dest_offset = nth_file * group->file_size;

	log_sys->n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
	       0,
	       dest_offset / UNIV_PAGE_SIZE,
	       dest_offset % UNIV_PAGE_SIZE,
	       2 * OS_FILE_LOG_BLOCK_SIZE,
	       hdr, &log_archive_io, 0);
}

/******************************************************//**
Writes a log file header to a completed archived log file.
*/
static
void
log_group_archive_completed_header_write(
/*=====================================*/
	log_group_t*	group,		/*!< in: log group */
	ulint		nth_file,	/*!< in: header to the nth file in the
					archive log file space */
	ib_uint64_t	end_lsn)	/*!< in: end lsn of the file */
{
	ut_ad(mutex_own(&(log_sys->mutex)));
	ut_a(nth_file < group->n_files);

	byte*	hdr = group->archive_file_header_bufs[nth_file];

	/* Set the completed flag and record where the file ends */
	mach_write_to_4(hdr + LOG_FILE_ARCH_COMPLETED, TRUE);
	mach_write_to_8(hdr + LOG_FILE_END_LSN, end_lsn);

	/* The write starts at the completed flag, not at the start of
	the header */
	const ulint	dest_offset
		= nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;

	log_sys->n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
	       0,
	       dest_offset / UNIV_PAGE_SIZE,
	       dest_offset % UNIV_PAGE_SIZE,
	       OS_FILE_LOG_BLOCK_SIZE,
	       hdr + LOG_FILE_ARCH_COMPLETED,
	       &log_archive_io, 0);
}

/******************************************************//**
Does the archive writes for a single log group.
*/ -static -void -log_group_archive( -/*==============*/ - log_group_t* group) /*!< in: log group */ -{ - pfs_os_file_t file_handle; - lsn_t start_lsn; - lsn_t end_lsn; - char name[OS_FILE_MAX_PATH]; - byte* buf; - ulint len; - ibool ret; - lsn_t next_offset; - ulint n_files; - ulint open_mode; - - ut_ad(mutex_own(&(log_sys->mutex))); - - start_lsn = log_sys->archived_lsn; - - ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0); - - end_lsn = log_sys->next_archived_lsn; - - ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0); - - buf = log_sys->archive_buf; - - n_files = 0; - - next_offset = group->archived_offset; -loop: - if ((next_offset % group->file_size == 0) - || (fil_space_get_size(group->archive_space_id) == 0)) { - - /* Add the file to the archive file space; create or open the - file */ - - if (next_offset % group->file_size == 0) { - open_mode = OS_FILE_CREATE; - if (n_files == 0) { - /* Adjust archived_file_no to match start_lsn - which is written in file header as well */ - group->archived_file_no = start_lsn; - } - } else { - open_mode = OS_FILE_OPEN; - } - - log_archived_file_name_gen(name, sizeof(name), group->id, - group->archived_file_no + - n_files * (group->file_size - - LOG_FILE_HDR_SIZE)); - - file_handle = os_file_create(innodb_file_log_key, - name, open_mode, - OS_FILE_AIO, - OS_DATA_FILE, &ret, FALSE); - - if (!ret && (open_mode == OS_FILE_CREATE)) { - file_handle = os_file_create( - innodb_file_log_key, name, OS_FILE_OPEN, - OS_FILE_AIO, OS_DATA_FILE, &ret, FALSE); - } - - if (!ret) { - ib_logf(IB_LOG_LEVEL_FATAL, - "InnoDB: Cannot create or open" - " archive log file %s.\n", name); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, "Created archive file %s\n", name); - } -#endif /* UNIV_DEBUG */ - - ret = os_file_close(file_handle); - - ut_a(ret); - - /* Add the archive file as a node to the space */ - - ut_a(fil_node_create(name, group->file_size / UNIV_PAGE_SIZE, - group->archive_space_id, FALSE)); - - if (next_offset % 
group->file_size == 0) { - log_group_archive_file_header_write( - group, n_files, - group->archived_file_no + - n_files * (group->file_size - LOG_FILE_HDR_SIZE), - start_lsn); - - next_offset += LOG_FILE_HDR_SIZE; - } - } - - len = end_lsn - start_lsn; - - if (group->file_size < (next_offset % group->file_size) + len) { - - len = group->file_size - (next_offset % group->file_size); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Archiving starting at lsn " LSN_PF ", len %lu" - " to group %lu\n", - start_lsn, - (ulong) len, (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - - log_sys->n_pending_archive_ios++; - - log_sys->n_log_ios++; - - MONITOR_INC(MONITOR_LOG_IO); - - //TODO (jonaso): This must be dead code?? - log_encrypt_before_write(log_sys->next_checkpoint_no, buf, len); - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id, - 0, - (ulint) (next_offset / UNIV_PAGE_SIZE), - (ulint) (next_offset % UNIV_PAGE_SIZE), - ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, - &log_archive_io, 0); - - start_lsn += len; - next_offset += len; - buf += len; - - if (next_offset % group->file_size == 0) { - n_files++; - } - - if (end_lsn != start_lsn) { - - goto loop; - } - - group->next_archived_file_no = group->archived_file_no + - n_files * (group->file_size - LOG_FILE_HDR_SIZE); - group->next_archived_offset = next_offset % group->file_size; - - ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0); -} - -/*****************************************************//** -(Writes to the archive of each log group.) Currently, only the first -group is archived. */ -static -void -log_archive_groups(void) -/*====================*/ -{ - log_group_t* group; - - ut_ad(mutex_own(&(log_sys->mutex))); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - log_group_archive(group); -} - -/*****************************************************//** -Completes the archiving write phase for (each log group), currently, -the first log group. 
*/ -static -void -log_archive_write_complete_groups(void) -/*===================================*/ -{ - log_group_t* group; - lsn_t end_offset; - ulint trunc_files; - ulint n_files; - ib_uint64_t start_lsn; - ib_uint64_t end_lsn; - ulint i; - - ut_ad(mutex_own(&(log_sys->mutex))); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - group->archived_file_no = group->next_archived_file_no; - group->archived_offset = group->next_archived_offset; - - /* Truncate from the archive file space all but the last - file, or if it has been written full, all files */ - - n_files = (UNIV_PAGE_SIZE - * fil_space_get_size(group->archive_space_id)) - / group->file_size; - ut_ad(n_files > 0); - - end_offset = group->archived_offset; - - if (end_offset % group->file_size == 0) { - - trunc_files = n_files; - } else { - trunc_files = n_files - 1; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes && trunc_files) { - fprintf(stderr, - "Complete file(s) archived to group %lu\n", - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - - /* Calculate the archive file space start lsn */ - start_lsn = log_sys->next_archived_lsn - - (end_offset - LOG_FILE_HDR_SIZE + trunc_files - * (group->file_size - LOG_FILE_HDR_SIZE)); - end_lsn = start_lsn; - - for (i = 0; i < trunc_files; i++) { - - end_lsn += group->file_size - LOG_FILE_HDR_SIZE; - - /* Write a notice to the headers of archived log - files that the file write has been completed */ - - log_group_archive_completed_header_write(group, i, end_lsn); - } - - fil_space_truncate_start(group->archive_space_id, - trunc_files * group->file_size); - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fputs("Archiving writes completed\n", stderr); - } -#endif /* UNIV_DEBUG */ -} - -/******************************************************//** -Completes an archiving i/o. 
*/ -static -void -log_archive_check_completion_low(void) -/*==================================*/ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - if (log_sys->n_pending_archive_ios == 0 - && log_sys->archiving_phase == LOG_ARCHIVE_READ) { - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fputs("Archiving read completed\n", stderr); - } -#endif /* UNIV_DEBUG */ - - /* Archive buffer has now been read in: start archive writes */ - - log_sys->archiving_phase = LOG_ARCHIVE_WRITE; - - log_archive_groups(); - } - - if (log_sys->n_pending_archive_ios == 0 - && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) { - - log_archive_write_complete_groups(); - - log_sys->archived_lsn = log_sys->next_archived_lsn; - - rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); - } -} - -/******************************************************//** -Completes an archiving i/o. */ -static -void -log_io_complete_archive(void) -/*=========================*/ -{ - log_group_t* group; - - mutex_enter(&(log_sys->mutex)); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - mutex_exit(&(log_sys->mutex)); - - fil_flush(group->archive_space_id); - - mutex_enter(&(log_sys->mutex)); - - ut_ad(log_sys->n_pending_archive_ios > 0); - - log_sys->n_pending_archive_ios--; - - log_archive_check_completion_low(); - - mutex_exit(&(log_sys->mutex)); -} - -/********************************************************************//** -Starts an archiving operation. 
-@return TRUE if succeed, FALSE if an archiving operation was already running */ -UNIV_INTERN -ibool -log_archive_do( -/*===========*/ - ibool sync, /*!< in: TRUE if synchronous operation is desired */ - ulint* n_bytes)/*!< out: archive log buffer size, 0 if nothing to - archive */ -{ - ibool calc_new_limit; - lsn_t start_lsn; - lsn_t limit_lsn = LSN_MAX; - - calc_new_limit = TRUE; -loop: - mutex_enter(&(log_sys->mutex)); - - switch (log_sys->archiving_state) { - case LOG_ARCH_OFF: -arch_none: - mutex_exit(&(log_sys->mutex)); - - *n_bytes = 0; - - return(TRUE); - case LOG_ARCH_STOPPED: - case LOG_ARCH_STOPPING2: - mutex_exit(&(log_sys->mutex)); - - os_event_wait(log_sys->archiving_on); - - goto loop; - } - - start_lsn = log_sys->archived_lsn; - - if (calc_new_limit) { - ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0); - limit_lsn = start_lsn + log_sys->archive_buf_size; - - *n_bytes = log_sys->archive_buf_size; - - if (limit_lsn >= log_sys->lsn) { - - limit_lsn = ut_uint64_align_down( - log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE); - } - } - - if (log_sys->archived_lsn >= limit_lsn) { - - goto arch_none; - } - - if (log_sys->written_to_all_lsn < limit_lsn) { - - mutex_exit(&(log_sys->mutex)); - - log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); - - calc_new_limit = FALSE; - - goto loop; - } - - if (log_sys->n_pending_archive_ios > 0) { - /* An archiving operation is running */ - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - rw_lock_s_lock(&(log_sys->archive_lock)); - rw_lock_s_unlock(&(log_sys->archive_lock)); - } - - *n_bytes = log_sys->archive_buf_size; - - return(FALSE); - } - - rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); - - log_sys->archiving_phase = LOG_ARCHIVE_READ; - - log_sys->next_archived_lsn = limit_lsn; - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Archiving from lsn " LSN_PF " to lsn " LSN_PF "\n", - log_sys->archived_lsn, limit_lsn); - } -#endif /* UNIV_DEBUG */ - - /* Read the log segment to the 
archive buffer */ - - log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf, - UT_LIST_GET_FIRST(log_sys->log_groups), - start_lsn, limit_lsn, FALSE); - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - rw_lock_s_lock(&(log_sys->archive_lock)); - rw_lock_s_unlock(&(log_sys->archive_lock)); - } - - *n_bytes = log_sys->archive_buf_size; - - return(TRUE); -} - -/****************************************************************//** -Writes the log contents to the archive at least up to the lsn when this -function was called. */ -static -void -log_archive_all(void) -/*=================*/ -{ - lsn_t present_lsn; - - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - mutex_exit(&(log_sys->mutex)); - - return; - } - - present_lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - log_pad_current_log_block(); - - for (;;) { - - ulint archived_bytes; - - mutex_enter(&(log_sys->mutex)); - - if (present_lsn <= log_sys->archived_lsn) { - - mutex_exit(&(log_sys->mutex)); - - return; - } - - mutex_exit(&(log_sys->mutex)); - - log_archive_do(TRUE, &archived_bytes); - - if (archived_bytes == 0) - return; - } -} - -/*****************************************************//** -Closes the possible open archive log file (for each group) the first group, -and if it was open, increments the group file count by 2, if desired. 
*/
static
void
log_archive_close_groups(
/*=====================*/
	ibool	increment_file_count)	/*!< in: TRUE if we want to increment
					the file count */
{
	ut_ad(mutex_own(&(log_sys->mutex)));

	if (log_sys->archiving_state == LOG_ARCH_OFF) {

		return;
	}

	log_group_t*	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	/* Size in bytes of what is currently in the archive space */
	const ulint	trunc_len = UNIV_PAGE_SIZE
		* fil_space_get_size(group->archive_space_id);

	if (trunc_len == 0) {
		/* The archive space is empty: nothing to close */
		return;
	}

	ut_a(trunc_len == group->file_size);

	/* Write a notice to the headers of archived log
	files that the file write has been completed */

	log_group_archive_completed_header_write(
		group, 0, log_sys->archived_lsn);

	fil_space_truncate_start(group->archive_space_id,
				 trunc_len);

	/* NOTE(review): the only visible effect of increment_file_count is
	that the archived offset is reset to 0; no file count is
	incremented here */
	if (increment_file_count) {
		group->archived_offset = 0;
	}
}

/****************************************************************//**
Writes the log contents to the archive up to the lsn when this function was
called, and stops the archiving. When archiving is started again, the archived
log file numbers start from 2 higher, so that the archiving will not write
again to the archived log files which exist when this function returns.
*/ -static -void -log_archive_stop(void) -/*==================*/ -{ - ibool success; - - mutex_enter(&(log_sys->mutex)); - - ut_ad(log_sys->archiving_state == LOG_ARCH_ON); - log_sys->archiving_state = LOG_ARCH_STOPPING; - - mutex_exit(&(log_sys->mutex)); - - log_archive_all(); - - mutex_enter(&(log_sys->mutex)); - - log_sys->archiving_state = LOG_ARCH_STOPPING2; - os_event_reset(log_sys->archiving_on); - - mutex_exit(&(log_sys->mutex)); - - /* Wait for a possible archiving operation to end */ - - rw_lock_s_lock(&(log_sys->archive_lock)); - rw_lock_s_unlock(&(log_sys->archive_lock)); - - mutex_enter(&(log_sys->mutex)); - - /* Close all archived log files, incrementing the file count by 2, - if appropriate */ - - log_archive_close_groups(TRUE); - - mutex_exit(&(log_sys->mutex)); - - /* Make a checkpoint, so that if recovery is needed, the file numbers - of new archived log files will start from the right value */ - - success = FALSE; - - while (!success) { - success = log_checkpoint(TRUE, TRUE, FALSE); - } - - mutex_enter(&(log_sys->mutex)); - - log_sys->archiving_state = LOG_ARCH_STOPPED; - - mutex_exit(&(log_sys->mutex)); -} - -/****************************************************************//** -Starts again archiving which has been stopped. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_start(void) -/*===================*/ -{ - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state != LOG_ARCH_STOPPED) { - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); - } - - log_sys->archiving_state = LOG_ARCH_ON; - - os_event_set(log_sys->archiving_on); - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); -} - -/****************************************************************//** -Stop archiving the log so that a gap may occur in the archived log files. 
-@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_noarchivelog(void) -/*==========================*/ -{ - ut_ad(!srv_read_only_mode); -loop: - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_STOPPED - || log_sys->archiving_state == LOG_ARCH_OFF) { - - log_sys->archiving_state = LOG_ARCH_OFF; - - os_event_set(log_sys->archiving_on); - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); - } - - mutex_exit(&(log_sys->mutex)); - - log_archive_stop(); - - os_thread_sleep(500000); - - goto loop; -} - -/****************************************************************//** -Start archiving the log so that a gap may occur in the archived log files. -@return DB_SUCCESS or DB_ERROR */ -UNIV_INTERN -ulint -log_archive_archivelog(void) -/*========================*/ -{ - ut_ad(!srv_read_only_mode); - - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - - log_sys->archiving_state = LOG_ARCH_ON; - - log_sys->archived_lsn - = ut_uint64_align_down(log_sys->lsn, - OS_FILE_LOG_BLOCK_SIZE); - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); - } - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); -} - -/****************************************************************//** -Tries to establish a big enough margin of free space in the log groups, such -that a new log entry can be catenated without an immediate need for -archiving. 
*/ -static -void -log_archive_margin(void) -/*====================*/ -{ - log_t* log = log_sys; - ulint age; - ibool sync; - ulint dummy; -loop: - mutex_enter(&(log->mutex)); - - if (log->archiving_state == LOG_ARCH_OFF) { - mutex_exit(&(log->mutex)); - - return; - } - - age = log->lsn - log->archived_lsn; - - if (age > log->max_archived_lsn_age) { - - /* An archiving is urgent: we have to do synchronous i/o */ - - sync = TRUE; - - } else if (age > log->max_archived_lsn_age_async) { - - /* An archiving is not urgent: we do asynchronous i/o */ - - sync = FALSE; - } else { - /* No archiving required yet */ - - mutex_exit(&(log->mutex)); - - return; - } - - mutex_exit(&(log->mutex)); - - log_archive_do(sync, &dummy); - - if (sync == TRUE) { - /* Check again that enough was written to the archive */ - - goto loop; - } -} -#endif /* UNIV_LOG_ARCHIVE */ - -/********************************************************************//** -Checks that there is enough free space in the log to start a new query step. -Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this -function may only be called if the calling thread owns no synchronization -objects! 
*/ -UNIV_INTERN -void -log_check_margins(void) -/*===================*/ -{ -loop: - log_flush_margin(); - - log_checkpoint_margin(); - - mutex_enter(&(log_sys->mutex)); - if (log_check_tracking_margin(0)) { - - mutex_exit(&(log_sys->mutex)); - os_thread_sleep(10000); - goto loop; - } - mutex_exit(&(log_sys->mutex)); - -#ifdef UNIV_LOG_ARCHIVE - log_archive_margin(); -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_enter(&(log_sys->mutex)); - ut_ad(!recv_no_log_write); - - if (log_sys->check_flush_or_checkpoint) { - - mutex_exit(&(log_sys->mutex)); - - goto loop; - } - - mutex_exit(&(log_sys->mutex)); -} - -/****************************************************************//** -Makes a checkpoint at the latest lsn and writes it to first page of each -data file in the database, so that we know that the file spaces contain -all modifications up to that lsn. This can only be called at database -shutdown. This function also writes all log in log files to the log archive. */ -UNIV_INTERN -void -logs_empty_and_mark_files_at_shutdown(void) -/*=======================================*/ -{ - lsn_t lsn; - lsn_t tracked_lsn; - ulint count = 0; - ulint pending_io; - ibool server_busy; - - ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown..."); - - /* Enable checkpoints if someone had turned them off */ - if (log_disable_checkpoint_active) - log_enable_checkpoint(); - - /* Wait until the master thread and all other operations are idle: our - algorithm only works if the server is idle at shutdown */ - - srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; -loop: - if (!srv_read_only_mode) { - os_event_set(srv_error_event); - os_event_set(srv_monitor_event); - os_event_set(srv_buf_dump_event); - os_event_set(lock_sys->timeout_event); - os_event_set(dict_stats_event); - } - os_thread_sleep(100000); - - count++; - - /* Check that there are no longer transactions, except for - PREPARED ones. 
We need this wait even for the 'very fast' - shutdown, because the InnoDB layer may have committed or - prepared transactions and we don't want to lose them. */ - - if (ulint total_trx = srv_was_started && !srv_read_only_mode - && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO - ? trx_sys_any_active_transactions() : 0) { - if (srv_print_verbose_log && count > 600) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for %lu active transactions to finish", - (ulong) total_trx); - - count = 0; - } - - goto loop; - } - - /* We need these threads to stop early in shutdown. */ - const char* thread_name; - - if (srv_error_monitor_active) { - thread_name = "srv_error_monitor_thread"; - } else if (srv_monitor_active) { - thread_name = "srv_monitor_thread"; - } else if (srv_dict_stats_thread_active) { - thread_name = "dict_stats_thread"; - } else if (lock_sys->timeout_thread_active) { - thread_name = "lock_wait_timeout_thread"; - } else if (srv_buf_dump_thread_active) { - thread_name = "buf_dump_thread"; - } else if (btr_defragment_thread_active) { - thread_name = "btr_defragment_thread"; - } else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) { - thread_name = "rollback of recovered transactions"; - } else { - thread_name = NULL; - } - - if (thread_name) { - ut_ad(!srv_read_only_mode); -wait_suspend_loop: - if (srv_print_verbose_log && count > 600) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for %s to exit", thread_name); - count = 0; - } - goto loop; - } - - /* Check that the background threads are suspended */ - - switch (srv_get_active_thread_type()) { - case SRV_NONE: - if (!srv_n_fil_crypt_threads_started) { - srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; - break; - } - os_event_set(fil_crypt_threads_event); - thread_name = "fil_crypt_thread"; - goto wait_suspend_loop; - case SRV_PURGE: - srv_purge_wakeup(); - thread_name = "purge thread"; - goto wait_suspend_loop; - case SRV_MASTER: - thread_name = "master thread"; - goto wait_suspend_loop; - case SRV_WORKER: - 
thread_name = "worker threads"; - goto wait_suspend_loop; - } - - /* At this point only page_cleaner should be active. We wait - here to let it complete the flushing of the buffer pools - before proceeding further. */ - - count = 0; - while (buf_page_cleaner_is_active || buf_lru_manager_is_active) { - if (srv_print_verbose_log && count == 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for page_cleaner to " - "finish flushing of buffer pool"); - } - ++count; - os_thread_sleep(100000); - if (count > 600) { - count = 0; - } - } - - if (log_scrub_thread_active) { - ut_ad(!srv_read_only_mode); - os_event_set(log_scrub_event); - } - - mutex_enter(&log_sys->mutex); - server_busy = log_scrub_thread_active - || log_sys->n_pending_checkpoint_writes -#ifdef UNIV_LOG_ARCHIVE - || log_sys->n_pending_archive_ios -#endif /* UNIV_LOG_ARCHIVE */ - || log_sys->n_pending_writes; - mutex_exit(&log_sys->mutex); - - if (server_busy) { - if (srv_print_verbose_log && count > 600) { - ib_logf(IB_LOG_LEVEL_INFO, - "Pending checkpoint_writes: %lu. " - "Pending log flush writes: %lu", - (ulong) log_sys->n_pending_checkpoint_writes, - (ulong) log_sys->n_pending_writes); - count = 0; - } - goto loop; - } - - ut_ad(!log_scrub_thread_active); - - pending_io = buf_pool_check_no_pending_io(); - - if (pending_io) { - if (srv_print_verbose_log && count > 600) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for %lu buffer page I/Os to complete", - (ulong) pending_io); - count = 0; - } - - goto loop; - } - -#ifdef UNIV_LOG_ARCHIVE - log_archive_all(); -#endif /* UNIV_LOG_ARCHIVE */ - if (srv_fast_shutdown == 2) { - if (!srv_read_only_mode) { - ib_logf(IB_LOG_LEVEL_INFO, - "MySQL has requested a very fast shutdown " - "without flushing the InnoDB buffer pool to " - "data files. At the next mysqld startup " - "InnoDB will do a crash recovery!"); - - /* In this fastest shutdown we do not flush the - buffer pool: - - it is essentially a 'crash' of the InnoDB server. 
- Make sure that the log is all flushed to disk, so - that we can recover all committed transactions in - a crash recovery. We must not write the lsn stamps - to the data files, since at a startup InnoDB deduces - from the stamps if the previous shutdown was clean. */ - - log_buffer_flush_to_disk(); - } - - srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; - - /* Wake the log tracking thread which will then immediatelly - quit because of srv_shutdown_state value */ - if (srv_redo_log_thread_started) { - os_event_reset(srv_redo_log_tracked_event); - os_event_set(srv_checkpoint_completed_event); - } - - fil_close_all_files(); - return; - } - - if (!srv_read_only_mode) { - log_make_checkpoint_at(LSN_MAX, TRUE); - - mutex_enter(&log_sys->mutex); - - tracked_lsn = log_get_tracked_lsn(); - - lsn = log_sys->lsn; - - if (lsn != log_sys->last_checkpoint_lsn - || (srv_track_changed_pages - && (tracked_lsn != log_sys->last_checkpoint_lsn)) -#ifdef UNIV_LOG_ARCHIVE - || (srv_log_archive_on - && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE) -#endif /* UNIV_LOG_ARCHIVE */ - ) { - - mutex_exit(&log_sys->mutex); - - goto loop; - } - -#ifdef UNIV_LOG_ARCHIVE - log_archive_close_groups(TRUE); -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_exit(&log_sys->mutex); - - fil_flush_file_spaces(FIL_TABLESPACE); - fil_flush_file_spaces(FIL_LOG); - - /* The call fil_write_flushed_lsn_to_data_files() will - bypass the buffer pool: therefore it is essential that - the buffer pool has been completely flushed to disk! 
*/ - - if (!buf_all_freed()) { - if (srv_print_verbose_log && count > 600) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for dirty buffer pages" - " to be flushed"); - count = 0; - } - - goto loop; - } - } else { - lsn = srv_start_lsn; - } - - srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; - - /* Signal the log following thread to quit */ - if (srv_redo_log_thread_started) { - os_event_reset(srv_redo_log_tracked_event); - os_event_set(srv_checkpoint_completed_event); - } - - /* Make some checks that the server really is quiet */ - srv_thread_type type = srv_get_active_thread_type(); - ut_a(type == SRV_NONE); - - bool freed = buf_all_freed(); - ut_a(freed); - - ut_a(lsn == log_sys->lsn); - ut_ad(srv_force_recovery >= SRV_FORCE_NO_LOG_REDO - || lsn == log_sys->last_checkpoint_lsn); - - if (lsn < srv_start_lsn) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Log sequence number at shutdown " LSN_PF " " - "is lower than at startup " LSN_PF "!", - lsn, srv_start_lsn); - } - - srv_shutdown_lsn = lsn; - - if (!srv_read_only_mode) { - dberr_t err = fil_write_flushed_lsn(lsn); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Failed to write flush lsn to the " - "system tablespace at shutdown err=%s", - ut_strerr(err)); - } - } - - fil_close_all_files(); - - /* Make some checks that the server really is quiet */ - type = srv_get_active_thread_type(); - ut_a(type == SRV_NONE); - - freed = buf_all_freed(); - ut_a(freed); - - ut_a(lsn == log_sys->lsn); -} - -#ifdef UNIV_LOG_DEBUG -/******************************************************//** -Checks by parsing that the catenated log segment for a single mtr is -consistent. 
*/ -UNIV_INTERN -ibool -log_check_log_recs( -/*===============*/ - const byte* buf, /*!< in: pointer to the start of - the log segment in the - log_sys->buf log buffer */ - ulint len, /*!< in: segment length in bytes */ - ib_uint64_t buf_start_lsn) /*!< in: buffer start lsn */ -{ - ib_uint64_t contiguous_lsn; - ib_uint64_t scanned_lsn; - const byte* start; - const byte* end; - byte* buf1; - byte* scan_buf; - - ut_ad(mutex_own(&(log_sys->mutex))); - - if (len == 0) { - - return(TRUE); - } - - start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE); - end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE); - - buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE); - scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE); - - ut_memcpy(scan_buf, start, end - start); - - recv_scan_log_recs((buf_pool_get_n_pages() - - (recv_n_pool_free_frames * srv_buf_pool_instances)) - * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start, - ut_uint64_align_down(buf_start_lsn, - OS_FILE_LOG_BLOCK_SIZE), - &contiguous_lsn, &scanned_lsn); - - ut_a(scanned_lsn == buf_start_lsn + len); - ut_a(recv_sys->recovered_lsn == scanned_lsn); - - mem_free(buf1); - - return(TRUE); -} -#endif /* UNIV_LOG_DEBUG */ - -/******************************************************//** -Peeks the current lsn. -@return TRUE if success, FALSE if could not get the log system mutex */ -UNIV_INTERN -ibool -log_peek_lsn( -/*=========*/ - lsn_t* lsn) /*!< out: if returns TRUE, current lsn is here */ -{ - if (0 == mutex_enter_nowait(&(log_sys->mutex))) { - *lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - return(TRUE); - } - - return(FALSE); -} - -/******************************************************//** -Prints info of the log. 
*/ -UNIV_INTERN -void -log_print( -/*======*/ - FILE* file) /*!< in: file where to print */ -{ - double time_elapsed; - time_t current_time; - - // mutex_enter(&(log_sys->mutex)); - - fprintf(file, - "Log sequence number " LSN_PF "\n" - "Log flushed up to " LSN_PF "\n" - "Pages flushed up to " LSN_PF "\n" - "Last checkpoint at " LSN_PF "\n", - log_sys->lsn, - log_sys->flushed_to_disk_lsn, - log_buf_pool_get_oldest_modification_peek(), - log_sys->last_checkpoint_lsn); - - fprintf(file, - "Max checkpoint age " LSN_PF "\n" - "Checkpoint age target " LSN_PF "\n" - "Modified age " LSN_PF "\n" - "Checkpoint age " LSN_PF "\n", - log_sys->max_checkpoint_age, - log_sys->max_checkpoint_age_async, - log_sys->lsn -log_buf_pool_get_oldest_modification_peek(), - log_sys->lsn - log_sys->last_checkpoint_lsn); - - current_time = time(NULL); - - time_elapsed = difftime(current_time, - log_sys->last_printout_time); - - if (time_elapsed <= 0) { - time_elapsed = 1; - } - - fprintf(file, - "%lu pending log writes, %lu pending chkp writes\n" - "%lu log i/o's done, %.2f log i/o's/second\n", - (ulong) log_sys->n_pending_writes, - (ulong) log_sys->n_pending_checkpoint_writes, - (ulong) log_sys->n_log_ios, - ((double)(log_sys->n_log_ios - log_sys->n_log_ios_old) - / time_elapsed)); - - if (srv_track_changed_pages) { - - /* The maximum tracked LSN age is equal to the maximum - checkpoint age */ - fprintf(file, - "Log tracking enabled\n" - "Log tracked up to " LSN_PF "\n" - "Max tracked LSN age " LSN_PF "\n", - log_get_tracked_lsn(), - log_sys->max_checkpoint_age); - } - - log_sys->n_log_ios_old = log_sys->n_log_ios; - log_sys->last_printout_time = current_time; - - //mutex_exit(&(log_sys->mutex)); -} - -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. 
*/ -UNIV_INTERN -void -log_refresh_stats(void) -/*===================*/ -{ - log_sys->n_log_ios_old = log_sys->n_log_ios; - log_sys->last_printout_time = time(NULL); -} - -/********************************************************//** -Closes a log group. */ -static -void -log_group_close( -/*===========*/ - log_group_t* group) /* in,own: log group to close */ -{ - ulint i; - - for (i = 0; i < group->n_files; i++) { - mem_free(group->file_header_bufs_ptr[i]); -#ifdef UNIV_LOG_ARCHIVE - mem_free(group->archive_file_header_bufs_ptr[i]); -#endif /* UNIV_LOG_ARCHIVE */ - } - - mem_free(group->file_header_bufs_ptr); - mem_free(group->file_header_bufs); - -#ifdef UNIV_LOG_ARCHIVE - mem_free(group->archive_file_header_bufs_ptr); - mem_free(group->archive_file_header_bufs); -#endif /* UNIV_LOG_ARCHIVE */ - - mem_free(group->checkpoint_buf_ptr); - - mem_free(group); -} - -/********************************************************//** -Closes all log groups. */ -UNIV_INTERN -void -log_group_close_all(void) -/*=====================*/ -{ - log_group_t* group; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) { - log_group_t* prev_group = group; - - group = UT_LIST_GET_NEXT(log_groups, group); - UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group); - - log_group_close(prev_group); - } -} - -/********************************************************//** -Shutdown the log system but do not release all the memory. 
*/ -UNIV_INTERN -void -log_shutdown(void) -/*==============*/ -{ - log_group_close_all(); - - mem_free(log_sys->buf_ptr); - log_sys->buf_ptr = NULL; - log_sys->buf = NULL; - mem_free(log_sys->checkpoint_buf_ptr); - log_sys->checkpoint_buf_ptr = NULL; - log_sys->checkpoint_buf = NULL; - mem_free(log_sys->archive_buf_ptr); - log_sys->archive_buf_ptr = NULL; - log_sys->archive_buf = NULL; - - os_event_free(log_sys->no_flush_event); - os_event_free(log_sys->one_flushed_event); - - rw_lock_free(&log_sys->checkpoint_lock); - - mutex_free(&log_sys->mutex); - mutex_free(&log_sys->log_flush_order_mutex); - - if (!srv_read_only_mode && srv_scrub_log) { - os_event_free(log_scrub_event); - log_scrub_event = NULL; - } - -#ifdef UNIV_LOG_ARCHIVE - rw_lock_free(&log_sys->archive_lock); - os_event_free(log_sys->archiving_on); -#endif /* UNIV_LOG_ARCHIVE */ - -#ifdef UNIV_LOG_DEBUG - recv_sys_debug_free(); -#endif - - recv_sys_close(); -} - -/********************************************************//** -Free the log system data structures. */ -UNIV_INTERN -void -log_mem_free(void) -/*==============*/ -{ - if (log_sys != NULL) { - recv_sys_mem_free(); - mem_free(log_sys); - - log_sys = NULL; - } -} - -/*****************************************************************//* -If no log record has been written for a while, fill current log -block with dummy records. */ -static -void -log_scrub() -/*=========*/ -{ - ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn); - if (next_lbn_to_pad == cur_lbn) - { - log_pad_current_log_block(); - } - next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn); -} - -/* log scrubbing speed, in bytes/sec */ -UNIV_INTERN ulonglong innodb_scrub_log_speed; - -/*****************************************************************//** -This is the main thread for log scrub. It waits for an event and -when waked up fills current log block with dummy records and -sleeps again. 
-@return this function does not return, it calls os_thread_exit() */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(log_scrub_thread)(void*) -{ - ut_ad(!srv_read_only_mode); - - while (srv_shutdown_state < SRV_SHUTDOWN_FLUSH_PHASE) { - /* log scrubbing interval in µs. */ - ulonglong interval = 1000*1000*512/innodb_scrub_log_speed; - - os_event_wait_time(log_scrub_event, interval); - - log_scrub(); - - os_event_reset(log_scrub_event); - } - - log_scrub_thread_active = false; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc deleted file mode 100644 index 27382977e5c..00000000000 --- a/storage/xtradb/log/log0online.cc +++ /dev/null @@ -1,1912 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011-2012 Percona Inc. All Rights Reserved. -Copyright (C) 2016, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 51 Franklin -Street, Fifth Floor, Boston, MA 02110-1301, USA - -*****************************************************************************/ - -/**************************************************//** -@file log/log0online.cc -Online database log parsing for changed page tracking - -*******************************************************/ - -#include "log0online.h" - -#include "my_dbug.h" - -#include "log0recv.h" -#include "mach0data.h" -#include "mtr0log.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "trx0sys.h" -#include "ut0rbt.h" - -enum { FOLLOW_SCAN_SIZE = 4 * (UNIV_PAGE_SIZE_MAX) }; - -#ifdef UNIV_PFS_MUTEX -/* Key to register log_bmp_sys->mutex with PFS */ -UNIV_INTERN mysql_pfs_key_t log_bmp_sys_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/** Log parsing and bitmap output data structure */ -struct log_bitmap_struct { - byte* read_buf_ptr; /*!< Unaligned log read buffer */ - byte* read_buf; /*!< log read buffer */ - byte parse_buf[RECV_PARSING_BUF_SIZE]; - /*!< log parse buffer */ - byte* parse_buf_end; /*!< parse buffer position where the - next read log data should be copied to. - If the previous log records were fully - parsed, it points to the start, - otherwise points immediatelly past the - end of the incomplete log record. */ - char bmp_file_home[FN_REFLEN]; - /*!< directory for bitmap files */ - log_online_bitmap_file_t out; /*!< The current bitmap file */ - ulint out_seq_num; /*!< the bitmap file sequence number */ - lsn_t start_lsn; /*!< the LSN of the next unparsed - record and the start of the next LSN - interval to be parsed. 
*/ - lsn_t end_lsn; /*!< the end of the LSN interval to be - parsed, equal to the next checkpoint - LSN at the time of parse */ - lsn_t next_parse_lsn; /*!< the LSN of the next unparsed - record in the current parse */ - ib_rbt_t* modified_pages; /*!< the current modified page set, - organized as the RB-tree with the keys - of (space, 4KB-block-start-page-id) - pairs */ - ib_rbt_node_t* page_free_list; /*!< Singly-linked list of freed nodes - of modified_pages tree for later - reuse. Nodes are linked through - ib_rbt_node_t.left as this field has - both the correct type and the tree does - not mind its overwrite during - rbt_next() tree traversal. */ -}; - -/* The log parsing and bitmap output struct instance */ -static struct log_bitmap_struct* log_bmp_sys; - -/* Mutex protecting log_bmp_sys */ -static ib_mutex_t log_bmp_sys_mutex; - -/** File name stem for bitmap files. */ -static const char* bmp_file_name_stem = "ib_modified_log_"; - -/** File name template for bitmap files. The 1st format tag is a directory -name, the 2nd tag is the stem, the 3rd tag is a file sequence number, the 4th -tag is the start LSN for the file. */ -static const char* bmp_file_name_template = "%s%s%lu_%llu.xdb"; - -/* On server startup with empty database srv_start_lsn == 0, in -which case the first LSN of actual log records will be this. */ -#define MIN_TRACKED_LSN ((LOG_START_LSN) + (LOG_BLOCK_HDR_SIZE)) - -/* Tests if num bit of bitmap is set */ -#define IS_BIT_SET(bitmap, num) \ - (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL))) - -/** The bitmap file block size in bytes. All writes will be multiples of this. - */ -enum { - MODIFIED_PAGE_BLOCK_SIZE = 4096 -}; - - -/** Offsets in a file bitmap block */ -enum { - MODIFIED_PAGE_IS_LAST_BLOCK = 0,/* 1 if last block in the current - write, 0 otherwise. 
*/ - MODIFIED_PAGE_START_LSN = 4, /* The starting tracked LSN of this and - other blocks in the same write */ - MODIFIED_PAGE_END_LSN = 12, /* The ending tracked LSN of this and - other blocks in the same write */ - MODIFIED_PAGE_SPACE_ID = 20, /* The space ID of tracked pages in - this block */ - MODIFIED_PAGE_1ST_PAGE_ID = 24, /* The page ID of the first tracked - page in this block */ - MODIFIED_PAGE_BLOCK_UNUSED_1 = 28,/* Unused in order to align the start - of bitmap at 8 byte boundary */ - MODIFIED_PAGE_BLOCK_BITMAP = 32,/* Start of the bitmap itself */ - MODIFIED_PAGE_BLOCK_UNUSED_2 = MODIFIED_PAGE_BLOCK_SIZE - 8, - /* Unused in order to align the end of - bitmap at 8 byte boundary */ - MODIFIED_PAGE_BLOCK_CHECKSUM = MODIFIED_PAGE_BLOCK_SIZE - 4 - /* The checksum of the current block */ -}; - -/** Length of the bitmap data in a block in bytes */ -enum { MODIFIED_PAGE_BLOCK_BITMAP_LEN - = MODIFIED_PAGE_BLOCK_UNUSED_2 - MODIFIED_PAGE_BLOCK_BITMAP }; - -/** Length of the bitmap data in a block in page ids */ -enum { MODIFIED_PAGE_BLOCK_ID_COUNT = MODIFIED_PAGE_BLOCK_BITMAP_LEN * 8 }; - -/****************************************************************//** -Provide a comparisson function for the RB-tree tree (space, -block_start_page) pairs. Actual implementation does not matter as -long as the ordering is full. -@return -1 if p1 < p2, 0 if p1 == p2, 1 if p1 > p2 -*/ -static -int -log_online_compare_bmp_keys( -/*========================*/ - const void* p1, /*!<in: 1st key to compare */ - const void* p2) /*!<in: 2nd key to compare */ -{ - const byte *k1 = (const byte *)p1; - const byte *k2 = (const byte *)p2; - - ulint k1_space = mach_read_from_4(k1 + MODIFIED_PAGE_SPACE_ID); - ulint k2_space = mach_read_from_4(k2 + MODIFIED_PAGE_SPACE_ID); - if (k1_space == k2_space) { - ulint k1_start_page - = mach_read_from_4(k1 + MODIFIED_PAGE_1ST_PAGE_ID); - ulint k2_start_page - = mach_read_from_4(k2 + MODIFIED_PAGE_1ST_PAGE_ID); - return k1_start_page < k2_start_page - ? 
-1 : k1_start_page > k2_start_page ? 1 : 0; - } - return k1_space < k2_space ? -1 : 1; -} - -/****************************************************************//** -Set a bit for tracked page in the bitmap. Expand the bitmap tree as -necessary. */ -static -void -log_online_set_page_bit( -/*====================*/ - ulint space, /*!<in: log record space id */ - ulint page_no)/*!<in: log record page id */ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - ut_a(space != ULINT_UNDEFINED); - ut_a(page_no != ULINT_UNDEFINED); - - ulint block_start_page = page_no / MODIFIED_PAGE_BLOCK_ID_COUNT - * MODIFIED_PAGE_BLOCK_ID_COUNT; - ulint block_pos = block_start_page ? (page_no % block_start_page / 8) - : (page_no / 8); - uint bit_pos = page_no % 8; - - byte search_page[MODIFIED_PAGE_BLOCK_SIZE]; - mach_write_to_4(search_page + MODIFIED_PAGE_SPACE_ID, space); - mach_write_to_4(search_page + MODIFIED_PAGE_1ST_PAGE_ID, - block_start_page); - - byte *page_ptr; - ib_rbt_bound_t tree_search_pos; - if (!rbt_search(log_bmp_sys->modified_pages, &tree_search_pos, - search_page)) { - page_ptr = rbt_value(byte, tree_search_pos.last); - } - else { - ib_rbt_node_t *new_node; - - if (log_bmp_sys->page_free_list) { - new_node = log_bmp_sys->page_free_list; - log_bmp_sys->page_free_list = new_node->left; - } - else { - new_node = static_cast<ib_rbt_node_t *> - (ut_malloc - (SIZEOF_NODE(log_bmp_sys->modified_pages))); - } - memset(new_node, 0, SIZEOF_NODE(log_bmp_sys->modified_pages)); - - page_ptr = rbt_value(byte, new_node); - mach_write_to_4(page_ptr + MODIFIED_PAGE_SPACE_ID, space); - mach_write_to_4(page_ptr + MODIFIED_PAGE_1ST_PAGE_ID, - block_start_page); - - rbt_add_preallocated_node(log_bmp_sys->modified_pages, - &tree_search_pos, new_node); - } - page_ptr[MODIFIED_PAGE_BLOCK_BITMAP + block_pos] |= (1U << bit_pos); -} - -/****************************************************************//** -Calculate a bitmap block checksum. Algorithm borrowed from -log_block_calc_checksum. 
-@return checksum */ -UNIV_INLINE -ulint -log_online_calc_checksum( -/*=====================*/ - const byte* block) /*!<in: bitmap block */ -{ - ulint sum; - ulint sh; - ulint i; - - sum = 1; - sh = 0; - - for (i = 0; i < MODIFIED_PAGE_BLOCK_CHECKSUM; i++) { - - ulint b = block[i]; - sum &= 0x7FFFFFFFUL; - sum += b; - sum += b << sh; - sh++; - if (sh > 24) { - sh = 0; - } - } - - return sum; -} - -/****************************************************************//** -Read one bitmap data page and check it for corruption. - -@return TRUE if page read OK, FALSE if I/O error */ -static -ibool -log_online_read_bitmap_page( -/*========================*/ - log_online_bitmap_file_t *bitmap_file, /*!<in/out: bitmap - file */ - byte *page, /*!<out: read page. - Must be at least - MODIFIED_PAGE_BLOCK_SIZE - bytes long */ - ibool *checksum_ok) /*!<out: TRUE if page - checksum OK */ -{ - ulint checksum; - ulint actual_checksum; - ibool success; - - ut_a(bitmap_file->size >= MODIFIED_PAGE_BLOCK_SIZE); - ut_a(bitmap_file->offset - <= bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE); - ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0); - - success = os_file_read(bitmap_file->file, page, bitmap_file->offset, - MODIFIED_PAGE_BLOCK_SIZE); - - if (UNIV_UNLIKELY(!success)) { - - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_WARN, - "failed reading changed page bitmap file \'%s\'", - bitmap_file->name); - return FALSE; - } - - bitmap_file->offset += MODIFIED_PAGE_BLOCK_SIZE; - ut_ad(bitmap_file->offset <= bitmap_file->size); - - checksum = mach_read_from_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM); - actual_checksum = log_online_calc_checksum(page); - *checksum_ok = (checksum == actual_checksum); - - return TRUE; -} - -/****************************************************************//** -Get the last tracked fully LSN from the bitmap file by reading -backwards untile a correct end page is found. 
Detects incomplete -writes and corrupted data. Sets the start output position for the -written bitmap data. - -Multiple bitmap files are handled using the following assumptions: -1) Only the last file might be corrupted. In case where no good data was found -in the last file, assume that the next to last file is OK. This assumption -does not limit crash recovery capability in any way. -2) If the whole of the last file was corrupted, assume that the start LSN in -its name is correct and use it for (re-)tracking start. - -@return the last fully tracked LSN */ -static -lsn_t -log_online_read_last_tracked_lsn(void) -/*==================================*/ -{ - byte page[MODIFIED_PAGE_BLOCK_SIZE]; - ibool is_last_page = FALSE; - ibool checksum_ok = FALSE; - lsn_t result; - os_offset_t read_offset = log_bmp_sys->out.offset; - - while ((!checksum_ok || !is_last_page) && read_offset > 0) - { - read_offset -= MODIFIED_PAGE_BLOCK_SIZE; - log_bmp_sys->out.offset = read_offset; - - if (!log_online_read_bitmap_page(&log_bmp_sys->out, page, - &checksum_ok)) { - checksum_ok = FALSE; - result = 0; - break; - } - - if (checksum_ok) { - is_last_page - = mach_read_from_4 - (page + MODIFIED_PAGE_IS_LAST_BLOCK); - } else { - - ib_logf(IB_LOG_LEVEL_WARN, - "corruption detected in \'%s\' at offset " - UINT64PF, - log_bmp_sys->out.name, read_offset); - } - }; - - result = (checksum_ok && is_last_page) - ? mach_read_from_8(page + MODIFIED_PAGE_END_LSN) : 0; - - /* Truncate the output file to discard the corrupted bitmap data, if - any */ - if (!os_file_set_eof_at(log_bmp_sys->out.file, - log_bmp_sys->out.offset)) { - ib_logf(IB_LOG_LEVEL_WARN, - "failed truncating changed page bitmap file \'%s\' to " - UINT64PF " bytes", - log_bmp_sys->out.name, log_bmp_sys->out.offset); - result = 0; - } - return result; -} - -/****************************************************************//** -Safely write the log_sys->tracked_lsn value. 
Uses atomic operations -if available, otherwise this field is protected with the log system -mutex. The reader counterpart function is log_get_tracked_lsn() in -log0log.c. */ -UNIV_INLINE -void -log_set_tracked_lsn( -/*================*/ - lsn_t tracked_lsn) /*!<in: new value */ -{ - log_sys->tracked_lsn = tracked_lsn; - os_wmb; -} - -/*********************************************************************//** -Check if missing, if any, LSN interval can be read and tracked using the -current LSN value, the LSN value where the tracking stopped, and the log group -capacity. - -@return TRUE if the missing interval can be tracked or if there's no missing -data. */ -static -ibool -log_online_can_track_missing( -/*=========================*/ - lsn_t last_tracked_lsn, /*!<in: last tracked LSN */ - lsn_t tracking_start_lsn) /*!<in: current LSN */ -{ - /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty - bitmap file, handle this too. */ - last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN); - - if (last_tracked_lsn > tracking_start_lsn) { - ib_logf(IB_LOG_LEVEL_FATAL, - "last tracked LSN " LSN_PF " is ahead of tracking " - "start LSN " LSN_PF ". This can be caused by " - "mismatched bitmap files.", - last_tracked_lsn, tracking_start_lsn); - } - - return (last_tracked_lsn == tracking_start_lsn) - || (log_sys->lsn - last_tracked_lsn - <= log_sys->log_group_capacity); -} - - -/****************************************************************//** -Diagnose a gap in tracked LSN range on server startup due to crash or -very fast shutdown and try to close it by tracking the data -immediatelly, if possible. 
*/ -static -void -log_online_track_missing_on_startup( -/*================================*/ - lsn_t last_tracked_lsn, /*!<in: last tracked LSN read from the - bitmap file */ - lsn_t tracking_start_lsn) /*!<in: last checkpoint LSN of the - current server startup */ -{ - ut_ad(last_tracked_lsn != tracking_start_lsn); - ut_ad(srv_track_changed_pages); - - ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF - ", but the last checkpoint LSN is " LSN_PF ". This might be " - "due to a server crash or a very fast shutdown.", - log_bmp_sys->out.name, last_tracked_lsn, tracking_start_lsn); - - /* See if we can fully recover the missing interval */ - if (log_online_can_track_missing(last_tracked_lsn, - tracking_start_lsn)) { - - ib_logf(IB_LOG_LEVEL_INFO, - "reading the log to advance the last tracked LSN."); - - log_bmp_sys->start_lsn = ut_max(last_tracked_lsn, - MIN_TRACKED_LSN); - log_set_tracked_lsn(log_bmp_sys->start_lsn); - ut_a(log_online_follow_redo_log()); - ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn); - - ib_logf(IB_LOG_LEVEL_INFO, - "continuing tracking changed pages from LSN " LSN_PF, - log_bmp_sys->end_lsn); - } - else { - ib_logf(IB_LOG_LEVEL_WARN, - "the age of last tracked LSN exceeds log capacity, " - "tracking-based incremental backups will work only " - "from the higher LSN!"); - - log_bmp_sys->end_lsn = log_bmp_sys->start_lsn - = tracking_start_lsn; - log_set_tracked_lsn(log_bmp_sys->start_lsn); - - ib_logf(IB_LOG_LEVEL_INFO, - "starting tracking changed pages from LSN " LSN_PF, - log_bmp_sys->end_lsn); - } -} - -/*********************************************************************//** -Format a bitmap output file name to log_bmp_sys->out.name. 
*/ -static -void -log_online_make_bitmap_name( -/*=========================*/ - lsn_t start_lsn) /*!< in: the start LSN name part */ -{ - ut_snprintf(log_bmp_sys->out.name, FN_REFLEN, bmp_file_name_template, - log_bmp_sys->bmp_file_home, bmp_file_name_stem, - log_bmp_sys->out_seq_num, start_lsn); -} - -/*********************************************************************//** -Check if an old file that has the name of a new bitmap file we are about to -create should be overwritten. */ -static -ibool -log_online_should_overwrite( -/*========================*/ - const char *path) /*!< in: path to file */ -{ - dberr_t err; - os_file_stat_t file_info; - - /* Currently, it's OK to overwrite 0-sized files only */ - err = os_file_get_status(path, &file_info, false); - return err == DB_SUCCESS && file_info.type == OS_FILE_TYPE_FILE - && file_info.size == 0LL; -} - -/*********************************************************************//** -Create a new empty bitmap output file. - -@return TRUE if operation succeeded, FALSE if I/O error */ -static -ibool -log_online_start_bitmap_file(void) -/*==============================*/ -{ - ibool success = TRUE; - - /* Check for an old file that should be deleted first */ - if (log_online_should_overwrite(log_bmp_sys->out.name)) { - - success = static_cast<ibool>( - os_file_delete_if_exists(innodb_file_bmp_key, - log_bmp_sys->out.name)); - } - - if (UNIV_LIKELY(success)) { - log_bmp_sys->out.file - = os_file_create_simple_no_error_handling( - innodb_file_bmp_key, - log_bmp_sys->out.name, - OS_FILE_CREATE, - OS_FILE_READ_WRITE_CACHED, - &success, FALSE); - } - if (UNIV_UNLIKELY(!success)) { - - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, - "cannot create \'%s\'", log_bmp_sys->out.name); - return FALSE; - } - - log_bmp_sys->out.offset = 0; - return TRUE; -} - -/*********************************************************************//** -Close the current bitmap output file 
and create the next one. - -@return TRUE if operation succeeded, FALSE if I/O error */ -static -ibool -log_online_rotate_bitmap_file( -/*===========================*/ - lsn_t next_file_start_lsn) /*!<in: the start LSN name - part */ -{ - if (!os_file_is_invalid(log_bmp_sys->out.file)) { - os_file_close(log_bmp_sys->out.file); - os_file_mark_invalid(&log_bmp_sys->out.file); - } - log_bmp_sys->out_seq_num++; - log_online_make_bitmap_name(next_file_start_lsn); - return log_online_start_bitmap_file(); -} - -/*********************************************************************//** -Check the name of a given file if it's a changed page bitmap file and -return file sequence and start LSN name components if it is. If is not, -the values of output parameters are undefined. - -@return TRUE if a given file is a changed page bitmap file. */ -static -ibool -log_online_is_bitmap_file( -/*======================*/ - const os_file_stat_t* file_info, /*!<in: file to - check */ - ulong* bitmap_file_seq_num, /*!<out: bitmap file - sequence number */ - lsn_t* bitmap_file_start_lsn) /*!<out: bitmap file - start LSN */ -{ - char stem[FN_REFLEN]; - - ut_ad (strlen(file_info->name) < OS_FILE_MAX_PATH); - - return ((file_info->type == OS_FILE_TYPE_FILE - || file_info->type == OS_FILE_TYPE_LINK) - && (sscanf(file_info->name, "%[a-z_]%lu_%llu.xdb", stem, - bitmap_file_seq_num, - (unsigned long long *)bitmap_file_start_lsn) == 3) - && (!strcmp(stem, bmp_file_name_stem))); -} - -/** Initialize the constant part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_init(void) -{ - mutex_create(log_bmp_sys_mutex_key, &log_bmp_sys_mutex, - SYNC_LOG_ONLINE); -} - -/** Initialize the dynamic part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_read_init(void) -{ - ibool success; - lsn_t tracking_start_lsn - = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN); - os_file_dir_t bitmap_dir; - os_file_stat_t bitmap_dir_file_info; - lsn_t last_file_start_lsn = 
MIN_TRACKED_LSN; - size_t srv_data_home_len; - - /* Bitmap data start and end in a bitmap block must be 8-byte - aligned. */ - compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0); - compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0); - - ut_ad(srv_track_changed_pages); - - log_bmp_sys = static_cast<log_bitmap_struct *> - (ut_malloc(sizeof(*log_bmp_sys))); - log_bmp_sys->read_buf_ptr = static_cast<byte *> - (ut_malloc(FOLLOW_SCAN_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - log_bmp_sys->read_buf = static_cast<byte *> - (ut_align(log_bmp_sys->read_buf_ptr, OS_FILE_LOG_BLOCK_SIZE)); - - /* Initialize bitmap file directory from srv_data_home and add a path - separator if needed. */ - srv_data_home_len = strlen(srv_data_home); - ut_a (srv_data_home_len < FN_REFLEN); - strcpy(log_bmp_sys->bmp_file_home, srv_data_home); - if (srv_data_home_len - && log_bmp_sys->bmp_file_home[srv_data_home_len - 1] - != SRV_PATH_SEPARATOR) { - - ut_a (srv_data_home_len < FN_REFLEN - 1); - log_bmp_sys->bmp_file_home[srv_data_home_len] - = SRV_PATH_SEPARATOR; - log_bmp_sys->bmp_file_home[srv_data_home_len + 1] = '\0'; - } - - /* Enumerate existing bitmap files to either open the last one to get - the last tracked LSN either to find that there are none and start - tracking from scratch. 
*/ - log_bmp_sys->out.name[0] = '\0'; - log_bmp_sys->out_seq_num = 0; - - bitmap_dir = os_file_opendir(log_bmp_sys->bmp_file_home, TRUE); - ut_a(bitmap_dir); - while (!os_file_readdir_next_file(log_bmp_sys->bmp_file_home, - bitmap_dir, &bitmap_dir_file_info)) { - - ulong file_seq_num; - lsn_t file_start_lsn; - - if (!log_online_is_bitmap_file(&bitmap_dir_file_info, - &file_seq_num, - &file_start_lsn)) { - continue; - } - - if (file_seq_num > log_bmp_sys->out_seq_num - && bitmap_dir_file_info.size > 0) { - log_bmp_sys->out_seq_num = file_seq_num; - last_file_start_lsn = file_start_lsn; - /* No dir component (log_bmp_sys->bmp_file_home) here, - because that's the cwd */ - strncpy(log_bmp_sys->out.name, - bitmap_dir_file_info.name, FN_REFLEN - 1); - log_bmp_sys->out.name[FN_REFLEN - 1] = '\0'; - } - } - - if (os_file_closedir(bitmap_dir)) { - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_FATAL, "cannot close \'%s\'", - log_bmp_sys->bmp_file_home); - } - - if (!log_bmp_sys->out_seq_num) { - log_bmp_sys->out_seq_num = 1; - log_online_make_bitmap_name(0); - } - - log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE, - log_online_compare_bmp_keys); - log_bmp_sys->page_free_list = NULL; - - log_bmp_sys->out.file - = os_file_create_simple_no_error_handling - (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN, - OS_FILE_READ_WRITE_CACHED, &success, FALSE); - - if (!success) { - - /* New file, tracking from scratch */ - ut_a(log_online_start_bitmap_file()); - } - else { - - /* Read the last tracked LSN from the last file */ - lsn_t last_tracked_lsn; - lsn_t file_start_lsn; - - log_bmp_sys->out.size - = os_file_get_size(log_bmp_sys->out.file); - log_bmp_sys->out.offset = log_bmp_sys->out.size; - - if (log_bmp_sys->out.offset % MODIFIED_PAGE_BLOCK_SIZE != 0) { - - ib_logf(IB_LOG_LEVEL_WARN, - "truncated block detected in \'%s\' at offset " - UINT64PF, - log_bmp_sys->out.name, - log_bmp_sys->out.offset); - log_bmp_sys->out.offset -= - 
log_bmp_sys->out.offset - % MODIFIED_PAGE_BLOCK_SIZE; - } - - last_tracked_lsn = log_online_read_last_tracked_lsn(); - /* Do not rotate if we truncated the file to zero length - we - can just start writing there */ - const bool need_rotate = (last_tracked_lsn != 0); - if (!last_tracked_lsn) { - - last_tracked_lsn = last_file_start_lsn; - } - - /* Start a new file. Choose the LSN value in its name based on - if we can retrack any missing data. */ - if (log_online_can_track_missing(last_tracked_lsn, - tracking_start_lsn)) { - file_start_lsn = last_tracked_lsn; - } else { - file_start_lsn = tracking_start_lsn; - } - ut_a(!need_rotate - || log_online_rotate_bitmap_file(file_start_lsn)); - - if (last_tracked_lsn < tracking_start_lsn) { - - log_online_track_missing_on_startup - (last_tracked_lsn, tracking_start_lsn); - return; - } - - if (last_tracked_lsn > tracking_start_lsn) { - - ib_logf(IB_LOG_LEVEL_WARN, - "last tracked LSN is " LSN_PF ", but the last " - "checkpoint LSN is " LSN_PF ". The " - "tracking-based incremental backups will work " - "only from the latter LSN!", - last_tracked_lsn, tracking_start_lsn); - } - - } - - ib_logf(IB_LOG_LEVEL_INFO, "starting tracking changed pages from LSN " - LSN_PF, tracking_start_lsn); - log_bmp_sys->start_lsn = tracking_start_lsn; - log_set_tracked_lsn(tracking_start_lsn); -} - -/** Shut down the dynamic part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_read_shutdown(void) -{ - mutex_enter(&log_bmp_sys_mutex); - - srv_track_changed_pages = FALSE; - - ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; - - if (!os_file_is_invalid(log_bmp_sys->out.file)) { - os_file_close(log_bmp_sys->out.file); - os_file_mark_invalid(&log_bmp_sys->out.file); - } - - rbt_free(log_bmp_sys->modified_pages); - - while (free_list_node) { - ib_rbt_node_t *next = free_list_node->left; - ut_free(free_list_node); - free_list_node = next; - } - - ut_free(log_bmp_sys->read_buf_ptr); - ut_free(log_bmp_sys); - log_bmp_sys = 
NULL; - - srv_redo_log_thread_started = false; - - mutex_exit(&log_bmp_sys_mutex); -} - -/** Shut down the constant part of the log tracking subsystem */ -UNIV_INTERN -void -log_online_shutdown(void) -{ - mutex_free(&log_bmp_sys_mutex); -} - -/*********************************************************************//** -For the given minilog record type determine if the record has (space; page) -associated with it. -@return TRUE if the record has (space; page) in it */ -static -ibool -log_online_rec_has_page( -/*====================*/ - byte type) /*!<in: the minilog record type */ -{ - return type != MLOG_MULTI_REC_END && type != MLOG_DUMMY_RECORD; -} - -/*********************************************************************//** -Check if a page field for a given log record type actually contains a page -id. It does not for file operations and MLOG_LSN. -@return TRUE if page field contains actual page id, FALSE otherwise */ -static -ibool -log_online_rec_page_means_page( -/*===========================*/ - byte type) /*!<in: log record type */ -{ - return log_online_rec_has_page(type) -#ifdef UNIV_LOG_LSN_DEBUG - && type != MLOG_LSN -#endif - && type != MLOG_FILE_CREATE - && type != MLOG_FILE_RENAME - && type != MLOG_FILE_DELETE - && type != MLOG_FILE_CREATE2; -} - -/*********************************************************************//** -Parse the log data in the parse buffer for the (space, page) pairs and add -them to the modified page set as necessary. Removes the fully-parsed records -from the buffer. If an incomplete record is found, moves it to the end of the -buffer. 
*/ -static -void -log_online_parse_redo_log(void) -/*===========================*/ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - byte *ptr = log_bmp_sys->parse_buf; - byte *end = log_bmp_sys->parse_buf_end; - ulint len = 0; - - while (ptr != end - && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) { - - byte type; - ulint space; - ulint page_no; - byte* body; - - /* recv_sys is not initialized, so on corrupt log we will - SIGSEGV. But the log of a live database should not be - corrupt. */ - len = recv_parse_log_rec(ptr, end, &type, &space, &page_no, - &body); - if (len > 0) { - - if (log_online_rec_page_means_page(type)) { - - ut_a(len >= 3); - log_online_set_page_bit(space, page_no); - } - - ptr += len; - ut_ad(ptr <= end); - log_bmp_sys->next_parse_lsn - = recv_calc_lsn_on_data_add - (log_bmp_sys->next_parse_lsn, len); - } - else { - - /* Incomplete log record. Shift it to the - beginning of the parse buffer and leave it to be - completed on the next read. */ - ut_memmove(log_bmp_sys->parse_buf, ptr, end - ptr); - log_bmp_sys->parse_buf_end - = log_bmp_sys->parse_buf + (end - ptr); - ptr = end; - } - } - - if (len > 0) { - - log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf; - } -} - -/*********************************************************************//** -Check the log block checksum. -@return TRUE if the log block checksum is OK, FALSE otherwise. 
*/ -static -ibool -log_online_is_valid_log_seg( -/*========================*/ - const byte* log_block) /*!< in: read log data */ -{ - ibool checksum_is_ok - = log_block_checksum_is_ok_or_old_format(log_block, true); - - if (!checksum_is_ok) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "log block checksum mismatch: expected " ULINTPF ", " - "calculated checksum " ULINTPF, - log_block_get_checksum(log_block), - log_block_calc_checksum(log_block)); - } - - return checksum_is_ok; -} - -/*********************************************************************//** -Copy new log data to the parse buffer while skipping log block header, -trailer and already parsed data. */ -static -void -log_online_add_to_parse_buf( -/*========================*/ - const byte* log_block, /*!< in: read log data */ - ulint data_len, /*!< in: length of read log data */ - ulint skip_len) /*!< in: how much of log data to - skip */ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - ulint start_offset = skip_len ? skip_len : LOG_BLOCK_HDR_SIZE; - ulint end_offset - = (data_len == OS_FILE_LOG_BLOCK_SIZE) - ? data_len - LOG_BLOCK_TRL_SIZE - : data_len; - ulint actual_data_len = (end_offset >= start_offset) - ? end_offset - start_offset : 0; - - ut_memcpy(log_bmp_sys->parse_buf_end, log_block + start_offset, - actual_data_len); - - log_bmp_sys->parse_buf_end += actual_data_len; - - ut_a(log_bmp_sys->parse_buf_end - log_bmp_sys->parse_buf - <= RECV_PARSING_BUF_SIZE); -} - -/*********************************************************************//** -Parse the log block: first copies the read log data to the parse buffer while -skipping log block header, trailer and already parsed data. Then it actually -parses the log to add to the modified page bitmap. 
*/ -static -void -log_online_parse_redo_log_block( -/*============================*/ - const byte* log_block, /*!< in: read log data */ - ulint skip_already_parsed_len) /*!< in: how many bytes of - log data should be skipped as - they were parsed before */ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - ulint block_data_len = log_block_get_data_len(log_block); - - ut_ad(block_data_len % OS_FILE_LOG_BLOCK_SIZE == 0 - || block_data_len < OS_FILE_LOG_BLOCK_SIZE); - - log_online_add_to_parse_buf(log_block, block_data_len, - skip_already_parsed_len); - log_online_parse_redo_log(); -} - -/*********************************************************************//** -Read and parse one redo log chunk and updates the modified page bitmap. */ -static -void -log_online_follow_log_seg( -/*======================*/ - log_group_t* group, /*!< in: the log group to use */ - lsn_t block_start_lsn, /*!< in: the LSN to read from */ - lsn_t block_end_lsn) /*!< in: the LSN to read to */ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - /* Pointer to the current OS_FILE_LOG_BLOCK-sized chunk of the read log - data to parse */ - byte* log_block = log_bmp_sys->read_buf; - byte* log_block_end = log_bmp_sys->read_buf - + (block_end_lsn - block_start_lsn); - - mutex_enter(&log_sys->mutex); - log_group_read_log_seg(LOG_RECOVER, log_bmp_sys->read_buf, - group, block_start_lsn, block_end_lsn, TRUE); - /* log_group_read_log_seg will release the log_sys->mutex for us */ - - while (log_block < log_block_end - && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) { - - /* How many bytes of log data should we skip in the current log - block. 
Skipping is necessary because we round down the next - parse LSN thus it is possible to read the already-processed log - data many times */ - ulint skip_already_parsed_len = 0; - - if (!log_online_is_valid_log_seg(log_block)) { - break; - } - - if ((block_start_lsn <= log_bmp_sys->next_parse_lsn) - && (block_start_lsn + OS_FILE_LOG_BLOCK_SIZE - > log_bmp_sys->next_parse_lsn)) { - - /* The next parse LSN is inside the current block, skip - data preceding it. */ - skip_already_parsed_len - = (ulint)(log_bmp_sys->next_parse_lsn - - block_start_lsn); - } - else { - - /* If the next parse LSN is not inside the current - block, then the only option is that we have processed - ahead already. */ - ut_a(block_start_lsn > log_bmp_sys->next_parse_lsn); - } - - /* TODO: merge the copying to the parse buf code with - skip_already_len calculations */ - log_online_parse_redo_log_block(log_block, - skip_already_parsed_len); - - log_block += OS_FILE_LOG_BLOCK_SIZE; - block_start_lsn += OS_FILE_LOG_BLOCK_SIZE; - } - - return; -} - -/*********************************************************************//** -Read and parse the redo log in a given group in FOLLOW_SCAN_SIZE-sized -chunks and updates the modified page bitmap. 
*/ -static -void -log_online_follow_log_group( -/*========================*/ - log_group_t* group, /*!< in: the log group to use */ - lsn_t contiguous_lsn) /*!< in: the LSN of log block start - containing the log_parse_start_lsn */ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - lsn_t block_start_lsn = contiguous_lsn; - lsn_t block_end_lsn; - - log_bmp_sys->next_parse_lsn = log_bmp_sys->start_lsn; - log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf; - - do { - block_end_lsn = block_start_lsn + FOLLOW_SCAN_SIZE; - - log_online_follow_log_seg(group, block_start_lsn, - block_end_lsn); - - /* Next parse LSN can become higher than the last read LSN - only in the case when the read LSN falls right on the block - boundary, in which case next parse lsn is bumped to the actual - data LSN on the next (not yet read) block. This assert is - slightly conservative. */ - ut_a(log_bmp_sys->next_parse_lsn - <= block_end_lsn + LOG_BLOCK_HDR_SIZE - + LOG_BLOCK_TRL_SIZE); - - block_start_lsn = block_end_lsn; - } while (block_end_lsn < log_bmp_sys->end_lsn); - - /* Assert that the last read log record is a full one */ - ut_a(log_bmp_sys->parse_buf_end == log_bmp_sys->parse_buf); -} - -/*********************************************************************//** -Write, flush one bitmap block to disk and advance the output position if -successful. 
- -@return TRUE if page written OK, FALSE if I/O error */ -static -ibool -log_online_write_bitmap_page( -/*=========================*/ - const byte *block) /*!< in: block to write */ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - /* Simulate a write error */ - DBUG_EXECUTE_IF("bitmap_page_write_error", - { - ulint space_id - = mach_read_from_4(block - + MODIFIED_PAGE_SPACE_ID); - if (space_id > 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "simulating bitmap write " - "error in " - "log_online_write_bitmap_page " - "for space ID %lu", - space_id); - return FALSE; - } - }); - - /* A crash injection site that ensures last checkpoint LSN > last - tracked LSN, so that LSN tracking for this interval is tested. */ - DBUG_EXECUTE_IF("crash_before_bitmap_write", - { - ulint space_id - = mach_read_from_4(block - + MODIFIED_PAGE_SPACE_ID); - if (space_id > 0) - DBUG_SUICIDE(); - }); - - - ibool success = os_file_write(log_bmp_sys->out.name, - log_bmp_sys->out.file, block, - log_bmp_sys->out.offset, - MODIFIED_PAGE_BLOCK_SIZE); - if (UNIV_UNLIKELY(!success)) { - - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "failed writing changed page " - "bitmap file \'%s\'", log_bmp_sys->out.name); - return FALSE; - } - - success = os_file_flush(log_bmp_sys->out.file); - if (UNIV_UNLIKELY(!success)) { - - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "failed flushing changed page " - "bitmap file \'%s\'", log_bmp_sys->out.name); - return FALSE; - } - - os_file_advise(log_bmp_sys->out.file, log_bmp_sys->out.offset, - MODIFIED_PAGE_BLOCK_SIZE, OS_FILE_ADVISE_DONTNEED); - - log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE; - return TRUE; -} - -/*********************************************************************//** -Append the current changed page bitmap to the bitmap file. Clears the -bitmap tree and recycles its nodes to the free list. 
- -@return TRUE if bitmap written OK, FALSE if I/O error*/ -static -ibool -log_online_write_bitmap(void) -/*=========================*/ -{ - ut_ad(mutex_own(&log_bmp_sys_mutex)); - - if (log_bmp_sys->out.offset >= srv_max_bitmap_file_size) { - if (!log_online_rotate_bitmap_file(log_bmp_sys->start_lsn)) { - return FALSE; - } - } - - ib_rbt_node_t *bmp_tree_node - = (ib_rbt_node_t *)rbt_first(log_bmp_sys->modified_pages); - const ib_rbt_node_t * const last_bmp_tree_node - = rbt_last(log_bmp_sys->modified_pages); - - ibool success = TRUE; - - while (bmp_tree_node) { - - byte *page = rbt_value(byte, bmp_tree_node); - - /* In case of a bitmap page write error keep on looping over - the tree to reclaim its memory through the free list instead of - returning immediatelly. */ - if (UNIV_LIKELY(success)) { - if (bmp_tree_node == last_bmp_tree_node) { - mach_write_to_4(page - + MODIFIED_PAGE_IS_LAST_BLOCK, - 1); - } - - mach_write_to_8(page + MODIFIED_PAGE_START_LSN, - log_bmp_sys->start_lsn); - mach_write_to_8(page + MODIFIED_PAGE_END_LSN, - log_bmp_sys->end_lsn); - mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM, - log_online_calc_checksum(page)); - - success = log_online_write_bitmap_page(page); - } - - bmp_tree_node->left = log_bmp_sys->page_free_list; - log_bmp_sys->page_free_list = bmp_tree_node; - - bmp_tree_node = (ib_rbt_node_t*) - rbt_next(log_bmp_sys->modified_pages, bmp_tree_node); - - DBUG_EXECUTE_IF("bitmap_page_2_write_error", - if (bmp_tree_node) - { - DBUG_SET("+d,bitmap_page_write_error"); - DBUG_SET("-d,bitmap_page_2_write_error"); - }); - } - - rbt_reset(log_bmp_sys->modified_pages); - return success; -} - -/*********************************************************************//** -Read and parse the redo log up to last checkpoint LSN to build the changed -page bitmap which is then written to disk. 
- -@return TRUE if log tracking succeeded, FALSE if bitmap write I/O error */ -UNIV_INTERN -ibool -log_online_follow_redo_log(void) -/*============================*/ -{ - lsn_t contiguous_start_lsn; - log_group_t* group; - ibool result; - - ut_ad(!srv_read_only_mode); - - if (!srv_track_changed_pages) - return TRUE; - - DEBUG_SYNC_C("log_online_follow_redo_log"); - - mutex_enter(&log_bmp_sys_mutex); - - if (!srv_track_changed_pages) { - mutex_exit(&log_bmp_sys_mutex); - return TRUE; - } - - /* Grab the LSN of the last checkpoint, we will parse up to it */ - mutex_enter(&(log_sys->mutex)); - log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn; - mutex_exit(&(log_sys->mutex)); - - if (log_bmp_sys->end_lsn == log_bmp_sys->start_lsn) { - mutex_exit(&log_bmp_sys_mutex); - return TRUE; - } - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - ut_a(group); - - contiguous_start_lsn = ut_uint64_align_down(log_bmp_sys->start_lsn, - OS_FILE_LOG_BLOCK_SIZE); - - while (group) { - log_online_follow_log_group(group, contiguous_start_lsn); - group = UT_LIST_GET_NEXT(log_groups, group); - } - - result = log_online_write_bitmap(); - log_bmp_sys->start_lsn = log_bmp_sys->end_lsn; - log_set_tracked_lsn(log_bmp_sys->start_lsn); - - mutex_exit(&log_bmp_sys_mutex); - return result; -} - -/*********************************************************************//** -Diagnose a bitmap file range setup failure and free the partially-initialized -bitmap file range. 
*/ -UNIV_COLD -static -void -log_online_diagnose_inconsistent_dir( -/*=================================*/ - log_online_bitmap_file_range_t *bitmap_files) /*!<in/out: bitmap file - range */ -{ - ib_logf(IB_LOG_LEVEL_WARN, - "InnoDB: Warning: inconsistent bitmap file " - "directory for a " - "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES query"); - free(bitmap_files->files); -} - -/*********************************************************************//** -List the bitmap files in srv_data_home and setup their range that contains the -specified LSN interval. This range, if non-empty, will start with a file that -has the greatest LSN equal to or less than the start LSN and will include all -the files up to the one with the greatest LSN less than the end LSN. Caller -must free bitmap_files->files when done if bitmap_files set to non-NULL and -this function returned TRUE. Field bitmap_files->count might be set to a -larger value than the actual count of the files, and space for the unused array -slots will be allocated but cleared to zeroes. 
- -@return TRUE if succeeded -*/ -static -ibool -log_online_setup_bitmap_file_range( -/*===============================*/ - log_online_bitmap_file_range_t *bitmap_files, /*!<in/out: bitmap file - range */ - lsn_t range_start, /*!<in: start LSN */ - lsn_t range_end) /*!<in: end LSN */ -{ - os_file_dir_t bitmap_dir; - os_file_stat_t bitmap_dir_file_info; - ulong first_file_seq_num = ULONG_MAX; - ulong last_file_seq_num = 0; - lsn_t first_file_start_lsn = LSN_MAX; - - ut_ad(range_end >= range_start); - - bitmap_files->count = 0; - bitmap_files->files = NULL; - - /* 1st pass: size the info array */ - - bitmap_dir = os_file_opendir(srv_data_home, FALSE); - if (UNIV_UNLIKELY(!bitmap_dir)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "failed to open bitmap directory \'%s\'", - srv_data_home); - return FALSE; - } - - while (!os_file_readdir_next_file(srv_data_home, bitmap_dir, - &bitmap_dir_file_info)) { - - ulong file_seq_num; - lsn_t file_start_lsn; - - if (!log_online_is_bitmap_file(&bitmap_dir_file_info, - &file_seq_num, - &file_start_lsn) - || file_start_lsn >= range_end) { - - continue; - } - - if (file_seq_num > last_file_seq_num) { - - last_file_seq_num = file_seq_num; - } - - if (file_start_lsn >= range_start - || file_start_lsn == first_file_start_lsn - || first_file_start_lsn > range_start) { - - /* A file that falls into the range */ - - if (file_start_lsn < first_file_start_lsn) { - - first_file_start_lsn = file_start_lsn; - } - if (file_seq_num < first_file_seq_num) { - - first_file_seq_num = file_seq_num; - } - } else if (file_start_lsn > first_file_start_lsn) { - - /* A file that has LSN closer to the range start - but smaller than it, replacing another such file */ - first_file_start_lsn = file_start_lsn; - first_file_seq_num = file_seq_num; - } - } - - if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) { - - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", - srv_data_home); - return FALSE; - } - - if (first_file_seq_num == ULONG_MAX 
&& last_file_seq_num == 0) { - - bitmap_files->count = 0; - return TRUE; - } - - bitmap_files->count = last_file_seq_num - first_file_seq_num + 1; - - DEBUG_SYNC_C("setup_bitmap_range_middle"); - - /* 2nd pass: get the file names in the file_seq_num order */ - - bitmap_dir = os_file_opendir(srv_data_home, FALSE); - if (UNIV_UNLIKELY(!bitmap_dir)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "failed to open bitmap directory \'%s\'", - srv_data_home); - return FALSE; - } - - bitmap_files->files - = static_cast<log_online_bitmap_file_range_struct::files_t *> - (ut_malloc(bitmap_files->count - * sizeof(bitmap_files->files[0]))); - memset(bitmap_files->files, 0, - bitmap_files->count * sizeof(bitmap_files->files[0])); - - while (!os_file_readdir_next_file(srv_data_home, bitmap_dir, - &bitmap_dir_file_info)) { - - ulong file_seq_num; - lsn_t file_start_lsn; - size_t array_pos; - - if (!log_online_is_bitmap_file(&bitmap_dir_file_info, - &file_seq_num, - &file_start_lsn) - || file_start_lsn >= range_end - || file_start_lsn < first_file_start_lsn) { - - continue; - } - - array_pos = file_seq_num - first_file_seq_num; - if (UNIV_UNLIKELY(array_pos >= bitmap_files->count)) { - - log_online_diagnose_inconsistent_dir(bitmap_files); - os_file_closedir(bitmap_dir); - return FALSE; - } - - - if (file_seq_num > bitmap_files->files[array_pos].seq_num) { - - bitmap_files->files[array_pos].seq_num = file_seq_num; - strncpy(bitmap_files->files[array_pos].name, - bitmap_dir_file_info.name, FN_REFLEN); - bitmap_files->files[array_pos].name[FN_REFLEN - 1] - = '\0'; - bitmap_files->files[array_pos].start_lsn - = file_start_lsn; - } - } - - if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) { - - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'", - srv_data_home); - free(bitmap_files->files); - return FALSE; - } - - if (!bitmap_files->files[0].seq_num - || bitmap_files->files[0].seq_num != first_file_seq_num) { - - log_online_diagnose_inconsistent_dir(bitmap_files); - 
return FALSE; - } - - { - size_t i; - for (i = 1; i < bitmap_files->count; i++) { - if (!bitmap_files->files[i].seq_num) { - break; - } - if ((bitmap_files->files[i].seq_num - <= bitmap_files->files[i - 1].seq_num) - || (bitmap_files->files[i].start_lsn - < bitmap_files->files[i - 1].start_lsn)) { - - log_online_diagnose_inconsistent_dir( - bitmap_files); - return FALSE; - } - } - } - - return TRUE; -} - -/****************************************************************//** -Open a bitmap file for reading. - -@return TRUE if opened successfully */ -static -ibool -log_online_open_bitmap_file_read_only( -/*==================================*/ - const char* name, /*!<in: bitmap file - name without directory, - which is assumed to be - srv_data_home */ - log_online_bitmap_file_t* bitmap_file) /*!<out: opened bitmap - file */ -{ - ibool success = FALSE; - size_t srv_data_home_len; - - ut_ad(name[0] != '\0'); - - srv_data_home_len = strlen(srv_data_home); - if (srv_data_home_len - && srv_data_home[srv_data_home_len-1] - != SRV_PATH_SEPARATOR) { - ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%c%s", - srv_data_home, SRV_PATH_SEPARATOR, name); - } else { - ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s", - srv_data_home, name); - } - bitmap_file->file - = os_file_create_simple_no_error_handling(innodb_file_bmp_key, - bitmap_file->name, - OS_FILE_OPEN, - OS_FILE_READ_ONLY, - &success, FALSE); - if (UNIV_UNLIKELY(!success)) { - - /* Here and below assume that bitmap file names do not - contain apostrophes, thus no need for ut_print_filename(). 
*/ - ib_logf(IB_LOG_LEVEL_WARN, - "error opening the changed page bitmap \'%s\'", - bitmap_file->name); - return FALSE; - } - - bitmap_file->size = os_file_get_size(bitmap_file->file); - bitmap_file->offset = 0; - - os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_SEQUENTIAL); - os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_NOREUSE); - - return TRUE; -} - -/****************************************************************//** -Diagnose one or both of the following situations if we read close to -the end of bitmap file: -1) Warn if the remainder of the file is less than one page. -2) Error if we cannot read any more full pages but the last read page -did not have the last-in-run flag set. - -@return FALSE for the error */ -static -ibool -log_online_diagnose_bitmap_eof( -/*===========================*/ - const log_online_bitmap_file_t* bitmap_file, /*!< in: bitmap file */ - ibool last_page_in_run)/*!< in: "last page in - run" flag value in the - last read page */ -{ - /* Check if we are too close to EOF to read a full page */ - if ((bitmap_file->size < MODIFIED_PAGE_BLOCK_SIZE) - || (bitmap_file->offset - > bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE)) { - - if (UNIV_UNLIKELY(bitmap_file->offset != bitmap_file->size)) { - - /* If we are not at EOF and we have less than one page - to read, it's junk. This error is not fatal in - itself. 
*/ - - ib_logf(IB_LOG_LEVEL_WARN, - "junk at the end of changed page bitmap file " - "\'%s\'.", bitmap_file->name); - } - - if (UNIV_UNLIKELY(!last_page_in_run)) { - - /* We are at EOF but the last read page did not finish - a run */ - /* It's a "Warning" here because it's not a fatal error - for the whole server */ - ib_logf(IB_LOG_LEVEL_WARN, - "changed page bitmap file \'%s\', size " - UINT64PF " bytes, does not " - "contain a complete run at the next read " - "offset " UINT64PF, - bitmap_file->name, bitmap_file->size, - bitmap_file->offset); - return FALSE; - } - } - return TRUE; -} - -/*********************************************************************//** -Initialize the log bitmap iterator for a given range. The records are -processed at a bitmap block granularity, i.e. all the records in the same block -share the same start and end LSN values, the exact LSN of each record is -unavailable (nor is it defined for blocks that are touched more than once in -the LSN interval contained in the block). Thus min_lsn and max_lsn should be -set at block boundaries or bigger, otherwise the records at the 1st and the -last blocks will not be returned. Also note that there might be returned -records with LSN < min_lsn, as min_lsn is used to select the correct starting -file but not block. - -@return TRUE if the iterator is initialized OK, FALSE otherwise. 
*/ -UNIV_INTERN -ibool -log_online_bitmap_iterator_init( -/*============================*/ - log_bitmap_iterator_t *i, /*!<in/out: iterator */ - lsn_t min_lsn,/*!< in: start LSN */ - lsn_t max_lsn)/*!< in: end LSN */ -{ - ut_a(i); - - i->max_lsn = max_lsn; - - if (UNIV_UNLIKELY(min_lsn > max_lsn)) { - - /* Empty range */ - i->in_files.count = 0; - i->in_files.files = NULL; - os_file_mark_invalid(&i->in.file); - i->page = NULL; - i->failed = FALSE; - return TRUE; - } - - if (!log_online_setup_bitmap_file_range(&i->in_files, min_lsn, - max_lsn)) { - - i->failed = TRUE; - return FALSE; - } - - i->in_i = 0; - - if (i->in_files.count == 0) { - - /* Empty range */ - os_file_mark_invalid(&i->in.file); - i->page = NULL; - i->failed = FALSE; - return TRUE; - } - - /* Open the 1st bitmap file */ - if (UNIV_UNLIKELY(!log_online_open_bitmap_file_read_only( - i->in_files.files[i->in_i].name, - &i->in))) { - - i->in_i = i->in_files.count; - free(i->in_files.files); - i->failed = TRUE; - return FALSE; - } - - i->page = static_cast<byte *>(ut_malloc(MODIFIED_PAGE_BLOCK_SIZE)); - i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN; - i->start_lsn = i->end_lsn = 0; - i->space_id = 0; - i->first_page_id = 0; - i->last_page_in_run = TRUE; - i->changed = FALSE; - i->failed = FALSE; - - return TRUE; -} - -/*********************************************************************//** -Releases log bitmap iterator. */ -UNIV_INTERN -void -log_online_bitmap_iterator_release( -/*===============================*/ - log_bitmap_iterator_t *i) /*!<in/out: iterator */ -{ - ut_a(i); - - if (!os_file_is_invalid(i->in.file)) { - - os_file_close(i->in.file); - os_file_mark_invalid(&i->in.file); - } - if (i->in_files.files) { - - ut_free(i->in_files.files); - } - if (i->page) { - - ut_free(i->page); - } - i->failed = TRUE; -} - -/*********************************************************************//** -Iterates through bits of saved bitmap blocks. 
-Sequentially reads blocks from bitmap file(s) and interates through -their bits. Ignores blocks with wrong checksum. -@return TRUE if iteration is successful, FALSE if all bits are iterated. */ -UNIV_INTERN -ibool -log_online_bitmap_iterator_next( -/*============================*/ - log_bitmap_iterator_t *i) /*!<in/out: iterator */ -{ - ibool checksum_ok = FALSE; - ibool success; - - ut_a(i); - - if (UNIV_UNLIKELY(i->in_files.count == 0)) { - - return FALSE; - } - - if (UNIV_LIKELY(i->bit_offset < MODIFIED_PAGE_BLOCK_BITMAP_LEN)) - { - ++i->bit_offset; - i->changed = - IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP, - i->bit_offset); - return TRUE; - } - - if (i->end_lsn >= i->max_lsn && i->last_page_in_run) - return FALSE; - - while (!checksum_ok) - { - while (i->in.size < MODIFIED_PAGE_BLOCK_SIZE - || (i->in.offset - > i->in.size - MODIFIED_PAGE_BLOCK_SIZE)) { - - /* Advance file */ - i->in_i++; - success = os_file_close_no_error_handling( - i->in.file); - os_file_mark_invalid(&i->in.file); - if (UNIV_UNLIKELY(!success)) { - - os_file_get_last_error(TRUE); - i->failed = TRUE; - return FALSE; - } - - success = log_online_diagnose_bitmap_eof( - &i->in, i->last_page_in_run); - if (UNIV_UNLIKELY(!success)) { - - i->failed = TRUE; - return FALSE; - - } - - if (i->in_i == i->in_files.count) { - - return FALSE; - } - - if (UNIV_UNLIKELY(i->in_files.files[i->in_i].seq_num - == 0)) { - - i->failed = TRUE; - return FALSE; - } - - success = log_online_open_bitmap_file_read_only( - i->in_files.files[i->in_i].name, - &i->in); - if (UNIV_UNLIKELY(!success)) { - - i->failed = TRUE; - return FALSE; - } - } - - success = log_online_read_bitmap_page(&i->in, i->page, - &checksum_ok); - if (UNIV_UNLIKELY(!success)) { - - os_file_get_last_error(TRUE); - ib_logf(IB_LOG_LEVEL_WARN, - "failed reading changed page bitmap file " - "\'%s\'", i->in_files.files[i->in_i].name); - i->failed = TRUE; - return FALSE; - } - } - - i->start_lsn = mach_read_from_8(i->page + 
MODIFIED_PAGE_START_LSN); - i->end_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN); - i->space_id = mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID); - i->first_page_id = mach_read_from_4(i->page - + MODIFIED_PAGE_1ST_PAGE_ID); - i->last_page_in_run = mach_read_from_4(i->page - + MODIFIED_PAGE_IS_LAST_BLOCK); - i->bit_offset = 0; - i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP, - i->bit_offset); - - return TRUE; -} - -/************************************************************//** -Delete all the bitmap files for data less than the specified LSN. -If called with lsn == 0 (i.e. set by RESET request) or LSN_MAX, -restart the bitmap file sequence, otherwise continue it. - -@return FALSE to indicate success, TRUE for failure. */ -UNIV_INTERN -ibool -log_online_purge_changed_page_bitmaps( -/*==================================*/ - lsn_t lsn) /*!< in: LSN to purge files up to */ -{ - log_online_bitmap_file_range_t bitmap_files; - size_t i; - ibool result = FALSE; - - if (lsn == 0) { - lsn = LSN_MAX; - } - - bool log_bmp_sys_inited = false; - if (srv_redo_log_thread_started) { - /* User requests might happen with both enabled and disabled - tracking */ - log_bmp_sys_inited = true; - mutex_enter(&log_bmp_sys_mutex); - if (!srv_redo_log_thread_started) { - log_bmp_sys_inited = false; - mutex_exit(&log_bmp_sys_mutex); - } - } - - if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) { - if (log_bmp_sys_inited) { - mutex_exit(&log_bmp_sys_mutex); - } - return TRUE; - } - - if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) { - /* If we have to delete the current output file, close it - first. */ - os_file_close(log_bmp_sys->out.file); - os_file_mark_invalid(&log_bmp_sys->out.file); - } - - for (i = 0; i < bitmap_files.count; i++) { - - /* We consider the end LSN of the current bitmap, derived from - the start LSN of the subsequent bitmap file, to determine - whether to remove the current bitmap. 
Note that bitmap_files - does not contain an entry for the bitmap past the given LSN so - we must check the boundary conditions as well. For example, - consider 1_0.xdb and 2_10.xdb and querying LSN 5. bitmap_files - will only contain 1_0.xdb and we must not delete it since it - represents LSNs 0-9. */ - if ((i + 1 == bitmap_files.count - || bitmap_files.files[i + 1].seq_num == 0 - || bitmap_files.files[i + 1].start_lsn > lsn) - && (lsn != LSN_MAX)) { - - break; - } - if (!os_file_delete_if_exists(innodb_file_bmp_key, - bitmap_files.files[i].name)) { - - os_file_get_last_error(TRUE); - result = TRUE; - break; - } - } - - if (log_bmp_sys_inited) { - if (lsn > log_bmp_sys->end_lsn) { - lsn_t new_file_lsn; - if (lsn == LSN_MAX) { - /* RESET restarts the sequence */ - log_bmp_sys->out_seq_num = 0; - new_file_lsn = 0; - } else { - new_file_lsn = log_bmp_sys->end_lsn; - } - if (!log_online_rotate_bitmap_file(new_file_lsn)) { - /* If file create failed, stop log tracking */ - srv_track_changed_pages = FALSE; - } - } - - mutex_exit(&log_bmp_sys_mutex); - } - - free(bitmap_files.files); - return result; -} diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc deleted file mode 100644 index fb64309cee4..00000000000 --- a/storage/xtradb/log/log0recv.cc +++ /dev/null @@ -1,3814 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file log/log0recv.cc -Recovery - -Created 9/20/1997 Heikki Tuuri -*******************************************************/ - -// First include (the generated) my_config.h, to get correct platform defines. -#include "my_config.h" -#include <stdio.h> // Solaris/x86 header file bug - -#include <vector> -#include <my_systemd.h> - -#include "log0recv.h" - -#ifdef UNIV_NONINL -#include "log0recv.ic" -#endif - -#include "log0crypt.h" - -#include "config.h" -#ifdef HAVE_ALLOCA_H -#include "alloca.h" -#elif defined(HAVE_MALLOC_H) -#include "malloc.h" -#endif - -#include "mem0mem.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "page0cur.h" -#include "page0zip.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "ibuf0ibuf.h" -#include "trx0undo.h" -#include "trx0rec.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#ifndef UNIV_HOTBACKUP -# include "buf0rea.h" -# include "srv0srv.h" -# include "srv0start.h" -# include "trx0roll.h" -# include "row0merge.h" -# include "sync0sync.h" -#else /* !UNIV_HOTBACKUP */ - - -/** This is set to FALSE if the backup was originally taken with the -mysqlbackup --include regexp option: then we do not want to create tables in -directories which were not included */ -UNIV_INTERN ibool recv_replay_file_ops = TRUE; -#endif /* !UNIV_HOTBACKUP */ - -/** Log records are stored in the hash table in chunks at most of this size; -this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */ -#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t)) - -/** Read-ahead area in applying log records to file pages */ -#define 
RECV_READ_AHEAD_AREA 32 - -/** The recovery system */ -UNIV_INTERN recv_sys_t* recv_sys; -/** TRUE when applying redo log records during crash recovery; FALSE -otherwise. Note that this is FALSE while a background thread is -rolling back incomplete transactions. */ -UNIV_INTERN ibool recv_recovery_on; - -#ifndef UNIV_HOTBACKUP -/** TRUE when recv_init_crash_recovery() has been called. */ -UNIV_INTERN ibool recv_needed_recovery; -# ifdef UNIV_DEBUG -/** TRUE if writing to the redo log (mtr_commit) is forbidden. -Protected by log_sys->mutex. */ -UNIV_INTERN ibool recv_no_log_write = FALSE; -# endif /* UNIV_DEBUG */ - -/** TRUE if buf_page_is_corrupted() should check if the log sequence -number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by -recv_recovery_from_checkpoint_start_func(). */ -UNIV_INTERN ibool recv_lsn_checks_on; - -/** There are two conditions under which we scan the logs, the first -is normal startup and the second is when we do a recovery from an -archive. -This flag is set if we are doing a scan from the last checkpoint during -startup. If we find log entries that were written after the last checkpoint -we know that the server was not cleanly shutdown. We must then initialize -the crash recovery environment before attempting to store these entries in -the log hash table. */ -static ibool recv_log_scan_is_startup_type; - -/** If the following is TRUE, the buffer pool file pages must be invalidated -after recovery and no ibuf operations are allowed; this becomes TRUE if -the log record hash table becomes too full, and log records must be merged -to file pages already before the recovery is finished: in this case no -ibuf operations are allowed, as they could modify the pages read in the -buffer pool before the pages have been recovered to the up-to-date state. - -TRUE means that recovery is running and no operations on the log files -are allowed yet: the variable name is misleading. 
*/ -UNIV_INTERN ibool recv_no_ibuf_operations; -/** TRUE when the redo log is being backed up */ -# define recv_is_making_a_backup FALSE -/** TRUE when recovering from a backed up redo log file */ -# define recv_is_from_backup FALSE -#else /* !UNIV_HOTBACKUP */ -# define recv_needed_recovery FALSE -/** TRUE when the redo log is being backed up */ -UNIV_INTERN ibool recv_is_making_a_backup = FALSE; -/** TRUE when recovering from a backed up redo log file */ -UNIV_INTERN ibool recv_is_from_backup = FALSE; -# define buf_pool_get_curr_size() (5 * 1024 * 1024) -#endif /* !UNIV_HOTBACKUP */ - -/** The type of the previous parsed redo log record */ -static ulint recv_previous_parsed_rec_type; -/** The offset of the previous parsed redo log record */ -static ulint recv_previous_parsed_rec_offset; -/** The 'multi' flag of the previous parsed redo log record */ -static ulint recv_previous_parsed_rec_is_multi; - -/** Maximum page number encountered in the redo log */ -UNIV_INTERN ulint recv_max_parsed_page_no; - -/** This many frames must be left free in the buffer pool when we scan -the log and store the scanned log records in the buffer pool: we will -use these free frames to read in pages when we start applying the -log records to the database. -This is the default value. If the actual size of the buffer pool is -larger than 10 MB we'll set this value to 512. */ -UNIV_INTERN ulint recv_n_pool_free_frames; - -/** The maximum lsn we see for a page during the recovery process. If this -is bigger than the lsn we are able to scan up to, that is an indication that -the recovery failed and the database may be corrupt. 
*/ -UNIV_INTERN lsn_t recv_max_page_lsn; - -#ifdef UNIV_PFS_THREAD -UNIV_INTERN mysql_pfs_key_t trx_rollback_clean_thread_key; -#endif /* UNIV_PFS_THREAD */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t recv_sys_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifndef UNIV_HOTBACKUP -# ifdef UNIV_PFS_THREAD -UNIV_INTERN mysql_pfs_key_t recv_writer_thread_key; -# endif /* UNIV_PFS_THREAD */ - -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t recv_writer_mutex_key; -# endif /* UNIV_PFS_MUTEX */ - -/** Flag indicating if recv_writer thread is active. */ -static volatile bool recv_writer_thread_active; -UNIV_INTERN os_thread_t recv_writer_thread_handle = 0; -#endif /* !UNIV_HOTBACKUP */ - -/* prototypes */ - -#ifndef UNIV_HOTBACKUP -/*******************************************************//** -Initialize crash recovery environment. Can be called iff -recv_needed_recovery == FALSE. */ -static -void -recv_init_crash_recovery(void); -/*===========================*/ -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Creates the recovery system. */ -UNIV_INTERN -void -recv_sys_create(void) -/*=================*/ -{ - if (recv_sys != NULL) { - - return; - } - - recv_sys = static_cast<recv_sys_t*>(mem_zalloc(sizeof(*recv_sys))); - - mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV); - -#ifndef UNIV_HOTBACKUP - mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex, - SYNC_LEVEL_VARYING); -#endif /* !UNIV_HOTBACKUP */ - - recv_sys->heap = NULL; - recv_sys->addr_hash = NULL; -} - -/********************************************************//** -Release recovery system mutexes. 
*/ -UNIV_INTERN -void -recv_sys_close(void) -/*================*/ -{ - if (recv_sys != NULL) { - if (recv_sys->addr_hash != NULL) { - hash_table_free(recv_sys->addr_hash); - } - - if (recv_sys->heap != NULL) { - mem_heap_free(recv_sys->heap); - } - - if (recv_sys->buf != NULL) { - ut_free(recv_sys->buf); - } - - if (recv_sys->last_block_buf_start != NULL) { - mem_free(recv_sys->last_block_buf_start); - } - -#ifndef UNIV_HOTBACKUP - ut_ad(!recv_writer_thread_active); - mutex_free(&recv_sys->writer_mutex); -#endif /* !UNIV_HOTBACKUP */ - - mutex_free(&recv_sys->mutex); - - mem_free(recv_sys); - recv_sys = NULL; - } -} - -/********************************************************//** -Frees the recovery system memory. */ -UNIV_INTERN -void -recv_sys_mem_free(void) -/*===================*/ -{ - if (recv_sys != NULL) { - if (recv_sys->addr_hash != NULL) { - hash_table_free(recv_sys->addr_hash); - } - - if (recv_sys->heap != NULL) { - mem_heap_free(recv_sys->heap); - } - - if (recv_sys->buf != NULL) { - ut_free(recv_sys->buf); - } - - if (recv_sys->last_block_buf_start != NULL) { - mem_free(recv_sys->last_block_buf_start); - } - - mem_free(recv_sys); - recv_sys = NULL; - } -} - -#ifndef UNIV_HOTBACKUP -/************************************************************ -Reset the state of the recovery system variables. 
*/ -UNIV_INTERN -void -recv_sys_var_init(void) -/*===================*/ -{ - recv_lsn_checks_on = FALSE; - - recv_n_pool_free_frames = 256; - - recv_recovery_on = FALSE; - - recv_needed_recovery = FALSE; - - recv_lsn_checks_on = FALSE; - - recv_log_scan_is_startup_type = FALSE; - - recv_no_ibuf_operations = FALSE; - - recv_previous_parsed_rec_type = 999999; - - recv_previous_parsed_rec_offset = 0; - - recv_previous_parsed_rec_is_multi = 0; - - recv_max_parsed_page_no = 0; - - recv_n_pool_free_frames = 256; - - recv_max_page_lsn = 0; -} - -/******************************************************************//** -recv_writer thread tasked with flushing dirty pages from the buffer -pools. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(recv_writer_thread)( -/*===============================*/ - void* arg MY_ATTRIBUTE((unused))) - /*!< in: a dummy parameter required by - os_thread_create */ -{ - my_thread_init(); - ut_ad(!srv_read_only_mode); - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(recv_writer_thread_key); -#endif /* UNIV_PFS_THREAD */ - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - - os_thread_sleep(100000); - - mutex_enter(&recv_sys->writer_mutex); - - if (!recv_recovery_on) { - mutex_exit(&recv_sys->writer_mutex); - break; - } - - /* Flush pages from end of LRU if required */ - buf_flush_LRU_tail(); - - mutex_exit(&recv_sys->writer_mutex); - } - - recv_writer_thread_active = false; - - my_thread_end(); - /* We count the number of threads in os_thread_exit(). - A created thread should always use that to exit and not - use return() to exit. 
*/ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************ -Inits the recovery system for a recovery operation. */ -UNIV_INTERN -void -recv_sys_init( -/*==========*/ - ulint available_memory) /*!< in: available memory in bytes */ -{ - if (recv_sys->heap != NULL) { - - return; - } - -#ifndef UNIV_HOTBACKUP - mutex_enter(&(recv_sys->mutex)); - - recv_sys->heap = mem_heap_create_typed(256, - MEM_HEAP_FOR_RECV_SYS); -#else /* !UNIV_HOTBACKUP */ - recv_sys->heap = mem_heap_create(256); - recv_is_from_backup = TRUE; -#endif /* !UNIV_HOTBACKUP */ - - /* Set appropriate value of recv_n_pool_free_frames. */ - if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) { - /* Buffer pool of size greater than 10 MB. */ - recv_n_pool_free_frames = 512; - } - - recv_sys->buf = static_cast<byte*>(ut_malloc(RECV_PARSING_BUF_SIZE)); - recv_sys->len = 0; - recv_sys->recovered_offset = 0; - - recv_sys->addr_hash = hash_create(available_memory / 512); - recv_sys->n_addrs = 0; - - recv_sys->apply_log_recs = FALSE; - recv_sys->apply_batch_on = FALSE; - - recv_sys->last_block_buf_start = static_cast<byte*>( - mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE)); - - recv_sys->last_block = static_cast<byte*>(ut_align( - recv_sys->last_block_buf_start, OS_FILE_LOG_BLOCK_SIZE)); - - recv_sys->found_corrupt_log = FALSE; - recv_sys->progress_time = ut_time(); - - recv_max_page_lsn = 0; - - /* Call the constructor for recv_sys_t::dblwr member */ - new (&recv_sys->dblwr) recv_dblwr_t(); - - mutex_exit(&(recv_sys->mutex)); -} - -/** Empty a fully processed hash table. 
*/ -static -void -recv_sys_empty_hash() -{ - ut_ad(mutex_own(&(recv_sys->mutex))); - ut_a(recv_sys->n_addrs == 0); - - hash_table_free(recv_sys->addr_hash); - mem_heap_empty(recv_sys->heap); - - recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 512); -} - -#ifndef UNIV_HOTBACKUP -# ifndef UNIV_LOG_DEBUG -/********************************************************//** -Frees the recovery system. */ -static -void -recv_sys_debug_free(void) -/*=====================*/ -{ - mutex_enter(&(recv_sys->mutex)); - - hash_table_free(recv_sys->addr_hash); - mem_heap_free(recv_sys->heap); - ut_free(recv_sys->buf); - mem_free(recv_sys->last_block_buf_start); - - recv_sys->buf = NULL; - recv_sys->heap = NULL; - recv_sys->addr_hash = NULL; - recv_sys->last_block_buf_start = NULL; - - mutex_exit(&(recv_sys->mutex)); -} -# endif /* UNIV_LOG_DEBUG */ - -# ifdef UNIV_LOG_ARCHIVE -/********************************************************//** -Truncates possible corrupted or extra records from a log group. 
*/ -static -void -recv_truncate_group( -/*================*/ - log_group_t* group, /*!< in: log group */ - lsn_t recovered_lsn, /*!< in: recovery succeeded up to this - lsn */ - lsn_t limit_lsn, /*!< in: this was the limit for - recovery */ - lsn_t checkpoint_lsn, /*!< in: recovery was started from this - checkpoint */ - lsn_t archived_lsn) /*!< in: the log has been archived up to - this lsn */ -{ - lsn_t start_lsn; - lsn_t end_lsn; - lsn_t finish_lsn1; - lsn_t finish_lsn2; - lsn_t finish_lsn; - - if (archived_lsn == LSN_MAX) { - /* Checkpoint was taken in the NOARCHIVELOG mode */ - archived_lsn = checkpoint_lsn; - } - - finish_lsn1 = ut_uint64_align_down(archived_lsn, - OS_FILE_LOG_BLOCK_SIZE) - + log_group_get_capacity(group); - - finish_lsn2 = ut_uint64_align_up(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE) - + recv_sys->last_log_buf_size; - - if (limit_lsn != LSN_MAX) { - /* We do not know how far we should erase log records: erase - as much as possible */ - - finish_lsn = finish_lsn1; - } else { - /* It is enough to erase the length of the log buffer */ - finish_lsn = finish_lsn1 < finish_lsn2 - ? 
finish_lsn1 : finish_lsn2; - } - - ut_a(RECV_SCAN_SIZE <= log_sys->buf_size); - - memset(log_sys->buf, 0, RECV_SCAN_SIZE); - - start_lsn = ut_uint64_align_down(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE); - - if (start_lsn != recovered_lsn) { - /* Copy the last incomplete log block to the log buffer and - edit its data length: */ - lsn_t diff = recovered_lsn - start_lsn; - - ut_a(diff <= 0xFFFFUL); - - ut_memcpy(log_sys->buf, recv_sys->last_block, - OS_FILE_LOG_BLOCK_SIZE); - log_block_set_data_len(log_sys->buf, (ulint) diff); - } - - if (start_lsn >= finish_lsn) { - - return; - } - - for (;;) { - ulint len; - - end_lsn = start_lsn + RECV_SCAN_SIZE; - - if (end_lsn > finish_lsn) { - - end_lsn = finish_lsn; - } - - len = (ulint) (end_lsn - start_lsn); - - log_group_write_buf(group, log_sys->buf, len, start_lsn, 0); - if (end_lsn >= finish_lsn) { - - return; - } - - memset(log_sys->buf, 0, RECV_SCAN_SIZE); - - start_lsn = end_lsn; - } -} - -/********************************************************//** -Copies the log segment between group->recovered_lsn and recovered_lsn from the -most up-to-date log group to group, so that it contains the latest log data. 
*/ -static -void -recv_copy_group( -/*============*/ - log_group_t* up_to_date_group, /*!< in: the most up-to-date log - group */ - log_group_t* group, /*!< in: copy to this log - group */ - lsn_t recovered_lsn) /*!< in: recovery succeeded up - to this lsn */ -{ - lsn_t start_lsn; - lsn_t end_lsn; - - if (group->scanned_lsn >= recovered_lsn) { - - return; - } - - ut_a(RECV_SCAN_SIZE <= log_sys->buf_size); - - start_lsn = ut_uint64_align_down(group->scanned_lsn, - OS_FILE_LOG_BLOCK_SIZE); - for (;;) { - ulint len; - - end_lsn = start_lsn + RECV_SCAN_SIZE; - - if (end_lsn > recovered_lsn) { - end_lsn = ut_uint64_align_up(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE); - } - - log_group_read_log_seg(LOG_RECOVER, log_sys->buf, - up_to_date_group, start_lsn, end_lsn, - FALSE); - - len = (ulint) (end_lsn - start_lsn); - - log_group_write_buf(group, log_sys->buf, len, start_lsn, 0); - - if (end_lsn >= recovered_lsn) { - - return; - } - - start_lsn = end_lsn; - } -} -# endif /* UNIV_LOG_ARCHIVE */ - -/********************************************************//** -Copies a log segment from the most up-to-date log group to the other log -groups, so that they all contain the latest log data. Also writes the info -about the latest checkpoint to the groups, and inits the fields in the group -memory structs to up-to-date values. 
*/ -static -void -recv_synchronize_groups( -/*====================*/ -#ifdef UNIV_LOG_ARCHIVE - log_group_t* up_to_date_group /*!< in: the most up-to-date - log group */ -#endif - ) -{ - lsn_t start_lsn; - lsn_t end_lsn; - lsn_t recovered_lsn; - - recovered_lsn = recv_sys->recovered_lsn; - - /* Read the last recovered log block to the recovery system buffer: - the block is always incomplete */ - - start_lsn = ut_uint64_align_down(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE); - end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE); - - ut_a(start_lsn != end_lsn); - - log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block, -#ifdef UNIV_LOG_ARCHIVE - up_to_date_group, -#else /* UNIV_LOG_ARCHIVE */ - UT_LIST_GET_FIRST(log_sys->log_groups), -#endif /* UNIV_LOG_ARCHIVE */ - start_lsn, end_lsn, FALSE); - - for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups); - group; - group = UT_LIST_GET_NEXT(log_groups, group)) { -#ifdef UNIV_LOG_ARCHIVE - if (group != up_to_date_group) { - - /* Copy log data if needed */ - - recv_copy_group(group, up_to_date_group, - recovered_lsn); - } -#endif /* UNIV_LOG_ARCHIVE */ - /* Update the fields in the group struct to correspond to - recovered_lsn */ - - log_group_set_fields(group, recovered_lsn); - ut_a(log_sys); - - } - /* Copy the checkpoint info to the groups; remember that we have - incremented checkpoint_no by one, and the info will not be written - over the max checkpoint info, thus making the preservation of max - checkpoint info on disk certain */ - - log_groups_write_checkpoint_info(); - - mutex_exit(&(log_sys->mutex)); - - /* Wait for the checkpoint write to complete */ - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - - mutex_enter(&(log_sys->mutex)); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************************//** -Checks the consistency of the checkpoint info -@return TRUE if ok */ -ibool 
-recv_check_cp_is_consistent( -/*========================*/ - const byte* buf) /*!< in: buffer containing checkpoint info */ -{ - ulint fold; - - fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); - - if ((fold & 0xFFFFFFFFUL) != mach_read_from_4( - buf + LOG_CHECKPOINT_CHECKSUM_1)) { - return(FALSE); - } - - fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, - LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); - - if ((fold & 0xFFFFFFFFUL) != mach_read_from_4( - buf + LOG_CHECKPOINT_CHECKSUM_2)) { - return(FALSE); - } - - return(TRUE); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Looks for the maximum consistent checkpoint from the log groups. -@return error code or DB_SUCCESS */ -MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -recv_find_max_checkpoint( -/*=====================*/ - log_group_t** max_group, /*!< out: max group */ - ulint* max_field) /*!< out: LOG_CHECKPOINT_1 or - LOG_CHECKPOINT_2 */ -{ - log_group_t* group; - ib_uint64_t max_no; - ib_uint64_t checkpoint_no; - ulint field; - byte* buf; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - max_no = 0; - *max_group = NULL; - *max_field = 0; - - buf = log_sys->checkpoint_buf; - - while (group) { - - ulint log_hdr_log_block_size; - - group->state = LOG_GROUP_CORRUPTED; - - /* Assert that we can reuse log_sys->checkpoint_buf to read the - part of the header that contains the log block size. 
*/ - ut_ad(LOG_FILE_OS_FILE_LOG_BLOCK_SIZE + 4 - < OS_FILE_LOG_BLOCK_SIZE); - - fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0, - 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_sys->checkpoint_buf, NULL, NULL); - log_hdr_log_block_size - = mach_read_from_4(log_sys->checkpoint_buf - + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE); - if (log_hdr_log_block_size == 0) { - /* 0 means default value */ - log_hdr_log_block_size = 512; - } - if (UNIV_UNLIKELY(log_hdr_log_block_size - != srv_log_block_size)) { - fprintf(stderr, - "InnoDB: Error: The block size of ib_logfile " - "%lu is not equal to innodb_log_block_size " - "%lu.\n" - "InnoDB: Error: Suggestion - Recreate log " - "files.\n", - log_hdr_log_block_size, srv_log_block_size); - return(DB_ERROR); - } - - for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2; - field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) { - - log_group_read_checkpoint_info(group, field); - - if (!recv_check_cp_is_consistent(buf)) { -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Checkpoint in group" - " %lu at %lu invalid, %lu\n", - (ulong) group->id, - (ulong) field, - (ulong) mach_read_from_4( - buf - + LOG_CHECKPOINT_CHECKSUM_1)); - - } -#endif /* UNIV_DEBUG */ - goto not_consistent; - } - - group->state = LOG_GROUP_OK; - - group->lsn = mach_read_from_8( - buf + LOG_CHECKPOINT_LSN); - group->lsn_offset = mach_read_from_4( - buf + LOG_CHECKPOINT_OFFSET_LOW32); - group->lsn_offset |= ((lsn_t) mach_read_from_4( - buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32; - checkpoint_no = mach_read_from_8( - buf + LOG_CHECKPOINT_NO); - - if (!log_crypt_read_checkpoint_buf(buf)) { - return DB_ERROR; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Checkpoint number %lu" - " found in group %lu\n", - (ulong) checkpoint_no, - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - - if (checkpoint_no >= max_no) { - *max_group = group; - *max_field = field; - max_no = checkpoint_no; - } - -not_consistent: - ; - } - - 
group = UT_LIST_GET_NEXT(log_groups, group); - } - - if (*max_group == NULL) { - - fprintf(stderr, - "InnoDB: No valid checkpoint found.\n" - "InnoDB: If you are attempting downgrade" - " from MySQL 5.7.9 or later,\n" - "InnoDB: please refer to " REFMAN - "upgrading-downgrading.html\n" - "InnoDB: If this error appears when you are" - " creating an InnoDB database,\n" - "InnoDB: the problem may be that during" - " an earlier attempt you managed\n" - "InnoDB: to create the InnoDB data files," - " but log file creation failed.\n" - "InnoDB: If that is the case, please refer to\n" - "InnoDB: " REFMAN "error-creating-innodb.html\n"); - return(DB_ERROR); - } - - return(DB_SUCCESS); -} -#else /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Reads the checkpoint info needed in hot backup. -@return TRUE if success */ -UNIV_INTERN -ibool -recv_read_checkpoint_info_for_backup( -/*=================================*/ - const byte* hdr, /*!< in: buffer containing the log group - header */ - lsn_t* lsn, /*!< out: checkpoint lsn */ - lsn_t* offset, /*!< out: checkpoint offset in the log group */ - lsn_t* cp_no, /*!< out: checkpoint number */ - lsn_t* first_header_lsn) - /*!< out: lsn of of the start of the - first log file */ -{ - ulint max_cp = 0; - ib_uint64_t max_cp_no = 0; - const byte* cp_buf; - - cp_buf = hdr + LOG_CHECKPOINT_1; - - if (recv_check_cp_is_consistent(cp_buf)) { - max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO); - max_cp = LOG_CHECKPOINT_1; - } - - cp_buf = hdr + LOG_CHECKPOINT_2; - - if (recv_check_cp_is_consistent(cp_buf)) { - if (mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO) > max_cp_no) { - max_cp = LOG_CHECKPOINT_2; - } - } - - if (max_cp == 0) { - return(FALSE); - } - - cp_buf = hdr + max_cp; - - *lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN); - *offset = mach_read_from_4( - cp_buf + LOG_CHECKPOINT_OFFSET_LOW32); - *offset |= ((lsn_t) mach_read_from_4( - cp_buf + LOG_CHECKPOINT_OFFSET_HIGH32)) 
<< 32; - - *cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO); - - *first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN); - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ - -/******************************************************//** -Checks the 4-byte checksum to the trailer checksum field of a log -block. We also accept a log block in the old format before -InnoDB-3.23.52 where the checksum field contains the log block number. -@return TRUE if ok, or if the log block may be in the format of InnoDB -version predating 3.23.52 */ -UNIV_INTERN -ibool -log_block_checksum_is_ok_or_old_format( -/*===================================*/ - const byte* block, /*!< in: pointer to a log block */ - bool print_err) /*!< in print if error found */ -{ -#ifdef UNIV_LOG_DEBUG - return(TRUE); -#endif /* UNIV_LOG_DEBUG */ - - ulint block_checksum = log_block_get_checksum(block); - - if (UNIV_LIKELY(srv_log_checksum_algorithm == - SRV_CHECKSUM_ALGORITHM_NONE || - log_block_calc_checksum(block) == block_checksum)) { - - return(TRUE); - } - - if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 || - srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || - srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) { - - const char* algo = NULL; - - ib_logf(IB_LOG_LEVEL_ERROR, - "log block checksum mismatch: expected " ULINTPF ", " - "calculated checksum " ULINTPF, - block_checksum, - log_block_calc_checksum(block)); - - if (block_checksum == LOG_NO_CHECKSUM_MAGIC) { - - algo = "none"; - } else if (block_checksum == - log_block_calc_checksum_crc32(block)) { - - algo = "crc32"; - } else if (block_checksum == - log_block_calc_checksum_innodb(block)) { - - algo = "innodb"; - } - - if (algo) { - - const char* current_algo; - - current_algo = buf_checksum_algorithm_name( - (srv_checksum_algorithm_t) - srv_log_checksum_algorithm); - - ib_logf(IB_LOG_LEVEL_ERROR, - "current InnoDB log checksum type: %s, " - "detected log checksum type: %s", - 
current_algo, - algo); - } - - ib_logf(IB_LOG_LEVEL_FATAL, - "STRICT method was specified for innodb_log_checksum, " - "so we intentionally assert here."); - } - - ut_ad(srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_CRC32 || - srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB); - - if (block_checksum == LOG_NO_CHECKSUM_MAGIC || - block_checksum == log_block_calc_checksum_crc32(block) || - block_checksum == log_block_calc_checksum_innodb(block)) { - - return(TRUE); - } - - if (log_block_get_hdr_no(block) == block_checksum) { - - /* We assume the log block is in the format of - InnoDB version < 3.23.52 and the block is ok */ -#if 0 - fprintf(stderr, - "InnoDB: Scanned old format < InnoDB-3.23.52" - " log block number %lu\n", - log_block_get_hdr_no(block)); -#endif - return(TRUE); - } - - if (print_err) { - fprintf(stderr, "BROKEN: block: %lu checkpoint: %lu %.8lx %.8lx\n", - log_block_get_hdr_no(block), - log_block_get_checkpoint_no(block), - log_block_calc_checksum(block), - log_block_get_checksum(block)); - } - - return(FALSE); -} - -#ifdef UNIV_HOTBACKUP -/*******************************************************************//** -Scans the log segment and n_bytes_scanned is set to the length of valid -log scanned. 
*/ -UNIV_INTERN -void -recv_scan_log_seg_for_backup( -/*=========================*/ - byte* buf, /*!< in: buffer containing log data */ - ulint buf_len, /*!< in: data length in that buffer */ - lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start, - we return scanned lsn */ - ulint* scanned_checkpoint_no, - /*!< in/out: 4 lowest bytes of the - highest scanned checkpoint number so - far */ - ulint* n_bytes_scanned)/*!< out: how much we were able to - scan, smaller than buf_len if log - data ended here */ -{ - ulint data_len; - byte* log_block; - ulint no; - - *n_bytes_scanned = 0; - - for (log_block = buf; log_block < buf + buf_len; - log_block += OS_FILE_LOG_BLOCK_SIZE) { - - no = log_block_get_hdr_no(log_block); - -#if 0 - fprintf(stderr, "Log block header no %lu\n", no); -#endif - - if (no != log_block_convert_lsn_to_no(*scanned_lsn) - || !log_block_checksum_is_ok_or_old_format(log_block)) { -#if 0 - fprintf(stderr, - "Log block n:o %lu, scanned lsn n:o %lu\n", - no, log_block_convert_lsn_to_no(*scanned_lsn)); -#endif - /* Garbage or an incompletely written log block */ - - log_block += OS_FILE_LOG_BLOCK_SIZE; -#if 0 - fprintf(stderr, - "Next log block n:o %lu\n", - log_block_get_hdr_no(log_block)); -#endif - break; - } - - if (*scanned_checkpoint_no > 0 - && log_block_get_checkpoint_no(log_block) - < *scanned_checkpoint_no - && *scanned_checkpoint_no - - log_block_get_checkpoint_no(log_block) - > 0x80000000UL) { - - /* Garbage from a log buffer flush which was made - before the most recent database recovery */ -#if 0 - fprintf(stderr, - "Scanned cp n:o %lu, block cp n:o %lu\n", - *scanned_checkpoint_no, - log_block_get_checkpoint_no(log_block)); -#endif - break; - } - - data_len = log_block_get_data_len(log_block); - - *scanned_checkpoint_no - = log_block_get_checkpoint_no(log_block); - *scanned_lsn += data_len; - - *n_bytes_scanned += data_len; - - if (data_len < OS_FILE_LOG_BLOCK_SIZE) { - /* Log data ends here */ - -#if 0 - fprintf(stderr, "Log block data len 
%lu\n", - data_len); -#endif - break; - } - } -} -#endif /* UNIV_HOTBACKUP */ - -/*******************************************************************//** -Tries to parse a single log record body and also applies it to a page if -specified. File ops are parsed, but not applied in this function. -@return log record end, NULL if not a complete record */ -static -byte* -recv_parse_or_apply_log_rec_body( -/*=============================*/ - byte type, /*!< in: type */ - byte* ptr, /*!< in: pointer to a buffer */ - byte* end_ptr,/*!< in: pointer to the buffer end */ - buf_block_t* block, /*!< in/out: buffer block or NULL; if - not NULL, then the log record is - applied to the page, and the log - record should be complete then */ - mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL - if and only if block is non-NULL */ - ulint space_id) - /*!< in: tablespace id obtained by - parsing initial log record */ -{ - dict_index_t* index = NULL; - page_t* page; - page_zip_des_t* page_zip; -#ifdef UNIV_DEBUG - ulint page_type; -#endif /* UNIV_DEBUG */ - - ut_ad(!block == !mtr); - - if (block) { - page = block->frame; - page_zip = buf_block_get_page_zip(block); - ut_d(page_type = fil_page_get_type(page)); - } else { - page = NULL; - page_zip = NULL; - ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED); - } - - switch (type) { -#ifdef UNIV_LOG_LSN_DEBUG - case MLOG_LSN: - /* The LSN is checked in recv_parse_log_rec(). 
*/ - break; -#endif /* UNIV_LOG_LSN_DEBUG */ - case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES: - /* Note that crypt data can be set to empty page */ - ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip); - break; - case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_INSERT, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, - block, index, mtr); - } - break; - case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_CLUST_DELETE_MARK, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = btr_cur_parse_del_mark_set_clust_rec( - ptr, end_ptr, page, page_zip, index); - } - break; - case MLOG_COMP_REC_SEC_DELETE_MARK: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - /* This log record type is obsolete, but we process it for - backward compatibility with MySQL 5.0.3 and 5.0.4. 
*/ - ut_a(!page || page_is_comp(page)); - ut_a(!page_zip); - ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index); - if (!ptr) { - break; - } - /* Fall through */ - case MLOG_REC_SEC_DELETE_MARK: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, - page, page_zip); - break; - case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_UPDATE_IN_PLACE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page, - page_zip, index); - } - break; - case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE: - case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_LIST_END_DELETE - || type == MLOG_COMP_LIST_START_DELETE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_parse_delete_rec_list(type, ptr, end_ptr, - block, index, mtr); - } - break; - case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_LIST_END_COPY_CREATED, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_parse_copy_rec_list_to_created_page( - ptr, end_ptr, block, index, mtr); - } - break; - case MLOG_PAGE_REORGANIZE: - case MLOG_COMP_PAGE_REORGANIZE: - case MLOG_ZIP_PAGE_REORGANIZE: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type != MLOG_PAGE_REORGANIZE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = 
btr_parse_page_reorganize( - ptr, end_ptr, index, - type == MLOG_ZIP_PAGE_REORGANIZE, - block, mtr); - } - break; - case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE: - /* Allow anything in page_type when creating a page. */ - ut_a(!page_zip); - ptr = page_parse_create(ptr, end_ptr, - type == MLOG_COMP_PAGE_CREATE, - block, mtr); - break; - case MLOG_UNDO_INSERT: - ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); - ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page); - break; - case MLOG_UNDO_ERASE_END: - ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); - ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr); - break; - case MLOG_UNDO_INIT: - /* Allow anything in page_type when creating a page. */ - ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); - break; - case MLOG_UNDO_HDR_DISCARD: - ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); - ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr); - break; - case MLOG_UNDO_HDR_CREATE: - case MLOG_UNDO_HDR_REUSE: - ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG); - ptr = trx_undo_parse_page_header(type, ptr, end_ptr, - page, mtr); - break; - case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - /* On a compressed page, MLOG_COMP_REC_MIN_MARK - will be followed by MLOG_COMP_REC_DELETE - or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL) - in the same mini-transaction. 
*/ - ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip); - ptr = btr_parse_set_min_rec_mark( - ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK, - page, mtr); - break; - case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_DELETE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_cur_parse_delete_rec(ptr, end_ptr, - block, index, mtr); - } - break; - case MLOG_IBUF_BITMAP_INIT: - /* Allow anything in page_type when creating a page. */ - ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr); - break; - case MLOG_INIT_FILE_PAGE: - /* Allow anything in page_type when creating a page. */ - ptr = fsp_parse_init_file_page(ptr, end_ptr, block); - break; - case MLOG_WRITE_STRING: - /* Allow setting crypt_data also for empty page */ - ptr = mlog_parse_string(ptr, end_ptr, page, page_zip); - break; - case MLOG_FILE_RENAME: - /* Do not rerun file-based log entries if this is - IO completion from a page read. */ - if (page == NULL) { - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, - (recv_recovery_is_on() - ? space_id : 0), 0); - } - break; - case MLOG_FILE_CREATE: - case MLOG_FILE_DELETE: - case MLOG_FILE_CREATE2: - /* Do not rerun file-based log entries if this is - IO completion from a page read. 
*/ - if (page == NULL) { - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, - type, 0, 0); - } - break; - case MLOG_ZIP_WRITE_NODE_PTR: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - ptr = page_zip_parse_write_node_ptr(ptr, end_ptr, - page, page_zip); - break; - case MLOG_ZIP_WRITE_BLOB_PTR: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr, - page, page_zip); - break; - case MLOG_ZIP_WRITE_HEADER: - ut_ad(!page || page_type == FIL_PAGE_INDEX); - ptr = page_zip_parse_write_header(ptr, end_ptr, - page, page_zip); - break; - case MLOG_ZIP_PAGE_COMPRESS: - /* Allow anything in page_type when creating a page. */ - ptr = page_zip_parse_compress(ptr, end_ptr, - page, page_zip); - break; - case MLOG_ZIP_PAGE_COMPRESS_NO_DATA: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, TRUE, &index))) { - - ut_a(!page || ((ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table))); - ptr = page_zip_parse_compress_no_data( - ptr, end_ptr, page, page_zip, index); - } - break; - case MLOG_FILE_WRITE_CRYPT_DATA: - dberr_t err; - ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, block, &err)); - - if (err != DB_SUCCESS) { - recv_sys->found_corrupt_log = TRUE; - } - break; - default: - ptr = NULL; - recv_sys->found_corrupt_log = TRUE; - } - - if (index) { - dict_table_t* table = index->table; - - dict_mem_index_free(index); - dict_mem_table_free(table); - } - - return(ptr); -} - -/*********************************************************************//** -Calculates the fold value of a page file address: used in inserting or -searching for a log record in the hash table. 
-@return folded value */ -UNIV_INLINE -ulint -recv_fold( -/*======*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ -{ - return(ut_fold_ulint_pair(space, page_no)); -} - -/*********************************************************************//** -Calculates the hash value of a page file address: used in inserting or -searching for a log record in the hash table. -@return folded value */ -UNIV_INLINE -ulint -recv_hash( -/*======*/ - ulint space, /*!< in: space */ - ulint page_no)/*!< in: page number */ -{ - return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash)); -} - -/*********************************************************************//** -Gets the hashed file address struct for a page. -@return file address struct, NULL if not found from the hash table */ -static -recv_addr_t* -recv_get_fil_addr_struct( -/*=====================*/ - ulint space, /*!< in: space id */ - ulint page_no)/*!< in: page number */ -{ - recv_addr_t* recv_addr; - - for (recv_addr = static_cast<recv_addr_t*>( - HASH_GET_FIRST(recv_sys->addr_hash, - recv_hash(space, page_no))); - recv_addr != 0; - recv_addr = static_cast<recv_addr_t*>( - HASH_GET_NEXT(addr_hash, recv_addr))) { - - if (recv_addr->space == space - && recv_addr->page_no == page_no) { - - return(recv_addr); - } - } - - return(NULL); -} - -/*******************************************************************//** -Adds a new log record to the hash table of log records. 
*/ -static -void -recv_add_to_hash_table( -/*===================*/ - byte type, /*!< in: log record type */ - ulint space, /*!< in: space id */ - ulint page_no, /*!< in: page number */ - byte* body, /*!< in: log record body */ - byte* rec_end, /*!< in: log record end */ - lsn_t start_lsn, /*!< in: start lsn of the mtr */ - lsn_t end_lsn) /*!< in: end lsn of the mtr */ -{ - recv_t* recv; - ulint len; - recv_data_t* recv_data; - recv_data_t** prev_field; - recv_addr_t* recv_addr; - - if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) { - /* The tablespace does not exist any more: do not store the - log record */ - - return; - } - - len = rec_end - body; - - recv = static_cast<recv_t*>( - mem_heap_alloc(recv_sys->heap, sizeof(recv_t))); - - recv->type = type; - recv->len = rec_end - body; - recv->start_lsn = start_lsn; - recv->end_lsn = end_lsn; - - recv_addr = recv_get_fil_addr_struct(space, page_no); - - if (recv_addr == NULL) { - recv_addr = static_cast<recv_addr_t*>( - mem_heap_alloc(recv_sys->heap, sizeof(recv_addr_t))); - - recv_addr->space = space; - recv_addr->page_no = page_no; - recv_addr->state = RECV_NOT_PROCESSED; - - UT_LIST_INIT(recv_addr->rec_list); - - HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash, - recv_fold(space, page_no), recv_addr); - recv_sys->n_addrs++; -#if 0 - fprintf(stderr, "Inserting log rec for space %lu, page %lu\n", - space, page_no); -#endif - } - - UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv); - - prev_field = &(recv->data); - - /* Store the log record body in chunks of less than UNIV_PAGE_SIZE: - recv_sys->heap grows into the buffer pool, and bigger chunks could not - be allocated */ - - while (rec_end > body) { - - len = rec_end - body; - - if (len > RECV_DATA_BLOCK_SIZE) { - len = RECV_DATA_BLOCK_SIZE; - } - - recv_data = static_cast<recv_data_t*>( - mem_heap_alloc(recv_sys->heap, - sizeof(recv_data_t) + len)); - - *prev_field = recv_data; - - memcpy(recv_data + 1, body, len); - - prev_field = 
&(recv_data->next); - - body += len; - } - - *prev_field = NULL; -} - -/*********************************************************************//** -Copies the log record body from recv to buf. */ -static -void -recv_data_copy_to_buf( -/*==================*/ - byte* buf, /*!< in: buffer of length at least recv->len */ - recv_t* recv) /*!< in: log record */ -{ - recv_data_t* recv_data; - ulint part_len; - ulint len; - - len = recv->len; - recv_data = recv->data; - - while (len > 0) { - if (len > RECV_DATA_BLOCK_SIZE) { - part_len = RECV_DATA_BLOCK_SIZE; - } else { - part_len = len; - } - - ut_memcpy(buf, ((byte*) recv_data) + sizeof(recv_data_t), - part_len); - buf += part_len; - len -= part_len; - - recv_data = recv_data->next; - } -} - -/************************************************************************//** -Applies the hashed log records to the page, if the page lsn is less than the -lsn of a log record. This can be called when a buffer page has just been -read in, or also for a page already in the buffer pool. 
*/ -UNIV_INTERN -void -recv_recover_page_func( -/*===================*/ -#ifndef UNIV_HOTBACKUP - ibool just_read_in, - /*!< in: TRUE if the i/o handler calls - this for a freshly read page */ -#endif /* !UNIV_HOTBACKUP */ - buf_block_t* block) /*!< in/out: buffer block */ -{ - page_t* page; - page_zip_des_t* page_zip; - recv_addr_t* recv_addr; - recv_t* recv; - byte* buf; - lsn_t start_lsn; - lsn_t end_lsn; - lsn_t page_lsn; - lsn_t page_newest_lsn; - ibool modification_to_page; -#ifndef UNIV_HOTBACKUP - ibool success; -#endif /* !UNIV_HOTBACKUP */ - mtr_t mtr; - - mutex_enter(&(recv_sys->mutex)); - - if (recv_sys->apply_log_recs == FALSE) { - - /* Log records should not be applied now */ - - mutex_exit(&(recv_sys->mutex)); - - return; - } - - recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block), - buf_block_get_page_no(block)); - - if ((recv_addr == NULL) - /* bugfix: http://bugs.mysql.com/bug.php?id=44140 */ - || (recv_addr->state == RECV_BEING_READ && !just_read_in) - || (recv_addr->state == RECV_BEING_PROCESSED) - || (recv_addr->state == RECV_PROCESSED)) { - - mutex_exit(&(recv_sys->mutex)); - - return; - } - -#if 0 - fprintf(stderr, "Recovering space %lu, page %lu\n", - buf_block_get_space(block), buf_block_get_page_no(block)); -#endif - - recv_addr->state = RECV_BEING_PROCESSED; - - mutex_exit(&(recv_sys->mutex)); - - mtr_start(&mtr); - mtr_set_log_mode(&mtr, MTR_LOG_NONE); - - page = block->frame; - page_zip = buf_block_get_page_zip(block); - -#ifndef UNIV_HOTBACKUP - if (just_read_in) { - /* Move the ownership of the x-latch on the page to - this OS thread, so that we can acquire a second - x-latch on it. This is needed for the operations to - the page to pass the debug checks. 
*/ - - rw_lock_x_lock_move_ownership(&block->lock); - } - - success = buf_page_get_known_nowait(RW_X_LATCH, block, - BUF_KEEP_OLD, - __FILE__, __LINE__, - &mtr); - ut_a(success); - - buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); -#endif /* !UNIV_HOTBACKUP */ - - /* Read the newest modification lsn from the page */ - page_lsn = mach_read_from_8(page + FIL_PAGE_LSN); - -#ifndef UNIV_HOTBACKUP - /* It may be that the page has been modified in the buffer - pool: read the newest modification lsn there */ - - page_newest_lsn = buf_page_get_newest_modification(&block->page); - - if (page_newest_lsn) { - - page_lsn = page_newest_lsn; - } -#else /* !UNIV_HOTBACKUP */ - /* In recovery from a backup we do not really use the buffer pool */ - page_newest_lsn = 0; -#endif /* !UNIV_HOTBACKUP */ - - modification_to_page = FALSE; - start_lsn = end_lsn = 0; - - recv = UT_LIST_GET_FIRST(recv_addr->rec_list); - - while (recv) { - end_lsn = recv->end_lsn; - - if (recv->len > RECV_DATA_BLOCK_SIZE) { - /* We have to copy the record body to a separate - buffer */ - - buf = static_cast<byte*>(mem_alloc(recv->len)); - - recv_data_copy_to_buf(buf, recv); - } else { - buf = ((byte*)(recv->data)) + sizeof(recv_data_t); - } - - if (recv->type == MLOG_INIT_FILE_PAGE) { - page_lsn = page_newest_lsn; - - memset(FIL_PAGE_LSN + page, 0, 8); - memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM - + page, 0, 8); - - if (page_zip) { - memset(FIL_PAGE_LSN + page_zip->data, 0, 8); - } - } - - if (recv->start_lsn >= page_lsn) { - - lsn_t end_lsn; - - if (!modification_to_page) { - - modification_to_page = TRUE; - start_lsn = recv->start_lsn; - } - - DBUG_PRINT("ib_log", - ("apply " LSN_PF ": %u len %u " - "page %u:%u", recv->start_lsn, - (unsigned) recv->type, - (unsigned) recv->len, - (unsigned) recv_addr->space, - (unsigned) recv_addr->page_no)); - - recv_parse_or_apply_log_rec_body(recv->type, buf, - buf + recv->len, - block, &mtr, - recv_addr->space); - - end_lsn = recv->start_lsn + recv->len; 
- mach_write_to_8(FIL_PAGE_LSN + page, end_lsn); - mach_write_to_8(UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM - + page, end_lsn); - - if (page_zip) { - mach_write_to_8(FIL_PAGE_LSN - + page_zip->data, end_lsn); - } - } - - if (recv->len > RECV_DATA_BLOCK_SIZE) { - mem_free(buf); - } - - recv = UT_LIST_GET_NEXT(rec_list, recv); - } - -#ifdef UNIV_ZIP_DEBUG - if (fil_page_get_type(page) == FIL_PAGE_INDEX) { - page_zip_des_t* page_zip = buf_block_get_page_zip(block); - - ut_a(!page_zip - || page_zip_validate_low(page_zip, page, NULL, FALSE)); - } -#endif /* UNIV_ZIP_DEBUG */ - -#ifndef UNIV_HOTBACKUP - if (modification_to_page) { - ut_a(block); - - log_flush_order_mutex_enter(); - buf_flush_recv_note_modification(block, start_lsn, end_lsn); - log_flush_order_mutex_exit(); - } -#endif /* !UNIV_HOTBACKUP */ - - /* Make sure that committing mtr does not change the modification - lsn values of page */ - - mtr.modifications = FALSE; - - mtr_commit(&mtr); - - ib_time_t time = ut_time(); - - mutex_enter(&(recv_sys->mutex)); - - if (recv_max_page_lsn < page_lsn) { - recv_max_page_lsn = page_lsn; - } - - recv_addr->state = RECV_PROCESSED; - - ut_a(recv_sys->n_addrs > 0); - if (ulint n = --recv_sys->n_addrs) { - if (recv_sys->report(time)) { - ib_logf(IB_LOG_LEVEL_INFO, - "To recover: " ULINTPF " pages from log", n); - sd_notifyf(0, "STATUS=To recover: " ULINTPF - " pages from log", n); - } - } - - mutex_exit(&recv_sys->mutex); -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************************//** -Reads in pages which have hashed log records, from an area around a given -page number. 
-@return number of pages found */ -static -ulint -recv_read_in_area( -/*==============*/ - ulint space, /*!< in: space */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint page_no)/*!< in: page number */ -{ - recv_addr_t* recv_addr; - ulint page_nos[RECV_READ_AHEAD_AREA]; - ulint low_limit; - ulint n; - - low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA); - - n = 0; - - for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA; - page_no++) { - recv_addr = recv_get_fil_addr_struct(space, page_no); - - if (recv_addr && !buf_page_peek(space, page_no)) { - - mutex_enter(&(recv_sys->mutex)); - - if (recv_addr->state == RECV_NOT_PROCESSED) { - recv_addr->state = RECV_BEING_READ; - - page_nos[n] = page_no; - - n++; - } - - mutex_exit(&(recv_sys->mutex)); - } - } - - buf_read_recv_pages(FALSE, space, zip_size, page_nos, n); - return(n); -} - -/** Apply the hash table of stored log records to persistent data pages. -@param[in] last_batch whether the change buffer merge will be - performed as part of the operation */ -UNIV_INTERN -void -recv_apply_hashed_log_recs(bool last_batch) -{ - for (;;) { - mutex_enter(&recv_sys->mutex); - - if (!recv_sys->apply_batch_on) { - break; - } - - if (recv_sys->found_corrupt_log) { - mutex_exit(&recv_sys->mutex); - return; - } - - mutex_exit(&recv_sys->mutex); - os_thread_sleep(500000); - } - - ut_ad(!last_batch == mutex_own(&log_sys->mutex)); - - if (!last_batch) { - recv_no_ibuf_operations = TRUE; - } - - if (ulint n = recv_sys->n_addrs) { - const char* msg = last_batch - ? 
"Starting final batch to recover " - : "Starting a batch to recover "; - ib_logf(IB_LOG_LEVEL_INFO, - "%s" ULINTPF " pages from redo log", msg, n); - sd_notifyf(0, "STATUS=%s" ULINTPF " pages from redo log", - msg, n); - } - - recv_sys->apply_log_recs = TRUE; - recv_sys->apply_batch_on = TRUE; - - for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) { - for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>( - HASH_GET_FIRST(recv_sys->addr_hash, i)); - recv_addr; - recv_addr = static_cast<recv_addr_t*>( - HASH_GET_NEXT(addr_hash, recv_addr))) { - - ulint space = recv_addr->space; - ulint zip_size = fil_space_get_zip_size(space); - ulint page_no = recv_addr->page_no; - - if (recv_addr->state == RECV_NOT_PROCESSED) { - mutex_exit(&recv_sys->mutex); - - if (buf_page_peek(space, page_no)) { - mtr_t mtr; - mtr_start(&mtr); - buf_block_t* block = buf_page_get( - space, zip_size, page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - block, SYNC_NO_ORDER_CHECK); - - recv_recover_page(FALSE, block); - mtr_commit(&mtr); - } else { - recv_read_in_area(space, zip_size, - page_no); - } - - mutex_enter(&recv_sys->mutex); - } - } - } - - /* Wait until all the pages have been processed */ - - while (recv_sys->n_addrs != 0) { - - mutex_exit(&(recv_sys->mutex)); - - if (recv_sys->found_corrupt_log) { - return; - } - - os_thread_sleep(500000); - - mutex_enter(&(recv_sys->mutex)); - } - - if (!last_batch) { - bool success; - - /* Flush all the file pages to disk and invalidate them in - the buffer pool */ - - ut_d(recv_no_log_write = TRUE); - mutex_exit(&(recv_sys->mutex)); - mutex_exit(&(log_sys->mutex)); - - /* Stop the recv_writer thread from issuing any LRU - flush batches. */ - mutex_enter(&recv_sys->writer_mutex); - - /* Wait for any currently run batch to end. 
*/ - buf_flush_wait_LRU_batch_end(); - - success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); - - ut_a(success); - - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - buf_pool_invalidate(); - - /* Allow batches from recv_writer thread. */ - mutex_exit(&recv_sys->writer_mutex); - - mutex_enter(&(log_sys->mutex)); - mutex_enter(&(recv_sys->mutex)); - ut_d(recv_no_log_write = FALSE); - - recv_no_ibuf_operations = FALSE; - } - - recv_sys->apply_log_recs = FALSE; - recv_sys->apply_batch_on = FALSE; - - recv_sys_empty_hash(); - - mutex_exit(&recv_sys->mutex); -} -#else /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Applies log records in the hash table to a backup. */ -UNIV_INTERN -void -recv_apply_log_recs_for_backup(void) -/*================================*/ -{ - recv_addr_t* recv_addr; - ulint n_hash_cells; - buf_block_t* block; - ulint actual_size; - ibool success; - ulint error; - ulint i; - - recv_sys->apply_log_recs = TRUE; - recv_sys->apply_batch_on = TRUE; - - block = back_block1; - - n_hash_cells = hash_get_n_cells(recv_sys->addr_hash); - - for (i = 0; i < n_hash_cells; i++) { - /* The address hash table is externally chained */ - recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node; - - while (recv_addr != NULL) { - - ulint zip_size - = fil_space_get_zip_size(recv_addr->space); - - if (zip_size == ULINT_UNDEFINED) { -#if 0 - fprintf(stderr, - "InnoDB: Warning: cannot apply" - " log record to" - " tablespace %lu page %lu,\n" - "InnoDB: because tablespace with" - " that id does not exist.\n", - recv_addr->space, recv_addr->page_no); -#endif - recv_addr->state = RECV_PROCESSED; - - ut_a(recv_sys->n_addrs); - recv_sys->n_addrs--; - - goto skip_this_recv_addr; - } - - /* We simulate a page read made by the buffer pool, to - make sure the recovery apparatus works ok. We must init - the block. 
*/ - - buf_page_init_for_backup_restore( - recv_addr->space, recv_addr->page_no, - zip_size, block); - - /* Extend the tablespace's last file if the page_no - does not fall inside its bounds; we assume the last - file is auto-extending, and mysqlbackup copied the file - when it still was smaller */ - - success = fil_extend_space_to_desired_size( - &actual_size, - recv_addr->space, recv_addr->page_no + 1); - if (!success) { - fprintf(stderr, - "InnoDB: Fatal error: cannot extend" - " tablespace %u to hold %u pages\n", - recv_addr->space, recv_addr->page_no); - - exit(1); - } - - /* Read the page from the tablespace file using the - fil0fil.cc routines */ - - if (zip_size) { - error = fil_io(OS_FILE_READ, true, - recv_addr->space, zip_size, - recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL, 0, 0, false); - if (error == DB_SUCCESS - && !buf_zip_decompress(block, TRUE)) { - exit(1); - } - } else { - error = fil_io(OS_FILE_READ, true, - recv_addr->space, 0, - recv_addr->page_no, 0, - UNIV_PAGE_SIZE, - block->frame, NULL, 0, 0, false); - } - - if (error != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Fatal error: cannot read" - " from tablespace" - " %lu page number %lu\n", - (ulong) recv_addr->space, - (ulong) recv_addr->page_no); - - exit(1); - } - - /* Apply the log records to this page */ - recv_recover_page(FALSE, block); - - /* Write the page back to the tablespace file using the - fil0fil.cc routines */ - - buf_flush_init_for_writing( - block->frame, buf_block_get_page_zip(block), - mach_read_from_8(block->frame + FIL_PAGE_LSN)); - - if (zip_size) { - error = fil_io(OS_FILE_WRITE, true, - recv_addr->space, zip_size, - recv_addr->page_no, 0, - zip_size, - block->page.zip.data, NULL, 0, 0, false); - } else { - error = fil_io(OS_FILE_WRITE, true, - recv_addr->space, 0, - recv_addr->page_no, 0, - UNIV_PAGE_SIZE, - block->frame, NULL, 0, - block->latest_modification, - block->encrypt_later); - } -skip_this_recv_addr: - recv_addr = HASH_GET_NEXT(addr_hash, 
recv_addr); - } - } - sd_notify(0, "STATUS=InnoDB: Apply batch for backup completed"); - - recv_sys_empty_hash(); -} -#endif /* !UNIV_HOTBACKUP */ - -/*******************************************************************//** -Tries to parse a single log record and returns its length. -@return length of the record, or 0 if the record was not complete */ -UNIV_INTERN -ulint -recv_parse_log_rec( -/*===============*/ - byte* ptr, /*!< in: pointer to a buffer */ - byte* end_ptr,/*!< in: pointer to the buffer end */ - byte* type, /*!< out: type */ - ulint* space, /*!< out: space id */ - ulint* page_no,/*!< out: page number */ - byte** body) /*!< out: log record body start */ -{ - byte* new_ptr; - - *body = NULL; - - if (ptr == end_ptr) { - - return(0); - } - - if (*ptr == MLOG_MULTI_REC_END) { - - *type = *ptr; - - return(1); - } - - if (*ptr == MLOG_DUMMY_RECORD) { - *type = *ptr; - - *space = ULINT_UNDEFINED - 1; /* For debugging */ - - return(1); - } - - new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space, - page_no); - *body = new_ptr; - - if (UNIV_UNLIKELY(!new_ptr)) { - - return(0); - } - -#ifdef UNIV_LOG_LSN_DEBUG - if (*type == MLOG_LSN) { - lsn_t lsn = (lsn_t) *space << 32 | *page_no; -# ifdef UNIV_LOG_DEBUG - ut_a(lsn == log_sys->old_lsn); -# else /* UNIV_LOG_DEBUG */ - ut_a(lsn == recv_sys->recovered_lsn); -# endif /* UNIV_LOG_DEBUG */ - } -#endif /* UNIV_LOG_LSN_DEBUG */ - - byte* old_ptr = new_ptr; - new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr, - NULL, NULL, *space); - if (UNIV_UNLIKELY(new_ptr == NULL)) { - - return(0); - } - - if (*page_no == 0 && *type == MLOG_4BYTES - && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) { - ulint size; - mach_parse_compressed(old_ptr + 2, end_ptr, &size); - fil_space_set_recv_size(*space, size); - } - - if (*page_no > recv_max_parsed_page_no) { - recv_max_parsed_page_no = *page_no; - } - - return(new_ptr - ptr); -} - -/*******************************************************//** 
-Calculates the new value for lsn when more data is added to the log. */ -UNIV_INTERN -lsn_t -recv_calc_lsn_on_data_add( -/*======================*/ - lsn_t lsn, /*!< in: old lsn */ - ib_uint64_t len) /*!< in: this many bytes of data is - added, log block headers not included */ -{ - ulint frag_len; - ib_uint64_t lsn_len; - - frag_len = (lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE; - ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE); - lsn_len = len; - lsn_len += (lsn_len + frag_len) - / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE) - * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE); - - return(lsn + lsn_len); -} - -#ifdef UNIV_LOG_DEBUG -/*******************************************************//** -Checks that the parser recognizes incomplete initial segments of a log -record as incomplete. */ -static -void -recv_check_incomplete_log_recs( -/*===========================*/ - byte* ptr, /*!< in: pointer to a complete log record */ - ulint len) /*!< in: length of the log record */ -{ - ulint i; - byte type; - ulint space; - ulint page_no; - byte* body; - - for (i = 0; i < len; i++) { - ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space, - &page_no, &body)); - } -} -#endif /* UNIV_LOG_DEBUG */ - -/*******************************************************//** -Prints diagnostic info of corrupt log. 
*/ -static -void -recv_report_corrupt_log( -/*====================*/ - byte* ptr, /*!< in: pointer to corrupt log record */ - byte type, /*!< in: type of the record */ - ulint space, /*!< in: space id, this may also be garbage */ - ulint page_no)/*!< in: page number, this may also be garbage */ -{ - fprintf(stderr, - "InnoDB: ############### CORRUPT LOG RECORD FOUND\n" - "InnoDB: Log record type %lu, space id %lu, page number %lu\n" - "InnoDB: Log parsing proceeded successfully up to " LSN_PF "\n" - "InnoDB: Previous log record type %lu, is multi %lu\n" - "InnoDB: Recv offset %lu, prev %lu\n", - (ulong) type, (ulong) space, (ulong) page_no, - recv_sys->recovered_lsn, - (ulong) recv_previous_parsed_rec_type, - (ulong) recv_previous_parsed_rec_is_multi, - (ulong) (ptr - recv_sys->buf), - (ulong) recv_previous_parsed_rec_offset); - - if ((ulint)(ptr - recv_sys->buf + 100) - > recv_previous_parsed_rec_offset - && (ulint)(ptr - recv_sys->buf + 100 - - recv_previous_parsed_rec_offset) - < 200000) { - fputs("InnoDB: Hex dump of corrupt log starting" - " 100 bytes before the start\n" - "InnoDB: of the previous log rec,\n" - "InnoDB: and ending 100 bytes after the start" - " of the corrupt rec:\n", - stderr); - - ut_print_buf(stderr, - recv_sys->buf - + recv_previous_parsed_rec_offset - 100, - ptr - recv_sys->buf + 200 - - recv_previous_parsed_rec_offset); - putc('\n', stderr); - } - -#ifndef UNIV_HOTBACKUP - if (!srv_force_recovery) { - fputs("InnoDB: Set innodb_force_recovery" - " to ignore this error.\n", stderr); - } -#endif /* !UNIV_HOTBACKUP */ - - fputs("InnoDB: WARNING: the log file may have been corrupt and it\n" - "InnoDB: is possible that the log scan did not proceed\n" - "InnoDB: far enough in recovery! 
Please run CHECK TABLE\n" - "InnoDB: on your InnoDB tables to check that they are ok!\n" - "InnoDB: If mysqld crashes after this recovery, look at\n" - "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); - - fflush(stderr); -} - -/*******************************************************//** -Parses log records from a buffer and stores them to a hash table to wait -merging to file pages. -@return currently always returns FALSE */ -static -ibool -recv_parse_log_recs( -/*================*/ - ibool store_to_hash, /*!< in: TRUE if the records should be stored - to the hash table; this is set to FALSE if just - debug checking is needed */ - dberr_t* err) /*!< out: DB_SUCCESS if successfull, - DB_ERROR if parsing fails. */ -{ - byte* ptr; - byte* end_ptr; - ulint single_rec; - ulint len; - ulint total_len; - lsn_t new_recovered_lsn; - lsn_t old_lsn; - byte type; - ulint space; - ulint page_no; - byte* body; - ulint n_recs; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(recv_sys->parse_start_lsn != 0); -loop: - ptr = recv_sys->buf + recv_sys->recovered_offset; - - end_ptr = recv_sys->buf + recv_sys->len; - - if (ptr == end_ptr) { - - return(FALSE); - } - - single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG; - - if (single_rec || *ptr == MLOG_DUMMY_RECORD) { - /* The mtr only modified a single page, or this is a file op */ - - old_lsn = recv_sys->recovered_lsn; - - /* Try to parse a log record, fetching its type, space id, - page no, and a pointer to the body of the log record */ - - len = recv_parse_log_rec(ptr, end_ptr, &type, &space, - &page_no, &body); - - if (len == 0 || recv_sys->found_corrupt_log) { - if (recv_sys->found_corrupt_log) { - - recv_report_corrupt_log(ptr, - type, space, page_no); - } - - return(FALSE); - } - - new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len); - - if (new_recovered_lsn > recv_sys->scanned_lsn) { - /* The log record filled a log block, and we require - that also the next log block 
should have been scanned - in */ - - return(FALSE); - } - - recv_previous_parsed_rec_type = (ulint) type; - recv_previous_parsed_rec_offset = recv_sys->recovered_offset; - recv_previous_parsed_rec_is_multi = 0; - - recv_sys->recovered_offset += len; - recv_sys->recovered_lsn = new_recovered_lsn; - - DBUG_PRINT("ib_log", - ("scan " LSN_PF ": log rec %u len %u " - "page %u:%u", old_lsn, - (unsigned) type, (unsigned) len, - (unsigned) space, (unsigned) page_no)); - - if (type == MLOG_DUMMY_RECORD) { - /* Do nothing */ - - } else if (!store_to_hash) { - /* In debug checking, update a replicate page - according to the log record, and check that it - becomes identical with the original page */ -#ifdef UNIV_LOG_DEBUG - recv_check_incomplete_log_recs(ptr, len); -#endif/* UNIV_LOG_DEBUG */ - - } else if (type == MLOG_FILE_CREATE - || type == MLOG_FILE_CREATE2 - || type == MLOG_FILE_RENAME - || type == MLOG_FILE_DELETE) { - ut_a(space); -#ifdef UNIV_HOTBACKUP - if (recv_replay_file_ops) { - - /* In mysqlbackup --apply-log, replay an .ibd - file operation, if possible; note that - fil_path_to_mysql_datadir is set in mysqlbackup - to point to the datadir we should use there */ - - if (NULL == fil_op_log_parse_or_replay( - body, end_ptr, type, - space, page_no)) { - fprintf(stderr, - "InnoDB: Error: file op" - " log record of type %lu" - " space %lu not complete in\n" - "InnoDB: the replay phase." - " Path %s\n", - (ulint) type, space, - (char*)(body + 2)); - - *err = DB_ERROR; - return(FALSE); - } - } -#endif - /* In normal mysqld crash recovery we do not try to - replay file operations */ -#ifdef UNIV_LOG_LSN_DEBUG - } else if (type == MLOG_LSN) { - /* Do not add these records to the hash table. - The page number and space id fields are misused - for something else. 
*/ -#endif /* UNIV_LOG_LSN_DEBUG */ - } else { - recv_add_to_hash_table(type, space, page_no, body, - ptr + len, old_lsn, - recv_sys->recovered_lsn); - } - } else { - /* Check that all the records associated with the single mtr - are included within the buffer */ - - total_len = 0; - n_recs = 0; - - for (;;) { - len = recv_parse_log_rec(ptr, end_ptr, &type, &space, - &page_no, &body); - if (len == 0 || recv_sys->found_corrupt_log) { - - if (recv_sys->found_corrupt_log) { - - recv_report_corrupt_log( - ptr, type, space, page_no); - } - - return(FALSE); - } - - recv_previous_parsed_rec_type = (ulint) type; - recv_previous_parsed_rec_offset - = recv_sys->recovered_offset + total_len; - recv_previous_parsed_rec_is_multi = 1; - -#ifdef UNIV_LOG_DEBUG - if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) { - recv_check_incomplete_log_recs(ptr, len); - } -#endif /* UNIV_LOG_DEBUG */ - - DBUG_PRINT("ib_log", - ("scan " LSN_PF ": multi-log rec %u " - "len %u page %u:%u", - recv_sys->recovered_lsn, - (unsigned) type, (unsigned) len, - (unsigned) space, (unsigned) page_no)); - - total_len += len; - n_recs++; - - ptr += len; - - if (type == MLOG_MULTI_REC_END) { - - /* Found the end mark for the records */ - - break; - } - } - - new_recovered_lsn = recv_calc_lsn_on_data_add( - recv_sys->recovered_lsn, total_len); - - if (new_recovered_lsn > recv_sys->scanned_lsn) { - /* The log record filled a log block, and we require - that also the next log block should have been scanned - in */ - - return(FALSE); - } - - /* Add all the records to the hash table */ - - ptr = recv_sys->buf + recv_sys->recovered_offset; - - for (;;) { - old_lsn = recv_sys->recovered_lsn; - len = recv_parse_log_rec(ptr, end_ptr, &type, &space, - &page_no, &body); - if (recv_sys->found_corrupt_log) { - - recv_report_corrupt_log(ptr, - type, space, page_no); - } - - ut_a(len != 0); - ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG)); - - recv_sys->recovered_offset += len; - recv_sys->recovered_lsn - = 
recv_calc_lsn_on_data_add(old_lsn, len); - if (type == MLOG_MULTI_REC_END) { - - /* Found the end mark for the records */ - - break; - } - - if (store_to_hash -#ifdef UNIV_LOG_LSN_DEBUG - && type != MLOG_LSN -#endif /* UNIV_LOG_LSN_DEBUG */ - ) { - recv_add_to_hash_table(type, space, page_no, - body, ptr + len, - old_lsn, - new_recovered_lsn); - } - - ptr += len; - } - } - - goto loop; -} - -/*******************************************************//** -Adds data from a new log block to the parsing buffer of recv_sys if -recv_sys->parse_start_lsn is non-zero. -@return TRUE if more data added */ -static -ibool -recv_sys_add_to_parsing_buf( -/*========================*/ - const byte* log_block, /*!< in: log block */ - lsn_t scanned_lsn) /*!< in: lsn of how far we were able - to find data in this log block */ -{ - ulint more_len; - ulint data_len; - ulint start_offset; - ulint end_offset; - - ut_ad(scanned_lsn >= recv_sys->scanned_lsn); - - if (!recv_sys->parse_start_lsn) { - /* Cannot start parsing yet because no start point for - it found */ - - return(FALSE); - } - - data_len = log_block_get_data_len(log_block); - - if (recv_sys->parse_start_lsn >= scanned_lsn) { - - return(FALSE); - - } else if (recv_sys->scanned_lsn >= scanned_lsn) { - - return(FALSE); - - } else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) { - more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn); - } else { - more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn); - } - - if (more_len == 0) { - - return(FALSE); - } - - ut_ad(data_len >= more_len); - - start_offset = data_len - more_len; - - if (start_offset < LOG_BLOCK_HDR_SIZE) { - start_offset = LOG_BLOCK_HDR_SIZE; - } - - end_offset = data_len; - - if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; - } - - ut_ad(start_offset <= end_offset); - - if (start_offset < end_offset) { - ut_memcpy(recv_sys->buf + recv_sys->len, - log_block + start_offset, 
end_offset - start_offset); - - recv_sys->len += end_offset - start_offset; - - ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE); - } - - return(TRUE); -} - -/*******************************************************//** -Moves the parsing buffer data left to the buffer start. */ -static -void -recv_sys_justify_left_parsing_buf(void) -/*===================================*/ -{ - ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset, - recv_sys->len - recv_sys->recovered_offset); - - recv_sys->len -= recv_sys->recovered_offset; - - recv_sys->recovered_offset = 0; -} - -/*******************************************************//** -Scans log from a buffer and stores new log data to the parsing buffer. -Parses and hashes the log records if new data found. Unless -UNIV_HOTBACKUP is defined, this function will apply log records -automatically when the hash table becomes full. -@return TRUE if limit_lsn has been reached, or not able to scan any -more in this log group */ -UNIV_INTERN -ibool -recv_scan_log_recs( -/*===============*/ - ulint available_memory,/*!< in: we let the hash table of recs - to grow to this size, at the maximum */ - ibool store_to_hash, /*!< in: TRUE if the records should be - stored to the hash table; this is set - to FALSE if just debug checking is - needed */ - const byte* buf, /*!< in: buffer containing a log - segment or garbage */ - ulint len, /*!< in: buffer length */ - lsn_t start_lsn, /*!< in: buffer start lsn */ - lsn_t* contiguous_lsn, /*!< in/out: it is known that all log - groups contain contiguous log data up - to this lsn */ - lsn_t* group_scanned_lsn,/*!< out: scanning succeeded up to - this lsn */ - dberr_t* err) /*!< out: error code or DB_SUCCESS */ -{ - const byte* log_block; - ulint no; - lsn_t scanned_lsn; - ibool finished; - ulint data_len; - ibool more_data; - bool maybe_encrypted=false; - - ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(len >= 
OS_FILE_LOG_BLOCK_SIZE); - ut_a(store_to_hash <= TRUE); - - finished = FALSE; - - log_block = buf; - scanned_lsn = start_lsn; - more_data = FALSE; - *err = DB_SUCCESS; - - do { - log_crypt_err_t log_crypt_err; - - no = log_block_get_hdr_no(log_block); - /* - fprintf(stderr, "Log block header no %lu\n", no); - - fprintf(stderr, "Scanned lsn no %lu\n", - log_block_convert_lsn_to_no(scanned_lsn)); - */ - - if (no != log_block_convert_lsn_to_no(scanned_lsn) - || !log_block_checksum_is_ok_or_old_format(log_block, true)) { - - if (no == log_block_convert_lsn_to_no(scanned_lsn) - && !log_block_checksum_is_ok_or_old_format( - log_block, true)) { - fprintf(stderr, - "InnoDB: Log block no %lu at" - " lsn " LSN_PF " has\n" - "InnoDB: ok header, but checksum field" - " contains %lu, should be %lu\n", - (ulong) no, - scanned_lsn, - (ulong) log_block_get_checksum( - log_block), - (ulong) log_block_calc_checksum( - log_block)); - } - - maybe_encrypted = log_crypt_block_maybe_encrypted(log_block, - &log_crypt_err); - - /* Garbage or an incompletely written log block */ - - /* Print checkpoint encryption keys if present */ - log_crypt_print_checkpoint_keys(log_block); - finished = TRUE; - - if (maybe_encrypted) { - /* Log block maybe encrypted finish processing*/ - log_crypt_print_error(log_crypt_err); - *err = DB_ERROR; - return (TRUE); - } - - /* Stop if we encounter a garbage log block */ - if (!srv_force_recovery) { - fputs("InnoDB: Set innodb_force_recovery" - " to ignore this error.\n", stderr); - *err = DB_ERROR; - return (TRUE); - } - - break; - - } - - if (log_block_get_flush_bit(log_block)) { - /* This block was a start of a log flush operation: - we know that the previous flush operation must have - been completed for all log groups before this block - can have been flushed to any of the groups. Therefore, - we know that log data is contiguous up to scanned_lsn - in all non-corrupt log groups. 
*/ - - if (scanned_lsn > *contiguous_lsn) { - *contiguous_lsn = scanned_lsn; - } - } - - data_len = log_block_get_data_len(log_block); - - if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE)) - && scanned_lsn + data_len > recv_sys->scanned_lsn - && (recv_sys->scanned_checkpoint_no > 0) - && (log_block_get_checkpoint_no(log_block) - < recv_sys->scanned_checkpoint_no) - && (recv_sys->scanned_checkpoint_no - - log_block_get_checkpoint_no(log_block) - > 0x80000000UL)) { - - /* Garbage from a log buffer flush which was made - before the most recent database recovery */ - - finished = TRUE; -#ifdef UNIV_LOG_DEBUG - /* This is not really an error, but currently - we stop here in the debug version: */ - - *err = DB_ERROR; - return (TRUE); -#endif - break; - } - - if (!recv_sys->parse_start_lsn - && (log_block_get_first_rec_group(log_block) > 0)) { - - /* We found a point from which to start the parsing - of log records */ - - recv_sys->parse_start_lsn = scanned_lsn - + log_block_get_first_rec_group(log_block); - recv_sys->scanned_lsn = recv_sys->parse_start_lsn; - recv_sys->recovered_lsn = recv_sys->parse_start_lsn; - } - - scanned_lsn += data_len; - - if (scanned_lsn > recv_sys->scanned_lsn) { - - /* We have found more entries. If this scan is - of startup type, we must initiate crash recovery - environment before parsing these log records. 
*/ - -#ifndef UNIV_HOTBACKUP - if (recv_log_scan_is_startup_type - && !recv_needed_recovery) { - if (!srv_read_only_mode) { - ib_logf(IB_LOG_LEVEL_INFO, - "Starting crash recovery from " - "checkpoint LSN=" LSN_PF, - recv_sys->scanned_lsn); - - recv_init_crash_recovery(); - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "innodb_read_only prevents" - " crash recovery"); - recv_needed_recovery = TRUE; - return(TRUE); - } - } -#endif /* !UNIV_HOTBACKUP */ - - /* We were able to find more log data: add it to the - parsing buffer if parse_start_lsn is already - non-zero */ - - if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE - >= RECV_PARSING_BUF_SIZE) { - fprintf(stderr, - "InnoDB: Error: log parsing" - " buffer overflow." - " Recovery may have failed!\n"); - - recv_sys->found_corrupt_log = TRUE; - -#ifndef UNIV_HOTBACKUP - if (!srv_force_recovery) { - fputs("InnoDB: Set" - " innodb_force_recovery" - " to ignore this error.\n", - stderr); - *err = DB_ERROR; - return (TRUE); - } -#endif /* !UNIV_HOTBACKUP */ - - } else if (!recv_sys->found_corrupt_log) { - more_data = recv_sys_add_to_parsing_buf( - log_block, scanned_lsn); - } - - recv_sys->scanned_lsn = scanned_lsn; - recv_sys->scanned_checkpoint_no - = log_block_get_checkpoint_no(log_block); - } - - if (data_len < OS_FILE_LOG_BLOCK_SIZE) { - /* Log data for this group ends here */ - - finished = TRUE; - break; - } else { - log_block += OS_FILE_LOG_BLOCK_SIZE; - } - } while (log_block < buf + len && !finished); - - *group_scanned_lsn = scanned_lsn; - - if (more_data && !recv_sys->found_corrupt_log) { - /* Try to parse more log records */ - - recv_parse_log_recs(store_to_hash, err); - - if (*err != DB_SUCCESS) { - return (TRUE); - } - -#ifndef UNIV_HOTBACKUP - if (store_to_hash - && mem_heap_get_size(recv_sys->heap) > available_memory) { - - /* Hash table of log records has grown too big: - empty it; FALSE means no ibuf operations - allowed, as we cannot add new records to the - log yet: they would be produced by ibuf - 
operations */ - - recv_apply_hashed_log_recs(false); - } -#endif /* !UNIV_HOTBACKUP */ - - if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) { - /* Move parsing buffer data to the buffer start */ - - recv_sys_justify_left_parsing_buf(); - } - } - - return(finished); -} - -#ifndef UNIV_HOTBACKUP -/*******************************************************//** -Scans log from a buffer and stores new log data to the parsing buffer. Parses -and hashes the log records if new data found. */ -static -void -recv_group_scan_log_recs( -/*=====================*/ - log_group_t* group, /*!< in: log group */ - lsn_t* contiguous_lsn, /*!< in/out: it is known that all log - groups contain contiguous log data up - to this lsn */ - lsn_t* group_scanned_lsn,/*!< out: scanning succeeded up to - this lsn */ - dberr_t* err) /*!< out: error code or DB_SUCCESS */ -{ - ibool finished; - lsn_t start_lsn; - lsn_t end_lsn; - - finished = FALSE; - *err = DB_SUCCESS; - - start_lsn = *contiguous_lsn; - - while (!finished) { - end_lsn = start_lsn + RECV_SCAN_SIZE; - - log_group_read_log_seg(LOG_RECOVER, log_sys->buf, - group, start_lsn, end_lsn, FALSE); - - finished = recv_scan_log_recs( - (buf_pool_get_n_pages() - - (recv_n_pool_free_frames * srv_buf_pool_instances)) - * UNIV_PAGE_SIZE, - TRUE, log_sys->buf, RECV_SCAN_SIZE, - start_lsn, contiguous_lsn, group_scanned_lsn, - err); - - if (*err != DB_SUCCESS) { - break; - } - - start_lsn = end_lsn; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Scanned group %lu up to" - " log sequence number " LSN_PF "\n", - (ulong) group->id, - *group_scanned_lsn); - } -#endif /* UNIV_DEBUG */ -} - -/*******************************************************//** -Initialize crash recovery environment. Can be called iff -recv_needed_recovery == FALSE. 
*/ -static -void -recv_init_crash_recovery(void) -/*==========================*/ -{ - ut_ad(!srv_read_only_mode); - ut_a(!recv_needed_recovery); - - recv_needed_recovery = TRUE; - - fil_load_single_table_tablespaces(); - - /* If we are using the doublewrite method, we will - check if there are half-written pages in data files, - and restore them from the doublewrite buffer if - possible */ - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - buf_dblwr_process(); - - /* Spawn the background thread to flush dirty pages - from the buffer pools. */ - recv_writer_thread_active = true; - recv_writer_thread_handle = os_thread_create( - recv_writer_thread, 0, 0); - } -} - -/** Recovers from a checkpoint. When this function returns, the database is able -to start processing of new user transactions, but the function -recv_recovery_from_checkpoint_finish should be called later to complete -the recovery and free the resources used in it. -@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE -@param[in] limit_lsn recover up to this lsn if possible -@param[in] flushed_lsn flushed lsn from first data file -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -recv_recovery_from_checkpoint_start_func( -#ifdef UNIV_LOG_ARCHIVE - ulint type, - lsn_t limit_lsn, -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t flushed_lsn) -{ - log_group_t* group; - log_group_t* max_cp_group; - ulint max_cp_field; - lsn_t checkpoint_lsn; - ib_uint64_t checkpoint_no; - lsn_t group_scanned_lsn = 0; - lsn_t contiguous_lsn; -#ifdef UNIV_LOG_ARCHIVE - log_group_t* up_to_date_group; - lsn_t archived_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - byte* buf; - byte* log_hdr_buf; - byte* log_hdr_buf_base = reinterpret_cast<byte *> - (alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); - dberr_t err; - - /* Initialize red-black tree for fast insertions into the - flush_list during recovery process. 
*/ - buf_flush_init_flush_rbt(); - - ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr); - - log_hdr_buf = static_cast<byte *> - (ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE)); - -#ifdef UNIV_LOG_ARCHIVE - ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX); -/** TRUE when recovering from a checkpoint */ -# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT) -/** Recover up to this log sequence number */ -# define LIMIT_LSN limit_lsn -#else /* UNIV_LOG_ARCHIVE */ -/** TRUE when recovering from a checkpoint */ -# define TYPE_CHECKPOINT 1 -/** Recover up to this log sequence number */ -# define LIMIT_LSN LSN_MAX -#endif /* UNIV_LOG_ARCHIVE */ - - if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { - - ib_logf(IB_LOG_LEVEL_INFO, - "The user has set SRV_FORCE_NO_LOG_REDO on, " - "skipping log redo"); - - return(DB_SUCCESS); - } - - recv_recovery_on = TRUE; - - recv_sys->limit_lsn = LIMIT_LSN; - - mutex_enter(&(log_sys->mutex)); - - /* Look for the latest checkpoint from any of the log groups */ - - err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field); - - if (err != DB_SUCCESS) { - - mutex_exit(&(log_sys->mutex)); - - return(err); - } - - log_group_read_checkpoint_info(max_cp_group, max_cp_field); - - buf = log_sys->checkpoint_buf; - - checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN); - checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO); -#ifdef UNIV_LOG_ARCHIVE - archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN); -#endif /* UNIV_LOG_ARCHIVE */ - - /* Read the first log file header to print a note if this is - a recovery from a restored InnoDB Hot Backup */ - - fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0, - 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group, 0); - - if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, - (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { - - if (srv_read_only_mode) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot restore from mysqlbackup, InnoDB " - "running in 
read-only mode!"); - - return(DB_ERROR); - } - - /* This log file was created by mysqlbackup --restore: print - a note to the user about it */ - - ib_logf(IB_LOG_LEVEL_INFO, - "The log file was created by mysqlbackup --apply-log " - "at %s. The following crash recovery is part of a " - "normal restore.", - log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP); - - /* Wipe over the label now */ - - memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, - ' ', 4); - /* Write to the log file to wipe over the label */ - fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, - max_cp_group->space_id, 0, - 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group, 0); - } - -#ifdef UNIV_LOG_ARCHIVE - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - log_checkpoint_get_nth_group_info(buf, group->id, - &(group->archived_file_no)); - - log_archived_get_offset(group, group->archived_file_no, - archived_lsn, &(group->archived_offset)); - - group = UT_LIST_GET_NEXT(log_groups, group); - } -#endif /* UNIV_LOG_ARCHIVE */ - - if (TYPE_CHECKPOINT) { - /* Start reading the log groups from the checkpoint lsn up. The - variable contiguous_lsn contains an lsn up to which the log is - known to be contiguously written to all log groups. 
*/ - recv_sys->parse_start_lsn = checkpoint_lsn; - recv_sys->scanned_lsn = checkpoint_lsn; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = checkpoint_lsn; - srv_start_lsn = checkpoint_lsn; - } - - contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn, - OS_FILE_LOG_BLOCK_SIZE); -#ifdef UNIV_LOG_ARCHIVE - if (TYPE_CHECKPOINT) { - up_to_date_group = max_cp_group; - } else { - ulint capacity; - dberr_t err; - - /* Try to recover the remaining part from logs: first from - the logs of the archived group */ - - group = recv_sys->archive_group; - capacity = log_group_get_capacity(group); - - if (recv_sys->scanned_lsn > checkpoint_lsn + capacity - || checkpoint_lsn > recv_sys->scanned_lsn + capacity) { - - mutex_exit(&(log_sys->mutex)); - - /* The group does not contain enough log: probably - an archived log file was missing or corrupt */ - - return(DB_ERROR); - } - - recv_group_scan_log_recs(group, &contiguous_lsn, - &group_scanned_lsn, &err); - - if (err != DB_SUCCESS || recv_sys->scanned_lsn < checkpoint_lsn) { - - mutex_exit(&(log_sys->mutex)); - - /* The group did not contain enough log: an archived - log file was missing or invalid, or the log group - was corrupt */ - - return(DB_ERROR); - } - - group->scanned_lsn = group_scanned_lsn; - up_to_date_group = group; - } -#endif /* UNIV_LOG_ARCHIVE */ - - ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - -#ifdef UNIV_LOG_ARCHIVE - if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) { - group = UT_LIST_GET_NEXT(log_groups, group); - } -#endif /* UNIV_LOG_ARCHIVE */ - - /* Set the flag to publish that we are doing startup scan. 
*/ - recv_log_scan_is_startup_type = TYPE_CHECKPOINT; - while (group) { -#ifdef UNIV_LOG_ARCHIVE - lsn_t old_scanned_lsn = recv_sys->scanned_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - dberr_t err = DB_SUCCESS; - - recv_group_scan_log_recs(group, &contiguous_lsn, - &group_scanned_lsn, &err); - - if (err != DB_SUCCESS) { - return (err); - } - - group->scanned_lsn = group_scanned_lsn; - -#ifdef UNIV_LOG_ARCHIVE - if (old_scanned_lsn < group_scanned_lsn) { - /* We found a more up-to-date group */ - - up_to_date_group = group; - } - - if ((type == LOG_ARCHIVE) - && (group == recv_sys->archive_group)) { - group = UT_LIST_GET_NEXT(log_groups, group); - } -#endif /* UNIV_LOG_ARCHIVE */ - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - /* Done with startup scan. Clear the flag. */ - recv_log_scan_is_startup_type = FALSE; - - if (srv_read_only_mode && recv_needed_recovery) { - return(DB_READ_ONLY); - } - - if (TYPE_CHECKPOINT) { - /* NOTE: we always do a 'recovery' at startup, but only if - there is something wrong we will print a message to the - user about recovery: */ - - if (checkpoint_lsn != flushed_lsn) { - if (!recv_needed_recovery) { - ib_logf(IB_LOG_LEVEL_INFO, - "The log sequence number " - LSN_PF - " in ibdata file do not match" - " the log sequence number " - LSN_PF - " in the ib_logfiles!", - flushed_lsn, - checkpoint_lsn); - - if (!srv_read_only_mode) { - recv_init_crash_recovery(); - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Can't initiate database " - "recovery, running " - "in read-only-mode."); - return(DB_READ_ONLY); - } - } - } - } - - /* We currently have only one log group */ - if (group_scanned_lsn < checkpoint_lsn - || group_scanned_lsn < recv_max_page_lsn) { - ib_logf(IB_LOG_LEVEL_ERROR, - "We scanned the log up to " - LSN_PF ". A checkpoint was at " LSN_PF - " and the maximum LSN on a database page was " LSN_PF - ". 
It is possible that the database is now corrupt!", - group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn); - } - - if (recv_sys->recovered_lsn < checkpoint_lsn) { - - mutex_exit(&(log_sys->mutex)); - - if (recv_sys->recovered_lsn >= LIMIT_LSN) { - - return(DB_SUCCESS); - } - - /* No harm in trying to do RO access. */ - if (!srv_read_only_mode) { - return (DB_READ_ONLY); - } - - return(DB_ERROR); - } - - /* Synchronize the uncorrupted log groups to the most up-to-date log - group; we also copy checkpoint info to groups */ - - log_sys->next_checkpoint_lsn = checkpoint_lsn; - log_sys->next_checkpoint_no = checkpoint_no + 1; - /* here the checkpoint info is written without any redo logging ongoing - * and next_checkpoint_no is updated directly hence no +1 */ - log_crypt_set_ver_and_key(log_sys->next_checkpoint_no); - -#ifdef UNIV_LOG_ARCHIVE - log_sys->archived_lsn = archived_lsn; - - recv_synchronize_groups(up_to_date_group); -#else /* UNIV_LOG_ARCHIVE */ - recv_synchronize_groups(); -#endif /* UNIV_LOG_ARCHIVE */ - - if (!recv_needed_recovery) { - ut_a(checkpoint_lsn == recv_sys->recovered_lsn); - } else { - srv_start_lsn = recv_sys->recovered_lsn; - } - - log_sys->lsn = recv_sys->recovered_lsn; - - ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE); - - log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE; - log_sys->buf_next_to_write = log_sys->buf_free; - log_sys->written_to_some_lsn = log_sys->lsn; - log_sys->written_to_all_lsn = log_sys->lsn; - - log_sys->last_checkpoint_lsn = checkpoint_lsn; - - MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, - log_sys->lsn - log_sys->last_checkpoint_lsn); - - log_sys->next_checkpoint_no = checkpoint_no + 1; - log_crypt_set_ver_and_key(log_sys->next_checkpoint_no); - -#ifdef UNIV_LOG_ARCHIVE - if (archived_lsn == LSN_MAX) { - - log_sys->archiving_state = LOG_ARCH_OFF; - } -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_enter(&recv_sys->mutex); - - recv_sys->apply_log_recs = TRUE; - - 
mutex_exit(&recv_sys->mutex); - - mutex_exit(&log_sys->mutex); - - recv_lsn_checks_on = TRUE; - - /* The database is now ready to start almost normal processing of user - transactions: transaction rollbacks and the application of the log - records in the hash table can be run in background. */ - - return(DB_SUCCESS); - -#undef TYPE_CHECKPOINT -#undef LIMIT_LSN -} - -/********************************************************//** -Completes recovery from a checkpoint. */ -UNIV_INTERN -void -recv_recovery_from_checkpoint_finish(void) -/*======================================*/ -{ - if (recv_needed_recovery) { - trx_sys_print_mysql_master_log_pos(); - trx_sys_print_mysql_binlog_offset(); - } - - if (recv_sys->found_corrupt_log) { - - fprintf(stderr, - "InnoDB: WARNING: the log file may have been" - " corrupt and it\n" - "InnoDB: is possible that the log scan or parsing" - " did not proceed\n" - "InnoDB: far enough in recovery. Please run" - " CHECK TABLE\n" - "InnoDB: on your InnoDB tables to check that" - " they are ok!\n" - "InnoDB: It may be safest to recover your" - " InnoDB database from\n" - "InnoDB: a backup!\n"); - } - - /* Make sure that the recv_writer thread is done. This is - required because it grabs various mutexes and we want to - ensure that when we enable sync_order_checks there is no - mutex currently held by any thread. */ - mutex_enter(&recv_sys->writer_mutex); - - /* Free the resources of the recovery system */ - recv_recovery_on = FALSE; - - /* By acquring the mutex we ensure that the recv_writer thread - won't trigger any more LRU batchtes. Now wait for currently - in progress batches to finish. 
*/ - buf_flush_wait_LRU_batch_end(); - - mutex_exit(&recv_sys->writer_mutex); - - ulint count = 0; - while (recv_writer_thread_active) { - ++count; - os_thread_sleep(100000); - if (srv_print_verbose_log && count > 600) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for recv_writer to " - "finish flushing of buffer pool"); - count = 0; - } - } - -#ifdef __WIN__ - if (recv_writer_thread_handle) { - CloseHandle(recv_writer_thread_handle); - recv_writer_thread_handle = 0; - } -#endif /* __WIN__ */ - -#ifndef UNIV_LOG_DEBUG - recv_sys_debug_free(); -#endif - /* Free up the flush_rbt. */ - buf_flush_free_flush_rbt(); - - /* Roll back any recovered data dictionary transactions, so - that the data dictionary tables will be free of any locks. - The data dictionary latch should guarantee that there is at - most one data dictionary transaction active at a time. */ - if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { - trx_rollback_or_clean_recovered(FALSE); - } -} - -/********************************************************//** -Initiates the rollback of active transactions. */ -UNIV_INTERN -void -recv_recovery_rollback_active(void) -/*===============================*/ -{ -#ifdef UNIV_SYNC_DEBUG - /* Wait for a while so that created threads have time to suspend - themselves before we switch the latching order checks on */ - os_thread_sleep(1000000); - - ut_ad(!recv_writer_thread_active); - - /* Switch latching order checks on in sync0sync.cc */ - sync_order_checks_on = TRUE; -#endif - /* We can't start any (DDL) transactions if UNDO logging - has been disabled, additionally disable ROLLBACK of recovered - user transactions. */ - if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO - && !srv_read_only_mode) { - - /* Drop partially created indexes. */ - row_merge_drop_temp_indexes(); - /* Drop temporary tables. */ - row_mysql_drop_temp_tables(); - - /* Drop any auxiliary tables that were not dropped when the - parent table was dropped. 
This can happen if the parent table - was dropped but the server crashed before the auxiliary tables - were dropped. */ - fts_drop_orphaned_tables(); - - /* Rollback the uncommitted transactions which have no user - session */ - - trx_rollback_or_clean_is_active = true; - os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0); - } -} - -/******************************************************//** -Resets the logs. The contents of log files will be lost! */ -UNIV_INTERN -void -recv_reset_logs( -/*============*/ -#ifdef UNIV_LOG_ARCHIVE - ulint arch_log_no, /*!< in: next archived log file number */ - ibool new_logs_created,/*!< in: TRUE if resetting logs - is done at the log creation; - FALSE if it is done after - archive recovery */ -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t lsn) /*!< in: reset to this lsn - rounded up to be divisible by - OS_FILE_LOG_BLOCK_SIZE, after - which we add - LOG_BLOCK_HDR_SIZE */ -{ - log_group_t* group; - - ut_ad(mutex_own(&(log_sys->mutex))); - - log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - group->lsn = log_sys->lsn; - group->lsn_offset = LOG_FILE_HDR_SIZE; -#ifdef UNIV_LOG_ARCHIVE - group->archived_file_no = arch_log_no; - group->archived_offset = 0; - - if (!new_logs_created) { - recv_truncate_group(group, group->lsn, group->lsn, - group->lsn, group->lsn); - } -#endif /* UNIV_LOG_ARCHIVE */ - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - log_sys->buf_next_to_write = 0; - log_sys->written_to_some_lsn = log_sys->lsn; - log_sys->written_to_all_lsn = log_sys->lsn; - - log_sys->next_checkpoint_no = 0; - log_sys->last_checkpoint_lsn = 0; - -#ifdef UNIV_LOG_ARCHIVE - log_sys->archived_lsn = log_sys->lsn; -#endif /* UNIV_LOG_ARCHIVE */ - - log_sys->tracked_lsn = log_sys->lsn; - - log_block_init(log_sys->buf, log_sys->lsn); - log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); - - log_sys->buf_free = LOG_BLOCK_HDR_SIZE; - 
log_sys->lsn += LOG_BLOCK_HDR_SIZE; - - MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, - (log_sys->lsn - log_sys->last_checkpoint_lsn)); - - mutex_exit(&(log_sys->mutex)); - - /* Reset the checkpoint fields in logs */ - - log_make_checkpoint_at(LSN_MAX, TRUE); - - mutex_enter(&(log_sys->mutex)); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_HOTBACKUP -/******************************************************//** -Creates new log files after a backup has been restored. */ -UNIV_INTERN -void -recv_reset_log_files_for_backup( -/*============================*/ - const char* log_dir, /*!< in: log file directory path */ - ulint n_log_files, /*!< in: number of log files */ - lsn_t log_file_size, /*!< in: log file size */ - lsn_t lsn) /*!< in: new start lsn, must be - divisible by OS_FILE_LOG_BLOCK_SIZE */ -{ - os_file_t log_file; - ibool success; - byte* buf; - ulint i; - ulint log_dir_len; - char name[5000]; - static const char ib_logfile_basename[] = "ib_logfile"; - - log_dir_len = strlen(log_dir); - /* full path name of ib_logfile consists of log dir path + basename - + number. This must fit in the name buffer. - */ - ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name)); - - buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); - memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); - - for (i = 0; i < n_log_files; i++) { - - sprintf(name, "%s%s%lu", log_dir, - ib_logfile_basename, (ulong) i); - - log_file = os_file_create_simple(innodb_file_log_key, - name, OS_FILE_CREATE, - OS_FILE_READ_WRITE, - &success); - if (!success) { - fprintf(stderr, - "InnoDB: Cannot create %s. 
Check that" - " the file does not exist yet.\n", name); - - exit(1); - } - - fprintf(stderr, - "Setting log file size to %llu\n", - log_file_size); - - success = os_file_set_size(name, log_file, log_file_size); - - if (!success) { - fprintf(stderr, - "InnoDB: Cannot set %s size to %llu\n", - name, log_file_size); - exit(1); - } - - os_file_flush(log_file); - os_file_close(log_file); - } - - /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */ - - log_reset_first_header_and_checkpoint(buf, lsn); - - log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn); - log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE, - LOG_BLOCK_HDR_SIZE); - sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0); - - log_file = os_file_create_simple(innodb_file_log_key, - name, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &success); - if (!success) { - fprintf(stderr, "InnoDB: Cannot open %s.\n", name); - - exit(1); - } - - os_file_write(name, log_file, buf, 0, - LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); - os_file_flush(log_file); - os_file_close(log_file); - - ut_free(buf); -} -#endif /* UNIV_HOTBACKUP */ - -/******************************************************//** -Checks the 4-byte checksum to the trailer checksum field of a log -block. We also accept a log block in the old format before -InnoDB-3.23.52 where the checksum field contains the log block number. 
-@return TRUE if ok, or if the log block may be in the format of InnoDB -version predating 3.23.52 */ -UNIV_INTERN -ibool -log_block_checksum_is_ok_or_old_format( -/*===================================*/ - const byte* block) /*!< in: pointer to a log block */ -{ -#ifdef UNIV_LOG_DEBUG - return(TRUE); -#endif /* UNIV_LOG_DEBUG */ - - ulint block_checksum = log_block_get_checksum(block); - - if (UNIV_LIKELY(srv_log_checksum_algorithm == - SRV_CHECKSUM_ALGORITHM_NONE || - log_block_calc_checksum(block) == block_checksum)) { - - return(TRUE); - } - - if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 || - srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB || - srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) { - - const char* algo = NULL; - - ib_logf(IB_LOG_LEVEL_ERROR, - "log block checksum mismatch: expected " ULINTPF ", " - "calculated checksum " ULINTPF, - block_checksum, - log_block_calc_checksum(block)); - - if (block_checksum == LOG_NO_CHECKSUM_MAGIC) { - - algo = "none"; - } else if (block_checksum == - log_block_calc_checksum_crc32(block)) { - - algo = "crc32"; - } else if (block_checksum == - log_block_calc_checksum_innodb(block)) { - - algo = "innodb"; - } - - if (algo) { - - const char* current_algo; - - current_algo = buf_checksum_algorithm_name( - (srv_checksum_algorithm_t) - srv_log_checksum_algorithm); - - ib_logf(IB_LOG_LEVEL_ERROR, - "current InnoDB log checksum type: %s, " - "detected log checksum type: %s", - current_algo, - algo); - } - - ib_logf(IB_LOG_LEVEL_FATAL, - "STRICT method was specified for innodb_log_checksum, " - "so we intentionally assert here."); - } - - ut_ad(srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_CRC32 || - srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB); - - if (block_checksum == LOG_NO_CHECKSUM_MAGIC || - block_checksum == log_block_calc_checksum_crc32(block) || - block_checksum == log_block_calc_checksum_innodb(block)) { - - return(TRUE); - } - - if 
(log_block_get_hdr_no(block) == block_checksum) { - - /* We assume the log block is in the format of - InnoDB version < 3.23.52 and the block is ok */ -#if 0 - fprintf(stderr, - "InnoDB: Scanned old format < InnoDB-3.23.52" - " log block number %lu\n", - log_block_get_hdr_no(block)); -#endif - return(TRUE); - } - - return(FALSE); -} - -void recv_dblwr_t::add(byte* page) -{ - pages.push_back(page); -} - -byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no) -{ - std::vector<byte*> matches; - byte* result = 0; - - for (std::list<byte*>::iterator i = pages.begin(); - i != pages.end(); ++i) { - - if ((page_get_space_id(*i) == space_id) - && (page_get_page_no(*i) == page_no)) { - matches.push_back(*i); - } - } - - if (matches.size() == 1) { - result = matches[0]; - } else if (matches.size() > 1) { - - lsn_t max_lsn = 0; - lsn_t page_lsn = 0; - - for (std::vector<byte*>::iterator i = matches.begin(); - i != matches.end(); ++i) { - - page_lsn = mach_read_from_8(*i + FIL_PAGE_LSN); - - if (page_lsn > max_lsn) { - max_lsn = page_lsn; - result = *i; - } - } - } - - return(result); -} diff --git a/storage/xtradb/mach/mach0data.cc b/storage/xtradb/mach/mach0data.cc deleted file mode 100644 index feeedb01609..00000000000 --- a/storage/xtradb/mach/mach0data.cc +++ /dev/null @@ -1,125 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file mach/mach0data.cc -Utilities for converting data from the database file -to the machine format. - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "mach0data.h" - -#ifdef UNIV_NONINL -#include "mach0data.ic" -#endif - -/*********************************************************//** -Reads a ulint in a compressed form if the log record fully contains it. -@return pointer to end of the stored field, NULL if not complete */ -UNIV_INTERN -byte* -mach_parse_compressed( -/*==================*/ - byte* ptr, /*!< in: pointer to buffer from where to read */ - byte* end_ptr,/*!< in: pointer to end of the buffer */ - ulint* val) /*!< out: read value (< 2^32) */ -{ - ulint flag; - - ut_ad(ptr && end_ptr && val); - - if (ptr >= end_ptr) { - - return(NULL); - } - - flag = mach_read_from_1(ptr); - - if (flag < 0x80UL) { - *val = flag; - return(ptr + 1); - } - - /* Workaround GCC bug - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673: - the compiler moves mach_read_from_4 right to the beginning of the - function, causing and out-of-bounds read if we are reading a short - integer close to the end of buffer. 
*/ -#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__) -#define DEPLOY_FENCE -#endif - -#ifdef DEPLOY_FENCE - __atomic_thread_fence(__ATOMIC_ACQUIRE); -#endif - - if (flag < 0xC0UL) { - if (end_ptr < ptr + 2) { - return(NULL); - } - - *val = mach_read_from_2(ptr) & 0x7FFFUL; - - return(ptr + 2); - } - -#ifdef DEPLOY_FENCE - __atomic_thread_fence(__ATOMIC_ACQUIRE); -#endif - - if (flag < 0xE0UL) { - if (end_ptr < ptr + 3) { - return(NULL); - } - - *val = mach_read_from_3(ptr) & 0x3FFFFFUL; - - return(ptr + 3); - } - -#ifdef DEPLOY_FENCE - __atomic_thread_fence(__ATOMIC_ACQUIRE); -#endif - - if (flag < 0xF0UL) { - if (end_ptr < ptr + 4) { - return(NULL); - } - - *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL; - - return(ptr + 4); - } - -#ifdef DEPLOY_FENCE - __atomic_thread_fence(__ATOMIC_ACQUIRE); -#endif - -#undef DEPLOY_FENCE - - ut_ad(flag == 0xF0UL); - - if (end_ptr < ptr + 5) { - return(NULL); - } - - *val = mach_read_from_4(ptr + 1); - return(ptr + 5); -} diff --git a/storage/xtradb/mem/mem0dbg.cc b/storage/xtradb/mem/mem0dbg.cc deleted file mode 100644 index a77785a369a..00000000000 --- a/storage/xtradb/mem/mem0dbg.cc +++ /dev/null @@ -1,1050 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/********************************************************************//**
@file mem/mem0dbg.cc
The memory management: the debug code. This is not a compilation module,
but is included in mem0mem.* !

Created 6/9/1994 Heikki Tuuri
*************************************************************************/

#ifdef UNIV_MEM_DEBUG
# ifndef UNIV_HOTBACKUP
# include "ha_prototypes.h"
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables below. */
UNIV_INTERN ib_mutex_t	mem_hash_mutex;

#ifdef UNIV_PFS_MUTEX
/* Key to register mem_hash_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	mem_hash_mutex_key;
#endif /* UNIV_PFS_MUTEX */

# endif /* !UNIV_HOTBACKUP */

/* The following variables contain information about the
extent of memory allocations. Only used in the debug version.
Protected by mem_hash_mutex above.
*/

/* Number of heaps registered so far; also used as the sequence number
(nth_heap) of each newly registered heap */
static ulint		mem_n_created_heaps		= 0;
/* Number of memory fields initialized so far */
static ulint		mem_n_allocations		= 0;
/* Sum of the user-requested sizes of all fields ever initialized */
static ulint		mem_total_allocated_memory	= 0;
/* User-requested bytes currently accounted as allocated */
UNIV_INTERN ulint	mem_current_allocated_memory	= 0;
/* High-water mark of mem_current_allocated_memory */
static ulint		mem_max_allocated_memory	= 0;
# ifndef UNIV_HOTBACKUP
static ulint		mem_last_print_info		= 0;
/* TRUE between mem_init() and mem_close() */
static ibool		mem_hash_initialized		= FALSE;
# endif /* !UNIV_HOTBACKUP */

/* Size of the hash table for memory management tracking */
#define	MEM_HASH_SIZE	997

/* The node of the list containing currently allocated memory heaps */

struct mem_hash_node_t {
	UT_LIST_NODE_T(mem_hash_node_t)
				list;	/*!< hash list node */
	mem_heap_t*		heap;	/*!< memory heap */
	const char*		file_name;/* file where heap was created*/
	ulint			line;	/*!< file line of creation */
	ulint			nth_heap;/* this is the nth heap created */
	UT_LIST_NODE_T(mem_hash_node_t)
				all_list;/* list of all created heaps */
};

typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t;

/* The hash table of allocated heaps */
static mem_hash_cell_t		mem_hash_table[MEM_HASH_SIZE];

/* The base node of the list of all allocated heaps */
static mem_hash_cell_t		mem_all_list_base;



/* Forward declaration; the definition follows below */
UNIV_INLINE
mem_hash_cell_t*
mem_hash_get_nth_cell(ulint i);

/* Accessor function for the hash table. Returns a pointer to the
table cell.
*/ -UNIV_INLINE -mem_hash_cell_t* -mem_hash_get_nth_cell(ulint i) -{ - ut_a(i < MEM_HASH_SIZE); - - return(&(mem_hash_table[i])); -} - -/* Accessor functions for a memory field in the debug version */ -UNIV_INTERN -void -mem_field_header_set_len(byte* field, ulint len) -{ - mach_write_to_4(field - 2 * sizeof(ulint), len); -} - -UNIV_INTERN -ulint -mem_field_header_get_len(byte* field) -{ - return(mach_read_from_4(field - 2 * sizeof(ulint))); -} - -UNIV_INTERN -void -mem_field_header_set_check(byte* field, ulint check) -{ - mach_write_to_4(field - sizeof(ulint), check); -} - -UNIV_INTERN -ulint -mem_field_header_get_check(byte* field) -{ - return(mach_read_from_4(field - sizeof(ulint))); -} - -UNIV_INTERN -void -mem_field_trailer_set_check(byte* field, ulint check) -{ - mach_write_to_4(field + mem_field_header_get_len(field), check); -} - -UNIV_INTERN -ulint -mem_field_trailer_get_check(byte* field) -{ - return(mach_read_from_4(field - + mem_field_header_get_len(field))); -} -#endif /* UNIV_MEM_DEBUG */ - -#ifndef UNIV_HOTBACKUP -/******************************************************************//** -Initializes the memory system. */ -UNIV_INTERN -void -mem_init( -/*=====*/ - ulint size) /*!< in: common pool size in bytes */ -{ -#ifdef UNIV_MEM_DEBUG - - ulint i; - - /* Initialize the hash table */ - ut_a(FALSE == mem_hash_initialized); - - mutex_create(mem_hash_mutex_key, &mem_hash_mutex, SYNC_MEM_HASH); - - for (i = 0; i < MEM_HASH_SIZE; i++) { - UT_LIST_INIT(*mem_hash_get_nth_cell(i)); - } - - UT_LIST_INIT(mem_all_list_base); - - mem_hash_initialized = TRUE; -#endif - - if (UNIV_LIKELY(srv_use_sys_malloc)) { - /* When innodb_use_sys_malloc is set, the - mem_comm_pool won't be used for any allocations. We - create a dummy mem_comm_pool, because some statistics - and debugging code relies on it being initialized. 
*/ - size = 1; - } - - mem_comm_pool = mem_pool_create(size); -} - -/******************************************************************//** -Closes the memory system. */ -UNIV_INTERN -void -mem_close(void) -/*===========*/ -{ - mem_pool_free(mem_comm_pool); - mem_comm_pool = NULL; -#ifdef UNIV_MEM_DEBUG - mutex_free(&mem_hash_mutex); - mem_hash_initialized = FALSE; -#endif /* UNIV_MEM_DEBUG */ -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_MEM_DEBUG -/******************************************************************//** -Initializes an allocated memory field in the debug version. */ -UNIV_INTERN -void -mem_field_init( -/*===========*/ - byte* buf, /*!< in: memory field */ - ulint n) /*!< in: how many bytes the user requested */ -{ - ulint rnd; - byte* usr_buf; - - usr_buf = buf + MEM_FIELD_HEADER_SIZE; - - /* In the debug version write the length field and the - check fields to the start and the end of the allocated storage. - The field header consists of a length field and - a random number field, in this order. The field trailer contains - the same random number as a check field. */ - - mem_field_header_set_len(usr_buf, n); - - rnd = ut_rnd_gen_ulint(); - - mem_field_header_set_check(usr_buf, rnd); - mem_field_trailer_set_check(usr_buf, rnd); - - /* Update the memory allocation information */ - - mutex_enter(&mem_hash_mutex); - - mem_total_allocated_memory += n; - mem_current_allocated_memory += n; - mem_n_allocations++; - - if (mem_current_allocated_memory > mem_max_allocated_memory) { - mem_max_allocated_memory = mem_current_allocated_memory; - } - - mutex_exit(&mem_hash_mutex); - - /* In the debug version set the buffer to a random - combination of 0xBA and 0xBE */ - - mem_init_buf(usr_buf, n); -} - -/******************************************************************//** -Erases an allocated memory field in the debug version. 
*/ -UNIV_INTERN -void -mem_field_erase( -/*============*/ - byte* buf, /*!< in: memory field */ - ulint n MY_ATTRIBUTE((unused))) - /*!< in: how many bytes the user requested */ -{ - byte* usr_buf; - - usr_buf = buf + MEM_FIELD_HEADER_SIZE; - - mutex_enter(&mem_hash_mutex); - mem_current_allocated_memory -= n; - mutex_exit(&mem_hash_mutex); - - /* Check that the field lengths agree */ - ut_ad(n == (ulint) mem_field_header_get_len(usr_buf)); - - /* In the debug version, set the freed space to a random - combination of 0xDE and 0xAD */ - - mem_erase_buf(buf, MEM_SPACE_NEEDED(n)); -} - -/***************************************************************//** -Initializes a buffer to a random combination of hex BA and BE. -Used to initialize allocated memory. */ -UNIV_INTERN -void -mem_init_buf( -/*=========*/ - byte* buf, /*!< in: pointer to buffer */ - ulint n) /*!< in: length of buffer */ -{ - byte* ptr; - - UNIV_MEM_ASSERT_W(buf, n); - - for (ptr = buf; ptr < buf + n; ptr++) { - - if (ut_rnd_gen_ibool()) { - *ptr = 0xBA; - } else { - *ptr = 0xBE; - } - } - - UNIV_MEM_INVALID(buf, n); -} - -/***************************************************************//** -Initializes a buffer to a random combination of hex DE and AD. -Used to erase freed memory. */ -UNIV_INTERN -void -mem_erase_buf( -/*==========*/ - byte* buf, /*!< in: pointer to buffer */ - ulint n) /*!< in: length of buffer */ -{ - byte* ptr; - - UNIV_MEM_ASSERT_W(buf, n); - - for (ptr = buf; ptr < buf + n; ptr++) { - if (ut_rnd_gen_ibool()) { - *ptr = 0xDE; - } else { - *ptr = 0xAD; - } - } - - UNIV_MEM_FREE(buf, n); -} - -/***************************************************************//** -Inserts a created memory heap to the hash table of current allocated -memory heaps. 
*/
UNIV_INTERN
void
mem_hash_insert(
/*============*/
	mem_heap_t*	heap,	   /*!< in: the created heap */
	const char*	file_name, /*!< in: file name of creation */
	ulint		line)	   /*!< in: line where created */
{
	ut_ad(mem_heap_check(heap));

	mutex_enter(&mem_hash_mutex);

	/* The heap address selects the hash cell */
	mem_hash_cell_t*	cell = mem_hash_get_nth_cell(
		ut_hash_ulint((ulint) heap, MEM_HASH_SIZE));

	/* Allocate a new node to the list */
	mem_hash_node_t*	node = static_cast<mem_hash_node_t*>(
		ut_malloc(sizeof(*node)));

	node->heap = heap;
	node->file_name = file_name;
	node->line = line;
	node->nth_heap = mem_n_created_heaps;

	/* Insert into lists */
	UT_LIST_ADD_FIRST(list, *cell, node);

	UT_LIST_ADD_LAST(all_list, mem_all_list_base, node);

	mem_n_created_heaps++;

	mutex_exit(&mem_hash_mutex);
}

/***************************************************************//**
Removes a memory heap (which is going to be freed by the caller)
from the list of live memory heaps. The size of the heap, in terms of
how much memory in bytes was allocated for the user of the heap (not
the total space occupied by the heap), is subtracted from the current
allocation total.
Also validates the heap.
NOTE: This function does not free the storage occupied by the
heap itself, only the node in the list of heaps.
*/ -UNIV_INTERN -void -mem_hash_remove( -/*============*/ - mem_heap_t* heap, /*!< in: the heap to be freed */ - const char* file_name, /*!< in: file name of freeing */ - ulint line) /*!< in: line where freed */ -{ - mem_hash_node_t* node; - ulint cell_no; - ibool error; - ulint size; - - ut_ad(mem_heap_check(heap)); - - mutex_enter(&mem_hash_mutex); - - cell_no = ut_hash_ulint((ulint) heap, MEM_HASH_SIZE); - - /* Look for the heap in the hash table list */ - node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no)); - - while (node != NULL) { - if (node->heap == heap) { - - break; - } - - node = UT_LIST_GET_NEXT(list, node); - } - - if (node == NULL) { - fprintf(stderr, - "Memory heap or buffer freed in %s line %lu" - " did not exist.\n", - innobase_basename(file_name), (ulong) line); - ut_error; - } - - /* Remove from lists */ - UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node); - - UT_LIST_REMOVE(all_list, mem_all_list_base, node); - - /* Validate the heap which will be freed */ - mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size, - NULL, NULL); - if (error) { - fprintf(stderr, - "Inconsistency in memory heap or" - " buffer n:o %lu created\n" - "in %s line %lu and tried to free in %s line %lu.\n" - "Hex dump of 400 bytes around memory heap" - " first block start:\n", - node->nth_heap, - innobase_basename(node->file_name), (ulong) node->line, - innobase_basename(file_name), (ulong) line); - ut_print_buf(stderr, (byte*) node->heap - 200, 400); - fputs("\nDump of the mem heap:\n", stderr); - mem_heap_validate_or_print(node->heap, NULL, TRUE, &error, - &size, NULL, NULL); - ut_error; - } - - /* Free the memory occupied by the node struct */ - ut_free(node); - - mem_current_allocated_memory -= size; - - mutex_exit(&mem_hash_mutex); -} -#endif /* UNIV_MEM_DEBUG */ - -#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG -/***************************************************************//** -Checks a memory heap for consistency and prints the 
contents if requested. -Outputs the sum of sizes of buffers given to the user (only in -the debug version), the physical size of the heap and the number of -blocks in the heap. In case of error returns 0 as sizes and number -of blocks. */ -UNIV_INTERN -void -mem_heap_validate_or_print( -/*=======================*/ - mem_heap_t* heap, /*!< in: memory heap */ - byte* top MY_ATTRIBUTE((unused)), - /*!< in: calculate and validate only until - this top pointer in the heap is reached, - if this pointer is NULL, ignored */ - ibool print, /*!< in: if TRUE, prints the contents - of the heap; works only in - the debug version */ - ibool* error, /*!< out: TRUE if error */ - ulint* us_size,/*!< out: allocated memory - (for the user) in the heap, - if a NULL pointer is passed as this - argument, it is ignored; in the - non-debug version this is always -1 */ - ulint* ph_size,/*!< out: physical size of the heap, - if a NULL pointer is passed as this - argument, it is ignored */ - ulint* n_blocks) /*!< out: number of blocks in the heap, - if a NULL pointer is passed as this - argument, it is ignored */ -{ - mem_block_t* block; - ulint total_len = 0; - ulint block_count = 0; - ulint phys_len = 0; -#ifdef UNIV_MEM_DEBUG - ulint len; - byte* field; - byte* user_field; - ulint check_field; -#endif - - /* Pessimistically, we set the parameters to error values */ - if (us_size != NULL) { - *us_size = 0; - } - if (ph_size != NULL) { - *ph_size = 0; - } - if (n_blocks != NULL) { - *n_blocks = 0; - } - *error = TRUE; - - block = heap; - - if (block->magic_n != MEM_BLOCK_MAGIC_N) { - return; - } - - if (print) { - fputs("Memory heap:", stderr); - } - - while (block != NULL) { - phys_len += mem_block_get_len(block); - - if ((block->type == MEM_HEAP_BUFFER) - && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) { - - fprintf(stderr, - "InnoDB: Error: mem block %p" - " length %lu > UNIV_PAGE_SIZE\n", - (void*) block, - (ulong) mem_block_get_len(block)); - /* error */ - - return; - } - -#ifdef 
UNIV_MEM_DEBUG - /* We can trace the fields of the block only in the debug - version */ - if (print) { - fprintf(stderr, " Block %ld:", block_count); - } - - field = (byte*) block + mem_block_get_start(block); - - if (top && (field == top)) { - - goto completed; - } - - while (field < (byte*) block + mem_block_get_free(block)) { - - /* Calculate the pointer to the storage - which was given to the user */ - - user_field = field + MEM_FIELD_HEADER_SIZE; - - len = mem_field_header_get_len(user_field); - - if (print) { - ut_print_buf(stderr, user_field, len); - putc('\n', stderr); - } - - total_len += len; - check_field = mem_field_header_get_check(user_field); - - if (check_field - != mem_field_trailer_get_check(user_field)) { - /* error */ - - fprintf(stderr, - "InnoDB: Error: block %lx mem" - " field %lx len %lu\n" - "InnoDB: header check field is" - " %lx but trailer %lx\n", - (ulint) block, - (ulint) field, len, check_field, - mem_field_trailer_get_check( - user_field)); - - return; - } - - /* Move to next field */ - field = field + MEM_SPACE_NEEDED(len); - - if (top && (field == top)) { - - goto completed; - } - - } - - /* At the end check that we have arrived to the first free - position */ - - if (field != (byte*) block + mem_block_get_free(block)) { - /* error */ - - fprintf(stderr, - "InnoDB: Error: block %lx end of" - " mem fields %lx\n" - "InnoDB: but block free at %lx\n", - (ulint) block, (ulint) field, - (ulint)((byte*) block - + mem_block_get_free(block))); - - return; - } - -#endif - - block = UT_LIST_GET_NEXT(list, block); - block_count++; - } -#ifdef UNIV_MEM_DEBUG -completed: -#endif - if (us_size != NULL) { - *us_size = total_len; - } - if (ph_size != NULL) { - *ph_size = phys_len; - } - if (n_blocks != NULL) { - *n_blocks = block_count; - } - *error = FALSE; -} - -/**************************************************************//** -Prints the contents of a memory heap. 
*/ -static -void -mem_heap_print( -/*===========*/ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ibool error; - ulint us_size; - ulint phys_size; - ulint n_blocks; - - ut_ad(mem_heap_check(heap)); - - mem_heap_validate_or_print(heap, NULL, TRUE, &error, - &us_size, &phys_size, &n_blocks); - fprintf(stderr, - "\nheap type: %lu; size: user size %lu;" - " physical size %lu; blocks %lu.\n", - (ulong) heap->type, (ulong) us_size, - (ulong) phys_size, (ulong) n_blocks); - ut_a(!error); -} - -/**************************************************************//** -Validates the contents of a memory heap. -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_heap_validate( -/*==============*/ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ibool error; - ulint us_size; - ulint phys_size; - ulint n_blocks; - - ut_ad(mem_heap_check(heap)); - - mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size, - &phys_size, &n_blocks); - if (error) { - mem_heap_print(heap); - } - - ut_a(!error); - - return(TRUE); -} -#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */ - -#ifdef UNIV_DEBUG -/**************************************************************//** -Checks that an object is a memory heap (or a block of it). -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_heap_check( -/*===========*/ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_MEM_DEBUG -/*****************************************************************//** -TRUE if no memory is currently allocated. 
-@return TRUE if no heaps exist */ -UNIV_INTERN -ibool -mem_all_freed(void) -/*===============*/ -{ - mem_hash_node_t* node; - ulint heap_count = 0; - ulint i; - - mem_validate(); - - mutex_enter(&mem_hash_mutex); - - for (i = 0; i < MEM_HASH_SIZE; i++) { - - node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i)); - while (node != NULL) { - heap_count++; - node = UT_LIST_GET_NEXT(list, node); - } - } - - mutex_exit(&mem_hash_mutex); - - if (heap_count == 0) { -# ifndef UNIV_HOTBACKUP - ut_a(mem_pool_get_reserved(mem_comm_pool) == 0); -# endif /* !UNIV_HOTBACKUP */ - - return(TRUE); - } else { - return(FALSE); - } -} - -/*****************************************************************//** -Validates the dynamic memory allocation system. -@return TRUE if error */ -UNIV_INTERN -ibool -mem_validate_no_assert(void) -/*========================*/ -{ - mem_hash_node_t* node; - ulint n_heaps = 0; - ulint allocated_mem; - ulint ph_size; - ulint total_allocated_mem = 0; - ibool error = FALSE; - ulint n_blocks; - ulint i; - -# ifndef UNIV_HOTBACKUP - mem_pool_validate(mem_comm_pool); -# endif /* !UNIV_HOTBACKUP */ - - mutex_enter(&mem_hash_mutex); - - for (i = 0; i < MEM_HASH_SIZE; i++) { - - node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i)); - - while (node != NULL) { - n_heaps++; - - mem_heap_validate_or_print(node->heap, NULL, - FALSE, &error, - &allocated_mem, - &ph_size, &n_blocks); - - if (error) { - fprintf(stderr, - "\nERROR!!!!!!!!!!!!!!!!!!!" 
- "!!!!!!!!!!!!!!!!!!!!!!!\n\n" - "Inconsistency in memory heap" - " or buffer created\n" - "in %s line %lu.\n", - innobase_basename(node->file_name), - node->line); - - mutex_exit(&mem_hash_mutex); - - return(TRUE); - } - - total_allocated_mem += allocated_mem; - node = UT_LIST_GET_NEXT(list, node); - } - } - - if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) { - error = TRUE; - } - - if (mem_total_allocated_memory < mem_current_allocated_memory) { - error = TRUE; - } - - if (mem_max_allocated_memory > mem_total_allocated_memory) { - error = TRUE; - } - - if (mem_n_created_heaps < n_heaps) { - error = TRUE; - } - - mutex_exit(&mem_hash_mutex); - - return(error); -} - -/************************************************************//** -Validates the dynamic memory -@return TRUE if ok */ -UNIV_INTERN -ibool -mem_validate(void) -/*==============*/ -{ - ut_a(!mem_validate_no_assert()); - - return(TRUE); -} -#endif /* UNIV_MEM_DEBUG */ - -/************************************************************//** -Tries to find neigboring memory allocation blocks and dumps to stderr -the neighborhood of a given pointer. 
*/ -UNIV_INTERN -void -mem_analyze_corruption( -/*===================*/ - void* ptr) /*!< in: pointer to place of possible corruption */ -{ - byte* p; - ulint i; - ulint dist; - - fputs("InnoDB: Apparent memory corruption: mem dump ", stderr); - ut_print_buf(stderr, (byte*) ptr - 250, 500); - - fputs("\nInnoDB: Scanning backward trying to find" - " previous allocated mem blocks\n", stderr); - - p = (byte*) ptr; - dist = 0; - - for (i = 0; i < 10; i++) { - for (;;) { - if (((ulint) p) % 4 == 0) { - - if (*((ulint*) p) == MEM_BLOCK_MAGIC_N) { - fprintf(stderr, - "Mem block at - %lu," - " file %s, line %lu\n", - (ulong) dist, - (p + sizeof(ulint)), - (ulong) - (*(ulint*)(p + 8 - + sizeof(ulint)))); - - break; - } - - if (*((ulint*) p) == MEM_FREED_BLOCK_MAGIC_N) { - fprintf(stderr, - "Freed mem block at - %lu," - " file %s, line %lu\n", - (ulong) dist, - (p + sizeof(ulint)), - (ulong) - (*(ulint*)(p + 8 - + sizeof(ulint)))); - - break; - } - } - - p--; - dist++; - } - - p--; - dist++; - } - - fprintf(stderr, - "InnoDB: Scanning forward trying to find next" - " allocated mem blocks\n"); - - p = (byte*) ptr; - dist = 0; - - for (i = 0; i < 10; i++) { - for (;;) { - if (((ulint) p) % 4 == 0) { - - if (*((ulint*) p) == MEM_BLOCK_MAGIC_N) { - fprintf(stderr, - "Mem block at + %lu, file %s," - " line %lu\n", - (ulong) dist, - (p + sizeof(ulint)), - (ulong) - (*(ulint*)(p + 8 - + sizeof(ulint)))); - - break; - } - - if (*((ulint*) p) == MEM_FREED_BLOCK_MAGIC_N) { - fprintf(stderr, - "Freed mem block at + %lu," - " file %s, line %lu\n", - (ulong) dist, - (p + sizeof(ulint)), - (ulong) - (*(ulint*)(p + 8 - + sizeof(ulint)))); - - break; - } - } - - p++; - dist++; - } - - p++; - dist++; - } -} - -#ifndef UNIV_HOTBACKUP -/*****************************************************************//** -Prints information of dynamic memory usage and currently allocated -memory heaps or buffers. Can only be used in the debug version. 
*/ -static -void -mem_print_info_low( -/*===============*/ - ibool print_all) /*!< in: if TRUE, all heaps are printed, - else only the heaps allocated after the - previous call of this function */ -{ -#ifdef UNIV_MEM_DEBUG - mem_hash_node_t* node; - ulint n_heaps = 0; - ulint allocated_mem; - ulint ph_size; - ulint total_allocated_mem = 0; - ibool error; - ulint n_blocks; -#endif - FILE* outfile; - - /* outfile = fopen("ibdebug", "a"); */ - - outfile = stdout; - - fprintf(outfile, "\n"); - fprintf(outfile, - "________________________________________________________\n"); - fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n"); - -#ifndef UNIV_MEM_DEBUG - - UT_NOT_USED(print_all); - - mem_pool_print_info(outfile, mem_comm_pool); - - fprintf(outfile, - "Sorry, non-debug version cannot give more memory info\n"); - - /* fclose(outfile); */ - - return; -#else - mutex_enter(&mem_hash_mutex); - - fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n"); - - if (!print_all) { - fprintf(outfile, "AFTER THE LAST PRINT INFO\n"); - } - - node = UT_LIST_GET_FIRST(mem_all_list_base); - - while (node != NULL) { - n_heaps++; - - if (!print_all && node->nth_heap < mem_last_print_info) { - - goto next_heap; - } - - mem_heap_validate_or_print(node->heap, NULL, - FALSE, &error, &allocated_mem, - &ph_size, &n_blocks); - total_allocated_mem += allocated_mem; - - fprintf(outfile, - "%lu: file %s line %lu of size %lu phys.size %lu" - " with %lu blocks, type %lu\n", - node->nth_heap, - innobase_basename(node->file_name), node->line, - allocated_mem, ph_size, n_blocks, - (node->heap)->type); -next_heap: - node = UT_LIST_GET_NEXT(all_list, node); - } - - fprintf(outfile, "\n"); - - fprintf(outfile, "Current allocated memory : %lu\n", - mem_current_allocated_memory); - fprintf(outfile, "Current allocated heaps and buffers : %lu\n", - n_heaps); - fprintf(outfile, "Cumulative allocated memory : %lu\n", - mem_total_allocated_memory); - fprintf(outfile, "Maximum allocated memory : 
%lu\n", - mem_max_allocated_memory); - fprintf(outfile, "Cumulative created heaps and buffers : %lu\n", - mem_n_created_heaps); - fprintf(outfile, "Cumulative number of allocations : %lu\n", - mem_n_allocations); - - mem_last_print_info = mem_n_created_heaps; - - mutex_exit(&mem_hash_mutex); - - mem_pool_print_info(outfile, mem_comm_pool); - - /* mem_validate(); */ - - /* fclose(outfile); */ -#endif -} - -/*****************************************************************//** -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers. Can only be used in the debug version. */ -UNIV_INTERN -void -mem_print_info(void) -/*================*/ -{ - mem_print_info_low(TRUE); -} - -/*****************************************************************//** -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers since the last ..._print_info or..._print_new_info. */ -UNIV_INTERN -void -mem_print_new_info(void) -/*====================*/ -{ - mem_print_info_low(FALSE); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/mem/mem0mem.cc b/storage/xtradb/mem/mem0mem.cc deleted file mode 100644 index e066aff5b30..00000000000 --- a/storage/xtradb/mem/mem0mem.cc +++ /dev/null @@ -1,583 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file mem/mem0mem.cc -The memory management - -Created 6/9/1994 Heikki Tuuri -*************************************************************************/ - -#include "mem0mem.h" -#ifdef UNIV_NONINL -#include "mem0mem.ic" -#endif - -#include "buf0buf.h" -#include "srv0srv.h" -#include "mem0dbg.cc" -#include <stdarg.h> - -/* - THE MEMORY MANAGEMENT - ===================== - -The basic element of the memory management is called a memory -heap. A memory heap is conceptually a -stack from which memory can be allocated. The stack may grow infinitely. -The top element of the stack may be freed, or -the whole stack can be freed at one time. The advantage of the -memory heap concept is that we can avoid using the malloc and free -functions of C which are quite expensive, for example, on the Solaris + GCC -system (50 MHz Sparc, 1993) the pair takes 3 microseconds, -on Win NT + 100MHz Pentium, 2.5 microseconds. -When we use a memory heap, -we can allocate larger blocks of memory at a time and thus -reduce overhead. Slightly more efficient the method is when we -allocate the memory from the index page buffer pool, as we can -claim a new page fast. This is called buffer allocation. -When we allocate the memory from the dynamic memory of the -C environment, that is called dynamic allocation. - -The default way of operation of the memory heap is the following. -First, when the heap is created, an initial block of memory is -allocated. In dynamic allocation this may be about 50 bytes. -If more space is needed, additional blocks are allocated -and they are put into a linked list. 
-After the initial block, each allocated block is twice the size of the -previous, until a threshold is attained, after which the sizes -of the blocks stay the same. An exception is, of course, the case -where the caller requests a memory buffer whose size is -bigger than the threshold. In that case a block big enough must -be allocated. - -The heap is physically arranged so that if the current block -becomes full, a new block is allocated and always inserted in the -chain of blocks as the last block. - -In the debug version of the memory management, all the allocated -heaps are kept in a list (which is implemented as a hash table). -Thus we can notice if the caller tries to free an already freed -heap. In addition, each buffer given to the caller contains -a start field at the start and a trailer field at the end of the buffer. - -The start field has the following content: -A. sizeof(ulint) bytes of field length (in the standard byte order) -B. sizeof(ulint) bytes of check field (a random number) - -The trailer field contains: -A. sizeof(ulint) bytes of check field (the same random number as at the start) - -Thus we can notice if something has been copied over the -borders of the buffer, which is illegal. -The memory in the buffers is initialized to a random byte sequence. -After freeing, all the blocks in the heap are set to random bytes -to help us discover errors which result from the use of -buffers in an already freed heap. */ - -#ifdef MEM_PERIODIC_CHECK - -ibool mem_block_list_inited; -/* List of all mem blocks allocated; protected by the mem_comm_pool mutex */ -UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list; - -#endif - -/**********************************************************************//** -Duplicates a NUL-terminated string, allocated from a memory heap. 
-@return own: a copy of the string */ -UNIV_INTERN -char* -mem_heap_strdup( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap where string is allocated */ - const char* str) /*!< in: string to be copied */ -{ - return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1))); -} - -/**********************************************************************//** -Duplicate a block of data, allocated from a memory heap. -@return own: a copy of the data */ -UNIV_INTERN -void* -mem_heap_dup( -/*=========*/ - mem_heap_t* heap, /*!< in: memory heap where copy is allocated */ - const void* data, /*!< in: data to be copied */ - ulint len) /*!< in: length of data, in bytes */ -{ - return(memcpy(mem_heap_alloc(heap, len), data, len)); -} - -/**********************************************************************//** -Concatenate two strings and return the result, using a memory heap. -@return own: the result */ -UNIV_INTERN -char* -mem_heap_strcat( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap where string is allocated */ - const char* s1, /*!< in: string 1 */ - const char* s2) /*!< in: string 2 */ -{ - char* s; - ulint s1_len = strlen(s1); - ulint s2_len = strlen(s2); - - s = static_cast<char*>(mem_heap_alloc(heap, s1_len + s2_len + 1)); - - memcpy(s, s1, s1_len); - memcpy(s + s1_len, s2, s2_len); - - s[s1_len + s2_len] = '\0'; - - return(s); -} - - -/****************************************************************//** -Helper function for mem_heap_printf. -@return length of formatted string, including terminating NUL */ -static -ulint -mem_heap_printf_low( -/*================*/ - char* buf, /*!< in/out: buffer to store formatted string - in, or NULL to just calculate length */ - const char* format, /*!< in: format string */ - va_list ap) /*!< in: arguments */ -{ - ulint len = 0; - - while (*format) { - - /* Does this format specifier have the 'l' length modifier. */ - ibool is_long = FALSE; - - /* Length of one parameter. 
*/ - size_t plen; - - if (*format++ != '%') { - /* Non-format character. */ - - len++; - - if (buf) { - *buf++ = *(format - 1); - } - - continue; - } - - if (*format == 'l') { - is_long = TRUE; - format++; - } - - switch (*format++) { - case 's': - /* string */ - { - char* s = va_arg(ap, char*); - - /* "%ls" is a non-sensical format specifier. */ - ut_a(!is_long); - - plen = strlen(s); - len += plen; - - if (buf) { - memcpy(buf, s, plen); - buf += plen; - } - } - - break; - - case 'u': - /* unsigned int */ - { - char tmp[32]; - unsigned long val; - - /* We only support 'long' values for now. */ - ut_a(is_long); - - val = va_arg(ap, unsigned long); - - plen = sprintf(tmp, "%lu", val); - len += plen; - - if (buf) { - memcpy(buf, tmp, plen); - buf += plen; - } - } - - break; - - case '%': - - /* "%l%" is a non-sensical format specifier. */ - ut_a(!is_long); - - len++; - - if (buf) { - *buf++ = '%'; - } - - break; - - default: - ut_error; - } - } - - /* For the NUL character. */ - len++; - - if (buf) { - *buf = '\0'; - } - - return(len); -} - -/****************************************************************//** -A simple sprintf replacement that dynamically allocates the space for the -formatted string from the given heap. This supports a very limited set of -the printf syntax: types 's' and 'u' and length modifier 'l' (which is -required for the 'u' type). -@return heap-allocated formatted string */ -UNIV_INTERN -char* -mem_heap_printf( -/*============*/ - mem_heap_t* heap, /*!< in: memory heap */ - const char* format, /*!< in: format string */ - ...) -{ - va_list ap; - char* str; - ulint len; - - /* Calculate length of string */ - len = 0; - va_start(ap, format); - len = mem_heap_printf_low(NULL, format, ap); - va_end(ap); - - /* Now create it for real. 
*/ - str = static_cast<char*>(mem_heap_alloc(heap, len)); - va_start(ap, format); - mem_heap_printf_low(str, format, ap); - va_end(ap); - - return(str); -} - -/***************************************************************//** -Creates a memory heap block where data can be allocated. -@return own: memory heap block, NULL if did not succeed (only possible -for MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INTERN -mem_block_t* -mem_heap_create_block_func( -/*=======================*/ - mem_heap_t* heap, /*!< in: memory heap or NULL if first block - should be created */ - ulint n, /*!< in: number of bytes needed for user data */ -#ifdef UNIV_DEBUG - const char* file_name,/*!< in: file name where created */ - ulint line, /*!< in: line where created */ -#endif /* UNIV_DEBUG */ - ulint type) /*!< in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ -{ -#ifndef UNIV_HOTBACKUP - buf_block_t* buf_block = NULL; -#endif /* !UNIV_HOTBACKUP */ - mem_block_t* block; - ulint len; - - ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER) - || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH)); - - if (heap && heap->magic_n != MEM_BLOCK_MAGIC_N) { - mem_analyze_corruption(heap); - } - - /* In dynamic allocation, calculate the size: block header + data. 
*/ - len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n); - -#ifndef UNIV_HOTBACKUP - if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) { - - ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF); - - block = static_cast<mem_block_t*>( - mem_area_alloc(&len, mem_comm_pool)); - } else { - len = UNIV_PAGE_SIZE; - - if ((type & MEM_HEAP_BTR_SEARCH) && heap) { - /* We cannot allocate the block from the - buffer pool, but must get the free block from - the heap header free block field */ - - buf_block = static_cast<buf_block_t*>(heap->free_block); - heap->free_block = NULL; - - if (UNIV_UNLIKELY(!buf_block)) { - - return(NULL); - } - } else { - buf_block = buf_block_alloc(NULL); - } - - block = (mem_block_t*) buf_block->frame; - } - - if(!block) { - ib_logf(IB_LOG_LEVEL_FATAL, - " InnoDB: Unable to allocate memory of size %lu.\n", - len); - } - block->buf_block = buf_block; - block->free_block = NULL; -#else /* !UNIV_HOTBACKUP */ - len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n); - block = ut_malloc(len); - ut_ad(block); -#endif /* !UNIV_HOTBACKUP */ - - block->magic_n = MEM_BLOCK_MAGIC_N; - ut_d(ut_strlcpy_rev(block->file_name, file_name, - sizeof(block->file_name))); - ut_d(block->line = line); - -#ifdef MEM_PERIODIC_CHECK - mutex_enter(&(mem_comm_pool->mutex)); - - if (!mem_block_list_inited) { - mem_block_list_inited = TRUE; - UT_LIST_INIT(mem_block_list); - } - - UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block); - - mutex_exit(&(mem_comm_pool->mutex)); -#endif - mem_block_set_len(block, len); - mem_block_set_type(block, type); - mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE); - mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE); - - if (UNIV_UNLIKELY(heap == NULL)) { - /* This is the first block of the heap. The field - total_size should be initialized here */ - block->total_size = len; - } else { - /* Not the first allocation for the heap. This block's - total_length field should be set to undefined. 
*/ - ut_d(block->total_size = ULINT_UNDEFINED); - UNIV_MEM_INVALID(&block->total_size, - sizeof block->total_size); - - heap->total_size += len; - } - - ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len); - - return(block); -} - -/***************************************************************//** -Adds a new block to a memory heap. -@return created block, NULL if did not succeed (only possible for -MEM_HEAP_BTR_SEARCH type heaps) */ -UNIV_INTERN -mem_block_t* -mem_heap_add_block( -/*===============*/ - mem_heap_t* heap, /*!< in: memory heap */ - ulint n) /*!< in: number of bytes user needs */ -{ - mem_block_t* block; - mem_block_t* new_block; - ulint new_size; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - /* We have to allocate a new block. The size is always at least - doubled until the standard size is reached. After that the size - stays the same, except in cases where the caller needs more space. */ - - new_size = 2 * mem_block_get_len(block); - - if (heap->type != MEM_HEAP_DYNAMIC) { - /* From the buffer pool we allocate buffer frames */ - ut_a(n <= MEM_MAX_ALLOC_IN_BUF); - - if (new_size > MEM_MAX_ALLOC_IN_BUF) { - new_size = MEM_MAX_ALLOC_IN_BUF; - } - } else if (new_size > MEM_BLOCK_STANDARD_SIZE) { - - new_size = MEM_BLOCK_STANDARD_SIZE; - } - - if (new_size < n) { - new_size = n; - } - - new_block = mem_heap_create_block(heap, new_size, heap->type, - heap->file_name, heap->line); - if (new_block == NULL) { - - return(NULL); - } - - /* Add the new block as the last block */ - - UT_LIST_INSERT_AFTER(list, heap->base, block, new_block); - - return(new_block); -} - -/******************************************************************//** -Frees a block from a memory heap. 
*/ -UNIV_INTERN -void -mem_heap_block_free( -/*================*/ - mem_heap_t* heap, /*!< in: heap */ - mem_block_t* block) /*!< in: block to free */ -{ - ulint type; - ulint len; -#ifndef UNIV_HOTBACKUP - buf_block_t* buf_block; - - buf_block = static_cast<buf_block_t*>(block->buf_block); -#endif /* !UNIV_HOTBACKUP */ - - if (block->magic_n != MEM_BLOCK_MAGIC_N) { - mem_analyze_corruption(block); - } - - UT_LIST_REMOVE(list, heap->base, block); - -#ifdef MEM_PERIODIC_CHECK - mutex_enter(&(mem_comm_pool->mutex)); - - UT_LIST_REMOVE(mem_block_list, mem_block_list, block); - - mutex_exit(&(mem_comm_pool->mutex)); -#endif - - ut_ad(heap->total_size >= block->len); - heap->total_size -= block->len; - - type = heap->type; - len = block->len; - block->magic_n = MEM_FREED_BLOCK_MAGIC_N; - -#ifndef UNIV_HOTBACKUP - if (!srv_use_sys_malloc) { -#ifdef UNIV_MEM_DEBUG - /* In the debug version we set the memory to a random - combination of hex 0xDE and 0xAD. */ - - mem_erase_buf((byte*) block, len); -#else /* UNIV_MEM_DEBUG */ - UNIV_MEM_ASSERT_AND_FREE(block, len); -#endif /* UNIV_MEM_DEBUG */ - - } - if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) { - - ut_ad(!buf_block); - mem_area_free(block, mem_comm_pool); - } else { - ut_ad(type & MEM_HEAP_BUFFER); - - buf_block_free(buf_block); - } -#else /* !UNIV_HOTBACKUP */ -#ifdef UNIV_MEM_DEBUG - /* In the debug version we set the memory to a random - combination of hex 0xDE and 0xAD. */ - - mem_erase_buf((byte*) block, len); -#else /* UNIV_MEM_DEBUG */ - UNIV_MEM_ASSERT_AND_FREE(block, len); -#endif /* UNIV_MEM_DEBUG */ - ut_free(block); -#endif /* !UNIV_HOTBACKUP */ -} - -#ifndef UNIV_HOTBACKUP -/******************************************************************//** -Frees the free_block field from a memory heap. 
*/ -UNIV_INTERN -void -mem_heap_free_block_free( -/*=====================*/ - mem_heap_t* heap) /*!< in: heap */ -{ - if (UNIV_LIKELY_NULL(heap->free_block)) { - - buf_block_free(static_cast<buf_block_t*>(heap->free_block)); - - heap->free_block = NULL; - } -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef MEM_PERIODIC_CHECK -/******************************************************************//** -Goes through the list of all allocated mem blocks, checks their magic -numbers, and reports possible corruption. */ -UNIV_INTERN -void -mem_validate_all_blocks(void) -/*=========================*/ -{ - mem_block_t* block; - - mutex_enter(&(mem_comm_pool->mutex)); - - block = UT_LIST_GET_FIRST(mem_block_list); - - while (block) { - if (block->magic_n != MEM_BLOCK_MAGIC_N) { - mem_analyze_corruption(block); - } - - block = UT_LIST_GET_NEXT(mem_block_list, block); - } - - mutex_exit(&(mem_comm_pool->mutex)); -} -#endif diff --git a/storage/xtradb/mem/mem0pool.cc b/storage/xtradb/mem/mem0pool.cc deleted file mode 100644 index 42d0417c768..00000000000 --- a/storage/xtradb/mem/mem0pool.cc +++ /dev/null @@ -1,728 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file mem/mem0pool.cc -The lowest-level memory management - -Created 5/12/1997 Heikki Tuuri -*************************************************************************/ - -#include "mem0pool.h" -#ifdef UNIV_NONINL -#include "mem0pool.ic" -#endif - -#include "srv0srv.h" -#include "sync0sync.h" -#include "ut0mem.h" -#include "ut0lst.h" -#include "ut0byte.h" -#include "mem0mem.h" -#include "srv0start.h" - -/* We would like to use also the buffer frames to allocate memory. This -would be desirable, because then the memory consumption of the database -would be fixed, and we might even lock the buffer pool to the main memory. -The problem here is that the buffer management routines can themselves call -memory allocation, while the buffer pool mutex is reserved. - -The main components of the memory consumption are: - -1. buffer pool, -2. parsed and optimized SQL statements, -3. data dictionary cache, -4. log buffer, -5. locks for each transaction, -6. hash table for the adaptive index, -7. state and buffers for each SQL query currently being executed, -8. session for each user, and -9. stack for each OS thread. - -Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially -consume very much memory. Items 7 and 8 should consume quite little memory, -and the OS should take care of item 9, which too should consume little memory. - -A solution to the memory management: - -1. the buffer pool size is set separately; -2. log buffer size is set separately; -3. the common pool size for all the other entries, except 8, is set separately. - -Problems: we may waste memory if the common pool is set too big. 
Another -problem is the locks, which may take very much space in big transactions. -Then the shared pool size should be set very big. We can allow locks to take -space from the buffer pool, but the SQL optimizer is then unaware of the -usable size of the buffer pool. We could also combine the objects in the -common pool and the buffers in the buffer pool into a single LRU list and -manage it uniformly, but this approach does not take into account the parsing -and other costs unique to SQL statements. - -The locks for a transaction can be seen as a part of the state of the -transaction. Hence, they should be stored in the common pool. We still -have the problem of a very big update transaction, for example, which -will set very many x-locks on rows, and the locks will consume a lot -of memory, say, half of the buffer pool size. - -Another problem is what to do if we are not able to malloc a requested -block of memory from the common pool. Then we can request memory from -the operating system. If it does not help, a system error results. - -Because 5 and 6 may potentially consume very much memory, we let them grow -into the buffer pool. We may let the locks of a transaction take frames -from the buffer pool, when the corresponding memory heap block has grown to -the size of a buffer frame. Similarly for the hash node cells of the locks, -and for the adaptive index. Thus, for each individual transaction, its locks -can occupy at most about the size of the buffer frame of memory in the common -pool, and after that its locks will grow into the buffer pool. */ - -/** Mask used to extract the free bit from area->size */ -#define MEM_AREA_FREE 1 - -/** The smallest memory area total size */ -#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE) - - -/** Data structure for a memory pool. The space is allocated using the buddy -algorithm, where free list i contains areas of size 2 to power i. 
*/ -struct mem_pool_t{ - byte* buf; /*!< memory pool */ - ulint size; /*!< memory common pool size */ - ulint reserved; /*!< amount of currently allocated - memory */ - ib_mutex_t mutex; /*!< mutex protecting this struct */ - UT_LIST_BASE_NODE_T(mem_area_t) - free_list[64]; /*!< lists of free memory areas: an - area is put to the list whose number - is the 2-logarithm of the area size */ -}; - -/** The common memory pool */ -UNIV_INTERN mem_pool_t* mem_comm_pool = NULL; - -#ifdef UNIV_PFS_MUTEX -/* Key to register mutex in mem_pool_t with performance schema */ -UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/* We use this counter to check that the mem pool mutex does not leak; -this is to track a strange assertion failure reported at -mysql@lists.mysql.com */ - -UNIV_INTERN ulint mem_n_threads_inside = 0; - -/********************************************************************//** -Reserves the mem pool mutex if we are not in server shutdown. Use -this function only in memory free functions, since only memory -free functions are used during server shutdown. */ -UNIV_INLINE -void -mem_pool_mutex_enter( -/*=================*/ - mem_pool_t* pool) /*!< in: memory pool */ -{ - if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { - mutex_enter(&(pool->mutex)); - } -} - -/********************************************************************//** -Releases the mem pool mutex if we are not in server shutdown. As -its corresponding mem_pool_mutex_enter() function, use it only -in memory free functions */ -UNIV_INLINE -void -mem_pool_mutex_exit( -/*================*/ - mem_pool_t* pool) /*!< in: memory pool */ -{ - if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) { - mutex_exit(&(pool->mutex)); - } -} - -/********************************************************************//** -Returns memory area size. 
-@return size */ -UNIV_INLINE -ulint -mem_area_get_size( -/*==============*/ - mem_area_t* area) /*!< in: area */ -{ - return(area->size_and_free & ~MEM_AREA_FREE); -} - -/********************************************************************//** -Sets memory area size. */ -UNIV_INLINE -void -mem_area_set_size( -/*==============*/ - mem_area_t* area, /*!< in: area */ - ulint size) /*!< in: size */ -{ - area->size_and_free = (area->size_and_free & MEM_AREA_FREE) - | size; -} - -/********************************************************************//** -Returns memory area free bit. -@return TRUE if free */ -UNIV_INLINE -ibool -mem_area_get_free( -/*==============*/ - mem_area_t* area) /*!< in: area */ -{ -#if TRUE != MEM_AREA_FREE -# error "TRUE != MEM_AREA_FREE" -#endif - return(area->size_and_free & MEM_AREA_FREE); -} - -/********************************************************************//** -Sets memory area free bit. */ -UNIV_INLINE -void -mem_area_set_free( -/*==============*/ - mem_area_t* area, /*!< in: area */ - ibool free) /*!< in: free bit value */ -{ -#if TRUE != MEM_AREA_FREE -# error "TRUE != MEM_AREA_FREE" -#endif - area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE) - | free; -} - -/********************************************************************//** -Creates a memory pool. 
-@return memory pool */ -UNIV_INTERN -mem_pool_t* -mem_pool_create( -/*============*/ - ulint size) /*!< in: pool size in bytes */ -{ - mem_pool_t* pool; - mem_area_t* area; - ulint i; - ulint used; - - pool = static_cast<mem_pool_t*>(ut_malloc(sizeof(mem_pool_t))); - - pool->buf = static_cast<byte*>(ut_malloc_low(size, TRUE)); - pool->size = size; - - mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL); - - /* Initialize the free lists */ - - for (i = 0; i < 64; i++) { - - UT_LIST_INIT(pool->free_list[i]); - } - - used = 0; - - while (size - used >= MEM_AREA_MIN_SIZE) { - - i = ut_2_log(size - used); - - if (ut_2_exp(i) > size - used) { - - /* ut_2_log rounds upward */ - - i--; - } - - area = (mem_area_t*)(pool->buf + used); - - mem_area_set_size(area, ut_2_exp(i)); - mem_area_set_free(area, TRUE); - UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area, - ut_2_exp(i) - MEM_AREA_EXTRA_SIZE); - - UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); - - used = used + ut_2_exp(i); - } - - ut_ad(size >= used); - - pool->reserved = 0; - - return(pool); -} - -/********************************************************************//** -Frees a memory pool. */ -UNIV_INTERN -void -mem_pool_free( -/*==========*/ - mem_pool_t* pool) /*!< in, own: memory pool */ -{ - mutex_free(&pool->mutex); - ut_free(pool->buf); - ut_free(pool); -} - -/********************************************************************//** -Fills the specified free list. 
-@return TRUE if we were able to insert a block to the free list */ -static -ibool -mem_pool_fill_free_list( -/*====================*/ - ulint i, /*!< in: free list index */ - mem_pool_t* pool) /*!< in: memory pool */ -{ - mem_area_t* area; - mem_area_t* area2; - ibool ret; - - ut_ad(mutex_own(&(pool->mutex))); - - if (UNIV_UNLIKELY(i >= 63)) { - /* We come here when we have run out of space in the - memory pool: */ - - return(FALSE); - } - - area = UT_LIST_GET_FIRST(pool->free_list[i + 1]); - - if (area == NULL) { - if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: mem pool free list %lu" - " length is %lu\n" - "InnoDB: though the list is empty!\n", - (ulong) i + 1, - (ulong) - UT_LIST_GET_LEN(pool->free_list[i + 1])); - } - - ret = mem_pool_fill_free_list(i + 1, pool); - - if (ret == FALSE) { - - return(FALSE); - } - - area = UT_LIST_GET_FIRST(pool->free_list[i + 1]); - } - - if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) { - mem_analyze_corruption(area); - - ut_error; - } - - UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area); - - area2 = (mem_area_t*)(((byte*) area) + ut_2_exp(i)); - UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE); - - mem_area_set_size(area2, ut_2_exp(i)); - mem_area_set_free(area2, TRUE); - - UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2); - - mem_area_set_size(area, ut_2_exp(i)); - - UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); - - return(TRUE); -} - -/********************************************************************//** -Allocates memory from a pool. NOTE: This low-level function should only be -used in mem0mem.*! 
-@return own: allocated memory buffer */ -UNIV_INTERN -void* -mem_area_alloc( -/*===========*/ - ulint* psize, /*!< in: requested size in bytes; for optimum - space usage, the size should be a power of 2 - minus MEM_AREA_EXTRA_SIZE; - out: allocated size in bytes (greater than - or equal to the requested size) */ - mem_pool_t* pool) /*!< in: memory pool */ -{ - mem_area_t* area; - ulint size; - ulint n; - ibool ret; - - /* If we are using os allocator just make a simple call - to malloc */ - if (UNIV_LIKELY(srv_use_sys_malloc)) { - return(malloc(*psize)); - } - - size = *psize; - n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE)); - - mutex_enter(&(pool->mutex)); - mem_n_threads_inside++; - - ut_a(mem_n_threads_inside == 1); - - area = UT_LIST_GET_FIRST(pool->free_list[n]); - - if (area == NULL) { - ret = mem_pool_fill_free_list(n, pool); - - if (ret == FALSE) { - /* Out of memory in memory pool: we try to allocate - from the operating system with the regular malloc: */ - - mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); - - return(ut_malloc(size)); - } - - area = UT_LIST_GET_FIRST(pool->free_list[n]); - } - - if (!mem_area_get_free(area)) { - fprintf(stderr, - "InnoDB: Error: Removing element from mem pool" - " free list %lu though the\n" - "InnoDB: element is not marked free!\n", - (ulong) n); - - mem_analyze_corruption(area); - - /* Try to analyze a strange assertion failure reported at - mysql@lists.mysql.com where the free bit IS 1 in the - hex dump above */ - - if (mem_area_get_free(area)) { - fprintf(stderr, - "InnoDB: Probably a race condition" - " because now the area is marked free!\n"); - } - - ut_error; - } - - if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) { - fprintf(stderr, - "InnoDB: Error: Removing element from mem pool" - " free list %lu\n" - "InnoDB: though the list length is 0!\n", - (ulong) n); - mem_analyze_corruption(area); - - ut_error; - } - - ut_ad(mem_area_get_size(area) == ut_2_exp(n)); - - mem_area_set_free(area, 
FALSE); - - UT_LIST_REMOVE(free_list, pool->free_list[n], area); - - pool->reserved += mem_area_get_size(area); - - mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); - - ut_ad(mem_pool_validate(pool)); - - *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE; - UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*) area, *psize); - - return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*) area))); -} - -/********************************************************************//** -Gets the buddy of an area, if it exists in pool. -@return the buddy, NULL if no buddy in pool */ -UNIV_INLINE -mem_area_t* -mem_area_get_buddy( -/*===============*/ - mem_area_t* area, /*!< in: memory area */ - ulint size, /*!< in: memory area size */ - mem_pool_t* pool) /*!< in: memory pool */ -{ - mem_area_t* buddy; - - ut_ad(size != 0); - - if (((((byte*) area) - pool->buf) % (2 * size)) == 0) { - - /* The buddy is in a higher address */ - - buddy = (mem_area_t*)(((byte*) area) + size); - - if ((((byte*) buddy) - pool->buf) + size > pool->size) { - - /* The buddy is not wholly contained in the pool: - there is no buddy */ - - buddy = NULL; - } - } else { - /* The buddy is in a lower address; NOTE that area cannot - be at the pool lower end, because then we would end up to - the upper branch in this if-clause: the remainder would be - 0 */ - - buddy = (mem_area_t*)(((byte*) area) - size); - } - - return(buddy); -} - -/********************************************************************//** -Frees memory to a pool. 
*/ -UNIV_INTERN -void -mem_area_free( -/*==========*/ - void* ptr, /*!< in, own: pointer to allocated memory - buffer */ - mem_pool_t* pool) /*!< in: memory pool */ -{ - mem_area_t* area; - mem_area_t* buddy; - void* new_ptr; - ulint size; - ulint n; - - if (UNIV_LIKELY(srv_use_sys_malloc)) { - free(ptr); - - return; - } - - /* It may be that the area was really allocated from the OS with - regular malloc: check if ptr points within our memory pool */ - - if ((byte*) ptr < pool->buf || (byte*) ptr >= pool->buf + pool->size) { - ut_free(ptr); - - return; - } - - area = (mem_area_t*) (((byte*) ptr) - MEM_AREA_EXTRA_SIZE); - - if (mem_area_get_free(area)) { - fprintf(stderr, - "InnoDB: Error: Freeing element to mem pool" - " free list though the\n" - "InnoDB: element is marked free!\n"); - - mem_analyze_corruption(area); - ut_error; - } - - size = mem_area_get_size(area); - UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE); - - if (size == 0) { - fprintf(stderr, - "InnoDB: Error: Mem area size is 0. 
Possibly a" - " memory overrun of the\n" - "InnoDB: previous allocated area!\n"); - - mem_analyze_corruption(area); - ut_error; - } - -#ifdef UNIV_LIGHT_MEM_DEBUG - if (((byte*) area) + size < pool->buf + pool->size) { - - ulint next_size; - - next_size = mem_area_get_size( - (mem_area_t*)(((byte*) area) + size)); - if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) { - fprintf(stderr, - "InnoDB: Error: Memory area size %lu," - " next area size %lu not a power of 2!\n" - "InnoDB: Possibly a memory overrun of" - " the buffer being freed here.\n", - (ulong) size, (ulong) next_size); - mem_analyze_corruption(area); - - ut_error; - } - } -#endif - buddy = mem_area_get_buddy(area, size, pool); - - n = ut_2_log(size); - - mem_pool_mutex_enter(pool); - mem_n_threads_inside++; - - ut_a(mem_n_threads_inside == 1); - - if (buddy && mem_area_get_free(buddy) - && (size == mem_area_get_size(buddy))) { - - /* The buddy is in a free list */ - - if ((byte*) buddy < (byte*) area) { - new_ptr = ((byte*) buddy) + MEM_AREA_EXTRA_SIZE; - - mem_area_set_size(buddy, 2 * size); - mem_area_set_free(buddy, FALSE); - } else { - new_ptr = ptr; - - mem_area_set_size(area, 2 * size); - } - - /* Remove the buddy from its free list and merge it to area */ - - UT_LIST_REMOVE(free_list, pool->free_list[n], buddy); - - pool->reserved += ut_2_exp(n); - - mem_n_threads_inside--; - mem_pool_mutex_exit(pool); - - mem_area_free(new_ptr, pool); - - return; - } else { - UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area); - - mem_area_set_free(area, TRUE); - - ut_ad(pool->reserved >= size); - - pool->reserved -= size; - } - - mem_n_threads_inside--; - mem_pool_mutex_exit(pool); - - ut_ad(mem_pool_validate(pool)); -} - -/********************************************************************//** -Validates a memory pool. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -mem_pool_validate( -/*==============*/ - mem_pool_t* pool) /*!< in: memory pool */ -{ - mem_area_t* area; - mem_area_t* buddy; - ulint free; - ulint i; - - mem_pool_mutex_enter(pool); - - free = 0; - - for (i = 0; i < 64; i++) { - - UT_LIST_CHECK(free_list, mem_area_t, pool->free_list[i]); - - for (area = UT_LIST_GET_FIRST(pool->free_list[i]); - area != 0; - area = UT_LIST_GET_NEXT(free_list, area)) { - - ut_a(mem_area_get_free(area)); - ut_a(mem_area_get_size(area) == ut_2_exp(i)); - - buddy = mem_area_get_buddy(area, ut_2_exp(i), pool); - - ut_a(!buddy || !mem_area_get_free(buddy) - || (ut_2_exp(i) != mem_area_get_size(buddy))); - - free += ut_2_exp(i); - } - } - - ut_a(free + pool->reserved == pool->size); - - mem_pool_mutex_exit(pool); - - return(TRUE); -} - -/********************************************************************//** -Prints info of a memory pool. */ -UNIV_INTERN -void -mem_pool_print_info( -/*================*/ - FILE* outfile,/*!< in: output file to write to */ - mem_pool_t* pool) /*!< in: memory pool */ -{ - ulint i; - - mem_pool_validate(pool); - - fprintf(outfile, "INFO OF A MEMORY POOL\n"); - - mutex_enter(&(pool->mutex)); - - for (i = 0; i < 64; i++) { - if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) { - - fprintf(outfile, - "Free list length %lu for" - " blocks of size %lu\n", - (ulong) UT_LIST_GET_LEN(pool->free_list[i]), - (ulong) ut_2_exp(i)); - } - } - - fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size, - (ulong) pool->reserved); - mutex_exit(&(pool->mutex)); -} - -/********************************************************************//** -Returns the amount of reserved memory. 
-@return reserved memory in bytes */ -UNIV_INTERN -ulint -mem_pool_get_reserved( -/*==================*/ - mem_pool_t* pool) /*!< in: memory pool */ -{ - ulint reserved; - - mutex_enter(&(pool->mutex)); - - reserved = pool->reserved; - - mutex_exit(&(pool->mutex)); - - return(reserved); -} diff --git a/storage/xtradb/mtr/mtr0log.cc b/storage/xtradb/mtr/mtr0log.cc deleted file mode 100644 index 82df1df63d4..00000000000 --- a/storage/xtradb/mtr/mtr0log.cc +++ /dev/null @@ -1,620 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file mtr/mtr0log.cc -Mini-transaction log routines - -Created 12/7/1995 Heikki Tuuri -*******************************************************/ - -#include "mtr0log.h" - -#ifdef UNIV_NONINL -#include "mtr0log.ic" -#endif - -#include "buf0buf.h" -#include "dict0dict.h" -#include "log0recv.h" -#include "page0page.h" - -#ifndef UNIV_HOTBACKUP -# include "dict0boot.h" - -/********************************************************//** -Catenates n bytes to the mtr log. 
*/ -UNIV_INTERN -void -mlog_catenate_string( -/*=================*/ - mtr_t* mtr, /*!< in: mtr */ - const byte* str, /*!< in: string to write */ - ulint len) /*!< in: string length */ -{ - dyn_array_t* mlog; - - if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { - - return; - } - - mlog = &(mtr->log); - - dyn_push_string(mlog, str, len); -} - -/********************************************************//** -Writes the initial part of a log record consisting of one-byte item -type and four-byte space and page numbers. Also pushes info -to the mtr memo that a buffer page has been modified. */ -UNIV_INTERN -void -mlog_write_initial_log_record( -/*==========================*/ - const byte* ptr, /*!< in: pointer to (inside) a buffer - frame holding the file page where - modification is made */ - byte type, /*!< in: log item type: MLOG_1BYTE, ... */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - byte* log_ptr; - - ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type)); - ut_ad(type > MLOG_8BYTES); - - log_ptr = mlog_open(mtr, 11); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr); - - mlog_close(mtr, log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Parses an initial log record written by mlog_write_initial_log_record. -@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_initial_log_record( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - byte* type, /*!< out: log record type: MLOG_1BYTE, ... 
*/ - ulint* space, /*!< out: space id */ - ulint* page_no)/*!< out: page number */ -{ - if (end_ptr < ptr + 1) { - - return(NULL); - } - - *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG); - ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type)); - - ptr++; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - ptr = mach_parse_compressed(ptr, end_ptr, space); - - if (ptr == NULL) { - - return(NULL); - } - - ptr = mach_parse_compressed(ptr, end_ptr, page_no); - - return(ptr); -} - -/********************************************************//** -Parses a log record written by mlog_write_ulint or mlog_write_ull. -@return parsed record end, NULL if not a complete record or a corrupt record */ -UNIV_INTERN -byte* -mlog_parse_nbytes( -/*==============*/ - ulint type, /*!< in: log record type: MLOG_1BYTE, ... */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - byte* page, /*!< in: page where to apply the log record, or NULL */ - void* page_zip)/*!< in/out: compressed page, or NULL */ -{ - ulint offset; - ulint val; - ib_uint64_t dval; - - ut_a(type <= MLOG_8BYTES); - if (end_ptr < ptr + 2) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX || - /* scrubbing changes page type from FIL_PAGE_INDEX to - * FIL_PAGE_TYPE_ALLOCATED (rest of this assertion is below) */ - (type == MLOG_2BYTES && offset == FIL_PAGE_TYPE)); - - if (offset >= UNIV_PAGE_SIZE) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (type == MLOG_8BYTES) { - ptr = mach_ull_parse_compressed(ptr, end_ptr, &dval); - - if (ptr == NULL) { - - return(NULL); - } - - if (page) { - if (page_zip) { - mach_write_to_8 - (((page_zip_des_t*) page_zip)->data - + offset, dval); - } - mach_write_to_8(page + offset, dval); - } - - return(ptr); - } - - ptr = mach_parse_compressed(ptr, end_ptr, &val); - - if (ptr == NULL) { - - return(NULL); - } - - switch (type) { - case 
MLOG_1BYTE: - if (UNIV_UNLIKELY(val > 0xFFUL)) { - goto corrupt; - } - if (page) { - if (page_zip) { - mach_write_to_1 - (((page_zip_des_t*) page_zip)->data - + offset, val); - } - mach_write_to_1(page + offset, val); - } - break; - case MLOG_2BYTES: - if (UNIV_UNLIKELY(val > 0xFFFFUL)) { - goto corrupt; - } - if (page) { - if (page_zip) { - mach_write_to_2 - (((page_zip_des_t*) page_zip)->data - + offset, val); - } - mach_write_to_2(page + offset, val); - } - ut_a(!page || !page_zip || - fil_page_get_type(page) != FIL_PAGE_INDEX || - /* scrubbing changes page type from FIL_PAGE_INDEX to - * FIL_PAGE_TYPE_ALLOCATED */ - (type == MLOG_2BYTES && - offset == FIL_PAGE_TYPE && - val == FIL_PAGE_TYPE_ALLOCATED)); - - break; - case MLOG_4BYTES: - if (page) { - if (page_zip) { - mach_write_to_4 - (((page_zip_des_t*) page_zip)->data - + offset, val); - } - mach_write_to_4(page + offset, val); - } - break; - default: - corrupt: - recv_sys->found_corrupt_log = TRUE; - ptr = NULL; - } - - return(ptr); -} - -/********************************************************//** -Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log -record to the mini-transaction log if mtr is not NULL. 
*/ -UNIV_INTERN -void -mlog_write_ulint( -/*=============*/ - byte* ptr, /*!< in: pointer where to write */ - ulint val, /*!< in: value to write */ - byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - switch (type) { - case MLOG_1BYTE: - mach_write_to_1(ptr, val); - break; - case MLOG_2BYTES: - mach_write_to_2(ptr, val); - break; - case MLOG_4BYTES: - mach_write_to_4(ptr, val); - break; - default: - ut_error; - } - - if (mtr != 0) { - byte* log_ptr = mlog_open(mtr, 11 + 2 + 5); - - /* If no logging is requested, we may return now */ - - if (log_ptr != 0) { - - log_ptr = mlog_write_initial_log_record_fast( - ptr, type, log_ptr, mtr); - - mach_write_to_2(log_ptr, page_offset(ptr)); - log_ptr += 2; - - log_ptr += mach_write_compressed(log_ptr, val); - - mlog_close(mtr, log_ptr); - } - } -} - -/********************************************************//** -Writes 8 bytes to a file page. Writes the corresponding log -record to the mini-transaction log, only if mtr is not NULL */ -UNIV_INTERN -void -mlog_write_ull( -/*===========*/ - byte* ptr, /*!< in: pointer where to write */ - ib_uint64_t val, /*!< in: value to write */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - mach_write_to_8(ptr, val); - - if (mtr != 0) { - byte* log_ptr = mlog_open(mtr, 11 + 2 + 9); - - /* If no logging is requested, we may return now */ - if (log_ptr != 0) { - - log_ptr = mlog_write_initial_log_record_fast( - ptr, MLOG_8BYTES, log_ptr, mtr); - - mach_write_to_2(log_ptr, page_offset(ptr)); - log_ptr += 2; - - log_ptr += mach_ull_write_compressed(log_ptr, val); - - mlog_close(mtr, log_ptr); - } - } -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Writes a string to a file page buffered in the buffer pool. Writes the -corresponding log record to the mini-transaction log. 
*/ -UNIV_INTERN -void -mlog_write_string( -/*==============*/ - byte* ptr, /*!< in: pointer where to write */ - const byte* str, /*!< in: string to write */ - ulint len, /*!< in: string length */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ut_ad(ptr && mtr); - ut_a(len < UNIV_PAGE_SIZE); - - memcpy(ptr, str, len); - - mlog_log_string(ptr, len, mtr); -} - -/********************************************************//** -Logs a write of a string to a file page buffered in the buffer pool. -Writes the corresponding log record to the mini-transaction log. */ -UNIV_INTERN -void -mlog_log_string( -/*============*/ - byte* ptr, /*!< in: pointer written to */ - ulint len, /*!< in: string length */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - byte* log_ptr; - - ut_ad(ptr && mtr); - ut_ad(len <= UNIV_PAGE_SIZE); - - log_ptr = mlog_open(mtr, 30); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING, - log_ptr, mtr); - mach_write_to_2(log_ptr, page_offset(ptr)); - log_ptr += 2; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, ptr, len); -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Parses a log record written by mlog_write_string. 
-@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_string( -/*==============*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - byte* page, /*!< in: page where to apply the log record, or NULL */ - void* page_zip)/*!< in/out: compressed page, or NULL */ -{ - ulint offset; - ulint len; - - ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX); - - if (end_ptr < ptr + 4) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - len = mach_read_from_2(ptr); - ptr += 2; - - if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (end_ptr < ptr + len) { - - return(NULL); - } - - if (page) { - if (page_zip) { - memcpy(((page_zip_des_t*) page_zip)->data - + offset, ptr, len); - } - memcpy(page + offset, ptr, len); - } - - return(ptr + len); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************//** -Opens a buffer for mlog, writes the initial log record and, -if needed, the field lengths of an index. 
-@return buffer, NULL if log mode MTR_LOG_NONE */ -UNIV_INTERN -byte* -mlog_open_and_write_index( -/*======================*/ - mtr_t* mtr, /*!< in: mtr */ - const byte* rec, /*!< in: index record or page */ - const dict_index_t* index, /*!< in: record descriptor */ - byte type, /*!< in: log item type */ - ulint size) /*!< in: requested buffer size in bytes - (if 0, calls mlog_close() and - returns NULL) */ -{ - byte* log_ptr; - const byte* log_start; - const byte* log_end; - - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - if (!page_rec_is_comp(rec)) { - log_start = log_ptr = mlog_open(mtr, 11 + size); - if (!log_ptr) { - return(NULL); /* logging is disabled */ - } - log_ptr = mlog_write_initial_log_record_fast(rec, type, - log_ptr, mtr); - log_end = log_ptr + 11 + size; - } else { - ulint i; - ulint n = dict_index_get_n_fields(index); - /* total size needed */ - ulint total = 11 + size + (n + 2) * 2; - ulint alloc = total; - /* allocate at most DYN_ARRAY_DATA_SIZE at a time */ - if (alloc > DYN_ARRAY_DATA_SIZE) { - alloc = DYN_ARRAY_DATA_SIZE; - } - log_start = log_ptr = mlog_open(mtr, alloc); - if (!log_ptr) { - return(NULL); /* logging is disabled */ - } - log_end = log_ptr + alloc; - log_ptr = mlog_write_initial_log_record_fast(rec, type, - log_ptr, mtr); - mach_write_to_2(log_ptr, n); - log_ptr += 2; - mach_write_to_2(log_ptr, - dict_index_get_n_unique_in_tree(index)); - log_ptr += 2; - for (i = 0; i < n; i++) { - dict_field_t* field; - const dict_col_t* col; - ulint len; - - field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(field); - len = field->fixed_len; - ut_ad(len < 0x7fff); - if (len == 0 - && (col->len > 255 || col->mtype == DATA_BLOB)) { - /* variable-length field - with maximum length > 255 */ - len = 0x7fff; - } - if (col->prtype & DATA_NOT_NULL) { - len |= 0x8000; - } - if (log_ptr + 2 > log_end) { - mlog_close(mtr, log_ptr); - ut_a(total > (ulint) (log_ptr - log_start)); - total -= log_ptr - 
log_start; - alloc = total; - if (alloc > DYN_ARRAY_DATA_SIZE) { - alloc = DYN_ARRAY_DATA_SIZE; - } - log_start = log_ptr = mlog_open(mtr, alloc); - if (!log_ptr) { - return(NULL); /* logging is disabled */ - } - log_end = log_ptr + alloc; - } - mach_write_to_2(log_ptr, len); - log_ptr += 2; - } - } - if (size == 0) { - mlog_close(mtr, log_ptr); - log_ptr = NULL; - } else if (log_ptr + size > log_end) { - mlog_close(mtr, log_ptr); - log_ptr = mlog_open(mtr, size); - } - return(log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Parses a log record written by mlog_open_and_write_index. -@return parsed record end, NULL if not a complete record */ -UNIV_INTERN -byte* -mlog_parse_index( -/*=============*/ - byte* ptr, /*!< in: buffer */ - const byte* end_ptr,/*!< in: buffer end */ - ibool comp, /*!< in: TRUE=compact row format */ - dict_index_t** index) /*!< out, own: dummy index */ -{ - ulint i, n, n_uniq; - dict_table_t* table; - dict_index_t* ind; - - ut_ad(comp == FALSE || comp == TRUE); - - if (comp) { - if (end_ptr < ptr + 4) { - return(NULL); - } - n = mach_read_from_2(ptr); - ptr += 2; - n_uniq = mach_read_from_2(ptr); - ptr += 2; - ut_ad(n_uniq <= n); - if (end_ptr < ptr + n * 2) { - return(NULL); - } - } else { - n = n_uniq = 1; - } - table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, - comp ? DICT_TF_COMPACT : 0, 0); - ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY", - DICT_HDR_SPACE, 0, n); - ind->table = table; - ind->n_uniq = (unsigned int) n_uniq; - if (n_uniq != n) { - ut_a(n_uniq + DATA_ROLL_PTR <= n); - ind->type = DICT_CLUSTERED; - } - if (comp) { - for (i = 0; i < n; i++) { - ulint len = mach_read_from_2(ptr); - ptr += 2; - /* The high-order bit of len is the NOT NULL flag; - the rest is 0 or 0x7fff for variable-length fields, - and 1..0x7ffe for fixed-length fields. */ - dict_mem_table_add_col( - table, NULL, NULL, - ((len + 1) & 0x7fff) <= 1 - ? 
DATA_BINARY : DATA_FIXBINARY, - len & 0x8000 ? DATA_NOT_NULL : 0, - len & 0x7fff); - - dict_index_add_col(ind, table, - dict_table_get_nth_col(table, i), - 0); - } - dict_table_add_system_columns(table, table->heap); - if (n_uniq != n) { - /* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */ - ut_a(DATA_TRX_ID_LEN - == dict_index_get_nth_col(ind, DATA_TRX_ID - 1 - + n_uniq)->len); - ut_a(DATA_ROLL_PTR_LEN - == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1 - + n_uniq)->len); - ind->fields[DATA_TRX_ID - 1 + n_uniq].col - = &table->cols[n + DATA_TRX_ID]; - ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col - = &table->cols[n + DATA_ROLL_PTR]; - } - } - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - ind->cached = TRUE; - *index = ind; - return(ptr); -} diff --git a/storage/xtradb/mtr/mtr0mtr.cc b/storage/xtradb/mtr/mtr0mtr.cc deleted file mode 100644 index e564b270d00..00000000000 --- a/storage/xtradb/mtr/mtr0mtr.cc +++ /dev/null @@ -1,474 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file mtr/mtr0mtr.cc -Mini-transaction buffer - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#include "mtr0mtr.h" - -#ifdef UNIV_NONINL -#include "mtr0mtr.ic" -#endif - -#include "buf0buf.h" -#include "buf0flu.h" -#include "page0types.h" -#include "mtr0log.h" -#include "log0log.h" -#include "buf0flu.h" - -#ifndef UNIV_HOTBACKUP -# include "log0recv.h" - -/***************************************************//** -Checks if a mini-transaction is dirtying a clean page. -@return TRUE if the mtr is dirtying a clean page. */ -UNIV_INTERN -ibool -mtr_block_dirtied( -/*==============*/ - const buf_block_t* block) /*!< in: block being x-fixed */ -{ - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_ad(block->page.buf_fix_count > 0); - - /* It is OK to read oldest_modification because no - other thread can be performing a write of it and it - is only during write that the value is reset to 0. */ - return(block->page.oldest_modification == 0); -} - -/*****************************************************************//** -Releases the item in the slot given. */ -static MY_ATTRIBUTE((nonnull)) -void -mtr_memo_slot_release_func( -/*=======================*/ -#ifdef UNIV_DEBUG - mtr_t* mtr, /*!< in/out: mini-transaction */ -#endif /* UNIV_DEBUG */ - mtr_memo_slot_t* slot) /*!< in: memo slot */ -{ - void* object = slot->object; - slot->object = NULL; - - /* slot release is a local operation for the current mtr. - We must not be holding the flush_order mutex while - doing this. 
*/ - ut_ad(!log_flush_order_mutex_own()); - - switch (slot->type) { - case MTR_MEMO_PAGE_S_FIX: - case MTR_MEMO_PAGE_X_FIX: - case MTR_MEMO_BUF_FIX: - buf_page_release((buf_block_t*) object, slot->type); - break; - case MTR_MEMO_S_LOCK: - rw_lock_s_unlock((prio_rw_lock_t*) object); - break; - case MTR_MEMO_X_LOCK: - rw_lock_x_unlock((prio_rw_lock_t*) object); - break; -#ifdef UNIV_DEBUG - default: - ut_ad(slot->type == MTR_MEMO_MODIFY); - ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX)); -#endif /* UNIV_DEBUG */ - } -} - -#ifdef UNIV_DEBUG -# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(mtr, slot) -#else /* UNIV_DEBUG */ -# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(slot) -#endif /* UNIV_DEBUG */ - -/**********************************************************//** -Releases the mlocks and other objects stored in an mtr memo. -They are released in the order opposite to which they were pushed -to the memo. */ -static MY_ATTRIBUTE((nonnull)) -void -mtr_memo_pop_all( -/*=============*/ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in - commit */ - - for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo); - block; - block = dyn_array_get_prev_block(&mtr->memo, block)) { - const mtr_memo_slot_t* start - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block)); - mtr_memo_slot_t* slot - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block) - + dyn_block_get_used(block)); - - ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t))); - - while (slot-- != start) { - if (slot->object != NULL) { - mtr_memo_slot_release(mtr, slot); - } - } - } -} - -/*****************************************************************//** -Releases the item in the slot given. 
*/ -static -void -mtr_memo_slot_note_modification( -/*============================*/ - mtr_t* mtr, /*!< in: mtr */ - mtr_memo_slot_t* slot) /*!< in: memo slot */ -{ - ut_ad(mtr->modifications); - ut_ad(!srv_read_only_mode); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - - if (slot->object != NULL && slot->type == MTR_MEMO_PAGE_X_FIX) { - buf_block_t* block = (buf_block_t*) slot->object; - - ut_ad(!mtr->made_dirty || log_flush_order_mutex_own()); - buf_flush_note_modification(block, mtr); - } -} - -/**********************************************************//** -Add the modified pages to the buffer flush list. They are released -in the order opposite to which they were pushed to the memo. NOTE! It is -essential that the x-rw-lock on a modified buffer page is not released -before buf_page_note_modification is called for that page! Otherwise, -some thread might race to modify it, and the flush list sort order on -lsn would be destroyed. */ -static -void -mtr_memo_note_modifications( -/*========================*/ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(!srv_read_only_mode); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in - commit */ - - for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo); - block; - block = dyn_array_get_prev_block(&mtr->memo, block)) { - const mtr_memo_slot_t* start - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block)); - mtr_memo_slot_t* slot - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block) - + dyn_block_get_used(block)); - - ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t))); - - while (slot-- != start) { - if (slot->object != NULL) { - mtr_memo_slot_note_modification(mtr, slot); - } - } - } -} - -/************************************************************//** -Append the dirty pages to the flush list. 
*/ -static -void -mtr_add_dirtied_pages_to_flush_list( -/*================================*/ - mtr_t* mtr) /*!< in/out: mtr */ -{ - ut_ad(!srv_read_only_mode); - - /* No need to acquire log_flush_order_mutex if this mtr has - not dirtied a clean page. log_flush_order_mutex is used to - ensure ordered insertions in the flush_list. We need to - insert in the flush_list iff the page in question was clean - before modifications. */ - if (mtr->made_dirty) { - log_flush_order_mutex_enter(); - } - - /* It is now safe to release the log mutex because the - flush_order mutex will ensure that we are the first one - to insert into the flush list. */ - log_release(); - - if (mtr->modifications) { - mtr_memo_note_modifications(mtr); - } - - if (mtr->made_dirty) { - log_flush_order_mutex_exit(); - } -} - -/************************************************************//** -Writes the contents of a mini-transaction log, if any, to the database log. */ -static -void -mtr_log_reserve_and_write( -/*======================*/ - mtr_t* mtr) /*!< in/out: mtr */ -{ - dyn_array_t* mlog; - ulint data_size; - byte* first_data; - - ut_ad(!srv_read_only_mode); - - mlog = &(mtr->log); - - first_data = dyn_block_get_data(mlog); - - if (mtr->n_log_recs > 1) { - mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE); - } else { - *first_data = (byte)((ulint)*first_data - | MLOG_SINGLE_REC_FLAG); - } - - if (mlog->heap == NULL) { - ulint len; - - len = mtr->log_mode != MTR_LOG_NO_REDO - ? dyn_block_get_used(mlog) : 0; - - mtr->end_lsn = log_reserve_and_write_fast( - first_data, len, &mtr->start_lsn); - - if (mtr->end_lsn) { - - /* Success. We have the log mutex. 
- Add pages to flush list and exit */ - mtr_add_dirtied_pages_to_flush_list(mtr); - - return; - } - } else { - mutex_enter(&log_sys->mutex); - } - - data_size = dyn_array_get_data_size(mlog); - - /* Open the database log for log_write_low */ - mtr->start_lsn = log_open(data_size); - - if (mtr->log_mode == MTR_LOG_ALL) { - - for (dyn_block_t* block = mlog; - block != 0; - block = dyn_array_get_next_block(mlog, block)) { - - log_write_low( - dyn_block_get_data(block), - dyn_block_get_used(block)); - } - - } else { - ut_ad(mtr->log_mode == MTR_LOG_NONE - || mtr->log_mode == MTR_LOG_NO_REDO); - /* Do nothing */ - } - - mtr->end_lsn = log_close(); - - mtr_add_dirtied_pages_to_flush_list(mtr); -} -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************************//** -Commits a mini-transaction. */ -UNIV_INTERN -void -mtr_commit( -/*=======*/ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(!mtr->inside_ibuf); - ut_d(mtr->state = MTR_COMMITTING); - -#ifndef UNIV_HOTBACKUP - /* This is a dirty read, for debugging. */ - ut_ad(!recv_no_log_write); - - if (mtr->modifications && mtr->n_log_recs) { - ut_ad(!srv_read_only_mode); - mtr_log_reserve_and_write(mtr); - } - - mtr_memo_pop_all(mtr); -#endif /* !UNIV_HOTBACKUP */ - - dyn_array_free(&(mtr->memo)); - dyn_array_free(&(mtr->log)); -#ifdef UNIV_DEBUG_VALGRIND - /* Declare everything uninitialized except - mtr->start_lsn, mtr->end_lsn and mtr->state. */ - { - lsn_t start_lsn = mtr->start_lsn; - lsn_t end_lsn = mtr->end_lsn; - UNIV_MEM_INVALID(mtr, sizeof *mtr); - mtr->start_lsn = start_lsn; - mtr->end_lsn = end_lsn; - } -#endif /* UNIV_DEBUG_VALGRIND */ - ut_d(mtr->state = MTR_COMMITTED); -} - -#ifndef UNIV_HOTBACKUP -/***************************************************//** -Releases an object in the memo stack. 
-@return true if released */ -UNIV_INTERN -bool -mtr_memo_release( -/*=============*/ - mtr_t* mtr, /*!< in/out: mini-transaction */ - void* object, /*!< in: object */ - ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */ -{ - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - /* We cannot release a page that has been written to in the - middle of a mini-transaction. */ - ut_ad(!mtr->modifications || type != MTR_MEMO_PAGE_X_FIX); - - for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo); - block; - block = dyn_array_get_prev_block(&mtr->memo, block)) { - const mtr_memo_slot_t* start - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block)); - mtr_memo_slot_t* slot - = reinterpret_cast<mtr_memo_slot_t*>( - dyn_block_get_data(block) - + dyn_block_get_used(block)); - - ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t))); - - while (slot-- != start) { - if (object == slot->object && type == slot->type) { - mtr_memo_slot_release(mtr, slot); - return(true); - } - } - } - - return(false); -} -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************//** -Reads 1 - 4 bytes from a file page buffered in the buffer pool. -@return value read */ -UNIV_INTERN -ulint -mtr_read_ulint( -/*===========*/ - const byte* ptr, /*!< in: pointer from where to read */ - ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr MY_ATTRIBUTE((unused))) - /*!< in: mini-transaction handle */ -{ - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX)); - - return(mach_read_ulint(ptr, type)); -} - -#ifdef UNIV_DEBUG -# ifndef UNIV_HOTBACKUP -/**********************************************************//** -Checks if memo contains the given page. 
-@return TRUE if contains */ -UNIV_INTERN -ibool -mtr_memo_contains_page( -/*===================*/ - mtr_t* mtr, /*!< in: mtr */ - const byte* ptr, /*!< in: pointer to buffer frame */ - ulint type) /*!< in: type of object */ -{ - return(mtr_memo_contains(mtr, buf_block_align(ptr), type)); -} - -/*********************************************************//** -Prints info of an mtr handle. */ -UNIV_INTERN -void -mtr_print( -/*======*/ - mtr_t* mtr) /*!< in: mtr */ -{ - fprintf(stderr, - "Mini-transaction handle: memo size %lu bytes" - " log size %lu bytes\n", - (ulong) dyn_array_get_data_size(&(mtr->memo)), - (ulong) dyn_array_get_data_size(&(mtr->log))); -} -# endif /* !UNIV_HOTBACKUP */ -#endif /* UNIV_DEBUG */ - -/**********************************************************//** -Releases a buf_page stored in an mtr memo after a -savepoint. */ -UNIV_INTERN -void -mtr_release_buf_page_at_savepoint( -/*=============================*/ - mtr_t* mtr, /*!< in: mtr */ - ulint savepoint, /*!< in: savepoint */ - buf_block_t* block) /*!< in: block to release */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - memo = &(mtr->memo); - - ut_ad(dyn_array_get_data_size(memo) > savepoint); - - slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint); - - ut_ad(slot->object == block); - ut_ad(slot->type == MTR_MEMO_PAGE_S_FIX || - slot->type == MTR_MEMO_PAGE_X_FIX || - slot->type == MTR_MEMO_BUF_FIX); - - buf_page_release((buf_block_t*) slot->object, slot->type); - slot->object = NULL; -} diff --git a/storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt b/storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt deleted file mode 100644 index cf4b117e1b1..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt +++ /dev/null @@ -1,2 +0,0 @@ ---innodb-file-per-table=1 - diff --git a/storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff 
b/storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff deleted file mode 100644 index 00cda7c4435..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff +++ /dev/null @@ -1,30 +0,0 @@ ---- suite/storage_engine/autoinc_secondary.result 2012-07-12 04:34:18.153885986 +0400 -+++ suite/storage_engine/autoinc_secondary.reject 2012-07-15 17:47:03.937703666 +0400 -@@ -13,18 +13,15 @@ - 5 a - DROP TABLE t1; - CREATE TABLE t1 (a <CHAR_COLUMN>, b <INT_COLUMN> AUTO_INCREMENT, PRIMARY KEY (a,b)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --INSERT INTO t1 (a) VALUES ('a'),('b'),('b'),('c'),('a'); --SELECT LAST_INSERT_ID(); --LAST_INSERT_ID() --1 --SELECT a,b FROM t1; --a b --a 1 --a 2 --b 1 --b 2 --c 1 --DROP TABLE t1; -+ERROR 42000: Incorrect table definition; there can be only one auto column and it must be defined as a key -+# ERROR: Statement ended with errno 1075, errname ER_WRONG_AUTO_KEY (expected to succeed) -+# ------------ UNEXPECTED RESULT ------------ -+# The statement|command finished with ER_WRONG_AUTO_KEY. -+# Multi-part keys or PK or AUTO_INCREMENT (on a secondary column) or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors. -+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def. -+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped. -+# Also, this problem may cause a chain effect (more errors of different kinds in the test). 
-+# ------------------------------------------- - CREATE TABLE t1 (a <CHAR_COLUMN>, b <INT_COLUMN> AUTO_INCREMENT, PRIMARY KEY (a,b), <CUSTOM_INDEX>(b)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - INSERT INTO t1 (a) VALUES ('a'),('b'),('b'),('c'),('a'); - SELECT LAST_INSERT_ID(); diff --git a/storage/xtradb/mysql-test/storage_engine/cache_index.rdiff b/storage/xtradb/mysql-test/storage_engine/cache_index.rdiff deleted file mode 100644 index e04df87aa34..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/cache_index.rdiff +++ /dev/null @@ -1,71 +0,0 @@ ---- suite/storage_engine/cache_index.result 2012-07-15 00:22:19.822493731 +0400 -+++ suite/storage_engine/cache_index.reject 2012-07-15 17:47:18.321522834 +0400 -@@ -12,31 +12,31 @@ - SET GLOBAL <CACHE_NAME>.key_buffer_size=128*1024; - CACHE INDEX t1 INDEX (a), t2 IN <CACHE_NAME>; - Table Op Msg_type Msg_text --test.t1 assign_to_keycache status OK --test.t2 assign_to_keycache status OK -+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache -+test.t2 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache - LOAD INDEX INTO CACHE t1, t2; - Table Op Msg_type Msg_text --test.t1 preload_keys status OK --test.t2 preload_keys status OK -+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys -+test.t2 preload_keys note The storage engine for the table doesn't support preload_keys - INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d'); - SET GLOBAL <CACHE_NAME>.key_buffer_size=8*1024; - LOAD INDEX INTO CACHE t1, t2 IGNORE LEAVES; - Table Op Msg_type Msg_text --test.t1 preload_keys status OK --test.t2 preload_keys status OK -+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys -+test.t2 preload_keys note The storage engine for the table doesn't support preload_keys - SET GLOBAL <CACHE_NAME>.key_cache_age_threshold = 100, <CACHE_NAME>.key_cache_block_size = 512, 
<CACHE_NAME>.key_cache_division_limit = 1, <CACHE_NAME>.key_cache_segments=2; - INSERT INTO t1 (a,b) VALUES (5,'e'),(6,'f'); - LOAD INDEX INTO CACHE t1; - Table Op Msg_type Msg_text --test.t1 preload_keys status OK -+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys - SET GLOBAL new_<CACHE_NAME>.key_buffer_size=128*1024; - CACHE INDEX t1 IN new_<CACHE_NAME>; - Table Op Msg_type Msg_text --test.t1 assign_to_keycache status OK -+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache - INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h'); - LOAD INDEX INTO CACHE t1 IGNORE LEAVES; - Table Op Msg_type Msg_text --test.t1 preload_keys status OK -+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys - INSERT INTO t1 (a,b) VALUES (9,'i'); - DROP TABLE t2; - DROP TABLE t1; -@@ -47,11 +47,11 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - CACHE INDEX t1 IN <CACHE_NAME>; - Table Op Msg_type Msg_text --test.t1 assign_to_keycache status OK -+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache - INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'); - LOAD INDEX INTO CACHE t1; - Table Op Msg_type Msg_text --test.t1 preload_keys status OK -+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys - DROP TABLE t1; - CREATE TABLE t1 (a <INT_COLUMN>, - b <CHAR_COLUMN>, -@@ -59,11 +59,11 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - CACHE INDEX t1 IN <CACHE_NAME>; - Table Op Msg_type Msg_text --test.t1 assign_to_keycache status OK -+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache - INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'); - LOAD INDEX INTO CACHE t1; - Table Op Msg_type Msg_text --test.t1 preload_keys status OK -+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys - DROP TABLE t1; - SET GLOBAL 
<CACHE_NAME>.key_buffer_size=0; - SET GLOBAL new_<CACHE_NAME>.key_buffer_size=0; diff --git a/storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff b/storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff deleted file mode 100644 index 71c782848a6..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff +++ /dev/null @@ -1,13 +0,0 @@ ---- suite/storage_engine/checksum_table_live.result 2012-07-12 21:05:44.497062968 +0400 -+++ suite/storage_engine/checksum_table_live.reject 2012-07-15 17:47:28.105399836 +0400 -@@ -11,8 +11,8 @@ - test.t1 4272806499 - CHECKSUM TABLE t1, t2 QUICK; - Table Checksum --test.t1 4272806499 --test.t2 0 -+test.t1 NULL -+test.t2 NULL - CHECKSUM TABLE t1, t2 EXTENDED; - Table Checksum - test.t1 4272806499 diff --git a/storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt b/storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt deleted file mode 100644 index 40445305fc6..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb_log_file_size=100M diff --git a/storage/xtradb/mysql-test/storage_engine/col_opt_null.opt b/storage/xtradb/mysql-test/storage_engine/col_opt_null.opt deleted file mode 100644 index 40445305fc6..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/col_opt_null.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb_log_file_size=100M diff --git a/storage/xtradb/mysql-test/storage_engine/define_engine.inc b/storage/xtradb/mysql-test/storage_engine/define_engine.inc deleted file mode 100644 index 77e384d2351..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/define_engine.inc +++ /dev/null @@ -1,49 +0,0 @@ -########################################### -# -# This is a template of the include file define_engine.inc which -# should be placed in storage/<engine>/mysql-test/storage_engine folder. 
-# -################################ -# -# The name of the engine under test must be defined in $ENGINE variable. -# You can set it either here (uncomment and edit) or in your environment. -# -let $ENGINE = InnoDB; -# -################################ -# -# The following three variables define specific options for columns and tables. -# Normally there should be none needed, but for some engines it can be different. -# If the engine requires specific column option for all or indexed columns, -# set them inside the comment, e.g. /*!NOT NULL*/. -# Do the same for table options if needed, e.g. /*!INSERT_METHOD=LAST*/ - -let $default_col_opts = /*!*/; -let $default_col_indexed_opts = /*!*/; -let $default_tbl_opts = /*!*/; - -# INDEX, UNIQUE INDEX, PRIMARY KEY, special index type - choose the fist that the engine allows, -# or set it to /*!*/ if none is supported - -let $default_index = /*!INDEX*/; - -# If the engine does not support the following types, replace them with the closest possible - -let $default_int_type = INT(11); -let $default_char_type = CHAR(8); - -################################ - ---disable_query_log ---disable_result_log - -# Here you can place your custom MTR code which needs to be executed before each test, -# e.g. creation of an additional schema or table, etc. 
-# The cleanup part should be defined in cleanup_engine.inc - -CALL mtr.add_suppression("InnoDB: Resizing redo log from .* to .* pages, LSN=.*"); -CALL mtr.add_suppression("InnoDB: Starting to delete and rewrite log files."); -CALL mtr.add_suppression("InnoDB: New log files created, LSN=.*"); - ---enable_query_log ---enable_result_log diff --git a/storage/xtradb/mysql-test/storage_engine/disabled.def b/storage/xtradb/mysql-test/storage_engine/disabled.def deleted file mode 100644 index 2f3793047f4..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/disabled.def +++ /dev/null @@ -1,8 +0,0 @@ -autoinc_vars : MySQL:65225 (InnoDB miscalculates auto-increment) -tbl_opt_ai : MySQL:65901 (AUTO_INCREMENT option on InnoDB table is ignored if added before autoinc column) -delete_low_prio : InnoDB does not use table-level locking -insert_high_prio : InnoDB does not use table-level locking -insert_low_prio : InnoDB does not use table-level locking -select_high_prio : InnoDB does not use table-level locking -update_low_prio : InnoDB does not use table-level locking - diff --git a/storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff b/storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff deleted file mode 100644 index a68fe83070e..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff +++ /dev/null @@ -1,49 +0,0 @@ ---- suite/storage_engine/fulltext_search.result 2013-11-27 18:50:16.000000000 +0400 -+++ suite/storage_engine/fulltext_search.reject 2014-02-05 15:33:26.000000000 +0400 -@@ -52,15 +52,14 @@ - INSERT INTO t1 (v0,v1,v2) VALUES ('text4','Contributing more...','...is a good idea'),('text5','test','test'); - SELECT v0, MATCH(v1) AGAINST('contributing') AS rating FROM t1 WHERE MATCH(v1) AGAINST ('contributing'); - v0 rating --text4 1.3705332279205322 -+text4 0.4885590672492981 - SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('-test1 +critical +Cook*' IN BOOLEAN MODE); --v0 --text1 -+ERROR HY000: Can't find FULLTEXT index 
matching the column list - SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('-patch +critical +Cook*' IN BOOLEAN MODE); --v0 -+ERROR HY000: Can't find FULLTEXT index matching the column list - SELECT v0, MATCH(v1) AGAINST('database' WITH QUERY EXPANSION) AS rating FROM t1 WHERE MATCH(v1) AGAINST ('database' WITH QUERY EXPANSION); - v0 rating --text1 178.11756896972656 -+text1 151.4530487060547 - DROP TABLE t1; - CREATE TABLE t1 (v0 VARCHAR(64) <CUSTOM_COL_OPTIONS>, - v1 VARCHAR(16384) <CUSTOM_COL_OPTIONS>, -@@ -112,14 +111,15 @@ - ), ('text2','test1','test2'); - SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('contributing' IN NATURAL LANGUAGE MODE); - v0 -+text1 - INSERT INTO t1 (v0,v1,v2) VALUES ('text3','test','test'); - SELECT v0, MATCH(v1,v2) AGAINST('contributing' IN NATURAL LANGUAGE MODE) AS rating FROM t1 WHERE MATCH(v1,v2) AGAINST ('contributing' IN NATURAL LANGUAGE MODE); - v0 rating --text1 0.2809644043445587 -+text1 0.45528939366340637 - INSERT INTO t1 (v0,v1,v2) VALUES ('text4','Contributing more...','...is a good idea'),('text5','test','test'); - SELECT v0, MATCH(v1) AGAINST('contributing') AS rating FROM t1 WHERE MATCH(v1) AGAINST ('contributing'); - v0 rating --text4 1.3705332279205322 -+text4 0.4885590672492981 - SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('-test1 +critical +Cook*' IN BOOLEAN MODE); - v0 - text1 -@@ -127,6 +127,6 @@ - v0 - SELECT v0, MATCH(v1,v2) AGAINST('database' WITH QUERY EXPANSION) AS rating FROM t1 WHERE MATCH(v1,v2) AGAINST ('database' WITH QUERY EXPANSION); - v0 rating --text1 190.56150817871094 --text4 1.1758291721343994 -+text1 229.60874938964844 -+text4 0.31671249866485596 - DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff b/storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff deleted file mode 100644 index f8e812e7edb..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff +++ /dev/null @@ -1,33 +0,0 @@ ---- 
suite/storage_engine/index_enable_disable.result 2012-07-15 00:30:05.296641931 +0400 -+++ suite/storage_engine/index_enable_disable.reject 2012-07-15 17:49:12.988081281 +0400 -@@ -11,15 +11,19 @@ - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment - t1 1 a 1 a # # NULL NULL YES BTREE - ALTER TABLE t1 DISABLE KEYS; -+Warnings: -+Note 1031 Storage engine <STORAGE_ENGINE> of the table `test`.`t1` doesn't have this option - SHOW INDEX IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 1 a 1 a # # NULL NULL YES BTREE disabled -+t1 1 a 1 a # # NULL NULL YES BTREE - EXPLAIN SELECT a FROM t1 ORDER BY a; - id select_type table type possible_keys key key_len ref rows Extra --1 SIMPLE t1 ALL NULL NULL NULL NULL 19 Using filesort -+1 SIMPLE t1 index NULL a 5 NULL 19 Using index - INSERT INTO t1 (a) VALUES - (11),(12),(13),(14),(15),(16),(17),(18),(19),(20); - ALTER TABLE t1 ENABLE KEYS; -+Warnings: -+Note 1031 Storage engine <STORAGE_ENGINE> of the table `test`.`t1` doesn't have this option - SHOW INDEX IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment - t1 1 a 1 a # # NULL NULL YES BTREE -@@ -32,6 +36,8 @@ - (1),(2),(3),(4),(5),(6),(7),(8),(9), - (21),(22),(23),(24),(25),(26),(27),(28),(29); - ALTER TABLE t1 DISABLE KEYS; -+Warnings: -+Note 1031 Storage engine <STORAGE_ENGINE> of the table `test`.`t1` doesn't have this option - INSERT INTO t1 (a) VALUES (29); - ERROR 23000: Duplicate entry '29' for key 'a' - # Statement ended with one of expected results (ER_DUP_ENTRY,ER_DUP_KEY). 
diff --git a/storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff b/storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff deleted file mode 100644 index 02f9d93588f..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff +++ /dev/null @@ -1,60 +0,0 @@ ---- suite/storage_engine/index_type_hash.result 2012-07-15 01:10:17.919128889 +0400 -+++ suite/storage_engine/index_type_hash.reject 2012-07-15 17:49:26.135915989 +0400 -@@ -4,7 +4,7 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SHOW KEYS IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 1 a 1 a # # NULL NULL # HASH -+t1 1 a 1 a # # NULL NULL # BTREE - DROP TABLE t1; - CREATE TABLE t1 (a <INT_COLUMN>, - b <CHAR_COLUMN>, -@@ -12,8 +12,8 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SHOW KEYS IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 1 a_b 1 a # # NULL NULL # HASH a_b index --t1 1 a_b 2 b # # NULL NULL # HASH a_b index -+t1 1 a_b 1 a # # NULL NULL # BTREE a_b index -+t1 1 a_b 2 b # # NULL NULL # BTREE a_b index - DROP TABLE t1; - CREATE TABLE t1 (a <INT_COLUMN>, - b <CHAR_COLUMN>, -@@ -22,8 +22,8 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SHOW KEYS IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 1 a 1 a # # NULL NULL # HASH --t1 1 b 1 b # # NULL NULL # HASH -+t1 1 a 1 a # # NULL NULL # BTREE -+t1 1 b 1 b # # NULL NULL # BTREE - DROP TABLE t1; - CREATE TABLE t1 (a <INT_COLUMN>, - b <CHAR_COLUMN>, -@@ -31,7 +31,7 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SHOW KEYS IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 0 a 1 a # # NULL NULL # HASH -+t1 0 a 1 a # # 
NULL NULL # BTREE - INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'); - INSERT INTO t1 (a,b) VALUES (1,'c'); - ERROR 23000: Duplicate entry '1' for key 'a' -@@ -43,7 +43,7 @@ - ALTER TABLE t1 ADD <CUSTOM_INDEX> (a) USING HASH COMMENT 'simple index on a'; - SHOW INDEX FROM t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 1 a 1 a # # NULL NULL # HASH simple index on a -+t1 1 a 1 a # # NULL NULL # BTREE simple index on a - ALTER TABLE t1 DROP KEY a; - DROP TABLE t1; - CREATE TABLE t1 (a <INT_COLUMN>, -@@ -52,7 +52,7 @@ - ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SHOW KEYS IN t1; - Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment --t1 0 a 1 a # # NULL NULL # HASH -+t1 0 a 1 a # # NULL NULL # BTREE - INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b'); - INSERT INTO t1 (a,b) VALUES (1,'c'); - ERROR 23000: Duplicate entry '1' for key 'a' diff --git a/storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff b/storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff deleted file mode 100644 index 9e6cddf03f0..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff +++ /dev/null @@ -1,26 +0,0 @@ ---- suite/storage_engine/insert_delayed.result 2013-01-23 01:23:49.461254916 +0400 -+++ suite/storage_engine/insert_delayed.reject 2013-01-23 01:47:05.975698364 +0400 -@@ -5,7 +5,16 @@ - connect con0,localhost,root,,; - SET lock_wait_timeout = 1; - INSERT DELAYED INTO t1 (a,b) VALUES (3,'c'); -+ERROR HY000: DELAYED option not supported for table 't1' -+# ------------ UNEXPECTED RESULT ------------ -+# The statement|command finished with ER_DELAYED_NOT_SUPPORTED. -+# INSERT DELAYED or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors. -+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def. 
-+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped. -+# Also, this problem may cause a chain effect (more errors of different kinds in the test). -+# ------------------------------------------- - INSERT DELAYED INTO t1 SET a=4, b='d'; -+ERROR HY000: DELAYED option not supported for table 't1' - INSERT DELAYED INTO t1 (a,b) SELECT 5, 'e'; - ERROR HY000: Lock wait timeout exceeded; try restarting transaction - disconnect con0; -@@ -20,6 +29,4 @@ - a b - 1 f - 2 b --3 c --4 d - DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff b/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff deleted file mode 100644 index 6b7a52046e2..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff +++ /dev/null @@ -1,25 +0,0 @@ ---- suite/storage_engine/lock_concurrent.result 2012-06-24 23:55:19.539380000 +0400 -+++ suite/storage_engine/lock_concurrent.reject 2012-07-15 17:50:21.279222746 +0400 -@@ -4,6 +4,14 @@ - connect con1,localhost,root,,; - SET lock_wait_timeout = 1; - LOCK TABLES t1 READ LOCAL; -+ERROR HY000: Lock wait timeout exceeded; try restarting transaction -+# ------------ UNEXPECTED RESULT ------------ -+# The statement|command finished with ER_LOCK_WAIT_TIMEOUT. -+# LOCK .. WRITE CONCURRENT or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors. -+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def. -+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped. -+# Also, this problem may cause a chain effect (more errors of different kinds in the test). 
-+# ------------------------------------------- - UNLOCK TABLES; - connection default; - UNLOCK TABLES; -@@ -11,6 +19,7 @@ - LOCK TABLES t1 READ LOCAL; - connection default; - LOCK TABLES t1 WRITE CONCURRENT, t1 AS t2 READ; -+ERROR HY000: Lock wait timeout exceeded; try restarting transaction - UNLOCK TABLES; - connection con1; - UNLOCK TABLES; diff --git a/storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff b/storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff deleted file mode 100644 index 54d1f600516..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff +++ /dev/null @@ -1,37 +0,0 @@ ---- suite/storage_engine/optimize_table.result 2012-07-12 19:13:53.741428591 +0400 -+++ suite/storage_engine/optimize_table.reject 2012-07-15 17:50:30.843102510 +0400 -@@ -5,25 +5,32 @@ - INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d'); - OPTIMIZE TABLE t1; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK - INSERT INTO t2 (a,b) VALUES (4,'d'); - OPTIMIZE NO_WRITE_TO_BINLOG TABLE t2; - Table Op Msg_type Msg_text -+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead - test.t2 optimize status OK - INSERT INTO t2 (a,b) VALUES (5,'e'); - INSERT INTO t1 (a,b) VALUES (6,'f'); - OPTIMIZE LOCAL TABLE t1, t2; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK -+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead - test.t2 optimize status OK - OPTIMIZE TABLE t1, t2; - Table Op Msg_type Msg_text --test.t1 optimize status Table is already up to date --test.t2 optimize status Table is already up to date -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead -+test.t1 optimize status OK -+test.t2 optimize note Table does not support optimize, doing recreate 
+ analyze instead -+test.t2 optimize status OK - DROP TABLE t1, t2; - CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - INSERT INTO t1 (a,b) VALUES (1,'a'),(100,'b'),(2,'c'),(3,'d'); - OPTIMIZE TABLE t1; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK - DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff deleted file mode 100644 index c8aabb787e9..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff +++ /dev/null @@ -1,13 +0,0 @@ ---- suite/storage_engine/parts/checksum_table.result 2013-11-08 22:30:34.000000000 +0400 -+++ suite/storage_engine/parts/checksum_table.reject 2013-11-08 22:32:30.000000000 +0400 -@@ -31,8 +31,8 @@ - test.t1 4272806499 - CHECKSUM TABLE t1, t2 QUICK; - Table Checksum --test.t1 4272806499 --test.t2 0 -+test.t1 NULL -+test.t2 NULL - CHECKSUM TABLE t1, t2 EXTENDED; - Table Checksum - test.t1 4272806499 diff --git a/storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff deleted file mode 100644 index 0df91c6fc6e..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff +++ /dev/null @@ -1,20 +0,0 @@ ---- suite/storage_engine/parts/create_table.result 2012-07-12 21:56:38.618667460 +0400 -+++ suite/storage_engine/parts/create_table.reject 2012-07-15 20:06:43.496358345 +0400 -@@ -65,7 +65,7 @@ - 1 SIMPLE t1 abc,def # # # # # # # - EXPLAIN PARTITIONS SELECT a FROM t1 WHERE a = 100; - id select_type table partitions type possible_keys key key_len ref rows Extra --1 SIMPLE NULL NULL # # # # # # # -+1 SIMPLE t1 def # # # # # # # - INSERT INTO t1 (a) VALUES (50); - ERROR HY000: Table has no partition for value 50 - DROP TABLE t1; -@@ 
-81,7 +81,7 @@ - 1 SIMPLE t1 abc_abcsp0,def_defsp0 # # # # # # # - EXPLAIN PARTITIONS SELECT a FROM t1 WHERE a = 100; - id select_type table partitions type possible_keys key key_len ref rows Extra --1 SIMPLE NULL NULL # # # # # # # -+1 SIMPLE t1 def_defsp0 # # # # # # # - SELECT TABLE_SCHEMA, TABLE_NAME, PARTITION_NAME, SUBPARTITION_NAME, PARTITION_METHOD, SUBPARTITION_METHOD - FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME = 't1'; - TABLE_SCHEMA TABLE_NAME PARTITION_NAME SUBPARTITION_NAME PARTITION_METHOD SUBPARTITION_METHOD diff --git a/storage/xtradb/mysql-test/storage_engine/parts/disabled.def b/storage/xtradb/mysql-test/storage_engine/parts/disabled.def deleted file mode 100644 index 796bdfc751b..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/parts/disabled.def +++ /dev/null @@ -1 +0,0 @@ -repair_table : InnoDB of 5.6.10 does not support repair on partitioned tables (fixed by 5.6.14) diff --git a/storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff deleted file mode 100644 index a35ba5167d9..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff +++ /dev/null @@ -1,58 +0,0 @@ ---- suite/storage_engine/parts/optimize_table.result 2013-07-18 22:55:38.000000000 +0400 -+++ suite/storage_engine/parts/optimize_table.reject 2013-08-05 19:45:19.000000000 +0400 -@@ -9,18 +9,22 @@ - INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d'); - ALTER TABLE t1 OPTIMIZE PARTITION p1; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed. - test.t1 optimize status OK - INSERT INTO t2 (a,b) VALUES (4,'d'); - ALTER TABLE t2 OPTIMIZE PARTITION p0 NO_WRITE_TO_BINLOG; - Table Op Msg_type Msg_text -+test.t2 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed. 
- test.t2 optimize status OK - INSERT INTO t1 (a,b) VALUES (6,'f'); - ALTER TABLE t1 OPTIMIZE PARTITION ALL LOCAL; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed. - test.t1 optimize status OK - INSERT INTO t2 (a,b) VALUES (5,'e'); - ALTER TABLE t2 OPTIMIZE PARTITION p1,p0; - Table Op Msg_type Msg_text -+test.t2 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed. - test.t2 optimize status OK - DROP TABLE t1, t2; - DROP TABLE IF EXISTS t1,t2; -@@ -30,25 +34,32 @@ - INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d'); - OPTIMIZE TABLE t1; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK - INSERT INTO t2 (a,b) VALUES (4,'d'); - OPTIMIZE NO_WRITE_TO_BINLOG TABLE t2; - Table Op Msg_type Msg_text -+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead - test.t2 optimize status OK - INSERT INTO t2 (a,b) VALUES (5,'e'); - INSERT INTO t1 (a,b) VALUES (6,'f'); - OPTIMIZE LOCAL TABLE t1, t2; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK -+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead - test.t2 optimize status OK - OPTIMIZE TABLE t1, t2; - Table Op Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK -+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead - test.t2 optimize status OK - DROP TABLE t1, t2; - CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> PARTITION BY HASH(a) PARTITIONS 2; - INSERT INTO t1 (a,b) VALUES (1,'a'),(100,'b'),(2,'c'),(3,'d'); - OPTIMIZE TABLE t1; - Table Op 
Msg_type Msg_text -+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead - test.t1 optimize status OK - DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff deleted file mode 100644 index 35b150e82d1..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff +++ /dev/null @@ -1,158 +0,0 @@ ---- suite/storage_engine/parts/repair_table.result 2013-07-18 22:55:38.000000000 +0400 -+++ suite/storage_engine/parts/repair_table.reject 2013-08-05 19:54:09.000000000 +0400 -@@ -25,7 +25,7 @@ - INSERT INTO t1 (a,b) VALUES (10,'j'); - ALTER TABLE t1 REPAIR PARTITION p1 QUICK USE_FRM; - Table Op Msg_type Msg_text --test.t1 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair - INSERT INTO t2 (a,b) VALUES (12,'l'); - ALTER TABLE t2 REPAIR PARTITION NO_WRITE_TO_BINLOG ALL QUICK EXTENDED USE_FRM; - Table Op Msg_type Msg_text -@@ -58,8 +58,8 @@ - INSERT INTO t2 (a,b) VALUES (11,'k'); - REPAIR TABLE t1, t2 QUICK USE_FRM; - Table Op Msg_type Msg_text --test.t1 repair status OK --test.t2 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair -+test.t2 repair note The storage engine for the table doesn't support repair - INSERT INTO t1 (a,b) VALUES (12,'l'); - INSERT INTO t2 (a,b) VALUES (13,'m'); - REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2 QUICK EXTENDED USE_FRM; -@@ -101,119 +101,13 @@ - INSERT INTO t1 (a,b) VALUES (10,'j'); - REPAIR TABLE t1 USE_FRM; - Table Op Msg_type Msg_text --test.t1 repair status OK --t1#P#p0.MYD --t1#P#p0.MYI --t1#P#p1.MYD --t1#P#p1.MYI -+test.t1 repair note The storage engine for the table doesn't support repair - t1.frm - t1.par - INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); - # Statement ended with one of expected results (0,144). 
- # If you got a difference in error message, just add it to rdiff file - FLUSH TABLE t1; --Restoring <DATADIR>/test/t1#P#p0.MYD --CHECK TABLE t1; --Table Op Msg_type Msg_text --test.t1 check error Size of datafile is: 26 Should be: 39 --test.t1 check error Partition p0 returned error --test.t1 check error Corrupt --SELECT a,b FROM t1; --a b --8 h --10 j --7 g --15 o --Warnings: --Error 145 Table './test/t1#P#p0' is marked as crashed and should be repaired --Error 1194 Table 't1' is marked as crashed and should be repaired --Error 1034 Number of rows changed from 3 to 2 --# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). --# If you got a difference in error message, just add it to rdiff file --INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); --# Statement ended with one of expected results (0,144). --# If you got a difference in error message, just add it to rdiff file --FLUSH TABLE t1; --Restoring <DATADIR>/test/t1#P#p0.MYI --CHECK TABLE t1; --Table Op Msg_type Msg_text --test.t1 check warning Size of datafile is: 39 Should be: 26 --test.t1 check error Record-count is not ok; is 3 Should be: 2 --test.t1 check warning Found 3 key parts. Should be: 2 --test.t1 check error Partition p0 returned error --test.t1 check error Corrupt --SELECT a,b FROM t1; --a b --8 h --10 j --14 n --7 g --15 o --15 o --Warnings: --Error 145 Table './test/t1#P#p0' is marked as crashed and should be repaired --Error 1194 Table 't1' is marked as crashed and should be repaired --Error 1034 Number of rows changed from 2 to 3 --# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). --# If you got a difference in error message, just add it to rdiff file --INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); --# Statement ended with one of expected results (0,144). 
--# If you got a difference in error message, just add it to rdiff file --FLUSH TABLE t1; --Restoring <DATADIR>/test/t1#P#p1.MYD --CHECK TABLE t1; --Table Op Msg_type Msg_text --test.t1 check error Size of datafile is: 39 Should be: 52 --test.t1 check error Partition p1 returned error --test.t1 check error Corrupt --SELECT a,b FROM t1; --a b --8 h --10 j --14 n --14 n --7 g --15 o --15 o --Warnings: --Error 145 Table './test/t1#P#p1' is marked as crashed and should be repaired --Error 1194 Table 't1' is marked as crashed and should be repaired --Error 1034 Number of rows changed from 4 to 3 --# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). --# If you got a difference in error message, just add it to rdiff file --INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); --# Statement ended with one of expected results (0,144). --# If you got a difference in error message, just add it to rdiff file --FLUSH TABLE t1; --Restoring <DATADIR>/test/t1#P#p1.MYI --CHECK TABLE t1; --Table Op Msg_type Msg_text --test.t1 check warning Size of datafile is: 52 Should be: 39 --test.t1 check error Record-count is not ok; is 4 Should be: 3 --test.t1 check warning Found 4 key parts. Should be: 3 --test.t1 check error Partition p1 returned error --test.t1 check error Corrupt --SELECT a,b FROM t1; --a b --8 h --10 j --14 n --14 n --14 n --7 g --15 o --15 o --15 o --Warnings: --Error 145 Table './test/t1#P#p1' is marked as crashed and should be repaired --Error 1194 Table 't1' is marked as crashed and should be repaired --Error 1034 Number of rows changed from 3 to 4 --# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). --# If you got a difference in error message, just add it to rdiff file --INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); --# Statement ended with one of expected results (0,144). 
--# If you got a difference in error message, just add it to rdiff file --FLUSH TABLE t1; - Restoring <DATADIR>/test/t1.par - CHECK TABLE t1; - Table Op Msg_type Msg_text -@@ -223,14 +117,8 @@ - 8 h - 10 j - 14 n --14 n --14 n --14 n - 7 g - 15 o --15 o --15 o --15 o - # Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). - # If you got a difference in error message, just add it to rdiff file - DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/parts/suite.opt b/storage/xtradb/mysql-test/storage_engine/parts/suite.opt deleted file mode 100644 index 66f581b56d0..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/parts/suite.opt +++ /dev/null @@ -1,2 +0,0 @@ ---innodb - diff --git a/storage/xtradb/mysql-test/storage_engine/repair_table.rdiff b/storage/xtradb/mysql-test/storage_engine/repair_table.rdiff deleted file mode 100644 index be3709c5833..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/repair_table.rdiff +++ /dev/null @@ -1,138 +0,0 @@ ---- suite/storage_engine/repair_table.result 2013-10-03 20:35:06.000000000 +0400 -+++ suite/storage_engine/repair_table.reject 2013-11-08 22:04:22.000000000 +0400 -@@ -4,56 +4,57 @@ - CREATE TABLE t2 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - REPAIR TABLE t1; - Table Op Msg_type Msg_text --test.t1 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair - INSERT INTO t1 (a,b) VALUES (3,'c'); - INSERT INTO t2 (a,b) VALUES (4,'d'); - REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2; - Table Op Msg_type Msg_text --test.t1 repair status OK --test.t2 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair -+test.t2 repair note The storage engine for the table doesn't support repair - INSERT INTO t2 (a,b) VALUES (5,'e'),(6,'f'); - REPAIR LOCAL TABLE t2; - Table Op Msg_type Msg_text --test.t2 repair status OK -+test.t2 repair note The storage engine for the table doesn't 
support repair - INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h'); - INSERT INTO t2 (a,b) VALUES (9,'i'); - REPAIR LOCAL TABLE t2, t1 EXTENDED; - Table Op Msg_type Msg_text --test.t2 repair status OK --test.t1 repair status OK -+test.t2 repair note The storage engine for the table doesn't support repair -+test.t1 repair note The storage engine for the table doesn't support repair - INSERT INTO t1 (a,b) VALUES (10,'j'); - INSERT INTO t2 (a,b) VALUES (11,'k'); - REPAIR TABLE t1, t2 QUICK USE_FRM; - Table Op Msg_type Msg_text --test.t1 repair warning Number of rows changed from 0 to 6 --test.t1 repair status OK --test.t2 repair warning Number of rows changed from 0 to 5 --test.t2 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair -+test.t2 repair note The storage engine for the table doesn't support repair - INSERT INTO t1 (a,b) VALUES (12,'l'); - INSERT INTO t2 (a,b) VALUES (13,'m'); - REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2 QUICK EXTENDED USE_FRM; - Table Op Msg_type Msg_text --test.t1 repair warning Number of rows changed from 0 to 7 --test.t1 repair status OK --test.t2 repair warning Number of rows changed from 0 to 6 --test.t2 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair -+test.t2 repair note The storage engine for the table doesn't support repair - FLUSH TABLE t1; - INSERT INTO t1 (a,b) VALUES (14,'n'); --ERROR HY000: Incorrect file format 't1' - # Statement ended with one of expected results (0,130,ER_FAILED_READ_FROM_PAR_FILE,ER_OPEN_AS_READONLY). 
- # If you got a difference in error message, just add it to rdiff file - CHECK TABLE t1; - Table Op Msg_type Msg_text --test.t1 check Error Incorrect file format 't1' --test.t1 check error Corrupt -+test.t1 check status OK - SELECT a,b FROM t1; --ERROR HY000: Incorrect file format 't1' -+a b -+1 a -+2 b -+3 c -+7 g -+8 h -+10 j -+12 l -+14 n - # Statement ended with one of expected results (0,130,ER_FAILED_READ_FROM_PAR_FILE,ER_OPEN_AS_READONLY). - # If you got a difference in error message, just add it to rdiff file - REPAIR TABLE t1; - Table Op Msg_type Msg_text --test.t1 repair Error Incorrect file format 't1' --test.t1 repair error Corrupt -+test.t1 repair note The storage engine for the table doesn't support repair - DROP TABLE t1, t2; - call mtr.add_suppression("Got an error from thread_id=.*"); - call mtr.add_suppression("MySQL thread id .*, query id .* localhost.*root Checking table"); -@@ -62,45 +63,32 @@ - CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - REPAIR TABLE t1; - Table Op Msg_type Msg_text --test.t1 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair - INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h'); - REPAIR TABLE t1 EXTENDED; - Table Op Msg_type Msg_text --test.t1 repair status OK -+test.t1 repair note The storage engine for the table doesn't support repair - INSERT INTO t1 (a,b) VALUES (10,'j'); - REPAIR TABLE t1 USE_FRM; - Table Op Msg_type Msg_text --test.t1 repair warning Number of rows changed from 0 to 3 --test.t1 repair status OK --t1.MYD --t1.MYI -+test.t1 repair note The storage engine for the table doesn't support repair - t1.frm -+t1.ibd - INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); - # Statement ended with one of expected results (0,144). 
- # If you got a difference in error message, just add it to rdiff file - FLUSH TABLE t1; --Restoring <DATADIR>/test/t1.MYD -+Restoring <DATADIR>/test/t1.ibd - CHECK TABLE t1; - Table Op Msg_type Msg_text --test.t1 check error Size of datafile is: 39 Should be: 65 --test.t1 check error Corrupt -+test.t1 check status OK - SELECT a,b FROM t1; --ERROR HY000: Incorrect key file for table 't1'; try to repair it --# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). --# If you got a difference in error message, just add it to rdiff file --INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o'); --ERROR HY000: Table './test/t1' is marked as crashed and last (automatic?) repair failed --# Statement ended with one of expected results (0,144). --# If you got a difference in error message, just add it to rdiff file --FLUSH TABLE t1; --Restoring <DATADIR>/test/t1.MYI --CHECK TABLE t1; --Table Op Msg_type Msg_text --test.t1 check warning Table is marked as crashed and last repair failed --test.t1 check error Size of datafile is: 39 Should be: 65 --test.t1 check error Corrupt --SELECT a,b FROM t1; --ERROR HY000: Table './test/t1' is marked as crashed and last (automatic?) repair failed -+a b -+7 g -+8 h -+10 j -+14 n -+15 o - # Statement ended with one of expected results (0,ER_NOT_KEYFILE,144). 
- # If you got a difference in error message, just add it to rdiff file - DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/suite.opt b/storage/xtradb/mysql-test/storage_engine/suite.opt deleted file mode 100644 index 8c10cefc626..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/suite.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb diff --git a/storage/xtradb/mysql-test/storage_engine/suite.pm b/storage/xtradb/mysql-test/storage_engine/suite.pm deleted file mode 100644 index e186a532dcc..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/suite.pm +++ /dev/null @@ -1,8 +0,0 @@ -package My::Suite::SE::XtraDB; - -@ISA = qw(My::Suite); - -return "Need XtraDB engine"; - -bless { }; - diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff deleted file mode 100644 index e09e50b17ec..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff +++ /dev/null @@ -1,23 +0,0 @@ ---- suite/storage_engine/tbl_opt_data_index_dir.result 2013-10-03 20:35:06.000000000 +0400 -+++ suite/storage_engine/tbl_opt_data_index_dir.reject 2013-11-08 22:06:54.000000000 +0400 -@@ -1,10 +1,12 @@ - DROP TABLE IF EXISTS t1; -+Warnings: -+Warning 1618 <INDEX DIRECTORY> option ignored - SHOW CREATE TABLE t1; - Table Create Table - t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` char(8) DEFAULT NULL --) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' INDEX DIRECTORY='<INDEX_DIR>' -+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' - Warnings: - Warning 1618 <INDEX DIRECTORY> option ignored - SHOW CREATE TABLE t1; -@@ -12,5 +14,5 @@ - t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` char(8) DEFAULT NULL --) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' INDEX DIRECTORY='<INDEX_DIR>' -+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' - 
DROP TABLE t1; diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff deleted file mode 100644 index 468b82926f0..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff +++ /dev/null @@ -1,11 +0,0 @@ ---- suite/storage_engine/tbl_opt_insert_method.result 2012-06-24 23:55:19.539380000 +0400 -+++ suite/storage_engine/tbl_opt_insert_method.reject 2012-07-15 17:51:09.978610512 +0400 -@@ -5,7 +5,7 @@ - t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL, - `b` char(8) DEFAULT NULL --) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=FIRST -+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 - ALTER TABLE t1 INSERT_METHOD=NO; - SHOW CREATE TABLE t1; - Table Create Table diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt b/storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt deleted file mode 100644 index 7cd737b2b87..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt +++ /dev/null @@ -1,3 +0,0 @@ ---innodb-file-per-table=1 ---innodb-file-format=Barracuda - diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt b/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt deleted file mode 100644 index 7cd737b2b87..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt +++ /dev/null @@ -1,3 +0,0 @@ ---innodb-file-per-table=1 ---innodb-file-format=Barracuda - diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff deleted file mode 100644 index a6572ffa7f0..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff +++ /dev/null @@ -1,10 +0,0 @@ ---- suite/storage_engine/tbl_opt_row_format.result 2012-06-24 23:55:19.539380000 +0400 -+++ suite/storage_engine/tbl_opt_row_format.reject 2012-07-15 
19:26:02.235049157 +0400 -@@ -1,5 +1,7 @@ - DROP TABLE IF EXISTS t1; - CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> ROW_FORMAT=FIXED; -+Warnings: -+Warning 1478 <STORAGE_ENGINE>: assuming ROW_FORMAT=COMPACT. - SHOW CREATE TABLE t1; - Table Create Table - t1 CREATE TABLE `t1` ( diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff deleted file mode 100644 index cbdf5818022..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- suite/storage_engine/tbl_opt_union.result 2012-06-24 23:55:19.539380000 +0400 -+++ suite/storage_engine/tbl_opt_union.reject 2012-07-15 17:51:31.014346053 +0400 -@@ -4,11 +4,11 @@ - Table Create Table - t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL --) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 UNION=(`child1`) -+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 - ALTER TABLE t1 UNION = (child1,child2); - SHOW CREATE TABLE t1; - Table Create Table - t1 CREATE TABLE `t1` ( - `a` int(11) DEFAULT NULL --) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 UNION=(`child1`,`child2`) -+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 - DROP TABLE t1, child1, child2; diff --git a/storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff b/storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff deleted file mode 100644 index e6149be58dc..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff +++ /dev/null @@ -1,18 +0,0 @@ ---- suite/storage_engine/trx/cons_snapshot_serializable.result 2013-11-27 18:46:36.000000000 +0400 -+++ suite/storage_engine/trx/cons_snapshot_serializable.reject 2013-11-28 19:17:02.000000000 +0400 -@@ -5,12 +5,15 @@ - CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE; - 
START TRANSACTION WITH CONSISTENT SNAPSHOT; -+Warnings: -+Warning 138 InnoDB: WITH CONSISTENT SNAPSHOT was ignored because this phrase can only be used with REPEATABLE READ isolation level. - connection con2; - INSERT INTO t1 (a) VALUES (1); - connection con1; - # If consistent read works on this isolation level (SERIALIZABLE), the following SELECT should not return the value we inserted (1) - SELECT a FROM t1; - a -+1 - COMMIT; - connection default; - disconnect con1; diff --git a/storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff b/storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff deleted file mode 100644 index cb64d32138b..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff +++ /dev/null @@ -1,11 +0,0 @@ ---- suite/storage_engine/trx/level_read_committed.result 2013-11-28 19:18:48.000000000 +0400 -+++ suite/storage_engine/trx/level_read_committed.reject 2013-11-28 19:18:59.000000000 +0400 -@@ -77,6 +77,8 @@ - CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; - START TRANSACTION WITH CONSISTENT SNAPSHOT; -+Warnings: -+Warning 138 InnoDB: WITH CONSISTENT SNAPSHOT was ignored because this phrase can only be used with REPEATABLE READ isolation level. 
- connection con2; - INSERT INTO t1 (a) VALUES (1); - connection con1; diff --git a/storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff b/storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff deleted file mode 100644 index 6a79abe3ca5..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff +++ /dev/null @@ -1,11 +0,0 @@ ---- suite/storage_engine/trx/level_read_uncommitted.result 2013-11-28 19:18:48.000000000 +0400 -+++ suite/storage_engine/trx/level_read_uncommitted.reject 2013-11-28 19:19:50.000000000 +0400 -@@ -102,6 +102,8 @@ - CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; - SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED; - START TRANSACTION WITH CONSISTENT SNAPSHOT; -+Warnings: -+Warning 138 InnoDB: WITH CONSISTENT SNAPSHOT was ignored because this phrase can only be used with REPEATABLE READ isolation level. - connection con2; - INSERT INTO t1 (a) VALUES (1); - connection con1; diff --git a/storage/xtradb/mysql-test/storage_engine/trx/suite.opt b/storage/xtradb/mysql-test/storage_engine/trx/suite.opt deleted file mode 100644 index 64bbe8b554c..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/trx/suite.opt +++ /dev/null @@ -1,3 +0,0 @@ ---innodb ---innodb-lock-wait-timeout=1 - diff --git a/storage/xtradb/mysql-test/storage_engine/type_blob.opt b/storage/xtradb/mysql-test/storage_engine/type_blob.opt deleted file mode 100644 index 40445305fc6..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/type_blob.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb_log_file_size=100M diff --git a/storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff b/storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff deleted file mode 100644 index 7a388552c57..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff +++ /dev/null @@ -1,11 +0,0 @@ ---- suite/storage_engine/type_char_indexes.result 2012-07-12 
19:27:42.191013570 +0400 -+++ suite/storage_engine/type_char_indexes.reject 2012-07-15 17:51:55.810034331 +0400 -@@ -135,7 +135,7 @@ - r3a - EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16; - id select_type table type possible_keys key key_len ref rows Extra --# # # range # v16 # # # # -+# # # ALL # NULL # # # # - SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16; - c c20 v16 v128 - a char1 varchar1a varchar1b diff --git a/storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff b/storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff deleted file mode 100644 index 6ebfd61d876..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff +++ /dev/null @@ -1,11 +0,0 @@ ---- suite/storage_engine/type_float_indexes.result 2012-07-12 19:37:27.031661128 +0400 -+++ suite/storage_engine/type_float_indexes.reject 2012-07-15 17:52:12.189828410 +0400 -@@ -60,7 +60,7 @@ - ALTER TABLE t1 ADD UNIQUE KEY(d); - EXPLAIN SELECT d FROM t1 WHERE r > 0 and d > 0 ORDER BY d; - id select_type table type possible_keys key key_len ref rows Extra --# # # # # d # # # # -+# # # # # NULL # # # # - SELECT d FROM t1 WHERE r > 0 and d > 0 ORDER BY d; - d - 1.2345 diff --git a/storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff b/storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff deleted file mode 100644 index 154116b748c..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff +++ /dev/null @@ -1,712 +0,0 @@ ---- suite/storage_engine/type_spatial_indexes.result 2013-08-05 18:08:49.000000000 +0400 -+++ suite/storage_engine/type_spatial_indexes.reject 2013-08-05 18:25:24.000000000 +0400 -@@ -702,699 +702,15 @@ - DROP DATABASE IF EXISTS gis_ogs; - CREATE DATABASE gis_ogs; - CREATE TABLE gis_point (fid <INT_COLUMN>, g POINT NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE 
TABLE gis_line (fid <INT_COLUMN>, g LINESTRING NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE gis_polygon (fid <INT_COLUMN>, g POLYGON NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE gis_multi_point (fid <INT_COLUMN>, g MULTIPOINT NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE gis_multi_line (fid <INT_COLUMN>, g MULTILINESTRING NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE gis_multi_polygon (fid <INT_COLUMN>, g MULTIPOLYGON NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE gis_geometrycollection (fid <INT_COLUMN>, g GEOMETRYCOLLECTION NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE gis_geometry (fid <INT_COLUMN>, g GEOMETRY NOT NULL) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --USE gis_ogs; --CREATE TABLE lakes (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --shore POLYGON NOT NULL, SPATIAL INDEX s(shore)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE road_segments (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --aliases CHAR(64) <CUSTOM_COL_OPTIONS>, --num_lanes INT <CUSTOM_COL_OPTIONS>, --centerline LINESTRING NOT NULL, SPATIAL INDEX c(centerline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE divided_routes (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --num_lanes INT <CUSTOM_COL_OPTIONS>, --centerlines MULTILINESTRING NOT NULL, SPATIAL INDEX c(centerlines)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE forests (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --boundary MULTIPOLYGON NOT NULL, SPATIAL INDEX b(boundary)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE bridges (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --position 
POINT NOT NULL, SPATIAL INDEX p(position)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE streams (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --centerline LINESTRING NOT NULL, SPATIAL INDEX c(centerline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE buildings (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --position POINT NOT NULL, --footprint POLYGON NOT NULL, SPATIAL INDEX p(position), SPATIAL INDEX f(footprint)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE ponds (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --type CHAR(64) <CUSTOM_COL_OPTIONS>, --shores MULTIPOLYGON NOT NULL, SPATIAL INDEX s(shores)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE named_places (fid INT <CUSTOM_COL_OPTIONS>, --name CHAR(64) <CUSTOM_COL_OPTIONS>, --boundary POLYGON NOT NULL, SPATIAL INDEX b(boundary)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --CREATE TABLE map_neatlines (fid INT <CUSTOM_COL_OPTIONS>, --neatline POLYGON NOT NULL, SPATIAL INDEX n(neatline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>; --USE test; --SHOW FIELDS FROM gis_point; --Field Type Null Key Default Extra --fid int(11) YES NULL --g point NO MUL NULL --SHOW FIELDS FROM gis_line; --Field Type Null Key Default Extra --fid int(11) YES NULL --g linestring NO MUL NULL --SHOW FIELDS FROM gis_polygon; --Field Type Null Key Default Extra --fid int(11) YES NULL --g polygon NO MUL NULL --SHOW FIELDS FROM gis_multi_point; --Field Type Null Key Default Extra --fid int(11) YES NULL --g multipoint NO MUL NULL --SHOW FIELDS FROM gis_multi_line; --Field Type Null Key Default Extra --fid int(11) YES NULL --g multilinestring NO MUL NULL --SHOW FIELDS FROM gis_multi_polygon; --Field Type Null Key Default Extra --fid int(11) YES NULL --g multipolygon NO MUL NULL --SHOW FIELDS FROM gis_geometrycollection; --Field Type Null Key Default Extra --fid int(11) YES NULL --g geometrycollection 
NO MUL NULL --SHOW FIELDS FROM gis_geometry; --Field Type Null Key Default Extra --fid int(11) YES NULL --g geometry NO NULL --INSERT INTO gis_point (fid,g) VALUES --(101, PointFromText('POINT(10 10)')), --(102, PointFromText('POINT(20 10)')), --(103, PointFromText('POINT(20 20)')), --(104, PointFromWKB(AsWKB(PointFromText('POINT(10 20)')))); --INSERT INTO gis_line (fid,g) VALUES --(105, LineFromText('LINESTRING(0 0,0 10,10 0)')), --(106, LineStringFromText('LINESTRING(10 10,20 10,20 20,10 20,10 10)')), --(107, LineStringFromWKB(AsWKB(LineString(Point(10, 10), Point(40, 10))))); --INSERT INTO gis_polygon (fid,g) VALUES --(108, PolygonFromText('POLYGON((10 10,20 10,20 20,10 20,10 10))')), --(109, PolyFromText('POLYGON((0 0,50 0,50 50,0 50,0 0), (10 10,20 10,20 20,10 20,10 10))')), --(110, PolyFromWKB(AsWKB(Polygon(LineString(Point(0, 0), Point(30, 0), Point(30, 30), Point(0, 0)))))); --INSERT INTO gis_multi_point (fid,g) VALUES --(111, MultiPointFromText('MULTIPOINT(0 0,10 10,10 20,20 20)')), --(112, MPointFromText('MULTIPOINT(1 1,11 11,11 21,21 21)')), --(113, MPointFromWKB(AsWKB(MultiPoint(Point(3, 6), Point(4, 10))))); --INSERT INTO gis_multi_line (fid,g) VALUES --(114, MultiLineStringFromText('MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))')), --(115, MLineFromText('MULTILINESTRING((10 48,10 21,10 0))')), --(116, MLineFromWKB(AsWKB(MultiLineString(LineString(Point(1, 2), Point(3, 5)), LineString(Point(2, 5), Point(5, 8), Point(21, 7)))))); --INSERT INTO gis_multi_polygon (fid,g) VALUES --(117, MultiPolygonFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')), --(118, MPolyFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')), --(119, MPolyFromWKB(AsWKB(MultiPolygon(Polygon(LineString(Point(0, 3), Point(3, 3), Point(3, 0), Point(0, 3))))))); --INSERT INTO gis_geometrycollection (fid,g) VALUES --(120, 
GeomCollFromText('GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0,10 10))')), --(121, GeometryFromWKB(AsWKB(GeometryCollection(Point(44, 6), LineString(Point(3, 6), Point(7, 9)))))), --(122, GeomFromText('GeometryCollection()')), --(123, GeomFromText('GeometryCollection EMPTY')); --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_point; --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_line; --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_polygon; --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_point; --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_line; --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_polygon; --INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_geometrycollection; --SELECT fid, AsText(g) FROM gis_point; --fid AsText(g) --101 POINT(10 10) --102 POINT(20 10) --103 POINT(20 20) --104 POINT(10 20) --SELECT fid, AsText(g) FROM gis_line; --fid AsText(g) --105 LINESTRING(0 0,0 10,10 0) --106 LINESTRING(10 10,20 10,20 20,10 20,10 10) --107 LINESTRING(10 10,40 10) --SELECT fid, AsText(g) FROM gis_polygon; --fid AsText(g) --108 POLYGON((10 10,20 10,20 20,10 20,10 10)) --109 POLYGON((0 0,50 0,50 50,0 50,0 0),(10 10,20 10,20 20,10 20,10 10)) --110 POLYGON((0 0,30 0,30 30,0 0)) --SELECT fid, AsText(g) FROM gis_multi_point; --fid AsText(g) --111 MULTIPOINT(0 0,10 10,10 20,20 20) --112 MULTIPOINT(1 1,11 11,11 21,21 21) --113 MULTIPOINT(3 6,4 10) --SELECT fid, AsText(g) FROM gis_multi_line; --fid AsText(g) --114 MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48)) --115 MULTILINESTRING((10 48,10 21,10 0)) --116 MULTILINESTRING((1 2,3 5),(2 5,5 8,21 7)) --SELECT fid, AsText(g) FROM gis_multi_polygon; --fid AsText(g) --117 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18))) --118 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18))) --119 MULTIPOLYGON(((0 3,3 3,3 0,0 3))) --SELECT 
fid, AsText(g) FROM gis_geometrycollection; --fid AsText(g) --120 GEOMETRYCOLLECTION(POINT(0 0),LINESTRING(0 0,10 10)) --121 GEOMETRYCOLLECTION(POINT(44 6),LINESTRING(3 6,7 9)) --122 GEOMETRYCOLLECTION EMPTY --123 GEOMETRYCOLLECTION EMPTY --SELECT fid, AsText(g) FROM gis_geometry; --fid AsText(g) --101 POINT(10 10) --102 POINT(20 10) --103 POINT(20 20) --104 POINT(10 20) --105 LINESTRING(0 0,0 10,10 0) --106 LINESTRING(10 10,20 10,20 20,10 20,10 10) --107 LINESTRING(10 10,40 10) --108 POLYGON((10 10,20 10,20 20,10 20,10 10)) --109 POLYGON((0 0,50 0,50 50,0 50,0 0),(10 10,20 10,20 20,10 20,10 10)) --110 POLYGON((0 0,30 0,30 30,0 0)) --111 MULTIPOINT(0 0,10 10,10 20,20 20) --112 MULTIPOINT(1 1,11 11,11 21,21 21) --113 MULTIPOINT(3 6,4 10) --114 MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48)) --115 MULTILINESTRING((10 48,10 21,10 0)) --116 MULTILINESTRING((1 2,3 5),(2 5,5 8,21 7)) --117 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18))) --118 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18))) --119 MULTIPOLYGON(((0 3,3 3,3 0,0 3))) --120 GEOMETRYCOLLECTION(POINT(0 0),LINESTRING(0 0,10 10)) --121 GEOMETRYCOLLECTION(POINT(44 6),LINESTRING(3 6,7 9)) --122 GEOMETRYCOLLECTION EMPTY --123 GEOMETRYCOLLECTION EMPTY --SELECT fid, Dimension(g) FROM gis_geometry; --fid Dimension(g) --101 0 --102 0 --103 0 --104 0 --105 1 --106 1 --107 1 --108 2 --109 2 --110 2 --111 0 --112 0 --113 0 --114 1 --115 1 --116 1 --117 2 --118 2 --119 2 --120 1 --121 1 --122 0 --123 0 --SELECT fid, GeometryType(g) FROM gis_geometry; --fid GeometryType(g) --101 POINT --102 POINT --103 POINT --104 POINT --105 LINESTRING --106 LINESTRING --107 LINESTRING --108 POLYGON --109 POLYGON --110 POLYGON --111 MULTIPOINT --112 MULTIPOINT --113 MULTIPOINT --114 MULTILINESTRING --115 MULTILINESTRING --116 MULTILINESTRING --117 MULTIPOLYGON --118 MULTIPOLYGON --119 MULTIPOLYGON --120 
GEOMETRYCOLLECTION --121 GEOMETRYCOLLECTION --122 GEOMETRYCOLLECTION --123 GEOMETRYCOLLECTION --SELECT fid, IsEmpty(g) FROM gis_geometry; --fid IsEmpty(g) --101 0 --102 0 --103 0 --104 0 --105 0 --106 0 --107 0 --108 0 --109 0 --110 0 --111 0 --112 0 --113 0 --114 0 --115 0 --116 0 --117 0 --118 0 --119 0 --120 0 --121 0 --122 0 --123 0 --SELECT fid, AsText(Envelope(g)) FROM gis_geometry; --fid AsText(Envelope(g)) --101 POLYGON((10 10,10 10,10 10,10 10,10 10)) --102 POLYGON((20 10,20 10,20 10,20 10,20 10)) --103 POLYGON((20 20,20 20,20 20,20 20,20 20)) --104 POLYGON((10 20,10 20,10 20,10 20,10 20)) --105 POLYGON((0 0,10 0,10 10,0 10,0 0)) --106 POLYGON((10 10,20 10,20 20,10 20,10 10)) --107 POLYGON((10 10,40 10,40 10,10 10,10 10)) --108 POLYGON((10 10,20 10,20 20,10 20,10 10)) --109 POLYGON((0 0,50 0,50 50,0 50,0 0)) --110 POLYGON((0 0,30 0,30 30,0 30,0 0)) --111 POLYGON((0 0,20 0,20 20,0 20,0 0)) --112 POLYGON((1 1,21 1,21 21,1 21,1 1)) --113 POLYGON((3 6,4 6,4 10,3 10,3 6)) --114 POLYGON((10 0,16 0,16 48,10 48,10 0)) --115 POLYGON((10 0,10 0,10 48,10 48,10 0)) --116 POLYGON((1 2,21 2,21 8,1 8,1 2)) --117 POLYGON((28 0,84 0,84 42,28 42,28 0)) --118 POLYGON((28 0,84 0,84 42,28 42,28 0)) --119 POLYGON((0 0,3 0,3 3,0 3,0 0)) --120 POLYGON((0 0,10 0,10 10,0 10,0 0)) --121 POLYGON((3 6,44 6,44 9,3 9,3 6)) --122 GEOMETRYCOLLECTION EMPTY --123 GEOMETRYCOLLECTION EMPTY --SELECT fid, X(g) FROM gis_point; --fid X(g) --101 10 --102 20 --103 20 --104 10 --SELECT fid, Y(g) FROM gis_point; --fid Y(g) --101 10 --102 10 --103 20 --104 20 --SELECT fid, AsText(StartPoint(g)) FROM gis_line; --fid AsText(StartPoint(g)) --105 POINT(0 0) --106 POINT(10 10) --107 POINT(10 10) --SELECT fid, AsText(EndPoint(g)) FROM gis_line; --fid AsText(EndPoint(g)) --105 POINT(10 0) --106 POINT(10 10) --107 POINT(40 10) --SELECT fid, GLength(g) FROM gis_line; --fid GLength(g) --105 24.14213562373095 --106 40 --107 30 --SELECT fid, NumPoints(g) FROM gis_line; --fid NumPoints(g) --105 3 --106 5 --107 2 
--SELECT fid, AsText(PointN(g, 2)) FROM gis_line; --fid AsText(PointN(g, 2)) --105 POINT(0 10) --106 POINT(20 10) --107 POINT(40 10) --SELECT fid, IsClosed(g) FROM gis_line; --fid IsClosed(g) --105 0 --106 1 --107 0 --SELECT fid, AsText(Centroid(g)) FROM gis_polygon; --fid AsText(Centroid(g)) --108 POINT(15 15) --109 POINT(25.416666666666668 25.416666666666668) --110 POINT(20 10) --SELECT fid, Area(g) FROM gis_polygon; --fid Area(g) --108 100 --109 2400 --110 450 --SELECT fid, AsText(ExteriorRing(g)) FROM gis_polygon; --fid AsText(ExteriorRing(g)) --108 LINESTRING(10 10,20 10,20 20,10 20,10 10) --109 LINESTRING(0 0,50 0,50 50,0 50,0 0) --110 LINESTRING(0 0,30 0,30 30,0 0) --SELECT fid, NumInteriorRings(g) FROM gis_polygon; --fid NumInteriorRings(g) --108 0 --109 1 --110 0 --SELECT fid, AsText(InteriorRingN(g, 1)) FROM gis_polygon; --fid AsText(InteriorRingN(g, 1)) --108 NULL --109 LINESTRING(10 10,20 10,20 20,10 20,10 10) --110 NULL --SELECT fid, IsClosed(g) FROM gis_multi_line; --fid IsClosed(g) --114 0 --115 0 --116 0 --SELECT fid, AsText(Centroid(g)) FROM gis_multi_polygon; --fid AsText(Centroid(g)) --117 POINT(55.58852775304245 17.426536064113982) --118 POINT(55.58852775304245 17.426536064113982) --119 POINT(2 2) --SELECT fid, Area(g) FROM gis_multi_polygon; --fid Area(g) --117 1684.5 --118 1684.5 --119 4.5 --SELECT fid, NumGeometries(g) from gis_multi_point; --fid NumGeometries(g) --111 4 --112 4 --113 2 --SELECT fid, NumGeometries(g) from gis_multi_line; --fid NumGeometries(g) --114 2 --115 1 --116 2 --SELECT fid, NumGeometries(g) from gis_multi_polygon; --fid NumGeometries(g) --117 2 --118 2 --119 1 --SELECT fid, NumGeometries(g) from gis_geometrycollection; --fid NumGeometries(g) --120 2 --121 2 --122 0 --123 0 --SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_point; --fid AsText(GeometryN(g, 2)) --111 POINT(10 10) --112 POINT(11 11) --113 POINT(4 10) --SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_line; --fid AsText(GeometryN(g, 2)) --114 
LINESTRING(16 0,16 23,16 48) --115 NULL --116 LINESTRING(2 5,5 8,21 7) --SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_polygon; --fid AsText(GeometryN(g, 2)) --117 POLYGON((59 18,67 18,67 13,59 13,59 18)) --118 POLYGON((59 18,67 18,67 13,59 13,59 18)) --119 NULL --SELECT fid, AsText(GeometryN(g, 2)) from gis_geometrycollection; --fid AsText(GeometryN(g, 2)) --120 LINESTRING(0 0,10 10) --121 LINESTRING(3 6,7 9) --122 NULL --123 NULL --SELECT fid, AsText(GeometryN(g, 1)) from gis_geometrycollection; --fid AsText(GeometryN(g, 1)) --120 POINT(0 0) --121 POINT(44 6) --122 NULL --123 NULL --SELECT g1.fid as first, g2.fid as second, --Within(g1.g, g2.g) as w, Contains(g1.g, g2.g) as c, Overlaps(g1.g, g2.g) as o, --Equals(g1.g, g2.g) as e, Disjoint(g1.g, g2.g) as d, Touches(g1.g, g2.g) as t, --Intersects(g1.g, g2.g) as i, Crosses(g1.g, g2.g) as r --FROM gis_geometrycollection g1, gis_geometrycollection g2 ORDER BY first, second; --first second w c o e d t i r --120 120 1 1 0 1 0 0 1 0 --120 121 0 0 1 0 0 0 1 0 --120 122 NULL NULL NULL NULL NULL NULL NULL NULL --120 123 NULL NULL NULL NULL NULL NULL NULL NULL --121 120 0 0 1 0 0 0 1 0 --121 121 1 1 0 1 0 0 1 0 --121 122 NULL NULL NULL NULL NULL NULL NULL NULL --121 123 NULL NULL NULL NULL NULL NULL NULL NULL --122 120 NULL NULL NULL NULL NULL NULL NULL NULL --122 121 NULL NULL NULL NULL NULL NULL NULL NULL --122 122 NULL NULL NULL NULL NULL NULL NULL NULL --122 123 NULL NULL NULL NULL NULL NULL NULL NULL --123 120 NULL NULL NULL NULL NULL NULL NULL NULL --123 121 NULL NULL NULL NULL NULL NULL NULL NULL --123 122 NULL NULL NULL NULL NULL NULL NULL NULL --123 123 NULL NULL NULL NULL NULL NULL NULL NULL --DROP TABLE gis_point, gis_line, gis_polygon, gis_multi_point, gis_multi_line, gis_multi_polygon, gis_geometrycollection, gis_geometry; --USE gis_ogs; --# Lakes --INSERT INTO lakes (fid,name,shore) VALUES ( --101, 'BLUE LAKE', --PolyFromText( --'POLYGON( -- (52 18,66 23,73 9,48 6,52 18), -- (59 18,67 18,67 13,59 13,59 18) 
-- )', --101)); --# Road Segments --INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(102, 'Route 5', NULL, 2, --LineFromText( --'LINESTRING( 0 18, 10 21, 16 23, 28 26, 44 31 )' ,101)); --INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(103, 'Route 5', 'Main Street', 4, --LineFromText( --'LINESTRING( 44 31, 56 34, 70 38 )' ,101)); --INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(104, 'Route 5', NULL, 2, --LineFromText( --'LINESTRING( 70 38, 72 48 )' ,101)); --INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(105, 'Main Street', NULL, 4, --LineFromText( --'LINESTRING( 70 38, 84 42 )' ,101)); --INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(106, 'Dirt Road by Green Forest', NULL, --1, --LineFromText( --'LINESTRING( 28 26, 28 0 )',101)); --# DividedRoutes --INSERT INTO divided_routes (fid,name,num_lanes,centerlines) VALUES(119, 'Route 75', 4, --MLineFromText( --'MULTILINESTRING((10 48,10 21,10 0), -- (16 0,16 23,16 48))', 101)); --# Forests --INSERT INTO forests (fid,name,boundary) VALUES(109, 'Green Forest', --MPolyFromText( --'MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26), -- (52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))', --101)); --# Bridges --INSERT INTO bridges (fid,name,position) VALUES(110, 'Cam Bridge', PointFromText( --'POINT( 44 31 )', 101)); --# Streams --INSERT INTO streams (fid,name,centerline) VALUES(111, 'Cam Stream', --LineFromText( --'LINESTRING( 38 48, 44 41, 41 36, 44 31, 52 18 )', 101)); --INSERT INTO streams (fid,name,centerline) VALUES(112, NULL, --LineFromText( --'LINESTRING( 76 0, 78 4, 73 9 )', 101)); --# Buildings --INSERT INTO buildings (fid,name,position,footprint) VALUES(113, '123 Main Street', --PointFromText( --'POINT( 52 30 )', 101), --PolyFromText( --'POLYGON( ( 50 31, 54 31, 54 29, 50 29, 50 31) )', 101)); --INSERT INTO buildings (fid,name,position,footprint) VALUES(114, '215 Main Street', 
--PointFromText( --'POINT( 64 33 )', 101), --PolyFromText( --'POLYGON( ( 66 34, 62 34, 62 32, 66 32, 66 34) )', 101)); --# Ponds --INSERT INTO ponds (fid,name,type,shores) VALUES(120, NULL, 'Stock Pond', --MPolyFromText( --'MULTIPOLYGON( ( ( 24 44, 22 42, 24 40, 24 44) ), -- ( ( 26 44, 26 40, 28 42, 26 44) ) )', 101)); --# Named Places --INSERT INTO named_places (fid,name,boundary) VALUES(117, 'Ashton', --PolyFromText( --'POLYGON( ( 62 48, 84 48, 84 30, 56 30, 56 34, 62 48) )', 101)); --INSERT INTO named_places (fid,name,boundary) VALUES(118, 'Goose Island', --PolyFromText( --'POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )', 101)); --# Map Neatlines --INSERT INTO map_neatlines (fid,neatline) VALUES(115, --PolyFromText( --'POLYGON( ( 0 0, 0 48, 84 48, 84 0, 0 0 ) )', 101)); --SELECT Dimension(shore) --FROM lakes --WHERE name = 'Blue Lake'; --Dimension(shore) --2 --SELECT GeometryType(centerlines) --FROM divided_routes --WHERE name = 'Route 75'; --GeometryType(centerlines) --MULTILINESTRING --SELECT AsText(boundary) --FROM named_places --WHERE name = 'Goose Island'; --AsText(boundary) --POLYGON((67 13,67 18,59 18,59 13,67 13)) --SELECT AsText(PolyFromWKB(AsBinary(boundary),101)) --FROM named_places --WHERE name = 'Goose Island'; --AsText(PolyFromWKB(AsBinary(boundary),101)) --POLYGON((67 13,67 18,59 18,59 13,67 13)) --SELECT SRID(boundary) --FROM named_places --WHERE name = 'Goose Island'; --SRID(boundary) --101 --SELECT IsEmpty(centerline) --FROM road_segments --WHERE name = 'Route 5' --AND aliases = 'Main Street'; --IsEmpty(centerline) --0 --SELECT AsText(Envelope(boundary)) --FROM named_places --WHERE name = 'Goose Island'; --AsText(Envelope(boundary)) --POLYGON((59 13,67 13,67 18,59 18,59 13)) --SELECT X(position) --FROM bridges --WHERE name = 'Cam Bridge'; --X(position) --44 --SELECT Y(position) --FROM bridges --WHERE name = 'Cam Bridge'; --Y(position) --31 --SELECT AsText(StartPoint(centerline)) --FROM road_segments --WHERE fid = 102; 
--AsText(StartPoint(centerline)) --POINT(0 18) --SELECT AsText(EndPoint(centerline)) --FROM road_segments --WHERE fid = 102; --AsText(EndPoint(centerline)) --POINT(44 31) --SELECT GLength(centerline) --FROM road_segments --WHERE fid = 106; --GLength(centerline) --26 --SELECT NumPoints(centerline) --FROM road_segments --WHERE fid = 102; --NumPoints(centerline) --5 --SELECT AsText(PointN(centerline, 1)) --FROM road_segments --WHERE fid = 102; --AsText(PointN(centerline, 1)) --POINT(0 18) --SELECT AsText(Centroid(boundary)) --FROM named_places --WHERE name = 'Goose Island'; --AsText(Centroid(boundary)) --POINT(63 15.5) --SELECT Area(boundary) --FROM named_places --WHERE name = 'Goose Island'; --Area(boundary) --40 --SELECT AsText(ExteriorRing(shore)) --FROM lakes --WHERE name = 'Blue Lake'; --AsText(ExteriorRing(shore)) --LINESTRING(52 18,66 23,73 9,48 6,52 18) --SELECT NumInteriorRings(shore) --FROM lakes --WHERE name = 'Blue Lake'; --NumInteriorRings(shore) --1 --SELECT AsText(InteriorRingN(shore, 1)) --FROM lakes --WHERE name = 'Blue Lake'; --AsText(InteriorRingN(shore, 1)) --LINESTRING(59 18,67 18,67 13,59 13,59 18) --SELECT NumGeometries(centerlines) --FROM divided_routes --WHERE name = 'Route 75'; --NumGeometries(centerlines) --2 --SELECT AsText(GeometryN(centerlines, 2)) --FROM divided_routes --WHERE name = 'Route 75'; --AsText(GeometryN(centerlines, 2)) --LINESTRING(16 0,16 23,16 48) --SELECT IsClosed(centerlines) --FROM divided_routes --WHERE name = 'Route 75'; --IsClosed(centerlines) --0 --SELECT GLength(centerlines) --FROM divided_routes --WHERE name = 'Route 75'; --GLength(centerlines) --96 --SELECT AsText(Centroid(shores)) --FROM ponds --WHERE fid = 120; --AsText(Centroid(shores)) --POINT(25 42) --SELECT Area(shores) --FROM ponds --WHERE fid = 120; --Area(shores) --8 --SELECT ST_Equals(boundary, --PolyFromText('POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )',1)) --FROM named_places --WHERE name = 'Goose Island'; --ST_Equals(boundary, 
--PolyFromText('POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )',1)) --1 --SELECT ST_Disjoint(centerlines, boundary) --FROM divided_routes, named_places --WHERE divided_routes.name = 'Route 75' --AND named_places.name = 'Ashton'; --ST_Disjoint(centerlines, boundary) --1 --SELECT ST_Touches(centerline, shore) --FROM streams, lakes --WHERE streams.name = 'Cam Stream' --AND lakes.name = 'Blue Lake'; --ST_Touches(centerline, shore) --1 --SELECT Crosses(road_segments.centerline, divided_routes.centerlines) --FROM road_segments, divided_routes --WHERE road_segments.fid = 102 --AND divided_routes.name = 'Route 75'; --Crosses(road_segments.centerline, divided_routes.centerlines) --1 --SELECT ST_Intersects(road_segments.centerline, divided_routes.centerlines) --FROM road_segments, divided_routes --WHERE road_segments.fid = 102 --AND divided_routes.name = 'Route 75'; --ST_Intersects(road_segments.centerline, divided_routes.centerlines) --1 --SELECT ST_Contains(forests.boundary, named_places.boundary) --FROM forests, named_places --WHERE forests.name = 'Green Forest' --AND named_places.name = 'Ashton'; --ST_Contains(forests.boundary, named_places.boundary) --0 --SELECT ST_Distance(position, boundary) --FROM bridges, named_places --WHERE bridges.name = 'Cam Bridge' --AND named_places.name = 'Ashton'; --ST_Distance(position, boundary) --12 --SELECT AsText(ST_Difference(named_places.boundary, forests.boundary)) --FROM named_places, forests --WHERE named_places.name = 'Ashton' --AND forests.name = 'Green Forest'; --AsText(ST_Difference(named_places.boundary, forests.boundary)) --POLYGON((56 34,62 48,84 48,84 42,56 34)) --SELECT AsText(ST_Union(shore, boundary)) --FROM lakes, named_places --WHERE lakes.name = 'Blue Lake' --AND named_places.name = 'Goose Island'; --AsText(ST_Union(shore, boundary)) --POLYGON((48 6,52 18,66 23,73 9,48 6)) --SELECT AsText(ST_SymDifference(shore, boundary)) --FROM lakes, named_places --WHERE lakes.name = 'Blue Lake' --AND named_places.name = 'Ashton'; 
--AsText(ST_SymDifference(shore, boundary)) --MULTIPOLYGON(((48 6,52 18,66 23,73 9,48 6),(59 13,59 18,67 18,67 13,59 13)),((56 30,56 34,62 48,84 48,84 30,56 30))) --SELECT count(*) --FROM buildings, bridges --WHERE ST_Contains(ST_Buffer(bridges.position, 15.0), buildings.footprint) = 1; --count(*) --1 -+ERROR HY000: The storage engine <STORAGE_ENGINE> doesn't support SPATIAL indexes -+# ERROR: Statement ended with errno 1464, errname ER_TABLE_CANT_HANDLE_SPKEYS (expected to succeed) -+# ------------ UNEXPECTED RESULT ------------ -+# [ CREATE TABLE gis_point (fid INT(11) /*!*/ /*Custom column options*/, g POINT NOT NULL, SPATIAL INDEX(g)) ENGINE=InnoDB /*!*/ /*Custom table options*/ ] -+# The statement|command finished with ER_TABLE_CANT_HANDLE_SPKEYS. -+# Geometry types or spatial indexes or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors. -+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def. -+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped. -+# Also, this problem may cause a chain effect (more errors of different kinds in the test). -+# ------------------------------------------- - DROP DATABASE gis_ogs; - USE test; diff --git a/storage/xtradb/mysql-test/storage_engine/type_text.opt b/storage/xtradb/mysql-test/storage_engine/type_text.opt deleted file mode 100644 index 40445305fc6..00000000000 --- a/storage/xtradb/mysql-test/storage_engine/type_text.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb_log_file_size=100M diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc deleted file mode 100644 index 7fbee0312ee..00000000000 --- a/storage/xtradb/os/os0file.cc +++ /dev/null @@ -1,6545 +0,0 @@ -/*********************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2009, Percona Inc. 
-Copyright (c) 2013, 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General -Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -***********************************************************************/ - -/**************************************************//** -@file os/os0file.cc -The interface to the operating system file i/o primitives - -Created 10/21/1995 Heikki Tuuri -*******************************************************/ - -#include "os0file.h" - -#ifdef UNIV_NONINL -#include "os0file.ic" -#endif -#include "ha_prototypes.h" -#include "ut0mem.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "fil0fil.h" -#include "fsp0fsp.h" -#include "fil0pagecompress.h" -#include "buf0buf.h" -#include "btr0types.h" -#include "trx0trx.h" -#include "srv0mon.h" -#include "srv0srv.h" -#ifdef HAVE_LINUX_UNISTD_H -#include "unistd.h" -#endif -#ifndef UNIV_HOTBACKUP -# include "os0sync.h" -# include "os0thread.h" -#else /* !UNIV_HOTBACKUP */ -# ifdef __WIN__ -/* Add includes for the _stat() call to compile on Windows */ -# include <sys/types.h> -# include <sys/stat.h> -# include 
<errno.h> -# endif /* __WIN__ */ -#endif /* !UNIV_HOTBACKUP */ - -#if defined(LINUX_NATIVE_AIO) -#include <libaio.h> -#endif - -#ifdef _WIN32 -#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1 -#endif - -#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H) -# include <sys/ioctl.h> -# ifndef DFS_IOCTL_ATOMIC_WRITE_SET -# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) -# endif -#endif - -#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) -#include <sys/statvfs.h> -#endif - -#if defined(UNIV_LINUX) && defined(HAVE_LINUX_FALLOC_H) -#include <linux/falloc.h> -#endif - -#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE -# include <fcntl.h> -# include <linux/falloc.h> -#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */ - -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif - -#ifdef HAVE_SNAPPY -#include "snappy-c.h" -#endif - -/** Insert buffer segment id */ -static const ulint IO_IBUF_SEGMENT = 0; - -/** Log segment id */ -static const ulint IO_LOG_SEGMENT = 1; - -/* This specifies the file permissions InnoDB uses when it creates files in -Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to -my_umask */ - -#ifndef __WIN__ -/** Umask for creating files */ -UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; -# define os_file_invalid (-1) -#else -/** Umask for creating files */ -UNIV_INTERN ulint os_innodb_umask = 0; -# define ECANCELED 125 -# define os_file_invalid INVALID_HANDLE_VALUE -#endif /* __WIN__ */ - -#ifndef UNIV_HOTBACKUP -/* We use these mutexes to protect lseek + file i/o operation, if the -OS does not provide an atomic pread or pwrite, or similar */ -#define OS_FILE_N_SEEK_MUTEXES 16 -UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; - -/* In simulated aio, merge at most this many consecutive i/os */ -#define OS_AIO_MERGE_N_CONSECUTIVE 64 - -#ifdef WITH_INNODB_DISALLOW_WRITES -#define WAIT_ALLOW_WRITES() if (!IS_XTRABACKUP()) os_event_wait(srv_allow_writes_event) -#else -#define WAIT_ALLOW_WRITES() do { } 
while (0) -#endif /* WITH_INNODB_DISALLOW_WRITES */ - -/********************************************************************** - -InnoDB AIO Implementation: -========================= - -We support native AIO for windows and linux. For rest of the platforms -we simulate AIO by special io-threads servicing the IO-requests. - -Simulated AIO: -============== - -In platforms where we 'simulate' AIO following is a rough explanation -of the high level design. -There are four io-threads (for ibuf, log, read, write). -All synchronous IO requests are serviced by the calling thread using -os_file_write/os_file_read. The Asynchronous requests are queued up -in an array (there are four such arrays) by the calling thread. -Later these requests are picked up by the io-thread and are serviced -synchronously. - -Windows native AIO: -================== - -If srv_use_native_aio is not set then windows follow the same -code as simulated AIO. If the flag is set then native AIO interface -is used. On windows, one of the limitation is that if a file is opened -for AIO no synchronous IO can be done on it. Therefore we have an -extra fifth array to queue up synchronous IO requests. -There are innodb_file_io_threads helper threads. These threads work -on the four arrays mentioned above in Simulated AIO. No thread is -required for the sync array. -If a synchronous IO request is made, it is first queued in the sync -array. Then the calling thread itself waits on the request, thus -making the call synchronous. -If an AIO request is made the calling thread not only queues it in the -array but also submits the requests. The helper thread then collects -the completed IO request and calls completion routine on it. - -Linux native AIO: -================= - -If we have libaio installed on the system and innodb_use_native_aio -is set to TRUE we follow the code path of native AIO, otherwise we -do simulated AIO. -There are innodb_file_io_threads helper threads. 
These threads work -on the four arrays mentioned above in Simulated AIO. -If a synchronous IO request is made, it is handled by calling -os_file_write/os_file_read. -If an AIO request is made the calling thread not only queues it in the -array but also submits the requests. The helper thread then collects -the completed IO request and calls completion routine on it. - -**********************************************************************/ - -/** Flag: enable debug printout for asynchronous i/o */ -UNIV_INTERN ibool os_aio_print_debug = FALSE; - -#ifdef UNIV_PFS_IO -/* Keys to register InnoDB I/O with performance schema */ -UNIV_INTERN mysql_pfs_key_t innodb_file_data_key; -UNIV_INTERN mysql_pfs_key_t innodb_file_log_key; -UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key; -UNIV_INTERN mysql_pfs_key_t innodb_file_bmp_key; -#endif /* UNIV_PFS_IO */ - -/** The asynchronous i/o array slot structure */ -struct os_aio_slot_t{ -#ifdef WIN_ASYNC_IO - OVERLAPPED control; /*!< Windows control block for the - aio request, MUST be first element in the structure*/ - void *arr; /*!< Array this slot belongs to*/ -#endif - - ibool is_read; /*!< TRUE if a read operation */ - ulint pos; /*!< index of the slot in the aio - array */ - ibool reserved; /*!< TRUE if this slot is reserved */ - time_t reservation_time;/*!< time when reserved */ - ulint len; /*!< length of the block to read or - write */ - byte* buf; /*!< buffer used in i/o */ - ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */ - ulint is_log; /*!< 1 is OS_FILE_LOG or 0 */ - ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */ - - os_offset_t offset; /*!< file offset in bytes */ - pfs_os_file_t file; /*!< file where to read or write */ - const char* name; /*!< file name or path */ - ibool io_already_done;/*!< used only in simulated aio: - TRUE if the physical i/o already - made and only the slot message - needs to be passed to the caller - of os_aio_simulated_handle */ - ulint space_id; - fil_node_t* message1; /*!< message 
which is given by the */ - void* message2; /*!< the requester of an aio operation - and which can be used to identify - which pending aio operation was - completed */ - ulint bitmap; - - - ulint* write_size; /*!< Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - - ulint file_block_size;/*!< file block size */ - -#ifdef LINUX_NATIVE_AIO - struct iocb control; /* Linux control block for aio */ - int n_bytes; /* bytes written/read. */ - int ret; /* AIO return code */ -#endif /* WIN_ASYNC_IO */ -}; - -/** The asynchronous i/o array structure */ -struct os_aio_array_t{ - os_ib_mutex_t mutex; /*!< the mutex protecting the aio array */ - os_event_t not_full; - /*!< The event which is set to the - signaled state when there is space in - the aio outside the ibuf segment; - os_event_set() and os_event_reset() - are protected by os_aio_array_t::mutex */ - os_event_t is_empty; - /*!< The event which is set to the - signaled state when there are no - pending i/os in this array; - os_event_set() and os_event_reset() - are protected by os_aio_array_t::mutex */ - ulint n_slots;/*!< Total number of slots in the aio - array. This must be divisible by - n_threads. */ - ulint n_segments; - /*!< Number of segments in the aio - array of pending aio requests. A - thread can wait separately for any one - of the segments. */ - ulint cur_seg;/*!< We reserve IO requests in round - robin fashion to different segments. - This points to the segment that is to - be used to service next IO request. */ - ulint n_reserved; - /*!< Number of reserved slots in the - aio array outside the ibuf segment */ - os_aio_slot_t* slots; /*!< Pointer to the slots in the array */ - -#if defined(LINUX_NATIVE_AIO) - io_context_t* aio_ctx; - /* completion queue for IO. There is - one such queue per segment. Each thread - will work on one ctx exclusively. 
*/ - struct io_event* aio_events; - /* The array to collect completed IOs. - There is one such event for each - possible pending IO. The size of the - array is equal to n_slots. */ -#endif /* LINUX_NATIV_AIO */ -}; - -#if defined(LINUX_NATIVE_AIO) -/** timeout for each io_getevents() call = 500ms. */ -#define OS_AIO_REAP_TIMEOUT (500000000UL) - -/** time to sleep, in microseconds if io_setup() returns EAGAIN. */ -#define OS_AIO_IO_SETUP_RETRY_SLEEP (500000UL) - -/** number of attempts before giving up on io_setup(). */ -#define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5 -#endif - -/** Array of events used in simulated aio. */ -static os_event_t* os_aio_segment_wait_events; - -/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These -are NULL when the module has not yet been initialized. @{ */ -static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */ -static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */ -static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */ -static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */ -static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */ -/* @} */ - -/** Number of asynchronous I/O segments. Set by os_aio_init(). 
*/ -static ulint os_aio_n_segments = ULINT_UNDEFINED; - -/** If the following is TRUE, read i/o handler threads try to -wait until a batch of new read requests have been posted */ -static ibool os_aio_recommend_sleep_for_read_threads = FALSE; -#endif /* !UNIV_HOTBACKUP */ - -UNIV_INTERN ulint os_n_file_reads = 0; -UNIV_INTERN ulint os_bytes_read_since_printout = 0; -UNIV_INTERN ulint os_n_file_writes = 0; -UNIV_INTERN ulint os_n_fsyncs = 0; -UNIV_INTERN ulint os_n_file_reads_old = 0; -UNIV_INTERN ulint os_n_file_writes_old = 0; -UNIV_INTERN ulint os_n_fsyncs_old = 0; -UNIV_INTERN time_t os_last_printout; - -UNIV_INTERN ibool os_has_said_disk_full = FALSE; - -#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO) -/** After first fallocate failure we will disable os_file_trim */ -static bool os_fallocate_failed; - -/**********************************************************************//** -Directly manipulate the allocated disk space by deallocating for the file referred to -by fd for the byte range starting at offset and continuing for len bytes. -Within the specified range, partial file system blocks are zeroed, and whole -file system blocks are removed from the file. After a successful call, -subsequent reads from this range will return zeroes. -@return true if success, false if error */ -static -ibool -os_file_trim( -/*=========*/ - os_aio_slot_t* slot); /*!< in: slot structure */ -#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */ - -/****************************************************************//** -Does error handling when a file operation fails. -@return TRUE if we should retry the operation */ -ibool -os_file_handle_error_no_exit( -/*=========================*/ - const char* name, /*!< in: name of a file or NULL */ - const char* operation, /*!< in: operation */ - ibool on_error_silent,/*!< in: if TRUE then don't print - any message to the log. 
*/ - const char* file, /*!< in: file name */ - const ulint line); /*!< in: line */ - -/****************************************************************//** -Tries to enable the atomic write feature, if available, for the specified file -handle. -@return TRUE if success */ -static __attribute__((warn_unused_result)) -ibool -os_file_set_atomic_writes( -/*======================*/ - const char* name, /*!< in: name of the file */ - os_file_t file); /*!< in: handle to the file */ - -#ifdef UNIV_DEBUG -# ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Validates the consistency the aio system some of the time. -@return TRUE if ok or the check was skipped */ -UNIV_INTERN -ibool -os_aio_validate_skip(void) -/*======================*/ -{ -/** Try os_aio_validate() every this many times */ -# define OS_AIO_VALIDATE_SKIP 13 - - /** The os_aio_validate() call skip counter. - Use a signed type because of the race condition below. */ - static int os_aio_validate_count = OS_AIO_VALIDATE_SKIP; - - /* There is a race condition below, but it does not matter, - because this call is only for heuristic purposes. We want to - reduce the call frequency of the costly os_aio_validate() - check in debug builds. */ - if (--os_aio_validate_count > 0) { - return(TRUE); - } - - os_aio_validate_count = OS_AIO_VALIDATE_SKIP; - return(os_aio_validate()); -} -# endif /* !UNIV_HOTBACKUP */ -#endif /* UNIV_DEBUG */ - -#ifdef _WIN32 -/** IO completion port used by background io threads */ -static HANDLE completion_port; -/** IO completion port used by background io READ threads */ -static HANDLE read_completion_port; -/** Thread local storage index for the per-thread event used for synchronous IO */ -static DWORD tls_sync_io = TLS_OUT_OF_INDEXES; -#endif - -#ifdef __WIN__ -/***********************************************************************//** -Gets the operating system version. Currently works only on Windows. 
-@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA, -OS_WIN7. */ -UNIV_INTERN -ulint -os_get_os_version(void) -/*===================*/ -{ - OSVERSIONINFO os_info; - - os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ut_a(GetVersionEx(&os_info)); - - if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) { - return(OS_WIN31); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { - return(OS_WIN95); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - switch (os_info.dwMajorVersion) { - case 3: - case 4: - return(OS_WINNT); - case 5: - return (os_info.dwMinorVersion == 0) - ? OS_WIN2000 : OS_WINXP; - case 6: - return (os_info.dwMinorVersion == 0) - ? OS_WINVISTA : OS_WIN7; - default: - return(OS_WIN7); - } - } else { - ut_error; - return(0); - } -} -#endif /* __WIN__ */ - - -#ifdef _WIN32 -/* -Windows : Handling synchronous IO on files opened asynchronously. - -If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to -a completion port, then every IO on this file would normally be enqueued to the -completion port. Sometimes however we would like to do a synchronous IO. This is -possible if we initialitze have overlapped.hEvent with a valid event and set its -lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info) - -We'll create this special event once for each thread and store in thread local -storage. -*/ - - -/***********************************************************************//** -Initialize tls index.for event handle used for synchronized IO on files that -might be opened with FILE_FLAG_OVERLAPPED. 
-*/ -static void win_init_syncio_event() -{ - tls_sync_io = TlsAlloc(); - ut_a(tls_sync_io != TLS_OUT_OF_INDEXES); -} - -/***********************************************************************//** -Retrieve per-thread event for doing synchronous io on asyncronously opened files -*/ -static HANDLE win_get_syncio_event() -{ - HANDLE h; - if(tls_sync_io == TLS_OUT_OF_INDEXES){ - win_init_syncio_event(); - } - - h = (HANDLE)TlsGetValue(tls_sync_io); - if (h) - return h; - h = CreateEventA(NULL, FALSE, FALSE, NULL); - ut_a(h); - h = (HANDLE)((uintptr_t)h | 1); - TlsSetValue(tls_sync_io, h); - return h; -} - -/* - TLS destructor, inspired by Chromium code - http://src.chromium.org/svn/trunk/src/base/threading/thread_local_storage_win.cc -*/ - -static void win_free_syncio_event() -{ - HANDLE h = win_get_syncio_event(); - if (h) { - CloseHandle(h); - } -} - -static void NTAPI win_tls_thread_exit(PVOID module, DWORD reason, PVOID reserved) { - if (DLL_THREAD_DETACH == reason || DLL_PROCESS_DETACH == reason) - win_free_syncio_event(); -} - -extern "C" { -#ifdef _WIN64 -#pragma comment(linker, "/INCLUDE:_tls_used") -#pragma comment(linker, "/INCLUDE:p_thread_callback_base") -#pragma const_seg(".CRT$XLB") -extern const PIMAGE_TLS_CALLBACK p_thread_callback_base; -const PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; -#pragma data_seg() -#else -#pragma comment(linker, "/INCLUDE:__tls_used") -#pragma comment(linker, "/INCLUDE:_p_thread_callback_base") -#pragma data_seg(".CRT$XLB") -PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit; -#pragma data_seg() -#endif -} -#endif /*_WIN32 */ - -/***********************************************************************//** -For an EINVAL I/O error, prints a diagnostic message if innodb_flush_method -== ALL_O_DIRECT. 
-@return true if the diagnostic message was printed -@return false if the diagnostic message does not apply */ -static -bool -os_diagnose_all_o_direct_einval( -/*============================*/ - ulint err) /*!< in: C error code */ -{ - if ((err == EINVAL) - && (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) { - ib_logf(IB_LOG_LEVEL_INFO, - "The error might be caused by redo log I/O not " - "satisfying innodb_flush_method=ALL_O_DIRECT " - "requirements by the underlying file system."); - if (srv_log_block_size != 512) - ib_logf(IB_LOG_LEVEL_INFO, - "This might be caused by an incompatible " - "non-default innodb_log_block_size value %lu.", - srv_log_block_size); - ib_logf(IB_LOG_LEVEL_INFO, - "Please file a bug at https://bugs.percona.com and " - "include this error message, my.cnf settings, and " - "information about the file system where the redo log " - "resides."); - ib_logf(IB_LOG_LEVEL_INFO, - "A possible workaround is to change " - "innodb_flush_method value to something else " - "than ALL_O_DIRECT."); - return(true); - } - return(false); -} - -/***********************************************************************//** -Retrieves the last error number if an error occurs in a file io function. -The number should be retrieved before any other OS calls (because they may -overwrite the error number). If the number is not known to this program, -the OS error number + 100 is returned. 
-@return error number, or OS error number + 100 */ -static -ulint -os_file_get_last_error_low( -/*=======================*/ - bool report_all_errors, /*!< in: TRUE if we want an error - message printed of all errors */ - bool on_error_silent) /*!< in: TRUE then don't print any - diagnostic to the log */ -{ -#ifdef __WIN__ - - ulint err = (ulint) GetLastError(); - if (err == ERROR_SUCCESS) { - return(0); - } - - if (report_all_errors - || (!on_error_silent - && err != ERROR_DISK_FULL - && err != ERROR_FILE_EXISTS)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Operating system error number %lu" - " in a file operation.\n", (ulong) err); - - if (err == ERROR_PATH_NOT_FOUND) { - fprintf(stderr, - "InnoDB: The error means the system" - " cannot find the path specified.\n"); - - if (srv_is_being_started) { - fprintf(stderr, - "InnoDB: If you are installing InnoDB," - " remember that you must create\n" - "InnoDB: directories yourself, InnoDB" - " does not create them.\n"); - } - } else if (err == ERROR_ACCESS_DENIED) { - fprintf(stderr, - "InnoDB: The error means mysqld does not have" - " the access rights to\n" - "InnoDB: the directory. It may also be" - " you have created a subdirectory\n" - "InnoDB: of the same name as a data file.\n"); - } else if (err == ERROR_SHARING_VIOLATION - || err == ERROR_LOCK_VIOLATION) { - fprintf(stderr, - "InnoDB: The error means that another program" - " is using InnoDB's files.\n" - "InnoDB: This might be a backup or antivirus" - " software or another instance\n" - "InnoDB: of MySQL." 
- " Please close it to get rid of this error.\n"); - } else if (err == ERROR_WORKING_SET_QUOTA - || err == ERROR_NO_SYSTEM_RESOURCES) { - fprintf(stderr, - "InnoDB: The error means that there are no" - " sufficient system resources or quota to" - " complete the operation.\n"); - } else if (err == ERROR_OPERATION_ABORTED) { - fprintf(stderr, - "InnoDB: The error means that the I/O" - " operation has been aborted\n" - "InnoDB: because of either a thread exit" - " or an application request.\n" - "InnoDB: Retry attempt is made.\n"); - } else if (err == ECANCELED || err == ENOTTY) { - if (strerror(err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %d" - " means '%s'.\n", - err, strerror(err)); - } - - if(srv_use_atomic_writes) { - fprintf(stderr, - "InnoDB: Error trying to enable atomic writes on " - "non-supported destination!\n"); - } - } else { - fprintf(stderr, - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN - "operating-system-error-codes.html\n"); - } - } - - fflush(stderr); - - if (err == ERROR_FILE_NOT_FOUND) { - return(OS_FILE_NOT_FOUND); - } else if (err == ERROR_DISK_FULL) { - return(OS_FILE_DISK_FULL); - } else if (err == ERROR_FILE_EXISTS) { - return(OS_FILE_ALREADY_EXISTS); - } else if (err == ERROR_SHARING_VIOLATION - || err == ERROR_LOCK_VIOLATION) { - return(OS_FILE_SHARING_VIOLATION); - } else if (err == ERROR_WORKING_SET_QUOTA - || err == ERROR_NO_SYSTEM_RESOURCES) { - return(OS_FILE_INSUFFICIENT_RESOURCE); - } else if (err == ERROR_OPERATION_ABORTED) { - return(OS_FILE_OPERATION_ABORTED); - } else if (err == ERROR_ACCESS_DENIED) { - return(OS_FILE_ACCESS_VIOLATION); - } else if (err == ERROR_BUFFER_OVERFLOW) { - return(OS_FILE_NAME_TOO_LONG); - } else { - return(OS_FILE_ERROR_MAX + err); - } -#else - int err = errno; - if (err == 0) { - return(0); - } - - if (report_all_errors - || (err != ENOSPC && err != EEXIST && !on_error_silent)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: 
Operating system error number %d" - " in a file operation.\n", err); - - if (err == ENOENT) { - fprintf(stderr, - "InnoDB: The error means the system" - " cannot find the path specified.\n"); - - if (srv_is_being_started) { - fprintf(stderr, - "InnoDB: If you are installing InnoDB," - " remember that you must create\n" - "InnoDB: directories yourself, InnoDB" - " does not create them.\n"); - } - } else if (err == EACCES) { - fprintf(stderr, - "InnoDB: The error means mysqld does not have" - " the access rights to\n" - "InnoDB: the directory.\n"); - } else if (err == ECANCELED || err == ENOTTY) { - if (strerror(err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %d" - " means '%s'.\n", - err, strerror(err)); - } - - - if(srv_use_atomic_writes) { - fprintf(stderr, - "InnoDB: Error trying to enable atomic writes on " - "non-supported destination!\n"); - } - } else if (!os_diagnose_all_o_direct_einval(err)) { - if (strerror(err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %d" - " means '%s'.\n", - err, strerror(err)); - } - - - fprintf(stderr, - "InnoDB: Some operating system" - " error numbers are described at\n" - "InnoDB: " - REFMAN - "operating-system-error-codes.html\n"); - } - } - - fflush(stderr); - - switch (err) { - case ENOSPC: - return(OS_FILE_DISK_FULL); - case ENOENT: - return(OS_FILE_NOT_FOUND); - case EEXIST: - return(OS_FILE_ALREADY_EXISTS); - case ENAMETOOLONG: - return(OS_FILE_NAME_TOO_LONG); - case EXDEV: - case ENOTDIR: - case EISDIR: - return(OS_FILE_PATH_ERROR); - case EAGAIN: - if (srv_use_native_aio) { - return(OS_FILE_AIO_RESOURCES_RESERVED); - } - break; - case ECANCELED: - case ENOTTY: - return(OS_FILE_OPERATION_NOT_SUPPORTED); - case EINTR: - if (srv_use_native_aio) { - return(OS_FILE_AIO_INTERRUPTED); - } - break; - case EACCES: - return(OS_FILE_ACCESS_VIOLATION); - } - return(OS_FILE_ERROR_MAX + err); -#endif -} - -/***********************************************************************//** -Retrieves the last error number 
if an error occurs in a file io function. -The number should be retrieved before any other OS calls (because they may -overwrite the error number). If the number is not known to this program, -the OS error number + 100 is returned. -@return error number, or OS error number + 100 */ -UNIV_INTERN -ulint -os_file_get_last_error( -/*===================*/ - bool report_all_errors) /*!< in: TRUE if we want an error - message printed of all errors */ -{ - return(os_file_get_last_error_low(report_all_errors, false)); -} - -/****************************************************************//** -Does error handling when a file operation fails. -Conditionally exits (calling exit(3)) based on should_exit value and the -error type, if should_exit is TRUE then on_error_silent is ignored. -@return TRUE if we should retry the operation */ -ibool -os_file_handle_error_cond_exit( -/*===========================*/ - const char* name, /*!< in: name of a file or NULL */ - const char* operation, /*!< in: operation */ - ibool should_exit, /*!< in: call exit(3) if unknown error - and this parameter is TRUE */ - ibool on_error_silent,/*!< in: if TRUE then don't print - any message to the log iff it is - an unknown non-fatal error */ - const char* file, /*!< in: file name */ - const ulint line) /*!< in: line */ -{ - ulint err; - - err = os_file_get_last_error_low(false, on_error_silent); - - switch (err) { - case OS_FILE_DISK_FULL: - /* We only print a warning about disk full once */ - - if (os_has_said_disk_full) { - - return(FALSE); - } - - /* Disk full error is reported irrespective of the - on_error_silent setting. */ - - if (name) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Encountered a problem with" - " file %s\n", name); - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Disk is full. 
Try to clean the disk" - " to free space.\n"); - - os_has_said_disk_full = TRUE; - - fprintf(stderr, - " InnoDB: at file %s and at line %ld\n", file, line); - - fflush(stderr); - - ut_error; - return(FALSE); - - case OS_FILE_AIO_RESOURCES_RESERVED: - case OS_FILE_AIO_INTERRUPTED: - - return(TRUE); - - case OS_FILE_PATH_ERROR: - case OS_FILE_ALREADY_EXISTS: - case OS_FILE_ACCESS_VIOLATION: - - return(FALSE); - - case OS_FILE_SHARING_VIOLATION: - - os_thread_sleep(10000000); /* 10 sec */ - return(TRUE); - - case OS_FILE_OPERATION_ABORTED: - case OS_FILE_INSUFFICIENT_RESOURCE: - - os_thread_sleep(100000); /* 100 ms */ - return(TRUE); - - default: - - /* If it is an operation that can crash on error then it - is better to ignore on_error_silent and print an error message - to the log. */ - - if (should_exit || !on_error_silent) { - fprintf(stderr, - " InnoDB: Operation %s to file %s and at line %ld\n", - operation, file, line); - } - - if (should_exit || !on_error_silent) { - ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS " - "error " ULINTPF ".%s", name ? name : "(unknown)", - operation, err, should_exit - ? " Cannot continue operation" : ""); - } - - if (should_exit) { - abort(); - } - } - - return(FALSE); -} - -/****************************************************************//** -Does error handling when a file operation fails. -@return TRUE if we should retry the operation */ -static -ibool -os_file_handle_error( -/*=================*/ - const char* name, /*!< in: name of a file or NULL */ - const char* operation, /*!< in: operation */ - const char* file, /*!< in: file name */ - const ulint line) /*!< in: line */ -{ - /* exit in case of unknown error */ - return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line)); -} - -/****************************************************************//** -Does error handling when a file operation fails. 
-@return TRUE if we should retry the operation */ -ibool -os_file_handle_error_no_exit( -/*=========================*/ - const char* name, /*!< in: name of a file or NULL */ - const char* operation, /*!< in: operation */ - ibool on_error_silent,/*!< in: if TRUE then don't print - any message to the log. */ - const char* file, /*!< in: file name */ - const ulint line) /*!< in: line */ -{ - /* don't exit in case of unknown error */ - return(os_file_handle_error_cond_exit( - name, operation, FALSE, on_error_silent, file, line)); -} - -#undef USE_FILE_LOCK -#define USE_FILE_LOCK -#if defined(UNIV_HOTBACKUP) || defined(__WIN__) -/* InnoDB Hot Backup does not lock the data files. - * On Windows, mandatory locking is used. - */ -# undef USE_FILE_LOCK -#endif -#ifdef USE_FILE_LOCK -/****************************************************************//** -Obtain an exclusive lock on a file. -@return 0 on success */ -static -int -os_file_lock( -/*=========*/ - int fd, /*!< in: file descriptor */ - const char* name) /*!< in: file name */ -{ - struct flock lk; - - ut_ad(!srv_read_only_mode); - - lk.l_type = F_WRLCK; - lk.l_whence = SEEK_SET; - lk.l_start = lk.l_len = 0; - - if (fcntl(fd, F_SETLK, &lk) == -1) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to lock %s, error: %d", name, errno); - - if (errno == EAGAIN || errno == EACCES) { - ib_logf(IB_LOG_LEVEL_INFO, - "Check that you do not already have " - "another mysqld process using the " - "same InnoDB data or log files."); - } - - return(-1); - } - - return(0); -} -#endif /* USE_FILE_LOCK */ - -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Creates the seek mutexes used in positioned reads and writes. */ -void -os_io_init_simple(void) -/*===================*/ -{ - for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { - os_file_seek_mutexes[i] = os_mutex_create(); - } -#ifdef _WIN32 - win_init_syncio_event(); -#endif -} - -/** Create a temporary file. 
This function is like tmpfile(3), but -the temporary file is created in the given parameter path. If the path -is null then it will create the file in the mysql server configuration -parameter (--tmpdir). -@param[in] path location for creating temporary file -@return temporary file handle, or NULL on error */ -UNIV_INTERN -FILE* -os_file_create_tmpfile( - const char* path) -{ - WAIT_ALLOW_WRITES(); - - FILE* file = NULL; - int fd = innobase_mysql_tmpfile(path); - - ut_ad(!srv_read_only_mode); - - if (fd >= 0) { - file = fdopen(fd, "w+b"); - } - - if (!file) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unable to create temporary file;" - " errno: %d\n", errno); - if (fd >= 0) { - close(fd); - } - } - - return(file); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************************//** -The os_file_opendir() function opens a directory stream corresponding to the -directory named by the dirname argument. The directory stream is positioned -at the first entry. In both Unix and Windows we automatically skip the '.' -and '..' items at the start of the directory listing. -@return directory stream, NULL if error */ -UNIV_INTERN -os_file_dir_t -os_file_opendir( -/*============*/ - const char* dirname, /*!< in: directory name; it must not - contain a trailing '\' or '/' */ - ibool error_is_fatal) /*!< in: TRUE if we should treat an - error as a fatal error; if we try to - open symlinks then we do not wish a - fatal error if it happens not to be - a directory */ -{ - os_file_dir_t dir; -#ifdef __WIN__ - LPWIN32_FIND_DATA lpFindFileData; - char path[OS_FILE_MAX_PATH + 3]; - - ut_a(strlen(dirname) < OS_FILE_MAX_PATH); - - strcpy(path, dirname); - strcpy(path + strlen(path), "\\*"); - - /* Note that in Windows opening the 'directory stream' also retrieves - the first entry in the directory. Since it is '.', that is no problem, - as we will skip over the '.' and '..' entries anyway. 
*/ - - lpFindFileData = static_cast<LPWIN32_FIND_DATA>( - ut_malloc(sizeof(WIN32_FIND_DATA))); - - dir = FindFirstFile((LPCTSTR) path, lpFindFileData); - - ut_free(lpFindFileData); - - if (dir == INVALID_HANDLE_VALUE) { - - if (error_is_fatal) { - os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); - } - - return(NULL); - } - - return(dir); -#else - dir = opendir(dirname); - - if (dir == NULL && error_is_fatal) { - os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); - } - - return(dir); -#endif /* __WIN__ */ -} - -/***********************************************************************//** -Closes a directory stream. -@return 0 if success, -1 if failure */ -UNIV_INTERN -int -os_file_closedir( -/*=============*/ - os_file_dir_t dir) /*!< in: directory stream */ -{ -#ifdef __WIN__ - BOOL ret; - - ret = FindClose(dir); - - if (!ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); - - return(-1); - } - - return(0); -#else - int ret; - - ret = closedir(dir); - - if (ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); - } - - return(ret); -#endif /* __WIN__ */ -} - -/***********************************************************************//** -This function returns information of the next file in the directory. We jump -over the '.' and '..' entries in the directory. 
-@return 0 if ok, -1 if error, 1 if at the end of the directory */ -UNIV_INTERN -int -os_file_readdir_next_file( -/*======================*/ - const char* dirname,/*!< in: directory name or path */ - os_file_dir_t dir, /*!< in: directory stream */ - os_file_stat_t* info) /*!< in/out: buffer where the info is returned */ -{ -#ifdef __WIN__ - LPWIN32_FIND_DATA lpFindFileData; - BOOL ret; - - lpFindFileData = static_cast<LPWIN32_FIND_DATA>( - ut_malloc(sizeof(WIN32_FIND_DATA))); -next_file: - ret = FindNextFile(dir, lpFindFileData); - - if (ret) { - ut_a(strlen((char*) lpFindFileData->cFileName) - < OS_FILE_MAX_PATH); - - if (strcmp((char*) lpFindFileData->cFileName, ".") == 0 - || strcmp((char*) lpFindFileData->cFileName, "..") == 0) { - - goto next_file; - } - - strcpy(info->name, (char*) lpFindFileData->cFileName); - - info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow) - + (((ib_int64_t)(lpFindFileData->nFileSizeHigh)) - << 32); - - if (lpFindFileData->dwFileAttributes - & FILE_ATTRIBUTE_REPARSE_POINT) { - /* TODO: test Windows symlinks */ - /* TODO: MySQL has apparently its own symlink - implementation in Windows, dbname.sym can - redirect a database directory: - REFMAN "windows-symbolic-links.html" */ - info->type = OS_FILE_TYPE_LINK; - } else if (lpFindFileData->dwFileAttributes - & FILE_ATTRIBUTE_DIRECTORY) { - info->type = OS_FILE_TYPE_DIR; - } else { - /* It is probably safest to assume that all other - file types are normal. Better to check them rather - than blindly skip them. 
*/ - - info->type = OS_FILE_TYPE_FILE; - } - } - - ut_free(lpFindFileData); - - if (ret) { - return(0); - } else if (GetLastError() == ERROR_NO_MORE_FILES) { - - return(1); - } else { - os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__); - return(-1); - } -#else - struct dirent* ent; - char* full_path; - int ret; - struct stat statinfo; - -next_file: - - ent = readdir(dir); - - if (ent == NULL) { - - return(1); - } - ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH); - - if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) { - - goto next_file; - } - - strcpy(info->name, ent->d_name); - - full_path = static_cast<char*>( - ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10)); - - sprintf(full_path, "%s/%s", dirname, ent->d_name); - - ret = stat(full_path, &statinfo); - - if (ret) { - - if (errno == ENOENT) { - /* readdir() returned a file that does not exist, - it must have been deleted in the meantime. Do what - would have happened if the file was deleted before - readdir() - ignore and go to the next entry. - If this is the last entry then info->name will still - contain the name of the deleted file when this - function returns, but this is not an issue since the - caller shouldn't be looking at info when end of - directory is returned. */ - - ut_free(full_path); - - goto next_file; - } - - os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__); - - ut_free(full_path); - - return(-1); - } - - info->size = (ib_int64_t) statinfo.st_size; - - if (S_ISDIR(statinfo.st_mode)) { - info->type = OS_FILE_TYPE_DIR; - } else if (S_ISLNK(statinfo.st_mode)) { - info->type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { - info->type = OS_FILE_TYPE_FILE; - } else { - info->type = OS_FILE_TYPE_UNKNOWN; - } - - ut_free(full_path); - - return(0); -#endif -} - -/*****************************************************************//** -This function attempts to create a directory named pathname. 
The new -directory gets default permissions. On Unix the permissions are -(0770 & ~umask). If the directory exists already, nothing is done and -the call succeeds, unless the fail_if_exists arguments is true. -If another error occurs, such as a permission error, this does not crash, -but reports the error and returns FALSE. -@return TRUE if call succeeds, FALSE on error */ -UNIV_INTERN -ibool -os_file_create_directory( -/*=====================*/ - const char* pathname, /*!< in: directory name as - null-terminated string */ - ibool fail_if_exists) /*!< in: if TRUE, pre-existing directory - is treated as an error. */ -{ -#ifdef __WIN__ - BOOL rcode; - - rcode = CreateDirectory((LPCTSTR) pathname, NULL); - if (!(rcode != 0 - || (GetLastError() == ERROR_ALREADY_EXISTS - && !fail_if_exists))) { - - os_file_handle_error_no_exit( - pathname, "CreateDirectory", FALSE, __FILE__, __LINE__); - - return(FALSE); - } - - return(TRUE); -#else - int rcode; - WAIT_ALLOW_WRITES(); - - rcode = mkdir(pathname, 0770); - - if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { - /* failure */ - os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__); - - return(FALSE); - } - - return (TRUE); -#endif /* __WIN__ */ -} - -/****************************************************************//** -NOTE! Use the corresponding macro os_file_create_simple(), not directly -this function! -A simple function to open or create a file. 
-@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INTERN -os_file_t -os_file_create_simple_func( -/*=======================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY or - OS_FILE_READ_WRITE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ -{ - os_file_t file; - ibool retry; - - *success = FALSE; -#ifdef __WIN__ - DWORD access; - DWORD create_flag; - DWORD attributes = 0; - - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); - ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); - - if (create_mode == OS_FILE_OPEN) { - - create_flag = OPEN_EXISTING; - - } else if (srv_read_only_mode) { - - create_flag = OPEN_EXISTING; - - } else if (create_mode == OS_FILE_CREATE) { - - create_flag = CREATE_NEW; - - } else if (create_mode == OS_FILE_CREATE_PATH) { - - ut_a(!srv_read_only_mode); - - /* Create subdirs along the path if needed */ - *success = os_file_create_subdirs_if_needed(name); - - if (!*success) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to create subdirectories '%s'", - name); - - return((os_file_t) -1); - } - - create_flag = CREATE_NEW; - create_mode = OS_FILE_CREATE; - - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file create mode (%lu) for file '%s'", - create_mode, name); - - return((os_file_t) -1); - } - - if (access_type == OS_FILE_READ_ONLY) { - access = GENERIC_READ; - } else if (srv_read_only_mode) { - - ib_logf(IB_LOG_LEVEL_INFO, - "read only mode set. 
Unable to " - "open file '%s' in RW mode, trying RO mode", name); - - access = GENERIC_READ; - - } else if (access_type == OS_FILE_READ_WRITE - || access_type == OS_FILE_READ_WRITE_CACHED) { - access = GENERIC_READ | GENERIC_WRITE; - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file access type (%lu) for file '%s'", - access_type, name); - - return((os_file_t) -1); - } - - do { - /* Use default security attributes and no template file. */ - - file = CreateFile( - (LPCTSTR) name, access, FILE_SHARE_READ, NULL, - create_flag, attributes, NULL); - - if (file == INVALID_HANDLE_VALUE) { - - *success = FALSE; - - retry = os_file_handle_error( - name, create_mode == OS_FILE_OPEN ? - "open" : "create", __FILE__, __LINE__); - - } else { - *success = TRUE; - retry = false; - } - - } while (retry); - -#else /* __WIN__ */ - int create_flag; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) - WAIT_ALLOW_WRITES(); - - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); - ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); - - if (create_mode == OS_FILE_OPEN) { - - if (access_type == OS_FILE_READ_ONLY) { - create_flag = O_RDONLY; - } else if (srv_read_only_mode) { - create_flag = O_RDONLY; - } else { - create_flag = O_RDWR; - } - - } else if (srv_read_only_mode) { - - create_flag = O_RDONLY; - - } else if (create_mode == OS_FILE_CREATE) { - - create_flag = O_RDWR | O_CREAT | O_EXCL; - - } else if (create_mode == OS_FILE_CREATE_PATH) { - - /* Create subdirs along the path if needed */ - - *success = os_file_create_subdirs_if_needed(name); - - if (!*success) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to create subdirectories '%s'", - name); - - return((os_file_t) -1); - } - - create_flag = O_RDWR | O_CREAT | O_EXCL; - create_mode = OS_FILE_CREATE; - } else { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file create mode (%lu) for file '%s'", - create_mode, name); - - return((os_file_t) -1); - } - - do { - file = ::open(name, create_flag, os_innodb_umask); - - if 
(file == -1) { - *success = FALSE; - - retry = os_file_handle_error( - name, - create_mode == OS_FILE_OPEN - ? "open" : "create", __FILE__, __LINE__); - } else { - *success = TRUE; - retry = false; - } - - } while (retry); - -#ifdef USE_FILE_LOCK - if (!srv_read_only_mode - && *success - && (access_type == OS_FILE_READ_WRITE - || access_type == OS_FILE_READ_WRITE_CACHED) - && os_file_lock(file, name)) { - - *success = FALSE; - close(file); - file = -1; - } -#endif /* USE_FILE_LOCK */ - -#endif /* __WIN__ */ - - return(file); -} - -/** Disable OS I/O caching on the file if the file type and server -configuration requires it. -@param file handle to the file -@param name name of the file, for diagnostics -@param mode_str operation on the file, for diagnostics -@param type OS_LOG_FILE or OS_DATA_FILE -@param access_type if OS_FILE_READ_WRITE_CACHED, then caching will be disabled -unconditionally, ignored otherwise */ -static -void -os_file_set_nocache_if_needed(os_file_t file, const char* name, - const char *mode_str, ulint type, - ulint access_type) -{ - if (srv_read_only_mode || access_type == OS_FILE_READ_WRITE_CACHED) { - return; - } - - if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT - || (type == OS_DATA_FILE - && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT - || (srv_unix_file_flush_method - == SRV_UNIX_O_DIRECT_NO_FSYNC)))) - /* Do fsync() on log files when setting O_DIRECT fails. - See log_io_complete() */ - if (!os_file_set_nocache(file, name, mode_str) - && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) - srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; -} - -/****************************************************************//** -NOTE! Use the corresponding macro -os_file_create_simple_no_error_handling(), not directly this function! -A simple function to open or create a file. 
-@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INTERN -pfs_os_file_t -os_file_create_simple_no_error_handling_func( -/*=========================================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint access_type,/*!< in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, - OS_FILE_READ_ALLOW_DELETE (used by a backup - program reading the file), or - OS_FILE_READ_WRITE_CACHED (disable O_DIRECT - if it would be enabled otherwise) */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes) /*! in: atomic writes table option - value */ -{ - pfs_os_file_t file; - atomic_writes_t awrites = (atomic_writes_t) atomic_writes; - - *success = FALSE; -#ifdef __WIN__ - DWORD access; - DWORD create_flag; - DWORD attributes = 0; - DWORD share_mode = FILE_SHARE_READ; - ut_a(name); - - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); - ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); - - if (create_mode == OS_FILE_OPEN) { - create_flag = OPEN_EXISTING; - } else if (srv_read_only_mode) { - create_flag = OPEN_EXISTING; - } else if (create_mode == OS_FILE_CREATE) { - create_flag = CREATE_NEW; - } else { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file create mode (%lu) for file '%s'", - create_mode, name); - file = INVALID_HANDLE_VALUE; - return(file); - } - - if (access_type == OS_FILE_READ_ONLY) { - access = GENERIC_READ; - } else if (srv_read_only_mode) { - access = GENERIC_READ; - } else if (access_type == OS_FILE_READ_WRITE - || access_type == OS_FILE_READ_WRITE_CACHED) { - access = GENERIC_READ | GENERIC_WRITE; - } else if (access_type == OS_FILE_READ_ALLOW_DELETE) { - - ut_a(!srv_read_only_mode); - - access = GENERIC_READ; - - /*!< A backup program has to give mysqld the maximum - freedom to do what it likes with the file */ - - share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE; - } 
else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file access type (%lu) for file '%s'", - access_type, name); - file = INVALID_HANDLE_VALUE; - return(file); - } - - if (IS_XTRABACKUP()) { - share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE; - } - - file = CreateFile((LPCTSTR) name, - access, - share_mode, - NULL, // Security attributes - create_flag, - attributes, - NULL); // No template file - - /* If we have proper file handle and atomic writes should be used, - try to set atomic writes and if that fails when creating a new - table, produce a error. If atomic writes are used on existing - file, ignore error and use traditional writes for that file */ - if (file != INVALID_HANDLE_VALUE - && (awrites == ATOMIC_WRITES_ON || - (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) - && !os_file_set_atomic_writes(name, file)) { - if (create_mode == OS_FILE_CREATE) { - fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); - CloseHandle(file); - os_file_delete_if_exists_func(name); - *success = FALSE; - file = INVALID_HANDLE_VALUE; - } - } - - *success = file != INVALID_HANDLE_VALUE; -#else /* __WIN__ */ - int create_flag; - const char* mode_str = NULL; - ut_a(name); - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) - WAIT_ALLOW_WRITES(); - - ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); - ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT)); - - if (create_mode == OS_FILE_OPEN) { - - mode_str = "OPEN"; - - if (access_type == OS_FILE_READ_ONLY) { - - create_flag = O_RDONLY; - - } else if (srv_read_only_mode) { - - create_flag = O_RDONLY; - - } else { - - ut_a(access_type == OS_FILE_READ_WRITE - || access_type == OS_FILE_READ_ALLOW_DELETE - || access_type == OS_FILE_READ_WRITE_CACHED); - - create_flag = O_RDWR; - } - - } else if (srv_read_only_mode) { - - mode_str = "OPEN"; - - create_flag = O_RDONLY; - - } else if (create_mode == OS_FILE_CREATE) { - - mode_str = "CREATE"; - - create_flag = O_RDWR | O_CREAT | O_EXCL; - - } else 
{ - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file create mode (%lu) for file '%s'", - create_mode, name); - file = -1; - return(file); - } - - file = open(name, create_flag, os_innodb_umask); - - *success = file != -1; - - /* This function is always called for data files, we should disable - OS caching (O_DIRECT) here as we do in os_file_create_func(), so - we open the same file in the same mode, see man page of open(2). */ - if (*success) { - os_file_set_nocache_if_needed(file, name, mode_str, - OS_DATA_FILE, access_type); - } - -#ifdef USE_FILE_LOCK - if (!srv_read_only_mode - && *success - && (access_type == OS_FILE_READ_WRITE - || access_type == OS_FILE_READ_WRITE_CACHED) - && os_file_lock(file, name)) { - - *success = FALSE; - close(file); - file = -1; - - } -#endif /* USE_FILE_LOCK */ - - /* If we have proper file handle and atomic writes should be used, - try to set atomic writes and if that fails when creating a new - table, produce a error. If atomic writes are used on existing - file, ignore error and use traditional writes for that file */ - if (file != -1 - && (awrites == ATOMIC_WRITES_ON || - (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) - && !os_file_set_atomic_writes(name, file)) { - if (create_mode == OS_FILE_CREATE) { - fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); - close(file); - os_file_delete_if_exists_func(name); - *success = FALSE; - file = -1; - } - } - -#endif /* __WIN__ */ - - return(file); -} - -/****************************************************************//** -Tries to disable OS caching on an opened file descriptor. 
-@return TRUE if operation is success and FALSE otherwise */ -UNIV_INTERN -bool -os_file_set_nocache( -/*================*/ - os_file_t fd /*!< in: file descriptor to alter */ - MY_ATTRIBUTE((unused)), - const char* file_name /*!< in: used in the diagnostic - message */ - MY_ATTRIBUTE((unused)), - const char* operation_name MY_ATTRIBUTE((unused))) - /*!< in: "open" or "create"; used - in the diagnostic message */ -{ - /* some versions of Solaris may not have DIRECTIO_ON */ -#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) - if (directio(fd, DIRECTIO_ON) == -1) { - int errno_save = errno; - - ib_logf(IB_LOG_LEVEL_ERROR, - "Failed to set DIRECTIO_ON on file %s: %s: %s, " - "continuing anyway.", - file_name, operation_name, strerror(errno_save)); - return false; - } -#elif defined(O_DIRECT) - if (fcntl(fd, F_SETFL, O_DIRECT) == -1) { - int errno_save = errno; - static bool warning_message_printed = false; - if (errno_save == EINVAL) { - if (!warning_message_printed) { - warning_message_printed = true; -# ifdef UNIV_LINUX - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set O_DIRECT on file " - "%s: %s: %s, continuing anyway. " - "O_DIRECT is known to result " - "in 'Invalid argument' on Linux on " - "tmpfs, see MySQL Bug#26662.", - file_name, operation_name, - strerror(errno_save)); -# else /* UNIV_LINUX */ - goto short_warning; -# endif /* UNIV_LINUX */ - } - } else { -# ifndef UNIV_LINUX -short_warning: -# endif - ib_logf(IB_LOG_LEVEL_WARN, - "Failed to set O_DIRECT on file %s: %s: %s, " - "continuing anyway.", - file_name, operation_name, strerror(errno_save)); - } - return false; - } -#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */ - return true; -} - - -/****************************************************************//** -Tries to enable the atomic write feature, if available, for the specified file -handle. 
-@return TRUE if success */ -static MY_ATTRIBUTE((warn_unused_result)) -ibool -os_file_set_atomic_writes( -/*======================*/ - const char* name /*!< in: name of the file */ - MY_ATTRIBUTE((unused)), - os_file_t file /*!< in: handle to the file */ - MY_ATTRIBUTE((unused))) - -{ -#ifdef DFS_IOCTL_ATOMIC_WRITE_SET - int atomic_option = 1; - - if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { - - fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on " - "file %s on non-supported platform!\n", name); - os_file_handle_error_no_exit(name, "ioctl(DFS_IOCTL_ATOMIC_WRITE_SET)", FALSE, __FILE__, __LINE__); - return(FALSE); - } - - return(TRUE); -#else - fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " - "file %s on non-supported platform!\n", name); - return(FALSE); -#endif -} - -/****************************************************************//** -NOTE! Use the corresponding macro os_file_create(), not directly -this function! -Opens an existing file or creates a new. -@return own: handle to the file, not defined if error, error number -can be retrieved with os_file_get_last_error */ -UNIV_INTERN -pfs_os_file_t -os_file_create_func( -/*================*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - ulint create_mode,/*!< in: create mode */ - ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, - non-buffered i/o is desired, - OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. - and srv_.. variables whether we really use - async i/o or unbuffered i/o: look in the - function source code for the exact rules */ - ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ulint atomic_writes) /*! 
in: atomic writes table option - value */ -{ - pfs_os_file_t file; - ibool retry; - ibool on_error_no_exit; - ibool on_error_silent; - atomic_writes_t awrites = (atomic_writes_t) atomic_writes; - -#ifdef __WIN__ - DBUG_EXECUTE_IF( - "ib_create_table_fail_disk_full", - *success = FALSE; - SetLastError(ERROR_DISK_FULL); - file = INVALID_HANDLE_VALUE; - return(file); - ); -#else /* __WIN__ */ - DBUG_EXECUTE_IF( - "ib_create_table_fail_disk_full", - *success = FALSE; - errno = ENOSPC; - file = -1; - return(file); - ); -#endif /* __WIN__ */ - -#ifdef __WIN__ - DWORD create_flag; - DWORD share_mode = FILE_SHARE_READ; - - on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT - ? TRUE : FALSE; - - on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT - ? TRUE : FALSE; - - create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT; - create_mode &= ~OS_FILE_ON_ERROR_SILENT; - if (srv_backup_mode){ - /* Permit others to write, while I'm reading. */ - share_mode |= FILE_SHARE_WRITE; - } - if (create_mode == OS_FILE_OPEN_RAW) { - - ut_a(!srv_read_only_mode); - - create_flag = OPEN_EXISTING; - - /* On Windows Physical devices require admin privileges and - have to have the write-share mode set. See the remarks - section for the CreateFile() function documentation in MSDN. 
*/ - - share_mode |= FILE_SHARE_WRITE; - - } else if (create_mode == OS_FILE_OPEN - || create_mode == OS_FILE_OPEN_RETRY) { - - create_flag = OPEN_EXISTING; - - } else if (srv_read_only_mode) { - - create_flag = OPEN_EXISTING; - - } else if (create_mode == OS_FILE_CREATE) { - - create_flag = CREATE_NEW; - - } else if (create_mode == OS_FILE_OVERWRITE) { - - create_flag = CREATE_ALWAYS; - - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file create mode (%lu) for file '%s'", - create_mode, name); - - file = INVALID_HANDLE_VALUE; - return(file); - } - - DWORD attributes = 0; - -#ifdef UNIV_HOTBACKUP - attributes |= FILE_FLAG_NO_BUFFERING; -#else - if (purpose == OS_FILE_AIO) { -#ifdef WIN_ASYNC_IO - /* If specified, use asynchronous (overlapped) io and no - buffering of writes in the OS */ - - if (srv_use_native_aio) { - attributes |= FILE_FLAG_OVERLAPPED; - } -#endif /* WIN_ASYNC_IO */ - - } else if (purpose == OS_FILE_NORMAL) { - /* Use default setting. */ - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown purpose flag (%lu) while opening file '%s'", - purpose, name); - file = INVALID_HANDLE_VALUE; - return(file); - } - -#ifdef UNIV_NON_BUFFERED_IO - // TODO: Create a bug, this looks wrong. The flush log - // parameter is dynamic. 
- if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) { - - /* Do not use unbuffered i/o for the log files because - value 2 denotes that we do not flush the log at every - commit, but only once per second */ - - } else if (srv_win_file_flush_method == SRV_WIN_IO_UNBUFFERED) { - - attributes |= FILE_FLAG_NO_BUFFERING; - } -#endif /* UNIV_NON_BUFFERED_IO */ - -#endif /* UNIV_HOTBACKUP */ - DWORD access = GENERIC_READ; - - if (!srv_read_only_mode) { - access |= GENERIC_WRITE; - } - - if (type == OS_LOG_FILE) { - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { - /* Map O_DSYNC to WRITE_THROUGH */ - attributes |= FILE_FLAG_WRITE_THROUGH; - } else if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { - /* Open log file without buffering */ - attributes |= FILE_FLAG_NO_BUFFERING; - } - } - - do { - /* Use default security attributes and no template file. */ - file = CreateFile( - (LPCTSTR) name, access, share_mode, NULL, - create_flag, attributes, NULL); - - if (file == INVALID_HANDLE_VALUE) { - const char* operation; - - operation = (create_mode == OS_FILE_CREATE - && !srv_read_only_mode) - ? "create" : "open"; - - *success = FALSE; - - if (on_error_no_exit) { - retry = os_file_handle_error_no_exit( - name, operation, on_error_silent, __FILE__, __LINE__); - } else { - retry = os_file_handle_error(name, operation, __FILE__, __LINE__); - } - } else { - *success = TRUE; - retry = FALSE; - if (srv_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) { - ut_a(CreateIoCompletionPort(file, completion_port, 0, 0)); - } - } - - } while (retry); - - /* If we have proper file handle and atomic writes should be used, - try to set atomic writes and if that fails when creating a new - table, produce a error. 
If atomic writes are used on existing - file, ignore error and use traditional writes for that file */ - if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE - && (awrites == ATOMIC_WRITES_ON || - (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) - && !os_file_set_atomic_writes(name, file)) { - if (create_mode == OS_FILE_CREATE) { - fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); - CloseHandle(file); - os_file_delete_if_exists_func(name); - *success = FALSE; - file = INVALID_HANDLE_VALUE; - } - } - -#else /* __WIN__ */ - int create_flag; - const char* mode_str = NULL; - if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) - WAIT_ALLOW_WRITES(); - - on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT - ? TRUE : FALSE; - on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT - ? TRUE : FALSE; - - create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT; - create_mode &= ~OS_FILE_ON_ERROR_SILENT; - - if (create_mode == OS_FILE_OPEN - || create_mode == OS_FILE_OPEN_RAW - || create_mode == OS_FILE_OPEN_RETRY) { - - mode_str = "OPEN"; - - create_flag = srv_read_only_mode ? 
O_RDONLY : O_RDWR; - - } else if (srv_read_only_mode) { - - mode_str = "OPEN"; - - create_flag = O_RDONLY; - - } else if (create_mode == OS_FILE_CREATE) { - - mode_str = "CREATE"; - create_flag = O_RDWR | O_CREAT | O_EXCL; - - } else if (create_mode == OS_FILE_OVERWRITE) { - - mode_str = "OVERWRITE"; - create_flag = O_RDWR | O_CREAT | O_TRUNC; - - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unknown file create mode (%lu) for file '%s'", - create_mode, name); - - file = -1; - return(file); - } - - ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE); - ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL); - -#ifdef O_SYNC - /* We let O_SYNC only affect log files; note that we map O_DSYNC to - O_SYNC because the datasync options seemed to corrupt files in 2001 - in both Linux and Solaris */ - - if (!srv_read_only_mode - && type == OS_LOG_FILE - && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { - - create_flag |= O_SYNC; - } -#endif /* O_SYNC */ - - do { - file = open(name, create_flag, os_innodb_umask); - - if (file == -1) { - const char* operation; - - operation = (create_mode == OS_FILE_CREATE - && !srv_read_only_mode) - ? 
"create" : "open"; - - *success = FALSE; - - if (on_error_no_exit) { - retry = os_file_handle_error_no_exit( - name, operation, on_error_silent, __FILE__, __LINE__); - } else { - retry = os_file_handle_error(name, operation, __FILE__, __LINE__); - } - } else { - *success = TRUE; - retry = false; - } - - } while (retry); - - /* We disable OS caching (O_DIRECT) only on data files */ - - if (*success) { - os_file_set_nocache_if_needed(file, name, mode_str, type, 0); - } - -#ifdef USE_FILE_LOCK - if (!srv_read_only_mode - && *success - && create_mode != OS_FILE_OPEN_RAW - && os_file_lock(file, name)) { - - if (create_mode == OS_FILE_OPEN_RETRY) { - - ut_a(!srv_read_only_mode); - - ib_logf(IB_LOG_LEVEL_INFO, - "Retrying to lock the first data file"); - - for (int i = 0; i < 100; i++) { - os_thread_sleep(1000000); - - if (!os_file_lock(file, name)) { - *success = TRUE; - return(file); - } - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Unable to open the first data file"); - } - - *success = FALSE; - close(file); - file = -1; - } -#endif /* USE_FILE_LOCK */ - - /* If we have proper file handle and atomic writes should be used, - try to set atomic writes and if that fails when creating a new - table, produce a error. If atomic writes are used on existing - file, ignore error and use traditional writes for that file */ - if (file != -1 && type == OS_DATA_FILE - && (awrites == ATOMIC_WRITES_ON || - (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) - && !os_file_set_atomic_writes(name, file)) { - if (create_mode == OS_FILE_CREATE) { - fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); - close(file); - os_file_delete_if_exists_func(name); - *success = FALSE; - file = -1; - } - } - - -#endif /* __WIN__ */ - - return(file); -} - -/***********************************************************************//** -Deletes a file if it exists. The file has to be closed before calling this. 
-@return TRUE if success */ -UNIV_INTERN -bool -os_file_delete_if_exists_func( -/*==========================*/ - const char* name) /*!< in: file path as a null-terminated - string */ -{ -#ifdef __WIN__ - bool ret; - ulint count = 0; -loop: - /* In Windows, deleting an .ibd file may fail if mysqlbackup is copying - it */ - - ret = DeleteFile((LPCTSTR) name); - - if (ret) { - return(true); - } - - DWORD lasterr = GetLastError(); - if (lasterr == ERROR_FILE_NOT_FOUND - || lasterr == ERROR_PATH_NOT_FOUND) { - /* the file does not exist, this not an error */ - - return(true); - } - - count++; - - if (count > 100 && 0 == (count % 10)) { - os_file_get_last_error(true); /* print error information */ - - ib_logf(IB_LOG_LEVEL_WARN, "Delete of file %s failed.", name); - } - - os_thread_sleep(500000); /* sleep for 0.5 second */ - - if (count > 2000) { - - return(false); - } - - goto loop; -#else - int ret; - WAIT_ALLOW_WRITES(); - - ret = unlink(name); - - if (ret != 0 && errno != ENOENT) { - os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); - - return(false); - } - - return(true); -#endif /* __WIN__ */ -} - -/***********************************************************************//** -Deletes a file. The file has to be closed before calling this. 
-@return TRUE if success */ -UNIV_INTERN -bool -os_file_delete_func( -/*================*/ - const char* name) /*!< in: file path as a null-terminated - string */ -{ -#ifdef __WIN__ - BOOL ret; - ulint count = 0; -loop: - /* In Windows, deleting an .ibd file may fail if mysqlbackup is copying - it */ - - ret = DeleteFile((LPCTSTR) name); - - if (ret) { - return(true); - } - - if (GetLastError() == ERROR_FILE_NOT_FOUND) { - /* If the file does not exist, we classify this as a 'mild' - error and return */ - - return(false); - } - - count++; - - if (count > 100 && 0 == (count % 10)) { - os_file_get_last_error(true); /* print error information */ - - fprintf(stderr, - "InnoDB: Warning: cannot delete file %s\n" - "InnoDB: Are you running mysqlbackup" - " to back up the file?\n", name); - } - - os_thread_sleep(1000000); /* sleep for a second */ - - if (count > 2000) { - - return(false); - } - - goto loop; -#else - int ret; - WAIT_ALLOW_WRITES(); - - ret = unlink(name); - - if (ret != 0) { - os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); - - return(false); - } - - return(true); -#endif -} - -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_rename(), not directly this function! -Renames a file (can also move it to another directory). It is safest that the -file is closed before calling this function. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_rename_func( -/*================*/ - const char* oldpath,/*!< in: old file path as a null-terminated - string */ - const char* newpath)/*!< in: new file path */ -{ -#ifdef UNIV_DEBUG - os_file_type_t type; - ibool exists; - - /* New path must not exist. */ - ut_ad(os_file_status(newpath, &exists, &type)); - ut_ad(!exists); - - /* Old path must exist. 
*/ - ut_ad(os_file_status(oldpath, &exists, &type)); - ut_ad(exists); -#endif /* UNIV_DEBUG */ - -#ifdef __WIN__ - BOOL ret; - - ret = MoveFileEx((LPCTSTR)oldpath, (LPCTSTR)newpath, MOVEFILE_REPLACE_EXISTING); - - if (ret) { - return(TRUE); - } - - os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); - - return(FALSE); -#else - int ret; - WAIT_ALLOW_WRITES(); - - ret = rename(oldpath, newpath); - - if (ret != 0) { - os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); - - return(FALSE); - } - - return(TRUE); -#endif /* __WIN__ */ -} - -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_close(), not directly this function! -Closes a file handle. In case of error, error number can be retrieved with -os_file_get_last_error. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_close_func( -/*===============*/ - os_file_t file) /*!< in, own: handle to a file */ -{ -#ifdef __WIN__ - BOOL ret; - - ret = CloseHandle(file); - - if (ret) { - return(TRUE); - } - - os_file_handle_error(NULL, "close", __FILE__, __LINE__); - - return(FALSE); -#else - int ret; - - ret = close(file); - - if (ret == -1) { - os_file_handle_error(NULL, "close", __FILE__, __LINE__); - - return(FALSE); - } - - return(TRUE); -#endif /* __WIN__ */ -} - -/***********************************************************************//** -Closes a file handle. 
-@return TRUE if success */ -UNIV_INTERN -bool -os_file_close_no_error_handling_func( -/*============================*/ - os_file_t file) /*!< in, own: handle to a file */ -{ -#ifdef __WIN__ - BOOL ret; - - ret = CloseHandle(file); - - if (ret) { - return(true); - } - - return(false); -#else - int ret; - - ret = close(file); - - if (ret == -1) { - - return(false); - } - - return(true); -#endif /* __WIN__ */ -} - -#ifdef HAVE_POSIX_FALLOCATE -/***********************************************************************//** -Ensures that disk space is allocated for the file. -@return TRUE if success */ -UNIV_INTERN -bool -os_file_allocate_func( - os_file_t file, /*!< in, own: handle to a file */ - os_offset_t offset, /*!< in: file region offset */ - os_offset_t len) /*!< in: file region length */ -{ - return(posix_fallocate(file, offset, len) == 0); -} -#endif - -/***********************************************************************//** -Checks if the file is marked as invalid. -@return TRUE if invalid */ -UNIV_INTERN -bool -os_file_is_invalid( - pfs_os_file_t file) /*!< in, own: handle to a file */ -{ - return(file == os_file_invalid); -} - -/***********************************************************************//** -Marks the file as invalid. */ -UNIV_INTERN -void -os_file_mark_invalid( - pfs_os_file_t* file) /*!< out: pointer to a handle to a file */ -{ - file->m_file = os_file_invalid; -} - -/***********************************************************************//** -Announces an intention to access file data in a specific pattern in the -future. 
-@return TRUE if success */ -UNIV_INTERN -bool -os_file_advise( - pfs_os_file_t file, /*!< in, own: handle to a file */ - os_offset_t offset, /*!< in: file region offset */ - os_offset_t len, /*!< in: file region length */ - ulint advice)/*!< in: advice for access pattern */ -{ -#ifdef __WIN__ - return(true); -#else -#ifdef UNIV_LINUX - int native_advice = 0; - if ((advice & OS_FILE_ADVISE_NORMAL) != 0) - native_advice |= POSIX_FADV_NORMAL; - if ((advice & OS_FILE_ADVISE_RANDOM) != 0) - native_advice |= POSIX_FADV_RANDOM; - if ((advice & OS_FILE_ADVISE_SEQUENTIAL) != 0) - native_advice |= POSIX_FADV_SEQUENTIAL; - if ((advice & OS_FILE_ADVISE_WILLNEED) != 0) - native_advice |= POSIX_FADV_WILLNEED; - if ((advice & OS_FILE_ADVISE_DONTNEED) != 0) - native_advice |= POSIX_FADV_DONTNEED; - if ((advice & OS_FILE_ADVISE_NOREUSE) != 0) - native_advice |= POSIX_FADV_NOREUSE; - - return(posix_fadvise(file, offset, len, native_advice) == 0); -#else - return(true); -#endif -#endif /* __WIN__ */ -} - -/***********************************************************************//** -Gets a file size. -@return file size, or (os_offset_t) -1 on failure */ -UNIV_INTERN -os_offset_t -os_file_get_size( -/*=============*/ - pfs_os_file_t file) /*!< in: handle to a file */ -{ -#ifdef __WIN__ - os_offset_t offset; - DWORD high; - DWORD low; - - low = GetFileSize(file, &high); - - if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) { - return((os_offset_t) -1); - } - - offset = (os_offset_t) low | ((os_offset_t) high << 32); - - return(offset); -#else - return((os_offset_t) lseek(file, 0, SEEK_END)); - -#endif /* __WIN__ */ -} - -/** Set the size of a newly created file. 
-@param[in] name file name -@param[in] file file handle -@param[in] size desired file size -@param[in] sparse whether to create a sparse file (no preallocating) -@return whether the operation succeeded */ -UNIV_INTERN -bool -os_file_set_size( - const char* name, - pfs_os_file_t file, - os_offset_t size, - bool is_sparse) -{ -#ifdef _WIN32 - FILE_END_OF_FILE_INFO feof; - feof.EndOfFile.QuadPart = size; - bool success = SetFileInformationByHandle(file, - FileEndOfFileInfo, - &feof, sizeof feof); - if (!success) { - ib_logf(IB_LOG_LEVEL_ERROR, "os_file_set_size() of file %s" - " to " INT64PF " bytes failed with %u", - name, size, GetLastError()); - } - return(success); -#else - if (is_sparse) { - bool success = !ftruncate(file, size); - if (!success) { - ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s" - " to " INT64PF " bytes failed with error %d", - name, size, errno); - } - return(success); - } - -# ifdef HAVE_POSIX_FALLOCATE - if (srv_use_posix_fallocate) { - int err; - do { - err = posix_fallocate(file, 0, size); - } while (err == EINTR - && srv_shutdown_state == SRV_SHUTDOWN_NONE); - - if (err) { - ib_logf(IB_LOG_LEVEL_ERROR, - "preallocating " INT64PF " bytes for" - "file %s failed with error %d", - size, name, err); - } - return(!err); - } -# endif - - /* Write up to 1 megabyte at a time. 
*/ - ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE)) - * UNIV_PAGE_SIZE; - os_offset_t current_size = 0; - - byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE)); - - if (!buf2) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot allocate " ULINTPF " bytes to extend file\n", - buf_size + UNIV_PAGE_SIZE); - return(false); - } - - /* Align the buffer for possible raw i/o */ - byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE)); - bool ret; - - do { - ulint n_bytes; - - if (size - current_size < (os_offset_t) buf_size) { - n_bytes = (ulint) (size - current_size); - } else { - n_bytes = buf_size; - } - - ret = os_file_write(name, file, buf, current_size, n_bytes); - - if (!ret) { - break; - } - - current_size += n_bytes; - } while (current_size < size); - - free(buf2); - - return(ret && os_file_flush(file)); -#endif -} - -/***********************************************************************//** -Truncates a file at its current position. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_set_eof( -/*============*/ - FILE* file) /*!< in: file to be truncated */ -{ -#ifdef __WIN__ - HANDLE h = (HANDLE) _get_osfhandle(fileno(file)); - return(SetEndOfFile(h)); -#else /* __WIN__ */ - WAIT_ALLOW_WRITES(); - return(!ftruncate(fileno(file), ftell(file))); -#endif /* __WIN__ */ -} - -/***********************************************************************//** -Truncates a file at the specified position. -@return TRUE if success */ -UNIV_INTERN -bool -os_file_set_eof_at_func( - os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len)/*!< in: new file length */ -{ -#ifdef __WIN__ - LARGE_INTEGER li, li2; - li.QuadPart = new_len; - return(SetFilePointerEx(file, li, &li2,FILE_BEGIN) - && SetEndOfFile(file)); -#else - WAIT_ALLOW_WRITES(); - /* TODO: works only with -D_FILE_OFFSET_BITS=64 ? 
*/ - return(!ftruncate(file, new_len)); -#endif -} - - -#ifndef __WIN__ -/***********************************************************************//** -Wrapper to fsync(2) that retries the call on some errors. -Returns the value 0 if successful; otherwise the value -1 is returned and -the global variable errno is set to indicate the error. -@return 0 if success, -1 otherwise */ - -static -int -os_file_fsync( -/*==========*/ - os_file_t file) /*!< in: handle to a file */ -{ - int ret; - int failures; - ibool retry; - - failures = 0; - - do { - ret = fsync(file); - - os_n_fsyncs++; - - if (ret == -1 && errno == ENOLCK) { - - if (failures % 100 == 0) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: fsync(): " - "No locks available; retrying\n"); - } - - os_thread_sleep(200000 /* 0.2 sec */); - - failures++; - - retry = TRUE; - } else if (ret == -1 && errno == EINTR) { - /* Handle signal interruptions correctly */ - retry = TRUE; - } else { - - retry = FALSE; - } - } while (retry); - - return(ret); -} -#endif /* !__WIN__ */ - -/***********************************************************************//** -NOTE! Use the corresponding macro os_file_flush(), not directly this function! -Flushes the write buffers of a given file to the disk. 
-@return TRUE if success */ -UNIV_INTERN -ibool -os_file_flush_func( -/*===============*/ - os_file_t file) /*!< in, own: handle to a file */ -{ -#ifdef __WIN__ - BOOL ret; - - os_n_fsyncs++; - - ret = FlushFileBuffers(file); - - if (ret) { - return(TRUE); - } - - /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is - actually a raw device, we choose to ignore that error if we are using - raw disks */ - - if (srv_start_raw_disk_in_use && GetLastError() - == ERROR_INVALID_FUNCTION) { - return(TRUE); - } - - os_file_handle_error(NULL, "flush", __FILE__, __LINE__); - - /* It is a fatal error if a file flush does not succeed, because then - the database can get corrupt on disk */ - ut_error; - - return(FALSE); -#else - int ret; - WAIT_ALLOW_WRITES(); - -#if defined(HAVE_DARWIN_THREADS) -# ifndef F_FULLFSYNC - /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */ -# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */ -# elif F_FULLFSYNC != 51 -# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3" -# endif - /* Apple has disabled fsync() for internal disk drives in OS X. That - caused corruption for a user when he tested a power outage. Let us in - OS X use a nonstandard flush method recommended by an Apple - engineer. */ - - if (!srv_have_fullfsync) { - /* If we are not on an operating system that supports this, - then fall back to a plain fsync. */ - - ret = os_file_fsync(file); - } else { - ret = fcntl(file, F_FULLFSYNC, NULL); - - if (ret) { - /* If we are not on a file system that supports this, - then fall back to a plain fsync. 
*/ - ret = os_file_fsync(file); - } - } -#else - ret = os_file_fsync(file); -#endif - - if (ret == 0) { - return(TRUE); - } - - /* Since Linux returns EINVAL if the 'file' is actually a raw device, - we choose to ignore that error if we are using raw disks */ - - if (srv_start_raw_disk_in_use && errno == EINVAL) { - - return(TRUE); - } - - ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed"); - - os_file_handle_error(NULL, "flush", __FILE__, __LINE__); - - /* It is a fatal error if a file flush does not succeed, because then - the database can get corrupt on disk */ - ut_error; - - return(FALSE); -#endif -} - -#ifndef __WIN__ -/*******************************************************************//** -Does a synchronous read operation in Posix. -@return number of bytes read, -1 if error */ -static MY_ATTRIBUTE((nonnull(2), warn_unused_result)) -ssize_t -os_file_pread( -/*==========*/ - os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - ulint n, /*!< in: number of bytes to read */ - os_offset_t offset, /*!< in: file offset from where to read */ - trx_t* trx) -{ - off_t offs; - ulint sec; - ulint ms; - ib_uint64_t start_time; - ib_uint64_t finish_time; - - ut_ad(n); - - /* If off_t is > 4 bytes in size, then we assume we can pass a - 64-bit address */ - offs = (off_t) offset; - - if (sizeof(off_t) <= 4) { - if (offset != (os_offset_t) offs) { - ib_logf(IB_LOG_LEVEL_ERROR, - "File read at offset > 4 GB"); - } - } - - os_n_file_reads++; - - if (UNIV_UNLIKELY(trx && trx->take_stats)) - { - trx->io_reads++; - trx->io_read += n; - ut_usectime(&sec, &ms); - start_time = (ib_uint64_t)sec * 1000000 + ms; - } else { - start_time = 0; - } - - const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS); -#ifdef HAVE_PREAD - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor); - - ssize_t n_bytes; - - /* Handle partial reads and signal interruptions correctly */ - for (n_bytes = 0; n_bytes < (ssize_t) n; ) { - 
ssize_t n_read = pread(file, buf, (ssize_t)n - n_bytes, offs); - if (n_read > 0) { - n_bytes += n_read; - offs += n_read; - buf = (char *)buf + n_read; - } else if (n_read == -1 && errno == EINTR) { - continue; - } else { - break; - } - } - - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor); - - if (UNIV_UNLIKELY(start_time != 0)) - { - ut_usectime(&sec, &ms); - finish_time = (ib_uint64_t)sec * 1000000 + ms; - trx->io_reads_wait_timer += (ulint)(finish_time - start_time); - } - - return(n_bytes); -#else - { - off_t ret_offset; - ssize_t ret; - ssize_t n_read; -#ifndef UNIV_HOTBACKUP - ulint i; -#endif /* !UNIV_HOTBACKUP */ - - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor); -#ifndef UNIV_HOTBACKUP - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - ret_offset = lseek(file, offs, SEEK_SET); - - if (ret_offset < 0) { - ret = -1; - } else { - /* Handle signal interruptions correctly */ - for (ret = 0; ret < (ssize_t) n; ) { - n_read = read(file, buf, (ssize_t)n); - if (n_read > 0) { - ret += n_read; - } else if (n_read == -1 && errno == EINTR) { - continue; - } else { - break; - } - } - } - -#ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -#endif /* !UNIV_HOTBACKUP */ - - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor); - - if (UNIV_UNLIKELY(start_time != 0) - { - ut_usectime(&sec, &ms); - finish_time = (ib_uint64_t)sec * 1000000 + ms; - trx->io_reads_wait_timer += (ulint)(finish_time - start_time); - } - - return(ret); - } -#endif -} - -/*******************************************************************//** -Does a synchronous write operation in Posix. 
-@return number of bytes written, -1 if error */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ssize_t -os_file_pwrite( -/*===========*/ - os_file_t file, /*!< in: handle to a file */ - const void* buf, /*!< in: buffer from where to write */ - ulint n, /*!< in: number of bytes to write */ - os_offset_t offset) /*!< in: file offset where to write */ -{ - ssize_t ret; - ssize_t n_written; - off_t offs; - - ut_ad(n); - ut_ad(!srv_read_only_mode); - - /* If off_t is > 4 bytes in size, then we assume we can pass a - 64-bit address */ - offs = (off_t) offset; - - if (sizeof(off_t) <= 4) { - if (offset != (os_offset_t) offs) { - ib_logf(IB_LOG_LEVEL_ERROR, - "File write at offset > 4 GB."); - } - } - - os_n_file_writes++; - - const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES); -#ifdef HAVE_PWRITE - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor); - - /* Handle partial writes and signal interruptions correctly */ - for (ret = 0; ret < (ssize_t) n; ) { - n_written = pwrite(file, buf, (ssize_t)n - ret, offs); - DBUG_EXECUTE_IF("xb_simulate_all_o_direct_write_failure", - n_written = -1; - errno = EINVAL;); - if (n_written >= 0) { - ret += n_written; - offs += n_written; - buf = (char *)buf + n_written; - } else if (n_written == -1 && errno == EINTR) { - continue; - } else { - break; - } - } - - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor); - - return(ret); -#else - { - off_t ret_offset; -# ifndef UNIV_HOTBACKUP - ulint i; -# endif /* !UNIV_HOTBACKUP */ - - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor); - -# ifndef UNIV_HOTBACKUP - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); -# endif /* UNIV_HOTBACKUP */ - - ret_offset = lseek(file, offs, SEEK_SET); - - if (ret_offset < 0) { - ret = -1; - - goto func_exit; - } - - /* Handle signal interruptions correctly */ - for (ret = 0; ret < (ssize_t) n; ) { - n_written = write(file, 
buf, (ssize_t)n); - if (n_written > 0) { - ret += n_written; - } else if (n_written == -1 && errno == EINTR) { - continue; - } else { - break; - } - } - -func_exit: -# ifndef UNIV_HOTBACKUP - os_mutex_exit(os_file_seek_mutexes[i]); -# endif /* !UNIV_HOTBACKUP */ - - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor); - return(ret); - } -#endif /* HAVE_PWRITE */ -} -#endif - -/*******************************************************************//** -NOTE! Use the corresponding macro os_file_read(), not directly this -function! -Requests a synchronous positioned read operation. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -os_file_read_func( -/*==============*/ - os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n, /*!< in: number of bytes to read */ - trx_t* trx) -{ -#ifdef __WIN__ - BOOL ret; - DWORD len; - ibool retry; - OVERLAPPED overlapped; - - - /* On 64-bit Windows, ulint is 64 bits. But offset and n should be - no more than 32 bits. 
*/ - ut_a((n & 0xFFFFFFFFUL) == n); - - os_n_file_reads++; - os_bytes_read_since_printout += n; - const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS); - -try_again: - ut_ad(buf); - ut_ad(n > 0); - - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor); - - memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF); - overlapped.OffsetHigh = (DWORD)(offset >> 32); - overlapped.hEvent = win_get_syncio_event(); - ret = ReadFile(file, buf, n, NULL, &overlapped); - if (ret) { - ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); - } - else if(GetLastError() == ERROR_IO_PENDING) { - ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - } - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor); - - if (ret && len == n) { - return(TRUE); - } -#else /* __WIN__ */ - ibool retry; - ssize_t ret; - - os_bytes_read_since_printout += n; - -try_again: - ret = os_file_pread(file, buf, n, offset, trx); - - DBUG_EXECUTE_IF("xb_simulate_all_o_direct_read_failure", - ret = -1; - errno = EINVAL;); - - if ((ulint) ret == n) { - return(TRUE); - } else if (ret == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error in system call pread(). The operating" - " system error number is %lu.",(ulint) errno); - } else { - /* Partial read occurred */ - ib_logf(IB_LOG_LEVEL_ERROR, - "Tried to read " ULINTPF " bytes at offset " - UINT64PF ". Was only able to read %ld.", - n, offset, (lint) ret); - } -#endif /* __WIN__ */ - retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__); - - if (retry) { - goto try_again; - } - - fprintf(stderr, - "InnoDB: Fatal error: cannot read from file." - " OS error number %lu.\n", -#ifdef __WIN__ - (ulong) GetLastError() -#else - (ulong) errno -#endif /* __WIN__ */ - ); - fflush(stderr); - - ut_error; - - return(FALSE); -} - -/*******************************************************************//** -NOTE! 
Use the corresponding macro os_file_read_no_error_handling(), -not directly this function! -Requests a synchronous positioned read operation. This function does not do -any error handling. In case of error it returns FALSE. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -os_file_read_no_error_handling_func( -/*================================*/ - os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read */ - os_offset_t offset, /*!< in: file offset where to read */ - ulint n) /*!< in: number of bytes to read */ -{ -#ifdef __WIN__ - BOOL ret; - DWORD len; - ibool retry; - OVERLAPPED overlapped; - overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF); - overlapped.OffsetHigh = (DWORD)(offset >> 32); - - - /* On 64-bit Windows, ulint is 64 bits. But offset and n should be - no more than 32 bits. */ - ut_a((n & 0xFFFFFFFFUL) == n); - - os_n_file_reads++; - os_bytes_read_since_printout += n; - const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS); - -try_again: - ut_ad(buf); - ut_ad(n > 0); - - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor); - - memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF); - overlapped.OffsetHigh = (DWORD)(offset >> 32); - overlapped.hEvent = win_get_syncio_event(); - ret = ReadFile(file, buf, n, NULL, &overlapped); - if (ret) { - ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); - } - else if(GetLastError() == ERROR_IO_PENDING) { - ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - } - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor); - - if (ret && len == n) { - return(TRUE); - } -#else /* __WIN__ */ - ibool retry; - ssize_t ret; - - os_bytes_read_since_printout += n; - -try_again: - ret = os_file_pread(file, buf, n, offset, NULL); - - if ((ulint) ret == n) { - return(TRUE); - } else if (ret == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error in system call pread(). 
The operating" - " system error number is %lu.",(ulint) errno); - } else { - /* Partial read occurred */ - ib_logf(IB_LOG_LEVEL_ERROR, - "Tried to read " ULINTPF " bytes at offset " - UINT64PF ". Was only able to read %ld.", - n, offset, (lint) ret); - } -#endif /* __WIN__ */ - retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__); - - if (retry) { - goto try_again; - } - - return(FALSE); -} - -/*******************************************************************//** -Rewind file to its start, read at most size - 1 bytes from it to str, and -NUL-terminate str. All errors are silently ignored. This function is -mostly meant to be used with temporary files. */ -UNIV_INTERN -void -os_file_read_string( -/*================*/ - FILE* file, /*!< in: file to read from */ - char* str, /*!< in: buffer where to read */ - ulint size) /*!< in: size of buffer */ -{ - size_t flen; - - if (size == 0) { - return; - } - - rewind(file); - flen = fread(str, 1, size - 1, file); - str[flen] = '\0'; -} - -/*******************************************************************//** -NOTE! Use the corresponding macro os_file_write(), not directly -this function! -Requests a synchronous write operation. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -os_file_write_func( -/*===============*/ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - os_file_t file, /*!< in: handle to a file */ - const void* buf, /*!< in: buffer from which to write */ - os_offset_t offset, /*!< in: file offset where to write */ - ulint n) /*!< in: number of bytes to write */ -{ - ut_ad(!srv_read_only_mode); - -#ifdef __WIN__ - BOOL ret; - DWORD len; - ulint n_retries = 0; - ulint err; - OVERLAPPED overlapped; - DWORD saved_error = 0; - - /* On 64-bit Windows, ulint is 64 bits. But offset and n should be - no more than 32 bits. 
*/ - ut_a((n & 0xFFFFFFFFUL) == n); - - os_n_file_writes++; - - ut_ad(buf); - ut_ad(n > 0); - const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES); -retry: - - MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor); - - memset (&overlapped, 0, sizeof (overlapped)); - overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF); - overlapped.OffsetHigh = (DWORD)(offset >> 32); - - overlapped.hEvent = win_get_syncio_event(); - ret = WriteFile(file, buf, n, NULL, &overlapped); - if (ret) { - ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE); - } - else if ( GetLastError() == ERROR_IO_PENDING) { - ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE); - } - - MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor); - - if (ret && len == n) { - - return(TRUE); - } - - /* If some background file system backup tool is running, then, at - least in Windows 2000, we may get here a specific error. Let us - retry the operation 100 times, with 1 second waits. */ - - if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) { - - os_thread_sleep(1000000); - - n_retries++; - - goto retry; - } - - if (!os_has_said_disk_full) { - char *winmsg = NULL; - - saved_error = GetLastError(); - err = (ulint) saved_error; - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: Write to file %s failed" - " at offset %llu.\n" - "InnoDB: %lu bytes should have been written," - " only %lu were written.\n" - "InnoDB: Operating system error number %lu.\n" - "InnoDB: Check that your OS and file system" - " support files of this size.\n" - "InnoDB: Check also that the disk is not full" - " or a disk quota exceeded.\n", - name, offset, - (ulong) n, (ulong) len, (ulong) err); - - /* Ask Windows to prepare a standard message for a - GetLastError() */ - - FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, saved_error, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&winmsg, 0, 
NULL); - - if (winmsg) { - fprintf(stderr, - "InnoDB: FormatMessage: Error number %lu means '%s'.\n", - (ulong) saved_error, winmsg); - LocalFree(winmsg); - } - - if (strerror((int) err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %lu means '%s'.\n", - (ulong) err, strerror((int) err)); - } - - fprintf(stderr, - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n"); - - os_has_said_disk_full = TRUE; - } - - return(FALSE); -#else - ssize_t ret; - WAIT_ALLOW_WRITES(); - - ret = os_file_pwrite(file, buf, n, offset); - - if ((ulint) ret == n) { - - return(TRUE); - } - - if (!os_has_said_disk_full) { - - ut_print_timestamp(stderr); - - if(ret == -1) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Failure of system call pwrite(). Operating" - " system error number is %lu.", - (ulint) errno); - } else { - fprintf(stderr, - " InnoDB: Error: Write to file %s failed" - " at offset " UINT64PF ".\n" - "InnoDB: %lu bytes should have been written," - " only %ld were written.\n" - "InnoDB: Operating system error number %lu.\n" - "InnoDB: Check that your OS and file system" - " support files of this size.\n" - "InnoDB: Check also that the disk is not full" - " or a disk quota exceeded.\n", - name, offset, n, (lint) ret, - (ulint) errno); - } - - if (strerror(errno) != NULL) { - fprintf(stderr, - "InnoDB: Error number %d means '%s'.\n", - errno, strerror(errno)); - } - - fprintf(stderr, - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - REFMAN "operating-system-error-codes.html\n"); - - os_diagnose_all_o_direct_einval(errno); - - os_has_said_disk_full = TRUE; - } - - return(FALSE); -#endif -} - -/*******************************************************************//** -Check the existence and type of the given file. 
-@return TRUE if call succeeded */ -UNIV_INTERN -ibool -os_file_status( -/*===========*/ - const char* path, /*!< in: pathname of the file */ - ibool* exists, /*!< out: TRUE if file exists */ - os_file_type_t* type) /*!< out: type of the file (if it exists) */ -{ -#ifdef __WIN__ - int ret; - struct _stat64 statinfo; - - ret = _stat64(path, &statinfo); - if (ret && (errno == ENOENT || errno == ENOTDIR || errno == ENAMETOOLONG)) { - /* file does not exist */ - *exists = FALSE; - return(TRUE); - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); - - return(FALSE); - } - - if (_S_IFDIR & statinfo.st_mode) { - *type = OS_FILE_TYPE_DIR; - } else if (_S_IFREG & statinfo.st_mode) { - *type = OS_FILE_TYPE_FILE; - } else { - *type = OS_FILE_TYPE_UNKNOWN; - } - - *exists = TRUE; - - return(TRUE); -#else - int ret; - struct stat statinfo; - - ret = stat(path, &statinfo); - if (ret && (errno == ENOENT || errno == ENOTDIR || errno == ENAMETOOLONG)) { - /* file does not exist */ - *exists = FALSE; - return(TRUE); - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); - - return(FALSE); - } - - if (S_ISDIR(statinfo.st_mode)) { - *type = OS_FILE_TYPE_DIR; - } else if (S_ISLNK(statinfo.st_mode)) { - *type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { - *type = OS_FILE_TYPE_FILE; - } else { - *type = OS_FILE_TYPE_UNKNOWN; - } - - *exists = TRUE; - - return(TRUE); -#endif -} - -/*******************************************************************//** -This function returns information about the specified file -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -os_file_get_status( -/*===============*/ - const char* path, /*!< in: pathname of the file */ - os_file_stat_t* stat_info, /*!< information of a file in a - directory */ - bool check_rw_perm) /*!< in: for testing whether the - file can be opened 
in RW mode */ -{ - int ret; - -#ifdef __WIN__ - struct _stat64 statinfo; - - ret = _stat64(path, &statinfo); - - if (ret && (errno == ENOENT || errno == ENOTDIR)) { - /* file does not exist */ - - return(DB_NOT_FOUND); - - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); - - return(DB_FAIL); - - } else if (_S_IFDIR & statinfo.st_mode) { - stat_info->type = OS_FILE_TYPE_DIR; - } else if (_S_IFREG & statinfo.st_mode) { - - DWORD access = GENERIC_READ; - - if (!srv_read_only_mode) { - access |= GENERIC_WRITE; - } - - stat_info->type = OS_FILE_TYPE_FILE; - - /* Check if we can open it in read-only mode. */ - - if (check_rw_perm) { - HANDLE fh; - - fh = CreateFile( - (LPCTSTR) path, // File to open - access, - FILE_SHARE_READ|FILE_SHARE_WRITE, - NULL, // Default security - OPEN_EXISTING, // Existing file only - FILE_ATTRIBUTE_NORMAL, // Normal file - NULL); // No attr. template - - if (fh == INVALID_HANDLE_VALUE) { - stat_info->rw_perm = false; - } else { - stat_info->rw_perm = true; - CloseHandle(fh); - } - } - } else { - stat_info->type = OS_FILE_TYPE_UNKNOWN; - } -#else - struct stat statinfo; - - ret = stat(path, &statinfo); - - if (ret && (errno == ENOENT || errno == ENOTDIR)) { - /* file does not exist */ - - return(DB_NOT_FOUND); - - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); - - return(DB_FAIL); - - } - - switch (statinfo.st_mode & S_IFMT) { - case S_IFDIR: - stat_info->type = OS_FILE_TYPE_DIR; - break; - case S_IFLNK: - stat_info->type = OS_FILE_TYPE_LINK; - break; - case S_IFBLK: - /* Handle block device as regular file. */ - case S_IFCHR: - /* Handle character device as regular file. 
*/ - case S_IFREG: - stat_info->type = OS_FILE_TYPE_FILE; - break; - default: - stat_info->type = OS_FILE_TYPE_UNKNOWN; - } - - - if (check_rw_perm && stat_info->type == OS_FILE_TYPE_FILE) { - - int fh; - int access; - - access = !srv_read_only_mode ? O_RDWR : O_RDONLY; - - fh = ::open(path, access, os_innodb_umask); - - if (fh == -1) { - stat_info->rw_perm = false; - } else { - stat_info->rw_perm = true; - close(fh); - } - } - -#endif /* _WIN_ */ - - stat_info->ctime = statinfo.st_ctime; - stat_info->atime = statinfo.st_atime; - stat_info->mtime = statinfo.st_mtime; - stat_info->size = statinfo.st_size; - - return(DB_SUCCESS); -} - -/* path name separator character */ -#ifdef __WIN__ -# define OS_FILE_PATH_SEPARATOR '\\' -#else -# define OS_FILE_PATH_SEPARATOR '/' -#endif - -/****************************************************************//** -This function returns a new path name after replacing the basename -in an old path with a new basename. The old_path is a full path -name including the extension. The tablename is in the normal -form "databasename/tablename". The new base name is found after -the forward slash. Both input strings are null terminated. - -This function allocates memory to be returned. It is the callers -responsibility to free the return value after it is no longer needed. - -@return own: new full pathname */ -UNIV_INTERN -char* -os_file_make_new_pathname( -/*======================*/ - const char* old_path, /*!< in: pathname */ - const char* tablename) /*!< in: contains new base name */ -{ - ulint dir_len; - char* last_slash; - char* base_name; - char* new_path; - ulint new_path_len; - - /* Split the tablename into its database and table name components. - They are separated by a '/'. */ - last_slash = strrchr((char*) tablename, '/'); - base_name = last_slash ? last_slash + 1 : (char*) tablename; - - /* Find the offset of the last slash. We will strip off the - old basename.ibd which starts after that slash. 
*/ - last_slash = strrchr((char*) old_path, OS_FILE_PATH_SEPARATOR); - dir_len = last_slash ? last_slash - old_path : strlen(old_path); - - /* allocate a new path and move the old directory path to it. */ - new_path_len = dir_len + strlen(base_name) + sizeof "/.ibd"; - new_path = static_cast<char*>(mem_alloc(new_path_len)); - memcpy(new_path, old_path, dir_len); - - ut_snprintf(new_path + dir_len, - new_path_len - dir_len, - "%c%s.ibd", - OS_FILE_PATH_SEPARATOR, - base_name); - - return(new_path); -} - -/****************************************************************//** -This function returns a remote path name by combining a data directory -path provided in a DATA DIRECTORY clause with the tablename which is -in the form 'database/tablename'. It strips the file basename (which -is the tablename) found after the last directory in the path provided. -The full filepath created will include the database name as a directory -under the path provided. The filename is the tablename with the '.ibd' -extension. All input and output strings are null-terminated. - -This function allocates memory to be returned. It is the callers -responsibility to free the return value after it is no longer needed. - -@return own: A full pathname; data_dir_path/databasename/tablename.ibd */ -UNIV_INTERN -char* -os_file_make_remote_pathname( -/*=========================*/ - const char* data_dir_path, /*!< in: pathname */ - const char* tablename, /*!< in: tablename */ - const char* extention) /*!< in: file extention; ibd,cfg */ -{ - ulint data_dir_len; - char* last_slash; - char* new_path; - ulint new_path_len; - - ut_ad(extention && strlen(extention) == 3); - - /* Find the offset of the last slash. We will strip off the - old basename or tablename which starts after that slash. */ - last_slash = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR); - data_dir_len = last_slash ? 
last_slash - data_dir_path : strlen(data_dir_path); - - /* allocate a new path and move the old directory path to it. */ - new_path_len = data_dir_len + strlen(tablename) - + sizeof "/." + strlen(extention); - new_path = static_cast<char*>(mem_alloc(new_path_len)); - memcpy(new_path, data_dir_path, data_dir_len); - ut_snprintf(new_path + data_dir_len, - new_path_len - data_dir_len, - "%c%s.%s", - OS_FILE_PATH_SEPARATOR, - tablename, - extention); - - srv_normalize_path_for_win(new_path); - - return(new_path); -} - -/****************************************************************//** -This function reduces a null-terminated full remote path name into -the path that is sent by MySQL for DATA DIRECTORY clause. It replaces -the 'databasename/tablename.ibd' found at the end of the path with just -'tablename'. - -Since the result is always smaller than the path sent in, no new memory -is allocated. The caller should allocate memory for the path sent in. -This function manipulates that path in place. - -If the path format is not as expected, just return. The result is used -to inform a SHOW CREATE TABLE command. */ -UNIV_INTERN -void -os_file_make_data_dir_path( -/*========================*/ - char* data_dir_path) /*!< in/out: full path/data_dir_path */ -{ - char* ptr; - char* tablename; - ulint tablename_len; - - /* Replace the period before the extension with a null byte. */ - ptr = strrchr((char*) data_dir_path, '.'); - if (!ptr) { - return; - } - ptr[0] = '\0'; - - /* The tablename starts after the last slash. */ - ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR); - if (!ptr) { - return; - } - ptr[0] = '\0'; - tablename = ptr + 1; - - /* The databasename starts after the next to last slash. 
*/ - ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR); - if (!ptr) { - return; - } - tablename_len = ut_strlen(tablename); - - ut_memmove(++ptr, tablename, tablename_len); - - ptr[tablename_len] = '\0'; -} - -/****************************************************************//** -The function os_file_dirname returns a directory component of a -null-terminated pathname string. In the usual case, dirname returns -the string up to, but not including, the final '/', and basename -is the component following the final '/'. Trailing '/' characters -are not counted as part of the pathname. - -If path does not contain a slash, dirname returns the string ".". - -Concatenating the string returned by dirname, a "/", and the basename -yields a complete pathname. - -The return value is a copy of the directory component of the pathname. -The copy is allocated from heap. It is the caller responsibility -to free it after it is no longer needed. - -The following list of examples (taken from SUSv2) shows the strings -returned by dirname and basename for different paths: - - path dirname basename - "/usr/lib" "/usr" "lib" - "/usr/" "/" "usr" - "usr" "." "usr" - "/" "/" "/" - "." "." "." - ".." "." ".." - -@return own: directory component of the pathname */ -UNIV_INTERN -char* -os_file_dirname( -/*============*/ - const char* path) /*!< in: pathname */ -{ - /* Find the offset of the last slash */ - const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR); - if (!last_slash) { - /* No slash in the path, return "." */ - - return(mem_strdup(".")); - } - - /* Ok, there is a slash */ - - if (last_slash == path) { - /* last slash is the first char of the path */ - - return(mem_strdup("/")); - } - - /* Non-trivial directory component */ - - return(mem_strdupl(path, last_slash - path)); -} - -/****************************************************************//** -Creates all missing subdirectories along the given path. 
-@return TRUE if call succeeded FALSE otherwise */ -UNIV_INTERN -ibool -os_file_create_subdirs_if_needed( -/*=============================*/ - const char* path) /*!< in: path name */ -{ - if (srv_read_only_mode) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "read only mode set. Can't create subdirectories '%s'", - path); - - return(FALSE); - - } - - char* subdir = os_file_dirname(path); - - if (strlen(subdir) == 1 - && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) { - /* subdir is root or cwd, nothing to do */ - mem_free(subdir); - - return(TRUE); - } - - /* Test if subdir exists */ - os_file_type_t type; - ibool subdir_exists; - ibool success = os_file_status(subdir, &subdir_exists, &type); - - if (success && !subdir_exists) { - - /* subdir does not exist, create it */ - success = os_file_create_subdirs_if_needed(subdir); - - if (!success) { - mem_free(subdir); - - return(FALSE); - } - - success = os_file_create_directory(subdir, FALSE); - } - - mem_free(subdir); - - return(success); -} - -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Returns a pointer to the nth slot in the aio array. -@return pointer to slot */ -static -os_aio_slot_t* -os_aio_array_get_nth_slot( -/*======================*/ - os_aio_array_t* array, /*!< in: aio array */ - ulint index) /*!< in: index of the slot */ -{ - ut_a(index < array->n_slots); - - return(&array->slots[index]); -} - -#if defined(LINUX_NATIVE_AIO) -/******************************************************************//** -Creates an io_context for native linux AIO. -@return TRUE on success. */ -static -ibool -os_aio_linux_create_io_ctx( -/*=======================*/ - ulint max_events, /*!< in: number of events. */ - io_context_t* io_ctx) /*!< out: io_ctx to initialize. */ -{ - int ret; - ulint retries = 0; - -retry: - memset(io_ctx, 0x0, sizeof(*io_ctx)); - - /* Initialize the io_ctx. Tell it how many pending - IO requests this context will handle. 
*/ - - ret = io_setup(max_events, io_ctx); - if (ret == 0) { -#if defined(UNIV_AIO_DEBUG) - fprintf(stderr, - "InnoDB: Linux native AIO:" - " initialized io_ctx for segment\n"); -#endif - /* Success. Return now. */ - return(TRUE); - } - - /* If we hit EAGAIN we'll make a few attempts before failing. */ - - switch (ret) { - case -EAGAIN: - if (retries == 0) { - /* First time around. */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: io_setup() failed" - " with EAGAIN. Will make %d attempts" - " before giving up.\n", - OS_AIO_IO_SETUP_RETRY_ATTEMPTS); - } - - if (retries < OS_AIO_IO_SETUP_RETRY_ATTEMPTS) { - ++retries; - fprintf(stderr, - "InnoDB: Warning: io_setup() attempt" - " %lu failed.\n", - retries); - os_thread_sleep(OS_AIO_IO_SETUP_RETRY_SLEEP); - goto retry; - } - - /* Have tried enough. Better call it a day. */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: io_setup() failed" - " with EAGAIN after %d attempts.\n", - OS_AIO_IO_SETUP_RETRY_ATTEMPTS); - break; - - case -ENOSYS: - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: Linux Native AIO interface" - " is not supported on this platform. Please" - " check your OS documentation and install" - " appropriate binary of InnoDB.\n"); - - break; - - default: - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: Linux Native AIO setup" - " returned following error[%d]\n", -ret); - break; - } - - fprintf(stderr, - "InnoDB: You can disable Linux Native AIO by" - " setting innodb_use_native_aio = 0 in my.cnf\n"); - return(FALSE); -} - -/******************************************************************//** -Checks if the system supports native linux aio. On some kernel -versions where native aio is supported it won't work on tmpfs. In such -cases we can't use native aio as it is not possible to mix simulated -and native aio. -@return: TRUE if supported, FALSE otherwise. 
*/ -static -ibool -os_aio_native_aio_supported(void) -/*=============================*/ -{ - int fd; - io_context_t io_ctx; - char name[1000]; - - if (!os_aio_linux_create_io_ctx(1, &io_ctx)) { - /* The platform does not support native aio. */ - return(FALSE); - } else if (!srv_read_only_mode) { - /* Now check if tmpdir supports native aio ops. */ - fd = innobase_mysql_tmpfile(NULL); - - if (fd < 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Unable to create temp file to check " - "native AIO support."); - - return(FALSE); - } - } else { - - srv_normalize_path_for_win(srv_log_group_home_dir); - - ulint dirnamelen = strlen(srv_log_group_home_dir); - ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile"); - memcpy(name, srv_log_group_home_dir, dirnamelen); - - /* Add a path separator if needed. */ - if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) { - name[dirnamelen++] = SRV_PATH_SEPARATOR; - } - - strcpy(name + dirnamelen, "ib_logfile0"); - - fd = ::open(name, O_RDONLY); - - if (fd == -1) { - - ib_logf(IB_LOG_LEVEL_WARN, - "Unable to open \"%s\" to check " - "native AIO read support.", name); - - return(FALSE); - } - } - - struct io_event io_event; - - memset(&io_event, 0x0, sizeof(io_event)); - - byte* buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2)); - byte* ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); - - struct iocb iocb; - - /* Suppress valgrind warning. */ - memset(buf, 0x00, UNIV_PAGE_SIZE * 2); - memset(&iocb, 0x0, sizeof(iocb)); - - struct iocb* p_iocb = &iocb; - - if (!srv_read_only_mode) { - io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0); - } else { - ut_a(UNIV_PAGE_SIZE >= 512); - io_prep_pread(p_iocb, fd, ptr, 512, 0); - } - - int err = io_submit(io_ctx, 1, &p_iocb); - - if (err >= 1) { - /* Now collect the submitted IO request. 
*/ - err = io_getevents(io_ctx, 1, 1, &io_event, NULL); - } - - ut_free(buf); - close(fd); - - switch (err) { - case 1: - return(TRUE); - - case -EINVAL: - case -ENOSYS: - ib_logf(IB_LOG_LEVEL_ERROR, - "Linux Native AIO not supported. You can either " - "move %s to a file system that supports native " - "AIO or you can set innodb_use_native_aio to " - "FALSE to avoid this message.", - srv_read_only_mode ? name : "tmpdir"); - - /* fall through. */ - default: - ib_logf(IB_LOG_LEVEL_ERROR, - "Linux Native AIO check on %s returned error[%d]", - srv_read_only_mode ? name : "tmpdir", -err); - } - - return(FALSE); -} -#endif /* LINUX_NATIVE_AIO */ - -/******************************************************************//** -Creates an aio wait array. Note that we return NULL in case of failure. -We don't care about freeing memory here because we assume that a -failure will result in server refusing to start up. -@return own: aio array, NULL on failure */ -static -os_aio_array_t* -os_aio_array_create( -/*================*/ - ulint n, /*!< in: maximum number of pending aio - operations allowed; n must be - divisible by n_segments */ - ulint n_segments) /*!< in: number of segments in the aio array */ -{ - os_aio_array_t* array; -#ifdef LINUX_NATIVE_AIO - struct io_event* io_event = NULL; -#endif - ut_a(n > 0); - ut_a(n_segments > 0); - - array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(*array))); - memset(array, 0x0, sizeof(*array)); - - array->mutex = os_mutex_create(); - array->not_full = os_event_create(); - array->is_empty = os_event_create(); - - os_event_set(array->is_empty); - - array->n_slots = n; - array->n_segments = n_segments; - - array->slots = static_cast<os_aio_slot_t*>( - ut_malloc(n * sizeof(*array->slots))); - - memset(array->slots, 0x0, n * sizeof(*array->slots)); - -#if defined(LINUX_NATIVE_AIO) - array->aio_ctx = NULL; - array->aio_events = NULL; - - /* If we are not using native aio interface then skip this - part of initialization. 
*/ - if (!srv_use_native_aio) { - goto skip_native_aio; - } - - /* Initialize the io_context array. One io_context - per segment in the array. */ - - array->aio_ctx = static_cast<io_context**>( - ut_malloc(n_segments * sizeof(*array->aio_ctx))); - - for (ulint i = 0; i < n_segments; ++i) { - if (!os_aio_linux_create_io_ctx(n/n_segments, - &array->aio_ctx[i])) { - /* If something bad happened during aio setup - we disable linux native aio. - The disadvantage will be a small memory leak - at shutdown but that's ok compared to a crash - or a not working server. - This frequently happens when running the test suite - with many threads on a system with low fs.aio-max-nr! - */ - - fprintf(stderr, - " InnoDB: Warning: Linux Native AIO disabled " - "because os_aio_linux_create_io_ctx() " - "failed. To get rid of this warning you can " - "try increasing system " - "fs.aio-max-nr to 1048576 or larger or " - "setting innodb_use_native_aio = 0 in my.cnf\n"); - srv_use_native_aio = FALSE; - goto skip_native_aio; - } - } - - /* Initialize the event array. One event per slot. */ - io_event = static_cast<struct io_event*>( - ut_malloc(n * sizeof(*io_event))); - - memset(io_event, 0x0, sizeof(*io_event) * n); - array->aio_events = io_event; - -skip_native_aio: -#endif /* LINUX_NATIVE_AIO */ - for (ulint i = 0; i < n; i++) { - os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot(array, i); - slot->pos = i; - slot->reserved = FALSE; -#ifdef LINUX_NATIVE_AIO - memset(&slot->control, 0x0, sizeof(slot->control)); - slot->n_bytes = 0; - slot->ret = 0; -#endif /* WIN_ASYNC_IO */ - } - - return(array); -} - -/************************************************************************//** -Frees an aio wait array. 
*/ -static -void -os_aio_array_free( -/*==============*/ - os_aio_array_t*& array) /*!< in, own: array to free */ -{ - os_mutex_free(array->mutex); - os_event_free(array->not_full); - os_event_free(array->is_empty); - -#if defined(LINUX_NATIVE_AIO) - if (srv_use_native_aio) { - ut_free(array->aio_events); - ut_free(array->aio_ctx); - } -#endif /* LINUX_NATIVE_AIO */ - - ut_free(array->slots); - ut_free(array); - - array = 0; -} - -/*********************************************************************** -Initializes the asynchronous io system. Creates one array each for ibuf -and log i/o. Also creates one array each for read and write where each -array is divided logically into n_read_segs and n_write_segs -respectively. The caller must create an i/o handler thread for each -segment in these arrays. This function also creates the sync array. -No i/o handler thread needs to be created for that */ -UNIV_INTERN -ibool -os_aio_init( -/*========*/ - ulint n_per_seg, /*<! in: maximum number of pending aio - operations allowed per segment */ - ulint n_read_segs, /*<! in: number of reader threads */ - ulint n_write_segs, /*<! in: number of writer threads */ - ulint n_slots_sync) /*<! in: number of slots in the sync aio - array */ -{ - os_io_init_simple(); - -#if defined(LINUX_NATIVE_AIO) - /* Check if native aio is supported on this system and tmpfs */ - if (srv_use_native_aio && !os_aio_native_aio_supported()) { - - ib_logf(IB_LOG_LEVEL_WARN, "Linux Native AIO disabled."); - - srv_use_native_aio = FALSE; - } -#endif /* LINUX_NATIVE_AIO */ - - srv_reset_io_thread_op_info(); - - os_aio_read_array = os_aio_array_create( - n_read_segs * n_per_seg, n_read_segs); - - if (os_aio_read_array == NULL) { - return(FALSE); - } - - ulint start = (srv_read_only_mode) ? 0 : 2; - ulint n_segs = n_read_segs + start; - - /* 0 is the ibuf segment and 1 is the insert buffer segment. 
*/ - for (ulint i = start; i < n_segs; ++i) { - ut_a(i < SRV_MAX_N_IO_THREADS); - srv_io_thread_function[i] = "read thread"; - } - - ulint n_segments = n_read_segs; - - if (!srv_read_only_mode) { - - os_aio_log_array = os_aio_array_create(n_per_seg, 1); - - if (os_aio_log_array == NULL) { - return(FALSE); - } - - ++n_segments; - - srv_io_thread_function[1] = "log thread"; - - os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1); - - if (os_aio_ibuf_array == NULL) { - return(FALSE); - } - - ++n_segments; - - srv_io_thread_function[0] = "insert buffer thread"; - - os_aio_write_array = os_aio_array_create( - n_write_segs * n_per_seg, n_write_segs); - - if (os_aio_write_array == NULL) { - return(FALSE); - } - - n_segments += n_write_segs; - - for (ulint i = start + n_read_segs; i < n_segments; ++i) { - ut_a(i < SRV_MAX_N_IO_THREADS); - srv_io_thread_function[i] = "write thread"; - } - - ut_ad(n_segments >= 4); - } else { - ut_ad(n_segments > 0); - } - - os_aio_sync_array = os_aio_array_create(n_slots_sync, 1); - - if (os_aio_sync_array == NULL) { - return(FALSE); - } - - os_aio_n_segments = n_segments; - - os_aio_validate(); - - os_last_printout = ut_time(); - -#ifdef _WIN32 - ut_a(completion_port == 0 && read_completion_port == 0); - completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - read_completion_port = srv_read_only_mode? completion_port : CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0); - ut_a(completion_port && read_completion_port); -#endif - - if (srv_use_native_aio) { - return(TRUE); - } - - os_aio_segment_wait_events = static_cast<os_event_t*>( - ut_malloc(n_segments * sizeof *os_aio_segment_wait_events)); - - for (ulint i = 0; i < n_segments; ++i) { - os_aio_segment_wait_events[i] = os_event_create(); - } - - return(TRUE); -} - -/*********************************************************************** -Frees the asynchronous io system. 
*/ -UNIV_INTERN -void -os_aio_free(void) -/*=============*/ -{ - if (os_aio_ibuf_array != 0) { - os_aio_array_free(os_aio_ibuf_array); - } - - if (os_aio_log_array != 0) { - os_aio_array_free(os_aio_log_array); - } - - if (os_aio_write_array != 0) { - os_aio_array_free(os_aio_write_array); - } - - if (os_aio_sync_array != 0) { - os_aio_array_free(os_aio_sync_array); - } - - os_aio_array_free(os_aio_read_array); - - if (!srv_use_native_aio) { - for (ulint i = 0; i < os_aio_n_segments; i++) { - os_event_free(os_aio_segment_wait_events[i]); - } - } - - for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { - os_mutex_free(os_file_seek_mutexes[i]); - } - - ut_free(os_aio_segment_wait_events); - os_aio_segment_wait_events = 0; - os_aio_n_segments = 0; -#ifdef _WIN32 - completion_port = 0; - read_completion_port = 0; -#endif -} - -#ifdef WIN_ASYNC_IO -/************************************************************************//** -Wakes up all async i/o threads in the array in Windows async i/o at -shutdown. */ -static -void -os_aio_array_wake_win_aio_at_shutdown( -/*==================================*/ - os_aio_array_t* array) /*!< in: aio array */ -{ - if(completion_port) - { - PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL); - PostQueuedCompletionStatus(read_completion_port, 0, IOCP_SHUTDOWN_KEY, NULL); - } -} -#endif - -/************************************************************************//** -Wakes up all async i/o threads so that they know to exit themselves in -shutdown. 
*/ -UNIV_INTERN -void -os_aio_wake_all_threads_at_shutdown(void) -/*=====================================*/ -{ -#ifdef WIN_ASYNC_IO - /* This code wakes up all ai/o threads in Windows native aio */ - os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array); - if (os_aio_write_array != 0) { - os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array); - } - - if (os_aio_ibuf_array != 0) { - os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array); - } - - if (os_aio_log_array != 0) { - os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array); - } -#elif defined(LINUX_NATIVE_AIO) - /* When using native AIO interface the io helper threads - wait on io_getevents with a timeout value of 500ms. At - each wake up these threads check the server status. - No need to do anything to wake them up. */ -#endif /* !WIN_ASYNC_AIO */ - - if (srv_use_native_aio) { - return; - } - - /* This loop wakes up all simulated ai/o threads */ - - for (ulint i = 0; i < os_aio_n_segments; i++) { - - os_event_set(os_aio_segment_wait_events[i]); - } -} - -/************************************************************************//** -Waits until there are no pending writes in os_aio_write_array. There can -be other, synchronous, pending writes. */ -UNIV_INTERN -void -os_aio_wait_until_no_pending_writes(void) -/*=====================================*/ -{ - ut_ad(!srv_read_only_mode); - os_event_wait(os_aio_write_array->is_empty); -} - -/**********************************************************************//** -Calculates segment number for a slot. 
-@return segment number (which is the number used by, for example, -i/o-handler threads) */ -static -ulint -os_aio_get_segment_no_from_slot( -/*============================*/ - os_aio_array_t* array, /*!< in: aio wait array */ - os_aio_slot_t* slot) /*!< in: slot in this array */ -{ - ulint segment; - ulint seg_len; - - if (array == os_aio_ibuf_array) { - ut_ad(!srv_read_only_mode); - - segment = IO_IBUF_SEGMENT; - - } else if (array == os_aio_log_array) { - ut_ad(!srv_read_only_mode); - - segment = IO_LOG_SEGMENT; - - } else if (array == os_aio_read_array) { - seg_len = os_aio_read_array->n_slots - / os_aio_read_array->n_segments; - - segment = (srv_read_only_mode ? 0 : 2) + slot->pos / seg_len; - } else { - ut_ad(!srv_read_only_mode); - ut_a(array == os_aio_write_array); - - seg_len = os_aio_write_array->n_slots - / os_aio_write_array->n_segments; - - segment = os_aio_read_array->n_segments + 2 - + slot->pos / seg_len; - } - - return(segment); -} - -/**********************************************************************//** -Calculates local segment number and aio array from global segment number. 
-@return local segment number within the aio array */ -static -ulint -os_aio_get_array_and_local_segment( -/*===============================*/ - os_aio_array_t** array, /*!< out: aio wait array */ - ulint global_segment)/*!< in: global segment number */ -{ - ulint segment; - - ut_a(global_segment < os_aio_n_segments); - - if (srv_read_only_mode) { - *array = os_aio_read_array; - - return(global_segment); - } else if (global_segment == IO_IBUF_SEGMENT) { - *array = os_aio_ibuf_array; - segment = 0; - - } else if (global_segment == IO_LOG_SEGMENT) { - *array = os_aio_log_array; - segment = 0; - - } else if (global_segment < os_aio_read_array->n_segments + 2) { - *array = os_aio_read_array; - - segment = global_segment - 2; - } else { - *array = os_aio_write_array; - - segment = global_segment - (os_aio_read_array->n_segments + 2); - } - - return(segment); -} - -/*******************************************************************//** -Requests for a slot in the aio array. If no slot is available, waits until -not_full-event becomes signaled. 
-@return pointer to slot */ -static -os_aio_slot_t* -os_aio_array_reserve_slot( -/*======================*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ - ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */ - os_aio_array_t* array, /*!< in: aio array */ - fil_node_t* message1,/*!< in: message to be passed along with - the aio operation */ - void* message2,/*!< in: message to be passed along with - the aio operation */ - pfs_os_file_t file, /*!< in: file handle */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - void* buf, /*!< in: buffer where to read or from which - to write */ - os_offset_t offset, /*!< in: file offset */ - ulint len, /*!< in: length of the block to read or write */ - ulint page_size, /*!< in: page size in bytes */ - ulint space_id, - ulint* write_size)/*!< in/out: Actual write size initialized - after first successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ -{ - os_aio_slot_t* slot = NULL; -#ifdef WIN_ASYNC_IO - OVERLAPPED* control; - -#elif defined(LINUX_NATIVE_AIO) - - struct iocb* iocb; - off_t aio_offset; - -#endif /* WIN_ASYNC_IO */ - ulint i; - ulint counter; - ulint slots_per_seg; - ulint local_seg; - -#ifdef WIN_ASYNC_IO - ut_a((len & 0xFFFFFFFFUL) == len); -#endif /* WIN_ASYNC_IO */ - - /* No need of a mutex. Only reading constant fields */ - slots_per_seg = array->n_slots / array->n_segments; - - /* We attempt to keep adjacent blocks in the same local - segment. 
This can help in merging IO requests when we are - doing simulated AIO */ - local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6)) - % array->n_segments; - -loop: - os_mutex_enter(array->mutex); - - if (array->n_reserved == array->n_slots) { - os_mutex_exit(array->mutex); - - if (!srv_use_native_aio) { - /* If the handler threads are suspended, wake them - so that we get more slots */ - - os_aio_simulated_wake_handler_threads(); - } - - os_event_wait(array->not_full); - - goto loop; - } - - /* We start our search for an available slot from our preferred - local segment and do a full scan of the array. We are - guaranteed to find a slot in full scan. */ - for (i = local_seg * slots_per_seg, counter = 0; - counter < array->n_slots; - i++, counter++) { - - i %= array->n_slots; - - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved == FALSE) { - goto found; - } - } - - /* We MUST always be able to get hold of a reserved slot. */ - ut_error; - -found: - ut_a(slot->reserved == FALSE); - array->n_reserved++; - - if (array->n_reserved == 1) { - os_event_reset(array->is_empty); - } - - if (array->n_reserved == array->n_slots) { - os_event_reset(array->not_full); - } - - slot->reserved = TRUE; - slot->reservation_time = ut_time(); - slot->message1 = message1; - slot->message2 = message2; - slot->file = file; - slot->name = name; - slot->len = len; - slot->type = type; - slot->buf = static_cast<byte*>(buf); - slot->offset = offset; - slot->io_already_done = FALSE; - slot->space_id = space_id; - slot->is_log = is_log; - slot->page_size = page_size; - - if (message1) { - slot->file_block_size = fil_node_get_block_size(message1); - } - - slot->buf = (byte *)buf; - -#ifdef WIN_ASYNC_IO - control = &slot->control; - control->Offset = (DWORD) offset & 0xFFFFFFFF; - control->OffsetHigh = (DWORD) (offset >> 32); - control->hEvent = 0; - slot->arr = array; - -#elif defined(LINUX_NATIVE_AIO) - - /* If we are not using native AIO skip this part. 
*/ - if (!srv_use_native_aio) { - goto skip_native_aio; - } - - /* Check if we are dealing with 64 bit arch. - If not then make sure that offset fits in 32 bits. */ - aio_offset = (off_t) offset; - - ut_a(sizeof(aio_offset) >= sizeof(offset) - || ((os_offset_t) aio_offset) == offset); - - iocb = &slot->control; - - if (type == OS_FILE_READ) { - io_prep_pread(iocb, file, buf, len, aio_offset); - } else { - ut_a(type == OS_FILE_WRITE); - io_prep_pwrite(iocb, file, buf, len, aio_offset); - } - - iocb->data = (void*) slot; - slot->n_bytes = 0; - slot->ret = 0; - -skip_native_aio: -#endif /* LINUX_NATIVE_AIO */ - os_mutex_exit(array->mutex); - - return(slot); -} - -/*******************************************************************//** -Frees a slot in the aio array. */ -static -void -os_aio_array_free_slot( -/*===================*/ - os_aio_array_t* array, /*!< in: aio array */ - os_aio_slot_t* slot) /*!< in: pointer to slot */ -{ - os_mutex_enter(array->mutex); - - ut_ad(slot->reserved); - - slot->reserved = FALSE; - - array->n_reserved--; - - if (array->n_reserved == array->n_slots - 1) { - os_event_set(array->not_full); - } - - if (array->n_reserved == 0) { - os_event_set(array->is_empty); - } - -#ifdef LINUX_NATIVE_AIO - - if (srv_use_native_aio) { - memset(&slot->control, 0x0, sizeof(slot->control)); - slot->n_bytes = 0; - slot->ret = 0; - /*fprintf(stderr, "Freed up Linux native slot.\n");*/ - } else { - /* These fields should not be used if we are not - using native AIO. */ - ut_ad(slot->n_bytes == 0); - ut_ad(slot->ret == 0); - } - -#endif - os_mutex_exit(array->mutex); -} - -/**********************************************************************//** -Wakes up a simulated aio i/o-handler thread if it has something to do. 
*/ -static -void -os_aio_simulated_wake_handler_thread( -/*=================================*/ - ulint global_segment) /*!< in: the number of the segment in the aio - arrays */ -{ - os_aio_array_t* array; - ulint segment; - - ut_ad(!srv_use_native_aio); - - segment = os_aio_get_array_and_local_segment(&array, global_segment); - - ulint n = array->n_slots / array->n_segments; - - segment *= n; - - /* Look through n slots after the segment * n'th slot */ - - os_mutex_enter(array->mutex); - - for (ulint i = 0; i < n; ++i) { - const os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot(array, segment + i); - - if (slot->reserved) { - - /* Found an i/o request */ - - os_mutex_exit(array->mutex); - - os_event_t event; - - event = os_aio_segment_wait_events[global_segment]; - - os_event_set(event); - - return; - } - } - - os_mutex_exit(array->mutex); -} - -/**********************************************************************//** -Wakes up simulated aio i/o-handler threads if they have something to do. */ -UNIV_INTERN -void -os_aio_simulated_wake_handler_threads(void) -/*=======================================*/ -{ - if (srv_use_native_aio) { - /* We do not use simulated aio: do nothing */ - - return; - } - - os_aio_recommend_sleep_for_read_threads = FALSE; - - for (ulint i = 0; i < os_aio_n_segments; i++) { - os_aio_simulated_wake_handler_thread(i); - } -} - -#ifdef _WIN32 -/**********************************************************************//** -This function can be called if one wants to post a batch of reads and -prefers an i/o-handler thread to handle them all at once later. You must -call os_aio_simulated_wake_handler_threads later to ensure the threads -are not left sleeping! */ -UNIV_INTERN -void -os_aio_simulated_put_read_threads_to_sleep() -{ - -/* The idea of putting background IO threads to sleep is only for -Windows when using simulated AIO. 
Windows XP seems to schedule -background threads too eagerly to allow for coalescing during -readahead requests. */ - - os_aio_array_t* array; - - if (srv_use_native_aio) { - /* We do not use simulated aio: do nothing */ - - return; - } - - os_aio_recommend_sleep_for_read_threads = TRUE; - - for (ulint i = 0; i < os_aio_n_segments; i++) { - os_aio_get_array_and_local_segment(&array, i); - - if (array == os_aio_read_array) { - - os_event_reset(os_aio_segment_wait_events[i]); - } - } -} -#endif /* _WIN32 */ - -#if defined(LINUX_NATIVE_AIO) -/*******************************************************************//** -Dispatch an AIO request to the kernel. -@return TRUE on success. */ -static -ibool -os_aio_linux_dispatch( -/*==================*/ - os_aio_array_t* array, /*!< in: io request array. */ - os_aio_slot_t* slot) /*!< in: an already reserved slot. */ -{ - int ret; - ulint io_ctx_index; - struct iocb* iocb; - - ut_ad(slot != NULL); - ut_ad(array); - - ut_a(slot->reserved); - - /* Find out what we are going to work with. - The iocb struct is directly in the slot. - The io_context is one per segment. */ - - iocb = &slot->control; - io_ctx_index = (slot->pos * array->n_segments) / array->n_slots; - - ret = io_submit(array->aio_ctx[io_ctx_index], 1, &iocb); - -#if defined(UNIV_AIO_DEBUG) - fprintf(stderr, - "io_submit[%c] ret[%d]: slot[%p] ctx[%p] seg[%lu]\n", - (slot->type == OS_FILE_WRITE) ? 'w' : 'r', ret, slot, - array->aio_ctx[io_ctx_index], (ulong) io_ctx_index); -#endif - - /* io_submit returns number of successfully - queued requests or -errno. */ - if (UNIV_UNLIKELY(ret != 1)) { - errno = -ret; - return(FALSE); - } - - return(TRUE); -} -#endif /* LINUX_NATIVE_AIO */ - - -/*******************************************************************//** -NOTE! Use the corresponding macro os_aio(), not directly this function! -Requests an asynchronous i/o operation. 
-@return TRUE if request was queued successfully, FALSE if fail */ -UNIV_INTERN -ibool -os_aio_func( -/*========*/ - ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */ - ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */ - ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed - to OS_AIO_SIMULATED_WAKE_LATER: the - last flag advises this function not to wake - i/o-handler threads, but the caller will - do the waking explicitly later, in this - way the caller can post several requests in - a batch; NOTE that the batch must not be - so big that it exhausts the slots in aio - arrays! NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! */ - const char* name, /*!< in: name of the file or path as a - null-terminated string */ - pfs_os_file_t file, /*!< in: handle to a file */ - void* buf, /*!< in: buffer where to read or from which - to write */ - os_offset_t offset, /*!< in: file offset where to read or write */ - ulint n, /*!< in: number of bytes to read or write */ - ulint page_size, /*!< in: page size in bytes */ - fil_node_t* message1,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - void* message2,/*!< in: message for the aio handler - (can be used to identify a completed - aio operation); ignored if mode is - OS_AIO_SYNC */ - ulint space_id, - trx_t* trx, - ulint* write_size)/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. 
*/ -{ - os_aio_array_t* array; - os_aio_slot_t* slot; -#ifdef WIN_ASYNC_IO - void* buffer = NULL; - DWORD len = (DWORD) n; - BOOL ret; -#endif - ulint wake_later; - ut_ad(buf); - ut_ad(n > 0); - ut_ad(n % OS_MIN_LOG_BLOCK_SIZE == 0); - ut_ad(offset % OS_MIN_LOG_BLOCK_SIZE == 0); - ut_ad(os_aio_validate_skip()); -#ifdef WIN_ASYNC_IO - ut_ad((n & 0xFFFFFFFFUL) == n); -#endif - - - wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; - mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - mode = OS_AIO_SYNC; os_has_said_disk_full = FALSE;); - - if (mode == OS_AIO_SYNC) { - ibool ret; - /* This is actually an ordinary synchronous read or write: - no need to use an i/o-handler thread */ - - if (type == OS_FILE_READ) { - ret = os_file_read_func(file, buf, offset, n, trx); - } else { - ut_ad(!srv_read_only_mode); - ut_a(type == OS_FILE_WRITE); - - ret = os_file_write(name, file, buf, offset, n); - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - os_has_said_disk_full = FALSE; ret = 0; errno = 28;); - - if (!ret) { - os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE, - __FILE__, __LINE__); - } - } - - if (!ret) { - fprintf(stderr, "FAIL"); - } - - return ret; - } - -try_again: - switch (mode) { - case OS_AIO_NORMAL: - if (type == OS_FILE_READ) { - array = os_aio_read_array; - } else { - ut_ad(!srv_read_only_mode); - array = os_aio_write_array; - } - break; - case OS_AIO_IBUF: - ut_ad(type == OS_FILE_READ); - /* Reduce probability of deadlock bugs in connection with ibuf: - do not let the ibuf i/o handler sleep */ - - wake_later = FALSE; - - if (srv_read_only_mode) { - array = os_aio_read_array; - } else { - array = os_aio_ibuf_array; - } - break; - case OS_AIO_LOG: - if (srv_read_only_mode) { - array = os_aio_read_array; - } else { - array = os_aio_log_array; - } - break; - case OS_AIO_SYNC: - array = os_aio_sync_array; -#if defined(LINUX_NATIVE_AIO) - /* In Linux native AIO we don't use sync IO array. 
*/ - ut_a(!srv_use_native_aio); -#endif /* LINUX_NATIVE_AIO */ - break; - default: - ut_error; - array = NULL; /* Eliminate compiler warning */ - } - - if (trx && type == OS_FILE_READ) - { - trx->io_reads++; - trx->io_read += n; - } - - slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file, - name, buf, offset, n, page_size, space_id, - write_size); - - if (type == OS_FILE_READ) { - if (srv_use_native_aio) { - os_n_file_reads++; - os_bytes_read_since_printout += n; -#ifdef WIN_ASYNC_IO - ret = ReadFile(file, buf, (DWORD) n, &len, - &(slot->control)); - if(!ret && GetLastError() != ERROR_IO_PENDING) - goto err_exit; - -#elif defined(LINUX_NATIVE_AIO) - if (!os_aio_linux_dispatch(array, slot)) { - goto err_exit; - } -#endif /* WIN_ASYNC_IO */ - } else { - if (!wake_later) { - os_aio_simulated_wake_handler_thread( - os_aio_get_segment_no_from_slot( - array, slot)); - } - } - } else if (type == OS_FILE_WRITE) { - ut_ad(!srv_read_only_mode); - if (srv_use_native_aio) { - os_n_file_writes++; -#ifdef WIN_ASYNC_IO - n = slot->len; - buffer = buf; - ret = WriteFile(file, buffer, (DWORD) n, &len, - &(slot->control)); - - if(!ret && GetLastError() != ERROR_IO_PENDING) - goto err_exit; -#elif defined(LINUX_NATIVE_AIO) - if (!os_aio_linux_dispatch(array, slot)) { - goto err_exit; - } -#endif /* WIN_ASYNC_IO */ - } else { - if (!wake_later) { - os_aio_simulated_wake_handler_thread( - os_aio_get_segment_no_from_slot( - array, slot)); - } - } - } else { - ut_error; - } - - /* aio was queued successfully! */ - return(TRUE); - -#if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO -err_exit: -#endif /* LINUX_NATIVE_AIO || WIN_ASYNC_IO */ - os_aio_array_free_slot(array, slot); - - if (os_file_handle_error( - name,type == OS_FILE_READ ? 
"aio read" : "aio write", __FILE__, __LINE__)) { - - goto try_again; - } - - return(FALSE); -} - -#ifdef WIN_ASYNC_IO -#define READ_SEGMENT(x) (x < srv_n_read_io_threads) -#define WRITE_SEGMENT(x) !READ_SEGMENT(x) - -/**********************************************************************//** -This function is only used in Windows asynchronous i/o. -Waits for an aio operation to complete. This function is used to wait the -for completed requests. The aio array of pending requests is divided -into segments. The thread specifies which segment or slot it wants to wait -for. NOTE: this function will also take care of freeing the aio slot, -therefore no other thread is allowed to do the freeing! -@return TRUE if the aio operation succeeded */ -UNIV_INTERN -ibool -os_aio_windows_handle( -/*==================*/ - ulint segment, /*!< in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads; if - this is ULINT_UNDEFINED, then it means that - sync aio is used, and this parameter is - ignored */ - ulint pos, /*!< this parameter is used only in sync aio: - wait for the aio slot at this position */ - fil_node_t**message1, /*!< out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id) -{ - ulint orig_seg = segment; - os_aio_slot_t* slot; - ibool ret_val; - BOOL ret; - DWORD len; - BOOL retry = FALSE; - ULONG_PTR key; - HANDLE port = READ_SEGMENT(segment)? 
read_completion_port : completion_port; - - for(;;) { - ret = GetQueuedCompletionStatus(port, &len, &key, - (OVERLAPPED **)&slot, INFINITE); - - /* If shutdown key was received, repost the shutdown message and exit */ - if (ret && (key == IOCP_SHUTDOWN_KEY)) { - PostQueuedCompletionStatus(port, 0, key, NULL); - os_thread_exit(NULL); - } - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } - - if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) { - /* - Redirect read completions to the dedicated completion port - and thread. We need to split read and write threads. If we do not - do that, and just allow all io threads process all IO, it is possible - to get stuck in a deadlock in buffer pool code, - - Currently, the problem is solved this way - "write io" threads - always get all completion notifications, from both async reads and - writes. Write completion is handled in the same thread that gets it. - Read completion is forwarded via PostQueueCompletionStatus()) - to the second completion port dedicated solely to reads. One of the - "read io" threads waiting on this port will finally handle the IO. - - Forwarding IO completion this way costs a context switch , and this - seems tolerable since asynchronous reads are by far less frequent. 
- */ - ut_a(PostQueuedCompletionStatus(read_completion_port, len, key, - &slot->control)); - } - else { - break; - } - } - *message1 = slot->message1; - *message2 = slot->message2; - - *type = slot->type; - *space_id = slot->space_id; - - if (ret && len == slot->len) { - - ret_val = TRUE; - } else if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) { - - retry = TRUE; - } else { - - ret_val = FALSE; - } - - if (retry) { - - ut_a((slot->len & 0xFFFFFFFFUL) == slot->len); - - switch (slot->type) { - case OS_FILE_WRITE: - ret_val = os_file_write( - slot->name, slot->file, slot->buf, - slot->offset, slot->len); - break; - case OS_FILE_READ: - ret_val = os_file_read( - slot->file, slot->buf, - slot->offset, slot->len); - break; - default: - ut_error; - } - - } - - if (slot->type == OS_FILE_WRITE) { - if (!slot->is_log && srv_use_trim && !os_fallocate_failed) { - // Deallocate unused blocks from file system - os_file_trim(slot); - } - } - - os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); - - return(ret_val); -} -#endif - -#if defined(LINUX_NATIVE_AIO) -/******************************************************************//** -This function is only used in Linux native asynchronous i/o. This is -called from within the io-thread. If there are no completed IO requests -in the slot array, the thread calls this function to collect more -requests from the kernel. -The io-thread waits on io_getevents(), which is a blocking call, with -a timeout value. Unless the system is very heavy loaded, keeping the -io-thread very busy, the io-thread will spend most of its time waiting -in this function. -The io-thread also exits in this function. It checks server status at -each wakeup and that is why we use timed wait in io_getevents(). */ -static -void -os_aio_linux_collect( -/*=================*/ - os_aio_array_t* array, /*!< in/out: slot array. */ - ulint segment, /*!< in: local segment no. */ - ulint seg_size) /*!< in: segment size. 
*/ -{ - int i; - int ret; - ulint start_pos; - ulint end_pos; - struct timespec timeout; - struct io_event* events; - struct io_context* io_ctx; - - /* sanity checks. */ - ut_ad(array != NULL); - ut_ad(seg_size > 0); - ut_ad(segment < array->n_segments); - - /* Which part of event array we are going to work on. */ - events = &array->aio_events[segment * seg_size]; - - /* Which io_context we are going to use. */ - io_ctx = array->aio_ctx[segment]; - - /* Starting point of the segment we will be working on. */ - start_pos = segment * seg_size; - - /* End point. */ - end_pos = start_pos + seg_size; - -retry: - - /* Initialize the events. The timeout value is arbitrary. - We probably need to experiment with it a little. */ - memset(events, 0, sizeof(*events) * seg_size); - timeout.tv_sec = 0; - timeout.tv_nsec = OS_AIO_REAP_TIMEOUT; - - ret = io_getevents(io_ctx, 1, seg_size, events, &timeout); - - if (ret > 0) { - for (i = 0; i < ret; i++) { - os_aio_slot_t* slot; - struct iocb* control; - - control = (struct iocb*) events[i].obj; - ut_a(control != NULL); - - slot = (os_aio_slot_t*) control->data; - - /* Some sanity checks. */ - ut_a(slot != NULL); - ut_a(slot->reserved); - -#if defined(UNIV_AIO_DEBUG) - fprintf(stderr, - "io_getevents[%c]: slot[%p] ctx[%p]" - " seg[%lu]\n", - (slot->type == OS_FILE_WRITE) ? 'w' : 'r', - slot, io_ctx, segment); -#endif - - /* We are not scribbling previous segment. */ - ut_a(slot->pos >= start_pos); - - /* We have not overstepped to next segment. */ - ut_a(slot->pos < end_pos); - - if (slot->type == OS_FILE_WRITE) { - if (!slot->is_log && srv_use_trim - && !os_fallocate_failed) { - // Deallocate unused blocks from file system - os_file_trim(slot); - } - } - - /* Mark this request as completed. The error handling - will be done in the calling function. 
*/ - os_mutex_enter(array->mutex); - slot->n_bytes = events[i].res; - slot->ret = events[i].res2; - slot->io_already_done = TRUE; - os_mutex_exit(array->mutex); - } - return; - } - - if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) { - return; - } - - /* This error handling is for any error in collecting the - IO requests. The errors, if any, for any particular IO - request are simply passed on to the calling routine. */ - - switch (ret) { - case -EAGAIN: - /* Not enough resources! Try again. */ - case -EINTR: - /* Interrupted! I have tested the behaviour in case of an - interrupt. If we have some completed IOs available then - the return code will be the number of IOs. We get EINTR only - if there are no completed IOs and we have been interrupted. */ - case 0: - /* No pending request! Go back and check again. */ - goto retry; - } - - /* All other errors should cause a trap for now. */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: unexpected ret_code[%d] from io_getevents()!\n", - ret); - ut_error; -} - -/**********************************************************************//** -This function is only used in Linux native asynchronous i/o. -Waits for an aio operation to complete. This function is used to wait for -the completed requests. The aio array of pending requests is divided -into segments. The thread specifies which segment or slot it wants to wait -for. NOTE: this function will also take care of freeing the aio slot, -therefore no other thread is allowed to do the freeing! -@return TRUE if the IO was successful */ -UNIV_INTERN -ibool -os_aio_linux_handle( -/*================*/ - ulint global_seg, /*!< in: segment number in the aio array - to wait for; segment 0 is the ibuf - i/o thread, segment 1 is log i/o thread, - then follow the non-ibuf read threads, - and the last are the non-ibuf write - threads. 
*/ - fil_node_t**message1, /*!< out: the messages passed with the */ - void** message2, /*!< aio request; note that in case the - aio operation failed, these output - parameters are valid and can be used to - restart the operation. */ - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id) -{ - ulint segment; - os_aio_array_t* array; - os_aio_slot_t* slot; - ulint n; - ulint i; - ibool ret = FALSE; - - /* Should never be doing Sync IO here. */ - ut_a(global_seg != ULINT_UNDEFINED); - - /* Find the array and the local segment. */ - segment = os_aio_get_array_and_local_segment(&array, global_seg); - n = array->n_slots / array->n_segments; - - wait_for_event: - /* Loop until we have found a completed request. */ - for (;;) { - ibool any_reserved = FALSE; - os_mutex_enter(array->mutex); - for (i = 0; i < n; ++i) { - slot = os_aio_array_get_nth_slot( - array, i + segment * n); - if (!slot->reserved) { - continue; - } else if (slot->io_already_done) { - /* Something for us to work on. */ - goto found; - } else { - any_reserved = TRUE; - } - } - - os_mutex_exit(array->mutex); - - /* There is no completed request. - If there is no pending request at all, - and the system is being shut down, exit. */ - if (UNIV_UNLIKELY - (!any_reserved - && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) { - *message1 = NULL; - *message2 = NULL; - return(TRUE); - } - - /* Wait for some request. Note that we return - from wait iff we have found a request. */ - - srv_set_io_thread_op_info(global_seg, - "waiting for completed aio requests"); - os_aio_linux_collect(array, segment, n); - } - -found: - /* Note that it may be that there are more then one completed - IO requests. We process them one at a time. We may have a case - here to improve the performance slightly by dealing with all - requests in one sweep. */ - srv_set_io_thread_op_info(global_seg, - "processing completed aio requests"); - - /* Ensure that we are scribbling only our segment. 
*/ - ut_a(i < n); - - ut_ad(slot != NULL); - ut_ad(slot->reserved); - ut_ad(slot->io_already_done); - - *message1 = slot->message1; - *message2 = slot->message2; - - *type = slot->type; - *space_id = slot->space_id; - - if (slot->ret == 0 && slot->n_bytes == (long) slot->len) { - - ret = TRUE; - } else if ((slot->ret == 0) && (slot->n_bytes > 0) - && (slot->n_bytes < (long) slot->len)) { - /* Partial read or write scenario */ - int submit_ret; - struct iocb* iocb; - slot->buf = (byte*)slot->buf + slot->n_bytes; - slot->offset = slot->offset + slot->n_bytes; - slot->len = slot->len - slot->n_bytes; - /* Resetting the bytes read/written */ - slot->n_bytes = 0; - slot->io_already_done = FALSE; - iocb = &(slot->control); - - if (slot->type == OS_FILE_READ) { - io_prep_pread(&slot->control, slot->file, - slot->buf, slot->len, - (off_t) slot->offset); - } else { - ut_a(slot->type == OS_FILE_WRITE); - io_prep_pwrite(&slot->control, slot->file, - slot->buf, slot->len, - (off_t) slot->offset); - } - /* Resubmit an I/O request */ - submit_ret = io_submit(array->aio_ctx[segment], 1, &iocb); - if (submit_ret < 0 ) { - /* Aborting in case of submit failure */ - ib_logf(IB_LOG_LEVEL_FATAL, - "Native Linux AIO interface. io_submit()" - " call failed when resubmitting a partial" - " I/O request on the file %s.", - slot->name); - } else { - ret = FALSE; - os_mutex_exit(array->mutex); - goto wait_for_event; - } - } else { - errno = -slot->ret; - - if (slot->ret == 0) { - fprintf(stderr, - "InnoDB: Number of bytes after aio %d requested %lu\n" - "InnoDB: from file %s\n", - slot->n_bytes, slot->len, slot->name); - } - - /* os_file_handle_error does tell us if we should retry - this IO. As it stands now, we don't do this retry when - reaping requests from a different context than - the dispatcher. This non-retry logic is the same for - windows and linux native AIO. - We should probably look into this to transparently - re-submit the IO. 
*/ - os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__); - - ret = FALSE; - } - - os_mutex_exit(array->mutex); - - os_aio_array_free_slot(array, slot); - - return(ret); -} -#endif /* LINUX_NATIVE_AIO */ - -/**********************************************************************//** -Does simulated aio. This function should be called by an i/o-handler -thread. -@return TRUE if the aio operation succeeded */ -UNIV_INTERN -ibool -os_aio_simulated_handle( -/*====================*/ - ulint global_segment, /*!< in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads */ - fil_node_t**message1, /*!< out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id) -{ - os_aio_array_t* array; - ulint segment; - os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE]; - ulint n_consecutive; - ulint total_len; - ulint offs; - os_offset_t lowest_offset; - ulint biggest_age; - ulint age; - byte* combined_buf; - byte* combined_buf2; - ibool ret; - ibool any_reserved; - ulint n; - os_aio_slot_t* aio_slot; - - /* Fix compiler warning */ - *consecutive_ios = NULL; - - segment = os_aio_get_array_and_local_segment(&array, global_segment); - -restart: - /* NOTE! We only access constant fields in os_aio_array. 
Therefore - we do not have to acquire the protecting mutex yet */ - - srv_set_io_thread_op_info(global_segment, - "looking for i/o requests (a)"); - ut_ad(os_aio_validate_skip()); - ut_ad(segment < array->n_segments); - - n = array->n_slots / array->n_segments; - - /* Look through n slots after the segment * n'th slot */ - - if (array == os_aio_read_array - && os_aio_recommend_sleep_for_read_threads) { - - /* Give other threads chance to add several i/os to the array - at once. */ - - goto recommended_sleep; - } - - srv_set_io_thread_op_info(global_segment, - "looking for i/o requests (b)"); - - /* Check if there is a slot for which the i/o has already been - done */ - any_reserved = FALSE; - - os_mutex_enter(array->mutex); - - for (ulint i = 0; i < n; i++) { - os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot(array, i + segment * n); - - if (!slot->reserved) { - continue; - } else if (slot->io_already_done) { - - if (os_aio_print_debug) { - fprintf(stderr, - "InnoDB: i/o for slot %lu" - " already done, returning\n", - (ulong) i); - } - - aio_slot = slot; - ret = TRUE; - goto slot_io_done; - } else { - any_reserved = TRUE; - } - } - - /* There is no completed request. - If there is no pending request at all, - and the system is being shut down, exit. */ - if (!any_reserved && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_mutex_exit(array->mutex); - *message1 = NULL; - *message2 = NULL; - return(TRUE); - } - - n_consecutive = 0; - - /* If there are at least 2 seconds old requests, then pick the oldest - one to prevent starvation. If several requests have the same age, - then pick the one at the lowest offset. 
*/ - - biggest_age = 0; - lowest_offset = IB_UINT64_MAX; - - for (ulint i = 0; i < n; i++) { - os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot(array, i + segment * n); - - if (slot->reserved) { - - age = (ulint) difftime( - ut_time(), slot->reservation_time); - - if ((age >= 2 && age > biggest_age) - || (age >= 2 && age == biggest_age - && slot->offset < lowest_offset)) { - - /* Found an i/o request */ - consecutive_ios[0] = slot; - - n_consecutive = 1; - - biggest_age = age; - lowest_offset = slot->offset; - } - } - } - - if (n_consecutive == 0) { - /* There were no old requests. Look for an i/o request at the - lowest offset in the array (we ignore the high 32 bits of the - offset in these heuristics) */ - - lowest_offset = IB_UINT64_MAX; - - for (ulint i = 0; i < n; i++) { - os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot( - array, i + segment * n); - - if (slot->reserved && slot->offset < lowest_offset) { - - /* Found an i/o request */ - consecutive_ios[0] = slot; - - n_consecutive = 1; - - lowest_offset = slot->offset; - } - } - } - - if (n_consecutive == 0) { - - /* No i/o requested at the moment */ - - goto wait_for_io; - } - - /* if n_consecutive != 0, then we have assigned - something valid to consecutive_ios[0] */ - ut_ad(n_consecutive != 0); - ut_ad(consecutive_ios[0] != NULL); - - aio_slot = consecutive_ios[0]; - - /* Check if there are several consecutive blocks to read or write */ - -consecutive_loop: - for (ulint i = 0; i < n; i++) { - os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot(array, i + segment * n); - if (slot->reserved - && slot != aio_slot - && slot->offset == aio_slot->offset + aio_slot->len - && slot->type == aio_slot->type - && slot->file == aio_slot->file) { - - /* Found a consecutive i/o request */ - - consecutive_ios[n_consecutive] = slot; - n_consecutive++; - - aio_slot = slot; - - if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) { - - goto consecutive_loop; - } else { - break; - } - } - } - - 
srv_set_io_thread_op_info(global_segment, "consecutive i/o requests"); - - /* We have now collected n_consecutive i/o requests in the array; - allocate a single buffer which can hold all data, and perform the - i/o */ - - total_len = 0; - aio_slot = consecutive_ios[0]; - - for (ulint i = 0; i < n_consecutive; i++) { - total_len += consecutive_ios[i]->len; - } - - if (n_consecutive == 1) { - /* We can use the buffer of the i/o request */ - combined_buf = aio_slot->buf; - combined_buf2 = NULL; - } else { - combined_buf2 = static_cast<byte*>( - ut_malloc(total_len + UNIV_PAGE_SIZE)); - - ut_a(combined_buf2); - - combined_buf = static_cast<byte*>( - ut_align(combined_buf2, UNIV_PAGE_SIZE)); - } - - /* We release the array mutex for the time of the i/o: NOTE that - this assumes that there is just one i/o-handler thread serving - a single segment of slots! */ - - os_mutex_exit(array->mutex); - - if (aio_slot->type == OS_FILE_WRITE && n_consecutive > 1) { - /* Copy the buffers to the combined buffer */ - offs = 0; - - for (ulint i = 0; i < n_consecutive; i++) { - - ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf, - consecutive_ios[i]->len); - - offs += consecutive_ios[i]->len; - } - } - - srv_set_io_thread_op_info(global_segment, "doing file i/o"); - - /* Do the i/o with ordinary, synchronous i/o functions: */ - if (aio_slot->type == OS_FILE_WRITE) { - ut_ad(!srv_read_only_mode); - ret = os_file_write( - aio_slot->name, aio_slot->file, combined_buf, - aio_slot->offset, total_len); - - DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28", - os_has_said_disk_full = FALSE; ret = 0; errno = 28;); - - if (!ret) { - os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE, - __FILE__, __LINE__); - } - - } else { - ret = os_file_read( - aio_slot->file, combined_buf, - aio_slot->offset, total_len); - } - - srv_set_io_thread_op_info(global_segment, "file i/o done"); - - if (aio_slot->type == OS_FILE_READ && n_consecutive > 1) { - /* Copy the combined 
buffer to individual buffers */ - offs = 0; - - for (ulint i = 0; i < n_consecutive; i++) { - - ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, - consecutive_ios[i]->len); - offs += consecutive_ios[i]->len; - } - } - - if (combined_buf2) { - ut_free(combined_buf2); - } - - os_mutex_enter(array->mutex); - - /* Mark the i/os done in slots */ - - for (ulint i = 0; i < n_consecutive; i++) { - consecutive_ios[i]->io_already_done = TRUE; - } - - /* We return the messages for the first slot now, and if there were - several slots, the messages will be returned with subsequent calls - of this function */ - -slot_io_done: - - ut_a(aio_slot->reserved); - - *message1 = aio_slot->message1; - *message2 = aio_slot->message2; - - *type = aio_slot->type; - *space_id = aio_slot->space_id; - - os_mutex_exit(array->mutex); - - os_aio_array_free_slot(array, aio_slot); - - return(ret); - -wait_for_io: - srv_set_io_thread_op_info(global_segment, "resetting wait event"); - - /* We wait here until there again can be i/os in the segment - of this thread */ - - os_event_reset(os_aio_segment_wait_events[global_segment]); - - os_mutex_exit(array->mutex); - -recommended_sleep: - srv_set_io_thread_op_info(global_segment, "waiting for i/o request"); - - os_event_wait(os_aio_segment_wait_events[global_segment]); - - goto restart; -} - -/**********************************************************************//** -Validates the consistency of an aio array. 
-@return true if ok */ -static -bool -os_aio_array_validate( -/*==================*/ - os_aio_array_t* array) /*!< in: aio wait array */ -{ - ulint i; - ulint n_reserved = 0; - - os_mutex_enter(array->mutex); - - ut_a(array->n_slots > 0); - ut_a(array->n_segments > 0); - - for (i = 0; i < array->n_slots; i++) { - os_aio_slot_t* slot; - - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved) { - n_reserved++; - ut_a(slot->len > 0); - } - } - - ut_a(array->n_reserved == n_reserved); - - os_mutex_exit(array->mutex); - - return(true); -} - -/**********************************************************************//** -Validates the consistency the aio system. -@return TRUE if ok */ -UNIV_INTERN -ibool -os_aio_validate(void) -/*=================*/ -{ - os_aio_array_validate(os_aio_read_array); - - if (os_aio_write_array != 0) { - os_aio_array_validate(os_aio_write_array); - } - - if (os_aio_ibuf_array != 0) { - os_aio_array_validate(os_aio_ibuf_array); - } - - if (os_aio_log_array != 0) { - os_aio_array_validate(os_aio_log_array); - } - - if (os_aio_sync_array != 0) { - os_aio_array_validate(os_aio_sync_array); - } - - return(TRUE); -} - -/**********************************************************************//** -Prints pending IO requests per segment of an aio array. -We probably don't need per segment statistics but they can help us -during development phase to see if the IO requests are being -distributed as expected. 
*/ -static -void -os_aio_print_segment_info( -/*======================*/ - FILE* file, /*!< in: file where to print */ - ulint* n_seg, /*!< in: pending IO array */ - os_aio_array_t* array) /*!< in: array to process */ -{ - ulint i; - - ut_ad(array); - ut_ad(n_seg); - ut_ad(array->n_segments > 0); - - if (array->n_segments == 1) { - return; - } - - fprintf(file, " ["); - for (i = 0; i < array->n_segments; i++) { - if (i != 0) { - fprintf(file, ", "); - } - - fprintf(file, "%lu", n_seg[i]); - } - fprintf(file, "] "); -} - -/**********************************************************************//** -Prints info about the aio array. */ -UNIV_INTERN -void -os_aio_print_array( -/*==============*/ - FILE* file, /*!< in: file where to print */ - os_aio_array_t* array) /*!< in: aio array to print */ -{ - ulint n_reserved = 0; - ulint n_res_seg[SRV_MAX_N_IO_THREADS]; - - os_mutex_enter(array->mutex); - - ut_a(array->n_slots > 0); - ut_a(array->n_segments > 0); - - memset(n_res_seg, 0x0, sizeof(n_res_seg)); - - for (ulint i = 0; i < array->n_slots; ++i) { - os_aio_slot_t* slot; - ulint seg_no; - - slot = os_aio_array_get_nth_slot(array, i); - - seg_no = (i * array->n_segments) / array->n_slots; - - if (slot->reserved) { - ++n_reserved; - ++n_res_seg[seg_no]; - - ut_a(slot->len > 0); - } - } - - ut_a(array->n_reserved == n_reserved); - - fprintf(file, " %lu", (ulong) n_reserved); - - os_aio_print_segment_info(file, n_res_seg, array); - - os_mutex_exit(array->mutex); -} - -/**********************************************************************//** -Prints info of the aio arrays. 
*/ -UNIV_INTERN -void -os_aio_print( -/*=========*/ - FILE* file) /*!< in: file where to print */ -{ - time_t current_time; - double time_elapsed; - double avg_bytes_read; - - for (ulint i = 0; i < srv_n_file_io_threads; ++i) { - fprintf(file, "I/O thread %lu state: %s (%s)", - (ulong) i, - srv_io_thread_op_info[i], - srv_io_thread_function[i]); - -#ifndef _WIN32 - if (!srv_use_native_aio - && os_aio_segment_wait_events[i]->is_set()) { - fprintf(file, " ev set"); - } -#endif /* _WIN32 */ - - fprintf(file, "\n"); - } - - fputs("Pending normal aio reads:", file); - - os_aio_print_array(file, os_aio_read_array); - - if (os_aio_write_array != 0) { - fputs(", aio writes:", file); - os_aio_print_array(file, os_aio_write_array); - } - - if (os_aio_ibuf_array != 0) { - fputs(",\n ibuf aio reads:", file); - os_aio_print_array(file, os_aio_ibuf_array); - } - - if (os_aio_log_array != 0) { - fputs(", log i/o's:", file); - os_aio_print_array(file, os_aio_log_array); - } - - if (os_aio_sync_array != 0) { - fputs(", sync i/o's:", file); - os_aio_print_array(file, os_aio_sync_array); - } - - putc('\n', file); - current_time = ut_time(); - time_elapsed = 0.001 + difftime(current_time, os_last_printout); - - fprintf(file, - "Pending flushes (fsync) log: " ULINTPF - "; buffer pool: " ULINTPF "\n" - ULINTPF " OS file reads, " - ULINTPF " OS file writes, " - ULINTPF " OS fsyncs\n", - fil_n_pending_log_flushes, - fil_n_pending_tablespace_flushes, - os_n_file_reads, - os_n_file_writes, - os_n_fsyncs); - - const ulint n_reads = ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS)); - const ulint n_writes = ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES)); - - if (n_reads != 0 || n_writes != 0) { - fprintf(file, - ULINTPF " pending reads, " ULINTPF " pending writes\n", - n_reads, n_writes); - } - - if (os_n_file_reads == os_n_file_reads_old) { - avg_bytes_read = 0.0; - } else { - avg_bytes_read = (double) os_bytes_read_since_printout - / (os_n_file_reads - os_n_file_reads_old); - } - - 
fprintf(file, - "%.2f reads/s, %lu avg bytes/read," - " %.2f writes/s, %.2f fsyncs/s\n", - (os_n_file_reads - os_n_file_reads_old) - / time_elapsed, - (ulong) avg_bytes_read, - (os_n_file_writes - os_n_file_writes_old) - / time_elapsed, - (os_n_fsyncs - os_n_fsyncs_old) - / time_elapsed); - - os_n_file_reads_old = os_n_file_reads; - os_n_file_writes_old = os_n_file_writes; - os_n_fsyncs_old = os_n_fsyncs; - os_bytes_read_since_printout = 0; - - os_last_printout = current_time; -} - -/**********************************************************************//** -Refreshes the statistics used to print per-second averages. */ -UNIV_INTERN -void -os_aio_refresh_stats(void) -/*======================*/ -{ - os_n_file_reads_old = os_n_file_reads; - os_n_file_writes_old = os_n_file_writes; - os_n_fsyncs_old = os_n_fsyncs; - os_bytes_read_since_printout = 0; - - os_last_printout = time(NULL); -} - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Checks that all slots in the system have been freed, that is, there are -no pending io operations. 
-@return TRUE if all free */ -UNIV_INTERN -ibool -os_aio_all_slots_free(void) -/*=======================*/ -{ - os_aio_array_t* array; - ulint n_res = 0; - - array = os_aio_read_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - if (!srv_read_only_mode) { - ut_a(os_aio_write_array == 0); - - array = os_aio_write_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - ut_a(os_aio_ibuf_array == 0); - - array = os_aio_ibuf_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - } - - ut_a(os_aio_log_array == 0); - - array = os_aio_log_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - array = os_aio_sync_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - if (n_res == 0) { - - return(TRUE); - } - - return(FALSE); -} -#endif /* UNIV_DEBUG */ - -#endif /* !UNIV_HOTBACKUP */ - -#ifdef _WIN32 -#include <winioctl.h> -#ifndef FSCTL_FILE_LEVEL_TRIM -#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA) -typedef struct _FILE_LEVEL_TRIM_RANGE { - DWORDLONG Offset; - DWORDLONG Length; -} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE; - -typedef struct _FILE_LEVEL_TRIM { - DWORD Key; - DWORD NumRanges; - FILE_LEVEL_TRIM_RANGE Ranges[1]; -} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM; -#endif -#endif - -#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO) -/**********************************************************************//** -Directly manipulate the allocated disk space by deallocating for the file referred to -by fd for the byte range starting at offset and continuing for len bytes. -Within the specified range, partial file system blocks are zeroed, and whole -file system blocks are removed from the file. 
After a successful call, -subsequent reads from this range will return zeroes. -@return true if success, false if error */ -static -ibool -os_file_trim( -/*=========*/ - os_aio_slot_t* slot) /*!< in: slot structure */ -{ - size_t len = slot->len; - size_t trim_len = slot->page_size - slot->len; - os_offset_t off __attribute__((unused)) = slot->offset + len; - size_t bsize = slot->file_block_size; - -#ifdef UNIV_TRIM_DEBUG - fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n", - slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize); -#endif - - // Nothing to do if trim length is zero or if actual write - // size is initialized and it is smaller than current write size. - // In first write if we trim we set write_size to actual bytes - // written and rest of the page is trimmed. In following writes - // there is no need to trim again if write_size only increases - // because rest of the page is already trimmed. If actual write - // size decreases we need to trim again. - if (trim_len == 0 || - (slot->write_size && - *slot->write_size > 0 && - len >= *slot->write_size)) { - - if (slot->write_size) { - if (*slot->write_size > 0 && len >= *slot->write_size) { - srv_stats.page_compressed_trim_op_saved.inc(); - } - - *slot->write_size = len; - } - - return (TRUE); - } - -#ifdef __linux__ -#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) - int ret = fallocate(slot->file, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - off, trim_len); - - if (ret) { - /* After first failure do not try to trim again */ - os_fallocate_failed = true; - srv_use_trim = FALSE; - ib_logf(IB_LOG_LEVEL_WARN, - "fallocate() failed with error %d." - " start: " UINT64PF " len: " ULINTPF " payload: " ULINTPF "." 
- " Disabling fallocate for now.", - errno, off, ulint(trim_len), ulint(len)); - - os_file_handle_error_no_exit(slot->name, - " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ", - FALSE, __FILE__, __LINE__); - - if (slot->write_size) { - *slot->write_size = 0; - } - - return (FALSE); - } else { - if (slot->write_size) { - *slot->write_size = len; - } - } -#else - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: fallocate not supported on this installation." - " InnoDB: Disabling fallocate for now."); - os_fallocate_failed = true; - srv_use_trim = FALSE; - if (slot->write_size) { - *slot->write_size = 0; - } - -#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */ - -#elif defined(_WIN32) - FILE_LEVEL_TRIM flt; - flt.Key = 0; - flt.NumRanges = 1; - flt.Ranges[0].Offset = off; - flt.Ranges[0].Length = trim_len; - - OVERLAPPED overlapped = { 0 }; - overlapped.hEvent = win_get_syncio_event(); - BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM, - &flt, sizeof(flt), NULL, NULL, NULL, &overlapped); - DWORD tmp; - if (ret) { - ret = GetOverlappedResult(slot->file, &overlapped, &tmp, FALSE); - } - else if (GetLastError() == ERROR_IO_PENDING) { - ret = GetOverlappedResult(slot->file, &overlapped, &tmp, TRUE); - } - if (!ret) { - DWORD last_error = GetLastError(); - /* After first failure do not try to trim again */ - os_fallocate_failed = true; - srv_use_trim = FALSE; - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Warning: DeviceIoControl(FSCTL_FILE_LEVEL_TRIM) call failed with error %u%s. Disabling trimming.\n", - last_error, last_error == ERROR_NOT_SUPPORTED ? 
"(ERROR_NOT_SUPPORTED)" : ""); - - if (slot->write_size) { - *slot->write_size = 0; - } - return (FALSE); - } else { - if (slot->write_size) { - *slot->write_size = len; - } - } -#endif - - switch(bsize) { - case 512: - srv_stats.page_compression_trim_sect512.add((trim_len / bsize)); - break; - case 1024: - srv_stats.page_compression_trim_sect1024.add((trim_len / bsize)); - break; - case 2948: - srv_stats.page_compression_trim_sect2048.add((trim_len / bsize)); - break; - case 4096: - srv_stats.page_compression_trim_sect4096.add((trim_len / bsize)); - break; - case 8192: - srv_stats.page_compression_trim_sect8192.add((trim_len / bsize)); - break; - case 16384: - srv_stats.page_compression_trim_sect16384.add((trim_len / bsize)); - break; - case 32768: - srv_stats.page_compression_trim_sect32768.add((trim_len / bsize)); - break; - default: - break; - } - - srv_stats.page_compressed_trim_op.inc(); - - return (TRUE); - -} -#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */ - -/***********************************************************************//** -Try to get number of bytes per sector from file system. 
-@return file block size */ -UNIV_INTERN -ulint -os_file_get_block_size( -/*===================*/ - os_file_t file, /*!< in: handle to a file */ - const char* name) /*!< in: file name */ -{ - ulint fblock_size = 512; - -#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H) - struct statvfs fstat; - int err; - - err = fstatvfs(file, &fstat); - - if (err != 0) { - fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name); - os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__); - } else { - fblock_size = fstat.f_bsize; - } -#endif /* UNIV_LINUX */ -#ifdef __WIN__ - { - DWORD SectorsPerCluster = 0; - DWORD BytesPerSector = 0; - DWORD NumberOfFreeClusters = 0; - DWORD TotalNumberOfClusters = 0; - - /* - if (GetFreeSpace((LPCTSTR)name, &SectorsPerCluster, &BytesPerSector, &NumberOfFreeClusters, &TotalNumberOfClusters)) { - fblock_size = BytesPerSector; - } else { - fprintf(stderr, "InnoDB: Warning: GetFreeSpace() failed on file %s\n", name); - os_file_handle_error_no_exit(name, "GetFreeSpace()", FALSE, __FILE__, __LINE__); - } - */ - } -#endif /* __WIN__*/ - - /* Currently we support file block size up to 4Kb */ - if (fblock_size > 4096 || fblock_size < 512) { - if (fblock_size < 512) { - fblock_size = 512; - } else { - fblock_size = 4096; - } - } - - return fblock_size; -} diff --git a/storage/xtradb/os/os0proc.cc b/storage/xtradb/os/os0proc.cc deleted file mode 100644 index ff6d65e4ae6..00000000000 --- a/storage/xtradb/os/os0proc.cc +++ /dev/null @@ -1,232 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file os/os0proc.cc -The interface to the operating system -process control primitives - -Created 9/30/1995 Heikki Tuuri -*******************************************************/ - -#include "os0proc.h" -#ifdef UNIV_NONINL -#include "os0proc.ic" -#endif - -#include "ut0mem.h" -#include "ut0byte.h" - -/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and -MAP_ANON but MAP_ANON is marked as deprecated */ -#if defined(MAP_ANONYMOUS) -#define OS_MAP_ANON MAP_ANONYMOUS -#elif defined(MAP_ANON) -#define OS_MAP_ANON MAP_ANON -#endif - -UNIV_INTERN ibool os_use_large_pages; -/* Large page size. This may be a boot-time option on some platforms */ -UNIV_INTERN ulint os_large_page_size; - -/****************************************************************//** -Converts the current process id to a number. It is not guaranteed that the -number is unique. In Linux returns the 'process number' of the current -thread. That number is the same as one sees in 'top', for example. In Linux -the thread id is not the same as one sees in 'top'. -@return process id as a number */ -UNIV_INTERN -ulint -os_proc_get_number(void) -/*====================*/ -{ -#ifdef __WIN__ - return((ulint)GetCurrentProcessId()); -#else - return((ulint) getpid()); -#endif -} - -/****************************************************************//** -Allocates large pages memory. 
-@return allocated memory */ -UNIV_INTERN -void* -os_mem_alloc_large( -/*===============*/ - ulint* n) /*!< in/out: number of bytes */ -{ - void* ptr; - ulint size; -#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX - int shmid; - struct shmid_ds buf; - - if (!os_use_large_pages || !os_large_page_size) { - goto skip; - } - - /* Align block size to os_large_page_size */ - ut_ad(ut_is_2pow(os_large_page_size)); - size = ut_2pow_round(*n + (os_large_page_size - 1), - os_large_page_size); - - shmid = shmget(IPC_PRIVATE, (size_t) size, SHM_HUGETLB | SHM_R | SHM_W); - if (shmid < 0) { - fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate" - " %lu bytes. errno %d\n", size, errno); - ptr = NULL; - } else { - ptr = shmat(shmid, NULL, 0); - if (ptr == (void*)-1) { - fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to" - " attach shared memory segment, errno %d\n", - errno); - ptr = NULL; - } - - /* Remove the shared memory segment so that it will be - automatically freed after memory is detached or - process exits */ - shmctl(shmid, IPC_RMID, &buf); - } - - if (ptr) { - *n = size; - os_fast_mutex_lock(&ut_list_mutex); - ut_total_allocated_memory += size; - os_fast_mutex_unlock(&ut_list_mutex); - UNIV_MEM_ALLOC(ptr, size); - return(ptr); - } - - fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional" - " memory pool\n"); -skip: -#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */ - -#ifdef __WIN__ - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - - /* Align block size to system page size */ - ut_ad(ut_is_2pow(system_info.dwPageSize)); - /* system_info.dwPageSize is only 32-bit. Casting to ulint is required - on 64-bit Windows. 
*/ - size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1), - (ulint) system_info.dwPageSize); - ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, - PAGE_READWRITE); - if (!ptr) { - fprintf(stderr, "InnoDB: VirtualAlloc(%lu bytes) failed;" - " Windows error %lu\n", - (ulong) size, (ulong) GetLastError()); - } else { - os_fast_mutex_lock(&ut_list_mutex); - ut_total_allocated_memory += size; - os_fast_mutex_unlock(&ut_list_mutex); - UNIV_MEM_ALLOC(ptr, size); - } -#elif !defined OS_MAP_ANON - size = *n; - ptr = ut_malloc_low(size, TRUE, FALSE); -#else -# ifdef HAVE_GETPAGESIZE - size = getpagesize(); -# else - size = UNIV_PAGE_SIZE; -# endif - /* Align block size to system page size */ - ut_ad(ut_is_2pow(size)); - size = *n = ut_2pow_round(*n + (size - 1), size); - ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | OS_MAP_ANON, -1, 0); - if (UNIV_UNLIKELY(ptr == (void*) -1)) { - fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;" - " errno %lu\n", - (ulong) size, (ulong) errno); - ptr = NULL; - } else { - os_fast_mutex_lock(&ut_list_mutex); - ut_total_allocated_memory += size; - os_fast_mutex_unlock(&ut_list_mutex); - UNIV_MEM_ALLOC(ptr, size); - } -#endif - return(ptr); -} - -/****************************************************************//** -Frees large pages memory. 
*/ -UNIV_INTERN -void -os_mem_free_large( -/*==============*/ - void *ptr, /*!< in: pointer returned by - os_mem_alloc_large() */ - ulint size) /*!< in: size returned by - os_mem_alloc_large() */ -{ - os_fast_mutex_lock(&ut_list_mutex); - ut_a(ut_total_allocated_memory >= size); - os_fast_mutex_unlock(&ut_list_mutex); - -#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX - if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) { - os_fast_mutex_lock(&ut_list_mutex); - ut_a(ut_total_allocated_memory >= size); - ut_total_allocated_memory -= size; - os_fast_mutex_unlock(&ut_list_mutex); - UNIV_MEM_FREE(ptr, size); - return; - } -#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */ -#ifdef __WIN__ - /* When RELEASE memory, the size parameter must be 0. - Do not use MEM_RELEASE with MEM_DECOMMIT. */ - if (!VirtualFree(ptr, 0, MEM_RELEASE)) { - fprintf(stderr, "InnoDB: VirtualFree(%p, %lu) failed;" - " Windows error %lu\n", - ptr, (ulong) size, (ulong) GetLastError()); - } else { - os_fast_mutex_lock(&ut_list_mutex); - ut_a(ut_total_allocated_memory >= size); - ut_total_allocated_memory -= size; - os_fast_mutex_unlock(&ut_list_mutex); - UNIV_MEM_FREE(ptr, size); - } -#elif !defined OS_MAP_ANON - ut_free(ptr); -#else -# if defined(UNIV_SOLARIS) - if (munmap(static_cast<caddr_t>(ptr), size)) { -# else - if (munmap(ptr, size)) { -# endif /* UNIV_SOLARIS */ - fprintf(stderr, "InnoDB: munmap(%p, %lu) failed;" - " errno %lu\n", - ptr, (ulong) size, (ulong) errno); - } else { - os_fast_mutex_lock(&ut_list_mutex); - ut_a(ut_total_allocated_memory >= size); - ut_total_allocated_memory -= size; - os_fast_mutex_unlock(&ut_list_mutex); - UNIV_MEM_FREE(ptr, size); - } -#endif -} diff --git a/storage/xtradb/os/os0stacktrace.cc b/storage/xtradb/os/os0stacktrace.cc deleted file mode 100644 index c4c428e0db3..00000000000 --- a/storage/xtradb/os/os0stacktrace.cc +++ /dev/null @@ -1,131 +0,0 @@ -/***************************************************************************** - -Copyright (C) 
2013 SkySQL Ab. All Rights Reserved. - - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -#include "os0thread.h" - -#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS - -#if HAVE_EXECINFO_H -#include <execinfo.h> -#endif - -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#ifndef __USE_GNU -#define __USE_GNU -#endif -#ifndef _XOPEN_SOURCE -#define _XOPEN_SOURCE -#endif - -/* Since kernel version 2.2 the undocumented parameter to the signal handler has been declared -obsolete in adherence with POSIX.1b. A more correct way to retrieve additional information is -to use the SA_SIGINFO option when setting the handler */ -#undef USE_SIGCONTEXT - -#ifndef USE_SIGCONTEXT -/* get REG_EIP / REG_RIP from ucontext.h */ -#include <ucontext.h> - - #ifndef EIP - #define EIP 14 - #endif - - #if (defined (__x86_64__)) - #ifndef REG_RIP - #define REG_RIP REG_INDEX(rip) /* seems to be 16 */ - #endif - #endif - -#endif - -#define OS_STACKTRACE_MAX_DEPTH 128 - -/***************************************************************//** -Prints stacktrace for this thread. 
-*/ -void -os_stacktrace_print( -/*================*/ - int sig_num, - siginfo_t* info, - void* ucontext) -{ - void* array[OS_STACKTRACE_MAX_DEPTH]; - char** messages; - int size, i; - void* caller_address = NULL; - - /* Get the address at the time the signal was raised */ -#if defined(__x86_64__) - ucontext_t* uc = (ucontext_t*) ucontext; - caller_address = (void*) uc->uc_mcontext.gregs[REG_RIP] ; -#elif defined(__hppa__) - ucontext_t* uc = (ucontext_t*) ucontext; - caller_address = (void*) (uc->uc_mcontext.sc_iaoq[0] & ~0x3UL) ; -#elif (defined (__ppc__)) || (defined (__powerpc__)) - ucontext_t* uc = (ucontext_t*) ucontext; - caller_address = (void*) uc->uc_mcontext.regs->nip ; -#elif defined(__sparc__) - struct sigcontext* sc = (struct sigcontext*) ucontext; -#if __WORDSIZE == 64 - caller_address = (void*) sc->sigc_regs.tpc ; -#else - caller_address = (void*) sc->si_regs.pc ; -#endif -#elif defined(__i386__) - ucontext_t* uc = (ucontext_t*) ucontext; - caller_address = (void*) uc->uc_mcontext.gregs[REG_EIP] ; -#else - /* Unsupported return */ - return; -#endif - - fprintf(stderr, "InnoDB: signal %d (%s), address is %p from %p\n", - sig_num, strsignal(sig_num), info->si_addr, - (void *)caller_address); - - size = backtrace(array, OS_STACKTRACE_MAX_DEPTH); - - /* overwrite sigaction with caller's address */ - array[1] = caller_address; - - messages = backtrace_symbols(array, size); - - fprintf(stderr, - "InnoDB: Stacktrace for Thread %lu \n", - (ulong) os_thread_pf(os_thread_get_curr_id())); - - /* skip first stack frame (points here) */ - for (i = 1; i < size && messages != NULL; ++i) - { - fprintf(stderr, "InnoDB: [bt]: (%d) %s\n", i, messages[i]); - } - - free(messages); -} - -#endif /* __linux__ */ diff --git a/storage/xtradb/os/os0sync.cc b/storage/xtradb/os/os0sync.cc deleted file mode 100644 index df878c88105..00000000000 --- a/storage/xtradb/os/os0sync.cc +++ /dev/null @@ -1,635 +0,0 @@ 
-/***************************************************************************** - -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file os/os0sync.cc -The interface to the operating system -synchronization primitives. - -Created 9/6/1995 Heikki Tuuri -*******************************************************/ - -#include "os0sync.h" -#ifdef UNIV_NONINL -#include "os0sync.ic" -#endif - -#ifdef __WIN__ -#include <windows.h> -#endif - -#include "ut0mem.h" -#include "srv0start.h" -#include "srv0srv.h" - -/* Type definition for an operating system mutex struct */ -struct os_mutex_t{ - os_event_t event; /*!< Used by sync0arr.cc for queing threads */ - void* handle; /*!< OS handle to mutex */ - ulint count; /*!< we use this counter to check - that the same thread does not - recursively lock the mutex: we - do not assume that the OS mutex - supports recursive locking, though - NT seems to do that */ -}; - -// All the os_*_count variables are accessed atomically - -/** This is incremented by 1 in os_thread_create and decremented by 1 in -os_thread_exit. 
*/ -UNIV_INTERN ulint os_thread_count = 0; - -UNIV_INTERN ulint os_event_count = 0; -UNIV_INTERN ulint os_mutex_count = 0; -UNIV_INTERN ulint os_fast_mutex_count = 0; - -/* The number of microsecnds in a second. */ -static const ulint MICROSECS_IN_A_SECOND = 1000000; - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t event_os_mutex_key; -UNIV_INTERN mysql_pfs_key_t os_mutex_key; -#endif - -/*********************************************************//** -Initialitze condition variable */ -UNIV_INLINE -void -os_cond_init( -/*=========*/ - os_cond_t* cond) /*!< in: condition variable. */ -{ - ut_a(cond); - -#ifdef __WIN__ - InitializeConditionVariable(cond); -#else - ut_a(pthread_cond_init(cond, NULL) == 0); -#endif -} - -/*********************************************************//** -Do a timed wait on condition variable. -@return TRUE if timed out, FALSE otherwise */ -UNIV_INLINE -ibool -os_cond_wait_timed( -/*===============*/ - os_cond_t* cond, /*!< in: condition variable. */ - os_fast_mutex_t* fast_mutex, /*!< in: fast mutex */ -#ifndef __WIN__ - const struct timespec* abstime /*!< in: timeout */ -#else - DWORD time_in_ms /*!< in: timeout in - milliseconds*/ -#endif /* !__WIN__ */ -) -{ - fast_mutex_t* mutex = &fast_mutex->mutex; -#ifdef __WIN__ - BOOL ret; - DWORD err; - - - ret = SleepConditionVariableCS(cond, mutex, time_in_ms); - - if (!ret) { - err = GetLastError(); - /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx, - "Condition variables are subject to spurious wakeups - (those not associated with an explicit wake) and stolen wakeups - (another thread manages to run before the woken thread)." - Check for both types of timeouts. 
- Conditions are checked by the caller.*/ - if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) { - return(TRUE); - } - } - - ut_a(ret); - - return(FALSE); -#else - int ret; - - ret = pthread_cond_timedwait(cond, mutex, abstime); - - switch (ret) { - case 0: - case ETIMEDOUT: - /* We play it safe by checking for EINTR even though - according to the POSIX documentation it can't return EINTR. */ - case EINTR: - break; - - default: - fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: " - "%d: abstime={%lu,%lu}\n", - ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec); - ut_error; - } - - return(ret == ETIMEDOUT); -#endif -} -/*********************************************************//** -Wait on condition variable */ -UNIV_INLINE -void -os_cond_wait( -/*=========*/ - os_cond_t* cond, /*!< in: condition variable. */ - os_fast_mutex_t* fast_mutex)/*!< in: fast mutex */ -{ - fast_mutex_t* mutex = &fast_mutex->mutex; - ut_a(cond); - ut_a(mutex); - -#ifdef __WIN__ - ut_a(SleepConditionVariableCS(cond, mutex, INFINITE)); -#else - ut_a(pthread_cond_wait(cond, mutex) == 0); -#endif -} - -/*********************************************************//** -Wakes all threads waiting for condition variable */ -UNIV_INLINE -void -os_cond_broadcast( -/*==============*/ - os_cond_t* cond) /*!< in: condition variable. */ -{ - ut_a(cond); - -#ifdef __WIN__ - WakeAllConditionVariable(cond); -#else - ut_a(pthread_cond_broadcast(cond) == 0); -#endif -} - -/*********************************************************//** -Destroys condition variable */ -UNIV_INLINE -void -os_cond_destroy( -/*============*/ - os_cond_t* cond) /*!< in: condition variable. */ -{ -#ifdef __WIN__ - /* Do nothing */ -#else - ut_a(pthread_cond_destroy(cond) == 0); -#endif -} - -/*********************************************************//** -Initializes global event and OS 'slow' mutex lists. 
*/ -UNIV_INTERN -void -os_sync_init(void) -/*==============*/ -{ -} - -/** Create an event semaphore, i.e., a semaphore which may just have two -states: signaled and nonsignaled. The created event is manual reset: it must be -reset explicitly by calling sync_os_reset_event. -@param[in,out] event memory block where to create the event */ -UNIV_INTERN -void -os_event_create(os_event_t event) -{ -#ifndef PFS_SKIP_EVENT_MUTEX - os_fast_mutex_init(event_os_mutex_key, &event->os_mutex); -#else - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex); -#endif - - os_cond_init(&(event->cond_var)); - - event->init_count_and_set(); - - os_atomic_increment_ulint(&os_event_count, 1); -} - -/*********************************************************//** -Creates an event semaphore, i.e., a semaphore which may just have two -states: signaled and nonsignaled. The created event is manual reset: it -must be reset explicitly by calling sync_os_reset_event. -@return the event handle */ -UNIV_INTERN -os_event_t -os_event_create(void) -/*==================*/ -{ - os_event_t event = static_cast<os_event_t>(ut_malloc(sizeof(*event))); - - os_event_create(event); - - return(event); -} - -/**********************************************************//** -Sets an event semaphore to the signaled state: lets waiting threads -proceed. */ -UNIV_INTERN -void -os_event_set( -/*=========*/ - os_event_t event) /*!< in: event to set */ -{ - ut_a(event); - - os_fast_mutex_lock(&(event->os_mutex)); - - if (UNIV_UNLIKELY(event->is_set())) { - /* Do nothing */ - } else { - event->set(); - event->inc_signal_count(); - os_cond_broadcast(&(event->cond_var)); - } - - os_fast_mutex_unlock(&(event->os_mutex)); -} - -/**********************************************************//** -Resets an event semaphore to the nonsignaled state. Waiting threads will -stop to wait for the event. 
-The return value should be passed to os_even_wait_low() if it is desired -that this thread should not wait in case of an intervening call to -os_event_set() between this os_event_reset() and the -os_event_wait_low() call. See comments for os_event_wait_low(). -@return current signal_count. */ -UNIV_INTERN -ib_int64_t -os_event_reset( -/*===========*/ - os_event_t event) /*!< in: event to reset */ -{ - ib_int64_t ret = 0; - - ut_a(event); - - os_fast_mutex_lock(&(event->os_mutex)); - - if (UNIV_UNLIKELY(!event->is_set())) { - /* Do nothing */ - } else { - event->reset(); - } - ret = event->signal_count(); - - os_fast_mutex_unlock(&(event->os_mutex)); - return(ret); -} - -/**********************************************************//** -Frees an event object. */ -UNIV_INTERN -void -os_event_free( -/*==========*/ - os_event_t event, /*!< in: event to free */ - bool free_memory)/*!< in: if true, deallocate the event - memory block too */ - -{ - ut_a(event); - - os_fast_mutex_free(&(event->os_mutex)); - - os_cond_destroy(&(event->cond_var)); - - os_atomic_decrement_ulint(&os_event_count, 1); - - if (free_memory) - ut_free(event); -} - -/**********************************************************//** -Waits for an event object until it is in the signaled state. - -Typically, if the event has been signalled after the os_event_reset() -we'll return immediately because event->is_set == TRUE. -There are, however, situations (e.g.: sync_array code) where we may -lose this information. For example: - -thread A calls os_event_reset() -thread B calls os_event_set() [event->is_set == TRUE] -thread C calls os_event_reset() [event->is_set == FALSE] -thread A calls os_event_wait() [infinite wait!] -thread C calls os_event_wait() [infinite wait!] - -Where such a scenario is possible, to avoid infinite wait, the -value returned by os_event_reset() should be passed in as -reset_sig_count. 
*/ -UNIV_INTERN -void -os_event_wait_low( -/*==============*/ - os_event_t event, /*!< in: event to wait */ - ib_int64_t reset_sig_count)/*!< in: zero or the value - returned by previous call of - os_event_reset(). */ -{ - - os_fast_mutex_lock(&event->os_mutex); - - if (!reset_sig_count) { - reset_sig_count = event->signal_count(); - } - - while (!event->is_set() && event->signal_count() == reset_sig_count) { - os_cond_wait(&(event->cond_var), &(event->os_mutex)); - - /* Solaris manual said that spurious wakeups may occur: we - have to check if the event really has been signaled after - we came here to wait */ - } - - os_fast_mutex_unlock(&event->os_mutex); -} - -/**********************************************************//** -Waits for an event object until it is in the signaled state or -a timeout is exceeded. -@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ -UNIV_INTERN -ulint -os_event_wait_time_low( -/*===================*/ - os_event_t event, /*!< in: event to wait */ - ulint time_in_usec, /*!< in: timeout in - microseconds, or - OS_SYNC_INFINITE_TIME */ - ib_int64_t reset_sig_count) /*!< in: zero or the value - returned by previous call of - os_event_reset(). 
*/ -{ - ibool timed_out = FALSE; - -#ifdef __WIN__ - DWORD time_in_ms; - if (time_in_usec != OS_SYNC_INFINITE_TIME) { - time_in_ms = static_cast<DWORD>(time_in_usec / 1000); - } else { - time_in_ms = INFINITE; - } -#else - struct timespec abstime; - - if (time_in_usec != OS_SYNC_INFINITE_TIME) { - struct timeval tv; - int ret; - ulint sec; - ulint usec; - - ret = ut_usectime(&sec, &usec); - ut_a(ret == 0); - - tv.tv_sec = sec; - tv.tv_usec = usec; - - tv.tv_usec += time_in_usec; - - if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) { - tv.tv_sec += tv.tv_usec / MICROSECS_IN_A_SECOND; - tv.tv_usec %= MICROSECS_IN_A_SECOND; - } - - abstime.tv_sec = tv.tv_sec; - abstime.tv_nsec = tv.tv_usec * 1000; - } else { - abstime.tv_nsec = 999999999; - abstime.tv_sec = (time_t) ULINT_MAX; - } - - ut_a(abstime.tv_nsec <= 999999999); - -#endif /* __WIN__ */ - - os_fast_mutex_lock(&event->os_mutex); - - if (!reset_sig_count) { - reset_sig_count = event->signal_count(); - } - - do { - if (event->is_set() - || event->signal_count() != reset_sig_count) { - - break; - } - - timed_out = os_cond_wait_timed( - &event->cond_var, &event->os_mutex, -#ifndef __WIN__ - &abstime -#else - time_in_ms -#endif /* !__WIN__ */ - ); - - } while (!timed_out); - - os_fast_mutex_unlock(&event->os_mutex); - - return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0); -} - -/*********************************************************//** -Creates an operating system mutex semaphore. Because these are slow, the -mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible. 
-@return the mutex handle */ -UNIV_INTERN -os_ib_mutex_t -os_mutex_create(void) -/*=================*/ -{ - os_fast_mutex_t* mutex; - os_ib_mutex_t mutex_str; - - mutex = static_cast<os_fast_mutex_t*>( - ut_malloc(sizeof(os_fast_mutex_t))); - - os_fast_mutex_init(os_mutex_key, mutex); - - mutex_str = static_cast<os_ib_mutex_t>(ut_malloc(sizeof *mutex_str)); - - mutex_str->handle = mutex; - mutex_str->count = 0; - mutex_str->event = os_event_create(); - - os_atomic_increment_ulint(&os_mutex_count, 1); - - return(mutex_str); -} - -/**********************************************************//** -Acquires ownership of a mutex semaphore. */ -UNIV_INTERN -void -os_mutex_enter( -/*===========*/ - os_ib_mutex_t mutex) /*!< in: mutex to acquire */ -{ - os_fast_mutex_lock(static_cast<os_fast_mutex_t*>(mutex->handle)); - - (mutex->count)++; - - ut_a(mutex->count == 1); -} - -/**********************************************************//** -Releases ownership of a mutex. */ -UNIV_INTERN -void -os_mutex_exit( -/*==========*/ - os_ib_mutex_t mutex) /*!< in: mutex to release */ -{ - ut_a(mutex); - - ut_a(mutex->count == 1); - - (mutex->count)--; - os_fast_mutex_unlock(static_cast<os_fast_mutex_t*>(mutex->handle)); -} - -/**********************************************************//** -Frees a mutex object. */ -UNIV_INTERN -void -os_mutex_free( -/*==========*/ - os_ib_mutex_t mutex) /*!< in: mutex to free */ -{ - ut_a(mutex); - - os_event_free(mutex->event); - - os_atomic_decrement_ulint(&os_mutex_count, 1); - - os_fast_mutex_free(static_cast<os_fast_mutex_t*>(mutex->handle)); - ut_free(mutex->handle); - ut_free(mutex); -} - -/*********************************************************//** -Initializes an operating system fast mutex semaphore. 
*/ -UNIV_INTERN -void -os_fast_mutex_init_func( -/*====================*/ - fast_mutex_t* fast_mutex) /*!< in: fast mutex */ -{ -#ifdef __WIN__ - ut_a(fast_mutex); - - InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex); -#else - ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST)); -#endif - os_atomic_increment_ulint(&os_fast_mutex_count, 1); -} - -/**********************************************************//** -Acquires ownership of a fast mutex. */ -UNIV_INTERN -void -os_fast_mutex_lock_func( -/*====================*/ - fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ -{ -#ifdef __WIN__ - EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex); -#else - pthread_mutex_lock(fast_mutex); -#endif -} - -/**********************************************************//** -Releases ownership of a fast mutex. */ -UNIV_INTERN -void -os_fast_mutex_unlock_func( -/*======================*/ - fast_mutex_t* fast_mutex) /*!< in: mutex to release */ -{ -#ifdef __WIN__ - LeaveCriticalSection(fast_mutex); -#else - pthread_mutex_unlock(fast_mutex); -#endif -} - -/**********************************************************//** -Releases ownership of a fast mutex. Implies a full memory barrier even on -platforms such as PowerPC where this is not normally required. */ -UNIV_INTERN -void -os_fast_mutex_unlock_full_barrier( -/*=================*/ - os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */ -{ -#ifdef __WIN__ - LeaveCriticalSection(&fast_mutex->mutex); -#else - pthread_mutex_unlock(&fast_mutex->mutex); -#ifdef __powerpc__ - os_mb; -#endif -#endif -} - -/**********************************************************//** -Frees a mutex object. 
*/ -UNIV_INTERN -void -os_fast_mutex_free_func( -/*====================*/ - fast_mutex_t* fast_mutex) /*!< in: mutex to free */ -{ -#ifdef __WIN__ - ut_a(fast_mutex); - - DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex); -#else - int ret; - - ret = pthread_mutex_destroy(fast_mutex); - - if (UNIV_UNLIKELY(ret != 0)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: error: return value %lu when calling\n" - "InnoDB: pthread_mutex_destroy().\n", (ulint) ret); - fprintf(stderr, - "InnoDB: Byte contents of the pthread mutex at %p:\n", - (void*) fast_mutex); - ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t)); - putc('\n', stderr); - } -#endif - - os_atomic_decrement_ulint(&os_fast_mutex_count, 1); -} diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc deleted file mode 100644 index 8baf06b9bb7..00000000000 --- a/storage/xtradb/os/os0thread.cc +++ /dev/null @@ -1,355 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file os/os0thread.cc -The interface to the operating system thread control primitives - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ - -#include "os0thread.h" -#ifdef UNIV_NONINL -#include "os0thread.ic" -#endif - -#ifdef __WIN__ -#include <windows.h> -#elif UNIV_LINUX -#include <sys/time.h> -#include <sys/resource.h> -#include <unistd.h> -#include <sys/syscall.h> -#include <sys/types.h> -#endif - -#ifndef UNIV_HOTBACKUP -#include "srv0srv.h" -#include "os0sync.h" - -/***************************************************************//** -Compares two thread ids for equality. -@return TRUE if equal */ -UNIV_INTERN -ibool -os_thread_eq( -/*=========*/ - os_thread_id_t a, /*!< in: OS thread or thread id */ - os_thread_id_t b) /*!< in: OS thread or thread id */ -{ -#ifdef __WIN__ - if (a == b) { - return(TRUE); - } - - return(FALSE); -#else - if (pthread_equal(a, b)) { - return(TRUE); - } - - return(FALSE); -#endif -} - -/****************************************************************//** -Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is -unique for the thread though! -@return thread identifier as a number */ -UNIV_INTERN -ulint -os_thread_pf( -/*=========*/ - os_thread_id_t a) /*!< in: OS thread identifier */ -{ -#ifdef UNIV_HPUX10 - /* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2, - field3. We do not know if field1 determines the thread uniquely. 
*/ - - return((ulint)(a.field1)); -#else - return((ulint) a); -#endif -} - -/*****************************************************************//** -Returns the thread identifier of current thread. Currently the thread -identifier in Unix is the thread handle itself. Note that in HP-UX -pthread_t is a struct of 3 fields. -@return current thread identifier */ -UNIV_INTERN -os_thread_id_t -os_thread_get_curr_id(void) -/*=======================*/ -{ -#ifdef __WIN__ - return(GetCurrentThreadId()); -#else - return(pthread_self()); -#endif -} - -/*****************************************************************//** -Returns the system-specific thread identifier of current thread. On Linux, -returns tid. On other systems currently returns os_thread_get_curr_id(). - -@return current thread identifier */ -UNIV_INTERN -os_tid_t -os_thread_get_tid(void) -/*===================*/ -{ -#ifdef UNIV_LINUX - return((os_tid_t)syscall(SYS_gettid)); -#else - return(os_thread_get_curr_id()); -#endif -} - - -/****************************************************************//** -Creates a new thread of execution. The execution starts from -the function given. The start function takes a void* parameter -and returns an ulint. -@return handle to the thread */ -UNIV_INTERN -os_thread_t -os_thread_create_func( -/*==================*/ - os_thread_func_t func, /*!< in: pointer to function - from which to start */ - void* arg, /*!< in: argument to start - function */ - os_thread_id_t* thread_id) /*!< out: id of the created - thread, or NULL */ -{ - /* the new thread should look recent changes up here so far. 
*/ - os_wmb; - -#ifdef __WIN__ - os_thread_t thread; - DWORD win_thread_id; - - os_atomic_increment_ulint(&os_thread_count, 1); - - thread = CreateThread(NULL, /* no security attributes */ - 0, /* default size stack */ - func, - arg, - 0, /* thread runs immediately */ - &win_thread_id); - - if (thread_id) { - *thread_id = win_thread_id; - } - - return((os_thread_t)thread); -#else - int ret; - os_thread_t pthread; - pthread_attr_t attr; - -#ifndef UNIV_HPUX10 - pthread_attr_init(&attr); -#endif - -#ifdef UNIV_AIX - /* We must make sure a thread stack is at least 32 kB, otherwise - InnoDB might crash; we do not know if the default stack size on - AIX is always big enough. An empirical test on AIX-4.3 suggested - the size was 96 kB, though. */ - - ret = pthread_attr_setstacksize(&attr, - (size_t)(PTHREAD_STACK_MIN - + 32 * 1024)); - if (ret) { - fprintf(stderr, - "InnoDB: Error: pthread_attr_setstacksize" - " returned %d\n", ret); - exit(1); - } -#endif - ulint new_count = os_atomic_increment_ulint(&os_thread_count, 1); - ut_a(new_count <= OS_THREAD_MAX_N); - -#ifdef UNIV_HPUX10 - ret = pthread_create(&pthread, pthread_attr_default, func, arg); -#else - ret = pthread_create(&pthread, &attr, func, arg); -#endif - ut_a(ret == 0); - -#ifndef UNIV_HPUX10 - pthread_attr_destroy(&attr); -#endif - - if (thread_id) { - *thread_id = pthread; - } - - return(pthread); -#endif -} - -/** Waits until the specified thread completes and joins it. -Its return value is ignored. -@param[in,out] thread thread to join */ -UNIV_INTERN -void -os_thread_join( - os_thread_t thread) -{ - /* This function is currently only used to workaround glibc bug - described in http://bugs.mysql.com/bug.php?id=82886 - - On Windows, no workarounds are necessary, all threads - are "detached" upon thread exit (handle is closed), so we do - nothing. - */ -#ifdef __WIN__ - /* Do nothing. 
*/ -#else -#ifdef UNIV_DEBUG - const int ret MY_ATTRIBUTE((unused)) = -#endif /* UNIV_DEBUG */ - pthread_join(thread, NULL); - - /* Waiting on already-quit threads is allowed. */ - ut_ad(ret == 0 || ret == ESRCH); -#endif /* __WIN__ */ -} - -/*****************************************************************//** -Exits the current thread. */ -UNIV_INTERN -void -os_thread_exit( -/*===========*/ - void* exit_value, /*!< in: exit value; in Windows this void* - is cast as a DWORD */ - bool detach) /*!< in: if true, the thread will be detached - right before exiting. If false, another thread - is responsible for joining this thread. */ -{ -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Thread exits, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif - -#ifdef UNIV_PFS_THREAD - pfs_delete_thread(); -#endif - - os_atomic_decrement_ulint(&os_thread_count, 1); - -#ifdef __WIN__ - ExitThread((DWORD) exit_value); -#else - if (detach) { - pthread_detach(pthread_self()); - } - pthread_exit(exit_value); -#endif -} - -/*****************************************************************//** -Advises the os to give up remainder of the thread's time slice. */ -UNIV_INTERN -void -os_thread_yield(void) -/*=================*/ -{ -#if defined(__WIN__) - SwitchToThread(); -#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H)) - sched_yield(); -#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG) - pthread_yield(); -#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG) - pthread_yield(0); -#else - os_thread_sleep(0); -#endif -} -#endif /* !UNIV_HOTBACKUP */ - -/*****************************************************************//** -The thread sleeps at least the time given in microseconds. 
*/ -UNIV_INTERN -void -os_thread_sleep( -/*============*/ - ulint tm) /*!< in: time in microseconds */ -{ -#ifdef __WIN__ - Sleep((DWORD) tm / 1000); -#else - struct timeval t; - - t.tv_sec = tm / 1000000; - t.tv_usec = tm % 1000000; - - select(0, NULL, NULL, NULL, &t); -#endif -} - -/*****************************************************************//** -Set relative scheduling priority for a given thread on Linux. Currently a -no-op on other systems. - -@return An actual thread priority after the update */ -UNIV_INTERN -ulint -os_thread_set_priority( -/*===================*/ - os_tid_t thread_id, /*!< in: thread id */ - ulint relative_priority) /*!< in: system-specific - priority value */ -{ -#ifdef UNIV_LINUX - lint thread_nice = 19 - relative_priority; - if (setpriority(PRIO_PROCESS, thread_id, thread_nice) == -1) { - ib_logf(IB_LOG_LEVEL_WARN, - "Setting thread %lu nice to %ld failed, " - "current nice %d, errno %d", - os_thread_pf(thread_id), thread_nice, - getpriority(PRIO_PROCESS, thread_id), errno); - } - return(19 - getpriority(PRIO_PROCESS, thread_id)); -#else - return(relative_priority); -#endif -} - -/*****************************************************************//** -Get priority for a given thread on Linux. Currently a -no-op on other systems. - -@return An actual thread priority */ -UNIV_INTERN -ulint -os_thread_get_priority( -/*===================*/ - os_tid_t thread_id) /*!< in: thread id */ -{ -#ifdef UNIV_LINUX - return (getpriority(PRIO_PROCESS, thread_id)); -#else - return (0); -#endif -} diff --git a/storage/xtradb/page/page0cur.cc b/storage/xtradb/page/page0cur.cc deleted file mode 100644 index 76e4c2aed9b..00000000000 --- a/storage/xtradb/page/page0cur.cc +++ /dev/null @@ -1,2180 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file page/page0cur.cc -The page cursor - -Created 10/4/1994 Heikki Tuuri -*************************************************************************/ - -#include "page0cur.h" -#ifdef UNIV_NONINL -#include "page0cur.ic" -#endif - -#include "page0zip.h" -#include "btr0btr.h" -#include "mtr0log.h" -#include "log0recv.h" -#include "ut0ut.h" -#ifndef UNIV_HOTBACKUP -#include "rem0cmp.h" - -#ifdef PAGE_CUR_ADAPT -# ifdef UNIV_SEARCH_PERF_STAT -static ulint page_cur_short_succ = 0; -# endif /* UNIV_SEARCH_PERF_STAT */ - -/*******************************************************************//** -This is a linear congruential generator PRNG. Returns a pseudo random -number between 0 and 2^64-1 inclusive. 
The formula and the constants -being used are: -X[n+1] = (a * X[n] + c) mod m -where: -X[0] = ut_time_us(NULL) -a = 1103515245 (3^5 * 5 * 7 * 129749) -c = 12345 (3 * 5 * 823) -m = 18446744073709551616 (2^64) - -@return number between 0 and 2^64-1 */ -static -ib_uint64_t -page_cur_lcg_prng(void) -/*===================*/ -{ -#define LCG_a 1103515245 -#define LCG_c 12345 - static ib_uint64_t lcg_current = 0; - static ibool initialized = FALSE; - - if (!initialized) { - lcg_current = (ib_uint64_t) ut_time_us(NULL); - initialized = TRUE; - } - - /* no need to "% 2^64" explicitly because lcg_current is - 64 bit and this will be done anyway */ - lcg_current = LCG_a * lcg_current + LCG_c; - - return(lcg_current); -} - -/****************************************************************//** -Tries a search shortcut based on the last insert. -@return TRUE on success */ -UNIV_INLINE -ibool -page_cur_try_search_shortcut( -/*=========================*/ - const buf_block_t* block, /*!< in: index page */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint* iup_matched_fields, - /*!< in/out: already matched - fields in upper limit record */ - ulint* iup_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - ulint* ilow_matched_fields, - /*!< in/out: already matched - fields in lower limit record */ - ulint* ilow_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - page_cur_t* cursor) /*!< out: page cursor */ -{ - const rec_t* rec; - const rec_t* next_rec; - ulint low_match; - ulint low_bytes; - ulint up_match; - ulint up_bytes; -#ifdef UNIV_SEARCH_DEBUG - page_cur_t cursor2; -#endif - ibool success = FALSE; - const page_t* page = buf_block_get_frame(block); - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(dtuple_check_typed(tuple)); - - rec = 
page_header_get_ptr(page, PAGE_LAST_INSERT); - offsets = rec_get_offsets(rec, index, offsets, - dtuple_get_n_fields(tuple), &heap); - - ut_ad(rec); - ut_ad(page_rec_is_user_rec(rec)); - - ut_pair_min(&low_match, &low_bytes, - *ilow_matched_fields, *ilow_matched_bytes, - *iup_matched_fields, *iup_matched_bytes); - - up_match = low_match; - up_bytes = low_bytes; - - if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets, - &low_match, &low_bytes) < 0) { - goto exit_func; - } - - next_rec = page_rec_get_next_const(rec); - offsets = rec_get_offsets(next_rec, index, offsets, - dtuple_get_n_fields(tuple), &heap); - - if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, - &up_match, &up_bytes) >= 0) { - goto exit_func; - } - - page_cur_position(rec, block, cursor); - -#ifdef UNIV_SEARCH_DEBUG - page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG, - iup_matched_fields, - iup_matched_bytes, - ilow_matched_fields, - ilow_matched_bytes, - &cursor2); - ut_a(cursor2.rec == cursor->rec); - - if (!page_rec_is_supremum(next_rec)) { - - ut_a(*iup_matched_fields == up_match); - ut_a(*iup_matched_bytes == up_bytes); - } - - ut_a(*ilow_matched_fields == low_match); - ut_a(*ilow_matched_bytes == low_bytes); -#endif - if (!page_rec_is_supremum(next_rec)) { - - *iup_matched_fields = up_match; - *iup_matched_bytes = up_bytes; - } - - *ilow_matched_fields = low_match; - *ilow_matched_bytes = low_bytes; - -#ifdef UNIV_SEARCH_PERF_STAT - page_cur_short_succ++; -#endif - success = TRUE; -exit_func: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(success); -} - -#endif - -#ifdef PAGE_CUR_LE_OR_EXTENDS -/****************************************************************//** -Checks if the nth field in a record is a character type field which extends -the nth field in tuple, i.e., the field is longer or equal in length and has -common first characters. 
-@return TRUE if rec field extends tuple field */ -static -ibool -page_cur_rec_field_extends( -/*=======================*/ - const dtuple_t* tuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: record */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n) /*!< in: compare nth field */ -{ - const dtype_t* type; - const dfield_t* dfield; - const byte* rec_f; - ulint rec_f_len; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - dfield = dtuple_get_nth_field(tuple, n); - - type = dfield_get_type(dfield); - - rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len); - - if (type->mtype == DATA_VARCHAR - || type->mtype == DATA_CHAR - || type->mtype == DATA_FIXBINARY - || type->mtype == DATA_BINARY - || type->mtype == DATA_BLOB - || type->mtype == DATA_VARMYSQL - || type->mtype == DATA_MYSQL) { - - if (dfield_get_len(dfield) != UNIV_SQL_NULL - && rec_f_len != UNIV_SQL_NULL - && rec_f_len >= dfield_get_len(dfield) - && !cmp_data_data_slow(type->mtype, type->prtype, - dfield_get_data(dfield), - dfield_get_len(dfield), - rec_f, dfield_get_len(dfield))) { - - return(TRUE); - } - } - - return(FALSE); -} -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - -/****************************************************************//** -Searches the right position for a page cursor. 
*/ -UNIV_INTERN -void -page_cur_search_with_match( -/*=======================*/ - const buf_block_t* block, /*!< in: buffer block */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, - PAGE_CUR_LE, PAGE_CUR_G, or - PAGE_CUR_GE */ - ulint* iup_matched_fields, - /*!< in/out: already matched - fields in upper limit record */ - ulint* iup_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - ulint* ilow_matched_fields, - /*!< in/out: already matched - fields in lower limit record */ - ulint* ilow_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - page_cur_t* cursor) /*!< out: page cursor */ -{ - ulint up; - ulint low; - ulint mid; - const page_t* page; - const page_dir_slot_t* slot; - const rec_t* up_rec; - const rec_t* low_rec; - const rec_t* mid_rec; - ulint up_matched_fields; - ulint up_matched_bytes; - ulint low_matched_fields; - ulint low_matched_bytes; - ulint cur_matched_fields; - ulint cur_matched_bytes; - int cmp; -#ifdef UNIV_SEARCH_DEBUG - int dbg_cmp; - ulint dbg_matched_fields; - ulint dbg_matched_bytes; -#endif -#ifdef UNIV_ZIP_DEBUG - const page_zip_des_t* page_zip = buf_block_get_page_zip(block); -#endif /* UNIV_ZIP_DEBUG */ - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes - && ilow_matched_fields && ilow_matched_bytes && cursor); - ut_ad(dtuple_validate(tuple)); -#ifdef UNIV_DEBUG -# ifdef PAGE_CUR_DBG - if (mode != PAGE_CUR_DBG) -# endif /* PAGE_CUR_DBG */ -# ifdef PAGE_CUR_LE_OR_EXTENDS - if (mode != PAGE_CUR_LE_OR_EXTENDS) -# endif /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || mode == PAGE_CUR_G || mode == PAGE_CUR_GE); -#endif /* UNIV_DEBUG */ - page = buf_block_get_frame(block); -#ifdef 
UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - page_check_dir(page); - -#ifdef PAGE_CUR_ADAPT - if (page_is_leaf(page) - && (mode == PAGE_CUR_LE) - && (page_header_get_field(page, PAGE_N_DIRECTION) > 3) - && (page_header_get_ptr(page, PAGE_LAST_INSERT)) - && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) { - - if (page_cur_try_search_shortcut( - block, index, tuple, - iup_matched_fields, iup_matched_bytes, - ilow_matched_fields, ilow_matched_bytes, - cursor)) { - return; - } - } -# ifdef PAGE_CUR_DBG - if (mode == PAGE_CUR_DBG) { - mode = PAGE_CUR_LE; - } -# endif -#endif - - /* The following flag does not work for non-latin1 char sets because - cmp_full_field does not tell how many bytes matched */ -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - - /* If mode PAGE_CUR_G is specified, we are trying to position the - cursor to answer a query of the form "tuple < X", where tuple is - the input parameter, and X denotes an arbitrary physical record on - the page. We want to position the cursor on the first X which - satisfies the condition. */ - - up_matched_fields = *iup_matched_fields; - up_matched_bytes = *iup_matched_bytes; - low_matched_fields = *ilow_matched_fields; - low_matched_bytes = *ilow_matched_bytes; - - /* Perform binary search. First the search is done through the page - directory, after that as a linear search in the list of records - owned by the upper limit directory slot. 
*/ - - low = 0; - up = page_dir_get_n_slots(page) - 1; - - /* Perform binary search until the lower and upper limit directory - slots come to the distance 1 of each other */ - - while (up - low > 1) { - mid = (low + up) / 2; - slot = page_dir_get_nth_slot(page, mid); - mid_rec = page_dir_slot_get_rec(slot); - - ut_pair_min(&cur_matched_fields, &cur_matched_bytes, - low_matched_fields, low_matched_bytes, - up_matched_fields, up_matched_bytes); - - offsets = rec_get_offsets(mid_rec, index, offsets, - dtuple_get_n_fields_cmp(tuple), - &heap); - - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, - &cur_matched_fields, - &cur_matched_bytes); - if (UNIV_LIKELY(cmp > 0)) { -low_slot_match: - low = mid; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - - } else if (UNIV_EXPECT(cmp, -1)) { -#ifdef PAGE_CUR_LE_OR_EXTENDS - if (mode == PAGE_CUR_LE_OR_EXTENDS - && page_cur_rec_field_extends( - tuple, mid_rec, offsets, - cur_matched_fields)) { - - goto low_slot_match; - } -#endif /* PAGE_CUR_LE_OR_EXTENDS */ -up_slot_match: - up = mid; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; - - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE -#ifdef PAGE_CUR_LE_OR_EXTENDS - || mode == PAGE_CUR_LE_OR_EXTENDS -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - ) { - - goto low_slot_match; - } else { - - goto up_slot_match; - } - } - - slot = page_dir_get_nth_slot(page, low); - low_rec = page_dir_slot_get_rec(slot); - slot = page_dir_get_nth_slot(page, up); - up_rec = page_dir_slot_get_rec(slot); - - /* Perform linear search until the upper and lower records come to - distance 1 of each other. 
*/ - - while (page_rec_get_next_const(low_rec) != up_rec) { - - mid_rec = page_rec_get_next_const(low_rec); - - ut_pair_min(&cur_matched_fields, &cur_matched_bytes, - low_matched_fields, low_matched_bytes, - up_matched_fields, up_matched_bytes); - - offsets = rec_get_offsets(mid_rec, index, offsets, - dtuple_get_n_fields_cmp(tuple), - &heap); - - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, - &cur_matched_fields, - &cur_matched_bytes); - if (UNIV_LIKELY(cmp > 0)) { -low_rec_match: - low_rec = mid_rec; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - - } else if (UNIV_EXPECT(cmp, -1)) { -#ifdef PAGE_CUR_LE_OR_EXTENDS - if (mode == PAGE_CUR_LE_OR_EXTENDS - && page_cur_rec_field_extends( - tuple, mid_rec, offsets, - cur_matched_fields)) { - - goto low_rec_match; - } -#endif /* PAGE_CUR_LE_OR_EXTENDS */ -up_rec_match: - up_rec = mid_rec; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE -#ifdef PAGE_CUR_LE_OR_EXTENDS - || mode == PAGE_CUR_LE_OR_EXTENDS -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - ) { - - goto low_rec_match; - } else { - - goto up_rec_match; - } - } - -#ifdef UNIV_SEARCH_DEBUG - - /* Check that the lower and upper limit records have the - right alphabetical order compared to tuple. 
*/ - dbg_matched_fields = 0; - dbg_matched_bytes = 0; - - offsets = rec_get_offsets(low_rec, index, offsets, - ULINT_UNDEFINED, &heap); - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets, - &dbg_matched_fields, - &dbg_matched_bytes); - if (mode == PAGE_CUR_G) { - ut_a(dbg_cmp >= 0); - } else if (mode == PAGE_CUR_GE) { - ut_a(dbg_cmp == 1); - } else if (mode == PAGE_CUR_L) { - ut_a(dbg_cmp == 1); - } else if (mode == PAGE_CUR_LE) { - ut_a(dbg_cmp >= 0); - } - - if (!page_rec_is_infimum(low_rec)) { - - ut_a(low_matched_fields == dbg_matched_fields); - ut_a(low_matched_bytes == dbg_matched_bytes); - } - - dbg_matched_fields = 0; - dbg_matched_bytes = 0; - - offsets = rec_get_offsets(up_rec, index, offsets, - ULINT_UNDEFINED, &heap); - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets, - &dbg_matched_fields, - &dbg_matched_bytes); - if (mode == PAGE_CUR_G) { - ut_a(dbg_cmp == -1); - } else if (mode == PAGE_CUR_GE) { - ut_a(dbg_cmp <= 0); - } else if (mode == PAGE_CUR_L) { - ut_a(dbg_cmp <= 0); - } else if (mode == PAGE_CUR_LE) { - ut_a(dbg_cmp == -1); - } - - if (!page_rec_is_supremum(up_rec)) { - - ut_a(up_matched_fields == dbg_matched_fields); - ut_a(up_matched_bytes == dbg_matched_bytes); - } -#endif - if (mode <= PAGE_CUR_GE) { - page_cur_position(up_rec, block, cursor); - } else { - page_cur_position(low_rec, block, cursor); - } - - *iup_matched_fields = up_matched_fields; - *iup_matched_bytes = up_matched_bytes; - *ilow_matched_fields = low_matched_fields; - *ilow_matched_bytes = low_matched_bytes; - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/***********************************************************//** -Positions a page cursor on a randomly chosen user record on a page. If there -are no user records, sets the cursor on the infimum record. 
*/ -UNIV_INTERN -void -page_cur_open_on_rnd_user_rec( -/*==========================*/ - buf_block_t* block, /*!< in: page */ - page_cur_t* cursor) /*!< out: page cursor */ -{ - ulint rnd; - ulint n_recs = page_get_n_recs(buf_block_get_frame(block)); - - page_cur_set_before_first(block, cursor); - - if (UNIV_UNLIKELY(n_recs == 0)) { - - return; - } - - rnd = (ulint) (page_cur_lcg_prng() % n_recs); - - do { - page_cur_move_to_next(cursor); - } while (rnd--); -} - -/***********************************************************//** -Writes the log record of a record insert on a page. */ -static -void -page_cur_insert_rec_write_log( -/*==========================*/ - rec_t* insert_rec, /*!< in: inserted physical record */ - ulint rec_size, /*!< in: insert_rec size */ - rec_t* cursor_rec, /*!< in: record the - cursor is pointing to */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - ulint cur_rec_size; - ulint extra_size; - ulint cur_extra_size; - const byte* ins_ptr; - byte* log_ptr; - const byte* log_end; - ulint i; - - ut_a(rec_size < UNIV_PAGE_SIZE); - ut_ad(page_align(insert_rec) == page_align(cursor_rec)); - ut_ad(!page_rec_is_comp(insert_rec) - == !dict_table_is_comp(index->table)); - - { - mem_heap_t* heap = NULL; - ulint cur_offs_[REC_OFFS_NORMAL_SIZE]; - ulint ins_offs_[REC_OFFS_NORMAL_SIZE]; - - ulint* cur_offs; - ulint* ins_offs; - - rec_offs_init(cur_offs_); - rec_offs_init(ins_offs_); - - cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_, - ULINT_UNDEFINED, &heap); - ins_offs = rec_get_offsets(insert_rec, index, ins_offs_, - ULINT_UNDEFINED, &heap); - - extra_size = rec_offs_extra_size(ins_offs); - cur_extra_size = rec_offs_extra_size(cur_offs); - ut_ad(rec_size == rec_offs_size(ins_offs)); - cur_rec_size = rec_offs_size(cur_offs); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - ins_ptr = insert_rec - extra_size; - - i = 0; - - if (cur_extra_size == extra_size) { - ulint 
min_rec_size = ut_min(cur_rec_size, rec_size); - - const byte* cur_ptr = cursor_rec - cur_extra_size; - - /* Find out the first byte in insert_rec which differs from - cursor_rec; skip the bytes in the record info */ - - do { - if (*ins_ptr == *cur_ptr) { - i++; - ins_ptr++; - cur_ptr++; - } else if ((i < extra_size) - && (i >= extra_size - - page_rec_get_base_extra_size - (insert_rec))) { - i = extra_size; - ins_ptr = insert_rec; - cur_ptr = cursor_rec; - } else { - break; - } - } while (i < min_rec_size); - } - - if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { - - if (page_rec_is_comp(insert_rec)) { - log_ptr = mlog_open_and_write_index( - mtr, insert_rec, index, MLOG_COMP_REC_INSERT, - 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); - if (UNIV_UNLIKELY(!log_ptr)) { - /* Logging in mtr is switched off - during crash recovery: in that case - mlog_open returns NULL */ - return; - } - } else { - log_ptr = mlog_open(mtr, 11 - + 2 + 5 + 1 + 5 + 5 - + MLOG_BUF_MARGIN); - if (UNIV_UNLIKELY(!log_ptr)) { - /* Logging in mtr is switched off - during crash recovery: in that case - mlog_open returns NULL */ - return; - } - - log_ptr = mlog_write_initial_log_record_fast( - insert_rec, MLOG_REC_INSERT, log_ptr, mtr); - } - - log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; - /* Write the cursor rec offset as a 2-byte ulint */ - mach_write_to_2(log_ptr, page_offset(cursor_rec)); - log_ptr += 2; - } else { - log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); - if (!log_ptr) { - /* Logging in mtr is switched off during crash - recovery: in that case mlog_open returns NULL */ - return; - } - log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; - } - - if (page_rec_is_comp(insert_rec)) { - if (UNIV_UNLIKELY - (rec_get_info_and_status_bits(insert_rec, TRUE) - != rec_get_info_and_status_bits(cursor_rec, TRUE))) { - - goto need_extra_info; - } - } else { - if (UNIV_UNLIKELY - (rec_get_info_and_status_bits(insert_rec, FALSE) - != rec_get_info_and_status_bits(cursor_rec, 
FALSE))) { - - goto need_extra_info; - } - } - - if (extra_size != cur_extra_size || rec_size != cur_rec_size) { -need_extra_info: - /* Write the record end segment length - and the extra info storage flag */ - log_ptr += mach_write_compressed(log_ptr, - 2 * (rec_size - i) + 1); - - /* Write the info bits */ - mach_write_to_1(log_ptr, - rec_get_info_and_status_bits( - insert_rec, - page_rec_is_comp(insert_rec))); - log_ptr++; - - /* Write the record origin offset */ - log_ptr += mach_write_compressed(log_ptr, extra_size); - - /* Write the mismatch index */ - log_ptr += mach_write_compressed(log_ptr, i); - - ut_a(i < UNIV_PAGE_SIZE); - ut_a(extra_size < UNIV_PAGE_SIZE); - } else { - /* Write the record end segment length - and the extra info storage flag */ - log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i)); - } - - /* Write to the log the inserted index record end segment which - differs from the cursor record */ - - rec_size -= i; - - if (log_ptr + rec_size <= log_end) { - memcpy(log_ptr, ins_ptr, rec_size); - mlog_close(mtr, log_ptr + rec_size); - } else { - mlog_close(mtr, log_ptr); - ut_a(rec_size < UNIV_PAGE_SIZE); - mlog_catenate_string(mtr, ins_ptr, rec_size); - } -} -#else /* !UNIV_HOTBACKUP */ -# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses a log record of a record insert on a page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_cur_parse_insert_rec( -/*======================*/ - ibool is_short,/*!< in: TRUE if short inserts */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: page or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ulint origin_offset; - ulint end_seg_len; - ulint mismatch_index; - page_t* page; - rec_t* cursor_rec; - byte buf1[1024]; - byte* buf; - byte* ptr2 = ptr; - ulint info_and_status_bits = 0; /* remove warning */ - page_cur_t cursor; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - page = block ? buf_block_get_frame(block) : NULL; - - if (is_short) { - cursor_rec = page_rec_get_prev(page_get_supremum_rec(page)); - } else { - ulint offset; - - /* Read the cursor rec offset as a 2-byte ulint */ - - if (UNIV_UNLIKELY(end_ptr < ptr + 2)) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - cursor_rec = page + offset; - - if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) { - - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - } - - ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len); - - if (ptr == NULL) { - - return(NULL); - } - - if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (end_seg_len & 0x1UL) { - /* Read the info bits */ - - if (end_ptr < ptr + 1) { - - return(NULL); - } - - info_and_status_bits = mach_read_from_1(ptr); - ptr++; - - ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset); - - if (ptr == NULL) { - - return(NULL); - } - - ut_a(origin_offset < UNIV_PAGE_SIZE); - - ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index); - - if (ptr == NULL) { - - return(NULL); - } - - ut_a(mismatch_index < UNIV_PAGE_SIZE); - } - - if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) { - - return(NULL); - } 
- - if (!block) { - - return(ptr + (end_seg_len >> 1)); - } - - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page)); - - /* Read from the log the inserted index record end segment which - differs from the cursor record */ - - offsets = rec_get_offsets(cursor_rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (!(end_seg_len & 0x1UL)) { - info_and_status_bits = rec_get_info_and_status_bits( - cursor_rec, page_is_comp(page)); - origin_offset = rec_offs_extra_size(offsets); - mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1); - } - - end_seg_len >>= 1; - - if (mismatch_index + end_seg_len < sizeof buf1) { - buf = buf1; - } else { - buf = static_cast<byte*>( - mem_alloc(mismatch_index + end_seg_len)); - } - - /* Build the inserted record to buf */ - - if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "Is short %lu, info_and_status_bits %lu, offset %lu, " - "o_offset %lu\n" - "mismatch index %lu, end_seg_len %lu\n" - "parsed len %lu\n", - (ulong) is_short, (ulong) info_and_status_bits, - (ulong) page_offset(cursor_rec), - (ulong) origin_offset, - (ulong) mismatch_index, (ulong) end_seg_len, - (ulong) (ptr - ptr2)); - - fputs("Dump of 300 bytes of log:\n", stderr); - ut_print_buf(stderr, ptr2, 300); - putc('\n', stderr); - - buf_page_print(page, 0, 0); - - ut_error; - } - - ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index); - ut_memcpy(buf + mismatch_index, ptr, end_seg_len); - - if (page_is_comp(page)) { - rec_set_info_and_status_bits(buf + origin_offset, - info_and_status_bits); - } else { - rec_set_info_bits_old(buf + origin_offset, - info_and_status_bits); - } - - page_cur_position(cursor_rec, block, &cursor); - - offsets = rec_get_offsets(buf + origin_offset, index, offsets, - ULINT_UNDEFINED, &heap); - if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor, - buf + origin_offset, - index, offsets, mtr))) { - /* The redo log record should only 
have been written - after the write was successful. */ - ut_error; - } - - if (buf != buf1) { - - mem_free(buf); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(ptr + end_seg_len); -} - -/***********************************************************//** -Inserts a record next to page cursor on an uncompressed page. -Returns pointer to inserted record if succeed, i.e., enough -space available, NULL otherwise. The cursor stays at the same position. -@return pointer to record if succeed, NULL otherwise */ -UNIV_INTERN -rec_t* -page_cur_insert_rec_low( -/*====================*/ - rec_t* current_rec,/*!< in: pointer to current record after - which the new record is inserted */ - dict_index_t* index, /*!< in: record descriptor */ - const rec_t* rec, /*!< in: pointer to a physical record */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ -{ - byte* insert_buf; - ulint rec_size; - page_t* page; /*!< the relevant page */ - rec_t* last_insert; /*!< cursor position at previous - insert */ - rec_t* free_rec; /*!< a free record that was reused, - or NULL */ - rec_t* insert_rec; /*!< inserted record */ - ulint heap_no; /*!< heap number of the inserted - record */ - - ut_ad(rec_offs_validate(rec, index, offsets)); - - page = page_align(current_rec); - ut_ad(dict_table_is_comp(index->table) - == (ibool) !!page_is_comp(page)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() - || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); - - ut_ad(!page_rec_is_supremum(current_rec)); - - /* 1. Get the size of the physical record in the page */ - rec_size = rec_offs_size(offsets); - -#ifdef UNIV_DEBUG_VALGRIND - { - const void* rec_start - = rec - rec_offs_extra_size(offsets); - ulint extra_size - = rec_offs_extra_size(offsets) - - (rec_offs_comp(offsets) - ? 
REC_N_NEW_EXTRA_BYTES - : REC_N_OLD_EXTRA_BYTES); - - /* All data bytes of the record must be valid. */ - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - /* The variable-length header must be valid. */ - UNIV_MEM_ASSERT_RW(rec_start, extra_size); - } -#endif /* UNIV_DEBUG_VALGRIND */ - - /* 2. Try to find suitable space from page memory management */ - - free_rec = page_header_get_ptr(page, PAGE_FREE); - if (UNIV_LIKELY_NULL(free_rec)) { - /* Try to allocate from the head of the free list. */ - ulint foffsets_[REC_OFFS_NORMAL_SIZE]; - ulint* foffsets = foffsets_; - mem_heap_t* heap = NULL; - - rec_offs_init(foffsets_); - - foffsets = rec_get_offsets( - free_rec, index, foffsets, ULINT_UNDEFINED, &heap); - if (rec_offs_size(foffsets) < rec_size) { - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - goto use_heap; - } - - insert_buf = free_rec - rec_offs_extra_size(foffsets); - - if (page_is_comp(page)) { - heap_no = rec_get_heap_no_new(free_rec); - page_mem_alloc_free(page, NULL, - rec_get_next_ptr(free_rec, TRUE), - rec_size); - } else { - heap_no = rec_get_heap_no_old(free_rec); - page_mem_alloc_free(page, NULL, - rec_get_next_ptr(free_rec, FALSE), - rec_size); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } else { -use_heap: - free_rec = NULL; - insert_buf = page_mem_alloc_heap(page, NULL, - rec_size, &heap_no); - - if (UNIV_UNLIKELY(insert_buf == NULL)) { - return(NULL); - } - } - - /* 3. 
Create the record */ - insert_rec = rec_copy(insert_buf, rec, offsets); - rec_offs_make_valid(insert_rec, index, offsets); - - /* This is because assertion below is debug assertion */ -#ifdef UNIV_DEBUG - if (UNIV_UNLIKELY(current_rec == insert_rec)) { - ulint extra_len, data_len; - extra_len = rec_offs_extra_size(offsets); - data_len = rec_offs_data_size(offsets); - - fprintf(stderr, "InnoDB: Error: current_rec == insert_rec " - " extra_len %lu data_len %lu insert_buf %p rec %p\n", - extra_len, data_len, insert_buf, rec); - fprintf(stderr, "InnoDB; Physical record: \n"); - rec_print(stderr, rec, index); - fprintf(stderr, "InnoDB: Inserted record: \n"); - rec_print(stderr, insert_rec, index); - fprintf(stderr, "InnoDB: Current record: \n"); - rec_print(stderr, current_rec, index); - ut_a(current_rec != insert_rec); - } -#endif /* UNIV_DEBUG */ - - /* 4. Insert the record in the linked list of records */ - ut_ad(current_rec != insert_rec); - - { - /* next record after current before the insertion */ - rec_t* next_rec = page_rec_get_next(current_rec); -#ifdef UNIV_DEBUG - if (page_is_comp(page)) { - ut_ad(rec_get_status(current_rec) - <= REC_STATUS_INFIMUM); - ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM); - ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM); - } -#endif - page_rec_set_next(insert_rec, next_rec); - page_rec_set_next(current_rec, insert_rec); - } - - page_header_set_field(page, NULL, PAGE_N_RECS, - 1 + page_get_n_recs(page)); - - /* 5. Set the n_owned field in the inserted record to zero, - and set the heap_no field */ - if (page_is_comp(page)) { - rec_set_n_owned_new(insert_rec, NULL, 0); - rec_set_heap_no_new(insert_rec, heap_no); - } else { - rec_set_n_owned_old(insert_rec, 0); - rec_set_heap_no_old(insert_rec, heap_no); - } - - UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets), - rec_offs_size(offsets)); - /* 6. 
Update the last insertion info in page header */ - - last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); - ut_ad(!last_insert || !page_is_comp(page) - || rec_get_node_ptr_flag(last_insert) - == rec_get_node_ptr_flag(insert_rec)); - - if (UNIV_UNLIKELY(last_insert == NULL)) { - page_header_set_field(page, NULL, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0); - - } else if ((last_insert == current_rec) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_LEFT)) { - - page_header_set_field(page, NULL, PAGE_DIRECTION, - PAGE_RIGHT); - page_header_set_field(page, NULL, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - - } else if ((page_rec_get_next(insert_rec) == last_insert) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_RIGHT)) { - - page_header_set_field(page, NULL, PAGE_DIRECTION, - PAGE_LEFT); - page_header_set_field(page, NULL, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - } else { - page_header_set_field(page, NULL, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0); - } - - page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec); - - /* 7. It remains to update the owner record. */ - { - rec_t* owner_rec = page_rec_find_owner_rec(insert_rec); - ulint n_owned; - if (page_is_comp(page)) { - n_owned = rec_get_n_owned_new(owner_rec); - rec_set_n_owned_new(owner_rec, NULL, n_owned + 1); - } else { - n_owned = rec_get_n_owned_old(owner_rec); - rec_set_n_owned_old(owner_rec, n_owned + 1); - } - - /* 8. Now we have incremented the n_owned field of the owner - record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, - we have to split the corresponding directory slot in two. */ - - if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) { - page_dir_split_slot( - page, NULL, - page_dir_find_owner_slot(owner_rec)); - } - } - - /* 9. 
Write log record of the insert */ - if (UNIV_LIKELY(mtr != NULL)) { - page_cur_insert_rec_write_log(insert_rec, rec_size, - current_rec, index, mtr); - } - - btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert"); - - return(insert_rec); -} - -/***********************************************************//** -Inserts a record next to page cursor on a compressed and uncompressed -page. Returns pointer to inserted record if succeed, i.e., -enough space available, NULL otherwise. -The cursor stays at the same position. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). - -@return pointer to record if succeed, NULL otherwise */ -UNIV_INTERN -rec_t* -page_cur_insert_rec_zip( -/*====================*/ - page_cur_t* cursor, /*!< in/out: page cursor */ - dict_index_t* index, /*!< in: record descriptor */ - const rec_t* rec, /*!< in: pointer to a physical record */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */ -{ - byte* insert_buf; - ulint rec_size; - page_t* page; /*!< the relevant page */ - rec_t* last_insert; /*!< cursor position at previous - insert */ - rec_t* free_rec; /*!< a free record that was reused, - or NULL */ - rec_t* insert_rec; /*!< inserted record */ - ulint heap_no; /*!< heap number of the inserted - record */ - page_zip_des_t* page_zip; - - page_zip = page_cur_get_page_zip(cursor); - ut_ad(page_zip); - - ut_ad(rec_offs_validate(rec, index, offsets)); - - page = page_cur_get_page(cursor); - ut_ad(dict_table_is_comp(index->table)); - ut_ad(page_is_comp(page)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() - || (mtr ? 
mtr->inside_ibuf : dict_index_is_ibuf(index))); - - ut_ad(!page_cur_is_after_last(cursor)); -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - /* 1. Get the size of the physical record in the page */ - rec_size = rec_offs_size(offsets); - -#ifdef UNIV_DEBUG_VALGRIND - { - const void* rec_start - = rec - rec_offs_extra_size(offsets); - ulint extra_size - = rec_offs_extra_size(offsets) - - (rec_offs_comp(offsets) - ? REC_N_NEW_EXTRA_BYTES - : REC_N_OLD_EXTRA_BYTES); - - /* All data bytes of the record must be valid. */ - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - /* The variable-length header must be valid. */ - UNIV_MEM_ASSERT_RW(rec_start, extra_size); - } -#endif /* UNIV_DEBUG_VALGRIND */ - - const bool reorg_before_insert = page_has_garbage(page) - && rec_size > page_get_max_insert_size(page, 1) - && rec_size <= page_get_max_insert_size_after_reorganize( - page, 1); - - /* 2. Try to find suitable space from page memory management */ - if (!page_zip_available(page_zip, dict_index_is_clust(index), - rec_size, 1) - || reorg_before_insert) { - /* The values can change dynamically. */ - bool log_compressed = page_zip_log_pages; - ulint level = page_zip_level; -#ifdef UNIV_DEBUG - rec_t* cursor_rec = page_cur_get_rec(cursor); -#endif /* UNIV_DEBUG */ - - /* If we are not writing compressed page images, we - must reorganize the page before attempting the - insert. */ - if (recv_recovery_is_on()) { - /* Insert into the uncompressed page only. - The page reorganization or creation that we - would attempt outside crash recovery would - have been covered by a previous redo log record. */ - } else if (page_is_empty(page)) { - ut_ad(page_cur_is_before_first(cursor)); - - /* This is an empty page. Recreate it to - get rid of the modification log. 
*/ - page_create_zip(page_cur_get_block(cursor), index, - page_header_get_field(page, PAGE_LEVEL), - 0, mtr); - ut_ad(!page_header_get_ptr(page, PAGE_FREE)); - - if (page_zip_available( - page_zip, dict_index_is_clust(index), - rec_size, 1)) { - goto use_heap; - } - - /* The cursor should remain on the page infimum. */ - return(NULL); - } else if (!page_zip->m_nonempty && !page_has_garbage(page)) { - /* The page has been freshly compressed, so - reorganizing it will not help. */ - } else if (log_compressed && !reorg_before_insert) { - /* Insert into uncompressed page only, and - try page_zip_reorganize() afterwards. */ - } else if (btr_page_reorganize_low( - recv_recovery_is_on(), level, - cursor, index, mtr)) { - ut_ad(!page_header_get_ptr(page, PAGE_FREE)); - - if (page_zip_available( - page_zip, dict_index_is_clust(index), - rec_size, 1)) { - /* After reorganizing, there is space - available. */ - goto use_heap; - } - } else { - ut_ad(cursor->rec == cursor_rec); - return(NULL); - } - - /* Try compressing the whole page afterwards. */ - insert_rec = page_cur_insert_rec_low( - cursor->rec, index, rec, offsets, NULL); - - /* If recovery is on, this implies that the compression - of the page was successful during runtime. Had that not - been the case or had the redo logging of compressed - pages been enabled during runtime then we'd have seen - a MLOG_ZIP_PAGE_COMPRESS redo record. Therefore, we - know that we don't need to reorganize the page. We, - however, do need to recompress the page. That will - happen when the next redo record is read which must - be of type MLOG_ZIP_PAGE_COMPRESS_NO_DATA and it must - contain a valid compression level value. - This implies that during recovery from this point till - the next redo is applied the uncompressed and - compressed versions are not identical and - page_zip_validate will fail but that is OK because - we call page_zip_validate only after processing - all changes to a page under a single mtr during - recovery. 
*/ - if (insert_rec == NULL) { - /* Out of space. - This should never occur during crash recovery, - because the MLOG_COMP_REC_INSERT should only - be logged after a successful operation. */ - ut_ad(!recv_recovery_is_on()); - } else if (recv_recovery_is_on()) { - /* This should be followed by - MLOG_ZIP_PAGE_COMPRESS_NO_DATA, - which should succeed. */ - rec_offs_make_valid(insert_rec, index, offsets); - } else { - ulint pos = page_rec_get_n_recs_before(insert_rec); - ut_ad(pos > 0); - - if (!log_compressed) { - if (page_zip_compress( - page_zip, page, index, - level, NULL)) { - page_cur_insert_rec_write_log( - insert_rec, rec_size, - cursor->rec, index, mtr); - page_zip_compress_write_log_no_data( - level, page, index, mtr); - - rec_offs_make_valid( - insert_rec, index, offsets); - return(insert_rec); - } - - /* Page compress failed. If this happened on a - leaf page, put the data size into the sample - buffer. */ - if (page_is_leaf(page)) { - ulint occupied = page_get_data_size(page) - + page_dir_calc_reserved_space( - page_get_n_recs(page)); - index->stat_defrag_data_size_sample[ - index->stat_defrag_sample_next_slot] = - occupied; - index->stat_defrag_sample_next_slot = - (index->stat_defrag_sample_next_slot - + 1) % STAT_DEFRAG_DATA_SIZE_N_SAMPLE; - } - - ut_ad(cursor->rec - == (pos > 1 - ? page_rec_get_nth( - page, pos - 1) - : page + PAGE_NEW_INFIMUM)); - } else { - /* We are writing entire page images - to the log. Reduce the redo log volume - by reorganizing the page at the same time. */ - if (page_zip_reorganize( - cursor->block, index, mtr)) { - /* The page was reorganized: - Seek to pos. 
*/ - if (pos > 1) { - cursor->rec = page_rec_get_nth( - page, pos - 1); - } else { - cursor->rec = page - + PAGE_NEW_INFIMUM; - } - - insert_rec = page + rec_get_next_offs( - cursor->rec, TRUE); - rec_offs_make_valid( - insert_rec, index, offsets); - return(insert_rec); - } - - /* Theoretically, we could try one - last resort of btr_page_reorganize_low() - followed by page_zip_available(), but - that would be very unlikely to - succeed. (If the full reorganized page - failed to compress, why would it - succeed to compress the page, plus log - the insert of this record? */ - } - - /* Out of space: restore the page */ - btr_blob_dbg_remove(page, index, "insert_zip_fail"); - if (!page_zip_decompress(page_zip, page, FALSE)) { - ut_error; /* Memory corrupted? */ - } - ut_ad(page_validate(page, index)); - btr_blob_dbg_add(page, index, "insert_zip_fail"); - insert_rec = NULL; - } - - return(insert_rec); - } - - free_rec = page_header_get_ptr(page, PAGE_FREE); - if (UNIV_LIKELY_NULL(free_rec)) { - /* Try to allocate from the head of the free list. */ - lint extra_size_diff; - ulint foffsets_[REC_OFFS_NORMAL_SIZE]; - ulint* foffsets = foffsets_; - mem_heap_t* heap = NULL; - - rec_offs_init(foffsets_); - - foffsets = rec_get_offsets(free_rec, index, foffsets, - ULINT_UNDEFINED, &heap); - if (rec_offs_size(foffsets) < rec_size) { -too_small: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - goto use_heap; - } - - insert_buf = free_rec - rec_offs_extra_size(foffsets); - - /* On compressed pages, do not relocate records from - the free list. If extra_size would grow, use the heap. */ - extra_size_diff - = rec_offs_extra_size(offsets) - - rec_offs_extra_size(foffsets); - - if (UNIV_UNLIKELY(extra_size_diff < 0)) { - /* Add an offset to the extra_size. 
*/ - if (rec_offs_size(foffsets) - < rec_size - extra_size_diff) { - - goto too_small; - } - - insert_buf -= extra_size_diff; - } else if (UNIV_UNLIKELY(extra_size_diff)) { - /* Do not allow extra_size to grow */ - - goto too_small; - } - - heap_no = rec_get_heap_no_new(free_rec); - page_mem_alloc_free(page, page_zip, - rec_get_next_ptr(free_rec, TRUE), - rec_size); - - if (!page_is_leaf(page)) { - /* Zero out the node pointer of free_rec, - in case it will not be overwritten by - insert_rec. */ - - ut_ad(rec_size > REC_NODE_PTR_SIZE); - - if (rec_offs_extra_size(foffsets) - + rec_offs_data_size(foffsets) > rec_size) { - - memset(rec_get_end(free_rec, foffsets) - - REC_NODE_PTR_SIZE, 0, - REC_NODE_PTR_SIZE); - } - } else if (dict_index_is_clust(index)) { - /* Zero out the DB_TRX_ID and DB_ROLL_PTR - columns of free_rec, in case it will not be - overwritten by insert_rec. */ - - ulint trx_id_col; - ulint trx_id_offs; - ulint len; - - trx_id_col = dict_index_get_sys_col_pos(index, - DATA_TRX_ID); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - - trx_id_offs = rec_get_nth_field_offs(foffsets, - trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - - if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs - + rec_offs_extra_size(foffsets) > rec_size) { - /* We will have to zero out the - DB_TRX_ID and DB_ROLL_PTR, because - they will not be fully overwritten by - insert_rec. */ - - memset(free_rec + trx_id_offs, 0, - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - } - - ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN - == rec_get_nth_field(free_rec, foffsets, - trx_id_col + 1, &len)); - ut_ad(len == DATA_ROLL_PTR_LEN); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } else { -use_heap: - free_rec = NULL; - insert_buf = page_mem_alloc_heap(page, page_zip, - rec_size, &heap_no); - - if (UNIV_UNLIKELY(insert_buf == NULL)) { - return(NULL); - } - - page_zip_dir_add_slot(page_zip, dict_index_is_clust(index)); - } - - /* 3. 
Create the record */ - insert_rec = rec_copy(insert_buf, rec, offsets); - rec_offs_make_valid(insert_rec, index, offsets); - - /* 4. Insert the record in the linked list of records */ - ut_ad(cursor->rec != insert_rec); - - { - /* next record after current before the insertion */ - const rec_t* next_rec = page_rec_get_next_low( - cursor->rec, TRUE); - ut_ad(rec_get_status(cursor->rec) - <= REC_STATUS_INFIMUM); - ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM); - ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM); - - page_rec_set_next(insert_rec, next_rec); - page_rec_set_next(cursor->rec, insert_rec); - } - - page_header_set_field(page, page_zip, PAGE_N_RECS, - 1 + page_get_n_recs(page)); - - /* 5. Set the n_owned field in the inserted record to zero, - and set the heap_no field */ - rec_set_n_owned_new(insert_rec, NULL, 0); - rec_set_heap_no_new(insert_rec, heap_no); - - UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets), - rec_offs_size(offsets)); - - page_zip_dir_insert(page_zip, cursor->rec, free_rec, insert_rec); - - /* 6. 
Update the last insertion info in page header */ - - last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); - ut_ad(!last_insert - || rec_get_node_ptr_flag(last_insert) - == rec_get_node_ptr_flag(insert_rec)); - - if (UNIV_UNLIKELY(last_insert == NULL)) { - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); - - } else if ((last_insert == cursor->rec) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_LEFT)) { - - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_RIGHT); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - - } else if ((page_rec_get_next(insert_rec) == last_insert) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_RIGHT)) { - - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_LEFT); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - } else { - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); - } - - page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec); - - /* 7. It remains to update the owner record. */ - { - rec_t* owner_rec = page_rec_find_owner_rec(insert_rec); - ulint n_owned; - - n_owned = rec_get_n_owned_new(owner_rec); - rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1); - - /* 8. Now we have incremented the n_owned field of the owner - record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, - we have to split the corresponding directory slot in two. */ - - if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) { - page_dir_split_slot( - page, page_zip, - page_dir_find_owner_slot(owner_rec)); - } - } - - page_zip_write_rec(page_zip, insert_rec, index, offsets, 1); - - btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok"); - - /* 9. 
Write log record of the insert */ - if (UNIV_LIKELY(mtr != NULL)) { - page_cur_insert_rec_write_log(insert_rec, rec_size, - cursor->rec, index, mtr); - } - - return(insert_rec); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Writes a log record of copying a record list end to a new created page. -@return 4-byte field where to write the log data length, or NULL if -logging is disabled */ -UNIV_INLINE -byte* -page_copy_rec_list_to_created_page_write_log( -/*=========================================*/ - page_t* page, /*!< in: index page */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* log_ptr; - - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, page, index, - page_is_comp(page) - ? MLOG_COMP_LIST_END_COPY_CREATED - : MLOG_LIST_END_COPY_CREATED, 4); - if (UNIV_LIKELY(log_ptr != NULL)) { - mlog_close(mtr, log_ptr + 4); - } - - return(log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************//** -Parses a log record of copying a record list end to a new created page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_parse_copy_rec_list_to_created_page( -/*=====================================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: page or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - byte* rec_end; - ulint log_data_len; - page_t* page; - page_zip_des_t* page_zip; - - if (ptr + 4 > end_ptr) { - - return(NULL); - } - - log_data_len = mach_read_from_4(ptr); - ptr += 4; - - rec_end = ptr + log_data_len; - - if (rec_end > end_ptr) { - - return(NULL); - } - - if (!block) { - - return(rec_end); - } - - while (ptr < rec_end) { - ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, - block, index, mtr); - } - - ut_a(ptr == rec_end); - - page = buf_block_get_frame(block); - page_zip = buf_block_get_page_zip(block); - - page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL); - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); - - return(rec_end); -} - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Copies records from page to a newly created page, from a given record onward, -including that record. Infimum and supremum records are not copied. - -IMPORTANT: The caller will have to update IBUF_BITMAP_FREE -if this is a compressed leaf page in a secondary index. -This has to be done either within the same mini-transaction, -or by invoking ibuf_reset_free_bits() before mtr_commit(). 
*/ -UNIV_INTERN -void -page_copy_rec_list_end_to_created_page( -/*===================================*/ - page_t* new_page, /*!< in/out: index page to copy to */ - rec_t* rec, /*!< in: first record to copy */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_dir_slot_t* slot = 0; /* remove warning */ - byte* heap_top; - rec_t* insert_rec = 0; /* remove warning */ - rec_t* prev_rec; - ulint count; - ulint n_recs; - ulint slot_index; - ulint rec_size; - ulint log_mode; - byte* log_ptr; - ulint log_data_len; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW); - ut_ad(page_align(rec) != new_page); - ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page)); - - if (page_rec_is_infimum(rec)) { - - rec = page_rec_get_next(rec); - } - - if (page_rec_is_supremum(rec)) { - - return; - } - -#ifdef UNIV_DEBUG - /* To pass the debug tests we have to set these dummy values - in the debug version */ - page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2); - page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, - new_page + UNIV_PAGE_SIZE - 1); -#endif - - log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, - index, mtr); - - log_data_len = dyn_array_get_data_size(&(mtr->log)); - - /* Individual inserts are logged in a shorter form */ - - log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); - - prev_rec = page_get_infimum_rec(new_page); - if (page_is_comp(new_page)) { - heap_top = new_page + PAGE_NEW_SUPREMUM_END; - } else { - heap_top = new_page + PAGE_OLD_SUPREMUM_END; - } - count = 0; - slot_index = 0; - n_recs = 0; - - do { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - insert_rec = rec_copy(heap_top, rec, offsets); - - if (page_is_comp(new_page)) { - rec_set_next_offs_new(prev_rec, - page_offset(insert_rec)); - - rec_set_n_owned_new(insert_rec, NULL, 
0); - rec_set_heap_no_new(insert_rec, - PAGE_HEAP_NO_USER_LOW + n_recs); - } else { - rec_set_next_offs_old(prev_rec, - page_offset(insert_rec)); - - rec_set_n_owned_old(insert_rec, 0); - rec_set_heap_no_old(insert_rec, - PAGE_HEAP_NO_USER_LOW + n_recs); - } - - count++; - n_recs++; - - if (UNIV_UNLIKELY - (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) { - - slot_index++; - - slot = page_dir_get_nth_slot(new_page, slot_index); - - page_dir_slot_set_rec(slot, insert_rec); - page_dir_slot_set_n_owned(slot, NULL, count); - - count = 0; - } - - rec_size = rec_offs_size(offsets); - - ut_ad(heap_top < new_page + UNIV_PAGE_SIZE); - - heap_top += rec_size; - - rec_offs_make_valid(insert_rec, index, offsets); - btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end"); - - page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, - index, mtr); - prev_rec = insert_rec; - rec = page_rec_get_next(rec); - } while (!page_rec_is_supremum(rec)); - - if ((slot_index > 0) && (count + 1 - + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 - <= PAGE_DIR_SLOT_MAX_N_OWNED)) { - /* We can merge the two last dir slots. This operation is - here to make this function imitate exactly the equivalent - task made using page_cur_insert_rec, which we use in database - recovery to reproduce the task performed by this function. - To be able to check the correctness of recovery, it is good - that it imitates exactly. 
*/ - - count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2; - - page_dir_slot_set_n_owned(slot, NULL, 0); - - slot_index--; - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len; - - ut_a(log_data_len < 100 * UNIV_PAGE_SIZE); - - if (UNIV_LIKELY(log_ptr != NULL)) { - mach_write_to_4(log_ptr, log_data_len); - } - - if (page_is_comp(new_page)) { - rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM); - } else { - rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM); - } - - slot = page_dir_get_nth_slot(new_page, 1 + slot_index); - - page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page)); - page_dir_slot_set_n_owned(slot, NULL, count + 1); - - page_dir_set_n_slots(new_page, NULL, 2 + slot_index); - page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top); - page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs); - page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs); - - page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL); - page_header_set_field(new_page, NULL, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0); - - /* Restore the log mode */ - - mtr_set_log_mode(mtr, log_mode); -} - -/***********************************************************//** -Writes log record of a record delete on a page. */ -UNIV_INLINE -void -page_cur_delete_rec_write_log( -/*==========================*/ - rec_t* rec, /*!< in: record to be deleted */ - const dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mini-transaction handle */ -{ - byte* log_ptr; - - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, - page_rec_is_comp(rec) - ? 
MLOG_COMP_REC_DELETE - : MLOG_REC_DELETE, 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - /* Write the cursor rec offset as a 2-byte ulint */ - mach_write_to_2(log_ptr, page_offset(rec)); - - mlog_close(mtr, log_ptr + 2); -} -#else /* !UNIV_HOTBACKUP */ -# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses log record of a record delete on a page. -@return pointer to record end or NULL */ -UNIV_INTERN -byte* -page_cur_parse_delete_rec( -/*======================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - buf_block_t* block, /*!< in: page or NULL */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ulint offset; - page_cur_t cursor; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - /* Read the cursor rec offset as a 2-byte ulint */ - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(offset <= UNIV_PAGE_SIZE); - - if (block) { - page_t* page = buf_block_get_frame(block); - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_t* rec = page + offset; - rec_offs_init(offsets_); - - page_cur_position(rec, block, &cursor); - ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page)); - - page_cur_delete_rec(&cursor, index, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - return(ptr); -} - -/***********************************************************//** -Deletes a record at the page cursor. The cursor is moved to the next -record after the deleted one. 
*/ -UNIV_INTERN -void -page_cur_delete_rec( -/*================*/ - page_cur_t* cursor, /*!< in/out: a page cursor */ - const dict_index_t* index, /*!< in: record descriptor */ - const ulint* offsets,/*!< in: rec_get_offsets( - cursor->rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle - or NULL */ -{ - page_dir_slot_t* cur_dir_slot; - page_dir_slot_t* prev_slot; - page_t* page; - page_zip_des_t* page_zip; - rec_t* current_rec; - rec_t* prev_rec = NULL; - rec_t* next_rec; - ulint cur_slot_no; - ulint cur_n_owned; - rec_t* rec; - - page = page_cur_get_page(cursor); - page_zip = page_cur_get_page_zip(cursor); - - /* page_zip_validate() will fail here when - btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark(). - Then, both "page_zip" and "page" would have the min-rec-mark - set on the smallest user record, but "page" would additionally - have it set on the smallest-but-one record. Because sloppy - page_zip_validate_low() only ignores min-rec-flag differences - in the smallest user record, it cannot be used here either. */ - - current_rec = cursor->rec; - ut_ad(rec_offs_validate(current_rec, index, offsets)); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() - || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); - - /* The record must not be the supremum or infimum record. */ - ut_ad(page_rec_is_user_rec(current_rec)); - - if (page_get_n_recs(page) == 1 && !recv_recovery_is_on()) { - /* Empty the page, unless we are applying the redo log - during crash recovery. During normal operation, the - page_create_empty() gets logged as one of MLOG_PAGE_CREATE, - MLOG_COMP_PAGE_CREATE, MLOG_ZIP_PAGE_COMPRESS. */ - ut_ad(page_is_leaf(page)); - /* Usually, this should be the root page, - and the whole index tree should become empty. 
- However, this could also be a call in - btr_cur_pessimistic_update() to delete the only - record in the page and to insert another one. */ - page_cur_move_to_next(cursor); - ut_ad(page_cur_is_after_last(cursor)); - page_create_empty(page_cur_get_block(cursor), - const_cast<dict_index_t*>(index), mtr); - return; - } - - /* Save to local variables some data associated with current_rec */ - cur_slot_no = page_dir_find_owner_slot(current_rec); - ut_ad(cur_slot_no > 0); - cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no); - cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); - - /* 0. Write the log record */ - if (mtr != 0) { - page_cur_delete_rec_write_log(current_rec, index, mtr); - } - - /* 1. Reset the last insert info in the page header and increment - the modify clock for the frame */ - - page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL); - - /* The page gets invalid for optimistic searches: increment the - frame modify clock only if there is an mini-transaction covering - the change. During IMPORT we allocate local blocks that are not - part of the buffer pool. */ - - if (mtr != 0) { - buf_block_modify_clock_inc(page_cur_get_block(cursor)); - } - - /* 2. Find the next and the previous record. Note that the cursor is - left at the next record. */ - - ut_ad(cur_slot_no > 0); - prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1); - - rec = (rec_t*) page_dir_slot_get_rec(prev_slot); - - /* rec now points to the record of the previous directory slot. Look - for the immediate predecessor of current_rec in a loop. */ - - while(current_rec != rec) { - prev_rec = rec; - rec = page_rec_get_next(rec); - } - - page_cur_move_to_next(cursor); - next_rec = cursor->rec; - - /* 3. Remove the record from the linked list of records */ - - page_rec_set_next(prev_rec, next_rec); - - /* 4. If the deleted record is pointed to by a dir slot, update the - record pointer in slot. 
In the following if-clause we assume that - prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED - >= 2. */ - -#if PAGE_DIR_SLOT_MIN_N_OWNED < 2 -# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2" -#endif - ut_ad(cur_n_owned > 1); - - if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) { - page_dir_slot_set_rec(cur_dir_slot, prev_rec); - } - - /* 5. Update the number of owned records of the slot */ - - page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1); - - /* 6. Free the memory occupied by the record */ - btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index), - offsets, "delete"); - page_mem_free(page, page_zip, current_rec, index, offsets); - - /* 7. Now we have decremented the number of owned records of the slot. - If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the - slots. */ - - if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) { - page_dir_balance_slot(page, page_zip, cur_slot_no); - } - -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -/*******************************************************************//** -Print the first n numbers, generated by page_cur_lcg_prng() to make sure -(visually) that it works properly. 
*/ -void -test_page_cur_lcg_prng( -/*===================*/ - int n) /*!< in: print first n numbers */ -{ - int i; - unsigned long long rnd; - - for (i = 0; i < n; i++) { - rnd = page_cur_lcg_prng(); - printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n", - rnd, - rnd % 2, - rnd % 3, - rnd % 5, - rnd % 7, - rnd % 11); - } -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ diff --git a/storage/xtradb/page/page0page.cc b/storage/xtradb/page/page0page.cc deleted file mode 100644 index 3f8e47adafd..00000000000 --- a/storage/xtradb/page/page0page.cc +++ /dev/null @@ -1,2872 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file page/page0page.cc -Index page routines - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#define THIS_MODULE -#include "page0page.h" -#ifdef UNIV_NONINL -#include "page0page.ic" -#endif -#undef THIS_MODULE - -#include "ha_prototypes.h" -#include "buf0checksum.h" - -#ifndef UNIV_INNOCHECKSUM - -#include "page0cur.h" -#include "page0zip.h" -#include "buf0buf.h" -#include "btr0btr.h" -#ifndef UNIV_HOTBACKUP -# include "srv0srv.h" -# include "lock0lock.h" -# include "fut0lst.h" -# include "btr0sea.h" -#endif /* !UNIV_HOTBACKUP */ - -/* THE INDEX PAGE - ============== - -The index page consists of a page header which contains the page's -id and other information. On top of it are the index records -in a heap linked into a one way linear list according to alphabetic order. - -Just below page end is an array of pointers which we call page directory, -to about every sixth record in the list. The pointers are placed in -the directory in the alphabetical order of the records pointed to, -enabling us to make binary search using the array. Each slot n:o I -in the directory points to a record, where a 4-bit field contains a count -of those records which are in the linear list between pointer I and -the pointer I - 1 in the directory, including the record -pointed to by pointer I and not including the record pointed to by I - 1. -We say that the record pointed to by slot I, or that slot I, owns -these records. The count is always kept in the range 4 to 8, with -the exception that it is 1 for the first slot, and 1--8 for the second slot. 
- -An essentially binary search can be performed in the list of index -records, like we could do if we had pointer to every record in the -page directory. The data structure is, however, more efficient when -we are doing inserts, because most inserts are just pushed on a heap. -Only every 8th insert requires block move in the directory pointer -table, which itself is quite small. A record is deleted from the page -by just taking it off the linear list and updating the number of owned -records-field of the record which owns it, and updating the page directory, -if necessary. A special case is the one when the record owns itself. -Because the overhead of inserts is so small, we may also increase the -page size from the projected default of 8 kB to 64 kB without too -much loss of efficiency in inserts. Bigger page becomes actual -when the disk transfer rate compared to seek and latency time rises. -On the present system, the page size is set so that the page transfer -time (3 ms) is 20 % of the disk random access time (15 ms). - -When the page is split, merged, or becomes full but contains deleted -records, we have to reorganize the page. - -Assuming a page size of 8 kB, a typical index page of a secondary -index contains 300 index entries, and the size of the page directory -is 50 x 4 bytes = 200 bytes. */ - -/***************************************************************//** -Looks for the directory slot which owns the given record. 
-@return the directory slot number */ -UNIV_INTERN -ulint -page_dir_find_owner_slot( -/*=====================*/ - const rec_t* rec) /*!< in: the physical record */ -{ - const page_t* page; - register uint16 rec_offs_bytes; - register const page_dir_slot_t* slot; - register const page_dir_slot_t* first_slot; - register const rec_t* r = rec; - - ut_ad(page_rec_check(rec)); - - page = page_align(rec); - first_slot = page_dir_get_nth_slot(page, 0); - slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1); - - if (page_is_comp(page)) { - while (rec_get_n_owned_new(r) == 0) { - r = rec_get_next_ptr_const(r, TRUE); - ut_ad(r >= page + PAGE_NEW_SUPREMUM); - ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR)); - } - } else { - while (rec_get_n_owned_old(r) == 0) { - r = rec_get_next_ptr_const(r, FALSE); - ut_ad(r >= page + PAGE_OLD_SUPREMUM); - ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR)); - } - } - - rec_offs_bytes = mach_encode_2(r - page); - - while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) { - - if (UNIV_UNLIKELY(slot == first_slot)) { - fprintf(stderr, - "InnoDB: Probable data corruption on" - " page %lu\n" - "InnoDB: Original record ", - (ulong) page_get_page_no(page)); - - if (page_is_comp(page)) { - fputs("(compact record)", stderr); - } else { - rec_print_old(stderr, rec); - } - - fputs("\n" - "InnoDB: on that page.\n" - "InnoDB: Cannot find the dir slot for record ", - stderr); - if (page_is_comp(page)) { - fputs("(compact record)", stderr); - } else { - rec_print_old(stderr, page - + mach_decode_2(rec_offs_bytes)); - } - fputs("\n" - "InnoDB: on that page!\n", stderr); - - buf_page_print(page, 0, 0); - - ut_error; - } - - slot += PAGE_DIR_SLOT_SIZE; - } - - return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE); -} - -/**************************************************************//** -Used to check the consistency of a directory slot. 
-@return TRUE if succeed */
-static
-ibool
-page_dir_slot_check(
-/*================*/
-	const page_dir_slot_t* slot) /*!< in: slot */
-{
-	const page_t* page;
-	ulint n_slots;
-	ulint n_owned;
-
-	ut_a(slot);
-
-	page = page_align(slot);
-
-	n_slots = page_dir_get_n_slots(page);
-
-	ut_a(slot <= page_dir_get_nth_slot(page, 0));
-	ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
-
-	ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
-
-	if (page_is_comp(page)) {
-		n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
-	} else {
-		n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
-	}
-
-	if (slot == page_dir_get_nth_slot(page, 0)) {
-		ut_a(n_owned == 1); /* slot 0 must own exactly one record */
-	} else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
-		ut_a(n_owned >= 1); /* the last slot may own fewer than PAGE_DIR_SLOT_MIN_N_OWNED */
-		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
-	} else {
-		ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED); /* every other slot: MIN..MAX */
-		ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
-	}
-
-	return(TRUE); /* any failed check aborts in ut_a(), so only TRUE is ever returned */
-}
-
-/*************************************************************//**
-Sets the max trx id field value. */
-UNIV_INTERN
-void
-page_set_max_trx_id(
-/*================*/
-	buf_block_t* block, /*!< in/out: page */
-	page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
-	trx_id_t trx_id, /*!< in: transaction id */
-	mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
-{
-	page_t* page = buf_block_get_frame(block);
-#ifndef UNIV_HOTBACKUP
-	ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-#endif /* !UNIV_HOTBACKUP */
-
-	/* It is not necessary to write this change to the redo log, as
-	during a database recovery we assume that the max trx id of every
-	page is the maximum trx id assigned before the crash.
-	NOTE(review): the branches below still pass mtr to
-	page_zip_write_header() and mlog_write_ull() when it is given;
-	confirm how that relates to the statement above. */
-
-	if (page_zip) {
-		mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
-		page_zip_write_header(page_zip,
-				      page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
-				      8, mtr);
-#ifndef UNIV_HOTBACKUP
-	} else if (mtr) { /* uncompressed page with an mtr: written through mlog_write_ull() */
-		mlog_write_ull(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
-			       trx_id, mtr);
-#endif /* !UNIV_HOTBACKUP */
-	} else { /* no page_zip and no mtr: plain in-place write */
-		mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
-	}
-}
-
-/************************************************************//**
-Allocates a block of memory from the heap of an index page.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
-byte*
-page_mem_alloc_heap(
-/*================*/
-	page_t* page, /*!< in/out: index page */
-	page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
-				space available for inserting the record,
-				or NULL */
-	ulint need, /*!< in: total number of bytes needed */
-	ulint* heap_no)/*!< out: this contains the heap number
-				of the allocated record
-				if allocation succeeds */
-{
-	byte* block;
-	ulint avl_space;
-
-	ut_ad(page && heap_no);
-
-	avl_space = page_get_max_insert_size(page, 1);
-
-	if (avl_space >= need) {
-		block = page_header_get_ptr(page, PAGE_HEAP_TOP); /* the allocation starts at the current heap top */
-
-		page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
-				    block + need);
-		*heap_no = page_dir_get_n_heap(page); /* the current n_heap value becomes the heap number handed out */
-
-		page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
-
-		return(block);
-	}
-
-	return(NULL); /* fewer than 'need' bytes available; *heap_no is left unset */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Writes a log record of page creation. */
-UNIV_INLINE
-void
-page_create_write_log(
-/*==================*/
-	buf_frame_t* frame, /*!< in: a buffer frame where the page is
-				created */
-	mtr_t* mtr, /*!< in: mini-transaction handle */
-	ibool comp) /*!< in: TRUE=compact page format */
-{
-	mlog_write_initial_log_record(frame, comp
-				      ? MLOG_COMP_PAGE_CREATE
-				      : MLOG_PAGE_CREATE, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_create_write_log(frame,mtr,comp) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_create(
-/*==============*/
-	byte* ptr, /*!< in: buffer */
-	byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
-	ulint comp, /*!< in: nonzero=compact page format */
-	buf_block_t* block, /*!< in: block or NULL */
-	mtr_t* mtr) /*!< in: mtr or NULL */
-{
-	ut_ad(ptr && end_ptr);
-
-	/* The record is empty, except for the record initial part */
-
-	if (block) {
-		page_create(block, mtr, comp);
-	}
-
-	return(ptr); /* nothing is consumed from the buffer, so ptr is returned unchanged */
-}
-
-/**********************************************************//**
-The index page creation function.
-@return pointer to the page */
-static
-page_t*
-page_create_low(
-/*============*/
-	buf_block_t* block, /*!< in: a buffer block where the
-				page is created */
-	ulint comp) /*!< in: nonzero=compact page format */
-{
-	page_dir_slot_t* slot;
-	mem_heap_t* heap;
-	dtuple_t* tuple;
-	dfield_t* field;
-	byte* heap_top;
-	rec_t* infimum_rec;
-	rec_t* supremum_rec;
-	page_t* page;
-	dict_index_t* index;
-	ulint* offsets;
-
-	ut_ad(block);
-#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
-#endif
-#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
-#endif
-
-	/* The infimum and supremum records use a dummy index. */
-	if (UNIV_LIKELY(comp)) {
-		index = dict_ind_compact;
-	} else {
-		index = dict_ind_redundant;
-	}
-
-	/* 1. INCREMENT MODIFY CLOCK */
-	buf_block_modify_clock_inc(block);
-
-	page = buf_block_get_frame(block);
-
-	fil_page_set_type(page, FIL_PAGE_INDEX);
-
-	heap = mem_heap_create(200); /* scratch heap for the two tuples and the offsets arrays; freed before step 4 */
-
-	/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS
-	(the step numbering in this function has no step 2) */
-
-	/* Create first a data tuple for infimum record */
-	tuple = dtuple_create(heap, 1);
-	dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
-	field = dtuple_get_nth_field(tuple, 0);
-
-	dfield_set_data(field, "infimum", 8); /* 8 = the 7 characters of "infimum" plus the terminating NUL */
-	dtype_set(dfield_get_type(field),
-		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
-	/* Set the corresponding physical record to its place in the page
-	record heap */
-
-	heap_top = page + PAGE_DATA;
-
-	infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
-	if (UNIV_LIKELY(comp)) {
-		ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
-
-		rec_set_n_owned_new(infimum_rec, NULL, 1);
-		rec_set_heap_no_new(infimum_rec, 0);
-	} else {
-		ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
-
-		rec_set_n_owned_old(infimum_rec, 1);
-		rec_set_heap_no_old(infimum_rec, 0);
-	}
-
-	offsets = rec_get_offsets(infimum_rec, index, NULL,
-				  ULINT_UNDEFINED, &heap);
-
-	heap_top = rec_get_end(infimum_rec, offsets); /* the supremum record is built right after the infimum */
-
-	/* Create then a tuple for supremum */
-
-	tuple = dtuple_create(heap, 1);
-	dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
-	field = dtuple_get_nth_field(tuple, 0);
-
-	dfield_set_data(field, "supremum", comp ? 8 : 9); /* "supremum" is 8 characters; only the non-compact format also stores the NUL */
-	dtype_set(dfield_get_type(field),
-		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
-
-	supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
-	if (UNIV_LIKELY(comp)) {
-		ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
-
-		rec_set_n_owned_new(supremum_rec, NULL, 1);
-		rec_set_heap_no_new(supremum_rec, 1);
-	} else {
-		ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
-
-		rec_set_n_owned_old(supremum_rec, 1);
-		rec_set_heap_no_old(supremum_rec, 1);
-	}
-
-	offsets = rec_get_offsets(supremum_rec, index, offsets,
-				  ULINT_UNDEFINED, &heap);
-	heap_top = rec_get_end(supremum_rec, offsets);
-
-	ut_ad(heap_top == page
-	      + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
-
-	mem_heap_free(heap);
-
-	/* 4. INITIALIZE THE PAGE */
-
-	page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2); /* one slot each for infimum and supremum, set in step 5 */
-	page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
-	page_header_set_field(page, NULL, PAGE_N_HEAP, comp
-			      ? 0x8000 | PAGE_HEAP_NO_USER_LOW
-			      : PAGE_HEAP_NO_USER_LOW);
-	page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
-	page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
-	page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
-	page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
-	page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
-	page_header_set_field(page, NULL, PAGE_N_RECS, 0);
-	page_set_max_trx_id(block, NULL, 0, NULL);
-	memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
-	       - page_offset(heap_top)); /* zero-fill from the heap top up to PAGE_EMPTY_DIR_START bytes before the page end */
-
-	/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
-
-	/* Set the slots to point to infimum and supremum. */
-
-	slot = page_dir_get_nth_slot(page, 0);
-	page_dir_slot_set_rec(slot, infimum_rec);
-
-	slot = page_dir_get_nth_slot(page, 1);
-	page_dir_slot_set_rec(slot, supremum_rec);
-
-	/* Set the next pointers in infimum and supremum */
-
-	if (UNIV_LIKELY(comp)) {
-		rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
-		rec_set_next_offs_new(supremum_rec, 0);
-	} else {
-		rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
-		rec_set_next_offs_old(supremum_rec, 0);
-	}
-
-	return(page);
-}
-
-/**********************************************************//**
-Create an uncompressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create(
-/*========*/
-	buf_block_t* block, /*!< in: a buffer block where the
-				page is created */
-	mtr_t* mtr, /*!< in: mini-transaction handle */
-	ulint comp) /*!< in: nonzero=compact page format */
-{
-	page_create_write_log(buf_block_get_frame(block), mtr, comp); /* the log record is written before the page is built */
-	return(page_create_low(block, comp));
-}
-
-/**********************************************************//**
-Create a compressed B-tree index page. 
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create_zip(
-/*============*/
-	buf_block_t* block, /*!< in/out: a buffer frame where the
-				page is created */
-	dict_index_t* index, /*!< in: the index of the page */
-	ulint level, /*!< in: the B-tree level of the page */
-	trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
-	mtr_t* mtr) /*!< in/out: mini-transaction */
-{
-	page_t* page;
-	page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-
-	ut_ad(block);
-	ut_ad(page_zip);
-	ut_ad(index);
-	ut_ad(dict_table_is_comp(index->table));
-
-	page = page_create_low(block, TRUE); /* always the compact format, matching the assertion above */
-	mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level); /* level and max trx id are written to the uncompressed frame before compressing */
-	mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id);
-
-	if (!page_zip_compress(page_zip, page, index,
-			       page_zip_level, mtr)) {
-		/* The compression of a newly created page
-		should always succeed. */
-		ut_error;
-	}
-
-	return(page);
-}
-
-/**********************************************************//**
-Empty a previously created B-tree index page. 
*/
-UNIV_INTERN
-void
-page_create_empty(
-/*==============*/
-	buf_block_t* block, /*!< in/out: B-tree block */
-	dict_index_t* index, /*!< in: the index of the page */
-	mtr_t* mtr) /*!< in/out: mini-transaction */
-{
-	trx_id_t max_trx_id = 0;
-	const page_t* page = buf_block_get_frame(block);
-	page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-
-	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-
-	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
-		max_trx_id = page_get_max_trx_id(page); /* saved here and written back after the page has been recreated */
-		ut_ad(max_trx_id);
-	}
-
-	if (page_zip) {
-		page_create_zip(block, index,
-				page_header_get_field(page, PAGE_LEVEL),
-				max_trx_id, mtr); /* the current PAGE_LEVEL is read from the header and passed on */
-	} else {
-		page_create(block, mtr, page_is_comp(page)); /* the record format of the page is kept */
-
-		if (max_trx_id) {
-			page_update_max_trx_id(
-				block, page_zip, max_trx_id, mtr); /* page_zip is NULL on this branch */
-		}
-	}
-}
-
-/*************************************************************//**
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). 
*/
-UNIV_INTERN
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
-	buf_block_t* new_block, /*!< in: index page to copy to */
-	buf_block_t* block, /*!< in: index page of rec */
-	rec_t* rec, /*!< in: record on page */
-	dict_index_t* index, /*!< in: record descriptor */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	page_t* new_page = buf_block_get_frame(new_block);
-	page_cur_t cur1;
-	rec_t* cur2;
-	mem_heap_t* heap = NULL;
-	ulint offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint* offsets = offsets_;
-	rec_offs_init(offsets_);
-
-	page_cur_position(rec, block, &cur1);
-
-	if (page_cur_is_before_first(&cur1)) { /* rec is the infimum: start from the record after it */
-
-		page_cur_move_to_next(&cur1);
-	}
-
-	btr_assert_not_corrupted(new_block, index);
-	ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
-	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
-	     (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); /* NOTE(review): offset UNIV_PAGE_SIZE - 10 is presumably directory slot 0, which should hold the infimum offset; confirm */
-
-	cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
-
-	/* Copy records from the original page to the new page */
-
-	while (!page_cur_is_after_last(&cur1)) {
-		rec_t* cur1_rec = page_cur_get_rec(&cur1);
-		rec_t* ins_rec;
-		offsets = rec_get_offsets(cur1_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		ins_rec = page_cur_insert_rec_low(cur2, index,
-						  cur1_rec, offsets, mtr);
-		if (UNIV_UNLIKELY(!ins_rec)) {
-			/* Track an assertion failure reported on the mailing
-			list on June 18th, 2003 */
-
-			buf_page_print(new_page, 0,
-				       BUF_PAGE_PRINT_NO_CRASH);
-			buf_page_print(page_align(rec), 0,
-				       BUF_PAGE_PRINT_NO_CRASH);
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				"InnoDB: rec offset %lu, cur1 offset %lu,"
-				" cur2 offset %lu\n",
-				(ulong) page_offset(rec),
-				(ulong) page_offset(page_cur_get_rec(&cur1)),
-				(ulong) page_offset(cur2));
-			ut_error;
-		}
-
-		page_cur_move_to_next(&cur1);
-		cur2 = ins_rec; /* the record just inserted becomes the position for the next insert */
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) { /* heap is only non-NULL if rec_get_offsets() had to allocate */
-		mem_heap_free(heap);
-	}
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Copies 
records from page to new_page, from a given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to the original successor of the infimum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_end(
-/*===================*/
-	buf_block_t* new_block, /*!< in/out: index page to copy to */
-	buf_block_t* block, /*!< in: index page containing rec */
-	rec_t* rec, /*!< in: record on page */
-	dict_index_t* index, /*!< in: record descriptor */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	page_t* new_page = buf_block_get_frame(new_block);
-	page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
-	page_t* page = page_align(rec);
-	rec_t* ret = page_rec_get_next(
-		page_get_infimum_rec(new_page));
-	ulint log_mode = 0; /* remove warning */
-
-#ifdef UNIV_ZIP_DEBUG
-	if (new_page_zip) {
-		page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-		ut_a(page_zip);
-
-		/* Strict page_zip_validate() may fail here.
-		Furthermore, btr_compress() may set FIL_PAGE_PREV to
-		FIL_NULL on new_page while leaving it intact on
-		new_page_zip. So, we cannot validate new_page_zip. */
-		ut_a(page_zip_validate_low(page_zip, page, index, TRUE));
-	}
-#endif /* UNIV_ZIP_DEBUG */
-	ut_ad(buf_block_get_frame(block) == page);
-	ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
-	ut_ad(page_is_comp(page) == page_is_comp(new_page));
-	/* Here, "ret" may be pointing to a user record or the
-	predefined supremum record. */
-
-	if (new_page_zip) {
-		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); /* the previous mode is restored below, before page_zip_compress() */
-	}
-
-	if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) { /* n_heap still has the value set at page creation */
-		page_copy_rec_list_end_to_created_page(new_page, rec,
-						       index, mtr);
-	} else {
-		page_copy_rec_list_end_no_locks(new_block, block, rec,
-						index, mtr);
-	}
-
-	/* Update PAGE_MAX_TRX_ID on the uncompressed page.
-	Modifications will be redo logged and copied to the compressed
-	page in page_zip_compress() or page_zip_reorganize() below. */
-	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
-		page_update_max_trx_id(new_block, NULL,
-				       page_get_max_trx_id(page), mtr);
-	}
-
-	if (new_page_zip) {
-		mtr_set_log_mode(mtr, log_mode);
-
-		if (!page_zip_compress(new_page_zip, new_page,
-				       index, page_zip_level, mtr)) {
-			/* Before trying to reorganize the page,
-			store the number of preceding records on the page. */
-			ulint ret_pos
-				= page_rec_get_n_recs_before(ret);
-			/* Before copying, "ret" was the successor of
-			the predefined infimum record. It must still
-			have at least one predecessor (the predefined
-			infimum record, or a freshly copied record
-			that is smaller than "ret"). */
-			ut_a(ret_pos > 0);
-
-			if (!page_zip_reorganize(new_block, index, mtr)) {
-
-				btr_blob_dbg_remove(new_page, index,
-						    "copy_end_reorg_fail");
-				if (!page_zip_decompress(new_page_zip,
-							 new_page, FALSE)) {
-					ut_error;
-				}
-				ut_ad(page_validate(new_page, index));
-				btr_blob_dbg_add(new_page, index,
-						 "copy_end_reorg_fail");
-				return(NULL);
-			} else {
-				/* The page was reorganized:
-				Seek to ret_pos. */
-				ret = new_page + PAGE_NEW_INFIMUM;
-
-				do {
-					ret = rec_get_next_ptr(ret, TRUE);
-				} while (--ret_pos); /* ret_pos > 0 was asserted above, so this takes exactly ret_pos steps from the infimum */
-			}
-		}
-	}
-
-	/* Update the lock table and possible hash index */
-
-	lock_move_rec_list_end(new_block, block, rec);
-
-	btr_search_move_or_delete_hash_entries(new_block, block, index);
-
-	return(ret);
-}
-
-/*************************************************************//**
-Copies records from page to new_page, up to the given record,
-NOT including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to the original predecessor of the supremum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_start(
-/*=====================*/
-	buf_block_t* new_block, /*!< in/out: index page to copy to */
-	buf_block_t* block, /*!< in: index page containing rec */
-	rec_t* rec, /*!< in: record on page */
-	dict_index_t* index, /*!< in: record descriptor */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	page_t* new_page = buf_block_get_frame(new_block);
-	page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
-	page_cur_t cur1;
-	rec_t* cur2;
-	ulint log_mode = 0 /* remove warning */;
-	mem_heap_t* heap = NULL;
-	rec_t* ret
-		= page_rec_get_prev(page_get_supremum_rec(new_page));
-	ulint offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint* offsets = offsets_;
-	rec_offs_init(offsets_);
-
-	/* Here, "ret" may be pointing to a user record or the
-	predefined infimum record. */
-
-	if (page_rec_is_infimum(rec)) { /* nothing precedes the infimum, so there is nothing to copy */
-
-		return(ret);
-	}
-
-	if (new_page_zip) {
-		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); /* the previous mode is restored below, before page_zip_compress() */
-	}
-
-	page_cur_set_before_first(block, &cur1);
-	page_cur_move_to_next(&cur1);
-
-	cur2 = ret;
-
-	/* Copy records from the original page to the new page */
-
-	while (page_cur_get_rec(&cur1) != rec) {
-		rec_t* cur1_rec = page_cur_get_rec(&cur1);
-		offsets = rec_get_offsets(cur1_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		cur2 = page_cur_insert_rec_low(cur2, index,
-					       cur1_rec, offsets, mtr);
-		ut_a(cur2);
-
-		page_cur_move_to_next(&cur1);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	/* Update PAGE_MAX_TRX_ID on the uncompressed page.
-	Modifications will be redo logged and copied to the compressed
-	page in page_zip_compress() or page_zip_reorganize() below. */
-	if (dict_index_is_sec_or_ibuf(index)
-	    && page_is_leaf(page_align(rec))) {
-		page_update_max_trx_id(new_block, NULL,
-				       page_get_max_trx_id(page_align(rec)),
-				       mtr);
-	}
-
-	if (new_page_zip) {
-		mtr_set_log_mode(mtr, log_mode);
-
-		DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
-				goto zip_reorganize;);
-
-		if (!page_zip_compress(new_page_zip, new_page, index,
-				       page_zip_level, mtr)) {
-
-			ulint ret_pos;
-#ifndef DBUG_OFF
-zip_reorganize:
-#endif /* DBUG_OFF */
-			/* Before trying to reorganize the page,
-			store the number of preceding records on the page. */
-			ret_pos = page_rec_get_n_recs_before(ret);
-			/* Before copying, "ret" was the predecessor
-			of the predefined supremum record. If it was
-			the predefined infimum record, then it would
-			still be the infimum, and we would have
-			ret_pos == 0. */
-
-			if (UNIV_UNLIKELY
-			    (!page_zip_reorganize(new_block, index, mtr))) {
-
-				btr_blob_dbg_remove(new_page, index,
-						    "copy_start_reorg_fail");
-				if (UNIV_UNLIKELY
-				    (!page_zip_decompress(new_page_zip,
-							  new_page, FALSE))) {
-					ut_error;
-				}
-				ut_ad(page_validate(new_page, index));
-				btr_blob_dbg_add(new_page, index,
-						 "copy_start_reorg_fail");
-				return(NULL);
-			}
-
-			/* The page was reorganized: Seek to ret_pos. */
-			ret = page_rec_get_nth(new_page, ret_pos);
-		}
-	}
-
-	/* Update the lock table and possible hash index */
-
-	lock_move_rec_list_start(new_block, block, rec, ret);
-
-	btr_search_move_or_delete_hash_entries(new_block, block, index);
-
-	return(ret);
-}
-
-/**********************************************************//**
-Writes a log record of a record list end or start deletion. */
-UNIV_INLINE
-void
-page_delete_rec_list_write_log(
-/*===========================*/
-	rec_t* rec, /*!< in: record on page */
-	dict_index_t* index, /*!< in: record descriptor */
-	byte type, /*!< in: operation type:
-				MLOG_LIST_END_DELETE, ... */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	byte* log_ptr;
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
-	if (log_ptr) { /* a NULL log_ptr means nothing is written here */
-		/* Write the parameter as a 2-byte ulint */
-		mach_write_to_2(log_ptr, page_offset(rec));
-		mlog_close(mtr, log_ptr + 2);
-	}
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Parses a log record of a record list end or start deletion. 
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
-	byte type, /*!< in: MLOG_LIST_END_DELETE,
-				MLOG_LIST_START_DELETE,
-				MLOG_COMP_LIST_END_DELETE or
-				MLOG_COMP_LIST_START_DELETE */
-	byte* ptr, /*!< in: buffer */
-	byte* end_ptr,/*!< in: buffer end */
-	buf_block_t* block, /*!< in/out: buffer block or NULL */
-	dict_index_t* index, /*!< in: record descriptor */
-	mtr_t* mtr) /*!< in: mtr or NULL */
-{
-	page_t* page;
-	ulint offset;
-
-	ut_ad(type == MLOG_LIST_END_DELETE
-	      || type == MLOG_LIST_START_DELETE
-	      || type == MLOG_COMP_LIST_END_DELETE
-	      || type == MLOG_COMP_LIST_START_DELETE);
-
-	/* Read the record offset as a 2-byte ulint */
-
-	if (end_ptr < ptr + 2) { /* fewer than 2 bytes left in the buffer: the record is incomplete */
-
-		return(NULL);
-	}
-
-	offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	if (!block) { /* no block to apply to: only skip over the record body */
-
-		return(ptr);
-	}
-
-	page = buf_block_get_frame(block);
-
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	if (type == MLOG_LIST_END_DELETE
-	    || type == MLOG_COMP_LIST_END_DELETE) {
-		page_delete_rec_list_end(page + offset, block, index,
-					 ULINT_UNDEFINED, ULINT_UNDEFINED,
-					 mtr); /* record count and size are passed as unknown */
-	} else {
-		page_delete_rec_list_start(page + offset, block, index, mtr);
-	}
-
-	return(ptr);
-}
-
-/*************************************************************//**
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. 
*/
-UNIV_INTERN
-void
-page_delete_rec_list_end(
-/*=====================*/
-	rec_t* rec, /*!< in: pointer to record on page */
-	buf_block_t* block, /*!< in: buffer block of the page */
-	dict_index_t* index, /*!< in: record descriptor */
-	ulint n_recs, /*!< in: number of records to delete,
-				or ULINT_UNDEFINED if not known */
-	ulint size, /*!< in: the sum of the sizes of the
-				records in the end of the chain to
-				delete, or ULINT_UNDEFINED if not known */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	page_dir_slot_t*slot;
-	ulint slot_index;
-	rec_t* last_rec;
-	rec_t* prev_rec;
-	ulint n_owned;
-	page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-	page_t* page = page_align(rec);
-	mem_heap_t* heap = NULL;
-	ulint offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint* offsets = offsets_;
-	rec_offs_init(offsets_);
-
-	ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
-	ut_ad(!page_zip || page_rec_is_comp(rec));
-#ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (page_rec_is_supremum(rec)) {
-		ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED);
-		/* Nothing to do, there are no records bigger than the
-		page supremum. */
-		return;
-	}
-
-	if (recv_recovery_is_on()) {
-		/* If we are replaying a redo log record, we must
-		replay it exactly. Since MySQL 5.6.11, we should be
-		generating a redo log record for page creation if
-		the page would become empty. Thus, this branch should
-		only be executed when applying redo log that was
-		generated by an older version of MySQL. */
-	} else if (page_rec_is_infimum(rec)
-		   || n_recs == page_get_n_recs(page)) {
-delete_all:
-		/* We are deleting all records. */
-		page_create_empty(block, index, mtr);
-		return;
-	} else if (page_is_comp(page)) {
-		if (page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1) == rec) {
-			/* We are deleting everything from the first
-			user record onwards. */
-			goto delete_all;
-		}
-	} else {
-		if (page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0) == rec) {
-			/* We are deleting everything from the first
-			user record onwards. */
-			goto delete_all;
-		}
-	}
-
-	/* Reset the last insert info in the page header and increment
-	the modify clock for the frame */
-
-	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
-
-	/* The page gets invalid for optimistic searches: increment the
-	frame modify clock */
-
-	buf_block_modify_clock_inc(block);
-
-	page_delete_rec_list_write_log(rec, index, page_is_comp(page)
-				       ? MLOG_COMP_LIST_END_DELETE
-				       : MLOG_LIST_END_DELETE, mtr);
-
-	if (page_zip) {
-		ulint log_mode;
-
-		ut_a(page_is_comp(page));
-		/* Individual deletes are not logged */
-
-		log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-		do {
-			page_cur_t cur;
-			page_cur_position(rec, block, &cur);
-
-			offsets = rec_get_offsets(rec, index, offsets,
-						  ULINT_UNDEFINED, &heap);
-			rec = rec_get_next_ptr(rec, TRUE); /* the successor is fetched before the current record is deleted */
-#ifdef UNIV_ZIP_DEBUG
-			ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-			page_cur_delete_rec(&cur, index, offsets, mtr);
-		} while (page_offset(rec) != PAGE_NEW_SUPREMUM);
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-
-		/* Restore log mode */
-
-		mtr_set_log_mode(mtr, log_mode);
-		return;
-	}
-
-	prev_rec = page_rec_get_prev(rec);
-
-	last_rec = page_rec_get_prev(page_get_supremum_rec(page));
-
-	bool scrub = srv_immediate_scrub_data_uncompressed; /* when set, the loop below always runs and zero-fills each deleted record */
-	if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED) ||
-	    scrub) {
-		rec_t* rec2 = rec;
-		/* Calculate the sum of sizes and the number of records */
-		size = 0;
-		n_recs = 0;
-
-		do {
-			ulint s;
-			offsets = rec_get_offsets(rec2, index, offsets,
-						  ULINT_UNDEFINED, &heap);
-			s = rec_offs_size(offsets);
-			ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
-			      < UNIV_PAGE_SIZE);
-			ut_ad(size + s < UNIV_PAGE_SIZE);
-			size += s;
-			n_recs++;
-
-			if (scrub) {
-				/* scrub record */
-				uint recsize = rec_offs_data_size(offsets);
-				memset(rec2, 0, recsize); /* zeroes rec_offs_data_size() bytes starting at rec2 */
-			}
-
-			rec2 = page_rec_get_next(rec2);
-		} while (!page_rec_is_supremum(rec2));
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	}
-
-	ut_ad(size < UNIV_PAGE_SIZE);
-
-	/* Update the page directory; there is no need to balance the number
-	of the records owned by the supremum record, as it is allowed to be
-	less than PAGE_DIR_SLOT_MIN_N_OWNED */
-
-	if (page_is_comp(page)) {
-		rec_t* rec2 = rec;
-		ulint count = 0;
-
-		while (rec_get_n_owned_new(rec2) == 0) { /* count counts rec and the records after it, up to (not including) the first one with n_owned != 0 */
-			count++;
-
-			rec2 = rec_get_next_ptr(rec2, TRUE);
-		}
-
-		ut_ad(rec_get_n_owned_new(rec2) > count);
-
-		n_owned = rec_get_n_owned_new(rec2) - count;
-		slot_index = page_dir_find_owner_slot(rec2);
-		ut_ad(slot_index > 0);
-		slot = page_dir_get_nth_slot(page, slot_index);
-	} else {
-		rec_t* rec2 = rec;
-		ulint count = 0;
-
-		while (rec_get_n_owned_old(rec2) == 0) { /* same computation for the old-style format */
-			count++;
-
-			rec2 = rec_get_next_ptr(rec2, FALSE);
-		}
-
-		ut_ad(rec_get_n_owned_old(rec2) > count);
-
-		n_owned = rec_get_n_owned_old(rec2) - count;
-		slot_index = page_dir_find_owner_slot(rec2);
-		ut_ad(slot_index > 0);
-		slot = page_dir_get_nth_slot(page, slot_index);
-	}
-
-	page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
-	page_dir_slot_set_n_owned(slot, NULL, n_owned);
-
-	page_dir_set_n_slots(page, NULL, slot_index + 1); /* slot_index becomes the last directory slot */
-
-	/* Remove the record chain segment from the record chain */
-	page_rec_set_next(prev_rec, page_get_supremum_rec(page));
-
-	btr_blob_dbg_op(page, rec, index, "delete_end",
-			btr_blob_dbg_remove_rec);
-
-	/* Catenate the deleted chain segment to the page free list */
-
-	page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
-	page_header_set_ptr(page, NULL, PAGE_FREE, rec);
-
-	page_header_set_field(page, NULL, PAGE_GARBAGE, size
-			      + page_header_get_field(page, PAGE_GARBAGE));
-
-	page_header_set_field(page, NULL, PAGE_N_RECS,
-			      (ulint)(page_get_n_recs(page) - n_recs));
-}
-
-/*************************************************************//**
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted.
-If rec is the infimum, nothing is done; if rec is the supremum, the
-page is recreated empty with page_create_empty(). */
-UNIV_INTERN
-void
-page_delete_rec_list_start(
-/*=======================*/
-	rec_t* rec, /*!< in: record on page */
-	buf_block_t* block, /*!< in: buffer block of the page */
-	dict_index_t* index, /*!< in: record descriptor */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	page_cur_t cur1;
-	ulint log_mode;
-	ulint offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint* offsets = offsets_;
-	mem_heap_t* heap = NULL;
-	byte type;
-
-	rec_offs_init(offsets_);
-
-	ut_ad((ibool) !!page_rec_is_comp(rec)
-	      == dict_table_is_comp(index->table));
-#ifdef UNIV_ZIP_DEBUG
-	{
-		page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-		page_t* page = buf_block_get_frame(block);
-
-		/* page_zip_validate() would detect a min_rec_mark mismatch
-		in btr_page_split_and_insert()
-		between btr_attach_half_pages() and insert_page = ...
-		when btr_page_get_split_rec_to_left() holds
-		(direction == FSP_DOWN). */
-		ut_a(!page_zip
-		     || page_zip_validate_low(page_zip, page, index, TRUE));
-	}
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (page_rec_is_infimum(rec)) { /* nothing precedes the infimum */
-		return;
-	}
-
-	if (page_rec_is_supremum(rec)) {
-		/* We are deleting all records. */
-		page_create_empty(block, index, mtr);
-		return;
-	}
-
-	if (page_rec_is_comp(rec)) {
-		type = MLOG_COMP_LIST_START_DELETE;
-	} else {
-		type = MLOG_LIST_START_DELETE;
-	}
-
-	page_delete_rec_list_write_log(rec, index, type, mtr); /* one log record covers the whole operation */
-
-	page_cur_set_before_first(block, &cur1);
-	page_cur_move_to_next(&cur1);
-
-	/* Individual deletes are not logged */
-
-	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-	while (page_cur_get_rec(&cur1) != rec) { /* no explicit advance: presumably page_cur_delete_rec() leaves cur1 on the next record; confirm */
-		offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
-					  offsets, ULINT_UNDEFINED, &heap);
-		page_cur_delete_rec(&cur1, index, offsets, mtr);
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	/* Restore log mode */
-
-	mtr_set_log_mode(mtr, log_mode);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Moves record list end to another page. Moved records include
-split_rec.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). 
-
-@return TRUE on success; FALSE on compression failure (new_block will
-be decompressed) */
-UNIV_INTERN
-ibool
-page_move_rec_list_end(
-/*===================*/
-	buf_block_t* new_block, /*!< in/out: index page where to move */
-	buf_block_t* block, /*!< in: index page from where to move */
-	rec_t* split_rec, /*!< in: first record to move */
-	dict_index_t* index, /*!< in: record descriptor */
-	mtr_t* mtr) /*!< in: mtr */
-{
-	page_t* new_page = buf_block_get_frame(new_block);
-	ulint old_data_size;
-	ulint new_data_size;
-	ulint old_n_recs;
-	ulint new_n_recs;
-
-	old_data_size = page_get_data_size(new_page); /* snapshot of new_page before the copy */
-	old_n_recs = page_get_n_recs(new_page);
-#ifdef UNIV_ZIP_DEBUG
-	{
-		page_zip_des_t* new_page_zip
-			= buf_block_get_page_zip(new_block);
-		page_zip_des_t* page_zip
-			= buf_block_get_page_zip(block);
-		ut_a(!new_page_zip == !page_zip);
-		ut_a(!new_page_zip
-		     || page_zip_validate(new_page_zip, new_page, index));
-		ut_a(!page_zip
-		     || page_zip_validate(page_zip, page_align(split_rec),
-					  index));
-	}
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
-						  split_rec, index, mtr))) {
-		return(FALSE); /* the copy failed; nothing is deleted from block */
-	}
-
-	new_data_size = page_get_data_size(new_page);
-	new_n_recs = page_get_n_recs(new_page);
-
-	ut_ad(new_data_size >= old_data_size);
-
-	page_delete_rec_list_end(split_rec, block, index,
-				 new_n_recs - old_n_recs,
-				 new_data_size - old_data_size, mtr); /* the growth of new_page gives the count and size of the records to delete */
-
-	return(TRUE);
-}
-
-/*************************************************************//**
-Moves record list start to another page. Moved records do not include
-split_rec.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). 
- -@return TRUE on success; FALSE on compression failure */ -UNIV_INTERN -ibool -page_move_rec_list_start( -/*=====================*/ - buf_block_t* new_block, /*!< in/out: index page where to move */ - buf_block_t* block, /*!< in/out: page containing split_rec */ - rec_t* split_rec, /*!< in: first record not to move */ - dict_index_t* index, /*!< in: record descriptor */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block, - split_rec, index, mtr))) { - return(FALSE); - } - - page_delete_rec_list_start(split_rec, block, index, mtr); - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ - -/**************************************************************//** -Used to delete n slots from the directory. This function updates -also n_owned fields in the records, so that the first slot after -the deleted ones inherits the records of the deleted slots. */ -UNIV_INLINE -void -page_dir_delete_slot( -/*=================*/ - page_t* page, /*!< in/out: the index page */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - ulint slot_no)/*!< in: slot to be deleted */ -{ - page_dir_slot_t* slot; - ulint n_owned; - ulint i; - ulint n_slots; - - ut_ad(!page_zip || page_is_comp(page)); - ut_ad(slot_no > 0); - ut_ad(slot_no + 1 < page_dir_get_n_slots(page)); - - n_slots = page_dir_get_n_slots(page); - - /* 1. Reset the n_owned fields of the slots to be - deleted */ - slot = page_dir_get_nth_slot(page, slot_no); - n_owned = page_dir_slot_get_n_owned(slot); - page_dir_slot_set_n_owned(slot, page_zip, 0); - - /* 2. Update the n_owned value of the first non-deleted slot */ - - slot = page_dir_get_nth_slot(page, slot_no + 1); - page_dir_slot_set_n_owned(slot, page_zip, - n_owned + page_dir_slot_get_n_owned(slot)); - - /* 3. 
Destroy the slot by copying slots */ - for (i = slot_no + 1; i < n_slots; i++) { - rec_t* rec = (rec_t*) - page_dir_slot_get_rec(page_dir_get_nth_slot(page, i)); - page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec); - } - - /* 4. Zero out the last slot, which will be removed */ - mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0); - - /* 5. Update the page header */ - page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1); -} - -/**************************************************************//** -Used to add n slots to the directory. Does not set the record pointers -in the added slots or update n_owned values: this is the responsibility -of the caller. */ -UNIV_INLINE -void -page_dir_add_slot( -/*==============*/ - page_t* page, /*!< in/out: the index page */ - page_zip_des_t* page_zip,/*!< in/out: comprssed page, or NULL */ - ulint start) /*!< in: the slot above which the new slots - are added */ -{ - page_dir_slot_t* slot; - ulint n_slots; - - n_slots = page_dir_get_n_slots(page); - - ut_ad(start < n_slots - 1); - - /* Update the page header */ - page_dir_set_n_slots(page, page_zip, n_slots + 1); - - /* Move slots up */ - slot = page_dir_get_nth_slot(page, n_slots); - memmove(slot, slot + PAGE_DIR_SLOT_SIZE, - (n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE); -} - -/****************************************************************//** -Splits a directory slot which owns too many records. 
*/
UNIV_INTERN
void
page_dir_split_slot(
/*================*/
	page_t*		page,	/*!< in/out: index page */
	page_zip_des_t*	page_zip,/*!< in/out: compressed page whose
				uncompressed part will be written, or NULL */
	ulint		slot_no)/*!< in: the directory slot */
{
	ut_ad(!page_zip || page_is_comp(page));
	ut_ad(slot_no > 0);

	const ulint	n_owned = page_dir_slot_get_n_owned(
		page_dir_get_nth_slot(page, slot_no));
	/* Number of records that the new, lower slot will own */
	const ulint	lower_half = n_owned / 2;

	ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);

	/* 1. Starting from the record of the previous slot, step
	lower_half records forward. This lands on a record
	approximately in the middle of those owned by the slot. */
	rec_t*	middle_rec = (rec_t*) page_dir_slot_get_rec(
		page_dir_get_nth_slot(page, slot_no - 1));

	for (ulint steps = lower_half; steps > 0; steps--) {
		middle_rec = page_rec_get_next(middle_rec);
	}

	ut_ad(lower_half >= PAGE_DIR_SLOT_MIN_N_OWNED);

	/* 2. Insert one directory slot immediately below the slot
	being split. Afterwards the new slot has number slot_no and
	the original slot has number slot_no + 1. */
	page_dir_add_slot(page, page_zip, slot_no - 1);

	page_dir_slot_t*	lower_slot
		= page_dir_get_nth_slot(page, slot_no);
	page_dir_slot_t*	upper_slot
		= page_dir_get_nth_slot(page, slot_no + 1);

	/* 3. The new slot points to the middle record and owns the
	lower half */
	page_dir_slot_set_rec(lower_slot, middle_rec);
	page_dir_slot_set_n_owned(lower_slot, page_zip, lower_half);

	/* 4. The original slot keeps the remaining records */
	page_dir_slot_set_n_owned(upper_slot, page_zip,
				  n_owned - lower_half);
}

/*************************************************************//**
Tries to balance the given directory slot with too few records with the upper
neighbor, so that there are at least the minimum number of records owned by
the slot; this may result in the merging of two slots.
*/
UNIV_INTERN
void
page_dir_balance_slot(
/*==================*/
	page_t*		page,	/*!< in/out: index page */
	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
	ulint		slot_no)/*!< in: the directory slot */
{
	ut_ad(!page_zip || page_is_comp(page));
	ut_ad(slot_no > 0);

	page_dir_slot_t*	low_slot = page_dir_get_nth_slot(page, slot_no);

	/* The last directory slot has no upper neighbor to balance
	with, so there is nothing to do for it. */
	if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {

		return;
	}

	page_dir_slot_t*	high_slot
		= page_dir_get_nth_slot(page, slot_no + 1);

	const ulint	low_owned = page_dir_slot_get_n_owned(low_slot);
	const ulint	high_owned = page_dir_slot_get_n_owned(high_slot);

	ut_ad(low_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);

	/* If the upper slot has the minimum value of n_owned, the two
	slots are merged, and the merged slot must not exceed the
	maximum; therefore we assert: */
	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);

	if (high_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
		/* The upper slot cannot spare a record: merge the two
		slots by deleting the lower one */
		page_dir_delete_slot(page, page_zip, slot_no);

		return;
	}

	/* The upper slot can spare a record: the record following the
	current slot record becomes the new slot record of the lower
	slot, which thereby owns one record more. */
	rec_t*	prev_owner = (rec_t*) page_dir_slot_get_rec(low_slot);
	rec_t*	next_owner;

	if (page_is_comp(page)) {
		next_owner = rec_get_next_ptr(prev_owner, TRUE);

		rec_set_n_owned_new(prev_owner, page_zip, 0);
		rec_set_n_owned_new(next_owner, page_zip, low_owned + 1);
	} else {
		next_owner = rec_get_next_ptr(prev_owner, FALSE);

		rec_set_n_owned_old(prev_owner, 0);
		rec_set_n_owned_old(next_owner, low_owned + 1);
	}

	page_dir_slot_set_rec(low_slot, next_owner);

	page_dir_slot_set_n_owned(high_slot, page_zip, high_owned - 1);
}

/************************************************************//**
Returns the nth record of the record list.
This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */ -UNIV_INTERN -const rec_t* -page_rec_get_nth_const( -/*===================*/ - const page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ -{ - const page_dir_slot_t* slot; - ulint i; - ulint n_owned; - const rec_t* rec; - - if (nth == 0) { - return(page_get_infimum_rec(page)); - } - - ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); - - for (i = 0;; i++) { - - slot = page_dir_get_nth_slot(page, i); - n_owned = page_dir_slot_get_n_owned(slot); - - if (n_owned > nth) { - break; - } else { - nth -= n_owned; - } - } - - ut_ad(i > 0); - slot = page_dir_get_nth_slot(page, i - 1); - rec = page_dir_slot_get_rec(slot); - - if (page_is_comp(page)) { - do { - rec = page_rec_get_next_low(rec, TRUE); - ut_ad(rec); - } while (nth--); - } else { - do { - rec = page_rec_get_next_low(rec, FALSE); - ut_ad(rec); - } while (nth--); - } - - return(rec); -} - -/***************************************************************//** -Returns the number of records before the given record in chain. -The number includes infimum and supremum records. 
-@return number of records */ -UNIV_INTERN -ulint -page_rec_get_n_recs_before( -/*=======================*/ - const rec_t* rec) /*!< in: the physical record */ -{ - const page_dir_slot_t* slot; - const rec_t* slot_rec; - const page_t* page; - ulint i; - lint n = 0; - - ut_ad(page_rec_check(rec)); - - page = page_align(rec); - if (page_is_comp(page)) { - while (rec_get_n_owned_new(rec) == 0) { - - rec = rec_get_next_ptr_const(rec, TRUE); - n--; - } - - for (i = 0; ; i++) { - slot = page_dir_get_nth_slot(page, i); - slot_rec = page_dir_slot_get_rec(slot); - - n += rec_get_n_owned_new(slot_rec); - - if (rec == slot_rec) { - - break; - } - } - } else { - while (rec_get_n_owned_old(rec) == 0) { - - rec = rec_get_next_ptr_const(rec, FALSE); - n--; - } - - for (i = 0; ; i++) { - slot = page_dir_get_nth_slot(page, i); - slot_rec = page_dir_slot_get_rec(slot); - - n += rec_get_n_owned_old(slot_rec); - - if (rec == slot_rec) { - - break; - } - } - } - - n--; - - ut_ad(n >= 0); - ut_ad((ulong) n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); - - return((ulint) n); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Prints record contents including the data relevant only in -the index page context. 
*/ -UNIV_INTERN -void -page_rec_print( -/*===========*/ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: record descriptor */ -{ - ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); - rec_print_new(stderr, rec, offsets); - if (page_rec_is_comp(rec)) { - fprintf(stderr, - " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned_new(rec), - (ulong) rec_get_heap_no_new(rec), - (ulong) rec_get_next_offs(rec, TRUE)); - } else { - fprintf(stderr, - " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned_old(rec), - (ulong) rec_get_heap_no_old(rec), - (ulong) rec_get_next_offs(rec, FALSE)); - } - - page_rec_check(rec); - rec_validate(rec, offsets); -} - -# ifdef UNIV_BTR_PRINT -/***************************************************************//** -This is used to print the contents of the directory for -debugging purposes. */ -UNIV_INTERN -void -page_dir_print( -/*===========*/ - page_t* page, /*!< in: index page */ - ulint pr_n) /*!< in: print n first and n last entries */ -{ - ulint n; - ulint i; - page_dir_slot_t* slot; - - n = page_dir_get_n_slots(page); - - fprintf(stderr, "--------------------------------\n" - "PAGE DIRECTORY\n" - "Page address %p\n" - "Directory stack top at offs: %lu; number of slots: %lu\n", - page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)), - (ulong) n); - for (i = 0; i < n; i++) { - slot = page_dir_get_nth_slot(page, i); - if ((i == pr_n) && (i < n - pr_n)) { - fputs(" ... 
\n", stderr); - } - if ((i < pr_n) || (i >= n - pr_n)) { - fprintf(stderr, - "Contents of slot: %lu: n_owned: %lu," - " rec offs: %lu\n", - (ulong) i, - (ulong) page_dir_slot_get_n_owned(slot), - (ulong) - page_offset(page_dir_slot_get_rec(slot))); - } - } - fprintf(stderr, "Total of %lu records\n" - "--------------------------------\n", - (ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page))); -} - -/***************************************************************//** -This is used to print the contents of the page record list for -debugging purposes. */ -UNIV_INTERN -void -page_print_list( -/*============*/ - buf_block_t* block, /*!< in: index page */ - dict_index_t* index, /*!< in: dictionary index of the page */ - ulint pr_n) /*!< in: print n first and n last entries */ -{ - page_t* page = block->frame; - page_cur_t cur; - ulint count; - ulint n_recs; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); - - fprintf(stderr, - "--------------------------------\n" - "PAGE RECORD LIST\n" - "Page address %p\n", page); - - n_recs = page_get_n_recs(page); - - page_cur_set_before_first(block, &cur); - count = 0; - for (;;) { - offsets = rec_get_offsets(cur.rec, index, offsets, - ULINT_UNDEFINED, &heap); - page_rec_print(cur.rec, offsets); - - if (count == pr_n) { - break; - } - if (page_cur_is_after_last(&cur)) { - break; - } - page_cur_move_to_next(&cur); - count++; - } - - if (n_recs > 2 * pr_n) { - fputs(" ... 
\n", stderr); - } - - while (!page_cur_is_after_last(&cur)) { - page_cur_move_to_next(&cur); - - if (count + pr_n >= n_recs) { - offsets = rec_get_offsets(cur.rec, index, offsets, - ULINT_UNDEFINED, &heap); - page_rec_print(cur.rec, offsets); - } - count++; - } - - fprintf(stderr, - "Total of %lu records \n" - "--------------------------------\n", - (ulong) (count + 1)); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/***************************************************************//** -Prints the info in a page header. */ -UNIV_INTERN -void -page_header_print( -/*==============*/ - const page_t* page) -{ - fprintf(stderr, - "--------------------------------\n" - "PAGE HEADER INFO\n" - "Page address %p, n records %lu (%s)\n" - "n dir slots %lu, heap top %lu\n" - "Page n heap %lu, free %lu, garbage %lu\n" - "Page last insert %lu, direction %lu, n direction %lu\n", - page, (ulong) page_header_get_field(page, PAGE_N_RECS), - page_is_comp(page) ? "compact format" : "original format", - (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS), - (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) page_dir_get_n_heap(page), - (ulong) page_header_get_field(page, PAGE_FREE), - (ulong) page_header_get_field(page, PAGE_GARBAGE), - (ulong) page_header_get_field(page, PAGE_LAST_INSERT), - (ulong) page_header_get_field(page, PAGE_DIRECTION), - (ulong) page_header_get_field(page, PAGE_N_DIRECTION)); -} - -/***************************************************************//** -This is used to print the contents of the page for -debugging purposes. 
*/ -UNIV_INTERN -void -page_print( -/*=======*/ - buf_block_t* block, /*!< in: index page */ - dict_index_t* index, /*!< in: dictionary index of the page */ - ulint dn, /*!< in: print dn first and last entries - in directory */ - ulint rn) /*!< in: print rn first and last records - in directory */ -{ - page_t* page = block->frame; - - page_header_print(page); - page_dir_print(page, dn); - page_print_list(block, index, rn); -} -# endif /* UNIV_BTR_PRINT */ -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************************//** -The following is used to validate a record on a page. This function -differs from rec_validate as it can also check the n_owned field and -the heap_no field. -@return TRUE if ok */ -UNIV_INTERN -ibool -page_rec_validate( -/*==============*/ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint n_owned; - ulint heap_no; - const page_t* page; - - page = page_align(rec); - ut_a(!page_is_comp(page) == !rec_offs_comp(offsets)); - - page_rec_check(rec); - rec_validate(rec, offsets); - - if (page_rec_is_comp(rec)) { - n_owned = rec_get_n_owned_new(rec); - heap_no = rec_get_heap_no_new(rec); - } else { - n_owned = rec_get_n_owned_old(rec); - heap_no = rec_get_heap_no_old(rec); - } - - if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) { - fprintf(stderr, - "InnoDB: Dir slot of rec %lu, n owned too big %lu\n", - (ulong) page_offset(rec), (ulong) n_owned); - return(FALSE); - } - - if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) { - fprintf(stderr, - "InnoDB: Heap no of rec %lu too big %lu %lu\n", - (ulong) page_offset(rec), (ulong) heap_no, - (ulong) page_dir_get_n_heap(page)); - return(FALSE); - } - - return(TRUE); -} - -#ifndef UNIV_HOTBACKUP -/***************************************************************//** -Checks that the first directory slot points to the infimum record and -the last to the supremum. 
This function is intended to track if the -bug fixed in 4.0.14 has caused corruption to users' databases. */ -UNIV_INTERN -void -page_check_dir( -/*===========*/ - const page_t* page) /*!< in: index page */ -{ - ulint n_slots; - ulint infimum_offs; - ulint supremum_offs; - - n_slots = page_dir_get_n_slots(page); - infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0)); - supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page, - n_slots - 1)); - - if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) { - - fprintf(stderr, - "InnoDB: Page directory corruption:" - " infimum not pointed to\n"); - buf_page_print(page, 0, 0); - } - - if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) { - - fprintf(stderr, - "InnoDB: Page directory corruption:" - " supremum not pointed to\n"); - buf_page_print(page, 0, 0); - } -} -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************************//** -This function checks the consistency of an index page when we do not -know the index. This is also resilient so that this should never crash -even if the page is total garbage. -@return TRUE if ok */ -UNIV_INTERN -ibool -page_simple_validate_old( -/*=====================*/ - const page_t* page) /*!< in: index page in ROW_FORMAT=REDUNDANT */ -{ - const page_dir_slot_t* slot; - ulint slot_no; - ulint n_slots; - const rec_t* rec; - const byte* rec_heap_top; - ulint count; - ulint own_count; - ibool ret = FALSE; - - ut_a(!page_is_comp(page)); - - /* Check first that the record heap and the directory do not - overlap. 
*/ - - n_slots = page_dir_get_n_slots(page); - - if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) { - fprintf(stderr, - "InnoDB: Nonsensical number %lu of page dir slots\n", - (ulong) n_slots); - - goto func_exit; - } - - rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP); - - if (UNIV_UNLIKELY(rec_heap_top - > page_dir_get_nth_slot(page, n_slots - 1))) { - - fprintf(stderr, - "InnoDB: Record heap and dir overlap on a page," - " heap top %lu, dir %lu\n", - (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) - page_offset(page_dir_get_nth_slot(page, n_slots - 1))); - - goto func_exit; - } - - /* Validate the record list in a loop checking also that it is - consistent with the page record directory. */ - - count = 0; - own_count = 1; - slot_no = 0; - slot = page_dir_get_nth_slot(page, slot_no); - - rec = page_get_infimum_rec(page); - - for (;;) { - if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Record %lu is above" - " rec heap top %lu\n", - (ulong)(rec - page), - (ulong)(rec_heap_top - page)); - - goto func_exit; - } - - if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) { - /* This is a record pointed to by a dir slot */ - if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) - != own_count)) { - - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu," - " rec %lu\n", - (ulong) rec_get_n_owned_old(rec), - (ulong) own_count, - (ulong)(rec - page)); - - goto func_exit; - } - - if (UNIV_UNLIKELY - (page_dir_slot_get_rec(slot) != rec)) { - fprintf(stderr, - "InnoDB: Dir slot does not point" - " to right rec %lu\n", - (ulong)(rec - page)); - - goto func_exit; - } - - own_count = 0; - - if (!page_rec_is_supremum(rec)) { - slot_no++; - slot = page_dir_get_nth_slot(page, slot_no); - } - } - - if (page_rec_is_supremum(rec)) { - - break; - } - - if (UNIV_UNLIKELY - (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA - || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Next record offset" - " nonsensical %lu for rec %lu\n", - 
(ulong) rec_get_next_offs(rec, FALSE), - (ulong) (rec - page)); - - goto func_exit; - } - - count++; - - if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page record list appears" - " to be circular %lu\n", - (ulong) count); - goto func_exit; - } - - rec = page_rec_get_next_const(rec); - own_count++; - } - - if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { - fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n"); - - goto func_exit; - } - - if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { - fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); - goto func_exit; - } - - if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW - != count + 1)) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW, - (ulong) (count + 1)); - - goto func_exit; - } - - /* Check then the free list */ - rec = page_header_get_ptr(page, PAGE_FREE); - - while (rec != NULL) { - if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA - || rec >= page + UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Free list record has" - " a nonsensical offset %lu\n", - (ulong) (rec - page)); - - goto func_exit; - } - - if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Free list record %lu" - " is above rec heap top %lu\n", - (ulong) (rec - page), - (ulong) (rec_heap_top - page)); - - goto func_exit; - } - - count++; - - if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page free list appears" - " to be circular %lu\n", - (ulong) count); - goto func_exit; - } - - rec = page_rec_get_next_const(rec); - } - - if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { - - fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) (count + 1)); - - goto func_exit; - } - - ret = TRUE; - -func_exit: - return(ret); -} - 
-/***************************************************************//** -This function checks the consistency of an index page when we do not -know the index. This is also resilient so that this should never crash -even if the page is total garbage. -@return TRUE if ok */ -UNIV_INTERN -ibool -page_simple_validate_new( -/*=====================*/ - const page_t* page) /*!< in: index page in ROW_FORMAT!=REDUNDANT */ -{ - const page_dir_slot_t* slot; - ulint slot_no; - ulint n_slots; - const rec_t* rec; - const byte* rec_heap_top; - ulint count; - ulint own_count; - ibool ret = FALSE; - - ut_a(page_is_comp(page)); - - /* Check first that the record heap and the directory do not - overlap. */ - - n_slots = page_dir_get_n_slots(page); - - if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) { - fprintf(stderr, - "InnoDB: Nonsensical number %lu" - " of page dir slots\n", (ulong) n_slots); - - goto func_exit; - } - - rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP); - - if (UNIV_UNLIKELY(rec_heap_top - > page_dir_get_nth_slot(page, n_slots - 1))) { - - fprintf(stderr, - "InnoDB: Record heap and dir overlap on a page," - " heap top %lu, dir %lu\n", - (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) - page_offset(page_dir_get_nth_slot(page, n_slots - 1))); - - goto func_exit; - } - - /* Validate the record list in a loop checking also that it is - consistent with the page record directory. 
*/ - - count = 0; - own_count = 1; - slot_no = 0; - slot = page_dir_get_nth_slot(page, slot_no); - - rec = page_get_infimum_rec(page); - - for (;;) { - if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Record %lu is above rec" - " heap top %lu\n", - (ulong) page_offset(rec), - (ulong) page_offset(rec_heap_top)); - - goto func_exit; - } - - if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) { - /* This is a record pointed to by a dir slot */ - if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) - != own_count)) { - - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu," - " rec %lu\n", - (ulong) rec_get_n_owned_new(rec), - (ulong) own_count, - (ulong) page_offset(rec)); - - goto func_exit; - } - - if (UNIV_UNLIKELY - (page_dir_slot_get_rec(slot) != rec)) { - fprintf(stderr, - "InnoDB: Dir slot does not point" - " to right rec %lu\n", - (ulong) page_offset(rec)); - - goto func_exit; - } - - own_count = 0; - - if (!page_rec_is_supremum(rec)) { - slot_no++; - slot = page_dir_get_nth_slot(page, slot_no); - } - } - - if (page_rec_is_supremum(rec)) { - - break; - } - - if (UNIV_UNLIKELY - (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA - || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Next record offset nonsensical %lu" - " for rec %lu\n", - (ulong) rec_get_next_offs(rec, TRUE), - (ulong) page_offset(rec)); - - goto func_exit; - } - - count++; - - if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page record list appears" - " to be circular %lu\n", - (ulong) count); - goto func_exit; - } - - rec = page_rec_get_next_const(rec); - own_count++; - } - - if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) { - fprintf(stderr, "InnoDB: n owned is zero" - " in a supremum rec\n"); - - goto func_exit; - } - - if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { - fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); - goto func_exit; - } - - if (UNIV_UNLIKELY(page_header_get_field(page, 
PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW - != count + 1)) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW, - (ulong) (count + 1)); - - goto func_exit; - } - - /* Check then the free list */ - rec = page_header_get_ptr(page, PAGE_FREE); - - while (rec != NULL) { - if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA - || rec >= page + UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Free list record has" - " a nonsensical offset %lu\n", - (ulong) page_offset(rec)); - - goto func_exit; - } - - if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Free list record %lu" - " is above rec heap top %lu\n", - (ulong) page_offset(rec), - (ulong) page_offset(rec_heap_top)); - - goto func_exit; - } - - count++; - - if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page free list appears" - " to be circular %lu\n", - (ulong) count); - goto func_exit; - } - - rec = page_rec_get_next_const(rec); - } - - if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { - - fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) (count + 1)); - - goto func_exit; - } - - ret = TRUE; - -func_exit: - return(ret); -} - -/***************************************************************//** -This function checks the consistency of an index page. 
-@return TRUE if ok */ -UNIV_INTERN -ibool -page_validate( -/*==========*/ - const page_t* page, /*!< in: index page */ - dict_index_t* index) /*!< in: data dictionary index containing - the page record type definition */ -{ - const page_dir_slot_t* slot; - mem_heap_t* heap; - byte* buf; - ulint count; - ulint own_count; - ulint rec_own_count; - ulint slot_no; - ulint data_size; - const rec_t* rec; - const rec_t* old_rec = NULL; - ulint offs; - ulint n_slots; - ibool ret = FALSE; - ulint i; - ulint* offsets = NULL; - ulint* old_offsets = NULL; - - if (UNIV_UNLIKELY((ibool) !!page_is_comp(page) - != dict_table_is_comp(index->table))) { - fputs("InnoDB: 'compact format' flag mismatch\n", stderr); - goto func_exit2; - } - if (page_is_comp(page)) { - if (UNIV_UNLIKELY(!page_simple_validate_new(page))) { - goto func_exit2; - } - } else { - if (UNIV_UNLIKELY(!page_simple_validate_old(page))) { - goto func_exit2; - } - } - - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page) - && !page_is_empty(page)) { - trx_id_t max_trx_id = page_get_max_trx_id(page); - trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id(); - - if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) { - ib_logf(IB_LOG_LEVEL_ERROR, - "PAGE_MAX_TRX_ID out of bounds: " - TRX_ID_FMT ", " TRX_ID_FMT, - max_trx_id, sys_max_trx_id); - goto func_exit2; - } - } - - heap = mem_heap_create(UNIV_PAGE_SIZE + 200); - - /* The following buffer is used to check that the - records in the page record heap do not overlap */ - - buf = static_cast<byte*>(mem_heap_zalloc(heap, UNIV_PAGE_SIZE)); - - /* Check first that the record heap and the directory do not - overlap. 
*/ - - n_slots = page_dir_get_n_slots(page); - - if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP) - <= page_dir_get_nth_slot(page, n_slots - 1)))) { - - fprintf(stderr, - "InnoDB: Record heap and dir overlap" - " on space %lu page %lu index %s, %p, %p\n", - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page), index->name, - page_header_get_ptr(page, PAGE_HEAP_TOP), - page_dir_get_nth_slot(page, n_slots - 1)); - - goto func_exit; - } - - /* Validate the record list in a loop checking also that - it is consistent with the directory. */ - count = 0; - data_size = 0; - own_count = 1; - slot_no = 0; - slot = page_dir_get_nth_slot(page, slot_no); - - rec = page_get_infimum_rec(page); - - for (;;) { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (page_is_comp(page) && page_rec_is_user_rec(rec) - && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec) - == page_is_leaf(page))) { - fputs("InnoDB: node_ptr flag mismatch\n", stderr); - goto func_exit; - } - - if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { - goto func_exit; - } - -#ifndef UNIV_HOTBACKUP - /* Check that the records are in the ascending order */ - if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW) - && !page_rec_is_supremum(rec)) { - if (UNIV_UNLIKELY - (1 != cmp_rec_rec(rec, old_rec, - offsets, old_offsets, index))) { - fprintf(stderr, - "InnoDB: Records in wrong order" - " on space %lu page %lu index %s\n", - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page), - index->name); - fputs("\nInnoDB: previous record ", stderr); - rec_print_new(stderr, old_rec, old_offsets); - fputs("\nInnoDB: record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - - goto func_exit; - } - } -#endif /* !UNIV_HOTBACKUP */ - - if (page_rec_is_user_rec(rec)) { - - data_size += rec_offs_size(offsets); - } - - offs = page_offset(rec_get_start(rec, offsets)); - i = rec_offs_size(offsets); - if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { - 
fputs("InnoDB: record offset out of bounds\n", stderr); - goto func_exit; - } - - while (i--) { - if (UNIV_UNLIKELY(buf[offs + i])) { - /* No other record may overlap this */ - - fputs("InnoDB: Record overlaps another\n", - stderr); - goto func_exit; - } - - buf[offs + i] = 1; - } - - if (page_is_comp(page)) { - rec_own_count = rec_get_n_owned_new(rec); - } else { - rec_own_count = rec_get_n_owned_old(rec); - } - - if (UNIV_UNLIKELY(rec_own_count)) { - /* This is a record pointed to by a dir slot */ - if (UNIV_UNLIKELY(rec_own_count != own_count)) { - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu\n", - (ulong) rec_own_count, - (ulong) own_count); - goto func_exit; - } - - if (page_dir_slot_get_rec(slot) != rec) { - fputs("InnoDB: Dir slot does not" - " point to right rec\n", - stderr); - goto func_exit; - } - - page_dir_slot_check(slot); - - own_count = 0; - if (!page_rec_is_supremum(rec)) { - slot_no++; - slot = page_dir_get_nth_slot(page, slot_no); - } - } - - if (page_rec_is_supremum(rec)) { - break; - } - - count++; - own_count++; - old_rec = rec; - rec = page_rec_get_next_const(rec); - - /* set old_offsets to offsets; recycle offsets */ - { - ulint* offs = old_offsets; - old_offsets = offsets; - offsets = offs; - } - } - - if (page_is_comp(page)) { - if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) { - - goto n_owned_zero; - } - } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { -n_owned_zero: - fputs("InnoDB: n owned is zero\n", stderr); - goto func_exit; - } - - if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { - fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); - goto func_exit; - } - - if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW - != count + 1)) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW, - (ulong) (count + 1)); - goto func_exit; - } - - if (UNIV_UNLIKELY(data_size != 
page_get_data_size(page))) { - fprintf(stderr, - "InnoDB: Summed data size %lu, returned by func %lu\n", - (ulong) data_size, (ulong) page_get_data_size(page)); - goto func_exit; - } - - /* Check then the free list */ - rec = page_header_get_ptr(page, PAGE_FREE); - - while (rec != NULL) { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) { - - goto func_exit; - } - - count++; - offs = page_offset(rec_get_start(rec, offsets)); - i = rec_offs_size(offsets); - if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { - fputs("InnoDB: record offset out of bounds\n", stderr); - goto func_exit; - } - - while (i--) { - - if (UNIV_UNLIKELY(buf[offs + i])) { - fputs("InnoDB: Record overlaps another" - " in free list\n", stderr); - goto func_exit; - } - - buf[offs + i] = 1; - } - - rec = page_rec_get_next_const(rec); - } - - if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { - fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) count + 1); - goto func_exit; - } - - ret = TRUE; - -func_exit: - mem_heap_free(heap); - - if (UNIV_UNLIKELY(ret == FALSE)) { -func_exit2: - fprintf(stderr, - "InnoDB: Apparent corruption" - " in space %lu page %lu index %s\n", - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page), - index->name); - buf_page_print(page, 0, 0); - } - - return(ret); -} - -#ifndef UNIV_HOTBACKUP -/***************************************************************//** -Looks in the page record list for a record with the given heap number. 
-@return record, NULL if not found */ -UNIV_INTERN -const rec_t* -page_find_rec_with_heap_no( -/*=======================*/ - const page_t* page, /*!< in: index page */ - ulint heap_no)/*!< in: heap number */ -{ - const rec_t* rec; - - if (page_is_comp(page)) { - rec = page + PAGE_NEW_INFIMUM; - - for(;;) { - ulint rec_heap_no = rec_get_heap_no_new(rec); - - if (rec_heap_no == heap_no) { - - return(rec); - } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) { - - return(NULL); - } - - rec = page + rec_get_next_offs(rec, TRUE); - } - } else { - rec = page + PAGE_OLD_INFIMUM; - - for (;;) { - ulint rec_heap_no = rec_get_heap_no_old(rec); - - if (rec_heap_no == heap_no) { - - return(rec); - } else if (rec_heap_no == PAGE_HEAP_NO_SUPREMUM) { - - return(NULL); - } - - rec = page + rec_get_next_offs(rec, FALSE); - } - } -} -#endif /* !UNIV_HOTBACKUP */ - -/*******************************************************//** -Removes the record from a leaf page. This function does not log -any changes. It is used by the IMPORT tablespace functions. -The cursor is moved to the next record after the deleted one. 
-@return true if success, i.e., the page did not become too empty */ -UNIV_INTERN -bool -page_delete_rec( -/*============*/ - const dict_index_t* index, /*!< in: The index that the record - belongs to */ - page_cur_t* pcur, /*!< in/out: page cursor on record - to delete */ - page_zip_des_t* page_zip,/*!< in: compressed page descriptor */ - const ulint* offsets)/*!< in: offsets for record */ -{ - bool no_compress_needed; - buf_block_t* block = pcur->block; - page_t* page = buf_block_get_frame(block); - - ut_ad(page_is_leaf(page)); - - if (!rec_offs_any_extern(offsets) - && ((page_get_data_size(page) - rec_offs_size(offsets) - < BTR_CUR_PAGE_COMPRESS_LIMIT) - || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL - && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL) - || (page_get_n_recs(page) < 2))) { - - ulint root_page_no = dict_index_get_page(index); - - /* The page fillfactor will drop below a predefined - minimum value, OR the level in the B-tree contains just - one page, OR the page will become empty: we recommend - compression if this is not the root page. */ - - no_compress_needed = page_get_page_no(page) == root_page_no; - } else { - no_compress_needed = true; - } - - if (no_compress_needed) { -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - - page_cur_delete_rec(pcur, index, offsets, 0); - -#ifdef UNIV_ZIP_DEBUG - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - } - - return(no_compress_needed); -} - -/** Get the last non-delete-marked record on a page. 
-@param[in] page index tree leaf page -@return the last record, not delete-marked -@retval infimum record if all records are delete-marked */ - -const rec_t* -page_find_rec_max_not_deleted( - const page_t* page) -{ - const rec_t* rec = page_get_infimum_rec(page); - const rec_t* prev_rec = NULL; // remove warning - - /* Because the page infimum is never delete-marked, - prev_rec will always be assigned to it first. */ - ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec))); - if (page_is_comp(page)) { - do { - if (!rec_get_deleted_flag(rec, true)) { - prev_rec = rec; - } - rec = page_rec_get_next_low(rec, true); - } while (rec != page + PAGE_NEW_SUPREMUM); - } else { - do { - if (!rec_get_deleted_flag(rec, false)) { - prev_rec = rec; - } - rec = page_rec_get_next_low(rec, false); - } while (rec != page + PAGE_OLD_SUPREMUM); - } - return(prev_rec); -} - -#endif /* #ifndef UNIV_INNOCHECKSUM */ - -/** Issue a warning when the checksum that is stored in the page is valid, -but different than the global setting innodb_checksum_algorithm. 
@param[in]	curr_algo	current checksum algorithm; must be one of the
				SRV_CHECKSUM_ALGORITHM_STRICT_* values, any
				other value hits ut_error
@param[in]	page_checksum	page valid checksum
@param[in]	space_id	tablespace id
@param[in]	page_no		page number */
void
page_warn_strict_checksum(
	srv_checksum_algorithm_t	curr_algo,
	srv_checksum_algorithm_t	page_checksum,
	ulint				space_id,
	ulint				page_no)
{
	srv_checksum_algorithm_t	curr_algo_nonstrict;

	/* Map the strict setting to its non-strict counterpart, which is
	the value the message suggests switching to. */
	switch (curr_algo) {
	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
		curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_CRC32;
		break;
	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
		curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_INNODB;
		break;
	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
		curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_NONE;
		break;
	default:
		ut_error;
	}

	/* The same format string and arguments are used for both builds;
	only the function receiving them differs.
	NOTE(review): the format has no trailing newline; presumably
	ib_logf() terminates the line itself while the fprintf() variant
	does not -- confirm. */
#ifdef UNIV_INNOCHECKSUM
	fprintf(stderr,
#else
	ib_logf(IB_LOG_LEVEL_WARN,
#endif
		"innodb_checksum_algorithm is set to \"%s\""
		" but the page [page id: space=" ULINTPF ","
		" page number=" ULINTPF "] contains a valid checksum \"%s\"."
		" Accepting the page as valid. Change innodb_checksum_algorithm"
		" to \"%s\" to silently accept such pages or rewrite all pages"
		" so that they contain \"%s\" checksum.",
		buf_checksum_algorithm_name(curr_algo),
		space_id, page_no,
		buf_checksum_algorithm_name(page_checksum),
		buf_checksum_algorithm_name(curr_algo_nonstrict),
		buf_checksum_algorithm_name(curr_algo_nonstrict));
}
diff --git a/storage/xtradb/page/page0zip.cc b/storage/xtradb/page/page0zip.cc
deleted file mode 100644
index 32e76fb44e6..00000000000
--- a/storage/xtradb/page/page0zip.cc
+++ /dev/null
@@ -1,5066 +0,0 @@
/*****************************************************************************

Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file page/page0zip.cc -Compressed page interface - -Created June 2005 by Marko Makela -*******************************************************/ - -// First include (the generated) my_config.h, to get correct platform defines. 
-#include "my_config.h" - -#include <map> -using namespace std; - -#define THIS_MODULE -#include "page0zip.h" -#ifdef UNIV_NONINL -# include "page0zip.ic" -#endif -#undef THIS_MODULE -#include "buf0checksum.h" -#include "page0page.h" -#ifndef UNIV_INNOCHECKSUM -#include "mtr0log.h" -#include "dict0dict.h" -#include "btr0cur.h" -#include "log0recv.h" -#endif /* !UNIV_INNOCHECKSUM */ -#include "zlib.h" -#include "fil0fil.h" -#include "ut0sort.h" -#include "page0types.h" -#ifndef UNIV_HOTBACKUP -#ifndef UNIV_INNOCHECKSUM -# include "buf0buf.h" -# include "btr0sea.h" -# include "dict0boot.h" -# include "lock0lock.h" -# include "srv0mon.h" -# include "srv0srv.h" -#endif /* !UNIV_INNOCHECKSUM */ -# include "buf0lru.h" -# include "ut0crc32.h" -#else /* !UNIV_HOTBACKUP */ -# define lock_move_reorganize_page(block, temp_block) ((void) 0) -# define buf_LRU_stat_inc_unzip() ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_INNOCHECKSUM -#include "mach0data.h" -#endif /* UNIV_INNOCHECKSUM */ - -#ifndef UNIV_HOTBACKUP -#ifndef UNIV_INNOCHECKSUM -/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */ -UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX]; -/** Statistics on compression, indexed by index->id */ -UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index; -/** Mutex protecting page_zip_stat_per_index */ -UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex; -#ifdef HAVE_PSI_INTERFACE -UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key; -#endif /* HAVE_PSI_INTERFACE */ -#endif /* !UNIV_INNOCHECKSUM */ -#endif /* !UNIV_HOTBACKUP */ - -/* Compression level to be used by zlib. Settable by user. */ -UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL; - -/* Whether or not to log compressed page images to avoid possible -compression algorithm changes in zlib. */ -UNIV_INTERN my_bool page_zip_log_pages = false; - -/* Please refer to ../include/page0zip.ic for a description of the -compressed page format. 
*/

#ifndef UNIV_INNOCHECKSUM

/* The infimum and supremum records are omitted from the compressed page.
On compress, we compare that the records are there, and on uncompress we
restore the records. */
/** Extra bytes of an infimum record */
static const byte infimum_extra[] = {
	0x01,			/* info_bits=0, n_owned=1 */
	0x00, 0x02		/* heap_no=0, status=2 */
	/* ?, ?	*/		/* next=(first user rec, or supremum) */
};
/** Data bytes of an infimum record */
static const byte infimum_data[] = {
	0x69, 0x6e, 0x66, 0x69,
	0x6d, 0x75, 0x6d, 0x00	/* "infimum\0" */
};
/** Extra bytes and data bytes of a supremum record */
static const byte supremum_extra_data[] = {
	/* 0x0?, */		/* info_bits=0, n_owned=1..8 */
	0x00, 0x0b,		/* heap_no=1, status=3 */
	0x00, 0x00,		/* next=0 */
	0x73, 0x75, 0x70, 0x72,
	0x65, 0x6d, 0x75, 0x6d	/* "supremum" */
};

#endif /* !UNIV_INNOCHECKSUM */

/** Assert that a block of memory is filled with zero bytes.
Compare at most sizeof(field_ref_zero) bytes.
@param b	in: memory block
@param s	in: size of the memory block, in bytes */
#define ASSERT_ZERO(b, s) \
	ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
/** Assert that a BLOB pointer is filled with zero bytes.
The comparison always covers sizeof(field_ref_zero) bytes.
@param b	in: BLOB pointer */
#define ASSERT_ZERO_BLOB(b) \
	ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))

/* Extra debugging output. It is compiled in when either UNIV_DEBUG or
UNIV_ZIP_DEBUG is defined. */
#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
# include <stdarg.h>
MY_ATTRIBUTE((format (printf, 1, 2)))
/**********************************************************************//**
Report a failure to decompress or compress. The message is written to
stderr, prefixed by a timestamp and an "InnoDB: " tag.
@return number of characters printed by vfprintf() for the formatted
message; the timestamp and the tag are not counted */
static
int
page_zip_fail_func(
/*===============*/
	const char*	fmt,	/*!< in: printf(3) format string */
	...)			/*!< in: arguments corresponding to fmt */
{
	int	res;
	va_list	ap;

	ut_print_timestamp(stderr);
	fputs(" InnoDB: ", stderr);
	va_start(ap, fmt);
	res = vfprintf(stderr, fmt, ap);
	va_end(ap);

	return(res);
}
/** Wrapper for page_zip_fail_func()
@param fmt_args	in: printf(3) format string and arguments, enclosed in
		their own pair of parentheses */
# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
/** Dummy wrapper for page_zip_fail_func()
@param fmt_args	ignored: printf(3) format string and arguments */
# define page_zip_fail(fmt_args) /* empty */
#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */

#ifndef UNIV_INNOCHECKSUM
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Determine the guaranteed free space on an empty page.
@return minimum payload size on the page; 0 if the overhead does not
leave any room */
UNIV_INTERN
ulint
page_zip_empty_size(
/*================*/
	ulint	n_fields,	/*!< in: number of columns in the index */
	ulint	zip_size)	/*!< in: compressed page size in bytes */
{
	/* The result is held in a signed variable so that an overhead
	larger than zip_size shows up as a non-positive value, which the
	return statement clamps to 0. */
	lint	size = zip_size
		/* subtract the page header and the longest
		uncompressed data needed for one record */
		- (PAGE_DATA
		   + PAGE_ZIP_DIR_SLOT_SIZE
		   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
		   + 1/* encoded heap_no==2 in page_zip_write_rec() */
		   + 1/* end of modification log */
		   - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
		/* subtract the space for page_zip_fields_encode() */
		- compressBound(static_cast<uLong>(2 * (n_fields + 1)));
	return(size > 0 ? (ulint) size : 0);
}
#endif /* !UNIV_HOTBACKUP */

/*************************************************************//**
Gets the number of elements in the dense page directory,
including deleted records (the free list).
@return number of elements in the dense page directory */
UNIV_INLINE
ulint
page_zip_dir_elems(
/*===============*/
	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
{
	/* Exclude the page infimum and supremum from the record count. */
	return(page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW);
}

/*************************************************************//**
Gets the size of the compressed page trailer (the dense page directory),
including deleted records (the free list).
@return length of dense page directory, in bytes */
UNIV_INLINE
ulint
page_zip_dir_size(
/*==============*/
	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
{
	return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip));
}

/*************************************************************//**
Gets an offset to the compressed page trailer (the dense page directory),
including deleted records (the free list). The directory occupies the
last n_dense * PAGE_ZIP_DIR_SLOT_SIZE bytes of the compressed page.
@return offset of the dense page directory */
UNIV_INLINE
ulint
page_zip_dir_start_offs(
/*====================*/
	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
	ulint			n_dense)	/*!< in: directory size,
						in number of slots */
{
	/* Debug check: the directory must be smaller than the page. */
	ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));

	return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
}

/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
@param[in] page_zip compressed page
@param[in] n_dense number of entries in the directory
@return pointer to the dense page directory */
#define page_zip_dir_start_low(page_zip, n_dense) \
	((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
/*************************************************************//**
Gets a pointer to the compressed page trailer (the dense page directory),
including deleted records (the free list).
-@param[in] page_zip compressed page -@return pointer to the dense page directory */ -#define page_zip_dir_start(page_zip) \ - page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip)) - -/*************************************************************//** -Gets the size of the compressed page trailer (the dense page directory), -only including user records (excluding the free list). -@return length of dense page directory comprising existing records, in bytes */ -UNIV_INLINE -ulint -page_zip_dir_user_size( -/*===================*/ - const page_zip_des_t* page_zip) /*!< in: compressed page */ -{ - ulint size = PAGE_ZIP_DIR_SLOT_SIZE - * page_get_n_recs(page_zip->data); - ut_ad(size <= page_zip_dir_size(page_zip)); - return(size); -} - -/*************************************************************//** -Find the slot of the given record in the dense page directory. -@return dense directory slot, or NULL if record not found */ -UNIV_INLINE -byte* -page_zip_dir_find_low( -/*==================*/ - byte* slot, /*!< in: start of records */ - byte* end, /*!< in: end of records */ - ulint offset) /*!< in: offset of user record */ -{ - ut_ad(slot <= end); - - for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) { - if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK) - == offset) { - return(slot); - } - } - - return(NULL); -} - -/*************************************************************//** -Find the slot of the given non-free record in the dense page directory. 
-@return dense directory slot, or NULL if record not found */ -UNIV_INLINE -byte* -page_zip_dir_find( -/*==============*/ - page_zip_des_t* page_zip, /*!< in: compressed page */ - ulint offset) /*!< in: offset of user record */ -{ - byte* end = page_zip->data + page_zip_get_size(page_zip); - - ut_ad(page_zip_simple_validate(page_zip)); - - return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip), - end, - offset)); -} - -/*************************************************************//** -Find the slot of the given free record in the dense page directory. -@return dense directory slot, or NULL if record not found */ -UNIV_INLINE -byte* -page_zip_dir_find_free( -/*===================*/ - page_zip_des_t* page_zip, /*!< in: compressed page */ - ulint offset) /*!< in: offset of user record */ -{ - byte* end = page_zip->data + page_zip_get_size(page_zip); - - ut_ad(page_zip_simple_validate(page_zip)); - - return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip), - end - page_zip_dir_user_size(page_zip), - offset)); -} - -/*************************************************************//** -Read a given slot in the dense page directory. -@return record offset on the uncompressed page, possibly ORed with -PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */ -UNIV_INLINE -ulint -page_zip_dir_get( -/*=============*/ - const page_zip_des_t* page_zip, /*!< in: compressed page */ - ulint slot) /*!< in: slot - (0=first user record) */ -{ - ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE); - return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip) - - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1))); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Write a log record of compressing an index page. 
*/ -static -void -page_zip_compress_write_log( -/*========================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - const page_t* page, /*!< in: uncompressed page */ - dict_index_t* index, /*!< in: index of the B-tree node */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - byte* log_ptr; - ulint trailer_size; - - ut_ad(!dict_index_is_ibuf(index)); - - log_ptr = mlog_open(mtr, 11 + 2 + 2); - - if (!log_ptr) { - - return; - } - - /* Read the number of user records. */ - trailer_size = page_dir_get_n_heap(page_zip->data) - - PAGE_HEAP_NO_USER_LOW; - /* Multiply by uncompressed of size stored per record */ - if (!page_is_leaf(page)) { - trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE; - } else if (dict_index_is_clust(index)) { - trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; - } else { - trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE; - } - /* Add the space occupied by BLOB pointers. */ - trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; - ut_a(page_zip->m_end > PAGE_DATA); -#if FIL_PAGE_DATA > PAGE_DATA -# error "FIL_PAGE_DATA > PAGE_DATA" -#endif - ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip)); - - log_ptr = mlog_write_initial_log_record_fast((page_t*) page, - MLOG_ZIP_PAGE_COMPRESS, - log_ptr, mtr); - mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE); - log_ptr += 2; - mach_write_to_2(log_ptr, trailer_size); - log_ptr += 2; - mlog_close(mtr, log_ptr); - - /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */ - mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4); - mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4); - /* Write most of the page header, the compressed stream and - the modification log. */ - mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE, - page_zip->m_end - FIL_PAGE_TYPE); - /* Write the uncompressed trailer of the compressed page. 
*/ - mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip) - - trailer_size, trailer_size); -} -#endif /* !UNIV_HOTBACKUP */ - -/******************************************************//** -Determine how many externally stored columns are contained -in existing records with smaller heap_no than rec. */ -static -ulint -page_zip_get_n_prev_extern( -/*=======================*/ - const page_zip_des_t* page_zip,/*!< in: dense page directory on - compressed page */ - const rec_t* rec, /*!< in: compact physical record - on a B-tree leaf page */ - const dict_index_t* index) /*!< in: record descriptor */ -{ - const page_t* page = page_align(rec); - ulint n_ext = 0; - ulint i; - ulint left; - ulint heap_no; - ulint n_recs = page_get_n_recs(page_zip->data); - - ut_ad(page_is_leaf(page)); - ut_ad(page_is_comp(page)); - ut_ad(dict_table_is_comp(index->table)); - ut_ad(dict_index_is_clust(index)); - ut_ad(!dict_index_is_ibuf(index)); - - heap_no = rec_get_heap_no_new(rec); - ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); - left = heap_no - PAGE_HEAP_NO_USER_LOW; - if (UNIV_UNLIKELY(!left)) { - return(0); - } - - for (i = 0; i < n_recs; i++) { - const rec_t* r = page + (page_zip_dir_get(page_zip, i) - & PAGE_ZIP_DIR_SLOT_MASK); - - if (rec_get_heap_no_new(r) < heap_no) { - n_ext += rec_get_n_extern_new(r, index, - ULINT_UNDEFINED); - if (!--left) { - break; - } - } - } - - return(n_ext); -} - -/**********************************************************************//** -Encode the length of a fixed-length column. 
@return buf + length of encoded val */
static
byte*
page_zip_fixed_field_encode(
/*========================*/
	byte*	buf,	/*!< in: pointer to buffer where to write */
	ulint	val)	/*!< in: value to write */
{
	ut_ad(val >= 2);

	if (UNIV_LIKELY(val < 126)) {
		/* One-byte encoding. The following values are taken by
		variable-length fields and are therefore never written here:
		0 = nullable variable field of at most 255 bytes length;
		1 = not null variable field of at most 255 bytes length;
		126 = nullable variable field with maximum length >255;
		127 = not null variable field with maximum length >255
		*/
		*buf++ = (byte) val;
	} else {
		/* Two-byte encoding: the first byte has its most
		significant bit set and carries the high-order bits,
		the second byte carries the low 8 bits. */
		*buf++ = (byte) (0x80 | val >> 8);
		*buf++ = (byte) val;
	}

	return(buf);
}

/**********************************************************************//**
Write the index information for the compressed page.
Runs of consecutive fixed-length NOT NULL fields are merged into one
encoded entry holding the sum of their lengths; the encoded stream ends
with either the position of the trx_id column or the number of nullable
fields.
@return used size of buf */
static
ulint
page_zip_fields_encode(
/*===================*/
	ulint		n,	/*!< in: number of fields to compress */
	dict_index_t*	index,	/*!< in: index comprising at least n fields */
	ulint		trx_id_pos,/*!< in: position of the trx_id column
				in the index, or ULINT_UNDEFINED if
				this is a non-leaf page */
	byte*		buf)	/*!< out: buffer of (n + 1) * 2 bytes
				NOTE(review): the debug assertion at the
				end of this function allows up to
				(n + 2) * 2 bytes -- confirm the required
				size against the callers */
{
	const byte*	buf_start	= buf;
	ulint		i;
	/* number of encoded entries written so far */
	ulint		col;
	ulint		trx_id_col	= 0;
	/* sum of lengths of preceding non-nullable fixed fields, or 0 */
	ulint		fixed_sum	= 0;

	ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);

	for (i = col = 0; i < n; i++) {
		dict_field_t*	field = dict_index_get_nth_field(index, i);
		ulint		val;

		if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
			val = 1; /* set the "not nullable" flag */
		} else {
			val = 0; /* nullable field */
		}

		if (!field->fixed_len) {
			/* variable-length field */
			const dict_col_t*	column
				= dict_field_get_col(field);

			if (UNIV_UNLIKELY(column->len > 255)
			    || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
				val |= 0x7e; /* max > 255 bytes */
			}

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			*buf++ = (byte) val;
			col++;
		} else if (val) {
			/* fixed-length non-nullable field */

			if (fixed_sum && UNIV_UNLIKELY
			    (fixed_sum + field->fixed_len
			     > DICT_MAX_FIXED_COL_LEN)) {
				/* Write out the length of the
				preceding non-nullable fields,
				to avoid exceeding the maximum
				length of a fixed-length column. */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
				if (fixed_sum) {
					/* Write out the length of any
					preceding non-nullable fields,
					and start a new trx_id column. */
					buf = page_zip_fixed_field_encode(
						buf, fixed_sum << 1 | 1);
					col++;
				}

				/* Remember which encoded entry starts
				with the trx_id column. */
				trx_id_col = col;
				fixed_sum = field->fixed_len;
			} else {
				/* add to the sum */
				fixed_sum += field->fixed_len;
			}
		} else {
			/* fixed-length nullable field */

			if (fixed_sum) {
				/* write out the length of any
				preceding non-nullable fields */
				buf = page_zip_fixed_field_encode(
					buf, fixed_sum << 1 | 1);
				fixed_sum = 0;
				col++;
			}

			/* The low bit is left clear for a nullable
			field; it is set for the NOT NULL sums above. */
			buf = page_zip_fixed_field_encode(
				buf, field->fixed_len << 1);
			col++;
		}
	}

	if (fixed_sum) {
		/* Write out the lengths of last fixed-length columns. */
		buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
	}

	if (trx_id_pos != ULINT_UNDEFINED) {
		/* Write out the position of the trx_id column */
		i = trx_id_col;
	} else {
		/* Write out the number of nullable fields */
		i = index->n_nullable;
	}

	/* The trailing number takes one byte below 128,
	otherwise two bytes with the top bit of the first byte set. */
	if (i < 128) {
		*buf++ = (byte) i;
	} else {
		*buf++ = (byte) (0x80 | i >> 8);
		*buf++ = (byte) i;
	}

	ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
	return((ulint) (buf - buf_start));
}

/**********************************************************************//**
Populate the dense page directory from the sparse directory.
*/ -static -void -page_zip_dir_encode( -/*================*/ - const page_t* page, /*!< in: compact page */ - byte* buf, /*!< in: pointer to dense page directory[-1]; - out: dense directory on compressed page */ - const rec_t** recs) /*!< in: pointer to an array of 0, or NULL; - out: dense page directory sorted by ascending - address (and heap_no) */ -{ - const byte* rec; - ulint status; - ulint min_mark; - ulint heap_no; - ulint i; - ulint n_heap; - ulint offs; - - min_mark = 0; - - if (page_is_leaf(page)) { - status = REC_STATUS_ORDINARY; - } else { - status = REC_STATUS_NODE_PTR; - if (UNIV_UNLIKELY - (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) { - min_mark = REC_INFO_MIN_REC_FLAG; - } - } - - n_heap = page_dir_get_n_heap(page); - - /* Traverse the list of stored records in the collation order, - starting from the first user record. */ - - rec = page + PAGE_NEW_INFIMUM; - - i = 0; - - for (;;) { - ulint info_bits; - offs = rec_get_next_offs(rec, TRUE); - if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) { - break; - } - rec = page + offs; - heap_no = rec_get_heap_no_new(rec); - ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW); - ut_a(heap_no < n_heap); - ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR); - ut_a(offs >= PAGE_ZIP_START); -#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1) -# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2" -#endif -#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_ZIP_SIZE_MAX - 1 -# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1" -#endif - if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) { - offs |= PAGE_ZIP_DIR_SLOT_OWNED; - } - - info_bits = rec_get_info_bits(rec, TRUE); - if (info_bits & REC_INFO_DELETED_FLAG) { - info_bits &= ~REC_INFO_DELETED_FLAG; - offs |= PAGE_ZIP_DIR_SLOT_DEL; - } - ut_a(info_bits == min_mark); - /* Only the smallest user record can have - REC_INFO_MIN_REC_FLAG set. 
*/ - min_mark = 0; - - mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs); - - if (UNIV_LIKELY_NULL(recs)) { - /* Ensure that each heap_no occurs at most once. */ - ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]); - /* exclude infimum and supremum */ - recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec; - } - - ut_a(rec_get_status(rec) == status); - } - - offs = page_header_get_field(page, PAGE_FREE); - - /* Traverse the free list (of deleted records). */ - while (offs) { - ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK)); - rec = page + offs; - - heap_no = rec_get_heap_no_new(rec); - ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW); - ut_a(heap_no < n_heap); - - ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */ - ut_a(rec_get_status(rec) == status); - - mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs); - - if (UNIV_LIKELY_NULL(recs)) { - /* Ensure that each heap_no occurs at most once. */ - ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]); - /* exclude infimum and supremum */ - recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec; - } - - offs = rec_get_next_offs(rec, TRUE); - } - - /* Ensure that each heap no occurs at least once. */ - ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap); -} - -extern "C" { - -/**********************************************************************//** -Allocate memory for zlib. */ -static -void* -page_zip_zalloc( -/*============*/ - void* opaque, /*!< in/out: memory heap */ - uInt items, /*!< in: number of items to allocate */ - uInt size) /*!< in: size of an item in bytes */ -{ - return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size)); -} - -/**********************************************************************//** -Deallocate memory for zlib. 
*/ -static -void -page_zip_free( -/*==========*/ - void* opaque MY_ATTRIBUTE((unused)), /*!< in: memory heap */ - void* address MY_ATTRIBUTE((unused)))/*!< in: object to free */ -{ -} - -} /* extern "C" */ - -/**********************************************************************//** -Configure the zlib allocator to use the given memory heap. */ -UNIV_INTERN -void -page_zip_set_alloc( -/*===============*/ - void* stream, /*!< in/out: zlib stream */ - mem_heap_t* heap) /*!< in: memory heap to use */ -{ - z_stream* strm = static_cast<z_stream*>(stream); - - strm->zalloc = page_zip_zalloc; - strm->zfree = page_zip_free; - strm->opaque = heap; -} - -#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG -/** Symbol for enabling compression and decompression diagnostics */ -# define PAGE_ZIP_COMPRESS_DBG -#endif - -#ifdef PAGE_ZIP_COMPRESS_DBG -/** Set this variable in a debugger to enable -excessive logging in page_zip_compress(). */ -UNIV_INTERN ibool page_zip_compress_dbg; -/** Set this variable in a debugger to enable -binary logging of the data passed to deflate(). -When this variable is nonzero, it will act -as a log file name generator. */ -UNIV_INTERN unsigned page_zip_compress_log; - -/**********************************************************************//** -Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set. -@return deflate() status: Z_OK, Z_BUF_ERROR, ... 
*/ -static -int -page_zip_compress_deflate( -/*======================*/ - FILE* logfile,/*!< in: log file, or NULL */ - z_streamp strm, /*!< in/out: compressed stream for deflate() */ - int flush) /*!< in: deflate() flushing method */ -{ - int status; - if (UNIV_UNLIKELY(page_zip_compress_dbg)) { - ut_print_buf(stderr, strm->next_in, strm->avail_in); - } - if (UNIV_LIKELY_NULL(logfile)) { - fwrite(strm->next_in, 1, strm->avail_in, logfile); - } - status = deflate(strm, flush); - if (UNIV_UNLIKELY(page_zip_compress_dbg)) { - fprintf(stderr, " -> %d\n", status); - } - return(status); -} - -/* Redefine deflate(). */ -# undef deflate -/** Debug wrapper for the zlib compression routine deflate(). -Log the operation if page_zip_compress_dbg is set. -@param strm in/out: compressed stream -@param flush in: flushing method -@return deflate() status: Z_OK, Z_BUF_ERROR, ... */ -# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush) -/** Declaration of the logfile parameter */ -# define FILE_LOGFILE FILE* logfile, -/** The logfile parameter */ -# define LOGFILE logfile, -#else /* PAGE_ZIP_COMPRESS_DBG */ -/** Empty declaration of the logfile parameter */ -# define FILE_LOGFILE -/** Missing logfile parameter */ -# define LOGFILE -#endif /* PAGE_ZIP_COMPRESS_DBG */ - -/**********************************************************************//** -Compress the records of a node pointer page. 
-@return Z_OK, or a zlib error code */ -static -int -page_zip_compress_node_ptrs( -/*========================*/ - FILE_LOGFILE - z_stream* c_stream, /*!< in/out: compressed page stream */ - const rec_t** recs, /*!< in: dense page directory - sorted by address */ - ulint n_dense, /*!< in: size of recs[] */ - dict_index_t* index, /*!< in: the index of the page */ - byte* storage, /*!< in: end of dense page directory */ - mem_heap_t* heap) /*!< in: temporary memory heap */ -{ - int err = Z_OK; - ulint* offsets = NULL; - - do { - const rec_t* rec = *recs++; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - /* Only leaf nodes may contain externally stored columns. */ - ut_ad(!rec_offs_any_extern(offsets)); - - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - - /* Compress the extra bytes. */ - c_stream->avail_in = static_cast<uInt>( - rec - REC_N_NEW_EXTRA_BYTES - c_stream->next_in); - - if (c_stream->avail_in) { - err = deflate(c_stream, Z_NO_FLUSH); - if (UNIV_UNLIKELY(err != Z_OK)) { - break; - } - } - ut_ad(!c_stream->avail_in); - - /* Compress the data bytes, except node_ptr. */ - c_stream->next_in = (byte*) rec; - c_stream->avail_in = static_cast<uInt>( - rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE); - - if (c_stream->avail_in) { - err = deflate(c_stream, Z_NO_FLUSH); - if (UNIV_UNLIKELY(err != Z_OK)) { - break; - } - } - - ut_ad(!c_stream->avail_in); - - memcpy(storage - REC_NODE_PTR_SIZE - * (rec_get_heap_no_new(rec) - 1), - c_stream->next_in, REC_NODE_PTR_SIZE); - c_stream->next_in += REC_NODE_PTR_SIZE; - } while (--n_dense); - - return(err); -} - -/**********************************************************************//** -Compress the records of a leaf node of a secondary index. 
-@return Z_OK, or a zlib error code */ -static -int -page_zip_compress_sec( -/*==================*/ - FILE_LOGFILE - z_stream* c_stream, /*!< in/out: compressed page stream */ - const rec_t** recs, /*!< in: dense page directory - sorted by address */ - ulint n_dense) /*!< in: size of recs[] */ -{ - int err = Z_OK; - - ut_ad(n_dense > 0); - - do { - const rec_t* rec = *recs++; - - /* Compress everything up to this record. */ - c_stream->avail_in = static_cast<uInt>( - rec - REC_N_NEW_EXTRA_BYTES - - c_stream->next_in); - - if (UNIV_LIKELY(c_stream->avail_in != 0)) { - UNIV_MEM_ASSERT_RW(c_stream->next_in, - c_stream->avail_in); - err = deflate(c_stream, Z_NO_FLUSH); - if (UNIV_UNLIKELY(err != Z_OK)) { - break; - } - } - - ut_ad(!c_stream->avail_in); - ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES); - - /* Skip the REC_N_NEW_EXTRA_BYTES. */ - - c_stream->next_in = (byte*) rec; - } while (--n_dense); - - return(err); -} - -/**********************************************************************//** -Compress a record of a leaf node of a clustered index that contains -externally stored columns. 
-@return Z_OK, or a zlib error code */
-static
-int
-page_zip_compress_clust_ext(
-/*========================*/
-	FILE_LOGFILE
-	z_stream*	c_stream,	/*!< in/out: compressed page stream */
-	const rec_t*	rec,		/*!< in: record */
-	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
-	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
-	byte*		deleted,	/*!< in: dense directory entry pointing
-					to the head of the free list */
-	byte*		storage,	/*!< in: end of dense page directory */
-	byte**		externs,	/*!< in/out: pointer to the next
-					available BLOB pointer */
-	ulint*		n_blobs)	/*!< in/out: number of
-					externally stored columns */
-{	/* Walks the fields of rec. Bytes before the trx_id column and
-	before each BLOB reference are passed to deflate(); the
-	trx_id+roll_ptr pair and the BLOB references themselves are copied
-	uncompressed. Bytes after the last such field are left pending in
-	c_stream for the caller. */
-	int	err;
-	ulint	i;
-
-	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
-	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
-			   rec_offs_extra_size(offsets));
-
-	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-		ulint		len;
-		const byte*	src;
-
-		if (UNIV_UNLIKELY(i == trx_id_col)) {
-			ut_ad(!rec_offs_nth_extern(offsets, i));
-			/* Store trx_id and roll_ptr
-			in uncompressed form. */
-			src = rec_get_nth_field(rec, offsets, i, &len);
-			ut_ad(src + DATA_TRX_ID_LEN
-			      == rec_get_nth_field(rec, offsets,
-						   i + 1, &len));
-			ut_ad(len == DATA_ROLL_PTR_LEN);
-
-			/* Compress any preceding bytes. */
-			c_stream->avail_in = static_cast<uInt>(
-				src - c_stream->next_in);
-
-			if (c_stream->avail_in) {
-				err = deflate(c_stream, Z_NO_FLUSH);
-				if (UNIV_UNLIKELY(err != Z_OK)) {
-
-					return(err);
-				}
-			}
-
-			ut_ad(!c_stream->avail_in);
-			ut_ad(c_stream->next_in == src);
-
-			memcpy(storage
-			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
-			       * (rec_get_heap_no_new(rec) - 1),
-			       c_stream->next_in,
-			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);	/* slot is addressed by heap number, below storage */
-
-			c_stream->next_in
-				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-
-			/* Skip also roll_ptr */
-			i++;
-		} else if (rec_offs_nth_extern(offsets, i)) {
-			src = rec_get_nth_field(rec, offsets, i, &len);
-			ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
-			src += len - BTR_EXTERN_FIELD_REF_SIZE;	/* src now points at the reference occupying the last bytes of the field */
-
-			c_stream->avail_in = static_cast<uInt>(
-				src - c_stream->next_in);
-			if (UNIV_LIKELY(c_stream->avail_in != 0)) {
-				err = deflate(c_stream, Z_NO_FLUSH);
-				if (UNIV_UNLIKELY(err != Z_OK)) {
-
-					return(err);
-				}
-			}
-
-			ut_ad(!c_stream->avail_in);
-			ut_ad(c_stream->next_in == src);
-
-			/* Reserve space for the data at
-			the end of the space reserved for
-			the compressed data and the page
-			modification log. */
-
-			if (UNIV_UNLIKELY
-			    (c_stream->avail_out
-			     <= BTR_EXTERN_FIELD_REF_SIZE)) {
-				/* out of space */
-				return(Z_BUF_ERROR);
-			}
-
-			ut_ad(*externs == c_stream->next_out
-			      + c_stream->avail_out
-			      + 1/* end of modif. log */);
-
-			c_stream->next_in
-				+= BTR_EXTERN_FIELD_REF_SIZE;
-
-			/* Skip deleted records. The reference has already
-			been stepped over in the input above, but no output
-			space is reserved and n_blobs is not incremented. */
-			if (UNIV_LIKELY_NULL
-			    (page_zip_dir_find_low(
-				    storage, deleted,
-				    page_offset(rec)))) {
-				continue;
-			}
-
-			(*n_blobs)++;
-			c_stream->avail_out
-				-= BTR_EXTERN_FIELD_REF_SIZE;
-			*externs -= BTR_EXTERN_FIELD_REF_SIZE;	/* the BLOB pointer area grows downwards */
-
-			/* Copy the BLOB pointer */
-			memcpy(*externs, c_stream->next_in
-			       - BTR_EXTERN_FIELD_REF_SIZE,
-			       BTR_EXTERN_FIELD_REF_SIZE);
-		}
-	}
-
-	return(Z_OK);
-}
-
-/**********************************************************************//**
-Compress the records of a leaf node of a clustered index. 
-@return Z_OK, or a zlib error code */
-static
-int
-page_zip_compress_clust(
-/*====================*/
-	FILE_LOGFILE
-	z_stream*	c_stream,	/*!< in/out: compressed page stream */
-	const rec_t**	recs,		/*!< in: dense page directory
-					sorted by address */
-	ulint		n_dense,	/*!< in: size of recs[] */
-	dict_index_t*	index,		/*!< in: the index of the page */
-	ulint*		n_blobs,	/*!< in: 0; out: number of
-					externally stored columns */
-	ulint		trx_id_col,	/*!< in: index of the trx_id column */
-	byte*		deleted,	/*!< in: dense directory entry pointing
-					to the head of the free list */
-	byte*		storage,	/*!< in: end of dense page directory */
-	mem_heap_t*	heap)		/*!< in: temporary memory heap */
-{	/* For every record: deflate the bytes up to its fixed-size header,
-	skip the header, deflate the data bytes around trx_id/roll_ptr
-	(copied uncompressed below storage), and delegate to
-	page_zip_compress_clust_ext() when the record has externally stored
-	columns. The loop body runs at least once, so n_dense must be
-	nonzero; the caller in this file only calls it in that case. */
-	int	err		= Z_OK;
-	ulint*	offsets		= NULL;
-	/* BTR_EXTERN_FIELD_REF storage */
-	byte*	externs		= storage - n_dense
-		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
-	ut_ad(*n_blobs == 0);
-
-	do {
-		const rec_t*	rec = *recs++;
-
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		ut_ad(rec_offs_n_fields(offsets)
-		      == dict_index_get_n_fields(index));
-		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
-		UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
-				   rec_offs_extra_size(offsets));
-
-		/* Compress the extra bytes. */
-		c_stream->avail_in = static_cast<uInt>(
-			rec - REC_N_NEW_EXTRA_BYTES
-			- c_stream->next_in);
-
-		if (c_stream->avail_in) {
-			err = deflate(c_stream, Z_NO_FLUSH);
-			if (UNIV_UNLIKELY(err != Z_OK)) {
-
-				goto func_exit;
-			}
-		}
-		ut_ad(!c_stream->avail_in);
-		ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
-
-		/* Compress the data bytes. */
-
-		c_stream->next_in = (byte*) rec;
-
-		/* Check if there are any externally stored columns.
-		For each externally stored column, store the
-		BTR_EXTERN_FIELD_REF separately. */
-		if (rec_offs_any_extern(offsets)) {
-			ut_ad(dict_index_is_clust(index));
-
-			err = page_zip_compress_clust_ext(
-				LOGFILE
-				c_stream, rec, offsets, trx_id_col,
-				deleted, storage, &externs, n_blobs);
-
-			if (UNIV_UNLIKELY(err != Z_OK)) {
-
-				goto func_exit;
-			}
-		} else {
-			ulint		len;
-			const byte*	src;
-
-			/* Store trx_id and roll_ptr in uncompressed form. */
-			src = rec_get_nth_field(rec, offsets,
-						trx_id_col, &len);
-			ut_ad(src + DATA_TRX_ID_LEN
-			      == rec_get_nth_field(rec, offsets,
-						   trx_id_col + 1, &len));
-			ut_ad(len == DATA_ROLL_PTR_LEN);
-			UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
-			UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
-					   rec_offs_extra_size(offsets));
-
-			/* Compress any preceding bytes. */
-			c_stream->avail_in = static_cast<uInt>(
-				src - c_stream->next_in);
-
-			if (c_stream->avail_in) {
-				err = deflate(c_stream, Z_NO_FLUSH);
-				if (UNIV_UNLIKELY(err != Z_OK)) {
-
-					return(err);	/* same effect as goto func_exit, which only returns err */
-				}
-			}
-
-			ut_ad(!c_stream->avail_in);
-			ut_ad(c_stream->next_in == src);
-
-			memcpy(storage
-			       - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
-			       * (rec_get_heap_no_new(rec) - 1),
-			       c_stream->next_in,
-			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);	/* slot is addressed by heap number, below storage */
-
-			c_stream->next_in
-				+= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-
-			/* Skip also roll_ptr */
-			ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
-		}
-
-		/* Compress the last bytes of the record. */
-		c_stream->avail_in = static_cast<uInt>(
-			rec + rec_offs_data_size(offsets) - c_stream->next_in);
-
-		if (c_stream->avail_in) {
-			err = deflate(c_stream, Z_NO_FLUSH);
-			if (UNIV_UNLIKELY(err != Z_OK)) {
-
-				goto func_exit;
-			}
-		}
-		ut_ad(!c_stream->avail_in);
-	} while (--n_dense);
-
-func_exit:
-	return(err);
-}
-
-/**********************************************************************//**
-Compress a page.
-@return TRUE on success, FALSE on failure; page_zip will be left
-intact on failure. 
*/
-UNIV_INTERN
-ibool
-page_zip_compress(
-/*==============*/
-	page_zip_des_t*	page_zip,/*!< in: size; out: data, n_blobs,
-				m_start, m_end, m_nonempty */
-	const page_t*	page,	/*!< in: uncompressed page */
-	dict_index_t*	index,	/*!< in: index of the B-tree node */
-	ulint		level,	/*!< in: compression level */
-	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
-{	/* Outline: validate the page, update statistics, bail out early if
-	the dense directory alone would not fit (or a failure is simulated),
-	then deflate the encoded index fields and the records into a
-	heap-allocated buffer and, only on success, copy that buffer and
-	selected header fields into page_zip->data. All failures leave
-	through the zlib_error/err_exit labels and return FALSE. */
-	z_stream	c_stream;
-	int		err;
-	ulint		n_fields;/* number of index fields needed */
-	byte*		fields;	/*!< index field information */
-	byte*		buf;	/*!< compressed payload of the page */
-	byte*		buf_end;/* end of buf */
-	ulint		n_dense;
-	ulint		slot_size;/* amount of uncompressed bytes per record */
-	const rec_t**	recs;	/*!< dense page directory, sorted by address */
-	mem_heap_t*	heap;	/* not yet created on the paths that jump straight to err_exit */
-	ulint		trx_id_col;
-	ulint		n_blobs	= 0;
-	byte*		storage;/* storage of uncompressed columns */
-#ifndef UNIV_HOTBACKUP
-	ullint		usec = ut_time_us(NULL);
-#endif /* !UNIV_HOTBACKUP */
-#ifdef PAGE_ZIP_COMPRESS_DBG
-	FILE*		logfile = NULL;
-#endif
-	/* A local copy of srv_cmp_per_index_enabled to avoid reading that
-	variable multiple times in this function since it can be changed at
-	anytime. */
-	my_bool	cmp_per_index_enabled = srv_cmp_per_index_enabled;
-
-	ut_a(page_is_comp(page));
-	ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
-	ut_ad(page_simple_validate_new((page_t*) page));
-	ut_ad(page_zip_simple_validate(page_zip));
-	ut_ad(dict_table_is_comp(index->table));
-	ut_ad(!dict_index_is_ibuf(index));
-
-	UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
-
-	/* Check the data that will be omitted. */
-	ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
-		     infimum_extra, sizeof infimum_extra));
-	ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
-		     infimum_data, sizeof infimum_data));
-	ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
-	     /* info_bits == 0, n_owned <= max */
-	     <= PAGE_DIR_SLOT_MAX_N_OWNED);
-	ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
-		     supremum_extra_data, sizeof supremum_extra_data));
-
-	if (page_is_empty(page)) {
-		ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
-		     == PAGE_NEW_SUPREMUM);
-	}
-
-	if (page_is_leaf(page)) {
-		n_fields = dict_index_get_n_fields(index);
-	} else {
-		n_fields = dict_index_get_n_unique_in_tree(index);
-	}
-
-	/* The dense directory excludes the infimum and supremum records. */
-	n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
-#ifdef PAGE_ZIP_COMPRESS_DBG
-	if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
-		fprintf(stderr, "compress %p %p %lu %lu %lu\n",
-			(void*) page_zip, (void*) page,
-			(ibool) page_is_leaf(page),
-			n_fields, n_dense);
-	}
-	if (UNIV_UNLIKELY(page_zip_compress_log)) {
-		/* Create a log file for every compression attempt. */
-		char	logfilename[9];	/* 8 hex digits plus the terminating NUL */
-		ut_snprintf(logfilename, sizeof logfilename,
-			    "%08x", page_zip_compress_log++);
-		logfile = fopen(logfilename, "wb");
-
-		if (logfile) {
-			/* Write the uncompressed page to the log. */
-			fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
-			/* Record the compressed size as zero.
-			This will be overwritten at successful exit. */
-			putc(0, logfile);
-			putc(0, logfile);
-			putc(0, logfile);
-			putc(0, logfile);
-		}
-	}
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
-	page_zip_stat[page_zip->ssize - 1].compressed++;
-	if (cmp_per_index_enabled) {
-		mutex_enter(&page_zip_stat_per_index_mutex);
-		page_zip_stat_per_index[index->id].compressed++;
-		mutex_exit(&page_zip_stat_per_index_mutex);
-	}
-#endif /* !UNIV_HOTBACKUP */
-
-	if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
-			  >= page_zip_get_size(page_zip))) {
-
-		goto err_exit;	/* the dense directory alone would fill the compressed page */
-	}
-
-	MONITOR_INC(MONITOR_PAGE_COMPRESS);
-
-	/* Simulate a compression failure with a probability determined by
-	innodb_simulate_comp_failures, only if the page has 2 or more
-	records. */
-
-	if (srv_simulate_comp_failures
-	    && !dict_index_is_ibuf(index)
-	    && page_get_n_recs(page) >= 2
-	    && ((ulint)(rand() % 100) < srv_simulate_comp_failures)
-	    && strcasecmp(index->table_name, "IBUF_DUMMY") != 0) {
-
-#ifdef UNIV_DEBUG
-		fprintf(stderr,
-			"InnoDB: Simulating a compression failure"
-			" for table %s, index %s, page %lu (%s)\n",
-			index->table_name,
-			index->name,
-			page_get_page_no(page),
-			page_is_leaf(page) ? "leaf" : "non-leaf");
-
-#endif
-
-		goto err_exit;
-	}
-
-	heap = mem_heap_create(page_zip_get_size(page_zip)
-			       + n_fields * (2 + sizeof(ulint))
-			       + REC_OFFS_HEADER_SIZE
-			       + n_dense * ((sizeof *recs)
-					    - PAGE_ZIP_DIR_SLOT_SIZE)
-			       + UNIV_PAGE_SIZE * 4
-			       + (512 << MAX_MEM_LEVEL));
-
-	recs = static_cast<const rec_t**>(
-		mem_heap_zalloc(heap, n_dense * sizeof *recs));
-
-	fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2));
-
-	buf = static_cast<byte*>(
-		mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));
-
-	buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
-
-	/* Compress the data payload. */
-	page_zip_set_alloc(&c_stream, heap);
-
-	err = deflateInit2(&c_stream, static_cast<int>(level),
-			   Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
-			   MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
-	ut_a(err == Z_OK);
-
-	c_stream.next_out = buf;
-	/* Subtract the space reserved for uncompressed data. */
-	/* Page header and the end marker of the modification log */
-	c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1);
-
-	/* Dense page directory and uncompressed columns, if any */
-	if (page_is_leaf(page)) {
-		if (dict_index_is_clust(index)) {
-			trx_id_col = dict_index_get_sys_col_pos(
-				index, DATA_TRX_ID);
-			ut_ad(trx_id_col > 0);
-			ut_ad(trx_id_col != ULINT_UNDEFINED);
-
-			slot_size = PAGE_ZIP_DIR_SLOT_SIZE
-				+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-		} else {
-			/* Signal the absence of trx_id
-			in page_zip_fields_encode() */
-			ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
-			      == ULINT_UNDEFINED);
-			trx_id_col = 0;
-			slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
-		}
-	} else {
-		slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
-		trx_id_col = ULINT_UNDEFINED;
-	}
-
-	if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
-			  + 6/* sizeof(zlib header and footer) */)) {
-		goto zlib_error;
-	}
-
-	c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size);
-	c_stream.avail_in = static_cast<uInt>(
-		page_zip_fields_encode(n_fields, index, trx_id_col, fields));
-	c_stream.next_in = fields;
-	if (UNIV_LIKELY(!trx_id_col)) {
-		trx_id_col = ULINT_UNDEFINED;	/* 0 was only the "no trx_id" signal for the encoder above */
-	}
-
-	UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
-	err = deflate(&c_stream, Z_FULL_FLUSH);
-	if (err != Z_OK) {
-		goto zlib_error;
-	}
-
-	ut_ad(!c_stream.avail_in);
-
-	page_zip_dir_encode(page, buf_end, recs);
-
-	c_stream.next_in = (byte*) page + PAGE_ZIP_START;
-
-	storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
-
-	/* Compress the records in heap_no order. */
-	if (UNIV_UNLIKELY(!n_dense)) {
-	} else if (!page_is_leaf(page)) {
-		/* This is a node pointer page. */
-		err = page_zip_compress_node_ptrs(LOGFILE
-						  &c_stream, recs, n_dense,
-						  index, storage, heap);
-		if (UNIV_UNLIKELY(err != Z_OK)) {
-			goto zlib_error;
-		}
-	} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
-		/* This is a leaf page in a secondary index. */
-		err = page_zip_compress_sec(LOGFILE
-					    &c_stream, recs, n_dense);
-		if (UNIV_UNLIKELY(err != Z_OK)) {
-			goto zlib_error;
-		}
-	} else {
-		/* This is a leaf page in a clustered index. */
-		err = page_zip_compress_clust(LOGFILE
-					      &c_stream, recs, n_dense,
-					      index, &n_blobs, trx_id_col,
-					      buf_end - PAGE_ZIP_DIR_SLOT_SIZE
-					      * page_get_n_recs(page),
-					      storage, heap);
-		if (UNIV_UNLIKELY(err != Z_OK)) {
-			goto zlib_error;
-		}
-	}
-
-	/* Finish the compression. */
-	ut_ad(!c_stream.avail_in);
-	/* Compress any trailing garbage, in case the last record was
-	allocated from an originally longer space on the free list,
-	or the data of the last record from page_zip_compress_sec(). */
-	c_stream.avail_in = static_cast<uInt>(
-		page_header_get_field(page, PAGE_HEAP_TOP)
-		- (c_stream.next_in - page));
-	ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
-
-	UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
-	err = deflate(&c_stream, Z_FINISH);
-
-	if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
-zlib_error:
-		deflateEnd(&c_stream);
-		mem_heap_free(heap);
-err_exit:
-#ifdef PAGE_ZIP_COMPRESS_DBG
-		if (logfile) {
-			fclose(logfile);
-		}
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
-		if (page_is_leaf(page)) {
-			dict_index_zip_failure(index);
-		}
-
-		ullint	time_diff = ut_time_us(NULL) - usec;
-		page_zip_stat[page_zip->ssize - 1].compressed_usec
-			+= time_diff;
-		if (cmp_per_index_enabled) {
-			mutex_enter(&page_zip_stat_per_index_mutex);
-			page_zip_stat_per_index[index->id].compressed_usec
-				+= time_diff;
-			mutex_exit(&page_zip_stat_per_index_mutex);
-		}
-#endif /* !UNIV_HOTBACKUP */
-		return(FALSE);
-	}
-
-	err = deflateEnd(&c_stream);
-	ut_a(err == Z_OK);
-
-	ut_ad(buf + c_stream.total_out == c_stream.next_out);
-	ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
-
-	/* Valgrind believes that zlib does not initialize some bits
-	in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
-	UNIV_MEM_VALID(buf, c_stream.total_out);
-
-	/* Zero out the area reserved for the modification log.
-	Space for the end marker of the modification log is not
-	included in avail_out. */
-	memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
-
-#ifdef UNIV_DEBUG
-	page_zip->m_start =
-#endif /* UNIV_DEBUG */
-		page_zip->m_end = PAGE_DATA + c_stream.total_out;	/* in debug builds this is a chained assignment that also sets m_start */
-	page_zip->m_nonempty = FALSE;
-	page_zip->n_blobs = n_blobs;
-	/* Copy those header fields that will not be written
-	in buf_flush_init_for_writing() */
-	memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
-	       FIL_PAGE_LSN - FIL_PAGE_PREV);
-	memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
-	memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
-	       PAGE_DATA - FIL_PAGE_DATA);
-	/* Copy the rest of the compressed page */
-	memcpy(page_zip->data + PAGE_DATA, buf,
-	       page_zip_get_size(page_zip) - PAGE_DATA);
-	mem_heap_free(heap);	/* buf, recs and fields were allocated from this heap and must not be used after this point */
-#ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (mtr) {
-#ifndef UNIV_HOTBACKUP
-		page_zip_compress_write_log(page_zip, page, index, mtr);
-#endif /* !UNIV_HOTBACKUP */
-	}
-
-	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
-#ifdef PAGE_ZIP_COMPRESS_DBG
-	if (logfile) {
-		/* Record the compressed size of the block. */
-		byte sz[4];
-		mach_write_to_4(sz, c_stream.total_out);
-		fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);	/* back to the 4 zero bytes written right after the page image */
-		fwrite(sz, 1, sizeof sz, logfile);
-		fclose(logfile);
-	}
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
-	ullint	time_diff = ut_time_us(NULL) - usec;
-	page_zip_stat[page_zip->ssize - 1].compressed_ok++;
-	page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
-	if (cmp_per_index_enabled) {
-		mutex_enter(&page_zip_stat_per_index_mutex);
-		page_zip_stat_per_index[index->id].compressed_ok++;
-		page_zip_stat_per_index[index->id].compressed_usec += time_diff;
-		mutex_exit(&page_zip_stat_per_index_mutex);
-	}
-
-	if (page_is_leaf(page)) {
-		dict_index_zip_success(index);
-	}
-#endif /* !UNIV_HOTBACKUP */
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Compare two page directory entries by address.
-@return nonzero if rec1 > rec2, zero otherwise */
-UNIV_INLINE
-ibool
-page_zip_dir_cmp(
-/*=============*/
-	const rec_t*	rec1,	/*!< in: rec1 */
-	const rec_t*	rec2)	/*!< in: rec2 */
-{
-	return(rec1 > rec2);
-}
-
-/**********************************************************************//**
-Sort the dense page directory by address (heap_no).
-The body is generated by UT_SORT_FUNCTION_BODY, which is given this
-function's own name and page_zip_dir_cmp as the comparison. */
-static
-void
-page_zip_dir_sort(
-/*==============*/
-	rec_t**	arr,	/*!< in/out: dense page directory */
-	rec_t**	aux_arr,/*!< in/out: work area */
-	ulint	low,	/*!< in: lower bound of the sorting area, inclusive */
-	ulint	high)	/*!< in: upper bound of the sorting area, exclusive */
-{
-	UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
-			      page_zip_dir_cmp);
-}
-
-/**********************************************************************//**
-Deallocate the index information initialized by page_zip_fields_decode(). 
*/
-static
-void
-page_zip_fields_free(
-/*=================*/
-	dict_index_t*	index)	/*!< in: dummy index to be freed */
-{	/* A NULL index is accepted and ignored. */
-	if (index) {
-		dict_table_t*	table = index->table;	/* read before index->heap is freed. NOTE(review): presumably the index object itself lives in that heap -- confirm */
-		dict_index_zip_pad_mutex_destroy(index);
-		mem_heap_free(index->heap);
-
-		dict_mem_table_free(table);
-	}
-}
-
-/**********************************************************************//**
-Read the index information for the compressed page.
-Builds a dummy table and index, both named "ZIP_DUMMY", with one column
-per encoded field. The entry after the last field is read as the trx_id
-column position when trx_id_col is non-NULL, otherwise as the number of
-nullable fields.
-@return own: dummy index describing the page, or NULL on error */
-static
-dict_index_t*
-page_zip_fields_decode(
-/*===================*/
-	const byte*	buf,	/*!< in: index information */
-	const byte*	end,	/*!< in: end of buf */
-	ulint*		trx_id_col)/*!< in: NULL for non-leaf pages;
-				for leaf pages, pointer to where to store
-				the position of the trx_id column */
-{
-	const byte*	b;
-	ulint		n;
-	ulint		i;
-	ulint		val;
-	dict_table_t*	table;
-	dict_index_t*	index;
-
-	/* Determine the number of fields. */
-	for (b = buf, n = 0; b < end; n++) {
-		if (*b++ & 0x80) {
-			b++; /* skip the second byte */
-		}
-	}
-
-	n--; /* n_nullable or trx_id */
-
-	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {	/* for an empty buffer n wrapped around above; that is rejected here provided REC_MAX_N_FIELDS is below the maximum ulint */
-
-		page_zip_fail(("page_zip_fields_decode: n = %lu\n",
-			       (ulong) n));
-		return(NULL);
-	}
-
-	if (UNIV_UNLIKELY(b > end)) {	/* a two-byte entry was cut off at the end of the buffer */
-
-		page_zip_fail(("page_zip_fields_decode: %p > %p\n",
-			       (const void*) b, (const void*) end));
-		return(NULL);
-	}
-
-	table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
-				      DICT_TF_COMPACT, 0);
-	index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
-				      DICT_HDR_SPACE, 0, n);
-	index->table = table;
-	index->n_uniq = n;
-	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
-	index->cached = TRUE;
-
-	/* Initialize the fields. In every branch below, the lowest bit
-	of val is the NOT NULL flag. */
-	for (b = buf, i = 0; i < n; i++) {
-		ulint	mtype;
-		ulint	len;
-
-		val = *b++;
-
-		if (UNIV_UNLIKELY(val & 0x80)) {
-			/* two-byte entry: fixed length > 62 bytes;
-			len is the 15-bit value shifted right by one */
-			val = (val & 0x7f) << 8 | *b++;
-			len = val >> 1;
-			mtype = DATA_FIXBINARY;
-		} else if (UNIV_UNLIKELY(val >= 126)) {
-			/* variable length with max > 255 bytes */
-			len = 0x7fff;
-			mtype = DATA_BINARY;
-		} else if (val <= 1) {
-			/* variable length with max <= 255 bytes */
-			len = 0;
-			mtype = DATA_BINARY;
-		} else {
-			/* one-byte entry, val in 2..125: fixed length
-			of val >> 1, i.e. 1..62 bytes */
-			len = val >> 1;
-			mtype = DATA_FIXBINARY;
-		}
-
-		dict_mem_table_add_col(table, NULL, NULL, mtype,
-				       val & 1 ? DATA_NOT_NULL : 0, len);
-		dict_index_add_col(index, table,
-				   dict_table_get_nth_col(table, i), 0);
-	}
-
-	val = *b++;
-	if (UNIV_UNLIKELY(val & 0x80)) {
-		val = (val & 0x7f) << 8 | *b++;
-	}
-
-	/* Decode the position of the trx_id column. */
-	if (trx_id_col) {
-		if (!val) {
-			val = ULINT_UNDEFINED;
-		} else if (UNIV_UNLIKELY(val >= n)) {
-			page_zip_fields_free(index);
-			index = NULL;
-		} else {
-			index->type = DICT_CLUSTERED;
-		}
-
-		*trx_id_col = val;	/* written even when NULL is returned */
-	} else {
-		/* Decode the number of nullable fields. */
-		if (UNIV_UNLIKELY(index->n_nullable > val)) {
-			page_zip_fields_free(index);
-			index = NULL;
-		} else {
-			index->n_nullable = val;
-		}
-	}
-
-	ut_ad(b == end);
-
-	return(index);
-}
-
-/**********************************************************************//**
-Populate the sparse page directory from the dense directory. 
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_dir_decode(
-/*================*/
-	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
-					compressed page */
-	page_t*			page,	/*!< in: compact page with valid header;
-					out: trailer and sparse page directory
-					filled in */
-	rec_t**			recs,	/*!< out: dense page directory sorted by
-					ascending address (and heap_no) */
-	rec_t**			recs_aux,/*!< in/out: scratch area */
-	ulint			n_dense)/*!< in: number of user records, and
-					size of recs[] and recs_aux[] */
-{	/* The first page_get_n_recs(page) dense entries may carry flag
-	bits; those marked PAGE_ZIP_DIR_SLOT_OWNED also become sparse
-	directory slots. The remaining entries, up to n_dense, must carry
-	no flag bits. Every failure is reported through page_zip_fail()
-	and returns FALSE. */
-	ulint	i;
-	ulint	n_recs;
-	byte*	slot;
-
-	n_recs = page_get_n_recs(page);
-
-	if (UNIV_UNLIKELY(n_recs > n_dense)) {
-		page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
-			       (ulong) n_recs, (ulong) n_dense));
-		return(FALSE);
-	}
-
-	/* Traverse the list of stored records in the sorting order,
-	starting from the first user record. */
-
-	slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
-	UNIV_PREFETCH_RW(slot);
-
-	/* Zero out the page trailer. */
-	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
-
-	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
-	slot -= PAGE_DIR_SLOT_SIZE;	/* the sparse directory is filled towards lower addresses */
-	UNIV_PREFETCH_RW(slot);
-
-	/* Initialize the sparse directory and copy the dense directory. */
-	for (i = 0; i < n_recs; i++) {
-		ulint	offs = page_zip_dir_get(page_zip, i);
-
-		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
-			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
-			slot -= PAGE_DIR_SLOT_SIZE;
-			UNIV_PREFETCH_RW(slot);
-		}
-
-		if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
-				  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
-			page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
-				       (unsigned) i, (unsigned) n_recs,
-				       (ulong) offs));
-			return(FALSE);
-		}
-
-		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
-	}
-
-	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
-	{
-		const page_dir_slot_t*	last_slot = page_dir_get_nth_slot(
-			page, page_dir_get_n_slots(page) - 1);
-
-		if (UNIV_UNLIKELY(slot != last_slot)) {	/* the number of slots written must match the slot count in the page header */
-			page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
-				       (const void*) slot,
-				       (const void*) last_slot));
-			return(FALSE);
-		}
-	}
-
-	/* Copy the rest of the dense directory. */
-	for (; i < n_dense; i++) {
-		ulint	offs = page_zip_dir_get(page_zip, i);
-
-		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
-			page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
-				       (unsigned) i, (unsigned) n_dense,
-				       (ulong) offs));
-			return(FALSE);
-		}
-
-		recs[i] = page + offs;
-	}
-
-	if (UNIV_LIKELY(n_dense > 1)) {
-		page_zip_dir_sort(recs, recs_aux, 0, n_dense);
-	}
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Initialize the REC_N_NEW_EXTRA_BYTES of each record. 
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_set_extra_bytes(
-/*=====================*/
-	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	page_t*			page,	/*!< in/out: uncompressed page */
-	ulint			info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0 */
-{	/* First pass: chain the user records in dense directory order from
-	the infimum to the supremum and set their info_bits/n_owned byte.
-	Second pass: chain the remaining dense entries as the free list,
-	with that byte cleared. */
-	ulint	n;
-	ulint	i;
-	ulint	n_owned = 1;
-	ulint	offs;
-	rec_t*	rec;
-
-	n = page_get_n_recs(page);
-	rec = page + PAGE_NEW_INFIMUM;
-
-	for (i = 0; i < n; i++) {
-		offs = page_zip_dir_get(page_zip, i);
-
-		if (offs & PAGE_ZIP_DIR_SLOT_DEL) {
-			info_bits |= REC_INFO_DELETED_FLAG;
-		}
-		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
-			info_bits |= n_owned;
-			n_owned = 1;
-		} else {
-			n_owned++;
-		}
-		offs &= PAGE_ZIP_DIR_SLOT_MASK;
-		if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
-				  + REC_N_NEW_EXTRA_BYTES)) {
-			page_zip_fail(("page_zip_set_extra_bytes 1:"
-				       " %u %u %lx\n",
-				       (unsigned) i, (unsigned) n,
-				       (ulong) offs));
-			return(FALSE);
-		}
-
-		rec_set_next_offs_new(rec, offs);
-		rec = page + offs;
-		rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
-		info_bits = 0;	/* the caller-supplied bits are applied to the first record only */
-	}
-
-	/* Set the next pointer of the last user record. */
-	rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
-
-	/* Set n_owned of the supremum record. */
-	page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
-
-	/* The dense directory excludes the infimum and supremum records. */
-	n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
-
-	if (i >= n) {
-		if (UNIV_LIKELY(i == n)) {
-			return(TRUE);	/* no free-list entries to process */
-		}
-
-		page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
-			       (unsigned) i, (unsigned) n));
-		return(FALSE);
-	}
-
-	offs = page_zip_dir_get(page_zip, i);
-
-	/* Set the extra bytes of deleted records on the free list. */
-	for (;;) {
-		if (UNIV_UNLIKELY(!offs)
-		    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
-
-			page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
-				       (ulong) offs));
-			return(FALSE);
-		}
-
-		rec = page + offs;
-		rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
-
-		if (++i == n) {
-			break;
-		}
-
-		offs = page_zip_dir_get(page_zip, i);
-		rec_set_next_offs_new(rec, offs);
-	}
-
-	/* Terminate the free list. */
-	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned (already cleared in the loop above) */
-	rec_set_next_offs_new(rec, 0);
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Apply the modification log to a record containing externally stored
-columns. Do not copy the fields that are stored separately.
-The trx_id/roll_ptr pair and each BTR_EXTERN_FIELD_REF are skipped in the
-record without consuming any log bytes for them.
-@return pointer to the first unread byte of the modification log,
-or NULL on failure */
-static
-const byte*
-page_zip_apply_log_ext(
-/*===================*/
-	rec_t*		rec,		/*!< in/out: record */
-	const ulint*	offsets,	/*!< in: rec_get_offsets(rec) */
-	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
-	const byte*	data,		/*!< in: modification log */
-	const byte*	end)		/*!< in: end of modification log */
-{
-	ulint	i;
-	ulint	len;
-	byte*	next_out = rec;
-
-	/* Check if there are any externally stored columns.
-	For each externally stored column, skip the
-	BTR_EXTERN_FIELD_REF. */
-
-	for (i = 0; i < rec_offs_n_fields(offsets); i++) {
-		byte*	dst;
-
-		if (UNIV_UNLIKELY(i == trx_id_col)) {
-			/* Skip trx_id and roll_ptr */
-			dst = rec_get_nth_field(rec, offsets,
-						i, &len);
-			if (UNIV_UNLIKELY(dst - next_out >= end - data)
-			    || UNIV_UNLIKELY
-			    (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
-			    || rec_offs_nth_extern(offsets, i)) {
-				page_zip_fail(("page_zip_apply_log_ext:"
-					       " trx_id len %lu,"
-					       " %p - %p >= %p - %p\n",
-					       (ulong) len,
-					       (const void*) dst,
-					       (const void*) next_out,
-					       (const void*) end,
-					       (const void*) data));
-				return(NULL);
-			}
-
-			memcpy(next_out, data, dst - next_out);
-			data += dst - next_out;
-			next_out = dst + (DATA_TRX_ID_LEN
-					  + DATA_ROLL_PTR_LEN);
-		} else if (rec_offs_nth_extern(offsets, i)) {
-			dst = rec_get_nth_field(rec, offsets,
-						i, &len);
-			ut_ad(len
-			      >= BTR_EXTERN_FIELD_REF_SIZE);
-
-			len += dst - next_out
-				- BTR_EXTERN_FIELD_REF_SIZE;	/* len is now the number of bytes from next_out up to the start of the reference */
-
-			if (UNIV_UNLIKELY(data + len >= end)) {
-				page_zip_fail(("page_zip_apply_log_ext: "
-					       "ext %p+%lu >= %p\n",
-					       (const void*) data,
-					       (ulong) len,
-					       (const void*) end));
-				return(NULL);
-			}
-
-			memcpy(next_out, data, len);
-			data += len;
-			next_out += len
-				+ BTR_EXTERN_FIELD_REF_SIZE;
-		}
-	}
-
-	/* Copy the last bytes of the record. */
-	len = rec_get_end(rec, offsets) - next_out;
-	if (UNIV_UNLIKELY(data + len >= end)) {
-		page_zip_fail(("page_zip_apply_log_ext: "
-			       "last %p+%lu >= %p\n",
-			       (const void*) data,
-			       (ulong) len,
-			       (const void*) end));
-		return(NULL);
-	}
-	memcpy(next_out, data, len);
-	data += len;
-
-	return(data);
-}
-
-/**********************************************************************//**
-Apply the modification log to an uncompressed page.
-Do not copy the fields that are stored separately. 
-@return pointer to end of modification log, or NULL on failure */ -static -const byte* -page_zip_apply_log( -/*===============*/ - const byte* data, /*!< in: modification log */ - ulint size, /*!< in: maximum length of the log, in bytes */ - rec_t** recs, /*!< in: dense page directory, - sorted by address (indexed by - heap_no - PAGE_HEAP_NO_USER_LOW) */ - ulint n_dense,/*!< in: size of recs[] */ - ulint trx_id_col,/*!< in: column number of trx_id in the index, - or ULINT_UNDEFINED if none */ - ulint heap_status, - /*!< in: heap_no and status bits for - the next record to uncompress */ - dict_index_t* index, /*!< in: index of the page */ - ulint* offsets)/*!< in/out: work area for - rec_get_offsets_reverse() */ -{ - const byte* const end = data + size; - - for (;;) { - ulint val; - rec_t* rec; - ulint len; - ulint hs; - - val = *data++; - if (UNIV_UNLIKELY(!val)) { - return(data - 1); - } - if (val & 0x80) { - val = (val & 0x7f) << 8 | *data++; - if (UNIV_UNLIKELY(!val)) { - page_zip_fail(("page_zip_apply_log:" - " invalid val %x%x\n", - data[-2], data[-1])); - return(NULL); - } - } - if (UNIV_UNLIKELY(data >= end)) { - page_zip_fail(("page_zip_apply_log: %p >= %p\n", - (const void*) data, - (const void*) end)); - return(NULL); - } - if (UNIV_UNLIKELY((val >> 1) > n_dense)) { - page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n", - (ulong) val, (ulong) n_dense)); - return(NULL); - } - - /* Determine the heap number and status bits of the record. */ - rec = recs[(val >> 1) - 1]; - - hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT; - hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1); - - /* This may either be an old record that is being - overwritten (updated in place, or allocated from - the free list), or a new record, with the next - available_heap_no. 
*/ - if (UNIV_UNLIKELY(hs > heap_status)) { - page_zip_fail(("page_zip_apply_log: %lu > %lu\n", - (ulong) hs, (ulong) heap_status)); - return(NULL); - } else if (hs == heap_status) { - /* A new record was allocated from the heap. */ - if (UNIV_UNLIKELY(val & 1)) { - /* Only existing records may be cleared. */ - page_zip_fail(("page_zip_apply_log:" - " attempting to create" - " deleted rec %lu\n", - (ulong) hs)); - return(NULL); - } - heap_status += 1 << REC_HEAP_NO_SHIFT; - } - - mach_write_to_2(rec - REC_NEW_HEAP_NO, hs); - - if (val & 1) { - /* Clear the data bytes of the record. */ - mem_heap_t* heap = NULL; - ulint* offs; - offs = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - memset(rec, 0, rec_offs_data_size(offs)); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - continue; - } - -#if REC_STATUS_NODE_PTR != TRUE -# error "REC_STATUS_NODE_PTR != TRUE" -#endif - rec_get_offsets_reverse(data, index, - hs & REC_STATUS_NODE_PTR, - offsets); - rec_offs_make_valid(rec, index, offsets); - - /* Copy the extra bytes (backwards). */ - { - byte* start = rec_get_start(rec, offsets); - byte* b = rec - REC_N_NEW_EXTRA_BYTES; - while (b != start) { - *--b = *data++; - } - } - - /* Copy the data bytes. */ - if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) { - /* Non-leaf nodes should not contain any - externally stored columns. */ - if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) { - page_zip_fail(("page_zip_apply_log: " - "%lu&REC_STATUS_NODE_PTR\n", - (ulong) hs)); - return(NULL); - } - - data = page_zip_apply_log_ext( - rec, offsets, trx_id_col, data, end); - - if (UNIV_UNLIKELY(!data)) { - return(NULL); - } - } else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) { - len = rec_offs_data_size(offsets) - - REC_NODE_PTR_SIZE; - /* Copy the data bytes, except node_ptr. 
*/ - if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log: " - "node_ptr %p+%lu >= %p\n", - (const void*) data, - (ulong) len, - (const void*) end)); - return(NULL); - } - memcpy(rec, data, len); - data += len; - } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) { - len = rec_offs_data_size(offsets); - - /* Copy all data bytes of - a record in a secondary index. */ - if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log: " - "sec %p+%lu >= %p\n", - (const void*) data, - (ulong) len, - (const void*) end)); - return(NULL); - } - - memcpy(rec, data, len); - data += len; - } else { - /* Skip DB_TRX_ID and DB_ROLL_PTR. */ - ulint l = rec_get_nth_field_offs(offsets, - trx_id_col, &len); - byte* b; - - if (UNIV_UNLIKELY(data + l >= end) - || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN))) { - page_zip_fail(("page_zip_apply_log: " - "trx_id %p+%lu >= %p\n", - (const void*) data, - (ulong) l, - (const void*) end)); - return(NULL); - } - - /* Copy any preceding data bytes. */ - memcpy(rec, data, l); - data += l; - - /* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */ - b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - len = rec_get_end(rec, offsets) - b; - if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log: " - "clust %p+%lu >= %p\n", - (const void*) data, - (ulong) len, - (const void*) end)); - return(NULL); - } - memcpy(b, data, len); - data += len; - } - } -} - -/**********************************************************************//** -Set the heap_no in a record, and skip the fixed-size record header -that is not included in the d_stream. 
-@return TRUE on success, FALSE if d_stream does not end at rec */ -static -ibool -page_zip_decompress_heap_no( -/*========================*/ - z_stream* d_stream, /*!< in/out: compressed page stream */ - rec_t* rec, /*!< in/out: record */ - ulint& heap_status) /*!< in/out: heap_no and status bits */ -{ - if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) { - /* n_dense has grown since the page was last compressed. */ - return(FALSE); - } - - /* Skip the REC_N_NEW_EXTRA_BYTES. */ - d_stream->next_out = rec; - - /* Set heap_no and the status bits. */ - mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status); - heap_status += 1 << REC_HEAP_NO_SHIFT; - return(TRUE); -} - -/**********************************************************************//** -Decompress the records of a node pointer page. -@return TRUE on success, FALSE on failure */ -static -ibool -page_zip_decompress_node_ptrs( -/*==========================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - z_stream* d_stream, /*!< in/out: compressed page stream */ - rec_t** recs, /*!< in: dense page directory - sorted by address */ - ulint n_dense, /*!< in: size of recs[] */ - dict_index_t* index, /*!< in: the index of the page */ - ulint* offsets, /*!< in/out: temporary offsets */ - mem_heap_t* heap) /*!< in: temporary memory heap */ -{ - ulint heap_status = REC_STATUS_NODE_PTR - | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT; - ulint slot; - const byte* storage; - - /* Subtract the space reserved for uncompressed data. */ - d_stream->avail_in -= static_cast<uInt>( - n_dense * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE)); - - /* Decompress the records in heap_no order. 
*/ - for (slot = 0; slot < n_dense; slot++) { - rec_t* rec = recs[slot]; - - d_stream->avail_out = static_cast<uInt>( - rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out); - - ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE - - PAGE_ZIP_START - PAGE_DIR); - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case Z_STREAM_END: - page_zip_decompress_heap_no( - d_stream, rec, heap_status); - goto zlib_done; - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_node_ptrs:" - " 1 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - goto zlib_error; - } - - if (!page_zip_decompress_heap_no( - d_stream, rec, heap_status)) { - ut_ad(0); - } - - /* Read the offsets. The status bits are needed here. */ - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - /* Non-leaf nodes should not have any externally - stored columns. */ - ut_ad(!rec_offs_any_extern(offsets)); - - /* Decompress the data bytes, except node_ptr. */ - d_stream->avail_out =static_cast<uInt>( - rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE); - - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case Z_STREAM_END: - goto zlib_done; - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_node_ptrs:" - " 2 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - goto zlib_error; - } - - /* Clear the node pointer in case the record - will be deleted and the space will be reallocated - to a smaller record. */ - memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE); - d_stream->next_out += REC_NODE_PTR_SIZE; - - ut_ad(d_stream->next_out == rec_get_end(rec, offsets)); - } - - /* Decompress any trailing garbage, in case the last record was - allocated from an originally longer space on the free list. 
*/ - d_stream->avail_out = static_cast<uInt>( - page_header_get_field(page_zip->data, PAGE_HEAP_TOP) - - page_offset(d_stream->next_out)); - if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE - - PAGE_ZIP_START - PAGE_DIR)) { - - page_zip_fail(("page_zip_decompress_node_ptrs:" - " avail_out = %u\n", - d_stream->avail_out)); - goto zlib_error; - } - - if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) { - page_zip_fail(("page_zip_decompress_node_ptrs:" - " inflate(Z_FINISH)=%s\n", - d_stream->msg)); -zlib_error: - inflateEnd(d_stream); - return(FALSE); - } - - /* Note that d_stream->avail_out > 0 may hold here - if the modification log is nonempty. */ - -zlib_done: - if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) { - ut_error; - } - - { - page_t* page = page_align(d_stream->next_out); - - /* Clear the unused heap space on the uncompressed page. */ - memset(d_stream->next_out, 0, - page_dir_get_nth_slot(page, - page_dir_get_n_slots(page) - 1) - - d_stream->next_out); - } - -#ifdef UNIV_DEBUG - page_zip->m_start = PAGE_DATA + d_stream->total_in; -#endif /* UNIV_DEBUG */ - - /* Apply the modification log. */ - { - const byte* mod_log_ptr; - mod_log_ptr = page_zip_apply_log(d_stream->next_in, - d_stream->avail_in + 1, - recs, n_dense, - ULINT_UNDEFINED, heap_status, - index, offsets); - - if (UNIV_UNLIKELY(!mod_log_ptr)) { - return(FALSE); - } - page_zip->m_end = mod_log_ptr - page_zip->data; - page_zip->m_nonempty = mod_log_ptr != d_stream->next_in; - } - - if (UNIV_UNLIKELY - (page_zip_get_trailer_len(page_zip, - dict_index_is_clust(index)) - + page_zip->m_end >= page_zip_get_size(page_zip))) { - page_zip_fail(("page_zip_decompress_node_ptrs:" - " %lu + %lu >= %lu, %lu\n", - (ulong) page_zip_get_trailer_len( - page_zip, dict_index_is_clust(index)), - (ulong) page_zip->m_end, - (ulong) page_zip_get_size(page_zip), - (ulong) dict_index_is_clust(index))); - return(FALSE); - } - - /* Restore the uncompressed columns in heap_no order. 
*/ - storage = page_zip_dir_start_low(page_zip, n_dense); - - for (slot = 0; slot < n_dense; slot++) { - rec_t* rec = recs[slot]; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - /* Non-leaf nodes should not have any externally - stored columns. */ - ut_ad(!rec_offs_any_extern(offsets)); - storage -= REC_NODE_PTR_SIZE; - - memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE, - storage, REC_NODE_PTR_SIZE); - } - - return(TRUE); -} - -/**********************************************************************//** -Decompress the records of a leaf node of a secondary index. -@return TRUE on success, FALSE on failure */ -static -ibool -page_zip_decompress_sec( -/*====================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - z_stream* d_stream, /*!< in/out: compressed page stream */ - rec_t** recs, /*!< in: dense page directory - sorted by address */ - ulint n_dense, /*!< in: size of recs[] */ - dict_index_t* index, /*!< in: the index of the page */ - ulint* offsets) /*!< in/out: temporary offsets */ -{ - ulint heap_status = REC_STATUS_ORDINARY - | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT; - ulint slot; - - ut_a(!dict_index_is_clust(index)); - - /* Subtract the space reserved for uncompressed data. */ - d_stream->avail_in -= static_cast<uint>( - n_dense * PAGE_ZIP_DIR_SLOT_SIZE); - - for (slot = 0; slot < n_dense; slot++) { - rec_t* rec = recs[slot]; - - /* Decompress everything up to this record. 
*/ - d_stream->avail_out = static_cast<uint>( - rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out); - - if (UNIV_LIKELY(d_stream->avail_out)) { - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case Z_STREAM_END: - page_zip_decompress_heap_no( - d_stream, rec, heap_status); - goto zlib_done; - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_sec:" - " inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - goto zlib_error; - } - } - - if (!page_zip_decompress_heap_no( - d_stream, rec, heap_status)) { - ut_ad(0); - } - } - - /* Decompress the data of the last record and any trailing garbage, - in case the last record was allocated from an originally longer space - on the free list. */ - d_stream->avail_out = static_cast<uInt>( - page_header_get_field(page_zip->data, PAGE_HEAP_TOP) - - page_offset(d_stream->next_out)); - if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE - - PAGE_ZIP_START - PAGE_DIR)) { - - page_zip_fail(("page_zip_decompress_sec:" - " avail_out = %u\n", - d_stream->avail_out)); - goto zlib_error; - } - - if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) { - page_zip_fail(("page_zip_decompress_sec:" - " inflate(Z_FINISH)=%s\n", - d_stream->msg)); -zlib_error: - inflateEnd(d_stream); - return(FALSE); - } - - /* Note that d_stream->avail_out > 0 may hold here - if the modification log is nonempty. */ - -zlib_done: - if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) { - ut_error; - } - - { - page_t* page = page_align(d_stream->next_out); - - /* Clear the unused heap space on the uncompressed page. */ - memset(d_stream->next_out, 0, - page_dir_get_nth_slot(page, - page_dir_get_n_slots(page) - 1) - - d_stream->next_out); - } - -#ifdef UNIV_DEBUG - page_zip->m_start = PAGE_DATA + d_stream->total_in; -#endif /* UNIV_DEBUG */ - - /* Apply the modification log. 
*/ - { - const byte* mod_log_ptr; - mod_log_ptr = page_zip_apply_log(d_stream->next_in, - d_stream->avail_in + 1, - recs, n_dense, - ULINT_UNDEFINED, heap_status, - index, offsets); - - if (UNIV_UNLIKELY(!mod_log_ptr)) { - return(FALSE); - } - page_zip->m_end = mod_log_ptr - page_zip->data; - page_zip->m_nonempty = mod_log_ptr != d_stream->next_in; - } - - if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE) - + page_zip->m_end >= page_zip_get_size(page_zip))) { - - page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n", - (ulong) page_zip_get_trailer_len( - page_zip, FALSE), - (ulong) page_zip->m_end, - (ulong) page_zip_get_size(page_zip))); - return(FALSE); - } - - /* There are no uncompressed columns on leaf pages of - secondary indexes. */ - - return(TRUE); -} - -/**********************************************************************//** -Decompress a record of a leaf node of a clustered index that contains -externally stored columns. -@return TRUE on success */ -static -ibool -page_zip_decompress_clust_ext( -/*==========================*/ - z_stream* d_stream, /*!< in/out: compressed page stream */ - rec_t* rec, /*!< in/out: record */ - const ulint* offsets, /*!< in: rec_get_offsets(rec) */ - ulint trx_id_col) /*!< in: position of of DB_TRX_ID */ -{ - ulint i; - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - ulint len; - byte* dst; - - if (UNIV_UNLIKELY(i == trx_id_col)) { - /* Skip trx_id and roll_ptr */ - dst = rec_get_nth_field(rec, offsets, i, &len); - if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN)) { - - page_zip_fail(("page_zip_decompress_clust_ext:" - " len[%lu] = %lu\n", - (ulong) i, (ulong) len)); - return(FALSE); - } - - if (rec_offs_nth_extern(offsets, i)) { - - page_zip_fail(("page_zip_decompress_clust_ext:" - " DB_TRX_ID at %lu is ext\n", - (ulong) i)); - return(FALSE); - } - - d_stream->avail_out = static_cast<uInt>( - dst - d_stream->next_out); - - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case 
Z_STREAM_END: - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_clust_ext:" - " 1 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - return(FALSE); - } - - ut_ad(d_stream->next_out == dst); - - /* Clear DB_TRX_ID and DB_ROLL_PTR in order to - avoid uninitialized bytes in case the record - is affected by page_zip_apply_log(). */ - memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - d_stream->next_out += DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN; - } else if (rec_offs_nth_extern(offsets, i)) { - dst = rec_get_nth_field(rec, offsets, i, &len); - ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); - dst += len - BTR_EXTERN_FIELD_REF_SIZE; - - d_stream->avail_out = static_cast<uInt>( - dst - d_stream->next_out); - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case Z_STREAM_END: - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_clust_ext:" - " 2 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - return(FALSE); - } - - ut_ad(d_stream->next_out == dst); - - /* Clear the BLOB pointer in case - the record will be deleted and the - space will not be reused. Note that - the final initialization of the BLOB - pointers (copying from "externs" - or clearing) will have to take place - only after the page modification log - has been applied. Otherwise, we - could end up with an uninitialized - BLOB pointer when a record is deleted, - reallocated and deleted. */ - memset(d_stream->next_out, 0, - BTR_EXTERN_FIELD_REF_SIZE); - d_stream->next_out - += BTR_EXTERN_FIELD_REF_SIZE; - } - } - - return(TRUE); -} - -/**********************************************************************//** -Compress the records of a leaf node of a clustered index. 
-@return TRUE on success, FALSE on failure */ -static -ibool -page_zip_decompress_clust( -/*======================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - z_stream* d_stream, /*!< in/out: compressed page stream */ - rec_t** recs, /*!< in: dense page directory - sorted by address */ - ulint n_dense, /*!< in: size of recs[] */ - dict_index_t* index, /*!< in: the index of the page */ - ulint trx_id_col, /*!< index of the trx_id column */ - ulint* offsets, /*!< in/out: temporary offsets */ - mem_heap_t* heap) /*!< in: temporary memory heap */ -{ - int err; - ulint slot; - ulint heap_status = REC_STATUS_ORDINARY - | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT; - const byte* storage; - const byte* externs; - - ut_a(dict_index_is_clust(index)); - - /* Subtract the space reserved for uncompressed data. */ - d_stream->avail_in -= static_cast<uInt>(n_dense) - * (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN); - - /* Decompress the records in heap_no order. */ - for (slot = 0; slot < n_dense; slot++) { - rec_t* rec = recs[slot]; - - d_stream->avail_out =static_cast<uInt>( - rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out); - - ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE - - PAGE_ZIP_START - PAGE_DIR); - err = inflate(d_stream, Z_SYNC_FLUSH); - switch (err) { - case Z_STREAM_END: - page_zip_decompress_heap_no( - d_stream, rec, heap_status); - goto zlib_done; - case Z_OK: - case Z_BUF_ERROR: - if (UNIV_LIKELY(!d_stream->avail_out)) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_clust:" - " 1 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - goto zlib_error; - } - - if (!page_zip_decompress_heap_no( - d_stream, rec, heap_status)) { - ut_ad(0); - } - - /* Read the offsets. The status bits are needed here. */ - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - /* This is a leaf page in a clustered index. */ - - /* Check if there are any externally stored columns. 
- For each externally stored column, restore the - BTR_EXTERN_FIELD_REF separately. */ - - if (rec_offs_any_extern(offsets)) { - if (UNIV_UNLIKELY - (!page_zip_decompress_clust_ext( - d_stream, rec, offsets, trx_id_col))) { - - goto zlib_error; - } - } else { - /* Skip trx_id and roll_ptr */ - ulint len; - byte* dst = rec_get_nth_field(rec, offsets, - trx_id_col, &len); - if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN)) { - - page_zip_fail(("page_zip_decompress_clust:" - " len = %lu\n", (ulong) len)); - goto zlib_error; - } - - d_stream->avail_out = static_cast<uInt>( - dst - d_stream->next_out); - - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case Z_STREAM_END: - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_clust:" - " 2 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - goto zlib_error; - } - - ut_ad(d_stream->next_out == dst); - - /* Clear DB_TRX_ID and DB_ROLL_PTR in order to - avoid uninitialized bytes in case the record - is affected by page_zip_apply_log(). */ - memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - d_stream->next_out += DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN; - } - - /* Decompress the last bytes of the record. */ - d_stream->avail_out = static_cast<uInt>( - rec_get_end(rec, offsets) - d_stream->next_out); - - switch (inflate(d_stream, Z_SYNC_FLUSH)) { - case Z_STREAM_END: - case Z_OK: - case Z_BUF_ERROR: - if (!d_stream->avail_out) { - break; - } - /* fall through */ - default: - page_zip_fail(("page_zip_decompress_clust:" - " 3 inflate(Z_SYNC_FLUSH)=%s\n", - d_stream->msg)); - goto zlib_error; - } - } - - /* Decompress any trailing garbage, in case the last record was - allocated from an originally longer space on the free list. 
*/ - d_stream->avail_out = static_cast<uInt>( - page_header_get_field(page_zip->data, PAGE_HEAP_TOP) - - page_offset(d_stream->next_out)); - if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE - - PAGE_ZIP_START - PAGE_DIR)) { - - page_zip_fail(("page_zip_decompress_clust:" - " avail_out = %u\n", - d_stream->avail_out)); - goto zlib_error; - } - - if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) { - page_zip_fail(("page_zip_decompress_clust:" - " inflate(Z_FINISH)=%s\n", - d_stream->msg)); -zlib_error: - inflateEnd(d_stream); - return(FALSE); - } - - /* Note that d_stream->avail_out > 0 may hold here - if the modification log is nonempty. */ - -zlib_done: - if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) { - ut_error; - } - - { - page_t* page = page_align(d_stream->next_out); - - /* Clear the unused heap space on the uncompressed page. */ - memset(d_stream->next_out, 0, - page_dir_get_nth_slot(page, - page_dir_get_n_slots(page) - 1) - - d_stream->next_out); - } - -#ifdef UNIV_DEBUG - page_zip->m_start = PAGE_DATA + d_stream->total_in; -#endif /* UNIV_DEBUG */ - - /* Apply the modification log. */ - { - const byte* mod_log_ptr; - mod_log_ptr = page_zip_apply_log(d_stream->next_in, - d_stream->avail_in + 1, - recs, n_dense, - trx_id_col, heap_status, - index, offsets); - - if (UNIV_UNLIKELY(!mod_log_ptr)) { - return(FALSE); - } - page_zip->m_end = mod_log_ptr - page_zip->data; - page_zip->m_nonempty = mod_log_ptr != d_stream->next_in; - } - - if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE) - + page_zip->m_end >= page_zip_get_size(page_zip))) { - - page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n", - (ulong) page_zip_get_trailer_len( - page_zip, TRUE), - (ulong) page_zip->m_end, - (ulong) page_zip_get_size(page_zip))); - return(FALSE); - } - - storage = page_zip_dir_start_low(page_zip, n_dense); - - externs = storage - n_dense - * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - /* Restore the uncompressed columns in heap_no order. 
*/ - - for (slot = 0; slot < n_dense; slot++) { - ulint i; - ulint len; - byte* dst; - rec_t* rec = recs[slot]; - ibool exists = !page_zip_dir_find_free( - page_zip, page_offset(rec)); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - dst = rec_get_nth_field(rec, offsets, - trx_id_col, &len); - ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; - memcpy(dst, storage, - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - /* Check if there are any externally stored - columns in this record. For each externally - stored column, restore or clear the - BTR_EXTERN_FIELD_REF. */ - if (!rec_offs_any_extern(offsets)) { - continue; - } - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - if (!rec_offs_nth_extern(offsets, i)) { - continue; - } - dst = rec_get_nth_field(rec, offsets, i, &len); - - if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) { - page_zip_fail(("page_zip_decompress_clust:" - " %lu < 20\n", - (ulong) len)); - return(FALSE); - } - - dst += len - BTR_EXTERN_FIELD_REF_SIZE; - - if (UNIV_LIKELY(exists)) { - /* Existing record: - restore the BLOB pointer */ - externs -= BTR_EXTERN_FIELD_REF_SIZE; - - if (UNIV_UNLIKELY - (externs < page_zip->data - + page_zip->m_end)) { - page_zip_fail(("page_zip_" - "decompress_clust: " - "%p < %p + %lu\n", - (const void*) externs, - (const void*) - page_zip->data, - (ulong) - page_zip->m_end)); - return(FALSE); - } - - memcpy(dst, externs, - BTR_EXTERN_FIELD_REF_SIZE); - - page_zip->n_blobs++; - } else { - /* Deleted record: - clear the BLOB pointer */ - memset(dst, 0, - BTR_EXTERN_FIELD_REF_SIZE); - } - } - } - - return(TRUE); -} - -/**********************************************************************//** -Decompress a page. This function should tolerate errors on the compressed -page. Instead of letting assertions fail, it will return FALSE if an -inconsistency is detected. 
-@return TRUE on success, FALSE on failure */ -UNIV_INTERN -ibool -page_zip_decompress( -/*================*/ - page_zip_des_t* page_zip,/*!< in: data, ssize; - out: m_start, m_end, m_nonempty, n_blobs */ - page_t* page, /*!< out: uncompressed page, may be trashed */ - ibool all) /*!< in: TRUE=decompress the whole page; - FALSE=verify but do not copy some - page header fields that should not change - after page creation */ -{ - z_stream d_stream; - dict_index_t* index = NULL; - rec_t** recs; /*!< dense page directory, sorted by address */ - ulint n_dense;/* number of user records on the page */ - ulint trx_id_col = ULINT_UNDEFINED; - mem_heap_t* heap; - ulint* offsets; -#ifndef UNIV_HOTBACKUP - ullint usec = ut_time_us(NULL); -#endif /* !UNIV_HOTBACKUP */ - - ut_ad(page_zip_simple_validate(page_zip)); - UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - /* The dense directory excludes the infimum and supremum records. */ - n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW; - if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE - >= page_zip_get_size(page_zip))) { - page_zip_fail(("page_zip_decompress 1: %lu %lu\n", - (ulong) n_dense, - (ulong) page_zip_get_size(page_zip))); - return(FALSE); - } - - heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE); - - recs = static_cast<rec_t**>( - mem_heap_alloc(heap, n_dense * (2 * sizeof *recs))); - - if (all) { - /* Copy the page header. */ - memcpy(page, page_zip->data, PAGE_DATA); - } else { - /* Check that the bytes that we skip are identical. */ -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG - ut_a(!memcmp(FIL_PAGE_TYPE + page, - FIL_PAGE_TYPE + page_zip->data, - PAGE_HEADER - FIL_PAGE_TYPE)); - ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page, - PAGE_HEADER + PAGE_LEVEL + page_zip->data, - PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL))); -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ - - /* Copy the mutable parts of the page header. 
*/ - memcpy(page, page_zip->data, FIL_PAGE_TYPE); - memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data, - PAGE_LEVEL - PAGE_N_DIR_SLOTS); - -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG - /* Check that the page headers match after copying. */ - ut_a(!memcmp(page, page_zip->data, PAGE_DATA)); -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ - } - -#ifdef UNIV_ZIP_DEBUG - /* Clear the uncompressed page, except the header. */ - memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA); -#endif /* UNIV_ZIP_DEBUG */ - UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA); - - /* Copy the page directory. */ - if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs, - recs + n_dense, n_dense))) { -zlib_error: - mem_heap_free(heap); - return(FALSE); - } - - /* Copy the infimum and supremum records. */ - memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES), - infimum_extra, sizeof infimum_extra); - if (page_is_empty(page)) { - rec_set_next_offs_new(page + PAGE_NEW_INFIMUM, - PAGE_NEW_SUPREMUM); - } else { - rec_set_next_offs_new(page + PAGE_NEW_INFIMUM, - page_zip_dir_get(page_zip, 0) - & PAGE_ZIP_DIR_SLOT_MASK); - } - memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data); - memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1), - supremum_extra_data, sizeof supremum_extra_data); - - page_zip_set_alloc(&d_stream, heap); - - d_stream.next_in = page_zip->data + PAGE_DATA; - /* Subtract the space reserved for - the page header and the end marker of the modification log. */ - d_stream.avail_in = static_cast<uInt>( - page_zip_get_size(page_zip) - (PAGE_DATA + 1)); - d_stream.next_out = page + PAGE_ZIP_START; - d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START; - - if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT) - != Z_OK)) { - ut_error; - } - - /* Decode the zlib header and the index information. 
*/ - if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) { - - page_zip_fail(("page_zip_decompress:" - " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg)); - goto zlib_error; - } - - if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) { - - page_zip_fail(("page_zip_decompress:" - " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg)); - goto zlib_error; - } - - index = page_zip_fields_decode( - page + PAGE_ZIP_START, d_stream.next_out, - page_is_leaf(page) ? &trx_id_col : NULL); - - if (UNIV_UNLIKELY(!index)) { - - goto zlib_error; - } - - /* Decompress the user records. */ - page_zip->n_blobs = 0; - d_stream.next_out = page + PAGE_ZIP_START; - - { - /* Pre-allocate the offsets for rec_get_offsets_reverse(). */ - ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE - + dict_index_get_n_fields(index); - - offsets = static_cast<ulint*>( - mem_heap_alloc(heap, n * sizeof(ulint))); - - *offsets = n; - } - - /* Decompress the records in heap_no order. */ - if (!page_is_leaf(page)) { - /* This is a node pointer page. */ - ulint info_bits; - - if (UNIV_UNLIKELY - (!page_zip_decompress_node_ptrs(page_zip, &d_stream, - recs, n_dense, index, - offsets, heap))) { - goto err_exit; - } - - info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL - ? REC_INFO_MIN_REC_FLAG : 0; - - if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page, - info_bits))) { - goto err_exit; - } - } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) { - /* This is a leaf page in a secondary index. */ - if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream, - recs, n_dense, - index, offsets))) { - goto err_exit; - } - - if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, - page, 0))) { -err_exit: - page_zip_fields_free(index); - mem_heap_free(heap); - return(FALSE); - } - } else { - /* This is a leaf page in a clustered index. 
*/ - if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip, - &d_stream, recs, - n_dense, index, - trx_id_col, - offsets, heap))) { - goto err_exit; - } - - if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, - page, 0))) { - goto err_exit; - } - } - - ut_a(page_is_comp(page)); - UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); - - page_zip_fields_free(index); - mem_heap_free(heap); -#ifndef UNIV_HOTBACKUP - ullint time_diff = ut_time_us(NULL) - usec; - page_zip_stat[page_zip->ssize - 1].decompressed++; - page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff; - - index_id_t index_id = btr_page_get_index_id(page); - - if (srv_cmp_per_index_enabled) { - mutex_enter(&page_zip_stat_per_index_mutex); - page_zip_stat_per_index[index_id].decompressed++; - page_zip_stat_per_index[index_id].decompressed_usec += time_diff; - mutex_exit(&page_zip_stat_per_index_mutex); - } -#endif /* !UNIV_HOTBACKUP */ - - /* Update the stat counter for LRU policy. */ - buf_LRU_stat_inc_unzip(); - - MONITOR_INC(MONITOR_PAGE_DECOMPRESS); - - return(TRUE); -} - -#ifdef UNIV_ZIP_DEBUG -/**********************************************************************//** -Dump a block of memory on the standard error stream. */ -static -void -page_zip_hexdump_func( -/*==================*/ - const char* name, /*!< in: name of the data structure */ - const void* buf, /*!< in: data */ - ulint size) /*!< in: length of the data, in bytes */ -{ - const byte* s = static_cast<const byte*>(buf); - ulint addr; - const ulint width = 32; /* bytes per line */ - - fprintf(stderr, "%s:\n", name); - - for (addr = 0; addr < size; addr += width) { - ulint i; - - fprintf(stderr, "%04lx ", (ulong) addr); - - i = ut_min(width, size - addr); - - while (i--) { - fprintf(stderr, "%02x", *s++); - } - - putc('\n', stderr); - } -} - -/** Dump a block of memory on the standard error stream. 
-@param buf in: data -@param size in: length of the data, in bytes */ -#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size) - -/** Flag: make page_zip_validate() compare page headers only */ -UNIV_INTERN ibool page_zip_validate_header_only = FALSE; - -/**********************************************************************//** -Check that the compressed and decompressed pages match. -@return TRUE if valid, FALSE if not */ -UNIV_INTERN -ibool -page_zip_validate_low( -/*==================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - const page_t* page, /*!< in: uncompressed page */ - const dict_index_t* index, /*!< in: index of the page, if known */ - ibool sloppy) /*!< in: FALSE=strict, - TRUE=ignore the MIN_REC_FLAG */ -{ - page_zip_des_t temp_page_zip; - byte* temp_page_buf; - page_t* temp_page; - ibool valid; - - if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV, - FIL_PAGE_LSN - FIL_PAGE_PREV) - || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2) - || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA, - PAGE_DATA - FIL_PAGE_DATA)) { - page_zip_fail(("page_zip_validate: page header\n")); - page_zip_hexdump(page_zip, sizeof *page_zip); - page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip)); - page_zip_hexdump(page, UNIV_PAGE_SIZE); - return(FALSE); - } - - ut_a(page_is_comp(page)); - - if (page_zip_validate_header_only) { - return(TRUE); - } - - /* page_zip_decompress() expects the uncompressed page to be - UNIV_PAGE_SIZE aligned. 
*/ - temp_page_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); - temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE)); - - UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - temp_page_zip = *page_zip; - valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE); - if (!valid) { - fputs("page_zip_validate(): failed to decompress\n", stderr); - goto func_exit; - } - if (page_zip->n_blobs != temp_page_zip.n_blobs) { - page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n", - page_zip->n_blobs, temp_page_zip.n_blobs)); - valid = FALSE; - } -#ifdef UNIV_DEBUG - if (page_zip->m_start != temp_page_zip.m_start) { - page_zip_fail(("page_zip_validate: m_start: %u!=%u\n", - page_zip->m_start, temp_page_zip.m_start)); - valid = FALSE; - } -#endif /* UNIV_DEBUG */ - if (page_zip->m_end != temp_page_zip.m_end) { - page_zip_fail(("page_zip_validate: m_end: %u!=%u\n", - page_zip->m_end, temp_page_zip.m_end)); - valid = FALSE; - } - if (page_zip->m_nonempty != temp_page_zip.m_nonempty) { - page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n", - page_zip->m_nonempty, - temp_page_zip.m_nonempty)); - valid = FALSE; - } - if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER, - UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) { - - /* In crash recovery, the "minimum record" flag may be - set incorrectly until the mini-transaction is - committed. Let us tolerate that difference when we - are performing a sloppy validation. 
*/ - - ulint* offsets; - mem_heap_t* heap; - const rec_t* rec; - const rec_t* trec; - byte info_bits_diff; - ulint offset - = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE); - ut_a(offset >= PAGE_NEW_SUPREMUM); - offset -= 5/*REC_NEW_INFO_BITS*/; - - info_bits_diff = page[offset] ^ temp_page[offset]; - - if (info_bits_diff == REC_INFO_MIN_REC_FLAG) { - temp_page[offset] = page[offset]; - - if (!memcmp(page + PAGE_HEADER, - temp_page + PAGE_HEADER, - UNIV_PAGE_SIZE - PAGE_HEADER - - FIL_PAGE_DATA_END)) { - - /* Only the minimum record flag - differed. Let us ignore it. */ - page_zip_fail(("page_zip_validate: " - "min_rec_flag " - "(%s" - "%lu,%lu,0x%02lx)\n", - sloppy ? "ignored, " : "", - page_get_space_id(page), - page_get_page_no(page), - (ulong) page[offset])); - valid = sloppy; - goto func_exit; - } - } - - /* Compare the pointers in the PAGE_FREE list. */ - rec = page_header_get_ptr(page, PAGE_FREE); - trec = page_header_get_ptr(temp_page, PAGE_FREE); - - while (rec || trec) { - if (page_offset(rec) != page_offset(trec)) { - page_zip_fail(("page_zip_validate: " - "PAGE_FREE list: %u!=%u\n", - (unsigned) page_offset(rec), - (unsigned) page_offset(trec))); - valid = FALSE; - goto func_exit; - } - - rec = page_rec_get_next_low(rec, TRUE); - trec = page_rec_get_next_low(trec, TRUE); - } - - /* Compare the records. */ - heap = NULL; - offsets = NULL; - rec = page_rec_get_next_low( - page + PAGE_NEW_INFIMUM, TRUE); - trec = page_rec_get_next_low( - temp_page + PAGE_NEW_INFIMUM, TRUE); - - do { - if (page_offset(rec) != page_offset(trec)) { - page_zip_fail(("page_zip_validate: " - "record list: 0x%02x!=0x%02x\n", - (unsigned) page_offset(rec), - (unsigned) page_offset(trec))); - valid = FALSE; - break; - } - - if (index) { - /* Compare the data. 
*/ - offsets = rec_get_offsets( - rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (memcmp(rec - rec_offs_extra_size(offsets), - trec - rec_offs_extra_size(offsets), - rec_offs_size(offsets))) { - page_zip_fail( - ("page_zip_validate: " - "record content: 0x%02x", - (unsigned) page_offset(rec))); - valid = FALSE; - break; - } - } - - rec = page_rec_get_next_low(rec, TRUE); - trec = page_rec_get_next_low(trec, TRUE); - } while (rec || trec); - - if (heap) { - mem_heap_free(heap); - } - } - -func_exit: - if (!valid) { - page_zip_hexdump(page_zip, sizeof *page_zip); - page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip)); - page_zip_hexdump(page, UNIV_PAGE_SIZE); - page_zip_hexdump(temp_page, UNIV_PAGE_SIZE); - } - ut_free(temp_page_buf); - return(valid); -} - -/**********************************************************************//** -Check that the compressed and decompressed pages match. -@return TRUE if valid, FALSE if not */ -UNIV_INTERN -ibool -page_zip_validate( -/*==============*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - const page_t* page, /*!< in: uncompressed page */ - const dict_index_t* index) /*!< in: index of the page, if known */ -{ - return(page_zip_validate_low(page_zip, page, index, - recv_recovery_is_on())); -} -#endif /* UNIV_ZIP_DEBUG */ - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Assert that the compressed and decompressed page headers match. 
-@return TRUE */ -static -ibool -page_zip_header_cmp( -/*================*/ - const page_zip_des_t* page_zip,/*!< in: compressed page */ - const byte* page) /*!< in: uncompressed page */ -{ - ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV, - FIL_PAGE_LSN - FIL_PAGE_PREV)); - ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, - 2)); - ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA, - PAGE_DATA - FIL_PAGE_DATA)); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/**********************************************************************//** -Write a record on the compressed page that contains externally stored -columns. The data must already have been written to the uncompressed page. -@return end of modification log */ -static -byte* -page_zip_write_rec_ext( -/*===================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - const page_t* page, /*!< in: page containing rec */ - const byte* rec, /*!< in: record being written */ - dict_index_t* index, /*!< in: record descriptor */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - ulint create, /*!< in: nonzero=insert, zero=update */ - ulint trx_id_col, /*!< in: position of DB_TRX_ID */ - ulint heap_no, /*!< in: heap number of rec */ - byte* storage, /*!< in: end of dense page directory */ - byte* data) /*!< in: end of modification log */ -{ - const byte* start = rec; - ulint i; - ulint len; - byte* externs = storage; - ulint n_ext = rec_offs_n_extern(offsets); - - ut_ad(rec_offs_validate(rec, index, offsets)); - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - - externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) - * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW); - - /* Note that this will not take into account - the BLOB columns of rec if create==TRUE. 
*/ - ut_ad(data + rec_offs_data_size(offsets) - - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) - - n_ext * BTR_EXTERN_FIELD_REF_SIZE - < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs); - - { - ulint blob_no = page_zip_get_n_prev_extern( - page_zip, rec, index); - byte* ext_end = externs - page_zip->n_blobs - * BTR_EXTERN_FIELD_REF_SIZE; - ut_ad(blob_no <= page_zip->n_blobs); - externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE; - - if (create) { - page_zip->n_blobs += static_cast<unsigned>(n_ext); - ASSERT_ZERO_BLOB(ext_end - n_ext - * BTR_EXTERN_FIELD_REF_SIZE); - memmove(ext_end - n_ext - * BTR_EXTERN_FIELD_REF_SIZE, - ext_end, - externs - ext_end); - } - - ut_a(blob_no + n_ext <= page_zip->n_blobs); - } - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - const byte* src; - - if (UNIV_UNLIKELY(i == trx_id_col)) { - ut_ad(!rec_offs_nth_extern(offsets, - i)); - ut_ad(!rec_offs_nth_extern(offsets, - i + 1)); - /* Locate trx_id and roll_ptr. */ - src = rec_get_nth_field(rec, offsets, - i, &len); - ut_ad(len == DATA_TRX_ID_LEN); - ut_ad(src + DATA_TRX_ID_LEN - == rec_get_nth_field( - rec, offsets, - i + 1, &len)); - ut_ad(len == DATA_ROLL_PTR_LEN); - - /* Log the preceding fields. */ - ASSERT_ZERO(data, src - start); - memcpy(data, start, src - start); - data += src - start; - start = src + (DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN); - - /* Store trx_id and roll_ptr. */ - memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) - * (heap_no - 1), - src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - i++; /* skip also roll_ptr */ - } else if (rec_offs_nth_extern(offsets, i)) { - src = rec_get_nth_field(rec, offsets, - i, &len); - - ut_ad(dict_index_is_clust(index)); - ut_ad(len - >= BTR_EXTERN_FIELD_REF_SIZE); - src += len - BTR_EXTERN_FIELD_REF_SIZE; - - ASSERT_ZERO(data, src - start); - memcpy(data, start, src - start); - data += src - start; - start = src + BTR_EXTERN_FIELD_REF_SIZE; - - /* Store the BLOB pointer. 
*/ - externs -= BTR_EXTERN_FIELD_REF_SIZE; - ut_ad(data < externs); - memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE); - } - } - - /* Log the last bytes of the record. */ - len = rec_offs_data_size(offsets) - (start - rec); - - ASSERT_ZERO(data, len); - memcpy(data, start, len); - data += len; - - return(data); -} - -/**********************************************************************//** -Write an entire record on the compressed page. The data must already -have been written to the uncompressed page. */ -UNIV_INTERN -void -page_zip_write_rec( -/*===============*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record being written */ - dict_index_t* index, /*!< in: the index the record belongs to */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint create) /*!< in: nonzero=insert, zero=update */ -{ - const page_t* page; - byte* data; - byte* storage; - ulint heap_no; - byte* slot; - - ut_ad(PAGE_ZIP_MATCH(rec, page_zip)); - ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(page_zip_get_size(page_zip) - > PAGE_DATA + page_zip_dir_size(page_zip)); - ut_ad(rec_offs_comp(offsets)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - ut_ad(page_zip->m_start >= PAGE_DATA); - - page = page_align(rec); - - ut_ad(page_zip_header_cmp(page_zip, page)); - ut_ad(page_simple_validate_new((page_t*) page)); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - - slot = page_zip_dir_find(page_zip, page_offset(rec)); - ut_a(slot); - /* Copy the delete mark. 
*/ - if (rec_get_deleted_flag(rec, TRUE)) { - *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8; - } else { - *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8); - } - - ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START); - ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE - - PAGE_DIR - PAGE_DIR_SLOT_SIZE - * page_dir_get_n_slots(page)); - - heap_no = rec_get_heap_no_new(rec); - ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */ - ut_ad(heap_no < page_dir_get_n_heap(page)); - - /* Append to the modification log. */ - data = page_zip->data + page_zip->m_end; - ut_ad(!*data); - - /* Identify the record by writing its heap number - 1. - 0 is reserved to indicate the end of the modification log. */ - - if (UNIV_UNLIKELY(heap_no - 1 >= 64)) { - *data++ = (byte) (0x80 | (heap_no - 1) >> 7); - ut_ad(!*data); - } - *data++ = (byte) ((heap_no - 1) << 1); - ut_ad(!*data); - - { - const byte* start = rec - rec_offs_extra_size(offsets); - const byte* b = rec - REC_N_NEW_EXTRA_BYTES; - - /* Write the extra bytes backwards, so that - rec_offs_extra_size() can be easily computed in - page_zip_apply_log() by invoking - rec_get_offsets_reverse(). */ - - while (b != start) { - *data++ = *--b; - ut_ad(!*data); - } - } - - /* Write the data bytes. Store the uncompressed bytes separately. */ - storage = page_zip_dir_start(page_zip); - - if (page_is_leaf(page)) { - ulint len; - - if (dict_index_is_clust(index)) { - ulint trx_id_col; - - trx_id_col = dict_index_get_sys_col_pos(index, - DATA_TRX_ID); - ut_ad(trx_id_col != ULINT_UNDEFINED); - - /* Store separately trx_id, roll_ptr and - the BTR_EXTERN_FIELD_REF of each BLOB column. */ - if (rec_offs_any_extern(offsets)) { - data = page_zip_write_rec_ext( - page_zip, page, - rec, index, offsets, create, - trx_id_col, heap_no, storage, data); - } else { - /* Locate trx_id and roll_ptr. 
*/ - const byte* src - = rec_get_nth_field(rec, offsets, - trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - ut_ad(src + DATA_TRX_ID_LEN - == rec_get_nth_field( - rec, offsets, - trx_id_col + 1, &len)); - ut_ad(len == DATA_ROLL_PTR_LEN); - - /* Log the preceding fields. */ - ASSERT_ZERO(data, src - rec); - memcpy(data, rec, src - rec); - data += src - rec; - - /* Store trx_id and roll_ptr. */ - memcpy(storage - - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN) - * (heap_no - 1), - src, - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; - - /* Log the last bytes of the record. */ - len = rec_offs_data_size(offsets) - - (src - rec); - - ASSERT_ZERO(data, len); - memcpy(data, src, len); - data += len; - } - } else { - /* Leaf page of a secondary index: - no externally stored columns */ - ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID) - == ULINT_UNDEFINED); - ut_ad(!rec_offs_any_extern(offsets)); - - /* Log the entire record. */ - len = rec_offs_data_size(offsets); - - ASSERT_ZERO(data, len); - memcpy(data, rec, len); - data += len; - } - } else { - /* This is a node pointer page. */ - ulint len; - - /* Non-leaf nodes should not have any externally - stored columns. */ - ut_ad(!rec_offs_any_extern(offsets)); - - /* Copy the data bytes, except node_ptr. */ - len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE; - ut_ad(data + len < storage - REC_NODE_PTR_SIZE - * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)); - ASSERT_ZERO(data, len); - memcpy(data, rec, len); - data += len; - - /* Copy the node pointer to the uncompressed area. 
*/ - memcpy(storage - REC_NODE_PTR_SIZE - * (heap_no - 1), - rec + len, - REC_NODE_PTR_SIZE); - } - - ut_a(!*data); - ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip)); - page_zip->m_end = data - page_zip->data; - page_zip->m_nonempty = TRUE; - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page_align(rec), index)); -#endif /* UNIV_ZIP_DEBUG */ -} - -/***********************************************************//** -Parses a log record of writing a BLOB pointer of a record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_write_blob_ptr( -/*==========================*/ - byte* ptr, /*!< in: redo log buffer */ - byte* end_ptr,/*!< in: redo log buffer end */ - page_t* page, /*!< in/out: uncompressed page */ - page_zip_des_t* page_zip)/*!< in/out: compressed page */ -{ - ulint offset; - ulint z_offset; - - ut_ad(!page == !page_zip); - - if (UNIV_UNLIKELY - (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - z_offset = mach_read_from_2(ptr + 2); - - if (UNIV_UNLIKELY(offset < PAGE_ZIP_START) - || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) { -corrupt: - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (page) { - if (UNIV_UNLIKELY(!page_zip) - || UNIV_UNLIKELY(!page_is_leaf(page))) { - - goto corrupt; - } - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, NULL)); -#endif /* UNIV_ZIP_DEBUG */ - - memcpy(page + offset, - ptr + 4, BTR_EXTERN_FIELD_REF_SIZE); - memcpy(page_zip->data + z_offset, - ptr + 4, BTR_EXTERN_FIELD_REF_SIZE); - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, NULL)); -#endif /* UNIV_ZIP_DEBUG */ - } - - return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE)); -} - -/**********************************************************************//** -Write a BLOB pointer of a record on the leaf page of a clustered index. 
-The information must already have been updated on the uncompressed page. */ -UNIV_INTERN -void -page_zip_write_blob_ptr( -/*====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in/out: record whose data is being - written */ - dict_index_t* index, /*!< in: index of the page */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint n, /*!< in: column index */ - mtr_t* mtr) /*!< in: mini-transaction handle, - or NULL if no logging is needed */ -{ - const byte* field; - byte* externs; - const page_t* page = page_align(rec); - ulint blob_no; - ulint len; - - ut_ad(PAGE_ZIP_MATCH(rec, page_zip)); - ut_ad(page_simple_validate_new((page_t*) page)); - ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(page_zip_get_size(page_zip) - > PAGE_DATA + page_zip_dir_size(page_zip)); - ut_ad(rec_offs_comp(offsets)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(rec_offs_any_extern(offsets)); - ut_ad(rec_offs_nth_extern(offsets, n)); - - ut_ad(page_zip->m_start >= PAGE_DATA); - ut_ad(page_zip_header_cmp(page_zip, page)); - - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - - blob_no = page_zip_get_n_prev_extern(page_zip, rec, index) - + rec_get_n_extern_new(rec, index, n); - ut_a(blob_no < page_zip->n_blobs); - - externs = page_zip->data + page_zip_get_size(page_zip) - - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW) - * (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - field = rec_get_nth_field(rec, offsets, n, &len); - - externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE; - field += len - BTR_EXTERN_FIELD_REF_SIZE; - - memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE); - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, index)); 
-#endif /* UNIV_ZIP_DEBUG */ - - if (mtr) { -#ifndef UNIV_HOTBACKUP - byte* log_ptr = mlog_open( - mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE); - if (UNIV_UNLIKELY(!log_ptr)) { - return; - } - - log_ptr = mlog_write_initial_log_record_fast( - (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr); - mach_write_to_2(log_ptr, page_offset(field)); - log_ptr += 2; - mach_write_to_2(log_ptr, externs - page_zip->data); - log_ptr += 2; - memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE); - log_ptr += BTR_EXTERN_FIELD_REF_SIZE; - mlog_close(mtr, log_ptr); -#endif /* !UNIV_HOTBACKUP */ - } -} - -/***********************************************************//** -Parses a log record of writing the node pointer of a record. -@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_write_node_ptr( -/*==========================*/ - byte* ptr, /*!< in: redo log buffer */ - byte* end_ptr,/*!< in: redo log buffer end */ - page_t* page, /*!< in/out: uncompressed page */ - page_zip_des_t* page_zip)/*!< in/out: compressed page */ -{ - ulint offset; - ulint z_offset; - - ut_ad(!page == !page_zip); - - if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - z_offset = mach_read_from_2(ptr + 2); - - if (UNIV_UNLIKELY(offset < PAGE_ZIP_START) - || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) { -corrupt: - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (page) { - byte* storage_end; - byte* field; - byte* storage; - ulint heap_no; - - if (UNIV_UNLIKELY(!page_zip) - || UNIV_UNLIKELY(page_is_leaf(page))) { - - goto corrupt; - } - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, NULL)); -#endif /* UNIV_ZIP_DEBUG */ - - field = page + offset; - storage = page_zip->data + z_offset; - - storage_end = page_zip_dir_start(page_zip); - - heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE; - - if (UNIV_UNLIKELY((storage_end - storage) 
% REC_NODE_PTR_SIZE) - || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW) - || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) { - - goto corrupt; - } - - memcpy(field, ptr + 4, REC_NODE_PTR_SIZE); - memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE); - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, NULL)); -#endif /* UNIV_ZIP_DEBUG */ - } - - return(ptr + (2 + 2 + REC_NODE_PTR_SIZE)); -} - -/**********************************************************************//** -Write the node pointer of a record on a non-leaf compressed page. */ -UNIV_INTERN -void -page_zip_write_node_ptr( -/*====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in/out: record */ - ulint size, /*!< in: data size of rec */ - ulint ptr, /*!< in: node pointer */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ -{ - byte* field; - byte* storage; -#ifdef UNIV_DEBUG - page_t* page = page_align(rec); -#endif /* UNIV_DEBUG */ - - ut_ad(PAGE_ZIP_MATCH(rec, page_zip)); - ut_ad(page_simple_validate_new(page)); - ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(page_zip_get_size(page_zip) - > PAGE_DATA + page_zip_dir_size(page_zip)); - ut_ad(page_rec_is_comp(rec)); - - ut_ad(page_zip->m_start >= PAGE_DATA); - ut_ad(page_zip_header_cmp(page_zip, page)); - - ut_ad(!page_is_leaf(page)); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - UNIV_MEM_ASSERT_RW(rec, size); - - storage = page_zip_dir_start(page_zip) - - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE; - field = rec + size - REC_NODE_PTR_SIZE; - -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG - ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE)); -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -#if REC_NODE_PTR_SIZE != 4 -# error "REC_NODE_PTR_SIZE != 4" -#endif - mach_write_to_4(field, ptr); - memcpy(storage, field, REC_NODE_PTR_SIZE); - - if (mtr) { -#ifndef UNIV_HOTBACKUP - byte* log_ptr = mlog_open(mtr, - 11 + 2 + 2 + REC_NODE_PTR_SIZE); - if 
(UNIV_UNLIKELY(!log_ptr)) { - return; - } - - log_ptr = mlog_write_initial_log_record_fast( - field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr); - mach_write_to_2(log_ptr, page_offset(field)); - log_ptr += 2; - mach_write_to_2(log_ptr, storage - page_zip->data); - log_ptr += 2; - memcpy(log_ptr, field, REC_NODE_PTR_SIZE); - log_ptr += REC_NODE_PTR_SIZE; - mlog_close(mtr, log_ptr); -#endif /* !UNIV_HOTBACKUP */ - } -} - -/**********************************************************************//** -Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ -UNIV_INTERN -void -page_zip_write_trx_id_and_roll_ptr( -/*===============================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - byte* rec, /*!< in/out: record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint trx_id_col,/*!< in: column number of TRX_ID in rec */ - trx_id_t trx_id, /*!< in: transaction identifier */ - roll_ptr_t roll_ptr)/*!< in: roll_ptr */ -{ - byte* field; - byte* storage; -#ifdef UNIV_DEBUG - page_t* page = page_align(rec); -#endif /* UNIV_DEBUG */ - ulint len; - - ut_ad(PAGE_ZIP_MATCH(rec, page_zip)); - - ut_ad(page_simple_validate_new(page)); - ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(page_zip_get_size(page_zip) - > PAGE_DATA + page_zip_dir_size(page_zip)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(rec_offs_comp(offsets)); - - ut_ad(page_zip->m_start >= PAGE_DATA); - ut_ad(page_zip_header_cmp(page_zip, page)); - - ut_ad(page_is_leaf(page)); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - storage = page_zip_dir_start(page_zip) - - (rec_get_heap_no_new(rec) - 1) - * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - -#if DATA_TRX_ID + 1 != DATA_ROLL_PTR -# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR" -#endif - field = rec_get_nth_field(rec, offsets, trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - ut_ad(field + DATA_TRX_ID_LEN - == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len)); - ut_ad(len 
== DATA_ROLL_PTR_LEN); -#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG - ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)); -#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -#if DATA_TRX_ID_LEN != 6 -# error "DATA_TRX_ID_LEN != 6" -#endif - mach_write_to_6(field, trx_id); -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif - mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr); - memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); -} - -/**********************************************************************//** -Clear an area on the uncompressed and compressed page. -Do not clear the data payload, as that would grow the modification log. */ -static -void -page_zip_clear_rec( -/*===============*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - byte* rec, /*!< in: record to clear */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ -{ - ulint heap_no; - page_t* page = page_align(rec); - byte* storage; - byte* field; - ulint len; - /* page_zip_validate() would fail here if a record - containing externally stored columns is being deleted. */ - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!page_zip_dir_find(page_zip, page_offset(rec))); - ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec))); - ut_ad(page_zip_header_cmp(page_zip, page)); - - heap_no = rec_get_heap_no_new(rec); - ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - - if (!page_is_leaf(page)) { - /* Clear node_ptr. 
On the compressed page, - there is an array of node_ptr immediately before the - dense page directory, at the very end of the page. */ - storage = page_zip_dir_start(page_zip); - ut_ad(dict_index_get_n_unique_in_tree(index) == - rec_offs_n_fields(offsets) - 1); - field = rec_get_nth_field(rec, offsets, - rec_offs_n_fields(offsets) - 1, - &len); - ut_ad(len == REC_NODE_PTR_SIZE); - - ut_ad(!rec_offs_any_extern(offsets)); - memset(field, 0, REC_NODE_PTR_SIZE); - memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE, - 0, REC_NODE_PTR_SIZE); - } else if (dict_index_is_clust(index)) { - /* Clear trx_id and roll_ptr. On the compressed page, - there is an array of these fields immediately before the - dense page directory, at the very end of the page. */ - const ulint trx_id_pos - = dict_col_get_clust_pos( - dict_table_get_sys_col( - index->table, DATA_TRX_ID), index); - storage = page_zip_dir_start(page_zip); - field = rec_get_nth_field(rec, offsets, trx_id_pos, &len); - ut_ad(len == DATA_TRX_ID_LEN); - - memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - memset(storage - (heap_no - 1) - * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), - 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - if (rec_offs_any_extern(offsets)) { - ulint i; - - for (i = rec_offs_n_fields(offsets); i--; ) { - /* Clear all BLOB pointers in order to make - page_zip_validate() pass. */ - if (rec_offs_nth_extern(offsets, i)) { - field = rec_get_nth_field( - rec, offsets, i, &len); - ut_ad(len - == BTR_EXTERN_FIELD_REF_SIZE); - memset(field + len - - BTR_EXTERN_FIELD_REF_SIZE, - 0, BTR_EXTERN_FIELD_REF_SIZE); - } - } - } - } else { - ut_ad(!rec_offs_any_extern(offsets)); - } - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ -} - -/**********************************************************************//** -Write the "deleted" flag of a record on a compressed page. The flag must -already have been written on the uncompressed page. 
*/ -UNIV_INTERN -void -page_zip_rec_set_deleted( -/*=====================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record on the uncompressed page */ - ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */ -{ - byte* slot = page_zip_dir_find(page_zip, page_offset(rec)); - ut_a(slot); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - if (flag) { - *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8); - } else { - *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8); - } -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page_align(rec), NULL)); -#endif /* UNIV_ZIP_DEBUG */ -} - -/**********************************************************************//** -Write the "owned" flag of a record on a compressed page. The n_owned field -must already have been written on the uncompressed page. */ -UNIV_INTERN -void -page_zip_rec_set_owned( -/*===================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* rec, /*!< in: record on the uncompressed page */ - ulint flag) /*!< in: the owned flag (nonzero=TRUE) */ -{ - byte* slot = page_zip_dir_find(page_zip, page_offset(rec)); - ut_a(slot); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - if (flag) { - *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8); - } else { - *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8); - } -} - -/**********************************************************************//** -Insert a record to the dense page directory. 
*/ -UNIV_INTERN -void -page_zip_dir_insert( -/*================*/ - page_zip_des_t* page_zip,/*!< in/out: compressed page */ - const byte* prev_rec,/*!< in: record after which to insert */ - const byte* free_rec,/*!< in: record from which rec was - allocated, or NULL */ - byte* rec) /*!< in: record to insert */ -{ - ulint n_dense; - byte* slot_rec; - byte* slot_free; - - ut_ad(prev_rec != rec); - ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec); - ut_ad(page_zip_simple_validate(page_zip)); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - if (page_rec_is_infimum(prev_rec)) { - /* Use the first slot. */ - slot_rec = page_zip->data + page_zip_get_size(page_zip); - } else { - byte* end = page_zip->data + page_zip_get_size(page_zip); - byte* start = end - page_zip_dir_user_size(page_zip); - - if (UNIV_LIKELY(!free_rec)) { - /* PAGE_N_RECS was already incremented - in page_cur_insert_rec_zip(), but the - dense directory slot at that position - contains garbage. Skip it. */ - start += PAGE_ZIP_DIR_SLOT_SIZE; - } - - slot_rec = page_zip_dir_find_low(start, end, - page_offset(prev_rec)); - ut_a(slot_rec); - } - - /* Read the old n_dense (n_heap may have been incremented). */ - n_dense = page_dir_get_n_heap(page_zip->data) - - (PAGE_HEAP_NO_USER_LOW + 1); - - if (UNIV_LIKELY_NULL(free_rec)) { - /* The record was allocated from the free list. - Shift the dense directory only up to that slot. - Note that in this case, n_dense is actually - off by one, because page_cur_insert_rec_zip() - did not increment n_heap. */ - ut_ad(rec_get_heap_no_new(rec) < n_dense + 1 - + PAGE_HEAP_NO_USER_LOW); - ut_ad(rec >= free_rec); - slot_free = page_zip_dir_find(page_zip, page_offset(free_rec)); - ut_ad(slot_free); - slot_free += PAGE_ZIP_DIR_SLOT_SIZE; - } else { - /* The record was allocated from the heap. - Shift the entire dense directory. 
*/ - ut_ad(rec_get_heap_no_new(rec) == n_dense - + PAGE_HEAP_NO_USER_LOW); - - /* Shift to the end of the dense page directory. */ - slot_free = page_zip->data + page_zip_get_size(page_zip) - - PAGE_ZIP_DIR_SLOT_SIZE * n_dense; - } - - /* Shift the dense directory to allocate place for rec. */ - memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free, - slot_rec - slot_free); - - /* Write the entry for the inserted record. - The "owned" and "deleted" flags must be zero. */ - mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec)); -} - -/**********************************************************************//** -Shift the dense page directory and the array of BLOB pointers -when a record is deleted. */ -UNIV_INTERN -void -page_zip_dir_delete( -/*================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - byte* rec, /*!< in: deleted record */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets, /*!< in: rec_get_offsets(rec) */ - const byte* free) /*!< in: previous start of - the free list */ -{ - byte* slot_rec; - byte* slot_free; - ulint n_ext; - page_t* page = page_align(rec); - - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(rec_offs_comp(offsets)); - - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets)); - UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets), - rec_offs_extra_size(offsets)); - - slot_rec = page_zip_dir_find(page_zip, page_offset(rec)); - - ut_a(slot_rec); - - /* This could not be done before page_zip_dir_find(). */ - page_header_set_field(page, page_zip, PAGE_N_RECS, - (ulint)(page_get_n_recs(page) - 1)); - - if (UNIV_UNLIKELY(!free)) { - /* Make the last slot the start of the free list. 
*/ - slot_free = page_zip->data + page_zip_get_size(page_zip) - - PAGE_ZIP_DIR_SLOT_SIZE - * (page_dir_get_n_heap(page_zip->data) - - PAGE_HEAP_NO_USER_LOW); - } else { - slot_free = page_zip_dir_find_free(page_zip, - page_offset(free)); - ut_a(slot_free < slot_rec); - /* Grow the free list by one slot by moving the start. */ - slot_free += PAGE_ZIP_DIR_SLOT_SIZE; - } - - if (UNIV_LIKELY(slot_rec > slot_free)) { - memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE, - slot_free, - slot_rec - slot_free); - } - - /* Write the entry for the deleted record. - The "owned" and "deleted" flags will be cleared. */ - mach_write_to_2(slot_free, page_offset(rec)); - - if (!page_is_leaf(page) || !dict_index_is_clust(index)) { - ut_ad(!rec_offs_any_extern(offsets)); - goto skip_blobs; - } - - n_ext = rec_offs_n_extern(offsets); - if (UNIV_UNLIKELY(n_ext)) { - /* Shift and zero fill the array of BLOB pointers. */ - ulint blob_no; - byte* externs; - byte* ext_end; - - blob_no = page_zip_get_n_prev_extern(page_zip, rec, index); - ut_a(blob_no + n_ext <= page_zip->n_blobs); - - externs = page_zip->data + page_zip_get_size(page_zip) - - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW) - * (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - - ext_end = externs - page_zip->n_blobs - * BTR_EXTERN_FIELD_REF_SIZE; - externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE; - - page_zip->n_blobs -= static_cast<unsigned>(n_ext); - /* Shift and zero fill the array. */ - memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end, - (page_zip->n_blobs - blob_no) - * BTR_EXTERN_FIELD_REF_SIZE); - memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE); - } - -skip_blobs: - /* The compression algorithm expects info_bits and n_owned - to be 0 for deleted records. 
*/ - rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */ - - page_zip_clear_rec(page_zip, rec, index, offsets); -} - -/**********************************************************************//** -Add a slot to the dense page directory. */ -UNIV_INTERN -void -page_zip_dir_add_slot( -/*==================*/ - page_zip_des_t* page_zip, /*!< in/out: compressed page */ - ulint is_clustered) /*!< in: nonzero for clustered index, - zero for others */ -{ - ulint n_dense; - byte* dir; - byte* stored; - - ut_ad(page_is_comp(page_zip->data)); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - /* Read the old n_dense (n_heap has already been incremented). */ - n_dense = page_dir_get_n_heap(page_zip->data) - - (PAGE_HEAP_NO_USER_LOW + 1); - - dir = page_zip->data + page_zip_get_size(page_zip) - - PAGE_ZIP_DIR_SLOT_SIZE * n_dense; - - if (!page_is_leaf(page_zip->data)) { - ut_ad(!page_zip->n_blobs); - stored = dir - n_dense * REC_NODE_PTR_SIZE; - } else if (is_clustered) { - /* Move the BLOB pointer array backwards to make space for the - roll_ptr and trx_id columns and the dense directory slot. */ - byte* externs; - - stored = dir - n_dense - * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - externs = stored - - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; - ASSERT_ZERO(externs - - (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), - PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), - externs, stored - externs); - } else { - stored = dir - - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; - ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE, - PAGE_ZIP_DIR_SLOT_SIZE); - } - - /* Move the uncompressed area backwards to make space - for one directory slot. */ - memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored); -} - -/***********************************************************//** -Parses a log record of writing to the header of a page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_write_header( -/*========================*/ - byte* ptr, /*!< in: redo log buffer */ - byte* end_ptr,/*!< in: redo log buffer end */ - page_t* page, /*!< in/out: uncompressed page */ - page_zip_des_t* page_zip)/*!< in/out: compressed page */ -{ - ulint offset; - ulint len; - - ut_ad(ptr && end_ptr); - ut_ad(!page == !page_zip); - - if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) { - - return(NULL); - } - - offset = (ulint) *ptr++; - len = (ulint) *ptr++; - - if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) { -corrupt: - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (UNIV_UNLIKELY(end_ptr < ptr + len)) { - - return(NULL); - } - - if (page) { - if (UNIV_UNLIKELY(!page_zip)) { - - goto corrupt; - } -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, NULL)); -#endif /* UNIV_ZIP_DEBUG */ - - memcpy(page + offset, ptr, len); - memcpy(page_zip->data + offset, ptr, len); - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, NULL)); -#endif /* UNIV_ZIP_DEBUG */ - } - - return(ptr + len); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Write a log record of writing to the uncompressed header portion of a page. 
*/ -UNIV_INTERN -void -page_zip_write_header_log( -/*======================*/ - const byte* data, /*!< in: data on the uncompressed page */ - ulint length, /*!< in: length of the data */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - byte* log_ptr = mlog_open(mtr, 11 + 1 + 1); - ulint offset = page_offset(data); - - ut_ad(offset < PAGE_DATA); - ut_ad(offset + length < PAGE_DATA); -#if PAGE_DATA > 255 -# error "PAGE_DATA > 255" -#endif - ut_ad(length < 256); - - /* If no logging is requested, we may return now */ - if (UNIV_UNLIKELY(!log_ptr)) { - - return; - } - - log_ptr = mlog_write_initial_log_record_fast( - (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr); - *log_ptr++ = (byte) offset; - *log_ptr++ = (byte) length; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, data, length); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Reorganize and compress a page. This is a low-level operation for -compressed pages, to be used when page_zip_compress() fails. -On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written. -The function btr_page_reorganize() should be preferred whenever possible. -IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a -non-clustered index, the caller must update the insert buffer free -bits in the same mini-transaction in such a way that the modification -will be redo-logged. -@return TRUE on success, FALSE on failure; page_zip will be left -intact on failure, but page will be overwritten. 
*/ -UNIV_INTERN -ibool -page_zip_reorganize( -/*================*/ - buf_block_t* block, /*!< in/out: page with compressed page; - on the compressed page, in: size; - out: data, n_blobs, - m_start, m_end, m_nonempty */ - dict_index_t* index, /*!< in: index of the B-tree node */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ -#ifndef UNIV_HOTBACKUP - buf_pool_t* buf_pool = buf_pool_from_block(block); -#endif /* !UNIV_HOTBACKUP */ - page_zip_des_t* page_zip = buf_block_get_page_zip(block); - page_t* page = buf_block_get_frame(block); - buf_block_t* temp_block; - page_t* temp_page; - ulint log_mode; - - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_is_comp(page)); - ut_ad(!dict_index_is_ibuf(index)); - /* Note that page_zip_validate(page_zip, page, index) may fail here. */ - UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - - /* Disable logging */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); - -#ifndef UNIV_HOTBACKUP - temp_block = buf_block_alloc(buf_pool); - btr_search_drop_page_hash_index(block); - block->check_index_page_at_flush = TRUE; -#else /* !UNIV_HOTBACKUP */ - ut_ad(block == back_block1); - temp_block = back_block2; -#endif /* !UNIV_HOTBACKUP */ - temp_page = temp_block->frame; - - /* Copy the old page to temporary space */ - buf_frame_copy(temp_page, page); - - btr_blob_dbg_remove(page, index, "zip_reorg"); - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) 
is preserved intact */ - - page_create(block, mtr, TRUE); - - /* Copy the records from the temporary space to the recreated page; - do not copy the lock bits yet */ - - page_copy_rec_list_end_no_locks(block, temp_block, - page_get_infimum_rec(temp_page), - index, mtr); - - if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) { - /* Copy max trx id to recreated page */ - trx_id_t max_trx_id = page_get_max_trx_id(temp_page); - page_set_max_trx_id(block, NULL, max_trx_id, NULL); - ut_ad(max_trx_id != 0); - } - - /* Restore logging. */ - mtr_set_log_mode(mtr, log_mode); - - if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) { - -#ifndef UNIV_HOTBACKUP - buf_block_free(temp_block); -#endif /* !UNIV_HOTBACKUP */ - return(FALSE); - } - - lock_move_reorganize_page(block, temp_block); - -#ifndef UNIV_HOTBACKUP - buf_block_free(temp_block); -#endif /* !UNIV_HOTBACKUP */ - return(TRUE); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Copy the records of a page byte for byte. Do not copy the page header -or trailer, except those B-tree header fields that are directly -related to the storage of records. Also copy PAGE_MAX_TRX_ID. -NOTE: The caller must update the lock table and the adaptive hash index. 
*/ -UNIV_INTERN -void -page_zip_copy_recs( -/*===============*/ - page_zip_des_t* page_zip, /*!< out: copy of src_zip - (n_blobs, m_start, m_end, - m_nonempty, data[0..size-1]) */ - page_t* page, /*!< out: copy of src */ - const page_zip_des_t* src_zip, /*!< in: compressed page */ - const page_t* src, /*!< in: page */ - dict_index_t* index, /*!< in: index of the B-tree */ - mtr_t* mtr) /*!< in: mini-transaction */ -{ - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX)); - ut_ad(!dict_index_is_ibuf(index)); -#ifdef UNIV_ZIP_DEBUG - /* The B-tree operations that call this function may set - FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag - mismatch. A strict page_zip_validate() will be executed later - during the B-tree operations. */ - ut_a(page_zip_validate_low(src_zip, src, index, TRUE)); -#endif /* UNIV_ZIP_DEBUG */ - ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip)); - if (UNIV_UNLIKELY(src_zip->n_blobs)) { - ut_a(page_is_leaf(src)); - ut_a(dict_index_is_clust(index)); - } - - /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary - indexes. It does not matter on other pages. */ - ut_a(dict_index_is_clust(index) || !page_is_leaf(src) - || page_get_max_trx_id(src)); - - UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip)); - UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE); - UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip)); - - /* Copy those B-tree page header fields that are related to - the records stored in the page. Also copy the field - PAGE_MAX_TRX_ID. Skip the rest of the page header and - trailer. On the compressed page, there is no trailer. 
*/ -#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END -# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END" -#endif - memcpy(PAGE_HEADER + page, PAGE_HEADER + src, - PAGE_HEADER_PRIV_END); - memcpy(PAGE_DATA + page, PAGE_DATA + src, - UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END); - memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data, - PAGE_HEADER_PRIV_END); - memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data, - page_zip_get_size(page_zip) - PAGE_DATA); - - /* Copy all fields of src_zip to page_zip, except the pointer - to the compressed data page. */ - { - page_zip_t* data = page_zip->data; - memcpy(page_zip, src_zip, sizeof *page_zip); - page_zip->data = data; - } - ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index)) - + page_zip->m_end < page_zip_get_size(page_zip)); - - if (!page_is_leaf(src) - && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL) - && UNIV_LIKELY(mach_read_from_4(page - + FIL_PAGE_PREV) != FIL_NULL)) { - /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */ - ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM, - TRUE); - if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) { - rec_t* rec = page + offs; - ut_a(rec[-REC_N_NEW_EXTRA_BYTES] - & REC_INFO_MIN_REC_FLAG); - rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG; - } - } - -#ifdef UNIV_ZIP_DEBUG - ut_a(page_zip_validate(page_zip, page, index)); -#endif /* UNIV_ZIP_DEBUG */ - btr_blob_dbg_add(page, index, "page_zip_copy_recs"); - - page_zip_compress_write_log(page_zip, page, index, mtr); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Parses a log record of compressing an index page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_zip_parse_compress( -/*====================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< out: uncompressed page */ - page_zip_des_t* page_zip)/*!< out: compressed page */ -{ - ulint size; - ulint trailer_size; - - ut_ad(!page == !page_zip); - - if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) { - - return(NULL); - } - - size = mach_read_from_2(ptr); - ptr += 2; - trailer_size = mach_read_from_2(ptr); - ptr += 2; - - if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) { - - return(NULL); - } - - if (page) { - if (UNIV_UNLIKELY(!page_zip) - || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) { -corrupt: - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4); - memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4); - memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size); - memset(page_zip->data + FIL_PAGE_TYPE + size, 0, - page_zip_get_size(page_zip) - trailer_size - - (FIL_PAGE_TYPE + size)); - memcpy(page_zip->data + page_zip_get_size(page_zip) - - trailer_size, ptr + 8 + size, trailer_size); - - if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page, - TRUE))) { - - goto corrupt; - } - } - - return(ptr + 8 + size + trailer_size); -} -#endif /* !UNIV_INNOCHECKSUM */ - -/**********************************************************************//** -Calculate the compressed page checksum. -@return page checksum */ -UNIV_INTERN -ulint -page_zip_calc_checksum( -/*===================*/ - const void* data, /*!< in: compressed page */ - ulint size, /*!< in: size of compressed page */ - srv_checksum_algorithm_t algo) /*!< in: algorithm to use */ -{ - uLong adler; - ib_uint32_t crc32; - const Bytef* s = static_cast<const byte*>(data); - - /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN, - and FIL_PAGE_FILE_FLUSH_LSN from the checksum. 
*/ - - switch (algo) { - case SRV_CHECKSUM_ALGORITHM_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - - ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - crc32 = ut_crc32(s + FIL_PAGE_OFFSET, - FIL_PAGE_LSN - FIL_PAGE_OFFSET) - ^ ut_crc32(s + FIL_PAGE_TYPE, 2) - ^ ut_crc32(s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - return((ulint) crc32); - case SRV_CHECKSUM_ALGORITHM_INNODB: - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - adler = adler32(0L, s + FIL_PAGE_OFFSET, - FIL_PAGE_LSN - FIL_PAGE_OFFSET); - adler = adler32(adler, s + FIL_PAGE_TYPE, 2); - adler = adler32( - adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - static_cast<uInt>(size) - - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - return((ulint) adler); - case SRV_CHECKSUM_ALGORITHM_NONE: - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - return(BUF_NO_CHECKSUM_MAGIC); - /* no default so the compiler will emit a warning if new enum - is added and not handled here */ - } - - ut_error; - return(0); -} - -/**********************************************************************//** -Verify a compressed page's checksum. 
-@return TRUE if the stored checksum is valid according to the value of -innodb_checksum_algorithm */ -UNIV_INTERN -ibool -page_zip_verify_checksum( -/*=====================*/ - const void* data, /*!< in: compressed page */ - ulint size) /*!< in: size of compressed page */ -{ - ib_uint32_t stored; - ib_uint32_t calc; - ib_uint32_t crc32 = 0 /* silence bogus warning */; - ib_uint32_t innodb = 0 /* silence bogus warning */; - - stored = static_cast<ib_uint32_t>(mach_read_from_4( - static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM)); - - ulint page_no = mach_read_from_4(static_cast<const unsigned char*> (data) + FIL_PAGE_OFFSET); - ulint space_id = mach_read_from_4(static_cast<const unsigned char*> - (data) + FIL_PAGE_SPACE_ID); - -#if FIL_PAGE_LSN % 8 -#error "FIL_PAGE_LSN must be 64 bit aligned" -#endif - - /* Check if page is empty */ - if (stored == 0 - && *reinterpret_cast<const ib_uint64_t*>(static_cast<const char*>( - data) - + FIL_PAGE_LSN) == 0) { - /* make sure that the page is really empty */ - ulint i; - for (i = 0; i < size; i++) { - if (*((const char*) data + i) != 0) { - return(FALSE); - } - } - /* Empty page */ - return(TRUE); - } - - const srv_checksum_algorithm_t curr_algo = - static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); - - if (curr_algo == SRV_CHECKSUM_ALGORITHM_NONE) { - return(TRUE); - } - - calc = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, curr_algo)); - - if (stored == calc) { - return(TRUE); - } - - switch (curr_algo) { - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - case SRV_CHECKSUM_ALGORITHM_CRC32: - - if (stored == BUF_NO_CHECKSUM_MAGIC) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); - } - - return(TRUE); - } - - innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_INNODB)); - - if (stored == innodb) { - if (curr_algo - == 
SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); - } - - return(TRUE); - } - - break; - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - case SRV_CHECKSUM_ALGORITHM_INNODB: - - if (stored == BUF_NO_CHECKSUM_MAGIC) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); - } - - return(TRUE); - } - - crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_CRC32)); - - if (stored == crc32) { - if (curr_algo - == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); - } - - return(TRUE); - } - - break; - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - - crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_CRC32)); - - if (stored == crc32) { - page_warn_strict_checksum( - curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); - - return(TRUE); - } - - innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_INNODB)); - - if (stored == innodb) { - page_warn_strict_checksum( - curr_algo, - SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); - return(TRUE); - } - - break; - case SRV_CHECKSUM_ALGORITHM_NONE: - ut_error; - /* no default so the compiler will emit a warning if new enum - is added and not handled here */ - } - - return(FALSE); -} diff --git a/storage/xtradb/pars/lexyy.cc b/storage/xtradb/pars/lexyy.cc deleted file mode 100644 index 62122bb9f6f..00000000000 --- a/storage/xtradb/pars/lexyy.cc +++ /dev/null @@ -1,3132 +0,0 @@ -#include "univ.i" -#line 2 "lexyy.cc" - -#line 4 "lexyy.cc" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define 
YY_FLEX_SUBMINOR_VERSION 35 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ - -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L - -/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, - * if you want the limit (max/min) macros for int types. - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS 1 -#endif - -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! C99 */ - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -/* C99 requires __STDC__ to be defined as 1. 
*/ -#if defined (__STDC__) - -#define YY_USE_CONST - -#endif /* defined (__STDC__) */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. - */ -#define BEGIN (yy_start) = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START (((yy_start) - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE yyrestart(yyin ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k. - * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. - * Ditto for the __ia64__ case accordingly. - */ -#define YY_BUF_SIZE 32768 -#else -#define YY_BUF_SIZE 16384 -#endif /* __ia64__ */ -#endif - -/* The state buf must be large enough to hold one state per character in the main buffer. 
- */ -#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef size_t yy_size_t; -#endif - -extern yy_size_t yyleng; - -extern FILE *yyin, *yyout; - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - - #define YY_LESS_LINENO(n) - -/* Return all but the first "n" matched characters back to the input stream. */ -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - *yy_cp = (yy_hold_char); \ - YY_RESTORE_YY_MORE_OFFSET \ - (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, (yytext_ptr) ) - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - yy_size_t yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. 
- */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. */ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via yyrestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -/* Stack of input buffers. */ -static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ -static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ -static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - * - * Returns the top of the stack, or NULL. - */ -#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ - ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ - : NULL) - -/* Same as previous macro, but useful when we know that the buffer stack is not - * NULL or when we need an lvalue. For internal use only. - */ -#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] - -/* yy_hold_char holds the character lost when yytext is formed. */ -static char yy_hold_char; -static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */ -yy_size_t yyleng; - -/* Points to current character in buffer. 
*/ -static char *yy_c_buf_p = (char *) 0; -static int yy_init = 0; /* whether we need to initialize */ -static int yy_start = 0; /* start state number */ - -/* Flag which is used to allow yywrap()'s to do buffer switches - * instead of setting up a fresh yyin. A bit of a hack ... - */ -static int yy_did_buffer_switch_on_eof; - -void yyrestart (FILE *input_file ); -MY_ATTRIBUTE((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ); -static YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ); -void yy_delete_buffer (YY_BUFFER_STATE b ); -void yy_flush_buffer (YY_BUFFER_STATE b ); -void yypush_buffer_state (YY_BUFFER_STATE new_buffer ); -void yypop_buffer_state (void ); - -static void yyensure_buffer_stack (void ); -static void yy_load_buffer_state (void ); -static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ); - -#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ) - -YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ); -YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ); -YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len ); - -void *yyalloc (yy_size_t ); -void *yyrealloc (void *,yy_size_t ); -void yyfree (void * ); - -#define yy_new_buffer yy_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! YY_CURRENT_BUFFER ){ \ - yyensure_buffer_stack (); \ - YY_CURRENT_BUFFER_LVALUE = \ - yy_create_buffer(yyin,YY_BUF_SIZE ); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! 
YY_CURRENT_BUFFER ){\ - yyensure_buffer_stack (); \ - YY_CURRENT_BUFFER_LVALUE = \ - yy_create_buffer(yyin,YY_BUF_SIZE ); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) - -/* Begin user sect3 */ - -#define yywrap(n) 1 -#define YY_SKIP_YYWRAP - -typedef unsigned char YY_CHAR; - -FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; - -typedef int yy_state_type; - -extern int yylineno; - -int yylineno = 1; - -extern char *yytext; -#define yytext_ptr yytext - -static yy_state_type yy_get_previous_state (void ); -static yy_state_type yy_try_NUL_trans (yy_state_type current_state ); -static int yy_get_next_buffer (void ); -static void yy_fatal_error (yyconst char msg[] ); - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - (yytext_ptr) = yy_bp; \ - yyleng = (size_t) (yy_cp - yy_bp); \ - (yy_hold_char) = *yy_cp; \ - *yy_cp = '\0'; \ - (yy_c_buf_p) = yy_cp; - -#define YY_NUM_RULES 124 -#define YY_END_OF_BUFFER 125 -/* This struct is not used in this scanner, - but its presence is necessary. 
*/ -struct yy_trans_info - { - flex_int32_t yy_verify; - flex_int32_t yy_nxt; - }; -static yyconst flex_int16_t yy_accept[425] = - { 0, - 0, 0, 119, 119, 0, 0, 0, 0, 125, 123, - 122, 122, 8, 123, 114, 5, 103, 109, 112, 110, - 107, 111, 123, 113, 1, 123, 108, 106, 104, 105, - 117, 96, 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, - 115, 116, 119, 120, 6, 7, 9, 10, 122, 4, - 98, 118, 2, 1, 3, 99, 100, 102, 101, 0, - 96, 0, 96, 96, 96, 96, 96, 44, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 28, 17, 25, 96, 96, 96, - - 96, 96, 96, 54, 63, 96, 14, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 119, 120, 120, 121, 6, - 7, 9, 10, 2, 0, 97, 13, 45, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 96, 96, 27, 96, 96, - 96, 41, 96, 96, 96, 96, 21, 96, 96, 96, - 96, 96, 15, 96, 96, 96, 18, 96, 96, 96, - 96, 96, 82, 96, 96, 96, 51, 96, 12, 96, - 36, 96, 96, 96, 96, 96, 96, 96, 96, 96, - - 96, 96, 0, 97, 96, 96, 96, 96, 20, 96, - 24, 96, 96, 96, 96, 96, 96, 96, 96, 96, - 96, 96, 46, 96, 96, 30, 96, 89, 96, 96, - 39, 96, 96, 96, 96, 96, 48, 96, 94, 91, - 32, 93, 96, 11, 66, 96, 96, 96, 42, 96, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 29, - 96, 96, 96, 96, 96, 96, 96, 96, 96, 87, - 0, 96, 26, 96, 96, 96, 68, 96, 96, 96, - 96, 37, 96, 96, 96, 96, 96, 96, 96, 31, - 67, 23, 96, 59, 96, 77, 96, 96, 96, 43, - - 96, 96, 96, 96, 96, 96, 96, 96, 92, 96, - 96, 56, 96, 96, 96, 96, 96, 96, 96, 40, - 33, 0, 81, 95, 19, 96, 96, 85, 96, 76, - 55, 96, 65, 96, 52, 96, 96, 96, 47, 96, - 78, 96, 80, 96, 96, 34, 96, 96, 96, 35, - 74, 96, 96, 96, 96, 60, 96, 50, 49, 96, - 96, 96, 57, 53, 64, 96, 96, 96, 22, 96, - 96, 75, 83, 96, 96, 79, 96, 70, 96, 96, - 96, 96, 96, 38, 96, 90, 69, 96, 86, 96, - 96, 96, 88, 96, 96, 61, 96, 16, 96, 72, - - 71, 96, 58, 96, 84, 96, 96, 96, 96, 96, - 96, 96, 96, 96, 96, 73, 96, 96, 96, 96, - 96, 96, 62, 0 - } ; - -static yyconst flex_int32_t yy_ec[256] = - { 0, - 1, 1, 
1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 4, 5, 6, 7, 1, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, - 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 1, 1, 1, 1, 51, 1, 34, 34, 34, 34, - - 34, 34, 34, 34, 34, 34, 34, 52, 34, 34, - 34, 34, 53, 34, 54, 34, 34, 34, 34, 34, - 34, 34, 55, 1, 56, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - } ; - -static yyconst flex_int32_t yy_meta[57] = - { 0, - 1, 1, 1, 2, 3, 1, 1, 4, 1, 1, - 5, 1, 1, 1, 1, 6, 7, 1, 1, 1, - 8, 1, 1, 6, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 1, 1 - } ; - -static yyconst flex_int16_t yy_base[438] = - { 0, - 0, 0, 293, 287, 284, 281, 272, 256, 254, 1357, - 55, 57, 1357, 0, 1357, 1357, 1357, 1357, 1357, 1357, - 1357, 1357, 238, 227, 46, 205, 1357, 43, 1357, 203, - 1357, 46, 50, 56, 52, 66, 64, 51, 81, 92, - 91, 94, 96, 111, 113, 116, 130, 134, 53, 143, - 1357, 1357, 0, 106, 0, 212, 0, 210, 141, 0, - 1357, 1357, 192, 56, 173, 1357, 1357, 1357, 1357, 168, - 140, 150, 152, 154, 155, 161, 167, 171, 177, 172, - 184, 174, 188, 189, 191, 194, 203, 212, 215, 217, - 219, 221, 226, 228, 231, 240, 233, 235, 246, 251, - - 258, 253, 255, 256, 269, 271, 278, 272, 285, 283, - 287, 289, 296, 305, 298, 315, 319, 321, 322, 326, - 332, 333, 342, 339, 343, 0, 112, 173, 1357, 0, - 155, 0, 156, 132, 93, 0, 355, 357, 358, 360, - 364, 367, 374, 370, 379, 380, 389, 383, 390, 392, - 395, 408, 411, 409, 415, 418, 425, 427, 429, 
436, - 431, 441, 446, 448, 450, 452, 453, 462, 471, 464, - 473, 474, 478, 485, 488, 490, 491, 494, 500, 501, - 504, 506, 507, 517, 518, 519, 520, 521, 522, 523, - 533, 536, 538, 543, 549, 554, 555, 561, 556, 566, - - 567, 576, 60, 0, 573, 578, 580, 582, 583, 593, - 589, 596, 598, 603, 605, 607, 610, 617, 619, 621, - 622, 628, 633, 634, 635, 639, 640, 649, 650, 652, - 653, 655, 659, 664, 668, 669, 665, 671, 674, 678, - 681, 685, 687, 688, 692, 697, 698, 701, 703, 704, - 707, 708, 717, 713, 728, 730, 724, 740, 734, 745, - 746, 750, 751, 756, 757, 760, 761, 762, 771, 773, - 42, 778, 782, 783, 787, 789, 792, 794, 793, 804, - 805, 808, 809, 810, 819, 823, 826, 828, 829, 830, - 835, 840, 844, 846, 847, 856, 857, 858, 859, 860, - - 863, 872, 873, 878, 879, 882, 885, 889, 894, 895, - 896, 898, 905, 910, 908, 912, 914, 915, 926, 930, - 931, 73, 932, 933, 935, 937, 942, 944, 946, 947, - 948, 949, 951, 958, 961, 965, 967, 972, 978, 979, - 981, 984, 983, 985, 994, 988, 999, 1000, 1001, 1004, - 1013, 1015, 1022, 1016, 1019, 1026, 1032, 1033, 1035, 1036, - 1038, 1039, 1048, 1049, 1050, 1051, 1053, 1054, 1060, 1063, - 1065, 1066, 1069, 1070, 1072, 1082, 1084, 1085, 1087, 1096, - 1097, 1098, 1099, 1101, 1113, 1114, 1115, 1116, 1117, 1118, - 1119, 1128, 1130, 1131, 1134, 1133, 1135, 1137, 1150, 1151, - - 1153, 1155, 1157, 1162, 1160, 1167, 1172, 1173, 1174, 1176, - 1185, 1190, 1183, 1187, 1189, 1199, 1204, 1206, 1208, 1210, - 1215, 1220, 1222, 1357, 1269, 1278, 1287, 1290, 1293, 1297, - 1306, 1315, 1324, 1333, 1340, 1344, 1347 - } ; - -static yyconst flex_int16_t yy_def[438] = - { 0, - 424, 1, 425, 425, 426, 426, 427, 427, 424, 424, - 424, 424, 424, 428, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 429, 424, 424, 424, 424, - 424, 430, 430, 430, 430, 430, 34, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 424, 424, 431, 432, 433, 424, 434, 424, 424, 428, - 424, 424, 424, 424, 429, 424, 424, 424, 424, 435, - 430, 436, 430, 430, 430, 430, 430, 430, 
430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 431, 432, 432, 424, 433, - 424, 434, 424, 424, 424, 437, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - - 430, 430, 424, 437, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 424, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 424, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 430, 430, 430, 430, 430, 430, 430, - 430, 430, 430, 0, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424 - } ; - -static yyconst flex_int16_t yy_nxt[1414] = - { 0, - 10, 11, 12, 13, 10, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 
25, 26, 27, 28, - 29, 30, 31, 10, 32, 33, 34, 35, 36, 37, - 38, 38, 39, 38, 38, 40, 41, 42, 43, 44, - 38, 45, 46, 47, 48, 49, 50, 38, 38, 38, - 38, 38, 38, 38, 51, 52, 59, 59, 59, 59, - 63, 70, 64, 67, 68, 70, 70, 70, 70, 72, - 63, 70, 64, 72, 72, 72, 72, 123, 75, 72, - 84, 70, 76, 73, 85, 77, 136, 79, 74, 72, - 86, 80, 90, 322, 81, 71, 70, 82, 78, 91, - - 83, 87, 92, 88, 72, 93, 70, 70, 94, 70, - 95, 70, 271, 89, 72, 72, 128, 72, 96, 72, - 98, 129, 424, 97, 99, 104, 70, 424, 70, 101, - 100, 70, 102, 105, 72, 106, 72, 107, 103, 72, - 108, 110, 59, 59, 113, 70, 203, 114, 134, 70, - 111, 112, 109, 72, 118, 70, 115, 72, 70, 133, - 116, 119, 131, 72, 117, 70, 72, 70, 120, 70, - 70, 121, 135, 122, 124, 72, 70, 72, 72, 137, - 138, 125, 70, 128, 72, 140, 70, 70, 129, 70, - 72, 141, 70, 424, 72, 72, 139, 72, 142, 70, - - 72, 144, 150, 70, 70, 143, 70, 72, 134, 70, - 145, 72, 72, 133, 72, 152, 146, 72, 70, 131, - 147, 148, 156, 69, 153, 66, 72, 70, 149, 151, - 70, 154, 70, 155, 70, 72, 70, 62, 72, 158, - 72, 70, 72, 70, 72, 157, 70, 159, 70, 72, - 70, 72, 61, 424, 72, 70, 72, 161, 72, 58, - 160, 70, 162, 72, 163, 164, 70, 165, 70, 72, - 70, 70, 168, 70, 72, 58, 72, 170, 72, 72, - 169, 72, 166, 167, 70, 172, 70, 70, 56, 171, - 174, 56, 72, 70, 72, 72, 173, 54, 70, 175, - - 70, 72, 70, 54, 70, 176, 72, 180, 72, 424, - 72, 70, 72, 70, 183, 177, 424, 178, 424, 72, - 70, 72, 181, 179, 184, 424, 182, 424, 72, 188, - 70, 186, 424, 189, 70, 185, 70, 70, 72, 187, - 190, 70, 72, 424, 72, 72, 193, 70, 70, 72, - 194, 191, 424, 424, 70, 72, 72, 70, 70, 424, - 198, 192, 72, 424, 196, 72, 72, 200, 424, 424, - 70, 201, 70, 70, 197, 70, 195, 199, 72, 70, - 72, 72, 70, 72, 202, 70, 205, 72, 424, 70, - 72, 208, 206, 72, 70, 70, 207, 72, 70, 209, - - 210, 424, 72, 72, 70, 70, 72, 70, 424, 216, - 70, 211, 72, 72, 424, 72, 218, 424, 72, 424, - 424, 212, 213, 70, 70, 214, 70, 217, 215, 424, - 70, 72, 72, 70, 72, 223, 219, 220, 72, 222, - 70, 72, 70, 221, 70, 424, 70, 424, 72, 424, - 72, 70, 72, 
226, 72, 230, 70, 227, 224, 72, - 225, 70, 229, 70, 72, 70, 424, 70, 70, 72, - 424, 72, 228, 72, 232, 72, 72, 70, 233, 70, - 234, 236, 231, 424, 424, 72, 70, 72, 70, 70, - 424, 237, 238, 70, 72, 235, 72, 72, 240, 239, - - 70, 72, 242, 70, 424, 70, 70, 243, 72, 70, - 424, 72, 241, 72, 72, 70, 70, 72, 246, 70, - 244, 70, 70, 72, 72, 245, 248, 72, 249, 72, - 72, 247, 70, 70, 70, 70, 70, 70, 70, 250, - 72, 72, 72, 72, 72, 72, 72, 255, 70, 424, - 251, 70, 253, 70, 424, 424, 72, 252, 70, 72, - 424, 72, 256, 258, 70, 257, 72, 424, 254, 70, - 70, 70, 72, 259, 261, 262, 70, 72, 72, 72, - 260, 70, 70, 424, 72, 266, 263, 265, 70, 72, - 72, 70, 424, 70, 264, 70, 72, 70, 70, 72, - - 267, 72, 269, 72, 70, 72, 72, 268, 70, 424, - 270, 70, 72, 70, 272, 273, 72, 274, 70, 72, - 70, 72, 70, 275, 277, 70, 72, 276, 72, 280, - 72, 281, 70, 72, 70, 279, 70, 70, 424, 424, - 72, 278, 72, 70, 72, 72, 286, 284, 70, 70, - 70, 72, 424, 282, 70, 70, 72, 72, 72, 285, - 283, 424, 72, 72, 70, 70, 288, 70, 70, 290, - 70, 287, 72, 72, 70, 72, 72, 424, 72, 70, - 70, 291, 72, 70, 70, 289, 70, 72, 72, 70, - 424, 72, 72, 70, 72, 292, 70, 72, 293, 297, - - 70, 72, 70, 70, 72, 295, 294, 70, 72, 296, - 72, 72, 70, 70, 298, 72, 70, 424, 70, 70, - 72, 72, 70, 70, 72, 299, 72, 72, 70, 302, - 72, 72, 70, 424, 424, 424, 72, 424, 300, 70, - 72, 301, 306, 70, 424, 70, 303, 72, 304, 70, - 305, 72, 307, 72, 308, 70, 424, 72, 309, 424, - 70, 70, 312, 72, 311, 70, 70, 310, 72, 72, - 424, 70, 70, 72, 72, 70, 70, 70, 313, 72, - 72, 314, 424, 72, 72, 72, 70, 317, 70, 319, - 320, 424, 424, 70, 72, 315, 72, 70, 70, 321, - - 316, 72, 70, 318, 70, 72, 72, 70, 70, 70, - 72, 424, 72, 424, 424, 72, 72, 72, 424, 70, - 70, 323, 327, 70, 70, 70, 324, 72, 72, 424, - 329, 72, 72, 72, 70, 325, 328, 331, 70, 326, - 424, 70, 72, 70, 70, 70, 72, 332, 330, 72, - 70, 72, 72, 72, 335, 70, 424, 424, 72, 70, - 333, 70, 70, 72, 334, 336, 337, 72, 424, 72, - 72, 70, 70, 70, 70, 70, 338, 424, 70, 72, - 72, 72, 72, 72, 424, 340, 72, 70, 70, 
341, - 339, 424, 343, 70, 70, 72, 72, 70, 424, 344, - - 70, 72, 72, 342, 70, 72, 348, 424, 72, 70, - 70, 70, 72, 70, 424, 346, 345, 72, 72, 72, - 70, 72, 347, 70, 424, 70, 349, 70, 72, 70, - 70, 72, 350, 72, 354, 72, 351, 72, 72, 352, - 356, 70, 353, 358, 355, 70, 70, 70, 70, 72, - 70, 357, 70, 72, 72, 72, 72, 70, 72, 70, - 72, 70, 70, 70, 70, 72, 70, 72, 359, 72, - 72, 72, 72, 70, 72, 424, 70, 424, 424, 361, - 70, 72, 70, 362, 72, 360, 365, 70, 72, 363, - 72, 366, 364, 70, 70, 72, 70, 424, 70, 70, - - 70, 72, 72, 70, 72, 367, 72, 72, 72, 70, - 368, 72, 424, 424, 70, 70, 70, 72, 424, 70, - 369, 370, 72, 72, 72, 424, 374, 72, 70, 371, - 70, 70, 424, 375, 70, 372, 72, 70, 72, 72, - 373, 70, 72, 376, 379, 72, 377, 70, 70, 72, - 70, 70, 424, 70, 70, 72, 72, 378, 72, 72, - 380, 72, 72, 70, 70, 70, 70, 383, 70, 70, - 382, 72, 72, 72, 72, 70, 72, 72, 70, 381, - 70, 70, 424, 72, 70, 70, 72, 70, 72, 72, - 387, 386, 72, 72, 384, 72, 385, 70, 424, 70, - - 70, 424, 70, 424, 389, 72, 388, 72, 72, 390, - 72, 70, 70, 70, 70, 392, 70, 424, 424, 72, - 72, 72, 72, 393, 72, 391, 396, 424, 70, 70, - 70, 70, 70, 70, 70, 394, 72, 72, 72, 72, - 72, 72, 72, 70, 398, 70, 70, 395, 70, 70, - 70, 72, 70, 72, 72, 424, 72, 72, 72, 424, - 72, 399, 403, 397, 404, 70, 70, 400, 70, 401, - 70, 424, 70, 72, 72, 70, 72, 70, 72, 405, - 72, 402, 70, 72, 424, 72, 424, 70, 70, 70, - 72, 70, 406, 424, 407, 72, 72, 72, 70, 72, - - 70, 412, 70, 424, 70, 70, 72, 424, 72, 410, - 72, 408, 72, 72, 70, 409, 424, 413, 414, 70, - 415, 70, 72, 70, 411, 70, 424, 72, 416, 72, - 70, 72, 424, 72, 419, 70, 424, 70, 72, 417, - 418, 424, 424, 72, 420, 72, 424, 424, 421, 424, - 424, 424, 424, 424, 424, 424, 422, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 423, 53, - 53, 53, 53, 53, 53, 53, 53, 53, 55, 55, - 55, 55, 55, 55, 55, 55, 55, 57, 57, 57, - 57, 57, 57, 57, 57, 57, 60, 424, 60, 65, - - 65, 65, 71, 71, 424, 71, 126, 126, 126, 126, - 424, 126, 126, 126, 126, 127, 127, 127, 127, 127, - 127, 127, 127, 127, 
130, 130, 130, 424, 130, 130, - 130, 130, 130, 132, 424, 132, 132, 132, 132, 132, - 132, 132, 136, 424, 424, 424, 424, 424, 136, 72, - 72, 424, 72, 204, 424, 204, 9, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424 - } ; - -static yyconst flex_int16_t yy_chk[1414] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 11, 11, 12, 12, - 25, 32, 25, 28, 28, 33, 38, 35, 49, 32, - 64, 34, 64, 33, 38, 35, 49, 49, 33, 34, - 35, 36, 33, 32, 35, 33, 322, 34, 32, 36, - 35, 34, 37, 271, 34, 37, 39, 34, 33, 37, - - 34, 36, 37, 36, 39, 37, 41, 40, 37, 42, - 39, 43, 203, 36, 41, 40, 54, 42, 39, 43, - 40, 54, 127, 39, 40, 43, 44, 127, 45, 41, - 40, 46, 42, 43, 44, 43, 45, 43, 42, 46, - 43, 45, 59, 59, 46, 47, 135, 46, 134, 48, - 45, 45, 44, 47, 47, 71, 46, 48, 50, 133, - 46, 47, 131, 71, 46, 72, 50, 73, 47, 74, - 75, 48, 70, 48, 50, 73, 76, 74, 75, 73, - 74, 50, 77, 128, 76, 75, 78, 80, 128, 82, - 77, 76, 79, 65, 78, 80, 74, 82, 76, 81, - - 79, 79, 82, 83, 84, 77, 85, 81, 63, 86, - 80, 83, 84, 58, 85, 84, 80, 86, 87, 56, - 81, 81, 86, 30, 84, 26, 87, 88, 81, 83, - 89, 84, 90, 85, 91, 88, 92, 24, 89, 88, - 90, 93, 91, 94, 92, 87, 95, 89, 97, 93, - 98, 94, 23, 9, 95, 96, 97, 91, 98, 8, - 90, 99, 92, 96, 93, 94, 100, 96, 102, 99, - 103, 104, 98, 101, 100, 7, 102, 100, 103, 104, - 99, 101, 96, 96, 105, 101, 106, 108, 6, 100, - 103, 5, 105, 107, 106, 108, 102, 4, 110, 106, - - 109, 107, 111, 3, 112, 107, 110, 110, 109, 0, - 111, 113, 112, 115, 111, 108, 0, 109, 0, 113, - 114, 115, 110, 109, 112, 0, 110, 0, 114, 114, - 116, 113, 0, 115, 117, 112, 118, 119, 116, 113, - 116, 120, 117, 0, 118, 119, 118, 121, 122, 
120, - 119, 116, 0, 0, 124, 121, 122, 123, 125, 0, - 122, 117, 124, 0, 121, 123, 125, 124, 0, 0, - 137, 124, 138, 139, 121, 140, 120, 123, 137, 141, - 138, 139, 142, 140, 125, 144, 139, 141, 0, 143, - 142, 142, 140, 144, 145, 146, 141, 143, 148, 143, - - 143, 0, 145, 146, 147, 149, 148, 150, 0, 148, - 151, 144, 147, 149, 0, 150, 150, 0, 151, 0, - 0, 145, 146, 152, 154, 147, 153, 149, 147, 0, - 155, 152, 154, 156, 153, 154, 151, 151, 155, 153, - 157, 156, 158, 152, 159, 0, 161, 0, 157, 0, - 158, 160, 159, 157, 161, 161, 162, 157, 155, 160, - 156, 163, 160, 164, 162, 165, 0, 166, 167, 163, - 0, 164, 159, 165, 164, 166, 167, 168, 165, 170, - 166, 167, 163, 0, 0, 168, 169, 170, 171, 172, - 0, 167, 168, 173, 169, 166, 171, 172, 170, 169, - - 174, 173, 172, 175, 0, 176, 177, 173, 174, 178, - 0, 175, 171, 176, 177, 179, 180, 178, 176, 181, - 174, 182, 183, 179, 180, 175, 179, 181, 180, 182, - 183, 178, 184, 185, 186, 187, 188, 189, 190, 181, - 184, 185, 186, 187, 188, 189, 190, 186, 191, 0, - 182, 192, 184, 193, 0, 0, 191, 183, 194, 192, - 0, 193, 188, 192, 195, 190, 194, 0, 185, 196, - 197, 199, 195, 193, 195, 195, 198, 196, 197, 199, - 194, 200, 201, 0, 198, 198, 195, 197, 205, 200, - 201, 202, 0, 206, 196, 207, 205, 208, 209, 202, - - 199, 206, 201, 207, 211, 208, 209, 200, 210, 0, - 202, 212, 211, 213, 205, 206, 210, 207, 214, 212, - 215, 213, 216, 208, 212, 217, 214, 210, 215, 215, - 216, 216, 218, 217, 219, 214, 220, 221, 0, 0, - 218, 213, 219, 222, 220, 221, 221, 219, 223, 224, - 225, 222, 0, 217, 226, 227, 223, 224, 225, 220, - 218, 0, 226, 227, 228, 229, 224, 230, 231, 227, - 232, 222, 228, 229, 233, 230, 231, 0, 232, 234, - 237, 229, 233, 235, 236, 225, 238, 234, 237, 239, - 0, 235, 236, 240, 238, 230, 241, 239, 232, 236, - - 242, 240, 243, 244, 241, 234, 233, 245, 242, 235, - 243, 244, 246, 247, 238, 245, 248, 0, 249, 250, - 246, 247, 251, 252, 248, 243, 249, 250, 254, 248, - 251, 252, 253, 0, 0, 0, 254, 0, 246, 257, - 253, 247, 253, 255, 0, 256, 250, 257, 251, 
259, - 252, 255, 254, 256, 255, 258, 0, 259, 256, 0, - 260, 261, 259, 258, 258, 262, 263, 257, 260, 261, - 0, 264, 265, 262, 263, 266, 267, 268, 261, 264, - 265, 262, 0, 266, 267, 268, 269, 265, 270, 267, - 268, 0, 0, 272, 269, 263, 270, 273, 274, 269, - - 264, 272, 275, 266, 276, 273, 274, 277, 279, 278, - 275, 0, 276, 0, 0, 277, 279, 278, 0, 280, - 281, 272, 278, 282, 283, 284, 274, 280, 281, 0, - 280, 282, 283, 284, 285, 275, 279, 283, 286, 276, - 0, 287, 285, 288, 289, 290, 286, 284, 281, 287, - 291, 288, 289, 290, 287, 292, 0, 0, 291, 293, - 285, 294, 295, 292, 286, 288, 289, 293, 0, 294, - 295, 296, 297, 298, 299, 300, 293, 0, 301, 296, - 297, 298, 299, 300, 0, 297, 301, 302, 303, 298, - 295, 0, 301, 304, 305, 302, 303, 306, 0, 302, - - 307, 304, 305, 299, 308, 306, 306, 0, 307, 309, - 310, 311, 308, 312, 0, 304, 303, 309, 310, 311, - 313, 312, 305, 315, 0, 314, 307, 316, 313, 317, - 318, 315, 308, 314, 314, 316, 310, 317, 318, 311, - 316, 319, 313, 318, 315, 320, 321, 323, 324, 319, - 325, 317, 326, 320, 321, 323, 324, 327, 325, 328, - 326, 329, 330, 331, 332, 327, 333, 328, 319, 329, - 330, 331, 332, 334, 333, 0, 335, 0, 0, 326, - 336, 334, 337, 327, 335, 325, 334, 338, 336, 329, - 337, 336, 332, 339, 340, 338, 341, 0, 343, 342, - - 344, 339, 340, 346, 341, 337, 343, 342, 344, 345, - 338, 346, 0, 0, 347, 348, 349, 345, 0, 350, - 340, 342, 347, 348, 349, 0, 348, 350, 351, 344, - 352, 354, 0, 349, 355, 345, 351, 353, 352, 354, - 347, 356, 355, 352, 355, 353, 353, 357, 358, 356, - 359, 360, 0, 361, 362, 357, 358, 354, 359, 360, - 357, 361, 362, 363, 364, 365, 366, 362, 367, 368, - 361, 363, 364, 365, 366, 369, 367, 368, 370, 360, - 371, 372, 0, 369, 373, 374, 370, 375, 371, 372, - 370, 368, 373, 374, 366, 375, 367, 376, 0, 377, - - 378, 0, 379, 0, 374, 376, 371, 377, 378, 375, - 379, 380, 381, 382, 383, 379, 384, 0, 0, 380, - 381, 382, 383, 380, 384, 377, 383, 0, 385, 386, - 387, 388, 389, 390, 391, 381, 385, 386, 387, 388, - 389, 390, 391, 392, 388, 393, 394, 
382, 396, 395, - 397, 392, 398, 393, 394, 0, 396, 395, 397, 0, - 398, 390, 395, 385, 397, 399, 400, 391, 401, 392, - 402, 0, 403, 399, 400, 405, 401, 404, 402, 399, - 403, 394, 406, 405, 0, 404, 0, 407, 408, 409, - 406, 410, 402, 0, 404, 407, 408, 409, 413, 410, - - 411, 410, 414, 0, 415, 412, 413, 0, 411, 408, - 414, 406, 415, 412, 416, 407, 0, 411, 412, 417, - 413, 418, 416, 419, 409, 420, 0, 417, 414, 418, - 421, 419, 0, 420, 418, 422, 0, 423, 421, 415, - 417, 0, 0, 422, 419, 423, 0, 0, 420, 0, - 0, 0, 0, 0, 0, 0, 421, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 422, 425, - 425, 425, 425, 425, 425, 425, 425, 425, 426, 426, - 426, 426, 426, 426, 426, 426, 426, 427, 427, 427, - 427, 427, 427, 427, 427, 427, 428, 0, 428, 429, - - 429, 429, 430, 430, 0, 430, 431, 431, 431, 431, - 0, 431, 431, 431, 431, 432, 432, 432, 432, 432, - 432, 432, 432, 432, 433, 433, 433, 0, 433, 433, - 433, 433, 433, 434, 0, 434, 434, 434, 434, 434, - 434, 434, 435, 0, 0, 0, 0, 0, 435, 436, - 436, 0, 436, 437, 0, 437, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - - 424, 424, 424, 424, 424, 424, 424, 424, 424, 424, - 424, 424, 424 - } ; - -static yy_state_type yy_last_accepting_state; -static char *yy_last_accepting_cpos; - -extern int yy_flex_debug; -int yy_flex_debug = 0; - -/* The intent behind this definition is that it'll catch - * any uses of REJECT which flex missed. - */ -#define REJECT reject_used_but_not_detected -#define yymore() yymore_used_but_not_detected -#define YY_MORE_ADJ 0 -#define YY_RESTORE_YY_MORE_OFFSET -char *yytext; -#line 1 "pars0lex.l" -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ -/****************************************************** -SQL parser lexical analyzer: input file for the GNU Flex lexer generator - -The InnoDB parser is frozen because MySQL takes care of SQL parsing. -Therefore we normally keep the InnoDB parser C files as they are, and do -not automatically generate them from pars0grm.y and pars0lex.l. - -How to make the InnoDB parser and lexer C files: - -1. Run ./make_flex.sh to generate lexer files. - -2. Run ./make_bison.sh to generate parser files. - -These instructions seem to work at least with bison-1.875d and flex-2.5.31 on -Linux. 
- -Created 12/14/1997 Heikki Tuuri -*******************************************************/ -#define YY_NO_INPUT 1 -#define YY_NO_UNISTD_H 1 -#line 53 "pars0lex.l" -#define YYSTYPE que_node_t* - -#include "univ.i" -#include "pars0pars.h" -#include "pars0grm.h" -#include "pars0sym.h" -#include "mem0mem.h" -#include "os0proc.h" - -#define malloc(A) ut_malloc(A) -#define free(A) ut_free(A) -#define realloc(P, A) ut_realloc(P, A) -#define exit(A) ut_error - -/* Note: We cast &result to int* from yysize_t* */ -#define YY_INPUT(buf, result, max_size) \ - (result = pars_get_lex_chars(buf, max_size)) - -/* String buffer for removing quotes */ -static ulint stringbuf_len_alloc = 0; /* Allocated length */ -static ulint stringbuf_len = 0; /* Current length */ -static char* stringbuf; /* Start of buffer */ -/** Appends a string to the buffer. */ -static -void -string_append( -/*==========*/ - const char* str, /*!< in: string to be appended */ - ulint len) /*!< in: length of the string */ -{ - if (stringbuf == NULL) { - stringbuf = static_cast<char*>(malloc(1)); - stringbuf_len_alloc = 1; - } - - if (stringbuf_len + len > stringbuf_len_alloc) { - while (stringbuf_len + len > stringbuf_len_alloc) { - stringbuf_len_alloc <<= 1; - } - - stringbuf = static_cast<char*>( - realloc(stringbuf, stringbuf_len_alloc)); - } - - memcpy(stringbuf + stringbuf_len, str, len); - stringbuf_len += len; -} - - - - -#line 1006 "lexyy.cc" - -#define INITIAL 0 -#define comment 1 -#define quoted 2 -#define id 3 - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -static int yy_init_globals (void ); - -/* Accessor methods to globals. - These are made visible to non-reentrant scanners for convenience. 
*/ - -MY_ATTRIBUTE((unused)) static int yylex_destroy (void ); - -int yyget_debug (void ); - -void yyset_debug (int debug_flag ); - -YY_EXTRA_TYPE yyget_extra (void ); - -void yyset_extra (YY_EXTRA_TYPE user_defined ); - -FILE *yyget_in (void ); - -void yyset_in (FILE * in_str ); - -FILE *yyget_out (void ); - -void yyset_out (FILE * out_str ); - -yy_size_t yyget_leng (void ); - -char *yyget_text (void ); - -int yyget_lineno (void ); - -void yyset_lineno (int line_number ); - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int yywrap (void ); -#else -extern int yywrap (void ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int ); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * ); -#endif - -#ifndef YY_NO_INPUT - -#ifdef __cplusplus -static int yyinput (void ); -#else -static int input (void ); -#endif - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#ifdef __ia64__ -/* On IA-64, the buffer size is 16k, not 8k */ -#define YY_READ_BUF_SIZE 16384 -#else -#define YY_READ_BUF_SIZE 8192 -#endif /* __ia64__ */ -#endif - -/* Copy whatever the last rule matched to the standard output. */ -#ifndef ECHO -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0) -#endif - -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". 
- */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ - if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ - { \ - int c = '*'; \ - size_t n; \ - for ( n = 0; n < max_size && \ - (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ - buf[n] = (char) c; \ - if ( c == '\n' ) \ - buf[n++] = (char) c; \ - if ( c == EOF && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - result = n; \ - } \ - else \ - { \ - errno=0; \ - while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ - { \ - if( errno != EINTR) \ - { \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - break; \ - } \ - errno=0; \ - clearerr(yyin); \ - } \ - }\ -\ - -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) -#endif - -/* end tables serialization structures and prototypes */ - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int yylex (void); - -#define YY_DECL int yylex (void) -#endif /* !YY_DECL */ - -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -/* Code executed at the end of each rule. */ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - -#define YY_RULE_SETUP \ - YY_USER_ACTION - -/** The main scanner function which does all the work. 
- */ -YY_DECL -{ - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; - -#line 112 "pars0lex.l" - - -#line 1197 "lexyy.cc" - - if ( !(yy_init) ) - { - (yy_init) = 1; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif - - if ( ! (yy_start) ) - (yy_start) = 1; /* first start state */ - - if ( ! yyin ) - yyin = stdin; - - if ( ! yyout ) - yyout = stdout; - - if ( ! YY_CURRENT_BUFFER ) { - yyensure_buffer_stack (); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ); - } - - yy_load_buffer_state( ); - } - - while ( 1 ) /* loops until end-of-file is reached */ - { - yy_cp = (yy_c_buf_p); - - /* Support of yytext. */ - *yy_cp = (yy_hold_char); - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. - */ - yy_bp = yy_cp; - - yy_current_state = (yy_start); -yy_match: - do - { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; - if ( yy_accept[yy_current_state] ) - { - (yy_last_accepting_state) = yy_current_state; - (yy_last_accepting_cpos) = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 425 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - ++yy_cp; - } - while ( yy_current_state != 424 ); - yy_cp = (yy_last_accepting_cpos); - yy_current_state = (yy_last_accepting_state); - -yy_find_action: - yy_act = yy_accept[yy_current_state]; - - YY_DO_BEFORE_ACTION; - -do_action: /* This label is used only to access EOF actions. 
*/ - - switch ( yy_act ) - { /* beginning of action switch */ - case 0: /* must back up */ - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = (yy_hold_char); - yy_cp = (yy_last_accepting_cpos); - yy_current_state = (yy_last_accepting_state); - goto yy_find_action; - -case 1: -YY_RULE_SETUP -#line 114 "pars0lex.l" -{ - yylval = sym_tab_add_int_lit(pars_sym_tab_global, - atoi(yytext)); - return(PARS_INT_LIT); -} - YY_BREAK -case 2: -YY_RULE_SETUP -#line 120 "pars0lex.l" -{ - ut_error; /* not implemented */ - - return(PARS_FLOAT_LIT); -} - YY_BREAK -case 3: -YY_RULE_SETUP -#line 126 "pars0lex.l" -{ - ulint type; - - yylval = sym_tab_add_bound_lit(pars_sym_tab_global, - yytext + 1, &type); - - return((int) type); -} - YY_BREAK -case 4: -YY_RULE_SETUP -#line 135 "pars0lex.l" -{ - yylval = sym_tab_add_bound_id(pars_sym_tab_global, - yytext + 1); - - return(PARS_ID_TOKEN); -} - YY_BREAK -case 5: -YY_RULE_SETUP -#line 142 "pars0lex.l" -{ -/* Quoted character string literals are handled in an explicit -start state 'quoted'. This state is entered and the buffer for -the scanned string is emptied upon encountering a starting quote. - -In the state 'quoted', only two actions are possible (defined below). */ - BEGIN(quoted); - stringbuf_len = 0; -} - YY_BREAK -case 6: -/* rule 6 can match eol */ -YY_RULE_SETUP -#line 151 "pars0lex.l" -{ - /* Got a sequence of characters other than "'": - append to string buffer */ - string_append(yytext, yyleng); -} - YY_BREAK -case 7: -YY_RULE_SETUP -#line 156 "pars0lex.l" -{ - /* Got a sequence of "'" characters: - append half of them to string buffer, - as "''" represents a single "'". - We apply truncating division, - so that "'''" will result in "'". */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - string literal. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_str_lit( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - return(PARS_STR_LIT); - } -} - YY_BREAK -case 8: -YY_RULE_SETUP -#line 180 "pars0lex.l" -{ -/* Quoted identifiers are handled in an explicit start state 'id'. -This state is entered and the buffer for the scanned string is emptied -upon encountering a starting quote. - -In the state 'id', only two actions are possible (defined below). */ - BEGIN(id); - stringbuf_len = 0; -} - YY_BREAK -case 9: -/* rule 9 can match eol */ -YY_RULE_SETUP -#line 189 "pars0lex.l" -{ - /* Got a sequence of characters other than '"': - append to string buffer */ - string_append(yytext, yyleng); -} - YY_BREAK -case 10: -YY_RULE_SETUP -#line 194 "pars0lex.l" -{ - /* Got a sequence of '"' characters: - append half of them to string buffer, - as '""' represents a single '"'. - We apply truncating division, - so that '"""' will result in '"'. */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - identifier. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_id( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - - return(PARS_ID_TOKEN); - } -} - YY_BREAK -case 11: -YY_RULE_SETUP -#line 219 "pars0lex.l" -{ - yylval = sym_tab_add_null_lit(pars_sym_tab_global); - - return(PARS_NULL_LIT); -} - YY_BREAK -case 12: -YY_RULE_SETUP -#line 225 "pars0lex.l" -{ - /* Implicit cursor name */ - yylval = sym_tab_add_str_lit(pars_sym_tab_global, - (byte*) yytext, yyleng); - return(PARS_SQL_TOKEN); -} - YY_BREAK -case 13: -YY_RULE_SETUP -#line 232 "pars0lex.l" -{ - return(PARS_AND_TOKEN); -} - YY_BREAK -case 14: -YY_RULE_SETUP -#line 236 "pars0lex.l" -{ - return(PARS_OR_TOKEN); -} - YY_BREAK -case 15: -YY_RULE_SETUP -#line 240 "pars0lex.l" -{ - return(PARS_NOT_TOKEN); -} - YY_BREAK -case 16: -YY_RULE_SETUP -#line 244 "pars0lex.l" -{ - return(PARS_PROCEDURE_TOKEN); -} - YY_BREAK -case 17: -YY_RULE_SETUP -#line 248 "pars0lex.l" -{ - return(PARS_IN_TOKEN); -} - YY_BREAK -case 18: -YY_RULE_SETUP -#line 252 "pars0lex.l" -{ - return(PARS_OUT_TOKEN); -} - YY_BREAK -case 19: -YY_RULE_SETUP -#line 256 "pars0lex.l" -{ - return(PARS_BINARY_TOKEN); -} - YY_BREAK -case 20: -YY_RULE_SETUP -#line 260 "pars0lex.l" -{ - return(PARS_BLOB_TOKEN); -} - YY_BREAK -case 21: -YY_RULE_SETUP -#line 264 "pars0lex.l" -{ - return(PARS_INT_TOKEN); -} - YY_BREAK -case 22: -YY_RULE_SETUP -#line 268 "pars0lex.l" -{ - return(PARS_INT_TOKEN); -} - YY_BREAK -case 23: -YY_RULE_SETUP -#line 272 "pars0lex.l" -{ - return(PARS_FLOAT_TOKEN); -} - YY_BREAK -case 24: -YY_RULE_SETUP -#line 276 "pars0lex.l" -{ - return(PARS_CHAR_TOKEN); -} - YY_BREAK -case 25: -YY_RULE_SETUP -#line 280 "pars0lex.l" -{ - return(PARS_IS_TOKEN); -} - YY_BREAK -case 26: -YY_RULE_SETUP -#line 284 "pars0lex.l" -{ - return(PARS_BEGIN_TOKEN); -} - YY_BREAK -case 27: -YY_RULE_SETUP -#line 288 "pars0lex.l" -{ - return(PARS_END_TOKEN); -} - YY_BREAK -case 28: -YY_RULE_SETUP -#line 292 "pars0lex.l" -{ - return(PARS_IF_TOKEN); -} - 
YY_BREAK -case 29: -YY_RULE_SETUP -#line 296 "pars0lex.l" -{ - return(PARS_THEN_TOKEN); -} - YY_BREAK -case 30: -YY_RULE_SETUP -#line 300 "pars0lex.l" -{ - return(PARS_ELSE_TOKEN); -} - YY_BREAK -case 31: -YY_RULE_SETUP -#line 304 "pars0lex.l" -{ - return(PARS_ELSIF_TOKEN); -} - YY_BREAK -case 32: -YY_RULE_SETUP -#line 308 "pars0lex.l" -{ - return(PARS_LOOP_TOKEN); -} - YY_BREAK -case 33: -YY_RULE_SETUP -#line 312 "pars0lex.l" -{ - return(PARS_WHILE_TOKEN); -} - YY_BREAK -case 34: -YY_RULE_SETUP -#line 316 "pars0lex.l" -{ - return(PARS_RETURN_TOKEN); -} - YY_BREAK -case 35: -YY_RULE_SETUP -#line 320 "pars0lex.l" -{ - return(PARS_SELECT_TOKEN); -} - YY_BREAK -case 36: -YY_RULE_SETUP -#line 324 "pars0lex.l" -{ - return(PARS_SUM_TOKEN); -} - YY_BREAK -case 37: -YY_RULE_SETUP -#line 328 "pars0lex.l" -{ - return(PARS_COUNT_TOKEN); -} - YY_BREAK -case 38: -YY_RULE_SETUP -#line 332 "pars0lex.l" -{ - return(PARS_DISTINCT_TOKEN); -} - YY_BREAK -case 39: -YY_RULE_SETUP -#line 336 "pars0lex.l" -{ - return(PARS_FROM_TOKEN); -} - YY_BREAK -case 40: -YY_RULE_SETUP -#line 340 "pars0lex.l" -{ - return(PARS_WHERE_TOKEN); -} - YY_BREAK -case 41: -YY_RULE_SETUP -#line 344 "pars0lex.l" -{ - return(PARS_FOR_TOKEN); -} - YY_BREAK -case 42: -YY_RULE_SETUP -#line 348 "pars0lex.l" -{ - return(PARS_READ_TOKEN); -} - YY_BREAK -case 43: -YY_RULE_SETUP -#line 352 "pars0lex.l" -{ - return(PARS_ORDER_TOKEN); -} - YY_BREAK -case 44: -YY_RULE_SETUP -#line 356 "pars0lex.l" -{ - return(PARS_BY_TOKEN); -} - YY_BREAK -case 45: -YY_RULE_SETUP -#line 360 "pars0lex.l" -{ - return(PARS_ASC_TOKEN); -} - YY_BREAK -case 46: -YY_RULE_SETUP -#line 364 "pars0lex.l" -{ - return(PARS_DESC_TOKEN); -} - YY_BREAK -case 47: -YY_RULE_SETUP -#line 368 "pars0lex.l" -{ - return(PARS_INSERT_TOKEN); -} - YY_BREAK -case 48: -YY_RULE_SETUP -#line 372 "pars0lex.l" -{ - return(PARS_INTO_TOKEN); -} - YY_BREAK -case 49: -YY_RULE_SETUP -#line 376 "pars0lex.l" -{ - return(PARS_VALUES_TOKEN); -} - YY_BREAK -case 50: -YY_RULE_SETUP 
-#line 380 "pars0lex.l" -{ - return(PARS_UPDATE_TOKEN); -} - YY_BREAK -case 51: -YY_RULE_SETUP -#line 384 "pars0lex.l" -{ - return(PARS_SET_TOKEN); -} - YY_BREAK -case 52: -YY_RULE_SETUP -#line 388 "pars0lex.l" -{ - return(PARS_DELETE_TOKEN); -} - YY_BREAK -case 53: -YY_RULE_SETUP -#line 392 "pars0lex.l" -{ - return(PARS_CURRENT_TOKEN); -} - YY_BREAK -case 54: -YY_RULE_SETUP -#line 396 "pars0lex.l" -{ - return(PARS_OF_TOKEN); -} - YY_BREAK -case 55: -YY_RULE_SETUP -#line 400 "pars0lex.l" -{ - return(PARS_CREATE_TOKEN); -} - YY_BREAK -case 56: -YY_RULE_SETUP -#line 404 "pars0lex.l" -{ - return(PARS_TABLE_TOKEN); -} - YY_BREAK -case 57: -YY_RULE_SETUP -#line 408 "pars0lex.l" -{ - return(PARS_COMPACT_TOKEN); -} - YY_BREAK -case 58: -YY_RULE_SETUP -#line 412 "pars0lex.l" -{ - return(PARS_BLOCK_SIZE_TOKEN); -} - YY_BREAK -case 59: -YY_RULE_SETUP -#line 416 "pars0lex.l" -{ - return(PARS_INDEX_TOKEN); -} - YY_BREAK -case 60: -YY_RULE_SETUP -#line 420 "pars0lex.l" -{ - return(PARS_UNIQUE_TOKEN); -} - YY_BREAK -case 61: -YY_RULE_SETUP -#line 424 "pars0lex.l" -{ - return(PARS_CLUSTERED_TOKEN); -} - YY_BREAK -case 62: -YY_RULE_SETUP -#line 428 "pars0lex.l" -{ - return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN); -} - YY_BREAK -case 63: -YY_RULE_SETUP -#line 432 "pars0lex.l" -{ - return(PARS_ON_TOKEN); -} - YY_BREAK -case 64: -YY_RULE_SETUP -#line 436 "pars0lex.l" -{ - return(PARS_DECLARE_TOKEN); -} - YY_BREAK -case 65: -YY_RULE_SETUP -#line 440 "pars0lex.l" -{ - return(PARS_CURSOR_TOKEN); -} - YY_BREAK -case 66: -YY_RULE_SETUP -#line 444 "pars0lex.l" -{ - return(PARS_OPEN_TOKEN); -} - YY_BREAK -case 67: -YY_RULE_SETUP -#line 448 "pars0lex.l" -{ - return(PARS_FETCH_TOKEN); -} - YY_BREAK -case 68: -YY_RULE_SETUP -#line 452 "pars0lex.l" -{ - return(PARS_CLOSE_TOKEN); -} - YY_BREAK -case 69: -YY_RULE_SETUP -#line 456 "pars0lex.l" -{ - return(PARS_NOTFOUND_TOKEN); -} - YY_BREAK -case 70: -YY_RULE_SETUP -#line 460 "pars0lex.l" -{ - return(PARS_TO_CHAR_TOKEN); -} - YY_BREAK -case 71: 
-YY_RULE_SETUP -#line 464 "pars0lex.l" -{ - return(PARS_TO_NUMBER_TOKEN); -} - YY_BREAK -case 72: -YY_RULE_SETUP -#line 468 "pars0lex.l" -{ - return(PARS_TO_BINARY_TOKEN); -} - YY_BREAK -case 73: -YY_RULE_SETUP -#line 472 "pars0lex.l" -{ - return(PARS_BINARY_TO_NUMBER_TOKEN); -} - YY_BREAK -case 74: -YY_RULE_SETUP -#line 476 "pars0lex.l" -{ - return(PARS_SUBSTR_TOKEN); -} - YY_BREAK -case 75: -YY_RULE_SETUP -#line 480 "pars0lex.l" -{ - return(PARS_REPLSTR_TOKEN); -} - YY_BREAK -case 76: -YY_RULE_SETUP -#line 484 "pars0lex.l" -{ - return(PARS_CONCAT_TOKEN); -} - YY_BREAK -case 77: -YY_RULE_SETUP -#line 488 "pars0lex.l" -{ - return(PARS_INSTR_TOKEN); -} - YY_BREAK -case 78: -YY_RULE_SETUP -#line 492 "pars0lex.l" -{ - return(PARS_LENGTH_TOKEN); -} - YY_BREAK -case 79: -YY_RULE_SETUP -#line 496 "pars0lex.l" -{ - return(PARS_SYSDATE_TOKEN); -} - YY_BREAK -case 80: -YY_RULE_SETUP -#line 500 "pars0lex.l" -{ - return(PARS_PRINTF_TOKEN); -} - YY_BREAK -case 81: -YY_RULE_SETUP -#line 504 "pars0lex.l" -{ - return(PARS_ASSERT_TOKEN); -} - YY_BREAK -case 82: -YY_RULE_SETUP -#line 508 "pars0lex.l" -{ - return(PARS_RND_TOKEN); -} - YY_BREAK -case 83: -YY_RULE_SETUP -#line 512 "pars0lex.l" -{ - return(PARS_RND_STR_TOKEN); -} - YY_BREAK -case 84: -YY_RULE_SETUP -#line 516 "pars0lex.l" -{ - return(PARS_ROW_PRINTF_TOKEN); -} - YY_BREAK -case 85: -YY_RULE_SETUP -#line 520 "pars0lex.l" -{ - return(PARS_COMMIT_TOKEN); -} - YY_BREAK -case 86: -YY_RULE_SETUP -#line 524 "pars0lex.l" -{ - return(PARS_ROLLBACK_TOKEN); -} - YY_BREAK -case 87: -YY_RULE_SETUP -#line 528 "pars0lex.l" -{ - return(PARS_WORK_TOKEN); -} - YY_BREAK -case 88: -YY_RULE_SETUP -#line 532 "pars0lex.l" -{ - return(PARS_UNSIGNED_TOKEN); -} - YY_BREAK -case 89: -YY_RULE_SETUP -#line 536 "pars0lex.l" -{ - return(PARS_EXIT_TOKEN); -} - YY_BREAK -case 90: -YY_RULE_SETUP -#line 540 "pars0lex.l" -{ - return(PARS_FUNCTION_TOKEN); -} - YY_BREAK -case 91: -YY_RULE_SETUP -#line 544 "pars0lex.l" -{ - return(PARS_LOCK_TOKEN); -} - 
YY_BREAK -case 92: -YY_RULE_SETUP -#line 548 "pars0lex.l" -{ - return(PARS_SHARE_TOKEN); -} - YY_BREAK -case 93: -YY_RULE_SETUP -#line 552 "pars0lex.l" -{ - return(PARS_MODE_TOKEN); -} - YY_BREAK -case 94: -YY_RULE_SETUP -#line 556 "pars0lex.l" -{ - return(PARS_LIKE_TOKEN); -} - YY_BREAK -case 95: -YY_RULE_SETUP -#line 560 "pars0lex.l" -{ - return(PARS_BIGINT_TOKEN); -} - YY_BREAK -case 96: -YY_RULE_SETUP -#line 564 "pars0lex.l" -{ - yylval = sym_tab_add_id(pars_sym_tab_global, - (byte*) yytext, - ut_strlen(yytext)); - return(PARS_ID_TOKEN); -} - YY_BREAK -case 97: -YY_RULE_SETUP -#line 571 "pars0lex.l" -{ - yylval = sym_tab_add_id(pars_sym_tab_global, - (byte*) yytext, - ut_strlen(yytext)); - return(PARS_TABLE_NAME_TOKEN); -} - YY_BREAK -case 98: -YY_RULE_SETUP -#line 578 "pars0lex.l" -{ - return(PARS_DDOT_TOKEN); -} - YY_BREAK -case 99: -YY_RULE_SETUP -#line 582 "pars0lex.l" -{ - return(PARS_ASSIGN_TOKEN); -} - YY_BREAK -case 100: -YY_RULE_SETUP -#line 586 "pars0lex.l" -{ - return(PARS_LE_TOKEN); -} - YY_BREAK -case 101: -YY_RULE_SETUP -#line 590 "pars0lex.l" -{ - return(PARS_GE_TOKEN); -} - YY_BREAK -case 102: -YY_RULE_SETUP -#line 594 "pars0lex.l" -{ - return(PARS_NE_TOKEN); -} - YY_BREAK -case 103: -YY_RULE_SETUP -#line 598 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 104: -YY_RULE_SETUP -#line 603 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 105: -YY_RULE_SETUP -#line 608 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 106: -YY_RULE_SETUP -#line 613 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 107: -YY_RULE_SETUP -#line 618 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 108: -YY_RULE_SETUP -#line 623 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 109: -YY_RULE_SETUP -#line 628 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 110: -YY_RULE_SETUP -#line 633 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 111: -YY_RULE_SETUP 
-#line 638 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 112: -YY_RULE_SETUP -#line 643 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 113: -YY_RULE_SETUP -#line 648 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 114: -YY_RULE_SETUP -#line 653 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 115: -YY_RULE_SETUP -#line 658 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 116: -YY_RULE_SETUP -#line 663 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 117: -YY_RULE_SETUP -#line 668 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 118: -YY_RULE_SETUP -#line 673 "pars0lex.l" -BEGIN(comment); /* eat up comment */ - YY_BREAK -case 119: -/* rule 119 can match eol */ -YY_RULE_SETUP -#line 675 "pars0lex.l" - - YY_BREAK -case 120: -/* rule 120 can match eol */ -YY_RULE_SETUP -#line 676 "pars0lex.l" - - YY_BREAK -case 121: -YY_RULE_SETUP -#line 677 "pars0lex.l" -BEGIN(INITIAL); - YY_BREAK -case 122: -/* rule 122 can match eol */ -YY_RULE_SETUP -#line 679 "pars0lex.l" -/* eat up whitespace */ - YY_BREAK -case 123: -YY_RULE_SETUP -#line 682 "pars0lex.l" -{ - fprintf(stderr,"Unrecognized character: %02x\n", - *yytext); - - ut_error; - - return(0); -} - YY_BREAK -case 124: -YY_RULE_SETUP -#line 691 "pars0lex.l" -YY_FATAL_ERROR( "flex scanner jammed" ); - YY_BREAK -#line 2237 "lexyy.cc" -case YY_STATE_EOF(INITIAL): -case YY_STATE_EOF(comment): -case YY_STATE_EOF(quoted): -case YY_STATE_EOF(id): - yyterminate(); - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = (yy_hold_char); - YY_RESTORE_YY_MORE_OFFSET - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. 
It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * yylex(). If so, then we have to assure - * consistency between YY_CURRENT_BUFFER and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. - */ - (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( ); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state ); - - yy_bp = (yytext_ptr) + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. 
*/ - yy_cp = ++(yy_c_buf_p); - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { - yy_cp = (yy_last_accepting_cpos); - yy_current_state = (yy_last_accepting_state); - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer( ) ) - { - case EOB_ACT_END_OF_FILE: - { - (yy_did_buffer_switch_on_eof) = 0; - - if ( yywrap( ) ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. - */ - (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! (yy_did_buffer_switch_on_eof) ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - (yy_c_buf_p) = - (yytext_ptr) + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( ); - - yy_cp = (yy_c_buf_p); - yy_bp = (yytext_ptr) + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - (yy_c_buf_p) = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; - - yy_current_state = yy_get_previous_state( ); - - yy_cp = (yy_c_buf_p); - yy_bp = (yytext_ptr) + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ -} /* end of yylex */ - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ -static int yy_get_next_buffer (void) -{ - register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - register char *source = (yytext_ptr); - register int number_to_move, i; - int ret_val; - - if ( (yy_c_buf_p) > 
&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. - */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. */ - number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; - - else - { - int num_to_read = static_cast<int>( - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1); - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER; - - int yy_c_buf_p_offset = - (int) ((yy_c_buf_p) - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = static_cast<int>(b->yy_buf_size * 2); - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! 
b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = static_cast<int>( - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - - number_to_move - 1); - - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. */ - YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - (yy_n_chars), (size_t) num_to_read ); - - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); - } - - if ( (yy_n_chars) == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - yyrestart(yyin ); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { - /* Extend the array by 50%, plus the number we really need. */ - yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1); - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ); - if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); - } - - (yy_n_chars) += number_to_move; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; - - (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; - - return ret_val; -} - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - - yy_state_type yy_get_previous_state (void) -{ - register yy_state_type yy_current_state; - register char *yy_cp; - - yy_current_state = (yy_start); - - for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) - { - register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); - if ( yy_accept[yy_current_state] ) - { - (yy_last_accepting_state) = yy_current_state; - (yy_last_accepting_cpos) = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 425 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - } - - return yy_current_state; -} - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ - static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) -{ - register int yy_is_jam; - register char *yy_cp = (yy_c_buf_p); - - register YY_CHAR yy_c = 1; - if ( yy_accept[yy_current_state] ) - { - (yy_last_accepting_state) = yy_current_state; - (yy_last_accepting_cpos) = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 425 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - yy_is_jam = (yy_current_state == 424); - - return yy_is_jam ? 0 : yy_current_state; -} - -#ifndef YY_NO_INPUT -#ifdef __cplusplus - static int yyinput (void) -#else - static int input (void) -#endif - -{ - int c; - - *(yy_c_buf_p) = (yy_hold_char); - - if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) - /* This was really a NUL. 
*/ - *(yy_c_buf_p) = '\0'; - - else - { /* need more input */ - int offset = (int)((yy_c_buf_p) - (yytext_ptr)); - ++(yy_c_buf_p); - - switch ( yy_get_next_buffer( ) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - yyrestart(yyin ); - - /*FALLTHROUGH*/ - - case EOB_ACT_END_OF_FILE: - { - if ( yywrap( ) ) - return EOF; - - if ( ! (yy_did_buffer_switch_on_eof) ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(); -#else - return input(); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - (yy_c_buf_p) = (yytext_ptr) + offset; - break; - } - } - } - - c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ - *(yy_c_buf_p) = '\0'; /* preserve yytext */ - (yy_hold_char) = *++(yy_c_buf_p); - - return c; -} -#endif /* ifndef YY_NO_INPUT */ - -/** Immediately switch to a different input stream. - * @param input_file A readable stream. - * - * @note This function does not reset the start condition to @c INITIAL . - */ - void yyrestart (FILE * input_file ) -{ - - if ( ! YY_CURRENT_BUFFER ){ - yyensure_buffer_stack (); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ); - } - - yy_init_buffer(YY_CURRENT_BUFFER,input_file ); - yy_load_buffer_state( ); -} - -/** Switch to a different input buffer. - * @param new_buffer The new input buffer. - * - */ - MY_ATTRIBUTE((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) -{ - - /* TODO. We should be able to replace this entire function body - * with - * yypop_buffer_state(); - * yypush_buffer_state(new_buffer); - */ - yyensure_buffer_stack (); - if ( YY_CURRENT_BUFFER == new_buffer ) - return; - - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. 
*/ - *(yy_c_buf_p) = (yy_hold_char); - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); - } - - YY_CURRENT_BUFFER_LVALUE = new_buffer; - yy_load_buffer_state( ); - - /* We don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - (yy_did_buffer_switch_on_eof) = 1; -} - -static void yy_load_buffer_state (void) -{ - (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; - yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; - (yy_hold_char) = *(yy_c_buf_p); -} - -/** Allocate and initialize an input buffer state. - * @param file A readable stream. - * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. - * - * @return the allocated buffer state. - */ - static YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) -{ - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - yy_init_buffer(b,file ); - - return b; -} - -/** Destroy the buffer. - * @param b a buffer created with yy_create_buffer() - * - */ - void yy_delete_buffer (YY_BUFFER_STATE b ) -{ - - if ( ! b ) - return; - - if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. 
*/ - YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - yyfree((void *) b->yy_ch_buf ); - - yyfree((void *) b ); -} - -/* Initializes or reinitializes a buffer. - * This function is sometimes called more than once on the same buffer, - * such as during a yyrestart() or at EOF. - */ - static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) - -{ - int oerrno = errno; - - yy_flush_buffer(b ); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - - /* If b is the current buffer, then yy_init_buffer was _probably_ - * called from yyrestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } - - b->yy_is_interactive = 0; - - errno = oerrno; -} - -/** Discard all buffered characters. On the next scan, YY_INPUT will be called. - * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. - * - */ - void yy_flush_buffer (YY_BUFFER_STATE b ) -{ - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == YY_CURRENT_BUFFER ) - yy_load_buffer_state( ); -} - -/** Pushes the new state onto the stack. The new state becomes - * the current state. This function will allocate the stack - * if necessary. - * @param new_buffer The new state. - * - */ -void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) -{ - if (new_buffer == NULL) - return; - - yyensure_buffer_stack(); - - /* This block is copied from yy_switch_to_buffer. */ - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. 
*/ - *(yy_c_buf_p) = (yy_hold_char); - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); - } - - /* Only push if top exists. Otherwise, replace top. */ - if (YY_CURRENT_BUFFER) - (yy_buffer_stack_top)++; - YY_CURRENT_BUFFER_LVALUE = new_buffer; - - /* copied from yy_switch_to_buffer. */ - yy_load_buffer_state( ); - (yy_did_buffer_switch_on_eof) = 1; -} - -/** Removes and deletes the top of the stack, if present. - * The next element becomes the new top. - * - */ -void yypop_buffer_state (void) -{ - if (!YY_CURRENT_BUFFER) - return; - - yy_delete_buffer(YY_CURRENT_BUFFER ); - YY_CURRENT_BUFFER_LVALUE = NULL; - if ((yy_buffer_stack_top) > 0) - --(yy_buffer_stack_top); - - if (YY_CURRENT_BUFFER) { - yy_load_buffer_state( ); - (yy_did_buffer_switch_on_eof) = 1; - } -} - -/* Allocates the stack if it does not exist. - * Guarantees space for at least one push. - */ -static void yyensure_buffer_stack (void) -{ - int num_to_alloc; - - if (!(yy_buffer_stack)) { - - /* First allocation is just for 2 elements, since we don't know if this - * scanner will even need a stack. We use 2 instead of 1 to avoid an - * immediate realloc on the next call. - */ - num_to_alloc = 1; - (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc - (num_to_alloc * sizeof(struct yy_buffer_state*) - ); - if ( ! (yy_buffer_stack) ) - YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); - - memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - - (yy_buffer_stack_max) = num_to_alloc; - (yy_buffer_stack_top) = 0; - return; - } - - if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ - - /* Increase the buffer to prepare for a possible push. 
*/ - int grow_size = 8 /* arbitrary grow size */; - - num_to_alloc = static_cast<int>( - (yy_buffer_stack_max) + grow_size); - (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc - ((yy_buffer_stack), - num_to_alloc * sizeof(struct yy_buffer_state*) - ); - if ( ! (yy_buffer_stack) ) - YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); - - /* zero only the new slots.*/ - memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); - (yy_buffer_stack_max) = num_to_alloc; - } -} - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -static void yy_fatal_error (yyconst char* msg ) -{ - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); -} - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - yytext[yyleng] = (yy_hold_char); \ - (yy_c_buf_p) = yytext + yyless_macro_arg; \ - (yy_hold_char) = *(yy_c_buf_p); \ - *(yy_c_buf_p) = '\0'; \ - yyleng = yyless_macro_arg; \ - } \ - while ( 0 ) - -/* Accessor methods (get/set functions) to struct members. */ - -/** Get the current line number. - * - */ -int yyget_lineno (void) -{ - - return yylineno; -} - -/** Get the input stream. - * - */ -FILE *yyget_in (void) -{ - return yyin; -} - -/** Get the output stream. - * - */ -FILE *yyget_out (void) -{ - return yyout; -} - -/** Get the length of the current token. - * - */ -yy_size_t yyget_leng (void) -{ - return yyleng; -} - -/** Get the current token. - * - */ - -char *yyget_text (void) -{ - return yytext; -} - -/** Set the current line number. - * @param line_number - * - */ -void yyset_lineno (int line_number ) -{ - - yylineno = line_number; -} - -/** Set the input stream. This does not discard the current - * input buffer. - * @param in_str A readable stream. 
- * - * @see yy_switch_to_buffer - */ -void yyset_in (FILE * in_str ) -{ - yyin = in_str ; -} - -void yyset_out (FILE * out_str ) -{ - yyout = out_str ; -} - -int yyget_debug (void) -{ - return yy_flex_debug; -} - -void yyset_debug (int bdebug ) -{ - yy_flex_debug = bdebug ; -} - -static int yy_init_globals (void) -{ - /* Initialization is the same as for the non-reentrant scanner. - * This function is called from yylex_destroy(), so don't allocate here. - */ - - (yy_buffer_stack) = 0; - (yy_buffer_stack_top) = 0; - (yy_buffer_stack_max) = 0; - (yy_c_buf_p) = (char *) 0; - (yy_init) = 0; - (yy_start) = 0; - -/* Defined in main.c */ -#ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; -#else - yyin = (FILE *) 0; - yyout = (FILE *) 0; -#endif - - /* For future reference: Set errno on error, since we are called by - * yylex_init() - */ - return 0; -} - -/* yylex_destroy is for both reentrant and non-reentrant scanners. */ -MY_ATTRIBUTE((unused)) static int yylex_destroy (void) -{ - - /* Pop the buffer stack, destroying each element. */ - while(YY_CURRENT_BUFFER){ - yy_delete_buffer(YY_CURRENT_BUFFER ); - YY_CURRENT_BUFFER_LVALUE = NULL; - yypop_buffer_state(); - } - - /* Destroy the stack itself. */ - yyfree((yy_buffer_stack) ); - (yy_buffer_stack) = NULL; - - /* Reset the globals. This is important in a non-reentrant scanner so the next time - * yylex() is called, initialization will occur. */ - yy_init_globals( ); - - return 0; -} - -/* - * Internal utility routines. 
- */ - -#ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n ) -{ - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; -} -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s ) -{ - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; -} -#endif - -void *yyalloc (yy_size_t size ) -{ - return (void *) malloc( size ); -} - -void *yyrealloc (void * ptr, yy_size_t size ) -{ - /* The cast to (char *) in the following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. - */ - return (void *) realloc( (char *) ptr, size ); -} - -void yyfree (void * ptr ) -{ - free( (char*) ptr ); /* see yyrealloc() for (char *) cast */ -} - -#define YYTABLES_NAME "yytables" - -#line 691 "pars0lex.l" - - - -/********************************************************************** -Release any resources used by the lexer. */ -UNIV_INTERN -void -pars_lexer_close(void) -/*==================*/ -{ - if (yy_buffer_stack) - yylex_destroy(); - if (stringbuf) - free(stringbuf); - stringbuf = NULL; - stringbuf_len_alloc = stringbuf_len = 0; -} - diff --git a/storage/xtradb/pars/make_bison.sh b/storage/xtradb/pars/make_bison.sh deleted file mode 100755 index 2618be102bc..00000000000 --- a/storage/xtradb/pars/make_bison.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free Software -# Foundation; version 2 of the License. 
-# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA -# -# generate parser files from bison input files. - -set -eu -TMPFILE=pars0grm.tab.c -OUTFILE=pars0grm.cc - -bison -d pars0grm.y -mv pars0grm.tab.h ../include/pars0grm.h - -sed -e ' -s/'"$TMPFILE"'/'"$OUTFILE"'/; -s/^\(\(YYSTYPE\|int\) yy\(char\|nerrs\)\)/static \1/; -s/\(\(YYSTYPE\|int\) yy\(lval\|parse\)\)/UNIV_INTERN \1/; -' < "$TMPFILE" > "$OUTFILE" - -rm "$TMPFILE" diff --git a/storage/xtradb/pars/make_flex.sh b/storage/xtradb/pars/make_flex.sh deleted file mode 100755 index c3db8aea298..00000000000 --- a/storage/xtradb/pars/make_flex.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash -# -# Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -# -# This program is free software; you can redistribute it and/or modify it under -# the terms of the GNU General Public License as published by the Free Software -# Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with -# this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA -# -# generate lexer files from flex input files. 
- -set -eu - -TMPFILE=_flex_tmp.cc -OUTFILE=lexyy.cc - -flex -o $TMPFILE pars0lex.l - -# AIX needs its includes done in a certain order, so include "univ.i" first -# to be sure we get it right. -echo '#include "univ.i"' > $OUTFILE - -# flex assigns a pointer to an int in one place without a cast, resulting in -# a warning on Win64. Add the cast. Also define some symbols as static. -sed -e ' -s/'"$TMPFILE"'/'"$OUTFILE"'/; -s/\(int offset = \)\((yy_c_buf_p) - (yytext_ptr)\);/\1(int)(\2);/; -s/\(void yy\(restart\|_\(delete\|flush\)_buffer\)\)/static \1/; -s/\(void yy_switch_to_buffer\)/MY_ATTRIBUTE((unused)) static \1/; -s/\(void yy\(push\|pop\)_buffer_state\)/MY_ATTRIBUTE((unused)) static \1/; -s/\(YY_BUFFER_STATE yy_create_buffer\)/static \1/; -s/\(\(int\|void\) yy[gs]et_\)/MY_ATTRIBUTE((unused)) static \1/; -s/\(void \*\?yy\(\(re\)\?alloc\|free\)\)/static \1/; -s/\(extern \)\?\(int yy\(leng\|lineno\|_flex_debug\)\)/static \2/; -s/\(int yylex_destroy\)/MY_ATTRIBUTE((unused)) static \1/; -s/\(extern \)\?\(int yylex \)/UNIV_INTERN \2/; -s/^\(\(FILE\|char\) *\* *yyget\)/MY_ATTRIBUTE((unused)) static \1/; -s/^\(extern \)\?\(\(FILE\|char\) *\* *yy\)/static \2/; -' < $TMPFILE >> $OUTFILE - -rm $TMPFILE diff --git a/storage/xtradb/pars/pars0grm.cc b/storage/xtradb/pars/pars0grm.cc deleted file mode 100644 index b360f36e597..00000000000 --- a/storage/xtradb/pars/pars0grm.cc +++ /dev/null @@ -1,3034 +0,0 @@ -/* A Bison parser, made by GNU Bison 2.3. */ - -/* Skeleton implementation for Bison's Yacc-like parsers in C - - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, - Boston, MA 02110-1301, USA. */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "2.3" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 0 - -/* Using locations. */ -#define YYLSP_NEEDED 0 - - - -/* Tokens. 
*/ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - PARS_INT_LIT = 258, - PARS_FLOAT_LIT = 259, - PARS_STR_LIT = 260, - PARS_FIXBINARY_LIT = 261, - PARS_BLOB_LIT = 262, - PARS_NULL_LIT = 263, - PARS_ID_TOKEN = 264, - PARS_AND_TOKEN = 265, - PARS_OR_TOKEN = 266, - PARS_NOT_TOKEN = 267, - PARS_GE_TOKEN = 268, - PARS_LE_TOKEN = 269, - PARS_NE_TOKEN = 270, - PARS_PROCEDURE_TOKEN = 271, - PARS_IN_TOKEN = 272, - PARS_OUT_TOKEN = 273, - PARS_BINARY_TOKEN = 274, - PARS_BLOB_TOKEN = 275, - PARS_INT_TOKEN = 276, - PARS_INTEGER_TOKEN = 277, - PARS_FLOAT_TOKEN = 278, - PARS_CHAR_TOKEN = 279, - PARS_IS_TOKEN = 280, - PARS_BEGIN_TOKEN = 281, - PARS_END_TOKEN = 282, - PARS_IF_TOKEN = 283, - PARS_THEN_TOKEN = 284, - PARS_ELSE_TOKEN = 285, - PARS_ELSIF_TOKEN = 286, - PARS_LOOP_TOKEN = 287, - PARS_WHILE_TOKEN = 288, - PARS_RETURN_TOKEN = 289, - PARS_SELECT_TOKEN = 290, - PARS_SUM_TOKEN = 291, - PARS_COUNT_TOKEN = 292, - PARS_DISTINCT_TOKEN = 293, - PARS_FROM_TOKEN = 294, - PARS_WHERE_TOKEN = 295, - PARS_FOR_TOKEN = 296, - PARS_DDOT_TOKEN = 297, - PARS_READ_TOKEN = 298, - PARS_ORDER_TOKEN = 299, - PARS_BY_TOKEN = 300, - PARS_ASC_TOKEN = 301, - PARS_DESC_TOKEN = 302, - PARS_INSERT_TOKEN = 303, - PARS_INTO_TOKEN = 304, - PARS_VALUES_TOKEN = 305, - PARS_UPDATE_TOKEN = 306, - PARS_SET_TOKEN = 307, - PARS_DELETE_TOKEN = 308, - PARS_CURRENT_TOKEN = 309, - PARS_OF_TOKEN = 310, - PARS_CREATE_TOKEN = 311, - PARS_TABLE_TOKEN = 312, - PARS_INDEX_TOKEN = 313, - PARS_UNIQUE_TOKEN = 314, - PARS_CLUSTERED_TOKEN = 315, - PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316, - PARS_ON_TOKEN = 317, - PARS_ASSIGN_TOKEN = 318, - PARS_DECLARE_TOKEN = 319, - PARS_CURSOR_TOKEN = 320, - PARS_SQL_TOKEN = 321, - PARS_OPEN_TOKEN = 322, - PARS_FETCH_TOKEN = 323, - PARS_CLOSE_TOKEN = 324, - PARS_NOTFOUND_TOKEN = 325, - PARS_TO_CHAR_TOKEN = 326, - PARS_TO_NUMBER_TOKEN = 327, - PARS_TO_BINARY_TOKEN = 328, - 
PARS_BINARY_TO_NUMBER_TOKEN = 329, - PARS_SUBSTR_TOKEN = 330, - PARS_REPLSTR_TOKEN = 331, - PARS_CONCAT_TOKEN = 332, - PARS_INSTR_TOKEN = 333, - PARS_LENGTH_TOKEN = 334, - PARS_SYSDATE_TOKEN = 335, - PARS_PRINTF_TOKEN = 336, - PARS_ASSERT_TOKEN = 337, - PARS_RND_TOKEN = 338, - PARS_RND_STR_TOKEN = 339, - PARS_ROW_PRINTF_TOKEN = 340, - PARS_COMMIT_TOKEN = 341, - PARS_ROLLBACK_TOKEN = 342, - PARS_WORK_TOKEN = 343, - PARS_UNSIGNED_TOKEN = 344, - PARS_EXIT_TOKEN = 345, - PARS_FUNCTION_TOKEN = 346, - PARS_LOCK_TOKEN = 347, - PARS_SHARE_TOKEN = 348, - PARS_MODE_TOKEN = 349, - PARS_LIKE_TOKEN = 350, - PARS_LIKE_TOKEN_EXACT = 351, - PARS_LIKE_TOKEN_PREFIX = 352, - PARS_LIKE_TOKEN_SUFFIX = 353, - PARS_LIKE_TOKEN_SUBSTR = 354, - PARS_TABLE_NAME_TOKEN = 355, - PARS_COMPACT_TOKEN = 356, - PARS_BLOCK_SIZE_TOKEN = 357, - PARS_BIGINT_TOKEN = 358, - NEG = 359 - }; -#endif -/* Tokens. */ -#define PARS_INT_LIT 258 -#define PARS_FLOAT_LIT 259 -#define PARS_STR_LIT 260 -#define PARS_FIXBINARY_LIT 261 -#define PARS_BLOB_LIT 262 -#define PARS_NULL_LIT 263 -#define PARS_ID_TOKEN 264 -#define PARS_AND_TOKEN 265 -#define PARS_OR_TOKEN 266 -#define PARS_NOT_TOKEN 267 -#define PARS_GE_TOKEN 268 -#define PARS_LE_TOKEN 269 -#define PARS_NE_TOKEN 270 -#define PARS_PROCEDURE_TOKEN 271 -#define PARS_IN_TOKEN 272 -#define PARS_OUT_TOKEN 273 -#define PARS_BINARY_TOKEN 274 -#define PARS_BLOB_TOKEN 275 -#define PARS_INT_TOKEN 276 -#define PARS_INTEGER_TOKEN 277 -#define PARS_FLOAT_TOKEN 278 -#define PARS_CHAR_TOKEN 279 -#define PARS_IS_TOKEN 280 -#define PARS_BEGIN_TOKEN 281 -#define PARS_END_TOKEN 282 -#define PARS_IF_TOKEN 283 -#define PARS_THEN_TOKEN 284 -#define PARS_ELSE_TOKEN 285 -#define PARS_ELSIF_TOKEN 286 -#define PARS_LOOP_TOKEN 287 -#define PARS_WHILE_TOKEN 288 -#define PARS_RETURN_TOKEN 289 -#define PARS_SELECT_TOKEN 290 -#define PARS_SUM_TOKEN 291 -#define PARS_COUNT_TOKEN 292 -#define PARS_DISTINCT_TOKEN 293 -#define PARS_FROM_TOKEN 294 -#define PARS_WHERE_TOKEN 295 -#define 
PARS_FOR_TOKEN 296 -#define PARS_DDOT_TOKEN 297 -#define PARS_READ_TOKEN 298 -#define PARS_ORDER_TOKEN 299 -#define PARS_BY_TOKEN 300 -#define PARS_ASC_TOKEN 301 -#define PARS_DESC_TOKEN 302 -#define PARS_INSERT_TOKEN 303 -#define PARS_INTO_TOKEN 304 -#define PARS_VALUES_TOKEN 305 -#define PARS_UPDATE_TOKEN 306 -#define PARS_SET_TOKEN 307 -#define PARS_DELETE_TOKEN 308 -#define PARS_CURRENT_TOKEN 309 -#define PARS_OF_TOKEN 310 -#define PARS_CREATE_TOKEN 311 -#define PARS_TABLE_TOKEN 312 -#define PARS_INDEX_TOKEN 313 -#define PARS_UNIQUE_TOKEN 314 -#define PARS_CLUSTERED_TOKEN 315 -#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316 -#define PARS_ON_TOKEN 317 -#define PARS_ASSIGN_TOKEN 318 -#define PARS_DECLARE_TOKEN 319 -#define PARS_CURSOR_TOKEN 320 -#define PARS_SQL_TOKEN 321 -#define PARS_OPEN_TOKEN 322 -#define PARS_FETCH_TOKEN 323 -#define PARS_CLOSE_TOKEN 324 -#define PARS_NOTFOUND_TOKEN 325 -#define PARS_TO_CHAR_TOKEN 326 -#define PARS_TO_NUMBER_TOKEN 327 -#define PARS_TO_BINARY_TOKEN 328 -#define PARS_BINARY_TO_NUMBER_TOKEN 329 -#define PARS_SUBSTR_TOKEN 330 -#define PARS_REPLSTR_TOKEN 331 -#define PARS_CONCAT_TOKEN 332 -#define PARS_INSTR_TOKEN 333 -#define PARS_LENGTH_TOKEN 334 -#define PARS_SYSDATE_TOKEN 335 -#define PARS_PRINTF_TOKEN 336 -#define PARS_ASSERT_TOKEN 337 -#define PARS_RND_TOKEN 338 -#define PARS_RND_STR_TOKEN 339 -#define PARS_ROW_PRINTF_TOKEN 340 -#define PARS_COMMIT_TOKEN 341 -#define PARS_ROLLBACK_TOKEN 342 -#define PARS_WORK_TOKEN 343 -#define PARS_UNSIGNED_TOKEN 344 -#define PARS_EXIT_TOKEN 345 -#define PARS_FUNCTION_TOKEN 346 -#define PARS_LOCK_TOKEN 347 -#define PARS_SHARE_TOKEN 348 -#define PARS_MODE_TOKEN 349 -#define PARS_LIKE_TOKEN 350 -#define PARS_LIKE_TOKEN_EXACT 351 -#define PARS_LIKE_TOKEN_PREFIX 352 -#define PARS_LIKE_TOKEN_SUFFIX 353 -#define PARS_LIKE_TOKEN_SUBSTR 354 -#define PARS_TABLE_NAME_TOKEN 355 -#define PARS_COMPACT_TOKEN 356 -#define PARS_BLOCK_SIZE_TOKEN 357 -#define PARS_BIGINT_TOKEN 358 -#define NEG 359 - - - - -/* 
Copy the first part of user declarations. */ -#line 28 "pars0grm.y" - -/* The value of the semantic attribute is a pointer to a query tree node -que_node_t */ - -#include "univ.i" -#include <math.h> /* Can't be before univ.i */ -#include "pars0pars.h" -#include "mem0mem.h" -#include "que0types.h" -#include "que0que.h" -#include "row0sel.h" - -#define YYSTYPE que_node_t* - -/* #define __STDC__ */ - -int -yylex(void); - - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 0 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* Enabling the token table. */ -#ifndef YYTOKEN_TABLE -# define YYTOKEN_TABLE 0 -#endif - -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef int YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - - - -/* Copy the second part of user declarations. */ - - -/* Line 216 of yacc.c. */ -#line 334 "pars0grm.cc" - -#ifdef short -# undef short -#endif - -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#elif (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -typedef signed char yytype_int8; -#else -typedef short int yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! 
defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if defined YYENABLE_NLS && YYENABLE_NLS -# if ENABLE_NLS -# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ -# define YY_(msgid) dgettext ("bison-runtime", msgid) -# endif -# endif -# ifndef YY_ -# define YY_(msgid) msgid -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(e) ((void) (e)) -#else -# define YYUSE(e) /* empty */ -#endif - -/* Identity function, used to suppress warnings about constant conditions. */ -#ifndef lint -# define YYID(n) (n) -#else -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static int -YYID (int i) -#else -static int -YYID (i) - int i; -#endif -{ - return i; -} -#endif - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. 
*/ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined _STDLIB_H \ - && ! ((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# ifndef _STDLIB_H -# define _STDLIB_H 1 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -void free (void*); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss; - YYSTYPE yyvs; - }; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. 
*/ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. */ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (YYID (0)) -# endif -# endif - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack, Stack, yysize); \ - Stack = &yyptr->Stack; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (YYID (0)) - -#endif - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 5 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 816 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 120 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 73 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 183 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 350 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 359 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 112, 2, 2, - 114, 115, 109, 108, 117, 107, 2, 110, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 113, - 105, 104, 106, 116, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 118, 2, 119, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, - 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - 95, 96, 97, 98, 99, 100, 101, 102, 103, 111 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. 
*/ -static const yytype_uint16 yyprhs[] = -{ - 0, 0, 3, 6, 8, 11, 14, 17, 20, 23, - 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, - 56, 59, 62, 65, 68, 71, 73, 76, 78, 83, - 85, 87, 89, 91, 93, 95, 97, 101, 105, 109, - 113, 116, 120, 124, 128, 132, 136, 140, 144, 148, - 152, 156, 159, 163, 167, 169, 171, 173, 175, 177, - 179, 181, 183, 185, 187, 189, 190, 192, 196, 203, - 208, 210, 212, 214, 218, 220, 224, 225, 227, 231, - 232, 234, 238, 240, 245, 251, 256, 257, 259, 263, - 265, 269, 271, 272, 275, 276, 279, 280, 285, 286, - 288, 290, 291, 296, 305, 309, 315, 318, 322, 324, - 328, 333, 338, 341, 344, 348, 351, 354, 357, 361, - 366, 368, 371, 372, 375, 377, 385, 392, 403, 405, - 407, 410, 413, 418, 423, 429, 431, 435, 436, 440, - 441, 443, 444, 447, 448, 450, 451, 453, 454, 458, - 468, 470, 474, 475, 477, 478, 480, 491, 493, 495, - 498, 501, 503, 505, 507, 509, 511, 513, 517, 521, - 522, 524, 528, 532, 533, 535, 538, 545, 550, 552, - 554, 555, 557, 560 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ -static const yytype_int16 yyrhs[] = -{ - 121, 0, -1, 192, 113, -1, 127, -1, 128, 113, - -1, 160, 113, -1, 161, 113, -1, 162, 113, -1, - 159, 113, -1, 163, 113, -1, 155, 113, -1, 142, - 113, -1, 144, 113, -1, 154, 113, -1, 152, 113, - -1, 153, 113, -1, 149, 113, -1, 150, 113, -1, - 164, 113, -1, 166, 113, -1, 165, 113, -1, 181, - 113, -1, 182, 113, -1, 175, 113, -1, 179, 113, - -1, 122, -1, 123, 122, -1, 9, -1, 125, 114, - 133, 115, -1, 3, -1, 4, -1, 5, -1, 6, - -1, 7, -1, 8, -1, 66, -1, 124, 108, 124, - -1, 124, 107, 124, -1, 124, 109, 124, -1, 124, - 110, 124, -1, 107, 124, -1, 114, 124, 115, -1, - 124, 104, 124, -1, 124, 95, 5, -1, 124, 105, - 124, -1, 124, 106, 124, -1, 124, 13, 124, -1, - 124, 14, 124, -1, 124, 15, 124, -1, 124, 10, - 124, -1, 124, 11, 124, -1, 12, 124, -1, 9, - 112, 70, -1, 66, 112, 70, -1, 71, -1, 72, - -1, 73, -1, 74, -1, 75, -1, 77, -1, 78, - -1, 79, -1, 80, -1, 83, -1, 84, -1, -1, - 116, -1, 126, 117, 116, -1, 118, 9, 114, 126, - 115, 119, -1, 129, 114, 133, 115, -1, 76, -1, - 81, -1, 82, -1, 9, 114, 115, -1, 180, -1, - 131, 117, 180, -1, -1, 9, -1, 132, 117, 9, - -1, -1, 124, -1, 133, 117, 124, -1, 124, -1, - 37, 114, 109, 115, -1, 37, 114, 38, 9, 115, - -1, 36, 114, 124, 115, -1, -1, 134, -1, 135, - 117, 134, -1, 109, -1, 135, 49, 132, -1, 135, - -1, -1, 40, 124, -1, -1, 41, 51, -1, -1, - 92, 17, 93, 94, -1, -1, 46, -1, 47, -1, - -1, 44, 45, 9, 140, -1, 35, 136, 39, 131, - 137, 138, 139, 141, -1, 48, 49, 180, -1, 143, - 50, 114, 133, 115, -1, 143, 142, -1, 9, 104, - 124, -1, 145, -1, 146, 117, 145, -1, 40, 54, - 55, 9, -1, 51, 180, 52, 146, -1, 148, 137, - -1, 148, 147, -1, 53, 39, 180, -1, 151, 137, - -1, 151, 147, -1, 85, 142, -1, 9, 63, 124, - -1, 31, 124, 29, 123, -1, 156, -1, 157, 156, - -1, -1, 30, 123, -1, 157, -1, 28, 124, 29, - 123, 158, 27, 28, -1, 33, 124, 32, 123, 27, - 32, -1, 41, 9, 17, 124, 42, 124, 32, 123, - 27, 32, -1, 90, -1, 34, -1, 67, 9, -1, - 69, 9, -1, 68, 9, 49, 132, -1, 68, 9, - 49, 130, -1, 9, 183, 169, 
170, 171, -1, 167, - -1, 168, 117, 167, -1, -1, 114, 3, 115, -1, - -1, 89, -1, -1, 12, 8, -1, -1, 61, -1, - -1, 101, -1, -1, 102, 104, 3, -1, 56, 57, - 180, 114, 168, 115, 172, 173, 174, -1, 9, -1, - 176, 117, 9, -1, -1, 59, -1, -1, 60, -1, - 56, 177, 178, 58, 9, 62, 180, 114, 176, 115, - -1, 9, -1, 100, -1, 86, 88, -1, 87, 88, - -1, 21, -1, 22, -1, 103, -1, 24, -1, 19, - -1, 20, -1, 9, 17, 183, -1, 9, 18, 183, - -1, -1, 184, -1, 185, 117, 184, -1, 9, 183, - 113, -1, -1, 186, -1, 187, 186, -1, 64, 65, - 9, 25, 142, 113, -1, 64, 91, 9, 113, -1, - 188, -1, 189, -1, -1, 190, -1, 191, 190, -1, - 16, 9, 114, 185, 115, 25, 187, 191, 26, 123, - 27, -1 -}; - -/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ -static const yytype_uint16 yyrline[] = -{ - 0, 162, 162, 165, 166, 167, 168, 169, 170, 171, - 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 186, 190, 191, 196, 197, 199, - 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 215, 216, 217, 218, 219, 220, - 221, 222, 223, 225, 230, 231, 232, 233, 235, 236, - 237, 238, 239, 240, 241, 244, 246, 247, 251, 257, - 262, 263, 264, 268, 272, 273, 278, 279, 280, 285, - 286, 287, 291, 292, 297, 303, 310, 311, 312, 317, - 319, 322, 326, 327, 331, 332, 337, 338, 343, 344, - 345, 349, 350, 357, 372, 377, 380, 388, 394, 395, - 400, 406, 415, 423, 431, 438, 446, 454, 460, 467, - 473, 474, 479, 480, 482, 486, 493, 499, 509, 513, - 517, 524, 531, 535, 543, 552, 553, 558, 559, 564, - 565, 571, 572, 578, 579, 585, 586, 591, 592, 597, - 608, 609, 614, 615, 619, 620, 624, 638, 639, 643, - 648, 653, 654, 655, 656, 657, 658, 662, 667, 675, - 676, 677, 682, 688, 690, 691, 695, 703, 709, 710, - 713, 715, 716, 720 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "PARS_INT_LIT", "PARS_FLOAT_LIT", - "PARS_STR_LIT", "PARS_FIXBINARY_LIT", "PARS_BLOB_LIT", "PARS_NULL_LIT", - "PARS_ID_TOKEN", "PARS_AND_TOKEN", "PARS_OR_TOKEN", "PARS_NOT_TOKEN", - "PARS_GE_TOKEN", "PARS_LE_TOKEN", "PARS_NE_TOKEN", - "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN", "PARS_OUT_TOKEN", - "PARS_BINARY_TOKEN", "PARS_BLOB_TOKEN", "PARS_INT_TOKEN", - "PARS_INTEGER_TOKEN", "PARS_FLOAT_TOKEN", "PARS_CHAR_TOKEN", - "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN", "PARS_END_TOKEN", "PARS_IF_TOKEN", - "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN", "PARS_ELSIF_TOKEN", - "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN", "PARS_RETURN_TOKEN", - "PARS_SELECT_TOKEN", "PARS_SUM_TOKEN", "PARS_COUNT_TOKEN", - "PARS_DISTINCT_TOKEN", "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN", - "PARS_FOR_TOKEN", "PARS_DDOT_TOKEN", "PARS_READ_TOKEN", - "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN", "PARS_DESC_TOKEN", - "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN", "PARS_VALUES_TOKEN", - "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN", "PARS_DELETE_TOKEN", - "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN", "PARS_CREATE_TOKEN", - "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN", "PARS_UNIQUE_TOKEN", - "PARS_CLUSTERED_TOKEN", "PARS_DOES_NOT_FIT_IN_MEM_TOKEN", - "PARS_ON_TOKEN", "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN", - "PARS_CURSOR_TOKEN", "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN", - "PARS_FETCH_TOKEN", "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN", - "PARS_TO_CHAR_TOKEN", "PARS_TO_NUMBER_TOKEN", "PARS_TO_BINARY_TOKEN", - "PARS_BINARY_TO_NUMBER_TOKEN", "PARS_SUBSTR_TOKEN", "PARS_REPLSTR_TOKEN", - "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN", "PARS_LENGTH_TOKEN", - "PARS_SYSDATE_TOKEN", "PARS_PRINTF_TOKEN", "PARS_ASSERT_TOKEN", - "PARS_RND_TOKEN", "PARS_RND_STR_TOKEN", "PARS_ROW_PRINTF_TOKEN", - "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN", "PARS_WORK_TOKEN", - "PARS_UNSIGNED_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN", - "PARS_LOCK_TOKEN", "PARS_SHARE_TOKEN", "PARS_MODE_TOKEN", - 
"PARS_LIKE_TOKEN", "PARS_LIKE_TOKEN_EXACT", "PARS_LIKE_TOKEN_PREFIX", - "PARS_LIKE_TOKEN_SUFFIX", "PARS_LIKE_TOKEN_SUBSTR", - "PARS_TABLE_NAME_TOKEN", "PARS_COMPACT_TOKEN", "PARS_BLOCK_SIZE_TOKEN", - "PARS_BIGINT_TOKEN", "'='", "'<'", "'>'", "'-'", "'+'", "'*'", "'/'", - "NEG", "'%'", "';'", "'('", "')'", "'?'", "','", "'{'", "'}'", "$accept", - "top_statement", "statement", "statement_list", "exp", "function_name", - "question_mark_list", "stored_procedure_call", - "predefined_procedure_call", "predefined_procedure_name", - "user_function_call", "table_list", "variable_list", "exp_list", - "select_item", "select_item_list", "select_list", "search_condition", - "for_update_clause", "lock_shared_clause", "order_direction", - "order_by_clause", "select_statement", "insert_statement_start", - "insert_statement", "column_assignment", "column_assignment_list", - "cursor_positioned", "update_statement_start", - "update_statement_searched", "update_statement_positioned", - "delete_statement_start", "delete_statement_searched", - "delete_statement_positioned", "row_printf_statement", - "assignment_statement", "elsif_element", "elsif_list", "else_part", - "if_statement", "while_statement", "for_statement", "exit_statement", - "return_statement", "open_cursor_statement", "close_cursor_statement", - "fetch_statement", "column_def", "column_def_list", "opt_column_len", - "opt_unsigned", "opt_not_null", "not_fit_in_memory", "compact", - "block_size", "create_table", "column_list", "unique_def", - "clustered_def", "create_index", "table_name", "commit_statement", - "rollback_statement", "type_name", "parameter_declaration", - "parameter_declaration_list", "variable_declaration", - "variable_declaration_list", "cursor_declaration", - "function_declaration", "declaration", "declaration_list", - "procedure_definition", 0 -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. 
*/ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, - 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, - 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, - 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, - 355, 356, 357, 358, 61, 60, 62, 45, 43, 42, - 47, 359, 37, 59, 40, 41, 63, 44, 123, 125 -}; -# endif - -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 120, 121, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, - 122, 122, 122, 122, 122, 123, 123, 124, 124, 124, - 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, - 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, - 124, 124, 124, 124, 125, 125, 125, 125, 125, 125, - 125, 125, 125, 125, 125, 126, 126, 126, 127, 128, - 129, 129, 129, 130, 131, 131, 132, 132, 132, 133, - 133, 133, 134, 134, 134, 134, 135, 135, 135, 136, - 136, 136, 137, 137, 138, 138, 139, 139, 140, 140, - 140, 141, 141, 142, 143, 144, 144, 145, 146, 146, - 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, - 157, 157, 158, 158, 158, 159, 160, 161, 162, 163, - 164, 165, 166, 166, 167, 168, 168, 169, 169, 170, - 170, 171, 171, 172, 172, 173, 173, 174, 174, 175, - 176, 176, 177, 177, 178, 178, 179, 180, 180, 181, - 182, 183, 183, 183, 183, 183, 183, 184, 184, 185, - 185, 185, 186, 187, 187, 187, 188, 189, 190, 190, - 191, 191, 191, 192 -}; - -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. 
*/ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 2, 1, 4, 1, - 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, - 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 2, 3, 3, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 0, 1, 3, 6, 4, - 1, 1, 1, 3, 1, 3, 0, 1, 3, 0, - 1, 3, 1, 4, 5, 4, 0, 1, 3, 1, - 3, 1, 0, 2, 0, 2, 0, 4, 0, 1, - 1, 0, 4, 8, 3, 5, 2, 3, 1, 3, - 4, 4, 2, 2, 3, 2, 2, 2, 3, 4, - 1, 2, 0, 2, 1, 7, 6, 10, 1, 1, - 2, 2, 4, 4, 5, 1, 3, 0, 3, 0, - 1, 0, 2, 0, 1, 0, 1, 0, 3, 9, - 1, 3, 0, 1, 0, 1, 10, 1, 1, 2, - 2, 1, 1, 1, 1, 1, 1, 3, 3, 0, - 1, 3, 3, 0, 1, 2, 6, 4, 1, 1, - 0, 1, 2, 11 -}; - -/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state - STATE-NUM when YYTABLE doesn't specify something else to do. Zero - means the default is an error. */ -static const yytype_uint8 yydefact[] = -{ - 0, 0, 0, 0, 0, 1, 2, 169, 0, 170, - 0, 0, 0, 0, 0, 165, 166, 161, 162, 164, - 163, 167, 168, 173, 171, 0, 174, 180, 0, 0, - 175, 178, 179, 181, 0, 172, 0, 0, 0, 182, - 0, 0, 0, 0, 0, 129, 86, 0, 0, 0, - 0, 152, 0, 0, 0, 70, 71, 72, 0, 0, - 0, 128, 0, 25, 0, 3, 0, 0, 0, 0, - 0, 92, 0, 0, 92, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 177, 0, 29, 30, 31, 32, 33, 34, - 27, 0, 35, 54, 55, 56, 57, 58, 59, 60, - 61, 62, 63, 64, 0, 0, 0, 0, 0, 0, - 0, 89, 82, 87, 91, 0, 0, 0, 157, 158, - 0, 0, 0, 153, 154, 130, 0, 131, 117, 159, - 160, 0, 183, 26, 4, 79, 11, 0, 106, 12, - 0, 112, 113, 16, 17, 115, 116, 14, 15, 13, - 10, 8, 5, 6, 7, 9, 18, 20, 19, 23, - 24, 21, 22, 0, 118, 0, 51, 0, 40, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 79, 0, 0, 0, 76, 0, - 0, 0, 104, 0, 114, 0, 155, 0, 76, 65, - 80, 0, 79, 0, 93, 176, 52, 53, 41, 49, - 50, 46, 47, 48, 122, 43, 42, 44, 45, 37, - 36, 38, 39, 0, 0, 0, 0, 0, 77, 90, - 88, 92, 74, 0, 0, 108, 111, 0, 0, 77, - 133, 132, 66, 0, 69, 0, 0, 0, 0, 0, - 120, 124, 0, 28, 0, 85, 0, 83, 0, 0, - 0, 94, 0, 0, 0, 0, 135, 0, 0, 0, - 0, 0, 81, 105, 110, 123, 0, 121, 0, 126, - 84, 78, 75, 0, 96, 
0, 107, 109, 137, 143, - 0, 0, 73, 68, 67, 0, 125, 95, 0, 101, - 0, 0, 139, 144, 145, 136, 0, 119, 0, 0, - 103, 0, 0, 140, 141, 146, 147, 0, 0, 0, - 0, 138, 0, 134, 0, 149, 150, 0, 97, 98, - 127, 142, 0, 156, 0, 99, 100, 102, 148, 151 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int16 yydefgoto[] = -{ - -1, 2, 63, 64, 210, 117, 253, 65, 66, 67, - 250, 241, 239, 211, 123, 124, 125, 151, 294, 309, - 347, 320, 68, 69, 70, 245, 246, 152, 71, 72, - 73, 74, 75, 76, 77, 78, 260, 261, 262, 79, - 80, 81, 82, 83, 84, 85, 86, 276, 277, 312, - 324, 333, 314, 326, 335, 87, 337, 134, 207, 88, - 130, 89, 90, 21, 9, 10, 26, 27, 31, 32, - 33, 34, 3 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -179 -static const yytype_int16 yypact[] = -{ - 24, 36, 58, -48, -25, -179, -179, 57, 31, -179, - -74, 14, 14, 50, 57, -179, -179, -179, -179, -179, - -179, -179, -179, 72, -179, 14, -179, 3, -26, -28, - -179, -179, -179, -179, 4, -179, 91, 95, 589, -179, - 80, -6, 43, 285, 285, -179, 19, 99, 69, -5, - 81, -13, 110, 112, 114, -179, -179, -179, 89, 37, - 41, -179, 122, -179, 406, -179, 25, 40, 44, -3, - 46, 116, 49, 51, 116, 52, 53, 54, 55, 56, - 59, 61, 62, 70, 73, 74, 75, 76, 77, 78, - 79, 89, -179, 285, -179, -179, -179, -179, -179, -179, - 82, 285, 83, -179, -179, -179, -179, -179, -179, -179, - -179, -179, -179, -179, 285, 285, 577, 92, 618, 94, - 97, -179, 706, -179, -33, 124, 153, -5, -179, -179, - 141, -5, -5, -179, 136, -179, 148, -179, -179, -179, - -179, 98, -179, -179, -179, 285, -179, 101, -179, -179, - 195, -179, -179, -179, -179, -179, -179, -179, -179, -179, - -179, -179, -179, -179, -179, -179, -179, -179, -179, -179, - -179, -179, -179, 100, 706, 135, 6, 154, -7, 206, - 285, 285, 285, 285, 285, 589, 218, 285, 285, 285, - 285, 285, 285, 285, 285, 589, 285, -27, 216, 173, - -5, 285, -179, 217, -179, 113, -179, 171, 221, 119, - 706, -56, 285, 185, 706, -179, -179, -179, -179, 6, - 6, 27, 27, 706, 345, 
-179, 27, 27, 27, 35, - 35, -7, -7, -53, 467, 223, 232, 127, -179, 126, - -179, -31, -179, 638, 151, -179, 142, 251, 253, 150, - -179, 126, -179, -46, -179, 285, -45, 256, 589, 285, - -179, 240, 249, -179, 245, -179, 166, -179, 273, 285, - -5, 242, 285, 285, 217, 14, -179, -39, 222, 170, - 167, 179, 706, -179, -179, 589, 679, -179, 268, -179, - -179, -179, -179, 247, 207, 686, 706, -179, 186, 243, - 251, -5, -179, -179, -179, 589, -179, -179, 286, 261, - 589, 303, 219, -179, 224, -179, 193, 589, 226, 272, - -179, 528, 205, -179, 310, -179, 233, 314, 230, 317, - 302, -179, 328, -179, 235, -179, -179, -38, -179, 7, - -179, -179, 334, -179, 331, -179, -179, -179, -179, -179 -}; - -/* YYPGOTO[NTERM-NUM]. */ -static const yytype_int16 yypgoto[] = -{ - -179, -179, -63, -178, -41, -179, -179, -179, -179, -179, - -179, -179, 133, -155, 143, -179, -179, -68, -179, -179, - -179, -179, -40, -179, -179, 71, -179, 269, -179, -179, - -179, -179, -179, -179, -179, -179, 85, -179, -179, -179, - -179, -179, -179, -179, -179, -179, -179, 47, -179, -179, - -179, -179, -179, -179, -179, -179, -179, -179, -179, -179, - -117, -179, -179, -12, 330, -179, 321, -179, -179, -179, - 315, -179, -179 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If zero, do what YYDEFACT says. - If YYTABLE_NINF, syntax error. 
*/ -#define YYTABLE_NINF -1 -static const yytype_uint16 yytable[] = -{ - 22, 143, 116, 118, 128, 122, 155, 224, 184, 269, - 202, 236, 25, 28, 204, 205, 198, 234, 138, 182, - 183, 184, 94, 95, 96, 97, 98, 99, 100, 148, - 38, 101, 46, 15, 16, 17, 18, 36, 19, 233, - 1, 13, 184, 14, 132, 4, 133, 147, 11, 12, - 184, 173, 174, 345, 346, 119, 120, 256, 5, 254, - 176, 255, 263, 37, 255, 6, 8, 29, 29, 280, - 283, 281, 255, 178, 179, 23, 299, 343, 300, 344, - 285, 25, 237, 242, 199, 102, 270, 35, 186, 7, - 103, 104, 105, 106, 107, 129, 108, 109, 110, 111, - 40, 186, 112, 113, 41, 91, 93, 92, 126, 214, - 187, 188, 189, 190, 191, 192, 193, 20, 127, 135, - 131, 136, 186, 137, 46, 139, 114, 317, 121, 140, - 186, 141, 321, 115, 190, 191, 192, 193, 144, 219, - 220, 221, 222, 223, 192, 193, 226, 227, 228, 229, - 230, 231, 232, 292, 145, 235, 150, 146, 122, 149, - 243, 143, 153, 200, 154, 157, 158, 159, 160, 161, - 201, 143, 162, 271, 163, 164, 94, 95, 96, 97, - 98, 99, 100, 165, 316, 101, 166, 167, 168, 169, - 170, 171, 172, 203, 175, 177, 206, 208, 94, 95, - 96, 97, 98, 99, 100, 216, 194, 101, 196, 119, - 120, 197, 209, 215, 282, 212, 180, 181, 286, 182, - 183, 184, 143, 225, 217, 238, 244, 247, 214, 248, - 249, 295, 296, 180, 181, 252, 182, 183, 184, 102, - 257, 266, 267, 268, 103, 104, 105, 106, 107, 213, - 108, 109, 110, 111, 143, 273, 112, 113, 143, 274, - 275, 102, 278, 298, 279, 284, 103, 104, 105, 106, - 107, 259, 108, 109, 110, 111, 288, 289, 112, 113, - 114, 290, 291, 293, 301, 302, 303, 115, 94, 95, - 96, 97, 98, 99, 100, 304, 306, 101, 307, 308, - 311, 186, 114, 318, 313, 319, 322, 327, 323, 115, - 187, 188, 189, 190, 191, 192, 193, 329, 186, 328, - 331, 218, 332, 336, 338, 325, 339, 187, 188, 189, - 190, 191, 192, 193, 340, 334, 341, 348, 265, 342, - 349, 251, 240, 156, 24, 297, 287, 315, 30, 39, - 0, 102, 0, 0, 42, 0, 103, 104, 105, 106, - 107, 0, 108, 109, 110, 111, 0, 0, 112, 113, - 0, 0, 0, 43, 0, 258, 259, 0, 44, 45, - 46, 0, 0, 0, 0, 0, 47, 0, 0, 0, - 0, 0, 114, 
48, 0, 0, 49, 0, 50, 115, - 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 52, 53, 54, 42, 0, 0, 0, 0, - 0, 55, 0, 0, 0, 0, 56, 57, 0, 0, - 58, 59, 60, 142, 43, 61, 0, 0, 0, 44, - 45, 46, 0, 0, 0, 0, 0, 47, 0, 0, - 0, 0, 0, 0, 48, 0, 0, 49, 0, 50, - 0, 0, 51, 62, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 52, 53, 54, 42, 0, 0, 0, - 0, 0, 55, 0, 0, 0, 0, 56, 57, 0, - 0, 58, 59, 60, 264, 43, 61, 0, 0, 0, - 44, 45, 46, 0, 0, 0, 0, 0, 47, 0, - 0, 0, 0, 0, 0, 48, 0, 0, 49, 0, - 50, 0, 0, 51, 62, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 52, 53, 54, 42, 0, 0, - 0, 0, 0, 55, 0, 0, 0, 0, 56, 57, - 0, 0, 58, 59, 60, 330, 43, 61, 0, 0, - 0, 44, 45, 46, 0, 0, 0, 0, 0, 47, - 0, 0, 0, 0, 0, 0, 48, 0, 0, 49, - 0, 50, 0, 0, 51, 62, 0, 180, 181, 0, - 182, 183, 184, 0, 0, 52, 53, 54, 42, 0, - 0, 0, 0, 0, 55, 0, 185, 0, 0, 56, - 57, 0, 0, 58, 59, 60, 0, 43, 61, 0, - 0, 0, 44, 45, 46, 0, 0, 0, 180, 181, - 47, 182, 183, 184, 0, 0, 0, 48, 0, 0, - 49, 0, 50, 0, 0, 51, 62, 0, 180, 181, - 195, 182, 183, 184, 0, 0, 52, 53, 54, 0, - 0, 0, 0, 0, 0, 55, 0, 0, 0, 0, - 56, 57, 186, 0, 58, 59, 60, 0, 0, 61, - 272, 187, 188, 189, 190, 191, 192, 193, 0, 180, - 181, 0, 182, 183, 184, 0, 180, 181, 0, 182, - 183, 184, 0, 0, 0, 0, 0, 62, 305, 0, - 0, 0, 0, 186, 0, 0, 180, 181, 310, 182, - 183, 184, 187, 188, 189, 190, 191, 192, 193, 0, - 0, 0, 0, 186, 0, 0, 0, 0, 0, 0, - 0, 0, 187, 188, 189, 190, 191, 192, 193, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 186, 0, 0, 0, 0, 0, - 0, 186, 0, 187, 188, 189, 190, 191, 192, 193, - 187, 188, 189, 190, 191, 192, 193, 0, 0, 0, - 0, 186, 0, 0, 0, 0, 0, 0, 0, 0, - 187, 188, 189, 190, 191, 192, 193 -}; - -static const yytype_int16 yycheck[] = -{ - 12, 64, 43, 44, 9, 46, 74, 185, 15, 40, - 127, 38, 9, 25, 131, 132, 49, 195, 58, 13, - 14, 15, 3, 4, 5, 6, 7, 8, 9, 69, - 26, 12, 35, 19, 20, 21, 22, 65, 24, 194, - 16, 115, 15, 117, 57, 9, 59, 50, 17, 18, - 15, 91, 93, 46, 47, 36, 37, 212, 0, 115, - 101, 117, 115, 91, 117, 113, 9, 64, 64, 115, - 115, 117, 117, 114, 
115, 25, 115, 115, 117, 117, - 258, 9, 109, 200, 117, 66, 117, 113, 95, 114, - 71, 72, 73, 74, 75, 100, 77, 78, 79, 80, - 9, 95, 83, 84, 9, 25, 63, 113, 9, 150, - 104, 105, 106, 107, 108, 109, 110, 103, 49, 9, - 39, 9, 95, 9, 35, 88, 107, 305, 109, 88, - 95, 9, 310, 114, 107, 108, 109, 110, 113, 180, - 181, 182, 183, 184, 109, 110, 187, 188, 189, 190, - 191, 192, 193, 270, 114, 196, 40, 113, 199, 113, - 201, 224, 113, 39, 113, 113, 113, 113, 113, 113, - 17, 234, 113, 241, 113, 113, 3, 4, 5, 6, - 7, 8, 9, 113, 301, 12, 113, 113, 113, 113, - 113, 113, 113, 52, 112, 112, 60, 49, 3, 4, - 5, 6, 7, 8, 9, 70, 114, 12, 114, 36, - 37, 114, 114, 113, 255, 114, 10, 11, 259, 13, - 14, 15, 285, 5, 70, 9, 9, 114, 269, 58, - 9, 272, 273, 10, 11, 116, 13, 14, 15, 66, - 55, 9, 115, 117, 71, 72, 73, 74, 75, 54, - 77, 78, 79, 80, 317, 104, 83, 84, 321, 117, - 9, 66, 9, 275, 114, 9, 71, 72, 73, 74, - 75, 31, 77, 78, 79, 80, 27, 32, 83, 84, - 107, 115, 9, 41, 62, 115, 119, 114, 3, 4, - 5, 6, 7, 8, 9, 116, 28, 12, 51, 92, - 114, 95, 107, 17, 61, 44, 3, 114, 89, 114, - 104, 105, 106, 107, 108, 109, 110, 45, 95, 93, - 115, 115, 12, 9, 94, 101, 9, 104, 105, 106, - 107, 108, 109, 110, 32, 102, 8, 3, 115, 104, - 9, 208, 199, 74, 14, 274, 261, 300, 27, 34, - -1, 66, -1, -1, 9, -1, 71, 72, 73, 74, - 75, -1, 77, 78, 79, 80, -1, -1, 83, 84, - -1, -1, -1, 28, -1, 30, 31, -1, 33, 34, - 35, -1, -1, -1, -1, -1, 41, -1, -1, -1, - -1, -1, 107, 48, -1, -1, 51, -1, 53, 114, - -1, 56, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 67, 68, 69, 9, -1, -1, -1, -1, - -1, 76, -1, -1, -1, -1, 81, 82, -1, -1, - 85, 86, 87, 27, 28, 90, -1, -1, -1, 33, - 34, 35, -1, -1, -1, -1, -1, 41, -1, -1, - -1, -1, -1, -1, 48, -1, -1, 51, -1, 53, - -1, -1, 56, 118, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 67, 68, 69, 9, -1, -1, -1, - -1, -1, 76, -1, -1, -1, -1, 81, 82, -1, - -1, 85, 86, 87, 27, 28, 90, -1, -1, -1, - 33, 34, 35, -1, -1, -1, -1, -1, 41, -1, - -1, -1, -1, -1, -1, 48, -1, -1, 51, -1, - 53, -1, -1, 56, 118, -1, -1, -1, 
-1, -1, - -1, -1, -1, -1, 67, 68, 69, 9, -1, -1, - -1, -1, -1, 76, -1, -1, -1, -1, 81, 82, - -1, -1, 85, 86, 87, 27, 28, 90, -1, -1, - -1, 33, 34, 35, -1, -1, -1, -1, -1, 41, - -1, -1, -1, -1, -1, -1, 48, -1, -1, 51, - -1, 53, -1, -1, 56, 118, -1, 10, 11, -1, - 13, 14, 15, -1, -1, 67, 68, 69, 9, -1, - -1, -1, -1, -1, 76, -1, 29, -1, -1, 81, - 82, -1, -1, 85, 86, 87, -1, 28, 90, -1, - -1, -1, 33, 34, 35, -1, -1, -1, 10, 11, - 41, 13, 14, 15, -1, -1, -1, 48, -1, -1, - 51, -1, 53, -1, -1, 56, 118, -1, 10, 11, - 32, 13, 14, 15, -1, -1, 67, 68, 69, -1, - -1, -1, -1, -1, -1, 76, -1, -1, -1, -1, - 81, 82, 95, -1, 85, 86, 87, -1, -1, 90, - 42, 104, 105, 106, 107, 108, 109, 110, -1, 10, - 11, -1, 13, 14, 15, -1, 10, 11, -1, 13, - 14, 15, -1, -1, -1, -1, -1, 118, 29, -1, - -1, -1, -1, 95, -1, -1, 10, 11, 32, 13, - 14, 15, 104, 105, 106, 107, 108, 109, 110, -1, - -1, -1, -1, 95, -1, -1, -1, -1, -1, -1, - -1, -1, 104, 105, 106, 107, 108, 109, 110, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 95, -1, -1, -1, -1, -1, - -1, 95, -1, 104, 105, 106, 107, 108, 109, 110, - 104, 105, 106, 107, 108, 109, 110, -1, -1, -1, - -1, 95, -1, -1, -1, -1, -1, -1, -1, -1, - 104, 105, 106, 107, 108, 109, 110 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. 
*/ -static const yytype_uint8 yystos[] = -{ - 0, 16, 121, 192, 9, 0, 113, 114, 9, 184, - 185, 17, 18, 115, 117, 19, 20, 21, 22, 24, - 103, 183, 183, 25, 184, 9, 186, 187, 183, 64, - 186, 188, 189, 190, 191, 113, 65, 91, 26, 190, - 9, 9, 9, 28, 33, 34, 35, 41, 48, 51, - 53, 56, 67, 68, 69, 76, 81, 82, 85, 86, - 87, 90, 118, 122, 123, 127, 128, 129, 142, 143, - 144, 148, 149, 150, 151, 152, 153, 154, 155, 159, - 160, 161, 162, 163, 164, 165, 166, 175, 179, 181, - 182, 25, 113, 63, 3, 4, 5, 6, 7, 8, - 9, 12, 66, 71, 72, 73, 74, 75, 77, 78, - 79, 80, 83, 84, 107, 114, 124, 125, 124, 36, - 37, 109, 124, 134, 135, 136, 9, 49, 9, 100, - 180, 39, 57, 59, 177, 9, 9, 9, 142, 88, - 88, 9, 27, 122, 113, 114, 113, 50, 142, 113, - 40, 137, 147, 113, 113, 137, 147, 113, 113, 113, - 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, - 113, 113, 113, 142, 124, 112, 124, 112, 124, 124, - 10, 11, 13, 14, 15, 29, 95, 104, 105, 106, - 107, 108, 109, 110, 114, 32, 114, 114, 49, 117, - 39, 17, 180, 52, 180, 180, 60, 178, 49, 114, - 124, 133, 114, 54, 124, 113, 70, 70, 115, 124, - 124, 124, 124, 124, 123, 5, 124, 124, 124, 124, - 124, 124, 124, 133, 123, 124, 38, 109, 9, 132, - 134, 131, 180, 124, 9, 145, 146, 114, 58, 9, - 130, 132, 116, 126, 115, 117, 133, 55, 30, 31, - 156, 157, 158, 115, 27, 115, 9, 115, 117, 40, - 117, 137, 42, 104, 117, 9, 167, 168, 9, 114, - 115, 117, 124, 115, 9, 123, 124, 156, 27, 32, - 115, 9, 180, 41, 138, 124, 124, 145, 183, 115, - 117, 62, 115, 119, 116, 29, 28, 51, 92, 139, - 32, 114, 169, 61, 172, 167, 180, 123, 17, 44, - 141, 123, 3, 89, 170, 101, 173, 114, 93, 45, - 27, 115, 12, 171, 102, 174, 9, 176, 94, 9, - 32, 8, 104, 115, 117, 46, 47, 140, 3, 9 -}; - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. 
This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ - -#define YYFAIL goto yyerrlab - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - yytoken = YYTRANSLATE (yychar); \ - YYPOPSTACK (1); \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (YYID (0)) - - -#define YYTERROR 1 -#define YYERRCODE 256 - - -/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. - If N is 0, then set CURRENT to the empty location which ends - the previous symbol: RHS[0] (always defined). */ - -#define YYRHSLOC(Rhs, K) ((Rhs)[K]) -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - do \ - if (YYID (N)) \ - { \ - (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC (Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC (Rhs, 0).last_column; \ - } \ - while (YYID (0)) -#endif - - -/* YY_LOCATION_PRINT -- Print the location on the stream. - This macro was not mandated originally: define only if we know - we won't break user code: when these are the locations we know. */ - -#ifndef YY_LOCATION_PRINT -# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL -# define YY_LOCATION_PRINT(File, Loc) \ - fprintf (File, "%d.%d-%d.%d", \ - (Loc).first_line, (Loc).first_column, \ - (Loc).last_line, (Loc).last_column) -# else -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -# endif -#endif - - -/* YYLEX -- calling `yylex' with the right arguments. 
*/ - -#ifdef YYLEX_PARAM -# define YYLEX yylex (YYLEX_PARAM) -#else -# define YYLEX yylex () -#endif - -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (YYID (0)) - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (YYID (0)) - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_value_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (!yyvaluep) - return; -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# else - YYUSE (yyoutput); -# endif - switch (yytype) - { - default: - break; - } -} - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. 
| -`--------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep) -#else -static void -yy_symbol_print (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE const * const yyvaluep; -#endif -{ - if (yytype < YYNTOKENS) - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - yy_symbol_value_print (yyoutput, yytype, yyvaluep); - YYFPRINTF (yyoutput, ")"); -} - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). | -`------------------------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_stack_print (yytype_int16 *bottom, yytype_int16 *top) -#else -static void -yy_stack_print (bottom, top) - yytype_int16 *bottom; - yytype_int16 *top; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (; bottom <= top; ++bottom) - YYFPRINTF (stderr, " %d", *bottom); - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (YYID (0)) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. | -`------------------------------------------------*/ - -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yy_reduce_print (YYSTYPE *yyvsp, int yyrule) -#else -static void -yy_reduce_print (yyvsp, yyrule) - YYSTYPE *yyvsp; - int yyrule; -#endif -{ - int yynrhs = yyr2[yyrule]; - int yyi; - unsigned long int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. 
*/ - for (yyi = 0; yyi < yynrhs; yyi++) - { - fprintf (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], - &(yyvsp[(yyi + 1) - (yynrhs)]) - ); - fprintf (stderr, "\n"); - } -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyvsp, Rule); \ -} while (YYID (0)) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static YYSIZE_T -yystrlen (const char *yystr) -#else -static YYSIZE_T -yystrlen (yystr) - const char *yystr; -#endif -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. 
*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static char * -yystpcpy (char *yydest, const char *yysrc) -#else -static char * -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -#endif -{ - char *yyd = yydest; - const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. */ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into YYRESULT an error message about the unexpected token - YYCHAR while in state YYSTATE. Return the number of bytes copied, - including the terminating null byte. If YYRESULT is null, do not - copy anything; just return the number of bytes that would be - copied. As a special case, return 0 if an ordinary "syntax error" - message will do. Return YYSIZE_MAXIMUM if overflow occurs during - size calculation. */ -static YYSIZE_T -yysyntax_error (char *yyresult, int yystate, int yychar) -{ - int yyn = yypact[yystate]; - - if (! 
(YYPACT_NINF < yyn && yyn <= YYLAST)) - return 0; - else - { - int yytype = YYTRANSLATE (yychar); - YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); - YYSIZE_T yysize = yysize0; - YYSIZE_T yysize1; - int yysize_overflow = 0; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - int yyx; - -# if 0 - /* This is so xgettext sees the translatable formats that are - constructed on the fly. */ - YY_("syntax error, unexpected %s"); - YY_("syntax error, unexpected %s, expecting %s"); - YY_("syntax error, unexpected %s, expecting %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s"); - YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); -# endif - char *yyfmt; - char const *yyf; - static char const yyunexpected[] = "syntax error, unexpected %s"; - static char const yyexpecting[] = ", expecting %s"; - static char const yyor[] = " or %s"; - char yyformat[sizeof yyunexpected - + sizeof yyexpecting - 1 - + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) - * (sizeof yyor - 1))]; - char const *yyprefix = yyexpecting; - - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? 
yychecklim : YYNTOKENS; - int yycount = 1; - - yyarg[0] = yytname[yytype]; - yyfmt = yystpcpy (yyformat, yyunexpected); - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - yyformat[sizeof yyunexpected - 1] = '\0'; - break; - } - yyarg[yycount++] = yytname[yyx]; - yysize1 = yysize + yytnamerr (0, yytname[yyx]); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - yyfmt = yystpcpy (yyfmt, yyprefix); - yyprefix = yyor; - } - - yyf = YY_(yyformat); - yysize1 = yysize + yystrlen (yyf); - yysize_overflow |= (yysize1 < yysize); - yysize = yysize1; - - if (yysize_overflow) - return YYSIZE_MAXIMUM; - - if (yyresult) - { - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - char *yyp = yyresult; - int yyi = 0; - while ((*yyp = *yyf) != '\0') - { - if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyf += 2; - } - else - { - yyp++; - yyf++; - } - } - } - return yysize; - } -} -#endif /* YYERROR_VERBOSE */ - - -/*-----------------------------------------------. -| Release the memory associated to this symbol. | -`-----------------------------------------------*/ - -/*ARGSUSED*/ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) -#else -static void -yydestruct (yymsg, yytype, yyvaluep) - const char *yymsg; - int yytype; - YYSTYPE *yyvaluep; -#endif -{ - YYUSE (yyvaluep); - - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); -} - - -/* Prevent warnings from -Wmissing-prototypes. 
*/ - -#ifdef YYPARSE_PARAM -#if defined __STDC__ || defined __cplusplus -int yyparse (void *YYPARSE_PARAM); -#else -int yyparse (); -#endif -#else /* ! YYPARSE_PARAM */ -#if defined __STDC__ || defined __cplusplus -int yyparse (void); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ - - - -/* The look-ahead symbol. */ -int yychar; - -/* The semantic value of the look-ahead symbol. */ -YYSTYPE yylval; - -/* Number of syntax errors so far. */ -int yynerrs; - - - -/*----------. -| yyparse. | -`----------*/ - -#ifdef YYPARSE_PARAM -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void *YYPARSE_PARAM) -#else -int -yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -#endif -#else /* ! YYPARSE_PARAM */ -#if (defined __STDC__ || defined __C99__FUNC__ \ - || defined __cplusplus || defined _MSC_VER) -int -yyparse (void) -#else -int -yyparse () - -#endif -#endif -{ - - int yystate; - int yyn; - int yyresult; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - /* Look-ahead token as an internal (translated) token number. */ - int yytoken = 0; -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif - - /* Three stacks and their tools: - `yyss': related to states, - `yyvs': related to semantic values, - `yyls': related to locations. - - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss = yyssa; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs = yyvsa; - YYSTYPE *yyvsp; - - - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) - - YYSIZE_T yystacksize = YYINITDEPTH; - - /* The variables used to return semantic value and location from the - action routines. 
*/ - YYSTYPE yyval; - - - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - - yyssp = yyss; - yyvsp = yyvs; - - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. */ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. 
*/ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc*) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss); - YYSTACK_RELOCATE (yyvs); - -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - look-ahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to look-ahead token. */ - yyn = yypact[yystate]; - if (yyn == YYPACT_NINF) - goto yydefault; - - /* Not known => get a look-ahead token if don't already have one. */ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. 
*/ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yyn == 0 || yyn == YYTABLE_NINF) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - if (yyn == YYFINAL) - YYACCEPT; - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - /* Shift the look-ahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - - /* Discard the shifted token unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - yystate = yyn; - *++yyvsp = yylval; - - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. 
*/ - yyval = yyvsp[1-yylen]; - - - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 25: -#line 190 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 26: -#line 192 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;} - break; - - case 27: -#line 196 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 28: -#line 198 "pars0grm.y" - { (yyval) = pars_func((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;} - break; - - case 29: -#line 199 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 30: -#line 200 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 31: -#line 201 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 32: -#line 202 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 33: -#line 203 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 34: -#line 204 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 35: -#line 205 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]);;} - break; - - case 36: -#line 206 "pars0grm.y" - { (yyval) = pars_op('+', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 37: -#line 207 "pars0grm.y" - { (yyval) = pars_op('-', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 38: -#line 208 "pars0grm.y" - { (yyval) = pars_op('*', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 39: -#line 209 "pars0grm.y" - { (yyval) = pars_op('/', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 40: -#line 210 "pars0grm.y" - { (yyval) = pars_op('-', (yyvsp[(2) - (2)]), NULL); ;} - break; - - case 41: -#line 211 "pars0grm.y" - { (yyval) = (yyvsp[(2) - (3)]); ;} - break; - - case 42: -#line 212 "pars0grm.y" - { (yyval) = pars_op('=', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 43: -#line 214 "pars0grm.y" - { (yyval) = pars_op(PARS_LIKE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 44: 
-#line 215 "pars0grm.y" - { (yyval) = pars_op('<', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 45: -#line 216 "pars0grm.y" - { (yyval) = pars_op('>', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 46: -#line 217 "pars0grm.y" - { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 47: -#line 218 "pars0grm.y" - { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 48: -#line 219 "pars0grm.y" - { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 49: -#line 220 "pars0grm.y" - { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 50: -#line 221 "pars0grm.y" - { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 51: -#line 222 "pars0grm.y" - { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[(2) - (2)]), NULL); ;} - break; - - case 52: -#line 224 "pars0grm.y" - { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[(1) - (3)]), NULL); ;} - break; - - case 53: -#line 226 "pars0grm.y" - { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[(1) - (3)]), NULL); ;} - break; - - case 54: -#line 230 "pars0grm.y" - { (yyval) = &pars_to_char_token; ;} - break; - - case 55: -#line 231 "pars0grm.y" - { (yyval) = &pars_to_number_token; ;} - break; - - case 56: -#line 232 "pars0grm.y" - { (yyval) = &pars_to_binary_token; ;} - break; - - case 57: -#line 234 "pars0grm.y" - { (yyval) = &pars_binary_to_number_token; ;} - break; - - case 58: -#line 235 "pars0grm.y" - { (yyval) = &pars_substr_token; ;} - break; - - case 59: -#line 236 "pars0grm.y" - { (yyval) = &pars_concat_token; ;} - break; - - case 60: -#line 237 "pars0grm.y" - { (yyval) = &pars_instr_token; ;} - break; - - case 61: -#line 238 "pars0grm.y" - { (yyval) = &pars_length_token; ;} - break; - - case 62: -#line 239 "pars0grm.y" - { (yyval) = &pars_sysdate_token; ;} - break; - - case 63: -#line 240 
"pars0grm.y" - { (yyval) = &pars_rnd_token; ;} - break; - - case 64: -#line 241 "pars0grm.y" - { (yyval) = &pars_rnd_str_token; ;} - break; - - case 68: -#line 252 "pars0grm.y" - { (yyval) = pars_stored_procedure_call( - static_cast<sym_node_t*>((yyvsp[(2) - (6)]))); ;} - break; - - case 69: -#line 258 "pars0grm.y" - { (yyval) = pars_procedure_call((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;} - break; - - case 70: -#line 262 "pars0grm.y" - { (yyval) = &pars_replstr_token; ;} - break; - - case 71: -#line 263 "pars0grm.y" - { (yyval) = &pars_printf_token; ;} - break; - - case 72: -#line 264 "pars0grm.y" - { (yyval) = &pars_assert_token; ;} - break; - - case 73: -#line 268 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (3)]); ;} - break; - - case 74: -#line 272 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 75: -#line 274 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 76: -#line 278 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 77: -#line 279 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 78: -#line 281 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 79: -#line 285 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 80: -#line 286 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)]));;} - break; - - case 81: -#line 287 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 82: -#line 291 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; - - case 83: -#line 293 "pars0grm.y" - { (yyval) = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - sym_tab_add_int_lit( - pars_sym_tab_global, 1))); ;} - break; - - case 84: -#line 298 "pars0grm.y" - { (yyval) = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - pars_func(&pars_distinct_token, - 
que_node_list_add_last( - NULL, (yyvsp[(4) - (5)]))))); ;} - break; - - case 85: -#line 304 "pars0grm.y" - { (yyval) = pars_func(&pars_sum_token, - que_node_list_add_last(NULL, - (yyvsp[(3) - (4)]))); ;} - break; - - case 86: -#line 310 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 87: -#line 311 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 88: -#line 313 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 89: -#line 317 "pars0grm.y" - { (yyval) = pars_select_list(&pars_star_denoter, - NULL); ;} - break; - - case 90: -#line 320 "pars0grm.y" - { (yyval) = pars_select_list( - (yyvsp[(1) - (3)]), static_cast<sym_node_t*>((yyvsp[(3) - (3)]))); ;} - break; - - case 91: -#line 322 "pars0grm.y" - { (yyval) = pars_select_list((yyvsp[(1) - (1)]), NULL); ;} - break; - - case 92: -#line 326 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 93: -#line 327 "pars0grm.y" - { (yyval) = (yyvsp[(2) - (2)]); ;} - break; - - case 94: -#line 331 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 95: -#line 333 "pars0grm.y" - { (yyval) = &pars_update_token; ;} - break; - - case 96: -#line 337 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 97: -#line 339 "pars0grm.y" - { (yyval) = &pars_share_token; ;} - break; - - case 98: -#line 343 "pars0grm.y" - { (yyval) = &pars_asc_token; ;} - break; - - case 99: -#line 344 "pars0grm.y" - { (yyval) = &pars_asc_token; ;} - break; - - case 100: -#line 345 "pars0grm.y" - { (yyval) = &pars_desc_token; ;} - break; - - case 101: -#line 349 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 102: -#line 351 "pars0grm.y" - { (yyval) = pars_order_by( - static_cast<sym_node_t*>((yyvsp[(3) - (4)])), - static_cast<pars_res_word_t*>((yyvsp[(4) - (4)]))); ;} - break; - - case 103: -#line 362 "pars0grm.y" - { (yyval) = pars_select_statement( - static_cast<sel_node_t*>((yyvsp[(2) - (8)])), - static_cast<sym_node_t*>((yyvsp[(4) - 
(8)])), - static_cast<que_node_t*>((yyvsp[(5) - (8)])), - static_cast<pars_res_word_t*>((yyvsp[(6) - (8)])), - static_cast<pars_res_word_t*>((yyvsp[(7) - (8)])), - static_cast<order_node_t*>((yyvsp[(8) - (8)]))); ;} - break; - - case 104: -#line 373 "pars0grm.y" - { (yyval) = (yyvsp[(3) - (3)]); ;} - break; - - case 105: -#line 378 "pars0grm.y" - { (yyval) = pars_insert_statement( - static_cast<sym_node_t*>((yyvsp[(1) - (5)])), (yyvsp[(4) - (5)]), NULL); ;} - break; - - case 106: -#line 381 "pars0grm.y" - { (yyval) = pars_insert_statement( - static_cast<sym_node_t*>((yyvsp[(1) - (2)])), - NULL, - static_cast<sel_node_t*>((yyvsp[(2) - (2)]))); ;} - break; - - case 107: -#line 388 "pars0grm.y" - { (yyval) = pars_column_assignment( - static_cast<sym_node_t*>((yyvsp[(1) - (3)])), - static_cast<que_node_t*>((yyvsp[(3) - (3)]))); ;} - break; - - case 108: -#line 394 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 109: -#line 396 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 110: -#line 402 "pars0grm.y" - { (yyval) = (yyvsp[(4) - (4)]); ;} - break; - - case 111: -#line 408 "pars0grm.y" - { (yyval) = pars_update_statement_start( - FALSE, - static_cast<sym_node_t*>((yyvsp[(2) - (4)])), - static_cast<col_assign_node_t*>((yyvsp[(4) - (4)]))); ;} - break; - - case 112: -#line 416 "pars0grm.y" - { (yyval) = pars_update_statement( - static_cast<upd_node_t*>((yyvsp[(1) - (2)])), - NULL, - static_cast<que_node_t*>((yyvsp[(2) - (2)]))); ;} - break; - - case 113: -#line 424 "pars0grm.y" - { (yyval) = pars_update_statement( - static_cast<upd_node_t*>((yyvsp[(1) - (2)])), - static_cast<sym_node_t*>((yyvsp[(2) - (2)])), - NULL); ;} - break; - - case 114: -#line 432 "pars0grm.y" - { (yyval) = pars_update_statement_start( - TRUE, - static_cast<sym_node_t*>((yyvsp[(3) - (3)])), NULL); ;} - break; - - case 115: -#line 439 "pars0grm.y" - { (yyval) = pars_update_statement( - 
static_cast<upd_node_t*>((yyvsp[(1) - (2)])), - NULL, - static_cast<que_node_t*>((yyvsp[(2) - (2)]))); ;} - break; - - case 116: -#line 447 "pars0grm.y" - { (yyval) = pars_update_statement( - static_cast<upd_node_t*>((yyvsp[(1) - (2)])), - static_cast<sym_node_t*>((yyvsp[(2) - (2)])), - NULL); ;} - break; - - case 117: -#line 455 "pars0grm.y" - { (yyval) = pars_row_printf_statement( - static_cast<sel_node_t*>((yyvsp[(2) - (2)]))); ;} - break; - - case 118: -#line 461 "pars0grm.y" - { (yyval) = pars_assignment_statement( - static_cast<sym_node_t*>((yyvsp[(1) - (3)])), - static_cast<que_node_t*>((yyvsp[(3) - (3)]))); ;} - break; - - case 119: -#line 469 "pars0grm.y" - { (yyval) = pars_elsif_element((yyvsp[(2) - (4)]), (yyvsp[(4) - (4)])); ;} - break; - - case 120: -#line 473 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 121: -#line 475 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;} - break; - - case 122: -#line 479 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 123: -#line 481 "pars0grm.y" - { (yyval) = (yyvsp[(2) - (2)]); ;} - break; - - case 124: -#line 482 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; - - case 125: -#line 489 "pars0grm.y" - { (yyval) = pars_if_statement((yyvsp[(2) - (7)]), (yyvsp[(4) - (7)]), (yyvsp[(5) - (7)])); ;} - break; - - case 126: -#line 495 "pars0grm.y" - { (yyval) = pars_while_statement((yyvsp[(2) - (6)]), (yyvsp[(4) - (6)])); ;} - break; - - case 127: -#line 503 "pars0grm.y" - { (yyval) = pars_for_statement( - static_cast<sym_node_t*>((yyvsp[(2) - (10)])), - (yyvsp[(4) - (10)]), (yyvsp[(6) - (10)]), (yyvsp[(8) - (10)])); ;} - break; - - case 128: -#line 509 "pars0grm.y" - { (yyval) = pars_exit_statement(); ;} - break; - - case 129: -#line 513 "pars0grm.y" - { (yyval) = pars_return_statement(); ;} - break; - - case 130: -#line 518 "pars0grm.y" - { (yyval) = pars_open_statement( - ROW_SEL_OPEN_CURSOR, - 
static_cast<sym_node_t*>((yyvsp[(2) - (2)]))); ;} - break; - - case 131: -#line 525 "pars0grm.y" - { (yyval) = pars_open_statement( - ROW_SEL_CLOSE_CURSOR, - static_cast<sym_node_t*>((yyvsp[(2) - (2)]))); ;} - break; - - case 132: -#line 532 "pars0grm.y" - { (yyval) = pars_fetch_statement( - static_cast<sym_node_t*>((yyvsp[(2) - (4)])), - static_cast<sym_node_t*>((yyvsp[(4) - (4)])), NULL); ;} - break; - - case 133: -#line 536 "pars0grm.y" - { (yyval) = pars_fetch_statement( - static_cast<sym_node_t*>((yyvsp[(2) - (4)])), - NULL, - static_cast<sym_node_t*>((yyvsp[(4) - (4)]))); ;} - break; - - case 134: -#line 544 "pars0grm.y" - { (yyval) = pars_column_def( - static_cast<sym_node_t*>((yyvsp[(1) - (5)])), - static_cast<pars_res_word_t*>((yyvsp[(2) - (5)])), - static_cast<sym_node_t*>((yyvsp[(3) - (5)])), - (yyvsp[(4) - (5)]), (yyvsp[(5) - (5)])); ;} - break; - - case 135: -#line 552 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 136: -#line 554 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 137: -#line 558 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 138: -#line 560 "pars0grm.y" - { (yyval) = (yyvsp[(2) - (3)]); ;} - break; - - case 139: -#line 564 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 140: -#line 566 "pars0grm.y" - { (yyval) = &pars_int_token; - /* pass any non-NULL pointer */ ;} - break; - - case 141: -#line 571 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 142: -#line 573 "pars0grm.y" - { (yyval) = &pars_int_token; - /* pass any non-NULL pointer */ ;} - break; - - case 143: -#line 578 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 144: -#line 580 "pars0grm.y" - { (yyval) = &pars_int_token; - /* pass any non-NULL pointer */ ;} - break; - - case 145: -#line 585 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 146: -#line 586 "pars0grm.y" - { (yyval) = &pars_int_token; - /* pass any non-NULL pointer */ ;} - 
break; - - case 147: -#line 591 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 148: -#line 593 "pars0grm.y" - { (yyval) = (yyvsp[(3) - (3)]); ;} - break; - - case 149: -#line 600 "pars0grm.y" - { (yyval) = pars_create_table( - static_cast<sym_node_t*>((yyvsp[(3) - (9)])), - static_cast<sym_node_t*>((yyvsp[(5) - (9)])), - static_cast<sym_node_t*>((yyvsp[(8) - (9)])), - static_cast<sym_node_t*>((yyvsp[(9) - (9)])), (yyvsp[(7) - (9)])); ;} - break; - - case 150: -#line 608 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 151: -#line 610 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 152: -#line 614 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 153: -#line 615 "pars0grm.y" - { (yyval) = &pars_unique_token; ;} - break; - - case 154: -#line 619 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 155: -#line 620 "pars0grm.y" - { (yyval) = &pars_clustered_token; ;} - break; - - case 156: -#line 629 "pars0grm.y" - { (yyval) = pars_create_index( - static_cast<pars_res_word_t*>((yyvsp[(2) - (10)])), - static_cast<pars_res_word_t*>((yyvsp[(3) - (10)])), - static_cast<sym_node_t*>((yyvsp[(5) - (10)])), - static_cast<sym_node_t*>((yyvsp[(7) - (10)])), - static_cast<sym_node_t*>((yyvsp[(9) - (10)]))); ;} - break; - - case 157: -#line 638 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; - - case 158: -#line 639 "pars0grm.y" - { (yyval) = (yyvsp[(1) - (1)]); ;} - break; - - case 159: -#line 644 "pars0grm.y" - { (yyval) = pars_commit_statement(); ;} - break; - - case 160: -#line 649 "pars0grm.y" - { (yyval) = pars_rollback_statement(); ;} - break; - - case 161: -#line 653 "pars0grm.y" - { (yyval) = &pars_int_token; ;} - break; - - case 162: -#line 654 "pars0grm.y" - { (yyval) = &pars_int_token; ;} - break; - - case 163: -#line 655 "pars0grm.y" - { (yyval) = &pars_bigint_token; ;} - break; - - case 164: -#line 656 "pars0grm.y" - { (yyval) = 
&pars_char_token; ;} - break; - - case 165: -#line 657 "pars0grm.y" - { (yyval) = &pars_binary_token; ;} - break; - - case 166: -#line 658 "pars0grm.y" - { (yyval) = &pars_blob_token; ;} - break; - - case 167: -#line 663 "pars0grm.y" - { (yyval) = pars_parameter_declaration( - static_cast<sym_node_t*>((yyvsp[(1) - (3)])), - PARS_INPUT, - static_cast<pars_res_word_t*>((yyvsp[(3) - (3)]))); ;} - break; - - case 168: -#line 668 "pars0grm.y" - { (yyval) = pars_parameter_declaration( - static_cast<sym_node_t*>((yyvsp[(1) - (3)])), - PARS_OUTPUT, - static_cast<pars_res_word_t*>((yyvsp[(3) - (3)]))); ;} - break; - - case 169: -#line 675 "pars0grm.y" - { (yyval) = NULL; ;} - break; - - case 170: -#line 676 "pars0grm.y" - { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;} - break; - - case 171: -#line 678 "pars0grm.y" - { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;} - break; - - case 172: -#line 683 "pars0grm.y" - { (yyval) = pars_variable_declaration( - static_cast<sym_node_t*>((yyvsp[(1) - (3)])), - static_cast<pars_res_word_t*>((yyvsp[(2) - (3)]))); ;} - break; - - case 176: -#line 697 "pars0grm.y" - { (yyval) = pars_cursor_declaration( - static_cast<sym_node_t*>((yyvsp[(3) - (6)])), - static_cast<sel_node_t*>((yyvsp[(5) - (6)]))); ;} - break; - - case 177: -#line 704 "pars0grm.y" - { (yyval) = pars_function_declaration( - static_cast<sym_node_t*>((yyvsp[(3) - (4)]))); ;} - break; - - case 183: -#line 726 "pars0grm.y" - { (yyval) = pars_procedure_definition( - static_cast<sym_node_t*>((yyvsp[(2) - (11)])), - static_cast<sym_node_t*>((yyvsp[(4) - (11)])), - (yyvsp[(10) - (11)])); ;} - break; - - -/* Line 1267 of yacc.c. */ -#line 2826 "pars0grm.cc" - default: break; - } - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); - - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - - - /* Now `shift' the result of the reduction. 
Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (YY_("syntax error")); -#else - { - YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); - if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) - { - YYSIZE_T yyalloc = 2 * yysize; - if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) - yyalloc = YYSTACK_ALLOC_MAXIMUM; - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char*) YYSTACK_ALLOC (yyalloc); - if (yymsg) - yymsg_alloc = yyalloc; - else - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - } - } - - if (0 < yysize && yysize <= yymsg_alloc) - { - (void) yysyntax_error (yymsg, yystate, yychar); - yyerror (yymsg); - } - else - { - yyerror (YY_("syntax error")); - if (yysize != 0) - goto yyexhaustedlab; - } - } -#endif - } - - - - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse look-ahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - YYABORT; - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse look-ahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. 
| -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. */ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - /* Do not reclaim the symbols of the rule which action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (yyn != YYPACT_NINF) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - YYABORT; - - - yydestruct ("Error: popping", - yystos[yystate], yyvsp); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - if (yyn == YYFINAL) - YYACCEPT; - - *++yyvsp = yylval; - - - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#ifndef yyoverflow -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (YY_("memory exhausted")); - yyresult = 2; - /* Fall through. 
*/ -#endif - -yyreturn: - if (yychar != YYEOF && yychar != YYEMPTY) - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval); - /* Do not reclaim the symbols of the rule which action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - /* Make sure YYID is used. */ - return YYID (yyresult); -} - - -#line 732 "pars0grm.y" - - diff --git a/storage/xtradb/pars/pars0grm.y b/storage/xtradb/pars/pars0grm.y deleted file mode 100644 index 60913287cc4..00000000000 --- a/storage/xtradb/pars/pars0grm.y +++ /dev/null @@ -1,732 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/****************************************************** -SQL parser: input file for the GNU Bison parser generator - -Look from pars0lex.l for instructions how to generate the C files for -the InnoDB parser. 
- -Created 12/14/1997 Heikki Tuuri -*******************************************************/ - -%{ -/* The value of the semantic attribute is a pointer to a query tree node -que_node_t */ - -#include "univ.i" -#include <math.h> /* Can't be before univ.i */ -#include "pars0pars.h" -#include "mem0mem.h" -#include "que0types.h" -#include "que0que.h" -#include "row0sel.h" - -#define YYSTYPE que_node_t* - -/* #define __STDC__ */ - -int -yylex(void); -%} - -%token PARS_INT_LIT -%token PARS_FLOAT_LIT -%token PARS_STR_LIT -%token PARS_FIXBINARY_LIT -%token PARS_BLOB_LIT -%token PARS_NULL_LIT -%token PARS_ID_TOKEN -%token PARS_AND_TOKEN -%token PARS_OR_TOKEN -%token PARS_NOT_TOKEN -%token PARS_GE_TOKEN -%token PARS_LE_TOKEN -%token PARS_NE_TOKEN -%token PARS_PROCEDURE_TOKEN -%token PARS_IN_TOKEN -%token PARS_OUT_TOKEN -%token PARS_BINARY_TOKEN -%token PARS_BLOB_TOKEN -%token PARS_INT_TOKEN -%token PARS_INTEGER_TOKEN -%token PARS_FLOAT_TOKEN -%token PARS_CHAR_TOKEN -%token PARS_IS_TOKEN -%token PARS_BEGIN_TOKEN -%token PARS_END_TOKEN -%token PARS_IF_TOKEN -%token PARS_THEN_TOKEN -%token PARS_ELSE_TOKEN -%token PARS_ELSIF_TOKEN -%token PARS_LOOP_TOKEN -%token PARS_WHILE_TOKEN -%token PARS_RETURN_TOKEN -%token PARS_SELECT_TOKEN -%token PARS_SUM_TOKEN -%token PARS_COUNT_TOKEN -%token PARS_DISTINCT_TOKEN -%token PARS_FROM_TOKEN -%token PARS_WHERE_TOKEN -%token PARS_FOR_TOKEN -%token PARS_DDOT_TOKEN -%token PARS_READ_TOKEN -%token PARS_ORDER_TOKEN -%token PARS_BY_TOKEN -%token PARS_ASC_TOKEN -%token PARS_DESC_TOKEN -%token PARS_INSERT_TOKEN -%token PARS_INTO_TOKEN -%token PARS_VALUES_TOKEN -%token PARS_UPDATE_TOKEN -%token PARS_SET_TOKEN -%token PARS_DELETE_TOKEN -%token PARS_CURRENT_TOKEN -%token PARS_OF_TOKEN -%token PARS_CREATE_TOKEN -%token PARS_TABLE_TOKEN -%token PARS_INDEX_TOKEN -%token PARS_UNIQUE_TOKEN -%token PARS_CLUSTERED_TOKEN -%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN -%token PARS_ON_TOKEN -%token PARS_ASSIGN_TOKEN -%token PARS_DECLARE_TOKEN -%token PARS_CURSOR_TOKEN 
-%token PARS_SQL_TOKEN -%token PARS_OPEN_TOKEN -%token PARS_FETCH_TOKEN -%token PARS_CLOSE_TOKEN -%token PARS_NOTFOUND_TOKEN -%token PARS_TO_CHAR_TOKEN -%token PARS_TO_NUMBER_TOKEN -%token PARS_TO_BINARY_TOKEN -%token PARS_BINARY_TO_NUMBER_TOKEN -%token PARS_SUBSTR_TOKEN -%token PARS_REPLSTR_TOKEN -%token PARS_CONCAT_TOKEN -%token PARS_INSTR_TOKEN -%token PARS_LENGTH_TOKEN -%token PARS_SYSDATE_TOKEN -%token PARS_PRINTF_TOKEN -%token PARS_ASSERT_TOKEN -%token PARS_RND_TOKEN -%token PARS_RND_STR_TOKEN -%token PARS_ROW_PRINTF_TOKEN -%token PARS_COMMIT_TOKEN -%token PARS_ROLLBACK_TOKEN -%token PARS_WORK_TOKEN -%token PARS_UNSIGNED_TOKEN -%token PARS_EXIT_TOKEN -%token PARS_FUNCTION_TOKEN -%token PARS_LOCK_TOKEN -%token PARS_SHARE_TOKEN -%token PARS_MODE_TOKEN -%token PARS_LIKE_TOKEN -%token PARS_LIKE_TOKEN_EXACT -%token PARS_LIKE_TOKEN_PREFIX -%token PARS_LIKE_TOKEN_SUFFIX -%token PARS_LIKE_TOKEN_SUBSTR -%token PARS_TABLE_NAME_TOKEN -%token PARS_COMPACT_TOKEN -%token PARS_BLOCK_SIZE_TOKEN -%token PARS_BIGINT_TOKEN - -%left PARS_AND_TOKEN PARS_OR_TOKEN -%left PARS_NOT_TOKEN -%left '=' '<' '>' PARS_GE_TOKEN PARS_LE_TOKEN -%left '-' '+' -%left '*' '/' -%left NEG /* negation--unary minus */ -%left '%' - -/* Grammar follows */ -%% - -top_statement: - procedure_definition ';' - -statement: - stored_procedure_call - | predefined_procedure_call ';' - | while_statement ';' - | for_statement ';' - | exit_statement ';' - | if_statement ';' - | return_statement ';' - | assignment_statement ';' - | select_statement ';' - | insert_statement ';' - | row_printf_statement ';' - | delete_statement_searched ';' - | delete_statement_positioned ';' - | update_statement_searched ';' - | update_statement_positioned ';' - | open_cursor_statement ';' - | fetch_statement ';' - | close_cursor_statement ';' - | commit_statement ';' - | rollback_statement ';' - | create_table ';' - | create_index ';' -; - -statement_list: - statement { $$ = que_node_list_add_last(NULL, $1); } - | statement_list 
statement - { $$ = que_node_list_add_last($1, $2); } -; - -exp: - PARS_ID_TOKEN { $$ = $1;} - | function_name '(' exp_list ')' - { $$ = pars_func($1, $3); } - | PARS_INT_LIT { $$ = $1;} - | PARS_FLOAT_LIT { $$ = $1;} - | PARS_STR_LIT { $$ = $1;} - | PARS_FIXBINARY_LIT { $$ = $1;} - | PARS_BLOB_LIT { $$ = $1;} - | PARS_NULL_LIT { $$ = $1;} - | PARS_SQL_TOKEN { $$ = $1;} - | exp '+' exp { $$ = pars_op('+', $1, $3); } - | exp '-' exp { $$ = pars_op('-', $1, $3); } - | exp '*' exp { $$ = pars_op('*', $1, $3); } - | exp '/' exp { $$ = pars_op('/', $1, $3); } - | '-' exp %prec NEG { $$ = pars_op('-', $2, NULL); } - | '(' exp ')' { $$ = $2; } - | exp '=' exp { $$ = pars_op('=', $1, $3); } - | exp PARS_LIKE_TOKEN PARS_STR_LIT - { $$ = pars_op(PARS_LIKE_TOKEN, $1, $3); } - | exp '<' exp { $$ = pars_op('<', $1, $3); } - | exp '>' exp { $$ = pars_op('>', $1, $3); } - | exp PARS_GE_TOKEN exp { $$ = pars_op(PARS_GE_TOKEN, $1, $3); } - | exp PARS_LE_TOKEN exp { $$ = pars_op(PARS_LE_TOKEN, $1, $3); } - | exp PARS_NE_TOKEN exp { $$ = pars_op(PARS_NE_TOKEN, $1, $3); } - | exp PARS_AND_TOKEN exp{ $$ = pars_op(PARS_AND_TOKEN, $1, $3); } - | exp PARS_OR_TOKEN exp { $$ = pars_op(PARS_OR_TOKEN, $1, $3); } - | PARS_NOT_TOKEN exp { $$ = pars_op(PARS_NOT_TOKEN, $2, NULL); } - | PARS_ID_TOKEN '%' PARS_NOTFOUND_TOKEN - { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); } - | PARS_SQL_TOKEN '%' PARS_NOTFOUND_TOKEN - { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); } -; - -function_name: - PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; } - | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; } - | PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; } - | PARS_BINARY_TO_NUMBER_TOKEN - { $$ = &pars_binary_to_number_token; } - | PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; } - | PARS_CONCAT_TOKEN { $$ = &pars_concat_token; } - | PARS_INSTR_TOKEN { $$ = &pars_instr_token; } - | PARS_LENGTH_TOKEN { $$ = &pars_length_token; } - | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; } - | PARS_RND_TOKEN { $$ = 
&pars_rnd_token; } - | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; } -; - -question_mark_list: - /* Nothing */ - | '?' - | question_mark_list ',' '?' -; - -stored_procedure_call: - '{' PARS_ID_TOKEN '(' question_mark_list ')' '}' - { $$ = pars_stored_procedure_call( - static_cast<sym_node_t*>($2)); } -; - -predefined_procedure_call: - predefined_procedure_name '(' exp_list ')' - { $$ = pars_procedure_call($1, $3); } -; - -predefined_procedure_name: - PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; } - | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; } - | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; } -; - -user_function_call: - PARS_ID_TOKEN '(' ')' { $$ = $1; } -; - -table_list: - table_name { $$ = que_node_list_add_last(NULL, $1); } - | table_list ',' table_name - { $$ = que_node_list_add_last($1, $3); } -; - -variable_list: - /* Nothing */ { $$ = NULL; } - | PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); } - | variable_list ',' PARS_ID_TOKEN - { $$ = que_node_list_add_last($1, $3); } -; - -exp_list: - /* Nothing */ { $$ = NULL; } - | exp { $$ = que_node_list_add_last(NULL, $1);} - | exp_list ',' exp { $$ = que_node_list_add_last($1, $3); } -; - -select_item: - exp { $$ = $1; } - | PARS_COUNT_TOKEN '(' '*' ')' - { $$ = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - sym_tab_add_int_lit( - pars_sym_tab_global, 1))); } - | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')' - { $$ = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - pars_func(&pars_distinct_token, - que_node_list_add_last( - NULL, $4)))); } - | PARS_SUM_TOKEN '(' exp ')' - { $$ = pars_func(&pars_sum_token, - que_node_list_add_last(NULL, - $3)); } -; - -select_item_list: - /* Nothing */ { $$ = NULL; } - | select_item { $$ = que_node_list_add_last(NULL, $1); } - | select_item_list ',' select_item - { $$ = que_node_list_add_last($1, $3); } -; - -select_list: - '*' { $$ = pars_select_list(&pars_star_denoter, - NULL); } - | select_item_list PARS_INTO_TOKEN 
variable_list - { $$ = pars_select_list( - $1, static_cast<sym_node_t*>($3)); } - | select_item_list { $$ = pars_select_list($1, NULL); } -; - -search_condition: - /* Nothing */ { $$ = NULL; } - | PARS_WHERE_TOKEN exp { $$ = $2; } -; - -for_update_clause: - /* Nothing */ { $$ = NULL; } - | PARS_FOR_TOKEN PARS_UPDATE_TOKEN - { $$ = &pars_update_token; } -; - -lock_shared_clause: - /* Nothing */ { $$ = NULL; } - | PARS_LOCK_TOKEN PARS_IN_TOKEN PARS_SHARE_TOKEN PARS_MODE_TOKEN - { $$ = &pars_share_token; } -; - -order_direction: - /* Nothing */ { $$ = &pars_asc_token; } - | PARS_ASC_TOKEN { $$ = &pars_asc_token; } - | PARS_DESC_TOKEN { $$ = &pars_desc_token; } -; - -order_by_clause: - /* Nothing */ { $$ = NULL; } - | PARS_ORDER_TOKEN PARS_BY_TOKEN PARS_ID_TOKEN order_direction - { $$ = pars_order_by( - static_cast<sym_node_t*>($3), - static_cast<pars_res_word_t*>($4)); } -; - -select_statement: - PARS_SELECT_TOKEN select_list - PARS_FROM_TOKEN table_list - search_condition - for_update_clause - lock_shared_clause - order_by_clause { $$ = pars_select_statement( - static_cast<sel_node_t*>($2), - static_cast<sym_node_t*>($4), - static_cast<que_node_t*>($5), - static_cast<pars_res_word_t*>($6), - static_cast<pars_res_word_t*>($7), - static_cast<order_node_t*>($8)); } -; - -insert_statement_start: - PARS_INSERT_TOKEN PARS_INTO_TOKEN - table_name { $$ = $3; } -; - -insert_statement: - insert_statement_start PARS_VALUES_TOKEN '(' exp_list ')' - { $$ = pars_insert_statement( - static_cast<sym_node_t*>($1), $4, NULL); } - | insert_statement_start select_statement - { $$ = pars_insert_statement( - static_cast<sym_node_t*>($1), - NULL, - static_cast<sel_node_t*>($2)); } -; - -column_assignment: - PARS_ID_TOKEN '=' exp { $$ = pars_column_assignment( - static_cast<sym_node_t*>($1), - static_cast<que_node_t*>($3)); } -; - -column_assignment_list: - column_assignment { $$ = que_node_list_add_last(NULL, $1); } - | column_assignment_list ',' column_assignment - { $$ = 
que_node_list_add_last($1, $3); } -; - -cursor_positioned: - PARS_WHERE_TOKEN - PARS_CURRENT_TOKEN PARS_OF_TOKEN - PARS_ID_TOKEN { $$ = $4; } -; - -update_statement_start: - PARS_UPDATE_TOKEN table_name - PARS_SET_TOKEN - column_assignment_list { $$ = pars_update_statement_start( - FALSE, - static_cast<sym_node_t*>($2), - static_cast<col_assign_node_t*>($4)); } -; - -update_statement_searched: - update_statement_start - search_condition { $$ = pars_update_statement( - static_cast<upd_node_t*>($1), - NULL, - static_cast<que_node_t*>($2)); } -; - -update_statement_positioned: - update_statement_start - cursor_positioned { $$ = pars_update_statement( - static_cast<upd_node_t*>($1), - static_cast<sym_node_t*>($2), - NULL); } -; - -delete_statement_start: - PARS_DELETE_TOKEN PARS_FROM_TOKEN - table_name { $$ = pars_update_statement_start( - TRUE, - static_cast<sym_node_t*>($3), NULL); } -; - -delete_statement_searched: - delete_statement_start - search_condition { $$ = pars_update_statement( - static_cast<upd_node_t*>($1), - NULL, - static_cast<que_node_t*>($2)); } -; - -delete_statement_positioned: - delete_statement_start - cursor_positioned { $$ = pars_update_statement( - static_cast<upd_node_t*>($1), - static_cast<sym_node_t*>($2), - NULL); } -; - -row_printf_statement: - PARS_ROW_PRINTF_TOKEN select_statement - { $$ = pars_row_printf_statement( - static_cast<sel_node_t*>($2)); } -; - -assignment_statement: - PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp - { $$ = pars_assignment_statement( - static_cast<sym_node_t*>($1), - static_cast<que_node_t*>($3)); } -; - -elsif_element: - PARS_ELSIF_TOKEN - exp PARS_THEN_TOKEN statement_list - { $$ = pars_elsif_element($2, $4); } -; - -elsif_list: - elsif_element { $$ = que_node_list_add_last(NULL, $1); } - | elsif_list elsif_element - { $$ = que_node_list_add_last($1, $2); } -; - -else_part: - /* Nothing */ { $$ = NULL; } - | PARS_ELSE_TOKEN statement_list - { $$ = $2; } - | elsif_list { $$ = $1; } -; - -if_statement: - PARS_IF_TOKEN 
exp PARS_THEN_TOKEN statement_list - else_part - PARS_END_TOKEN PARS_IF_TOKEN - { $$ = pars_if_statement($2, $4, $5); } -; - -while_statement: - PARS_WHILE_TOKEN exp PARS_LOOP_TOKEN statement_list - PARS_END_TOKEN PARS_LOOP_TOKEN - { $$ = pars_while_statement($2, $4); } -; - -for_statement: - PARS_FOR_TOKEN PARS_ID_TOKEN PARS_IN_TOKEN - exp PARS_DDOT_TOKEN exp - PARS_LOOP_TOKEN statement_list - PARS_END_TOKEN PARS_LOOP_TOKEN - { $$ = pars_for_statement( - static_cast<sym_node_t*>($2), - $4, $6, $8); } -; - -exit_statement: - PARS_EXIT_TOKEN { $$ = pars_exit_statement(); } -; - -return_statement: - PARS_RETURN_TOKEN { $$ = pars_return_statement(); } -; - -open_cursor_statement: - PARS_OPEN_TOKEN PARS_ID_TOKEN - { $$ = pars_open_statement( - ROW_SEL_OPEN_CURSOR, - static_cast<sym_node_t*>($2)); } -; - -close_cursor_statement: - PARS_CLOSE_TOKEN PARS_ID_TOKEN - { $$ = pars_open_statement( - ROW_SEL_CLOSE_CURSOR, - static_cast<sym_node_t*>($2)); } -; - -fetch_statement: - PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN variable_list - { $$ = pars_fetch_statement( - static_cast<sym_node_t*>($2), - static_cast<sym_node_t*>($4), NULL); } - | PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN user_function_call - { $$ = pars_fetch_statement( - static_cast<sym_node_t*>($2), - NULL, - static_cast<sym_node_t*>($4)); } -; - -column_def: - PARS_ID_TOKEN type_name opt_column_len opt_unsigned opt_not_null - { $$ = pars_column_def( - static_cast<sym_node_t*>($1), - static_cast<pars_res_word_t*>($2), - static_cast<sym_node_t*>($3), - $4, $5); } -; - -column_def_list: - column_def { $$ = que_node_list_add_last(NULL, $1); } - | column_def_list ',' column_def - { $$ = que_node_list_add_last($1, $3); } -; - -opt_column_len: - /* Nothing */ { $$ = NULL; } - | '(' PARS_INT_LIT ')' - { $$ = $2; } -; - -opt_unsigned: - /* Nothing */ { $$ = NULL; } - | PARS_UNSIGNED_TOKEN - { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -opt_not_null: - /* Nothing */ { $$ = NULL; } - | 
PARS_NOT_TOKEN PARS_NULL_LIT - { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -not_fit_in_memory: - /* Nothing */ { $$ = NULL; } - | PARS_DOES_NOT_FIT_IN_MEM_TOKEN - { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -compact: - /* Nothing */ { $$ = NULL; } - | PARS_COMPACT_TOKEN { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -block_size: - /* Nothing */ { $$ = NULL; } - | PARS_BLOCK_SIZE_TOKEN '=' PARS_INT_LIT - { $$ = $3; } -; - -create_table: - PARS_CREATE_TOKEN PARS_TABLE_TOKEN - table_name '(' column_def_list ')' - not_fit_in_memory compact block_size - { $$ = pars_create_table( - static_cast<sym_node_t*>($3), - static_cast<sym_node_t*>($5), - static_cast<sym_node_t*>($8), - static_cast<sym_node_t*>($9), $7); } -; - -column_list: - PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); } - | column_list ',' PARS_ID_TOKEN - { $$ = que_node_list_add_last($1, $3); } -; - -unique_def: - /* Nothing */ { $$ = NULL; } - | PARS_UNIQUE_TOKEN { $$ = &pars_unique_token; } -; - -clustered_def: - /* Nothing */ { $$ = NULL; } - | PARS_CLUSTERED_TOKEN { $$ = &pars_clustered_token; } -; - -create_index: - PARS_CREATE_TOKEN unique_def - clustered_def - PARS_INDEX_TOKEN - PARS_ID_TOKEN PARS_ON_TOKEN - table_name - '(' column_list ')' { $$ = pars_create_index( - static_cast<pars_res_word_t*>($2), - static_cast<pars_res_word_t*>($3), - static_cast<sym_node_t*>($5), - static_cast<sym_node_t*>($7), - static_cast<sym_node_t*>($9)); } -; - -table_name: - PARS_ID_TOKEN { $$ = $1; } - | PARS_TABLE_NAME_TOKEN { $$ = $1; } -; - -commit_statement: - PARS_COMMIT_TOKEN PARS_WORK_TOKEN - { $$ = pars_commit_statement(); } -; - -rollback_statement: - PARS_ROLLBACK_TOKEN PARS_WORK_TOKEN - { $$ = pars_rollback_statement(); } -; - -type_name: - PARS_INT_TOKEN { $$ = &pars_int_token; } - | PARS_INTEGER_TOKEN { $$ = &pars_int_token; } - | PARS_BIGINT_TOKEN { $$ = &pars_bigint_token; } - | PARS_CHAR_TOKEN { $$ = &pars_char_token; } - | 
PARS_BINARY_TOKEN { $$ = &pars_binary_token; } - | PARS_BLOB_TOKEN { $$ = &pars_blob_token; } -; - -parameter_declaration: - PARS_ID_TOKEN PARS_IN_TOKEN type_name - { $$ = pars_parameter_declaration( - static_cast<sym_node_t*>($1), - PARS_INPUT, - static_cast<pars_res_word_t*>($3)); } - | PARS_ID_TOKEN PARS_OUT_TOKEN type_name - { $$ = pars_parameter_declaration( - static_cast<sym_node_t*>($1), - PARS_OUTPUT, - static_cast<pars_res_word_t*>($3)); } -; - -parameter_declaration_list: - /* Nothing */ { $$ = NULL; } - | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); } - | parameter_declaration_list ',' parameter_declaration - { $$ = que_node_list_add_last($1, $3); } -; - -variable_declaration: - PARS_ID_TOKEN type_name ';' - { $$ = pars_variable_declaration( - static_cast<sym_node_t*>($1), - static_cast<pars_res_word_t*>($2)); } -; - -variable_declaration_list: - /* Nothing */ - | variable_declaration - | variable_declaration_list variable_declaration -; - -cursor_declaration: - PARS_DECLARE_TOKEN PARS_CURSOR_TOKEN PARS_ID_TOKEN - PARS_IS_TOKEN select_statement ';' - { $$ = pars_cursor_declaration( - static_cast<sym_node_t*>($3), - static_cast<sel_node_t*>($5)); } -; - -function_declaration: - PARS_DECLARE_TOKEN PARS_FUNCTION_TOKEN PARS_ID_TOKEN ';' - { $$ = pars_function_declaration( - static_cast<sym_node_t*>($3)); } -; - -declaration: - cursor_declaration - | function_declaration -; - -declaration_list: - /* Nothing */ - | declaration - | declaration_list declaration -; - -procedure_definition: - PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')' - PARS_IS_TOKEN - variable_declaration_list - declaration_list - PARS_BEGIN_TOKEN - statement_list - PARS_END_TOKEN { $$ = pars_procedure_definition( - static_cast<sym_node_t*>($2), - static_cast<sym_node_t*>($4), - $10); } -; - -%% diff --git a/storage/xtradb/pars/pars0lex.l b/storage/xtradb/pars/pars0lex.l deleted file mode 100644 index f800410fa3f..00000000000 --- 
a/storage/xtradb/pars/pars0lex.l +++ /dev/null @@ -1,706 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/****************************************************** -SQL parser lexical analyzer: input file for the GNU Flex lexer generator - -The InnoDB parser is frozen because MySQL takes care of SQL parsing. -Therefore we normally keep the InnoDB parser C files as they are, and do -not automatically generate them from pars0grm.y and pars0lex.l. - -How to make the InnoDB parser and lexer C files: - -1. Run ./make_flex.sh to generate lexer files. - -2. Run ./make_bison.sh to generate parser files. - -These instructions seem to work at least with bison-1.875d and flex-2.5.31 on -Linux. 
- -Created 12/14/1997 Heikki Tuuri -*******************************************************/ - -%option nostdinit -%option 8bit -%option warn -%option pointer -%option never-interactive -%option nodefault -%option noinput -%option nounput -%option noyywrap -%option noyy_scan_buffer -%option noyy_scan_bytes -%option noyy_scan_string -%option nounistd - -%{ -#define YYSTYPE que_node_t* - -#include "univ.i" -#include "pars0pars.h" -#include "pars0grm.h" -#include "pars0sym.h" -#include "mem0mem.h" -#include "os0proc.h" - -#define malloc(A) ut_malloc(A) -#define free(A) ut_free(A) -#define realloc(P, A) ut_realloc(P, A) -#define exit(A) ut_error - -/* Note: We cast &result to int* from yysize_t* */ -#define YY_INPUT(buf, result, max_size) \ - pars_get_lex_chars(buf, (int*) &result, max_size) - -/* String buffer for removing quotes */ -static ulint stringbuf_len_alloc = 0; /* Allocated length */ -static ulint stringbuf_len = 0; /* Current length */ -static char* stringbuf; /* Start of buffer */ -/** Appends a string to the buffer. 
*/ -static -void -string_append( -/*==========*/ - const char* str, /*!< in: string to be appended */ - ulint len) /*!< in: length of the string */ -{ - if (stringbuf == NULL) { - stringbuf = static_cast<char*>(malloc(1)); - stringbuf_len_alloc = 1; - } - - if (stringbuf_len + len > stringbuf_len_alloc) { - while (stringbuf_len + len > stringbuf_len_alloc) { - stringbuf_len_alloc <<= 1; - } - - stringbuf = static_cast<char*>( - realloc(stringbuf, stringbuf_len_alloc)); - } - - memcpy(stringbuf + stringbuf_len, str, len); - stringbuf_len += len; -} - -%} - -DIGIT [0-9] -ID [a-z_A-Z][a-z_A-Z0-9]* -TABLE_NAME [a-z_A-Z][@a-z_A-Z0-9]*\/(#sql-|[a-z_A-Z])[a-z_A-Z0-9]* -BOUND_LIT \:[a-z_A-Z0-9]+ -BOUND_ID \$[a-z_A-Z0-9]+ - -%x comment -%x quoted -%x id -%% - -{DIGIT}+ { - yylval = sym_tab_add_int_lit(pars_sym_tab_global, - atoi(yytext)); - return(PARS_INT_LIT); -} - -{DIGIT}+"."{DIGIT}* { - ut_error; /* not implemented */ - - return(PARS_FLOAT_LIT); -} - -{BOUND_LIT} { - ulint type; - - yylval = sym_tab_add_bound_lit(pars_sym_tab_global, - yytext + 1, &type); - - return((int) type); -} - -{BOUND_ID} { - yylval = sym_tab_add_bound_id(pars_sym_tab_global, - yytext + 1); - - return(PARS_ID_TOKEN); -} - -"'" { -/* Quoted character string literals are handled in an explicit -start state 'quoted'. This state is entered and the buffer for -the scanned string is emptied upon encountering a starting quote. - -In the state 'quoted', only two actions are possible (defined below). */ - BEGIN(quoted); - stringbuf_len = 0; -} -<quoted>[^\']+ { - /* Got a sequence of characters other than "'": - append to string buffer */ - string_append(yytext, yyleng); -} -<quoted>"'"+ { - /* Got a sequence of "'" characters: - append half of them to string buffer, - as "''" represents a single "'". - We apply truncating division, - so that "'''" will result in "'". */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. 
- At the end of the string, we return to the - initial start state and report the scanned - string literal. */ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_str_lit( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - return(PARS_STR_LIT); - } -} - -\" { -/* Quoted identifiers are handled in an explicit start state 'id'. -This state is entered and the buffer for the scanned string is emptied -upon encountering a starting quote. - -In the state 'id', only two actions are possible (defined below). */ - BEGIN(id); - stringbuf_len = 0; -} -<id>[^\"]+ { - /* Got a sequence of characters other than '"': - append to string buffer */ - string_append(yytext, yyleng); -} -<id>\"+ { - /* Got a sequence of '"' characters: - append half of them to string buffer, - as '""' represents a single '"'. - We apply truncating division, - so that '"""' will result in '"'. */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - identifier. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_id( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - - return(PARS_ID_TOKEN); - } -} - -"NULL" { - yylval = sym_tab_add_null_lit(pars_sym_tab_global); - - return(PARS_NULL_LIT); -} - -"SQL" { - /* Implicit cursor name */ - yylval = sym_tab_add_str_lit(pars_sym_tab_global, - (byte*) yytext, yyleng); - return(PARS_SQL_TOKEN); -} - -"AND" { - return(PARS_AND_TOKEN); -} - -"OR" { - return(PARS_OR_TOKEN); -} - -"NOT" { - return(PARS_NOT_TOKEN); -} - -"PROCEDURE" { - return(PARS_PROCEDURE_TOKEN); -} - -"IN" { - return(PARS_IN_TOKEN); -} - -"OUT" { - return(PARS_OUT_TOKEN); -} - -"BINARY" { - return(PARS_BINARY_TOKEN); -} - -"BLOB" { - return(PARS_BLOB_TOKEN); -} - -"INT" { - return(PARS_INT_TOKEN); -} - -"INTEGER" { - return(PARS_INT_TOKEN); -} - -"FLOAT" { - return(PARS_FLOAT_TOKEN); -} - -"CHAR" { - return(PARS_CHAR_TOKEN); -} - -"IS" { - return(PARS_IS_TOKEN); -} - -"BEGIN" { - return(PARS_BEGIN_TOKEN); -} - -"END" { - return(PARS_END_TOKEN); -} - -"IF" { - return(PARS_IF_TOKEN); -} - -"THEN" { - return(PARS_THEN_TOKEN); -} - -"ELSE" { - return(PARS_ELSE_TOKEN); -} - -"ELSIF" { - return(PARS_ELSIF_TOKEN); -} - -"LOOP" { - return(PARS_LOOP_TOKEN); -} - -"WHILE" { - return(PARS_WHILE_TOKEN); -} - -"RETURN" { - return(PARS_RETURN_TOKEN); -} - -"SELECT" { - return(PARS_SELECT_TOKEN); -} - -"SUM" { - return(PARS_SUM_TOKEN); -} - -"COUNT" { - return(PARS_COUNT_TOKEN); -} - -"DISTINCT" { - return(PARS_DISTINCT_TOKEN); -} - -"FROM" { - return(PARS_FROM_TOKEN); -} - -"WHERE" { - return(PARS_WHERE_TOKEN); -} - -"FOR" { - return(PARS_FOR_TOKEN); -} - -"READ" { - return(PARS_READ_TOKEN); -} - -"ORDER" { - return(PARS_ORDER_TOKEN); -} - -"BY" { - return(PARS_BY_TOKEN); -} - -"ASC" { - return(PARS_ASC_TOKEN); -} - -"DESC" { - return(PARS_DESC_TOKEN); -} - -"INSERT" { - return(PARS_INSERT_TOKEN); -} - -"INTO" { - return(PARS_INTO_TOKEN); -} - -"VALUES" { - return(PARS_VALUES_TOKEN); -} - -"UPDATE" { - 
return(PARS_UPDATE_TOKEN); -} - -"SET" { - return(PARS_SET_TOKEN); -} - -"DELETE" { - return(PARS_DELETE_TOKEN); -} - -"CURRENT" { - return(PARS_CURRENT_TOKEN); -} - -"OF" { - return(PARS_OF_TOKEN); -} - -"CREATE" { - return(PARS_CREATE_TOKEN); -} - -"TABLE" { - return(PARS_TABLE_TOKEN); -} - -"COMPACT" { - return(PARS_COMPACT_TOKEN); -} - -"BLOCK_SIZE" { - return(PARS_BLOCK_SIZE_TOKEN); -} - -"INDEX" { - return(PARS_INDEX_TOKEN); -} - -"UNIQUE" { - return(PARS_UNIQUE_TOKEN); -} - -"CLUSTERED" { - return(PARS_CLUSTERED_TOKEN); -} - -"DOES_NOT_FIT_IN_MEMORY" { - return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN); -} - -"ON" { - return(PARS_ON_TOKEN); -} - -"DECLARE" { - return(PARS_DECLARE_TOKEN); -} - -"CURSOR" { - return(PARS_CURSOR_TOKEN); -} - -"OPEN" { - return(PARS_OPEN_TOKEN); -} - -"FETCH" { - return(PARS_FETCH_TOKEN); -} - -"CLOSE" { - return(PARS_CLOSE_TOKEN); -} - -"NOTFOUND" { - return(PARS_NOTFOUND_TOKEN); -} - -"TO_CHAR" { - return(PARS_TO_CHAR_TOKEN); -} - -"TO_NUMBER" { - return(PARS_TO_NUMBER_TOKEN); -} - -"TO_BINARY" { - return(PARS_TO_BINARY_TOKEN); -} - -"BINARY_TO_NUMBER" { - return(PARS_BINARY_TO_NUMBER_TOKEN); -} - -"SUBSTR" { - return(PARS_SUBSTR_TOKEN); -} - -"REPLSTR" { - return(PARS_REPLSTR_TOKEN); -} - -"CONCAT" { - return(PARS_CONCAT_TOKEN); -} - -"INSTR" { - return(PARS_INSTR_TOKEN); -} - -"LENGTH" { - return(PARS_LENGTH_TOKEN); -} - -"SYSDATE" { - return(PARS_SYSDATE_TOKEN); -} - -"PRINTF" { - return(PARS_PRINTF_TOKEN); -} - -"ASSERT" { - return(PARS_ASSERT_TOKEN); -} - -"RND" { - return(PARS_RND_TOKEN); -} - -"RND_STR" { - return(PARS_RND_STR_TOKEN); -} - -"ROW_PRINTF" { - return(PARS_ROW_PRINTF_TOKEN); -} - -"COMMIT" { - return(PARS_COMMIT_TOKEN); -} - -"ROLLBACK" { - return(PARS_ROLLBACK_TOKEN); -} - -"WORK" { - return(PARS_WORK_TOKEN); -} - -"UNSIGNED" { - return(PARS_UNSIGNED_TOKEN); -} - -"EXIT" { - return(PARS_EXIT_TOKEN); -} - -"FUNCTION" { - return(PARS_FUNCTION_TOKEN); -} - -"LOCK" { - return(PARS_LOCK_TOKEN); -} - -"SHARE" { - 
return(PARS_SHARE_TOKEN); -} - -"MODE" { - return(PARS_MODE_TOKEN); -} - -"LIKE" { - return(PARS_LIKE_TOKEN); -} - -"BIGINT" { - return(PARS_BIGINT_TOKEN); -} - -{ID} { - yylval = sym_tab_add_id(pars_sym_tab_global, - (byte*) yytext, - ut_strlen(yytext)); - return(PARS_ID_TOKEN); -} - -{TABLE_NAME} { - yylval = sym_tab_add_id(pars_sym_tab_global, - (byte*) yytext, - ut_strlen(yytext)); - return(PARS_TABLE_NAME_TOKEN); -} - -".." { - return(PARS_DDOT_TOKEN); -} - -":=" { - return(PARS_ASSIGN_TOKEN); -} - -"<=" { - return(PARS_LE_TOKEN); -} - -">=" { - return(PARS_GE_TOKEN); -} - -"<>" { - return(PARS_NE_TOKEN); -} - -"(" { - - return((int)(*yytext)); -} - -"=" { - - return((int)(*yytext)); -} - -">" { - - return((int)(*yytext)); -} - -"<" { - - return((int)(*yytext)); -} - -"," { - - return((int)(*yytext)); -} - -";" { - - return((int)(*yytext)); -} - -")" { - - return((int)(*yytext)); -} - -"+" { - - return((int)(*yytext)); -} - -"-" { - - return((int)(*yytext)); -} - -"*" { - - return((int)(*yytext)); -} - -"/" { - - return((int)(*yytext)); -} - -"%" { - - return((int)(*yytext)); -} - -"{" { - - return((int)(*yytext)); -} - -"}" { - - return((int)(*yytext)); -} - -"?" { - - return((int)(*yytext)); -} - -"/*" BEGIN(comment); /* eat up comment */ - -<comment>[^*]* -<comment>"*"+[^*/]* -<comment>"*"+"/" BEGIN(INITIAL); - -[ \t\n]+ /* eat up whitespace */ - - -. { - fprintf(stderr,"Unrecognized character: %02x\n", - *yytext); - - ut_error; - - return(0); -} - -%% - -/********************************************************************** -Release any resources used by the lexer. 
*/ -UNIV_INTERN -void -pars_lexer_close(void) -/*==================*/ -{ - if (yy_buffer_stack) - yylex_destroy(); - if (stringbuf) - free(stringbuf); - stringbuf = NULL; - stringbuf_len_alloc = stringbuf_len = 0; -} diff --git a/storage/xtradb/pars/pars0opt.cc b/storage/xtradb/pars/pars0opt.cc deleted file mode 100644 index 5a7e1861d74..00000000000 --- a/storage/xtradb/pars/pars0opt.cc +++ /dev/null @@ -1,1261 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file pars/pars0opt.cc -Simple SQL optimizer - -Created 12/21/1997 Heikki Tuuri -*******************************************************/ - -#include "pars0opt.h" - -#ifdef UNIV_NONINL -#include "pars0opt.ic" -#endif - -#include "row0sel.h" -#include "row0ins.h" -#include "row0upd.h" -#include "dict0dict.h" -#include "dict0mem.h" -#include "que0que.h" -#include "pars0grm.h" -#include "pars0pars.h" -#include "lock0lock.h" - -#define OPT_EQUAL 1 /* comparison by = */ -#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */ - -#define OPT_NOT_COND 1 -#define OPT_END_COND 2 -#define OPT_TEST_COND 3 -#define OPT_SCROLL_COND 4 - - -/*******************************************************************//** -Inverts a comparison operator. -@return the equivalent operator when the order of the arguments is switched */ -static -int -opt_invert_cmp_op( -/*==============*/ - int op) /*!< in: operator */ -{ - if (op == '<') { - return('>'); - } else if (op == '>') { - return('<'); - } else if (op == '=') { - return('='); - } else if (op == PARS_LE_TOKEN) { - return(PARS_GE_TOKEN); - } else if (op == PARS_GE_TOKEN) { - return(PARS_LE_TOKEN); - } else { - /* TODO: LIKE operator */ - ut_error; - } - - return(0); -} - -/*******************************************************************//** -Checks if the value of an expression can be calculated BEFORE the nth table -in a join is accessed. If this is the case, it can possibly be used in an -index search for the nth table. 
-@return TRUE if already determined */ -static -ibool -opt_check_exp_determined_before( -/*============================*/ - que_node_t* exp, /*!< in: expression */ - sel_node_t* sel_node, /*!< in: select node */ - ulint nth_table) /*!< in: nth table will be accessed */ -{ - func_node_t* func_node; - sym_node_t* sym_node; - dict_table_t* table; - que_node_t* arg; - ulint i; - - ut_ad(exp && sel_node); - - if (que_node_get_type(exp) == QUE_NODE_FUNC) { - func_node = static_cast<func_node_t*>(exp); - - arg = func_node->args; - - while (arg) { - if (!opt_check_exp_determined_before(arg, sel_node, - nth_table)) { - return(FALSE); - } - - arg = que_node_get_next(arg); - } - - return(TRUE); - } - - ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL); - - sym_node = static_cast<sym_node_t*>(exp); - - if (sym_node->token_type != SYM_COLUMN) { - - return(TRUE); - } - - for (i = 0; i < nth_table; i++) { - - table = sel_node_get_nth_plan(sel_node, i)->table; - - if (sym_node->table == table) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*******************************************************************//** -Looks in a comparison condition if a column value is already restricted by -it BEFORE the nth table is accessed. -@return expression restricting the value of the column, or NULL if not known */ -static -que_node_t* -opt_look_for_col_in_comparison_before( -/*==================================*/ - ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */ - ulint col_no, /*!< in: column number */ - func_node_t* search_cond, /*!< in: comparison condition */ - sel_node_t* sel_node, /*!< in: select node */ - ulint nth_table, /*!< in: nth table in a join (a query - from a single table is considered a - join of 1 table) */ - ulint* op) /*!< out: comparison operator ('=', - PARS_GE_TOKEN, ... 
); this is inverted - if the column appears on the right - side */ -{ - sym_node_t* sym_node; - dict_table_t* table; - que_node_t* exp; - que_node_t* arg; - - ut_ad(search_cond); - - ut_a((search_cond->func == '<') - || (search_cond->func == '>') - || (search_cond->func == '=') - || (search_cond->func == PARS_GE_TOKEN) - || (search_cond->func == PARS_LE_TOKEN) - || (search_cond->func == PARS_LIKE_TOKEN_EXACT) - || (search_cond->func == PARS_LIKE_TOKEN_PREFIX) - || (search_cond->func == PARS_LIKE_TOKEN_SUFFIX) - || (search_cond->func == PARS_LIKE_TOKEN_SUBSTR)); - - table = sel_node_get_nth_plan(sel_node, nth_table)->table; - - if ((cmp_type == OPT_EQUAL) - && (search_cond->func != '=') - && (search_cond->func != PARS_LIKE_TOKEN_EXACT) - && (search_cond->func != PARS_LIKE_TOKEN_PREFIX)) { - - return(NULL); - - } else if ((cmp_type == OPT_COMPARISON) - && (search_cond->func != '<') - && (search_cond->func != '>') - && (search_cond->func != PARS_GE_TOKEN) - && (search_cond->func != PARS_LE_TOKEN) - && (search_cond->func != PARS_LIKE_TOKEN_PREFIX) - && (search_cond->func != PARS_LIKE_TOKEN_SUFFIX)) { - - return(NULL); - } - - arg = search_cond->args; - - if (que_node_get_type(arg) == QUE_NODE_SYMBOL) { - sym_node = static_cast<sym_node_t*>(arg); - - if ((sym_node->token_type == SYM_COLUMN) - && (sym_node->table == table) - && (sym_node->col_no == col_no)) { - - /* sym_node contains the desired column id */ - - /* Check if the expression on the right side of the - operator is already determined */ - - exp = que_node_get_next(arg); - - if (opt_check_exp_determined_before(exp, sel_node, - nth_table)) { - *op = search_cond->func; - - return(exp); - } - } - } - - exp = search_cond->args; - arg = que_node_get_next(arg); - - if (que_node_get_type(arg) == QUE_NODE_SYMBOL) { - sym_node = static_cast<sym_node_t*>(arg); - - if ((sym_node->token_type == SYM_COLUMN) - && (sym_node->table == table) - && (sym_node->col_no == col_no)) { - - if (opt_check_exp_determined_before(exp, 
sel_node, - nth_table)) { - *op = opt_invert_cmp_op(search_cond->func); - - return(exp); - } - } - } - - return(NULL); -} - -/*******************************************************************//** -Looks in a search condition if a column value is already restricted by the -search condition BEFORE the nth table is accessed. Takes into account that -if we will fetch in an ascending order, we cannot utilize an upper limit for -a column value; in a descending order, respectively, a lower limit. -@return expression restricting the value of the column, or NULL if not known */ -static -que_node_t* -opt_look_for_col_in_cond_before( -/*============================*/ - ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */ - ulint col_no, /*!< in: column number */ - func_node_t* search_cond, /*!< in: search condition or NULL */ - sel_node_t* sel_node, /*!< in: select node */ - ulint nth_table, /*!< in: nth table in a join (a query - from a single table is considered a - join of 1 table) */ - ulint* op) /*!< out: comparison operator ('=', - PARS_GE_TOKEN, ... 
) */ -{ - func_node_t* new_cond; - que_node_t* exp; - - if (search_cond == NULL) { - - return(NULL); - } - - ut_a(que_node_get_type(search_cond) == QUE_NODE_FUNC); - ut_a(search_cond->func != PARS_OR_TOKEN); - ut_a(search_cond->func != PARS_NOT_TOKEN); - - if (search_cond->func == PARS_AND_TOKEN) { - new_cond = static_cast<func_node_t*>(search_cond->args); - - exp = opt_look_for_col_in_cond_before(cmp_type, col_no, - new_cond, sel_node, - nth_table, op); - if (exp) { - - return(exp); - } - - new_cond = static_cast<func_node_t*>( - que_node_get_next(new_cond)); - - exp = opt_look_for_col_in_cond_before(cmp_type, col_no, - new_cond, sel_node, - nth_table, op); - return(exp); - } - - exp = opt_look_for_col_in_comparison_before(cmp_type, col_no, - search_cond, sel_node, - nth_table, op); - if (exp == NULL) { - - return(NULL); - } - - /* If we will fetch in an ascending order, we cannot utilize an upper - limit for a column value; in a descending order, respectively, a lower - limit */ - - if (sel_node->asc && ((*op == '<') || (*op == PARS_LE_TOKEN))) { - - return(NULL); - - } else if (!sel_node->asc - && ((*op == '>') || (*op == PARS_GE_TOKEN))) { - - return(NULL); - } - - return(exp); -} - -/*******************************************************************//** -Calculates the goodness for an index according to a select node. The -goodness is 4 times the number of first fields in index whose values we -already know exactly in the query. If we have a comparison condition for -an additional field, 2 point are added. If the index is unique, and we know -all the unique fields for the index we add 1024 points. For a clustered index -we add 1 point. 
-@return goodness */ -static -ulint -opt_calc_index_goodness( -/*====================*/ - dict_index_t* index, /*!< in: index */ - sel_node_t* sel_node, /*!< in: parsed select node */ - ulint nth_table, /*!< in: nth table in a join */ - que_node_t** index_plan, /*!< in/out: comparison expressions for - this index */ - ulint* last_op) /*!< out: last comparison operator, if - goodness > 1 */ -{ - que_node_t* exp; - ulint goodness; - ulint n_fields; - ulint col_no; - ulint op; - ulint j; - - /* At least for now we don't support using FTS indexes for queries - done through InnoDB's own SQL parser. */ - if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) { - return(0); - } - - goodness = 0; - - /* Note that as higher level node pointers in the B-tree contain - page addresses as the last field, we must not put more fields in - the search tuple than dict_index_get_n_unique_in_tree(index); see - the note in btr_cur_search_to_nth_level. */ - - n_fields = dict_index_get_n_unique_in_tree(index); - - for (j = 0; j < n_fields; j++) { - - col_no = dict_index_get_nth_col_no(index, j); - - exp = opt_look_for_col_in_cond_before( - OPT_EQUAL, col_no, - static_cast<func_node_t*>(sel_node->search_cond), - sel_node, nth_table, &op); - if (exp) { - /* The value for this column is exactly known already - at this stage of the join */ - - index_plan[j] = exp; - *last_op = op; - goodness += 4; - } else { - /* Look for non-equality comparisons */ - - exp = opt_look_for_col_in_cond_before( - OPT_COMPARISON, col_no, - static_cast<func_node_t*>( - sel_node->search_cond), - sel_node, nth_table, &op); - if (exp) { - index_plan[j] = exp; - *last_op = op; - goodness += 2; - } - - break; - } - } - - if (goodness >= 4 * dict_index_get_n_unique(index)) { - goodness += 1024; - - if (dict_index_is_clust(index)) { - - goodness += 1024; - } - } - - /* We have to test for goodness here, as last_op may not be set */ - if (goodness && dict_index_is_clust(index)) { - - goodness++; - } - - 
return(goodness); -} - -/*******************************************************************//** -Calculates the number of matched fields based on an index goodness. -@return number of excatly or partially matched fields */ -UNIV_INLINE -ulint -opt_calc_n_fields_from_goodness( -/*============================*/ - ulint goodness) /*!< in: goodness */ -{ - return(((goodness % 1024) + 2) / 4); -} - -/*******************************************************************//** -Converts a comparison operator to the corresponding search mode PAGE_CUR_GE, -... -@return search mode */ -UNIV_INLINE -ulint -opt_op_to_search_mode( -/*==================*/ - ibool asc, /*!< in: TRUE if the rows should be fetched in an - ascending order */ - ulint op) /*!< in: operator '=', PARS_GE_TOKEN, ... */ -{ - if (op == '=' - || op == PARS_LIKE_TOKEN_EXACT - || op == PARS_LIKE_TOKEN_PREFIX - || op == PARS_LIKE_TOKEN_SUFFIX - || op == PARS_LIKE_TOKEN_SUBSTR) { - - if (asc) { - return(PAGE_CUR_GE); - } else { - return(PAGE_CUR_LE); - } - } else if (op == '<') { - ut_a(!asc); - return(PAGE_CUR_L); - } else if (op == '>') { - ut_a(asc); - return(PAGE_CUR_G); - } else if (op == PARS_GE_TOKEN) { - ut_a(asc); - return(PAGE_CUR_GE); - } else if (op == PARS_LE_TOKEN) { - ut_a(!asc); - return(PAGE_CUR_LE); - } else { - ut_error; - } - - return(0); -} - -/*******************************************************************//** -Determines if a node is an argument node of a function node. 
-@return TRUE if is an argument */ -static -ibool -opt_is_arg( -/*=======*/ - que_node_t* arg_node, /*!< in: possible argument node */ - func_node_t* func_node) /*!< in: function node */ -{ - que_node_t* arg; - - arg = func_node->args; - - while (arg) { - if (arg == arg_node) { - - return(TRUE); - } - - arg = que_node_get_next(arg); - } - - return(FALSE); -} - -/*******************************************************************//** -Decides if the fetching of rows should be made in a descending order, and -also checks that the chosen query plan produces a result which satisfies -the order-by. */ -static -void -opt_check_order_by( -/*===============*/ - sel_node_t* sel_node) /*!< in: select node; asserts an error - if the plan does not agree with the - order-by */ -{ - order_node_t* order_node; - dict_table_t* order_table; - ulint order_col_no; - plan_t* plan; - ulint i; - - if (!sel_node->order_by) { - - return; - } - - order_node = sel_node->order_by; - order_col_no = order_node->column->col_no; - order_table = order_node->column->table; - - /* If there is an order-by clause, the first non-exactly matched field - in the index used for the last table in the table list should be the - column defined in the order-by clause, and for all the other tables - we should get only at most a single row, otherwise we cannot presently - calculate the order-by, as we have no sort utility */ - - for (i = 0; i < sel_node->n_tables; i++) { - - plan = sel_node_get_nth_plan(sel_node, i); - - if (i < sel_node->n_tables - 1) { - ut_a(dict_index_get_n_unique(plan->index) - <= plan->n_exact_match); - } else { - ut_a(plan->table == order_table); - - ut_a((dict_index_get_n_unique(plan->index) - <= plan->n_exact_match) - || (dict_index_get_nth_col_no(plan->index, - plan->n_exact_match) - == order_col_no)); - } - } -} - -/*******************************************************************//** -Optimizes a select. Decides which indexes to tables to use. 
The tables -are accessed in the order that they were written to the FROM part in the -select statement. */ -static -void -opt_search_plan_for_table( -/*======================*/ - sel_node_t* sel_node, /*!< in: parsed select node */ - ulint i, /*!< in: this is the ith table */ - dict_table_t* table) /*!< in: table */ -{ - plan_t* plan; - dict_index_t* index; - dict_index_t* best_index; - ulint n_fields; - ulint goodness; - ulint last_op = 75946965; /* Eliminate a Purify - warning */ - ulint best_goodness; - ulint best_last_op = 0; /* remove warning */ - que_node_t* index_plan[256]; - que_node_t* best_index_plan[256]; - - plan = sel_node_get_nth_plan(sel_node, i); - - plan->table = table; - plan->asc = sel_node->asc; - plan->pcur_is_open = FALSE; - plan->cursor_at_end = FALSE; - - /* Calculate goodness for each index of the table */ - - index = dict_table_get_first_index(table); - best_index = index; /* Eliminate compiler warning */ - best_goodness = 0; - - /* should be do ... until ? comment by Jani */ - while (index) { - goodness = opt_calc_index_goodness(index, sel_node, i, - index_plan, &last_op); - if (goodness > best_goodness) { - - best_index = index; - best_goodness = goodness; - n_fields = opt_calc_n_fields_from_goodness(goodness); - - ut_memcpy(best_index_plan, index_plan, - n_fields * sizeof(void*)); - best_last_op = last_op; - } - - dict_table_next_uncorrupted_index(index); - } - - plan->index = best_index; - - n_fields = opt_calc_n_fields_from_goodness(best_goodness); - - if (n_fields == 0) { - plan->tuple = NULL; - plan->n_exact_match = 0; - } else { - plan->tuple = dtuple_create(pars_sym_tab_global->heap, - n_fields); - dict_index_copy_types(plan->tuple, plan->index, n_fields); - - plan->tuple_exps = static_cast<que_node_t**>( - mem_heap_alloc( - pars_sym_tab_global->heap, - n_fields * sizeof(void*))); - - ut_memcpy(plan->tuple_exps, best_index_plan, - n_fields * sizeof(void*)); - if (best_last_op == '=' - || best_last_op == PARS_LIKE_TOKEN_EXACT - || 
best_last_op == PARS_LIKE_TOKEN_PREFIX - || best_last_op == PARS_LIKE_TOKEN_SUFFIX - || best_last_op == PARS_LIKE_TOKEN_SUBSTR) { - plan->n_exact_match = n_fields; - } else { - plan->n_exact_match = n_fields - 1; - } - - plan->mode = opt_op_to_search_mode(sel_node->asc, - best_last_op); - } - - if (dict_index_is_clust(best_index) - && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) { - - plan->unique_search = TRUE; - } else { - plan->unique_search = FALSE; - } - - plan->old_vers_heap = NULL; - - btr_pcur_init(&(plan->pcur)); - btr_pcur_init(&(plan->clust_pcur)); -} - -/*******************************************************************//** -Looks at a comparison condition and decides if it can, and need, be tested for -a table AFTER the table has been accessed. -@return OPT_NOT_COND if not for this table, else OPT_END_COND, -OPT_TEST_COND, or OPT_SCROLL_COND, where the last means that the -condition need not be tested, except when scroll cursors are used */ -static -ulint -opt_classify_comparison( -/*====================*/ - sel_node_t* sel_node, /*!< in: select node */ - ulint i, /*!< in: ith table in the join */ - func_node_t* cond) /*!< in: comparison condition */ -{ - plan_t* plan; - ulint n_fields; - ulint op; - ulint j; - - ut_ad(cond && sel_node); - - plan = sel_node_get_nth_plan(sel_node, i); - - /* Check if the condition is determined after the ith table has been - accessed, but not after the i - 1:th */ - - if (!opt_check_exp_determined_before(cond, sel_node, i + 1)) { - - return(OPT_NOT_COND); - } - - if ((i > 0) && opt_check_exp_determined_before(cond, sel_node, i)) { - - return(OPT_NOT_COND); - } - - /* If the condition is an exact match condition used in constructing - the search tuple, it is classified as OPT_END_COND */ - - if (plan->tuple) { - n_fields = dtuple_get_n_fields(plan->tuple); - } else { - n_fields = 0; - } - - for (j = 0; j < plan->n_exact_match; j++) { - - if (opt_is_arg(plan->tuple_exps[j], cond)) { - - 
return(OPT_END_COND); - } - } - - /* If the condition is an non-exact match condition used in - constructing the search tuple, it is classified as OPT_SCROLL_COND. - When the cursor is positioned, and if a non-scroll cursor is used, - there is no need to test this condition; if a scroll cursor is used - the testing is necessary when the cursor is reversed. */ - - if ((n_fields > plan->n_exact_match) - && opt_is_arg(plan->tuple_exps[n_fields - 1], cond)) { - - return(OPT_SCROLL_COND); - } - - /* If the condition is a non-exact match condition on the first field - in index for which there is no exact match, and it limits the search - range from the opposite side of the search tuple already BEFORE we - access the table, it is classified as OPT_END_COND */ - - if ((dict_index_get_n_fields(plan->index) > plan->n_exact_match) - && opt_look_for_col_in_comparison_before( - OPT_COMPARISON, - dict_index_get_nth_col_no(plan->index, - plan->n_exact_match), - cond, sel_node, i, &op)) { - - if (sel_node->asc && ((op == '<') || (op == PARS_LE_TOKEN))) { - - return(OPT_END_COND); - } - - if (!sel_node->asc && ((op == '>') || (op == PARS_GE_TOKEN))) { - - return(OPT_END_COND); - } - } - - /* Otherwise, cond is classified as OPT_TEST_COND */ - - return(OPT_TEST_COND); -} - -/*******************************************************************//** -Recursively looks for test conditions for a table in a join. 
*/
static
void
opt_find_test_conds(
/*================*/
	sel_node_t*	sel_node,	/*!< in: select node */
	ulint		i,		/*!< in: ith table in the join */
	func_node_t*	cond)		/*!< in: conjunction of search
					conditions or NULL */
{
	if (cond == NULL) {
		/* Nothing to classify */

		return;
	}

	if (cond->func == PARS_AND_TOKEN) {
		/* A conjunction: handle its first operand, then the operand
		that follows it in the argument list */

		func_node_t*	operand = static_cast<func_node_t*>(
			cond->args);

		opt_find_test_conds(sel_node, i, operand);

		operand = static_cast<func_node_t*>(
			que_node_get_next(operand));

		opt_find_test_conds(sel_node, i, operand);

		return;
	}

	/* A single comparison: store it in the list that matches its class;
	conditions of any other class are not recorded */

	plan_t*	plan = sel_node_get_nth_plan(sel_node, i);

	switch (opt_classify_comparison(sel_node, i, cond)) {
	case OPT_END_COND:
		UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond);
		break;

	case OPT_TEST_COND:
		UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond);
		break;

	default:
		break;
	}
}

/*******************************************************************//**
Normalizes a list of comparison conditions so that a column of the table
appears on the left side of the comparison if possible. This is accomplished
by switching the arguments of the operator.
*/ -static -void -opt_normalize_cmp_conds( -/*====================*/ - func_node_t* cond, /*!< in: first in a list of comparison - conditions, or NULL */ - dict_table_t* table) /*!< in: table */ -{ - que_node_t* arg1; - que_node_t* arg2; - sym_node_t* sym_node; - - while (cond) { - arg1 = cond->args; - arg2 = que_node_get_next(arg1); - - if (que_node_get_type(arg2) == QUE_NODE_SYMBOL) { - - sym_node = static_cast<sym_node_t*>(arg2); - - if ((sym_node->token_type == SYM_COLUMN) - && (sym_node->table == table)) { - - /* Switch the order of the arguments */ - - cond->args = arg2; - que_node_list_add_last(NULL, arg2); - que_node_list_add_last(arg2, arg1); - - /* Invert the operator */ - cond->func = opt_invert_cmp_op(cond->func); - } - } - - cond = UT_LIST_GET_NEXT(cond_list, cond); - } -} - -/*******************************************************************//** -Finds out the search condition conjuncts we can, and need, to test as the ith -table in a join is accessed. The search tuple can eliminate the need to test -some conjuncts. */ -static -void -opt_determine_and_normalize_test_conds( -/*===================================*/ - sel_node_t* sel_node, /*!< in: select node */ - ulint i) /*!< in: ith table in the join */ -{ - plan_t* plan; - - plan = sel_node_get_nth_plan(sel_node, i); - - UT_LIST_INIT(plan->end_conds); - UT_LIST_INIT(plan->other_conds); - - /* Recursively go through the conjuncts and classify them */ - - opt_find_test_conds( - sel_node, - i, - static_cast<func_node_t*>(sel_node->search_cond)); - - opt_normalize_cmp_conds(UT_LIST_GET_FIRST(plan->end_conds), - plan->table); - - ut_a(UT_LIST_GET_LEN(plan->end_conds) >= plan->n_exact_match); -} - -/*******************************************************************//** -Looks for occurrences of the columns of the table in the query subgraph and -adds them to the list of columns if an occurrence of the same column does not -already exist in the list. 
If the column is already in the list, puts a value -indirection to point to the occurrence in the column list, except if the -column occurrence we are looking at is in the column list, in which case -nothing is done. */ -UNIV_INTERN -void -opt_find_all_cols( -/*==============*/ - ibool copy_val, /*!< in: if TRUE, new found columns are - added as columns to copy */ - dict_index_t* index, /*!< in: index of the table to use */ - sym_node_list_t* col_list, /*!< in: base node of a list where - to add new found columns */ - plan_t* plan, /*!< in: plan or NULL */ - que_node_t* exp) /*!< in: expression or condition or - NULL */ -{ - func_node_t* func_node; - que_node_t* arg; - sym_node_t* sym_node; - sym_node_t* col_node; - ulint col_pos; - - if (exp == NULL) { - - return; - } - - if (que_node_get_type(exp) == QUE_NODE_FUNC) { - func_node = static_cast<func_node_t*>(exp); - - for (arg = func_node->args; - arg != 0; - arg = que_node_get_next(arg)) { - - opt_find_all_cols( - copy_val, index, col_list, plan, arg); - } - - return; - } - - ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL); - - sym_node = static_cast<sym_node_t*>(exp); - - if (sym_node->token_type != SYM_COLUMN) { - - return; - } - - if (sym_node->table != index->table) { - - return; - } - - /* Look for an occurrence of the same column in the plan column - list */ - - col_node = UT_LIST_GET_FIRST(*col_list); - - while (col_node) { - if (col_node->col_no == sym_node->col_no) { - - if (col_node == sym_node) { - /* sym_node was already in a list: do - nothing */ - - return; - } - - /* Put an indirection */ - sym_node->indirection = col_node; - sym_node->alias = col_node; - - return; - } - - col_node = UT_LIST_GET_NEXT(col_var_list, col_node); - } - - /* The same column did not occur in the list: add it */ - - UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node); - - sym_node->copy_val = copy_val; - - /* Fill in the field_no fields in sym_node */ - - sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos( 
- dict_table_get_first_index(index->table), sym_node->col_no, - NULL); - if (!dict_index_is_clust(index)) { - - ut_a(plan); - - col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no, - NULL); - - if (col_pos == ULINT_UNDEFINED) { - - plan->must_get_clust = TRUE; - } - - sym_node->field_nos[SYM_SEC_FIELD_NO] = col_pos; - } -} - -/*******************************************************************//** -Looks for occurrences of the columns of the table in conditions which are -not yet determined AFTER the join operation has fetched a row in the ith -table. The values for these column must be copied to dynamic memory for -later use. */ -static -void -opt_find_copy_cols( -/*===============*/ - sel_node_t* sel_node, /*!< in: select node */ - ulint i, /*!< in: ith table in the join */ - func_node_t* search_cond) /*!< in: search condition or NULL */ -{ - func_node_t* new_cond; - plan_t* plan; - - if (search_cond == NULL) { - - return; - } - - ut_ad(que_node_get_type(search_cond) == QUE_NODE_FUNC); - - if (search_cond->func == PARS_AND_TOKEN) { - new_cond = static_cast<func_node_t*>(search_cond->args); - - opt_find_copy_cols(sel_node, i, new_cond); - - new_cond = static_cast<func_node_t*>( - que_node_get_next(new_cond)); - - opt_find_copy_cols(sel_node, i, new_cond); - - return; - } - - if (!opt_check_exp_determined_before(search_cond, sel_node, i + 1)) { - - /* Any ith table columns occurring in search_cond should be - copied, as this condition cannot be tested already on the - fetch from the ith table */ - - plan = sel_node_get_nth_plan(sel_node, i); - - opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan, - search_cond); - } -} - -/*******************************************************************//** -Classifies the table columns according to whether we use the column only while -holding the latch on the page, or whether we have to copy the column value to -dynamic memory. 
Puts the first occurrence of a column to either list in the
plan node, and puts indirections to later occurrences of the column. */
static
void
opt_classify_cols(
/*==============*/
	sel_node_t*	sel_node,	/*!< in: select node */
	ulint		i)		/*!< in: ith table in the join */
{
	plan_t*		plan = sel_node_get_nth_plan(sel_node, i);
	func_node_t*	search_cond = static_cast<func_node_t*>(
		sel_node->search_cond);

	/* Start from FALSE; opt_find_all_cols() raises the flag when a
	column is missing from the chosen secondary index */

	plan->must_get_clust = FALSE;

	UT_LIST_INIT(plan->columns);

	/* Every select list column is copied, hence TRUE */

	que_node_t*	item = sel_node->select_list;

	while (item != 0) {
		opt_find_all_cols(
			TRUE, plan->index, &(plan->columns), plan, item);

		item = que_node_get_next(item);
	}

	/* Columns needed by conditions that cannot be tested yet are
	copied as well */

	opt_find_copy_cols(sel_node, i, search_cond);

	/* Whatever remains in the search condition is only needed
	temporarily, hence FALSE */

	opt_find_all_cols(
		FALSE, plan->index, &plan->columns, plan, search_cond);
}

/*******************************************************************//**
Fills in the info in plan which is used in accessing a clustered index
record. The columns must already be classified for the plan node.
*/
static
void
opt_clust_access(
/*=============*/
	sel_node_t*	sel_node,	/*!< in: select node */
	ulint		n)		/*!< in: nth table in select */
{
	plan_t*		plan = sel_node_get_nth_plan(sel_node, n);
	dict_index_t*	sec_index = plan->index;

	/* Preliminary value; the final one depends on the environment of
	the select statement */

	plan->no_prefetch = FALSE;

	if (dict_index_is_clust(sec_index)) {
		/* The plan already uses the clustered index: no mapping
		from a secondary index record is needed */

		plan->clust_map = NULL;
		plan->clust_ref = NULL;

		return;
	}

	dict_index_t*	clust_index = dict_table_get_first_index(
		sec_index->table);
	const ulint	n_unique = dict_index_get_n_unique(clust_index);
	mem_heap_t*	heap = pars_sym_tab_global->heap;

	/* The row reference tuple has one field per unique field of the
	clustered index */

	plan->clust_ref = dtuple_create(heap, n_unique);

	dict_index_copy_types(plan->clust_ref, clust_index, n_unique);

	plan->clust_map = static_cast<ulint*>(
		mem_heap_alloc(heap, n_unique * sizeof(ulint)));

	for (ulint k = 0; k < n_unique; k++) {
		/* Position in the secondary index of the kth clustered
		index field */

		const ulint	pos = dict_index_get_nth_field_pos(
			sec_index, clust_index, k);

		ut_a(pos != ULINT_UNDEFINED);

		/* Only queries to InnoDB's internal system tables are
		optimized here, and those should not have column prefix
		indexes: report it if one shows up */

		const bool	has_prefix =
			dict_index_get_nth_field(sec_index, pos)->prefix_len
			!= 0
			|| dict_index_get_nth_field(clust_index, k)
			->prefix_len != 0;

		if (has_prefix) {
			fprintf(stderr,
				"InnoDB: Error in pars0opt.cc:"
				" table %s has prefix_len != 0\n",
				sec_index->table_name);
		}

		plan->clust_map[k] = pos;

		ut_ad(pos != ULINT_UNDEFINED);
	}
}

/*******************************************************************//**
Optimizes a select. Decides which indexes to tables to use. The tables
are accessed in the order that they were written to the FROM part in the
select statement.
*/ -UNIV_INTERN -void -opt_search_plan( -/*============*/ - sel_node_t* sel_node) /*!< in: parsed select node */ -{ - sym_node_t* table_node; - dict_table_t* table; - order_node_t* order_by; - ulint i; - - sel_node->plans = static_cast<plan_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, - sel_node->n_tables * sizeof(plan_t))); - - /* Analyze the search condition to find out what we know at each - join stage about the conditions that the columns of a table should - satisfy */ - - table_node = sel_node->table_list; - - if (sel_node->order_by == NULL) { - sel_node->asc = TRUE; - } else { - order_by = sel_node->order_by; - - sel_node->asc = order_by->asc; - } - - for (i = 0; i < sel_node->n_tables; i++) { - - table = table_node->table; - - /* Choose index through which to access the table */ - - opt_search_plan_for_table(sel_node, i, table); - - /* Determine the search condition conjuncts we can test at - this table; normalize the end conditions */ - - opt_determine_and_normalize_test_conds(sel_node, i); - - table_node = static_cast<sym_node_t*>( - que_node_get_next(table_node)); - } - - table_node = sel_node->table_list; - - for (i = 0; i < sel_node->n_tables; i++) { - - /* Classify the table columns into those we only need to access - but not copy, and to those we must copy to dynamic memory */ - - opt_classify_cols(sel_node, i); - - /* Calculate possible info for accessing the clustered index - record */ - - opt_clust_access(sel_node, i); - - table_node = static_cast<sym_node_t*>( - que_node_get_next(table_node)); - } - - /* Check that the plan obeys a possible order-by clause: if not, - an assertion error occurs */ - - opt_check_order_by(sel_node); - -#ifdef UNIV_SQL_DEBUG - opt_print_query_plan(sel_node); -#endif -} - -/********************************************************************//** -Prints info of a query plan. 
*/ -UNIV_INTERN -void -opt_print_query_plan( -/*=================*/ - sel_node_t* sel_node) /*!< in: select node */ -{ - plan_t* plan; - ulint n_fields; - ulint i; - - fputs("QUERY PLAN FOR A SELECT NODE\n", stderr); - - fputs(sel_node->asc ? "Asc. search; " : "Desc. search; ", stderr); - - if (sel_node->set_x_locks) { - fputs("sets row x-locks; ", stderr); - ut_a(sel_node->row_lock_mode == LOCK_X); - ut_a(!sel_node->consistent_read); - } else if (sel_node->consistent_read) { - fputs("consistent read; ", stderr); - } else { - ut_a(sel_node->row_lock_mode == LOCK_S); - fputs("sets row s-locks; ", stderr); - } - - putc('\n', stderr); - - for (i = 0; i < sel_node->n_tables; i++) { - plan = sel_node_get_nth_plan(sel_node, i); - - if (plan->tuple) { - n_fields = dtuple_get_n_fields(plan->tuple); - } else { - n_fields = 0; - } - - fputs("Table ", stderr); - dict_index_name_print(stderr, NULL, plan->index); - fprintf(stderr,"; exact m. %lu, match %lu, end conds %lu\n", - (unsigned long) plan->n_exact_match, - (unsigned long) n_fields, - (unsigned long) UT_LIST_GET_LEN(plan->end_conds)); - } -} diff --git a/storage/xtradb/pars/pars0pars.cc b/storage/xtradb/pars/pars0pars.cc deleted file mode 100644 index ce61d6e1e3b..00000000000 --- a/storage/xtradb/pars/pars0pars.cc +++ /dev/null @@ -1,2670 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, -Fifth Floor, Boston, MA 02110-1301 USA - -*****************************************************************************/ - -/**************************************************//** -@file pars/pars0pars.c -SQL parser - -Created 11/19/1996 Heikki Tuuri -*******************************************************/ - -/* Historical note: Innobase executed its first SQL string (CREATE TABLE) -on 1/27/1998 */ - -#include "pars0pars.h" - -#ifdef UNIV_NONINL -#include "pars0pars.ic" -#endif - -#include "row0sel.h" -#include "row0ins.h" -#include "row0upd.h" -#include "dict0dict.h" -#include "dict0mem.h" -#include "dict0crea.h" -#include "que0que.h" -#include "pars0grm.h" -#include "pars0opt.h" -#include "data0data.h" -#include "data0type.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "lock0lock.h" -#include "eval0eval.h" - -#ifdef UNIV_SQL_DEBUG -/** If the following is set TRUE, the lexer will print the SQL string -as it tokenizes it */ -UNIV_INTERN ibool pars_print_lexed = FALSE; -#endif /* UNIV_SQL_DEBUG */ - -/* Global variable used while parsing a single procedure or query : the code is -NOT re-entrant */ -UNIV_INTERN sym_tab_t* pars_sym_tab_global; - -/* Global variables used to denote certain reserved words, used in -constructing the parsing tree */ - -UNIV_INTERN pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN}; -UNIV_INTERN pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN}; -UNIV_INTERN pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN}; -UNIV_INTERN pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN}; -UNIV_INTERN pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN}; -UNIV_INTERN pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN}; -UNIV_INTERN pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN}; -UNIV_INTERN pars_res_word_t 
pars_instr_token = {PARS_INSTR_TOKEN}; -UNIV_INTERN pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN}; -UNIV_INTERN pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN}; -UNIV_INTERN pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN}; -UNIV_INTERN pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN}; -UNIV_INTERN pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN}; -UNIV_INTERN pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN}; -UNIV_INTERN pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN}; -UNIV_INTERN pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN}; -UNIV_INTERN pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN}; -UNIV_INTERN pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN}; -UNIV_INTERN pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN}; -UNIV_INTERN pars_res_word_t pars_int_token = {PARS_INT_TOKEN}; -UNIV_INTERN pars_res_word_t pars_bigint_token = {PARS_BIGINT_TOKEN}; -UNIV_INTERN pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN}; -UNIV_INTERN pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN}; -UNIV_INTERN pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN}; -UNIV_INTERN pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN}; -UNIV_INTERN pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN}; -UNIV_INTERN pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN}; -UNIV_INTERN pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN}; -UNIV_INTERN pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN}; -UNIV_INTERN pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN}; -UNIV_INTERN pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN}; - -/** Global variable used to denote the '*' in SELECT * FROM.. 
*/ -UNIV_INTERN ulint pars_star_denoter = 12345678; - -/******************************************************************** -Get user function with the given name.*/ -UNIV_INLINE -pars_user_func_t* -pars_info_lookup_user_func( -/*=======================*/ - /* out: user func, or NULL if not - found */ - pars_info_t* info, /* in: info struct */ - const char* name) /* in: function name to find*/ -{ - if (info && info->funcs) { - ulint i; - ib_vector_t* vec = info->funcs; - - for (i = 0; i < ib_vector_size(vec); i++) { - pars_user_func_t* puf; - - puf = static_cast<pars_user_func_t*>( - ib_vector_get(vec, i)); - - if (strcmp(puf->name, name) == 0) { - return(puf); - } - } - } - - return(NULL); -} - -/******************************************************************** -Get bound identifier with the given name.*/ -UNIV_INLINE -pars_bound_id_t* -pars_info_lookup_bound_id( -/*======================*/ - /* out: bound literal, or NULL if - not found */ - pars_info_t* info, /* in: info struct */ - const char* name) /* in: bound literal name to find */ -{ - if (info && info->bound_ids) { - ulint i; - ib_vector_t* vec = info->bound_ids; - - for (i = 0; i < ib_vector_size(vec); i++) { - pars_bound_id_t* bid; - - bid = static_cast<pars_bound_id_t*>( - ib_vector_get(vec, i)); - - if (strcmp(bid->name, name) == 0) { - return(bid); - } - } - } - - return(NULL); -} - -/******************************************************************** -Get bound literal with the given name.*/ -UNIV_INLINE -pars_bound_lit_t* -pars_info_lookup_bound_lit( -/*=======================*/ - /* out: bound literal, or NULL if - not found */ - pars_info_t* info, /* in: info struct */ - const char* name) /* in: bound literal name to find */ -{ - if (info && info->bound_lits) { - ulint i; - ib_vector_t* vec = info->bound_lits; - - for (i = 0; i < ib_vector_size(vec); i++) { - pars_bound_lit_t* pbl; - - pbl = static_cast<pars_bound_lit_t*>( - ib_vector_get(vec, i)); - - if (strcmp(pbl->name, name) == 0) { - 
return(pbl); - } - } - } - - return(NULL); -} - -/*********************************************************************//** -Determines the class of a function code. -@return function class: PARS_FUNC_ARITH, ... */ -static -ulint -pars_func_get_class( -/*================*/ - int func) /*!< in: function code: '=', PARS_GE_TOKEN, ... */ -{ - switch (func) { - case '+': case '-': case '*': case '/': - return(PARS_FUNC_ARITH); - - case '=': case '<': case '>': - case PARS_GE_TOKEN: case PARS_LE_TOKEN: case PARS_NE_TOKEN: - return(PARS_FUNC_CMP); - - case PARS_AND_TOKEN: case PARS_OR_TOKEN: case PARS_NOT_TOKEN: - return(PARS_FUNC_LOGICAL); - - case PARS_COUNT_TOKEN: case PARS_SUM_TOKEN: - return(PARS_FUNC_AGGREGATE); - - case PARS_TO_CHAR_TOKEN: - case PARS_TO_NUMBER_TOKEN: - case PARS_TO_BINARY_TOKEN: - case PARS_BINARY_TO_NUMBER_TOKEN: - case PARS_SUBSTR_TOKEN: - case PARS_CONCAT_TOKEN: - case PARS_LENGTH_TOKEN: - case PARS_INSTR_TOKEN: - case PARS_SYSDATE_TOKEN: - case PARS_NOTFOUND_TOKEN: - case PARS_PRINTF_TOKEN: - case PARS_ASSERT_TOKEN: - case PARS_RND_TOKEN: - case PARS_RND_STR_TOKEN: - case PARS_REPLSTR_TOKEN: - return(PARS_FUNC_PREDEFINED); - - default: - return(PARS_FUNC_OTHER); - } -} - -/*********************************************************************//** -Parses an operator or predefined function expression. 
-@return own: function node in a query tree */ -static -func_node_t* -pars_func_low( -/*==========*/ - int func, /*!< in: function token code */ - que_node_t* arg) /*!< in: first argument in the argument list */ -{ - func_node_t* node; - - node = static_cast<func_node_t*>( - mem_heap_alloc(pars_sym_tab_global->heap, sizeof(func_node_t))); - - node->common.type = QUE_NODE_FUNC; - dfield_set_data(&(node->common.val), NULL, 0); - node->common.val_buf_size = 0; - - node->func = func; - - node->fclass = pars_func_get_class(func); - - node->args = arg; - - UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list, - node); - return(node); -} - -/*********************************************************************//** -Parses a function expression. -@return own: function node in a query tree */ -UNIV_INTERN -func_node_t* -pars_func( -/*======*/ - que_node_t* res_word,/*!< in: function name reserved word */ - que_node_t* arg) /*!< in: first argument in the argument list */ -{ - return(pars_func_low(((pars_res_word_t*) res_word)->code, arg)); -} - -/************************************************************************* -Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded -within the search string.*/ - -int -pars_like_rebind( -/*=============*/ - /* out, own: function node in a query tree */ - sym_node_t* node, /* in: The search string node.*/ - const byte* ptr, /* in: literal to (re) bind */ - ulint ptr_len)/* in: length of literal to (re) bind*/ -{ - dtype_t* dtype; - dfield_t* dfield; - ib_like_t op_check; - sym_node_t* like_node; - sym_node_t* str_node = NULL; - ib_like_t op = IB_LIKE_EXACT; - int func = PARS_LIKE_TOKEN_EXACT; - - /* Is this a STRING% ? */ - if (ptr[ptr_len - 1] == '%') { - op = IB_LIKE_PREFIX; - } - - /* Is this a '%STRING' or %STRING% ?*/ - if (*ptr == '%') { - op = (op == IB_LIKE_PREFIX) ? IB_LIKE_SUBSTR : IB_LIKE_SUFFIX; - } - - if (node->like_node == NULL) { - /* Add the LIKE operator info node to the node list. 
- This will be used during the comparison phase to determine - how to match.*/ - like_node = sym_tab_add_int_lit(node->sym_table, op); - que_node_list_add_last(NULL, like_node); - node->like_node = like_node; - str_node = sym_tab_add_str_lit(node->sym_table, ptr, ptr_len); - que_node_list_add_last(like_node, str_node); - } else { - like_node = node->like_node; - - /* Change the value of the string in the existing - string node of like node */ - str_node = static_cast<sym_node_t*>( - que_node_list_get_last(like_node)); - - /* Must find the string node */ - ut_a(str_node); - ut_a(str_node != like_node); - ut_a(str_node->token_type == SYM_LIT); - - dfield = que_node_get_val(str_node); - dfield_set_data(dfield, ptr, ptr_len); - } - - dfield = que_node_get_val(like_node); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_INT); - op_check = static_cast<ib_like_t>( - mach_read_from_4(static_cast<byte*>(dfield_get_data(dfield)))); - - switch (op_check) { - case IB_LIKE_PREFIX: - case IB_LIKE_SUFFIX: - case IB_LIKE_SUBSTR: - case IB_LIKE_EXACT: - break; - - default: - ut_error; - } - - mach_write_to_4(static_cast<byte*>(dfield_get_data(dfield)), op); - - dfield = que_node_get_val(node); - - /* Adjust the length of the search value so the '%' is not - visible. Then create and add a search string node to the - search value node. Searching for %SUFFIX and %SUBSTR% requires - a full table scan and so we set the search value to ''. 
- For PREFIX% we simply remove the trailing '%'.*/ - - switch (op) { - case IB_LIKE_EXACT: - dfield = que_node_get_val(str_node); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR); - - dfield_set_data(dfield, ptr, ptr_len); - break; - - case IB_LIKE_PREFIX: - func = PARS_LIKE_TOKEN_PREFIX; - - /* Modify the original node */ - dfield_set_len(dfield, ptr_len - 1); - - dfield = que_node_get_val(str_node); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR); - - dfield_set_data(dfield, ptr, ptr_len - 1); - break; - - case IB_LIKE_SUFFIX: - func = PARS_LIKE_TOKEN_SUFFIX; - - /* Modify the original node */ - /* Make it an '' empty string */ - dfield_set_len(dfield, 0); - - dfield = que_node_get_val(str_node); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR); - - dfield_set_data(dfield, ptr + 1, ptr_len - 1); - break; - - case IB_LIKE_SUBSTR: - func = PARS_LIKE_TOKEN_SUBSTR; - - /* Modify the original node */ - /* Make it an '' empty string */ - dfield_set_len(dfield, 0); - - dfield = que_node_get_val(str_node); - dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR); - - dfield_set_data(dfield, ptr + 1, ptr_len - 2); - break; - - default: - ut_error; - } - - return(func); -} - -/************************************************************************* -Parses a LIKE operator expression. 
*/ -static -int -pars_like_op( -/*=========*/ - /* out, own: function node in a query tree */ - que_node_t* arg) /* in: LIKE comparison string.*/ -{ - char* ptr; - ulint ptr_len; - int func = PARS_LIKE_TOKEN_EXACT; - dfield_t* dfield = que_node_get_val(arg); - dtype_t* dtype = dfield_get_type(dfield); - - ut_a(dtype_get_mtype(dtype) == DATA_CHAR - || dtype_get_mtype(dtype) == DATA_VARCHAR); - - ptr = static_cast<char*>(dfield_get_data(dfield)); - ptr_len = strlen(ptr); - - if (ptr_len) { - - func = pars_like_rebind( - static_cast<sym_node_t*>(arg), (byte*) ptr, ptr_len); - } - - return(func); -} -/*********************************************************************//** -Parses an operator expression. -@return own: function node in a query tree */ -UNIV_INTERN -func_node_t* -pars_op( -/*====*/ - int func, /*!< in: operator token code */ - que_node_t* arg1, /*!< in: first argument */ - que_node_t* arg2) /*!< in: second argument or NULL for an unary - operator */ -{ - que_node_list_add_last(NULL, arg1); - - if (arg2) { - que_node_list_add_last(arg1, arg2); - } - - /* We need to parse the string and determine whether it's a - PREFIX, SUFFIX or SUBSTRING comparison */ - if (func == PARS_LIKE_TOKEN) { - - ut_a(que_node_get_type(arg2) == QUE_NODE_SYMBOL); - - func = pars_like_op(arg2); - - ut_a(func == PARS_LIKE_TOKEN_EXACT - || func == PARS_LIKE_TOKEN_PREFIX - || func == PARS_LIKE_TOKEN_SUFFIX - || func == PARS_LIKE_TOKEN_SUBSTR); - } - - return(pars_func_low(func, arg1)); -} - -/*********************************************************************//** -Parses an ORDER BY clause. Order by a single column only is supported. 
-@return own: order-by node in a query tree */ -UNIV_INTERN -order_node_t* -pars_order_by( -/*==========*/ - sym_node_t* column, /*!< in: column name */ - pars_res_word_t* asc) /*!< in: &pars_asc_token or pars_desc_token */ -{ - order_node_t* node; - - node = static_cast<order_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(order_node_t))); - - node->common.type = QUE_NODE_ORDER; - - node->column = column; - - if (asc == &pars_asc_token) { - node->asc = TRUE; - } else { - ut_a(asc == &pars_desc_token); - node->asc = FALSE; - } - - return(node); -} - -/*********************************************************************//** -Determine if a data type is a built-in string data type of the InnoDB -SQL parser. -@return TRUE if string data type */ -static -ibool -pars_is_string_type( -/*================*/ - ulint mtype) /*!< in: main data type */ -{ - switch (mtype) { - case DATA_VARCHAR: case DATA_CHAR: - case DATA_FIXBINARY: case DATA_BINARY: - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************//** -Resolves the data type of a function in an expression. The argument data -types must already be resolved. 
*/ -static -void -pars_resolve_func_data_type( -/*========================*/ - func_node_t* node) /*!< in: function node */ -{ - que_node_t* arg; - - ut_a(que_node_get_type(node) == QUE_NODE_FUNC); - - arg = node->args; - - switch (node->func) { - case PARS_SUM_TOKEN: - case '+': case '-': case '*': case '/': - /* Inherit the data type from the first argument (which must - not be the SQL null literal whose type is DATA_ERROR) */ - - dtype_copy(que_node_get_data_type(node), - que_node_get_data_type(arg)); - - ut_a(dtype_get_mtype(que_node_get_data_type(node)) - == DATA_INT); - break; - - case PARS_COUNT_TOKEN: - ut_a(arg); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - break; - - case PARS_TO_CHAR_TOKEN: - case PARS_RND_STR_TOKEN: - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT); - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - break; - - case PARS_TO_BINARY_TOKEN: - if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) { - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - } else { - dtype_set(que_node_get_data_type(node), DATA_BINARY, - 0, 0); - } - break; - - case PARS_TO_NUMBER_TOKEN: - case PARS_BINARY_TO_NUMBER_TOKEN: - case PARS_LENGTH_TOKEN: - case PARS_INSTR_TOKEN: - ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype)); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - break; - - case PARS_SYSDATE_TOKEN: - ut_a(arg == NULL); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - break; - - case PARS_SUBSTR_TOKEN: - case PARS_CONCAT_TOKEN: - ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype)); - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - break; - - case '>': case '<': case '=': - case PARS_GE_TOKEN: - case PARS_LE_TOKEN: - case PARS_NE_TOKEN: - case PARS_AND_TOKEN: - case PARS_OR_TOKEN: - case PARS_NOT_TOKEN: - case PARS_NOTFOUND_TOKEN: - - /* We currently have no iboolean type: use integer type */ - 
dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - break; - - case PARS_RND_TOKEN: - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - break; - - case PARS_LIKE_TOKEN_EXACT: - case PARS_LIKE_TOKEN_PREFIX: - case PARS_LIKE_TOKEN_SUFFIX: - case PARS_LIKE_TOKEN_SUBSTR: - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - break; - - default: - ut_error; - } -} - -/*********************************************************************//** -Resolves the meaning of variables in an expression and the data types of -functions. It is an error if some identifier cannot be resolved here. */ -static -void -pars_resolve_exp_variables_and_types( -/*=================================*/ - sel_node_t* select_node, /*!< in: select node or NULL; if - this is not NULL then the variable - sym nodes are added to the - copy_variables list of select_node */ - que_node_t* exp_node) /*!< in: expression */ -{ - func_node_t* func_node; - que_node_t* arg; - sym_node_t* sym_node; - sym_node_t* node; - - ut_a(exp_node); - - if (que_node_get_type(exp_node) == QUE_NODE_FUNC) { - func_node = static_cast<func_node_t*>(exp_node); - - arg = func_node->args; - - while (arg) { - pars_resolve_exp_variables_and_types(select_node, arg); - - arg = que_node_get_next(arg); - } - - pars_resolve_func_data_type(func_node); - - return; - } - - ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL); - - sym_node = static_cast<sym_node_t*>(exp_node); - - if (sym_node->resolved) { - - return; - } - - /* Not resolved yet: look in the symbol table for a variable - or a cursor or a function with the same name */ - - node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list); - - while (node) { - if (node->resolved - && ((node->token_type == SYM_VAR) - || (node->token_type == SYM_CURSOR) - || (node->token_type == SYM_FUNCTION)) - && node->name - && (sym_node->name_len == node->name_len) - && (ut_memcmp(sym_node->name, 
node->name, - node->name_len) == 0)) { - - /* Found a variable or a cursor declared with - the same name */ - - break; - } - - node = UT_LIST_GET_NEXT(sym_list, node); - } - - if (!node) { - fprintf(stderr, "PARSER ERROR: Unresolved identifier %s\n", - sym_node->name); - } - - ut_a(node); - - sym_node->resolved = TRUE; - sym_node->token_type = SYM_IMPLICIT_VAR; - sym_node->alias = node; - sym_node->indirection = node; - - if (select_node) { - UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables, - sym_node); - } - - dfield_set_type(que_node_get_val(sym_node), - que_node_get_data_type(node)); -} - -/*********************************************************************//** -Resolves the meaning of variables in an expression list. It is an error if -some identifier cannot be resolved here. Resolves also the data types of -functions. */ -static -void -pars_resolve_exp_list_variables_and_types( -/*======================================*/ - sel_node_t* select_node, /*!< in: select node or NULL */ - que_node_t* exp_node) /*!< in: expression list first node, or - NULL */ -{ - while (exp_node) { - pars_resolve_exp_variables_and_types(select_node, exp_node); - - exp_node = que_node_get_next(exp_node); - } -} - -/*********************************************************************//** -Resolves the columns in an expression. 
*/ -static -void -pars_resolve_exp_columns( -/*=====================*/ - sym_node_t* table_node, /*!< in: first node in a table list */ - que_node_t* exp_node) /*!< in: expression */ -{ - func_node_t* func_node; - que_node_t* arg; - sym_node_t* sym_node; - dict_table_t* table; - sym_node_t* t_node; - ulint n_cols; - ulint i; - - ut_a(exp_node); - - if (que_node_get_type(exp_node) == QUE_NODE_FUNC) { - func_node = static_cast<func_node_t*>(exp_node); - - arg = func_node->args; - - while (arg) { - pars_resolve_exp_columns(table_node, arg); - - arg = que_node_get_next(arg); - } - - return; - } - - ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL); - - sym_node = static_cast<sym_node_t*>(exp_node); - - if (sym_node->resolved) { - - return; - } - - /* Not resolved yet: look in the table list for a column with the - same name */ - - t_node = table_node; - - while (t_node) { - table = t_node->table; - - n_cols = dict_table_get_n_cols(table); - - for (i = 0; i < n_cols; i++) { - const dict_col_t* col - = dict_table_get_nth_col(table, i); - const char* col_name - = dict_table_get_col_name(table, i); - - if ((sym_node->name_len == ut_strlen(col_name)) - && (0 == ut_memcmp(sym_node->name, col_name, - sym_node->name_len))) { - /* Found */ - sym_node->resolved = TRUE; - sym_node->token_type = SYM_COLUMN; - sym_node->table = table; - sym_node->col_no = i; - sym_node->prefetch_buf = NULL; - - dict_col_copy_type( - col, - dfield_get_type(&sym_node - ->common.val)); - - return; - } - } - - t_node = static_cast<sym_node_t*>(que_node_get_next(t_node)); - } -} - -/*********************************************************************//** -Resolves the meaning of columns in an expression list. 
*/ -static -void -pars_resolve_exp_list_columns( -/*==========================*/ - sym_node_t* table_node, /*!< in: first node in a table list */ - que_node_t* exp_node) /*!< in: expression list first node, or - NULL */ -{ - while (exp_node) { - pars_resolve_exp_columns(table_node, exp_node); - - exp_node = que_node_get_next(exp_node); - } -} - -/*********************************************************************//** -Retrieves the table definition for a table name id. */ -static -void -pars_retrieve_table_def( -/*====================*/ - sym_node_t* sym_node) /*!< in: table node */ -{ - ut_a(sym_node); - ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL); - - /* Open the table only if it is not already opened. */ - if (sym_node->token_type != SYM_TABLE_REF_COUNTED) { - - ut_a(sym_node->table == NULL); - - sym_node->resolved = TRUE; - sym_node->token_type = SYM_TABLE_REF_COUNTED; - - sym_node->table = dict_table_open_on_name( - sym_node->name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); - - ut_a(sym_node->table != NULL); - } -} - -/*********************************************************************//** -Retrieves the table definitions for a list of table name ids. -@return number of tables */ -static -ulint -pars_retrieve_table_list_defs( -/*==========================*/ - sym_node_t* sym_node) /*!< in: first table node in list */ -{ - ulint count = 0; - - if (sym_node == NULL) { - - return(count); - } - - while (sym_node) { - pars_retrieve_table_def(sym_node); - - count++; - - sym_node = static_cast<sym_node_t*>( - que_node_get_next(sym_node)); - } - - return(count); -} - -/*********************************************************************//** -Adds all columns to the select list if the query is SELECT * FROM ... 
*/ -static -void -pars_select_all_columns( -/*====================*/ - sel_node_t* select_node) /*!< in: select node already containing - the table list */ -{ - sym_node_t* col_node; - sym_node_t* table_node; - dict_table_t* table; - ulint i; - - select_node->select_list = NULL; - - table_node = select_node->table_list; - - while (table_node) { - table = table_node->table; - - for (i = 0; i < dict_table_get_n_user_cols(table); i++) { - const char* col_name = dict_table_get_col_name( - table, i); - - col_node = sym_tab_add_id(pars_sym_tab_global, - (byte*) col_name, - ut_strlen(col_name)); - - select_node->select_list = que_node_list_add_last( - select_node->select_list, col_node); - } - - table_node = static_cast<sym_node_t*>( - que_node_get_next(table_node)); - } -} - -/*********************************************************************//** -Parses a select list; creates a query graph node for the whole SELECT -statement. -@return own: select node in a query tree */ -UNIV_INTERN -sel_node_t* -pars_select_list( -/*=============*/ - que_node_t* select_list, /*!< in: select list */ - sym_node_t* into_list) /*!< in: variables list or NULL */ -{ - sel_node_t* node; - - node = sel_node_create(pars_sym_tab_global->heap); - - node->select_list = select_list; - node->into_list = into_list; - - pars_resolve_exp_list_variables_and_types(NULL, into_list); - - return(node); -} - -/*********************************************************************//** -Checks if the query is an aggregate query, in which case the selct list must -contain only aggregate function items. 
*/ -static -void -pars_check_aggregate( -/*=================*/ - sel_node_t* select_node) /*!< in: select node already containing - the select list */ -{ - que_node_t* exp_node; - func_node_t* func_node; - ulint n_nodes = 0; - ulint n_aggregate_nodes = 0; - - exp_node = select_node->select_list; - - while (exp_node) { - - n_nodes++; - - if (que_node_get_type(exp_node) == QUE_NODE_FUNC) { - - func_node = static_cast<func_node_t*>(exp_node); - - if (func_node->fclass == PARS_FUNC_AGGREGATE) { - - n_aggregate_nodes++; - } - } - - exp_node = que_node_get_next(exp_node); - } - - if (n_aggregate_nodes > 0) { - ut_a(n_nodes == n_aggregate_nodes); - - select_node->is_aggregate = TRUE; - } else { - select_node->is_aggregate = FALSE; - } -} - -/*********************************************************************//** -Parses a select statement. -@return own: select node in a query tree */ -UNIV_INTERN -sel_node_t* -pars_select_statement( -/*==================*/ - sel_node_t* select_node, /*!< in: select node already containing - the select list */ - sym_node_t* table_list, /*!< in: table list */ - que_node_t* search_cond, /*!< in: search condition or NULL */ - pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */ - pars_res_word_t* lock_shared, /*!< in: NULL or &pars_share_token */ - order_node_t* order_by) /*!< in: NULL or an order-by node */ -{ - select_node->state = SEL_NODE_OPEN; - - select_node->table_list = table_list; - select_node->n_tables = pars_retrieve_table_list_defs(table_list); - - if (select_node->select_list == &pars_star_denoter) { - - /* SELECT * FROM ... 
*/ - pars_select_all_columns(select_node); - } - - if (select_node->into_list) { - ut_a(que_node_list_get_len(select_node->into_list) - == que_node_list_get_len(select_node->select_list)); - } - - UT_LIST_INIT(select_node->copy_variables); - - pars_resolve_exp_list_columns(table_list, select_node->select_list); - pars_resolve_exp_list_variables_and_types(select_node, - select_node->select_list); - pars_check_aggregate(select_node); - - select_node->search_cond = search_cond; - - if (search_cond) { - pars_resolve_exp_columns(table_list, search_cond); - pars_resolve_exp_variables_and_types(select_node, search_cond); - } - - if (for_update) { - ut_a(!lock_shared); - - select_node->set_x_locks = TRUE; - select_node->row_lock_mode = LOCK_X; - - select_node->consistent_read = FALSE; - select_node->read_view = NULL; - } else if (lock_shared){ - select_node->set_x_locks = FALSE; - select_node->row_lock_mode = LOCK_S; - - select_node->consistent_read = FALSE; - select_node->read_view = NULL; - } else { - select_node->set_x_locks = FALSE; - select_node->row_lock_mode = LOCK_S; - - select_node->consistent_read = TRUE; - } - - select_node->order_by = order_by; - - if (order_by) { - pars_resolve_exp_columns(table_list, order_by->column); - } - - /* The final value of the following fields depend on the environment - where the select statement appears: */ - - select_node->can_get_updated = FALSE; - select_node->explicit_cursor = NULL; - - opt_search_plan(select_node); - - return(select_node); -} - -/*********************************************************************//** -Parses a cursor declaration. 
-@return sym_node */ -UNIV_INTERN -que_node_t* -pars_cursor_declaration( -/*====================*/ - sym_node_t* sym_node, /*!< in: cursor id node in the symbol - table */ - sel_node_t* select_node) /*!< in: select node */ -{ - sym_node->resolved = TRUE; - sym_node->token_type = SYM_CURSOR; - sym_node->cursor_def = select_node; - - select_node->state = SEL_NODE_CLOSED; - select_node->explicit_cursor = sym_node; - - return(sym_node); -} - -/*********************************************************************//** -Parses a function declaration. -@return sym_node */ -UNIV_INTERN -que_node_t* -pars_function_declaration( -/*======================*/ - sym_node_t* sym_node) /*!< in: function id node in the symbol - table */ -{ - sym_node->resolved = TRUE; - sym_node->token_type = SYM_FUNCTION; - - /* Check that the function exists. */ - ut_a(pars_info_lookup_user_func( - pars_sym_tab_global->info, sym_node->name)); - - return(sym_node); -} - -/*********************************************************************//** -Parses a delete or update statement start. -@return own: update node in a query tree */ -UNIV_INTERN -upd_node_t* -pars_update_statement_start( -/*========================*/ - ibool is_delete, /*!< in: TRUE if delete */ - sym_node_t* table_sym, /*!< in: table name node */ - col_assign_node_t* col_assign_list)/*!< in: column assignment list, NULL - if delete */ -{ - upd_node_t* node; - - node = upd_node_create(pars_sym_tab_global->heap); - - node->is_delete = is_delete; - - node->table_sym = table_sym; - node->col_assign_list = col_assign_list; - - return(node); -} - -/*********************************************************************//** -Parses a column assignment in an update. 
-@return column assignment node */ -UNIV_INTERN -col_assign_node_t* -pars_column_assignment( -/*===================*/ - sym_node_t* column, /*!< in: column to assign */ - que_node_t* exp) /*!< in: value to assign */ -{ - col_assign_node_t* node; - - node = static_cast<col_assign_node_t*>( - mem_heap_alloc(pars_sym_tab_global->heap, - sizeof(col_assign_node_t))); - node->common.type = QUE_NODE_COL_ASSIGNMENT; - - node->col = column; - node->val = exp; - - return(node); -} - -/*********************************************************************//** -Processes an update node assignment list. */ -static -void -pars_process_assign_list( -/*=====================*/ - upd_node_t* node) /*!< in: update node */ -{ - col_assign_node_t* col_assign_list; - sym_node_t* table_sym; - col_assign_node_t* assign_node; - upd_field_t* upd_field; - dict_index_t* clust_index; - sym_node_t* col_sym; - ulint changes_ord_field; - ulint changes_field_size; - ulint n_assigns; - ulint i; - - table_sym = node->table_sym; - col_assign_list = static_cast<col_assign_node_t*>( - node->col_assign_list); - clust_index = dict_table_get_first_index(node->table); - - assign_node = col_assign_list; - n_assigns = 0; - - while (assign_node) { - pars_resolve_exp_columns(table_sym, assign_node->col); - pars_resolve_exp_columns(table_sym, assign_node->val); - pars_resolve_exp_variables_and_types(NULL, assign_node->val); -#if 0 - ut_a(dtype_get_mtype( - dfield_get_type(que_node_get_val( - assign_node->col))) - == dtype_get_mtype( - dfield_get_type(que_node_get_val( - assign_node->val)))); -#endif - - /* Add to the update node all the columns found in assignment - values as columns to copy: therefore, TRUE */ - - opt_find_all_cols(TRUE, clust_index, &(node->columns), NULL, - assign_node->val); - n_assigns++; - - assign_node = static_cast<col_assign_node_t*>( - que_node_get_next(assign_node)); - } - - node->update = upd_create(n_assigns, pars_sym_tab_global->heap); - - assign_node = col_assign_list; - - 
changes_field_size = UPD_NODE_NO_SIZE_CHANGE; - - for (i = 0; i < n_assigns; i++) { - upd_field = upd_get_nth_field(node->update, i); - - col_sym = assign_node->col; - - upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos( - clust_index, col_sym->col_no, - NULL), - clust_index, NULL); - upd_field->exp = assign_node->val; - - if (!dict_col_get_fixed_size( - dict_index_get_nth_col(clust_index, - upd_field->field_no), - dict_table_is_comp(node->table))) { - changes_field_size = 0; - } - - assign_node = static_cast<col_assign_node_t*>( - que_node_get_next(assign_node)); - } - - /* Find out if the update can modify an ordering field in any index */ - - changes_ord_field = UPD_NODE_NO_ORD_CHANGE; - - if (row_upd_changes_some_index_ord_field_binary(node->table, - node->update)) { - changes_ord_field = 0; - } - - node->cmpl_info = changes_ord_field | changes_field_size; -} - -/*********************************************************************//** -Parses an update or delete statement. 
-@return own: update node in a query tree */ -UNIV_INTERN -upd_node_t* -pars_update_statement( -/*==================*/ - upd_node_t* node, /*!< in: update node */ - sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in - the symbol table or NULL */ - que_node_t* search_cond) /*!< in: search condition or NULL */ -{ - sym_node_t* table_sym; - sel_node_t* sel_node; - plan_t* plan; - - table_sym = node->table_sym; - - pars_retrieve_table_def(table_sym); - node->table = table_sym->table; - - UT_LIST_INIT(node->columns); - - /* Make the single table node into a list of table nodes of length 1 */ - - que_node_list_add_last(NULL, table_sym); - - if (cursor_sym) { - pars_resolve_exp_variables_and_types(NULL, cursor_sym); - - sel_node = cursor_sym->alias->cursor_def; - - node->searched_update = FALSE; - } else { - sel_node = pars_select_list(NULL, NULL); - - pars_select_statement(sel_node, table_sym, search_cond, NULL, - &pars_share_token, NULL); - node->searched_update = TRUE; - sel_node->common.parent = node; - } - - node->select = sel_node; - - ut_a(!node->is_delete || (node->col_assign_list == NULL)); - ut_a(node->is_delete || (node->col_assign_list != NULL)); - - if (node->is_delete) { - node->cmpl_info = 0; - } else { - pars_process_assign_list(node); - } - - if (node->searched_update) { - node->has_clust_rec_x_lock = TRUE; - sel_node->set_x_locks = TRUE; - sel_node->row_lock_mode = LOCK_X; - } else { - node->has_clust_rec_x_lock = sel_node->set_x_locks; - } - - ut_a(sel_node->n_tables == 1); - ut_a(sel_node->consistent_read == FALSE); - ut_a(sel_node->order_by == NULL); - ut_a(sel_node->is_aggregate == FALSE); - - sel_node->can_get_updated = TRUE; - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - plan = sel_node_get_nth_plan(sel_node, 0); - - plan->no_prefetch = TRUE; - - if (!dict_index_is_clust(plan->index)) { - - plan->must_get_clust = TRUE; - - node->pcur = &(plan->clust_pcur); - } else { - node->pcur = &(plan->pcur); - } - - return(node); -} - 
-/*********************************************************************//** -Parses an insert statement. -@return own: update node in a query tree */ -UNIV_INTERN -ins_node_t* -pars_insert_statement( -/*==================*/ - sym_node_t* table_sym, /*!< in: table name node */ - que_node_t* values_list, /*!< in: value expression list or NULL */ - sel_node_t* select) /*!< in: select condition or NULL */ -{ - ins_node_t* node; - dtuple_t* row; - ulint ins_type; - - ut_a(values_list || select); - ut_a(!values_list || !select); - - if (values_list) { - ins_type = INS_VALUES; - } else { - ins_type = INS_SEARCHED; - } - - pars_retrieve_table_def(table_sym); - - node = ins_node_create(ins_type, table_sym->table, - pars_sym_tab_global->heap); - - row = dtuple_create(pars_sym_tab_global->heap, - dict_table_get_n_cols(node->table)); - - dict_table_copy_types(row, table_sym->table); - - ins_node_set_new_row(node, row); - - node->select = select; - - if (select) { - select->common.parent = node; - - ut_a(que_node_list_get_len(select->select_list) - == dict_table_get_n_user_cols(table_sym->table)); - } - - node->values_list = values_list; - - if (node->values_list) { - pars_resolve_exp_list_variables_and_types(NULL, values_list); - - ut_a(que_node_list_get_len(values_list) - == dict_table_get_n_user_cols(table_sym->table)); - } - - return(node); -} - -/*********************************************************************//** -Set the type of a dfield. */ -static -void -pars_set_dfield_type( -/*=================*/ - dfield_t* dfield, /*!< in: dfield */ - pars_res_word_t* type, /*!< in: pointer to a type - token */ - ulint len, /*!< in: length, or 0 */ - ibool is_unsigned, /*!< in: if TRUE, column is - UNSIGNED. */ - ibool is_not_null) /*!< in: if TRUE, column is - NOT NULL. 
*/ -{ - ulint flags = 0; - - if (is_not_null) { - flags |= DATA_NOT_NULL; - } - - if (is_unsigned) { - flags |= DATA_UNSIGNED; - } - - if (type == &pars_bigint_token) { - ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_INT, flags, 8); - } else if (type == &pars_int_token) { - ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_INT, flags, 4); - - } else if (type == &pars_char_token) { - //ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_VARCHAR, - DATA_ENGLISH | flags, len); - } else if (type == &pars_binary_token) { - ut_a(len != 0); - - dtype_set(dfield_get_type(dfield), DATA_FIXBINARY, - DATA_BINARY_TYPE | flags, len); - } else if (type == &pars_blob_token) { - ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_BLOB, - DATA_BINARY_TYPE | flags, 0); - } else { - ut_error; - } -} - -/*********************************************************************//** -Parses a variable declaration. -@return own: symbol table node of type SYM_VAR */ -UNIV_INTERN -sym_node_t* -pars_variable_declaration( -/*======================*/ - sym_node_t* node, /*!< in: symbol table node allocated for the - id of the variable */ - pars_res_word_t* type) /*!< in: pointer to a type token */ -{ - node->resolved = TRUE; - node->token_type = SYM_VAR; - - node->param_type = PARS_NOT_PARAM; - - pars_set_dfield_type(que_node_get_val(node), type, 0, FALSE, FALSE); - - return(node); -} - -/*********************************************************************//** -Parses a procedure parameter declaration. 
-@return own: symbol table node of type SYM_VAR */ -UNIV_INTERN -sym_node_t* -pars_parameter_declaration( -/*=======================*/ - sym_node_t* node, /*!< in: symbol table node allocated for the - id of the parameter */ - ulint param_type, - /*!< in: PARS_INPUT or PARS_OUTPUT */ - pars_res_word_t* type) /*!< in: pointer to a type token */ -{ - ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT)); - - pars_variable_declaration(node, type); - - node->param_type = param_type; - - return(node); -} - -/*********************************************************************//** -Sets the parent field in a query node list. */ -static -void -pars_set_parent_in_list( -/*====================*/ - que_node_t* node_list, /*!< in: first node in a list */ - que_node_t* parent) /*!< in: parent value to set in all - nodes of the list */ -{ - que_common_t* common; - - common = static_cast<que_common_t*>(node_list); - - while (common) { - common->parent = parent; - - common = static_cast<que_common_t*>(que_node_get_next(common)); - } -} - -/*********************************************************************//** -Parses an elsif element. -@return elsif node */ -UNIV_INTERN -elsif_node_t* -pars_elsif_element( -/*===============*/ - que_node_t* cond, /*!< in: if-condition */ - que_node_t* stat_list) /*!< in: statement list */ -{ - elsif_node_t* node; - - node = static_cast<elsif_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(elsif_node_t))); - - node->common.type = QUE_NODE_ELSIF; - - node->cond = cond; - - pars_resolve_exp_variables_and_types(NULL, cond); - - node->stat_list = stat_list; - - return(node); -} - -/*********************************************************************//** -Parses an if-statement. 
-@return if-statement node */ -UNIV_INTERN -if_node_t* -pars_if_statement( -/*==============*/ - que_node_t* cond, /*!< in: if-condition */ - que_node_t* stat_list, /*!< in: statement list */ - que_node_t* else_part) /*!< in: else-part statement list - or elsif element list */ -{ - if_node_t* node; - elsif_node_t* elsif_node; - - node = static_cast<if_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(if_node_t))); - - node->common.type = QUE_NODE_IF; - - node->cond = cond; - - pars_resolve_exp_variables_and_types(NULL, cond); - - node->stat_list = stat_list; - - if (else_part && (que_node_get_type(else_part) == QUE_NODE_ELSIF)) { - - /* There is a list of elsif conditions */ - - node->else_part = NULL; - node->elsif_list = static_cast<elsif_node_t*>(else_part); - - elsif_node = static_cast<elsif_node_t*>(else_part); - - while (elsif_node) { - pars_set_parent_in_list(elsif_node->stat_list, node); - - elsif_node = static_cast<elsif_node_t*>( - que_node_get_next(elsif_node)); - } - } else { - node->else_part = else_part; - node->elsif_list = NULL; - - pars_set_parent_in_list(else_part, node); - } - - pars_set_parent_in_list(stat_list, node); - - return(node); -} - -/*********************************************************************//** -Parses a while-statement. -@return while-statement node */ -UNIV_INTERN -while_node_t* -pars_while_statement( -/*=================*/ - que_node_t* cond, /*!< in: while-condition */ - que_node_t* stat_list) /*!< in: statement list */ -{ - while_node_t* node; - - node = static_cast<while_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(while_node_t))); - - node->common.type = QUE_NODE_WHILE; - - node->cond = cond; - - pars_resolve_exp_variables_and_types(NULL, cond); - - node->stat_list = stat_list; - - pars_set_parent_in_list(stat_list, node); - - return(node); -} - -/*********************************************************************//** -Parses a for-loop-statement. 
-@return for-statement node */ -UNIV_INTERN -for_node_t* -pars_for_statement( -/*===============*/ - sym_node_t* loop_var, /*!< in: loop variable */ - que_node_t* loop_start_limit,/*!< in: loop start expression */ - que_node_t* loop_end_limit, /*!< in: loop end expression */ - que_node_t* stat_list) /*!< in: statement list */ -{ - for_node_t* node; - - node = static_cast<for_node_t*>( - mem_heap_alloc(pars_sym_tab_global->heap, sizeof(for_node_t))); - - node->common.type = QUE_NODE_FOR; - - pars_resolve_exp_variables_and_types(NULL, loop_var); - pars_resolve_exp_variables_and_types(NULL, loop_start_limit); - pars_resolve_exp_variables_and_types(NULL, loop_end_limit); - - node->loop_var = loop_var->indirection; - - ut_a(loop_var->indirection); - - node->loop_start_limit = loop_start_limit; - node->loop_end_limit = loop_end_limit; - - node->stat_list = stat_list; - - pars_set_parent_in_list(stat_list, node); - - return(node); -} - -/*********************************************************************//** -Parses an exit statement. -@return exit statement node */ -UNIV_INTERN -exit_node_t* -pars_exit_statement(void) -/*=====================*/ -{ - exit_node_t* node; - - node = static_cast<exit_node_t*>( - mem_heap_alloc(pars_sym_tab_global->heap, sizeof(exit_node_t))); - node->common.type = QUE_NODE_EXIT; - - return(node); -} - -/*********************************************************************//** -Parses a return-statement. -@return return-statement node */ -UNIV_INTERN -return_node_t* -pars_return_statement(void) -/*=======================*/ -{ - return_node_t* node; - - node = static_cast<return_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(return_node_t))); - node->common.type = QUE_NODE_RETURN; - - return(node); -} - -/*********************************************************************//** -Parses an assignment statement. 
-@return assignment statement node */ -UNIV_INTERN -assign_node_t* -pars_assignment_statement( -/*======================*/ - sym_node_t* var, /*!< in: variable to assign */ - que_node_t* val) /*!< in: value to assign */ -{ - assign_node_t* node; - - node = static_cast<assign_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(assign_node_t))); - node->common.type = QUE_NODE_ASSIGNMENT; - - node->var = var; - node->val = val; - - pars_resolve_exp_variables_and_types(NULL, var); - pars_resolve_exp_variables_and_types(NULL, val); - - ut_a(dtype_get_mtype(dfield_get_type(que_node_get_val(var))) - == dtype_get_mtype(dfield_get_type(que_node_get_val(val)))); - - return(node); -} - -/*********************************************************************//** -Parses a procedure call. -@return function node */ -UNIV_INTERN -func_node_t* -pars_procedure_call( -/*================*/ - que_node_t* res_word,/*!< in: procedure name reserved word */ - que_node_t* args) /*!< in: argument list */ -{ - func_node_t* node; - - node = pars_func(res_word, args); - - pars_resolve_exp_list_variables_and_types(NULL, args); - - return(node); -} - -/*********************************************************************//** -Parses a fetch statement. into_list or user_func (but not both) must be -non-NULL. -@return fetch statement node */ -UNIV_INTERN -fetch_node_t* -pars_fetch_statement( -/*=================*/ - sym_node_t* cursor, /*!< in: cursor node */ - sym_node_t* into_list, /*!< in: variables to set, or NULL */ - sym_node_t* user_func) /*!< in: user function name, or NULL */ -{ - sym_node_t* cursor_decl; - fetch_node_t* node; - - /* Logical XOR. 
*/ - ut_a(!into_list != !user_func); - - node = static_cast<fetch_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(fetch_node_t))); - - node->common.type = QUE_NODE_FETCH; - - pars_resolve_exp_variables_and_types(NULL, cursor); - - if (into_list) { - pars_resolve_exp_list_variables_and_types(NULL, into_list); - node->into_list = into_list; - node->func = NULL; - } else { - pars_resolve_exp_variables_and_types(NULL, user_func); - - node->func = pars_info_lookup_user_func( - pars_sym_tab_global->info, user_func->name); - - ut_a(node->func); - - node->into_list = NULL; - } - - cursor_decl = cursor->alias; - - ut_a(cursor_decl->token_type == SYM_CURSOR); - - node->cursor_def = cursor_decl->cursor_def; - - if (into_list) { - ut_a(que_node_list_get_len(into_list) - == que_node_list_get_len(node->cursor_def->select_list)); - } - - return(node); -} - -/*********************************************************************//** -Parses an open or close cursor statement. -@return fetch statement node */ -UNIV_INTERN -open_node_t* -pars_open_statement( -/*================*/ - ulint type, /*!< in: ROW_SEL_OPEN_CURSOR - or ROW_SEL_CLOSE_CURSOR */ - sym_node_t* cursor) /*!< in: cursor node */ -{ - sym_node_t* cursor_decl; - open_node_t* node; - - node = static_cast<open_node_t*>( - mem_heap_alloc( - pars_sym_tab_global->heap, sizeof(open_node_t))); - - node->common.type = QUE_NODE_OPEN; - - pars_resolve_exp_variables_and_types(NULL, cursor); - - cursor_decl = cursor->alias; - - ut_a(cursor_decl->token_type == SYM_CURSOR); - - node->op_type = static_cast<open_node_op>(type); - node->cursor_def = cursor_decl->cursor_def; - - return(node); -} - -/*********************************************************************//** -Parses a row_printf-statement. 
-@return row_printf-statement node */
-UNIV_INTERN
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
-	sel_node_t*	sel_node)	/*!< in: select node */
-{
-	row_printf_node_t*	node = static_cast<row_printf_node_t*>(
-		mem_heap_alloc(pars_sym_tab_global->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_ROW_PRINTF;
-	node->sel_node = sel_node;
-
-	/* The select becomes a child of the printf node. */
-	sel_node->common.parent = node;
-
-	return(node);
-}
-
-/*********************************************************************//**
-Builds a commit node in the heap of the current symbol table.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-pars_commit_statement(void)
-/*=======================*/
-{
-	mem_heap_t*	heap = pars_sym_tab_global->heap;
-
-	return(trx_commit_node_create(heap));
-}
-
-/*********************************************************************//**
-Builds a rollback node in the heap of the current symbol table.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-pars_rollback_statement(void)
-/*=========================*/
-{
-	mem_heap_t*	heap = pars_sym_tab_global->heap;
-
-	return(roll_node_create(heap));
-}
-
-/*********************************************************************//**
-Sets the data type of a column symbol from a column definition in a
-table creation statement.
-@return column sym table node */
-UNIV_INTERN
-sym_node_t*
-pars_column_def(
-/*============*/
-	sym_node_t*		sym_node,	/*!< in: column node in the
-						symbol table */
-	pars_res_word_t*	type,		/*!< in: data type */
-	sym_node_t*		len,		/*!< in: length of column, or
-						NULL */
-	void*			is_unsigned,	/*!< in: if not NULL, column
-						is of type UNSIGNED. */
-	void*			is_not_null)	/*!< in: if not NULL, column
-						is of type NOT NULL. */
-{
-	/* A missing length is passed on as 0. */
-	ulint	len2 = len ? eval_node_get_int_val(len) : 0;
-
-	pars_set_dfield_type(que_node_get_val(sym_node), type, len2,
-			     is_unsigned != NULL, is_not_null != NULL);
-
-	return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a table creation operation.
-@return table create subgraph */ -UNIV_INTERN -tab_node_t* -pars_create_table( -/*==============*/ - sym_node_t* table_sym, /*!< in: table name node in the symbol - table */ - sym_node_t* column_defs, /*!< in: list of column names */ - sym_node_t* compact, /* in: non-NULL if COMPACT table. */ - sym_node_t* block_size, /* in: block size (can be NULL) */ - void* not_fit_in_memory MY_ATTRIBUTE((unused))) - /*!< in: a non-NULL pointer means that - this is a table which in simulations - should be simulated as not fitting - in memory; thread is put to sleep - to simulate disk accesses; NOTE that - this flag is not stored to the data - dictionary on disk, and the database - will forget about non-NULL value if - it has to reload the table definition - from disk */ -{ - dict_table_t* table; - sym_node_t* column; - tab_node_t* node; - const dtype_t* dtype; - ulint n_cols; - ulint flags = 0; - ulint flags2 = 0; - - if (compact != NULL) { - - /* System tables currently only use the REDUNDANT row - format therefore the check for srv_file_per_table should be - safe for now. */ - - flags |= DICT_TF_COMPACT; - - /* FIXME: Ideally this should be part of the SQL syntax - or use some other mechanism. We want to reduce dependency - on global variables. There is an inherent race here but - that has always existed around this variable. 
*/ - if (srv_file_per_table) { - flags2 |= DICT_TF2_USE_TABLESPACE; - } - } - - if (block_size != NULL) { - ulint size; - dfield_t* dfield; - - dfield = que_node_get_val(block_size); - - ut_a(dfield_get_len(dfield) == 4); - size = mach_read_from_4(static_cast<byte*>( - dfield_get_data(dfield))); - - - switch (size) { - case 0: - break; - - case 1: case 2: case 4: case 8: case 16: - flags |= DICT_TF_COMPACT; - /* FTS-FIXME: needs the zip changes */ - /* flags |= size << DICT_TF_COMPRESSED_SHIFT; */ - break; - - default: - ut_error; - } - } - - /* Set the flags2 when create table or alter tables */ - flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; - DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); - - - n_cols = que_node_list_get_len(column_defs); - - table = dict_mem_table_create( - table_sym->name, 0, n_cols, flags, flags2); - -#ifdef UNIV_DEBUG - if (not_fit_in_memory != NULL) { - table->does_not_fit_in_memory = TRUE; - } -#endif /* UNIV_DEBUG */ - column = column_defs; - - while (column) { - dtype = dfield_get_type(que_node_get_val(column)); - - dict_mem_table_add_col(table, table->heap, - column->name, dtype->mtype, - dtype->prtype, dtype->len); - column->resolved = TRUE; - column->token_type = SYM_COLUMN; - - column = static_cast<sym_node_t*>(que_node_get_next(column)); - } - - node = tab_create_graph_create(table, pars_sym_tab_global->heap, true, - FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY); - - table_sym->resolved = TRUE; - table_sym->token_type = SYM_TABLE; - - return(node); -} - -/*********************************************************************//** -Parses an index creation operation. 
-@return index create subgraph */
-UNIV_INTERN
-ind_node_t*
-pars_create_index(
-/*==============*/
-	pars_res_word_t*	unique_def,	/*!< in: not NULL if a unique index */
-	pars_res_word_t*	clustered_def,	/*!< in: not NULL if a clustered index */
-	sym_node_t*		index_sym,	/*!< in: index name node in the symbol
-						table */
-	sym_node_t*		table_sym,	/*!< in: table name node in the symbol
-						table */
-	sym_node_t*		column_list)	/*!< in: list of column names */
-{
-	ulint	ind_type = 0;
-
-	if (unique_def) {
-		ind_type |= DICT_UNIQUE;
-	}
-
-	if (clustered_def) {
-		ind_type |= DICT_CLUSTERED;
-	}
-
-	dict_index_t*	index = dict_mem_index_create(
-		table_sym->name, index_sym->name, 0, ind_type,
-		que_node_list_get_len(column_list));
-
-	/* Add one index field per listed column and mark each column
-	symbol as resolved. */
-	for (sym_node_t* column = column_list;
-	     column != NULL;
-	     column = static_cast<sym_node_t*>(que_node_get_next(column))) {
-
-		dict_mem_index_add_field(index, column->name, 0);
-
-		column->resolved = TRUE;
-		column->token_type = SYM_COLUMN;
-	}
-
-	ind_node_t*	node = ind_create_graph_create(
-		index, pars_sym_tab_global->heap, true);
-
-	table_sym->resolved = TRUE;
-	table_sym->token_type = SYM_TABLE;
-
-	/* The index name symbol is tagged SYM_TABLE as well. */
-	index_sym->resolved = TRUE;
-	index_sym->token_type = SYM_TABLE;
-
-	return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
-	sym_node_t*	sym_node,	/*!< in: procedure id node in the symbol
-					table */
-	sym_node_t*	param_list,	/*!< in: parameter declaration list */
-	que_node_t*	stat_list)	/*!< in: statement list */
-{
-	mem_heap_t*	heap = pars_sym_tab_global->heap;
-
-	/* Build fork -> thread -> procedure node, all in the heap of the
-	symbol table. */
-	que_fork_t*	fork = que_fork_create(
-		NULL, NULL, QUE_FORK_PROCEDURE, heap);
-	fork->trx = NULL;
-
-	que_thr_t*	thr = que_thr_create(fork, heap);
-
-	proc_node_t*	node = static_cast<proc_node_t*>(
-		mem_heap_alloc(heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_PROC;
-	node->common.parent = thr;
-
-	sym_node->token_type = SYM_PROCEDURE_NAME;
-	sym_node->resolved = TRUE;
-
-	node->proc_id = sym_node;
-	node->param_list = param_list;
-	node->stat_list = stat_list;
-	node->sym_tab = pars_sym_tab_global;
-
-	pars_set_parent_in_list(stat_list, node);
-
-	thr->child = node;
-
-	/* Publish the finished graph through the symbol table. */
-	pars_sym_tab_global->query_graph = fork;
-
-	return(fork);
-}
-
-/*************************************************************//**
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used;
-calling it raises ut_error.
-@return query graph */
-UNIV_INTERN
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
-	sym_node_t*	sym_node MY_ATTRIBUTE((unused)))
-					/*!< in: stored procedure name */
-{
-	ut_error;
-	return(NULL);
-}
-
-/*************************************************************//**
-Retrieves characters to the lexical analyzer.
*/ -UNIV_INTERN -int -pars_get_lex_chars( -/*===============*/ - char* buf, /*!< in/out: buffer where to copy */ - int max_size) /*!< in: maximum number of characters which fit - in the buffer */ -{ - int len; - - len = static_cast<int>( - pars_sym_tab_global->string_len - - pars_sym_tab_global->next_char_pos); - if (len == 0) { -#ifdef YYDEBUG - /* fputs("SQL string ends\n", stderr); */ -#endif - return(0); - } - - if (len > max_size) { - len = max_size; - } - -#ifdef UNIV_SQL_DEBUG - if (pars_print_lexed) { - - if (len >= 5) { - len = 5; - } - - fwrite(pars_sym_tab_global->sql_string - + pars_sym_tab_global->next_char_pos, - 1, len, stderr); - } -#endif /* UNIV_SQL_DEBUG */ - - ut_memcpy(buf, pars_sym_tab_global->sql_string - + pars_sym_tab_global->next_char_pos, len); - - pars_sym_tab_global->next_char_pos += len; - - return(len); -} - -/*************************************************************//** -Called by yyparse on error. */ -UNIV_INTERN -void -yyerror( -/*====*/ - const char* s MY_ATTRIBUTE((unused))) - /*!< in: error message string */ -{ - ut_ad(s); - - fputs("PARSER ERROR: Syntax error in SQL string\n", stderr); - - ut_error; -} - -/*************************************************************//** -Parses an SQL string returning the query graph. 
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-pars_sql(
-/*=====*/
-	pars_info_t*	info,	/*!< in: extra information, or NULL */
-	const char*	str)	/*!< in: SQL string */
-{
-	ut_ad(str);
-
-	mem_heap_t*	heap = mem_heap_create(16000);
-
-	/* Currently, the parser is not reentrant: */
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* Set up the global symbol table that yyparse() works on; the SQL
-	text is copied into the heap together with its terminating NUL. */
-	pars_sym_tab_global = sym_tab_create(heap);
-
-	sym_tab_t*	sym_tab = pars_sym_tab_global;
-
-	sym_tab->string_len = strlen(str);
-	sym_tab->sql_string = static_cast<char*>(
-		mem_heap_dup(heap, str, sym_tab->string_len + 1));
-	sym_tab->next_char_pos = 0;
-	sym_tab->info = info;
-
-	yyparse();
-
-	/* Every symbol met during the parse must have been resolved. */
-	for (sym_node_t* sym_node
-		= UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-	     sym_node != NULL;
-	     sym_node = UT_LIST_GET_NEXT(sym_list, sym_node)) {
-
-		ut_a(sym_node->resolved);
-	}
-
-	que_t*	graph = pars_sym_tab_global->query_graph;
-
-	graph->sym_tab = pars_sym_tab_global;
-	graph->info = info;
-
-	pars_sym_tab_global = NULL;
-
-	/* fprintf(stderr, "SQL graph size %lu\n", mem_heap_get_size(heap)); */
-
-	return(graph);
-}
-
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE.
-@return query thread node to run */
-UNIV_INTERN
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
-	que_node_t*	node,	/*!< in: root node for an incomplete
-				query graph, or NULL for dummy graph */
-	trx_t*		trx,	/*!< in: transaction handle */
-	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
-{
-	que_fork_t*	fork;
-	que_thr_t*	thr;
-
-	fork = que_fork_create(NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap);
-	fork->trx = trx;
-
-	thr = que_thr_create(fork, heap);
-
-	thr->child = node;
-
-	if (node) {
-		que_node_set_parent(node, thr);
-	}
-
-	trx->graph = NULL;
-
-	return(thr);
-}
-
-/****************************************************************//**
-Create parser info struct. The struct lives in a heap of its own, which
-pars_info_free() releases.
-@return own: info struct */
-UNIV_INTERN
-pars_info_t*
-pars_info_create(void)
-/*==================*/
-{
-	pars_info_t*	info;
-	mem_heap_t*	heap;
-
-	heap = mem_heap_create(512);
-
-	info = static_cast<pars_info_t*>(mem_heap_alloc(heap, sizeof(*info)));
-
-	info->heap = heap;
-	info->funcs = NULL;
-	info->bound_lits = NULL;
-	info->bound_ids = NULL;
-	info->graph_owns_us = TRUE;
-
-	return(info);
-}
-
-/****************************************************************//**
-Free info struct and everything it contains. */
-UNIV_INTERN
-void
-pars_info_free(
-/*===========*/
-	pars_info_t*	info)	/*!< in, own: info struct */
-{
-	mem_heap_free(info->heap);
-}
-
-/****************************************************************//**
-Add bound literal. The name and the data are not copied: only the
-pointers are stored. In debug builds it is asserted that no literal
-with this name has been bound yet. */
-UNIV_INTERN
-void
-pars_info_add_literal(
-/*==================*/
-	pars_info_t*	info,		/*!< in: info struct */
-	const char*	name,		/*!< in: name */
-	const void*	address,	/*!< in: address */
-	ulint		length,		/*!< in: length of data */
-	ulint		type,		/*!< in: type, e.g. DATA_FIXBINARY */
-	ulint		prtype)		/*!< in: precise type, e.g.
-					DATA_UNSIGNED */
-{
-	pars_bound_lit_t*	pbl;
-
-	ut_ad(!pars_info_get_bound_lit(info, name));
-
-	pbl = static_cast<pars_bound_lit_t*>(
-		mem_heap_alloc(info->heap, sizeof(*pbl)));
-
-	pbl->name = name;
-
-	pbl->address = address;
-	pbl->length = length;
-	pbl->type = type;
-	pbl->prtype = prtype;
-
-	/* The vector of bound literals is created on first use. */
-	if (!info->bound_lits) {
-		ib_alloc_t*	heap_alloc;
-
-		heap_alloc = ib_heap_allocator_create(info->heap);
-
-		info->bound_lits = ib_vector_create(heap_alloc, sizeof(*pbl), 8);
-	}
-
-	ib_vector_push(info->bound_lits, pbl);
-}
-
-/****************************************************************//**
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
-void
-pars_info_add_str_literal(
-/*======================*/
-	pars_info_t*	info,		/*!< in: info struct */
-	const char*	name,		/*!< in: name */
-	const char*	str)		/*!< in: string */
-{
-	pars_info_add_literal(info, name, str, strlen(str),
-			      DATA_VARCHAR, DATA_ENGLISH);
-}
-
-/********************************************************************
-If the literal value already exists then it rebinds otherwise it
-creates a new entry. On a rebind only the address and the length are
-replaced; type and prtype are used only when a new entry is created. */
-UNIV_INTERN
-void
-pars_info_bind_literal(
-/*===================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const void*	address,	/* in: address */
-	ulint		length,		/* in: length of data */
-	ulint		type,		/* in: type, e.g. DATA_FIXBINARY */
-	ulint		prtype)		/* in: precise type, e.g.
-					DATA_UNSIGNED */
-{
-	pars_bound_lit_t*	pbl;
-
-	pbl = pars_info_lookup_bound_lit(info, name);
-
-	if (!pbl) {
-		pars_info_add_literal(
-			info, name, address, length, type, prtype);
-	} else {
-		pbl->address = address;
-		pbl->length = length;
-
-		sym_tab_rebind_lit(pbl->node, address, length);
-	}
-}
-
-/********************************************************************
-If the literal value already exists then it rebinds otherwise it
-creates a new entry. Equivalent to pars_info_bind_literal(info, name,
-str, str_len, DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
-void
-pars_info_bind_varchar_literal(
-/*===========================*/
-	pars_info_t*	info,		/*!< in: info struct */
-	const char*	name,		/*!< in: name */
-	const byte*	str,		/*!< in: string */
-	ulint		str_len)	/*!< in: string length */
-{
-	/* Same lookup / add / rebind sequence as the generic binder, so
-	delegate instead of repeating it. */
-	pars_info_bind_literal(
-		info, name, str, str_len, DATA_VARCHAR, DATA_ENGLISH);
-}
-
-/****************************************************************//**
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_int4_literal(
-/*=======================*/
-	pars_info_t*	info,		/*!< in: info struct */
-	const char*	name,		/*!< in: name */
-	lint		val)		/*!< in: value */
-{
-	byte*	buf = static_cast<byte*>(mem_heap_alloc(info->heap, 4));
-
-	mach_write_to_4(buf, val);
-	pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-}
-
-/********************************************************************
-If the literal value already exists then it rebinds otherwise it
-creates a new entry.
*/ -UNIV_INTERN -void -pars_info_bind_int4_literal( -/*========================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const ib_uint32_t* val) /* in: value */ -{ - pars_bound_lit_t* pbl; - - pbl = pars_info_lookup_bound_lit(info, name); - - if (!pbl) { - pars_info_add_literal(info, name, val, 4, DATA_INT, 0); - } else { - - pbl->address = val; - pbl->length = sizeof(*val); - - sym_tab_rebind_lit(pbl->node, val, sizeof(*val)); - } -} - -/******************************************************************** -If the literal value already exists then it rebinds otherwise it -creates a new entry. */ -UNIV_INTERN -void -pars_info_bind_int8_literal( -/*========================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const ib_uint64_t* val) /* in: value */ -{ - pars_bound_lit_t* pbl; - - pbl = pars_info_lookup_bound_lit(info, name); - - if (!pbl) { - pars_info_add_literal( - info, name, val, sizeof(*val), DATA_INT, 0); - } else { - - pbl->address = val; - pbl->length = sizeof(*val); - - sym_tab_rebind_lit(pbl->node, val, sizeof(*val)); - } -} - -/****************************************************************//** -Equivalent to: - -char buf[8]; -mach_write_to_8(buf, val); -pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. */ -UNIV_INTERN -void -pars_info_add_ull_literal( -/*======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - ib_uint64_t val) /*!< in: value */ -{ - byte* buf = static_cast<byte*>(mem_heap_alloc(info->heap, 8)); - - mach_write_to_8(buf, val); - - pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0); -} - -/****************************************************************//** -If the literal value already exists then it rebinds otherwise it -creates a new entry. 
*/ -UNIV_INTERN -void -pars_info_bind_ull_literal( -/*=======================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: name */ - const ib_uint64_t* val) /*!< in: value */ -{ - pars_bound_lit_t* pbl; - - pbl = pars_info_lookup_bound_lit(info, name); - - if (!pbl) { - pars_info_add_literal( - info, name, val, sizeof(*val), DATA_FIXBINARY, 0); - } else { - - pbl->address = val; - pbl->length = sizeof(*val); - - sym_tab_rebind_lit(pbl->node, val, sizeof(*val)); - } -} - -/****************************************************************//** -Add user function. */ -UNIV_INTERN -void -pars_info_bind_function( -/*====================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: function name */ - pars_user_func_cb_t func, /*!< in: function address */ - void* arg) /*!< in: user-supplied argument */ -{ - pars_user_func_t* puf; - - puf = pars_info_lookup_user_func(info, name); - - if (!puf) { - if (!info->funcs) { - ib_alloc_t* heap_alloc; - - heap_alloc = ib_heap_allocator_create(info->heap); - - info->funcs = ib_vector_create( - heap_alloc, sizeof(*puf), 8); - } - - /* Create a "new" element */ - puf = static_cast<pars_user_func_t*>( - ib_vector_push(info->funcs, NULL)); - puf->name = name; - } - - puf->arg = arg; - puf->func = func; -} - -/******************************************************************** -Add bound id. 
*/ -UNIV_INTERN -void -pars_info_bind_id( -/*==============*/ - pars_info_t* info, /*!< in: info struct */ - ibool copy_name, /* in: copy name if TRUE */ - const char* name, /*!< in: name */ - const char* id) /*!< in: id */ -{ - pars_bound_id_t* bid; - - bid = pars_info_lookup_bound_id(info, name); - - if (!bid) { - - if (!info->bound_ids) { - ib_alloc_t* heap_alloc; - - heap_alloc = ib_heap_allocator_create(info->heap); - - info->bound_ids = ib_vector_create( - heap_alloc, sizeof(*bid), 8); - } - - /* Create a "new" element */ - bid = static_cast<pars_bound_id_t*>( - ib_vector_push(info->bound_ids, NULL)); - - bid->name = (copy_name) - ? mem_heap_strdup(info->heap, name) : name; - } - - bid->id = id; -} - -/******************************************************************** -Get bound identifier with the given name.*/ - -pars_bound_id_t* -pars_info_get_bound_id( -/*===================*/ - /* out: bound id, or NULL if not - found */ - pars_info_t* info, /* in: info struct */ - const char* name) /* in: bound id name to find */ -{ - return(pars_info_lookup_bound_id(info, name)); -} - -/****************************************************************//** -Get bound literal with the given name. -@return bound literal, or NULL if not found */ -UNIV_INTERN -pars_bound_lit_t* -pars_info_get_bound_lit( -/*====================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name) /*!< in: bound literal name to find */ -{ - return(pars_info_lookup_bound_lit(info, name)); -} diff --git a/storage/xtradb/pars/pars0sym.cc b/storage/xtradb/pars/pars0sym.cc deleted file mode 100644 index b01a69cb33a..00000000000 --- a/storage/xtradb/pars/pars0sym.cc +++ /dev/null @@ -1,440 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file pars/pars0sym.cc -SQL parser symbol table - -Created 12/15/1997 Heikki Tuuri -*******************************************************/ - -#include "pars0sym.h" - -#ifdef UNIV_NONINL -#include "pars0sym.ic" -#endif - -#include "mem0mem.h" -#include "data0type.h" -#include "data0data.h" -#include "pars0grm.h" -#include "pars0pars.h" -#include "que0que.h" -#include "eval0eval.h" -#include "row0sel.h" - -/******************************************************************//** -Creates a symbol table for a single stored procedure or query. -@return own: symbol table */ -UNIV_INTERN -sym_tab_t* -sym_tab_create( -/*===========*/ - mem_heap_t* heap) /*!< in: memory heap where to create */ -{ - sym_tab_t* sym_tab; - - sym_tab = static_cast<sym_tab_t*>( - mem_heap_alloc(heap, sizeof(sym_tab_t))); - - UT_LIST_INIT(sym_tab->sym_list); - UT_LIST_INIT(sym_tab->func_node_list); - - sym_tab->heap = heap; - - return(sym_tab); -} - - -/******************************************************************//** -Frees the memory allocated dynamically AFTER parsing phase for variables -etc. in the symbol table. Does not free the mem heap where the table was -originally created. 
Frees also SQL explicit cursor definitions. */ -UNIV_INTERN -void -sym_tab_free_private( -/*=================*/ - sym_tab_t* sym_tab) /*!< in, own: symbol table */ -{ - sym_node_t* sym; - func_node_t* func; - - ut_ad(mutex_own(&dict_sys->mutex)); - - for (sym = UT_LIST_GET_FIRST(sym_tab->sym_list); - sym != NULL; - sym = UT_LIST_GET_NEXT(sym_list, sym)) { - - /* Close the tables opened in pars_retrieve_table_def(). */ - - if (sym->token_type == SYM_TABLE_REF_COUNTED) { - - dict_table_close(sym->table, TRUE, FALSE); - - sym->table = NULL; - sym->resolved = FALSE; - sym->token_type = SYM_UNSET; - } - - eval_node_free_val_buf(sym); - - if (sym->prefetch_buf) { - sel_col_prefetch_buf_free(sym->prefetch_buf); - } - - if (sym->cursor_def) { - que_graph_free_recursive(sym->cursor_def); - } - } - - for (func = UT_LIST_GET_FIRST(sym_tab->func_node_list); - func != NULL; - func = UT_LIST_GET_NEXT(func_node_list, func)) { - - eval_node_free_val_buf(func); - } -} - -/******************************************************************//** -Adds an integer literal to a symbol table. 
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	ulint		val)		/*!< in: integer value */
-{
-	sym_node_t*	node;
-	byte*		data;
-
-	/* Zero-fill the node, as sym_tab_add_id() does, so that fields
-	not assigned below do not hold indeterminate values. */
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	dtype_set(dfield_get_type(&node->common.val), DATA_INT, 0, 4);
-
-	/* The value is stored as 4 bytes allocated from the table heap. */
-	data = static_cast<byte*>(mem_heap_alloc(sym_tab->heap, 4));
-	mach_write_to_4(data, val);
-
-	dfield_set_data(&(node->common.val), data, 4);
-
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds a string literal to a symbol table. The string is copied into the
-heap of the symbol table; an empty string is stored with a NULL data
-pointer and length 0.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	const byte*	str,		/*!< in: string with no quotes around
-					it */
-	ulint		len)		/*!< in: string length */
-{
-	sym_node_t*	node;
-	byte*		data;
-
-	/* Zero-fill the node, as sym_tab_add_id() does, so that fields
-	not assigned below do not hold indeterminate values. */
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	dtype_set(dfield_get_type(&node->common.val),
-		  DATA_VARCHAR, DATA_ENGLISH, 0);
-
-	data = (len) ? static_cast<byte*>(mem_heap_dup(sym_tab->heap, str, len))
-		: NULL;
-
-	dfield_set_data(&(node->common.val), data, len);
-
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Add a bound literal to a symbol table. The literal must have been bound
-to the parser info of the symbol table under the given name; this is
-asserted. The node points at the bound data, which is not copied.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	const char*	name,		/*!< in: name of bound literal */
-	ulint*		lit_type)	/*!< out: type of literal (PARS_*_LIT) */
-{
-	sym_node_t*		node;
-	pars_bound_lit_t*	blit;
-	ulint			len = 0;
-
-	blit = pars_info_get_bound_lit(sym_tab->info, name);
-	ut_a(blit);
-
-	/* Zero-fill the node, as sym_tab_add_id() does, so that fields
-	not assigned below do not hold indeterminate values. */
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-	node->common.brother = node->common.parent = NULL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	/* Map the bound data type to a parser literal type. len stays 0
-	for DATA_BLOB and DATA_VARCHAR and is the bound length otherwise. */
-	switch (blit->type) {
-	case DATA_FIXBINARY:
-		len = blit->length;
-		*lit_type = PARS_FIXBINARY_LIT;
-		break;
-
-	case DATA_BLOB:
-		*lit_type = PARS_BLOB_LIT;
-		break;
-
-	case DATA_VARCHAR:
-		*lit_type = PARS_STR_LIT;
-		break;
-
-	case DATA_CHAR:
-		ut_a(blit->length > 0);
-
-		len = blit->length;
-		*lit_type = PARS_STR_LIT;
-		break;
-
-	case DATA_INT:
-		ut_a(blit->length > 0);
-		ut_a(blit->length <= 8);
-
-		len = blit->length;
-		*lit_type = PARS_INT_LIT;
-		break;
-
-	default:
-		ut_error;
-	}
-
-	dtype_set(dfield_get_type(&node->common.val),
-		  blit->type, blit->prtype, len);
-
-	dfield_set_data(&(node->common.val), blit->address, blit->length);
-
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	/* Remember the node so that the literal can be rebound later. */
-	blit->node = node;
-	node->like_node = NULL;
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/**********************************************************************
-Rebind literal to a node in the symbol table. The node must be a literal
-(SYM_LIT); its prefetch buffer and cursor definition, if any, are freed. */
-
-sym_node_t*
-sym_tab_rebind_lit(
-/*===============*/
-					/* out: symbol table node */
-	sym_node_t*	node,		/* in: node that is bound to literal*/
-	const void*	address,	/* in: pointer to data */
-	ulint		length)		/* in: length of data */
-{
-	dfield_t*	dfield = que_node_get_val(node);
-	dtype_t*	dtype = dfield_get_type(dfield);
-
-	ut_a(node->token_type == SYM_LIT);
-
-	dfield_set_data(&node->common.val, address, length);
-
-	if (node->like_node) {
-
-		ut_a(dtype_get_mtype(dtype) == DATA_CHAR
-		     || dtype_get_mtype(dtype) == DATA_VARCHAR);
-
-		/* Don't force [FALSE] creation of sub-nodes (for LIKE) */
-		pars_like_rebind(
-			node,static_cast<const byte*>(address), length);
-	}
-
-	/* FIXME: What's this ? */
-	node->common.val_buf_size = 0;
-
-	if (node->prefetch_buf) {
-		sel_col_prefetch_buf_free(node->prefetch_buf);
-		node->prefetch_buf = NULL;
-	}
-
-	if (node->cursor_def) {
-		que_graph_free_recursive(node->cursor_def);
-		node->cursor_def = NULL;
-	}
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds an SQL null literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
-	sym_tab_t*	sym_tab)	/*!< in: symbol table */
-{
-	sym_node_t*	node;
-
-	/* Zero-fill the node, as sym_tab_add_id() does, so that fields
-	not assigned below do not hold indeterminate values. */
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	/* The null literal carries the DATA_ERROR main type. */
-	dfield_get_type(&node->common.val)->mtype = DATA_ERROR;
-
-	dfield_set_null(&node->common.val);
-
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds an identifier to a symbol table. The name is copied into the heap
-of the symbol table; all other fields of the zero-filled node keep
-their zero value.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	byte*		name,		/*!< in: identifier name */
-	ulint		len)		/*!< in: identifier length */
-{
-	sym_node_t*	node;
-
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->name = mem_heap_strdupl(sym_tab->heap, (char*) name, len);
-	node->name_len = len;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	dfield_set_null(&node->common.val);
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Add a bound identifier to a symbol table.
-@return symbol table node */ -UNIV_INTERN -sym_node_t* -sym_tab_add_bound_id( -/*=================*/ - sym_tab_t* sym_tab, /*!< in: symbol table */ - const char* name) /*!< in: name of bound id */ -{ - sym_node_t* node; - pars_bound_id_t* bid; - - bid = pars_info_get_bound_id(sym_tab->info, name); - ut_a(bid); - - node = static_cast<sym_node_t*>( - mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t))); - - node->common.type = QUE_NODE_SYMBOL; - - node->table = NULL; - node->resolved = FALSE; - node->token_type = SYM_UNSET; - node->indirection = NULL; - - node->name = mem_heap_strdup(sym_tab->heap, bid->id); - node->name_len = strlen(node->name); - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - dfield_set_null(&node->common.val); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - node->like_node = NULL; - - node->sym_table = sym_tab; - - return(node); -} diff --git a/storage/xtradb/que/que0que.cc b/storage/xtradb/que/que0que.cc deleted file mode 100644 index e2dc0239e13..00000000000 --- a/storage/xtradb/que/que0que.cc +++ /dev/null @@ -1,1308 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file que/que0que.cc -Query graph - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#include "que0que.h" - -#ifdef UNIV_NONINL -#include "que0que.ic" -#endif - -#include "usr0sess.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "row0undo.h" -#include "row0ins.h" -#include "row0upd.h" -#include "row0sel.h" -#include "row0purge.h" -#include "dict0crea.h" -#include "log0log.h" -#include "eval0proc.h" -#include "lock0lock.h" -#include "eval0eval.h" -#include "pars0types.h" - -#define QUE_MAX_LOOPS_WITHOUT_CHECK 16 - -#ifdef UNIV_DEBUG -/* If the following flag is set TRUE, the module will print trace info -of SQL execution in the UNIV_SQL_DEBUG version */ -UNIV_INTERN ibool que_trace_on = FALSE; -#endif /* UNIV_DEBUG */ - -/* Short introduction to query graphs - ================================== - -A query graph consists of nodes linked to each other in various ways. The -execution starts at que_run_threads() which takes a que_thr_t parameter. -que_thr_t contains two fields that control query graph execution: run_node -and prev_node. run_node is the next node to execute and prev_node is the -last node executed. - -Each node has a pointer to a 'next' statement, i.e., its brother, and a -pointer to its parent node. The next pointer is NULL in the last statement -of a block. - -Loop nodes contain a link to the first statement of the enclosed statement -list. While the loop runs, que_thr_step() checks if execution to the loop -node came from its parent or from one of the statement nodes in the loop. 
If -it came from the parent of the loop node it starts executing the first -statement node in the loop. If it came from one of the statement nodes in -the loop, then it checks if the statement node has another statement node -following it, and runs it if so. - -To signify loop ending, the loop statements (see e.g. while_step()) set -que_thr_t->run_node to the loop node's parent node. This is noticed on the -next call of que_thr_step() and execution proceeds to the node pointed to by -the loop node's 'next' pointer. - -For example, the code: - -X := 1; -WHILE X < 5 LOOP - X := X + 1; - X := X + 1; -X := 5 - -will result in the following node hierarchy, with the X-axis indicating -'next' links and the Y-axis indicating parent/child links: - -A - W - A - | - | - A - A - -A = assign_node_t, W = while_node_t. */ - -/* How a stored procedure containing COMMIT or ROLLBACK commands -is executed? - -The commit or rollback can be seen as a subprocedure call. - -When the transaction starts to handle a rollback or commit. -It builds a query graph which, when executed, will roll back -or commit the incomplete transaction. The transaction -is moved to the TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state. -If specified, the SQL cursors opened by the transaction are closed. -When the execution of the graph completes, it is like returning -from a subprocedure: the query thread which requested the operation -starts running again. */ - -/**********************************************************************//** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction. -***NOTE***: This is the only function in which such a transition is allowed -to happen! */ -static -void -que_thr_move_to_run_state( -/*======================*/ - que_thr_t* thr); /*!< in: an query thread */ - -/***********************************************************************//** -Creates a query graph fork node. 
-@return own: fork node */ -UNIV_INTERN -que_fork_t* -que_fork_create( -/*============*/ - que_t* graph, /*!< in: graph, if NULL then this - fork node is assumed to be the - graph root */ - que_node_t* parent, /*!< in: parent node */ - ulint fork_type, /*!< in: fork type */ - mem_heap_t* heap) /*!< in: memory heap where created */ -{ - que_fork_t* fork; - - ut_ad(heap); - - fork = static_cast<que_fork_t*>(mem_heap_zalloc(heap, sizeof(*fork))); - - fork->heap = heap; - - fork->fork_type = fork_type; - - fork->common.parent = parent; - - fork->common.type = QUE_NODE_FORK; - - fork->state = QUE_FORK_COMMAND_WAIT; - - fork->graph = (graph != NULL) ? graph : fork; - - return(fork); -} - -/***********************************************************************//** -Creates a query graph thread node. -@return own: query thread node */ -UNIV_INTERN -que_thr_t* -que_thr_create( -/*===========*/ - que_fork_t* parent, /*!< in: parent node, i.e., a fork node */ - mem_heap_t* heap) /*!< in: memory heap where created */ -{ - que_thr_t* thr; - - ut_ad(parent && heap); - - thr = static_cast<que_thr_t*>(mem_heap_zalloc(heap, sizeof(*thr))); - - thr->graph = parent->graph; - - thr->common.parent = parent; - - thr->magic_n = QUE_THR_MAGIC_N; - - thr->common.type = QUE_NODE_THR; - - thr->state = QUE_THR_COMMAND_WAIT; - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - - UT_LIST_ADD_LAST(thrs, parent->thrs, thr); - - return(thr); -} - -/**********************************************************************//** -Moves a suspended query thread to the QUE_THR_RUNNING state and may release -a worker thread to execute it. This function should be used to end -the wait state of a query thread waiting for a lock or a stored procedure -completion. -@return the query thread that needs to be released. 
*/ -UNIV_INTERN -que_thr_t* -que_thr_end_lock_wait( -/*==================*/ - trx_t* trx) /*!< in: transaction with que_state in - QUE_THR_LOCK_WAIT */ -{ - que_thr_t* thr; - ibool was_active; - ulint sec; - ulint ms; - ib_uint64_t now; - - ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(trx)); - - thr = trx->lock.wait_thr; - - ut_ad(thr != NULL); - - ut_ad(trx->lock.que_state == TRX_QUE_LOCK_WAIT); - /* In MySQL this is the only possible state here */ - ut_a(thr->state == QUE_THR_LOCK_WAIT); - - was_active = thr->is_active; - - que_thr_move_to_run_state(thr); - - if (UNIV_UNLIKELY(trx->take_stats)) { - ut_usectime(&sec, &ms); - now = (ib_uint64_t)sec * 1000000 + ms; - trx->lock_que_wait_timer - += (ulint)(now - trx->lock_que_wait_ustarted); - } - - trx->lock.que_state = TRX_QUE_RUNNING; - - trx->lock.wait_thr = NULL; - - /* In MySQL we let the OS thread (not just the query thread) to wait - for the lock to be released: */ - - return((!was_active && thr != NULL) ? thr : NULL); -} - -/**********************************************************************//** -Inits a query thread for a command. */ -UNIV_INLINE -void -que_thr_init_command( -/*=================*/ - que_thr_t* thr) /*!< in: query thread */ -{ - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - que_thr_move_to_run_state(thr); -} - -/**********************************************************************//** -Round robin scheduler. -@return a query thread of the graph moved to QUE_THR_RUNNING state, or -NULL; the query thread should be executed by que_run_threads by the -caller */ -UNIV_INTERN -que_thr_t* -que_fork_scheduler_round_robin( -/*===========================*/ - que_fork_t* fork, /*!< in: a query fork */ - que_thr_t* thr) /*!< in: current pos */ -{ - trx_mutex_enter(fork->trx); - - /* If no current, start first available. 
*/ - if (thr == NULL) { - thr = UT_LIST_GET_FIRST(fork->thrs); - } else { - thr = UT_LIST_GET_NEXT(thrs, thr); - } - - if (thr) { - - fork->state = QUE_FORK_ACTIVE; - - fork->last_sel_node = NULL; - - switch (thr->state) { - case QUE_THR_COMMAND_WAIT: - case QUE_THR_COMPLETED: - ut_a(!thr->is_active); - que_thr_init_command(thr); - break; - - case QUE_THR_SUSPENDED: - case QUE_THR_LOCK_WAIT: - default: - ut_error; - - } - } - - trx_mutex_exit(fork->trx); - - return(thr); -} - -/**********************************************************************//** -Starts execution of a command in a query fork. Picks a query thread which -is not in the QUE_THR_RUNNING state and moves it to that state. If none -can be chosen, a situation which may arise in parallelized fetches, NULL -is returned. -@return a query thread of the graph moved to QUE_THR_RUNNING state, or -NULL; the query thread should be executed by que_run_threads by the -caller */ -UNIV_INTERN -que_thr_t* -que_fork_start_command( -/*===================*/ - que_fork_t* fork) /*!< in: a query fork */ -{ - que_thr_t* thr; - que_thr_t* suspended_thr = NULL; - que_thr_t* completed_thr = NULL; - - fork->state = QUE_FORK_ACTIVE; - - fork->last_sel_node = NULL; - - suspended_thr = NULL; - completed_thr = NULL; - - /* Choose the query thread to run: usually there is just one thread, - but in a parallelized select, which necessarily is non-scrollable, - there may be several to choose from */ - - /* First we try to find a query thread in the QUE_THR_COMMAND_WAIT - state. Then we try to find a query thread in the QUE_THR_SUSPENDED - state, finally we try to find a query thread in the QUE_THR_COMPLETED - state */ - - /* We make a single pass over the thr list within which we note which - threads are ready to run. 
*/ - for (thr = UT_LIST_GET_FIRST(fork->thrs); - thr != NULL; - thr = UT_LIST_GET_NEXT(thrs, thr)) { - - switch (thr->state) { - case QUE_THR_COMMAND_WAIT: - - /* We have to send the initial message to query thread - to start it */ - - que_thr_init_command(thr); - - return(thr); - - case QUE_THR_SUSPENDED: - /* In this case the execution of the thread was - suspended: no initial message is needed because - execution can continue from where it was left */ - if (!suspended_thr) { - suspended_thr = thr; - } - - break; - - case QUE_THR_COMPLETED: - if (!completed_thr) { - completed_thr = thr; - } - - break; - - case QUE_THR_LOCK_WAIT: - ut_error; - - } - } - - if (suspended_thr) { - - thr = suspended_thr; - que_thr_move_to_run_state(thr); - - } else if (completed_thr) { - - thr = completed_thr; - que_thr_init_command(thr); - } else { - ut_error; - } - - return(thr); -} - -/**********************************************************************//** -Calls que_graph_free_recursive for statements in a statement list. */ -static -void -que_graph_free_stat_list( -/*=====================*/ - que_node_t* node) /*!< in: first query graph node in the list */ -{ - while (node) { - que_graph_free_recursive(node); - - node = que_node_get_next(node); - } -} - -/**********************************************************************//** -Frees a query graph, but not the heap where it was created. Does not free -explicit cursor declarations, they are freed in que_graph_free. 
*/ -UNIV_INTERN -void -que_graph_free_recursive( -/*=====================*/ - que_node_t* node) /*!< in: query graph node */ -{ - que_fork_t* fork; - que_thr_t* thr; - undo_node_t* undo; - sel_node_t* sel; - ins_node_t* ins; - upd_node_t* upd; - tab_node_t* cre_tab; - ind_node_t* cre_ind; - purge_node_t* purge; - - if (node == NULL) { - - return; - } - - switch (que_node_get_type(node)) { - - case QUE_NODE_FORK: - fork = static_cast<que_fork_t*>(node); - - thr = UT_LIST_GET_FIRST(fork->thrs); - - while (thr) { - que_graph_free_recursive(thr); - - thr = UT_LIST_GET_NEXT(thrs, thr); - } - - break; - case QUE_NODE_THR: - - thr = static_cast<que_thr_t*>(node); - - if (thr->magic_n != QUE_THR_MAGIC_N) { - fprintf(stderr, - "que_thr struct appears corrupt;" - " magic n %lu\n", - (unsigned long) thr->magic_n); - mem_analyze_corruption(thr); - ut_error; - } - - thr->magic_n = QUE_THR_MAGIC_FREED; - - que_graph_free_recursive(thr->child); - - break; - case QUE_NODE_UNDO: - - undo = static_cast<undo_node_t*>(node); - - mem_heap_free(undo->heap); - - break; - case QUE_NODE_SELECT: - - sel = static_cast<sel_node_t*>(node); - - sel_node_free_private(sel); - - break; - case QUE_NODE_INSERT: - - ins = static_cast<ins_node_t*>(node); - - que_graph_free_recursive(ins->select); - - mem_heap_free(ins->entry_sys_heap); - - break; - case QUE_NODE_PURGE: - purge = static_cast<purge_node_t*>(node); - - mem_heap_free(purge->heap); - - break; - - case QUE_NODE_UPDATE: - - upd = static_cast<upd_node_t*>(node); - - if (upd->in_mysql_interface) { - - btr_pcur_free_for_mysql(upd->pcur); - } - - que_graph_free_recursive(upd->cascade_node); - - if (upd->cascade_heap) { - mem_heap_free(upd->cascade_heap); - } - - que_graph_free_recursive(upd->select); - - mem_heap_free(upd->heap); - - break; - case QUE_NODE_CREATE_TABLE: - cre_tab = static_cast<tab_node_t*>(node); - - que_graph_free_recursive(cre_tab->tab_def); - que_graph_free_recursive(cre_tab->col_def); - 
que_graph_free_recursive(cre_tab->commit_node); - - mem_heap_free(cre_tab->heap); - - break; - case QUE_NODE_CREATE_INDEX: - cre_ind = static_cast<ind_node_t*>(node); - - que_graph_free_recursive(cre_ind->ind_def); - que_graph_free_recursive(cre_ind->field_def); - que_graph_free_recursive(cre_ind->commit_node); - - mem_heap_free(cre_ind->heap); - - break; - case QUE_NODE_PROC: - que_graph_free_stat_list(((proc_node_t*) node)->stat_list); - - break; - case QUE_NODE_IF: - que_graph_free_stat_list(((if_node_t*) node)->stat_list); - que_graph_free_stat_list(((if_node_t*) node)->else_part); - que_graph_free_stat_list(((if_node_t*) node)->elsif_list); - - break; - case QUE_NODE_ELSIF: - que_graph_free_stat_list(((elsif_node_t*) node)->stat_list); - - break; - case QUE_NODE_WHILE: - que_graph_free_stat_list(((while_node_t*) node)->stat_list); - - break; - case QUE_NODE_FOR: - que_graph_free_stat_list(((for_node_t*) node)->stat_list); - - break; - - case QUE_NODE_ASSIGNMENT: - case QUE_NODE_EXIT: - case QUE_NODE_RETURN: - case QUE_NODE_COMMIT: - case QUE_NODE_ROLLBACK: - case QUE_NODE_LOCK: - case QUE_NODE_FUNC: - case QUE_NODE_ORDER: - case QUE_NODE_ROW_PRINTF: - case QUE_NODE_OPEN: - case QUE_NODE_FETCH: - /* No need to do anything */ - - break; - default: - fprintf(stderr, - "que_node struct appears corrupt; type %lu\n", - (unsigned long) que_node_get_type(node)); - mem_analyze_corruption(node); - ut_error; - } -} - -/**********************************************************************//** -Frees a query graph. */ -UNIV_INTERN -void -que_graph_free( -/*===========*/ - que_t* graph) /*!< in: query graph; we assume that the memory - heap where this graph was created is private - to this graph: if not, then use - que_graph_free_recursive and free the heap - afterwards! */ -{ - ut_ad(graph); - - if (graph->sym_tab) { - /* The following call frees dynamic memory allocated - for variables etc. during execution. Frees also explicit - cursor definitions. 
*/ - - sym_tab_free_private(graph->sym_tab); - } - - if (graph->info && graph->info->graph_owns_us) { - pars_info_free(graph->info); - } - - que_graph_free_recursive(graph); - - mem_heap_free(graph->heap); -} - -/****************************************************************//** -Performs an execution step on a thr node. -@return query thread to run next, or NULL if none */ -static -que_thr_t* -que_thr_node_step( -/*==============*/ - que_thr_t* thr) /*!< in: query thread where run_node must - be the thread node itself */ -{ - ut_ad(thr->run_node == thr); - - if (thr->prev_node == thr->common.parent) { - /* If control to the node came from above, it is just passed - on */ - - thr->run_node = thr->child; - - return(thr); - } - - trx_mutex_enter(thr_get_trx(thr)); - - if (que_thr_peek_stop(thr)) { - - trx_mutex_exit(thr_get_trx(thr)); - - return(thr); - } - - /* Thread execution completed */ - - thr->state = QUE_THR_COMPLETED; - - trx_mutex_exit(thr_get_trx(thr)); - - return(NULL); -} - -/**********************************************************************//** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction if thr was -not active. -***NOTE***: This and ..._mysql are the only functions in which such a -transition is allowed to happen! */ -static -void -que_thr_move_to_run_state( -/*======================*/ - que_thr_t* thr) /*!< in: an query thread */ -{ - ut_ad(thr->state != QUE_THR_RUNNING); - - if (!thr->is_active) { - trx_t* trx; - - trx = thr_get_trx(thr); - - thr->graph->n_active_thrs++; - - trx->lock.n_active_thrs++; - - thr->is_active = TRUE; - } - - thr->state = QUE_THR_RUNNING; -} - -/**********************************************************************//** -Stops a query thread if graph or trx is in a state requiring it. The -conditions are tested in the order (1) graph, (2) trx. 
-@return TRUE if stopped */ -UNIV_INTERN -ibool -que_thr_stop( -/*=========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - que_t* graph; - trx_t* trx = thr_get_trx(thr); - - graph = thr->graph; - - ut_ad(trx_mutex_own(trx)); - - if (graph->state == QUE_FORK_COMMAND_WAIT) { - - thr->state = QUE_THR_SUSPENDED; - - } else if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - - trx->lock.wait_thr = thr; - thr->state = QUE_THR_LOCK_WAIT; - - } else if (trx->error_state != DB_SUCCESS - && trx->error_state != DB_LOCK_WAIT) { - - /* Error handling built for the MySQL interface */ - thr->state = QUE_THR_COMPLETED; - - } else if (graph->fork_type == QUE_FORK_ROLLBACK) { - - thr->state = QUE_THR_SUSPENDED; - } else { - ut_ad(graph->state == QUE_FORK_ACTIVE); - - return(FALSE); - } - - return(TRUE); -} - -/**********************************************************************//** -Decrements the query thread reference counts in the query graph and the -transaction. -*** NOTE ***: -This and que_thr_stop_for_mysql are the only functions where the reference -count can be decremented and this function may only be called from inside -que_run_threads! These restrictions exist to make the rollback code easier -to maintain. */ -static -void -que_thr_dec_refer_count( -/*====================*/ - que_thr_t* thr, /*!< in: query thread */ - que_thr_t** next_thr) /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -{ - trx_t* trx; - que_fork_t* fork; - - trx = thr_get_trx(thr); - - ut_a(thr->is_active); - ut_ad(trx_mutex_own(trx)); - - if (thr->state == QUE_THR_RUNNING) { - - if (!que_thr_stop(thr)) { - - ut_a(next_thr != NULL && *next_thr == NULL); - - /* The reason for the thr suspension or wait was - already canceled before we came here: continue - running the thread. - - This is also possible because in trx_commit_step() we - assume a single query thread. 
We set the query thread - state to QUE_THR_RUNNING. */ - - /* fprintf(stderr, - "Wait already ended: trx: %p\n", trx); */ - - /* Normally srv_suspend_mysql_thread resets - the state to DB_SUCCESS before waiting, but - in this case we have to do it here, - otherwise nobody does it. */ - - trx->error_state = DB_SUCCESS; - - *next_thr = thr; - - return; - } - } - - fork = static_cast<que_fork_t*>(thr->common.parent); - - --trx->lock.n_active_thrs; - - --fork->n_active_thrs; - - thr->is_active = FALSE; -} - -/**********************************************************************//** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The -query thread is stopped and made inactive, except in the case where -it was put to the lock wait state in lock0lock.cc, but the lock has already -been granted or the transaction chosen as a victim in deadlock resolution. */ -UNIV_INTERN -void -que_thr_stop_for_mysql( -/*===================*/ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; - - trx = thr_get_trx(thr); - - /* Can't be the purge transaction. */ - ut_a(trx->id != 0); - - trx_mutex_enter(trx); - - if (thr->state == QUE_THR_RUNNING) { - - if (trx->error_state != DB_SUCCESS - && trx->error_state != DB_LOCK_WAIT) { - - /* Error handling built for the MySQL interface */ - thr->state = QUE_THR_COMPLETED; - } else { - /* It must have been a lock wait but the lock was - already released, or this transaction was chosen - as a victim in selective deadlock resolution */ - - trx_mutex_exit(trx); - - return; - } - } - - ut_ad(thr->is_active == TRUE); - ut_ad(trx->lock.n_active_thrs == 1); - ut_ad(thr->graph->n_active_thrs == 1); - - thr->is_active = FALSE; - thr->graph->n_active_thrs--; - - trx->lock.n_active_thrs--; - - trx_mutex_exit(trx); -} - -/**********************************************************************//** -Moves a thread from another state to the QUE_THR_RUNNING state. 
Increments -the n_active_thrs counters of the query graph and transaction if thr was -not active. */ -UNIV_INTERN -void -que_thr_move_to_run_state_for_mysql( -/*================================*/ - que_thr_t* thr, /*!< in: an query thread */ - trx_t* trx) /*!< in: transaction */ -{ - if (thr->magic_n != QUE_THR_MAGIC_N) { - fprintf(stderr, - "que_thr struct appears corrupt; magic n %lu\n", - (unsigned long) thr->magic_n); - - mem_analyze_corruption(thr); - - ut_error; - } - - if (!thr->is_active) { - - thr->graph->n_active_thrs++; - - trx->lock.n_active_thrs++; - - thr->is_active = TRUE; - } - - thr->state = QUE_THR_RUNNING; -} - -/**********************************************************************//** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL -select, when there is no error or lock wait. */ -UNIV_INTERN -void -que_thr_stop_for_mysql_no_error( -/*============================*/ - que_thr_t* thr, /*!< in: query thread */ - trx_t* trx) /*!< in: transaction */ -{ - ut_ad(thr->state == QUE_THR_RUNNING); - ut_ad(thr_get_trx(thr)->id != 0); - ut_ad(thr->is_active == TRUE); - ut_ad(trx->lock.n_active_thrs == 1); - ut_ad(thr->graph->n_active_thrs == 1); - - if (thr->magic_n != QUE_THR_MAGIC_N) { - fprintf(stderr, - "que_thr struct appears corrupt; magic n %lu\n", - (unsigned long) thr->magic_n); - - mem_analyze_corruption(thr); - - ut_error; - } - - thr->state = QUE_THR_COMPLETED; - - thr->is_active = FALSE; - thr->graph->n_active_thrs--; - - trx->lock.n_active_thrs--; -} - -/****************************************************************//** -Get the first containing loop node (e.g. while_node_t or for_node_t) for the -given node, or NULL if the node is not within a loop. -@return containing loop node, or NULL. 
*/ -UNIV_INTERN -que_node_t* -que_node_get_containing_loop_node( -/*==============================*/ - que_node_t* node) /*!< in: node */ -{ - ut_ad(node); - - for (;;) { - ulint type; - - node = que_node_get_parent(node); - - if (!node) { - break; - } - - type = que_node_get_type(node); - - if ((type == QUE_NODE_FOR) || (type == QUE_NODE_WHILE)) { - break; - } - } - - return(node); -} - -/**********************************************************************//** -Prints info of an SQL query graph node. */ -UNIV_INTERN -void -que_node_print_info( -/*================*/ - que_node_t* node) /*!< in: query graph node */ -{ - ulint type; - const char* str; - - type = que_node_get_type(node); - - if (type == QUE_NODE_SELECT) { - str = "SELECT"; - } else if (type == QUE_NODE_INSERT) { - str = "INSERT"; - } else if (type == QUE_NODE_UPDATE) { - str = "UPDATE"; - } else if (type == QUE_NODE_WHILE) { - str = "WHILE"; - } else if (type == QUE_NODE_ASSIGNMENT) { - str = "ASSIGNMENT"; - } else if (type == QUE_NODE_IF) { - str = "IF"; - } else if (type == QUE_NODE_FETCH) { - str = "FETCH"; - } else if (type == QUE_NODE_OPEN) { - str = "OPEN"; - } else if (type == QUE_NODE_PROC) { - str = "STORED PROCEDURE"; - } else if (type == QUE_NODE_FUNC) { - str = "FUNCTION"; - } else if (type == QUE_NODE_LOCK) { - str = "LOCK"; - } else if (type == QUE_NODE_THR) { - str = "QUERY THREAD"; - } else if (type == QUE_NODE_COMMIT) { - str = "COMMIT"; - } else if (type == QUE_NODE_UNDO) { - str = "UNDO ROW"; - } else if (type == QUE_NODE_PURGE) { - str = "PURGE ROW"; - } else if (type == QUE_NODE_ROLLBACK) { - str = "ROLLBACK"; - } else if (type == QUE_NODE_CREATE_TABLE) { - str = "CREATE TABLE"; - } else if (type == QUE_NODE_CREATE_INDEX) { - str = "CREATE INDEX"; - } else if (type == QUE_NODE_FOR) { - str = "FOR LOOP"; - } else if (type == QUE_NODE_RETURN) { - str = "RETURN"; - } else if (type == QUE_NODE_EXIT) { - str = "EXIT"; - } else { - str = "UNKNOWN NODE TYPE"; - } - - fprintf(stderr, 
"Node type %lu: %s, address %p\n", - (ulong) type, str, (void*) node); -} - -/**********************************************************************//** -Performs an execution step on a query thread. -@return query thread to run next: it may differ from the input -parameter if, e.g., a subprocedure call is made */ -UNIV_INLINE -que_thr_t* -que_thr_step( -/*=========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - que_node_t* node; - que_thr_t* old_thr; - trx_t* trx; - ulint type; - - trx = thr_get_trx(thr); - - ut_ad(thr->state == QUE_THR_RUNNING); - ut_a(trx->error_state == DB_SUCCESS); - - thr->resource++; - - node = thr->run_node; - type = que_node_get_type(node); - - old_thr = thr; - -#ifdef UNIV_DEBUG - if (que_trace_on) { - fputs("To execute: ", stderr); - que_node_print_info(node); - } -#endif - if (type & QUE_NODE_CONTROL_STAT) { - if ((thr->prev_node != que_node_get_parent(node)) - && que_node_get_next(thr->prev_node)) { - - /* The control statements, like WHILE, always pass the - control to the next child statement if there is any - child left */ - - thr->run_node = que_node_get_next(thr->prev_node); - - } else if (type == QUE_NODE_IF) { - if_step(thr); - } else if (type == QUE_NODE_FOR) { - for_step(thr); - } else if (type == QUE_NODE_PROC) { - - /* We can access trx->undo_no without reserving - trx->undo_mutex, because there cannot be active query - threads doing updating or inserting at the moment! 
*/ - - if (thr->prev_node == que_node_get_parent(node)) { - trx->last_sql_stat_start.least_undo_no - = trx->undo_no; - } - - proc_step(thr); - } else if (type == QUE_NODE_WHILE) { - while_step(thr); - } else { - ut_error; - } - } else if (type == QUE_NODE_ASSIGNMENT) { - assign_step(thr); - } else if (type == QUE_NODE_SELECT) { - thr = row_sel_step(thr); - } else if (type == QUE_NODE_INSERT) { - thr = row_ins_step(thr); - } else if (type == QUE_NODE_UPDATE) { - thr = row_upd_step(thr); - } else if (type == QUE_NODE_FETCH) { - thr = fetch_step(thr); - } else if (type == QUE_NODE_OPEN) { - thr = open_step(thr); - } else if (type == QUE_NODE_FUNC) { - proc_eval_step(thr); - - } else if (type == QUE_NODE_LOCK) { - - ut_error; - } else if (type == QUE_NODE_THR) { - thr = que_thr_node_step(thr); - } else if (type == QUE_NODE_COMMIT) { - thr = trx_commit_step(thr); - } else if (type == QUE_NODE_UNDO) { - thr = row_undo_step(thr); - } else if (type == QUE_NODE_PURGE) { - thr = row_purge_step(thr); - } else if (type == QUE_NODE_RETURN) { - thr = return_step(thr); - } else if (type == QUE_NODE_EXIT) { - thr = exit_step(thr); - } else if (type == QUE_NODE_ROLLBACK) { - thr = trx_rollback_step(thr); - } else if (type == QUE_NODE_CREATE_TABLE) { - thr = dict_create_table_step(thr); - } else if (type == QUE_NODE_CREATE_INDEX) { - thr = dict_create_index_step(thr); - } else if (type == QUE_NODE_ROW_PRINTF) { - thr = row_printf_step(thr); - } else { - ut_error; - } - - if (type == QUE_NODE_EXIT) { - old_thr->prev_node = que_node_get_containing_loop_node(node); - } else { - old_thr->prev_node = node; - } - - if (thr) { - ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS); - } - - return(thr); -} - -/**********************************************************************//** -Run a query thread until it finishes or encounters e.g. a lock wait. 
*/ -static -void -que_run_threads_low( -/*================*/ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; - que_thr_t* next_thr; - - ut_ad(thr->state == QUE_THR_RUNNING); - ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS); - ut_ad(!trx_mutex_own(thr_get_trx(thr))); - - /* cumul_resource counts how much resources the OS thread (NOT the - query thread) has spent in this function */ - - trx = thr_get_trx(thr); - - do { - /* Check that there is enough space in the log to accommodate - possible log entries by this query step; if the operation can - touch more than about 4 pages, checks must be made also within - the query step! */ - - log_free_check(); - - /* Perform the actual query step: note that the query thread - may change if, e.g., a subprocedure call is made */ - - /*-------------------------*/ - next_thr = que_thr_step(thr); - /*-------------------------*/ - - trx_mutex_enter(trx); - - ut_a(next_thr == NULL || trx->error_state == DB_SUCCESS); - - if (next_thr != thr) { - ut_a(next_thr == NULL); - - /* This can change next_thr to a non-NULL value - if there was a lock wait that already completed. */ - - que_thr_dec_refer_count(thr, &next_thr); - - if (next_thr != NULL) { - - thr = next_thr; - } - } - - ut_ad(trx == thr_get_trx(thr)); - - trx_mutex_exit(trx); - - } while (next_thr != NULL); -} - -/**********************************************************************//** -Run a query thread. Handles lock waits. 
*/ -UNIV_INTERN -void -que_run_threads( -/*============*/ - que_thr_t* thr) /*!< in: query thread */ -{ - ut_ad(!trx_mutex_own(thr_get_trx(thr))); - -loop: - ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS); - - que_run_threads_low(thr); - - switch (thr->state) { - - case QUE_THR_RUNNING: - /* There probably was a lock wait, but it already ended - before we came here: continue running thr */ - - goto loop; - - case QUE_THR_LOCK_WAIT: - lock_wait_suspend_thread(thr); - - trx_mutex_enter(thr_get_trx(thr)); - - ut_a(thr_get_trx(thr)->id != 0); - - if (thr_get_trx(thr)->error_state != DB_SUCCESS) { - /* thr was chosen as a deadlock victim or there was - a lock wait timeout */ - - que_thr_dec_refer_count(thr, NULL); - trx_mutex_exit(thr_get_trx(thr)); - break; - } - - trx_mutex_exit(thr_get_trx(thr)); - goto loop; - - case QUE_THR_COMPLETED: - case QUE_THR_COMMAND_WAIT: - /* Do nothing */ - break; - - default: - ut_error; - } -} - -/*********************************************************************//** -Evaluate the given SQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -que_eval_sql( -/*=========*/ - pars_info_t* info, /*!< in: info struct, or NULL */ - const char* sql, /*!< in: SQL string */ - ibool reserve_dict_mutex, - /*!< in: if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. 
*/ - trx_t* trx) /*!< in: trx */ -{ - que_thr_t* thr; - que_t* graph; - - ut_a(trx->error_state == DB_SUCCESS); - - if (UNIV_UNLIKELY(trx->fake_changes)) { - /* fake_changes should not access to system tables */ - fprintf(stderr, "InnoDB: ERROR: innodb_fake_changes tried to access to system tables.\n"); - return(DB_ERROR); - } - - if (reserve_dict_mutex) { - mutex_enter(&dict_sys->mutex); - } - - graph = pars_sql(info, sql); - - if (reserve_dict_mutex) { - mutex_exit(&dict_sys->mutex); - } - - ut_a(graph); - - graph->trx = trx; - trx->graph = NULL; - - graph->fork_type = QUE_FORK_MYSQL_INTERFACE; - - ut_a(thr = que_fork_start_command(graph)); - - que_run_threads(thr); - - if (reserve_dict_mutex) { - mutex_enter(&dict_sys->mutex); - } - - que_graph_free(graph); - - if (reserve_dict_mutex) { - mutex_exit(&dict_sys->mutex); - } - - return(trx->error_state); -} - -/*********************************************************************//** -Initialise the query sub-system. */ -UNIV_INTERN -void -que_init(void) -/*==========*/ -{ - /* No op */ -} - -/*********************************************************************//** -Close the query sub-system. */ -UNIV_INTERN -void -que_close(void) -/*===========*/ -{ - /* No op */ -} diff --git a/storage/xtradb/read/read0read.cc b/storage/xtradb/read/read0read.cc deleted file mode 100644 index c350e24dbb0..00000000000 --- a/storage/xtradb/read/read0read.cc +++ /dev/null @@ -1,691 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file read/read0read.cc -Cursor read - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#include "read0read.h" -#include "read0i_s.h" - -#ifdef UNIV_NONINL -#include "read0read.ic" -#endif - -#include "srv0srv.h" -#include "trx0sys.h" - -/* -------------------------------------------------------------------------------- -FACT A: Cursor read view on a secondary index sees only committed versions -------- -of the records in the secondary index or those versions of rows created -by transaction which created a cursor before cursor was created even -if transaction which created the cursor has changed that clustered index page. - -PROOF: We must show that read goes always to the clustered index record -to see that record is visible in the cursor read view. Consider e.g. -following table and SQL-clauses: - -create table t1(a int not null, b int, primary key(a), index(b)); -insert into t1 values (1,1),(2,2); -commit; - -Now consider that we have a cursor for a query - -select b from t1 where b >= 1; - -This query will use secondary key on the table t1. Now after the first fetch -on this cursor if we do a update: - -update t1 set b = 5 where b = 2; - -Now second fetch of the cursor should not see record (2,5) instead it should -see record (2,2). - -We also should show that if we have delete t1 where b = 5; we still -can see record (2,2). - -When we access a secondary key record maximum transaction id is fetched -from this record and this trx_id is compared to up_limit_id in the view. 
-If trx_id in the record is greater or equal than up_limit_id in the view -cluster record is accessed. Because trx_id of the creating -transaction is stored when this view was created to the list of -trx_ids not seen by this read view previous version of the -record is requested to be built. This is build using clustered record. -If the secondary key record is delete-marked, its corresponding -clustered record can be already be purged only if records -trx_id < low_limit_no. Purge can't remove any record deleted by a -transaction which was active when cursor was created. But, we still -may have a deleted secondary key record but no clustered record. But, -this is not a problem because this case is handled in -row_sel_get_clust_rec() function which is called -whenever we note that this read view does not see trx_id in the -record. Thus, we see correct version. Q. E. D. - -------------------------------------------------------------------------------- -FACT B: Cursor read view on a clustered index sees only committed versions -------- -of the records in the clustered index or those versions of rows created -by transaction which created a cursor before cursor was created even -if transaction which created the cursor has changed that clustered index page. - -PROOF: Consider e.g.following table and SQL-clauses: - -create table t1(a int not null, b int, primary key(a)); -insert into t1 values (1),(2); -commit; - -Now consider that we have a cursor for a query - -select a from t1 where a >= 1; - -This query will use clustered key on the table t1. Now after the first fetch -on this cursor if we do a update: - -update t1 set a = 5 where a = 2; - -Now second fetch of the cursor should not see record (5) instead it should -see record (2). - -We also should show that if we have execute delete t1 where a = 5; after -the cursor is opened we still can see record (2). - -When accessing clustered record we always check if this read view sees -trx_id stored to clustered record. 
By default we don't see any changes -if record trx_id >= low_limit_id i.e. change was made transaction -which started after transaction which created the cursor. If row -was changed by the future transaction a previous version of the -clustered record is created. Thus we see only committed version in -this case. We see all changes made by committed transactions i.e. -record trx_id < up_limit_id. In this case we don't need to do anything, -we already see correct version of the record. We don't see any changes -made by active transaction except creating transaction. We have stored -trx_id of creating transaction to list of trx_ids when this view was -created. Thus we can easily see if this record was changed by the -creating transaction. Because we already have clustered record we can -access roll_ptr. Using this roll_ptr we can fetch undo record. -We can now check that undo_no of the undo record is less than undo_no of the -trancaction which created a view when cursor was created. We see this -clustered record only in case when record undo_no is less than undo_no -in the view. If this is not true we build based on undo_rec previous -version of the record. This record is found because purge can't remove -records accessed by active transaction. Thus we see correct version. Q. E. D. -------------------------------------------------------------------------------- -FACT C: Purge does not remove any delete-marked row that is visible -------- -in any cursor read view. - -PROOF: We know that: - 1: Currently active read views in trx_sys_t::view_list are ordered by - read_view_t::low_limit_no in descending order, that is, - newest read view first. - - 2: Purge clones the oldest read view and uses that to determine whether there - are any active transactions that can see the to be purged records. - -Therefore any joining or active transaction will not have a view older -than the purge view, according to 1. 
- -When purge needs to remove a delete-marked row from a secondary index, -it will first check that the DB_TRX_ID value of the corresponding -record in the clustered index is older than the purge view. It will -also check if there is a newer version of the row (clustered index -record) that is not delete-marked in the secondary index. If such a -row exists and is collation-equal to the delete-marked secondary index -record then purge will not remove the secondary index record. - -Delete-marked clustered index records will be removed by -row_purge_remove_clust_if_poss(), unless the clustered index record -(and its DB_ROLL_PTR) has been updated. Every new version of the -clustered index record will update DB_ROLL_PTR, pointing to a new UNDO -log entry that allows the old version to be reconstructed. The -DB_ROLL_PTR in the oldest remaining version in the old-version chain -may be pointing to garbage (an undo log record discarded by purge), -but it will never be dereferenced, because the purge view is older -than any active transaction. - -For details see: row_vers_old_has_index_entry() and row_purge_poss_sec() - -Some additional issues: - -What if trx_sys->view_list == NULL and some transaction T1 and Purge both -try to open read_view at same time. Only one can acquire trx_sys->mutex. -In which order will the views be opened? Should it matter? If no, why? - -The order does not matter. No new transactions can be created and no running -transaction can commit or rollback (or free views). -*/ - -/*********************************************************************//** -Creates a read view object. 
-@return own: read view struct */ -UNIV_INLINE -read_view_t* -read_view_create_low( -/*=================*/ - ulint n, /*!< in: number of cells in the trx_ids array */ - read_view_t*& view) /*!< in,out: pre-allocated view array or NULL if - a new one needs to be created */ -{ - if (view == NULL) { - view = static_cast<read_view_t*>( - ut_malloc(sizeof(read_view_t))); - os_atomic_increment_ulint(&srv_read_views_memory, - sizeof(read_view_t)); - view->max_descr = 0; - view->descriptors = NULL; - } - - if (UNIV_UNLIKELY(view->max_descr < n)) { - - /* avoid frequent re-allocations by extending the array to the - desired size + 10% */ - - os_atomic_increment_ulint(&srv_read_views_memory, - (n + n / 10 - view->max_descr) * - sizeof(trx_id_t)); - view->max_descr = n + n / 10; - view->descriptors = static_cast<trx_id_t*>( - ut_realloc(view->descriptors, - view->max_descr * - sizeof *view->descriptors)); - } - - view->n_descr = n; - - return(view); -} - -/*********************************************************************//** -Clones a read view object. This function will allocate space for two read -views contiguously, one identical in size and content as @param view (starting -at returned pointer) and another view immediately following the trx_ids array. -The second view will have space for an extra trx_id_t element. 
-@return read view struct */ -UNIV_INTERN -read_view_t* -read_view_clone( -/*============*/ - const read_view_t* view, /*!< in: view to clone */ - read_view_t*& prebuilt_clone) /*!< in,out: prebuilt view or - NULL */ -{ - read_view_t* clone; - trx_id_t* old_descriptors; - ulint old_max_descr; - - ut_ad(mutex_own(&trx_sys->mutex)); - - clone = read_view_create_low(view->n_descr, prebuilt_clone); - - old_descriptors = clone->descriptors; - old_max_descr = clone->max_descr; - - memcpy(clone, view, sizeof(*view)); - - clone->descriptors = old_descriptors; - clone->max_descr = old_max_descr; - - if (view->n_descr) { - memcpy(clone->descriptors, view->descriptors, - view->n_descr * sizeof(trx_id_t)); - } - - return(clone); -} - -/*********************************************************************//** -Insert the view in the proper order into the trx_sys->view_list. The -read view list is ordered by read_view_t::low_limit_no in descending order. */ -UNIV_INTERN -void -read_view_add( -/*==========*/ - read_view_t* view) /*!< in: view to add to */ -{ - read_view_t* elem; - read_view_t* prev_elem; - - ut_ad(mutex_own(&trx_sys->mutex)); - ut_ad(read_view_validate(view)); - - /* Find the correct slot for insertion. */ - for (elem = UT_LIST_GET_FIRST(trx_sys->view_list), prev_elem = NULL; - elem != NULL && view->low_limit_no < elem->low_limit_no; - prev_elem = elem, elem = UT_LIST_GET_NEXT(view_list, elem)) { - /* No op */ - } - - if (prev_elem == NULL) { - UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view); - } else { - UT_LIST_INSERT_AFTER( - view_list, trx_sys->view_list, prev_elem, view); - } - - ut_ad(read_view_list_validate()); -} - -/*********************************************************************//** -Opens a read view where exactly the transactions serialized before this -point in time are seen in the view. 
-@return own: read view struct */ -static -read_view_t* -read_view_open_now_low( -/*===================*/ - trx_id_t cr_trx_id, /*!< in: trx_id of creating - transaction, or 0 used in purge */ - read_view_t*& view) /*!< in,out: pre-allocated view array or - NULL if a new one needs to be created */ -{ - trx_id_t* descr; - ulint i; - - ut_ad(mutex_own(&trx_sys->mutex)); - - view = read_view_create_low(trx_sys->descr_n_used, view); - - view->undo_no = 0; - view->type = VIEW_NORMAL; - view->creator_trx_id = cr_trx_id; - - /* No future transactions should be visible in the view */ - - view->low_limit_no = trx_sys->max_trx_id; - view->low_limit_id = view->low_limit_no; - - descr = trx_find_descriptor(trx_sys->descriptors, - trx_sys->descr_n_used, - cr_trx_id); - if (UNIV_LIKELY(descr != NULL)) { - ut_ad(trx_sys->descr_n_used > 0); - ut_ad(view->n_descr > 0); - - view->n_descr--; - - i = descr - trx_sys->descriptors; - } else { - i = trx_sys->descr_n_used; - } - - if (UNIV_LIKELY(i > 0)) { - /* Copy the [0; i-1] range */ - memcpy(view->descriptors, trx_sys->descriptors, - i * sizeof(trx_id_t)); - } - - if (UNIV_UNLIKELY(i + 1 < trx_sys->descr_n_used)) { - /* Copy the [i+1; descr_n_used-1] range */ - memcpy(view->descriptors + i, - trx_sys->descriptors + i + 1, - (trx_sys->descr_n_used - i - 1) * - sizeof(trx_id_t)); - } - - /* NOTE that a transaction whose trx number is < trx_sys->max_trx_id can - still be active, if it is in the middle of its commit! Note that when a - transaction starts, we initialize trx->no to TRX_ID_MAX. 
*/ - - if (UT_LIST_GET_LEN(trx_sys->trx_serial_list) > 0) { - - trx_id_t trx_no; - - trx_no = UT_LIST_GET_FIRST(trx_sys->trx_serial_list)->no; - - if (trx_no < view->low_limit_no) { - view->low_limit_no = trx_no; - } - } - - if (UNIV_LIKELY(view->n_descr > 0)) { - /* The last active transaction has the smallest id: */ - view->up_limit_id = view->descriptors[0]; - } else { - view->up_limit_id = view->low_limit_id; - } - - /* Purge views are not added to the view list. */ - if (cr_trx_id > 0) { - read_view_add(view); - } - - return(view); -} - -/*********************************************************************//** -Opens a read view where exactly the transactions serialized before this -point in time are seen in the view. -@return own: read view struct */ -UNIV_INTERN -read_view_t* -read_view_open_now( -/*===============*/ - trx_id_t cr_trx_id, /*!< in: trx_id of creating - transaction, or 0 used in purge */ - read_view_t*& view) /*!< in,out: pre-allocated view array or - NULL if a new one needs to be created */ -{ - mutex_enter(&trx_sys->mutex); - - view = read_view_open_now_low(cr_trx_id, view); - - mutex_exit(&trx_sys->mutex); - - return(view); -} - -/*********************************************************************//** -Makes a copy of the oldest existing read view, with the exception that also -the creating trx of the oldest view is set as not visible in the 'copied' -view. Opens a new view if no views currently exist. The view must be closed -with ..._close. This is used in purge. 
-@return own: read view struct */ -UNIV_INTERN -read_view_t* -read_view_purge_open( -/*=================*/ - read_view_t*& prebuilt_clone, /*!< in,out: pre-allocated view that - will be used to clone the oldest view if - exists */ - read_view_t*& prebuilt_view) /*!< in,out: pre-allocated view array or - NULL if a new one needs to be created */ -{ - ulint i; - read_view_t* view; - read_view_t* oldest_view; - trx_id_t creator_trx_id; - ulint insert_done = 0; - - mutex_enter(&trx_sys->mutex); - - oldest_view = UT_LIST_GET_LAST(trx_sys->view_list); - - if (oldest_view == NULL) { - - view = read_view_open_now_low(0, prebuilt_view); - - mutex_exit(&trx_sys->mutex); - - return(view); - } - - /* Clone the oldest view to a pre-allocated clone view */ - - oldest_view = read_view_clone(oldest_view, prebuilt_clone); - - ut_ad(read_view_validate(oldest_view)); - - mutex_exit(&trx_sys->mutex); - - ut_a(oldest_view->creator_trx_id > 0); - creator_trx_id = oldest_view->creator_trx_id; - - view = read_view_create_low(oldest_view->n_descr + 1, prebuilt_view); - - /* Add the creator transaction id in the trx_ids array in the - correct slot. 
*/ - - for (i = 0; i < oldest_view->n_descr; ++i) { - trx_id_t id; - - id = oldest_view->descriptors[i - insert_done]; - - if (insert_done == 0 && creator_trx_id < id) { - id = creator_trx_id; - insert_done = 1; - } - - view->descriptors[i] = id; - } - - if (insert_done == 0) { - view->descriptors[i] = creator_trx_id; - } else { - ut_a(i > 0); - view->descriptors[i] = oldest_view->descriptors[i - 1]; - } - - view->creator_trx_id = 0; - - view->low_limit_no = oldest_view->low_limit_no; - view->low_limit_id = oldest_view->low_limit_id; - - if (view->n_descr > 0) { - /* The last active transaction has the smallest id: */ - - view->up_limit_id = view->descriptors[0]; - } else { - view->up_limit_id = oldest_view->up_limit_id; - } - - return(view); -} - -/*********************************************************************//** -Closes a consistent read view for MySQL. This function is called at an SQL -statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */ -UNIV_INTERN -void -read_view_close_for_mysql( -/*======================*/ - trx_t* trx) /*!< in: trx which has a read view */ -{ - ut_a(trx->global_read_view); - - read_view_remove(trx->global_read_view, false); - - trx->read_view = NULL; - trx->global_read_view = NULL; -} - -/*********************************************************************//** -Prints a read view to file. 
*/ -UNIV_INTERN -void -read_view_print( -/*============*/ - FILE* file, /*!< in: file to print to */ - const read_view_t* view) /*!< in: read view */ -{ - ulint n_ids; - ulint i; - - if (view->type == VIEW_HIGH_GRANULARITY) { - fprintf(file, - "High-granularity read view undo_n:o " TRX_ID_FMT "\n", - view->undo_no); - } else { - fprintf(file, "Normal read view\n"); - } - - fprintf(file, "Read view low limit trx n:o " TRX_ID_FMT "\n", - view->low_limit_no); - - fprintf(file, "Read view up limit trx id " TRX_ID_FMT "\n", - view->up_limit_id); - - fprintf(file, "Read view low limit trx id " TRX_ID_FMT "\n", - view->low_limit_id); - - fprintf(file, "Read view individually stored trx ids:\n"); - - n_ids = view->n_descr; - - for (i = 0; i < n_ids; i++) { - fprintf(file, "Read view trx id " TRX_ID_FMT "\n", - view->descriptors[i]); - } -} - -UNIV_INTERN -i_s_xtradb_read_view_t* -read_fill_i_s_xtradb_read_view(i_s_xtradb_read_view_t* rv) -{ - read_view_t* view; - - mutex_enter(&trx_sys->mutex); - - if (UT_LIST_GET_LEN(trx_sys->view_list)) { - view = UT_LIST_GET_LAST(trx_sys->view_list); - } else { - mutex_exit(&trx_sys->mutex); - return NULL; - } - - if (view->type == VIEW_HIGH_GRANULARITY) { - rv->undo_no = view->undo_no; - } else { - rv->undo_no = ULINT_UNDEFINED; - } - - rv->low_limit_no = view->low_limit_no; - rv->up_limit_id = view->up_limit_id; - rv->low_limit_id = view->low_limit_id; - - mutex_exit(&trx_sys->mutex); - - return rv; -} - -/*********************************************************************//** -Frees resource allocated by a read view. 
*/ -UNIV_INTERN -void -read_view_free( -/*===========*/ - read_view_t*& view) /*< in,out: read view */ -{ - if (view == NULL) { - - return; - } - - os_atomic_decrement_ulint(&srv_read_views_memory, - sizeof(read_view_t) + - view->max_descr * sizeof(trx_id_t)); - - if (view->descriptors != NULL) { - ut_free(view->descriptors); - } - - ut_free(view); - - view = NULL; -} - -/*********************************************************************//** -Create a high-granularity consistent cursor view for mysql to be used -in cursors. In this consistent read view modifications done by the -creating transaction after the cursor is created or future transactions -are not visible. */ -UNIV_INTERN -cursor_view_t* -read_cursor_view_create_for_mysql( -/*==============================*/ - trx_t* cr_trx) /*!< in: trx where cursor view is created */ -{ - read_view_t* view; - mem_heap_t* heap; - cursor_view_t* curview; - - /* Use larger heap than in trx_create when creating a read_view - because cursors are quite long. */ - - heap = mem_heap_create(512); - - curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(*curview)); - - curview->heap = heap; - - /* Drop cursor tables from consideration when evaluating the - need of auto-commit */ - - curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use; - - cr_trx->n_mysql_tables_in_use = 0; - - mutex_enter(&trx_sys->mutex); - - curview->read_view = NULL; - read_view_open_now_low(UINT64_UNDEFINED, curview->read_view); - - view = curview->read_view; - view->undo_no = cr_trx->undo_no; - view->type = VIEW_HIGH_GRANULARITY; - - mutex_exit(&trx_sys->mutex); - - return(curview); -} - -/*********************************************************************//** -Close a given consistent cursor view for mysql and restore global read view -back to a transaction read view. 
*/ -UNIV_INTERN -void -read_cursor_view_close_for_mysql( -/*=============================*/ - trx_t* trx, /*!< in: trx */ - cursor_view_t* curview)/*!< in: cursor view to be closed */ -{ - ut_a(curview); - ut_a(curview->read_view); - ut_a(curview->heap); - - /* Add cursor's tables to the global count of active tables that - belong to this transaction */ - trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use; - - read_view_remove(curview->read_view, false); - read_view_free(curview->read_view); - - trx->read_view = trx->global_read_view; - - mem_heap_free(curview->heap); -} - -/*********************************************************************//** -This function sets a given consistent cursor view to a transaction -read view if given consistent cursor view is not NULL. Otherwise, function -restores a global read view to a transaction read view. */ -UNIV_INTERN -void -read_cursor_set_for_mysql( -/*======================*/ - trx_t* trx, /*!< in: transaction where cursor is set */ - cursor_view_t* curview)/*!< in: consistent cursor view to be set */ -{ - ut_a(trx); - - mutex_enter(&trx_sys->mutex); - - if (UNIV_LIKELY(curview != NULL)) { - trx->read_view = curview->read_view; - } else { - trx->read_view = trx->global_read_view; - } - - ut_ad(read_view_validate(trx->read_view)); - - mutex_exit(&trx_sys->mutex); -} diff --git a/storage/xtradb/rem/rem0cmp.cc b/storage/xtradb/rem/rem0cmp.cc deleted file mode 100644 index 616ef322fb5..00000000000 --- a/storage/xtradb/rem/rem0cmp.cc +++ /dev/null @@ -1,1465 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file rem/rem0cmp.cc -Comparison services for records - -Created 7/1/1994 Heikki Tuuri -************************************************************************/ - -#include "rem0cmp.h" - -#ifdef UNIV_NONINL -#include "rem0cmp.ic" -#endif - -#include "ha_prototypes.h" -#include "handler0alter.h" -#include "srv0srv.h" - -/* ALPHABETICAL ORDER - ================== - -The records are put into alphabetical order in the following -way: let F be the first field where two records disagree. -If there is a character in some position n where the -records disagree, the order is determined by comparison of -the characters at position n, possibly after -collating transformation. If there is no such character, -but the corresponding fields have different lengths, then -if the data type of the fields is paddable, -shorter field is padded with a padding character. If the -data type is not paddable, longer field is considered greater. -Finally, the SQL null is bigger than any other value. - -At the present, the comparison functions return 0 in the case, -where two records disagree only in the way that one -has more fields than the other. */ - -#ifdef UNIV_DEBUG -/*************************************************************//** -Used in debug checking of cmp_dtuple_... . -This function is used to compare a data tuple to a physical record. 
If -dtuple has n fields then rec must have either m >= n fields, or it must -differ from dtuple in some of the m fields rec has. -@return 1, 0, -1, if dtuple is greater, equal, less than rec, -respectively, when only the common first fields are compared */ -static -int -cmp_debug_dtuple_rec_with_match( -/*============================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n_cmp, /*!< in: number of fields to compare */ - ulint* matched_fields)/*!< in/out: number of already - completely matched fields; when function - returns, contains the value for current - comparison */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -#endif /* UNIV_DEBUG */ -/*************************************************************//** -This function is used to compare two data fields for which the data type -is such that we must use MySQL code to compare them. The prototype here -must be a copy of the one in ha_innobase.cc! -@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -extern -int -innobase_mysql_cmp( -/*===============*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length); /*!< in: data field length, - not UNIV_SQL_NULL */ -/*************************************************************//** -This function is used to compare two data fields for which the data type -is such that we must use MySQL code to compare them. The prototype here -must be a copy of the one in ha_innobase.cc! 
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -extern -int -innobase_mysql_cmp_prefix( -/*======================*/ - int mysql_type, /*!< in: MySQL type */ - uint charset_number, /*!< in: number of the charset */ - const unsigned char* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const unsigned char* b, /*!< in: data field */ - unsigned int b_length); /*!< in: data field length, - not UNIV_SQL_NULL */ -/*********************************************************************//** -Transforms the character code so that it is ordered appropriately for the -language. This is only used for the latin1 char set. MySQL does the -comparisons for other char sets. -@return collation order position */ -UNIV_INLINE -ulint -cmp_collate( -/*========*/ - ulint code) /*!< in: code of a character stored in database record */ -{ - return((ulint) srv_latin1_ordering[code]); -} - -/*************************************************************//** -Returns TRUE if two columns are equal for comparison purposes. 
-@return TRUE if the columns are considered equal in comparisons */ -UNIV_INTERN -ibool -cmp_cols_are_equal( -/*===============*/ - const dict_col_t* col1, /*!< in: column 1 */ - const dict_col_t* col2, /*!< in: column 2 */ - ibool check_charsets) - /*!< in: whether to check charsets */ -{ - if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype) - && dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) { - - /* Both are non-binary string types: they can be compared if - and only if the charset-collation is the same */ - - if (check_charsets) { - return(dtype_get_charset_coll(col1->prtype) - == dtype_get_charset_coll(col2->prtype)); - } else { - return(TRUE); - } - } - - if (dtype_is_binary_string_type(col1->mtype, col1->prtype) - && dtype_is_binary_string_type(col2->mtype, col2->prtype)) { - - /* Both are binary string types: they can be compared */ - - return(TRUE); - } - - if (col1->mtype != col2->mtype) { - - return(FALSE); - } - - if (col1->mtype == DATA_INT - && (col1->prtype & DATA_UNSIGNED) - != (col2->prtype & DATA_UNSIGNED)) { - - /* The storage format of an unsigned integer is different - from a signed integer: in a signed integer we OR - 0x8000... to the value of positive integers. 
*/ - - return(FALSE); - } - - return(col1->mtype != DATA_INT || col1->len == col2->len); -} - -/*************************************************************//** -Innobase uses this function to compare two data fields for which the data type -is such that we must compare whole fields or call MySQL to do the comparison -@return 1, 0, -1, if a is greater, equal, less than b, respectively */ -static -int -cmp_whole_field( -/*============*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - const byte* a, /*!< in: data field */ - unsigned int a_length, /*!< in: data field length, - not UNIV_SQL_NULL */ - const byte* b, /*!< in: data field */ - unsigned int b_length) /*!< in: data field length, - not UNIV_SQL_NULL */ -{ - float f_1; - float f_2; - double d_1; - double d_2; - int swap_flag = 1; - - switch (mtype) { - - case DATA_DECIMAL: - /* Remove preceding spaces */ - for (; a_length && *a == ' '; a++, a_length--) { } - for (; b_length && *b == ' '; b++, b_length--) { } - - if (*a == '-') { - if (*b != '-') { - return(-1); - } - - a++; b++; - a_length--; - b_length--; - - swap_flag = -1; - - } else if (*b == '-') { - - return(1); - } - - while (a_length > 0 && (*a == '+' || *a == '0')) { - a++; a_length--; - } - - while (b_length > 0 && (*b == '+' || *b == '0')) { - b++; b_length--; - } - - if (a_length != b_length) { - if (a_length < b_length) { - return(-swap_flag); - } - - return(swap_flag); - } - - while (a_length > 0 && *a == *b) { - - a++; b++; a_length--; - } - - if (a_length == 0) { - - return(0); - } - - if (*a > *b) { - return(swap_flag); - } - - return(-swap_flag); - case DATA_DOUBLE: - d_1 = mach_double_read(a); - d_2 = mach_double_read(b); - - if (d_1 > d_2) { - return(1); - } else if (d_2 > d_1) { - return(-1); - } - - return(0); - - case DATA_FLOAT: - f_1 = mach_float_read(a); - f_2 = mach_float_read(b); - - if (f_1 > f_2) { - return(1); - } else if (f_2 > f_1) { - return(-1); - } - - return(0); - case DATA_BLOB: - if 
(prtype & DATA_BINARY_TYPE) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: comparing a binary BLOB" - " with a character set sensitive\n" - "InnoDB: comparison!\n"); - } - /* fall through */ - case DATA_VARMYSQL: - case DATA_MYSQL: - return(innobase_mysql_cmp( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint) dtype_get_charset_coll(prtype), - a, a_length, b, b_length)); - default: - fprintf(stderr, - "InnoDB: unknown type number %lu\n", - (ulong) mtype); - ut_error; - } - - return(0); -} - -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. */ -UNIV_INTERN -int -cmp_dfield_dfield_like_prefix( -/*==========================*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2)/* in: data field */ -{ - const dtype_t* type; - int ret; - - ut_ad(dfield_check_typed(dfield1)); - - type = dfield_get_type(dfield1); - - if (type->mtype >= DATA_FLOAT) { - ret = innobase_mysql_cmp_prefix( - static_cast<int>(type->prtype & DATA_MYSQL_TYPE_MASK), - static_cast<uint>(dtype_get_charset_coll(type->prtype)), - static_cast<byte*>(dfield_get_data(dfield1)), - static_cast<uint>(dfield_get_len(dfield1)), - static_cast<byte*>(dfield_get_data(dfield2)), - static_cast<uint>(dfield_get_len(dfield2))); - } else { - ret = (cmp_data_data_like_prefix( - static_cast<byte*>(dfield_get_data(dfield1)), - dfield_get_len(dfield1), - static_cast<byte*>(dfield_get_data(dfield2)), - dfield_get_len(dfield2))); - } - - return(ret); -} - -/*************************************************************//** -This function is used to compare two data fields for which we know the -data type. 
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ -UNIV_INTERN -int -cmp_data_data_slow( -/*===============*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - const byte* data1, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len1, /*!< in: data field length or UNIV_SQL_NULL */ - const byte* data2, /*!< in: data field (== a pointer to a memory - buffer) */ - ulint len2) /*!< in: data field length or UNIV_SQL_NULL */ -{ - ulint data1_byte; - ulint data2_byte; - ulint cur_bytes; - - if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) { - - if (len1 == len2) { - - return(0); - } - - if (len1 == UNIV_SQL_NULL) { - /* We define the SQL null to be the smallest possible - value of a field in the alphabetical order */ - - return(-1); - } - - return(1); - } - - if (mtype >= DATA_FLOAT - || (mtype == DATA_BLOB - && 0 == (prtype & DATA_BINARY_TYPE) - && dtype_get_charset_coll(prtype) - != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) { - - return(cmp_whole_field(mtype, prtype, - data1, (unsigned) len1, - data2, (unsigned) len2)); - } - - /* Compare then the fields */ - - cur_bytes = 0; - - for (;;) { - if (len1 <= cur_bytes) { - if (len2 <= cur_bytes) { - - return(0); - } - - data1_byte = dtype_get_pad_char(mtype, prtype); - - if (data1_byte == ULINT_UNDEFINED) { - - return(-1); - } - } else { - data1_byte = *data1; - } - - if (len2 <= cur_bytes) { - data2_byte = dtype_get_pad_char(mtype, prtype); - - if (data2_byte == ULINT_UNDEFINED) { - - return(1); - } - } else { - data2_byte = *data2; - } - - if (data1_byte == data2_byte) { - /* If the bytes are equal, they will remain such even - after the collation transformation below */ - - goto next_byte; - } - - if (mtype <= DATA_CHAR - || (mtype == DATA_BLOB - && 0 == (prtype & DATA_BINARY_TYPE))) { - - data1_byte = cmp_collate(data1_byte); - data2_byte = cmp_collate(data2_byte); - } - - if (data1_byte > data2_byte) { - - return(1); - } else if (data1_byte 
< data2_byte) { - - return(-1); - } -next_byte: - /* Next byte */ - cur_bytes++; - data1++; - data2++; - } - - return(0); /* Not reached */ -} - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type to be VARCHAR */ - -int -cmp_data_data_slow_varchar( -/*=======================*/ - /* out: 1, 0, -1, if lhs is greater, equal, - less than rhs, respectively */ - const byte* lhs, /* in: data field (== a pointer to a memory - buffer) */ - ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */ - const byte* rhs, /* in: data field (== a pointer to a memory - buffer) */ - ulint rhs_len)/* in: data field length or UNIV_SQL_NULL */ -{ - ulint i; - - ut_a(rhs_len != UNIV_SQL_NULL); - - if (lhs_len == UNIV_SQL_NULL) { - - /* We define the SQL null to be the smallest possible - value of a field in the alphabetical order */ - - return(-1); - } - - /* Compare the values.*/ - - for (i = 0; i < lhs_len && i < rhs_len; ++i, ++rhs, ++lhs) { - ulint lhs_byte = *lhs; - ulint rhs_byte = *rhs; - - if (lhs_byte != rhs_byte) { - /* If the bytes are equal, they will remain such even - after the collation transformation below */ - - lhs_byte = cmp_collate(lhs_byte); - rhs_byte = cmp_collate(rhs_byte); - - if (lhs_byte > rhs_byte) { - - return(1); - } else if (lhs_byte < rhs_byte) { - - return(-1); - } - } - } - - return((i == lhs_len && i == rhs_len) ? 0 : - static_cast<int>(rhs_len - lhs_len)); -} - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. 
The comparison is done for the LIKE operator.*/ - -int -cmp_data_data_slow_like_prefix( -/*===========================*/ - /* out: 1, 0, -1, if lhs is greater, equal, - less than rhs, respectively */ - const byte* lhs, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - const byte* rhs, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2) /* in: data field length or UNIV_SQL_NULL */ -{ - ulint i; - - ut_a(len2 != UNIV_SQL_NULL); - - if (len1 == UNIV_SQL_NULL) { - - /* We define the SQL null to be the smallest possible - value of a field in the alphabetical order */ - - return(-1); - } - - /* Compare the values.*/ - - for (i = 0; i < len1 && i < len2; ++i, ++rhs, ++lhs) { - ulint lhs_byte = *lhs; - ulint rhs_byte = *rhs; - - if (lhs_byte != rhs_byte) { - /* If the bytes are equal, they will remain such even - after the collation transformation below */ - - lhs_byte = cmp_collate(lhs_byte); - rhs_byte = cmp_collate(rhs_byte); - - if (lhs_byte > rhs_byte) { - - return(1); - } else if (lhs_byte < rhs_byte) { - - return(-1); - } - } - } - - return(i == len2 ? 0 : 1); -} - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. 
The comparison is done for the LIKE operator.*/ - -int -cmp_data_data_slow_like_suffix( -/*===========================*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - /* in: data field (== a pointer to a - memory buffer) */ - const byte* data1 UNIV_UNUSED, - /* in: data field length or UNIV_SQL_NULL */ - ulint len1 UNIV_UNUSED, - /* in: data field (== a pointer to a memory - buffer) */ - const byte* data2 UNIV_UNUSED, - /* in: data field length or UNIV_SQL_NULL */ - ulint len2 UNIV_UNUSED) - -{ - ut_error; // FIXME: - return(1); -} - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. The comparison is done for the LIKE operator.*/ - -int -cmp_data_data_slow_like_substr( -/*===========================*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - /* in: data field (== a pointer to a - memory buffer) */ - const byte* data1 UNIV_UNUSED, - /* in: data field length or UNIV_SQL_NULL */ - ulint len1 UNIV_UNUSED, - /* in: data field (== a pointer to a memory - buffer) */ - const byte* data2 UNIV_UNUSED, - /* in: data field length or UNIV_SQL_NULL */ - ulint len2 UNIV_UNUSED) -{ - ut_error; // FIXME: - return(1); -} -/*************************************************************//** -This function is used to compare a data tuple to a physical record. -Only dtuple->n_fields_cmp first fields are taken into account for -the data tuple! If we denote by n = n_fields_cmp, then rec must -have either m >= n fields, or it must differ from dtuple in some of -the m fields rec has. If rec has an externally stored field we do not -compare it but return with value 0 if such a comparison should be -made. 
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, -respectively, when only the common first fields are compared, or until -the first externally stored field in rec */ -UNIV_INTERN -int -cmp_dtuple_rec_with_match_low( -/*==========================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n_cmp, /*!< in: number of fields to compare */ - ulint* matched_fields, /*!< in/out: number of already completely - matched fields; when function returns, - contains the value for current comparison */ - ulint* matched_bytes) /*!< in/out: number of already matched - bytes within the first field not completely - matched; when function returns, contains the - value for current comparison */ -{ - const dfield_t* dtuple_field; /* current field in logical record */ - ulint dtuple_f_len; /* the length of the current field - in the logical record */ - const byte* dtuple_b_ptr; /* pointer to the current byte in - logical field data */ - ulint dtuple_byte; /* value of current byte to be compared - in dtuple*/ - ulint rec_f_len; /* length of current field in rec */ - const byte* rec_b_ptr; /* pointer to the current byte in - rec field */ - ulint rec_byte; /* value of current byte to be - compared in rec */ - ulint cur_field; /* current field number */ - ulint cur_bytes; /* number of already matched bytes - in current field */ - int ret; /* return value */ - - ut_ad(dtuple != NULL); - ut_ad(rec != NULL); - ut_ad(matched_fields != NULL); - ut_ad(matched_bytes != NULL); - ut_ad(dtuple_check_typed(dtuple)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - cur_field = *matched_fields; - cur_bytes = *matched_bytes; - - ut_ad(n_cmp > 0); - ut_ad(n_cmp <= dtuple_get_n_fields(dtuple)); - ut_ad(cur_field <= n_cmp); - ut_ad(cur_field <= 
rec_offs_n_fields(offsets)); - - if (cur_bytes == 0 && cur_field == 0) { - ulint rec_info = rec_get_info_bits(rec, - rec_offs_comp(offsets)); - ulint tup_info = dtuple_get_info_bits(dtuple); - - if (UNIV_UNLIKELY(rec_info & REC_INFO_MIN_REC_FLAG)) { - ret = !(tup_info & REC_INFO_MIN_REC_FLAG); - goto order_resolved; - } else if (UNIV_UNLIKELY(tup_info & REC_INFO_MIN_REC_FLAG)) { - ret = -1; - goto order_resolved; - } - } - - /* Match fields in a loop; stop if we run out of fields in dtuple - or find an externally stored field */ - - while (cur_field < n_cmp) { - - ulint mtype; - ulint prtype; - - dtuple_field = dtuple_get_nth_field(dtuple, cur_field); - { - const dtype_t* type - = dfield_get_type(dtuple_field); - - mtype = type->mtype; - prtype = type->prtype; - } - - dtuple_f_len = dfield_get_len(dtuple_field); - - rec_b_ptr = rec_get_nth_field(rec, offsets, - cur_field, &rec_f_len); - - /* If we have matched yet 0 bytes, it may be that one or - both the fields are SQL null, or the record or dtuple may be - the predefined minimum record, or the field is externally - stored */ - - if (UNIV_LIKELY(cur_bytes == 0)) { - if (rec_offs_nth_extern(offsets, cur_field)) { - /* We do not compare to an externally - stored field */ - - ret = 0; - - goto order_resolved; - } - - if (dtuple_f_len == UNIV_SQL_NULL) { - if (rec_f_len == UNIV_SQL_NULL) { - - goto next_field; - } - - ret = -1; - goto order_resolved; - } else if (rec_f_len == UNIV_SQL_NULL) { - /* We define the SQL null to be the - smallest possible value of a field - in the alphabetical order */ - - ret = 1; - goto order_resolved; - } - } - - if (mtype >= DATA_FLOAT - || (mtype == DATA_BLOB - && 0 == (prtype & DATA_BINARY_TYPE) - && dtype_get_charset_coll(prtype) - != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) { - - ret = cmp_whole_field( - mtype, prtype, - static_cast<const byte*>( - dfield_get_data(dtuple_field)), - (unsigned) dtuple_f_len, - rec_b_ptr, (unsigned) rec_f_len); - - if (ret != 0) { - cur_bytes = 0; - - 
goto order_resolved; - } else { - goto next_field; - } - } - - /* Set the pointers at the current byte */ - - rec_b_ptr = rec_b_ptr + cur_bytes; - dtuple_b_ptr = (byte*) dfield_get_data(dtuple_field) - + cur_bytes; - /* Compare then the fields */ - - for (;;) { - if (UNIV_UNLIKELY(rec_f_len <= cur_bytes)) { - if (dtuple_f_len <= cur_bytes) { - - goto next_field; - } - - rec_byte = dtype_get_pad_char(mtype, prtype); - - if (rec_byte == ULINT_UNDEFINED) { - ret = 1; - - goto order_resolved; - } - } else { - rec_byte = *rec_b_ptr; - } - - if (UNIV_UNLIKELY(dtuple_f_len <= cur_bytes)) { - dtuple_byte = dtype_get_pad_char(mtype, - prtype); - - if (dtuple_byte == ULINT_UNDEFINED) { - ret = -1; - - goto order_resolved; - } - } else { - dtuple_byte = *dtuple_b_ptr; - } - - if (dtuple_byte == rec_byte) { - /* If the bytes are equal, they will - remain such even after the collation - transformation below */ - - goto next_byte; - } - - if (mtype <= DATA_CHAR - || (mtype == DATA_BLOB - && !(prtype & DATA_BINARY_TYPE))) { - - rec_byte = cmp_collate(rec_byte); - dtuple_byte = cmp_collate(dtuple_byte); - } - - ret = (int) (dtuple_byte - rec_byte); - if (UNIV_LIKELY(ret)) { - if (ret < 0) { - ret = -1; - goto order_resolved; - } else { - ret = 1; - goto order_resolved; - } - } -next_byte: - /* Next byte */ - cur_bytes++; - rec_b_ptr++; - dtuple_b_ptr++; - } - -next_field: - cur_field++; - cur_bytes = 0; - } - - ut_ad(cur_bytes == 0); - - ret = 0; /* If we ran out of fields, dtuple was equal to rec - up to the common fields */ -order_resolved: - ut_ad((ret >= - 1) && (ret <= 1)); - ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets, - n_cmp, matched_fields)); - ut_ad(*matched_fields == cur_field); /* In the debug version, the - above cmp_debug_... 
sets - *matched_fields to a value */ - *matched_fields = cur_field; - *matched_bytes = cur_bytes; - - return(ret); -} - -/**************************************************************//** -Compares a data tuple to a physical record. -@see cmp_dtuple_rec_with_match -@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */ -UNIV_INTERN -int -cmp_dtuple_rec( -/*===========*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint matched_fields = 0; - ulint matched_bytes = 0; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, - &matched_fields, &matched_bytes)); -} - -/**************************************************************//** -Checks if a dtuple is a prefix of a record. The last field in dtuple -is allowed to be a prefix of the corresponding field in the record. -@return TRUE if prefix */ -UNIV_INTERN -ibool -cmp_dtuple_is_prefix_of_rec( -/*========================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - ulint n_fields; - ulint matched_fields = 0; - ulint matched_bytes = 0; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - n_fields = dtuple_get_n_fields(dtuple); - - if (n_fields > rec_offs_n_fields(offsets)) { - - return(FALSE); - } - - cmp_dtuple_rec_with_match(dtuple, rec, offsets, - &matched_fields, &matched_bytes); - if (matched_fields == n_fields) { - - return(TRUE); - } - - if (matched_fields == n_fields - 1 - && matched_bytes == dfield_get_len( - dtuple_get_nth_field(dtuple, n_fields - 1))) { - return(TRUE); - } - - return(FALSE); -} - -/*************************************************************//** -Compare two physical record fields. 
-@retval 1 if rec1 field is greater than rec2 -@retval -1 if rec1 field is less than rec2 -@retval 0 if rec1 field equals to rec2 */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -int -cmp_rec_rec_simple_field( -/*=====================*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index, /*!< in: data dictionary index */ - ulint n) /*!< in: field to compare */ -{ - const byte* rec1_b_ptr; - const byte* rec2_b_ptr; - ulint rec1_f_len; - ulint rec2_f_len; - const dict_col_t* col = dict_index_get_nth_col(index, n); - - ut_ad(!rec_offs_nth_extern(offsets1, n)); - ut_ad(!rec_offs_nth_extern(offsets2, n)); - - rec1_b_ptr = rec_get_nth_field(rec1, offsets1, n, &rec1_f_len); - rec2_b_ptr = rec_get_nth_field(rec2, offsets2, n, &rec2_f_len); - - if (rec1_f_len == UNIV_SQL_NULL || rec2_f_len == UNIV_SQL_NULL) { - if (rec1_f_len == rec2_f_len) { - return(0); - } - /* We define the SQL null to be the smallest possible - value of a field in the alphabetical order */ - return(rec1_f_len == UNIV_SQL_NULL ? 
-1 : 1); - } - - if (col->mtype >= DATA_FLOAT - || (col->mtype == DATA_BLOB - && !(col->prtype & DATA_BINARY_TYPE) - && dtype_get_charset_coll(col->prtype) - != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) { - return(cmp_whole_field(col->mtype, col->prtype, - rec1_b_ptr, (unsigned) rec1_f_len, - rec2_b_ptr, (unsigned) rec2_f_len)); - } - - /* Compare the fields */ - for (ulint cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) { - ulint rec1_byte; - ulint rec2_byte; - - if (rec2_f_len <= cur_bytes) { - if (rec1_f_len <= cur_bytes) { - return(0); - } - - rec2_byte = dtype_get_pad_char( - col->mtype, col->prtype); - - if (rec2_byte == ULINT_UNDEFINED) { - return(1); - } - } else { - rec2_byte = *rec2_b_ptr; - } - - if (rec1_f_len <= cur_bytes) { - rec1_byte = dtype_get_pad_char( - col->mtype, col->prtype); - - if (rec1_byte == ULINT_UNDEFINED) { - return(-1); - } - } else { - rec1_byte = *rec1_b_ptr; - } - - if (rec1_byte == rec2_byte) { - /* If the bytes are equal, they will remain such - even after the collation transformation below */ - continue; - } - - if (col->mtype <= DATA_CHAR - || (col->mtype == DATA_BLOB - && !(col->prtype & DATA_BINARY_TYPE))) { - - rec1_byte = cmp_collate(rec1_byte); - rec2_byte = cmp_collate(rec2_byte); - } - - if (rec1_byte < rec2_byte) { - return(-1); - } else if (rec1_byte > rec2_byte) { - return(1); - } - } -} - -/*************************************************************//** -Compare two physical records that contain the same number of columns, -none of which are stored externally. -@retval 1 if rec1 (including non-ordering columns) is greater than rec2 -@retval -1 if rec1 (including non-ordering columns) is less than rec2 -@retval 0 if rec1 is a duplicate of rec2 */ -UNIV_INTERN -int -cmp_rec_rec_simple( -/*===============*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) 
*/ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */ - const dict_index_t* index, /*!< in: data dictionary index */ - struct TABLE* table) /*!< in: MySQL table, for reporting - duplicate key value if applicable, - or NULL */ -{ - ulint n; - ulint n_uniq = dict_index_get_n_unique(index); - bool null_eq = false; - - ut_ad(rec_offs_n_fields(offsets1) >= n_uniq); - ut_ad(rec_offs_n_fields(offsets2) == rec_offs_n_fields(offsets2)); - - ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2)); - - for (n = 0; n < n_uniq; n++) { - int cmp = cmp_rec_rec_simple_field( - rec1, rec2, offsets1, offsets2, index, n); - - if (cmp) { - return(cmp); - } - - /* If the fields are internally equal, they must both - be NULL or non-NULL. */ - ut_ad(rec_offs_nth_sql_null(offsets1, n) - == rec_offs_nth_sql_null(offsets2, n)); - - if (rec_offs_nth_sql_null(offsets1, n)) { - ut_ad(!(dict_index_get_nth_col(index, n)->prtype - & DATA_NOT_NULL)); - null_eq = true; - } - } - - /* If we ran out of fields, the ordering columns of rec1 were - equal to rec2. Issue a duplicate key error if needed. */ - - if (!null_eq && table && dict_index_is_unique(index)) { - /* Report erroneous row using new version of table. */ - innobase_rec_to_mysql(table, rec1, index, offsets1); - return(0); - } - - /* Else, keep comparing so that we have the full internal - order. */ - for (; n < dict_index_get_n_fields(index); n++) { - int cmp = cmp_rec_rec_simple_field( - rec1, rec2, offsets1, offsets2, index, n); - - if (cmp) { - return(cmp); - } - - /* If the fields are internally equal, they must both - be NULL or non-NULL. */ - ut_ad(rec_offs_nth_sql_null(offsets1, n) - == rec_offs_nth_sql_null(offsets2, n)); - } - - /* This should never be reached. Internally, an index must - never contain duplicate entries. */ - ut_ad(0); - return(0); -} - -/*************************************************************//** -This function is used to compare two physical records. 
Only the common -first fields are compared, and if an externally stored field is -encountered, then 0 is returned. -@return 1, 0, -1 if rec1 is greater, equal, less, respectively */ -UNIV_INTERN -int -cmp_rec_rec_with_match( -/*===================*/ - const rec_t* rec1, /*!< in: physical record */ - const rec_t* rec2, /*!< in: physical record */ - const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ - dict_index_t* index, /*!< in: data dictionary index */ - ibool nulls_unequal, - /* in: TRUE if this is for index statistics - cardinality estimation, and innodb_stats_method - is "nulls_unequal" or "nulls_ignored" */ - ulint* matched_fields, /*!< in/out: number of already completely - matched fields; when the function returns, - contains the value the for current - comparison */ - ulint* matched_bytes) /*!< in/out: number of already matched - bytes within the first field not completely - matched; when the function returns, contains - the value for the current comparison */ -{ - ulint rec1_n_fields; /* the number of fields in rec */ - ulint rec1_f_len; /* length of current field in rec */ - const byte* rec1_b_ptr; /* pointer to the current byte - in rec field */ - ulint rec1_byte; /* value of current byte to be - compared in rec */ - ulint rec2_n_fields; /* the number of fields in rec */ - ulint rec2_f_len; /* length of current field in rec */ - const byte* rec2_b_ptr; /* pointer to the current byte - in rec field */ - ulint rec2_byte; /* value of current byte to be - compared in rec */ - ulint cur_field; /* current field number */ - ulint cur_bytes; /* number of already matched - bytes in current field */ - int ret = 0; /* return value */ - ulint comp; - - ut_ad(rec1 != NULL); - ut_ad(rec2 != NULL); - ut_ad(index != NULL); - ut_ad(rec_offs_validate(rec1, index, offsets1)); - ut_ad(rec_offs_validate(rec2, index, offsets2)); - ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2)); - - comp = 
rec_offs_comp(offsets1); - rec1_n_fields = rec_offs_n_fields(offsets1); - rec2_n_fields = rec_offs_n_fields(offsets2); - - cur_field = *matched_fields; - cur_bytes = *matched_bytes; - - /* Match fields in a loop */ - - while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) { - - ulint mtype; - ulint prtype; - - if (dict_index_is_univ(index)) { - /* This is for the insert buffer B-tree. */ - mtype = DATA_BINARY; - prtype = 0; - } else { - const dict_col_t* col - = dict_index_get_nth_col(index, cur_field); - - mtype = col->mtype; - prtype = col->prtype; - } - - rec1_b_ptr = rec_get_nth_field(rec1, offsets1, - cur_field, &rec1_f_len); - rec2_b_ptr = rec_get_nth_field(rec2, offsets2, - cur_field, &rec2_f_len); - - if (cur_bytes == 0) { - if (cur_field == 0) { - /* Test if rec is the predefined minimum - record */ - if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp) - & REC_INFO_MIN_REC_FLAG)) { - - if (!(rec_get_info_bits(rec2, comp) - & REC_INFO_MIN_REC_FLAG)) { - ret = -1; - } - - goto order_resolved; - - } else if (UNIV_UNLIKELY - (rec_get_info_bits(rec2, comp) - & REC_INFO_MIN_REC_FLAG)) { - - ret = 1; - - goto order_resolved; - } - } - - if (rec_offs_nth_extern(offsets1, cur_field) - || rec_offs_nth_extern(offsets2, cur_field)) { - /* We do not compare to an externally - stored field */ - - goto order_resolved; - } - - if (rec1_f_len == UNIV_SQL_NULL - || rec2_f_len == UNIV_SQL_NULL) { - - if (rec1_f_len == rec2_f_len) { - /* This is limited to stats collection, - cannot use it for regular search */ - if (nulls_unequal) { - ret = -1; - } else { - goto next_field; - } - } else if (rec2_f_len == UNIV_SQL_NULL) { - - /* We define the SQL null to be the - smallest possible value of a field - in the alphabetical order */ - - ret = 1; - } else { - ret = -1; - } - - goto order_resolved; - } - } - - if (mtype >= DATA_FLOAT - || (mtype == DATA_BLOB - && 0 == (prtype & DATA_BINARY_TYPE) - && dtype_get_charset_coll(prtype) - != 
DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) { - - ret = cmp_whole_field(mtype, prtype, - rec1_b_ptr, - (unsigned) rec1_f_len, - rec2_b_ptr, - (unsigned) rec2_f_len); - if (ret != 0) { - cur_bytes = 0; - - goto order_resolved; - } else { - goto next_field; - } - } - - /* Set the pointers at the current byte */ - rec1_b_ptr = rec1_b_ptr + cur_bytes; - rec2_b_ptr = rec2_b_ptr + cur_bytes; - - /* Compare then the fields */ - for (;;) { - if (rec2_f_len <= cur_bytes) { - - if (rec1_f_len <= cur_bytes) { - - goto next_field; - } - - rec2_byte = dtype_get_pad_char(mtype, prtype); - - if (rec2_byte == ULINT_UNDEFINED) { - ret = 1; - - goto order_resolved; - } - } else { - rec2_byte = *rec2_b_ptr; - } - - if (rec1_f_len <= cur_bytes) { - rec1_byte = dtype_get_pad_char(mtype, prtype); - - if (rec1_byte == ULINT_UNDEFINED) { - ret = -1; - - goto order_resolved; - } - } else { - rec1_byte = *rec1_b_ptr; - } - - if (rec1_byte == rec2_byte) { - /* If the bytes are equal, they will remain - such even after the collation transformation - below */ - - goto next_byte; - } - - if (mtype <= DATA_CHAR - || (mtype == DATA_BLOB - && !(prtype & DATA_BINARY_TYPE))) { - - rec1_byte = cmp_collate(rec1_byte); - rec2_byte = cmp_collate(rec2_byte); - } - - if (rec1_byte < rec2_byte) { - ret = -1; - goto order_resolved; - } else if (rec1_byte > rec2_byte) { - ret = 1; - goto order_resolved; - } -next_byte: - /* Next byte */ - - cur_bytes++; - rec1_b_ptr++; - rec2_b_ptr++; - } - -next_field: - cur_field++; - cur_bytes = 0; - } - - ut_ad(cur_bytes == 0); - - /* If we ran out of fields, rec1 was equal to rec2 up - to the common fields */ - ut_ad(ret == 0); -order_resolved: - - ut_ad((ret >= - 1) && (ret <= 1)); - - *matched_fields = cur_field; - *matched_bytes = cur_bytes; - - return(ret); -} - -#ifdef UNIV_DEBUG -/*************************************************************//** -Used in debug checking of cmp_dtuple_... . -This function is used to compare a data tuple to a physical record. 
If -dtuple has n fields then rec must have either m >= n fields, or it must -differ from dtuple in some of the m fields rec has. If encounters an -externally stored field, returns 0. -@return 1, 0, -1, if dtuple is greater, equal, less than rec, -respectively, when only the common first fields are compared */ -static -int -cmp_debug_dtuple_rec_with_match( -/*============================*/ - const dtuple_t* dtuple, /*!< in: data tuple */ - const rec_t* rec, /*!< in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint n_cmp, /*!< in: number of fields to compare */ - ulint* matched_fields) /*!< in/out: number of already - completely matched fields; when function - returns, contains the value for current - comparison */ -{ - const dfield_t* dtuple_field; /* current field in logical record */ - ulint dtuple_f_len; /* the length of the current field - in the logical record */ - const byte* dtuple_f_data; /* pointer to the current logical - field data */ - ulint rec_f_len; /* length of current field in rec */ - const byte* rec_f_data; /* pointer to the current rec field */ - int ret; /* return value */ - ulint cur_field; /* current field number */ - - ut_ad(dtuple != NULL); - ut_ad(rec != NULL); - ut_ad(matched_fields != NULL); - ut_ad(dtuple_check_typed(dtuple)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - ut_ad(n_cmp > 0); - ut_ad(n_cmp <= dtuple_get_n_fields(dtuple)); - ut_ad(*matched_fields <= n_cmp); - ut_ad(*matched_fields <= rec_offs_n_fields(offsets)); - - cur_field = *matched_fields; - - if (cur_field == 0) { - if (UNIV_UNLIKELY - (rec_get_info_bits(rec, rec_offs_comp(offsets)) - & REC_INFO_MIN_REC_FLAG)) { - - ret = !(dtuple_get_info_bits(dtuple) - & REC_INFO_MIN_REC_FLAG); - - goto order_resolved; - } - - if (UNIV_UNLIKELY - (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG)) { - ret = 
-1; - - goto order_resolved; - } - } - - /* Match fields in a loop; stop if we run out of fields in dtuple */ - - while (cur_field < n_cmp) { - - ulint mtype; - ulint prtype; - - dtuple_field = dtuple_get_nth_field(dtuple, cur_field); - { - const dtype_t* type - = dfield_get_type(dtuple_field); - - mtype = type->mtype; - prtype = type->prtype; - } - - dtuple_f_data = static_cast<const byte*>( - dfield_get_data(dtuple_field)); - - dtuple_f_len = dfield_get_len(dtuple_field); - - rec_f_data = rec_get_nth_field(rec, offsets, - cur_field, &rec_f_len); - - if (rec_offs_nth_extern(offsets, cur_field)) { - /* We do not compare to an externally stored field */ - - ret = 0; - - goto order_resolved; - } - - ret = cmp_data_data(mtype, prtype, dtuple_f_data, dtuple_f_len, - rec_f_data, rec_f_len); - if (ret != 0) { - goto order_resolved; - } - - cur_field++; - } - - ret = 0; /* If we ran out of fields, dtuple was equal to rec - up to the common fields */ -order_resolved: - ut_ad((ret >= - 1) && (ret <= 1)); - - *matched_fields = cur_field; - - return(ret); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc deleted file mode 100644 index c62e8c90434..00000000000 --- a/storage/xtradb/rem/rem0rec.cc +++ /dev/null @@ -1,2107 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file rem/rem0rec.cc -Record manager - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "rem0rec.h" - -#ifdef UNIV_NONINL -#include "rem0rec.ic" -#endif - -#include "page0page.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "fts0fts.h" -#ifdef WITH_WSREP -#include <ha_prototypes.h> -#endif /* WITH_WSREP */ - -/* PHYSICAL RECORD (OLD STYLE) - =========================== - -The physical record, which is the data type of all the records -found in index pages of the database, has the following format -(lower addresses and more significant bits inside a byte are below -represented on a higher text line): - -| offset of the end of the last field of data, the most significant - bit is set to 1 if and only if the field is SQL-null, - if the offset is 2-byte, then the second most significant - bit is set to 1 if the field is stored on another page: - mostly this will occur in the case of big BLOB fields | -... 
-| offset of the end of the first field of data + the SQL-null bit | -| 4 bits used to delete mark a record, and mark a predefined - minimum record in alphabetical order | -| 4 bits giving the number of records owned by this record - (this term is explained in page0page.h) | -| 13 bits giving the order number of this record in the - heap of the index page | -| 10 bits giving the number of fields in this record | -| 1 bit which is set to 1 if the offsets above are given in - one byte format, 0 if in two byte format | -| two bytes giving an absolute pointer to the next record in the page | -ORIGIN of the record -| first field of data | -... -| last field of data | - -The origin of the record is the start address of the first field -of data. The offsets are given relative to the origin. -The offsets of the data fields are stored in an inverted -order because then the offset of the first fields are near the -origin, giving maybe a better processor cache hit rate in searches. - -The offsets of the data fields are given as one-byte -(if there are less than 127 bytes of data in the record) -or two-byte unsigned integers. The most significant bit -is not part of the offset, instead it indicates the SQL-null -if the bit is set to 1. */ - -/* PHYSICAL RECORD (NEW STYLE) - =========================== - -The physical record, which is the data type of all the records -found in index pages of the database, has the following format -(lower addresses and more significant bits inside a byte are below -represented on a higher text line): - -| length of the last non-null variable-length field of data: - if the maximum length is 255, one byte; otherwise, - 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes, - length=128..16383, extern storage flag) | -... 
-| length of first variable-length field of data | -| SQL-null flags (1 bit per nullable field), padded to full bytes | -| 4 bits used to delete mark a record, and mark a predefined - minimum record in alphabetical order | -| 4 bits giving the number of records owned by this record - (this term is explained in page0page.h) | -| 13 bits giving the order number of this record in the - heap of the index page | -| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree), - 010=infimum, 011=supremum, 1xx=reserved | -| two bytes giving a relative pointer to the next record in the page | -ORIGIN of the record -| first field of data | -... -| last field of data | - -The origin of the record is the start address of the first field -of data. The offsets are given relative to the origin. -The offsets of the data fields are stored in an inverted -order because then the offset of the first fields are near the -origin, giving maybe a better processor cache hit rate in searches. - -The offsets of the data fields are given as one-byte -(if there are less than 127 bytes of data in the record) -or two-byte unsigned integers. The most significant bit -is not part of the offset, instead it indicates the SQL-null -if the bit is set to 1. */ - -/* CANONICAL COORDINATES. A record can be seen as a single -string of 'characters' in the following way: catenate the bytes -in each field, in the order of fields. An SQL-null field -is taken to be an empty sequence of bytes. Then after -the position of each field insert in the string -the 'character' <FIELD-END>, except that after an SQL-null field -insert <NULL-FIELD-END>. Now the ordinal position of each -byte in this canonical string is its canonical coordinate. -So, for the record ("AA", SQL-NULL, "BB", ""), the canonical -string is "AA<FIELD_END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>". -We identify prefixes (= initial segments) of a record -with prefixes of the canonical string. 
The canonical -length of the prefix is the length of the corresponding -prefix of the canonical string. The canonical length of -a record is the length of its canonical string. - -For example, the maximal common prefix of records -("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C") -is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical -length is 5. - -A complete-field prefix of a record is a prefix which ends at the -end of some field (containing also <FIELD-END>). -A record is a complete-field prefix of another record, if -the corresponding canonical strings have the same property. */ - -/* this is used to fool compiler in rec_validate */ -UNIV_INTERN ulint rec_dummy; - -/***************************************************************//** -Validates the consistency of an old-style physical record. -@return TRUE if ok */ -static -ibool -rec_validate_old( -/*=============*/ - const rec_t* rec); /*!< in: physical record */ - -/******************************************************//** -Determine how many of the first n columns in a compact -physical record are stored externally. 
-@return number of externally stored columns */ -UNIV_INTERN -ulint -rec_get_n_extern_new( -/*=================*/ - const rec_t* rec, /*!< in: compact physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint n) /*!< in: number of columns to scan */ -{ - const byte* nulls; - const byte* lens; - ulint null_mask; - ulint n_extern; - ulint i; - - ut_ad(dict_table_is_comp(index->table)); - ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY); - ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index)); - - if (n == ULINT_UNDEFINED) { - n = dict_index_get_n_fields(index); - } - - nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); - lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); - null_mask = 1; - n_extern = 0; - i = 0; - - /* read the lengths of fields 0..n */ - do { - const dict_field_t* field - = dict_index_get_nth_field(index, i); - const dict_col_t* col - = dict_field_get_col(field); - ulint len; - - if (!(col->prtype & DATA_NOT_NULL)) { - /* nullable field => read the null flag */ - - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls--; - null_mask = 1; - } - - if (*nulls & null_mask) { - null_mask <<= 1; - /* No length is stored for NULL fields. */ - continue; - } - null_mask <<= 1; - } - - if (UNIV_UNLIKELY(!field->fixed_len)) { - /* Variable-length field: read the length */ - len = *lens--; - /* If the maximum length of the field is up - to 255 bytes, the actual length is always - stored in one byte. If the maximum length is - more than 255 bytes, the actual length is - stored in one byte for 0..127. The length - will be encoded in two bytes when it is 128 or - more, or when the field is stored externally. 
*/ - if (UNIV_UNLIKELY(col->len > 255) - || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { - if (len & 0x80) { - /* 1exxxxxxx xxxxxxxx */ - if (len & 0x40) { - n_extern++; - } - lens--; - } - } - } - } while (++i < n); - - return(n_extern); -} - -/******************************************************//** -Determine the offset to each field in a leaf-page record -in ROW_FORMAT=COMPACT. This is a special case of -rec_init_offsets() and rec_get_offsets_func(). */ -UNIV_INLINE MY_ATTRIBUTE((nonnull)) -void -rec_init_offsets_comp_ordinary( -/*===========================*/ - const rec_t* rec, /*!< in: physical record in - ROW_FORMAT=COMPACT */ - bool temp, /*!< in: whether to use the - format for temporary files in - index creation */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets)/*!< in/out: array of offsets; - in: n=rec_offs_n_fields(offsets) */ -{ - ulint i = 0; - ulint offs = 0; - ulint any_ext = 0; - ulint n_null = index->n_nullable; - const byte* nulls = temp - ? rec - 1 - : rec - (1 + REC_N_NEW_EXTRA_BYTES); - const byte* lens = nulls - UT_BITS_IN_BYTES(n_null); - ulint null_mask = 1; - -#ifdef UNIV_DEBUG - /* We cannot invoke rec_offs_make_valid() here if temp=true. - Similarly, rec_offs_validate() will fail in that case, because - it invokes rec_get_status(). */ - offsets[2] = (ulint) rec; - offsets[3] = (ulint) index; -#endif /* UNIV_DEBUG */ - - ut_ad(temp || dict_table_is_comp(index->table)); - - if (temp && dict_table_is_comp(index->table)) { - /* No need to do adjust fixed_len=0. We only need to - adjust it for ROW_FORMAT=REDUNDANT. 
*/ - temp = false; - } - - /* read the lengths of fields 0..n */ - do { - const dict_field_t* field - = dict_index_get_nth_field(index, i); - const dict_col_t* col - = dict_field_get_col(field); - ulint len; - - if (!(col->prtype & DATA_NOT_NULL)) { - /* nullable field => read the null flag */ - ut_ad(n_null--); - - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls--; - null_mask = 1; - } - - if (*nulls & null_mask) { - null_mask <<= 1; - /* No length is stored for NULL fields. - We do not advance offs, and we set - the length to zero and enable the - SQL NULL flag in offsets[]. */ - len = offs | REC_OFFS_SQL_NULL; - goto resolved; - } - null_mask <<= 1; - } - - if (!field->fixed_len - || (temp && !dict_col_get_fixed_size(col, temp))) { - /* Variable-length field: read the length */ - len = *lens--; - /* If the maximum length of the field is up - to 255 bytes, the actual length is always - stored in one byte. If the maximum length is - more than 255 bytes, the actual length is - stored in one byte for 0..127. The length - will be encoded in two bytes when it is 128 or - more, or when the field is stored externally. */ - if (UNIV_UNLIKELY(col->len > 255) - || UNIV_UNLIKELY(col->mtype - == DATA_BLOB)) { - if (len & 0x80) { - /* 1exxxxxxx xxxxxxxx */ - len <<= 8; - len |= *lens--; - - offs += len & 0x3fff; - if (UNIV_UNLIKELY(len - & 0x4000)) { - ut_ad(dict_index_is_clust - (index)); - any_ext = REC_OFFS_EXTERNAL; - len = offs - | REC_OFFS_EXTERNAL; - } else { - len = offs; - } - - goto resolved; - } - } - - len = offs += len; - } else { - len = offs += field->fixed_len; - } -resolved: - rec_offs_base(offsets)[i + 1] = len; - } while (++i < rec_offs_n_fields(offsets)); - - *rec_offs_base(offsets) - = (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext; -} - -/******************************************************//** -The following function determines the offsets to each field in the -record. 
The offsets are written to a previously allocated array of -ulint, where rec_offs_n_fields(offsets) has been initialized to the -number of fields in the record. The rest of the array will be -initialized by this function. rec_offs_base(offsets)[0] will be set -to the extra size (if REC_OFFS_COMPACT is set, the record is in the -new format; if REC_OFFS_EXTERNAL is set, the record contains externally -stored columns), and rec_offs_base(offsets)[1..n_fields] will be set to -offsets past the end of fields 0..n_fields, or to the beginning of -fields 1..n_fields+1. When the high-order bit of the offset at [i+1] -is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second -high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the -field i is being stored externally. */ -static -void -rec_init_offsets( -/*=============*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets)/*!< in/out: array of offsets; - in: n=rec_offs_n_fields(offsets) */ -{ - ulint i = 0; - ulint offs; - - rec_offs_make_valid(rec, index, offsets); - - if (dict_table_is_comp(index->table)) { - const byte* nulls; - const byte* lens; - dict_field_t* field; - ulint null_mask; - ulint status = rec_get_status(rec); - ulint n_node_ptr_field = ULINT_UNDEFINED; - - switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* the field is 8 bytes long */ - rec_offs_base(offsets)[0] - = REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT; - rec_offs_base(offsets)[1] = 8; - return; - case REC_STATUS_NODE_PTR: - n_node_ptr_field - = dict_index_get_n_unique_in_tree(index); - break; - case REC_STATUS_ORDINARY: - rec_init_offsets_comp_ordinary( - rec, false, index, offsets); - return; - } - - nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); - lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); - offs = 0; - null_mask = 1; - - /* read the lengths of fields 0..n */ - do { - ulint len; - if 
(UNIV_UNLIKELY(i == n_node_ptr_field)) { - len = offs += REC_NODE_PTR_SIZE; - goto resolved; - } - - field = dict_index_get_nth_field(index, i); - if (!(dict_field_get_col(field)->prtype - & DATA_NOT_NULL)) { - /* nullable field => read the null flag */ - - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls--; - null_mask = 1; - } - - if (*nulls & null_mask) { - null_mask <<= 1; - /* No length is stored for NULL fields. - We do not advance offs, and we set - the length to zero and enable the - SQL NULL flag in offsets[]. */ - len = offs | REC_OFFS_SQL_NULL; - goto resolved; - } - null_mask <<= 1; - } - - if (UNIV_UNLIKELY(!field->fixed_len)) { - /* Variable-length field: read the length */ - const dict_col_t* col - = dict_field_get_col(field); - len = *lens--; - /* If the maximum length of the field - is up to 255 bytes, the actual length - is always stored in one byte. If the - maximum length is more than 255 bytes, - the actual length is stored in one - byte for 0..127. The length will be - encoded in two bytes when it is 128 or - more, or when the field is stored - externally. */ - if (UNIV_UNLIKELY(col->len > 255) - || UNIV_UNLIKELY(col->mtype - == DATA_BLOB)) { - if (len & 0x80) { - /* 1exxxxxxx xxxxxxxx */ - - len <<= 8; - len |= *lens--; - - /* B-tree node pointers - must not contain externally - stored columns. Thus - the "e" flag must be 0. 
*/ - ut_a(!(len & 0x4000)); - offs += len & 0x3fff; - len = offs; - - goto resolved; - } - } - - len = offs += len; - } else { - len = offs += field->fixed_len; - } -resolved: - rec_offs_base(offsets)[i + 1] = len; - } while (++i < rec_offs_n_fields(offsets)); - - *rec_offs_base(offsets) - = (rec - (lens + 1)) | REC_OFFS_COMPACT; - } else { - /* Old-style record: determine extra size and end offsets */ - offs = REC_N_OLD_EXTRA_BYTES; - if (rec_get_1byte_offs_flag(rec)) { - offs += rec_offs_n_fields(offsets); - *rec_offs_base(offsets) = offs; - /* Determine offsets to fields */ - do { - offs = rec_1_get_field_end_info(rec, i); - if (offs & REC_1BYTE_SQL_NULL_MASK) { - offs &= ~REC_1BYTE_SQL_NULL_MASK; - offs |= REC_OFFS_SQL_NULL; - } - rec_offs_base(offsets)[1 + i] = offs; - } while (++i < rec_offs_n_fields(offsets)); - } else { - offs += 2 * rec_offs_n_fields(offsets); - *rec_offs_base(offsets) = offs; - /* Determine offsets to fields */ - do { - offs = rec_2_get_field_end_info(rec, i); - if (offs & REC_2BYTE_SQL_NULL_MASK) { - offs &= ~REC_2BYTE_SQL_NULL_MASK; - offs |= REC_OFFS_SQL_NULL; - } - if (offs & REC_2BYTE_EXTERN_MASK) { - offs &= ~REC_2BYTE_EXTERN_MASK; - offs |= REC_OFFS_EXTERNAL; - *rec_offs_base(offsets) |= REC_OFFS_EXTERNAL; - } - rec_offs_base(offsets)[1 + i] = offs; - } while (++i < rec_offs_n_fields(offsets)); - } - } -} - -/******************************************************//** -The following function determines the offsets to each field -in the record. It can reuse a previously returned array. 
-@return the new offsets */ -UNIV_INTERN -ulint* -rec_get_offsets_func( -/*=================*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets,/*!< in/out: array consisting of - offsets[0] allocated elements, - or an array from rec_get_offsets(), - or NULL */ - ulint n_fields,/*!< in: maximum number of - initialized fields - (ULINT_UNDEFINED if all fields) */ -#ifdef UNIV_DEBUG - const char* file, /*!< in: file name where called */ - ulint line, /*!< in: line number where called */ -#endif /* UNIV_DEBUG */ - mem_heap_t** heap) /*!< in/out: memory heap */ -{ - ulint n; - ulint size; - - ut_ad(rec); - ut_ad(index); - ut_ad(heap); - - if (dict_table_is_comp(index->table)) { - switch (UNIV_EXPECT(rec_get_status(rec), - REC_STATUS_ORDINARY)) { - case REC_STATUS_ORDINARY: - n = dict_index_get_n_fields(index); - break; - case REC_STATUS_NODE_PTR: - /* Node pointer records consist of the - uniquely identifying fields of the record - followed by a child page number field. */ - n = dict_index_get_n_unique_in_tree(index) + 1; - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* infimum or supremum record */ - n = 1; - break; - default: - ut_error; - return(NULL); - } - } else { - n = rec_get_n_fields_old(rec); - } - - if (UNIV_UNLIKELY(n_fields < n)) { - n = n_fields; - } - - /* The offsets header consists of the allocation size at - offsets[0] and the REC_OFFS_HEADER_SIZE bytes. 
*/ - size = n + (1 + REC_OFFS_HEADER_SIZE); - - if (UNIV_UNLIKELY(!offsets) - || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) { - if (UNIV_UNLIKELY(!*heap)) { - *heap = mem_heap_create_at(size * sizeof(ulint), - file, line); - } - offsets = static_cast<ulint*>( - mem_heap_alloc(*heap, size * sizeof(ulint))); - - rec_offs_set_n_alloc(offsets, size); - } - - rec_offs_set_n_fields(offsets, n); - rec_init_offsets(rec, index, offsets); - return(offsets); -} - -/******************************************************//** -The following function determines the offsets to each field -in the record. It can reuse a previously allocated array. */ -UNIV_INTERN -void -rec_get_offsets_reverse( -/*====================*/ - const byte* extra, /*!< in: the extra bytes of a - compact record in reverse order, - excluding the fixed-size - REC_N_NEW_EXTRA_BYTES */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint node_ptr,/*!< in: nonzero=node pointer, - 0=leaf node */ - ulint* offsets)/*!< in/out: array consisting of - offsets[0] allocated elements */ -{ - ulint n; - ulint i; - ulint offs; - ulint any_ext; - const byte* nulls; - const byte* lens; - dict_field_t* field; - ulint null_mask; - ulint n_node_ptr_field; - - ut_ad(extra); - ut_ad(index); - ut_ad(offsets); - ut_ad(dict_table_is_comp(index->table)); - - if (UNIV_UNLIKELY(node_ptr)) { - n_node_ptr_field = dict_index_get_n_unique_in_tree(index); - n = n_node_ptr_field + 1; - } else { - n_node_ptr_field = ULINT_UNDEFINED; - n = dict_index_get_n_fields(index); - } - - ut_a(rec_offs_get_n_alloc(offsets) >= n + (1 + REC_OFFS_HEADER_SIZE)); - rec_offs_set_n_fields(offsets, n); - - nulls = extra; - lens = nulls + UT_BITS_IN_BYTES(index->n_nullable); - i = offs = 0; - null_mask = 1; - any_ext = 0; - - /* read the lengths of fields 0..n */ - do { - ulint len; - if (UNIV_UNLIKELY(i == n_node_ptr_field)) { - len = offs += REC_NODE_PTR_SIZE; - goto resolved; - } - - field = dict_index_get_nth_field(index, i); - if 
(!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) { - /* nullable field => read the null flag */ - - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls++; - null_mask = 1; - } - - if (*nulls & null_mask) { - null_mask <<= 1; - /* No length is stored for NULL fields. - We do not advance offs, and we set - the length to zero and enable the - SQL NULL flag in offsets[]. */ - len = offs | REC_OFFS_SQL_NULL; - goto resolved; - } - null_mask <<= 1; - } - - if (UNIV_UNLIKELY(!field->fixed_len)) { - /* Variable-length field: read the length */ - const dict_col_t* col - = dict_field_get_col(field); - len = *lens++; - /* If the maximum length of the field is up - to 255 bytes, the actual length is always - stored in one byte. If the maximum length is - more than 255 bytes, the actual length is - stored in one byte for 0..127. The length - will be encoded in two bytes when it is 128 or - more, or when the field is stored externally. */ - if (UNIV_UNLIKELY(col->len > 255) - || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) { - if (len & 0x80) { - /* 1exxxxxxx xxxxxxxx */ - len <<= 8; - len |= *lens++; - - offs += len & 0x3fff; - if (UNIV_UNLIKELY(len & 0x4000)) { - any_ext = REC_OFFS_EXTERNAL; - len = offs | REC_OFFS_EXTERNAL; - } else { - len = offs; - } - - goto resolved; - } - } - - len = offs += len; - } else { - len = offs += field->fixed_len; - } -resolved: - rec_offs_base(offsets)[i + 1] = len; - } while (++i < rec_offs_n_fields(offsets)); - - ut_ad(lens >= extra); - *rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES) - | REC_OFFS_COMPACT | any_ext; -} - -/************************************************************//** -The following function is used to get the offset to the nth -data field in an old-style record. 
-@return offset to the field */ -UNIV_INTERN -ulint -rec_get_nth_field_offs_old( -/*=======================*/ - const rec_t* rec, /*!< in: record */ - ulint n, /*!< in: index of the field */ - ulint* len) /*!< out: length of the field; - UNIV_SQL_NULL if SQL null */ -{ - ulint os; - ulint next_os; - - ut_ad(len); - ut_a(rec); - ut_a(n < rec_get_n_fields_old(rec)); - - if (rec_get_1byte_offs_flag(rec)) { - os = rec_1_get_field_start_offs(rec, n); - - next_os = rec_1_get_field_end_info(rec, n); - - if (next_os & REC_1BYTE_SQL_NULL_MASK) { - *len = UNIV_SQL_NULL; - - return(os); - } - - next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK; - } else { - os = rec_2_get_field_start_offs(rec, n); - - next_os = rec_2_get_field_end_info(rec, n); - - if (next_os & REC_2BYTE_SQL_NULL_MASK) { - *len = UNIV_SQL_NULL; - - return(os); - } - - next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK - | REC_2BYTE_EXTERN_MASK); - } - - *len = next_os - os; - - ut_ad(*len < UNIV_PAGE_SIZE); - - return(os); -} - -/**********************************************************//** -Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT. -@return total size */ -UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) -ulint -rec_get_converted_size_comp_prefix_low( -/*===================================*/ - const dict_index_t* index, /*!< in: record descriptor; - dict_table_is_comp() is - assumed to hold, even if - it does not */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra, /*!< out: extra size */ - bool temp) /*!< in: whether this is a - temporary file record */ -{ - ulint extra_size; - ulint data_size; - ulint i; - ulint n_null = index->n_nullable; - ut_ad(n_fields > 0); - ut_ad(n_fields <= dict_index_get_n_fields(index)); - ut_ad(!temp || extra); - - extra_size = temp - ? 
UT_BITS_IN_BYTES(n_null) - : REC_N_NEW_EXTRA_BYTES - + UT_BITS_IN_BYTES(n_null); - data_size = 0; - - if (temp && dict_table_is_comp(index->table)) { - /* No need to do adjust fixed_len=0. We only need to - adjust it for ROW_FORMAT=REDUNDANT. */ - temp = false; - } - - /* read the lengths of fields 0..n */ - for (i = 0; i < n_fields; i++) { - const dict_field_t* field; - ulint len; - ulint fixed_len; - const dict_col_t* col; - - field = dict_index_get_nth_field(index, i); - len = dfield_get_len(&fields[i]); - col = dict_field_get_col(field); - - ut_ad(dict_col_type_assert_equal(col, - dfield_get_type(&fields[i]))); - /* All NULLable fields must be included in the n_null count. */ - ut_ad((col->prtype & DATA_NOT_NULL) || n_null--); - - if (dfield_is_null(&fields[i])) { - /* No length is stored for NULL fields. */ - ut_ad(!(col->prtype & DATA_NOT_NULL)); - continue; - } - - ut_ad(len <= col->len || col->mtype == DATA_BLOB || - ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY - || col->mtype == DATA_VARMYSQL) - && (col->len == 0 - || len <= col->len))); - - fixed_len = field->fixed_len; - if (temp && fixed_len - && !dict_col_get_fixed_size(col, temp)) { - fixed_len = 0; - } - /* If the maximum length of a variable-length field - is up to 255 bytes, the actual length is always stored - in one byte. If the maximum length is more than 255 - bytes, the actual length is stored in one byte for - 0..127. The length will be encoded in two bytes when - it is 128 or more, or when the field is stored externally. 
*/ - - if (fixed_len) { -#ifdef UNIV_DEBUG - ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen); - - ut_ad(len <= fixed_len); - - ut_ad(!mbmaxlen || len >= mbminlen - * (fixed_len / mbmaxlen)); - - /* dict_index_add_col() should guarantee this */ - ut_ad(!field->prefix_len - || fixed_len == field->prefix_len); -#endif /* UNIV_DEBUG */ - } else if (dfield_is_ext(&fields[i])) { - ut_ad(col->len >= 256 || col->mtype == DATA_BLOB); - extra_size += 2; - } else if (len < 128 - || (col->len < 256 - && col->mtype != DATA_BLOB)) { - extra_size++; - } else { - /* For variable-length columns, we look up the - maximum length from the column itself. If this - is a prefix index column shorter than 256 bytes, - this will waste one byte. */ - extra_size += 2; - } - data_size += len; - } - - if (extra) { - *extra = extra_size; - } - - return(extra_size + data_size); -} - -/**********************************************************//** -Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT. -@return total size */ -UNIV_INTERN -ulint -rec_get_converted_size_comp_prefix( -/*===============================*/ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ -{ - ut_ad(dict_table_is_comp(index->table)); - return(rec_get_converted_size_comp_prefix_low( - index, fields, n_fields, extra, false)); -} - -/**********************************************************//** -Determines the size of a data tuple in ROW_FORMAT=COMPACT. 
-@return total size */ -UNIV_INTERN -ulint -rec_get_converted_size_comp( -/*========================*/ - const dict_index_t* index, /*!< in: record descriptor; - dict_table_is_comp() is - assumed to hold, even if - it does not */ - ulint status, /*!< in: status bits of the record */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ -{ - ulint size; - ut_ad(n_fields > 0); - - switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { - case REC_STATUS_ORDINARY: - ut_ad(n_fields == dict_index_get_n_fields(index)); - size = 0; - break; - case REC_STATUS_NODE_PTR: - n_fields--; - ut_ad(n_fields == dict_index_get_n_unique_in_tree(index)); - ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE); - size = REC_NODE_PTR_SIZE; /* child page number */ - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* infimum or supremum record, 8 data bytes */ - if (UNIV_LIKELY_NULL(extra)) { - *extra = REC_N_NEW_EXTRA_BYTES; - } - return(REC_N_NEW_EXTRA_BYTES + 8); - default: - ut_error; - return(ULINT_UNDEFINED); - } - - return(size + rec_get_converted_size_comp_prefix_low( - index, fields, n_fields, extra, false)); -} - -/***********************************************************//** -Sets the value of the ith field SQL null bit of an old-style record. 
*/ -UNIV_INTERN -void -rec_set_nth_field_null_bit( -/*=======================*/ - rec_t* rec, /*!< in: record */ - ulint i, /*!< in: ith field */ - ibool val) /*!< in: value to set */ -{ - ulint info; - - if (rec_get_1byte_offs_flag(rec)) { - - info = rec_1_get_field_end_info(rec, i); - - if (val) { - info = info | REC_1BYTE_SQL_NULL_MASK; - } else { - info = info & ~REC_1BYTE_SQL_NULL_MASK; - } - - rec_1_set_field_end_info(rec, i, info); - - return; - } - - info = rec_2_get_field_end_info(rec, i); - - if (val) { - info = info | REC_2BYTE_SQL_NULL_MASK; - } else { - info = info & ~REC_2BYTE_SQL_NULL_MASK; - } - - rec_2_set_field_end_info(rec, i, info); -} - -/***********************************************************//** -Sets an old-style record field to SQL null. -The physical size of the field is not changed. */ -UNIV_INTERN -void -rec_set_nth_field_sql_null( -/*=======================*/ - rec_t* rec, /*!< in: record */ - ulint n) /*!< in: index of the field */ -{ - ulint offset; - - offset = rec_get_field_start_offs(rec, n); - - data_write_sql_null(rec + offset, rec_get_nth_field_size(rec, n)); - - rec_set_nth_field_null_bit(rec, n, TRUE); -} - -/*********************************************************//** -Builds an old-style physical record out of a data tuple and -stores it beginning from the start of the given buffer. 
-@return pointer to the origin of physical record */ -static -rec_t* -rec_convert_dtuple_to_rec_old( -/*==========================*/ - byte* buf, /*!< in: start address of the physical record */ - const dtuple_t* dtuple, /*!< in: data tuple */ - ulint n_ext) /*!< in: number of externally stored columns */ -{ - const dfield_t* field; - ulint n_fields; - ulint data_size; - rec_t* rec; - ulint end_offset; - ulint ored_offset; - ulint len; - ulint i; - - ut_ad(buf && dtuple); - ut_ad(dtuple_validate(dtuple)); - ut_ad(dtuple_check_typed(dtuple)); - - n_fields = dtuple_get_n_fields(dtuple); - data_size = dtuple_get_data_size(dtuple, 0); - - ut_ad(n_fields > 0); - - /* Calculate the offset of the origin in the physical record */ - - rec = buf + rec_get_converted_extra_size(data_size, n_fields, n_ext); -#ifdef UNIV_DEBUG - /* Suppress Valgrind warnings of ut_ad() - in mach_write_to_1(), mach_write_to_2() et al. */ - memset(buf, 0xff, rec - buf + data_size); -#endif /* UNIV_DEBUG */ - /* Store the number of fields */ - rec_set_n_fields_old(rec, n_fields); - - /* Set the info bits of the record */ - rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple) - & REC_INFO_BITS_MASK); - - /* Store the data and the offsets */ - - end_offset = 0; - - if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) { - - rec_set_1byte_offs_flag(rec, TRUE); - - for (i = 0; i < n_fields; i++) { - - field = dtuple_get_nth_field(dtuple, i); - - if (dfield_is_null(field)) { - len = dtype_get_sql_null_size( - dfield_get_type(field), 0); - data_write_sql_null(rec + end_offset, len); - - end_offset += len; - ored_offset = end_offset - | REC_1BYTE_SQL_NULL_MASK; - } else { - /* If the data is not SQL null, store it */ - len = dfield_get_len(field); - - memcpy(rec + end_offset, - dfield_get_data(field), len); - - end_offset += len; - ored_offset = end_offset; - } - - rec_1_set_field_end_info(rec, i, ored_offset); - } - } else { - rec_set_1byte_offs_flag(rec, FALSE); - - for (i = 0; i < n_fields; i++) { - - 
field = dtuple_get_nth_field(dtuple, i); - - if (dfield_is_null(field)) { - len = dtype_get_sql_null_size( - dfield_get_type(field), 0); - data_write_sql_null(rec + end_offset, len); - - end_offset += len; - ored_offset = end_offset - | REC_2BYTE_SQL_NULL_MASK; - } else { - /* If the data is not SQL null, store it */ - len = dfield_get_len(field); - - memcpy(rec + end_offset, - dfield_get_data(field), len); - - end_offset += len; - ored_offset = end_offset; - - if (dfield_is_ext(field)) { - ored_offset |= REC_2BYTE_EXTERN_MASK; - } - } - - rec_2_set_field_end_info(rec, i, ored_offset); - } - } - - return(rec); -} - -/*********************************************************//** -Builds a ROW_FORMAT=COMPACT record out of a data tuple. */ -UNIV_INLINE MY_ATTRIBUTE((nonnull)) -void -rec_convert_dtuple_to_rec_comp( -/*===========================*/ - rec_t* rec, /*!< in: origin of record */ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint status, /*!< in: status bits of the record */ - bool temp) /*!< in: whether to use the - format for temporary files in - index creation */ -{ - const dfield_t* field; - const dtype_t* type; - byte* end; - byte* nulls; - byte* lens; - ulint len; - ulint i; - ulint n_node_ptr_field; - ulint fixed_len; - ulint null_mask = 1; - ulint n_null; - - ut_ad(temp || dict_table_is_comp(index->table)); - ut_ad(n_fields > 0); - - if (temp) { - ut_ad(status == REC_STATUS_ORDINARY); - ut_ad(n_fields <= dict_index_get_n_fields(index)); - n_node_ptr_field = ULINT_UNDEFINED; - nulls = rec - 1; - if (dict_table_is_comp(index->table)) { - /* No need to do adjust fixed_len=0. We only - need to adjust it for ROW_FORMAT=REDUNDANT. 
*/ - temp = false; - } - } else { - nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); - - switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { - case REC_STATUS_ORDINARY: - ut_ad(n_fields <= dict_index_get_n_fields(index)); - n_node_ptr_field = ULINT_UNDEFINED; - break; - case REC_STATUS_NODE_PTR: - ut_ad(n_fields - == dict_index_get_n_unique_in_tree(index) + 1); - n_node_ptr_field = n_fields - 1; - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - ut_ad(n_fields == 1); - n_node_ptr_field = ULINT_UNDEFINED; - break; - default: - ut_error; - return; - } - } - - end = rec; - n_null = index->n_nullable; - lens = nulls - UT_BITS_IN_BYTES(n_null); - /* clear the SQL-null flags */ - memset(lens + 1, 0, nulls - lens); - - /* Store the data and the offsets */ - - for (i = 0, field = fields; i < n_fields; i++, field++) { - const dict_field_t* ifield; - - type = dfield_get_type(field); - len = dfield_get_len(field); - - if (UNIV_UNLIKELY(i == n_node_ptr_field)) { - ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); - ut_ad(len == REC_NODE_PTR_SIZE); - memcpy(end, dfield_get_data(field), len); - end += REC_NODE_PTR_SIZE; - break; - } - - if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { - /* nullable field */ - ut_ad(n_null--); - - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls--; - null_mask = 1; - } - - ut_ad(*nulls < null_mask); - - /* set the null flag if necessary */ - if (dfield_is_null(field)) { - *nulls |= null_mask; - null_mask <<= 1; - continue; - } - - null_mask <<= 1; - } - /* only nullable fields can be null */ - ut_ad(!dfield_is_null(field)); - - ifield = dict_index_get_nth_field(index, i); - fixed_len = ifield->fixed_len; - if (temp && fixed_len - && !dict_col_get_fixed_size(ifield->col, temp)) { - fixed_len = 0; - } - /* If the maximum length of a variable-length field - is up to 255 bytes, the actual length is always stored - in one byte. If the maximum length is more than 255 - bytes, the actual length is stored in one byte for - 0..127. 
The length will be encoded in two bytes when - it is 128 or more, or when the field is stored externally. */ - if (fixed_len) { -#ifdef UNIV_DEBUG - ulint mbminlen = DATA_MBMINLEN( - ifield->col->mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN( - ifield->col->mbminmaxlen); - - ut_ad(len <= fixed_len); - ut_ad(!mbmaxlen || len >= mbminlen - * (fixed_len / mbmaxlen)); - ut_ad(!dfield_is_ext(field)); -#endif /* UNIV_DEBUG */ - } else if (dfield_is_ext(field)) { - ut_ad(ifield->col->len >= 256 - || ifield->col->mtype == DATA_BLOB); - ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE); - *lens-- = (byte) (len >> 8) | 0xc0; - *lens-- = (byte) len; - } else { - ut_ad(len <= dtype_get_len(type) - || dtype_get_mtype(type) == DATA_BLOB - || !strcmp(index->name, - FTS_INDEX_TABLE_IND_NAME)); - if (len < 128 - || (dtype_get_len(type) < 256 - && dtype_get_mtype(type) != DATA_BLOB)) { - - *lens-- = (byte) len; - } else { - ut_ad(len < 16384); - *lens-- = (byte) (len >> 8) | 0x80; - *lens-- = (byte) len; - } - } - - if (len) { - memcpy(end, dfield_get_data(field), len); - end += len; - } - } -} - -/*********************************************************//** -Builds a new-style physical record out of a data tuple and -stores it beginning from the start of the given buffer. 
-@return pointer to the origin of physical record */ -static -rec_t* -rec_convert_dtuple_to_rec_new( -/*==========================*/ - byte* buf, /*!< in: start address of - the physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* dtuple) /*!< in: data tuple */ -{ - ulint extra_size; - ulint status; - rec_t* rec; - - status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK; - rec_get_converted_size_comp( - index, status, dtuple->fields, dtuple->n_fields, &extra_size); - rec = buf + extra_size; - - rec_convert_dtuple_to_rec_comp( - rec, index, dtuple->fields, dtuple->n_fields, status, false); - - /* Set the info bits of the record */ - rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple)); - - return(rec); -} - -/*********************************************************//** -Builds a physical record out of a data tuple and -stores it beginning from the start of the given buffer. -@return pointer to the origin of physical record */ -UNIV_INTERN -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - byte* buf, /*!< in: start address of the - physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* dtuple, /*!< in: data tuple */ - ulint n_ext) /*!< in: number of - externally stored columns */ -{ - rec_t* rec; - - ut_ad(buf != NULL); - ut_ad(index != NULL); - ut_ad(dtuple != NULL); - ut_ad(dtuple_validate(dtuple)); - ut_ad(dtuple_check_typed(dtuple)); - - if (dict_table_is_comp(index->table)) { - rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple); - } else { - rec = rec_convert_dtuple_to_rec_old(buf, dtuple, n_ext); - } - -#ifdef UNIV_DEBUG - { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - ulint i; - rec_offs_init(offsets_); - - offsets = rec_get_offsets(rec, index, - offsets_, ULINT_UNDEFINED, &heap); - ut_ad(rec_validate(rec, offsets)); - ut_ad(dtuple_get_n_fields(dtuple) - == rec_offs_n_fields(offsets)); - - for 
(i = 0; i < rec_offs_n_fields(offsets); i++) { - ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i)) - == !rec_offs_nth_extern(offsets, i)); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } -#endif /* UNIV_DEBUG */ - return(rec); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT. -@return total size */ -UNIV_INTERN -ulint -rec_get_converted_size_temp( -/*========================*/ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields,/*!< in: number of data fields */ - ulint* extra) /*!< out: extra size */ -{ - return(rec_get_converted_size_comp_prefix_low( - index, fields, n_fields, extra, true)); -} - -/******************************************************//** -Determine the offset to each field in temporary file. -@see rec_convert_dtuple_to_temp() */ -UNIV_INTERN -void -rec_init_offsets_temp( -/*==================*/ - const rec_t* rec, /*!< in: temporary file record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint* offsets)/*!< in/out: array of offsets; - in: n=rec_offs_n_fields(offsets) */ -{ - rec_init_offsets_comp_ordinary(rec, true, index, offsets); -} - -/*********************************************************//** -Builds a temporary file record out of a data tuple. -@see rec_init_offsets_temp() */ -UNIV_INTERN -void -rec_convert_dtuple_to_temp( -/*=======================*/ - rec_t* rec, /*!< out: record */ - const dict_index_t* index, /*!< in: record descriptor */ - const dfield_t* fields, /*!< in: array of data fields */ - ulint n_fields) /*!< in: number of fields */ -{ - rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields, - REC_STATUS_ORDINARY, true); -} - -/**************************************************************//** -Copies the first n fields of a physical record to a data tuple. 
The fields
-are copied to the memory heap. */
-UNIV_INTERN
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
-	dtuple_t*		tuple,		/*!< out: data tuple */
-	const rec_t*		rec,		/*!< in: physical record */
-	const dict_index_t*	index,		/*!< in: record descriptor */
-	ulint			n_fields,	/*!< in: number of fields
-						to copy */
-	mem_heap_t*		heap)		/*!< in: memory heap */
-{
-	ulint	offsets_[REC_OFFS_NORMAL_SIZE];
-	rec_offs_init(offsets_);
-
-	/* The same heap receives both the copied field data and, if the
-	stack array is not used, the offsets array. */
-	ulint*	offsets = rec_get_offsets(
-		rec, index, offsets_, n_fields, &heap);
-
-	ut_ad(rec_validate(rec, offsets));
-	ut_ad(dtuple_check_typed(tuple));
-
-	const ulint	info_bits = rec_get_info_bits(
-		rec, dict_table_is_comp(index->table));
-
-	dtuple_set_info_bits(tuple, info_bits);
-
-	for (ulint n = 0; n < n_fields; n++) {
-		ulint		len;
-		dfield_t*	dst = dtuple_get_nth_field(tuple, n);
-		const byte*	src = rec_get_nth_field(
-			rec, offsets, n, &len);
-
-		if (len == UNIV_SQL_NULL) {
-			dfield_set_null(dst);
-			continue;
-		}
-
-		/* Duplicate the bytes so that the tuple does not point
-		into the record. */
-		dfield_set_data(dst, mem_heap_dup(heap, src, len), len);
-		ut_ad(!rec_offs_nth_extern(offsets, n));
-	}
-}
-
-/**************************************************************//**
-Copies the first n fields of an old-style physical record
-to a new physical record in a buffer.
-@return own: copied record */
-static
-rec_t*
-rec_copy_prefix_to_buf_old(
-/*=======================*/
-	const rec_t*	rec,		/*!< in: physical record */
-	ulint		n_fields,	/*!< in: number of fields to copy */
-	ulint		area_end,	/*!< in: end of the prefix data */
-	byte**		buf,		/*!< in/out: memory buffer for
-					the copied prefix, or NULL */
-	ulint*		buf_size)	/*!< in/out: buffer size */
-{
-	/* Bytes that precede the record origin: the fixed extra bytes
-	plus one or two bytes per copied field, depending on the
-	1-byte-offsets flag of the record. */
-	const ulint	bytes_per_field
-		= rec_get_1byte_offs_flag(rec) ? 1 : 2;
-	const ulint	header_len
-		= REC_N_OLD_EXTRA_BYTES + bytes_per_field * n_fields;
-	const ulint	total_len = header_len + area_end;
-
-	/* Discard a buffer that is too small, then allocate if there is
-	no usable buffer. */
-	if (*buf != NULL && *buf_size < total_len) {
-		mem_free(*buf);
-		*buf = NULL;
-	}
-
-	if (*buf == NULL) {
-		*buf = static_cast<byte*>(mem_alloc2(total_len, buf_size));
-	}
-
-	ut_memcpy(*buf, rec - header_len, total_len);
-
-	rec_t*	prefix_rec = *buf + header_len;
-
-	rec_set_n_fields_old(prefix_rec, n_fields);
-
-	return(prefix_rec);
-}
-
-/**************************************************************//**
-Copies the first n fields of a physical record to a new physical record in
-a buffer.
-@return own: copied record */ -UNIV_INTERN -rec_t* -rec_copy_prefix_to_buf( -/*===================*/ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index, /*!< in: record descriptor */ - ulint n_fields, /*!< in: number of fields - to copy */ - byte** buf, /*!< in/out: memory buffer - for the copied prefix, - or NULL */ - ulint* buf_size) /*!< in/out: buffer size */ -{ - const byte* nulls; - const byte* lens; - ulint i; - ulint prefix_len; - ulint null_mask; - ulint status; - - UNIV_PREFETCH_RW(*buf); - - if (!dict_table_is_comp(index->table)) { - ut_ad(rec_validate_old(rec)); - return(rec_copy_prefix_to_buf_old( - rec, n_fields, - rec_get_field_start_offs(rec, n_fields), - buf, buf_size)); - } - - status = rec_get_status(rec); - - switch (status) { - case REC_STATUS_ORDINARY: - ut_ad(n_fields <= dict_index_get_n_fields(index)); - break; - case REC_STATUS_NODE_PTR: - /* it doesn't make sense to copy the child page number field */ - ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index)); - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* infimum or supremum record: no sense to copy anything */ - default: - ut_error; - return(NULL); - } - - nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); - lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); - UNIV_PREFETCH_R(lens); - prefix_len = 0; - null_mask = 1; - - /* read the lengths of fields 0..n */ - for (i = 0; i < n_fields; i++) { - const dict_field_t* field; - const dict_col_t* col; - - field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(field); - - if (!(col->prtype & DATA_NOT_NULL)) { - /* nullable field => read the null flag */ - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls--; - null_mask = 1; - } - - if (*nulls & null_mask) { - null_mask <<= 1; - continue; - } - - null_mask <<= 1; - } - - if (field->fixed_len) { - prefix_len += field->fixed_len; - } else { - ulint len = *lens--; - /* If the maximum length of the column is up - to 255 bytes, the actual length 
is always - stored in one byte. If the maximum length is - more than 255 bytes, the actual length is - stored in one byte for 0..127. The length - will be encoded in two bytes when it is 128 or - more, or when the column is stored externally. */ - if (col->len > 255 || col->mtype == DATA_BLOB) { - if (len & 0x80) { - /* 1exxxxxx */ - len &= 0x3f; - len <<= 8; - len |= *lens--; - UNIV_PREFETCH_R(lens); - } - } - prefix_len += len; - } - } - - UNIV_PREFETCH_R(rec + prefix_len); - - prefix_len += rec - (lens + 1); - - if ((*buf == NULL) || (*buf_size < prefix_len)) { - if (*buf != NULL) { - mem_free(*buf); - } - - *buf = static_cast<byte*>(mem_alloc2(prefix_len, buf_size)); - } - - memcpy(*buf, lens + 1, prefix_len); - - return(*buf + (rec - (lens + 1))); -} -#endif /* UNIV_HOTBACKUP */ - -/***************************************************************//** -Validates the consistency of an old-style physical record. -@return TRUE if ok */ -static -ibool -rec_validate_old( -/*=============*/ - const rec_t* rec) /*!< in: physical record */ -{ - const byte* data; - ulint len; - ulint n_fields; - ulint len_sum = 0; - ulint sum = 0; - ulint i; - - ut_a(rec); - n_fields = rec_get_n_fields_old(rec); - - if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { - fprintf(stderr, "InnoDB: Error: record has %lu fields\n", - (ulong) n_fields); - return(FALSE); - } - - for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field_old(rec, i, &len); - - if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { - fprintf(stderr, - "InnoDB: Error: record field %lu len %lu\n", - (ulong) i, - (ulong) len); - return(FALSE); - } - - if (len != UNIV_SQL_NULL) { - len_sum += len; - sum += *(data + len -1); /* dereference the - end of the field to - cause a memory trap - if possible */ - } else { - len_sum += rec_get_nth_field_size(rec, i); - } - } - - if (len_sum != rec_get_data_size_old(rec)) { - fprintf(stderr, - "InnoDB: Error: record len should be %lu, len %lu\n", - (ulong) len_sum, - 
rec_get_data_size_old(rec)); - return(FALSE); - } - - rec_dummy = sum; /* This is here only to fool the compiler */ - - return(TRUE); -} - -/***************************************************************//** -Validates the consistency of a physical record. -@return TRUE if ok */ -UNIV_INTERN -ibool -rec_validate( -/*=========*/ - const rec_t* rec, /*!< in: physical record */ - const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ -{ - const byte* data; - ulint len; - ulint n_fields; - ulint len_sum = 0; - ulint sum = 0; - ulint i; - - ut_a(rec); - n_fields = rec_offs_n_fields(offsets); - - if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { - fprintf(stderr, "InnoDB: Error: record has %lu fields\n", - (ulong) n_fields); - return(FALSE); - } - - ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec)); - - for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, offsets, i, &len); - - if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { - fprintf(stderr, - "InnoDB: Error: record field %lu len %lu\n", - (ulong) i, - (ulong) len); - return(FALSE); - } - - if (len != UNIV_SQL_NULL) { - len_sum += len; - sum += *(data + len -1); /* dereference the - end of the field to - cause a memory trap - if possible */ - } else if (!rec_offs_comp(offsets)) { - len_sum += rec_get_nth_field_size(rec, i); - } - } - - if (len_sum != rec_offs_data_size(offsets)) { - fprintf(stderr, - "InnoDB: Error: record len should be %lu, len %lu\n", - (ulong) len_sum, - (ulong) rec_offs_data_size(offsets)); - return(FALSE); - } - - rec_dummy = sum; /* This is here only to fool the compiler */ - - if (!rec_offs_comp(offsets)) { - ut_a(rec_validate_old(rec)); - } - - return(TRUE); -} - -/***************************************************************//** -Prints an old-style physical record. 
*/
-UNIV_INTERN
-void
-rec_print_old(
-/*==========*/
-	FILE*		file,	/*!< in: file where to print */
-	const rec_t*	rec)	/*!< in: physical record */
-{
-	const byte*	data;
-	ulint		len;
-	ulint		n;
-	ulint		i;
-
-	ut_ad(rec);
-
-	n = rec_get_n_fields_old(rec);
-
-	fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
-		" %u-byte offsets; info bits %lu\n",
-		(ulong) n,
-		rec_get_1byte_offs_flag(rec) ? 1 : 2,
-		(ulong) rec_get_info_bits(rec, FALSE));
-
-	for (i = 0; i < n; i++) {
-
-		data = rec_get_nth_field_old(rec, i, &len);
-
-		fprintf(file, " %lu:", (ulong) i);
-
-		if (len != UNIV_SQL_NULL) {
-			/* At most the first 30 bytes of a field are
-			dumped; longer fields also report their length. */
-			if (len <= 30) {
-
-				ut_print_buf(file, data, len);
-			} else {
-				ut_print_buf(file, data, 30);
-
-				fprintf(file, " (total %lu bytes)",
-					(ulong) len);
-			}
-		} else {
-			/* Cast to match "%lu", like every other
-			argument printed by this function. */
-			fprintf(file, " SQL NULL, size %lu ",
-				(ulong) rec_get_nth_field_size(rec, i));
-		}
-
-		putc(';', file);
-		putc('\n', file);
-	}
-
-	/* The result is ignored; rec_validate_old() prints its own
-	diagnostics to stderr. */
-	rec_validate_old(rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Prints a physical record in ROW_FORMAT=COMPACT.  Ignores the
-record header.
*/
-UNIV_INTERN
-void
-rec_print_comp(
-/*===========*/
-	FILE*		file,	/*!< in: file where to print */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	/* Number of leading bytes dumped for a long field */
-	const ulint	max_dump = 30;
-
-	for (ulint n = 0; n < rec_offs_n_fields(offsets); n++) {
-		ulint		len;
-		const byte*	data = rec_get_nth_field(
-			rec, offsets, n, &len);
-
-		fprintf(file, " %lu:", (ulong) n);
-
-		if (len == UNIV_SQL_NULL) {
-			fputs(" SQL NULL", file);
-		} else if (len <= max_dump) {
-			ut_print_buf(file, data, len);
-		} else {
-			ut_print_buf(file, data, max_dump);
-
-			if (rec_offs_nth_extern(offsets, n)) {
-				/* Also dump the trailing
-				BTR_EXTERN_FIELD_REF_SIZE bytes of an
-				externally stored field. */
-				fprintf(file, " (total %lu bytes, external)",
-					(ulong) len);
-				ut_print_buf(file, data + len
-					     - BTR_EXTERN_FIELD_REF_SIZE,
-					     BTR_EXTERN_FIELD_REF_SIZE);
-			} else {
-				fprintf(file, " (total %lu bytes)",
-					(ulong) len);
-			}
-		}
-
-		fputs(";\n", file);
-	}
-}
-
-/***************************************************************//**
-Prints a physical record described by an offsets array. Records that
-are not in the compact format are handed to rec_print_old().  */
-UNIV_INTERN
-void
-rec_print_new(
-/*==========*/
-	FILE*		file,	/*!< in: file where to print */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	ut_ad(rec);
-	ut_ad(offsets);
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	if (rec_offs_comp(offsets)) {
-		fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
-			" compact format; info bits %lu\n",
-			(ulong) rec_offs_n_fields(offsets),
-			(ulong) rec_get_info_bits(rec, TRUE));
-
-		rec_print_comp(file, rec, offsets);
-
-		/* The result is ignored here. */
-		rec_validate(rec, offsets);
-	} else {
-		rec_print_old(file, rec);
-	}
-}
-
-/***************************************************************//**
-Prints a physical record.
*/ -UNIV_INTERN -void -rec_print( -/*======*/ - FILE* file, /*!< in: file where to print */ - const rec_t* rec, /*!< in: physical record */ - const dict_index_t* index) /*!< in: record descriptor */ -{ - ut_ad(index); - - if (!dict_table_is_comp(index->table)) { - rec_print_old(file, rec); - return; - } else { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - - rec_print_new(file, rec, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap)); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef WITH_WSREP -int -wsrep_rec_get_foreign_key( - byte *buf, /* out: extracted key */ - ulint *buf_len, /* in/out: length of buf */ - const rec_t* rec, /* in: physical record */ - dict_index_t* index_for, /* in: index in foreign table */ - dict_index_t* index_ref, /* in: index in referenced table */ - ibool new_protocol) /* in: protocol > 1 */ -{ - const byte* data; - ulint len; - ulint key_len = 0; - ulint i; - uint key_parts; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - - ut_ad(index_for); - ut_ad(index_ref); - - rec_offs_init(offsets_); - offsets = rec_get_offsets(rec, index_for, offsets_, - ULINT_UNDEFINED, &heap); - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - ut_ad(rec); - - key_parts = dict_index_get_n_unique_in_tree(index_for); - for (i = 0; - i < key_parts && - (index_for->type & DICT_CLUSTERED || i < key_parts - 1); - i++) { - dict_field_t* field_f = - dict_index_get_nth_field(index_for, i); - const dict_col_t* col_f = dict_field_get_col(field_f); - dict_field_t* field_r = - dict_index_get_nth_field(index_ref, i); - const dict_col_t* col_r = dict_field_get_col(field_r); - - data = rec_get_nth_field(rec, offsets, i, &len); - if (key_len + ((len != UNIV_SQL_NULL) ? 
len + 1 : 1) > - *buf_len) { - fprintf (stderr, - "WSREP: FK key len exceeded %lu %lu %lu\n", - key_len, len, *buf_len); - goto err_out; - } - - if (len == UNIV_SQL_NULL) { - ut_a(!(col_f->prtype & DATA_NOT_NULL)); - *buf++ = 1; - key_len++; - } else if (!new_protocol) { - if (!(col_r->prtype & DATA_NOT_NULL)) { - *buf++ = 0; - key_len++; - } - memcpy(buf, data, len); - *buf_len = wsrep_innobase_mysql_sort( - (int)(col_f->prtype & DATA_MYSQL_TYPE_MASK), - (uint)dtype_get_charset_coll(col_f->prtype), - buf, len, *buf_len); - } else { /* new protocol */ - if (!(col_r->prtype & DATA_NOT_NULL)) { - *buf++ = 0; - key_len++; - } - switch (col_f->mtype) { - case DATA_INT: { - byte* ptr = buf+len; - for (;;) { - ptr--; - *ptr = *data; - if (ptr == buf) { - break; - } - data++; - } - - if (!(col_f->prtype & DATA_UNSIGNED)) { - buf[len-1] = (byte) (buf[len-1] ^ 128); - } - - break; - } - case DATA_VARCHAR: - case DATA_VARMYSQL: - case DATA_CHAR: - case DATA_MYSQL: - /* Copy the actual data */ - ut_memcpy(buf, data, len); - len = wsrep_innobase_mysql_sort( - (int) - (col_f->prtype & DATA_MYSQL_TYPE_MASK), - (uint) - dtype_get_charset_coll(col_f->prtype), - buf, len, *buf_len); - break; - case DATA_BLOB: - case DATA_BINARY: - memcpy(buf, data, len); - break; - default: - break; - } - - key_len += len; - buf += len; - } - } - - rec_validate(rec, offsets); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - *buf_len = key_len; - return DB_SUCCESS; - - err_out: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return DB_ERROR; -} -#endif /* WITH_WSREP */ - -# ifdef UNIV_DEBUG -/************************************************************//** -Reads the DB_TRX_ID of a clustered index record. 
-@return the value of DB_TRX_ID */ -UNIV_INTERN -trx_id_t -rec_get_trx_id( -/*===========*/ - const rec_t* rec, /*!< in: record */ - const dict_index_t* index) /*!< in: clustered index */ -{ - const page_t* page - = page_align(rec); - ulint trx_id_col - = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - const byte* trx_id; - ulint len; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id); - ut_ad(dict_index_is_clust(index)); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - - offsets = rec_get_offsets(rec, index, offsets, trx_id_col + 1, &heap); - - trx_id = rec_get_nth_field(rec, offsets, trx_id_col, &len); - - ut_ad(len == DATA_TRX_ID_LEN); - - if (heap) { - mem_heap_free(heap); - } - - return(trx_read_trx_id(trx_id)); -} -#endif /* UNIV_DEBUG */ - diff --git a/storage/xtradb/row/row0ext.cc b/storage/xtradb/row/row0ext.cc deleted file mode 100644 index ad852577ad2..00000000000 --- a/storage/xtradb/row/row0ext.cc +++ /dev/null @@ -1,143 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0ext.cc -Caching of externally stored column prefixes - -Created September 2006 Marko Makela -*******************************************************/ - -#include "row0ext.h" - -#ifdef UNIV_NONINL -#include "row0ext.ic" -#endif - -#include "btr0cur.h" - -/********************************************************************//** -Fills the column prefix cache of an externally stored column. */ -static -void -row_ext_cache_fill( -/*===============*/ - row_ext_t* ext, /*!< in/out: column prefix cache */ - ulint i, /*!< in: index of ext->ext[] */ - ulint zip_size,/*!< compressed page size in bytes, or 0 */ - const dfield_t* dfield) /*!< in: data field */ -{ - const byte* field = static_cast<const byte*>( - dfield_get_data(dfield)); - ulint f_len = dfield_get_len(dfield); - byte* buf = ext->buf + i * ext->max_len; - - ut_ad(ext->max_len > 0); - ut_ad(i < ext->n_ext); - ut_ad(dfield_is_ext(dfield)); - ut_a(f_len >= BTR_EXTERN_FIELD_REF_SIZE); - - if (UNIV_UNLIKELY(!memcmp(field_ref_zero, - field + f_len - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE))) { - /* The BLOB pointer is not set: we cannot fetch it */ - ext->len[i] = 0; - } else { - if (ext->max_len == REC_VERSION_56_MAX_INDEX_COL_LEN - && f_len > BTR_EXTERN_FIELD_REF_SIZE) { - /* In this case, the field is in B format or beyond, - (refer to the definition of row_ext_t.max_len) - and the field is already fill with prefix, otherwise - f_len would be BTR_EXTERN_FIELD_REF_SIZE. - So there is no need to re-read the prefix externally, - but just copy the local prefix to buf. Please note - if the ext->len[i] is zero, it means an error - as above. 
*/ - memcpy(buf, field, f_len - BTR_EXTERN_FIELD_REF_SIZE); - ext->len[i] = f_len - BTR_EXTERN_FIELD_REF_SIZE; - } else { - /* Fetch at most ext->max_len of the column. - The column should be non-empty. However, - trx_rollback_or_clean_all_recovered() may try to - access a half-deleted BLOB if the server previously - crashed during the execution of - btr_free_externally_stored_field(). */ - ext->len[i] = btr_copy_externally_stored_field_prefix( - buf, ext->max_len, zip_size, field, f_len, - NULL); - } - } -} - -/********************************************************************//** -Creates a cache of column prefixes of externally stored columns. -@return own: column prefix cache */ -UNIV_INTERN -row_ext_t* -row_ext_create( -/*===========*/ - ulint n_ext, /*!< in: number of externally stored columns */ - const ulint* ext, /*!< in: col_no's of externally stored columns - in the InnoDB table object, as reported by - dict_col_get_no(); NOT relative to the records - in the clustered index */ - ulint flags, /*!< in: table->flags */ - const dtuple_t* tuple, /*!< in: data tuple containing the field - references of the externally stored - columns; must be indexed by col_no; - the clustered index record must be - covered by a lock or a page latch - to prevent deletion (rollback or purge). 
*/ - mem_heap_t* heap) /*!< in: heap where created */ -{ - ulint i; - ulint zip_size = dict_tf_get_zip_size(flags); - - row_ext_t* ret; - - ut_ad(n_ext > 0); - - ret = static_cast<row_ext_t*>( - mem_heap_alloc(heap, - (sizeof *ret) + (n_ext - 1) * sizeof ret->len)); - - ut_ad(ut_is_2pow(zip_size)); - ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); - - ret->n_ext = n_ext; - ret->ext = ext; - ret->max_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags); - - ret->buf = static_cast<byte*>( - mem_heap_alloc(heap, n_ext * ret->max_len)); - -#ifdef UNIV_DEBUG - memset(ret->buf, 0xaa, n_ext * ret->max_len); - UNIV_MEM_ALLOC(ret->buf, n_ext * ret->max_len); -#endif - - /* Fetch the BLOB prefixes */ - for (i = 0; i < n_ext; i++) { - const dfield_t* dfield; - - dfield = dtuple_get_nth_field(tuple, ext[i]); - row_ext_cache_fill(ret, i, zip_size, dfield); - } - - return(ret); -} diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc deleted file mode 100644 index 7ffcc59dc5f..00000000000 --- a/storage/xtradb/row/row0ftsort.cc +++ /dev/null @@ -1,1662 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0ftsort.cc -Create Full Text Index with (parallel) merge sort - -Created 10/13/2010 Jimmy Yang -*******************************************************/ - -#include "dict0dict.h" /* dict_table_stats_lock() */ -#include "row0merge.h" -#include "pars0pars.h" -#include "row0ftsort.h" -#include "row0merge.h" -#include "row0row.h" -#include "btr0cur.h" -#include "btr0sea.h" - -/** Read the next record to buffer N. -@param N index into array of merge info structure */ -#define ROW_MERGE_READ_GET_NEXT(N) \ - do { \ - b[N] = row_merge_read_rec( \ - block[N], buf[N], b[N], index, \ - fd[N], &foffs[N], &mrec[N], offsets[N], \ - crypt_data, crypt_block[N], space); \ - if (UNIV_UNLIKELY(!b[N])) { \ - if (mrec[N]) { \ - goto exit; \ - } \ - } \ - } while (0) - -/** Parallel sort degree */ -UNIV_INTERN ulong fts_sort_pll_degree = 2; - -/*********************************************************************//** -Create a temporary "fts sort index" used to merge sort the -tokenized doc string. The index has three "fields": - -1) Tokenized word, -2) Doc ID (depend on number of records to sort, it can be a 4 bytes or 8 bytes -integer value) -3) Word's position in original doc. 
-
-@see fts_create_one_index_table()
-
-@return dict_index_t structure for the fts sort index */
-UNIV_INTERN
-dict_index_t*
-row_merge_create_fts_sort_index(
-/*============================*/
-	dict_index_t*		index,	/*!< in: Original FTS index
-					based on which this sort index
-					is created */
-	const dict_table_t*	table,	/*!< in: table that FTS index
-					is being created on */
-	ibool*			opt_doc_id_size)
-					/*!< out: whether to use 4 bytes
-					instead of 8 bytes integer to
-					store Doc ID during sort */
-{
-	// FIXME: This name shouldn't be hard coded here.
-	dict_index_t*	sort_index = dict_mem_index_create(
-		index->table->name, "tmp_fts_idx", 0, DICT_FTS, 3);
-
-	sort_index->id = index->id;
-	sort_index->table = (dict_table_t*) table;
-	sort_index->n_uniq = FTS_NUM_FIELDS_SORT;
-	sort_index->n_def = FTS_NUM_FIELDS_SORT;
-	sort_index->cached = TRUE;
-
-	btr_search_index_init(sort_index);
-
-	dict_field_t*	src_field = dict_index_get_nth_field(index, 0);
-	CHARSET_INFO*	charset = fts_index_get_charset(index);
-
-	/* Field 0: the tokenized word. Its type is derived from the
-	first field of the original index. */
-	dict_field_t*	word = dict_index_get_nth_field(sort_index, 0);
-	dict_col_t*	word_col = static_cast<dict_col_t*>(
-		mem_heap_alloc(sort_index->heap, sizeof(dict_col_t)));
-
-	word->name = NULL;
-	word->prefix_len = 0;
-	word->fixed_len = 0;
-	word->col = word_col;
-
-	word_col->prtype = src_field->col->prtype | DATA_NOT_NULL;
-	word_col->mtype = (charset == &my_charset_latin1)
-		? DATA_VARCHAR : DATA_VARMYSQL;
-	word_col->mbminmaxlen = src_field->col->mbminmaxlen;
-	/* mbminmaxlen must be set before it is used here */
-	word_col->len = HA_FT_MAXCHARLEN
-		* DATA_MBMAXLEN(word_col->mbminmaxlen);
-
-	/* Field 1: the Doc ID */
-	dict_field_t*	doc_id = dict_index_get_nth_field(sort_index, 1);
-	dict_col_t*	doc_id_col = static_cast<dict_col_t*>(
-		mem_heap_alloc(sort_index->heap, sizeof(dict_col_t)));
-
-	doc_id->name = NULL;
-	doc_id->prefix_len = 0;
-	doc_id->col = doc_id_col;
-	doc_id_col->mtype = DATA_INT;
-
-	/* Check whether we can use 4 bytes instead of 8 bytes integer
-	field to hold the Doc ID, thus reduce the overall sort size */
-	bool	short_doc_id;
-
-	if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
-		/* If Doc ID column is being added by this create
-		index, then just check the number of rows in the table */
-		short_doc_id = dict_table_get_n_rows(table)
-			< MAX_DOC_ID_OPT_VAL;
-	} else {
-		/* If the Doc ID column is supplied by user, then
-		check the maximum Doc ID in the table */
-		const doc_id_t	max_doc_id = fts_get_max_doc_id(
-			(dict_table_t*) table);
-
-		short_doc_id = max_doc_id != 0
-			&& max_doc_id < MAX_DOC_ID_OPT_VAL;
-	}
-
-	*opt_doc_id_size = short_doc_id ? TRUE : FALSE;
-
-	if (short_doc_id) {
-		doc_id_col->len = sizeof(ib_uint32_t);
-		doc_id->fixed_len = sizeof(ib_uint32_t);
-	} else {
-		doc_id_col->len = FTS_DOC_ID_LEN;
-		doc_id->fixed_len = FTS_DOC_ID_LEN;
-	}
-
-	doc_id_col->prtype = DATA_NOT_NULL | DATA_BINARY_TYPE;
-	doc_id_col->mbminmaxlen = 0;
-
-	/* Field 2: the word's position in the original doc */
-	dict_field_t*	position = dict_index_get_nth_field(sort_index, 2);
-	dict_col_t*	position_col = static_cast<dict_col_t*>(
-		mem_heap_alloc(sort_index->heap, sizeof(dict_col_t)));
-
-	position->name = NULL;
-	position->prefix_len = 0;
-	position->fixed_len = 4;
-	position->col = position_col;
-
-	position_col->mtype = DATA_INT;
-	position_col->len = 4 ;
-	position_col->prtype = DATA_NOT_NULL;
-	position_col->mbminmaxlen = 0;
-
-	return(sort_index);
-}
-/*********************************************************************//**
-Initialize FTS parallel sort structures.
-@return TRUE if all successful */
-UNIV_INTERN
-ibool
-row_fts_psort_info_init(
-/*====================*/
-	trx_t*			trx,	/*!< in: transaction */
-	row_merge_dup_t*	dup,	/*!< in,own: descriptor of
-					FTS index being created */
-	const dict_table_t*	new_table,/*!< in: table on which indexes are
-					created */
-	ibool			opt_doc_id_size,
-					/*!< in: whether to use 4 bytes
-					instead of 8 bytes integer to
-					store Doc ID during sort */
-	fts_psort_t**		psort,	/*!< out: parallel sort info to be
-					instantiated; NULL on failure */
-	fts_psort_t**		merge)	/*!< out: parallel merge info
-					to be instantiated; NULL on
-					failure */
-{
-	ulint			i;
-	ulint			j;
-	fts_psort_common_t*	common_info = NULL;
-	fts_psort_t*		psort_info = NULL;
-	fts_psort_t*		merge_info = NULL;
-	ulint			block_size;
-	ibool			ret = TRUE;
-	fil_space_crypt_t*	crypt_data = NULL;
-	bool			encrypted = false;
-
-	block_size = 3 * srv_sort_buf_size;
-
-	/* The out parameters are published only on success, so that a
-	caller never sees pointers to structures freed below. */
-	*psort = NULL;
-	*merge = NULL;
-
-	psort_info = static_cast<fts_psort_t*>(mem_zalloc(
-		 fts_sort_pll_degree * sizeof *psort_info));
-
-	if (!psort_info) {
-		ut_free(dup);
-		return(FALSE);
-	}
-
-	/* Common Info for all sort threads */
-	common_info = static_cast<fts_psort_common_t*>(
-		mem_alloc(sizeof *common_info));
-
-	if (!common_info) {
-		ut_free(dup);
-		mem_free(psort_info);
-		return(FALSE);
-	}
-
-	common_info->dup = dup;
-	common_info->new_table = (dict_table_t*) new_table;
-	common_info->trx = trx;
-	common_info->all_info = psort_info;
-	common_info->sort_event = os_event_create();
-	common_info->merge_event = os_event_create();
-	common_info->opt_doc_id_size = opt_doc_id_size;
-
-	/* Theoretically the tablespace can be dropped straight away.
-	In practice, the DDL completion will wait for this thread to
-	finish. */
-	if (fil_space_t* space = fil_space_acquire(new_table->space)) {
-		crypt_data = space->crypt_data;
-		fil_space_release(space);
-	}
-
-	if (crypt_data && crypt_data->should_encrypt()) {
-		common_info->crypt_data = crypt_data;
-		encrypted = true;
-	} else {
-		/* Not needed */
-		common_info->crypt_data = NULL;
-		crypt_data = NULL;
-	}
-
-	/* Set up the per-thread state and mutex of every slot before
-	anything that can fail, so that row_fts_psort_info_destroy()
-	always finds created mutexes and a valid psort_common. */
-	for (j = 0; j < fts_sort_pll_degree; j++) {
-		psort_info[j].child_status = 0;
-		psort_info[j].state = 0;
-		psort_info[j].psort_common = common_info;
-		psort_info[j].error = DB_SUCCESS;
-		psort_info[j].memory_used = 0;
-		mutex_create(fts_pll_tokenize_mutex_key, &psort_info[j].mutex, SYNC_FTS_TOKENIZE);
-	}
-
-	ut_ad(trx->mysql_thd != NULL);
-	const char*	path = thd_innodb_tmpdir(trx->mysql_thd);
-
-	/* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for
-	each parallel sort thread. Each "sort bucket" holds records for
-	a particular "FTS index partition" */
-	for (j = 0; j < fts_sort_pll_degree; j++) {
-		for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
-
-			psort_info[j].merge_file[i] =
-				 static_cast<merge_file_t*>(
-					mem_zalloc(sizeof(merge_file_t)));
-
-			if (!psort_info[j].merge_file[i]) {
-				ret = FALSE;
-				goto func_exit;
-			}
-
-			psort_info[j].merge_buf[i] = row_merge_buf_create(
-				dup->index);
-
-			if (row_merge_file_create(psort_info[j].merge_file[i],
-						  path) < 0) {
-				/* Report the failure instead of
-				returning TRUE with half-built data. */
-				ret = FALSE;
-				goto func_exit;
-			}
-
-			/* Need to align memory for O_DIRECT write */
-			psort_info[j].block_alloc[i] =
-				static_cast<row_merge_block_t*>(ut_malloc(
-					block_size + 1024));
-
-			psort_info[j].merge_block[i] =
-				static_cast<row_merge_block_t*>(
-					ut_align(
-					psort_info[j].block_alloc[i], 1024));
-
-			/* If tablespace is encrypted, allocate additional
-			buffer for encryption/decryption. */
-			if (encrypted) {
-
-				/* Need to align memory for O_DIRECT write */
-				psort_info[j].crypt_alloc[i] =
-					static_cast<row_merge_block_t*>(ut_malloc(
-						block_size + 1024));
-
-				psort_info[j].crypt_block[i] =
-					static_cast<row_merge_block_t*>(
-						ut_align(
-							psort_info[j].crypt_alloc[i], 1024));
-
-				if (!psort_info[j].crypt_block[i]) {
-					ret = FALSE;
-					goto func_exit;
-				}
-			} else {
-				psort_info[j].crypt_alloc[i] = NULL;
-				psort_info[j].crypt_block[i] = NULL;
-			}
-
-			if (!psort_info[j].merge_block[i]) {
-				ret = FALSE;
-				goto func_exit;
-			}
-		}
-	}
-
-	/* Initialize merge_info structures parallel merge and insert
-	into auxiliary FTS tables (FTS_INDEX_TABLE) */
-	merge_info = static_cast<fts_psort_t*>(
-		mem_alloc(FTS_NUM_AUX_INDEX * sizeof *merge_info));
-
-	if (!merge_info) {
-		ret = FALSE;
-		goto func_exit;
-	}
-
-	for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
-
-		merge_info[j].child_status = 0;
-		merge_info[j].state = 0;
-		merge_info[j].psort_common = common_info;
-	}
-
-	*psort = psort_info;
-	*merge = merge_info;
-
-func_exit:
-	if (!ret) {
-		/* row_fts_psort_info_destroy() does not free the merge
-		buffers; free the ones created so far. Slots that were
-		not reached are NULL because of mem_zalloc(). */
-		for (j = 0; j < fts_sort_pll_degree; j++) {
-			for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
-				if (psort_info[j].merge_buf[i]) {
-					row_merge_buf_free(
-						psort_info[j].merge_buf[i]);
-					psort_info[j].merge_buf[i] = NULL;
-				}
-			}
-		}
-
-		row_fts_psort_info_destroy(psort_info, merge_info);
-	}
-
-	return(ret);
-}
-/*********************************************************************//**
-Clean up and deallocate FTS parallel sort structures, and close the
-merge sort files. merge_info may be NULL while psort_info is not; the
-shared fts_psort_common_t is then found through psort_info[0]. */
-UNIV_INTERN
-void
-row_fts_psort_info_destroy(
-/*=======================*/
-	fts_psort_t*	psort_info,	/*!< parallel sort info */
-	fts_psort_t*	merge_info)	/*!< parallel merge info */
-{
-	ulint	i;
-	ulint	j;
-
-	if (psort_info) {
-		/* Do not dereference merge_info unless it exists */
-		fts_psort_common_t*	common = merge_info
-			? merge_info[0].psort_common
-			: psort_info[0].psort_common;
-
-		for (j = 0; j < fts_sort_pll_degree; j++) {
-			for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
-				/* A slot can be NULL after a partial
-				initialization; close and free only
-				what exists. */
-				if (psort_info[j].merge_file[i]) {
-					row_merge_file_destroy(
-						psort_info[j].merge_file[i]);
-					mem_free(psort_info[j].merge_file[i]);
-				}
-
-				if (psort_info[j].block_alloc[i]) {
-					ut_free(psort_info[j].block_alloc[i]);
-				}
-
-				if (psort_info[j].crypt_alloc[i]) {
-					ut_free(psort_info[j].crypt_alloc[i]);
-				}
-			}
-
-			mutex_free(&psort_info[j].mutex);
-		}
-
-		if (common) {
-			os_event_free(common->sort_event);
-			os_event_free(common->merge_event);
-			ut_free(common->dup);
-			mem_free(common);
-		}
-
-		mem_free(psort_info);
-	}
-
-	if (merge_info) {
-		mem_free(merge_info);
-	}
-}
-/*********************************************************************//**
-Free up merge buffers when merge sort is done */
-UNIV_INTERN
-void
-row_fts_free_pll_merge_buf(
-/*=======================*/
-	fts_psort_t*	psort_info)	/*!< in: parallel sort info */
-{
-	ulint	j;
-	ulint	i;
-
-	if (!psort_info) {
-		return;
-	}
-
-	for (j = 0; j < fts_sort_pll_degree; j++) {
-		for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
-			row_merge_buf_free(psort_info[j].merge_buf[i]);
-		}
-	}
-
-	return;
-}
-
-/*********************************************************************//**
-Tokenize incoming text data and add to the sort buffer.
-@see row_merge_buf_encode() -@return TRUE if the record passed, FALSE if out of space */ -static -ibool -row_merge_fts_doc_tokenize( -/*=======================*/ - row_merge_buf_t** sort_buf, /*!< in/out: sort buffer */ - doc_id_t doc_id, /*!< in: Doc ID */ - fts_doc_t* doc, /*!< in: Doc to be tokenized */ - merge_file_t** merge_file, /*!< in/out: merge file */ - ibool opt_doc_id_size,/*!< in: whether to use 4 bytes - instead of 8 bytes integer to - store Doc ID during sort*/ - fts_tokenize_ctx_t* t_ctx) /*!< in/out: tokenize context */ -{ - ulint i; - ulint inc; - fts_string_t str; - ulint len; - row_merge_buf_t* buf; - dfield_t* field; - fts_string_t t_str; - ibool buf_full = FALSE; - byte str_buf[FTS_MAX_WORD_LEN + 1]; - ulint data_size[FTS_NUM_AUX_INDEX]; - ulint n_tuple[FTS_NUM_AUX_INDEX]; - - t_str.f_n_char = 0; - t_ctx->buf_used = 0; - - memset(n_tuple, 0, FTS_NUM_AUX_INDEX * sizeof(ulint)); - memset(data_size, 0, FTS_NUM_AUX_INDEX * sizeof(ulint)); - - /* Tokenize the data and add each word string, its corresponding - doc id and position to sort buffer */ - for (i = t_ctx->processed_len; i < doc->text.f_len; i += inc) { - ib_rbt_bound_t parent; - ulint idx = 0; - ib_uint32_t position; - ulint offset = 0; - ulint cur_len; - doc_id_t write_doc_id; - - inc = innobase_mysql_fts_get_token( - doc->charset, doc->text.f_str + i, - doc->text.f_str + doc->text.f_len, &str, &offset); - - ut_a(inc > 0); - - /* Ignore string whose character number is less than - "fts_min_token_size" or more than "fts_max_token_size" */ - if (str.f_n_char < fts_min_token_size - || str.f_n_char > fts_max_token_size) { - - t_ctx->processed_len += inc; - continue; - } - - t_str.f_len = innobase_fts_casedn_str( - doc->charset, (char*) str.f_str, str.f_len, - (char*) &str_buf, FTS_MAX_WORD_LEN + 1); - - t_str.f_str = (byte*) &str_buf; - - /* if "cached_stopword" is defined, ingore words in the - stopword list */ - if (t_ctx->cached_stopword - && rbt_search(t_ctx->cached_stopword, - &parent, 
&t_str) == 0) { - - t_ctx->processed_len += inc; - continue; - } - - /* There are FTS_NUM_AUX_INDEX auxiliary tables, find - out which sort buffer to put this word record in */ - t_ctx->buf_used = fts_select_index( - doc->charset, t_str.f_str, t_str.f_len); - - buf = sort_buf[t_ctx->buf_used]; - - ut_a(t_ctx->buf_used < FTS_NUM_AUX_INDEX); - idx = t_ctx->buf_used; - - mtuple_t* mtuple = &buf->tuples[buf->n_tuples + n_tuple[idx]]; - - field = mtuple->fields = static_cast<dfield_t*>( - mem_heap_alloc(buf->heap, - FTS_NUM_FIELDS_SORT * sizeof *field)); - - /* The first field is the tokenized word */ - dfield_set_data(field, t_str.f_str, t_str.f_len); - len = dfield_get_len(field); - - dict_col_copy_type(dict_index_get_nth_col(buf->index, 0), &field->type); - field->type.prtype |= DATA_NOT_NULL; - ut_ad(len <= field->type.len); - - /* For the temporary file, row_merge_buf_encode() uses - 1 byte for representing the number of extra_size bytes. - This number will always be 1, because for this 3-field index - consisting of one variable-size column, extra_size will always - be 1 or 2, which can be encoded in one byte. - - The extra_size is 1 byte if the length of the - variable-length column is less than 128 bytes or the - maximum length is less than 256 bytes. */ - - /* One variable length column, word with its lenght less than - fts_max_token_size, add one extra size and one extra byte. - - Since the max length for FTS token now is larger than 255, - so we will need to signify length byte itself, so only 1 to 128 - bytes can be used for 1 bytes, larger than that 2 bytes. */ - if (len < 128 || field->type.len < 256) { - /* Extra size is one byte. */ - cur_len = 2 + len; - } else { - /* Extra size is two bytes. 
*/ - cur_len = 3 + len; - } - - dfield_dup(field, buf->heap); - field++; - - /* The second field is the Doc ID */ - - ib_uint32_t doc_id_32_bit; - - if (!opt_doc_id_size) { - fts_write_doc_id((byte*) &write_doc_id, doc_id); - - dfield_set_data( - field, &write_doc_id, sizeof(write_doc_id)); - } else { - mach_write_to_4( - (byte*) &doc_id_32_bit, (ib_uint32_t) doc_id); - - dfield_set_data( - field, &doc_id_32_bit, sizeof(doc_id_32_bit)); - } - - len = field->len; - ut_ad(len == FTS_DOC_ID_LEN || len == sizeof(ib_uint32_t)); - - field->type.mtype = DATA_INT; - field->type.prtype = DATA_NOT_NULL | DATA_BINARY_TYPE; - field->type.len = len; - field->type.mbminmaxlen = 0; - - cur_len += len; - dfield_dup(field, buf->heap); - - ++field; - - /* The third field is the position */ - mach_write_to_4( - (byte*) &position, - (i + offset + inc - str.f_len + t_ctx->init_pos)); - - dfield_set_data(field, &position, sizeof(position)); - len = dfield_get_len(field); - ut_ad(len == sizeof(ib_uint32_t)); - - field->type.mtype = DATA_INT; - field->type.prtype = DATA_NOT_NULL; - field->type.len = len; - field->type.mbminmaxlen = 0; - cur_len += len; - dfield_dup(field, buf->heap); - - /* Reserve one byte for the end marker of row_merge_block_t - and we have reserved ROW_MERGE_RESERVE_SIZE (= 4) for - encryption key_version in the beginning of the buffer. */ - if (buf->total_size + data_size[idx] + cur_len - >= (srv_sort_buf_size - 1 - ROW_MERGE_RESERVE_SIZE)) { - - buf_full = TRUE; - break; - } - - /* Increment the number of tuples */ - n_tuple[idx]++; - t_ctx->processed_len += inc; - data_size[idx] += cur_len; - } - - /* Update the data length and the number of new word tuples - added in this round of tokenization */ - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { - /* The computation of total_size below assumes that no - delete-mark flags will be stored and that all fields - are NOT NULL and fixed-length. 
*/ - - sort_buf[i]->total_size += data_size[i]; - - sort_buf[i]->n_tuples += n_tuple[i]; - - merge_file[i]->n_rec += n_tuple[i]; - t_ctx->rows_added[i] += n_tuple[i]; - } - - if (!buf_full) { - /* we pad one byte between text accross two fields */ - t_ctx->init_pos += doc->text.f_len + 1; - } - - return(!buf_full); -} - -/*********************************************************************//** -Get next doc item from fts_doc_list */ -UNIV_INLINE -void -row_merge_fts_get_next_doc_item( -/*============================*/ - fts_psort_t* psort_info, /*!< in: psort_info */ - fts_doc_item_t** doc_item) /*!< in/out: doc item */ -{ - if (*doc_item != NULL) { - ut_free(*doc_item); - } - - mutex_enter(&psort_info->mutex); - - *doc_item = UT_LIST_GET_FIRST(psort_info->fts_doc_list); - if (*doc_item != NULL) { - UT_LIST_REMOVE(doc_list, psort_info->fts_doc_list, - *doc_item); - - ut_ad(psort_info->memory_used >= sizeof(fts_doc_item_t) - + (*doc_item)->field->len); - psort_info->memory_used -= sizeof(fts_doc_item_t) - + (*doc_item)->field->len; - } - - mutex_exit(&psort_info->mutex); -} - -/*********************************************************************//** -Function performs parallel tokenization of the incoming doc strings. -It also performs the initial in memory sort of the parsed records. 
-@return OS_THREAD_DUMMY_RETURN */ -UNIV_INTERN -os_thread_ret_t -fts_parallel_tokenization( -/*======================*/ - void* arg) /*!< in: psort_info for the thread */ -{ - fts_psort_t* psort_info = (fts_psort_t*) arg; - ulint i; - fts_doc_item_t* doc_item = NULL; - row_merge_buf_t** buf; - ibool processed = FALSE; - merge_file_t** merge_file; - row_merge_block_t** block; - row_merge_block_t** crypt_block; - int tmpfd[FTS_NUM_AUX_INDEX]; - ulint mycount[FTS_NUM_AUX_INDEX]; - ib_uint64_t total_rec = 0; - ulint num_doc_processed = 0; - doc_id_t last_doc_id = 0; - ulint zip_size; - mem_heap_t* blob_heap = NULL; - fts_doc_t doc; - dict_table_t* table = psort_info->psort_common->new_table; - fts_tokenize_ctx_t t_ctx; - ulint retried = 0; - dberr_t error = DB_SUCCESS; - fil_space_crypt_t* crypt_data = NULL; - - ut_ad(psort_info->psort_common->trx->mysql_thd != NULL); - - const char* path = thd_innodb_tmpdir( - psort_info->psort_common->trx->mysql_thd); - - ut_ad(psort_info); - - buf = psort_info->merge_buf; - merge_file = psort_info->merge_file; - blob_heap = mem_heap_create(512); - memset(&doc, 0, sizeof(doc)); - memset(&t_ctx, 0, sizeof(t_ctx)); - memset(mycount, 0, FTS_NUM_AUX_INDEX * sizeof(int)); - - doc.charset = fts_index_get_charset( - psort_info->psort_common->dup->index); - - block = psort_info->merge_block; - crypt_block = psort_info->crypt_block; - crypt_data = psort_info->psort_common->crypt_data; - zip_size = dict_table_zip_size(table); - - row_merge_fts_get_next_doc_item(psort_info, &doc_item); - - t_ctx.cached_stopword = table->fts->cache->stopword_info.cached_stopword; - processed = TRUE; -loop: - while (doc_item) { - dfield_t* dfield = doc_item->field; - - last_doc_id = doc_item->doc_id; - - ut_ad (dfield->data != NULL - && dfield_get_len(dfield) != UNIV_SQL_NULL); - - /* If finish processing the last item, update "doc" with - strings in the doc_item, otherwise continue processing last - item */ - if (processed) { - byte* data; - ulint data_len; - - 
dfield = doc_item->field; - data = static_cast<byte*>(dfield_get_data(dfield)); - data_len = dfield_get_len(dfield); - - if (dfield_is_ext(dfield)) { - doc.text.f_str = - btr_copy_externally_stored_field( - &doc.text.f_len, data, - zip_size, data_len, blob_heap, - NULL); - } else { - doc.text.f_str = data; - doc.text.f_len = data_len; - } - - doc.tokens = 0; - t_ctx.processed_len = 0; - } else { - /* Not yet finish processing the "doc" on hand, - continue processing it */ - ut_ad(doc.text.f_str); - ut_ad(t_ctx.processed_len < doc.text.f_len); - } - - processed = row_merge_fts_doc_tokenize( - buf, doc_item->doc_id, &doc, - merge_file, psort_info->psort_common->opt_doc_id_size, - &t_ctx); - - /* Current sort buffer full, need to recycle */ - if (!processed) { - ut_ad(t_ctx.processed_len < doc.text.f_len); - ut_ad(t_ctx.rows_added[t_ctx.buf_used]); - break; - } - - num_doc_processed++; - - if (fts_enable_diag_print && num_doc_processed % 10000 == 1) { - ib_logf(IB_LOG_LEVEL_INFO, - "number of doc processed %d\n", - (int) num_doc_processed); -#ifdef FTS_INTERNAL_DIAG_PRINT - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { - ib_logf(IB_LOG_LEVEL_INFO, - "ID %d, partition %d, word " - "%d\n",(int) psort_info->psort_id, - (int) i, (int) mycount[i]); - } -#endif - } - - mem_heap_empty(blob_heap); - - row_merge_fts_get_next_doc_item(psort_info, &doc_item); - - if (doc_item && last_doc_id != doc_item->doc_id) { - t_ctx.init_pos = 0; - } - } - - /* If we run out of current sort buffer, need to sort - and flush the sort buffer to disk */ - if (t_ctx.rows_added[t_ctx.buf_used] && !processed) { - row_merge_buf_sort(buf[t_ctx.buf_used], NULL); - row_merge_buf_write(buf[t_ctx.buf_used], - merge_file[t_ctx.buf_used], - block[t_ctx.buf_used]); - - if (!row_merge_write(merge_file[t_ctx.buf_used]->fd, - merge_file[t_ctx.buf_used]->offset++, - block[t_ctx.buf_used], - crypt_data, - crypt_block[t_ctx.buf_used], - table->space)) { - error = DB_TEMP_FILE_WRITE_FAILURE; - goto func_exit; - } - - 
UNIV_MEM_INVALID(block[t_ctx.buf_used][0], srv_sort_buf_size); - buf[t_ctx.buf_used] = row_merge_buf_empty(buf[t_ctx.buf_used]); - mycount[t_ctx.buf_used] += t_ctx.rows_added[t_ctx.buf_used]; - t_ctx.rows_added[t_ctx.buf_used] = 0; - - ut_a(doc_item); - goto loop; - } - - /* Parent done scanning, and if finish processing all the docs, exit */ - if (psort_info->state == FTS_PARENT_COMPLETE) { - if (UT_LIST_GET_LEN(psort_info->fts_doc_list) == 0) { - goto exit; - } else if (retried > 10000) { - ut_ad(!doc_item); - /* retied too many times and cannot get new record */ - ib_logf(IB_LOG_LEVEL_ERROR, - "InnoDB: FTS parallel sort processed " - "%lu records, the sort queue has " - "%lu records. But sort cannot get " - "the next records", num_doc_processed, - UT_LIST_GET_LEN( - psort_info->fts_doc_list)); - goto exit; - } - } else if (psort_info->state == FTS_PARENT_EXITING) { - /* Parent abort */ - goto func_exit; - } - - if (doc_item == NULL) { - os_thread_yield(); - } - - row_merge_fts_get_next_doc_item(psort_info, &doc_item); - - if (doc_item != NULL) { - if (last_doc_id != doc_item->doc_id) { - t_ctx.init_pos = 0; - } - - retried = 0; - } else if (psort_info->state == FTS_PARENT_COMPLETE) { - retried++; - } - - goto loop; - -exit: - /* Do a final sort of the last (or latest) batch of records - in block memory. 
Flush them to temp file if records cannot - be hold in one block memory */ - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { - if (t_ctx.rows_added[i]) { - row_merge_buf_sort(buf[i], NULL); - row_merge_buf_write( - buf[i], merge_file[i], block[i]); - - /* Write to temp file, only if records have - been flushed to temp file before (offset > 0): - The pseudo code for sort is following: - - while (there are rows) { - tokenize rows, put result in block[] - if (block[] runs out) { - sort rows; - write to temp file with - row_merge_write(); - offset++; - } - } - - # write out the last batch - if (offset > 0) { - row_merge_write(); - offset++; - } else { - # no need to write anything - offset stay as 0 - } - - so if merge_file[i]->offset is 0 when we come to - here as the last batch, this means rows have - never flush to temp file, it can be held all in - memory */ - if (merge_file[i]->offset != 0) { - if (!row_merge_write(merge_file[i]->fd, - merge_file[i]->offset++, - block[i], - crypt_data, - crypt_block[i], - table->space)) { - error = DB_TEMP_FILE_WRITE_FAILURE; - goto func_exit; - } - - UNIV_MEM_INVALID(block[i][0], - srv_sort_buf_size); - - if (crypt_block[i]) { - UNIV_MEM_INVALID(crypt_block[i][0], - srv_sort_buf_size); - } - } - - buf[i] = row_merge_buf_empty(buf[i]); - t_ctx.rows_added[i] = 0; - } - } - - if (fts_enable_diag_print) { - DEBUG_FTS_SORT_PRINT(" InnoDB_FTS: start merge sort\n"); - } - - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { - if (!merge_file[i]->offset) { - continue; - } - - tmpfd[i] = row_merge_file_create_low(path); - if (tmpfd[i] < 0) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } - - error = row_merge_sort(psort_info->psort_common->trx, - psort_info->psort_common->dup, - merge_file[i], block[i], &tmpfd[i], - false, 0.0/* pct_progress */, 0.0/* pct_cost */, - crypt_data, crypt_block[i], table->space); - - if (error != DB_SUCCESS) { - close(tmpfd[i]); - goto func_exit; - } - - total_rec += merge_file[i]->n_rec; - close(tmpfd[i]); - } - 
-func_exit: - if (fts_enable_diag_print) { - DEBUG_FTS_SORT_PRINT(" InnoDB_FTS: complete merge sort\n"); - } - - mem_heap_free(blob_heap); - - mutex_enter(&psort_info->mutex); - psort_info->error = error; - mutex_exit(&psort_info->mutex); - - if (UT_LIST_GET_LEN(psort_info->fts_doc_list) > 0) { - /* child can exit either with error or told by parent. */ - ut_ad(error != DB_SUCCESS - || psort_info->state == FTS_PARENT_EXITING); - } - - /* Free fts doc list in case of error. */ - do { - row_merge_fts_get_next_doc_item(psort_info, &doc_item); - } while (doc_item != NULL); - - psort_info->child_status = FTS_CHILD_COMPLETE; - os_event_set(psort_info->psort_common->sort_event); - psort_info->child_status = FTS_CHILD_EXITING; - -#ifdef __WIN__ - CloseHandle(psort_info->thread_hdl); -#endif /*__WIN__ */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/*********************************************************************//** -Start the parallel tokenization and parallel merge sort */ -UNIV_INTERN -void -row_fts_start_psort( -/*================*/ - fts_psort_t* psort_info) /*!< parallel sort structure */ -{ - ulint i = 0; - os_thread_id_t thd_id; - - for (i = 0; i < fts_sort_pll_degree; i++) { - psort_info[i].psort_id = i; - psort_info[i].thread_hdl = os_thread_create( - fts_parallel_tokenization, - (void*) &psort_info[i], &thd_id); - } -} - -/*********************************************************************//** -Function performs the merge and insertion of the sorted records. 
-@return OS_THREAD_DUMMY_RETURN */ -UNIV_INTERN -os_thread_ret_t -fts_parallel_merge( -/*===============*/ - void* arg) /*!< in: parallel merge info */ -{ - fts_psort_t* psort_info = (fts_psort_t*) arg; - ulint id; - - ut_ad(psort_info); - - id = psort_info->psort_id; - - row_fts_merge_insert(psort_info->psort_common->dup->index, - psort_info->psort_common->new_table, - psort_info->psort_common->all_info, id); - - psort_info->child_status = FTS_CHILD_COMPLETE; - os_event_set(psort_info->psort_common->merge_event); - psort_info->child_status = FTS_CHILD_EXITING; - -#ifdef __WIN__ - CloseHandle(psort_info->thread_hdl); -#endif /*__WIN__ */ - - os_thread_exit(NULL, false); - - OS_THREAD_DUMMY_RETURN; -} - -/*********************************************************************//** -Kick off the parallel merge and insert thread */ -UNIV_INTERN -void -row_fts_start_parallel_merge( -/*=========================*/ - fts_psort_t* merge_info) /*!< in: parallel sort info */ -{ - int i = 0; - os_thread_id_t thd_id; - - /* Kick off merge/insert threads */ - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { - merge_info[i].psort_id = i; - merge_info[i].child_status = 0; - - merge_info[i].thread_hdl = os_thread_create( - fts_parallel_merge, (void*) &merge_info[i], &thd_id); - } -} - -/********************************************************************//** -Insert processed FTS data to auxillary index tables. 
-@return DB_SUCCESS if insertion runs fine */ -static MY_ATTRIBUTE((nonnull)) -dberr_t -row_merge_write_fts_word( -/*=====================*/ - trx_t* trx, /*!< in: transaction */ - que_t** ins_graph, /*!< in: Insert query graphs */ - fts_tokenizer_word_t* word, /*!< in: sorted and tokenized - word */ - fts_table_t* fts_table, /*!< in: fts aux table instance */ - CHARSET_INFO* charset) /*!< in: charset */ -{ - ulint selected; - dberr_t ret = DB_SUCCESS; - - selected = fts_select_index( - charset, word->text.f_str, word->text.f_len); - fts_table->suffix = fts_get_suffix(selected); - - /* Pop out each fts_node in word->nodes write them to auxiliary table */ - while (ib_vector_size(word->nodes) > 0) { - dberr_t error; - fts_node_t* fts_node; - - fts_node = static_cast<fts_node_t*>(ib_vector_pop(word->nodes)); - - error = fts_write_node( - trx, &ins_graph[selected], fts_table, &word->text, - fts_node); - - if (error != DB_SUCCESS) { - fprintf(stderr, "InnoDB: failed to write" - " word %s to FTS auxiliary index" - " table, error (%s) \n", - word->text.f_str, ut_strerr(error)); - ret = error; - } - - ut_free(fts_node->ilist); - fts_node->ilist = NULL; - } - - return(ret); -} - -/*********************************************************************//** -Read sorted FTS data files and insert data tuples to auxillary tables. 
-@return DB_SUCCESS or error number */ -UNIV_INTERN -void -row_fts_insert_tuple( -/*=================*/ - fts_psort_insert_t* - ins_ctx, /*!< in: insert context */ - fts_tokenizer_word_t* word, /*!< in: last processed - tokenized word */ - ib_vector_t* positions, /*!< in: word position */ - doc_id_t* in_doc_id, /*!< in: last item doc id */ - dtuple_t* dtuple) /*!< in: entry to insert */ -{ - fts_node_t* fts_node = NULL; - dfield_t* dfield; - doc_id_t doc_id; - ulint position; - fts_string_t token_word; - ulint i; - - /* Get fts_node for the FTS auxillary INDEX table */ - if (ib_vector_size(word->nodes) > 0) { - fts_node = static_cast<fts_node_t*>( - ib_vector_last(word->nodes)); - } - - if (fts_node == NULL - || fts_node->ilist_size > FTS_ILIST_MAX_SIZE) { - - fts_node = static_cast<fts_node_t*>( - ib_vector_push(word->nodes, NULL)); - - memset(fts_node, 0x0, sizeof(*fts_node)); - } - - /* If dtuple == NULL, this is the last word to be processed */ - if (!dtuple) { - if (fts_node && ib_vector_size(positions) > 0) { - fts_cache_node_add_positions( - NULL, fts_node, *in_doc_id, - positions); - - /* Write out the current word */ - row_merge_write_fts_word(ins_ctx->trx, - ins_ctx->ins_graph, word, - &ins_ctx->fts_table, - ins_ctx->charset); - - } - - return; - } - - /* Get the first field for the tokenized word */ - dfield = dtuple_get_nth_field(dtuple, 0); - - token_word.f_n_char = 0; - token_word.f_len = dfield->len; - token_word.f_str = static_cast<byte*>(dfield_get_data(dfield)); - - if (!word->text.f_str) { - fts_utf8_string_dup(&word->text, &token_word, ins_ctx->heap); - } - - /* compare to the last word, to see if they are the same - word */ - if (innobase_fts_text_cmp(ins_ctx->charset, - &word->text, &token_word) != 0) { - ulint num_item; - - /* Getting a new word, flush the last position info - for the currnt word in fts_node */ - if (ib_vector_size(positions) > 0) { - fts_cache_node_add_positions( - NULL, fts_node, *in_doc_id, positions); - } - - /* Write out 
the current word */ - row_merge_write_fts_word(ins_ctx->trx, ins_ctx->ins_graph, - word, &ins_ctx->fts_table, - ins_ctx->charset); - - /* Copy the new word */ - fts_utf8_string_dup(&word->text, &token_word, ins_ctx->heap); - - num_item = ib_vector_size(positions); - - /* Clean up position queue */ - for (i = 0; i < num_item; i++) { - ib_vector_pop(positions); - } - - /* Reset Doc ID */ - *in_doc_id = 0; - memset(fts_node, 0x0, sizeof(*fts_node)); - } - - /* Get the word's Doc ID */ - dfield = dtuple_get_nth_field(dtuple, 1); - - if (!ins_ctx->opt_doc_id_size) { - doc_id = fts_read_doc_id( - static_cast<byte*>(dfield_get_data(dfield))); - } else { - doc_id = (doc_id_t) mach_read_from_4( - static_cast<byte*>(dfield_get_data(dfield))); - } - - /* Get the word's position info */ - dfield = dtuple_get_nth_field(dtuple, 2); - position = mach_read_from_4(static_cast<byte*>(dfield_get_data(dfield))); - - /* If this is the same word as the last word, and they - have the same Doc ID, we just need to add its position - info. Otherwise, we will flush position info to the - fts_node and initiate a new position vector */ - if (!(*in_doc_id) || *in_doc_id == doc_id) { - ib_vector_push(positions, &position); - } else { - ulint num_pos = ib_vector_size(positions); - - fts_cache_node_add_positions(NULL, fts_node, - *in_doc_id, positions); - for (i = 0; i < num_pos; i++) { - ib_vector_pop(positions); - } - ib_vector_push(positions, &position); - } - - /* record the current Doc ID */ - *in_doc_id = doc_id; -} - -/*********************************************************************//** -Propagate a newly added record up one level in the selection tree -@return parent where this value propagated to */ -static -int -row_fts_sel_tree_propagate( -/*=======================*/ - int propogated, /*<! in: tree node propagated */ - int* sel_tree, /*<! in: selection tree */ - const mrec_t** mrec, /*<! in: sort record */ - ulint** offsets, /*<! in: record offsets */ - dict_index_t* index) /*<! 
in/out: FTS index */ -{ - ulint parent; - int child_left; - int child_right; - int selected; - - /* Find which parent this value will be propagated to */ - parent = (propogated - 1) / 2; - - /* Find out which value is smaller, and to propagate */ - child_left = sel_tree[parent * 2 + 1]; - child_right = sel_tree[parent * 2 + 2]; - - if (child_left == -1 || mrec[child_left] == NULL) { - if (child_right == -1 - || mrec[child_right] == NULL) { - selected = -1; - } else { - selected = child_right ; - } - } else if (child_right == -1 - || mrec[child_right] == NULL) { - selected = child_left; - } else if (cmp_rec_rec_simple(mrec[child_left], mrec[child_right], - offsets[child_left], - offsets[child_right], - index, NULL) < 0) { - selected = child_left; - } else { - selected = child_right; - } - - sel_tree[parent] = selected; - - return(static_cast<int>(parent)); -} - -/*********************************************************************//** -Readjust selection tree after popping the root and read a new value -@return the new root */ -static -int -row_fts_sel_tree_update( -/*====================*/ - int* sel_tree, /*<! in/out: selection tree */ - ulint propagated, /*<! in: node to propagate up */ - ulint height, /*<! in: tree height */ - const mrec_t** mrec, /*<! in: sort record */ - ulint** offsets, /*<! in: record offsets */ - dict_index_t* index) /*<! in: index dictionary */ -{ - ulint i; - - for (i = 1; i <= height; i++) { - propagated = static_cast<ulint>(row_fts_sel_tree_propagate( - static_cast<int>(propagated), sel_tree, mrec, offsets, index)); - } - - return(sel_tree[0]); -} - -/*********************************************************************//** -Build selection tree at a specified level */ -static -void -row_fts_build_sel_tree_level( -/*=========================*/ - int* sel_tree, /*<! in/out: selection tree */ - ulint level, /*<! in: selection tree level */ - const mrec_t** mrec, /*<! in: sort record */ - ulint** offsets, /*<! 
in: record offsets */ - dict_index_t* index) /*<! in: index dictionary */ -{ - ulint start; - int child_left; - int child_right; - ulint i; - ulint num_item = ulint(1) << level; - - start = num_item - 1; - - for (i = 0; i < num_item; i++) { - child_left = sel_tree[(start + i) * 2 + 1]; - child_right = sel_tree[(start + i) * 2 + 2]; - - if (child_left == -1) { - if (child_right == -1) { - sel_tree[start + i] = -1; - } else { - sel_tree[start + i] = child_right; - } - continue; - } else if (child_right == -1) { - sel_tree[start + i] = child_left; - continue; - } - - /* Deal with NULL child conditions */ - if (!mrec[child_left]) { - if (!mrec[child_right]) { - sel_tree[start + i] = -1; - } else { - sel_tree[start + i] = child_right; - } - continue; - } else if (!mrec[child_right]) { - sel_tree[start + i] = child_left; - continue; - } - - /* Select the smaller one to set parent pointer */ - int cmp = cmp_rec_rec_simple( - mrec[child_left], mrec[child_right], - offsets[child_left], offsets[child_right], - index, NULL); - - sel_tree[start + i] = cmp < 0 ? child_left : child_right; - } -} - -/*********************************************************************//** -Build a selection tree for merge. The selection tree is a binary tree -and should have fts_sort_pll_degree / 2 levels. With root as level 0 -@return number of tree levels */ -static -ulint -row_fts_build_sel_tree( -/*===================*/ - int* sel_tree, /*<! in/out: selection tree */ - const mrec_t** mrec, /*<! in: sort record */ - ulint** offsets, /*<! in: record offsets */ - dict_index_t* index) /*<! 
in: index dictionary */ -{ - ulint treelevel = 1; - ulint num = 2; - int i = 0; - ulint start; - - /* No need to build selection tree if we only have two merge threads */ - if (fts_sort_pll_degree <= 2) { - return(0); - } - - while (num < fts_sort_pll_degree) { - num = num << 1; - treelevel++; - } - - start = (ulint(1) << treelevel) - 1; - - for (i = 0; i < (int) fts_sort_pll_degree; i++) { - sel_tree[i + start] = i; - } - - for (i = static_cast<int>(treelevel) - 1; i >= 0; i--) { - row_fts_build_sel_tree_level( - sel_tree, static_cast<ulint>(i), mrec, offsets, index); - } - - return(treelevel); -} - -/*********************************************************************//** -Read sorted file containing index data tuples and insert these data -tuples to the index -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -row_fts_merge_insert( -/*=================*/ - dict_index_t* index, /*!< in: index */ - dict_table_t* table, /*!< in: new table */ - fts_psort_t* psort_info, /*!< parallel sort info */ - ulint id) /* !< in: which auxiliary table's data - to insert to */ -{ - const byte** b; - mem_heap_t* tuple_heap; - mem_heap_t* heap; - dberr_t error = DB_SUCCESS; - ulint* foffs; - ulint** offsets; - fts_tokenizer_word_t new_word; - ib_vector_t* positions; - doc_id_t last_doc_id; - ib_alloc_t* heap_alloc; - ulint n_bytes; - ulint i; - mrec_buf_t** buf; - int* fd; - byte** block; - byte** crypt_block; - const mrec_t** mrec; - ulint count = 0; - int* sel_tree; - ulint height; - ulint start; - fts_psort_insert_t ins_ctx; - ulint count_diag = 0; - fil_space_crypt_t* crypt_data = NULL; - ulint space; - - ut_ad(index); - ut_ad(table); - - /* We use the insert query graph as the dummy graph - needed in the row module call */ - - ins_ctx.trx = trx_allocate_for_background(); - - ins_ctx.trx->op_info = "inserting index entries"; - - ins_ctx.opt_doc_id_size = psort_info[0].psort_common->opt_doc_id_size; - crypt_data = psort_info[0].psort_common->crypt_data; - - heap = 
mem_heap_create(500 + sizeof(mrec_buf_t)); - - b = (const byte**) mem_heap_alloc( - heap, sizeof (*b) * fts_sort_pll_degree); - foffs = (ulint*) mem_heap_alloc( - heap, sizeof(*foffs) * fts_sort_pll_degree); - offsets = (ulint**) mem_heap_alloc( - heap, sizeof(*offsets) * fts_sort_pll_degree); - buf = (mrec_buf_t**) mem_heap_alloc( - heap, sizeof(*buf) * fts_sort_pll_degree); - fd = (int*) mem_heap_alloc(heap, sizeof(*fd) * fts_sort_pll_degree); - block = (byte**) mem_heap_alloc( - heap, sizeof(*block) * fts_sort_pll_degree); - crypt_block = (byte**) mem_heap_alloc( - heap, sizeof(*block) * fts_sort_pll_degree); - mrec = (const mrec_t**) mem_heap_alloc( - heap, sizeof(*mrec) * fts_sort_pll_degree); - sel_tree = (int*) mem_heap_alloc( - heap, sizeof(*sel_tree) * (fts_sort_pll_degree * 2)); - - tuple_heap = mem_heap_create(1000); - - ins_ctx.charset = fts_index_get_charset(index); - ins_ctx.heap = heap; - - for (i = 0; i < fts_sort_pll_degree; i++) { - ulint num; - - num = 1 + REC_OFFS_HEADER_SIZE - + dict_index_get_n_fields(index); - offsets[i] = static_cast<ulint*>(mem_heap_zalloc( - heap, num * sizeof *offsets[i])); - offsets[i][0] = num; - offsets[i][1] = dict_index_get_n_fields(index); - block[i] = psort_info[i].merge_block[id]; - crypt_block[i] = psort_info[i].crypt_block[id]; - b[i] = psort_info[i].merge_block[id]; - fd[i] = psort_info[i].merge_file[id]->fd; - foffs[i] = 0; - - buf[i] = static_cast<mrec_buf_t*>( - mem_heap_alloc(heap, sizeof *buf[i])); - - count_diag += (int) psort_info[i].merge_file[id]->n_rec; - } - - if (fts_enable_diag_print) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB_FTS: to inserted %lu records\n", - (ulong) count_diag); - } - - /* Initialize related variables if creating FTS indexes */ - heap_alloc = ib_heap_allocator_create(heap); - - memset(&new_word, 0, sizeof(new_word)); - - new_word.nodes = ib_vector_create(heap_alloc, sizeof(fts_node_t), 4); - positions = ib_vector_create(heap_alloc, sizeof(ulint), 32); - 
last_doc_id = 0; - - /* Allocate insert query graphs for FTS auxillary - Index Table, note we have FTS_NUM_AUX_INDEX such index tables */ - n_bytes = sizeof(que_t*) * (FTS_NUM_AUX_INDEX + 1); - ins_ctx.ins_graph = static_cast<que_t**>(mem_heap_alloc(heap, n_bytes)); - memset(ins_ctx.ins_graph, 0x0, n_bytes); - - /* We should set the flags2 with aux_table_name here, - in order to get the correct aux table names. */ - index->table->flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; - DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", - index->table->flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); - - ins_ctx.fts_table.type = FTS_INDEX_TABLE; - ins_ctx.fts_table.index_id = index->id; - ins_ctx.fts_table.table_id = table->id; - ins_ctx.fts_table.parent = index->table->name; - ins_ctx.fts_table.table = index->table; - space = table->space; - - for (i = 0; i < fts_sort_pll_degree; i++) { - if (psort_info[i].merge_file[id]->n_rec == 0) { - /* No Rows to read */ - mrec[i] = b[i] = NULL; - } else { - /* Read from temp file only if it has been - written to. 
Otherwise, block memory holds - all the sorted records */ - if (psort_info[i].merge_file[id]->offset > 0 - && (!row_merge_read( - fd[i], foffs[i], - (row_merge_block_t*) block[i], - crypt_data, - (row_merge_block_t*) crypt_block[i], - space))) { - error = DB_CORRUPTION; - goto exit; - } - - ROW_MERGE_READ_GET_NEXT(i); - } - } - - height = row_fts_build_sel_tree(sel_tree, (const mrec_t **) mrec, - offsets, index); - - start = (1 << height) - 1; - - /* Fetch sorted records from sort buffer and insert them into - corresponding FTS index auxiliary tables */ - for (;;) { - dtuple_t* dtuple; - ulint n_ext; - int min_rec = 0; - - if (fts_sort_pll_degree <= 2) { - while (!mrec[min_rec]) { - min_rec++; - - if (min_rec >= (int) fts_sort_pll_degree) { - row_fts_insert_tuple( - &ins_ctx, &new_word, - positions, &last_doc_id, - NULL); - - goto exit; - } - } - - for (i = min_rec + 1; i < fts_sort_pll_degree; i++) { - if (!mrec[i]) { - continue; - } - - if (cmp_rec_rec_simple( - mrec[i], mrec[min_rec], - offsets[i], offsets[min_rec], - index, NULL) < 0) { - min_rec = static_cast<int>(i); - } - } - } else { - min_rec = sel_tree[0]; - - if (min_rec == -1) { - row_fts_insert_tuple( - &ins_ctx, &new_word, - positions, &last_doc_id, - NULL); - - goto exit; - } - } - - dtuple = row_rec_to_index_entry_low( - mrec[min_rec], index, offsets[min_rec], &n_ext, - tuple_heap); - - row_fts_insert_tuple( - &ins_ctx, &new_word, positions, - &last_doc_id, dtuple); - - - ROW_MERGE_READ_GET_NEXT(min_rec); - - if (fts_sort_pll_degree > 2) { - if (!mrec[min_rec]) { - sel_tree[start + min_rec] = -1; - } - - row_fts_sel_tree_update(sel_tree, start + min_rec, - height, mrec, - offsets, index); - } - - count++; - - mem_heap_empty(tuple_heap); - } - -exit: - fts_sql_commit(ins_ctx.trx); - - ins_ctx.trx->op_info = ""; - - mem_heap_free(tuple_heap); - - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { - if (ins_ctx.ins_graph[i]) { - fts_que_graph_free(ins_ctx.ins_graph[i]); - } - } - - 
trx_free_for_background(ins_ctx.trx); - - mem_heap_free(heap); - - if (fts_enable_diag_print) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB_FTS: inserted %lu records\n", - (ulong) count); - } - - return(error); -} diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc deleted file mode 100644 index 86b2d782b7b..00000000000 --- a/storage/xtradb/row/row0import.cc +++ /dev/null @@ -1,3774 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0import.cc -Import a tablespace to a running instance. - -Created 2012-02-08 by Sunny Bains. 
-*******************************************************/ - -#include "row0import.h" - -#ifdef UNIV_NONINL -#include "row0import.ic" -#endif - -#include "btr0pcur.h" -#include "que0que.h" -#include "dict0boot.h" -#include "ibuf0ibuf.h" -#include "pars0pars.h" -#include "row0upd.h" -#include "row0sel.h" -#include "row0mysql.h" -#include "srv0start.h" -#include "row0quiesce.h" -#include "buf0buf.h" - -#include <vector> - -/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect -reads to fail. If you set the buffer size to be greater than a multiple of the -file size then it will assert. TODO: Fix this limitation of the IO functions. -@param n - page size of the tablespace. -@retval number of pages */ -#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n) - -/** For gathering stats on records during phase I */ -struct row_stats_t { - ulint m_n_deleted; /*!< Number of deleted records - found in the index */ - - ulint m_n_purged; /*!< Number of records purged - optimisatically */ - - ulint m_n_rows; /*!< Number of rows */ - - ulint m_n_purge_failed; /*!< Number of deleted rows - that could not be purged */ -}; - -/** Index information required by IMPORT. 
*/ -struct row_index_t { - index_id_t m_id; /*!< Index id of the table - in the exporting server */ - byte* m_name; /*!< Index name */ - - ulint m_space; /*!< Space where it is placed */ - - ulint m_page_no; /*!< Root page number */ - - ulint m_type; /*!< Index type */ - - ulint m_trx_id_offset; /*!< Relevant only for clustered - indexes, offset of transaction - id system column */ - - ulint m_n_user_defined_cols; /*!< User defined columns */ - - ulint m_n_uniq; /*!< Number of columns that can - uniquely identify the row */ - - ulint m_n_nullable; /*!< Number of nullable - columns */ - - ulint m_n_fields; /*!< Total number of fields */ - - dict_field_t* m_fields; /*!< Index fields */ - - const dict_index_t* - m_srv_index; /*!< Index instance in the - importing server */ - - row_stats_t m_stats; /*!< Statistics gathered during - the import phase */ - -}; - -/** Meta data required by IMPORT. */ -struct row_import { - row_import() UNIV_NOTHROW - : - m_table(), - m_version(), - m_hostname(), - m_table_name(), - m_autoinc(), - m_page_size(), - m_flags(), - m_n_cols(), - m_cols(), - m_col_names(), - m_n_indexes(), - m_indexes(), - m_missing(true) { } - - ~row_import() UNIV_NOTHROW; - - /** - Find the index entry in in the indexes array. - @param name - index name - @return instance if found else 0. */ - row_index_t* get_index(const char* name) const UNIV_NOTHROW; - - /** - Get the number of rows in the index. - @param name - index name - @return number of rows (doesn't include delete marked rows). */ - ulint get_n_rows(const char* name) const UNIV_NOTHROW; - - /** - Find the ordinal value of the column name in the cfg table columns. - @param name - of column to look for. - @return ULINT_UNDEFINED if not found. */ - ulint find_col(const char* name) const UNIV_NOTHROW; - - /** - Get the number of rows for which purge failed during the convert phase. - @param name - index name - @return number of rows for which purge failed. 
*/ - ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW; - - /** - Check if the index is clean. ie. no delete-marked records - @param name - index name - @return true if index needs to be purged. */ - bool requires_purge(const char* name) const UNIV_NOTHROW - { - return(get_n_purge_failed(name) > 0); - } - - /** - Set the index root <space, pageno> using the index name */ - void set_root_by_name() UNIV_NOTHROW; - - /** - Set the index root <space, pageno> using a heuristic - @return DB_SUCCESS or error code */ - dberr_t set_root_by_heuristic() UNIV_NOTHROW; - - /** Check if the index schema that was read from the .cfg file - matches the in memory index definition. - Note: It will update row_import_t::m_srv_index to map the meta-data - read from the .cfg file to the server index instance. - @return DB_SUCCESS or error code. */ - dberr_t match_index_columns( - THD* thd, - const dict_index_t* index) UNIV_NOTHROW; - - /** - Check if the table schema that was read from the .cfg file matches the - in memory table definition. - @param thd - MySQL session variable - @return DB_SUCCESS or error code. */ - dberr_t match_table_columns( - THD* thd) UNIV_NOTHROW; - - /** - Check if the table (and index) schema that was read from the .cfg file - matches the in memory table definition. - @param thd - MySQL session variable - @return DB_SUCCESS or error code. 
*/ - dberr_t match_schema( - THD* thd) UNIV_NOTHROW; - - dict_table_t* m_table; /*!< Table instance */ - - ulint m_version; /*!< Version of config file */ - - byte* m_hostname; /*!< Hostname where the - tablespace was exported */ - byte* m_table_name; /*!< Exporting instance table - name */ - - ib_uint64_t m_autoinc; /*!< Next autoinc value */ - - ulint m_page_size; /*!< Tablespace page size */ - - ulint m_flags; /*!< Table flags */ - - ulint m_n_cols; /*!< Number of columns in the - meta-data file */ - - dict_col_t* m_cols; /*!< Column data */ - - byte** m_col_names; /*!< Column names, we store the - column naems separately becuase - there is no field to store the - value in dict_col_t */ - - ulint m_n_indexes; /*!< Number of indexes, - including clustered index */ - - row_index_t* m_indexes; /*!< Index meta data */ - - bool m_missing; /*!< true if a .cfg file was - found and was readable */ -}; - -/** Use the page cursor to iterate over records in a block. */ -class RecIterator { -public: - /** - Default constructor */ - RecIterator() UNIV_NOTHROW - { - memset(&m_cur, 0x0, sizeof(m_cur)); - } - - /** - Position the cursor on the first user record. */ - void open(buf_block_t* block) UNIV_NOTHROW - { - page_cur_set_before_first(block, &m_cur); - - if (!end()) { - next(); - } - } - - /** - Move to the next record. */ - void next() UNIV_NOTHROW - { - page_cur_move_to_next(&m_cur); - } - - /** - @return the current record */ - rec_t* current() UNIV_NOTHROW - { - ut_ad(!end()); - return(page_cur_get_rec(&m_cur)); - } - - /** - @return true if cursor is at the end */ - bool end() UNIV_NOTHROW - { - return(page_cur_is_after_last(&m_cur) == TRUE); - } - - /** Remove the current record - @return true on success */ - bool remove( - const dict_index_t* index, - page_zip_des_t* page_zip, - ulint* offsets) UNIV_NOTHROW - { - /* We can't end up with an empty page unless it is root. 
*/ - if (page_get_n_recs(m_cur.block->frame) <= 1) { - return(false); - } - - return(page_delete_rec(index, &m_cur, page_zip, offsets)); - } - -private: - page_cur_t m_cur; -}; - -/** Class that purges delete marked reocords from indexes, both secondary -and cluster. It does a pessimistic delete. This should only be done if we -couldn't purge the delete marked reocrds during Phase I. */ -class IndexPurge { -public: - /** Constructor - @param trx - the user transaction covering the import tablespace - @param index - to be imported - @param space_id - space id of the tablespace */ - IndexPurge( - trx_t* trx, - dict_index_t* index) UNIV_NOTHROW - : - m_trx(trx), - m_index(index), - m_n_rows(0) - { - ib_logf(IB_LOG_LEVEL_INFO, - "Phase II - Purge records from index %s", - index->name); - } - - /** Descructor */ - ~IndexPurge() UNIV_NOTHROW { } - - /** Purge delete marked records. - @return DB_SUCCESS or error code. */ - dberr_t garbage_collect() UNIV_NOTHROW; - - /** The number of records that are not delete marked. - @return total records in the index after purge */ - ulint get_n_rows() const UNIV_NOTHROW - { - return(m_n_rows); - } - -private: - /** - Begin import, position the cursor on the first record. */ - void open() UNIV_NOTHROW; - - /** - Close the persistent curosr and commit the mini-transaction. */ - void close() UNIV_NOTHROW; - - /** - Position the cursor on the next record. - @return DB_SUCCESS or error code */ - dberr_t next() UNIV_NOTHROW; - - /** - Store the persistent cursor position and reopen the - B-tree cursor in BTR_MODIFY_TREE mode, because the - tree structure may be changed during a pessimistic delete. */ - void purge_pessimistic_delete() UNIV_NOTHROW; - - /** - Purge delete-marked records. - @param offsets - current row offsets. 
*/ - void purge() UNIV_NOTHROW; - -protected: - // Disable copying - IndexPurge(); - IndexPurge(const IndexPurge&); - IndexPurge &operator=(const IndexPurge&); - -private: - trx_t* m_trx; /*!< User transaction */ - mtr_t m_mtr; /*!< Mini-transaction */ - btr_pcur_t m_pcur; /*!< Persistent cursor */ - dict_index_t* m_index; /*!< Index to be processed */ - ulint m_n_rows; /*!< Records in index */ -}; - -/** Functor that is called for each physical page that is read from the -tablespace file. */ -class AbstractCallback : public PageCallback { -public: - /** Constructor - @param trx - covering transaction */ - AbstractCallback(trx_t* trx) - : - m_trx(trx), - m_space(ULINT_UNDEFINED), - m_xdes(), - m_xdes_page_no(ULINT_UNDEFINED), - m_space_flags(ULINT_UNDEFINED) UNIV_NOTHROW { } - - /** - Free any extent descriptor instance */ - virtual ~AbstractCallback() - { - delete [] m_xdes; - } - - /** Determine the page size to use for traversing the tablespace - @param file_size - size of the tablespace file in bytes - @param block - contents of the first page in the tablespace file. - @retval DB_SUCCESS or error code. */ - virtual dberr_t init( - os_offset_t file_size, - const buf_block_t* block) UNIV_NOTHROW; - - /** @return true if compressed table. */ - bool is_compressed_table() const UNIV_NOTHROW - { - return(get_zip_size() > 0); - } - -protected: - /** - Get the data page depending on the table type, compressed or not. - @param block - block read from disk - @retval the buffer frame */ - buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW - { - if (is_compressed_table()) { - return(block->page.zip.data); - } - - return(buf_block_get_frame(block)); - } - - /** Check for session interrupt. If required we could - even flush to disk here every N pages. 
- @retval DB_SUCCESS or error code */ - dberr_t periodic_check() UNIV_NOTHROW - { - if (trx_is_interrupted(m_trx)) { - return(DB_INTERRUPTED); - } - - return(DB_SUCCESS); - } - - /** - Get the physical offset of the extent descriptor within the page. - @param page_no - page number of the extent descriptor - @param page - contents of the page containing the extent descriptor. - @return the start of the xdes array in a page */ - const xdes_t* xdes( - ulint page_no, - const page_t* page) const UNIV_NOTHROW - { - ulint offset; - - offset = xdes_calc_descriptor_index(get_zip_size(), page_no); - - return(page + XDES_ARR_OFFSET + XDES_SIZE * offset); - } - - /** - Set the current page directory (xdes). If the extent descriptor is - marked as free then free the current extent descriptor and set it to - 0. This implies that all pages that are covered by this extent - descriptor are also freed. - - @param page_no - offset of page within the file - @param page - page contents - @return DB_SUCCESS or error code. */ - dberr_t set_current_xdes( - ulint page_no, - const page_t* page) UNIV_NOTHROW - { - m_xdes_page_no = page_no; - - delete[] m_xdes; - - m_xdes = 0; - - ulint state; - const xdes_t* xdesc = page + XDES_ARR_OFFSET; - - state = mach_read_ulint(xdesc + XDES_STATE, MLOG_4BYTES); - - if (state != XDES_FREE) { - - m_xdes = new(std::nothrow) xdes_t[m_page_size]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_13", - delete [] m_xdes; m_xdes = 0;); - - if (m_xdes == 0) { - return(DB_OUT_OF_MEMORY); - } - - memcpy(m_xdes, page, m_page_size); - } - - return(DB_SUCCESS); - } - - /** - @return true if it is a root page */ - bool is_root_page(const page_t* page) const UNIV_NOTHROW - { - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - - return(mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL - && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL); - } - - /** - Check if the page is marked as free in the extent descriptor. 
- @param page_no - page number to check in the extent descriptor. - @return true if the page is marked as free */ - bool is_free(ulint page_no) const UNIV_NOTHROW - { - ut_a(xdes_calc_descriptor_page(get_zip_size(), page_no) - == m_xdes_page_no); - - if (m_xdes != 0) { - const xdes_t* xdesc = xdes(page_no, m_xdes); - ulint pos = page_no % FSP_EXTENT_SIZE; - - return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos)); - } - - /* If the current xdes was free, the page must be free. */ - return(true); - } - -protected: - /** Covering transaction. */ - trx_t* m_trx; - - /** Space id of the file being iterated over. */ - ulint m_space; - - /** Minimum page number for which the free list has not been - initialized: the pages >= this limit are, by definition, free; - note that in a single-table tablespace where size < 64 pages, - this number is 64, i.e., we have initialized the space about - the first extent, but have not physically allocted those pages - to the file. @see FSP_LIMIT. */ - ulint m_free_limit; - - /** Current size of the space in pages */ - ulint m_size; - - /** Current extent descriptor page */ - xdes_t* m_xdes; - - /** Physical page offset in the file of the extent descriptor */ - ulint m_xdes_page_no; - - /** Flags value read from the header page */ - ulint m_space_flags; -}; - -/** Determine the page size to use for traversing the tablespace -@param file_size - size of the tablespace file in bytes -@param block - contents of the first page in the tablespace file. -@retval DB_SUCCESS or error code. 
*/
/* Reads the tablespace flags and geometry from the first page, validates
the page size against the server and the file size, and caches the first
extent descriptor page. Every validation failure is logged and reported
as DB_CORRUPTION. */
dberr_t
AbstractCallback::init(
	os_offset_t		file_size,
	const buf_block_t*	block) UNIV_NOTHROW
{
	const page_t*	page = block->frame;

	m_space_flags = fsp_header_get_flags(page);

	if (!fsp_flags_is_valid(m_space_flags)) {
		/* Try to interpret the flags via the "101" conversion
		helper before giving up. */
		const ulint	converted
			= fsp_flags_convert_from_101(m_space_flags);

		if (converted == ULINT_UNDEFINED) {
			ib_logf(IB_LOG_LEVEL_ERROR,
				"Invalid FSP_SPACE_FLAGS=0x%x",
				int(m_space_flags));
			return(DB_CORRUPTION);
		}

		m_space_flags = converted;
	}

	/* Clear the DATA_DIR flag, which is basically garbage. */
	m_space_flags &= ~(1U << FSP_FLAGS_POS_RESERVED);

	/* Since we don't know whether it is a compressed table
	or not, the data is always read into the block->frame. */
	if (set_zip_size(block->frame) != DB_SUCCESS) {
		return(DB_CORRUPTION);
	}

	/* Set the page size used to traverse the tablespace. */
	if (is_compressed_table()) {
		m_page_size = get_zip_size();
	} else {
		m_page_size = fsp_flags_get_page_size(m_space_flags);
	}

	if (m_page_size == 0) {
		ib_logf(IB_LOG_LEVEL_ERROR, "Page size is 0");
		return(DB_CORRUPTION);
	}

	if (!is_compressed_table() && m_page_size != UNIV_PAGE_SIZE) {
		ib_logf(IB_LOG_LEVEL_ERROR,
			"Page size " ULINTPF " of ibd file is not the same "
			"as the server page size " ULINTPF,
			m_page_size, UNIV_PAGE_SIZE);
		return(DB_CORRUPTION);
	}

	if ((file_size % m_page_size) != 0) {
		ib_logf(IB_LOG_LEVEL_ERROR,
			"File size " UINT64PF " is not a multiple "
			"of the page size " ULINTPF,
			(ib_uint64_t) file_size, m_page_size);
		return(DB_CORRUPTION);
	}

	/* init() must only run once per callback instance. */
	ut_a(m_space == ULINT_UNDEFINED);

	/* NOTE(review): m_space is read at FSP_HEADER_OFFSET + FSP_SPACE_ID
	while m_size and m_free_limit are read without FSP_HEADER_OFFSET;
	confirm that these offsets are intended. */
	m_size = mach_read_from_4(page + FSP_SIZE);
	m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
	m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);

	/* Page 0 also carries the first extent descriptor array. */
	return(set_current_xdes(0, page));
}

/**
Try and determine the index root pages by checking if the next/prev
pointers are both FIL_NULL.
We need to ensure that we skip deleted pages. */
struct FetchIndexRootPages : public AbstractCallback {

	/** Index information gathered from the .ibd file. */
	struct Index {

		/** @param id - index id stamped on the root page
		@param page_no - page number of the root page */
		Index(index_id_t id, ulint page_no)
			:
			m_id(id),
			m_page_no(page_no) { }

		index_id_t	m_id;		/*!< Index id */
		ulint		m_page_no;	/*!< Root page number */
	};

	typedef std::vector<Index>	Indexes;

	/** Constructor
	@param table - table definition in server.
	@param trx - covering (user) transaction */
	FetchIndexRootPages(const dict_table_t* table, trx_t* trx) UNIV_NOTHROW
		:
		AbstractCallback(trx),
		m_table(table) { }

	/** Destructor */
	virtual ~FetchIndexRootPages() UNIV_NOTHROW { }

	/**
	@retval the space id of the tablespace being iterated over */
	virtual ulint get_space_id() const UNIV_NOTHROW
	{
		return(m_space);
	}

	/**
	Called for each block as it is read from the file.
	@param offset - physical offset in the file
	@param block - block to convert, it is not from the buffer pool.
	@retval DB_SUCCESS or error code. */
	virtual dberr_t operator() (
		os_offset_t	offset,
		buf_block_t*	block) UNIV_NOTHROW;

	/** Update the import configuration that will be used to import
	the tablespace.
	@param cfg - import configuration to fill in
	@return DB_SUCCESS or error code */
	dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;

	/** Table definition in server. */
	const dict_table_t*	m_table;

	/** Index information, one entry per root page found */
	Indexes			m_indexes;
};

/**
Called for each block as it is read from the file. Check index pages to
determine the exact row format. We can't get that from the tablespace
header flags alone.

@param offset - physical offset in the file
@param block - block to convert, it is not from the buffer pool.
@retval DB_SUCCESS or error code.
*/
/* Records every non-free index root page in m_indexes, refreshes the
cached extent descriptor when an XDES page is seen, and on the first root
page checks that the tablespace flags match the table flags. */
dberr_t
FetchIndexRootPages::operator() (
	os_offset_t	offset,
	buf_block_t*	block) UNIV_NOTHROW
{
	const dberr_t	interrupted = periodic_check();

	if (interrupted != DB_SUCCESS) {
		return(interrupted);
	}

	const page_t*	page = get_frame(block);
	const ulint	page_type = fil_page_get_type(page);

	/* The page number stored in the block must agree with where the
	page was actually read from. */
	if (block->page.offset * m_page_size != offset) {
		ib_logf(IB_LOG_LEVEL_ERROR,
			"Page offset doesn't match file offset: "
			"page offset: %u, file offset: " ULINTPF,
			block->page.offset,
			(ulint) (offset / m_page_size));

		return(DB_CORRUPTION);
	}

	if (page_type == FIL_PAGE_TYPE_XDES) {
		return(set_current_xdes(block->page.offset, page));
	}

	/* Only allocated index root pages are of interest. */
	if (page_type != FIL_PAGE_INDEX
	    || is_free(block->page.offset)
	    || !is_root_page(page)) {

		return(DB_SUCCESS);
	}

	const index_id_t	root_id = btr_page_get_index_id(page);
	const ulint		root_page_no = buf_block_get_page_no(block);

	m_indexes.push_back(Index(root_id, root_page_no));

	if (m_indexes.size() != 1) {
		/* The flags were already checked on the first root page. */
		return(DB_SUCCESS);
	}

	/* Check that the tablespace flags match the table flags. */
	const ulint	expected = dict_tf_to_fsp_flags(m_table->flags);

	if (fsp_flags_match(expected, m_space_flags)) {
		return(DB_SUCCESS);
	}

	ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
		ER_TABLE_SCHEMA_MISMATCH,
		"Expected FSP_SPACE_FLAGS=0x%x, .ibd "
		"file contains 0x%x.",
		unsigned(expected),
		unsigned(m_space_flags));

	return(DB_CORRUPTION);
}

/**
Update the import configuration that will be used to import the tablespace.
-@return error code or DB_SUCCESS */ -dberr_t -FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW -{ - Indexes::const_iterator end = m_indexes.end(); - - ut_a(cfg->m_table == m_table); - cfg->m_page_size = m_page_size; - cfg->m_n_indexes = m_indexes.size(); - - if (cfg->m_n_indexes == 0) { - - ib_logf(IB_LOG_LEVEL_ERROR, "No B+Tree found in tablespace"); - - return(DB_CORRUPTION); - } - - cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_11", - delete [] cfg->m_indexes; cfg->m_indexes = 0;); - - if (cfg->m_indexes == 0) { - return(DB_OUT_OF_MEMORY); - } - - memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes); - - row_index_t* cfg_index = cfg->m_indexes; - - for (Indexes::const_iterator it = m_indexes.begin(); - it != end; - ++it, ++cfg_index) { - - char name[BUFSIZ]; - - ut_snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id); - - ulint len = strlen(name) + 1; - - cfg_index->m_name = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_12", - delete [] cfg_index->m_name; - cfg_index->m_name = 0;); - - if (cfg_index->m_name == 0) { - return(DB_OUT_OF_MEMORY); - } - - memcpy(cfg_index->m_name, name, len); - - cfg_index->m_id = it->m_id; - - cfg_index->m_space = m_space; - - cfg_index->m_page_no = it->m_page_no; - } - - return(DB_SUCCESS); -} - -/* Functor that is called for each physical page that is read from the -tablespace file. - - 1. Check each page for corruption. - - 2. Update the space id and LSN on every page - * For the header page - - Validate the flags - - Update the LSN - - 3. On Btree pages - * Set the index id - * Update the max trx id - * In a cluster index, update the system columns - * In a cluster index, update the BLOB ptr, set the space id - * Purge delete marked records, but only if they can be easily - removed from the page - * Keep a counter of number of rows, ie. 
non-delete-marked rows - * Keep a counter of number of delete marked rows - * Keep a counter of number of purge failure - * If a page is stamped with an index id that isn't in the .cfg file - we assume it is deleted and the page can be ignored. - - 4. Set the page state to dirty so that it will be written to disk. -*/ -class PageConverter : public AbstractCallback { -public: - /** Constructor - * @param cfg - config of table being imported. - * @param trx - transaction covering the import */ - PageConverter(row_import* cfg, trx_t* trx) UNIV_NOTHROW; - - virtual ~PageConverter() UNIV_NOTHROW - { - if (m_heap != 0) { - mem_heap_free(m_heap); - } - } - - /** - @retval the server space id of the tablespace being iterated over */ - virtual ulint get_space_id() const UNIV_NOTHROW - { - return(m_cfg->m_table->space); - } - - /** - Called for each block as it is read from the file. - @param offset - physical offset in the file - @param block - block to convert, it is not from the buffer pool. - @retval DB_SUCCESS or error code. */ - virtual dberr_t operator() ( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW; -private: - - /** Status returned by PageConverter::validate() */ - enum import_page_status_t { - IMPORT_PAGE_STATUS_OK, /*!< Page is OK */ - IMPORT_PAGE_STATUS_ALL_ZERO, /*!< Page is all zeros */ - IMPORT_PAGE_STATUS_CORRUPTED /*!< Page is corrupted */ - }; - - /** - Update the page, set the space id, max trx id and index id. - @param block - block read from file - @param page_type - type of the page - @retval DB_SUCCESS or error code */ - dberr_t update_page( - buf_block_t* block, - ulint& page_type) UNIV_NOTHROW; - -#if defined UNIV_DEBUG - /** - @return true error condition is enabled. */ - bool trigger_corruption() UNIV_NOTHROW - { - return(false); - } - #else -#define trigger_corruption() (false) -#endif /* UNIV_DEBUG */ - - /** - Update the space, index id, trx id. 
- @param block - block to convert - @return DB_SUCCESS or error code */ - dberr_t update_index_page(buf_block_t* block) UNIV_NOTHROW; - - /** Update the BLOB refrences and write UNDO log entries for - rows that can't be purged optimistically. - @param block - block to update - @retval DB_SUCCESS or error code */ - dberr_t update_records(buf_block_t* block) UNIV_NOTHROW; - - /** - Validate the page, check for corruption. - @param offset - physical offset within file. - @param page - page read from file. - @return 0 on success, 1 if all zero, 2 if corrupted */ - import_page_status_t validate( - os_offset_t offset, - buf_block_t* page) UNIV_NOTHROW; - - /** - Validate the space flags and update tablespace header page. - @param block - block read from file, not from the buffer pool. - @retval DB_SUCCESS or error code */ - dberr_t update_header(buf_block_t* block) UNIV_NOTHROW; - - /** - Adjust the BLOB reference for a single column that is externally stored - @param rec - record to update - @param offsets - column offsets for the record - @param i - column ordinal value - @return DB_SUCCESS or error code */ - dberr_t adjust_cluster_index_blob_column( - rec_t* rec, - const ulint* offsets, - ulint i) UNIV_NOTHROW; - - /** - Adjusts the BLOB reference in the clustered index row for all - externally stored columns. - @param rec - record to update - @param offsets - column offsets for the record - @return DB_SUCCESS or error code */ - dberr_t adjust_cluster_index_blob_columns( - rec_t* rec, - const ulint* offsets) UNIV_NOTHROW; - - /** - In the clustered index, adjist the BLOB pointers as needed. - Also update the BLOB reference, write the new space id. - @param rec - record to update - @param offsets - column offsets for the record - @return DB_SUCCESS or error code */ - dberr_t adjust_cluster_index_blob_ref( - rec_t* rec, - const ulint* offsets) UNIV_NOTHROW; - - /** - Purge delete-marked records, only if it is possible to do - so without re-organising the B+tree. 
- @param offsets - current row offsets. - @retval true if purged */ - bool purge(const ulint* offsets) UNIV_NOTHROW; - - /** - Adjust the BLOB references and sys fields for the current record. - @param index - the index being converted - @param rec - record to update - @param offsets - column offsets for the record - @param deleted - true if row is delete marked - @return DB_SUCCESS or error code. */ - dberr_t adjust_cluster_record( - const dict_index_t* index, - rec_t* rec, - const ulint* offsets, - bool deleted) UNIV_NOTHROW; - - /** - Find an index with the matching id. - @return row_index_t* instance or 0 */ - row_index_t* find_index(index_id_t id) UNIV_NOTHROW - { - row_index_t* index = &m_cfg->m_indexes[0]; - - for (ulint i = 0; i < m_cfg->m_n_indexes; ++i, ++index) { - if (id == index->m_id) { - return(index); - } - } - - return(0); - - } -private: - /** Config for table that is being imported. */ - row_import* m_cfg; - - /** Current index whose pages are being imported */ - row_index_t* m_index; - - /** Current system LSN */ - lsn_t m_current_lsn; - - /** Alias for m_page_zip, only set for compressed pages. */ - page_zip_des_t* m_page_zip_ptr; - - /** Iterator over records in a block */ - RecIterator m_rec_iter; - - /** Record offset */ - ulint m_offsets_[REC_OFFS_NORMAL_SIZE]; - - /** Pointer to m_offsets_ */ - ulint* m_offsets; - - /** Memory heap for the record offsets */ - mem_heap_t* m_heap; - - /** Cluster index instance */ - dict_index_t* m_cluster_index; -}; - -/** -row_import destructor. 
*/ -row_import::~row_import() UNIV_NOTHROW -{ - for (ulint i = 0; m_indexes != 0 && i < m_n_indexes; ++i) { - delete [] m_indexes[i].m_name; - - if (m_indexes[i].m_fields == 0) { - continue; - } - - dict_field_t* fields = m_indexes[i].m_fields; - ulint n_fields = m_indexes[i].m_n_fields; - - for (ulint j = 0; j < n_fields; ++j) { - delete [] fields[j].name; - } - - delete [] fields; - } - - for (ulint i = 0; m_col_names != 0 && i < m_n_cols; ++i) { - delete [] m_col_names[i]; - } - - delete [] m_cols; - delete [] m_indexes; - delete [] m_col_names; - delete [] m_table_name; - delete [] m_hostname; -} - -/** -Find the index entry in in the indexes array. -@param name - index name -@return instance if found else 0. */ -row_index_t* -row_import::get_index( - const char* name) const UNIV_NOTHROW -{ - for (ulint i = 0; i < m_n_indexes; ++i) { - const char* index_name; - row_index_t* index = &m_indexes[i]; - - index_name = reinterpret_cast<const char*>(index->m_name); - - if (strcmp(index_name, name) == 0) { - - return(index); - } - } - - return(0); -} - -/** -Get the number of rows in the index. -@param name - index name -@return number of rows (doesn't include delete marked rows). */ -ulint -row_import::get_n_rows( - const char* name) const UNIV_NOTHROW -{ - const row_index_t* index = get_index(name); - - ut_a(name != 0); - - return(index->m_stats.m_n_rows); -} - -/** -Get the number of rows for which purge failed uding the convert phase. -@param name - index name -@return number of rows for which purge failed. */ -ulint -row_import::get_n_purge_failed( - const char* name) const UNIV_NOTHROW -{ - const row_index_t* index = get_index(name); - - ut_a(name != 0); - - return(index->m_stats.m_n_purge_failed); -} - -/** -Find the ordinal value of the column name in the cfg table columns. -@param name - of column to look for. -@return ULINT_UNDEFINED if not found. 
*/ -ulint -row_import::find_col( - const char* name) const UNIV_NOTHROW -{ - for (ulint i = 0; i < m_n_cols; ++i) { - const char* col_name; - - col_name = reinterpret_cast<const char*>(m_col_names[i]); - - if (strcmp(col_name, name) == 0) { - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/** -Check if the index schema that was read from the .cfg file matches the -in memory index definition. -@return DB_SUCCESS or error code. */ -dberr_t -row_import::match_index_columns( - THD* thd, - const dict_index_t* index) UNIV_NOTHROW -{ - row_index_t* cfg_index; - dberr_t err = DB_SUCCESS; - - cfg_index = get_index(index->name); - - if (cfg_index == 0) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Index %s not found in tablespace meta-data file.", - index->name); - - return(DB_ERROR); - } - - if (cfg_index->m_n_fields != index->n_fields) { - - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Index field count %u doesn't match" - " tablespace metadata file value " ULINTPF, - index->n_fields, cfg_index->m_n_fields); - - return(DB_ERROR); - } - - cfg_index->m_srv_index = index; - - const dict_field_t* field = index->fields; - const dict_field_t* cfg_field = cfg_index->m_fields; - - for (ulint i = 0; i < index->n_fields; ++i, ++field, ++cfg_field) { - - if (strcmp(field->name, cfg_field->name) != 0) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Index field name %s doesn't match" - " tablespace metadata field name %s" - " for field position " ULINTPF, - field->name, cfg_field->name, i); - - err = DB_ERROR; - } - - if (cfg_field->prefix_len != field->prefix_len) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Index %s field %s prefix len %u" - " doesn't match metadata file value %u", - index->name, field->name, - field->prefix_len, cfg_field->prefix_len); - - err = DB_ERROR; - } - - if (cfg_field->fixed_len != field->fixed_len) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - 
"Index %s field %s fixed len %u" - " doesn't match metadata file value %u", - index->name, field->name, - field->fixed_len, - cfg_field->fixed_len); - - err = DB_ERROR; - } - } - - return(err); -} - -/** -Check if the table schema that was read from the .cfg file matches the -in memory table definition. -@param thd - MySQL session variable -@return DB_SUCCESS or error code. */ -dberr_t -row_import::match_table_columns( - THD* thd) UNIV_NOTHROW -{ - dberr_t err = DB_SUCCESS; - const dict_col_t* col = m_table->cols; - - for (ulint i = 0; i < m_table->n_cols; ++i, ++col) { - - const char* col_name; - ulint cfg_col_index; - - col_name = dict_table_get_col_name( - m_table, dict_col_get_no(col)); - - cfg_col_index = find_col(col_name); - - if (cfg_col_index == ULINT_UNDEFINED) { - - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s not found in tablespace.", - col_name); - - err = DB_ERROR; - } else if (cfg_col_index != col->ind) { - - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s ordinal value mismatch, it's at %u" - " in the table and " ULINTPF - " in the tablespace meta-data file", - col_name, col->ind, cfg_col_index); - - err = DB_ERROR; - } else { - const dict_col_t* cfg_col; - - cfg_col = &m_cols[cfg_col_index]; - ut_a(cfg_col->ind == cfg_col_index); - - if (cfg_col->prtype != col->prtype) { - ib_errf(thd, - IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s precise type mismatch.", - col_name); - err = DB_ERROR; - } - - if (cfg_col->mtype != col->mtype) { - ib_errf(thd, - IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s main type mismatch.", - col_name); - err = DB_ERROR; - } - - if (cfg_col->len != col->len) { - ib_errf(thd, - IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s length mismatch.", - col_name); - err = DB_ERROR; - } - - if (cfg_col->mbminmaxlen != col->mbminmaxlen) { - ib_errf(thd, - IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s multi-byte len 
mismatch.", - col_name); - err = DB_ERROR; - } - - if (cfg_col->ind != col->ind) { - err = DB_ERROR; - } - - if (cfg_col->ord_part != col->ord_part) { - ib_errf(thd, - IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s ordering mismatch.", - col_name); - err = DB_ERROR; - } - - if (cfg_col->max_prefix != col->max_prefix) { - ib_errf(thd, - IB_LOG_LEVEL_ERROR, - ER_TABLE_SCHEMA_MISMATCH, - "Column %s max prefix mismatch.", - col_name); - err = DB_ERROR; - } - } - } - - return(err); -} - -/** -Check if the table (and index) schema that was read from the .cfg file -matches the in memory table definition. -@param thd - MySQL session variable -@return DB_SUCCESS or error code. */ -dberr_t -row_import::match_schema( - THD* thd) UNIV_NOTHROW -{ - /* Do some simple checks. */ - - if ((m_table->flags ^ m_flags) & ~DICT_TF_MASK_DATA_DIR) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, - "Table flags don't match, server table has 0x%x" - " and the meta-data file has 0x%lx", - m_table->flags, ulong(m_flags)); - - return(DB_ERROR); - } else if (m_table->n_cols != m_n_cols) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, - "Number of columns don't match, table has %u " - "columns but the tablespace meta-data file has " - ULINTPF " columns", - m_table->n_cols, m_n_cols); - - return(DB_ERROR); - } else if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) { - - /* If the number of indexes don't match then it is better - to abort the IMPORT. It is easy for the user to create a - table matching the IMPORT definition. */ - - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, - "Number of indexes don't match, table has " ULINTPF - " indexes but the tablespace meta-data file has " - ULINTPF " indexes", - UT_LIST_GET_LEN(m_table->indexes), m_n_indexes); - - return(DB_ERROR); - } - - dberr_t err = match_table_columns(thd); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Check if the index definitions match. 
*/ - - const dict_index_t* index; - - for (index = UT_LIST_GET_FIRST(m_table->indexes); - index != 0; - index = UT_LIST_GET_NEXT(indexes, index)) { - - dberr_t index_err; - - index_err = match_index_columns(thd, index); - - if (index_err != DB_SUCCESS) { - err = index_err; - } - } - - return(err); -} - -/** -Set the index root <space, pageno>, using index name. */ -void -row_import::set_root_by_name() UNIV_NOTHROW -{ - row_index_t* cfg_index = m_indexes; - - for (ulint i = 0; i < m_n_indexes; ++i, ++cfg_index) { - dict_index_t* index; - - const char* index_name; - - index_name = reinterpret_cast<const char*>(cfg_index->m_name); - - index = dict_table_get_index_on_name(m_table, index_name); - - /* We've already checked that it exists. */ - ut_a(index != 0); - - /* Set the root page number and space id. */ - index->space = m_table->space; - index->page = cfg_index->m_page_no; - } -} - -/** -Set the index root <space, pageno>, using a heuristic. -@return DB_SUCCESS or error code */ -dberr_t -row_import::set_root_by_heuristic() UNIV_NOTHROW -{ - row_index_t* cfg_index = m_indexes; - - ut_a(m_n_indexes > 0); - - // TODO: For now use brute force, based on ordinality - - if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) { - - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), m_table->name, FALSE); - - ib_logf(IB_LOG_LEVEL_WARN, - "Table %s should have " ULINTPF - " indexes but the tablespace has " ULINTPF " indexes", - table_name, - UT_LIST_GET_LEN(m_table->indexes), - m_n_indexes); - } - - dict_mutex_enter_for_mysql(); - - ulint i = 0; - dberr_t err = DB_SUCCESS; - - for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes); - index != 0; - index = UT_LIST_GET_NEXT(indexes, index)) { - - if (index->type & DICT_FTS) { - index->type |= DICT_CORRUPT; - ib_logf(IB_LOG_LEVEL_WARN, - "Skipping FTS index: %s", index->name); - } else if (i < m_n_indexes) { - - delete [] cfg_index[i].m_name; - - ulint len = 
strlen(index->name) + 1; - - cfg_index[i].m_name = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_14", - delete[] cfg_index[i].m_name; - cfg_index[i].m_name = 0;); - - if (cfg_index[i].m_name == 0) { - err = DB_OUT_OF_MEMORY; - break; - } - - memcpy(cfg_index[i].m_name, index->name, len); - - cfg_index[i].m_srv_index = index; - - index->space = m_table->space; - index->page = cfg_index[i].m_page_no; - - ++i; - } - } - - dict_mutex_exit_for_mysql(); - - return(err); -} - -/** -Purge delete marked records. -@return DB_SUCCESS or error code. */ -dberr_t -IndexPurge::garbage_collect() UNIV_NOTHROW -{ - dberr_t err; - ibool comp = dict_table_is_comp(m_index->table); - - /* Open the persistent cursor and start the mini-transaction. */ - - open(); - - while ((err = next()) == DB_SUCCESS) { - - rec_t* rec = btr_pcur_get_rec(&m_pcur); - ibool deleted = rec_get_deleted_flag(rec, comp); - - if (!deleted) { - ++m_n_rows; - } else { - purge(); - } - } - - /* Close the persistent cursor and commit the mini-transaction. */ - - close(); - - return(err == DB_END_OF_INDEX ? DB_SUCCESS : err); -} - -/** -Begin import, position the cursor on the first record. */ -void -IndexPurge::open() UNIV_NOTHROW -{ - mtr_start(&m_mtr); - - mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - - btr_pcur_open_at_index_side( - true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr); -} - -/** -Close the persistent curosr and commit the mini-transaction. */ -void -IndexPurge::close() UNIV_NOTHROW -{ - btr_pcur_close(&m_pcur); - mtr_commit(&m_mtr); -} - -/** -Position the cursor on the next record. -@return DB_SUCCESS or error code */ -dberr_t -IndexPurge::next() UNIV_NOTHROW -{ - btr_pcur_move_to_next_on_page(&m_pcur); - - /* When switching pages, commit the mini-transaction - in order to release the latch on the old page. 
*/ - - if (!btr_pcur_is_after_last_on_page(&m_pcur)) { - return(DB_SUCCESS); - } else if (trx_is_interrupted(m_trx)) { - /* Check after every page because the check - is expensive. */ - return(DB_INTERRUPTED); - } - - btr_pcur_store_position(&m_pcur, &m_mtr); - - mtr_commit(&m_mtr); - - mtr_start(&m_mtr); - - mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - - btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr); - - if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) { - - return(DB_END_OF_INDEX); - } - - return(DB_SUCCESS); -} - -/** -Store the persistent cursor position and reopen the -B-tree cursor in BTR_MODIFY_TREE mode, because the -tree structure may be changed during a pessimistic delete. */ -void -IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW -{ - dberr_t err; - - btr_pcur_restore_position(BTR_MODIFY_TREE, &m_pcur, &m_mtr); - - ut_ad(rec_get_deleted_flag( - btr_pcur_get_rec(&m_pcur), - dict_table_is_comp(m_index->table))); - - btr_cur_pessimistic_delete( - &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, RB_NONE, &m_mtr); - - ut_a(err == DB_SUCCESS); - - /* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */ - mtr_commit(&m_mtr); -} - -/** -Purge delete-marked records. */ -void -IndexPurge::purge() UNIV_NOTHROW -{ - btr_pcur_store_position(&m_pcur, &m_mtr); - - purge_pessimistic_delete(); - - mtr_start(&m_mtr); - - mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); - - btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr); -} - -/** -Constructor -* @param cfg - config of table being imported. 
-* @param trx - transaction covering the import */ -PageConverter::PageConverter( - row_import* cfg, - trx_t* trx) - : - AbstractCallback(trx), - m_cfg(cfg), - m_page_zip_ptr(0), - m_heap(0) UNIV_NOTHROW -{ - m_index = m_cfg->m_indexes; - - m_current_lsn = log_get_lsn(); - ut_a(m_current_lsn > 0); - - m_offsets = m_offsets_; - rec_offs_init(m_offsets_); - - m_cluster_index = dict_table_get_first_index(m_cfg->m_table); -} - -/** -Adjust the BLOB reference for a single column that is externally stored -@param rec - record to update -@param offsets - column offsets for the record -@param i - column ordinal value -@return DB_SUCCESS or error code */ -dberr_t -PageConverter::adjust_cluster_index_blob_column( - rec_t* rec, - const ulint* offsets, - ulint i) UNIV_NOTHROW -{ - ulint len; - byte* field; - - field = rec_get_nth_field(rec, offsets, i, &len); - - DBUG_EXECUTE_IF("ib_import_trigger_corruption_2", - len = BTR_EXTERN_FIELD_REF_SIZE - 1;); - - if (len < BTR_EXTERN_FIELD_REF_SIZE) { - - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - index_name, sizeof(index_name), - m_cluster_index->name, TRUE); - - ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_INNODB_INDEX_CORRUPT, - "Externally stored column(" ULINTPF - ") has a reference length of " ULINTPF - " in the cluster index %s", - i, len, index_name); - - return(DB_CORRUPTION); - } - - field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len; - - if (is_compressed_table()) { - mach_write_to_4(field, get_space_id()); - - page_zip_write_blob_ptr( - m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0); - } else { - mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0); - } - - return(DB_SUCCESS); -} - -/** -Adjusts the BLOB reference in the clustered index row for all externally -stored columns. 
-@param rec - record to update -@param offsets - column offsets for the record -@return DB_SUCCESS or error code */ -dberr_t -PageConverter::adjust_cluster_index_blob_columns( - rec_t* rec, - const ulint* offsets) UNIV_NOTHROW -{ - ut_ad(rec_offs_any_extern(offsets)); - - /* Adjust the space_id in the BLOB pointers. */ - - for (ulint i = 0; i < rec_offs_n_fields(offsets); ++i) { - - /* Only if the column is stored "externally". */ - - if (rec_offs_nth_extern(offsets, i)) { - dberr_t err; - - err = adjust_cluster_index_blob_column(rec, offsets, i); - - if (err != DB_SUCCESS) { - return(err); - } - } - } - - return(DB_SUCCESS); -} - -/** -In the clustered index, adjust BLOB pointers as needed. Also update the -BLOB reference, write the new space id. -@param rec - record to update -@param offsets - column offsets for the record -@return DB_SUCCESS or error code */ -dberr_t -PageConverter::adjust_cluster_index_blob_ref( - rec_t* rec, - const ulint* offsets) UNIV_NOTHROW -{ - if (rec_offs_any_extern(offsets)) { - dberr_t err; - - err = adjust_cluster_index_blob_columns(rec, offsets); - - if (err != DB_SUCCESS) { - return(err); - } - } - - return(DB_SUCCESS); -} - -/** -Purge delete-marked records, only if it is possible to do so without -re-organising the B+tree. -@param offsets - current row offsets. -@return true if purge succeeded */ -bool -PageConverter::purge(const ulint* offsets) UNIV_NOTHROW -{ - const dict_index_t* index = m_index->m_srv_index; - - /* We can't have a page that is empty and not root. */ - if (m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) { - - ++m_index->m_stats.m_n_purged; - - return(true); - } else { - ++m_index->m_stats.m_n_purge_failed; - } - - return(false); -} - -/** -Adjust the BLOB references and sys fields for the current record. -@param rec - record to update -@param offsets - column offsets for the record -@param deleted - true if row is delete marked -@return DB_SUCCESS or error code. 
*/ -dberr_t -PageConverter::adjust_cluster_record( - const dict_index_t* index, - rec_t* rec, - const ulint* offsets, - bool deleted) UNIV_NOTHROW -{ - dberr_t err; - - if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) { - - /* Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields - are only written in conjunction with other changes to the - record. */ - - row_upd_rec_sys_fields( - rec, m_page_zip_ptr, m_cluster_index, m_offsets, - m_trx, 0); - } - - return(err); -} - -/** -Update the BLOB refrences and write UNDO log entries for -rows that can't be purged optimistically. -@param block - block to update -@retval DB_SUCCESS or error code */ -dberr_t -PageConverter::update_records( - buf_block_t* block) UNIV_NOTHROW -{ - ibool comp = dict_table_is_comp(m_cfg->m_table); - bool clust_index = m_index->m_srv_index == m_cluster_index; - - /* This will also position the cursor on the first user record. */ - - m_rec_iter.open(block); - - while (!m_rec_iter.end()) { - - rec_t* rec = m_rec_iter.current(); - - /* FIXME: Move out of the loop */ - - if (rec_get_status(rec) == REC_STATUS_NODE_PTR) { - break; - } - - ibool deleted = rec_get_deleted_flag(rec, comp); - - /* For the clustered index we have to adjust the BLOB - reference and the system fields irrespective of the - delete marked flag. The adjustment of delete marked - cluster records is required for purge to work later. */ - - if (deleted || clust_index) { - m_offsets = rec_get_offsets( - rec, m_index->m_srv_index, m_offsets, - ULINT_UNDEFINED, &m_heap); - } - - if (clust_index) { - - dberr_t err = adjust_cluster_record( - m_index->m_srv_index, rec, m_offsets, - deleted); - - if (err != DB_SUCCESS) { - return(err); - } - } - - /* If it is a delete marked record then try an - optimistic delete. */ - - if (deleted) { - /* A successful purge will move the cursor to the - next record. 
*/ - - if (!purge(m_offsets)) { - m_rec_iter.next(); - } - - ++m_index->m_stats.m_n_deleted; - } else { - ++m_index->m_stats.m_n_rows; - m_rec_iter.next(); - } - } - - return(DB_SUCCESS); -} - -/** -Update the space, index id, trx id. -@return DB_SUCCESS or error code */ -dberr_t -PageConverter::update_index_page( - buf_block_t* block) UNIV_NOTHROW -{ - index_id_t id; - buf_frame_t* page = block->frame; - - if (is_free(buf_block_get_page_no(block))) { - return(DB_SUCCESS); - } else if ((id = btr_page_get_index_id(page)) != m_index->m_id) { - - row_index_t* index = find_index(id); - - if (index == 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Page for tablespace " ULINTPF " is " - " index page with id " IB_ID_FMT " but that" - " index is not found from configuration file." - " Current index name %s and id " IB_ID_FMT ".", - m_space, - id, - m_index->m_name, - m_index->m_id); - m_index = 0; - return(DB_CORRUPTION); - } - - /* Update current index */ - m_index = index; - } - - /* If the .cfg file is missing and there is an index mismatch - then ignore the error. */ - if (m_cfg->m_missing && (m_index == 0 || m_index->m_srv_index == 0)) { - return(DB_SUCCESS); - } - -#ifdef UNIV_ZIP_DEBUG - ut_a(!is_compressed_table() - || page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index)); -#endif /* UNIV_ZIP_DEBUG */ - - /* This has to be written to uncompressed index header. Set it to - the current index id. */ - btr_page_set_index_id( - page, m_page_zip_ptr, m_index->m_srv_index->id, 0); - - page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0); - - if (page_is_empty(block->frame)) { - - /* Only a root page can be empty. */ - if (!is_root_page(block->frame)) { - // TODO: We should relax this and skip secondary - // indexes. Mark them as corrupt because they can - // always be rebuilt. - return(DB_CORRUPTION); - } - - return(DB_SUCCESS); - } - - return(update_records(block)); -} - -/** -Validate the space flags and update tablespace header page. 
-@param block - block read from file, not from the buffer pool. -@retval DB_SUCCESS or error code */ -dberr_t -PageConverter::update_header( - buf_block_t* block) UNIV_NOTHROW -{ - /* Check for valid header */ - switch(fsp_header_get_space_id(get_frame(block))) { - case 0: - return(DB_CORRUPTION); - case ULINT_UNDEFINED: - ib_logf(IB_LOG_LEVEL_WARN, - "Space id check in the header failed " - "- ignored"); - } - - mach_write_to_8( - get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - m_current_lsn); - - /* Write back the adjusted flags. */ - mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS - + get_frame(block), m_space_flags); - - /* Write space_id to the tablespace header, page 0. */ - mach_write_to_4( - get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID, - get_space_id()); - - /* This is on every page in the tablespace. */ - mach_write_to_4( - get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - get_space_id()); - - return(DB_SUCCESS); -} - -/** -Update the page, set the space id, max trx id and index id. -@param block - block read from file -@retval DB_SUCCESS or error code */ -dberr_t -PageConverter::update_page( - buf_block_t* block, - ulint& page_type) UNIV_NOTHROW -{ - dberr_t err = DB_SUCCESS; - - switch (page_type = fil_page_get_type(get_frame(block))) { - case FIL_PAGE_TYPE_FSP_HDR: - /* Work directly on the uncompressed page headers. */ - ut_a(buf_block_get_page_no(block) == 0); - return(update_header(block)); - - case FIL_PAGE_INDEX: - /* We need to decompress the contents into block->frame - before we can do any thing with Btree pages. */ - - if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) { - return(DB_CORRUPTION); - } - - /* This is on every page in the tablespace. */ - mach_write_to_4( - get_frame(block) - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id()); - - /* Only update the Btree nodes. */ - return(update_index_page(block)); - - case FIL_PAGE_TYPE_SYS: - /* This is page 0 in the system tablespace. 
*/ - return(DB_CORRUPTION); - - case FIL_PAGE_TYPE_XDES: - err = set_current_xdes( - buf_block_get_page_no(block), get_frame(block)); - /* fall through */ - case FIL_PAGE_INODE: - case FIL_PAGE_TYPE_TRX_SYS: - case FIL_PAGE_IBUF_FREE_LIST: - case FIL_PAGE_TYPE_ALLOCATED: - case FIL_PAGE_IBUF_BITMAP: - case FIL_PAGE_TYPE_BLOB: - case FIL_PAGE_TYPE_ZBLOB: - case FIL_PAGE_TYPE_ZBLOB2: - - /* Work directly on the uncompressed page headers. */ - /* This is on every page in the tablespace. */ - mach_write_to_4( - get_frame(block) - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id()); - - return(err); - } - - ib_logf(IB_LOG_LEVEL_WARN, "Unknown page type (" ULINTPF ")", - page_type); - - return(DB_CORRUPTION); -} - -/** -Validate the page -@param offset - physical offset within file. -@param page - page read from file. -@return status */ -PageConverter::import_page_status_t -PageConverter::validate( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW -{ - buf_frame_t* page = get_frame(block); - - /* Check that the page number corresponds to the offset in - the file. Flag as corrupt if it doesn't. Disable the check - for LSN in buf_page_is_corrupted() */ - - if (buf_page_is_corrupted(false, page, get_zip_size(), NULL) - || (page_get_page_no(page) != offset / m_page_size - && page_get_page_no(page) != 0)) { - - return(IMPORT_PAGE_STATUS_CORRUPTED); - - } else if (offset > 0 && page_get_page_no(page) == 0) { - ulint checksum; - - checksum = mach_read_from_4(page + FIL_PAGE_SPACE_OR_CHKSUM); - if (checksum != 0) { - /* Checksum check passed in buf_page_is_corrupted(). */ - ib_logf(IB_LOG_LEVEL_WARN, - "%s: Page %lu checksum " ULINTPF - " should be zero.", - m_filepath, (ulong) (offset / m_page_size), - checksum); - } - - const byte* b = page + FIL_PAGE_OFFSET; - const byte* e = page + m_page_size - - FIL_PAGE_END_LSN_OLD_CHKSUM; - - /* If the page number is zero and offset > 0 then - the entire page MUST consist of zeroes. If not then - we flag it as corrupt. 
*/ - - while (b != e) { - - if (*b++ && !trigger_corruption()) { - return(IMPORT_PAGE_STATUS_CORRUPTED); - } - } - - /* The page is all zero: do nothing. */ - return(IMPORT_PAGE_STATUS_ALL_ZERO); - } - - return(IMPORT_PAGE_STATUS_OK); -} - -/** -Called for every page in the tablespace. If the page was not -updated then its state must be set to BUF_PAGE_NOT_USED. -@param offset - physical offset within the file -@param block - block read from file, note it is not from the buffer pool -@retval DB_SUCCESS or error code. */ -dberr_t -PageConverter::operator() ( - os_offset_t offset, - buf_block_t* block) UNIV_NOTHROW -{ - ulint page_type; - dberr_t err = DB_SUCCESS; - - if ((err = periodic_check()) != DB_SUCCESS) { - return(err); - } - - if (is_compressed_table()) { - m_page_zip_ptr = &block->page.zip; - } else { - ut_ad(m_page_zip_ptr == 0); - } - - switch(validate(offset, block)) { - case IMPORT_PAGE_STATUS_OK: - - /* We have to decompress the compressed pages before - we can work on them */ - - if ((err = update_page(block, page_type)) != DB_SUCCESS) { - return(err); - } - - /* Note: For compressed pages this function will write to the - zip descriptor and for uncompressed pages it will write to - page (ie. the block->frame). Therefore the caller should write - out the descriptor contents and not block->frame for compressed - pages. */ - - if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) { - - buf_flush_init_for_writing( - !is_compressed_table() - ? block->frame : block->page.zip.data, - !is_compressed_table() ? 0 : m_page_zip_ptr, - m_current_lsn); - } else { - /* Calculate and update the checksum of non-btree - pages for compressed tables explicitly here. */ - - buf_flush_update_zip_checksum( - get_frame(block), get_zip_size(), - m_current_lsn); - } - - break; - - case IMPORT_PAGE_STATUS_ALL_ZERO: - /* The page is all zero: leave it as is. 
*/ - break; - - case IMPORT_PAGE_STATUS_CORRUPTED: - - ib_logf(IB_LOG_LEVEL_WARN, - "%s: Page %lu at offset " UINT64PF " looks corrupted.", - m_filepath, (ulong) (offset / m_page_size), offset); - - return(DB_CORRUPTION); - } - - return(err); -} - -/*****************************************************************//** -Clean up after import tablespace failure, this function will acquire -the dictionary latches on behalf of the transaction if the transaction -hasn't already acquired them. */ -static MY_ATTRIBUTE((nonnull)) -void -row_import_discard_changes( -/*=======================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */ - trx_t* trx, /*!< in/out: transaction for import */ - dberr_t err) /*!< in: error code */ -{ - dict_table_t* table = prebuilt->table; - - ut_a(err != DB_SUCCESS); - - prebuilt->trx->error_info = NULL; - - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), - prebuilt->table->name, FALSE); - - ib_logf(IB_LOG_LEVEL_INFO, - "Discarding tablespace of table %s: %s", - table_name, ut_strerr(err)); - - if (trx->dict_operation_lock_mode != RW_X_LATCH) { - ut_a(trx->dict_operation_lock_mode == 0); - row_mysql_lock_data_dictionary(trx); - } - - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Since we update the index root page numbers on disk after - we've done a successful import. The table will not be loadable. - However, we need to ensure that the in memory root page numbers - are reset to "NULL". */ - - for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != 0; - index = UT_LIST_GET_NEXT(indexes, index)) { - - index->page = FIL_NULL; - index->space = FIL_NULL; - } - - table->file_unreadable = true; - - fil_close_tablespace(trx, table->space); -} - -/*****************************************************************//** -Clean up after import tablespace. 
*/ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_cleanup( -/*===============*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */ - trx_t* trx, /*!< in/out: transaction for import */ - dberr_t err) /*!< in: error code */ -{ - ut_a(prebuilt->trx != trx); - - if (err != DB_SUCCESS) { - row_import_discard_changes(prebuilt, trx, err); - } - - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - - DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE();); - - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx_free_for_mysql(trx); - - prebuilt->trx->op_info = ""; - - DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE();); - - log_make_checkpoint_at(LSN_MAX, TRUE); - - return(err); -} - -/*****************************************************************//** -Report error during tablespace import. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_error( -/*=============*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */ - trx_t* trx, /*!< in/out: transaction for import */ - dberr_t err) /*!< in: error code */ -{ - if (!trx_is_interrupted(trx)) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), - prebuilt->table->name, FALSE); - - ib_senderrf( - trx->mysql_thd, IB_LOG_LEVEL_WARN, - ER_INNODB_IMPORT_ERROR, - table_name, (ulong) err, ut_strerr(err)); - } - - return(row_import_cleanup(prebuilt, trx, err)); -} - -/*****************************************************************//** -Adjust the root page index node and leaf node segment headers, update -with the new space id. For all the table's secondary indexes. 
-@return error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_adjust_root_pages_of_secondary_indexes( -/*==============================================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from - handler */ - trx_t* trx, /*!< in: transaction used for - the import */ - dict_table_t* table, /*!< in: table the indexes - belong to */ - const row_import& cfg) /*!< Import context */ -{ - dict_index_t* index; - ulint n_rows_in_table; - dberr_t err = DB_SUCCESS; - - /* Skip the clustered index. */ - index = dict_table_get_first_index(table); - - n_rows_in_table = cfg.get_n_rows(index->name); - - DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure", - n_rows_in_table++;); - - /* Adjust the root pages of the secondary indexes only. */ - while ((index = dict_table_get_next_index(index)) != NULL) { - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - index_name, sizeof(index_name), index->name, TRUE); - - ut_a(!dict_index_is_clust(index)); - - if (!(index->type & DICT_CORRUPT) - && index->space != FIL_NULL - && index->page != FIL_NULL) { - - /* Update the Btree segment headers for index node and - leaf nodes in the root page. Set the new space id. */ - - err = btr_root_adjust_on_import(index); - } else { - ib_logf(IB_LOG_LEVEL_WARN, - "Skip adjustment of root pages for " - "index %s.", index->name); - - err = DB_CORRUPTION; - } - - if (err != DB_SUCCESS) { - - if (index->type & DICT_CLUSTERED) { - break; - } - - ib_errf(trx->mysql_thd, - IB_LOG_LEVEL_WARN, - ER_INNODB_INDEX_CORRUPT, - "Index '%s' not found or corrupt, " - "you should recreate this index.", - index_name); - - /* Do not bail out, so that the data - can be recovered. */ - - err = DB_SUCCESS; - index->type |= DICT_CORRUPT; - continue; - } - - /* If we failed to purge any records in the index then - do it the hard way. - - TODO: We can do this in the first pass by generating UNDO log - records for the failed rows. 
*/ - - if (!cfg.requires_purge(index->name)) { - continue; - } - - IndexPurge purge(trx, index); - - trx->op_info = "secondary: purge delete marked records"; - - err = purge.garbage_collect(); - - trx->op_info = ""; - - if (err != DB_SUCCESS) { - break; - } else if (purge.get_n_rows() != n_rows_in_table) { - - ib_errf(trx->mysql_thd, - IB_LOG_LEVEL_WARN, - ER_INNODB_INDEX_CORRUPT, - "Index '%s' contains " ULINTPF " entries, " - "should be " ULINTPF ", you should recreate " - "this index.", index_name, - purge.get_n_rows(), n_rows_in_table); - - index->type |= DICT_CORRUPT; - - /* Do not bail out, so that the data - can be recovered. */ - - err = DB_SUCCESS; - } - } - - return(err); -} - -/*****************************************************************//** -Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID). -@return error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_set_sys_max_row_id( -/*==========================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from - handler */ - const dict_table_t* table) /*!< in: table to import */ -{ - dberr_t err; - const rec_t* rec; - mtr_t mtr; - btr_pcur_t pcur; - row_id_t row_id = 0; - dict_index_t* index; - - index = dict_table_get_first_index(table); - ut_a(dict_index_is_clust(index)); - - mtr_start(&mtr); - - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - - btr_pcur_open_at_index_side( - false, // High end - index, - BTR_SEARCH_LEAF, - &pcur, - true, // Init cursor - 0, // Leaf level - &mtr); - - btr_pcur_move_to_prev_on_page(&pcur); - rec = btr_pcur_get_rec(&pcur); - - /* Check for empty table. 
*/ - if (!page_rec_is_infimum(rec)) { - ulint len; - const byte* field; - mem_heap_t* heap = NULL; - ulint offsets_[1 + REC_OFFS_HEADER_SIZE]; - ulint* offsets; - - rec_offs_init(offsets_); - - offsets = rec_get_offsets( - rec, index, offsets_, ULINT_UNDEFINED, &heap); - - field = rec_get_nth_field( - rec, offsets, - dict_index_get_sys_col_pos(index, DATA_ROW_ID), - &len); - - if (len == DATA_ROW_ID_LEN) { - row_id = mach_read_from_6(field); - err = DB_SUCCESS; - } else { - err = DB_CORRUPTION; - } - - if (heap != NULL) { - mem_heap_free(heap); - } - } else { - /* The table is empty. */ - err = DB_SUCCESS; - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure", - err = DB_CORRUPTION;); - - if (err != DB_SUCCESS) { - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - index_name, sizeof(index_name), index->name, TRUE); - - ib_errf(prebuilt->trx->mysql_thd, - IB_LOG_LEVEL_WARN, - ER_INNODB_INDEX_CORRUPT, - "Index '%s' corruption detected, invalid DB_ROW_ID " - "in index.", index_name); - - return(err); - - } else if (row_id > 0) { - - /* Update the system row id if the imported index row id is - greater than the max system row id. */ - - mutex_enter(&dict_sys->mutex); - - if (row_id >= dict_sys->row_id) { - dict_sys->row_id = row_id + 1; - dict_hdr_flush_row_id(); - } - - mutex_exit(&dict_sys->mutex); - } - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Read the a string from the meta data file. -@return DB_SUCCESS or error code. 
*/ -static -dberr_t -row_import_cfg_read_string( -/*=======================*/ - FILE* file, /*!< in/out: File to read from */ - byte* ptr, /*!< out: string to read */ - ulint max_len) /*!< in: maximum length of the output - buffer in bytes */ -{ - DBUG_EXECUTE_IF("ib_import_string_read_error", - errno = EINVAL; return(DB_IO_ERROR);); - - ulint len = 0; - - while (!feof(file)) { - int ch = fgetc(file); - - if (ch == EOF) { - break; - } else if (ch != 0) { - if (len < max_len) { - ptr[len++] = ch; - } else { - break; - } - /* max_len includes the NUL byte */ - } else if (len != max_len - 1) { - break; - } else { - ptr[len] = 0; - return(DB_SUCCESS); - } - } - - errno = EINVAL; - - return(DB_IO_ERROR); -} - -/*********************************************************************//** -Write the meta data (index user fields) config file. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_cfg_read_index_fields( -/*=============================*/ - FILE* file, /*!< in: file to write to */ - THD* thd, /*!< in/out: session */ - row_index_t* index, /*!< Index being read in */ - row_import* cfg) /*!< in/out: meta-data read */ -{ - byte row[sizeof(ib_uint32_t) * 3]; - ulint n_fields = index->m_n_fields; - - index->m_fields = new(std::nothrow) dict_field_t[n_fields]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_4", - delete [] index->m_fields; index->m_fields = 0;); - - if (index->m_fields == 0) { - return(DB_OUT_OF_MEMORY); - } - - dict_field_t* field = index->m_fields; - - memset(field, 0x0, sizeof(*field) * n_fields); - - for (ulint i = 0; i < n_fields; ++i, ++field) { - byte* ptr = row; - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_1", - (void) fseek(file, 0L, SEEK_END);); - - if (fread(row, 1, sizeof(row), file) != sizeof(row)) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading index fields."); - - return(DB_IO_ERROR); - } - - 
field->prefix_len = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - field->fixed_len = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - /* Include the NUL byte in the length. */ - ulint len = mach_read_from_4(ptr); - - byte* name = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_5", delete [] name; name = 0;); - - if (name == 0) { - return(DB_OUT_OF_MEMORY); - } - - field->name = reinterpret_cast<const char*>(name); - - dberr_t err = row_import_cfg_read_string(file, name, len); - - if (err != DB_SUCCESS) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while parsing table name."); - - return(err); - } - } - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Read the index names and root page numbers of the indexes and set the values. -Row format [root_page_no, len of str, str ... ] -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_read_index_data( -/*=======================*/ - FILE* file, /*!< in: File to read from */ - THD* thd, /*!< in: session */ - row_import* cfg) /*!< in/out: meta-data read */ -{ - byte* ptr; - row_index_t* cfg_index; - byte row[sizeof(index_id_t) + sizeof(ib_uint32_t) * 9]; - - /* FIXME: What is the max value? */ - ut_a(cfg->m_n_indexes > 0); - ut_a(cfg->m_n_indexes < 1024); - - cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_6", - delete [] cfg->m_indexes; cfg->m_indexes = 0;); - - if (cfg->m_indexes == 0) { - return(DB_OUT_OF_MEMORY); - } - - memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes); - - cfg_index = cfg->m_indexes; - - for (ulint i = 0; i < cfg->m_n_indexes; ++i, ++cfg_index) { - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_2", - (void) fseek(file, 0L, SEEK_END);); - - /* Read the index data. 
*/ - size_t n_bytes = fread(row, 1, sizeof(row), file); - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error", - (void) fseek(file, 0L, SEEK_END);); - - if (n_bytes != sizeof(row)) { - char msg[BUFSIZ]; - - ut_snprintf(msg, sizeof(msg), - "while reading index meta-data, expected " - "to read %lu bytes but read only %lu " - "bytes", - (ulong) sizeof(row), (ulong) n_bytes); - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), msg); - - ib_logf(IB_LOG_LEVEL_ERROR, "IO Error: %s", msg); - - return(DB_IO_ERROR); - } - - ptr = row; - - cfg_index->m_id = mach_read_from_8(ptr); - ptr += sizeof(index_id_t); - - cfg_index->m_space = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg_index->m_page_no = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg_index->m_type = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg_index->m_trx_id_offset = mach_read_from_4(ptr); - if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) { - ut_ad(0); - /* Overflow. Pretend that the clustered index - has a variable-length PRIMARY KEY. */ - cfg_index->m_trx_id_offset = 0; - } - ptr += sizeof(ib_uint32_t); - - cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg_index->m_n_uniq = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg_index->m_n_nullable = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg_index->m_n_fields = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - /* The NUL byte is included in the name length. 
*/ - ulint len = mach_read_from_4(ptr); - - if (len > OS_FILE_MAX_PATH) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_INNODB_INDEX_CORRUPT, - "Index name length (" ULINTPF ") is too long, " - "the meta-data is corrupt", len); - - return(DB_CORRUPTION); - } - - cfg_index->m_name = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_7", - delete [] cfg_index->m_name; - cfg_index->m_name = 0;); - - if (cfg_index->m_name == 0) { - return(DB_OUT_OF_MEMORY); - } - - dberr_t err; - - err = row_import_cfg_read_string(file, cfg_index->m_name, len); - - if (err != DB_SUCCESS) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while parsing index name."); - - return(err); - } - - err = row_import_cfg_read_index_fields( - file, thd, cfg_index, cfg); - - if (err != DB_SUCCESS) { - return(err); - } - - } - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Set the index root page number for v1 format. -@return DB_SUCCESS or error code. */ -static -dberr_t -row_import_read_indexes( -/*====================*/ - FILE* file, /*!< in: File to read from */ - THD* thd, /*!< in: session */ - row_import* cfg) /*!< in/out: meta-data read */ -{ - byte row[sizeof(ib_uint32_t)]; - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_3", - (void) fseek(file, 0L, SEEK_END);); - - /* Read the number of indexes. */ - if (fread(row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading number of indexes."); - - return(DB_IO_ERROR); - } - - cfg->m_n_indexes = mach_read_from_4(row); - - if (cfg->m_n_indexes == 0) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - "Number of indexes in meta-data file is 0"); - - return(DB_CORRUPTION); - - } else if (cfg->m_n_indexes > 1024) { - // FIXME: What is the upper limit? 
*/ - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - "Number of indexes in meta-data file is too high: " - ULINTPF, cfg->m_n_indexes); - cfg->m_n_indexes = 0; - - return(DB_CORRUPTION); - } - - return(row_import_read_index_data(file, thd, cfg)); -} - -/*********************************************************************//** -Read the meta data (table columns) config file. Deserialise the contents of -dict_col_t structure, along with the column name. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_read_columns( -/*====================*/ - FILE* file, /*!< in: file to write to */ - THD* thd, /*!< in/out: session */ - row_import* cfg) /*!< in/out: meta-data read */ -{ - dict_col_t* col; - byte row[sizeof(ib_uint32_t) * 8]; - - /* FIXME: What should the upper limit be? */ - ut_a(cfg->m_n_cols > 0); - ut_a(cfg->m_n_cols < 1024); - - cfg->m_cols = new(std::nothrow) dict_col_t[cfg->m_n_cols]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_8", - delete [] cfg->m_cols; cfg->m_cols = 0;); - - if (cfg->m_cols == 0) { - return(DB_OUT_OF_MEMORY); - } - - cfg->m_col_names = new(std::nothrow) byte* [cfg->m_n_cols]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_9", - delete [] cfg->m_col_names; cfg->m_col_names = 0;); - - if (cfg->m_col_names == 0) { - return(DB_OUT_OF_MEMORY); - } - - memset(cfg->m_cols, 0x0, sizeof(cfg->m_cols) * cfg->m_n_cols); - memset(cfg->m_col_names, 0x0, sizeof(cfg->m_col_names) * cfg->m_n_cols); - - col = cfg->m_cols; - - for (ulint i = 0; i < cfg->m_n_cols; ++i, ++col) { - byte* ptr = row; - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_4", - (void) fseek(file, 0L, SEEK_END);); - - if (fread(row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading table column meta-data."); - - return(DB_IO_ERROR); - } - - col->prtype = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - col->mtype = 
mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - col->len = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - col->mbminmaxlen = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - col->ind = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - col->ord_part = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - col->max_prefix = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - /* Read in the column name as [len, byte array]. The len - includes the NUL byte. */ - - ulint len = mach_read_from_4(ptr); - - /* FIXME: What is the maximum column name length? */ - if (len == 0 || len > 128) { - ib_errf(thd, IB_LOG_LEVEL_ERROR, - ER_IO_READ_ERROR, - "Column name length " ULINTPF ", is invalid", - len); - - return(DB_CORRUPTION); - } - - cfg->m_col_names[i] = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_10", - delete [] cfg->m_col_names[i]; - cfg->m_col_names[i] = 0;); - - if (cfg->m_col_names[i] == 0) { - return(DB_OUT_OF_MEMORY); - } - - dberr_t err; - - err = row_import_cfg_read_string( - file, cfg->m_col_names[i], len); - - if (err != DB_SUCCESS) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while parsing table column name."); - - return(err); - } - } - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Read the contents of the <tablespace>.cfg file. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_read_v1( -/*===============*/ - FILE* file, /*!< in: File to read from */ - THD* thd, /*!< in: session */ - row_import* cfg) /*!< out: meta data */ -{ - byte value[sizeof(ib_uint32_t)]; - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_5", - (void) fseek(file, 0L, SEEK_END);); - - /* Read the hostname where the tablespace was exported. 
*/ - if (fread(value, 1, sizeof(value), file) != sizeof(value)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading meta-data export hostname length."); - - return(DB_IO_ERROR); - } - - ulint len = mach_read_from_4(value); - - /* NUL byte is part of name length. */ - cfg->m_hostname = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_1", - delete [] cfg->m_hostname; cfg->m_hostname = 0;); - - if (cfg->m_hostname == 0) { - return(DB_OUT_OF_MEMORY); - } - - dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len); - - if (err != DB_SUCCESS) { - - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while parsing export hostname."); - - return(err); - } - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_6", - (void) fseek(file, 0L, SEEK_END);); - - /* Read the table name of tablespace that was exported. */ - if (fread(value, 1, sizeof(value), file) != sizeof(value)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading meta-data table name length."); - - return(DB_IO_ERROR); - } - - len = mach_read_from_4(value); - - /* NUL byte is part of name length. 
*/ - cfg->m_table_name = new(std::nothrow) byte[len]; - - /* Trigger OOM */ - DBUG_EXECUTE_IF("ib_import_OOM_2", - delete [] cfg->m_table_name; cfg->m_table_name = 0;); - - if (cfg->m_table_name == 0) { - return(DB_OUT_OF_MEMORY); - } - - err = row_import_cfg_read_string(file, cfg->m_table_name, len); - - if (err != DB_SUCCESS) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while parsing table name."); - - return(err); - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Importing tablespace for table '%s' that was exported " - "from host '%s'", cfg->m_table_name, cfg->m_hostname); - - byte row[sizeof(ib_uint32_t) * 3]; - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_7", - (void) fseek(file, 0L, SEEK_END);); - - /* Read the autoinc value. */ - if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading autoinc value."); - - return(DB_IO_ERROR); - } - - cfg->m_autoinc = mach_read_from_8(row); - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_8", - (void) fseek(file, 0L, SEEK_END);); - - /* Read the tablespace page size. */ - if (fread(row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading meta-data header."); - - return(DB_IO_ERROR); - } - - byte* ptr = row; - - cfg->m_page_size = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - if (cfg->m_page_size != UNIV_PAGE_SIZE) { - - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH, - "Tablespace to be imported has a different " - "page size than this server. 
Server page size " - "is " ULINTPF ", whereas tablespace page size is " - ULINTPF, - UNIV_PAGE_SIZE, cfg->m_page_size); - - return(DB_ERROR); - } - - cfg->m_flags = mach_read_from_4(ptr); - ptr += sizeof(ib_uint32_t); - - cfg->m_n_cols = mach_read_from_4(ptr); - - if (!dict_tf_is_valid(cfg->m_flags)) { - - return(DB_CORRUPTION); - - } else if ((err = row_import_read_columns(file, thd, cfg)) - != DB_SUCCESS) { - - return(err); - - } else if ((err = row_import_read_indexes(file, thd, cfg)) - != DB_SUCCESS) { - - return(err); - } - - ut_a(err == DB_SUCCESS); - return(err); -} - -/** -Read the contents of the <tablespace>.cfg file. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_read_meta_data( -/*======================*/ - dict_table_t* table, /*!< in: table */ - FILE* file, /*!< in: File to read from */ - THD* thd, /*!< in: session */ - row_import& cfg) /*!< out: contents of the .cfg file */ -{ - byte row[sizeof(ib_uint32_t)]; - - /* Trigger EOF */ - DBUG_EXECUTE_IF("ib_import_io_read_error_9", - (void) fseek(file, 0L, SEEK_END);); - - if (fread(&row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - errno, strerror(errno), - "while reading meta-data version."); - - return(DB_IO_ERROR); - } - - cfg.m_version = mach_read_from_4(row); - - /* Check the version number. */ - switch (cfg.m_version) { - case IB_EXPORT_CFG_VERSION_V1: - - return(row_import_read_v1(file, thd, &cfg)); - default: - ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR, - "Unsupported meta-data version number (" ULINTPF "), " - "file ignored", cfg.m_version); - } - - return(DB_ERROR); -} - -/** -Read the contents of the <tablename>.cfg file. -@return DB_SUCCESS or error code. 
*/ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_import_read_cfg( -/*================*/ - dict_table_t* table, /*!< in: table */ - THD* thd, /*!< in: session */ - row_import& cfg) /*!< out: contents of the .cfg file */ -{ - dberr_t err; - char name[OS_FILE_MAX_PATH]; - - cfg.m_table = table; - - srv_get_meta_data_filename(table, name, sizeof(name)); - - FILE* file = fopen(name, "rb"); - - if (file == NULL) { - char msg[BUFSIZ]; - - ut_snprintf(msg, sizeof(msg), - "Error opening '%s', will attempt to import " - "without schema verification", name); - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR, - errno, strerror(errno), msg); - - cfg.m_missing = true; - - err = DB_FAIL; - } else { - - cfg.m_missing = false; - - err = row_import_read_meta_data(table, file, thd, cfg); - fclose(file); - } - - return(err); -} - -/*****************************************************************//** -Update the <space, root page> of a table's indexes from the values -in the data dictionary. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_import_update_index_root( -/*=========================*/ - trx_t* trx, /*!< in/out: transaction that - covers the update */ - const dict_table_t* table, /*!< in: Table for which we want - to set the root page_no */ - bool reset, /*!< in: if true then set to - FIL_NUL */ - bool dict_locked) /*!< in: Set to true if the - caller already owns the - dict_sys_t:: mutex. 
*/ - -{ - const dict_index_t* index; - que_t* graph = 0; - dberr_t err = DB_SUCCESS; - - static const char sql[] = { - "PROCEDURE UPDATE_INDEX_ROOT() IS\n" - "BEGIN\n" - "UPDATE SYS_INDEXES\n" - "SET SPACE = :space,\n" - " PAGE_NO = :page,\n" - " TYPE = :type\n" - "WHERE TABLE_ID = :table_id AND ID = :index_id;\n" - "END;\n"}; - - if (!dict_locked) { - mutex_enter(&dict_sys->mutex); - } - - for (index = dict_table_get_first_index(table); - index != 0; - index = dict_table_get_next_index(index)) { - - pars_info_t* info; - ib_uint32_t page; - ib_uint32_t space; - ib_uint32_t type; - index_id_t index_id; - table_id_t table_id; - - info = (graph != 0) ? graph->info : pars_info_create(); - - mach_write_to_4( - reinterpret_cast<byte*>(&type), - index->type); - - mach_write_to_4( - reinterpret_cast<byte*>(&page), - reset ? FIL_NULL : index->page); - - mach_write_to_4( - reinterpret_cast<byte*>(&space), - reset ? FIL_NULL : index->space); - - mach_write_to_8( - reinterpret_cast<byte*>(&index_id), - index->id); - - mach_write_to_8( - reinterpret_cast<byte*>(&table_id), - table->id); - - /* If we set the corrupt bit during the IMPORT phase then - we need to update the system tables. 
*/ - pars_info_bind_int4_literal(info, "type", &type); - pars_info_bind_int4_literal(info, "space", &space); - pars_info_bind_int4_literal(info, "page", &page); - pars_info_bind_ull_literal(info, "index_id", &index_id); - pars_info_bind_ull_literal(info, "table_id", &table_id); - - if (graph == 0) { - graph = pars_sql(info, sql); - ut_a(graph); - graph->trx = trx; - } - - que_thr_t* thr; - - graph->fork_type = QUE_FORK_MYSQL_INTERFACE; - - ut_a(thr = que_fork_start_command(graph)); - - que_run_threads(thr); - - DBUG_EXECUTE_IF("ib_import_internal_error", - trx->error_state = DB_ERROR;); - - err = trx->error_state; - - if (err != DB_SUCCESS) { - char index_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - index_name, sizeof(index_name), - index->name, TRUE); - - ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_INTERNAL_ERROR, - "While updating the <space, root page " - "number> of index %s - %s", - index_name, ut_strerr(err)); - - break; - } - } - - que_graph_free(graph); - - if (!dict_locked) { - mutex_exit(&dict_sys->mutex); - } - - return(err); -} - -/** Callback arg for row_import_set_discarded. */ -struct discard_t { - ib_uint32_t flags2; /*!< Value read from column */ - bool state; /*!< New state of the flag */ - ulint n_recs; /*!< Number of recs processed */ -}; - -/******************************************************************//** -Fetch callback that sets or unsets the DISCARDED tablespace flag in -SYS_TABLES. The flags is stored in MIX_LEN column. 
-@return FALSE if all OK */ -static -ibool -row_import_set_discarded( -/*=====================*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: bool set/unset flag */ -{ - sel_node_t* node = static_cast<sel_node_t*>(row); - discard_t* discard = static_cast<discard_t*>(user_arg); - dfield_t* dfield = que_node_get_val(node->select_list); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(len == sizeof(ib_uint32_t)); - - ulint flags2 = mach_read_from_4( - static_cast<byte*>(dfield_get_data(dfield))); - - if (discard->state) { - flags2 |= DICT_TF2_DISCARDED; - } else { - flags2 &= ~DICT_TF2_DISCARDED; - } - - mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2); - - ++discard->n_recs; - - /* There should be at most one matching record. */ - ut_a(discard->n_recs == 1); - - return(FALSE); -} - -/*****************************************************************//** -Update the DICT_TF2_DISCARDED flag in SYS_TABLES. -@return DB_SUCCESS or error code. */ -UNIV_INTERN -dberr_t -row_import_update_discarded_flag( -/*=============================*/ - trx_t* trx, /*!< in/out: transaction that - covers the update */ - table_id_t table_id, /*!< in: Table for which we want - to set the root table->flags2 */ - bool discarded, /*!< in: set MIX_LEN column bit - to discarded, if true */ - bool dict_locked) /*!< in: set to true if the - caller already owns the - dict_sys_t:: mutex. 
*/ - -{ - pars_info_t* info; - discard_t discard; - - static const char sql[] = - "PROCEDURE UPDATE_DISCARDED_FLAG() IS\n" - "DECLARE FUNCTION my_func;\n" - "DECLARE CURSOR c IS\n" - " SELECT MIX_LEN " - " FROM SYS_TABLES " - " WHERE ID = :table_id FOR UPDATE;" - "\n" - "BEGIN\n" - "OPEN c;\n" - "WHILE 1 = 1 LOOP\n" - " FETCH c INTO my_func();\n" - " IF c % NOTFOUND THEN\n" - " EXIT;\n" - " END IF;\n" - "END LOOP;\n" - "UPDATE SYS_TABLES" - " SET MIX_LEN = :flags2" - " WHERE ID = :table_id;\n" - "CLOSE c;\n" - "END;\n"; - - discard.n_recs = 0; - discard.state = discarded; - discard.flags2 = ULINT32_UNDEFINED; - - info = pars_info_create(); - - pars_info_add_ull_literal(info, "table_id", table_id); - pars_info_bind_int4_literal(info, "flags2", &discard.flags2); - - pars_info_bind_function( - info, "my_func", row_import_set_discarded, &discard); - - dberr_t err = que_eval_sql(info, sql, !dict_locked, trx); - - ut_a(discard.n_recs == 1); - ut_a(discard.flags2 != ULINT32_UNDEFINED); - - return(err); -} - -/*****************************************************************//** -Imports a tablespace. The space id in the .ibd file must match the space id -of the table in the data dictionary. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_import_for_mysql( -/*=================*/ - dict_table_t* table, /*!< in/out: table */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */ -{ - dberr_t err; - trx_t* trx; - ib_uint64_t autoinc = 0; - char table_name[MAX_FULL_NAME_LEN + 1]; - char* filepath = NULL; - - ut_ad(!srv_read_only_mode); - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - ut_a(table->space); - ut_ad(prebuilt->trx); - ut_a(table->file_unreadable); - - trx_start_if_not_started(prebuilt->trx); - - trx = trx_allocate_for_mysql(); - - /* So that the table is not DROPped during recovery. 
*/ - trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); - - trx_start_if_not_started(trx); - - /* So that we can send error messages to the user. */ - trx->mysql_thd = prebuilt->trx->mysql_thd; - - /* Ensure that the table will be dropped by trx_rollback_active() - in case of a crash. */ - - trx->table_id = table->id; - - /* Assign an undo segment for the transaction, so that the - transaction will be recovered after a crash. */ - - mutex_enter(&trx->undo_mutex); - - err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); - - mutex_exit(&trx->undo_mutex); - - DBUG_EXECUTE_IF("ib_import_undo_assign_failure", - err = DB_TOO_MANY_CONCURRENT_TRXS;); - - if (err != DB_SUCCESS) { - - return(row_import_cleanup(prebuilt, trx, err)); - - } else if (trx->update_undo == 0) { - - err = DB_TOO_MANY_CONCURRENT_TRXS; - return(row_import_cleanup(prebuilt, trx, err)); - } - - prebuilt->trx->op_info = "read meta-data file"; - - /* Prevent DDL operations while we are checking. */ - - rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__); - - row_import cfg; - - memset(&cfg, 0x0, sizeof(cfg)); - - err = row_import_read_cfg(table, trx->mysql_thd, cfg); - - /* Check if the table column definitions match the contents - of the config file. */ - - if (err == DB_SUCCESS) { - - /* We have a schema file, try and match it with the our - data dictionary. */ - - err = cfg.match_schema(trx->mysql_thd); - - /* Update index->page and SYS_INDEXES.PAGE_NO to match the - B-tree root page numbers in the tablespace. Use the index - name from the .cfg file to find match. 
*/ - - if (err == DB_SUCCESS) { - cfg.set_root_by_name(); - autoinc = cfg.m_autoinc; - } - - rw_lock_s_unlock_gen(&dict_operation_lock, 0); - - DBUG_EXECUTE_IF("ib_import_set_index_root_failure", - err = DB_TOO_MANY_CONCURRENT_TRXS;); - - } else if (cfg.m_missing) { - - rw_lock_s_unlock_gen(&dict_operation_lock, 0); - - /* We don't have a schema file, we will have to discover - the index root pages from the .ibd file and skip the schema - matching step. */ - - ut_a(err == DB_FAIL); - - cfg.m_page_size = UNIV_PAGE_SIZE; - - FetchIndexRootPages fetchIndexRootPages(table, trx); - - err = fil_tablespace_iterate( - table, IO_BUFFER_SIZE(cfg.m_page_size), - fetchIndexRootPages); - - if (err == DB_SUCCESS) { - - err = fetchIndexRootPages.build_row_import(&cfg); - - /* Update index->page and SYS_INDEXES.PAGE_NO - to match the B-tree root page numbers in the - tablespace. */ - - if (err == DB_SUCCESS) { - err = cfg.set_root_by_heuristic(); - } - } - - } else { - rw_lock_s_unlock_gen(&dict_operation_lock, 0); - } - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - - prebuilt->trx->op_info = "importing tablespace"; - - ib_logf(IB_LOG_LEVEL_INFO, "Phase I - Update all pages"); - - /* Iterate over all the pages and do the sanity checking and - the conversion required to import the tablespace. */ - - PageConverter converter(&cfg, trx); - - /* Set the IO buffer size in pages. 
*/ - - err = fil_tablespace_iterate( - table, IO_BUFFER_SIZE(cfg.m_page_size), converter); - - DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure", - err = DB_TOO_MANY_CONCURRENT_TRXS;); - - if (err != DB_SUCCESS) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - if (err != DB_DECRYPTION_FAILED) { - - ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_INTERNAL_ERROR, - "Cannot reset LSNs in table '%s' : %s", - table_name, ut_strerr(err)); - } - - return(row_import_cleanup(prebuilt, trx, err)); - } - - row_mysql_lock_data_dictionary(trx); - - /* If the table is stored in a remote tablespace, we need to - determine that filepath from the link file and system tables. - Find the space ID in SYS_TABLES since this is an ALTER TABLE. */ - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - dict_get_and_save_data_dir_path(table, true); - ut_a(table->data_dir_path); - - filepath = os_file_make_remote_pathname( - table->data_dir_path, table->name, "ibd"); - } else { - filepath = fil_make_ibd_name(table->name, false); - } - ut_a(filepath); - - /* Open the tablespace so that we can access via the buffer pool. - We set the 2nd param (fix_dict = true) here because we already - have an x-lock on dict_operation_lock and dict_sys->mutex. 
*/ - - err = fil_open_single_table_tablespace( - true, true, table->space, - dict_tf_to_fsp_flags(table->flags), - table->name, filepath); - - DBUG_EXECUTE_IF("ib_import_open_tablespace_failure", - err = DB_TABLESPACE_NOT_FOUND;); - - if (err != DB_SUCCESS) { - row_mysql_unlock_data_dictionary(trx); - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_GET_ERRMSG, - err, ut_strerr(err), filepath); - - mem_free(filepath); - - return(row_import_cleanup(prebuilt, trx, err)); - } - - row_mysql_unlock_data_dictionary(trx); - - mem_free(filepath); - - err = ibuf_check_bitmap_on_import(trx, table->space); - - DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;); - - if (err != DB_SUCCESS) { - return(row_import_cleanup(prebuilt, trx, err)); - } - - /* The first index must always be the clustered index. */ - - dict_index_t* index = dict_table_get_first_index(table); - - if (!dict_index_is_clust(index)) { - return(row_import_error(prebuilt, trx, DB_CORRUPTION)); - } - - /* Update the Btree segment headers for index node and - leaf nodes in the root page. Set the new space id. */ - - err = btr_root_adjust_on_import(index); - - DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure", - err = DB_CORRUPTION;); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } else if (cfg.requires_purge(index->name)) { - - /* Purge any delete-marked records that couldn't be - purged during the page conversion phase from the - cluster index. */ - - IndexPurge purge(trx, index); - - trx->op_info = "cluster: purging delete marked records"; - - err = purge.garbage_collect(); - - trx->op_info = ""; - } - - DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - - /* For secondary indexes, purge any records that couldn't be purged - during the page conversion phase. 
*/ - - err = row_import_adjust_root_pages_of_secondary_indexes( - prebuilt, trx, table, cfg); - - DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure", - err = DB_CORRUPTION;); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - - /* Ensure that the next available DB_ROW_ID is not smaller than - any DB_ROW_ID stored in the table. */ - - if (prebuilt->clust_index_was_generated) { - - err = row_import_set_sys_max_row_id(prebuilt, table); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - } - - ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush changes to disk"); - - /* Ensure that all pages dirtied during the IMPORT make it to disk. - The only dirty pages generated should be from the pessimistic purge - of delete marked records that couldn't be purged in Phase I. */ - - buf_LRU_flush_or_remove_pages( - prebuilt->table->space, BUF_REMOVE_FLUSH_WRITE, trx); - - if (trx_is_interrupted(trx)) { - ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted"); - return(row_import_error(prebuilt, trx, DB_INTERRUPTED)); - } else { - ib_logf(IB_LOG_LEVEL_INFO, "Phase IV - Flush complete"); - } - - /* The dictionary latches will be released in in row_import_cleanup() - after the transaction commit, for both success and error. */ - - row_mysql_lock_data_dictionary(trx); - - /* Update the root pages of the table's indexes. */ - err = row_import_update_index_root(trx, table, false, true); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - - /* Update the table's discarded flag, unset it. 
*/ - err = row_import_update_discarded_flag(trx, table->id, false, true); - - if (err != DB_SUCCESS) { - return(row_import_error(prebuilt, trx, err)); - } - - table->file_unreadable = false; - table->flags2 &= ~DICT_TF2_DISCARDED; - - if (autoinc != 0) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - ib_logf(IB_LOG_LEVEL_INFO, "%s autoinc value set to " IB_ID_FMT, - table_name, autoinc); - - dict_table_autoinc_lock(table); - dict_table_autoinc_initialize(table, autoinc); - dict_table_autoinc_unlock(table); - } - - ut_a(err == DB_SUCCESS); - - return(row_import_cleanup(prebuilt, trx, err)); -} - diff --git a/storage/xtradb/row/row0ins.cc b/storage/xtradb/row/row0ins.cc deleted file mode 100644 index 6072b303d3a..00000000000 --- a/storage/xtradb/row/row0ins.cc +++ /dev/null @@ -1,3458 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0ins.cc -Insert into a table - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#include "row0ins.h" - -#ifdef UNIV_NONINL -#include "row0ins.ic" -#endif - -#include "ha_prototypes.h" -#include "dict0dict.h" -#include "dict0boot.h" -#include "trx0rec.h" -#include "trx0undo.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "mach0data.h" -#include "que0que.h" -#include "row0upd.h" -#include "row0sel.h" -#include "row0row.h" -#include "row0log.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "log0log.h" -#include "eval0eval.h" -#include "data0data.h" -#include "usr0sess.h" -#include "buf0lru.h" -#include "fts0fts.h" -#include "fts0types.h" -#include "m_string.h" - -/************************************************************************* -IMPORTANT NOTE: Any operation that generates redo MUST check that there -is enough space in the redo log before for that operation. This is -done by calling log_free_check(). The reason for checking the -availability of the redo log space before the start of the operation is -that we MUST not hold any synchonization objects when performing the -check. -If you make a change in this module make sure that no codepath is -introduced where a call to log_free_check() is bypassed. */ - -/*********************************************************************//** -Creates an insert node struct. -@return own: insert node struct */ -UNIV_INTERN -ins_node_t* -ins_node_create( -/*============*/ - ulint ins_type, /*!< in: INS_VALUES, ... 
*/ - dict_table_t* table, /*!< in: table where to insert */ - mem_heap_t* heap) /*!< in: mem heap where created */ -{ - ins_node_t* node; - - node = static_cast<ins_node_t*>( - mem_heap_alloc(heap, sizeof(ins_node_t))); - - node->common.type = QUE_NODE_INSERT; - - node->ins_type = ins_type; - - node->state = INS_NODE_SET_IX_LOCK; - node->table = table; - node->index = NULL; - node->entry = NULL; - - node->select = NULL; - - node->trx_id = 0; - - node->entry_sys_heap = mem_heap_create(128); - - node->magic_n = INS_NODE_MAGIC_N; - - return(node); -} - -/***********************************************************//** -Creates an entry template for each index of a table. */ -static -void -ins_node_create_entry_list( -/*=======================*/ - ins_node_t* node) /*!< in: row insert node */ -{ - dict_index_t* index; - dtuple_t* entry; - - ut_ad(node->entry_sys_heap); - - UT_LIST_INIT(node->entry_list); - - /* We will include all indexes (include those corrupted - secondary indexes) in the entry list. Filteration of - these corrupted index will be done in row_ins() */ - - for (index = dict_table_get_first_index(node->table); - index != 0; - index = dict_table_get_next_index(index)) { - - entry = row_build_index_entry( - node->row, NULL, index, node->entry_sys_heap); - - UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry); - } -} - -/*****************************************************************//** -Adds system field buffers to a row. */ -static -void -row_ins_alloc_sys_fields( -/*=====================*/ - ins_node_t* node) /*!< in: insert node */ -{ - dtuple_t* row; - dict_table_t* table; - mem_heap_t* heap; - const dict_col_t* col; - dfield_t* dfield; - byte* ptr; - - row = node->row; - table = node->table; - heap = node->entry_sys_heap; - - ut_ad(row && table && heap); - ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table)); - - /* allocate buffer to hold the needed system created hidden columns. 
*/ - uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; - ptr = static_cast<byte*>(mem_heap_zalloc(heap, len)); - - /* 1. Populate row-id */ - col = dict_table_get_sys_col(table, DATA_ROW_ID); - - dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - - dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN); - - node->row_id_buf = ptr; - - ptr += DATA_ROW_ID_LEN; - - /* 2. Populate trx id */ - col = dict_table_get_sys_col(table, DATA_TRX_ID); - - dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - - dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN); - - node->trx_id_buf = ptr; - - ptr += DATA_TRX_ID_LEN; - - /* 3. Populate roll ptr */ - - col = dict_table_get_sys_col(table, DATA_ROLL_PTR); - - dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - - dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN); -} - -/*********************************************************************//** -Sets a new row to insert for an INS_DIRECT node. This function is only used -if we have constructed the row separately, which is a rare case; this -function is quite slow. */ -UNIV_INTERN -void -ins_node_set_new_row( -/*=================*/ - ins_node_t* node, /*!< in: insert node */ - dtuple_t* row) /*!< in: new row (or first row) for the node */ -{ - node->state = INS_NODE_SET_IX_LOCK; - node->index = NULL; - node->entry = NULL; - - node->row = row; - - mem_heap_empty(node->entry_sys_heap); - - /* Create templates for index entries */ - - ins_node_create_entry_list(node); - - /* Allocate from entry_sys_heap buffers for sys fields */ - - row_ins_alloc_sys_fields(node); - - /* As we allocated a new trx id buf, the trx id should be written - there again: */ - - node->trx_id = 0; -} - -/*******************************************************************//** -Does an insert operation by updating a delete-marked existing record -in the index. This situation can occur if the delete-marked record is -kept in the index for consistent reads. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_sec_index_entry_by_modify( -/*==============================*/ - ulint flags, /*!< in: undo logging and locking flags */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether mtr holds just a leaf - latch or also a tree latch */ - btr_cur_t* cursor, /*!< in: B-tree cursor */ - ulint** offsets,/*!< in/out: offsets on cursor->page_cur.rec */ - mem_heap_t* offsets_heap, - /*!< in/out: memory heap that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - const dtuple_t* entry, /*!< in: index entry to insert */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in: mtr; must be committed before - latching any further pages */ -{ - big_rec_t* dummy_big_rec; - upd_t* update; - rec_t* rec; - dberr_t err; - - rec = btr_cur_get_rec(cursor); - - ut_ad(!dict_index_is_clust(cursor->index)); - ut_ad(rec_offs_validate(rec, cursor->index, *offsets)); - ut_ad(!entry->info_bits); - - /* We know that in the alphabetical ordering, entry and rec are - identified. But in their binary form there may be differences if - there are char fields in them. Therefore we have to calculate the - difference. */ - - update = row_upd_build_sec_rec_difference_binary( - rec, cursor->index, *offsets, entry, heap); - - /* If operating in fake_change mode then flow will not mark the record - deleted but will still assume it and take delete-mark path. Condition - below has a different path if record is not marked deleted but we need - to still by-pass it given that original flow has taken this path for - fake_change mode execution assuming record is delete-marked. */ - if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets)) - && UNIV_UNLIKELY(!thr_get_trx(thr)->fake_changes)) { - /* We should never insert in place of a record that - has not been delete-marked. 
The only exception is when - online CREATE INDEX copied the changes that we already - made to the clustered index, and completed the - secondary index creation before we got here. In this - case, the change would already be there. The CREATE - INDEX should be waiting for a MySQL meta-data lock - upgrade at least until this INSERT or UPDATE - returns. After that point, the TEMP_INDEX_PREFIX - would be dropped from the index name in - commit_inplace_alter_table(). */ - ut_a(update->n_fields == 0); - ut_a(*cursor->index->name == TEMP_INDEX_PREFIX); - ut_ad(!dict_index_is_online_ddl(cursor->index)); - return(DB_SUCCESS); - } - - if (mode == BTR_MODIFY_LEAF) { - /* Try an optimistic updating of the record, keeping changes - within the page */ - - /* TODO: pass only *offsets */ - err = btr_cur_optimistic_update( - flags | BTR_KEEP_SYS_FLAG, cursor, - offsets, &offsets_heap, update, 0, thr, - thr_get_trx(thr)->id, mtr); - switch (err) { - case DB_OVERFLOW: - case DB_UNDERFLOW: - case DB_ZIP_OVERFLOW: - err = DB_FAIL; - default: - break; - } - } else { - ut_a(mode == BTR_MODIFY_TREE); - if (buf_LRU_buf_pool_running_out()) { - - return(DB_LOCK_TABLE_FULL); - } - - err = btr_cur_pessimistic_update( - flags | BTR_KEEP_SYS_FLAG, cursor, - offsets, &offsets_heap, - heap, &dummy_big_rec, update, 0, - thr, thr_get_trx(thr)->id, mtr); - ut_ad(!dummy_big_rec); - } - - return(err); -} - -/*******************************************************************//** -Does an insert operation by delete unmarking and updating a delete marked -existing record in the index. This situation can occur if the delete marked -record is kept in the index for consistent reads. 
-@return DB_SUCCESS, DB_FAIL, or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_clust_index_entry_by_modify( -/*================================*/ - ulint flags, /*!< in: undo logging and locking flags */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether mtr holds just a leaf - latch or also a tree latch */ - btr_cur_t* cursor, /*!< in: B-tree cursor */ - ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ - mem_heap_t** offsets_heap, - /*!< in/out: pointer to memory heap that can - be emptied, or NULL */ - mem_heap_t* heap, /*!< in/out: memory heap */ - big_rec_t** big_rec,/*!< out: possible big rec vector of fields - which have to be stored externally by the - caller */ - const dtuple_t* entry, /*!< in: index entry to insert */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in: mtr; must be committed before - latching any further pages */ -{ - const rec_t* rec; - const upd_t* update; - dberr_t err; - - ut_ad(dict_index_is_clust(cursor->index)); - - *big_rec = NULL; - - rec = btr_cur_get_rec(cursor); - - ut_ad(rec_get_deleted_flag(rec, - dict_table_is_comp(cursor->index->table))); - - /* Build an update vector containing all the fields to be modified; - NOTE that this vector may NOT contain system columns trx_id or - roll_ptr */ - - update = row_upd_build_difference_binary( - cursor->index, entry, rec, NULL, true, - thr_get_trx(thr), heap); - if (mode != BTR_MODIFY_TREE) { - ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF); - - /* Try optimistic updating of the record, keeping changes - within the page */ - - err = btr_cur_optimistic_update( - flags, cursor, offsets, offsets_heap, update, 0, thr, - thr_get_trx(thr)->id, mtr); - switch (err) { - case DB_OVERFLOW: - case DB_UNDERFLOW: - case DB_ZIP_OVERFLOW: - err = DB_FAIL; - default: - break; - } - } else { - if (buf_LRU_buf_pool_running_out()) { - - return(DB_LOCK_TABLE_FULL); - - } - err = 
btr_cur_pessimistic_update( - flags | BTR_KEEP_POS_FLAG, - cursor, offsets, offsets_heap, heap, - big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr); - } - - return(err); -} - -/*********************************************************************//** -Returns TRUE if in a cascaded update/delete an ancestor node of node -updates (not DELETE, but UPDATE) table. -@return TRUE if an ancestor updates table */ -static -ibool -row_ins_cascade_ancestor_updates_table( -/*===================================*/ - que_node_t* node, /*!< in: node in a query graph */ - dict_table_t* table) /*!< in: table */ -{ - que_node_t* parent; - - for (parent = que_node_get_parent(node); - que_node_get_type(parent) == QUE_NODE_UPDATE; - parent = que_node_get_parent(parent)) { - - upd_node_t* upd_node; - - upd_node = static_cast<upd_node_t*>(parent); - - if (upd_node->table == table && upd_node->is_delete == FALSE) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*********************************************************************//** -Returns the number of ancestor UPDATE or DELETE nodes of a -cascaded update/delete node. -@return number of ancestors */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ulint -row_ins_cascade_n_ancestors( -/*========================*/ - que_node_t* node) /*!< in: node in a query graph */ -{ - que_node_t* parent; - ulint n_ancestors = 0; - - for (parent = que_node_get_parent(node); - que_node_get_type(parent) == QUE_NODE_UPDATE; - parent = que_node_get_parent(parent)) { - - n_ancestors++; - } - - return(n_ancestors); -} - -/******************************************************************//** -Calculates the update vector node->cascade->update for a child table in -a cascaded update. 
-@return number of fields in the calculated update vector; the value -can also be 0 if no foreign key fields changed; the returned value is -ULINT_UNDEFINED if the column type in the child table is too short to -fit the new value in the parent table: that means the update fails */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ulint -row_ins_cascade_calc_update_vec( -/*============================*/ - upd_node_t* node, /*!< in: update node of the parent - table */ - dict_foreign_t* foreign, /*!< in: foreign key constraint whose - type is != 0 */ - mem_heap_t* heap, /*!< in: memory heap to use as - temporary storage */ - trx_t* trx, /*!< in: update transaction */ - ibool* fts_col_affected)/*!< out: is FTS column affected */ -{ - upd_node_t* cascade = node->cascade_node; - dict_table_t* table = foreign->foreign_table; - dict_index_t* index = foreign->foreign_index; - upd_t* update; - dict_table_t* parent_table; - dict_index_t* parent_index; - upd_t* parent_update; - ulint n_fields_updated; - ulint parent_field_no; - ulint i; - ulint j; - ibool doc_id_updated = FALSE; - ulint doc_id_pos = 0; - doc_id_t new_doc_id = FTS_NULL_DOC_ID; - - ut_a(node); - ut_a(foreign); - ut_a(cascade); - ut_a(table); - ut_a(index); - - /* Calculate the appropriate update vector which will set the fields - in the child index record to the same value (possibly padded with - spaces if the column is a fixed length CHAR or FIXBINARY column) as - the referenced index record will get in the update. 
*/ - - parent_table = node->table; - ut_a(parent_table == foreign->referenced_table); - parent_index = foreign->referenced_index; - parent_update = node->update; - - update = cascade->update; - - update->info_bits = 0; - update->n_fields = foreign->n_fields; - - n_fields_updated = 0; - - *fts_col_affected = FALSE; - - if (table->fts) { - doc_id_pos = dict_table_get_nth_col_pos( - table, table->fts->doc_col); - } - - for (i = 0; i < foreign->n_fields; i++) { - - parent_field_no = dict_table_get_nth_col_pos( - parent_table, - dict_index_get_nth_col_no(parent_index, i)); - - for (j = 0; j < parent_update->n_fields; j++) { - const upd_field_t* parent_ufield - = &parent_update->fields[j]; - - if (parent_ufield->field_no == parent_field_no) { - - ulint min_size; - const dict_col_t* col; - ulint ufield_len; - upd_field_t* ufield; - - col = dict_index_get_nth_col(index, i); - - /* A field in the parent index record is - updated. Let us make the update vector - field for the child table. */ - - ufield = update->fields + n_fields_updated; - - ufield->field_no - = dict_table_get_nth_col_pos( - table, dict_col_get_no(col)); - - ufield->orig_len = 0; - ufield->exp = NULL; - - ufield->new_val = parent_ufield->new_val; - ufield_len = dfield_get_len(&ufield->new_val); - - /* Clear the "external storage" flag */ - dfield_set_len(&ufield->new_val, ufield_len); - - /* Do not allow a NOT NULL column to be - updated as NULL */ - - if (dfield_is_null(&ufield->new_val) - && (col->prtype & DATA_NOT_NULL)) { - - return(ULINT_UNDEFINED); - } - - /* If the new value would not fit in the - column, do not allow the update */ - - if (!dfield_is_null(&ufield->new_val) - && dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, - col->len, - ufield_len, - static_cast<char*>( - dfield_get_data( - &ufield->new_val))) - < ufield_len) { - - return(ULINT_UNDEFINED); - } - - /* If the parent column type has a different - length than the child column type, we may - need to pad with spaces the new 
value of the - child column */ - - min_size = dict_col_get_min_size(col); - - /* Because UNIV_SQL_NULL (the marker - of SQL NULL values) exceeds all possible - values of min_size, the test below will - not hold for SQL NULL columns. */ - - if (min_size > ufield_len) { - - byte* pad; - ulint pad_len; - byte* padded_data; - ulint mbminlen; - - padded_data = static_cast<byte*>( - mem_heap_alloc( - heap, min_size)); - - pad = padded_data + ufield_len; - pad_len = min_size - ufield_len; - - memcpy(padded_data, - dfield_get_data(&ufield - ->new_val), - ufield_len); - - mbminlen = dict_col_get_mbminlen(col); - - ut_ad(!(ufield_len % mbminlen)); - ut_ad(!(min_size % mbminlen)); - - if (mbminlen == 1 - && dtype_get_charset_coll( - col->prtype) - == DATA_MYSQL_BINARY_CHARSET_COLL) { - /* Do not pad BINARY columns */ - return(ULINT_UNDEFINED); - } - - row_mysql_pad_col(mbminlen, - pad, pad_len); - dfield_set_data(&ufield->new_val, - padded_data, min_size); - } - - /* Check whether the current column has - FTS index on it */ - if (table->fts - && dict_table_is_fts_column( - table->fts->indexes, - dict_col_get_no(col)) - != ULINT_UNDEFINED) { - *fts_col_affected = TRUE; - } - - /* If Doc ID is updated, check whether the - Doc ID is valid */ - if (table->fts - && ufield->field_no == doc_id_pos) { - doc_id_t n_doc_id; - - n_doc_id = - table->fts->cache->next_doc_id; - - new_doc_id = fts_read_doc_id( - static_cast<const byte*>( - dfield_get_data( - &ufield->new_val))); - - if (new_doc_id <= 0) { - fprintf(stderr, - "InnoDB: FTS Doc ID " - "must be larger than " - "0 \n"); - return(ULINT_UNDEFINED); - } - - if (new_doc_id < n_doc_id) { - fprintf(stderr, - "InnoDB: FTS Doc ID " - "must be larger than " - IB_ID_FMT" for table", - n_doc_id -1); - - ut_print_name(stderr, trx, - TRUE, - table->name); - - putc('\n', stderr); - return(ULINT_UNDEFINED); - } - - *fts_col_affected = TRUE; - doc_id_updated = TRUE; - } - - n_fields_updated++; - } - } - } - - /* Generate a new Doc ID if FTS 
index columns get updated */ - if (table->fts && *fts_col_affected) { - if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - doc_id_t doc_id; - upd_field_t* ufield; - - ut_ad(!doc_id_updated); - ufield = update->fields + n_fields_updated; - fts_get_next_doc_id(table, &trx->fts_next_doc_id); - doc_id = fts_update_doc_id(table, ufield, - &trx->fts_next_doc_id); - n_fields_updated++; - fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL); - } else { - if (doc_id_updated) { - ut_ad(new_doc_id); - fts_trx_add_op(trx, table, new_doc_id, - FTS_INSERT, NULL); - } else { - fprintf(stderr, "InnoDB: FTS Doc ID must be " - "updated along with FTS indexed " - "column for table "); - ut_print_name(stderr, trx, TRUE, table->name); - putc('\n', stderr); - return(ULINT_UNDEFINED); - } - } - } - - update->n_fields = n_fields_updated; - - return(n_fields_updated); -} - -/*********************************************************************//** -Set detailed error message associated with foreign key errors for -the given transaction. */ -static -void -row_ins_set_detailed( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - dict_foreign_t* foreign) /*!< in: foreign key constraint */ -{ - ut_ad(!srv_read_only_mode); - - mutex_enter(&srv_misc_tmpfile_mutex); - rewind(srv_misc_tmpfile); - - if (os_file_set_eof(srv_misc_tmpfile)) { - std::string fk_str; - ut_print_name(srv_misc_tmpfile, trx, TRUE, - foreign->foreign_table_name); - fk_str = dict_print_info_on_foreign_key_in_create_format( - trx, foreign, FALSE); - fputs(fk_str.c_str(), srv_misc_tmpfile); - trx_set_detailed_error_from_file(trx, srv_misc_tmpfile); - } else { - trx_set_detailed_error(trx, "temp file operation failed"); - } - - mutex_exit(&srv_misc_tmpfile_mutex); -} - -/*********************************************************************//** -Acquires dict_foreign_err_mutex, rewinds dict_foreign_err_file -and displays information about the given transaction. 
-The caller must release dict_foreign_err_mutex. */ -static -void -row_ins_foreign_trx_print( -/*======================*/ - trx_t* trx) /*!< in: transaction */ -{ - ulint n_rec_locks; - ulint n_trx_locks; - ulint heap_size; - - if (srv_read_only_mode) { - return; - } - - lock_mutex_enter(); - n_rec_locks = lock_number_of_rows_locked(&trx->lock); - n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); - heap_size = mem_heap_get_size(trx->lock.lock_heap); - lock_mutex_exit(); - - mutex_enter(&trx_sys->mutex); - - mutex_enter(&dict_foreign_err_mutex); - rewind(dict_foreign_err_file); - ut_print_timestamp(dict_foreign_err_file); - fputs(" Transaction:\n", dict_foreign_err_file); - - trx_print_low(dict_foreign_err_file, trx, 600, - n_rec_locks, n_trx_locks, heap_size); - - mutex_exit(&trx_sys->mutex); - - ut_ad(mutex_own(&dict_foreign_err_mutex)); -} - -/*********************************************************************//** -Reports a foreign key error associated with an update or a delete of a -parent table index entry. 
*/ -static -void -row_ins_foreign_report_err( -/*=======================*/ - const char* errstr, /*!< in: error string from the viewpoint - of the parent table */ - que_thr_t* thr, /*!< in: query thread whose run_node - is an update node */ - dict_foreign_t* foreign, /*!< in: foreign key constraint */ - const rec_t* rec, /*!< in: a matching index record in the - child table */ - const dtuple_t* entry) /*!< in: index entry in the parent - table */ -{ - std::string fk_str; - - if (srv_read_only_mode) { - return; - } - - FILE* ef = dict_foreign_err_file; - trx_t* trx = thr_get_trx(thr); - - row_ins_set_detailed(trx, foreign); - - row_ins_foreign_trx_print(trx); - - fputs("Foreign key constraint fails for table ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - fputs(":\n", ef); - fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign, - TRUE); - fputs(fk_str.c_str(), ef); - putc('\n', ef); - fputs(errstr, ef); - fputs(" in parent table, in index ", ef); - ut_print_name(ef, trx, FALSE, foreign->referenced_index->name); - if (entry) { - fputs(" tuple:\n", ef); - dtuple_print(ef, entry); - } - fputs("\nBut in child table ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - fputs(", in index ", ef); - ut_print_name(ef, trx, FALSE, foreign->foreign_index->name); - if (rec) { - fputs(", there is a record:\n", ef); - rec_print(ef, rec, foreign->foreign_index); - } else { - fputs(", the record is not available\n", ef); - } - putc('\n', ef); - - mutex_exit(&dict_foreign_err_mutex); -} - -/*********************************************************************//** -Reports a foreign key error to dict_foreign_err_file when we are trying -to add an index entry to a child table. Note that the adding may be the result -of an update, too. 
*/ -static -void -row_ins_foreign_report_add_err( -/*===========================*/ - trx_t* trx, /*!< in: transaction */ - dict_foreign_t* foreign, /*!< in: foreign key constraint */ - const rec_t* rec, /*!< in: a record in the parent table: - it does not match entry because we - have an error! */ - const dtuple_t* entry) /*!< in: index entry to insert in the - child table */ -{ - std::string fk_str; - - if (srv_read_only_mode) { - return; - } - - FILE* ef = dict_foreign_err_file; - - row_ins_set_detailed(trx, foreign); - - row_ins_foreign_trx_print(trx); - - fputs("Foreign key constraint fails for table ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - fputs(":\n", ef); - fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign, - TRUE); - fputs(fk_str.c_str(), ef); - fputs("\nTrying to add in child table, in index ", ef); - ut_print_name(ef, trx, FALSE, foreign->foreign_index->name); - if (entry) { - fputs(" tuple:\n", ef); - /* TODO: DB_TRX_ID and DB_ROLL_PTR may be uninitialized. - It would be better to only display the user columns. */ - dtuple_print(ef, entry); - } - fputs("\nBut in parent table ", ef); - ut_print_name(ef, trx, TRUE, foreign->referenced_table_name); - fputs(", in index ", ef); - ut_print_name(ef, trx, FALSE, foreign->referenced_index->name); - fputs(",\nthe closest match we can find is record:\n", ef); - if (rec && page_rec_is_supremum(rec)) { - /* If the cursor ended on a supremum record, it is better - to report the previous record in the error message, so that - the user gets a more descriptive error message. */ - rec = page_rec_get_prev_const(rec); - } - - if (rec) { - rec_print(ef, rec, foreign->referenced_index); - } - putc('\n', ef); - - mutex_exit(&dict_foreign_err_mutex); -} - -/*********************************************************************//** -Invalidate the query cache for the given table. 
*/ -static -void -row_ins_invalidate_query_cache( -/*===========================*/ - que_thr_t* thr, /*!< in: query thread whose run_node - is an update node */ - const char* name) /*!< in: table name prefixed with - database name and a '/' character */ -{ - char* buf; - char* ptr; - ulint len = strlen(name) + 1; - - buf = mem_strdupl(name, len); - - ptr = strchr(buf, '/'); - ut_a(ptr); - *ptr = '\0'; - - innobase_invalidate_query_cache(thr_get_trx(thr), buf, len); - mem_free(buf); -} -#ifdef WITH_WSREP -dberr_t wsrep_append_foreign_key(trx_t *trx, - dict_foreign_t* foreign, - const rec_t* clust_rec, - dict_index_t* clust_index, - ibool referenced, - ibool shared); -#endif /* WITH_WSREP */ - -/*********************************************************************//** -Perform referential actions or checks when a parent row is deleted or updated -and the constraint had an ON DELETE or ON UPDATE condition which was not -RESTRICT. -@return DB_SUCCESS, DB_LOCK_WAIT, or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_foreign_check_on_constraint( -/*================================*/ - que_thr_t* thr, /*!< in: query thread whose run_node - is an update node */ - dict_foreign_t* foreign, /*!< in: foreign key constraint whose - type is != 0 */ - btr_pcur_t* pcur, /*!< in: cursor placed on a matching - index record in the child table */ - dtuple_t* entry, /*!< in: index entry in the parent - table */ - mtr_t* mtr) /*!< in: mtr holding the latch of pcur - page */ -{ - upd_node_t* node; - upd_node_t* cascade; - dict_table_t* table = foreign->foreign_table; - dict_index_t* index; - dict_index_t* clust_index; - dtuple_t* ref; - mem_heap_t* upd_vec_heap = NULL; - const rec_t* rec; - const rec_t* clust_rec; - const buf_block_t* clust_block; - upd_t* update; - ulint n_to_update; - dberr_t err; - ulint i; - trx_t* trx; - mem_heap_t* tmp_heap = NULL; - doc_id_t doc_id = FTS_NULL_DOC_ID; - ibool fts_col_affacted = FALSE; - - ut_a(thr); - 
ut_a(foreign); - ut_a(pcur); - ut_a(mtr); - - trx = thr_get_trx(thr); - - /* Since we are going to delete or update a row, we have to invalidate - the MySQL query cache for table. A deadlock of threads is not possible - here because the caller of this function does not hold any latches with - the sync0sync.h rank above the lock_sys_t::mutex. The query cache mutex - has a rank just above the lock_sys_t::mutex. */ - - row_ins_invalidate_query_cache(thr, table->name); - - node = static_cast<upd_node_t*>(thr->run_node); - - if (node->is_delete && 0 == (foreign->type - & (DICT_FOREIGN_ON_DELETE_CASCADE - | DICT_FOREIGN_ON_DELETE_SET_NULL))) { - - row_ins_foreign_report_err("Trying to delete", - thr, foreign, - btr_pcur_get_rec(pcur), entry); - - return(DB_ROW_IS_REFERENCED); - } - - if (!node->is_delete && 0 == (foreign->type - & (DICT_FOREIGN_ON_UPDATE_CASCADE - | DICT_FOREIGN_ON_UPDATE_SET_NULL))) { - - /* This is an UPDATE */ - - row_ins_foreign_report_err("Trying to update", - thr, foreign, - btr_pcur_get_rec(pcur), entry); - - return(DB_ROW_IS_REFERENCED); - } - - if (node->cascade_node == NULL) { - /* Extend our query graph by creating a child to current - update node. The child is used in the cascade or set null - operation. */ - - node->cascade_heap = mem_heap_create(128); - node->cascade_node = row_create_update_node_for_mysql( - table, node->cascade_heap); - que_node_set_parent(node->cascade_node, node); - } - - /* Initialize cascade_node to do the operation we want. Note that we - use the SAME cascade node to do all foreign key operations of the - SQL DELETE: the table of the cascade node may change if there are - several child tables to the table where the delete is done! 
*/ - - cascade = node->cascade_node; - - cascade->table = table; - - cascade->foreign = foreign; - - if (node->is_delete - && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) { - cascade->is_delete = TRUE; - } else { - cascade->is_delete = FALSE; - - if (foreign->n_fields > cascade->update_n_fields) { - /* We have to make the update vector longer */ - - cascade->update = upd_create(foreign->n_fields, - node->cascade_heap); - cascade->update_n_fields = foreign->n_fields; - } - } - - /* We do not allow cyclic cascaded updating (DELETE is allowed, - but not UPDATE) of the same table, as this can lead to an infinite - cycle. Check that we are not updating the same table which is - already being modified in this cascade chain. We have to check - this also because the modification of the indexes of a 'parent' - table may still be incomplete, and we must avoid seeing the indexes - of the parent table in an inconsistent state! */ - - if (!cascade->is_delete - && row_ins_cascade_ancestor_updates_table(cascade, table)) { - - /* We do not know if this would break foreign key - constraints, but play safe and return an error */ - - err = DB_ROW_IS_REFERENCED; - - row_ins_foreign_report_err( - "Trying an update, possibly causing a cyclic" - " cascaded update\n" - "in the child table,", thr, foreign, - btr_pcur_get_rec(pcur), entry); - - goto nonstandard_exit_func; - } - - if (row_ins_cascade_n_ancestors(cascade) >= 15) { - err = DB_ROW_IS_REFERENCED; - - row_ins_foreign_report_err( - "Trying a too deep cascaded delete or update\n", - thr, foreign, btr_pcur_get_rec(pcur), entry); - - goto nonstandard_exit_func; - } - - index = btr_pcur_get_btr_cur(pcur)->index; - - ut_a(index == foreign->foreign_index); - - rec = btr_pcur_get_rec(pcur); - - tmp_heap = mem_heap_create(256); - - if (dict_index_is_clust(index)) { - /* pcur is already positioned in the clustered index of - the child table */ - - clust_index = index; - clust_rec = rec; - clust_block = btr_pcur_get_block(pcur); - } 
else { - /* We have to look for the record in the clustered index - in the child table */ - - clust_index = dict_table_get_first_index(table); - - ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, - tmp_heap); - btr_pcur_open_with_no_init(clust_index, ref, - PAGE_CUR_LE, BTR_SEARCH_LEAF, - cascade->pcur, 0, mtr); - - clust_rec = btr_pcur_get_rec(cascade->pcur); - clust_block = btr_pcur_get_block(cascade->pcur); - - if (!page_rec_is_user_rec(clust_rec) - || btr_pcur_get_low_match(cascade->pcur) - < dict_index_get_n_unique(clust_index)) { - - fputs("InnoDB: error in cascade of a foreign key op\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, rec, index); - fputs("\n" - "InnoDB: clustered record ", stderr); - rec_print(stderr, clust_rec, clust_index); - fputs("\n" - "InnoDB: Submit a detailed bug report to" - " http://bugs.mysql.com\n", stderr); - ut_ad(0); - err = DB_SUCCESS; - - goto nonstandard_exit_func; - } - } - - /* Set an X-lock on the row to delete or update in the child table */ - - err = lock_table(0, table, LOCK_IX, thr); - - if (err == DB_SUCCESS) { - /* Here it suffices to use a LOCK_REC_NOT_GAP type lock; - we already have a normal shared lock on the appropriate - gap if the search criterion was not unique */ - - err = lock_clust_rec_read_check_and_lock_alt( - 0, clust_block, clust_rec, clust_index, - LOCK_X, LOCK_REC_NOT_GAP, thr); - } - - if (err != DB_SUCCESS) { - - goto nonstandard_exit_func; - } - - if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) { - /* This can happen if there is a circular reference of - rows such that cascading delete comes to delete a row - already in the process of being delete marked */ - err = DB_SUCCESS; - - goto nonstandard_exit_func; - } - - if (table->fts) { - doc_id = fts_get_doc_id_from_rec(table, clust_rec, tmp_heap); - } - - if (node->is_delete - ? 
(foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) - : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) { - - /* Build the appropriate update vector which sets - foreign->n_fields first fields in rec to SQL NULL */ - - update = cascade->update; - - update->info_bits = 0; - update->n_fields = foreign->n_fields; - UNIV_MEM_INVALID(update->fields, - update->n_fields * sizeof *update->fields); - - for (i = 0; i < foreign->n_fields; i++) { - upd_field_t* ufield = &update->fields[i]; - - ufield->field_no = dict_table_get_nth_col_pos( - table, - dict_index_get_nth_col_no(index, i)); - ufield->orig_len = 0; - ufield->exp = NULL; - dfield_set_null(&ufield->new_val); - - if (table->fts && dict_table_is_fts_column( - table->fts->indexes, - dict_index_get_nth_col_no(index, i)) - != ULINT_UNDEFINED) { - fts_col_affacted = TRUE; - } - } - - if (fts_col_affacted) { - fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL); - } - } else if (table->fts && cascade->is_delete) { - /* DICT_FOREIGN_ON_DELETE_CASCADE case */ - for (i = 0; i < foreign->n_fields; i++) { - if (table->fts && dict_table_is_fts_column( - table->fts->indexes, - dict_index_get_nth_col_no(index, i)) - != ULINT_UNDEFINED) { - fts_col_affacted = TRUE; - } - } - - if (fts_col_affacted) { - fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL); - } - } - - if (!node->is_delete - && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) { - - /* Build the appropriate update vector which sets changing - foreign->n_fields first fields in rec to new values */ - - upd_vec_heap = mem_heap_create(256); - - n_to_update = row_ins_cascade_calc_update_vec( - node, foreign, upd_vec_heap, trx, &fts_col_affacted); - - if (n_to_update == ULINT_UNDEFINED) { - err = DB_ROW_IS_REFERENCED; - - row_ins_foreign_report_err( - "Trying a cascaded update where the" - " updated value in the child\n" - "table would not fit in the length" - " of the column, or the value would\n" - "be NULL and the column is" - " declared as not NULL in the child 
table,", - thr, foreign, btr_pcur_get_rec(pcur), entry); - - goto nonstandard_exit_func; - } - - if (cascade->update->n_fields == 0) { - - /* The update does not change any columns referred - to in this foreign key constraint: no need to do - anything */ - - err = DB_SUCCESS; - - goto nonstandard_exit_func; - } - - /* Mark the old Doc ID as deleted */ - if (fts_col_affacted) { - ut_ad(table->fts); - fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL); - } - } - - /* Store pcur position and initialize or store the cascade node - pcur stored position */ - - btr_pcur_store_position(pcur, mtr); - - if (index == clust_index) { - btr_pcur_copy_stored_position(cascade->pcur, pcur); - } else { - btr_pcur_store_position(cascade->pcur, mtr); - } - - mtr_commit(mtr); - - ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON); - - cascade->state = UPD_NODE_UPDATE_CLUSTERED; - -#ifdef WITH_WSREP - err = wsrep_append_foreign_key( - thr_get_trx(thr), - foreign, - clust_rec, - clust_index, - FALSE, FALSE); - if (err != DB_SUCCESS) { - fprintf(stderr, - "WSREP: foreign key append failed: %d\n", err); - } else -#endif /* WITH_WSREP */ - err = row_update_cascade_for_mysql(thr, cascade, - foreign->foreign_table); - - if (foreign->foreign_table->n_foreign_key_checks_running == 0) { - fprintf(stderr, - "InnoDB: error: table %s has the counter 0" - " though there is\n" - "InnoDB: a FOREIGN KEY check running on it.\n", - foreign->foreign_table->name); - } - - /* Release the data dictionary latch for a while, so that we do not - starve other threads from doing CREATE TABLE etc. if we have a huge - cascaded operation running. The counter n_foreign_key_checks_running - will prevent other users from dropping or ALTERing the table when we - release the latch. 
*/ - - row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); - - DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze"); - - row_mysql_freeze_data_dictionary(thr_get_trx(thr)); - - mtr_start_trx(mtr, trx); - - /* Restore pcur position */ - - btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); - - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - if (upd_vec_heap) { - mem_heap_free(upd_vec_heap); - } - - return(err); - -nonstandard_exit_func: - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - if (upd_vec_heap) { - mem_heap_free(upd_vec_heap); - } - - btr_pcur_store_position(pcur, mtr); - - mtr_commit(mtr); - mtr_start_trx(mtr, trx); - - btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); - - return(err); -} - -/*********************************************************************//** -Sets a shared lock on a record. Used in locking possible duplicate key -records and also in checking foreign key constraints. -@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ -static -dberr_t -row_ins_set_shared_rec_lock( -/*========================*/ - ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP type lock */ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (dict_index_is_clust(index)) { - err = lock_clust_rec_read_check_and_lock( - 0, block, rec, index, offsets, LOCK_S, type, thr); - } else { - err = lock_sec_rec_read_check_and_lock( - 0, block, rec, index, offsets, LOCK_S, type, thr); - } - - return(err); -} - -/*********************************************************************//** -Sets a exclusive lock on a record. 
Used in locking possible duplicate key -records -@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ -static -dberr_t -row_ins_set_exclusive_rec_lock( -/*===========================*/ - ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP type lock */ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (dict_index_is_clust(index)) { - err = lock_clust_rec_read_check_and_lock( - 0, block, rec, index, offsets, LOCK_X, type, thr); - } else { - err = lock_sec_rec_read_check_and_lock( - 0, block, rec, index, offsets, LOCK_X, type, thr); - } - - return(err); -} - -/***************************************************************//** -Checks if foreign key constraint fails for an index entry. Sets shared locks -which lock either the success or the failure of the constraint. NOTE that -the caller must have a shared latch on dict_operation_lock. 
-@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */ -UNIV_INTERN -dberr_t -row_ins_check_foreign_constraint( -/*=============================*/ - ibool check_ref,/*!< in: TRUE if we want to check that - the referenced table is ok, FALSE if we - want to check the foreign key table */ - dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the - tables mentioned in it must be in the - dictionary cache if they exist at all */ - dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign - table, else the referenced table */ - dtuple_t* entry, /*!< in: index entry for index */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - upd_node_t* upd_node; - dict_table_t* check_table; - dict_index_t* check_index; - ulint n_fields_cmp; - btr_pcur_t pcur; - int cmp; - ulint i; - mtr_t mtr; - trx_t* trx = thr_get_trx(thr); - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - -run_again: -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - err = DB_SUCCESS; - - if (trx->check_foreigns == FALSE) { - /* The user has suppressed foreign key checks currently for - this session */ - goto exit_func; - } - - /* If any of the foreign key fields in entry is SQL NULL, we - suppress the foreign key check: this is compatible with Oracle, - for example */ - - for (i = 0; i < foreign->n_fields; i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(entry, i))) { - - goto exit_func; - } - } - - if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) { - upd_node = static_cast<upd_node_t*>(thr->run_node); - - if (!(upd_node->is_delete) && upd_node->foreign == foreign) { - /* If a cascaded update is done as defined by a - foreign key constraint, do not check that - constraint for the child row. 
In ON UPDATE CASCADE - the update of the parent row is only half done when - we come here: if we would check the constraint here - for the child row it would fail. - - A QUESTION remains: if in the child table there are - several constraints which refer to the same parent - table, we should merge all updates to the child as - one update? And the updates can be contradictory! - Currently we just perform the update associated - with each foreign key constraint, one after - another, and the user has problems predicting in - which order they are performed. */ - - goto exit_func; - } - } - - if (check_ref) { - check_table = foreign->referenced_table; - check_index = foreign->referenced_index; - } else { - check_table = foreign->foreign_table; - check_index = foreign->foreign_index; - } - - if (check_table == NULL - || check_table->file_unreadable - || check_index == NULL) { - - if (!srv_read_only_mode && check_ref) { - FILE* ef = dict_foreign_err_file; - std::string fk_str; - - row_ins_set_detailed(trx, foreign); - - row_ins_foreign_trx_print(trx); - - fputs("Foreign key constraint fails for table ", ef); - ut_print_name(ef, trx, TRUE, - foreign->foreign_table_name); - fputs(":\n", ef); - fk_str = dict_print_info_on_foreign_key_in_create_format( - trx, foreign, TRUE); - fputs(fk_str.c_str(), ef); - fputs("\nTrying to add to index ", ef); - ut_print_name(ef, trx, FALSE, - foreign->foreign_index->name); - fputs(" tuple:\n", ef); - dtuple_print(ef, entry); - fputs("\nBut the parent table ", ef); - ut_print_name(ef, trx, TRUE, - foreign->referenced_table_name); - fputs("\nor its .ibd file does" - " not currently exist!\n", ef); - mutex_exit(&dict_foreign_err_mutex); - - err = DB_NO_REFERENCED_ROW; - } - - goto exit_func; - } - - if (check_table != table) { - /* We already have a LOCK_IX on table, but not necessarily - on check_table */ - - err = lock_table(0, check_table, LOCK_IS, thr); - - if (err != DB_SUCCESS) { - - goto do_possible_lock_wait; - } - } - - 
mtr_start_trx(&mtr, trx); - - /* Store old value on n_fields_cmp */ - - n_fields_cmp = dtuple_get_n_fields_cmp(entry); - - dtuple_set_n_fields_cmp(entry, foreign->n_fields); - - btr_pcur_open(check_index, entry, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - - /* Scan index records and check if there is a matching record */ - - do { - const rec_t* rec = btr_pcur_get_rec(&pcur); - const buf_block_t* block = btr_pcur_get_block(&pcur); - - SRV_CORRUPT_TABLE_CHECK(block, - { - err = DB_CORRUPTION; - goto exit_loop; - }); - - if (page_rec_is_infimum(rec)) { - - continue; - } - - offsets = rec_get_offsets(rec, check_index, - offsets, ULINT_UNDEFINED, &heap); - - if (page_rec_is_supremum(rec)) { - - err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block, - rec, check_index, - offsets, thr); - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - continue; - default: - goto end_scan; - } - } - - cmp = cmp_dtuple_rec(entry, rec, offsets); - - if (cmp == 0) { - if (rec_get_deleted_flag(rec, - rec_offs_comp(offsets))) { - err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, block, - rec, check_index, offsets, thr); - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - break; - default: - goto end_scan; - } - } else { - /* Found a matching record. 
Lock only - a record because we can allow inserts - into gaps */ - - err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP, block, - rec, check_index, offsets, thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - break; - default: - goto end_scan; - } - - if (check_ref) { - err = DB_SUCCESS; -#ifdef WITH_WSREP - err = wsrep_append_foreign_key( - thr_get_trx(thr), - foreign, - rec, - check_index, - check_ref, TRUE); -#endif /* WITH_WSREP */ - goto end_scan; - } else if (foreign->type != 0) { - /* There is an ON UPDATE or ON DELETE - condition: check them in a separate - function */ - - err = row_ins_foreign_check_on_constraint( - thr, foreign, &pcur, entry, - &mtr); - if (err != DB_SUCCESS) { - /* Since reporting a plain - "duplicate key" error - message to the user in - cases where a long CASCADE - operation would lead to a - duplicate key in some - other table is very - confusing, map duplicate - key errors resulting from - FK constraints to a - separate error code. 
*/ - - if (err == DB_DUPLICATE_KEY) { - err = DB_FOREIGN_DUPLICATE_KEY; - } - - goto end_scan; - } - - /* row_ins_foreign_check_on_constraint - may have repositioned pcur on a - different block */ - block = btr_pcur_get_block(&pcur); - } else { - row_ins_foreign_report_err( - "Trying to delete or update", - thr, foreign, rec, entry); - - err = DB_ROW_IS_REFERENCED; - goto end_scan; - } - } - } else { - ut_a(cmp < 0); - - err = row_ins_set_shared_rec_lock( - LOCK_GAP, block, - rec, check_index, offsets, thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - if (check_ref) { - err = DB_NO_REFERENCED_ROW; - row_ins_foreign_report_add_err( - trx, foreign, rec, entry); - } else { - err = DB_SUCCESS; - } - default: - break; - } - - goto end_scan; - } - } while (btr_pcur_move_to_next(&pcur, &mtr)); - -exit_loop: - if (check_ref) { - row_ins_foreign_report_add_err( - trx, foreign, btr_pcur_get_rec(&pcur), entry); - err = DB_NO_REFERENCED_ROW; - } else { - err = DB_SUCCESS; - } - -end_scan: - btr_pcur_close(&pcur); - - mtr_commit(&mtr); - - /* Restore old value */ - dtuple_set_n_fields_cmp(entry, n_fields_cmp); - -do_possible_lock_wait: - if (err == DB_LOCK_WAIT) { - bool verified = false; - - trx->error_state = err; - - que_thr_stop_for_mysql(thr); - - lock_wait_suspend_thread(thr); - - if (check_table->to_be_dropped) { - /* The table is being dropped. We shall timeout - this operation */ - err = DB_LOCK_WAIT_TIMEOUT; - goto exit_func; - } - - /* We had temporarily released dict_operation_lock in - above lock sleep wait, now we have the lock again, and - we will need to re-check whether the foreign key has been - dropped. 
We only need to verify if the table is referenced - table case (check_ref == 0), since MDL lock will prevent - concurrent DDL and DML on the same table */ - if (!check_ref) { - for (dict_foreign_set::iterator it - = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - if (*it == foreign) { - verified = true; - break; - } - } - } else { - verified = true; - } - - if (!verified) { - err = DB_DICT_CHANGED; - } else if (trx->error_state == DB_SUCCESS) { - goto run_again; - } else { - err = trx->error_state; - } - } - -exit_func: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - if (UNIV_UNLIKELY(trx->fake_changes)) { - err = DB_SUCCESS; - } - - return(err); -} - -/***************************************************************//** -Checks if foreign key constraints fail for an index entry. If index -is not mentioned in any constraint, this function does nothing, -Otherwise does searches to the indexes of referenced tables and -sets shared locks which lock either the success or the failure of -a constraint. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_check_foreign_constraints( -/*==============================*/ - dict_table_t* table, /*!< in: table */ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry for index */ - que_thr_t* thr) /*!< in: query thread */ -{ - dict_foreign_t* foreign; - dberr_t err; - trx_t* trx; - ibool got_s_lock = FALSE; - - trx = thr_get_trx(thr); - - DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, - "foreign_constraint_check_for_ins"); - - for (dict_foreign_set::iterator it = table->foreign_set.begin(); - it != table->foreign_set.end(); - ++it) { - - foreign = *it; - - if (foreign->foreign_index == index) { - dict_table_t* ref_table = NULL; - dict_table_t* foreign_table = foreign->foreign_table; - dict_table_t* referenced_table - = foreign->referenced_table; - - if (referenced_table == NULL) { - - ref_table = dict_table_open_on_name( - foreign->referenced_table_name_lookup, - FALSE, FALSE, DICT_ERR_IGNORE_NONE); - } - - if (0 == trx->dict_operation_lock_mode) { - got_s_lock = TRUE; - - row_mysql_freeze_data_dictionary(trx); - } - - if (referenced_table) { - os_inc_counter(dict_sys->mutex, - foreign_table - ->n_foreign_key_checks_running); - } - - /* NOTE that if the thread ends up waiting for a lock - we will release dict_operation_lock temporarily! - But the counter on the table protects the referenced - table from being dropped while the check is running. 
*/ - - err = row_ins_check_foreign_constraint( - TRUE, foreign, table, entry, thr); - - DBUG_EXECUTE_IF("row_ins_dict_change_err", - err = DB_DICT_CHANGED;); - - if (referenced_table) { - os_dec_counter(dict_sys->mutex, - foreign_table - ->n_foreign_key_checks_running); - } - - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); - } - - if (ref_table != NULL) { - dict_table_close(ref_table, FALSE, FALSE); - } - - if (err != DB_SUCCESS) { - - return(err); - } - } - } - - return(DB_SUCCESS); -} - -/***************************************************************//** -Checks if a unique key violation to rec would occur at the index entry -insert. -@return TRUE if error */ -static -ibool -row_ins_dupl_error_with_rec( -/*========================*/ - const rec_t* rec, /*!< in: user record; NOTE that we assume - that the caller already has a record lock on - the record! */ - const dtuple_t* entry, /*!< in: entry to insert */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - ulint matched_fields; - ulint matched_bytes; - ulint n_unique; - ulint i; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - n_unique = dict_index_get_n_unique(index); - - matched_fields = 0; - matched_bytes = 0; - - cmp_dtuple_rec_with_match(entry, rec, offsets, - &matched_fields, &matched_bytes); - - if (matched_fields < n_unique) { - - return(FALSE); - } - - /* In a unique secondary index we allow equal key values if they - contain SQL NULLs */ - - if (!dict_index_is_clust(index)) { - - for (i = 0; i < n_unique; i++) { - if (dfield_is_null(dtuple_get_nth_field(entry, i))) { - - return(FALSE); - } - } - } - - return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); -} - -/***************************************************************//** -Scans a unique non-clustered index at a given index entry to determine -whether a uniqueness violation has occurred for the key value of the entry. 
-Set shared locks on possible duplicate records. -@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_scan_sec_index_for_duplicate( -/*=================================*/ - ulint flags, /*!< in: undo logging and locking flags */ - dict_index_t* index, /*!< in: non-clustered unique index */ - dtuple_t* entry, /*!< in: index entry */ - que_thr_t* thr, /*!< in: query thread */ - bool s_latch,/*!< in: whether index->lock is being held */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - mem_heap_t* offsets_heap) - /*!< in/out: memory heap that can be emptied */ -{ - ulint n_unique; - int cmp; - ulint n_fields_cmp; - btr_pcur_t pcur; - dberr_t err = DB_SUCCESS; - ulint allow_duplicates; - ulint* offsets = NULL; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(s_latch == rw_lock_own(&index->lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - n_unique = dict_index_get_n_unique(index); - - /* If the secondary index is unique, but one of the fields in the - n_unique first fields is NULL, a unique key violation cannot occur, - since we define NULL != NULL in this case */ - - for (ulint i = 0; i < n_unique; i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(entry, i))) { - - return(DB_SUCCESS); - } - } - - /* Store old value on n_fields_cmp */ - - n_fields_cmp = dtuple_get_n_fields_cmp(entry); - - dtuple_set_n_fields_cmp(entry, n_unique); - - btr_pcur_open(index, entry, PAGE_CUR_GE, - s_latch - ? 
BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED - : BTR_SEARCH_LEAF, - &pcur, mtr); - - allow_duplicates = thr_get_trx(thr)->duplicates; - - /* Scan index records and check if there is a duplicate */ - - do { - const rec_t* rec = btr_pcur_get_rec(&pcur); - const buf_block_t* block = btr_pcur_get_block(&pcur); - const ulint lock_type = LOCK_ORDINARY; - - if (page_rec_is_infimum(rec)) { - - continue; - } - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &offsets_heap); - - if (flags & BTR_NO_LOCKING_FLAG) { - /* Set no locks when applying log - in online table rebuild. */ - } else if (allow_duplicates) { - - /* If the SQL-query will update or replace - duplicate key we will take X-lock for - duplicates ( REPLACE, LOAD DATAFILE REPLACE, - INSERT ON DUPLICATE KEY UPDATE). */ - - err = row_ins_set_exclusive_rec_lock( - lock_type, block, rec, index, offsets, thr); - } else { - - err = row_ins_set_shared_rec_lock( - lock_type, block, rec, index, offsets, thr); - } - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - err = DB_SUCCESS; - case DB_SUCCESS: - break; - default: - goto end_scan; - } - - if (page_rec_is_supremum(rec)) { - - continue; - } - - cmp = cmp_dtuple_rec(entry, rec, offsets); - - if (cmp == 0) { - if (row_ins_dupl_error_with_rec(rec, entry, - index, offsets)) { - err = DB_DUPLICATE_KEY; - - thr_get_trx(thr)->error_info = index; - - /* If the duplicate is on hidden FTS_DOC_ID, - state so in the error log */ - if (DICT_TF2_FLAG_IS_SET( - index->table, - DICT_TF2_FTS_HAS_DOC_ID) - && strcmp(index->name, - FTS_DOC_ID_INDEX_NAME) == 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Duplicate FTS_DOC_ID value" - " on table %s", - index->table->name); - } - - goto end_scan; - } - } else { - ut_a(cmp < 0); - goto end_scan; - } - } while (btr_pcur_move_to_next(&pcur, mtr)); - -end_scan: - /* Restore old value */ - dtuple_set_n_fields_cmp(entry, n_fields_cmp); - - return(err); -} - -/** Checks for a duplicate when the table is being rebuilt online. 
-@retval DB_SUCCESS when no duplicate is detected -@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or -a newer version of entry (the entry should not be inserted) -@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_duplicate_online( -/*=====================*/ - ulint n_uniq, /*!< in: offset of DB_TRX_ID */ - const dtuple_t* entry, /*!< in: entry that is being inserted */ - const rec_t* rec, /*!< in: clustered index record */ - ulint* offsets)/*!< in/out: rec_get_offsets(rec) */ -{ - ulint fields = 0; - ulint bytes = 0; - - /* During rebuild, there should not be any delete-marked rows - in the new table. */ - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - ut_ad(dtuple_get_n_fields_cmp(entry) == n_uniq); - - /* Compare the PRIMARY KEY fields and the - DB_TRX_ID, DB_ROLL_PTR. */ - cmp_dtuple_rec_with_match_low( - entry, rec, offsets, n_uniq + 2, &fields, &bytes); - - if (fields < n_uniq) { - /* Not a duplicate. */ - return(DB_SUCCESS); - } - - if (fields == n_uniq + 2) { - /* rec is an exact match of entry. */ - ut_ad(bytes == 0); - return(DB_SUCCESS_LOCKED_REC); - } - - return(DB_DUPLICATE_KEY); -} - -/** Checks for a duplicate when the table is being rebuilt online. 
-@retval DB_SUCCESS when no duplicate is detected -@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or -a newer version of entry (the entry should not be inserted) -@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_duplicate_error_in_clust_online( -/*====================================*/ - ulint n_uniq, /*!< in: offset of DB_TRX_ID */ - const dtuple_t* entry, /*!< in: entry that is being inserted */ - const btr_cur_t*cursor, /*!< in: cursor on insert position */ - ulint** offsets,/*!< in/out: rec_get_offsets(rec) */ - mem_heap_t** heap) /*!< in/out: heap for offsets */ -{ - dberr_t err = DB_SUCCESS; - const rec_t* rec = btr_cur_get_rec(cursor); - - if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) { - *offsets = rec_get_offsets(rec, cursor->index, *offsets, - ULINT_UNDEFINED, heap); - err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets); - if (err != DB_SUCCESS) { - return(err); - } - } - - rec = page_rec_get_next_const(btr_cur_get_rec(cursor)); - - if (cursor->up_match >= n_uniq && !page_rec_is_supremum(rec)) { - *offsets = rec_get_offsets(rec, cursor->index, *offsets, - ULINT_UNDEFINED, heap); - err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets); - } - - return(err); -} - -/***************************************************************//** -Checks if a unique key violation error would occur at an index entry -insert. Sets shared locks on possible duplicate records. Works only -for a clustered index! 
-@retval DB_SUCCESS if no error -@retval DB_DUPLICATE_KEY if error, -@retval DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate -record */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_duplicate_error_in_clust( - btr_cur_t* cursor, /*!< in: B-tree cursor */ - const dtuple_t* entry, /*!< in: entry to insert */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in: mtr */ -{ - dberr_t err; - rec_t* rec; - ulint n_unique; - trx_t* trx = thr_get_trx(thr); - mem_heap_t*heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - UT_NOT_USED(mtr); - - ut_ad(dict_index_is_clust(cursor->index)); - - /* NOTE: For unique non-clustered indexes there may be any number - of delete marked records with the same value for the non-clustered - index key (remember multiversioning), and which differ only in - the row refererence part of the index record, containing the - clustered index key fields. For such a secondary index record, - to avoid race condition, we must FIRST do the insertion and after - that check that the uniqueness condition is not breached! */ - - /* NOTE: A problem is that in the B-tree node pointers on an - upper level may match more to the entry than the actual existing - user records on the leaf level. So, even if low_match would suggest - that a duplicate key violation may occur, this may not be the case. 
*/ - - n_unique = dict_index_get_n_unique(cursor->index); - - if (cursor->low_match >= n_unique) { - - rec = btr_cur_get_rec(cursor); - - if (!page_rec_is_infimum(rec)) { - offsets = rec_get_offsets(rec, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - - /* We set a lock on the possible duplicate: this - is needed in logical logging of MySQL to make - sure that in roll-forward we get the same duplicate - errors as in original execution */ - - if (trx->duplicates) { - - /* If the SQL-query will update or replace - duplicate key we will take X-lock for - duplicates ( REPLACE, LOAD DATAFILE REPLACE, - INSERT ON DUPLICATE KEY UPDATE). */ - - err = row_ins_set_exclusive_rec_lock( - LOCK_REC_NOT_GAP, - btr_cur_get_block(cursor), - rec, cursor->index, offsets, thr); - } else { - - err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP, - btr_cur_get_block(cursor), rec, - cursor->index, offsets, thr); - } - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - break; - default: - goto func_exit; - } - - if (row_ins_dupl_error_with_rec( - rec, entry, cursor->index, offsets)) { -duplicate: - trx->error_info = cursor->index; - err = DB_DUPLICATE_KEY; - goto func_exit; - } - } - } - - if (cursor->up_match >= n_unique) { - - rec = page_rec_get_next(btr_cur_get_rec(cursor)); - - if (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - - if (trx->duplicates) { - - /* If the SQL-query will update or replace - duplicate key we will take X-lock for - duplicates ( REPLACE, LOAD DATAFILE REPLACE, - INSERT ON DUPLICATE KEY UPDATE). 
*/ - - err = row_ins_set_exclusive_rec_lock( - LOCK_REC_NOT_GAP, - btr_cur_get_block(cursor), - rec, cursor->index, offsets, thr); - } else { - - err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP, - btr_cur_get_block(cursor), - rec, cursor->index, offsets, thr); - } - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - break; - default: - goto func_exit; - } - - if (row_ins_dupl_error_with_rec( - rec, entry, cursor->index, offsets)) { - goto duplicate; - } - } - - /* This should never happen */ - ut_error; - } - - err = DB_SUCCESS; -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); -} - -/***************************************************************//** -Checks if an index entry has long enough common prefix with an -existing record so that the intended insert of the entry must be -changed to a modify of the existing record. In the case of a clustered -index, the prefix must be n_unique fields long. In the case of a -secondary index, all fields must be equal. InnoDB never updates -secondary index records in place, other than clearing or setting the -delete-mark flag. We could be able to update the non-unique fields -of a unique secondary index record by checking the cursor->up_match, -but we do not do so, because it could have some locking implications. -@return TRUE if the existing record should be updated; FALSE if not */ -UNIV_INLINE -ibool -row_ins_must_modify_rec( -/*====================*/ - const btr_cur_t* cursor) /*!< in: B-tree cursor */ -{ - /* NOTE: (compare to the note in row_ins_duplicate_error_in_clust) - Because node pointers on upper levels of the B-tree may match more - to entry than to actual user records on the leaf level, we - have to check if the candidate record is actually a user record. - A clustered index node pointer contains index->n_unique first fields, - and a secondary index node pointer contains all index fields. 
*/ - - return(cursor->low_match - >= dict_index_get_n_unique_in_tree(cursor->index) - && !page_rec_is_infimum(btr_cur_get_rec(cursor))); -} - -/***************************************************************//** -Tries to insert an entry into a clustered index, ignoring foreign key -constraints. If a record with the same unique key is found, the other -record is necessarily marked deleted by a committed transaction, or a -unique key violation error occurs. The delete marked record is then -updated to an existing record, and we must write an undo log record on -the delete marked record. -@retval DB_SUCCESS on success -@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG) -@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed -@return error code */ -UNIV_INTERN -dberr_t -row_ins_clust_index_entry_low( -/*==========================*/ - ulint flags, /*!< in: undo logging and locking flags */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /*!< in: clustered index */ - ulint n_uniq, /*!< in: 0 or index->n_uniq */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr) /*!< in: query thread */ -{ - btr_cur_t cursor; - ulint* offsets = NULL; - dberr_t err = DB_SUCCESS; - big_rec_t* big_rec = NULL; - mtr_t mtr; - mem_heap_t* offsets_heap = NULL; - ulint search_mode; - - ut_ad(dict_index_is_clust(index)); - ut_ad(!dict_index_is_unique(index) - || n_uniq == dict_index_get_n_unique(index)); - ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index)); - - /* If running with fake_changes mode on then switch from modify to - search so that code takes only s-latch and not x-latch. - For dry-run (fake-changes) s-latch is acceptable. Taking x-latch will - make it more restrictive and will block real changes/workflow. 
*/ - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - search_mode = (mode & BTR_MODIFY_TREE) - ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF; - } else { - search_mode = mode; - } - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) { - - /* We really don't need to OR mode but will leave it for - code consistency. */ - mode |= BTR_ALREADY_S_LATCHED; - search_mode |= BTR_ALREADY_S_LATCHED; - - mtr_s_lock(dict_index_get_lock(index), &mtr); - } - - cursor.thr = thr; - - /* Note that we use PAGE_CUR_LE as the search mode, because then - the function will return in both low_match and up_match of the - cursor sensible values */ - - err = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, search_mode, - &cursor, 0, __FILE__, __LINE__, &mtr); - - if (err != DB_SUCCESS) { - index->table->file_unreadable = true; - mtr_commit(&mtr); - goto func_exit; - } - -#ifdef UNIV_DEBUG - { - page_t* page = btr_cur_get_page(&cursor); - rec_t* first_rec = page_rec_get_next( - page_get_infimum_rec(page)); - - ut_ad(page_rec_is_supremum(first_rec) - || rec_get_n_fields(first_rec, index) - == dtuple_get_n_fields(entry)); - } -#endif - - if (n_uniq && (cursor.up_match >= n_uniq - || cursor.low_match >= n_uniq)) { - - if (flags - == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) { - /* Set no locks when applying log - in online table rebuild. Only check for duplicates. 
*/ - err = row_ins_duplicate_error_in_clust_online( - n_uniq, entry, &cursor, - &offsets, &offsets_heap); - - switch (err) { - case DB_SUCCESS: - break; - default: - ut_ad(0); - /* fall through */ - case DB_SUCCESS_LOCKED_REC: - case DB_DUPLICATE_KEY: - thr_get_trx(thr)->error_info = cursor.index; - } - } else { - /* Note that the following may return also - DB_LOCK_WAIT */ - - err = row_ins_duplicate_error_in_clust( - &cursor, entry, thr, &mtr); - } - - if (err != DB_SUCCESS) { -err_exit: - mtr_commit(&mtr); - goto func_exit; - } - } - - if (row_ins_must_modify_rec(&cursor)) { - /* There is already an index entry with a long enough common - prefix, we must convert the insert into a modify of an - existing record */ - mem_heap_t* entry_heap = mem_heap_create(1024); - - err = row_ins_clust_index_entry_by_modify( - flags, mode, &cursor, &offsets, &offsets_heap, - entry_heap, &big_rec, entry, thr, &mtr); - - rec_t* rec = btr_cur_get_rec(&cursor); - - if (big_rec && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns while still x-latching - index->lock and block->lock. Allocate - pages for big_rec in the mtr that - modified the B-tree, but be sure to skip - any pages that were freed in mtr. We will - write out the big_rec pages before - committing the B-tree mini-transaction. If - the system crashes so that crash recovery - will not replay the mtr_commit(&mtr), the - big_rec pages will be left orphaned until - the pages are allocated for something else. - - TODO: If the allocation extends the - tablespace, it will not be redo - logged, in either mini-transaction. - Tablespace extension should be - redo-logged in the big_rec - mini-transaction, so that recovery - will not fail when the big_rec was - written to the extended portion of the - file, in case the file was somehow - truncated in the crash. 
*/ - - DEBUG_SYNC_C_IF_THD( - thr_get_trx(thr)->mysql_thd, - "before_row_ins_upd_extern"); - err = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr, - BTR_STORE_INSERT_UPDATE); - DEBUG_SYNC_C_IF_THD( - thr_get_trx(thr)->mysql_thd, - "after_row_ins_upd_extern"); - /* If writing big_rec fails (for - example, because of DB_OUT_OF_FILE_SPACE), - the record will be corrupted. Even if - we did not update any externally - stored columns, our update could cause - the record to grow so that a - non-updated column was selected for - external storage. This non-update - would not have been written to the - undo log, and thus the record cannot - be rolled back. - - However, because we have not executed - mtr_commit(mtr) yet, the update will - not be replayed in crash recovery, and - the following assertion failure will - effectively "roll back" the operation. */ - ut_a(err == DB_SUCCESS); - dtuple_big_rec_free(big_rec); - } else if (big_rec != NULL - && UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - dtuple_big_rec_free(big_rec); - } - - if (err == DB_SUCCESS - && dict_index_is_online_ddl(index) - && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) { - row_log_table_insert(rec, index, offsets); - } - - mtr_commit(&mtr); - mem_heap_free(entry_heap); - } else { - rec_t* insert_rec; - - if (mode != BTR_MODIFY_TREE) { - ut_ad(((mode & ~BTR_ALREADY_S_LATCHED) - == BTR_MODIFY_LEAF) - || thr_get_trx(thr)->fake_changes); - err = btr_cur_optimistic_insert( - flags, &cursor, &offsets, &offsets_heap, - entry, &insert_rec, &big_rec, - n_ext, thr, &mtr); - } else { - if (buf_LRU_buf_pool_running_out()) { - - err = DB_LOCK_TABLE_FULL; - goto err_exit; - } - - err = btr_cur_optimistic_insert( - flags, &cursor, - &offsets, &offsets_heap, - entry, &insert_rec, &big_rec, - n_ext, thr, &mtr); - - if (err == DB_FAIL) { - err = btr_cur_pessimistic_insert( - flags, &cursor, - &offsets, &offsets_heap, - entry, &insert_rec, &big_rec, - n_ext, thr, 
&mtr); - } - } - - if (UNIV_LIKELY_NULL(big_rec)) { - mtr_commit(&mtr); - - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - - dtuple_convert_back_big_rec( - index, entry, big_rec); - goto func_exit; - } - - /* Online table rebuild could read (and - ignore) the incomplete record at this point. - If online rebuild is in progress, the - row_ins_index_entry_big_rec() will write log. */ - - DBUG_EXECUTE_IF( - "row_ins_extern_checkpoint", - log_make_checkpoint_at( - LSN_MAX, TRUE);); - err = row_ins_index_entry_big_rec( - entry, big_rec, offsets, &offsets_heap, index, - thr_get_trx(thr)->mysql_thd, - __FILE__, __LINE__); - dtuple_convert_back_big_rec(index, entry, big_rec); - } else { - if (err == DB_SUCCESS - && dict_index_is_online_ddl(index) - && !UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - row_log_table_insert( - insert_rec, index, offsets); - } - - mtr_commit(&mtr); - } - } - -func_exit: - if (offsets_heap) { - mem_heap_free(offsets_heap); - } - - return(err); -} - -/***************************************************************//** -Starts a mini-transaction and checks if the index will be dropped. 
-@return true if the index is to be dropped */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_ins_sec_mtr_start_trx_and_check_if_aborted( -/*=======================================*/ - mtr_t* mtr, /*!< out: mini-transaction */ - trx_t* trx, /*!< in: transaction handle */ - dict_index_t* index, /*!< in/out: secondary index */ - bool check, /*!< in: whether to check */ - ulint search_mode) - /*!< in: flags */ -{ - ut_ad(!dict_index_is_clust(index)); - - mtr_start_trx(mtr, trx); - - if (!check) { - return(false); - } - - if (search_mode & BTR_ALREADY_S_LATCHED) { - mtr_s_lock(dict_index_get_lock(index), mtr); - } else { - mtr_x_lock(dict_index_get_lock(index), mtr); - } - - switch (index->online_status) { - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - ut_ad(*index->name == TEMP_INDEX_PREFIX); - return(true); - case ONLINE_INDEX_COMPLETE: - return(false); - case ONLINE_INDEX_CREATION: - break; - } - - ut_error; - return(true); -} - -/***************************************************************//** -Tries to insert an entry into a secondary index. If a record with exactly the -same fields is found, the other record is necessarily marked deleted. -It is then unmarked. Otherwise, the entry is just inserted to the index. 
-@retval DB_SUCCESS on success -@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG) -@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed -@return error code */ -UNIV_INTERN -dberr_t -row_ins_sec_index_entry_low( -/*========================*/ - ulint flags, /*!< in: undo logging and locking flags */ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /*!< in: secondary index */ - mem_heap_t* offsets_heap, - /*!< in/out: memory heap that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during - row_log_table_apply(), or 0 */ - que_thr_t* thr) /*!< in: query thread */ -{ - btr_cur_t cursor; - ulint search_mode; - dberr_t err = DB_SUCCESS; - ulint n_unique; - mtr_t mtr; - ulint* offsets = NULL; - trx_t* trx = thr_get_trx(thr); - - ut_ad(!dict_index_is_clust(index)); - ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE); - - cursor.thr = thr; - ut_ad(thr_get_trx(thr)->id); - mtr_start_trx(&mtr, trx); - - /* If running with fake_changes mode on then avoid using insert buffer - and also switch from modify to search so that code takes only s-latch - and not x-latch. For dry-run (fake-changes) s-latch is acceptable. - Taking x-latch will make it more restrictive and will block real - changes/workflow. */ - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - search_mode = (mode & BTR_MODIFY_TREE) - ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF; - } else { - search_mode = mode | BTR_INSERT; - } - - /* Ensure that we acquire index->lock when inserting into an - index with index->online_status == ONLINE_INDEX_COMPLETE, but - could still be subject to rollback_inplace_alter_table(). - This prevents a concurrent change of index->online_status. 
- The memory object cannot be freed as long as we have an open - reference to the table, or index->table->n_ref_count > 0. */ - const bool check = *index->name == TEMP_INDEX_PREFIX; - - if (check) { - - DEBUG_SYNC_C("row_ins_sec_index_enter"); - - /* mode = MODIFY_LEAF is synonymous to search_mode = SEARCH_LEAF - search_mode = SEARCH_TREE suggest operation in fake_change mode - so continue to s-latch in this mode too. */ - - if (mode == BTR_MODIFY_LEAF || search_mode == BTR_SEARCH_TREE) { - - ut_ad((search_mode == BTR_SEARCH_TREE - && thr_get_trx(thr)->fake_changes) - || mode == BTR_MODIFY_LEAF); - - search_mode |= BTR_ALREADY_S_LATCHED; - mtr_s_lock(dict_index_get_lock(index), &mtr); - - } else { - mtr_x_lock(dict_index_get_lock(index), &mtr); - } - - if (row_log_online_op_try( - index, entry, thr_get_trx(thr)->id)) { - goto func_exit; - } - } - - if (!thr_get_trx(thr)->check_unique_secondary) { - search_mode |= BTR_IGNORE_SEC_UNIQUE; - } - - /* Note that we use PAGE_CUR_LE as the search mode, because then - the function will return in both low_match and up_match of the - cursor sensible values */ - err = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - search_mode, - &cursor, 0, __FILE__, __LINE__, &mtr); - - if (err != DB_SUCCESS) { - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning(trx->mysql_thd, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. 
" - " Can't continue reading table.", - index->table->name); - index->table->file_unreadable = true; - } - goto func_exit; - } - - if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) { - /* The insert was buffered during the search: we are done */ - goto func_exit; - } - -#ifdef UNIV_DEBUG - { - page_t* page = btr_cur_get_page(&cursor); - rec_t* first_rec = page_rec_get_next( - page_get_infimum_rec(page)); - - ut_ad(page_rec_is_supremum(first_rec) - || rec_get_n_fields(first_rec, index) - == dtuple_get_n_fields(entry)); - } -#endif - - n_unique = dict_index_get_n_unique(index); - - if (dict_index_is_unique(index) - && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) { - mtr_commit(&mtr); - - DEBUG_SYNC_C("row_ins_sec_index_unique"); - - if (row_ins_sec_mtr_start_trx_and_check_if_aborted( - &mtr, trx, index, check, search_mode)) { - goto func_exit; - } - - err = row_ins_scan_sec_index_for_duplicate( - flags, index, entry, thr, check, &mtr, offsets_heap); - - mtr_commit(&mtr); - - switch (err) { - case DB_SUCCESS: - break; - case DB_DUPLICATE_KEY: - if (*index->name == TEMP_INDEX_PREFIX) { - ut_ad(!thr_get_trx(thr) - ->dict_operation_lock_mode); - mutex_enter(&dict_sys->mutex); - dict_set_corrupted_index_cache_only( - index, index->table); - mutex_exit(&dict_sys->mutex); - /* Do not return any error to the - caller. The duplicate will be reported - by ALTER TABLE or CREATE UNIQUE INDEX. - Unfortunately we cannot report the - duplicate key value to the DDL thread, - because the altered_table object is - private to its call stack. */ - err = DB_SUCCESS; - } - /* fall through */ - default: - return(err); - } - - if (row_ins_sec_mtr_start_trx_and_check_if_aborted( - &mtr, trx, index, check, search_mode)) { - goto func_exit; - } - - DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created"); - - /* We did not find a duplicate and we have now - locked with s-locks the necessary records to - prevent any insertion of a duplicate by another - transaction. 
Let us now reposition the cursor and - continue the insertion. */ - - btr_cur_search_to_nth_level( - index, 0, entry, PAGE_CUR_LE, - search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE), - &cursor, 0, __FILE__, __LINE__, &mtr); - } - - if (row_ins_must_modify_rec(&cursor)) { - /* There is already an index entry with a long enough common - prefix, we must convert the insert into a modify of an - existing record */ - offsets = rec_get_offsets( - btr_cur_get_rec(&cursor), index, offsets, - ULINT_UNDEFINED, &offsets_heap); - - err = row_ins_sec_index_entry_by_modify( - flags, mode, &cursor, &offsets, - offsets_heap, heap, entry, thr, &mtr); - } else { - rec_t* insert_rec; - big_rec_t* big_rec; - - if (mode == BTR_MODIFY_LEAF) { - err = btr_cur_optimistic_insert( - flags, &cursor, &offsets, &offsets_heap, - entry, &insert_rec, - &big_rec, 0, thr, &mtr); - } else { - ut_ad(mode == BTR_MODIFY_TREE); - if (buf_LRU_buf_pool_running_out()) { - - err = DB_LOCK_TABLE_FULL; - goto func_exit; - } - - err = btr_cur_optimistic_insert( - flags, &cursor, - &offsets, &offsets_heap, - entry, &insert_rec, - &big_rec, 0, thr, &mtr); - if (err == DB_FAIL) { - err = btr_cur_pessimistic_insert( - flags, &cursor, - &offsets, &offsets_heap, - entry, &insert_rec, - &big_rec, 0, thr, &mtr); - } - } - - if (err == DB_SUCCESS && trx_id) { - page_update_max_trx_id( - btr_cur_get_block(&cursor), - btr_cur_get_page_zip(&cursor), - trx_id, &mtr); - } - - ut_ad(!big_rec); - } - -func_exit: - mtr_commit(&mtr); - return(err); -} - -/***************************************************************//** -Tries to insert the externally stored fields (off-page columns) -of a clustered index entry. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -UNIV_INTERN -dberr_t -row_ins_index_entry_big_rec_func( -/*=============================*/ - const dtuple_t* entry, /*!< in/out: index entry to insert */ - const big_rec_t* big_rec,/*!< in: externally stored fields */ - ulint* offsets,/*!< in/out: rec offsets */ - mem_heap_t** heap, /*!< in/out: memory heap */ - dict_index_t* index, /*!< in: index */ - const char* file, /*!< in: file name of caller */ -#ifndef DBUG_OFF - const void* thd, /*!< in: connection, or NULL */ -#endif /* DBUG_OFF */ - ulint line) /*!< in: line number of caller */ -{ - mtr_t mtr; - btr_cur_t cursor; - rec_t* rec; - dberr_t error; - - ut_ad(dict_index_is_clust(index)); - - DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch"); - - mtr_start(&mtr); - btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, 0, - file, line, &mtr); - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, heap); - - DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern"); - error = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(&cursor), - rec, offsets, big_rec, &mtr, BTR_STORE_INSERT); - DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern"); - - if (error == DB_SUCCESS - && dict_index_is_online_ddl(index)) { - row_log_table_insert(rec, index, offsets); - } - - mtr_commit(&mtr); - - return(error); -} - -/***************************************************************//** -Inserts an entry into a clustered index. Tries first optimistic, -then pessimistic descent down the tree. If the entry matches enough -to a delete marked record, performs the insert by updating or delete -unmarking the delete marked record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ -UNIV_INTERN -dberr_t -row_ins_clust_index_entry( -/*======================*/ - dict_index_t* index, /*!< in: clustered index */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - que_thr_t* thr, /*!< in: query thread */ - ulint n_ext) /*!< in: number of externally stored columns */ -{ - dberr_t err; - ulint n_uniq; - - if (!index->table->foreign_set.empty()) { - err = row_ins_check_foreign_constraints( - index->table, index, entry, thr); - if (err != DB_SUCCESS) { - - return(err); - } - } - - n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0; - - /* Try first optimistic descent to the B-tree */ - - log_free_check(); - - err = row_ins_clust_index_entry_low( - 0, BTR_MODIFY_LEAF, index, n_uniq, entry, n_ext, thr); - -#ifdef UNIV_DEBUG - /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). - Once it is fixed, remove the 'ifdef', 'if' and this comment. */ - if (!thr_get_trx(thr)->ddl) { - DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, - "after_row_ins_clust_index_entry_leaf"); - } -#endif /* UNIV_DEBUG */ - - if (err != DB_FAIL) { - DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after"); - return(err); - } - - /* Try then pessimistic descent to the B-tree */ - - log_free_check(); - - return(row_ins_clust_index_entry_low( - 0, BTR_MODIFY_TREE, index, n_uniq, entry, n_ext, thr)); -} - -/***************************************************************//** -Inserts an entry into a secondary index. Tries first optimistic, -then pessimistic descent down the tree. If the entry matches enough -to a delete marked record, performs the insert by updating or delete -unmarking the delete marked record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ -UNIV_INTERN -dberr_t -row_ins_sec_index_entry( -/*====================*/ - dict_index_t* index, /*!< in: secondary index */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - mem_heap_t* offsets_heap; - mem_heap_t* heap; - - if (!index->table->foreign_set.empty()) { - err = row_ins_check_foreign_constraints(index->table, index, - entry, thr); - if (err != DB_SUCCESS) { - - return(err); - } - } - - ut_ad(thr_get_trx(thr)->id); - - offsets_heap = mem_heap_create(1024); - heap = mem_heap_create(1024); - - /* Try first optimistic descent to the B-tree */ - - log_free_check(); - - err = row_ins_sec_index_entry_low( - 0, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry, 0, thr); - if (err == DB_FAIL) { - mem_heap_empty(heap); - - /* Try then pessimistic descent to the B-tree */ - - log_free_check(); - - err = row_ins_sec_index_entry_low( - 0, BTR_MODIFY_TREE, index, - offsets_heap, heap, entry, 0, thr); - } - - mem_heap_free(heap); - mem_heap_free(offsets_heap); - return(err); -} - -/***************************************************************//** -Inserts an index entry to index. Tries first optimistic, then pessimistic -descent down the tree. If the entry matches enough to a delete marked record, -performs the insert by updating or delete unmarking the delete marked -record. 
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */ -static -dberr_t -row_ins_index_entry( -/*================*/ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in/out: index entry to insert */ - que_thr_t* thr) /*!< in: query thread */ -{ - DBUG_EXECUTE_IF("row_ins_index_entry_timeout", { - DBUG_SET("-d,row_ins_index_entry_timeout"); - return(DB_LOCK_WAIT);}); - - if (dict_index_is_clust(index)) { - return(row_ins_clust_index_entry(index, entry, thr, 0)); - } else { - return(row_ins_sec_index_entry(index, entry, thr)); - } -} - -/***********************************************************//** -Sets the values of the dtuple fields in entry from the values of appropriate -columns in row. */ -static MY_ATTRIBUTE((nonnull)) -void -row_ins_index_entry_set_vals( -/*=========================*/ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry to make */ - const dtuple_t* row) /*!< in: row */ -{ - ulint n_fields; - ulint i; - - n_fields = dtuple_get_n_fields(entry); - - for (i = 0; i < n_fields; i++) { - dict_field_t* ind_field; - dfield_t* field; - const dfield_t* row_field; - ulint len; - - field = dtuple_get_nth_field(entry, i); - ind_field = dict_index_get_nth_field(index, i); - row_field = dtuple_get_nth_field(row, ind_field->col->ind); - len = dfield_get_len(row_field); - - /* Check column prefix indexes */ - if (ind_field->prefix_len > 0 - && dfield_get_len(row_field) != UNIV_SQL_NULL) { - - const dict_col_t* col - = dict_field_get_col(ind_field); - - len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, - ind_field->prefix_len, - len, - static_cast<const char*>( - dfield_get_data(row_field))); - - ut_ad(!dfield_is_ext(row_field)); - } - - dfield_set_data(field, dfield_get_data(row_field), len); - if (dfield_is_ext(row_field)) { - ut_ad(dict_index_is_clust(index)); - dfield_set_ext(field); - } - } -} - -/***********************************************************//** -Inserts 
a single index entry to the table. -@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins_index_entry_step( -/*=====================*/ - ins_node_t* node, /*!< in: row insert node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - - ut_ad(dtuple_check_typed(node->row)); - - row_ins_index_entry_set_vals(node->index, node->entry, node->row); - - ut_ad(dtuple_check_typed(node->entry)); - - err = row_ins_index_entry(node->index, node->entry, thr); - -#ifdef UNIV_DEBUG - /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). - Once it is fixed, remove the 'ifdef', 'if' and this comment. */ - if (!thr_get_trx(thr)->ddl) { - DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, - "after_row_ins_index_entry_step"); - } -#endif /* UNIV_DEBUG */ - - return(err); -} - -/***********************************************************//** -Allocates a row id for row and inits the node->index field. */ -UNIV_INLINE -void -row_ins_alloc_row_id_step( -/*======================*/ - ins_node_t* node) /*!< in: row insert node */ -{ - row_id_t row_id; - - ut_ad(node->state == INS_NODE_ALLOC_ROW_ID); - - if (dict_index_is_unique(dict_table_get_first_index(node->table))) { - - /* No row id is stored if the clustered index is unique */ - - return; - } - - /* Fill in row id value to row */ - - row_id = dict_sys_get_new_row_id(); - - dict_sys_write_row_id(node->row_id_buf, row_id); -} - -/***********************************************************//** -Gets a row to insert from the values list. 
*/ -UNIV_INLINE -void -row_ins_get_row_from_values( -/*========================*/ - ins_node_t* node) /*!< in: row insert node */ -{ - que_node_t* list_node; - dfield_t* dfield; - dtuple_t* row; - ulint i; - - /* The field values are copied in the buffers of the select node and - it is safe to use them until we fetch from select again: therefore - we can just copy the pointers */ - - row = node->row; - - i = 0; - list_node = node->values_list; - - while (list_node) { - eval_exp(list_node); - - dfield = dtuple_get_nth_field(row, i); - dfield_copy_data(dfield, que_node_get_val(list_node)); - - i++; - list_node = que_node_get_next(list_node); - } -} - -/***********************************************************//** -Gets a row to insert from the select list. */ -UNIV_INLINE -void -row_ins_get_row_from_select( -/*========================*/ - ins_node_t* node) /*!< in: row insert node */ -{ - que_node_t* list_node; - dfield_t* dfield; - dtuple_t* row; - ulint i; - - /* The field values are copied in the buffers of the select node and - it is safe to use them until we fetch from select again: therefore - we can just copy the pointers */ - - row = node->row; - - i = 0; - list_node = node->select->select_list; - - while (list_node) { - dfield = dtuple_get_nth_field(row, i); - dfield_copy_data(dfield, que_node_get_val(list_node)); - - i++; - list_node = que_node_get_next(list_node); - } -} - -/***********************************************************//** -Inserts a row to a table. 
-@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_ins( -/*====*/ - ins_node_t* node, /*!< in: row insert node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - - if (node->state == INS_NODE_ALLOC_ROW_ID) { - - row_ins_alloc_row_id_step(node); - - node->index = dict_table_get_first_index(node->table); - node->entry = UT_LIST_GET_FIRST(node->entry_list); - - if (node->ins_type == INS_SEARCHED) { - - row_ins_get_row_from_select(node); - - } else if (node->ins_type == INS_VALUES) { - - row_ins_get_row_from_values(node); - } - - node->state = INS_NODE_INSERT_ENTRIES; - } - - ut_ad(node->state == INS_NODE_INSERT_ENTRIES); - - while (node->index != NULL) { - if (node->index->type != DICT_FTS) { - err = row_ins_index_entry_step(node, thr); - - if (err != DB_SUCCESS) { - - return(err); - } - } - - node->index = dict_table_get_next_index(node->index); - node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); - - DBUG_EXECUTE_IF( - "row_ins_skip_sec", - node->index = NULL; node->entry = NULL; break;); - - /* Skip corrupted secondary index and its entry */ - while (node->index && dict_index_is_corrupted(node->index)) { - - node->index = dict_table_get_next_index(node->index); - node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); - } - } - - ut_ad(node->entry == NULL); - - node->state = INS_NODE_ALLOC_ROW_ID; - - return(DB_SUCCESS); -} - -/***********************************************************//** -Inserts a row to a table. This is a high-level function used in SQL execution -graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_ins_step( -/*=========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - ins_node_t* node; - que_node_t* parent; - sel_node_t* sel_node; - trx_t* trx; - dberr_t err; - - ut_ad(thr); - - trx = thr_get_trx(thr); - - trx_start_if_not_started_xa(trx); - - node = static_cast<ins_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_INSERT); - - parent = que_node_get_parent(node); - sel_node = node->select; - - if (thr->prev_node == parent) { - node->state = INS_NODE_SET_IX_LOCK; - } - - /* If this is the first time this node is executed (or when - execution resumes after wait for the table IX lock), set an - IX lock on the table and reset the possible select node. MySQL's - partitioned table code may also call an insert within the same - SQL statement AFTER it has used this table handle to do a search. - This happens, for example, when a row update moves it to another - partition. In that case, we have already set the IX lock on the - table during the search operation, and there is no need to set - it again here. But we must write trx->id to node->trx_id_buf. 
*/ - - trx_write_trx_id(node->trx_id_buf, trx->id); - - if (node->state == INS_NODE_SET_IX_LOCK) { - - node->state = INS_NODE_ALLOC_ROW_ID; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - if (trx->id == node->trx_id) { - /* No need to do IX-locking */ - - goto same_trx; - } - - err = lock_table(0, node->table, LOCK_IX, thr); - - DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait", - err = DB_LOCK_WAIT;); - - if (err != DB_SUCCESS) { - - goto error_handling; - } - - node->trx_id = trx->id; -same_trx: - if (node->ins_type == INS_SEARCHED) { - /* Reset the cursor */ - sel_node->state = SEL_NODE_OPEN; - - /* Fetch a row to insert */ - - thr->run_node = sel_node; - - return(thr); - } - } - - if ((node->ins_type == INS_SEARCHED) - && (sel_node->state != SEL_NODE_FETCH)) { - - ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); - - /* No more rows to insert */ - thr->run_node = parent; - - return(thr); - } - - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - - err = row_ins(node, thr); - -error_handling: - trx->error_state = err; - - if (err != DB_SUCCESS) { - /* err == DB_LOCK_WAIT or SQL error detected */ - return(NULL); - } - - /* DO THE TRIGGER ACTIONS HERE */ - - if (node->ins_type == INS_SEARCHED) { - /* Fetch a row to insert */ - - thr->run_node = sel_node; - } else { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc deleted file mode 100644 index 2cd663fd600..00000000000 --- a/storage/xtradb/row/row0log.cc +++ /dev/null @@ -1,3710 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0log.cc -Modification log for online index creation and online table rebuild - -Created 2011-05-26 Marko Makela -*******************************************************/ - -#include "row0log.h" - -#ifdef UNIV_NONINL -#include "row0log.ic" -#endif - -#include "row0row.h" -#include "row0ins.h" -#include "row0upd.h" -#include "row0merge.h" -#include "row0ext.h" -#include "data0data.h" -#include "que0que.h" -#include "handler0alter.h" - -#include<map> - -ulint onlineddl_rowlog_rows; -ulint onlineddl_rowlog_pct_used; -ulint onlineddl_pct_progress; - -/** Table row modification operations during online table rebuild. -Delete-marked records are not copied to the rebuilt table. 
*/ -enum row_tab_op { - /** Insert a record */ - ROW_T_INSERT = 0x41, - /** Update a record in place */ - ROW_T_UPDATE, - /** Delete (purge) a record */ - ROW_T_DELETE -}; - -/** Index record modification operations during online index creation */ -enum row_op { - /** Insert a record */ - ROW_OP_INSERT = 0x61, - /** Delete a record */ - ROW_OP_DELETE -}; - -#ifdef UNIV_DEBUG -/** Write information about the applied record to the error log */ -# define ROW_LOG_APPLY_PRINT -#endif /* UNIV_DEBUG */ - -#ifdef ROW_LOG_APPLY_PRINT -/** When set, write information about the applied record to the error log */ -static bool row_log_apply_print; -#endif /* ROW_LOG_APPLY_PRINT */ - -/** Size of the modification log entry header, in bytes */ -#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/ - -/** Log block for modifications during online ALTER TABLE */ -struct row_log_buf_t { - byte* block; /*!< file block buffer */ - mrec_buf_t buf; /*!< buffer for accessing a record - that spans two blocks */ - ulint blocks; /*!< current position in blocks */ - ulint bytes; /*!< current position within block */ - ulonglong total; /*!< logical position, in bytes from - the start of the row_log_table log; - 0 for row_log_online_op() and - row_log_apply(). */ - ulint size; /*!< allocated size of block */ -}; - -/** Tracks BLOB allocation during online ALTER TABLE */ -class row_log_table_blob_t { -public: - /** Constructor (declaring a BLOB freed) - @param offset_arg row_log_t::tail::total */ -#ifdef UNIV_DEBUG - row_log_table_blob_t(ulonglong offset_arg) : - old_offset (0), free_offset (offset_arg), - offset (BLOB_FREED) {} -#else /* UNIV_DEBUG */ - row_log_table_blob_t() : - offset (BLOB_FREED) {} -#endif /* UNIV_DEBUG */ - - /** Declare a BLOB freed again. 
- @param offset_arg row_log_t::tail::total */ -#ifdef UNIV_DEBUG - void blob_free(ulonglong offset_arg) -#else /* UNIV_DEBUG */ - void blob_free() -#endif /* UNIV_DEBUG */ - { - ut_ad(offset < offset_arg); - ut_ad(offset != BLOB_FREED); - ut_d(old_offset = offset); - ut_d(free_offset = offset_arg); - offset = BLOB_FREED; - } - /** Declare a freed BLOB reused. - @param offset_arg row_log_t::tail::total */ - void blob_alloc(ulonglong offset_arg) { - ut_ad(free_offset <= offset_arg); - ut_d(old_offset = offset); - offset = offset_arg; - } - /** Determine if a BLOB was freed at a given log position - @param offset_arg row_log_t::head::total after the log record - @return true if freed */ - bool is_freed(ulonglong offset_arg) const { - /* This is supposed to be the offset at the end of the - current log record. */ - ut_ad(offset_arg > 0); - /* We should never get anywhere close the magic value. */ - ut_ad(offset_arg < BLOB_FREED); - return(offset_arg < offset); - } -private: - /** Magic value for a freed BLOB */ - static const ulonglong BLOB_FREED = ~0ULL; -#ifdef UNIV_DEBUG - /** Old offset, in case a page was freed, reused, freed, ... */ - ulonglong old_offset; - /** Offset of last blob_free() */ - ulonglong free_offset; -#endif /* UNIV_DEBUG */ - /** Byte offset to the log file */ - ulonglong offset; -}; - -/** @brief Map of off-page column page numbers to 0 or log byte offsets. - -If there is no mapping for a page number, it is safe to access. -If a page number maps to 0, it is an off-page column that has been freed. -If a page number maps to a nonzero number, the number is a byte offset -into the index->online_log, indicating that the page is safe to access -when applying log records starting from that offset. */ -typedef std::map<ulint, row_log_table_blob_t> page_no_map; - -/** @brief Buffer for logging modifications during online index creation - -All modifications to an index that is being created will be logged by -row_log_online_op() to this buffer. 
- -All modifications to a table that is being rebuilt will be logged by -row_log_table_delete(), row_log_table_update(), row_log_table_insert() -to this buffer. - -When head.blocks == tail.blocks, the reader will access tail.block -directly. When also head.bytes == tail.bytes, both counts will be -reset to 0 and the file will be truncated. */ -struct row_log_t { - int fd; /*!< file descriptor */ - ib_mutex_t mutex; /*!< mutex protecting error, - max_trx and tail */ - page_no_map* blobs; /*!< map of page numbers of off-page columns - that have been freed during table-rebuilding - ALTER TABLE (row_log_table_*); protected by - index->lock X-latch only */ - dict_table_t* table; /*!< table that is being rebuilt, - or NULL when this is a secondary - index that is being created online */ - bool same_pk;/*!< whether the definition of the PRIMARY KEY - has remained the same */ - const dtuple_t* add_cols; - /*!< default values of added columns, or NULL */ - const ulint* col_map;/*!< mapping of old column numbers to - new ones, or NULL if !table */ - dberr_t error; /*!< error that occurred during online - table rebuild */ - trx_id_t max_trx;/*!< biggest observed trx_id in - row_log_online_op(); - protected by mutex and index->lock S-latch, - or by index->lock X-latch only */ - row_log_buf_t tail; /*!< writer context; - protected by mutex and index->lock S-latch, - or by index->lock X-latch only */ - row_log_buf_t head; /*!< reader context; protected by MDL only; - modifiable by row_log_apply_ops() */ - const char* path; /*!< where to create temporary file during - log operation */ -}; - -/** Create the file or online log if it does not exist. -@param[in,out] log online rebuild log -@return file descriptor. */ -static MY_ATTRIBUTE((warn_unused_result)) -int -row_log_tmpfile( - row_log_t* log) -{ - DBUG_ENTER("row_log_tmpfile"); - if (log->fd < 0) { - log->fd = row_merge_file_create_low(log->path); - } - - DBUG_RETURN(log->fd); -} - -/** Allocate the memory for the log buffer. 
-@param[in,out] log_buf Buffer used for log operation
-@return TRUE if success, false if not */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-row_log_block_allocate(
-	row_log_buf_t&	log_buf)
-{
-	DBUG_ENTER("row_log_block_allocate");
-	/* The block is allocated lazily: nothing to do if it exists. */
-	if (log_buf.block == NULL) {
-		log_buf.size = srv_sort_buf_size;
-		log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size);
-		/* Debug-only fault injection: release the block again
-		and behave as if the allocation had failed. */
-		DBUG_EXECUTE_IF("simulate_row_log_allocation_failure",
-			if (log_buf.block)
-				os_mem_free_large(log_buf.block, log_buf.size);
-			log_buf.block = NULL;);
-		if (!log_buf.block) {
-			DBUG_RETURN(false);
-		}
-	}
-	DBUG_RETURN(true);
-}
-
-/** Free the log buffer, if one has been allocated, and reset
-log_buf.block to NULL so that the call is safe to repeat.
-@param[in,out]	log_buf	Buffer used for log operation */
-static
-void
-row_log_block_free(
-	row_log_buf_t&	log_buf)
-{
-	DBUG_ENTER("row_log_block_free");
-	if (log_buf.block != NULL) {
-		os_mem_free_large(log_buf.block, log_buf.size);
-		log_buf.block = NULL;
-	}
-	DBUG_VOID_RETURN;
-}
-
-/******************************************************//**
-Logs an operation to a secondary index that is (or was) being created.
-The record is appended to log->tail; when the current block becomes full
-it is written to the temporary file. If the block or the temporary file
-cannot be set up, log->error is set to DB_OUT_OF_MEMORY; if the log would
-exceed srv_online_max_size or the write fails, the index is flagged
-DICT_CORRUPT. Nothing is logged if the index is already corrupted. */
-UNIV_INTERN
-void
-row_log_online_op(
-/*==============*/
-	dict_index_t*	index,	/*!< in/out: index, S or X latched */
-	const dtuple_t* tuple,	/*!< in: index tuple */
-	trx_id_t	trx_id)	/*!< in: transaction ID for insert,
-				or 0 for delete */
-{
-	byte*		b;
-	ulint		extra_size;
-	ulint		size;
-	ulint		mrec_size;
-	ulint		avail_size;
-	row_log_t*	log;
-
-	ut_ad(dtuple_validate(tuple));
-	ut_ad(dtuple_get_n_fields(tuple) == dict_index_get_n_fields(index));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
-	      || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (dict_index_is_corrupted(index)) {
-		return;
-	}
-
-	ut_ad(dict_index_is_online_ddl(index));
-
-	/* Compute the size of the record. This differs from
-	row_merge_buf_encode(), because here we do not encode
-	extra_size+1 (and reserve 0 as the end-of-chunk marker). */
-
-	size = rec_get_converted_size_temp(
-		index, tuple->fields, tuple->n_fields, &extra_size);
-	ut_ad(size >= extra_size);
-	/* log is not assigned yet, but sizeof does not evaluate it. */
-	ut_ad(size <= sizeof log->tail.buf);
-
-	/* Header, one additional byte when extra_size needs the 2-byte
-	encoding, the record itself, and the transaction ID that is
-	only logged for inserts. */
-	mrec_size = ROW_LOG_HEADER_SIZE
-		+ (extra_size >= 0x80) + size
-		+ (trx_id ? DATA_TRX_ID_LEN : 0);
-
-	log = index->online_log;
-	mutex_enter(&log->mutex);
-
-	if (trx_id > log->max_trx) {
-		log->max_trx = trx_id;
-	}
-
-	if (!row_log_block_allocate(log->tail)) {
-		log->error = DB_OUT_OF_MEMORY;
-		goto err_exit;
-	}
-
-	UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
-
-	ut_ad(log->tail.bytes < srv_sort_buf_size);
-	avail_size = srv_sort_buf_size - log->tail.bytes;
-
-	/* A record that does not fit in the rest of the current block
-	is assembled in the spare buffer tail.buf and split across two
-	blocks further below; otherwise it is written in place. */
-	if (mrec_size > avail_size) {
-		b = log->tail.buf;
-	} else {
-		b = log->tail.block + log->tail.bytes;
-	}
-
-	if (trx_id != 0) {
-		*b++ = ROW_OP_INSERT;
-		trx_write_trx_id(b, trx_id);
-		b += DATA_TRX_ID_LEN;
-	} else {
-		*b++ = ROW_OP_DELETE;
-	}
-
-	/* extra_size is stored in one byte if it is below 0x80,
-	else in two bytes with the high bit of the first byte set. */
-	if (extra_size < 0x80) {
-		*b++ = (byte) extra_size;
-	} else {
-		ut_ad(extra_size < 0x8000);
-		*b++ = (byte) (0x80 | (extra_size >> 8));
-		*b++ = (byte) extra_size;
-	}
-
-	rec_convert_dtuple_to_temp(
-		b + extra_size, index, tuple->fields, tuple->n_fields);
-	b += size;
-
-	if (mrec_size >= avail_size) {
-		/* The current block has become full: write it out. */
-		const os_offset_t	byte_offset
-			= (os_offset_t) log->tail.blocks
-			* srv_sort_buf_size;
-		ibool			ret;
-
-		/* The log would reach srv_online_max_size: handle this
-		like a failed write. */
-		if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
-			goto write_failed;
-		}
-
-		if (mrec_size == avail_size) {
-			ut_ad(b == &log->tail.block[srv_sort_buf_size]);
-		} else {
-			/* Fill the rest of the block with the first
-			avail_size bytes of the buffered record. */
-			ut_ad(b == log->tail.buf + mrec_size);
-			memcpy(log->tail.block + log->tail.bytes,
-			       log->tail.buf, avail_size);
-		}
-		UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
-
-		if (row_log_tmpfile(log) < 0) {
-			log->error = DB_OUT_OF_MEMORY;
-			goto err_exit;
-		}
-
-		ret = os_file_write_int_fd(
-			"(modification log)",
-			log->fd,
-			log->tail.block, byte_offset, srv_sort_buf_size);
-		log->tail.blocks++;
-		if (!ret) {
-write_failed:
-			/* We set the flag directly instead of invoking
-			dict_set_corrupted_index_cache_only(index) here,
-			because the index is not "public" yet. */
-			index->type |= DICT_CORRUPT;
-		}
-		UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
-		/* Start the next block with the remainder of the record
-		(zero bytes if the record ended exactly at the block end). */
-		memcpy(log->tail.block, log->tail.buf + avail_size,
-		       mrec_size - avail_size);
-		log->tail.bytes = mrec_size - avail_size;
-	} else {
-		log->tail.bytes += mrec_size;
-		ut_ad(b == log->tail.block + log->tail.bytes);
-	}
-
-	UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
-err_exit:
-	mutex_exit(&log->mutex);
-}
-
-/******************************************************//**
-Gets the error status of the online index rebuild log.
-The value is read from index->online_log->error without acquiring
-log->mutex.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_log_table_get_error(
-/*====================*/
-	const dict_index_t*	index)	/*!< in: clustered index of a table
-					that is being rebuilt online */
-{
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(dict_index_is_online_ddl(index));
-	return(index->online_log->error);
-}
-
-/******************************************************//**
-Starts logging an operation to a table that is being rebuilt.
-@return pointer to log, or NULL if no logging is necessary */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -byte* -row_log_table_open( -/*===============*/ - row_log_t* log, /*!< in/out: online rebuild log */ - ulint size, /*!< in: size of log record */ - ulint* avail) /*!< out: available size for log record */ -{ - mutex_enter(&log->mutex); - - UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); - - if (log->error != DB_SUCCESS) { -err_exit: - mutex_exit(&log->mutex); - return(NULL); - } - - if (!row_log_block_allocate(log->tail)) { - log->error = DB_OUT_OF_MEMORY; - goto err_exit; - } - - ut_ad(log->tail.bytes < srv_sort_buf_size); - *avail = srv_sort_buf_size - log->tail.bytes; - - if (size > *avail) { - return(log->tail.buf); - } else { - return(log->tail.block + log->tail.bytes); - } -} - -/******************************************************//** -Stops logging an operation to a table that is being rebuilt. */ -static MY_ATTRIBUTE((nonnull)) -void -row_log_table_close_func( -/*=====================*/ - row_log_t* log, /*!< in/out: online rebuild log */ -#ifdef UNIV_DEBUG - const byte* b, /*!< in: end of log record */ -#endif /* UNIV_DEBUG */ - ulint size, /*!< in: size of log record */ - ulint avail) /*!< in: available size for log record */ -{ - ut_ad(mutex_own(&log->mutex)); - - if (size >= avail) { - const os_offset_t byte_offset - = (os_offset_t) log->tail.blocks - * srv_sort_buf_size; - ibool ret; - - if (byte_offset + srv_sort_buf_size >= srv_online_max_size) { - goto write_failed; - } - - if (size == avail) { - ut_ad(b == &log->tail.block[srv_sort_buf_size]); - } else { - ut_ad(b == log->tail.buf + size); - memcpy(log->tail.block + log->tail.bytes, - log->tail.buf, avail); - } - UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size); - - if (row_log_tmpfile(log) < 0) { - log->error = DB_OUT_OF_MEMORY; - goto err_exit; - } - - ret = os_file_write_int_fd( - "(modification log)", - log->fd, - log->tail.block, byte_offset, srv_sort_buf_size); - 
log->tail.blocks++; - if (!ret) { -write_failed: - log->error = DB_ONLINE_LOG_TOO_BIG; - } - UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size); - memcpy(log->tail.block, log->tail.buf + avail, size - avail); - log->tail.bytes = size - avail; - } else { - log->tail.bytes += size; - ut_ad(b == log->tail.block + log->tail.bytes); - } - - log->tail.total += size; - UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); -err_exit: - mutex_exit(&log->mutex); - - os_atomic_increment_ulint(&onlineddl_rowlog_rows, 1); - /* 10000 means 100.00%, 4525 means 45.25% */ - onlineddl_rowlog_pct_used = (log->tail.total * 10000) / srv_online_max_size; -} - -#ifdef UNIV_DEBUG -# define row_log_table_close(log, b, size, avail) \ - row_log_table_close_func(log, b, size, avail) -#else /* UNIV_DEBUG */ -# define row_log_table_close(log, b, size, avail) \ - row_log_table_close_func(log, size, avail) -#endif /* UNIV_DEBUG */ - -/******************************************************//** -Logs a delete operation to a table that is being rebuilt. -This will be merged in row_log_table_apply_delete(). 
*/
-UNIV_INTERN
-void
-row_log_table_delete(
-/*=================*/
-	const rec_t*	rec,	/*!< in: clustered index leaf page record,
-				page X-latched */
-	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
-				or X-latched */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index) */
-	const byte*	sys)	/*!< in: DB_TRX_ID,DB_ROLL_PTR that should
-				be logged, or NULL to use those in rec */
-{
-	ulint		old_pk_extra_size;
-	ulint		old_pk_size;
-	ulint		ext_size = 0;
-	ulint		mrec_size;
-	ulint		avail_size;
-	mem_heap_t*	heap		= NULL;
-	const dtuple_t*	old_pk;
-	row_ext_t*	ext;	/* only assigned by row_build() below, and
-				only read when ext_size != 0 */
-
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
-	ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
-	      || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Nothing is logged for a corrupted index, for an index that is
-	not in online DDL, or after the log has failed. */
-	if (dict_index_is_corrupted(index)
-	    || !dict_index_is_online_ddl(index)
-	    || index->online_log->error != DB_SUCCESS) {
-		return;
-	}
-
-	dict_table_t* new_table = index->online_log->table;
-	dict_index_t* new_index = dict_table_get_first_index(new_table);
-
-	ut_ad(dict_index_is_clust(new_index));
-	ut_ad(!dict_index_is_online_ddl(new_index));
-
-	/* Create the tuple PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in new_table. */
-	if (index->online_log->same_pk) {
-		dtuple_t*	tuple;
-		ut_ad(new_index->n_uniq == index->n_uniq);
-
-		/* The PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR are in the first
-		fields of the record. */
-		heap = mem_heap_create(
-			DATA_TRX_ID_LEN
-			+ DTUPLE_EST_ALLOC(new_index->n_uniq + 2));
-		old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 2);
-		dict_index_copy_types(tuple, new_index, tuple->n_fields);
-		dtuple_set_n_fields_cmp(tuple, new_index->n_uniq);
-
-		/* The tuple fields point into rec; the data is not copied. */
-		for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
-			ulint		len;
-			const void*	field	= rec_get_nth_field(
-				rec, offsets, i, &len);
-			dfield_t*	dfield	= dtuple_get_nth_field(
-				tuple, i);
-			ut_ad(len != UNIV_SQL_NULL);
-			ut_ad(!rec_offs_nth_extern(offsets, i));
-			dfield_set_data(dfield, field, len);
-		}
-
-		/* Override the system columns taken from rec with the
-		caller-supplied DB_TRX_ID,DB_ROLL_PTR. */
-		if (sys) {
-			dfield_set_data(
-				dtuple_get_nth_field(tuple,
-						     new_index->n_uniq),
-				sys, DATA_TRX_ID_LEN);
-			dfield_set_data(
-				dtuple_get_nth_field(tuple,
-						     new_index->n_uniq + 1),
-				sys + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
-		}
-	} else {
-		/* The PRIMARY KEY has changed. Translate the tuple. */
-		old_pk = row_log_table_get_pk(
-			rec, index, offsets, NULL, &heap);
-
-		if (!old_pk) {
-			ut_ad(index->online_log->error != DB_SUCCESS);
-			/* row_log_table_get_pk() may have created the
-			heap before failing; free it in that case. */
-			if (heap) {
-				goto func_exit;
-			}
-			return;
-		}
-	}
-
-	ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
-		      old_pk, old_pk->n_fields - 2)->len);
-	ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
-		      old_pk, old_pk->n_fields - 1)->len);
-	old_pk_size = rec_get_converted_size_temp(
-		new_index, old_pk->fields, old_pk->n_fields,
-		&old_pk_extra_size);
-	ut_ad(old_pk_extra_size < 0x100);
-
-	/* 6 = 1 byte ROW_T_DELETE + 1 byte old_pk_extra_size
-	+ 4 bytes ext_size, as written below. */
-	mrec_size = 6 + old_pk_size;
-
-	/* Log enough prefix of the BLOB unless both the
-	old and new table are in COMPACT or REDUNDANT format,
-	which store the prefix in the clustered index record. */
-	if (rec_offs_any_extern(offsets)
-	    && (dict_table_get_format(index->table) >= UNIV_FORMAT_B
-		|| dict_table_get_format(new_table) >= UNIV_FORMAT_B)) {
-
-		/* Build a cache of those off-page column prefixes
-		that are referenced by secondary indexes. It can be
-		that none of the off-page columns are needed. */
-		row_build(ROW_COPY_DATA, index, rec,
-			  offsets, NULL, NULL, NULL, &ext, heap);
-		if (ext) {
-			/* Log the row_ext_t, ext->ext and ext->buf */
-			ext_size = ext->n_ext * ext->max_len
-				+ sizeof(*ext)
-				+ ext->n_ext * sizeof(ulint)
-				+ (ext->n_ext - 1) * sizeof ext->len;
-			mrec_size += ext_size;
-		}
-	}
-
-	if (byte* b = row_log_table_open(index->online_log,
-					 mrec_size, &avail_size)) {
-		*b++ = ROW_T_DELETE;
-		*b++ = static_cast<byte>(old_pk_extra_size);
-
-		/* Log the size of external prefix we saved */
-		mach_write_to_4(b, ext_size);
-		b += 4;
-
-		rec_convert_dtuple_to_temp(
-			b + old_pk_extra_size, new_index,
-			old_pk->fields, old_pk->n_fields);
-
-		b += old_pk_size;
-
-		if (ext_size) {
-			/* Size of the row_ext_t including its len[]
-			array of ext->n_ext elements. */
-			ulint	cur_ext_size = sizeof(*ext)
-				+ (ext->n_ext - 1) * sizeof ext->len;
-
-			memcpy(b, ext, cur_ext_size);
-			b += cur_ext_size;
-
-			/* Check if we need to use col_map to adjust the
-			column number. If columns were added/removed/reordered,
-			adjust the column number. Note that this rewrites
-			ext->ext[] in place before it is copied to the log. */
-			if (const ulint* col_map =
-				index->online_log->col_map) {
-				for (ulint i = 0; i < ext->n_ext; i++) {
-					const_cast<ulint&>(ext->ext[i]) =
-						col_map[ext->ext[i]];
-				}
-			}
-
-			memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext));
-			b += ext->n_ext * sizeof(*ext->ext);
-
-			/* What remains of ext_size is the size of the
-			prefix buffer ext->buf. */
-			ext_size -= cur_ext_size
-				 + ext->n_ext * sizeof(*ext->ext);
-			memcpy(b, ext->buf, ext_size);
-			b += ext_size;
-		}
-
-		row_log_table_close(
-			index->online_log, b, mrec_size, avail_size);
-	}
-
-func_exit:
-	mem_heap_free(heap);
-}
-
-/******************************************************//**
-Logs an insert or update to a table that is being rebuilt.
*/
-static
-void
-row_log_table_low_redundant(
-/*========================*/
-	const rec_t*		rec,	/*!< in: clustered index leaf
-					page record in ROW_FORMAT=REDUNDANT,
-					page X-latched */
-	dict_index_t*		index,	/*!< in/out: clustered index, S-latched
-					or X-latched */
-	bool			insert,	/*!< in: true if insert,
-					false if update */
-	const dtuple_t*		old_pk,	/*!< in: old PRIMARY KEY value
-					(if !insert and a PRIMARY KEY
-					is being created) */
-	const dict_index_t*	new_index)
-					/*!< in: clustered index of the
-					new table, not latched */
-{
-	ulint		old_pk_size;
-	ulint		old_pk_extra_size;
-	ulint		size;
-	ulint		extra_size;
-	ulint		mrec_size;
-	ulint		avail_size;
-	mem_heap_t*	heap		= NULL;
-	dtuple_t*	tuple;
-
-	ut_ad(!page_is_comp(page_align(rec)));
-	ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
-	ut_ad(dict_tf_is_valid(index->table->flags));
-	ut_ad(!dict_table_is_comp(index->table));  /* redundant row format */
-	ut_ad(dict_index_is_clust(new_index));
-
-	/* Convert the old-style record into a data tuple whose fields
-	point into rec, so that it can be re-encoded in the temporary
-	file format below. */
-	heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
-	tuple = dtuple_create(heap, index->n_fields);
-	dict_index_copy_types(tuple, index, index->n_fields);
-	dtuple_set_n_fields_cmp(tuple, dict_index_get_n_unique(index));
-
-	/* The two loops differ only in the check for externally stored
-	fields, which is made for records with 2-byte offsets only.
-	NOTE(review): presumably records with 1-byte offsets cannot
-	contain externally stored fields; confirm. */
-	if (rec_get_1byte_offs_flag(rec)) {
-		for (ulint i = 0; i < index->n_fields; i++) {
-			dfield_t*	dfield;
-			ulint		len;
-			const void*	field;
-
-			dfield = dtuple_get_nth_field(tuple, i);
-			field = rec_get_nth_field_old(rec, i, &len);
-
-			dfield_set_data(dfield, field, len);
-		}
-	} else {
-		for (ulint i = 0; i < index->n_fields; i++) {
-			dfield_t*	dfield;
-			ulint		len;
-			const void*	field;
-
-			dfield = dtuple_get_nth_field(tuple, i);
-			field = rec_get_nth_field_old(rec, i, &len);
-
-			dfield_set_data(dfield, field, len);
-
-			if (rec_2_is_field_extern(rec, i)) {
-				dfield_set_ext(dfield);
-			}
-		}
-	}
-
-	size = rec_get_converted_size_temp(
-		index, tuple->fields, tuple->n_fields, &extra_size);
-
-	/* One additional byte is needed when extra_size requires the
-	2-byte encoding. */
-	mrec_size = ROW_LOG_HEADER_SIZE + size + (extra_size >= 0x80);
-
-	if (insert || index->online_log->same_pk) {
-		/* No old PRIMARY KEY value is logged. */
-		ut_ad(!old_pk);
-		old_pk_extra_size = old_pk_size = 0;
-	} else {
-		ut_ad(old_pk);
-		ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
-		ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
-			      old_pk, old_pk->n_fields - 2)->len);
-		ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
-			      old_pk, old_pk->n_fields - 1)->len);
-
-		old_pk_size = rec_get_converted_size_temp(
-			new_index, old_pk->fields, old_pk->n_fields,
-			&old_pk_extra_size);
-		ut_ad(old_pk_extra_size < 0x100);
-		mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
-	}
-
-	if (byte* b = row_log_table_open(index->online_log,
-					 mrec_size, &avail_size)) {
-		*b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
-
-		/* The old PRIMARY KEY, if any, precedes the record. */
-		if (old_pk_size) {
-			*b++ = static_cast<byte>(old_pk_extra_size);
-
-			rec_convert_dtuple_to_temp(
-				b + old_pk_extra_size, new_index,
-				old_pk->fields, old_pk->n_fields);
-			b += old_pk_size;
-		}
-
-		/* extra_size is stored in one byte if it is below 0x80,
-		else in two bytes with the high bit of the first byte set. */
-		if (extra_size < 0x80) {
-			*b++ = static_cast<byte>(extra_size);
-		} else {
-			ut_ad(extra_size < 0x8000);
-			*b++ = static_cast<byte>(0x80 | (extra_size >> 8));
-			*b++ = static_cast<byte>(extra_size);
-		}
-
-		rec_convert_dtuple_to_temp(
-			b + extra_size, index, tuple->fields, tuple->n_fields);
-		b += size;
-
-		row_log_table_close(
-			index->online_log, b, mrec_size, avail_size);
-	}
-
-	mem_heap_free(heap);
-}
-
-/******************************************************//**
-Logs an insert or update to a table that is being rebuilt.
*/ -static MY_ATTRIBUTE((nonnull(1,2,3))) -void -row_log_table_low( -/*==============*/ - const rec_t* rec, /*!< in: clustered index leaf page record, - page X-latched */ - dict_index_t* index, /*!< in/out: clustered index, S-latched - or X-latched */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ - bool insert, /*!< in: true if insert, false if update */ - const dtuple_t* old_pk) /*!< in: old PRIMARY KEY value (if !insert - and a PRIMARY KEY is being created) */ -{ - ulint omit_size; - ulint old_pk_size; - ulint old_pk_extra_size; - ulint extra_size; - ulint mrec_size; - ulint avail_size; - const dict_index_t* new_index = dict_table_get_first_index( - index->online_log->table); - ut_ad(dict_index_is_clust(index)); - ut_ad(dict_index_is_clust(new_index)); - ut_ad(!dict_index_is_online_ddl(new_index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index)); - ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED) - || rw_lock_own(&index->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); - ut_ad(page_is_leaf(page_align(rec))); - ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets)); - - if (dict_index_is_corrupted(index) - || !dict_index_is_online_ddl(index) - || index->online_log->error != DB_SUCCESS) { - return; - } - - if (!rec_offs_comp(offsets)) { - row_log_table_low_redundant( - rec, index, insert, old_pk, new_index); - return; - } - - ut_ad(page_is_comp(page_align(rec))); - ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY); - - omit_size = REC_N_NEW_EXTRA_BYTES; - - extra_size = rec_offs_extra_size(offsets) - omit_size; - - mrec_size = ROW_LOG_HEADER_SIZE - + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size; - - if (insert || index->online_log->same_pk) { - ut_ad(!old_pk); - old_pk_extra_size = old_pk_size = 0; - } 
else { - ut_ad(old_pk); - ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp); - ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field( - old_pk, old_pk->n_fields - 2)->len); - ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field( - old_pk, old_pk->n_fields - 1)->len); - - old_pk_size = rec_get_converted_size_temp( - new_index, old_pk->fields, old_pk->n_fields, - &old_pk_extra_size); - ut_ad(old_pk_extra_size < 0x100); - mrec_size += 1/*old_pk_extra_size*/ + old_pk_size; - } - - if (byte* b = row_log_table_open(index->online_log, - mrec_size, &avail_size)) { - *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE; - - if (old_pk_size) { - *b++ = static_cast<byte>(old_pk_extra_size); - - rec_convert_dtuple_to_temp( - b + old_pk_extra_size, new_index, - old_pk->fields, old_pk->n_fields); - b += old_pk_size; - } - - if (extra_size < 0x80) { - *b++ = static_cast<byte>(extra_size); - } else { - ut_ad(extra_size < 0x8000); - *b++ = static_cast<byte>(0x80 | (extra_size >> 8)); - *b++ = static_cast<byte>(extra_size); - } - - memcpy(b, rec - rec_offs_extra_size(offsets), extra_size); - b += extra_size; - memcpy(b, rec, rec_offs_data_size(offsets)); - b += rec_offs_data_size(offsets); - - row_log_table_close( - index->online_log, b, mrec_size, avail_size); - } -} - -/******************************************************//** -Logs an update to a table that is being rebuilt. -This will be merged in row_log_table_apply_update(). */ -UNIV_INTERN -void -row_log_table_update( -/*=================*/ - const rec_t* rec, /*!< in: clustered index leaf page record, - page X-latched */ - dict_index_t* index, /*!< in/out: clustered index, S-latched - or X-latched */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */ - const dtuple_t* old_pk) /*!< in: row_log_table_get_pk() - before the update */ -{ - row_log_table_low(rec, index, offsets, false, old_pk); -} - -/** Gets the old table column of a PRIMARY KEY column. 
-@param table old table (before ALTER TABLE)
-@param col_map mapping of old column numbers to new ones
-@param col_no column position in the new table
-@return old table column, or NULL if this is an added column */
-static
-const dict_col_t*
-row_log_table_get_pk_old_col(
-/*=========================*/
-	const dict_table_t*	table,
-	const ulint*		col_map,
-	ulint			col_no)
-{
-	/* Linear reverse lookup: find the old column number i that
-	col_map translates to col_no. */
-	for (ulint i = 0; i < table->n_cols; i++) {
-		if (col_no == col_map[i]) {
-			return(dict_table_get_nth_col(table, i));
-		}
-	}
-
-	return(NULL);
-}
-
-/** Maps an old table column of a PRIMARY KEY column.
-@param	col	old table column (before ALTER TABLE); not referenced
-		in the function body
-@param	ifield	clustered index field in the new table (after ALTER TABLE)
-@param	dfield	clustered index tuple field in the new table
-@param	heap	memory heap for allocating dfield contents
-@param	rec	clustered index leaf page record in the old table
-@param	offsets	rec_get_offsets(rec)
-@param	i	rec field corresponding to col
-@param	zip_size compressed page size of the old table, or 0 for uncompressed
-@param	max_len	maximum length of dfield
-@retval DB_SUCCESS if dfield was set to a copy of the column value
-@retval DB_INVALID_NULL if a NULL value is encountered
-@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
-static
-dberr_t
-row_log_table_get_pk_col(
-/*=====================*/
-	const dict_col_t*	col,
-	const dict_field_t*	ifield,
-	dfield_t*		dfield,
-	mem_heap_t*		heap,
-	const rec_t*		rec,
-	const ulint*		offsets,
-	ulint			i,
-	ulint			zip_size,
-	ulint			max_len)
-{
-	const byte*	field;
-	ulint		len;
-
-	ut_ad(ut_is_2pow(zip_size));
-
-	field = rec_get_nth_field(rec, offsets, i, &len);
-
-	if (len == UNIV_SQL_NULL) {
-		return(DB_INVALID_NULL);
-	}
-
-	if (rec_offs_nth_extern(offsets, i)) {
-		/* Externally stored column: fetch a prefix of it. The
-		prefix length is the index prefix length if there is one,
-		else the fixed length of the field, else max_len + 1. */
-		ulint	field_len = ifield->prefix_len;
-		byte*	blob_field;
-
-		if (!field_len) {
-			field_len = ifield->fixed_len;
-			if (!field_len) {
-				/* One byte more than max_len, so that
-				an over-long value is detected below. */
-				field_len = max_len + 1;
-			}
-		}
-
-		blob_field = static_cast<byte*>(
-			mem_heap_alloc(heap, field_len));
-
-		len = btr_copy_externally_stored_field_prefix(
-			blob_field, field_len, zip_size, field, len, NULL);
-		if (len >= max_len + 1) {
-			return(DB_TOO_BIG_INDEX_COL);
-		}
-
-		dfield_set_data(dfield, blob_field, len);
-	} else {
-		/* Copy the value to the heap, so that it does not
-		point into rec. */
-		dfield_set_data(dfield, mem_heap_dup(heap, field, len), len);
-	}
-
-	return(DB_SUCCESS);
-}
-
-/******************************************************//**
-Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
-of a table that is being rebuilt.
-@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
-or NULL if the PRIMARY KEY definition does not change, or if the log is
-in an error state or the conversion fails (log->error is then set) */
-UNIV_INTERN
-const dtuple_t*
-row_log_table_get_pk(
-/*=================*/
-	const rec_t*	rec,	/*!< in: clustered index leaf page record,
-				page X-latched */
-	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
-				or X-latched */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec,index),
-				or NULL to have them computed here */
-	byte*		sys,	/*!< out: DB_TRX_ID,DB_ROLL_PTR for
-				row_log_table_delete(), or NULL */
-	mem_heap_t**	heap)	/*!< in/out: memory heap where allocated;
-				created here if *heap is NULL */
-{
-	dtuple_t*	tuple	= NULL;
-	row_log_t*	log	= index->online_log;
-
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(dict_index_is_online_ddl(index));
-	ut_ad(!offsets || rec_offs_validate(rec, index, offsets));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
-	      || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	ut_ad(log);
-	ut_ad(log->table);
-
-	if (log->same_pk) {
-		/* The PRIMARY KEY columns are unchanged. */
-		if (sys) {
-			/* Store the DB_TRX_ID,DB_ROLL_PTR. */
-			ulint	trx_id_offs = index->trx_id_offset;
-
-			/* A zero trx_id_offset means that the offset
-			has to be determined from the record offsets. */
-			if (!trx_id_offs) {
-				ulint	pos = dict_index_get_sys_col_pos(
-					index, DATA_TRX_ID);
-				ulint	len;
-				ut_ad(pos > 0);
-
-				if (!offsets) {
-					offsets = rec_get_offsets(
-						rec, index, NULL, pos + 1,
-						heap);
-				}
-
-				trx_id_offs = rec_get_nth_field_offs(
-					offsets, pos, &len);
-				ut_ad(len == DATA_TRX_ID_LEN);
-			}
-
-			/* The two system columns are copied with a
-			single memcpy, DB_ROLL_PTR directly following
-			DB_TRX_ID. */
-			memcpy(sys, rec + trx_id_offs,
-			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-		}
-
-		return(NULL);
-	}
-
-	mutex_enter(&log->mutex);
-
-	/* log->error is protected by log->mutex. */
-	if (log->error == DB_SUCCESS) {
-		dict_table_t*	new_table	= log->table;
-		dict_index_t*	new_index
-			= dict_table_get_first_index(new_table);
-		const ulint	new_n_uniq
-			= dict_index_get_n_unique(new_index);
-
-		if (!*heap) {
-			/* Estimate the initial heap size: room for the
-			offsets array if it must be computed, plus the
-			minimum size of each PRIMARY KEY column. */
-			ulint	size = 0;
-
-			if (!offsets) {
-				size += (1 + REC_OFFS_HEADER_SIZE
-					 + index->n_fields)
-					* sizeof *offsets;
-			}
-
-			for (ulint i = 0; i < new_n_uniq; i++) {
-				size += dict_col_get_min_size(
-					dict_index_get_nth_col(new_index, i));
-			}
-
-			*heap = mem_heap_create(
-				DTUPLE_EST_ALLOC(new_n_uniq + 2) + size);
-		}
-
-		if (!offsets) {
-			offsets = rec_get_offsets(rec, index, NULL,
-						  ULINT_UNDEFINED, heap);
-		}
-
-		/* new_n_uniq key fields plus DB_TRX_ID and DB_ROLL_PTR */
-		tuple = dtuple_create(*heap, new_n_uniq + 2);
-		dict_index_copy_types(tuple, new_index, tuple->n_fields);
-		dtuple_set_n_fields_cmp(tuple, new_n_uniq);
-
-		const ulint max_len = DICT_MAX_FIELD_LEN_BY_FORMAT(new_table);
-		const ulint zip_size = dict_table_zip_size(index->table);
-
-		for (ulint new_i = 0; new_i < new_n_uniq; new_i++) {
-			dict_field_t*	ifield;
-			dfield_t*	dfield;
-			ulint		prtype;
-			ulint		mbminmaxlen;
-
-			ifield = dict_index_get_nth_field(new_index, new_i);
-			dfield = dtuple_get_nth_field(tuple, new_i);
-
-			const ulint	col_no
-				= dict_field_get_col(ifield)->ind;
-
-			if (const dict_col_t* col
-			    = row_log_table_get_pk_old_col(
-				    index->table, log->col_map, col_no)) {
-				/* The column exists in the old table:
-				copy its value from rec. */
-				ulint	i = dict_col_get_clust_pos(col, index);
-
-				if (i == ULINT_UNDEFINED) {
-					ut_ad(0);
-					log->error = DB_CORRUPTION;
-					goto err_exit;
-				}
-
-				log->error = row_log_table_get_pk_col(
-					col, ifield, dfield, *heap,
-					rec, offsets, i, zip_size, max_len);
-
-				if (log->error != DB_SUCCESS) {
-err_exit:
-					/* Report the failure to the caller
-					by returning NULL; the reason is in
-					log->error. */
-					tuple = NULL;
-					goto func_exit;
-				}
-
-				mbminmaxlen = col->mbminmaxlen;
-				prtype = col->prtype;
-			} else {
-				/* No matching column was found in the old
-				table, so this must be an added column.
-				Copy the default value. */
-				ut_ad(log->add_cols);
-
-				dfield_copy(dfield, dtuple_get_nth_field(
-						    log->add_cols, col_no));
-				mbminmaxlen = dfield->type.mbminmaxlen;
-				prtype = dfield->type.prtype;
-			}
-
-			ut_ad(!dfield_is_ext(dfield));
-			ut_ad(!dfield_is_null(dfield));
-
-			/* For a column prefix index field, truncate the
-			value to at most prefix_len, on a character
-			boundary. */
-			if (ifield->prefix_len) {
-				ulint	len = dtype_get_at_most_n_mbchars(
-					prtype, mbminmaxlen,
-					ifield->prefix_len,
-					dfield_get_len(dfield),
-					static_cast<const char*>(
-						dfield_get_data(dfield)));
-
-				ut_ad(len <= dfield_get_len(dfield));
-				dfield_set_len(dfield, len);
-			}
-		}
-
-		const byte* trx_roll = rec
-			+ row_get_trx_id_offset(index, offsets);
-
-		/* Copy the fields, because the fields will be updated
-		or the record may be moved somewhere else in the B-tree
-		as part of the upcoming operation. */
-		if (sys) {
-			memcpy(sys, trx_roll,
-			       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-			trx_roll = sys;
-		} else {
-			trx_roll = static_cast<const byte*>(
-				mem_heap_dup(
-					*heap, trx_roll,
-					DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
-		}
-
-		dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq),
-				trx_roll, DATA_TRX_ID_LEN);
-		dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq + 1),
-				trx_roll + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
-	}
-
-func_exit:
-	mutex_exit(&log->mutex);
-	return(tuple);
-}
-
-/******************************************************//**
-Logs an insert to a table that is being rebuilt.
-This will be merged in row_log_table_apply_insert().
*/
-UNIV_INTERN
-void
-row_log_table_insert(
-/*=================*/
-	const rec_t*	rec,	/*!< in: clustered index leaf page record,
-				page X-latched */
-	dict_index_t*	index,	/*!< in/out: clustered index, S-latched
-				or X-latched */
-	const ulint*	offsets)/*!< in: rec_get_offsets(rec,index) */
-{
-	/* An insert never carries an old PRIMARY KEY value. */
-	row_log_table_low(rec, index, offsets, true, NULL);
-}
-
-/******************************************************//**
-Notes that a BLOB is being freed during online ALTER TABLE.
-The starting page number is recorded in index->online_log->blobs, which
-is created on first use. Nothing is recorded once the log is in an
-error state. */
-UNIV_INTERN
-void
-row_log_table_blob_free(
-/*====================*/
-	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
-	ulint		page_no)/*!< in: starting page number of the BLOB */
-{
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(dict_index_is_online_ddl(index));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(page_no != FIL_NULL);
-
-	if (index->online_log->error != DB_SUCCESS) {
-		return;
-	}
-
-	page_no_map*	blobs	= index->online_log->blobs;
-
-	if (!blobs) {
-		index->online_log->blobs = blobs = new page_no_map();
-	}
-
-	/* In debug builds log_pos is the current write position of the
-	log. In other builds log_pos is a macro that expands to nothing,
-	so that row_log_table_blob_t() and blob_free() below are invoked
-	without an argument. */
-#ifdef UNIV_DEBUG
-	const ulonglong	log_pos = index->online_log->tail.total;
-#else
-# define log_pos /* empty */
-#endif /* UNIV_DEBUG */
-
-	const page_no_map::value_type v(page_no,
-					row_log_table_blob_t(log_pos));
-
-	/* insert() leaves an existing entry for page_no untouched and
-	reports that via p.second == false. */
-	std::pair<page_no_map::iterator,bool> p = blobs->insert(v);
-
-	if (!p.second) {
-		/* Update the existing mapping. */
-		ut_ad(p.first->first == page_no);
-		p.first->second.blob_free(log_pos);
-	}
-#undef log_pos
-}
-
-/******************************************************//**
-Notes that a BLOB is being allocated during online ALTER TABLE.
*/
-UNIV_INTERN
-void
-row_log_table_blob_alloc(
-/*=====================*/
-	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
-	ulint		page_no)/*!< in: starting page number of the BLOB */
-{
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(dict_index_is_online_ddl(index));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(page_no != FIL_NULL);
-
-	if (index->online_log->error != DB_SUCCESS) {
-		return;
-	}
-
-	/* Only track allocations if the same page has been freed
-	earlier. Double allocation without a free is not allowed. */
-	if (page_no_map* blobs = index->online_log->blobs) {
-		page_no_map::iterator p = blobs->find(page_no);
-
-		if (p != blobs->end()) {
-			ut_ad(p->first == page_no);
-			/* Record the current write position of the log
-			as the point of re-allocation. */
-			p->second.blob_alloc(index->online_log->tail.total);
-		}
-	}
-}
-
-/******************************************************//**
-Converts a log record to a table row.
-@return converted row, or NULL if the conversion fails with
-DB_CORRUPTION or DB_INVALID_NULL; when *error is DB_MISSING_HISTORY
-the row is still returned, with the freed BLOB left as an external
-field reference */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const dtuple_t*
-row_log_table_apply_convert_mrec(
-/*=============================*/
-	const mrec_t*		mrec,		/*!< in: merge record */
-	dict_index_t*		index,		/*!< in: index of mrec */
-	const ulint*		offsets,	/*!< in: offsets of mrec */
-	const row_log_t*	log,		/*!< in: rebuild context */
-	mem_heap_t*		heap,		/*!< in/out: memory heap */
-	trx_id_t		trx_id,		/*!< in: DB_TRX_ID of mrec;
-						not referenced in the
-						function body */
-	dberr_t*		error)		/*!< out: DB_SUCCESS or
-						DB_MISSING_HISTORY or
-						reason of failure */
-{
-	dtuple_t*	row;
-
-	*error = DB_SUCCESS;
-
-	/* This is based on row_build(). */
-	if (log->add_cols) {
-		/* Start from the default values of the added columns. */
-		row = dtuple_copy(log->add_cols, heap);
-		/* dict_table_copy_types() would set the fields to NULL */
-		for (ulint i = 0; i < dict_table_get_n_cols(log->table); i++) {
-			dict_col_copy_type(
-				dict_table_get_nth_col(log->table, i),
-				dfield_get_type(dtuple_get_nth_field(row, i)));
-		}
-	} else {
-		row = dtuple_create(heap, dict_table_get_n_cols(log->table));
-		dict_table_copy_types(row, log->table);
-	}
-
-	for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
-		const dict_field_t*	ind_field
-			= dict_index_get_nth_field(index, i);
-
-		if (ind_field->prefix_len) {
-			/* Column prefixes can only occur in key
-			fields, which cannot be stored externally. For
-			a column prefix, there should also be the full
-			field in the clustered index tuple. The row
-			tuple comprises full fields, not prefixes. */
-			ut_ad(!rec_offs_nth_extern(offsets, i));
-			continue;
-		}
-
-		/* Translate the old column number to the position in
-		the rebuilt table. */
-		const dict_col_t*	col
-			= dict_field_get_col(ind_field);
-		ulint			col_no
-			= log->col_map[dict_col_get_no(col)];
-
-		if (col_no == ULINT_UNDEFINED) {
-			/* dropped column */
-			continue;
-		}
-
-		dfield_t*	dfield
-			= dtuple_get_nth_field(row, col_no);
-		ulint		len;
-		const byte*	data;
-
-		if (rec_offs_nth_extern(offsets, i)) {
-			ut_ad(rec_offs_any_extern(offsets));
-			/* The index X-latch is held while log->blobs is
-			consulted and the BLOB is copied; it is released
-			at blob_done. */
-			rw_lock_x_lock(dict_index_get_lock(index));
-
-			if (const page_no_map* blobs = log->blobs) {
-				data = rec_get_nth_field(
-					mrec, offsets, i, &len);
-				ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-				/* Read the starting page number from the
-				external field reference at the end of
-				the field. */
-				ulint	page_no = mach_read_from_4(
-					data + len - (BTR_EXTERN_FIELD_REF_SIZE
-						      - BTR_EXTERN_PAGE_NO));
-				page_no_map::const_iterator p = blobs->find(
-					page_no);
-				if (p != blobs->end()
-				    && p->second.is_freed(log->head.total)) {
-					/* This BLOB has been freed.
-					We must not access the row. */
-					*error = DB_MISSING_HISTORY;
-					dfield_set_data(dfield, data, len);
-					dfield_set_ext(dfield);
-					goto blob_done;
-				}
-			}
-
-			data = btr_rec_copy_externally_stored_field(
-				mrec, offsets,
-				dict_table_zip_size(index->table),
-				i, &len, heap, NULL);
-			ut_a(data);
-			dfield_set_data(dfield, data, len);
-blob_done:
-			rw_lock_x_unlock(dict_index_get_lock(index));
-		} else {
-			data = rec_get_nth_field(mrec, offsets, i, &len);
-			dfield_set_data(dfield, data, len);
-		}
-
-		/* A DATA_MYSQL column whose stored length differs from
-		col->len, when the rebuilt table is not in a compact
-		format: the value is padded with spaces (0x20) up to
-		col->len if the old table is in a compact format. */
-		if (len != UNIV_SQL_NULL && col->mtype == DATA_MYSQL
-		    && col->len != len && !dict_table_is_comp(log->table)) {
-
-			ut_ad(col->len >= len);
-			if (dict_table_is_comp(index->table)) {
-				byte*	buf = (byte*) mem_heap_alloc(heap,
-								     col->len);
-				memcpy(buf, dfield->data, len);
-				memset(buf + len, 0x20, col->len - len);
-
-				dfield_set_data(dfield, buf, col->len);
-			} else {
-				/* field length mismatch should not happen
-				when rebuilding the redundant row format
-				table. */
-				ut_ad(0);
-				*error = DB_CORRUPTION;
-				return(NULL);
-			}
-		}
-
-		/* See if any columns were changed to NULL or NOT NULL. */
-		const dict_col_t*	new_col
-			= dict_table_get_nth_col(log->table, col_no);
-		ut_ad(new_col->mtype == col->mtype);
-
-		/* Assert that prtype matches except for nullability. */
-		ut_ad(!((new_col->prtype ^ col->prtype) & ~DATA_NOT_NULL));
-		ut_ad(!((new_col->prtype ^ dfield_get_type(dfield)->prtype)
-			& ~DATA_NOT_NULL));
-
-		if (new_col->prtype == col->prtype) {
-			continue;
-		}
-
-		if ((new_col->prtype & DATA_NOT_NULL)
-		    && dfield_is_null(dfield)) {
-			/* We got a NULL value for a NOT NULL column. */
-			*error = DB_INVALID_NULL;
-			return(NULL);
-		}
-
-		/* Adjust the DATA_NOT_NULL flag in the parsed row. */
-		dfield_get_type(dfield)->prtype = new_col->prtype;
-
-		ut_ad(dict_col_type_assert_equal(new_col,
-						 dfield_get_type(dfield)));
-	}
-
-	return(row);
-}
-
-/******************************************************//**
-Replays an insert operation on a table that was rebuilt.
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_log_table_apply_insert_low( -/*===========================*/ - que_thr_t* thr, /*!< in: query graph */ - const dtuple_t* row, /*!< in: table row - in the old table definition */ - trx_id_t trx_id, /*!< in: trx_id of the row */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap - that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - row_merge_dup_t* dup) /*!< in/out: for reporting - duplicate key errors */ -{ - dberr_t error; - dtuple_t* entry; - const row_log_t*log = dup->index->online_log; - dict_index_t* index = dict_table_get_first_index(log->table); - ulint n_index = 0; - - ut_ad(dtuple_validate(row)); - ut_ad(trx_id); - -#ifdef ROW_LOG_APPLY_PRINT - if (row_log_apply_print) { - fprintf(stderr, "table apply insert " - IB_ID_FMT " " IB_ID_FMT "\n", - index->table->id, index->id); - dtuple_print(stderr, row); - } -#endif /* ROW_LOG_APPLY_PRINT */ - - static const ulint flags - = (BTR_CREATE_FLAG - | BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG - | BTR_KEEP_SYS_FLAG); - - entry = row_build_index_entry(row, NULL, index, heap); - - error = row_ins_clust_index_entry_low( - flags, BTR_MODIFY_TREE, index, index->n_uniq, entry, 0, thr); - - switch (error) { - case DB_SUCCESS: - break; - case DB_SUCCESS_LOCKED_REC: - /* The row had already been copied to the table. */ - return(DB_SUCCESS); - default: - return(error); - } - - do { - n_index++; - - if (!(index = dict_table_get_next_index(index))) { - break; - } - - if (index->type & DICT_FTS) { - continue; - } - - entry = row_build_index_entry(row, NULL, index, heap); - error = row_ins_sec_index_entry_low( - flags, BTR_MODIFY_TREE, - index, offsets_heap, heap, entry, trx_id, thr); - - /* Report correct index name for duplicate key error. 
*/ - if (error == DB_DUPLICATE_KEY) { - thr_get_trx(thr)->error_key_num = n_index; - } - - } while (error == DB_SUCCESS); - - return(error); -} - -/******************************************************//** -Replays an insert operation on a table that was rebuilt. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_log_table_apply_insert( -/*=======================*/ - que_thr_t* thr, /*!< in: query graph */ - const mrec_t* mrec, /*!< in: record to insert */ - const ulint* offsets, /*!< in: offsets of mrec */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap - that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - row_merge_dup_t* dup, /*!< in/out: for reporting - duplicate key errors */ - trx_id_t trx_id) /*!< in: DB_TRX_ID of mrec */ -{ - const row_log_t*log = dup->index->online_log; - dberr_t error; - const dtuple_t* row = row_log_table_apply_convert_mrec( - mrec, dup->index, offsets, log, heap, trx_id, &error); - - switch (error) { - case DB_MISSING_HISTORY: - ut_ad(log->blobs); - /* Because some BLOBs are missing, we know that the - transaction was rolled back later (a rollback of - an insert can free BLOBs). - We can simply skip the insert: the subsequent - ROW_T_DELETE will be ignored, or a ROW_T_UPDATE will - be interpreted as ROW_T_INSERT. */ - return(DB_SUCCESS); - case DB_SUCCESS: - ut_ad(row != NULL); - break; - default: - ut_ad(0); - case DB_INVALID_NULL: - ut_ad(row == NULL); - return(error); - } - - error = row_log_table_apply_insert_low( - thr, row, trx_id, offsets_heap, heap, dup); - if (error != DB_SUCCESS) { - /* Report the erroneous row using the new - version of the table. */ - innobase_row_to_mysql(dup->table, log->table, row); - } - return(error); -} - -/******************************************************//** -Deletes a record from a table that is being rebuilt. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1, 2, 4, 5), warn_unused_result)) -dberr_t -row_log_table_apply_delete_low( -/*===========================*/ - btr_pcur_t* pcur, /*!< in/out: B-tree cursor, - will be trashed */ - const ulint* offsets, /*!< in: offsets on pcur */ - const row_ext_t* save_ext, /*!< in: saved external field - info, or NULL */ - mem_heap_t* heap, /*!< in/out: memory heap */ - mtr_t* mtr) /*!< in/out: mini-transaction, - will be committed */ -{ - dberr_t error; - row_ext_t* ext; - dtuple_t* row; - dict_index_t* index = btr_pcur_get_btr_cur(pcur)->index; - - ut_ad(dict_index_is_clust(index)); - -#ifdef ROW_LOG_APPLY_PRINT - if (row_log_apply_print) { - fprintf(stderr, "table apply delete " - IB_ID_FMT " " IB_ID_FMT "\n", - index->table->id, index->id); - rec_print_new(stderr, btr_pcur_get_rec(pcur), offsets); - } -#endif /* ROW_LOG_APPLY_PRINT */ - if (dict_table_get_next_index(index)) { - /* Build a row template for purging secondary index entries. */ - row = row_build( - ROW_COPY_DATA, index, btr_pcur_get_rec(pcur), - offsets, NULL, NULL, NULL, - save_ext ? NULL : &ext, heap); - if (!save_ext) { - save_ext = ext; - } - } else { - row = NULL; - } - - btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur), - BTR_CREATE_FLAG, RB_NONE, mtr); - mtr_commit(mtr); - - if (error != DB_SUCCESS) { - return(error); - } - - while ((index = dict_table_get_next_index(index)) != NULL) { - if (index->type & DICT_FTS) { - continue; - } - - const dtuple_t* entry = row_build_index_entry( - row, save_ext, index, heap); - mtr_start(mtr); - btr_pcur_open(index, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, pcur, mtr); -#ifdef UNIV_DEBUG - switch (btr_pcur_get_btr_cur(pcur)->flag) { - case BTR_CUR_DELETE_REF: - case BTR_CUR_DEL_MARK_IBUF: - case BTR_CUR_DELETE_IBUF: - case BTR_CUR_INSERT_TO_IBUF: - /* We did not request buffering. 
*/ - break; - case BTR_CUR_HASH: - case BTR_CUR_HASH_FAIL: - case BTR_CUR_BINARY: - goto flag_ok; - } - ut_ad(0); -flag_ok: -#endif /* UNIV_DEBUG */ - - if (page_rec_is_infimum(btr_pcur_get_rec(pcur)) - || btr_pcur_get_low_match(pcur) < index->n_uniq) { - /* All secondary index entries should be - found, because new_table is being modified by - this thread only, and all indexes should be - updated in sync. */ - mtr_commit(mtr); - return(DB_INDEX_CORRUPT); - } - - btr_cur_pessimistic_delete(&error, FALSE, - btr_pcur_get_btr_cur(pcur), - BTR_CREATE_FLAG, RB_NONE, mtr); - mtr_commit(mtr); - } - - return(error); -} - -/******************************************************//** -Replays a delete operation on a table that was rebuilt. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result)) -dberr_t -row_log_table_apply_delete( -/*=======================*/ - que_thr_t* thr, /*!< in: query graph */ - ulint trx_id_col, /*!< in: position of - DB_TRX_ID in the new - clustered index */ - const mrec_t* mrec, /*!< in: merge record */ - const ulint* moffsets, /*!< in: offsets of mrec */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap - that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - const row_log_t* log, /*!< in: online log */ - const row_ext_t* save_ext) /*!< in: saved external field - info, or NULL */ -{ - dict_table_t* new_table = log->table; - dict_index_t* index = dict_table_get_first_index(new_table); - dtuple_t* old_pk; - mtr_t mtr; - btr_pcur_t pcur; - ulint* offsets; - - ut_ad(rec_offs_n_fields(moffsets) - == dict_index_get_n_unique(index) + 2); - ut_ad(!rec_offs_any_extern(moffsets)); - - /* Convert the row to a search tuple. 
*/ - old_pk = dtuple_create(heap, index->n_uniq); - dict_index_copy_types(old_pk, index, index->n_uniq); - - for (ulint i = 0; i < index->n_uniq; i++) { - ulint len; - const void* field; - field = rec_get_nth_field(mrec, moffsets, i, &len); - ut_ad(len != UNIV_SQL_NULL); - dfield_set_data(dtuple_get_nth_field(old_pk, i), - field, len); - } - - mtr_start(&mtr); - btr_pcur_open(index, old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, &mtr); -#ifdef UNIV_DEBUG - switch (btr_pcur_get_btr_cur(&pcur)->flag) { - case BTR_CUR_DELETE_REF: - case BTR_CUR_DEL_MARK_IBUF: - case BTR_CUR_DELETE_IBUF: - case BTR_CUR_INSERT_TO_IBUF: - /* We did not request buffering. */ - break; - case BTR_CUR_HASH: - case BTR_CUR_HASH_FAIL: - case BTR_CUR_BINARY: - goto flag_ok; - } - ut_ad(0); -flag_ok: -#endif /* UNIV_DEBUG */ - - if (page_rec_is_infimum(btr_pcur_get_rec(&pcur)) - || btr_pcur_get_low_match(&pcur) < index->n_uniq) { -all_done: - mtr_commit(&mtr); - /* The record was not found. All done. */ - /* This should only happen when an earlier - ROW_T_INSERT was skipped or - ROW_T_UPDATE was interpreted as ROW_T_DELETE - due to BLOBs having been freed by rollback. */ - return(DB_SUCCESS); - } - - offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, NULL, - ULINT_UNDEFINED, &offsets_heap); -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(btr_pcur_get_rec(&pcur), offsets)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - /* Only remove the record if DB_TRX_ID,DB_ROLL_PTR match. 
*/ - - { - ulint len; - const byte* mrec_trx_id - = rec_get_nth_field(mrec, moffsets, trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - const byte* rec_trx_id - = rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets, - trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - - ut_ad(rec_get_nth_field(mrec, moffsets, trx_id_col + 1, &len) - == mrec_trx_id + DATA_TRX_ID_LEN); - ut_ad(len == DATA_ROLL_PTR_LEN); - ut_ad(rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets, - trx_id_col + 1, &len) - == rec_trx_id + DATA_TRX_ID_LEN); - ut_ad(len == DATA_ROLL_PTR_LEN); - - if (memcmp(mrec_trx_id, rec_trx_id, - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) { - /* The ROW_T_DELETE was logged for a different - PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR. - This is possible if a ROW_T_INSERT was skipped - or a ROW_T_UPDATE was interpreted as ROW_T_DELETE - because some BLOBs were missing due to - (1) rolling back the initial insert, or - (2) purging the BLOB for a later ROW_T_DELETE - (3) purging 'old values' for a later ROW_T_UPDATE - or ROW_T_DELETE. */ - ut_ad(!log->same_pk); - goto all_done; - } - } - - return(row_log_table_apply_delete_low(&pcur, offsets, save_ext, - heap, &mtr)); -} - -/******************************************************//** -Replays an update operation on a table that was rebuilt. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_log_table_apply_update( -/*=======================*/ - que_thr_t* thr, /*!< in: query graph */ - ulint new_trx_id_col, /*!< in: position of - DB_TRX_ID in the new - clustered index */ - const mrec_t* mrec, /*!< in: new value */ - const ulint* offsets, /*!< in: offsets of mrec */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap - that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - row_merge_dup_t* dup, /*!< in/out: for reporting - duplicate key errors */ - trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec */ - const dtuple_t* old_pk) /*!< in: PRIMARY KEY and - DB_TRX_ID,DB_ROLL_PTR - of the old value, - or PRIMARY KEY if same_pk */ -{ - const row_log_t*log = dup->index->online_log; - const dtuple_t* row; - dict_index_t* index = dict_table_get_first_index(log->table); - mtr_t mtr; - btr_pcur_t pcur; - dberr_t error; - ulint n_index = 0; - - ut_ad(dtuple_get_n_fields_cmp(old_pk) - == dict_index_get_n_unique(index)); - ut_ad(dtuple_get_n_fields(old_pk) - == dict_index_get_n_unique(index) - + (log->same_pk ? 0 : 2)); - - row = row_log_table_apply_convert_mrec( - mrec, dup->index, offsets, log, heap, trx_id, &error); - - switch (error) { - case DB_MISSING_HISTORY: - /* The record contained BLOBs that are now missing. */ - ut_ad(log->blobs); - /* Whether or not we are updating the PRIMARY KEY, we - know that there should be a subsequent - ROW_T_DELETE for rolling back a preceding ROW_T_INSERT, - overriding this ROW_T_UPDATE record. (*1) - - This allows us to interpret this ROW_T_UPDATE - as ROW_T_DELETE. - - When applying the subsequent ROW_T_DELETE, no matching - record will be found. 
*/ - /* fall through */ - case DB_SUCCESS: - ut_ad(row != NULL); - break; - default: - ut_ad(0); - case DB_INVALID_NULL: - ut_ad(row == NULL); - return(error); - } - - mtr_start(&mtr); - btr_pcur_open(index, old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, &mtr); -#ifdef UNIV_DEBUG - switch (btr_pcur_get_btr_cur(&pcur)->flag) { - case BTR_CUR_DELETE_REF: - case BTR_CUR_DEL_MARK_IBUF: - case BTR_CUR_DELETE_IBUF: - case BTR_CUR_INSERT_TO_IBUF: - ut_ad(0);/* We did not request buffering. */ - case BTR_CUR_HASH: - case BTR_CUR_HASH_FAIL: - case BTR_CUR_BINARY: - break; - } -#endif /* UNIV_DEBUG */ - - if (page_rec_is_infimum(btr_pcur_get_rec(&pcur)) - || btr_pcur_get_low_match(&pcur) < index->n_uniq) { - /* The record was not found. This should only happen - when an earlier ROW_T_INSERT or ROW_T_UPDATE was - diverted because BLOBs were freed when the insert was - later rolled back. */ - - ut_ad(log->blobs); - - if (error == DB_SUCCESS) { - /* An earlier ROW_T_INSERT could have been - skipped because of a missing BLOB, like this: - - BEGIN; - INSERT INTO t SET blob_col='blob value'; - UPDATE t SET blob_col=''; - ROLLBACK; - - This would generate the following records: - ROW_T_INSERT (referring to 'blob value') - ROW_T_UPDATE - ROW_T_UPDATE (referring to 'blob value') - ROW_T_DELETE - [ROLLBACK removes the 'blob value'] - - The ROW_T_INSERT would have been skipped - because of a missing BLOB. Now we are - executing the first ROW_T_UPDATE. - The second ROW_T_UPDATE (for the ROLLBACK) - would be interpreted as ROW_T_DELETE, because - the BLOB would be missing. - - We could probably assume that the transaction - has been rolled back and simply skip the - 'insert' part of this ROW_T_UPDATE record. - However, there might be some complex scenario - that could interfere with such a shortcut. - So, we will insert the row (and risk - introducing a bogus duplicate key error - for the ALTER TABLE), and a subsequent - ROW_T_UPDATE or ROW_T_DELETE will delete it. 
*/ - mtr_commit(&mtr); - error = row_log_table_apply_insert_low( - thr, row, trx_id, offsets_heap, heap, dup); - } else { - /* Some BLOBs are missing, so we are interpreting - this ROW_T_UPDATE as ROW_T_DELETE (see *1). - Because the record was not found, we do nothing. */ - ut_ad(error == DB_MISSING_HISTORY); - error = DB_SUCCESS; -func_exit: - mtr_commit(&mtr); - } -func_exit_committed: - ut_ad(mtr.state == MTR_COMMITTED); - - if (error != DB_SUCCESS) { - /* Report the erroneous row using the new - version of the table. */ - innobase_row_to_mysql(dup->table, log->table, row); - } - - return(error); - } - - /* Prepare to update (or delete) the record. */ - ulint* cur_offsets = rec_get_offsets( - btr_pcur_get_rec(&pcur), - index, NULL, ULINT_UNDEFINED, &offsets_heap); - - if (!log->same_pk) { - /* Only update the record if DB_TRX_ID,DB_ROLL_PTR match what - was buffered. */ - ulint len; - const void* rec_trx_id - = rec_get_nth_field(btr_pcur_get_rec(&pcur), - cur_offsets, index->n_uniq, &len); - ut_ad(len == DATA_TRX_ID_LEN); - ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq)->len - == DATA_TRX_ID_LEN); - ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq + 1)->len - == DATA_ROLL_PTR_LEN); - ut_ad(DATA_TRX_ID_LEN + static_cast<const char*>( - dtuple_get_nth_field(old_pk, - index->n_uniq)->data) - == dtuple_get_nth_field(old_pk, - index->n_uniq + 1)->data); - if (memcmp(rec_trx_id, - dtuple_get_nth_field(old_pk, index->n_uniq)->data, - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) { - /* The ROW_T_UPDATE was logged for a different - DB_TRX_ID,DB_ROLL_PTR. This is possible if an - earlier ROW_T_INSERT or ROW_T_UPDATE was diverted - because some BLOBs were missing due to rolling - back the initial insert or due to purging - the old BLOB values of an update. */ - ut_ad(log->blobs); - if (error != DB_SUCCESS) { - ut_ad(error == DB_MISSING_HISTORY); - /* Some BLOBs are missing, so we are - interpreting this ROW_T_UPDATE as - ROW_T_DELETE (see *1). 
- Because this is a different row, - we will do nothing. */ - error = DB_SUCCESS; - } else { - /* Because the user record is missing due to - BLOBs that were missing when processing - an earlier log record, we should - interpret the ROW_T_UPDATE as ROW_T_INSERT. - However, there is a different user record - with the same PRIMARY KEY value already. */ - error = DB_DUPLICATE_KEY; - } - - goto func_exit; - } - } - - if (error != DB_SUCCESS) { - ut_ad(error == DB_MISSING_HISTORY); - ut_ad(log->blobs); - /* Some BLOBs are missing, so we are interpreting - this ROW_T_UPDATE as ROW_T_DELETE (see *1). */ - error = row_log_table_apply_delete_low( - &pcur, cur_offsets, NULL, heap, &mtr); - goto func_exit_committed; - } - - dtuple_t* entry = row_build_index_entry( - row, NULL, index, heap); - const upd_t* update = row_upd_build_difference_binary( - index, entry, btr_pcur_get_rec(&pcur), cur_offsets, - false, NULL, heap); - - if (!update->n_fields) { - /* Nothing to do. */ - goto func_exit; - } - - const bool pk_updated - = upd_get_nth_field(update, 0)->field_no < new_trx_id_col; - - if (pk_updated || rec_offs_any_extern(cur_offsets)) { - /* If the record contains any externally stored - columns, perform the update by delete and insert, - because we will not write any undo log that would - allow purge to free any orphaned externally stored - columns. */ - - if (pk_updated && log->same_pk) { - /* The ROW_T_UPDATE log record should only be - written when the PRIMARY KEY fields of the - record did not change in the old table. We - can only get a change of PRIMARY KEY columns - in the rebuilt table if the PRIMARY KEY was - redefined (!same_pk). 
*/ - ut_ad(0); - error = DB_CORRUPTION; - goto func_exit; - } - - error = row_log_table_apply_delete_low( - &pcur, cur_offsets, NULL, heap, &mtr); - ut_ad(mtr.state == MTR_COMMITTED); - - if (error == DB_SUCCESS) { - error = row_log_table_apply_insert_low( - thr, row, trx_id, offsets_heap, heap, dup); - } - - goto func_exit_committed; - } - - dtuple_t* old_row; - row_ext_t* old_ext; - - if (dict_table_get_next_index(index)) { - /* Construct the row corresponding to the old value of - the record. */ - old_row = row_build( - ROW_COPY_DATA, index, btr_pcur_get_rec(&pcur), - cur_offsets, NULL, NULL, NULL, &old_ext, heap); - ut_ad(old_row); -#ifdef ROW_LOG_APPLY_PRINT - if (row_log_apply_print) { - fprintf(stderr, "table apply update " - IB_ID_FMT " " IB_ID_FMT "\n", - index->table->id, index->id); - dtuple_print(stderr, old_row); - dtuple_print(stderr, row); - } -#endif /* ROW_LOG_APPLY_PRINT */ - } else { - old_row = NULL; - old_ext = NULL; - } - - big_rec_t* big_rec; - - error = btr_cur_pessimistic_update( - BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG - | BTR_KEEP_POS_FLAG, - btr_pcur_get_btr_cur(&pcur), - &cur_offsets, &offsets_heap, heap, &big_rec, - update, 0, thr, 0, &mtr); - - if (big_rec) { - if (error == DB_SUCCESS) { - error = btr_store_big_rec_extern_fields( - index, btr_pcur_get_block(&pcur), - btr_pcur_get_rec(&pcur), cur_offsets, - big_rec, &mtr, BTR_STORE_UPDATE); - } - - dtuple_big_rec_free(big_rec); - } - - while ((index = dict_table_get_next_index(index)) != NULL) { - if (error != DB_SUCCESS) { - break; - } - - n_index++; - - if (index->type & DICT_FTS) { - continue; - } - - if (!row_upd_changes_ord_field_binary( - index, update, thr, old_row, NULL)) { - continue; - } - - mtr_commit(&mtr); - - entry = row_build_index_entry(old_row, old_ext, index, heap); - if (!entry) { - ut_ad(0); - return(DB_CORRUPTION); - } - - mtr_start(&mtr); - - if (ROW_FOUND != row_search_index_entry( - index, entry, BTR_MODIFY_TREE, 
&pcur, &mtr)) { - ut_ad(0); - error = DB_CORRUPTION; - break; - } - - btr_cur_pessimistic_delete( - &error, FALSE, btr_pcur_get_btr_cur(&pcur), - BTR_CREATE_FLAG, RB_NONE, &mtr); - - if (error != DB_SUCCESS) { - break; - } - - mtr_commit(&mtr); - - entry = row_build_index_entry(row, NULL, index, heap); - error = row_ins_sec_index_entry_low( - BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG, - BTR_MODIFY_TREE, index, offsets_heap, heap, - entry, trx_id, thr); - - /* Report correct index name for duplicate key error. */ - if (error == DB_DUPLICATE_KEY) { - thr_get_trx(thr)->error_key_num = n_index; - } - - mtr_start(&mtr); - } - - goto func_exit; -} - -/******************************************************//** -Applies an operation to a table that was rebuilt. -@return NULL on failure (mrec corruption) or when out of data; -pointer to next record on success */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -const mrec_t* -row_log_table_apply_op( -/*===================*/ - que_thr_t* thr, /*!< in: query graph */ - ulint trx_id_col, /*!< in: position of - DB_TRX_ID in old index */ - ulint new_trx_id_col, /*!< in: position of - DB_TRX_ID in new index */ - row_merge_dup_t* dup, /*!< in/out: for reporting - duplicate key errors */ - dberr_t* error, /*!< out: DB_SUCCESS - or error code */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap - that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - const mrec_t* mrec, /*!< in: merge record */ - const mrec_t* mrec_end, /*!< in: end of buffer */ - ulint* offsets) /*!< in/out: work area - for parsing mrec */ -{ - row_log_t* log = dup->index->online_log; - dict_index_t* new_index = dict_table_get_first_index(log->table); - ulint extra_size; - const mrec_t* next_mrec; - dtuple_t* old_pk; - row_ext_t* ext; - ulint ext_size; - - ut_ad(dict_index_is_clust(dup->index)); - ut_ad(dup->index->table != log->table); - ut_ad(log->head.total <= log->tail.total); - - *error = 
DB_SUCCESS; - - /* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */ - if (mrec + 3 >= mrec_end) { - return(NULL); - } - - const mrec_t* const mrec_start = mrec; - - switch (*mrec++) { - default: - ut_ad(0); - *error = DB_CORRUPTION; - return(NULL); - case ROW_T_INSERT: - extra_size = *mrec++; - - if (extra_size >= 0x80) { - /* Read another byte of extra_size. */ - - extra_size = (extra_size & 0x7f) << 8; - extra_size |= *mrec++; - } - - mrec += extra_size; - - if (mrec > mrec_end) { - return(NULL); - } - - rec_offs_set_n_fields(offsets, dup->index->n_fields); - rec_init_offsets_temp(mrec, dup->index, offsets); - - next_mrec = mrec + rec_offs_data_size(offsets); - - if (next_mrec > mrec_end) { - return(NULL); - } else { - log->head.total += next_mrec - mrec_start; - - ulint len; - const byte* db_trx_id - = rec_get_nth_field( - mrec, offsets, trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - *error = row_log_table_apply_insert( - thr, mrec, offsets, offsets_heap, - heap, dup, trx_read_trx_id(db_trx_id)); - } - break; - - case ROW_T_DELETE: - /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ - if (mrec + 6 >= mrec_end) { - return(NULL); - } - - extra_size = *mrec++; - ext_size = mach_read_from_4(mrec); - mrec += 4; - ut_ad(mrec < mrec_end); - - /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. - For fixed-length PRIMARY key columns, it is 0. 
*/ - mrec += extra_size; - - rec_offs_set_n_fields(offsets, new_index->n_uniq + 2); - rec_init_offsets_temp(mrec, new_index, offsets); - next_mrec = mrec + rec_offs_data_size(offsets) + ext_size; - if (next_mrec > mrec_end) { - return(NULL); - } - - log->head.total += next_mrec - mrec_start; - - /* If there are external fields, retrieve those logged - prefix info and reconstruct the row_ext_t */ - if (ext_size) { - /* We use memcpy to avoid unaligned - access on some non-x86 platforms.*/ - ext = static_cast<row_ext_t*>( - mem_heap_dup(heap, - mrec + rec_offs_data_size(offsets), - ext_size)); - - byte* ext_start = reinterpret_cast<byte*>(ext); - - ulint ext_len = sizeof(*ext) - + (ext->n_ext - 1) * sizeof ext->len; - - ext->ext = reinterpret_cast<ulint*>(ext_start + ext_len); - ext_len += ext->n_ext * sizeof(*ext->ext); - - ext->buf = static_cast<byte*>(ext_start + ext_len); - } else { - ext = NULL; - } - - *error = row_log_table_apply_delete( - thr, new_trx_id_col, - mrec, offsets, offsets_heap, heap, - log, ext); - break; - - case ROW_T_UPDATE: - /* Logically, the log entry consists of the - (PRIMARY KEY,DB_TRX_ID) of the old value (converted - to the new primary key definition) followed by - the new value in the old table definition. If the - definition of the columns belonging to PRIMARY KEY - is not changed, the log will only contain - DB_TRX_ID,new_row. */ - - if (dup->index->online_log->same_pk) { - ut_ad(new_index->n_uniq == dup->index->n_uniq); - - extra_size = *mrec++; - - if (extra_size >= 0x80) { - /* Read another byte of extra_size. 
*/ - - extra_size = (extra_size & 0x7f) << 8; - extra_size |= *mrec++; - } - - mrec += extra_size; - - if (mrec > mrec_end) { - return(NULL); - } - - rec_offs_set_n_fields(offsets, dup->index->n_fields); - rec_init_offsets_temp(mrec, dup->index, offsets); - - next_mrec = mrec + rec_offs_data_size(offsets); - - if (next_mrec > mrec_end) { - return(NULL); - } - - old_pk = dtuple_create(heap, new_index->n_uniq); - dict_index_copy_types( - old_pk, new_index, old_pk->n_fields); - - /* Copy the PRIMARY KEY fields from mrec to old_pk. */ - for (ulint i = 0; i < new_index->n_uniq; i++) { - const void* field; - ulint len; - dfield_t* dfield; - - ut_ad(!rec_offs_nth_extern(offsets, i)); - - field = rec_get_nth_field( - mrec, offsets, i, &len); - ut_ad(len != UNIV_SQL_NULL); - - dfield = dtuple_get_nth_field(old_pk, i); - dfield_set_data(dfield, field, len); - } - } else { - /* We assume extra_size < 0x100 - for the PRIMARY KEY prefix. */ - mrec += *mrec + 1; - - if (mrec > mrec_end) { - return(NULL); - } - - /* Get offsets for PRIMARY KEY, - DB_TRX_ID, DB_ROLL_PTR. */ - rec_offs_set_n_fields(offsets, new_index->n_uniq + 2); - rec_init_offsets_temp(mrec, new_index, offsets); - - next_mrec = mrec + rec_offs_data_size(offsets); - if (next_mrec + 2 > mrec_end) { - return(NULL); - } - - /* Copy the PRIMARY KEY fields and - DB_TRX_ID, DB_ROLL_PTR from mrec to old_pk. */ - old_pk = dtuple_create(heap, new_index->n_uniq + 2); - dict_index_copy_types(old_pk, new_index, - old_pk->n_fields); - - for (ulint i = 0; - i < dict_index_get_n_unique(new_index) + 2; - i++) { - const void* field; - ulint len; - dfield_t* dfield; - - ut_ad(!rec_offs_nth_extern(offsets, i)); - - field = rec_get_nth_field( - mrec, offsets, i, &len); - ut_ad(len != UNIV_SQL_NULL); - - dfield = dtuple_get_nth_field(old_pk, i); - dfield_set_data(dfield, field, len); - } - - mrec = next_mrec; - - /* Fetch the new value of the row as it was - in the old table definition. 
*/ - extra_size = *mrec++; - - if (extra_size >= 0x80) { - /* Read another byte of extra_size. */ - - extra_size = (extra_size & 0x7f) << 8; - extra_size |= *mrec++; - } - - mrec += extra_size; - - if (mrec > mrec_end) { - return(NULL); - } - - rec_offs_set_n_fields(offsets, dup->index->n_fields); - rec_init_offsets_temp(mrec, dup->index, offsets); - - next_mrec = mrec + rec_offs_data_size(offsets); - - if (next_mrec > mrec_end) { - return(NULL); - } - } - - ut_ad(next_mrec <= mrec_end); - log->head.total += next_mrec - mrec_start; - dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq); - - { - ulint len; - const byte* db_trx_id - = rec_get_nth_field( - mrec, offsets, trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - *error = row_log_table_apply_update( - thr, new_trx_id_col, - mrec, offsets, offsets_heap, - heap, dup, trx_read_trx_id(db_trx_id), old_pk); - } - - break; - } - - ut_ad(log->head.total <= log->tail.total); - mem_heap_empty(offsets_heap); - mem_heap_empty(heap); - return(next_mrec); -} - -/******************************************************//** -Applies operations to a table was rebuilt. 
-@return DB_SUCCESS, or error code on failure */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_log_table_apply_ops( -/*====================*/ - que_thr_t* thr, /*!< in: query graph */ - row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key - errors */ -{ - dberr_t error; - const mrec_t* mrec = NULL; - const mrec_t* next_mrec; - const mrec_t* mrec_end = NULL; /* silence bogus warning */ - const mrec_t* next_mrec_end; - mem_heap_t* heap; - mem_heap_t* offsets_heap; - ulint* offsets; - bool has_index_lock; - dict_index_t* index = const_cast<dict_index_t*>( - dup->index); - dict_table_t* new_table = index->online_log->table; - dict_index_t* new_index = dict_table_get_first_index( - new_table); - const ulint i = 1 + REC_OFFS_HEADER_SIZE - + ut_max(dict_index_get_n_fields(index), - dict_index_get_n_unique(new_index) + 2); - const ulint trx_id_col = dict_col_get_clust_pos( - dict_table_get_sys_col(index->table, DATA_TRX_ID), index); - const ulint new_trx_id_col = dict_col_get_clust_pos( - dict_table_get_sys_col(new_table, DATA_TRX_ID), new_index); - trx_t* trx = thr_get_trx(thr); - - ut_ad(dict_index_is_clust(index)); - ut_ad(dict_index_is_online_ddl(index)); - ut_ad(trx->mysql_thd); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!dict_index_is_online_ddl(new_index)); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - ut_ad(new_trx_id_col > 0); - ut_ad(new_trx_id_col != ULINT_UNDEFINED); - - UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end); - - offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets)); - offsets[0] = i; - offsets[1] = dict_index_get_n_fields(index); - - heap = mem_heap_create(UNIV_PAGE_SIZE); - offsets_heap = mem_heap_create(UNIV_PAGE_SIZE); - has_index_lock = true; - -next_block: - ut_ad(has_index_lock); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - 
ut_ad(index->online_log->head.bytes == 0); - - if (trx_is_interrupted(trx)) { - goto interrupted; - } - - if (dict_index_is_corrupted(index)) { - error = DB_INDEX_CORRUPT; - goto func_exit; - } - - ut_ad(dict_index_is_online_ddl(index)); - - error = index->online_log->error; - - if (error != DB_SUCCESS) { - goto func_exit; - } - - if (UNIV_UNLIKELY(index->online_log->head.blocks - > index->online_log->tail.blocks)) { -unexpected_eof: - fprintf(stderr, "InnoDB: unexpected end of temporary file" - " for table %s\n", index->table_name); -corruption: - error = DB_CORRUPTION; - goto func_exit; - } - - if (index->online_log->head.blocks - == index->online_log->tail.blocks) { - if (index->online_log->head.blocks) { -#ifdef HAVE_FTRUNCATE - /* Truncate the file in order to save space. */ - if (index->online_log->fd != -1 - && ftruncate(index->online_log->fd, 0) == -1) { - perror("ftruncate"); - } -#endif /* HAVE_FTRUNCATE */ - index->online_log->head.blocks - = index->online_log->tail.blocks = 0; - } - - next_mrec = index->online_log->tail.block; - next_mrec_end = next_mrec + index->online_log->tail.bytes; - - if (next_mrec_end == next_mrec) { - /* End of log reached. 
*/ -all_done: - ut_ad(has_index_lock); - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->tail.blocks == 0); - index->online_log->head.bytes = 0; - index->online_log->tail.bytes = 0; - error = DB_SUCCESS; - goto func_exit; - } - } else { - os_offset_t ofs; - ibool success; - - ofs = (os_offset_t) index->online_log->head.blocks - * srv_sort_buf_size; - - ut_ad(has_index_lock); - has_index_lock = false; - rw_lock_x_unlock(dict_index_get_lock(index)); - - log_free_check(); - - ut_ad(dict_index_is_online_ddl(index)); - - if (!row_log_block_allocate(index->online_log->head)) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } - - success = os_file_read_no_error_handling_int_fd( - index->online_log->fd, - index->online_log->head.block, ofs, - srv_sort_buf_size); - if (!success) { - fprintf(stderr, "InnoDB: unable to read temporary file" - " for table %s\n", index->table_name); - goto corruption; - } - -#ifdef POSIX_FADV_DONTNEED - /* Each block is read exactly once. Free up the file cache. */ - posix_fadvise(index->online_log->fd, - ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); -#endif /* POSIX_FADV_DONTNEED */ -#if 0 //def FALLOC_FL_PUNCH_HOLE - /* Try to deallocate the space for the file on disk. - This should work on ext4 on Linux 2.6.39 and later, - and be ignored when the operation is unsupported. */ - fallocate(index->online_log->fd, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - ofs, srv_sort_buf_size); -#endif /* FALLOC_FL_PUNCH_HOLE */ - - next_mrec = index->online_log->head.block; - next_mrec_end = next_mrec + srv_sort_buf_size; - } - - /* This read is not protected by index->online_log->mutex for - performance reasons. We will eventually notice any error that - was flagged by a DML thread. */ - error = index->online_log->error; - - if (error != DB_SUCCESS) { - goto func_exit; - } - - if (mrec) { - /* A partial record was read from the previous block. - Copy the temporary buffer full, as we do not know the - length of the record. 
Parse subsequent records from - the bigger buffer index->online_log->head.block - or index->online_log->tail.block. */ - - ut_ad(mrec == index->online_log->head.buf); - ut_ad(mrec_end > mrec); - ut_ad(mrec_end < (&index->online_log->head.buf)[1]); - - memcpy((mrec_t*) mrec_end, next_mrec, - (&index->online_log->head.buf)[1] - mrec_end); - mrec = row_log_table_apply_op( - thr, trx_id_col, new_trx_id_col, - dup, &error, offsets_heap, heap, - index->online_log->head.buf, - (&index->online_log->head.buf)[1], offsets); - if (error != DB_SUCCESS) { - goto func_exit; - } else if (UNIV_UNLIKELY(mrec == NULL)) { - /* The record was not reassembled properly. */ - goto corruption; - } - /* The record was previously found out to be - truncated. Now that the parse buffer was extended, - it should proceed beyond the old end of the buffer. */ - ut_a(mrec > mrec_end); - - index->online_log->head.bytes = mrec - mrec_end; - next_mrec += index->online_log->head.bytes; - } - - ut_ad(next_mrec <= next_mrec_end); - /* The following loop must not be parsing the temporary - buffer, but head.block or tail.block. */ - - /* mrec!=NULL means that the next record starts from the - middle of the block */ - ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0)); - -#ifdef UNIV_DEBUG - if (next_mrec_end == index->online_log->head.block - + srv_sort_buf_size) { - /* If tail.bytes == 0, next_mrec_end can also be at - the end of tail.block. 
*/ - if (index->online_log->tail.bytes == 0) { - ut_ad(next_mrec == next_mrec_end); - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->head.bytes == 0); - } else { - ut_ad(next_mrec == index->online_log->head.block - + index->online_log->head.bytes); - ut_ad(index->online_log->tail.blocks - > index->online_log->head.blocks); - } - } else if (next_mrec_end == index->online_log->tail.block - + index->online_log->tail.bytes) { - ut_ad(next_mrec == index->online_log->tail.block - + index->online_log->head.bytes); - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->head.bytes - <= index->online_log->tail.bytes); - } else { - ut_error; - } -#endif /* UNIV_DEBUG */ - - mrec_end = next_mrec_end; - - while (!trx_is_interrupted(trx)) { - mrec = next_mrec; - ut_ad(mrec < mrec_end); - - if (!has_index_lock) { - /* We are applying operations from a different - block than the one that is being written to. - We do not hold index->lock in order to - allow other threads to concurrently buffer - modifications. */ - ut_ad(mrec >= index->online_log->head.block); - ut_ad(mrec_end == index->online_log->head.block - + srv_sort_buf_size); - ut_ad(index->online_log->head.bytes - < srv_sort_buf_size); - - /* Take the opportunity to do a redo log - checkpoint if needed. */ - log_free_check(); - } else { - /* We are applying operations from the last block. - Do not allow other threads to buffer anything, - so that we can finally catch up and synchronize. */ - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(mrec_end == index->online_log->tail.block - + index->online_log->tail.bytes); - ut_ad(mrec >= index->online_log->tail.block); - } - - /* This read is not protected by index->online_log->mutex - for performance reasons. We will eventually notice any - error that was flagged by a DML thread. 
*/ - error = index->online_log->error; - - if (error != DB_SUCCESS) { - goto func_exit; - } - - next_mrec = row_log_table_apply_op( - thr, trx_id_col, new_trx_id_col, - dup, &error, offsets_heap, heap, - mrec, mrec_end, offsets); - - if (error != DB_SUCCESS) { - goto func_exit; - } else if (next_mrec == next_mrec_end) { - /* The record happened to end on a block boundary. - Do we have more blocks left? */ - if (has_index_lock) { - /* The index will be locked while - applying the last block. */ - goto all_done; - } - - mrec = NULL; -process_next_block: - rw_lock_x_lock(dict_index_get_lock(index)); - has_index_lock = true; - - index->online_log->head.bytes = 0; - index->online_log->head.blocks++; - goto next_block; - } else if (next_mrec != NULL) { - ut_ad(next_mrec < next_mrec_end); - index->online_log->head.bytes += next_mrec - mrec; - } else if (has_index_lock) { - /* When mrec is within tail.block, it should - be a complete record, because we are holding - index->lock and thus excluding the writer. */ - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(mrec_end == index->online_log->tail.block - + index->online_log->tail.bytes); - ut_ad(0); - goto unexpected_eof; - } else { - memcpy(index->online_log->head.buf, mrec, - mrec_end - mrec); - mrec_end += index->online_log->head.buf - mrec; - mrec = index->online_log->head.buf; - goto process_next_block; - } - } - -interrupted: - error = DB_INTERRUPTED; -func_exit: - if (!has_index_lock) { - rw_lock_x_lock(dict_index_get_lock(index)); - } - - mem_heap_free(offsets_heap); - mem_heap_free(heap); - row_log_block_free(index->online_log->head); - ut_free(offsets); - return(error); -} - -/******************************************************//** -Apply the row_log_table log to a table upon completing rebuild. 
-@return DB_SUCCESS, or error code on failure */ -UNIV_INTERN -dberr_t -row_log_table_apply( -/*================*/ - que_thr_t* thr, /*!< in: query graph */ - dict_table_t* old_table, - /*!< in: old table */ - struct TABLE* table) /*!< in/out: MySQL table - (for reporting duplicates) */ -{ - dberr_t error; - dict_index_t* clust_index; - - thr_get_trx(thr)->error_key_num = 0; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - clust_index = dict_table_get_first_index(old_table); - - rw_lock_x_lock(dict_index_get_lock(clust_index)); - - if (!clust_index->online_log) { - ut_ad(dict_index_get_online_status(clust_index) - == ONLINE_INDEX_COMPLETE); - /* This function should not be called unless - rebuilding a table online. Build in some fault - tolerance. */ - ut_ad(0); - error = DB_ERROR; - } else { - row_merge_dup_t dup = { - clust_index, table, - clust_index->online_log->col_map, 0 - }; - - error = row_log_table_apply_ops(thr, &dup); - - ut_ad(error != DB_SUCCESS - || clust_index->online_log->head.total - == clust_index->online_log->tail.total); - } - - rw_lock_x_unlock(dict_index_get_lock(clust_index)); - return(error); -} - -/******************************************************//** -Allocate the row log for an index and flag the index -for online creation. 
-@retval true if success, false if not */ -UNIV_INTERN -bool -row_log_allocate( -/*=============*/ - dict_index_t* index, /*!< in/out: index */ - dict_table_t* table, /*!< in/out: new table being rebuilt, - or NULL when creating a secondary index */ - bool same_pk,/*!< in: whether the definition of the - PRIMARY KEY has remained the same */ - const dtuple_t* add_cols, - /*!< in: default values of - added columns, or NULL */ - const ulint* col_map,/*!< in: mapping of old column - numbers to new ones, or NULL if !table */ - const char* path) /*!< in: where to create temporary file */ -{ - row_log_t* log; - DBUG_ENTER("row_log_allocate"); - - ut_ad(!dict_index_is_online_ddl(index)); - ut_ad(dict_index_is_clust(index) == !!table); - ut_ad(!table || index->table != table); - ut_ad(same_pk || table); - ut_ad(!table || col_map); - ut_ad(!add_cols || col_map); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - log = (row_log_t*) ut_malloc(sizeof *log); - if (!log) { - DBUG_RETURN(false); - } - - log->fd = -1; - mutex_create(index_online_log_key, &log->mutex, - SYNC_INDEX_ONLINE_LOG); - log->blobs = NULL; - log->table = table; - log->same_pk = same_pk; - log->add_cols = add_cols; - log->col_map = col_map; - log->error = DB_SUCCESS; - log->max_trx = 0; - log->tail.blocks = log->tail.bytes = 0; - log->tail.total = 0; - log->tail.block = log->head.block = NULL; - log->head.blocks = log->head.bytes = 0; - log->head.total = 0; - log->path = path; - dict_index_set_online_status(index, ONLINE_INDEX_CREATION); - index->online_log = log; - - /* While we might be holding an exclusive data dictionary lock - here, in row_log_abort_sec() we will not always be holding it. Use - atomic operations in both cases. */ - MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX); - - DBUG_RETURN(true); -} - -/******************************************************//** -Free the row log for an index that was being created online. 
*/ -UNIV_INTERN -void -row_log_free( -/*=========*/ - row_log_t*& log) /*!< in,own: row log */ -{ - MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX); - - delete log->blobs; - row_log_block_free(log->tail); - row_log_block_free(log->head); - row_merge_file_destroy_low(log->fd); - mutex_free(&log->mutex); - ut_free(log); - log = 0; -} - -/******************************************************//** -Get the latest transaction ID that has invoked row_log_online_op() -during online creation. -@return latest transaction ID, or 0 if nothing was logged */ -UNIV_INTERN -trx_id_t -row_log_get_max_trx( -/*================*/ - dict_index_t* index) /*!< in: index, must be locked */ -{ - ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_CREATION); -#ifdef UNIV_SYNC_DEBUG - ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED) - && mutex_own(&index->online_log->mutex)) - || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - return(index->online_log->max_trx); -} - -/******************************************************//** -Applies an operation to a secondary index that was being created. 
*/ -static MY_ATTRIBUTE((nonnull)) -void -row_log_apply_op_low( -/*=================*/ - dict_index_t* index, /*!< in/out: index */ - row_merge_dup_t*dup, /*!< in/out: for reporting - duplicate key errors */ - dberr_t* error, /*!< out: DB_SUCCESS or error code */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap for - allocating offsets; can be emptied */ - bool has_index_lock, /*!< in: true if holding index->lock - in exclusive mode */ - enum row_op op, /*!< in: operation being applied */ - trx_id_t trx_id, /*!< in: transaction identifier */ - const dtuple_t* entry) /*!< in: row */ -{ - mtr_t mtr; - btr_cur_t cursor; - ulint* offsets = NULL; - - ut_ad(!dict_index_is_clust(index)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX) - == has_index_lock); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(!dict_index_is_corrupted(index)); - ut_ad(trx_id != 0 || op == ROW_OP_DELETE); - - mtr_start(&mtr); - - /* We perform the pessimistic variant of the operations if we - already hold index->lock exclusively. First, search the - record. The operation may already have been performed, - depending on when the row in the clustered index was - scanned. */ - btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - has_index_lock - ? BTR_MODIFY_TREE - : BTR_MODIFY_LEAF, - &cursor, 0, __FILE__, __LINE__, - &mtr); - - ut_ad(dict_index_get_n_unique(index) > 0); - /* This test is somewhat similar to row_ins_must_modify_rec(), - but not identical for unique secondary indexes. */ - if (cursor.low_match >= dict_index_get_n_unique(index) - && !page_rec_is_infimum(btr_cur_get_rec(&cursor))) { - /* We have a matching record. 
*/ - bool exists = (cursor.low_match - == dict_index_get_n_fields(index)); -#ifdef UNIV_DEBUG - rec_t* rec = btr_cur_get_rec(&cursor); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec))); -#endif /* UNIV_DEBUG */ - - ut_ad(exists || dict_index_is_unique(index)); - - switch (op) { - case ROW_OP_DELETE: - if (!exists) { - /* The existing record matches the - unique secondary index key, but the - PRIMARY KEY columns differ. So, this - exact record does not exist. For - example, we could detect a duplicate - key error in some old index before - logging an ROW_OP_INSERT for our - index. This ROW_OP_DELETE could have - been logged for rolling back - TRX_UNDO_INSERT_REC. */ - goto func_exit; - } - - if (btr_cur_optimistic_delete( - &cursor, BTR_CREATE_FLAG, &mtr)) { - *error = DB_SUCCESS; - break; - } - - if (!has_index_lock) { - /* This needs a pessimistic operation. - Lock the index tree exclusively. */ - mtr_commit(&mtr); - mtr_start(&mtr); - btr_cur_search_to_nth_level( - index, 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, 0, - __FILE__, __LINE__, &mtr); - - /* No other thread than the current one - is allowed to modify the index tree. - Thus, the record should still exist. */ - ut_ad(cursor.low_match - >= dict_index_get_n_fields(index)); - ut_ad(page_rec_is_user_rec( - btr_cur_get_rec(&cursor))); - } - - /* As there are no externally stored fields in - a secondary index record, the parameter - rb_ctx = RB_NONE will be ignored. */ - - btr_cur_pessimistic_delete( - error, FALSE, &cursor, - BTR_CREATE_FLAG, RB_NONE, &mtr); - break; - case ROW_OP_INSERT: - if (exists) { - /* The record already exists. There - is nothing to be inserted. - This could happen when processing - TRX_UNDO_DEL_MARK_REC in statement - rollback: - - UPDATE of PRIMARY KEY can lead to - statement rollback if the updated - value of the PRIMARY KEY already - exists. 
In this case, the UPDATE would - be mapped to DELETE;INSERT, and we - only wrote undo log for the DELETE - part. The duplicate key error would be - triggered before logging the INSERT - part. - - Theoretically, we could also get a - similar situation when a DELETE operation - is blocked by a FOREIGN KEY constraint. */ - goto func_exit; - } - - if (dtuple_contains_null(entry)) { - /* The UNIQUE KEY columns match, but - there is a NULL value in the key, and - NULL!=NULL. */ - goto insert_the_rec; - } - - goto duplicate; - } - } else { - switch (op) { - rec_t* rec; - big_rec_t* big_rec; - case ROW_OP_DELETE: - /* The record does not exist. For example, we - could detect a duplicate key error in some old - index before logging an ROW_OP_INSERT for our - index. This ROW_OP_DELETE could be logged for - rolling back TRX_UNDO_INSERT_REC. */ - goto func_exit; - case ROW_OP_INSERT: - if (dict_index_is_unique(index) - && (cursor.up_match - >= dict_index_get_n_unique(index) - || cursor.low_match - >= dict_index_get_n_unique(index)) - && (!index->n_nullable - || !dtuple_contains_null(entry))) { -duplicate: - /* Duplicate key */ - ut_ad(dict_index_is_unique(index)); - row_merge_dup_report(dup, entry->fields); - *error = DB_DUPLICATE_KEY; - goto func_exit; - } -insert_the_rec: - /* Insert the record. As we are inserting into - a secondary index, there cannot be externally - stored columns (!big_rec). */ - *error = btr_cur_optimistic_insert( - BTR_NO_UNDO_LOG_FLAG - | BTR_NO_LOCKING_FLAG - | BTR_CREATE_FLAG, - &cursor, &offsets, &offsets_heap, - const_cast<dtuple_t*>(entry), - &rec, &big_rec, 0, NULL, &mtr); - ut_ad(!big_rec); - if (*error != DB_FAIL) { - break; - } - - if (!has_index_lock) { - /* This needs a pessimistic operation. - Lock the index tree exclusively. 
*/ - mtr_commit(&mtr); - mtr_start(&mtr); - btr_cur_search_to_nth_level( - index, 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, 0, - __FILE__, __LINE__, &mtr); - } - - /* We already determined that the - record did not exist. No other thread - than the current one is allowed to - modify the index tree. Thus, the - record should still not exist. */ - - *error = btr_cur_pessimistic_insert( - BTR_NO_UNDO_LOG_FLAG - | BTR_NO_LOCKING_FLAG - | BTR_CREATE_FLAG, - &cursor, &offsets, &offsets_heap, - const_cast<dtuple_t*>(entry), - &rec, &big_rec, - 0, NULL, &mtr); - ut_ad(!big_rec); - break; - } - mem_heap_empty(offsets_heap); - } - - if (*error == DB_SUCCESS && trx_id) { - page_update_max_trx_id(btr_cur_get_block(&cursor), - btr_cur_get_page_zip(&cursor), - trx_id, &mtr); - } - -func_exit: - mtr_commit(&mtr); -} - -/******************************************************//** -Applies an operation to a secondary index that was being created. -@return NULL on failure (mrec corruption) or when out of data; -pointer to next record on success */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -const mrec_t* -row_log_apply_op( -/*=============*/ - dict_index_t* index, /*!< in/out: index */ - row_merge_dup_t*dup, /*!< in/out: for reporting - duplicate key errors */ - dberr_t* error, /*!< out: DB_SUCCESS or error code */ - mem_heap_t* offsets_heap, /*!< in/out: memory heap for - allocating offsets; can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap for - allocating data tuples */ - bool has_index_lock, /*!< in: true if holding index->lock - in exclusive mode */ - const mrec_t* mrec, /*!< in: merge record */ - const mrec_t* mrec_end, /*!< in: end of buffer */ - ulint* offsets) /*!< in/out: work area for - rec_init_offsets_temp() */ - -{ - enum row_op op; - ulint extra_size; - ulint data_size; - ulint n_ext; - dtuple_t* entry; - trx_id_t trx_id; - - /* Online index creation is only used for secondary indexes. 
*/ - ut_ad(!dict_index_is_clust(index)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX) - == has_index_lock); -#endif /* UNIV_SYNC_DEBUG */ - - if (dict_index_is_corrupted(index)) { - *error = DB_INDEX_CORRUPT; - return(NULL); - } - - *error = DB_SUCCESS; - - if (mrec + ROW_LOG_HEADER_SIZE >= mrec_end) { - return(NULL); - } - - switch (*mrec) { - case ROW_OP_INSERT: - if (ROW_LOG_HEADER_SIZE + DATA_TRX_ID_LEN + mrec >= mrec_end) { - return(NULL); - } - - op = static_cast<enum row_op>(*mrec++); - trx_id = trx_read_trx_id(mrec); - mrec += DATA_TRX_ID_LEN; - break; - case ROW_OP_DELETE: - op = static_cast<enum row_op>(*mrec++); - trx_id = 0; - break; - default: -corrupted: - ut_ad(0); - *error = DB_CORRUPTION; - return(NULL); - } - - extra_size = *mrec++; - - ut_ad(mrec < mrec_end); - - if (extra_size >= 0x80) { - /* Read another byte of extra_size. */ - - extra_size = (extra_size & 0x7f) << 8; - extra_size |= *mrec++; - } - - mrec += extra_size; - - if (mrec > mrec_end) { - return(NULL); - } - - rec_init_offsets_temp(mrec, index, offsets); - - if (rec_offs_any_extern(offsets)) { - /* There should never be any externally stored fields - in a secondary index, which is what online index - creation is used for. Therefore, the log file must be - corrupted. */ - goto corrupted; - } - - data_size = rec_offs_data_size(offsets); - - mrec += data_size; - - if (mrec > mrec_end) { - return(NULL); - } - - entry = row_rec_to_index_entry_low( - mrec - data_size, index, offsets, &n_ext, heap); - /* Online index creation is only implemented for secondary - indexes, which never contain off-page columns. 
*/ - ut_ad(n_ext == 0); -#ifdef ROW_LOG_APPLY_PRINT - if (row_log_apply_print) { - fprintf(stderr, "apply " IB_ID_FMT " " TRX_ID_FMT " %u %u ", - index->id, trx_id, - unsigned (op), unsigned (has_index_lock)); - for (const byte* m = mrec - data_size; m < mrec; m++) { - fprintf(stderr, "%02x", *m); - } - putc('\n', stderr); - } -#endif /* ROW_LOG_APPLY_PRINT */ - row_log_apply_op_low(index, dup, error, offsets_heap, - has_index_lock, op, trx_id, entry); - return(mrec); -} - -/******************************************************//** -Applies operations to a secondary index that was being created. -@return DB_SUCCESS, or error code on failure */ -static MY_ATTRIBUTE((nonnull)) -dberr_t -row_log_apply_ops( -/*==============*/ - trx_t* trx, /*!< in: transaction (for checking if - the operation was interrupted) */ - dict_index_t* index, /*!< in/out: index */ - row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key - errors */ -{ - dberr_t error; - const mrec_t* mrec = NULL; - const mrec_t* next_mrec; - const mrec_t* mrec_end= NULL; /* silence bogus warning */ - const mrec_t* next_mrec_end; - mem_heap_t* offsets_heap; - mem_heap_t* heap; - ulint* offsets; - bool has_index_lock; - const ulint i = 1 + REC_OFFS_HEADER_SIZE - + dict_index_get_n_fields(index); - - ut_ad(dict_index_is_online_ddl(index)); - ut_ad(*index->name == TEMP_INDEX_PREFIX); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(index->online_log); - UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end); - - offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets)); - offsets[0] = i; - offsets[1] = dict_index_get_n_fields(index); - - offsets_heap = mem_heap_create(UNIV_PAGE_SIZE); - heap = mem_heap_create(UNIV_PAGE_SIZE); - has_index_lock = true; - -next_block: - ut_ad(has_index_lock); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - 
ut_ad(index->online_log->head.bytes == 0); - - if (trx_is_interrupted(trx)) { - goto interrupted; - } - - error = index->online_log->error; - if (error != DB_SUCCESS) { - goto func_exit; - } - - if (dict_index_is_corrupted(index)) { - error = DB_INDEX_CORRUPT; - goto func_exit; - } - - if (UNIV_UNLIKELY(index->online_log->head.blocks - > index->online_log->tail.blocks)) { -unexpected_eof: - fprintf(stderr, "InnoDB: unexpected end of temporary file" - " for index %s\n", index->name + 1); -corruption: - error = DB_CORRUPTION; - goto func_exit; - } - - if (index->online_log->head.blocks - == index->online_log->tail.blocks) { - if (index->online_log->head.blocks) { -#ifdef HAVE_FTRUNCATE - /* Truncate the file in order to save space. */ - if (index->online_log->fd != -1 - && ftruncate(index->online_log->fd, 0) == -1) { - perror("ftruncate"); - } -#endif /* HAVE_FTRUNCATE */ - index->online_log->head.blocks - = index->online_log->tail.blocks = 0; - } - - next_mrec = index->online_log->tail.block; - next_mrec_end = next_mrec + index->online_log->tail.bytes; - - if (next_mrec_end == next_mrec) { - /* End of log reached. 
*/ -all_done: - ut_ad(has_index_lock); - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->tail.blocks == 0); - error = DB_SUCCESS; - goto func_exit; - } - } else { - os_offset_t ofs; - ibool success; - - ofs = (os_offset_t) index->online_log->head.blocks - * srv_sort_buf_size; - - ut_ad(has_index_lock); - has_index_lock = false; - rw_lock_x_unlock(dict_index_get_lock(index)); - - log_free_check(); - - if (!row_log_block_allocate(index->online_log->head)) { - error = DB_OUT_OF_MEMORY; - goto func_exit; - } - - success = os_file_read_no_error_handling_int_fd( - index->online_log->fd, - index->online_log->head.block, ofs, - srv_sort_buf_size); - - if (!success) { - fprintf(stderr, "InnoDB: unable to read temporary file" - " for index %s\n", index->name + 1); - goto corruption; - } - -#ifdef POSIX_FADV_DONTNEED - /* Each block is read exactly once. Free up the file cache. */ - posix_fadvise(index->online_log->fd, - ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); -#endif /* POSIX_FADV_DONTNEED */ -#if 0 //def FALLOC_FL_PUNCH_HOLE - /* Try to deallocate the space for the file on disk. - This should work on ext4 on Linux 2.6.39 and later, - and be ignored when the operation is unsupported. */ - fallocate(index->online_log->fd, - FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - ofs, srv_sort_buf_size); -#endif /* FALLOC_FL_PUNCH_HOLE */ - - next_mrec = index->online_log->head.block; - next_mrec_end = next_mrec + srv_sort_buf_size; - } - - if (mrec) { - /* A partial record was read from the previous block. - Copy the temporary buffer full, as we do not know the - length of the record. Parse subsequent records from - the bigger buffer index->online_log->head.block - or index->online_log->tail.block. 
*/ - - ut_ad(mrec == index->online_log->head.buf); - ut_ad(mrec_end > mrec); - ut_ad(mrec_end < (&index->online_log->head.buf)[1]); - - memcpy((mrec_t*) mrec_end, next_mrec, - (&index->online_log->head.buf)[1] - mrec_end); - mrec = row_log_apply_op( - index, dup, &error, offsets_heap, heap, - has_index_lock, index->online_log->head.buf, - (&index->online_log->head.buf)[1], offsets); - if (error != DB_SUCCESS) { - goto func_exit; - } else if (UNIV_UNLIKELY(mrec == NULL)) { - /* The record was not reassembled properly. */ - goto corruption; - } - /* The record was previously found out to be - truncated. Now that the parse buffer was extended, - it should proceed beyond the old end of the buffer. */ - ut_a(mrec > mrec_end); - - index->online_log->head.bytes = mrec - mrec_end; - next_mrec += index->online_log->head.bytes; - } - - ut_ad(next_mrec <= next_mrec_end); - /* The following loop must not be parsing the temporary - buffer, but head.block or tail.block. */ - - /* mrec!=NULL means that the next record starts from the - middle of the block */ - ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0)); - -#ifdef UNIV_DEBUG - if (next_mrec_end == index->online_log->head.block - + srv_sort_buf_size) { - /* If tail.bytes == 0, next_mrec_end can also be at - the end of tail.block. 
*/ - if (index->online_log->tail.bytes == 0) { - ut_ad(next_mrec == next_mrec_end); - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->head.bytes == 0); - } else { - ut_ad(next_mrec == index->online_log->head.block - + index->online_log->head.bytes); - ut_ad(index->online_log->tail.blocks - > index->online_log->head.blocks); - } - } else if (next_mrec_end == index->online_log->tail.block - + index->online_log->tail.bytes) { - ut_ad(next_mrec == index->online_log->tail.block - + index->online_log->head.bytes); - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->head.bytes - <= index->online_log->tail.bytes); - } else { - ut_error; - } -#endif /* UNIV_DEBUG */ - - mrec_end = next_mrec_end; - - while (!trx_is_interrupted(trx)) { - mrec = next_mrec; - ut_ad(mrec < mrec_end); - - if (!has_index_lock) { - /* We are applying operations from a different - block than the one that is being written to. - We do not hold index->lock in order to - allow other threads to concurrently buffer - modifications. */ - ut_ad(mrec >= index->online_log->head.block); - ut_ad(mrec_end == index->online_log->head.block - + srv_sort_buf_size); - ut_ad(index->online_log->head.bytes - < srv_sort_buf_size); - - /* Take the opportunity to do a redo log - checkpoint if needed. */ - log_free_check(); - } else { - /* We are applying operations from the last block. - Do not allow other threads to buffer anything, - so that we can finally catch up and synchronize. 
*/ - ut_ad(index->online_log->head.blocks == 0); - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(mrec_end == index->online_log->tail.block - + index->online_log->tail.bytes); - ut_ad(mrec >= index->online_log->tail.block); - } - - next_mrec = row_log_apply_op( - index, dup, &error, offsets_heap, heap, - has_index_lock, mrec, mrec_end, offsets); - - if (error != DB_SUCCESS) { - goto func_exit; - } else if (next_mrec == next_mrec_end) { - /* The record happened to end on a block boundary. - Do we have more blocks left? */ - if (has_index_lock) { - /* The index will be locked while - applying the last block. */ - goto all_done; - } - - mrec = NULL; -process_next_block: - rw_lock_x_lock(dict_index_get_lock(index)); - has_index_lock = true; - - index->online_log->head.bytes = 0; - index->online_log->head.blocks++; - goto next_block; - } else if (next_mrec != NULL) { - ut_ad(next_mrec < next_mrec_end); - index->online_log->head.bytes += next_mrec - mrec; - } else if (has_index_lock) { - /* When mrec is within tail.block, it should - be a complete record, because we are holding - index->lock and thus excluding the writer. */ - ut_ad(index->online_log->tail.blocks == 0); - ut_ad(mrec_end == index->online_log->tail.block - + index->online_log->tail.bytes); - ut_ad(0); - goto unexpected_eof; - } else { - memcpy(index->online_log->head.buf, mrec, - mrec_end - mrec); - mrec_end += index->online_log->head.buf - mrec; - mrec = index->online_log->head.buf; - goto process_next_block; - } - } - -interrupted: - error = DB_INTERRUPTED; -func_exit: - if (!has_index_lock) { - rw_lock_x_lock(dict_index_get_lock(index)); - } - - switch (error) { - case DB_SUCCESS: - break; - case DB_INDEX_CORRUPT: - if (((os_offset_t) index->online_log->tail.blocks + 1) - * srv_sort_buf_size >= srv_online_max_size) { - /* The log file grew too big. 
*/ - error = DB_ONLINE_LOG_TOO_BIG; - } - /* fall through */ - default: - /* We set the flag directly instead of invoking - dict_set_corrupted_index_cache_only(index) here, - because the index is not "public" yet. */ - index->type |= DICT_CORRUPT; - } - - mem_heap_free(heap); - mem_heap_free(offsets_heap); - row_log_block_free(index->online_log->head); - ut_free(offsets); - return(error); -} - -/******************************************************//** -Apply the row log to the index upon completing index creation. -@return DB_SUCCESS, or error code on failure */ -UNIV_INTERN -dberr_t -row_log_apply( -/*==========*/ - trx_t* trx, /*!< in: transaction (for checking if - the operation was interrupted) */ - dict_index_t* index, /*!< in/out: secondary index */ - struct TABLE* table) /*!< in/out: MySQL table - (for reporting duplicates) */ -{ - dberr_t error; - row_log_t* log; - row_merge_dup_t dup = { index, table, NULL, 0 }; - DBUG_ENTER("row_log_apply"); - - ut_ad(dict_index_is_online_ddl(index)); - ut_ad(!dict_index_is_clust(index)); - - log_free_check(); - - rw_lock_x_lock(dict_index_get_lock(index)); - - if (!index->table->corrupted) { - error = row_log_apply_ops(trx, index, &dup); - } else { - error = DB_SUCCESS; - } - - if (error != DB_SUCCESS) { - ut_a(!dict_table_is_discarded(index->table)); - /* We set the flag directly instead of invoking - dict_set_corrupted_index_cache_only(index) here, - because the index is not "public" yet. */ - index->type |= DICT_CORRUPT; - index->table->drop_aborted = TRUE; - - dict_index_set_online_status(index, ONLINE_INDEX_ABORTED); - } else { - ut_ad(dup.n_dup == 0); - dict_index_set_online_status(index, ONLINE_INDEX_COMPLETE); - } - - log = index->online_log; - index->online_log = NULL; - /* We could remove the TEMP_INDEX_PREFIX and update the data - dictionary to say that this index is complete, if we had - access to the .frm file here. 
If the server crashes before - all requested indexes have been created, this completed index - will be dropped. */ - rw_lock_x_unlock(dict_index_get_lock(index)); - - row_log_free(log); - - DBUG_RETURN(error); -} diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc deleted file mode 100644 index 6a1298087eb..00000000000 --- a/storage/xtradb/row/row0merge.cc +++ /dev/null @@ -1,4411 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0merge.cc -New index creation routines using a merge sort - -Created 12/4/2005 Jan Lindstrom -Completed by Sunny Bains and Marko Makela -*******************************************************/ -#include <my_config.h> -#include <log.h> -#include <sql_class.h> - -#include "row0merge.h" -#include "row0ext.h" -#include "row0log.h" -#include "row0ins.h" -#include "row0sel.h" -#include "dict0crea.h" -#include "trx0purge.h" -#include "lock0lock.h" -#include "pars0pars.h" -#include "ut0sort.h" -#include "row0ftsort.h" -#include "row0import.h" -#include "handler0alter.h" -#include "ha_prototypes.h" -#include "math.h" /* log2() */ -#include "fil0crypt.h" - -float my_log2f(float n) -{ - /* log(n) / log(2) is log2. */ - return (float)(log((double)n) / log((double)2)); -} - -/* Ignore posix_fadvise() on those platforms where it does not exist */ -#if defined __WIN__ -# define posix_fadvise(fd, offset, len, advice) /* nothing */ -#endif /* __WIN__ */ - -#ifdef UNIV_DEBUG -/** Set these in order ot enable debug printout. */ -/* @{ */ -/** Log each record read from temporary file. */ -static ibool row_merge_print_read; -/** Log each record write to temporary file. */ -static ibool row_merge_print_write; -/** Log each row_merge_blocks() call, merging two blocks of records to -a bigger one. */ -static ibool row_merge_print_block; -/** Log each block read from temporary file. */ -static ibool row_merge_print_block_read; -/** Log each block read from temporary file. 
*/ -static ibool row_merge_print_block_write; -/* @} */ -#endif /* UNIV_DEBUG */ - -/* Whether to disable file system cache */ -UNIV_INTERN char srv_disable_sort_file_cache; - -/* Maximum pending doc memory limit in bytes for a fts tokenization thread */ -#define FTS_PENDING_DOC_MEMORY_LIMIT 1000000 - - -/******************************************************//** -Encrypt a merge block. */ -static -void -row_merge_encrypt_buf( -/*==================*/ - fil_space_crypt_t* crypt_data, /*!< in: table crypt data */ - ulint offset, /*!< in: offset where to - write */ - ulint space, /*!< in: tablespace id */ - const byte* input_buf, /*!< in: input buffer */ - byte* crypted_buf) /*!< out: crypted buffer */ -{ - uint key_version; - uint dstlen=0; - os_offset_t ofs = (os_offset_t)srv_sort_buf_size * (os_offset_t)offset; - - key_version = encryption_key_get_latest_version(crypt_data->key_id); - - /* Store key_version at the beginning of the input buffer */ - mach_write_to_4((byte *)crypted_buf, key_version); - - int rc = encryption_scheme_encrypt(input_buf+ROW_MERGE_RESERVE_SIZE, - srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE, - crypted_buf+ROW_MERGE_RESERVE_SIZE, &dstlen, - crypt_data, key_version, - space, ofs, 0); - - if (! ((rc == MY_AES_OK) && ((ulint)dstlen == srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE))) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Unable to encrypt data-block " - " src: %p srclen: %lu buf: %p buflen: %u." - " return-code: %d. Can't continue!\n", - input_buf, srv_sort_buf_size, - crypted_buf, dstlen, rc); - } -} - -/******************************************************//** -Decrypt a merge block. 
*/ -static -bool -row_merge_decrypt_buf( -/*==================*/ - fil_space_crypt_t* crypt_data, /*!< in: table crypt data */ - ulint offset, /*!< in: offset where to - write */ - ulint space, /*!< in: tablespace id */ - const byte* input_buf, /*!< in: input buffer */ - byte* crypted_buf) /*!< out: crypted buffer */ -{ - uint key_version; - uint dstlen=0; - os_offset_t ofs = (os_offset_t)srv_sort_buf_size * (os_offset_t)offset; - - /* Read key_version from beginning of the buffer */ - key_version = mach_read_from_4((byte *)input_buf); - - if (key_version == 0) { - /* block not encrypted */ - return false; - } - - int rc = encryption_scheme_decrypt(input_buf+ROW_MERGE_RESERVE_SIZE, - srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE, - crypted_buf+ROW_MERGE_RESERVE_SIZE, &dstlen, - crypt_data, key_version, - space, ofs, 0); - - if (! ((rc == MY_AES_OK) && ((ulint)dstlen == srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE))) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Unable to encrypt data-block " - " src: %p srclen: %lu buf: %p buflen: %d." - " return-code: %d. Can't continue!\n", - input_buf, srv_sort_buf_size, - crypted_buf, dstlen, rc); - } - - return (true); -} - -#ifdef UNIV_DEBUG -/******************************************************//** -Display a merge tuple. 
*/ -static MY_ATTRIBUTE((nonnull)) -void -row_merge_tuple_print( -/*==================*/ - FILE* f, /*!< in: output stream */ - const mtuple_t* entry, /*!< in: tuple to print */ - ulint n_fields)/*!< in: number of fields in the tuple */ -{ - ulint j; - - for (j = 0; j < n_fields; j++) { - const dfield_t* field = &entry->fields[j]; - - if (dfield_is_null(field)) { - fputs("\n NULL;", f); - } else { - ulint field_len = dfield_get_len(field); - ulint len = ut_min(field_len, 20); - if (dfield_is_ext(field)) { - fputs("\nE", f); - } else { - fputs("\n ", f); - } - ut_print_buf(f, dfield_get_data(field), len); - if (len != field_len) { - fprintf(f, " (total " ULINTPF " bytes)", - field_len); - } - } - } - putc('\n', f); -} -#endif /* UNIV_DEBUG */ - -/******************************************************//** -Encode an index record. */ -static MY_ATTRIBUTE((nonnull)) -void -row_merge_buf_encode( -/*=================*/ - byte** b, /*!< in/out: pointer to - current end of output buffer */ - const dict_index_t* index, /*!< in: index */ - const mtuple_t* entry, /*!< in: index fields - of the record to encode */ - ulint n_fields) /*!< in: number of fields - in the entry */ -{ - ulint size; - ulint extra_size; - - size = rec_get_converted_size_temp( - index, entry->fields, n_fields, &extra_size); - ut_ad(size >= extra_size); - - /* Encode extra_size + 1 */ - if (extra_size + 1 < 0x80) { - *(*b)++ = (byte) (extra_size + 1); - } else { - ut_ad((extra_size + 1) < 0x8000); - *(*b)++ = (byte) (0x80 | ((extra_size + 1) >> 8)); - *(*b)++ = (byte) (extra_size + 1); - } - - rec_convert_dtuple_to_temp(*b + extra_size, index, - entry->fields, n_fields); - - *b += size; -} - -/******************************************************//** -Allocate a sort buffer. 
-@return own: sort buffer */ -static MY_ATTRIBUTE((malloc, nonnull)) -row_merge_buf_t* -row_merge_buf_create_low( -/*=====================*/ - mem_heap_t* heap, /*!< in: heap where allocated */ - dict_index_t* index, /*!< in: secondary index */ - ulint max_tuples, /*!< in: maximum number of - data tuples */ - ulint buf_size) /*!< in: size of the buffer, - in bytes */ -{ - row_merge_buf_t* buf; - - ut_ad(max_tuples > 0); - - ut_ad(max_tuples <= srv_sort_buf_size); - - buf = static_cast<row_merge_buf_t*>(mem_heap_zalloc(heap, buf_size)); - buf->heap = heap; - buf->index = index; - buf->max_tuples = max_tuples; - buf->tuples = static_cast<mtuple_t*>( - ut_malloc(2 * max_tuples * sizeof *buf->tuples)); - buf->tmp_tuples = buf->tuples + max_tuples; - - return(buf); -} - -/******************************************************//** -Allocate a sort buffer. -@return own: sort buffer */ -UNIV_INTERN -row_merge_buf_t* -row_merge_buf_create( -/*=================*/ - dict_index_t* index) /*!< in: secondary index */ -{ - row_merge_buf_t* buf; - ulint max_tuples; - ulint buf_size; - mem_heap_t* heap; - - max_tuples = (srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE) - / ut_max(1, dict_index_get_min_size(index)); - - buf_size = (sizeof *buf); - - heap = mem_heap_create(buf_size); - - buf = row_merge_buf_create_low(heap, index, max_tuples, buf_size); - - return(buf); -} - -/******************************************************//** -Empty a sort buffer. 
-@return sort buffer */ -UNIV_INTERN -row_merge_buf_t* -row_merge_buf_empty( -/*================*/ - row_merge_buf_t* buf) /*!< in,own: sort buffer */ -{ - ulint buf_size = sizeof *buf; - ulint max_tuples = buf->max_tuples; - mem_heap_t* heap = buf->heap; - dict_index_t* index = buf->index; - mtuple_t* tuples = buf->tuples; - - mem_heap_empty(heap); - - buf = static_cast<row_merge_buf_t*>(mem_heap_zalloc(heap, buf_size)); - buf->heap = heap; - buf->index = index; - buf->max_tuples = max_tuples; - buf->tuples = tuples; - buf->tmp_tuples = buf->tuples + max_tuples; - - return(buf); -} - -/******************************************************//** -Deallocate a sort buffer. */ -UNIV_INTERN -void -row_merge_buf_free( -/*===============*/ - row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */ -{ - ut_free(buf->tuples); - mem_heap_free(buf->heap); -} - -/** Convert the field data from compact to redundant format. -@param[in] row_field field to copy from -@param[out] field field to copy to -@param[in] len length of the field data -@param[in] zip_size compressed BLOB page size, - zero for uncompressed BLOBs -@param[in,out] heap memory heap where to allocate data when - converting to ROW_FORMAT=REDUNDANT, or NULL - when not to invoke - row_merge_buf_redundant_convert(). 
*/
static
void
row_merge_buf_redundant_convert(
	const dfield_t*	row_field,
	dfield_t*	field,
	ulint		len,
	ulint		zip_size,
	mem_heap_t*	heap,
	trx_t*		trx)
{
	ut_ad(DATA_MBMINLEN(field->type.mbminmaxlen) == 1);
	ut_ad(DATA_MBMAXLEN(field->type.mbminmaxlen) > 1);

	byte*	padded = static_cast<byte*>(mem_heap_alloc(heap, len));
	ulint	used = row_field->len;
	ut_ad(used <= len);

	if (!row_field->ext) {
		/* Locally stored value: copy it as is. */
		memcpy(padded, row_field->data, used);
	} else {
		/* Externally stored value: the field must end in a
		non-zero BLOB reference; fetch the column through it. */
		const byte*	ref = static_cast<byte*>(
			dfield_get_data(row_field));
		ulint		blob_len;

		ut_a(used >= BTR_EXTERN_FIELD_REF_SIZE);
		ut_a(memcmp(ref + used - BTR_EXTERN_FIELD_REF_SIZE,
			    field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));

		byte*	blob = btr_copy_externally_stored_field(
			&blob_len, ref, zip_size, used, heap, trx);

		ut_ad(blob_len < len);

		memcpy(padded, blob, blob_len);
		used = blob_len;
	}

	/* Pad the remainder of the fixed-length value with spaces. */
	memset(padded + used, 0x20, len - used);

	dfield_set_data(field, padded, len);
}

/** Insert a data tuple into a sort buffer.
-@param[in,out] buf sort buffer -@param[in] fts_index fts index to be created -@param[in] old_table original table -@param[in,out] psort_info parallel sort info -@param[in] row table row -@param[in] ext cache of externally stored - column prefixes, or NULL -@param[in,out] doc_id Doc ID if we are creating - FTS index -@param[in,out] conv_heap memory heap where to allocate data when - converting to ROW_FORMAT=REDUNDANT, or NULL - when not to invoke - row_merge_buf_redundant_convert() -@param[in,out] exceed_page set if the record size exceeds the page size - when converting to ROW_FORMAT=REDUNDANT -@param[in] trx transaction, passed on to - row_merge_buf_redundant_convert() -@return number of rows added, 0 if out of space */ -static -ulint -row_merge_buf_add( - row_merge_buf_t* buf, - dict_index_t* fts_index, - const dict_table_t* old_table, - fts_psort_t* psort_info, - const dtuple_t* row, - const row_ext_t* ext, - doc_id_t* doc_id, - mem_heap_t* conv_heap, - bool* exceed_page, - trx_t* trx) -{ - ulint i; - const dict_index_t* index; - mtuple_t* entry; - dfield_t* field; - const dict_field_t* ifield; - ulint n_fields; - ulint data_size; - ulint extra_size; - ulint bucket = 0; - doc_id_t write_doc_id; - ulint n_row_added = 0; - DBUG_ENTER("row_merge_buf_add"); - - if (buf->n_tuples >= buf->max_tuples) { - DBUG_RETURN(0); - } - - DBUG_EXECUTE_IF( - "ib_row_merge_buf_add_two", - if (buf->n_tuples >= 2) DBUG_RETURN(0);); - - UNIV_PREFETCH_R(row->fields); - - /* If we are building FTS index, buf->index points to - the 'fts_sort_idx', and real FTS index is stored in - fts_index */ - index = (buf->index->type & DICT_FTS) ? 
fts_index : buf->index; - - n_fields = dict_index_get_n_fields(index); - - entry = &buf->tuples[buf->n_tuples]; - field = entry->fields = static_cast<dfield_t*>( - mem_heap_alloc(buf->heap, n_fields * sizeof *entry->fields)); - - data_size = 0; - extra_size = UT_BITS_IN_BYTES(index->n_nullable); - - ifield = dict_index_get_nth_field(index, 0); - - for (i = 0; i < n_fields; i++, field++, ifield++) { - ulint len; - const dict_col_t* col; - ulint col_no; - ulint fixed_len; - const dfield_t* row_field; - - col = ifield->col; - col_no = dict_col_get_no(col); - - /* Process the Doc ID column */ - if (*doc_id > 0 - && col_no == index->table->fts->doc_col) { - fts_write_doc_id((byte*) &write_doc_id, *doc_id); - - /* Note: field->data now points to a value on the - stack: &write_doc_id after dfield_set_data(). Because - there is only one doc_id per row, it shouldn't matter. - We allocate a new buffer before we leave the function - later below. */ - - dfield_set_data( - field, &write_doc_id, sizeof(write_doc_id)); - - field->type.mtype = ifield->col->mtype; - field->type.prtype = ifield->col->prtype; - field->type.mbminmaxlen = DATA_MBMINMAXLEN(0, 0); - field->type.len = ifield->col->len; - } else { - row_field = dtuple_get_nth_field(row, col_no); - - dfield_copy(field, row_field); - - /* Tokenize and process data for FTS */ - if (index->type & DICT_FTS) { - fts_doc_item_t* doc_item; - byte* value; - void* ptr; - const ulint max_trial_count = 10000; - ulint trial_count = 0; - - /* fetch Doc ID if it already exists - in the row, and not supplied by the - caller. Even if the value column is - NULL, we still need to get the Doc - ID so to maintain the correct max - Doc ID */ - if (*doc_id == 0) { - const dfield_t* doc_field; - doc_field = dtuple_get_nth_field( - row, - index->table->fts->doc_col); - *doc_id = (doc_id_t) mach_read_from_8( - static_cast<byte*>( - dfield_get_data(doc_field))); - - if (*doc_id == 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "FTS Doc ID is zero. 
" - "Record Skipped"); - DBUG_RETURN(0); - } - } - - if (dfield_is_null(field)) { - n_row_added = 1; - continue; - } - - ptr = ut_malloc(sizeof(*doc_item) - + field->len); - - doc_item = static_cast<fts_doc_item_t*>(ptr); - value = static_cast<byte*>(ptr) - + sizeof(*doc_item); - memcpy(value, field->data, field->len); - field->data = value; - - doc_item->field = field; - doc_item->doc_id = *doc_id; - - bucket = *doc_id % fts_sort_pll_degree; - - /* Add doc item to fts_doc_list */ - mutex_enter(&psort_info[bucket].mutex); - - if (psort_info[bucket].error == DB_SUCCESS) { - UT_LIST_ADD_LAST( - doc_list, - psort_info[bucket].fts_doc_list, - doc_item); - psort_info[bucket].memory_used += - sizeof(*doc_item) + field->len; - } else { - ut_free(doc_item); - } - - mutex_exit(&psort_info[bucket].mutex); - - /* Sleep when memory used exceeds limit*/ - while (psort_info[bucket].memory_used - > FTS_PENDING_DOC_MEMORY_LIMIT - && trial_count++ < max_trial_count) { - os_thread_sleep(1000); - } - - n_row_added = 1; - continue; - } - - if (field->len != UNIV_SQL_NULL - && col->mtype == DATA_MYSQL - && col->len != field->len) { - - if (conv_heap != NULL) { - row_merge_buf_redundant_convert( - row_field, field, col->len, - dict_table_zip_size(old_table), - conv_heap, trx); - } else { - /* Field length mismatch should not - happen when rebuilding redundant row - format table. */ - ut_ad(dict_table_is_comp(index->table)); - } - } - } - - len = dfield_get_len(field); - - if (dfield_is_null(field)) { - ut_ad(!(col->prtype & DATA_NOT_NULL)); - continue; - } else if (!ext) { - } else if (dict_index_is_clust(index)) { - /* Flag externally stored fields. 
*/ - const byte* buf = row_ext_lookup(ext, col_no, - &len); - if (UNIV_LIKELY_NULL(buf)) { - ut_a(buf != field_ref_zero); - if (i < dict_index_get_n_unique(index)) { - dfield_set_data(field, buf, len); - } else { - dfield_set_ext(field); - len = dfield_get_len(field); - } - } - } else { - const byte* buf = row_ext_lookup(ext, col_no, - &len); - if (UNIV_LIKELY_NULL(buf)) { - ut_a(buf != field_ref_zero); - dfield_set_data(field, buf, len); - } - } - - /* If a column prefix index, take only the prefix */ - - if (ifield->prefix_len) { - len = dtype_get_at_most_n_mbchars( - col->prtype, - col->mbminmaxlen, - ifield->prefix_len, - len, - static_cast<char*>(dfield_get_data(field))); - dfield_set_len(field, len); - } - - ut_ad(len <= col->len || col->mtype == DATA_BLOB || - ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY - || col->mtype == DATA_VARMYSQL) - && (col->len == 0 - || len <= col->len))); - - fixed_len = ifield->fixed_len; - if (fixed_len && !dict_table_is_comp(index->table) - && DATA_MBMINLEN(col->mbminmaxlen) - != DATA_MBMAXLEN(col->mbminmaxlen)) { - /* CHAR in ROW_FORMAT=REDUNDANT is always - fixed-length, but in the temporary file it is - variable-length for variable-length character - sets. */ - fixed_len = 0; - } - - if (fixed_len) { -#ifdef UNIV_DEBUG - ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen); - ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen); - - /* len should be between size calculated based on - mbmaxlen and mbminlen */ - ut_ad(len <= fixed_len); - ut_ad(!mbmaxlen || len >= mbminlen - * (fixed_len / mbmaxlen)); - - ut_ad(!dfield_is_ext(field)); -#endif /* UNIV_DEBUG */ - } else if (dfield_is_ext(field)) { - extra_size += 2; - } else if (len < 128 - || (col->len < 256 - && col->mtype != DATA_BLOB)) { - extra_size++; - } else { - /* For variable-length columns, we look up the - maximum length from the column itself. If this - is a prefix index column shorter than 256 bytes, - this will waste one byte. 
*/ - extra_size += 2; - } - data_size += len; - } - - /* If this is FTS index, we already populated the sort buffer, return - here */ - if (index->type & DICT_FTS) { - DBUG_RETURN(n_row_added); - } - -#ifdef UNIV_DEBUG - { - ulint size; - ulint extra; - - size = rec_get_converted_size_temp( - index, entry->fields, n_fields, &extra); - - ut_ad(data_size + extra_size == size); - ut_ad(extra_size == extra); - } -#endif /* UNIV_DEBUG */ - - /* Add to the total size of the record in row_merge_block_t - the encoded length of extra_size and the extra bytes (extra_size). - See row_merge_buf_encode(), called by row_merge_buf_write(), for - the variable-length encoding of extra_size. */ - data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80); - - /* Record size can exceed page size while converting to - redundant row format. But there is assert - ut_ad(size < UNIV_PAGE_SIZE) in rec_offs_data_size(). - It may hit the assert before attempting to insert the row. */ - if (conv_heap != NULL && data_size > UNIV_PAGE_SIZE) { - *exceed_page = true; - } - - ut_ad(data_size < srv_sort_buf_size); - - /* Reserve bytes for the end marker of row_merge_block_t. */ - if (buf->total_size + data_size >= (srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE)) { - DBUG_RETURN(0); - } - - buf->total_size += data_size; - buf->n_tuples++; - n_row_added++; - - field = entry->fields; - - /* Copy the data fields. */ - - do { - dfield_dup(field++, buf->heap); - } while (--n_fields); - - if (conv_heap != NULL) { - mem_heap_empty(conv_heap); - } - - DBUG_RETURN(n_row_added); -} - -/*************************************************************//** -Report a duplicate key. */ -UNIV_INTERN -void -row_merge_dup_report( -/*=================*/ - row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ - const dfield_t* entry) /*!< in: duplicate index entry */ -{ - if (!dup->n_dup++) { - /* Only report the first duplicate record, - but count all duplicate records. 
*/ - innobase_fields_to_mysql(dup->table, dup->index, entry); - } -} - -/*************************************************************//** -Compare two tuples. -@return 1, 0, -1 if a is greater, equal, less, respectively, than b */ -static MY_ATTRIBUTE((warn_unused_result)) -int -row_merge_tuple_cmp( -/*================*/ - ulint n_uniq, /*!< in: number of unique fields */ - ulint n_field,/*!< in: number of fields */ - const mtuple_t& a, /*!< in: first tuple to be compared */ - const mtuple_t& b, /*!< in: second tuple to be compared */ - row_merge_dup_t* dup) /*!< in/out: for reporting duplicates, - NULL if non-unique index */ -{ - int cmp; - const dfield_t* af = a.fields; - const dfield_t* bf = b.fields; - ulint n = n_uniq; - - ut_ad(n_uniq > 0); - ut_ad(n_uniq <= n_field); - - /* Compare the fields of the tuples until a difference is - found or we run out of fields to compare. If !cmp at the - end, the tuples are equal. */ - do { - cmp = cmp_dfield_dfield(af++, bf++); - } while (!cmp && --n); - - if (cmp) { - return(cmp); - } - - if (dup) { - /* Report a duplicate value error if the tuples are - logically equal. NULL columns are logically inequal, - although they are equal in the sorting order. Find - out if any of the fields are NULL. */ - for (const dfield_t* df = a.fields; df != af; df++) { - if (dfield_is_null(df)) { - goto no_report; - } - } - - row_merge_dup_report(dup, a.fields); - } - -no_report: - /* The n_uniq fields were equal, but we compare all fields so - that we will get the same (internal) order as in the B-tree. */ - for (n = n_field - n_uniq + 1; --n; ) { - cmp = cmp_dfield_dfield(af++, bf++); - if (cmp) { - return(cmp); - } - } - - /* This should never be reached, except in a secondary index - when creating a secondary index and a PRIMARY KEY, and there - is a duplicate in the PRIMARY KEY that has not been detected - yet. Internally, an index must never contain duplicates. 
*/ - return(cmp); -} - -/** Wrapper for row_merge_tuple_sort() to inject some more context to -UT_SORT_FUNCTION_BODY(). -@param tuples array of tuples that are being sorted -@param aux work area, same size as tuples[] -@param low lower bound of the sorting area, inclusive -@param high upper bound of the sorting area, exclusive */ -#define row_merge_tuple_sort_ctx(tuples, aux, low, high) \ - row_merge_tuple_sort(n_uniq, n_field, dup, tuples, aux, low, high) -/** Wrapper for row_merge_tuple_cmp() to inject some more context to -UT_SORT_FUNCTION_BODY(). -@param a first tuple to be compared -@param b second tuple to be compared -@return 1, 0, -1 if a is greater, equal, less, respectively, than b */ -#define row_merge_tuple_cmp_ctx(a,b) \ - row_merge_tuple_cmp(n_uniq, n_field, a, b, dup) - -/**********************************************************************//** -Merge sort the tuple buffer in main memory. */ -static MY_ATTRIBUTE((nonnull(4,5))) -void -row_merge_tuple_sort( -/*=================*/ - ulint n_uniq, /*!< in: number of unique fields */ - ulint n_field,/*!< in: number of fields */ - row_merge_dup_t* dup, /*!< in/out: reporter of duplicates - (NULL if non-unique index) */ - mtuple_t* tuples, /*!< in/out: tuples */ - mtuple_t* aux, /*!< in/out: work area */ - ulint low, /*!< in: lower bound of the - sorting area, inclusive */ - ulint high) /*!< in: upper bound of the - sorting area, exclusive */ -{ - ut_ad(n_field > 0); - ut_ad(n_uniq <= n_field); - - UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx, - tuples, aux, low, high, row_merge_tuple_cmp_ctx); -} - -/******************************************************//** -Sort a buffer. 
*/ -UNIV_INTERN -void -row_merge_buf_sort( -/*===============*/ - row_merge_buf_t* buf, /*!< in/out: sort buffer */ - row_merge_dup_t* dup) /*!< in/out: reporter of duplicates - (NULL if non-unique index) */ -{ - row_merge_tuple_sort(dict_index_get_n_unique(buf->index), - dict_index_get_n_fields(buf->index), - dup, - buf->tuples, buf->tmp_tuples, 0, buf->n_tuples); -} - -/******************************************************//** -Write a buffer to a block. */ -UNIV_INTERN -void -row_merge_buf_write( -/*================*/ - const row_merge_buf_t* buf, /*!< in: sorted buffer */ - const merge_file_t* of UNIV_UNUSED, - /*!< in: output file */ - row_merge_block_t* block) /*!< out: buffer for writing to file */ -{ - const dict_index_t* index = buf->index; - ulint n_fields= dict_index_get_n_fields(index); - byte* b = &block[ROW_MERGE_RESERVE_SIZE]; - - for (ulint i = 0; i < buf->n_tuples; i++) { - const mtuple_t* entry = &buf->tuples[i]; - - row_merge_buf_encode(&b, index, entry, n_fields); - ut_ad(b < &block[srv_sort_buf_size]); -#ifdef UNIV_DEBUG - if (row_merge_print_write) { - fprintf(stderr, "row_merge_buf_write %p,%d," - ULINTPF " " ULINTPF, - (void*) b, of->fd, of->offset, i); - row_merge_tuple_print(stderr, entry, n_fields); - } -#endif /* UNIV_DEBUG */ - } - - /* Write an "end-of-chunk" marker. */ - ut_a(b < &block[srv_sort_buf_size]); - ut_a(b == &block[0] + buf->total_size + ROW_MERGE_RESERVE_SIZE); - *b++ = 0; -#ifdef UNIV_DEBUG_VALGRIND - /* The rest of the block is uninitialized. Initialize it - to avoid bogus warnings. */ - memset(b, 0xff, &block[srv_sort_buf_size] - b); -#endif /* UNIV_DEBUG_VALGRIND */ -#ifdef UNIV_DEBUG - if (row_merge_print_write) { - fprintf(stderr, "row_merge_buf_write %p,%d," ULINTPF " EOF\n", - (void*) b, of->fd, of->offset); - } -#endif /* UNIV_DEBUG */ -} - -/******************************************************//** -Create a memory heap and allocate space for row_merge_rec_offsets() -and mrec_buf_t[3]. 
-@return memory heap */ -static -mem_heap_t* -row_merge_heap_create( -/*==================*/ - const dict_index_t* index, /*!< in: record descriptor */ - mrec_buf_t** buf, /*!< out: 3 buffers */ - ulint** offsets1, /*!< out: offsets */ - ulint** offsets2) /*!< out: offsets */ -{ - ulint i = 1 + REC_OFFS_HEADER_SIZE - + dict_index_get_n_fields(index); - mem_heap_t* heap = mem_heap_create(2 * i * sizeof **offsets1 - + 3 * sizeof **buf); - - *buf = static_cast<mrec_buf_t*>( - mem_heap_alloc(heap, 3 * sizeof **buf)); - *offsets1 = static_cast<ulint*>( - mem_heap_alloc(heap, i * sizeof **offsets1)); - *offsets2 = static_cast<ulint*>( - mem_heap_alloc(heap, i * sizeof **offsets2)); - - (*offsets1)[0] = (*offsets2)[0] = i; - (*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index); - - return(heap); -} - -/********************************************************************//** -Read a merge block from the file system. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -row_merge_read( -/*===========*/ - int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to read - in number of row_merge_block_t - elements */ - row_merge_block_t* buf, /*!< out: data */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_buf, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - os_offset_t ofs = ((os_offset_t) offset) * srv_sort_buf_size; - ibool success; - - DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE);); - -#ifdef UNIV_DEBUG - if (row_merge_print_block_read) { - fprintf(stderr, "row_merge_read fd=%d ofs=" ULINTPF "\n", - fd, offset); - } -#endif /* UNIV_DEBUG */ - - success = os_file_read_no_error_handling_int_fd(fd, buf, - ofs, srv_sort_buf_size); - - /* For encrypted tables, decrypt data after reading and copy data */ - if (crypt_data && crypt_buf) { - if( row_merge_decrypt_buf(crypt_data, offset, space, buf, crypt_buf)) { - memcpy(buf, crypt_buf, 
srv_sort_buf_size); - } - } - -#ifdef POSIX_FADV_DONTNEED - /* Each block is read exactly once. Free up the file cache. */ - posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); -#endif /* POSIX_FADV_DONTNEED */ - - if (UNIV_UNLIKELY(!success)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: failed to read merge block at " UINT64PF "\n", - ofs); - } - - return(UNIV_LIKELY(success)); -} - -/********************************************************************//** -Write a merge block to the file system. -@return TRUE if request was successful, FALSE if fail */ -UNIV_INTERN -ibool -row_merge_write( -/*============*/ - int fd, /*!< in: file descriptor */ - ulint offset, /*!< in: offset where to write, - in number of row_merge_block_t elements */ - const void* buf, /*!< in: data */ - fil_space_crypt_t* crypt_data, /*!< in: table crypt data */ - void* crypt_buf, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - size_t buf_len = srv_sort_buf_size; - os_offset_t ofs = buf_len * (os_offset_t) offset; - ibool ret; - void* out_buf = (void *)buf; - - DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE);); - - /* For encrypted tables, encrypt data before writing */ - if (crypt_data && crypt_buf) { - row_merge_encrypt_buf(crypt_data, offset, space, (const byte *)buf, (byte *)crypt_buf); - out_buf = crypt_buf; - } else { - /* Mark block unencrypted */ - mach_write_to_4((byte *)out_buf, 0); - } - - ret = os_file_write_int_fd("(merge)", fd, out_buf, ofs, buf_len); - -#ifdef UNIV_DEBUG - if (row_merge_print_block_write) { - fprintf(stderr, "row_merge_write fd=%d ofs=" ULINTPF "\n", - fd, offset); - } -#endif /* UNIV_DEBUG */ - -#ifdef POSIX_FADV_DONTNEED - /* The block will be needed on the next merge pass, - but it can be evicted from the file cache meanwhile. 
*/ - posix_fadvise(fd, ofs, buf_len, POSIX_FADV_DONTNEED); -#endif /* POSIX_FADV_DONTNEED */ - - return(UNIV_LIKELY(ret)); -} - -/********************************************************************//** -Read a merge record. -@return pointer to next record, or NULL on I/O error or end of list */ -UNIV_INTERN -const byte* -row_merge_read_rec( -/*===============*/ - row_merge_block_t* block, /*!< in/out: file buffer */ - mrec_buf_t* buf, /*!< in/out: secondary buffer */ - const byte* b, /*!< in: pointer to record */ - const dict_index_t* index, /*!< in: index of the record */ - int fd, /*!< in: file descriptor */ - ulint* foffs, /*!< in/out: file offset */ - const mrec_t** mrec, /*!< out: pointer to merge record, - or NULL on end of list - (non-NULL on I/O error) */ - ulint* offsets,/*!< out: offsets of mrec */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - ulint extra_size; - ulint data_size; - ulint avail_size; - - ut_ad(b >= &block[0]); - ut_ad(b < &block[srv_sort_buf_size]); - - ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE - + dict_index_get_n_fields(index)); - - if (b == &block[0]) { - b+= ROW_MERGE_RESERVE_SIZE; - } - - extra_size = *b++; - - if (UNIV_UNLIKELY(!extra_size)) { - /* End of list */ - *mrec = NULL; -#ifdef UNIV_DEBUG - if (row_merge_print_read) { - fprintf(stderr, "row_merge_read %p,%p,%d," ULINTPF - " EOF\n", - (const void*) b, (const void*) block, - fd, *foffs); - } -#endif /* UNIV_DEBUG */ - return(NULL); - } - - if (extra_size >= 0x80) { - /* Read another byte of extra_size. */ - - if (UNIV_UNLIKELY(b >= &block[srv_sort_buf_size])) { - if (!row_merge_read(fd, ++(*foffs), block, - crypt_data, crypt_block, space)) { -err_exit: - /* Signal I/O error. */ - *mrec = b; - return(NULL); - } - - /* Wrap around to the beginning of the buffer. 
*/ - b = &block[ROW_MERGE_RESERVE_SIZE]; - } - - extra_size = (extra_size & 0x7f) << 8; - extra_size |= *b++; - } - - /* Normalize extra_size. Above, value 0 signals "end of list". */ - extra_size--; - - /* Read the extra bytes. */ - - if (UNIV_UNLIKELY(b + extra_size >= &block[srv_sort_buf_size])) { - /* The record spans two blocks. Copy the entire record - to the auxiliary buffer and handle this as a special - case. */ - - avail_size = &block[srv_sort_buf_size] - b; - ut_ad(avail_size < sizeof *buf); - memcpy(*buf, b, avail_size); - - if (!row_merge_read(fd, ++(*foffs), block, - crypt_data, crypt_block, space)) { - - goto err_exit; - } - - /* Wrap around to the beginning of the buffer. */ - b = &block[ROW_MERGE_RESERVE_SIZE]; - - /* Copy the record. */ - memcpy(*buf + avail_size, b, extra_size - avail_size); - b += extra_size - avail_size; - - *mrec = *buf + extra_size; - - rec_init_offsets_temp(*mrec, index, offsets); - - data_size = rec_offs_data_size(offsets); - - /* These overflows should be impossible given that - records are much smaller than either buffer, and - the record starts near the beginning of each buffer. */ - ut_a(extra_size + data_size < sizeof *buf); - ut_a(b + data_size < &block[srv_sort_buf_size]); - - /* Copy the data bytes. */ - memcpy(*buf + extra_size, b, data_size); - b += data_size; - - goto func_exit; - } - - *mrec = b + extra_size; - - rec_init_offsets_temp(*mrec, index, offsets); - - data_size = rec_offs_data_size(offsets); - ut_ad(extra_size + data_size < sizeof *buf); - - b += extra_size + data_size; - - if (UNIV_LIKELY(b < &block[srv_sort_buf_size])) { - /* The record fits entirely in the block. - This is the normal case. */ - goto func_exit; - } - - /* The record spans two blocks. Copy it to buf. 
*/ - - b -= extra_size + data_size; - avail_size = &block[srv_sort_buf_size] - b; - memcpy(*buf, b, avail_size); - *mrec = *buf + extra_size; -#ifdef UNIV_DEBUG - /* We cannot invoke rec_offs_make_valid() here, because there - are no REC_N_NEW_EXTRA_BYTES between extra_size and data_size. - Similarly, rec_offs_validate() would fail, because it invokes - rec_get_status(). */ - offsets[2] = (ulint) *mrec; - offsets[3] = (ulint) index; -#endif /* UNIV_DEBUG */ - - if (!row_merge_read(fd, ++(*foffs), block, - crypt_data, crypt_block, space)) { - - goto err_exit; - } - - /* Wrap around to the beginning of the buffer. */ - b = &block[ROW_MERGE_RESERVE_SIZE]; - - /* Copy the rest of the record. */ - memcpy(*buf + avail_size, b, extra_size + data_size - avail_size); - b += extra_size + data_size - avail_size; - -func_exit: -#ifdef UNIV_DEBUG - if (row_merge_print_read) { - fprintf(stderr, "row_merge_read %p,%p,%d," ULINTPF " ", - (const void*) b, (const void*) block, - fd, *foffs); - rec_print_comp(stderr, *mrec, offsets); - putc('\n', stderr); - } -#endif /* UNIV_DEBUG */ - - return(b); -} - -/********************************************************************//** -Write a merge record. 
*/ -static -void -row_merge_write_rec_low( -/*====================*/ - byte* b, /*!< out: buffer */ - ulint e, /*!< in: encoded extra_size */ -#ifdef UNIV_DEBUG - ulint size, /*!< in: total size to write */ - int fd, /*!< in: file descriptor */ - ulint foffs, /*!< in: file offset */ -#endif /* UNIV_DEBUG */ - const mrec_t* mrec, /*!< in: record to write */ - const ulint* offsets)/*!< in: offsets of mrec */ -#ifndef UNIV_DEBUG -# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets) \ - row_merge_write_rec_low(b, e, mrec, offsets) -#endif /* !UNIV_DEBUG */ -{ -#ifdef UNIV_DEBUG - const byte* const end = b + size; - ut_ad(e == rec_offs_extra_size(offsets) + 1); - - if (row_merge_print_write) { - fprintf(stderr, "row_merge_write %p,%d," ULINTPF " ", - (void*) b, fd, foffs); - rec_print_comp(stderr, mrec, offsets); - putc('\n', stderr); - } -#endif /* UNIV_DEBUG */ - - if (e < 0x80) { - *b++ = (byte) e; - } else { - *b++ = (byte) (0x80 | (e >> 8)); - *b++ = (byte) e; - } - - memcpy(b, mrec - rec_offs_extra_size(offsets), rec_offs_size(offsets)); - ut_ad(b + rec_offs_size(offsets) == end); -} - -/********************************************************************//** -Write a merge record. 
-@return pointer to end of block, or NULL on error */ -static -byte* -row_merge_write_rec( -/*================*/ - row_merge_block_t* block, /*!< in/out: file buffer */ - mrec_buf_t* buf, /*!< in/out: secondary buffer */ - byte* b, /*!< in: pointer to end of block */ - int fd, /*!< in: file descriptor */ - ulint* foffs, /*!< in/out: file offset */ - const mrec_t* mrec, /*!< in: record to write */ - const ulint* offsets,/*!< in: offsets of mrec */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - ulint extra_size; - ulint size; - ulint avail_size; - - ut_ad(block); - ut_ad(buf); - ut_ad(b >= &block[0]); - ut_ad(b < &block[srv_sort_buf_size]); - ut_ad(mrec); - ut_ad(foffs); - ut_ad(mrec < &block[0] || mrec > &block[srv_sort_buf_size]); - ut_ad(mrec < buf[0] || mrec > buf[1]); - - /* Normalize extra_size. Value 0 signals "end of list". */ - extra_size = rec_offs_extra_size(offsets) + 1; - - size = extra_size + (extra_size >= 0x80) - + rec_offs_data_size(offsets); - - if (b == &block[0]) { - b+= ROW_MERGE_RESERVE_SIZE; - } - - if (UNIV_UNLIKELY(b + size >= &block[srv_sort_buf_size])) { - /* The record spans two blocks. - Copy it to the temporary buffer first. */ - avail_size = &block[srv_sort_buf_size] - b; - - row_merge_write_rec_low(buf[0], - extra_size, size, fd, *foffs, - mrec, offsets); - - /* Copy the head of the temporary buffer, write - the completed block, and copy the tail of the - record to the head of the new block. */ - memcpy(b, buf[0], avail_size); - - if (!row_merge_write(fd, (*foffs)++, block, - crypt_data, crypt_block, space)) { - return(NULL); - } - - UNIV_MEM_INVALID(&block[0], srv_sort_buf_size); - - /* Copy the rest. 
*/ - b = &block[ROW_MERGE_RESERVE_SIZE]; - memcpy(b, buf[0] + avail_size, size - avail_size); - b += size - avail_size; - } else { - row_merge_write_rec_low(b, extra_size, size, fd, *foffs, - mrec, offsets); - b += size; - } - - return(b); -} - -/********************************************************************//** -Write an end-of-list marker. -@return pointer to end of block, or NULL on error */ -static -byte* -row_merge_write_eof( -/*================*/ - row_merge_block_t* block, /*!< in/out: file buffer */ - byte* b, /*!< in: pointer to end of block */ - int fd, /*!< in: file descriptor */ - ulint* foffs, /*!< in/out: file offset */ - fil_space_crypt_t* crypt_data, /*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - ut_ad(block); - ut_ad(b >= &block[0]); - ut_ad(b < &block[srv_sort_buf_size]); - ut_ad(foffs); -#ifdef UNIV_DEBUG - if (row_merge_print_write) { - fprintf(stderr, "row_merge_write %p,%p,%d," ULINTPF " EOF\n", - (void*) b, (void*) block, fd, *foffs); - } -#endif /* UNIV_DEBUG */ - - if (b == &block[0]) { - b+= ROW_MERGE_RESERVE_SIZE; - } - - *b++ = 0; - UNIV_MEM_ASSERT_RW(&block[0], b - &block[0]); - UNIV_MEM_ASSERT_W(&block[0], srv_sort_buf_size); - -#ifdef UNIV_DEBUG_VALGRIND - /* The rest of the block is uninitialized. Initialize it - to avoid bogus warnings. */ - memset(b, 0xff, &block[srv_sort_buf_size] - b); -#endif /* UNIV_DEBUG_VALGRIND */ - - if (!row_merge_write(fd, (*foffs)++, block, - crypt_data, crypt_block, space)) { - return(NULL); - } - - UNIV_MEM_INVALID(&block[0], srv_sort_buf_size); - - return(&block[0]); -} - -/** Create a temporary file if it has not been created already. 
-@param[in,out] tmpfd temporary file handle -@param[in] path path to create temporary file -@return file descriptor, or -1 on failure */ -static MY_ATTRIBUTE((warn_unused_result)) -int -row_merge_tmpfile_if_needed( - int* tmpfd, - const char* path) -{ - if (*tmpfd < 0) { - *tmpfd = row_merge_file_create_low(path); - } - - return(*tmpfd); -} - -/** Create a temporary file for merge sort if it was not created already. -@param[in,out] file merge file structure -@param[in,out] tmpfd temporary file structure -@param[in] nrec number of records in the file -@param[in] path path to create temporary files -@return file descriptor, or -1 on failure */ -static MY_ATTRIBUTE((warn_unused_result)) -int -row_merge_file_create_if_needed( - merge_file_t* file, - int* tmpfd, - ulint nrec, - const char* path) -{ - ut_ad(file->fd < 0 || *tmpfd >=0); - if (file->fd < 0 && row_merge_file_create(file, path) >= 0) { - if (row_merge_tmpfile_if_needed(tmpfd, path) < 0) { - return(-1); - } - - file->n_rec = nrec; - } - - ut_ad(file->fd < 0 || *tmpfd >=0); - return(file->fd); -} - -/** Reads clustered index of the table and create temporary files -containing the index entries for the indexes to be built. 
-@param[in] trx transaction -@param[in,out] table MySQL table object, for reporting erroneous - records -@param[in] old_table table where rows are read from -@param[in] new_table table where indexes are created; identical to - old_table unless creating a PRIMARY KEY -@param[in] online true if creating indexes online -@param[in] index indexes to be created -@param[in] fts_sort_idx full-text index to be created, or NULL -@param[in] psort_info parallel sort info for fts_sort_idx creation, - or NULL -@param[in] files temporary files -@param[in] key_numbers MySQL key numbers to create -@param[in] n_index number of indexes to create -@param[in] add_cols default values of added columns, or NULL -@param[in] col_map mapping of old column numbers to new ones, or - NULL if old_table == new_table -@param[in] add_autoinc number of added AUTO_INCREMENT columns, or - ULINT_UNDEFINED if none is added -@param[in,out] sequence autoinc sequence -@param[in,out] block file buffer -@param[in,out] tmpfd temporary file handle -return DB_SUCCESS or error */ -static MY_ATTRIBUTE((nonnull(1,2,3,4,6,9,10,16), warn_unused_result)) -dberr_t -row_merge_read_clustered_index( - trx_t* trx, - struct TABLE* table, - const dict_table_t* old_table, - const dict_table_t* new_table, - bool online, - dict_index_t** index, - dict_index_t* fts_sort_idx, - fts_psort_t* psort_info, - merge_file_t* files, - const ulint* key_numbers, - ulint n_index, - const dtuple_t* add_cols, - const ulint* col_map, - ulint add_autoinc, - ib_sequence_t& sequence, - row_merge_block_t* block, - int* tmpfd, - float pct_cost, /*!< in: percent of task weight - out of total alter job */ - fil_space_crypt_t* crypt_data,/*!< in: crypt data or NULL */ - row_merge_block_t* crypt_block)/*!< in: in/out: crypted file - buffer */ -{ - dict_index_t* clust_index; /* Clustered index */ - mem_heap_t* row_heap; /* Heap memory to create - clustered index tuples */ - row_merge_buf_t** merge_buf; /* Temporary list for records*/ - btr_pcur_t pcur; 
/* Cursor on the clustered - index */ - mtr_t mtr; /* Mini transaction */ - dberr_t err = DB_SUCCESS;/* Return code */ - ulint n_nonnull = 0; /* number of columns - changed to NOT NULL */ - ulint* nonnull = NULL; /* NOT NULL columns */ - dict_index_t* fts_index = NULL;/* FTS index */ - doc_id_t doc_id = 0; - doc_id_t max_doc_id = 0; - ibool add_doc_id = FALSE; - os_event_t fts_parallel_sort_event = NULL; - ibool fts_pll_sort = FALSE; - ib_int64_t sig_count = 0; - mem_heap_t* conv_heap = NULL; - - float curr_progress = 0.0; - ib_int64_t read_rows = 0; - ib_int64_t table_total_rows = 0; - - DBUG_ENTER("row_merge_read_clustered_index"); - - ut_ad((old_table == new_table) == !col_map); - ut_ad(!add_cols || col_map); - - table_total_rows = dict_table_get_n_rows(old_table); - if(table_total_rows == 0) { - /* We don't know total row count */ - table_total_rows = 1; - } - - trx->op_info = "reading clustered index"; - -#ifdef FTS_INTERNAL_DIAG_PRINT - DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n"); -#endif - - ut_ad(trx->mysql_thd != NULL); - const char* path = thd_innodb_tmpdir(trx->mysql_thd); - - /* Create and initialize memory for record buffers */ - - merge_buf = static_cast<row_merge_buf_t**>( - mem_alloc(n_index * sizeof *merge_buf)); - - for (ulint i = 0; i < n_index; i++) { - if (index[i]->type & DICT_FTS) { - - /* We are building a FT index, make sure - we have the temporary 'fts_sort_idx' */ - ut_a(fts_sort_idx); - - fts_index = index[i]; - - merge_buf[i] = row_merge_buf_create(fts_sort_idx); - - add_doc_id = DICT_TF2_FLAG_IS_SET( - new_table, DICT_TF2_FTS_ADD_DOC_ID); - - /* If Doc ID does not exist in the table itself, - fetch the first FTS Doc ID */ - if (add_doc_id) { - fts_get_next_doc_id( - (dict_table_t*) new_table, - &doc_id); - ut_ad(doc_id > 0); - } - - fts_pll_sort = TRUE; - row_fts_start_psort(psort_info); - fts_parallel_sort_event = - psort_info[0].psort_common->sort_event; - } else { - merge_buf[i] = row_merge_buf_create(index[i]); - } - } - 
- mtr_start(&mtr); - - /* Find the clustered index and create a persistent cursor - based on that. */ - - clust_index = dict_table_get_first_index(old_table); - - btr_pcur_open_at_index_side( - true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - if (old_table != new_table) { - /* The table is being rebuilt. Identify the columns - that were flagged NOT NULL in the new table, so that - we can quickly check that the records in the old table - do not violate the added NOT NULL constraints. */ - - nonnull = static_cast<ulint*>( - mem_alloc(dict_table_get_n_cols(new_table) - * sizeof *nonnull)); - - for (ulint i = 0; i < dict_table_get_n_cols(old_table); i++) { - if (dict_table_get_nth_col(old_table, i)->prtype - & DATA_NOT_NULL) { - continue; - } - - const ulint j = col_map[i]; - - if (j == ULINT_UNDEFINED) { - /* The column was dropped. */ - continue; - } - - if (dict_table_get_nth_col(new_table, j)->prtype - & DATA_NOT_NULL) { - nonnull[n_nonnull++] = j; - } - } - - if (!n_nonnull) { - mem_free(nonnull); - nonnull = NULL; - } - } - - row_heap = mem_heap_create(sizeof(mrec_buf_t)); - - if (dict_table_is_comp(old_table) - && !dict_table_is_comp(new_table)) { - conv_heap = mem_heap_create(sizeof(mrec_buf_t)); - } - - /* Scan the clustered index. 
*/ - for (;;) { - const rec_t* rec; - ulint* offsets; - const dtuple_t* row; - row_ext_t* ext; - page_cur_t* cur = btr_pcur_get_page_cur(&pcur); - - /* Do not continue if table pages are still encrypted */ - if (!old_table->is_readable() || - !new_table->is_readable()) { - err = DB_DECRYPTION_FAILED; - trx->error_key_num = 0; - goto func_exit; - } - - page_cur_move_to_next(cur); - - if (page_cur_is_after_last(cur)) { - if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { - err = DB_INTERRUPTED; - trx->error_key_num = 0; - goto func_exit; - } - - if (online && old_table != new_table) { - err = row_log_table_get_error(clust_index); - if (err != DB_SUCCESS) { - trx->error_key_num = 0; - goto func_exit; - } - } -#ifdef DBUG_OFF -# define dbug_run_purge false -#else /* DBUG_OFF */ - bool dbug_run_purge = false; -#endif /* DBUG_OFF */ - DBUG_EXECUTE_IF( - "ib_purge_on_create_index_page_switch", - dbug_run_purge = true;); - - if (dbug_run_purge - || rw_lock_get_waiters( - dict_index_get_lock(clust_index))) { - /* There are waiters on the clustered - index tree lock, likely the purge - thread. Store and restore the cursor - position, and yield so that scanning a - large table will not starve other - threads. */ - - /* Store the cursor position on the last user - record on the page. */ - btr_pcur_move_to_prev_on_page(&pcur); - /* Leaf pages must never be empty, unless - this is the only page in the index tree. */ - ut_ad(btr_pcur_is_on_user_rec(&pcur) - || buf_block_get_page_no( - btr_pcur_get_block(&pcur)) - == clust_index->page); - - btr_pcur_store_position(&pcur, &mtr); - mtr_commit(&mtr); - - if (dbug_run_purge) { - /* This is for testing - purposes only (see - DBUG_EXECUTE_IF above). We - signal the purge thread and - hope that the purge batch will - complete before we execute - btr_pcur_restore_position(). */ - trx_purge_run(); - os_thread_sleep(1000000); - } - - /* Give the waiters a chance to proceed. 
*/ - os_thread_yield(); - - mtr_start(&mtr); - /* Restore position on the record, or its - predecessor if the record was purged - meanwhile. */ - btr_pcur_restore_position( - BTR_SEARCH_LEAF, &pcur, &mtr); - /* Move to the successor of the - original record. */ - if (!btr_pcur_move_to_next_user_rec( - &pcur, &mtr)) { -end_of_index: - row = NULL; - mtr_commit(&mtr); - mem_heap_free(row_heap); - if (nonnull) { - mem_free(nonnull); - } - goto write_buffers; - } - } else { - ulint next_page_no; - buf_block_t* block; - - next_page_no = btr_page_get_next( - page_cur_get_page(cur), &mtr); - - if (next_page_no == FIL_NULL) { - goto end_of_index; - } - - block = page_cur_get_block(cur); - block = btr_block_get( - buf_block_get_space(block), - buf_block_get_zip_size(block), - next_page_no, BTR_SEARCH_LEAF, - clust_index, &mtr); - - btr_leaf_page_release(page_cur_get_block(cur), - BTR_SEARCH_LEAF, &mtr); - page_cur_set_before_first(block, cur); - page_cur_move_to_next(cur); - - ut_ad(!page_cur_is_after_last(cur)); - } - } - - rec = page_cur_get_rec(cur); - - SRV_CORRUPT_TABLE_CHECK(rec, - { - err = DB_CORRUPTION; - goto func_exit; - }); - - offsets = rec_get_offsets(rec, clust_index, NULL, - ULINT_UNDEFINED, &row_heap); - - if (online) { - /* Perform a REPEATABLE READ. - - When rebuilding the table online, - row_log_table_apply() must not see a newer - state of the table when applying the log. - This is mainly to prevent false duplicate key - errors, because the log will identify records - by the PRIMARY KEY, and also to prevent unsafe - BLOB access. - - When creating a secondary index online, this - table scan must not see records that have only - been inserted to the clustered index, but have - not been written to the online_log of - index[]. If we performed READ UNCOMMITTED, it - could happen that the ADD INDEX reaches - ONLINE_INDEX_COMPLETE state between the time - the DML thread has updated the clustered index - but has not yet accessed secondary index. 
*/ - ut_ad(trx->read_view); - - if (!read_view_sees_trx_id( - trx->read_view, - row_get_rec_trx_id( - rec, clust_index, offsets))) { - rec_t* old_vers; - - row_vers_build_for_consistent_read( - rec, &mtr, clust_index, &offsets, - trx->read_view, &row_heap, - row_heap, &old_vers); - - rec = old_vers; - - if (!rec) { - continue; - } - } - - if (rec_get_deleted_flag( - rec, - dict_table_is_comp(old_table))) { - /* This record was deleted in the latest - committed version, or it was deleted and - then reinserted-by-update before purge - kicked in. Skip it. */ - continue; - } - - ut_ad(!rec_offs_any_null_extern(rec, offsets)); - } else if (rec_get_deleted_flag( - rec, dict_table_is_comp(old_table))) { - /* Skip delete-marked records. - - Skipping delete-marked records will make the - created indexes unuseable for transactions - whose read views were created before the index - creation completed, but preserving the history - would make it tricky to detect duplicate - keys. */ - continue; - } - - /* When !online, we are holding a lock on old_table, preventing - any inserts that could have written a record 'stub' before - writing out off-page columns. */ - ut_ad(!rec_offs_any_null_extern(rec, offsets)); - - /* Build a row based on the clustered index. 
*/ - - row = row_build(ROW_COPY_POINTERS, clust_index, - rec, offsets, new_table, - add_cols, col_map, &ext, row_heap); - ut_ad(row); - - for (ulint i = 0; i < n_nonnull; i++) { - const dfield_t* field = &row->fields[nonnull[i]]; - - ut_ad(dfield_get_type(field)->prtype & DATA_NOT_NULL); - - if (dfield_is_null(field)) { - err = DB_INVALID_NULL; - trx->error_key_num = 0; - goto func_exit; - } - } - - /* Get the next Doc ID */ - if (add_doc_id) { - doc_id++; - } else { - doc_id = 0; - } - - if (add_autoinc != ULINT_UNDEFINED) { - - ut_ad(add_autoinc - < dict_table_get_n_user_cols(new_table)); - - const dfield_t* dfield; - - dfield = dtuple_get_nth_field(row, add_autoinc); - if (dfield_is_null(dfield)) { - goto write_buffers; - } - - const dtype_t* dtype = dfield_get_type(dfield); - byte* b = static_cast<byte*>(dfield_get_data(dfield)); - - if (sequence.eof()) { - err = DB_ERROR; - trx->error_key_num = 0; - - ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_AUTOINC_READ_FAILED, "[NULL]"); - - goto func_exit; - } - - ulonglong value = sequence++; - - switch (dtype_get_mtype(dtype)) { - case DATA_INT: { - ibool usign; - ulint len = dfield_get_len(dfield); - - usign = dtype_get_prtype(dtype) & DATA_UNSIGNED; - mach_write_ulonglong(b, value, len, usign); - - break; - } - - case DATA_FLOAT: - mach_float_write( - b, static_cast<float>(value)); - break; - - case DATA_DOUBLE: - mach_double_write( - b, static_cast<double>(value)); - break; - - default: - ut_ad(0); - } - } - -write_buffers: - /* Build all entries for all the indexes to be created - in a single scan of the clustered index. 
*/ - - for (ulint i = 0; i < n_index; i++) { - row_merge_buf_t* buf = merge_buf[i]; - merge_file_t* file = &files[i]; - ulint rows_added = 0; - bool exceed_page = false; - - if (UNIV_LIKELY - (row && (rows_added = row_merge_buf_add( - buf, fts_index, old_table, - psort_info, row, ext, &doc_id, - conv_heap, &exceed_page, trx)))) { - - /* If we are creating FTS index, - a single row can generate more - records for tokenized word */ - file->n_rec += rows_added; - - if (exceed_page) { - err = DB_TOO_BIG_RECORD; - break; - } - - if (doc_id > max_doc_id) { - max_doc_id = doc_id; - } - - if (buf->index->type & DICT_FTS) { - /* Check if error occurs in child thread */ - for (ulint j = 0; j < fts_sort_pll_degree; j++) { - if (psort_info[j].error != DB_SUCCESS) { - err = psort_info[j].error; - trx->error_key_num = i; - break; - } - } - - if (err != DB_SUCCESS) { - break; - } - } - - continue; - } - - if (buf->index->type & DICT_FTS) { - if (!row || !doc_id) { - continue; - } - } - - /* The buffer must be sufficiently large - to hold at least one record. It may only - be empty when we reach the end of the - clustered index. row_merge_buf_add() - must not have been called in this loop. */ - ut_ad(buf->n_tuples || row == NULL); - - /* We have enough data tuples to form a block. - Sort them and write to disk. */ - - if (buf->n_tuples) { - if (dict_index_is_unique(buf->index)) { - row_merge_dup_t dup = { - buf->index, table, col_map, 0}; - - row_merge_buf_sort(buf, &dup); - - if (dup.n_dup) { - err = DB_DUPLICATE_KEY; - trx->error_key_num - = key_numbers[i]; - break; - } - } else { - row_merge_buf_sort(buf, NULL); - } - } else if (online && new_table == old_table) { - /* Note the newest transaction that - modified this index when the scan was - completed. We prevent older readers - from accessing this index, to ensure - read consistency. 
*/ - - trx_id_t max_trx_id; - - ut_a(row == NULL); - rw_lock_x_lock( - dict_index_get_lock(buf->index)); - ut_a(dict_index_get_online_status(buf->index) - == ONLINE_INDEX_CREATION); - - max_trx_id = row_log_get_max_trx(buf->index); - - if (max_trx_id > buf->index->trx_id) { - buf->index->trx_id = max_trx_id; - } - - rw_lock_x_unlock( - dict_index_get_lock(buf->index)); - } - - if (buf->n_tuples > 0) { - - if (row_merge_file_create_if_needed( - file, tmpfd, buf->n_tuples, path) < 0) { - err = DB_OUT_OF_MEMORY; - trx->error_key_num = i; - break; - } - - ut_ad(file->n_rec > 0); - - row_merge_buf_write(buf, file, block); - - if (!row_merge_write(file->fd, file->offset++, - block, crypt_data, crypt_block, - new_table->space)) { - err = DB_TEMP_FILE_WRITE_FAILURE; - trx->error_key_num = i; - break; - } - } - - UNIV_MEM_INVALID(&block[0], srv_sort_buf_size); - - merge_buf[i] = row_merge_buf_empty(buf); - - if (UNIV_LIKELY(row != NULL)) { - /* Try writing the record again, now - that the buffer has been written out - and emptied. */ - - if (UNIV_UNLIKELY - (!(rows_added = row_merge_buf_add( - buf, fts_index, old_table, - psort_info, row, ext, - &doc_id, conv_heap, - &exceed_page, trx)))) { - /* An empty buffer should have enough - room for at least one record. */ - ut_error; - } - - if (exceed_page) { - err = DB_TOO_BIG_RECORD; - break; - } - - file->n_rec += rows_added; - } - } - - if (row == NULL) { - goto all_done; - } - - if (err != DB_SUCCESS) { - goto func_exit; - } - - mem_heap_empty(row_heap); - - /* Increment innodb_onlineddl_pct_progress status variable */ - read_rows++; - if(read_rows % 1000 == 0) { - /* Update progress for each 1000 rows */ - curr_progress = (read_rows >= table_total_rows) ? 
- pct_cost : - ((pct_cost * read_rows) / table_total_rows); - /* presenting 10.12% as 1012 integer */ - onlineddl_pct_progress = (ulint) (curr_progress * 100); - } - } - -func_exit: - mtr_commit(&mtr); - - mem_heap_free(row_heap); - - if (nonnull) { - mem_free(nonnull); - } - -all_done: - if (conv_heap != NULL) { - mem_heap_free(conv_heap); - } - -#ifdef FTS_INTERNAL_DIAG_PRINT - DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n"); -#endif - if (fts_pll_sort) { - bool all_exit = false; - ulint trial_count = 0; - const ulint max_trial_count = 10000; - -wait_again: - /* Check if error occurs in child thread */ - for (ulint j = 0; j < fts_sort_pll_degree; j++) { - if (psort_info[j].error != DB_SUCCESS) { - err = psort_info[j].error; - trx->error_key_num = j; - break; - } - } - - /* Tell all children that parent has done scanning */ - for (ulint i = 0; i < fts_sort_pll_degree; i++) { - if (err == DB_SUCCESS) { - psort_info[i].state = FTS_PARENT_COMPLETE; - } else { - psort_info[i].state = FTS_PARENT_EXITING; - } - } - - /* Now wait all children to report back to be completed */ - os_event_wait_time_low(fts_parallel_sort_event, - 1000000, sig_count); - - for (ulint i = 0; i < fts_sort_pll_degree; i++) { - if (psort_info[i].child_status != FTS_CHILD_COMPLETE - && psort_info[i].child_status != FTS_CHILD_EXITING) { - sig_count = os_event_reset( - fts_parallel_sort_event); - goto wait_again; - } - } - - /* Now all children should complete, wait a bit until - they all finish setting the event, before we free everything. 
- This has a 10 second timeout */ - do { - all_exit = true; - - for (ulint j = 0; j < fts_sort_pll_degree; j++) { - if (psort_info[j].child_status - != FTS_CHILD_EXITING) { - all_exit = false; - os_thread_sleep(1000); - break; - } - } - trial_count++; - } while (!all_exit && trial_count < max_trial_count); - - if (!all_exit) { - ut_ad(0); - ib_logf(IB_LOG_LEVEL_FATAL, - "Not all child sort threads exited" - " when creating FTS index '%s'", - fts_sort_idx->name); - } - } - -#ifdef FTS_INTERNAL_DIAG_PRINT - DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Tokenization\n"); -#endif - for (ulint i = 0; i < n_index; i++) { - row_merge_buf_free(merge_buf[i]); - } - - row_fts_free_pll_merge_buf(psort_info); - - mem_free(merge_buf); - - btr_pcur_close(&pcur); - - /* Update the next Doc ID we used. Table should be locked, so - no concurrent DML */ - if (max_doc_id && err == DB_SUCCESS) { - /* Sync fts cache for other fts indexes to keep all - fts indexes consistent in sync_doc_id. */ - err = fts_sync_table(const_cast<dict_table_t*>(new_table), - false, true, false); - - if (err == DB_SUCCESS) { - fts_update_next_doc_id( - 0, new_table, old_table->name, max_doc_id); - } - } - - trx->op_info = ""; - - DBUG_RETURN(err); -} - -/** Write a record via buffer 2 and read the next record to buffer N. -@param N number of the buffer (0 or 1) -@param INDEX record descriptor -@param AT_END statement to execute at end of input */ -#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \ - do { \ - b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], \ - &buf[2], b2, \ - of->fd, &of->offset, \ - mrec##N, offsets##N, \ - crypt_data, \ - crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL , \ - space); \ - if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \ - goto corrupt; \ - } \ - b##N = row_merge_read_rec(&block[N * srv_sort_buf_size],\ - &buf[N], b##N, INDEX, \ - file->fd, foffs##N, \ - &mrec##N, offsets##N, \ - crypt_data, \ - crypt_block ? 
&crypt_block[N * srv_sort_buf_size] : NULL, \ - space); \ - \ - if (UNIV_UNLIKELY(!b##N)) { \ - if (mrec##N) { \ - goto corrupt; \ - } \ - AT_END; \ - } \ - } while (0) - -/*************************************************************//** -Merge two blocks of records on disk and write a bigger block. -@return DB_SUCCESS or error code */ -static __attribute__((nonnull(1,2,3,4,5,6), warn_unused_result)) -dberr_t -row_merge_blocks( -/*=============*/ - const row_merge_dup_t* dup, /*!< in: descriptor of - index being created */ - const merge_file_t* file, /*!< in: file containing - index entries */ - row_merge_block_t* block, /*!< in/out: 3 buffers */ - ulint* foffs0, /*!< in/out: offset of first - source list in the file */ - ulint* foffs1, /*!< in/out: offset of second - source list in the file */ - merge_file_t* of, /*!< in/out: output file */ - fil_space_crypt_t* crypt_data,/*!< in: crypt data or NULL */ - row_merge_block_t* crypt_block,/*!< in: in/out: crypted file - buffer */ - ulint space) /*!< in: space id */ -{ - mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ - - mrec_buf_t* buf; /*!< buffer for handling - split mrec in block[] */ - const byte* b0; /*!< pointer to block[0] */ - const byte* b1; /*!< pointer to block[srv_sort_buf_size] */ - byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */ - const mrec_t* mrec0; /*!< merge rec, points to block[0] or buf[0] */ - const mrec_t* mrec1; /*!< merge rec, points to - block[srv_sort_buf_size] or buf[1] */ - ulint* offsets0;/* offsets of mrec0 */ - ulint* offsets1;/* offsets of mrec1 */ - -#ifdef UNIV_DEBUG - if (row_merge_print_block) { - fprintf(stderr, - "row_merge_blocks fd=%d ofs=" ULINTPF - " + fd=%d ofs=" ULINTPF - " = fd=%d ofs=" ULINTPF "\n", - file->fd, *foffs0, - file->fd, *foffs1, - of->fd, of->offset); - } -#endif /* UNIV_DEBUG */ - - heap = row_merge_heap_create(dup->index, &buf, &offsets0, &offsets1); - - /* Write a record and read the next record. 
Split the output - file in two halves, which can be merged on the following pass. */ - - if (!row_merge_read(file->fd, *foffs0, &block[0], - crypt_data, crypt_block ? &crypt_block[0] : NULL, space) - || !row_merge_read(file->fd, *foffs1, &block[srv_sort_buf_size], - crypt_data, crypt_block ? &crypt_block[srv_sort_buf_size] : NULL, space)) { -corrupt: - mem_heap_free(heap); - return(DB_CORRUPTION); - } - - b0 = &block[0]; - b1 = &block[srv_sort_buf_size]; - b2 = &block[2 * srv_sort_buf_size]; - - b0 = row_merge_read_rec( - &block[0], &buf[0], b0, dup->index, - file->fd, foffs0, &mrec0, offsets0, - crypt_data, crypt_block ? &crypt_block[0] : NULL, space); - - b1 = row_merge_read_rec( - &block[srv_sort_buf_size], - &buf[srv_sort_buf_size], b1, dup->index, - file->fd, foffs1, &mrec1, offsets1, - crypt_data, crypt_block ? &crypt_block[srv_sort_buf_size] : NULL, space); - - if (UNIV_UNLIKELY(!b0 && mrec0) - || UNIV_UNLIKELY(!b1 && mrec1)) { - - goto corrupt; - } - - while (mrec0 && mrec1) { - switch (cmp_rec_rec_simple( - mrec0, mrec1, offsets0, offsets1, - dup->index, dup->table)) { - case 0: - mem_heap_free(heap); - return(DB_DUPLICATE_KEY); - case -1: - ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto merged); - break; - case 1: - ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto merged); - break; - default: - ut_error; - } - } - -merged: - if (mrec0) { - /* append all mrec0 to output */ - for (;;) { - ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto done0); - } - } -done0: - if (mrec1) { - /* append all mrec1 to output */ - for (;;) { - ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto done1); - } - } -done1: - - mem_heap_free(heap); - - b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size], - b2, of->fd, &of->offset, - crypt_data, crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL, space); - - return(b2 ? DB_SUCCESS : DB_CORRUPTION); -} - -/*************************************************************//** -Copy a block of index entries. 
-@return TRUE on success, FALSE on failure */ -static __attribute__((nonnull(1,2,3,4,5), warn_unused_result)) -ibool -row_merge_blocks_copy( -/*==================*/ - const dict_index_t* index, /*!< in: index being created */ - const merge_file_t* file, /*!< in: input file */ - row_merge_block_t* block, /*!< in/out: 3 buffers */ - ulint* foffs0, /*!< in/out: input file offset */ - merge_file_t* of, /*!< in/out: output file */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */ - - mrec_buf_t* buf; /*!< buffer for handling - split mrec in block[] */ - const byte* b0; /*!< pointer to block[0] */ - byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */ - const mrec_t* mrec0; /*!< merge rec, points to block[0] */ - ulint* offsets0;/* offsets of mrec0 */ - ulint* offsets1;/* dummy offsets */ - -#ifdef UNIV_DEBUG - if (row_merge_print_block) { - fprintf(stderr, - "row_merge_blocks_copy fd=%d ofs=" ULINTPF - " = fd=%d ofs=" ULINTPF "\n", - file->fd, *foffs0, - of->fd, of->offset); - } -#endif /* UNIV_DEBUG */ - - heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1); - - /* Write a record and read the next record. Split the output - file in two halves, which can be merged on the following pass. */ - - if (!row_merge_read(file->fd, *foffs0, &block[0], - crypt_data, crypt_block ? &crypt_block[0] : NULL, space)) { -corrupt: - mem_heap_free(heap); - return(FALSE); - } - - b0 = &block[0]; - - b2 = &block[2 * srv_sort_buf_size]; - - b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, - file->fd, foffs0, &mrec0, offsets0, - crypt_data, crypt_block ? 
&crypt_block[0] : NULL, space); - - if (UNIV_UNLIKELY(!b0 && mrec0)) { - - goto corrupt; - } - - if (mrec0) { - /* append all mrec0 to output */ - for (;;) { - ROW_MERGE_WRITE_GET_NEXT(0, index, goto done0); - } - } -done0: - - /* The file offset points to the beginning of the last page - that has been read. Update it to point to the next block. */ - (*foffs0)++; - - mem_heap_free(heap); - - return(row_merge_write_eof(&block[2 * srv_sort_buf_size], - b2, of->fd, &of->offset, - crypt_data, - crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL, space) - != NULL); -} - -/*************************************************************//** -Merge disk files. -@return DB_SUCCESS or error code */ -static __attribute__((nonnull(1,2,3,4,5,6,7))) -dberr_t -row_merge( -/*======*/ - trx_t* trx, /*!< in: transaction */ - const row_merge_dup_t* dup, /*!< in: descriptor of - index being created */ - merge_file_t* file, /*!< in/out: file containing - index entries */ - row_merge_block_t* block, /*!< in/out: 3 buffers */ - int* tmpfd, /*!< in/out: temporary file handle */ - ulint* num_run,/*!< in/out: Number of runs remain - to be merged */ - ulint* run_offset, /*!< in/out: Array contains the - first offset number for each merge - run */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - ulint foffs0; /*!< first input offset */ - ulint foffs1; /*!< second input offset */ - dberr_t error; /*!< error code */ - merge_file_t of; /*!< output file */ - const ulint ihalf = run_offset[*num_run / 2]; - /*!< half the input file */ - ulint n_run = 0; - /*!< num of runs generated from this merge */ - - UNIV_MEM_ASSERT_W(&block[0], 3 * srv_sort_buf_size); - - if (crypt_block) { - UNIV_MEM_ASSERT_W(&crypt_block[0], 3 * srv_sort_buf_size); - } - - ut_ad(ihalf < file->offset); - - of.fd = *tmpfd; - of.offset = 0; - of.n_rec = 0; - -#ifdef POSIX_FADV_SEQUENTIAL - /* The input file 
will be read sequentially, starting from the - beginning and the middle. In Linux, the POSIX_FADV_SEQUENTIAL - affects the entire file. Each block will be read exactly once. */ - posix_fadvise(file->fd, 0, 0, - POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE); -#endif /* POSIX_FADV_SEQUENTIAL */ - - /* Merge blocks to the output file. */ - foffs0 = 0; - foffs1 = ihalf; - - UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset); - - for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) { - - if (trx_is_interrupted(trx)) { - return(DB_INTERRUPTED); - } - - /* Remember the offset number for this run */ - run_offset[n_run++] = of.offset; - - error = row_merge_blocks(dup, file, block, - &foffs0, &foffs1, &of, - crypt_data, crypt_block, space); - - if (error != DB_SUCCESS) { - return(error); - } - - } - - /* Copy the last blocks, if there are any. */ - - while (foffs0 < ihalf) { - - if (UNIV_UNLIKELY(trx_is_interrupted(trx))) { - return(DB_INTERRUPTED); - } - - /* Remember the offset number for this run */ - run_offset[n_run++] = of.offset; - - if (!row_merge_blocks_copy(dup->index, file, block, - &foffs0, &of, - crypt_data, crypt_block, space)) { - return(DB_CORRUPTION); - } - } - - ut_ad(foffs0 == ihalf); - - while (foffs1 < file->offset) { - - if (trx_is_interrupted(trx)) { - return(DB_INTERRUPTED); - } - - /* Remember the offset number for this run */ - run_offset[n_run++] = of.offset; - - if (!row_merge_blocks_copy(dup->index, file, block, - &foffs1, &of, - crypt_data, crypt_block, space)) { - return(DB_CORRUPTION); - } - } - - ut_ad(foffs1 == file->offset); - - if (UNIV_UNLIKELY(of.n_rec != file->n_rec)) { - return(DB_CORRUPTION); - } - - ut_ad(n_run <= *num_run); - - *num_run = n_run; - - /* Each run can contain one or more offsets. As merge goes on, - the number of runs (to merge) will reduce until we have one - single run. 
So the number of runs will always be smaller than - the number of offsets in file */ - ut_ad((*num_run) <= file->offset); - - /* The number of offsets in output file is always equal or - smaller than input file */ - ut_ad(of.offset <= file->offset); - - /* Swap file descriptors for the next pass. */ - *tmpfd = file->fd; - *file = of; - - UNIV_MEM_INVALID(&block[0], 3 * srv_sort_buf_size); - - return(DB_SUCCESS); -} - -/*************************************************************//** -Merge disk files. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_merge_sort( -/*===========*/ - trx_t* trx, /*!< in: transaction */ - const row_merge_dup_t* dup, /*!< in: descriptor of - index being created */ - merge_file_t* file, /*!< in/out: file containing - index entries */ - row_merge_block_t* block, /*!< in/out: 3 buffers */ - int* tmpfd, /*!< in/out: temporary file handle - */ - const bool update_progress, - /*!< in: update progress - status variable or not */ - const float pct_progress, - /*!< in: total progress percent - until now */ - const float pct_cost, /*!< in: current progress percent */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - const ulint half = file->offset / 2; - ulint num_runs; - ulint* run_offset; - dberr_t error = DB_SUCCESS; - ulint merge_count = 0; - ulint total_merge_sort_count; - float curr_progress = 0; - - DBUG_ENTER("row_merge_sort"); - - /* Record the number of merge runs we need to perform */ - num_runs = file->offset; - - /* Find the number N which 2^N is greater or equal than num_runs */ - /* N is merge sort running count */ - total_merge_sort_count = (ulint) ceil(my_log2f(num_runs)); - if(total_merge_sort_count <= 0) { - total_merge_sort_count=1; - } - - /* If num_runs are less than 1, nothing to merge */ - if (num_runs <= 1) { - DBUG_RETURN(error); - } - - /* "run_offset" records each run's first offset number 
*/ - run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint)); - - /* This tells row_merge() where to start for the first round - of merge. */ - run_offset[half] = half; - - /* The file should always contain at least one byte (the end - of file marker). Thus, it must be at least one block. */ - ut_ad(file->offset > 0); - - /* Progress report only for "normal" indexes. */ - if (!(dup->index->type & DICT_FTS)) { - thd_progress_init(trx->mysql_thd, 1); - } - - if (global_system_variables.log_warnings > 2) { - sql_print_information("InnoDB: Online DDL : merge-sorting" - " has estimated " ULINTPF " runs", - num_runs); - } - - /* Merge the runs until we have one big run */ - do { - /* Report progress of merge sort to MySQL for - show processlist progress field */ - /* Progress report only for "normal" indexes. */ - if (!(dup->index->type & DICT_FTS)) { - thd_progress_report(trx->mysql_thd, file->offset - num_runs, file->offset); - } - - error = row_merge(trx, dup, file, block, tmpfd, - &num_runs, run_offset, - crypt_data, crypt_block, space); - - if(update_progress) { - merge_count++; - curr_progress = (merge_count >= total_merge_sort_count) ? - pct_cost : - ((pct_cost * merge_count) / total_merge_sort_count); - /* presenting 10.12% as 1012 integer */; - onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100); - } - - if (error != DB_SUCCESS) { - break; - } - - UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset); - } while (num_runs > 1); - - mem_free(run_offset); - - /* Progress report only for "normal" indexes. */ - if (!(dup->index->type & DICT_FTS)) { - thd_progress_end(trx->mysql_thd); - } - - DBUG_RETURN(error); -} - -/*************************************************************//** -Copy externally stored columns to the data tuple. 
*/ -static MY_ATTRIBUTE((nonnull)) -void -row_merge_copy_blobs( -/*=================*/ - const mrec_t* mrec, /*!< in: merge record */ - const ulint* offsets,/*!< in: offsets of mrec */ - ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - dtuple_t* tuple, /*!< in/out: data tuple */ - mem_heap_t* heap) /*!< in/out: memory heap */ -{ - ut_ad(rec_offs_any_extern(offsets)); - - for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) { - ulint len; - const void* data; - dfield_t* field = dtuple_get_nth_field(tuple, i); - - if (!dfield_is_ext(field)) { - continue; - } - - ut_ad(!dfield_is_null(field)); - - /* During the creation of a PRIMARY KEY, the table is - X-locked, and we skip copying records that have been - marked for deletion. Therefore, externally stored - columns cannot possibly be freed between the time the - BLOB pointers are read (row_merge_read_clustered_index()) - and dereferenced (below). */ - data = btr_rec_copy_externally_stored_field( - mrec, offsets, zip_size, i, &len, heap, NULL); - /* Because we have locked the table, any records - written by incomplete transactions must have been - rolled back already. There must not be any incomplete - BLOB columns. 
*/ - ut_a(data); - - dfield_set_data(field, data, len); - } -} - -/********************************************************************//** -Read sorted file containing index data tuples and insert these data -tuples to the index -@return DB_SUCCESS or error number */ -static __attribute__((nonnull(2,3,5), warn_unused_result)) -dberr_t -row_merge_insert_index_tuples( -/*==========================*/ - trx_id_t trx_id, /*!< in: transaction identifier */ - dict_index_t* index, /*!< in: index */ - const dict_table_t* old_table,/*!< in: old table */ - int fd, /*!< in: file descriptor */ - row_merge_block_t* block, /*!< in/out: file buffer */ - const ib_int64_t table_total_rows, /*!< in: total rows of old table */ - const float pct_progress, /*!< in: total progress percent until now */ - const float pct_cost, /*!< in: current progress percent - */ - fil_space_crypt_t* crypt_data,/*!< in: table crypt data */ - row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ - ulint space) /*!< in: space id */ -{ - const byte* b; - mem_heap_t* heap; - mem_heap_t* tuple_heap; - mem_heap_t* ins_heap; - dberr_t error = DB_SUCCESS; - ulint foffs = 0; - ulint* offsets; - mrec_buf_t* buf; - ib_int64_t inserted_rows = 0; - float curr_progress; - DBUG_ENTER("row_merge_insert_index_tuples"); - - ut_ad(!srv_read_only_mode); - ut_ad(!(index->type & DICT_FTS)); - ut_ad(trx_id); - - tuple_heap = mem_heap_create(1000); - - { - ulint i = 1 + REC_OFFS_HEADER_SIZE - + dict_index_get_n_fields(index); - heap = mem_heap_create(sizeof *buf + i * sizeof *offsets); - ins_heap = mem_heap_create(sizeof *buf + i * sizeof *offsets); - offsets = static_cast<ulint*>( - mem_heap_alloc(heap, i * sizeof *offsets)); - offsets[0] = i; - offsets[1] = dict_index_get_n_fields(index); - } - - b = &block[0]; - - if (!row_merge_read(fd, foffs, block, - crypt_data, crypt_block, space)) { - error = DB_CORRUPTION; - } else { - buf = static_cast<mrec_buf_t*>( - mem_heap_alloc(heap, sizeof *buf)); - - for (;;) { - const 
mrec_t* mrec; - dtuple_t* dtuple; - ulint n_ext; - big_rec_t* big_rec; - rec_t* rec; - btr_cur_t cursor; - mtr_t mtr; - - b = row_merge_read_rec(block, buf, b, index, - fd, &foffs, &mrec, offsets, - crypt_data, crypt_block, space); - if (UNIV_UNLIKELY(!b)) { - /* End of list, or I/O error */ - if (mrec) { - error = DB_CORRUPTION; - } - break; - } - - dict_index_t* old_index - = dict_table_get_first_index(old_table); - - if (dict_index_is_clust(index) - && dict_index_is_online_ddl(old_index)) { - error = row_log_table_get_error(old_index); - if (error != DB_SUCCESS) { - break; - } - } - - dtuple = row_rec_to_index_entry_low( - mrec, index, offsets, &n_ext, tuple_heap); - - if (!n_ext) { - /* There are no externally stored columns. */ - } else { - ut_ad(dict_index_is_clust(index)); - /* Off-page columns can be fetched safely - when concurrent modifications to the table - are disabled. (Purge can process delete-marked - records, but row_merge_read_clustered_index() - would have skipped them.) - - When concurrent modifications are enabled, - row_merge_read_clustered_index() will - only see rows from transactions that were - committed before the ALTER TABLE started - (REPEATABLE READ). - - Any modifications after the - row_merge_read_clustered_index() scan - will go through row_log_table_apply(). - Any modifications to off-page columns - will be tracked by - row_log_table_blob_alloc() and - row_log_table_blob_free(). */ - row_merge_copy_blobs( - mrec, offsets, - dict_table_zip_size(old_table), - dtuple, tuple_heap); - } - - ut_ad(dtuple_validate(dtuple)); - log_free_check(); - - mtr_start(&mtr); - /* Insert after the last user record. */ - btr_cur_open_at_index_side( - false, index, BTR_MODIFY_LEAF, - &cursor, 0, &mtr); - page_cur_position( - page_rec_get_prev(btr_cur_get_rec(&cursor)), - btr_cur_get_block(&cursor), - btr_cur_get_page_cur(&cursor)); - cursor.flag = BTR_CUR_BINARY; -#ifdef UNIV_DEBUG - /* Check that the records are inserted in order. 
*/ - rec = btr_cur_get_rec(&cursor); - - if (!page_rec_is_infimum(rec)) { - ulint* rec_offsets = rec_get_offsets( - rec, index, offsets, - ULINT_UNDEFINED, &tuple_heap); - ut_ad(cmp_dtuple_rec(dtuple, rec, rec_offsets) - > 0); - } -#endif /* UNIV_DEBUG */ - ulint* ins_offsets = NULL; - - error = btr_cur_optimistic_insert( - BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG, - &cursor, &ins_offsets, &ins_heap, - dtuple, &rec, &big_rec, 0, NULL, &mtr); - - if (error == DB_FAIL) { - ut_ad(!big_rec); - mtr_commit(&mtr); - mtr_start(&mtr); - btr_cur_open_at_index_side( - false, index, BTR_MODIFY_TREE, - &cursor, 0, &mtr); - page_cur_position( - page_rec_get_prev(btr_cur_get_rec( - &cursor)), - btr_cur_get_block(&cursor), - btr_cur_get_page_cur(&cursor)); - - error = btr_cur_pessimistic_insert( - BTR_NO_UNDO_LOG_FLAG - | BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG, - &cursor, &ins_offsets, &ins_heap, - dtuple, &rec, &big_rec, 0, NULL, &mtr); - } - - if (!dict_index_is_clust(index)) { - page_update_max_trx_id( - btr_cur_get_block(&cursor), - btr_cur_get_page_zip(&cursor), - trx_id, &mtr); - } - - mtr_commit(&mtr); - - if (UNIV_LIKELY_NULL(big_rec)) { - /* If the system crashes at this - point, the clustered index record will - contain a null BLOB pointer. This - should not matter, because the copied - table will be dropped on crash - recovery anyway. 
*/ - - ut_ad(dict_index_is_clust(index)); - ut_ad(error == DB_SUCCESS); - error = row_ins_index_entry_big_rec( - dtuple, big_rec, - ins_offsets, &ins_heap, - index, NULL, __FILE__, __LINE__); - dtuple_convert_back_big_rec( - index, dtuple, big_rec); - } - - if (error != DB_SUCCESS) { - goto err_exit; - } - - mem_heap_empty(tuple_heap); - mem_heap_empty(ins_heap); - - /* Increment innodb_onlineddl_pct_progress status variable */ - inserted_rows++; - if(inserted_rows % 1000 == 0) { - /* Update progress for each 1000 rows */ - curr_progress = (inserted_rows >= table_total_rows || - table_total_rows <= 0) ? - pct_cost : - ((pct_cost * inserted_rows) / table_total_rows); - - /* presenting 10.12% as 1012 integer */; - onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100); - } - } - } - -err_exit: - mem_heap_free(tuple_heap); - mem_heap_free(ins_heap); - mem_heap_free(heap); - - DBUG_RETURN(error); -} - -/*********************************************************************//** -Sets an exclusive lock on a table, for the duration of creating indexes. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_merge_lock_table( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ -{ - mem_heap_t* heap; - que_thr_t* thr; - dberr_t err; - sel_node_t* node; - - ut_ad(!srv_read_only_mode); - ut_ad(mode == LOCK_X || mode == LOCK_S); - - heap = mem_heap_create(512); - - trx->op_info = "setting table lock for creating or dropping index"; - - node = sel_node_create(heap); - thr = pars_complete_graph_for_exec(node, trx, heap); - thr->graph->state = QUE_FORK_ACTIVE; - - /* We use the select query graph as the dummy graph needed - in the lock module call */ - - thr = static_cast<que_thr_t*>( - que_fork_get_first_thr( - static_cast<que_fork_t*>(que_node_get_parent(thr)))); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - err = lock_table(0, table, mode, thr); - - trx->error_state = err; - - if (UNIV_LIKELY(err == DB_SUCCESS)) { - que_thr_stop_for_mysql_no_error(thr, trx); - } else { - que_thr_stop_for_mysql(thr); - - if (err != DB_QUE_THR_SUSPENDED) { - bool was_lock_wait; - - was_lock_wait = row_mysql_handle_errors( - &err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - } else { - que_thr_t* run_thr; - que_node_t* parent; - - parent = que_node_get_parent(thr); - - run_thr = que_fork_start_command( - static_cast<que_fork_t*>(parent)); - - ut_a(run_thr == thr); - - /* There was a lock wait but the thread was not - in a ready to run or running state. */ - trx->error_state = DB_LOCK_WAIT; - - goto run_again; - } - } - - que_graph_free(thr->graph); - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Drop an index that was created before an error occurred. 
-The data dictionary must have been locked exclusively by the caller, -because the transaction will not be committed. */ -static -void -row_merge_drop_index_dict( -/*======================*/ - trx_t* trx, /*!< in/out: dictionary transaction */ - index_id_t index_id)/*!< in: index identifier */ -{ - static const char sql[] = - "PROCEDURE DROP_INDEX_PROC () IS\n" - "BEGIN\n" - "DELETE FROM SYS_FIELDS WHERE INDEX_ID=:indexid;\n" - "DELETE FROM SYS_INDEXES WHERE ID=:indexid;\n" - "END;\n"; - dberr_t error; - pars_info_t* info; - - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - info = pars_info_create(); - pars_info_add_ull_literal(info, "indexid", index_id); - trx->op_info = "dropping index from dictionary"; - error = que_eval_sql(info, sql, FALSE, trx); - - if (error != DB_SUCCESS) { - /* Even though we ensure that DDL transactions are WAIT - and DEADLOCK free, we could encounter other errors e.g., - DB_TOO_MANY_CONCURRENT_TRXS. */ - trx->error_state = DB_SUCCESS; - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: row_merge_drop_index_dict " - "failed with error code: %u.\n", (unsigned) error); - } - - trx->op_info = ""; -} - -/*********************************************************************//** -Drop indexes that were created before an error occurred. -The data dictionary must have been locked exclusively by the caller, -because the transaction will not be committed. 
*/ -UNIV_INTERN -void -row_merge_drop_indexes_dict( -/*========================*/ - trx_t* trx, /*!< in/out: dictionary transaction */ - table_id_t table_id)/*!< in: table identifier */ -{ - static const char sql[] = - "PROCEDURE DROP_INDEXES_PROC () IS\n" - "ixid CHAR;\n" - "found INT;\n" - - "DECLARE CURSOR index_cur IS\n" - " SELECT ID FROM SYS_INDEXES\n" - " WHERE TABLE_ID=:tableid AND\n" - " SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n" - "FOR UPDATE;\n" - - "BEGIN\n" - "found := 1;\n" - "OPEN index_cur;\n" - "WHILE found = 1 LOOP\n" - " FETCH index_cur INTO ixid;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n" - " DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE index_cur;\n" - - "END;\n"; - dberr_t error; - pars_info_t* info; - - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - /* It is possible that table->n_ref_count > 1 when - locked=TRUE. In this case, all code that should have an open - handle to the table be waiting for the next statement to execute, - or waiting for a meta-data lock. - - A concurrent purge will be prevented by dict_operation_lock. */ - - info = pars_info_create(); - pars_info_add_ull_literal(info, "tableid", table_id); - trx->op_info = "dropping indexes"; - error = que_eval_sql(info, sql, FALSE, trx); - - if (error != DB_SUCCESS) { - /* Even though we ensure that DDL transactions are WAIT - and DEADLOCK free, we could encounter other errors e.g., - DB_TOO_MANY_CONCURRENT_TRXS. 
*/ - trx->error_state = DB_SUCCESS; - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: row_merge_drop_indexes_dict " - "failed with error code: %u.\n", (unsigned) error); - } - - trx->op_info = ""; -} - -/*********************************************************************//** -Drop indexes that were created before an error occurred. -The data dictionary must have been locked exclusively by the caller, -because the transaction will not be committed. */ -UNIV_INTERN -void -row_merge_drop_indexes( -/*===================*/ - trx_t* trx, /*!< in/out: dictionary transaction */ - dict_table_t* table, /*!< in/out: table containing the indexes */ - ibool locked) /*!< in: TRUE=table locked, - FALSE=may need to do a lazy drop */ -{ - dict_index_t* index; - dict_index_t* next_index; - - ut_ad(!srv_read_only_mode); - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - index = dict_table_get_first_index(table); - ut_ad(dict_index_is_clust(index)); - ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE); - - /* the caller should have an open handle to the table */ - ut_ad(table->n_ref_count >= 1); - - /* It is possible that table->n_ref_count > 1 when - locked=TRUE. In this case, all code that should have an open - handle to the table be waiting for the next statement to execute, - or waiting for a meta-data lock. - - A concurrent purge will be prevented by dict_operation_lock. */ - - if (!locked && table->n_ref_count > 1) { - /* We will have to drop the indexes later, when the - table is guaranteed to be no longer in use. Mark the - indexes as incomplete and corrupted, so that other - threads will stop using them. 
Let dict_table_close() - or crash recovery or the next invocation of - prepare_inplace_alter_table() take care of dropping - the indexes. */ - - while ((index = dict_table_get_next_index(index)) != NULL) { - ut_ad(!dict_index_is_clust(index)); - - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_ABORTED_DROPPED: - continue; - case ONLINE_INDEX_COMPLETE: - if (*index->name != TEMP_INDEX_PREFIX) { - /* Do nothing to already - published indexes. */ - } else if (index->type & DICT_FTS) { - /* Drop a completed FULLTEXT - index, due to a timeout during - MDL upgrade for - commit_inplace_alter_table(). - Because only concurrent reads - are allowed (and they are not - seeing this index yet) we - are safe to drop the index. */ - dict_index_t* prev = UT_LIST_GET_PREV( - indexes, index); - /* At least there should be - the clustered index before - this one. */ - ut_ad(prev); - ut_a(table->fts); - fts_drop_index(table, index, trx); - /* Since - INNOBASE_SHARE::idx_trans_tbl - is shared between all open - ha_innobase handles to this - table, no thread should be - accessing this dict_index_t - object. Also, we should be - holding LOCK=SHARED MDL on the - table even after the MDL - upgrade timeout. */ - - /* We can remove a DICT_FTS - index from the cache, because - we do not allow ADD FULLTEXT INDEX - with LOCK=NONE. If we allowed that, - we should exclude FTS entries from - prebuilt->ins_node->entry_list - in ins_node_create_entry_list(). 
*/ - dict_index_remove_from_cache( - table, index); - index = prev; - } else { - rw_lock_x_lock( - dict_index_get_lock(index)); - dict_index_set_online_status( - index, ONLINE_INDEX_ABORTED); - index->type |= DICT_CORRUPT; - table->drop_aborted = TRUE; - goto drop_aborted; - } - continue; - case ONLINE_INDEX_CREATION: - rw_lock_x_lock(dict_index_get_lock(index)); - ut_ad(*index->name == TEMP_INDEX_PREFIX); - row_log_abort_sec(index); - drop_aborted: - rw_lock_x_unlock(dict_index_get_lock(index)); - - DEBUG_SYNC_C("merge_drop_index_after_abort"); - /* covered by dict_sys->mutex */ - MONITOR_INC(MONITOR_BACKGROUND_DROP_INDEX); - /* fall through */ - case ONLINE_INDEX_ABORTED: - /* Drop the index tree from the - data dictionary and free it from - the tablespace, but keep the object - in the data dictionary cache. */ - row_merge_drop_index_dict(trx, index->id); - rw_lock_x_lock(dict_index_get_lock(index)); - dict_index_set_online_status( - index, ONLINE_INDEX_ABORTED_DROPPED); - rw_lock_x_unlock(dict_index_get_lock(index)); - table->drop_aborted = TRUE; - continue; - } - ut_error; - } - - return; - } - - row_merge_drop_indexes_dict(trx, table->id); - - /* Invalidate all row_prebuilt_t::ins_graph that are referring - to this table. That is, force row_get_prebuilt_insert_row() to - rebuild prebuilt->ins_node->entry_list). 
*/ - ut_ad(table->def_trx_id <= trx->id); - table->def_trx_id = trx->id; - - next_index = dict_table_get_next_index(index); - - while ((index = next_index) != NULL) { - /* read the next pointer before freeing the index */ - next_index = dict_table_get_next_index(index); - - ut_ad(!dict_index_is_clust(index)); - - if (*index->name == TEMP_INDEX_PREFIX) { - /* If it is FTS index, drop from table->fts - and also drop its auxiliary tables */ - if (index->type & DICT_FTS) { - ut_a(table->fts); - fts_drop_index(table, index, trx); - } - - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_CREATION: - /* This state should only be possible - when prepare_inplace_alter_table() fails - after invoking row_merge_create_index(). - In inplace_alter_table(), - row_merge_build_indexes() - should never leave the index in this state. - It would invoke row_log_abort_sec() on - failure. */ - case ONLINE_INDEX_COMPLETE: - /* In these cases, we are able to drop - the index straight. The DROP INDEX was - never deferred. */ - break; - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - /* covered by dict_sys->mutex */ - MONITOR_DEC(MONITOR_BACKGROUND_DROP_INDEX); - } - - dict_index_remove_from_cache(table, index); - } - } - - table->drop_aborted = FALSE; - ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE)); -} - -/*********************************************************************//** -Drop all partially created indexes during crash recovery. 
*/
UNIV_INTERN
void
row_merge_drop_temp_indexes(void)
/*=============================*/
{
	/* Deletes the SYS_FIELDS and SYS_INDEXES rows of every index
	whose name starts with TEMP_INDEX_PREFIX_STR, in any table. */
	static const char	drop_temp_sql[] =
		"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
		"ixid CHAR;\n"
		"found INT;\n"

		"DECLARE CURSOR index_cur IS\n"
		" SELECT ID FROM SYS_INDEXES\n"
		" WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
		"FOR UPDATE;\n"

		"BEGIN\n"
		"found := 1;\n"
		"OPEN index_cur;\n"
		"WHILE found = 1 LOOP\n"
		" FETCH index_cur INTO ixid;\n"
		" IF (SQL % NOTFOUND) THEN\n"
		" found := 0;\n"
		" ELSE\n"
		" DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
		" DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
		" END IF;\n"
		"END LOOP;\n"
		"CLOSE index_cur;\n"
		"END;\n";

	/* Run the cleanup in a background transaction of its own,
	holding the data dictionary lock for the whole operation. */
	trx_t*	trx = trx_allocate_for_background();

	trx->op_info = "dropping partially created indexes";
	row_mysql_lock_data_dictionary(trx);

	/* Ensure that this transaction will be rolled back and locks
	will be released, if the server gets killed before the commit
	gets written to the redo log. */
	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);

	trx->op_info = "dropping indexes";

	const dberr_t	err = que_eval_sql(NULL, drop_temp_sql, FALSE, trx);

	if (err != DB_SUCCESS) {
		/* Even though we ensure that DDL transactions are WAIT
		and DEADLOCK free, we could encounter other errors e.g.,
		DB_TOO_MANY_CONCURRENT_TRXS. */
		trx->error_state = DB_SUCCESS;

		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Error: row_merge_drop_temp_indexes "
			"failed with error code: %u.\n", (unsigned) err);
	}

	/* The transaction is committed even if the statement failed. */
	trx_commit_for_mysql(trx);
	row_mysql_unlock_data_dictionary(trx);
	trx_free_for_background(trx);
}


/** Create temporary merge files in the given parameter path, and if
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
-@param[in] path location for creating temporary merge files. -@return File descriptor */ -UNIV_INTERN -int -row_merge_file_create_low( - const char* path) -{ - int fd; -#ifdef UNIV_PFS_IO - /* This temp file open does not go through normal - file APIs, add instrumentation to register with - performance schema */ - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - locker = PSI_FILE_CALL(get_thread_file_name_locker)( - &state, innodb_file_temp_key, PSI_FILE_OPEN, - "Innodb Merge Temp File", &locker); - if (locker != NULL) { - PSI_FILE_CALL(start_file_open_wait)(locker, - __FILE__, - __LINE__); - } -#endif - fd = innobase_mysql_tmpfile(path); -#ifdef UNIV_PFS_IO - if (locker != NULL) { - PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)( - locker, fd); - } -#endif - - if (fd < 0) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create temporary merge file"); - return (-1); - } - return(fd); -} - - -/** Create a merge file in the given location. -@param[out] merge_file merge file structure -@param[in] path location for creating temporary file -@return file descriptor, or -1 on failure */ -UNIV_INTERN -int -row_merge_file_create( - merge_file_t* merge_file, - const char* path) -{ - merge_file->fd = row_merge_file_create_low(path); - merge_file->offset = 0; - merge_file->n_rec = 0; - - if (merge_file->fd >= 0) { - if (srv_disable_sort_file_cache) { - os_file_set_nocache(OS_FILE_FROM_FD(merge_file->fd), - "row0merge.cc", "sort"); - } - } - return(merge_file->fd); -} - -/*********************************************************************//** -Destroy a merge file. And de-register the file from Performance Schema -if UNIV_PFS_IO is defined. 
*/ -UNIV_INTERN -void -row_merge_file_destroy_low( -/*=======================*/ - int fd) /*!< in: merge file descriptor */ -{ -#ifdef UNIV_PFS_IO - struct PSI_file_locker* locker = NULL; - PSI_file_locker_state state; - locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( - &state, fd, PSI_FILE_CLOSE); - if (locker != NULL) { - PSI_FILE_CALL(start_file_wait)( - locker, 0, __FILE__, __LINE__); - } -#endif - if (fd >= 0) { - close(fd); - } -#ifdef UNIV_PFS_IO - if (locker != NULL) { - PSI_FILE_CALL(end_file_wait)(locker, 0); - } -#endif -} -/*********************************************************************//** -Destroy a merge file. */ -UNIV_INTERN -void -row_merge_file_destroy( -/*===================*/ - merge_file_t* merge_file) /*!< in/out: merge file structure */ -{ - ut_ad(!srv_read_only_mode); - - if (merge_file->fd != -1) { - row_merge_file_destroy_low(merge_file->fd); - merge_file->fd = -1; - } -} - -/*********************************************************************//** -Rename an index in the dictionary that was created. The data -dictionary must have been locked exclusively by the caller, because -the transaction will not be committed. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -row_merge_rename_index_to_add( -/*==========================*/ - trx_t* trx, /*!< in/out: transaction */ - table_id_t table_id, /*!< in: table identifier */ - index_id_t index_id) /*!< in: index identifier */ -{ - dberr_t err = DB_SUCCESS; - pars_info_t* info = pars_info_create(); - - /* We use the private SQL parser of Innobase to generate the - query graphs needed in renaming indexes. 
*/ - - static const char rename_index[] = - "PROCEDURE RENAME_INDEX_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n" - "WHERE TABLE_ID = :tableid AND ID = :indexid;\n" - "END;\n"; - - ut_ad(trx); - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); - - trx->op_info = "renaming index to add"; - - pars_info_add_ull_literal(info, "tableid", table_id); - pars_info_add_ull_literal(info, "indexid", index_id); - - err = que_eval_sql(info, rename_index, FALSE, trx); - - if (err != DB_SUCCESS) { - /* Even though we ensure that DDL transactions are WAIT - and DEADLOCK free, we could encounter other errors e.g., - DB_TOO_MANY_CONCURRENT_TRXS. */ - trx->error_state = DB_SUCCESS; - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: row_merge_rename_index_to_add " - "failed with error code: %u.\n", (unsigned) err); - } - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Rename an index in the dictionary that is to be dropped. The data -dictionary must have been locked exclusively by the caller, because -the transaction will not be committed. -@return DB_SUCCESS if all OK */ -UNIV_INTERN -dberr_t -row_merge_rename_index_to_drop( -/*===========================*/ - trx_t* trx, /*!< in/out: transaction */ - table_id_t table_id, /*!< in: table identifier */ - index_id_t index_id) /*!< in: index identifier */ -{ - dberr_t err; - pars_info_t* info = pars_info_create(); - - ut_ad(!srv_read_only_mode); - - /* We use the private SQL parser of Innobase to generate the - query graphs needed in renaming indexes. 
*/ - - static const char rename_index[] = - "PROCEDURE RENAME_INDEX_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_INDEXES SET NAME=CONCAT('" - TEMP_INDEX_PREFIX_STR "',NAME)\n" - "WHERE TABLE_ID = :tableid AND ID = :indexid;\n" - "END;\n"; - - ut_ad(trx); - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); - - trx->op_info = "renaming index to drop"; - - pars_info_add_ull_literal(info, "tableid", table_id); - pars_info_add_ull_literal(info, "indexid", index_id); - - err = que_eval_sql(info, rename_index, FALSE, trx); - - if (err != DB_SUCCESS) { - /* Even though we ensure that DDL transactions are WAIT - and DEADLOCK free, we could encounter other errors e.g., - DB_TOO_MANY_CONCURRENT_TRXS. */ - trx->error_state = DB_SUCCESS; - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: row_merge_rename_index_to_drop " - "failed with error code: %u.\n", (unsigned) err); - } - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Provide a new pathname for a table that is being renamed if it belongs to -a file-per-table tablespace. The caller is responsible for freeing the -memory allocated for the return value. -@return new pathname of tablespace file, or NULL if space = 0 */ -UNIV_INTERN -char* -row_make_new_pathname( -/*==================*/ - dict_table_t* table, /*!< in: table to be renamed */ - const char* new_name) /*!< in: new name */ -{ - char* new_path; - char* old_path; - - ut_ad(table->space != TRX_SYS_SPACE); - - old_path = fil_space_get_first_path(table->space); - ut_a(old_path); - - new_path = os_file_make_new_pathname(old_path, new_name); - - mem_free(old_path); - - return(new_path); -} - -/*********************************************************************//** -Rename the tables in the data dictionary. The data dictionary must -have been locked exclusively by the caller, because the transaction -will not be committed. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_merge_rename_tables_dict( -/*=========================*/ - dict_table_t* old_table, /*!< in/out: old table, renamed to - tmp_name */ - dict_table_t* new_table, /*!< in/out: new table, renamed to - old_table->name */ - const char* tmp_name, /*!< in: new name for old_table */ - trx_t* trx) /*!< in/out: dictionary transaction */ -{ - dberr_t err = DB_ERROR; - pars_info_t* info; - - ut_ad(!srv_read_only_mode); - ut_ad(old_table != new_table); - ut_ad(mutex_own(&dict_sys->mutex)); - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE - || trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); - - trx->op_info = "renaming tables"; - - /* We use the private SQL parser of Innobase to generate the query - graphs needed in updating the dictionary data in system tables. */ - - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_name", new_table->name); - pars_info_add_str_literal(info, "old_name", old_table->name); - pars_info_add_str_literal(info, "tmp_name", tmp_name); - - err = que_eval_sql(info, - "PROCEDURE RENAME_TABLES () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES SET NAME = :tmp_name\n" - " WHERE NAME = :old_name;\n" - "UPDATE SYS_TABLES SET NAME = :old_name\n" - " WHERE NAME = :new_name;\n" - "END;\n", FALSE, trx); - - /* Update SYS_TABLESPACES and SYS_DATAFILES if the old - table is in a non-system tablespace where space > 0. */ - if (err == DB_SUCCESS - && old_table->space != TRX_SYS_SPACE - && fil_space_get(old_table->space) != NULL) { - /* Make pathname to update SYS_DATAFILES. 
*/ - char* tmp_path = row_make_new_pathname(old_table, tmp_name); - - info = pars_info_create(); - - pars_info_add_str_literal(info, "tmp_name", tmp_name); - pars_info_add_str_literal(info, "tmp_path", tmp_path); - pars_info_add_int4_literal(info, "old_space", - (lint) old_table->space); - - err = que_eval_sql(info, - "PROCEDURE RENAME_OLD_SPACE () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLESPACES" - " SET NAME = :tmp_name\n" - " WHERE SPACE = :old_space;\n" - "UPDATE SYS_DATAFILES" - " SET PATH = :tmp_path\n" - " WHERE SPACE = :old_space;\n" - "END;\n", FALSE, trx); - - mem_free(tmp_path); - } - - /* Update SYS_TABLESPACES and SYS_DATAFILES if the new - table is in a non-system tablespace where space > 0. */ - if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) { - /* Make pathname to update SYS_DATAFILES. */ - char* old_path = row_make_new_pathname( - new_table, old_table->name); - - info = pars_info_create(); - - pars_info_add_str_literal(info, "old_name", old_table->name); - pars_info_add_str_literal(info, "old_path", old_path); - pars_info_add_int4_literal(info, "new_space", - (lint) new_table->space); - - err = que_eval_sql(info, - "PROCEDURE RENAME_NEW_SPACE () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLESPACES" - " SET NAME = :old_name\n" - " WHERE SPACE = :new_space;\n" - "UPDATE SYS_DATAFILES" - " SET PATH = :old_path\n" - " WHERE SPACE = :new_space;\n" - "END;\n", FALSE, trx); - - mem_free(old_path); - } - - if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) { - err = row_import_update_discarded_flag( - trx, new_table->id, true, true); - } - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Create and execute a query graph for creating an index. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_merge_create_index_graph( -/*=========================*/ - trx_t* trx, /*!< in: trx */ - dict_table_t* table, /*!< in: table */ - dict_index_t* index) /*!< in: index */ -{ - ind_node_t* node; /*!< Index creation node */ - mem_heap_t* heap; /*!< Memory heap */ - que_thr_t* thr; /*!< Query thread */ - dberr_t err; - - ut_ad(trx); - ut_ad(table); - ut_ad(index); - - heap = mem_heap_create(512); - - index->table = table; - node = ind_create_graph_create(index, heap, false); - thr = pars_complete_graph_for_exec(node, trx, heap); - - ut_a(thr == que_fork_start_command( - static_cast<que_fork_t*>(que_node_get_parent(thr)))); - - que_run_threads(thr); - - err = trx->error_state; - - que_graph_free((que_t*) que_node_get_parent(thr)); - - return(err); -} - -/*********************************************************************//** -Create the index and load in to the dictionary. -@return index, or NULL on error */ -UNIV_INTERN -dict_index_t* -row_merge_create_index( -/*===================*/ - trx_t* trx, /*!< in/out: trx (sets error_state) */ - dict_table_t* table, /*!< in: the index is on this table */ - const index_def_t* index_def, - /*!< in: the index definition */ - const char** col_names) - /*! in: column names if columns are - renamed or NULL */ -{ - dict_index_t* index; - dberr_t err; - ulint n_fields = index_def->n_fields; - ulint i; - - ut_ad(!srv_read_only_mode); - - /* Create the index prototype, using the passed in def, this is not - a persistent operation. We pass 0 as the space id, and determine at - a lower level the space id where to store the table. 
*/ - - index = dict_mem_index_create(table->name, index_def->name, - 0, index_def->ind_type, n_fields); - - ut_a(index); - - for (i = 0; i < n_fields; i++) { - index_field_t* ifield = &index_def->fields[i]; - const char * col_name; - - /* - Alter table renaming a column and then adding a index - to this new name e.g ALTER TABLE t - CHANGE COLUMN b c INT NOT NULL, ADD UNIQUE INDEX (c); - requires additional check as column names are not yet - changed when new index definitions are created. Table's - new column names are on a array of column name pointers - if any of the column names are changed. */ - - if (col_names && col_names[i]) { - col_name = col_names[i]; - } else { - col_name = ifield->col_name ? - dict_table_get_col_name_for_mysql(table, ifield->col_name) : - dict_table_get_col_name(table, ifield->col_no); - } - - dict_mem_index_add_field( - index, - col_name, - ifield->prefix_len); - } - - /* Add the index to SYS_INDEXES, using the index prototype. */ - err = row_merge_create_index_graph(trx, table, index); - - if (err == DB_SUCCESS) { - - index = dict_table_get_index_on_name(table, index_def->name); - - ut_a(index); - - /* Note the id of the transaction that created this - index, we use it to restrict readers from accessing - this index, to ensure read consistency. */ - ut_ad(index->trx_id == trx->id); - } else { - index = NULL; - } - - return(index); -} - -/*********************************************************************//** -Check if a transaction can use an index. */ -UNIV_INTERN -ibool -row_merge_is_index_usable( -/*======================*/ - const trx_t* trx, /*!< in: transaction */ - const dict_index_t* index) /*!< in: index to check */ -{ - if (!dict_index_is_clust(index) - && dict_index_is_online_ddl(index)) { - /* Indexes that are being created are not useable. 
*/ - return(FALSE); - } - - return(!dict_index_is_corrupted(index) - && (dict_table_is_temporary(index->table) - || !trx->read_view - || read_view_sees_trx_id(trx->read_view, index->trx_id))); -} - -/*********************************************************************//** -Drop a table. The caller must have ensured that the background stats -thread is not processing the table. This can be done by calling -dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and -before calling this function. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_merge_drop_table( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table to drop */ -{ - ut_ad(!srv_read_only_mode); - - /* There must be no open transactions on the table. */ - ut_a(table->n_ref_count == 0); - - return(row_drop_table_for_mysql(table->name, trx, false, false, false)); -} - -/*********************************************************************//** -Build indexes on a table by reading a clustered index, -creating a temporary file containing index entries, merge sorting -these index entries and inserting sorted index entries to indexes. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_merge_build_indexes( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - dict_table_t* old_table, /*!< in: table where rows are - read from */ - dict_table_t* new_table, /*!< in: table where indexes are - created; identical to old_table - unless creating a PRIMARY KEY */ - bool online, /*!< in: true if creating indexes - online */ - dict_index_t** indexes, /*!< in: indexes to be created */ - const ulint* key_numbers, /*!< in: MySQL key numbers */ - ulint n_indexes, /*!< in: size of indexes[] */ - struct TABLE* table, /*!< in/out: MySQL table, for - reporting erroneous key value - if applicable */ - const dtuple_t* add_cols, /*!< in: default values of - added columns, or NULL */ - const ulint* col_map, /*!< in: mapping of old column - numbers to new ones, or NULL - if old_table == new_table */ - ulint add_autoinc, /*!< in: number of added - AUTO_INCREMENT column, or - ULINT_UNDEFINED if none is added */ - ib_sequence_t& sequence) /*!< in: autoinc instance if - add_autoinc != ULINT_UNDEFINED */ -{ - merge_file_t* merge_files; - row_merge_block_t* block; - row_merge_block_t* crypt_block = NULL; - ulint block_size; - ulint i; - ulint j; - dberr_t error; - int tmpfd = -1; - dict_index_t* fts_sort_idx = NULL; - fts_psort_t* psort_info = NULL; - fts_psort_t* merge_info = NULL; - ib_int64_t sig_count = 0; - bool fts_psort_initiated = false; - fil_space_crypt_t * crypt_data = NULL; - - float total_static_cost = 0; - float total_dynamic_cost = 0; - uint total_index_blocks = 0; - float pct_cost=0; - float pct_progress=0; - - DBUG_ENTER("row_merge_build_indexes"); - - ut_ad(!srv_read_only_mode); - ut_ad((old_table == new_table) == !col_map); - ut_ad(!add_cols || col_map); - - /* Allocate memory for merge file data structure and initialize - fields */ - - block_size = 3 * srv_sort_buf_size; - block = static_cast<row_merge_block_t*>( - os_mem_alloc_large(&block_size)); - - if (block == NULL) { - 
DBUG_RETURN(DB_OUT_OF_MEMORY); - } - - /* Get crypt data from tablespace if present. We should be protected - from concurrent DDL (e.g. drop table) by MDL-locks. */ - fil_space_t* space = fil_space_acquire(new_table->space); - - if (space) { - crypt_data = space->crypt_data; - } else { - DBUG_RETURN(DB_TABLESPACE_NOT_FOUND); - } - - /* If tablespace is encrypted, allocate additional buffer for - encryption/decryption. */ - if (crypt_data && crypt_data->should_encrypt()) { - crypt_block = static_cast<row_merge_block_t*>( - os_mem_alloc_large(&block_size)); - - if (crypt_block == NULL) { - fil_space_release(space); - DBUG_RETURN(DB_OUT_OF_MEMORY); - } - } else { - /* Not needed */ - crypt_data = NULL; - } - - trx_start_if_not_started_xa(trx); - - merge_files = static_cast<merge_file_t*>( - mem_alloc(n_indexes * sizeof *merge_files)); - - /* Initialize all the merge file descriptors, so that we - don't call row_merge_file_destroy() on uninitialized - merge file descriptor */ - - for (i = 0; i < n_indexes; i++) { - merge_files[i].fd = -1; - merge_files[i].offset = 0; - } - - total_static_cost = COST_BUILD_INDEX_STATIC * n_indexes + COST_READ_CLUSTERED_INDEX; - total_dynamic_cost = COST_BUILD_INDEX_DYNAMIC * n_indexes; - - for (i = 0; i < n_indexes; i++) { - - if (indexes[i]->type & DICT_FTS) { - ibool opt_doc_id_size = FALSE; - - /* To build FTS index, we would need to extract - doc's word, Doc ID, and word's position, so - we need to build a "fts sort index" indexing - on above three 'fields' */ - fts_sort_idx = row_merge_create_fts_sort_index( - indexes[i], old_table, &opt_doc_id_size); - - row_merge_dup_t* dup = static_cast<row_merge_dup_t*>( - ut_malloc(sizeof *dup)); - dup->index = fts_sort_idx; - dup->table = table; - dup->col_map = col_map; - dup->n_dup = 0; - - row_fts_psort_info_init( - trx, dup, new_table, opt_doc_id_size, - &psort_info, &merge_info); - - /* "We need to ensure that we free the resources - allocated */ - fts_psort_initiated = true; - } - } - - 
/* Reset the MySQL row buffer that is used when reporting - duplicate keys. */ - innobase_rec_reset(table); - - if (global_system_variables.log_warnings > 2) { - sql_print_information("InnoDB: Online DDL : Start reading" - " clustered index of the table" - " and create temporary files"); - } - - pct_cost = COST_READ_CLUSTERED_INDEX * 100 / (total_static_cost + total_dynamic_cost); - - /* Do not continue if we can't encrypt table pages */ - if (!old_table->is_readable() || - !new_table->is_readable()) { - error = DB_DECRYPTION_FAILED; - ib_push_warning(trx->mysql_thd, DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - !old_table->is_readable() ? old_table->name : - new_table->name); - goto func_exit; - } - - /* Read clustered index of the table and create files for - secondary index entries for merge sort */ - - error = row_merge_read_clustered_index( - trx, table, old_table, new_table, online, indexes, - fts_sort_idx, psort_info, merge_files, key_numbers, - n_indexes, add_cols, col_map, - add_autoinc, sequence, block, &tmpfd, pct_cost, - crypt_data, crypt_block); - - pct_progress += pct_cost; - - if (global_system_variables.log_warnings > 2) { - sql_print_information("InnoDB: Online DDL : End of reading " - "clustered index of the table" - " and create temporary files"); - } - - for (i = 0; i < n_indexes; i++) { - total_index_blocks += merge_files[i].offset; - } - - if (error != DB_SUCCESS) { - goto func_exit; - } - - DEBUG_SYNC_C("row_merge_after_scan"); - - /* Now we have files containing index entries ready for - sorting and inserting. 
*/ - - DBUG_EXECUTE_IF( - "ib_merge_wait_after_read", - os_thread_sleep(20000000);); /* 20 sec */ - - for (i = 0; i < n_indexes; i++) { - dict_index_t* sort_idx = indexes[i]; - - if (indexes[i]->type & DICT_FTS) { - os_event_t fts_parallel_merge_event; - - sort_idx = fts_sort_idx; - - fts_parallel_merge_event - = merge_info[0].psort_common->merge_event; - - if (FTS_PLL_MERGE) { - ulint trial_count = 0; - bool all_exit = false; - - os_event_reset(fts_parallel_merge_event); - row_fts_start_parallel_merge(merge_info); -wait_again: - os_event_wait_time_low( - fts_parallel_merge_event, 1000000, - sig_count); - - for (j = 0; j < FTS_NUM_AUX_INDEX; j++) { - if (merge_info[j].child_status - != FTS_CHILD_COMPLETE - && merge_info[j].child_status - != FTS_CHILD_EXITING) { - sig_count = os_event_reset( - fts_parallel_merge_event); - - goto wait_again; - } - } - - /* Now all children should complete, wait - a bit until they all finish using event */ - while (!all_exit && trial_count < 10000) { - all_exit = true; - - for (j = 0; j < FTS_NUM_AUX_INDEX; - j++) { - if (merge_info[j].child_status - != FTS_CHILD_EXITING) { - all_exit = false; - os_thread_sleep(1000); - break; - } - } - trial_count++; - } - - if (!all_exit) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Not all child merge threads" - " exited when creating FTS" - " index '%s'", - indexes[i]->name); - } else { - for (j = 0; j < FTS_NUM_AUX_INDEX; - j++) { - - os_thread_join(merge_info[j] - .thread_hdl); - } - } - } else { - /* This cannot report duplicates; an - assertion would fail in that case. 
*/ - error = row_fts_merge_insert( - sort_idx, new_table, - psort_info, 0); - } - -#ifdef FTS_INTERNAL_DIAG_PRINT - DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n"); -#endif - } else if (merge_files[i].fd != -1) { - char buf[NAME_LEN + 1]; - row_merge_dup_t dup = { - sort_idx, table, col_map, 0}; - - pct_cost = (COST_BUILD_INDEX_STATIC + - (total_dynamic_cost * merge_files[i].offset / - total_index_blocks)) / - (total_static_cost + total_dynamic_cost) - * PCT_COST_MERGESORT_INDEX * 100; - char* bufend = innobase_convert_name( - buf, sizeof buf, - indexes[i]->name, - strlen(indexes[i]->name), - trx->mysql_thd, - FALSE); - buf[bufend - buf]='\0'; - - if (global_system_variables.log_warnings > 2) { - sql_print_information("InnoDB: Online DDL :" - " Start merge-sorting" - " index %s" - " (" ULINTPF - " / " ULINTPF ")," - " estimated cost :" - " %2.4f", - buf, i + 1, n_indexes, - pct_cost); - } - - error = row_merge_sort( - trx, &dup, &merge_files[i], - block, &tmpfd, true, - pct_progress, pct_cost, - crypt_data, crypt_block, new_table->space); - - pct_progress += pct_cost; - - if (global_system_variables.log_warnings > 2) { - sql_print_information("InnoDB: Online DDL :" - " End of " - " merge-sorting index %s" - " (" ULINTPF - " / " ULINTPF ")", - buf, i + 1, n_indexes); - } - - DBUG_EXECUTE_IF( - "ib_merge_wait_after_sort", - os_thread_sleep(20000000);); /* 20 sec */ - - if (error == DB_SUCCESS) { - pct_cost = (COST_BUILD_INDEX_STATIC + - (total_dynamic_cost * merge_files[i].offset / - total_index_blocks)) / - (total_static_cost + total_dynamic_cost) * - PCT_COST_INSERT_INDEX * 100; - - if (global_system_variables.log_warnings > 2) { - sql_print_information( - "InnoDB: Online DDL : Start " - "building index %s" - " (" ULINTPF - " / " ULINTPF "), estimated " - "cost : %2.4f", buf, i + 1, - n_indexes, pct_cost); - } - - error = row_merge_insert_index_tuples( - trx->id, sort_idx, old_table, - merge_files[i].fd, block, - merge_files[i].n_rec, pct_progress, pct_cost, - 
crypt_data, crypt_block, new_table->space); - pct_progress += pct_cost; - - if (global_system_variables.log_warnings > 2) { - sql_print_information( - "InnoDB: Online DDL : " - "End of building index %s" - " (" ULINTPF " / " ULINTPF ")", - buf, i + 1, n_indexes); - } - } - } - - /* Close the temporary file to free up space. */ - row_merge_file_destroy(&merge_files[i]); - - if (indexes[i]->type & DICT_FTS) { - row_fts_psort_info_destroy(psort_info, merge_info); - fts_psort_initiated = false; - } else if (error != DB_SUCCESS || !online) { - /* Do not apply any online log. */ - } else if (old_table != new_table) { - ut_ad(!sort_idx->online_log); - ut_ad(sort_idx->online_status - == ONLINE_INDEX_COMPLETE); - } else { - if (global_system_variables.log_warnings > 2) { - sql_print_information( - "InnoDB: Online DDL : Applying" - " log to index"); - } - DEBUG_SYNC_C("row_log_apply_before"); - error = row_log_apply(trx, sort_idx, table); - DEBUG_SYNC_C("row_log_apply_after"); - } - - if (error != DB_SUCCESS) { - trx->error_key_num = key_numbers[i]; - goto func_exit; - } - - if (indexes[i]->type & DICT_FTS && fts_enable_diag_print) { - char* name = (char*) indexes[i]->name; - - if (*name == TEMP_INDEX_PREFIX) { - name++; - } - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Finished building " - "full-text index %s\n", name); - } - } - -func_exit: - DBUG_EXECUTE_IF( - "ib_build_indexes_too_many_concurrent_trxs", - error = DB_TOO_MANY_CONCURRENT_TRXS; - trx->error_state = error;); - - if (fts_psort_initiated) { - /* Clean up FTS psort related resource */ - row_fts_psort_info_destroy(psort_info, merge_info); - fts_psort_initiated = false; - } - - row_merge_file_destroy_low(tmpfd); - - for (i = 0; i < n_indexes; i++) { - row_merge_file_destroy(&merge_files[i]); - } - - if (fts_sort_idx) { - dict_mem_index_free(fts_sort_idx); - } - - mem_free(merge_files); - os_mem_free_large(block, block_size); - - if (crypt_block) { - os_mem_free_large(crypt_block, block_size); - } - 
- DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID); - - if (online && old_table == new_table && error != DB_SUCCESS) { - /* On error, flag all online secondary index creation - as aborted. */ - for (i = 0; i < n_indexes; i++) { - ut_ad(!(indexes[i]->type & DICT_FTS)); - ut_ad(*indexes[i]->name == TEMP_INDEX_PREFIX); - ut_ad(!dict_index_is_clust(indexes[i])); - - /* Completed indexes should be dropped as - well, and indexes whose creation was aborted - should be dropped from the persistent - storage. However, at this point we can only - set some flags in the not-yet-published - indexes. These indexes will be dropped later - in row_merge_drop_indexes(), called by - rollback_inplace_alter_table(). */ - - switch (dict_index_get_online_status(indexes[i])) { - case ONLINE_INDEX_COMPLETE: - break; - case ONLINE_INDEX_CREATION: - rw_lock_x_lock( - dict_index_get_lock(indexes[i])); - row_log_abort_sec(indexes[i]); - indexes[i]->type |= DICT_CORRUPT; - rw_lock_x_unlock( - dict_index_get_lock(indexes[i])); - new_table->drop_aborted = TRUE; - /* fall through */ - case ONLINE_INDEX_ABORTED_DROPPED: - case ONLINE_INDEX_ABORTED: - MONITOR_MUTEX_INC( - &dict_sys->mutex, - MONITOR_BACKGROUND_DROP_INDEX); - } - } - } - - if (space) { - fil_space_release(space); - } - - DBUG_RETURN(error); -} diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc deleted file mode 100644 index 0079fc79a0e..00000000000 --- a/storage/xtradb/row/row0mysql.cc +++ /dev/null @@ -1,5687 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0mysql.cc -Interface between Innobase row operations and MySQL. -Contains also create table and other data dictionary operations. - -Created 9/17/2000 Heikki Tuuri -*******************************************************/ - -#include "row0mysql.h" - -#ifdef UNIV_NONINL -#include "row0mysql.ic" -#endif - -#include "ha_prototypes.h" - -#include <sql_const.h> -#include "row0ins.h" -#include "row0merge.h" -#include "row0sel.h" -#include "row0upd.h" -#include "row0row.h" -#include "que0que.h" -#include "pars0pars.h" -#include "dict0dict.h" -#include "dict0crea.h" -#include "dict0load.h" -#include "dict0priv.h" -#include "dict0boot.h" -#include "dict0stats.h" -#include "dict0stats_bg.h" -#include "trx0roll.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "trx0undo.h" -#include "lock0lock.h" -#include "rem0cmp.h" -#include "log0log.h" -#include "btr0sea.h" -#include "btr0defragment.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#include "ibuf0ibuf.h" -#include "fts0fts.h" -#include "fts0types.h" -#include "srv0start.h" -#include "row0import.h" -#include "m_string.h" -#include "my_sys.h" -#include "zlib.h" -#include <algorithm> - -/** Provide optional 4.x backwards compatibility for 5.0 and above */ -UNIV_INTERN ibool row_rollback_on_timeout = FALSE; - -/** Chain node of the list of tables to drop in the background. 
*/
struct row_mysql_drop_t{
	char*				table_name;	/*!< table name */
	UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
							/*!< list chain node */
};

#ifdef UNIV_PFS_MUTEX
/* Key to register drop list mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t	row_drop_list_mutex_key;
#endif /* UNIV_PFS_MUTEX */

/** @brief List of tables we should drop in background.

ALTER TABLE in MySQL requires that the table handler can drop the
table in background when there are no queries to it any
more.  Protected by row_drop_list_mutex. */
static UT_LIST_BASE_NODE_T(row_mysql_drop_t)	row_mysql_drop_list;

/** Mutex protecting the background table drop list. */
static ib_mutex_t row_drop_list_mutex;

/** Flag: has row_mysql_drop_list been initialized? */
static ibool	row_mysql_drop_list_inited	= FALSE;

/** Magic table names for invoking various monitor threads.
They are declared as char arrays, not pointers, so that sizeof yields
the string length including the terminating NUL, as STR_EQ requires. */
/* @{ */
static const char S_innodb_monitor[] = "innodb_monitor";
static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
static const char S_innodb_table_monitor[] = "innodb_table_monitor";
#ifdef UNIV_MEM_DEBUG
static const char S_innodb_mem_validate[] = "innodb_mem_validate";
#endif /* UNIV_MEM_DEBUG */
/* @} */

/** Evaluates to true if str1 equals str2_onstack, used for comparing
the magic table names. The lengths are compared first, so memcmp is
only evaluated when both strings occupy the same number of bytes.
@param str1 in: string to compare
@param str1_len in: length of str1, in bytes, including terminating NUL
@param str2_onstack in: char[] array containing a NUL terminated string;
must be an array, because the macro takes sizeof of it
@return TRUE if str1 equals str2_onstack */
#define STR_EQ(str1, str1_len, str2_onstack) \
	((str1_len) == sizeof(str2_onstack) \
	 && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)

/*******************************************************************//**
Determine if the given name is a name reserved for MySQL system tables.
-@return TRUE if name is a MySQL system table name */ -static -ibool -row_mysql_is_system_table( -/*======================*/ - const char* name) -{ - if (strncmp(name, "mysql/", 6) != 0) { - - return(FALSE); - } - - return(0 == strcmp(name + 6, "host") - || 0 == strcmp(name + 6, "user") - || 0 == strcmp(name + 6, "db")); -} - -/*********************************************************************//** -If a table is not yet in the drop list, adds the table to the list of tables -which the master thread drops in background. We need this on Unix because in -ALTER TABLE MySQL may call drop table even if the table has running queries on -it. Also, if there are running foreign key checks on the table, we drop the -table lazily. -@return TRUE if the table was not yet in the drop list, and was added there */ -static -ibool -row_add_table_to_background_drop_list( -/*==================================*/ - const char* name); /*!< in: table name */ - -/*******************************************************************//** -Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */ -static -void -row_mysql_delay_if_needed(void) -/*===========================*/ -{ - if (srv_dml_needed_delay) { - os_thread_sleep(srv_dml_needed_delay); - } -} - -/*******************************************************************//** -Frees the blob heap in prebuilt when no longer needed. */ -UNIV_INTERN -void -row_mysql_prebuilt_free_blob_heap( -/*==============================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct of a - ha_innobase:: table handle */ -{ - mem_heap_free(prebuilt->blob_heap); - prebuilt->blob_heap = NULL; -} - -/*******************************************************************//** -Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row -format. 
-@return pointer to the data, we skip the 1 or 2 bytes at the start -that are used to store the len */ -UNIV_INTERN -byte* -row_mysql_store_true_var_len( -/*=========================*/ - byte* dest, /*!< in: where to store */ - ulint len, /*!< in: length, must fit in two bytes */ - ulint lenlen) /*!< in: storage length of len: either 1 or 2 bytes */ -{ - if (lenlen == 2) { - ut_a(len < 256 * 256); - - mach_write_to_2_little_endian(dest, len); - - return(dest + 2); - } - - ut_a(lenlen == 1); - ut_a(len < 256); - - mach_write_to_1(dest, len); - - return(dest + 1); -} - -/*******************************************************************//** -Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and -returns a pointer to the data. -@return pointer to the data, we skip the 1 or 2 bytes at the start -that are used to store the len */ -UNIV_INTERN -const byte* -row_mysql_read_true_varchar( -/*========================*/ - ulint* len, /*!< out: variable-length field length */ - const byte* field, /*!< in: field in the MySQL format */ - ulint lenlen) /*!< in: storage length of len: either 1 - or 2 bytes */ -{ - if (lenlen == 2) { - *len = mach_read_from_2_little_endian(field); - - return(field + 2); - } - - ut_a(lenlen == 1); - - *len = mach_read_from_1(field); - - return(field + 1); -} - -/*******************************************************************//** -Stores a reference to a BLOB in the MySQL format. */ -UNIV_INTERN -void -row_mysql_store_blob_ref( -/*=====================*/ - byte* dest, /*!< in: where to store */ - ulint col_len,/*!< in: dest buffer size: determines into - how many bytes the BLOB length is stored, - the space for the length may vary from 1 - to 4 bytes */ - const void* data, /*!< in: BLOB data; if the value to store - is SQL NULL this should be NULL pointer */ - ulint len) /*!< in: BLOB length; if the value to store - is SQL NULL this should be 0; remember - also to set the NULL bit in the MySQL record - header! 
*/ -{ - /* MySQL might assume the field is set to zero except the length and - the pointer fields */ - - memset(dest, '\0', col_len); - - /* In dest there are 1 - 4 bytes reserved for the BLOB length, - and after that 8 bytes reserved for the pointer to the data. - In 32-bit architectures we only use the first 4 bytes of the pointer - slot. */ - - ut_a(col_len - 8 > 1 || len < 256); - ut_a(col_len - 8 > 2 || len < 256 * 256); - ut_a(col_len - 8 > 3 || len < 256 * 256 * 256); - - memcpy(dest + col_len - 8, &data, sizeof data); - mach_write_to_n_little_endian(dest, col_len - 8, len); -} - -/*******************************************************************//** -Reads a reference to a BLOB in the MySQL format. -@return pointer to BLOB data */ -UNIV_INTERN -const byte* -row_mysql_read_blob_ref( -/*====================*/ - ulint* len, /*!< out: BLOB length */ - const byte* ref, /*!< in: BLOB reference in the - MySQL format */ - ulint col_len) /*!< in: BLOB reference length - (not BLOB length) */ -{ - byte* data = NULL; - - *len = mach_read_from_n_little_endian(ref, col_len - 8); - - memcpy(&data, ref + col_len - 8, sizeof data); - - return(data); -} - -/**************************************************************//** -Pad a column with spaces. 
*/ -UNIV_INTERN -void -row_mysql_pad_col( -/*==============*/ - ulint mbminlen, /*!< in: minimum size of a character, - in bytes */ - byte* pad, /*!< out: padded buffer */ - ulint len) /*!< in: number of bytes to pad */ -{ - const byte* pad_end; - - switch (UNIV_EXPECT(mbminlen, 1)) { - default: - ut_error; - case 1: - /* space=0x20 */ - memset(pad, 0x20, len); - break; - case 2: - /* space=0x0020 */ - pad_end = pad + len; - ut_a(!(len % 2)); - while (pad < pad_end) { - *pad++ = 0x00; - *pad++ = 0x20; - }; - break; - case 4: - /* space=0x00000020 */ - pad_end = pad + len; - ut_a(!(len % 4)); - while (pad < pad_end) { - *pad++ = 0x00; - *pad++ = 0x00; - *pad++ = 0x00; - *pad++ = 0x20; - } - break; - } -} - -/**************************************************************//** -Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format. -The counterpart of this function is row_sel_field_store_in_mysql_format() in -row0sel.cc. -@return up to which byte we used buf in the conversion */ -UNIV_INTERN -byte* -row_mysql_store_col_in_innobase_format( -/*===================================*/ - dfield_t* dfield, /*!< in/out: dfield where dtype - information must be already set when - this function is called! */ - byte* buf, /*!< in/out: buffer for a converted - integer value; this must be at least - col_len long then! NOTE that dfield - may also get a pointer to 'buf', - therefore do not discard this as long - as dfield is used! */ - ibool row_format_col, /*!< TRUE if the mysql_data is from - a MySQL row, FALSE if from a MySQL - key value; - in MySQL, a true VARCHAR storage - format differs in a row and in a - key value: in a key value the length - is always stored in 2 bytes! */ - const byte* mysql_data, /*!< in: MySQL column value, not - SQL NULL; NOTE that dfield may also - get a pointer to mysql_data, - therefore do not discard this as long - as dfield is used! 
*/ - ulint col_len, /*!< in: MySQL column length; NOTE that - this is the storage length of the - column in the MySQL format row, not - necessarily the length of the actual - payload data; if the column is a true - VARCHAR then this is irrelevant */ - ulint comp) /*!< in: nonzero=compact format */ -{ - const byte* ptr = mysql_data; - const dtype_t* dtype; - ulint type; - ulint lenlen; - - dtype = dfield_get_type(dfield); - - type = dtype->mtype; - - if (type == DATA_INT) { - /* Store integer data in Innobase in a big-endian format, - sign bit negated if the data is a signed integer. In MySQL, - integers are stored in a little-endian format. */ - - byte* p = buf + col_len; - - for (;;) { - p--; - *p = *mysql_data; - if (p == buf) { - break; - } - mysql_data++; - } - - if (!(dtype->prtype & DATA_UNSIGNED)) { - - *buf ^= 128; - } - - ptr = buf; - buf += col_len; - } else if ((type == DATA_VARCHAR - || type == DATA_VARMYSQL - || type == DATA_BINARY)) { - - if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) { - /* The length of the actual data is stored to 1 or 2 - bytes at the start of the field */ - - if (row_format_col) { - if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) { - lenlen = 2; - } else { - lenlen = 1; - } - } else { - /* In a MySQL key value, lenlen is always 2 */ - lenlen = 2; - } - - ptr = row_mysql_read_true_varchar( - &col_len, mysql_data, lenlen); - } else { - /* Remove trailing spaces from old style VARCHAR - columns. */ - - /* Handle Unicode strings differently. */ - ulint mbminlen = dtype_get_mbminlen(dtype); - - ptr = mysql_data; - - switch (mbminlen) { - default: - ut_error; - case 4: - /* space=0x00000020 */ - /* Trim "half-chars", just in case. */ - col_len &= ~3; - - while (col_len >= 4 - && ptr[col_len - 4] == 0x00 - && ptr[col_len - 3] == 0x00 - && ptr[col_len - 2] == 0x00 - && ptr[col_len - 1] == 0x20) { - col_len -= 4; - } - break; - case 2: - /* space=0x0020 */ - /* Trim "half-chars", just in case. 
*/ - col_len &= ~1; - - while (col_len >= 2 && ptr[col_len - 2] == 0x00 - && ptr[col_len - 1] == 0x20) { - col_len -= 2; - } - break; - case 1: - /* space=0x20 */ - while (col_len > 0 - && ptr[col_len - 1] == 0x20) { - col_len--; - } - } - } - } else if (comp && type == DATA_MYSQL - && dtype_get_mbminlen(dtype) == 1 - && dtype_get_mbmaxlen(dtype) > 1) { - /* In some cases we strip trailing spaces from UTF-8 and other - multibyte charsets, from FIXED-length CHAR columns, to save - space. UTF-8 would otherwise normally use 3 * the string length - bytes to store an ASCII string! */ - - /* We assume that this CHAR field is encoded in a - variable-length character set where spaces have - 1:1 correspondence to 0x20 bytes, such as UTF-8. - - Consider a CHAR(n) field, a field of n characters. - It will contain between n * mbminlen and n * mbmaxlen bytes. - We will try to truncate it to n bytes by stripping - space padding. If the field contains single-byte - characters only, it will be truncated to n characters. - Consider a CHAR(5) field containing the string ".a " - where "." denotes a 3-byte character represented by - the bytes "$%&". After our stripping, the string will - be stored as "$%&a " (5 bytes). The string ".abc " - will be stored as "$%&abc" (6 bytes). - - The space padding will be restored in row0sel.cc, function - row_sel_field_store_in_mysql_format(). */ - - ulint n_chars; - - ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype))); - - n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype); - - /* Strip space padding. */ - while (col_len > n_chars && ptr[col_len - 1] == 0x20) { - col_len--; - } - } else if (type == DATA_BLOB && row_format_col) { - - ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); - } - - dfield_set_data(dfield, ptr, col_len); - - return(buf); -} - -/**************************************************************//** -Convert a row in the MySQL format to a row in the Innobase format. 
Note that -the function to convert a MySQL format key value to an InnoDB dtuple is -row_sel_convert_mysql_key_to_innobase() in row0sel.cc. */ -static -void -row_mysql_convert_row_to_innobase( -/*==============================*/ - dtuple_t* row, /*!< in/out: Innobase row where the - field type information is already - copied there! */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct where template - must be of type ROW_MYSQL_WHOLE_ROW */ - byte* mysql_rec) /*!< in: row in the MySQL format; - NOTE: do not discard as long as - row is used, as row may contain - pointers to this record! */ -{ - const mysql_row_templ_t*templ; - dfield_t* dfield; - ulint i; - - ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - ut_ad(prebuilt->mysql_template); - - for (i = 0; i < prebuilt->n_template; i++) { - - templ = prebuilt->mysql_template + i; - dfield = dtuple_get_nth_field(row, i); - - if (templ->mysql_null_bit_mask != 0) { - /* Column may be SQL NULL */ - - if (mysql_rec[templ->mysql_null_byte_offset] - & (byte) (templ->mysql_null_bit_mask)) { - - /* It is SQL NULL */ - - dfield_set_null(dfield); - - goto next_column; - } - } - - row_mysql_store_col_in_innobase_format( - dfield, - prebuilt->ins_upd_rec_buff + templ->mysql_col_offset, - TRUE, /* MySQL row format data */ - mysql_rec + templ->mysql_col_offset, - templ->mysql_col_len, - dict_table_is_comp(prebuilt->table)); -next_column: - ; - } - - /* If there is a FTS doc id column and it is not user supplied ( - generated by server) then assign it a new doc id. */ - if (prebuilt->table->fts) { - - ut_a(prebuilt->table->fts->doc_col != ULINT_UNDEFINED); - - fts_create_doc_id(prebuilt->table, row, prebuilt->heap); - } -} - -/****************************************************************//** -Handles user errors and lock waits detected by the database engine. -@return true if it was a lock wait and we should continue running the -query thread and in that case the thr is ALREADY in the running state. 
*/ -UNIV_INTERN -bool -row_mysql_handle_errors( -/*====================*/ - dberr_t* new_err,/*!< out: possible new error encountered in - lock wait, or if no new error, the value - of trx->error_state at the entry of this - function */ - trx_t* trx, /*!< in: transaction */ - que_thr_t* thr, /*!< in: query thread, or NULL */ - trx_savept_t* savept) /*!< in: savepoint, or NULL */ -{ - dberr_t err; - -handle_new_error: - err = trx->error_state; - - ut_a(err != DB_SUCCESS); - - trx->error_state = DB_SUCCESS; - - switch (err) { - case DB_LOCK_WAIT_TIMEOUT: - if (row_rollback_on_timeout) { - trx_rollback_to_savepoint(trx, NULL); - break; - } - /* fall through */ - case DB_DUPLICATE_KEY: - case DB_FOREIGN_DUPLICATE_KEY: - case DB_TOO_BIG_RECORD: - case DB_TOO_BIG_FOR_REDO: - case DB_UNDO_RECORD_TOO_BIG: - case DB_ROW_IS_REFERENCED: - case DB_NO_REFERENCED_ROW: - case DB_CANNOT_ADD_CONSTRAINT: - case DB_TOO_MANY_CONCURRENT_TRXS: - case DB_OUT_OF_FILE_SPACE: - case DB_READ_ONLY: - case DB_FTS_INVALID_DOCID: - case DB_INTERRUPTED: - case DB_DICT_CHANGED: - case DB_TABLE_NOT_FOUND: - case DB_DECRYPTION_FAILED: - if (savept) { - /* Roll back the latest, possibly incomplete insertion - or update */ - - trx_rollback_to_savepoint(trx, savept); - } - /* MySQL will roll back the latest SQL statement */ - break; - case DB_LOCK_WAIT: - lock_wait_suspend_thread(thr); - - if (trx->error_state != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - goto handle_new_error; - } - - *new_err = err; - - return(true); - - case DB_DEADLOCK: - case DB_LOCK_TABLE_FULL: - /* Roll back the whole transaction; this resolution was added - to version 3.23.43 */ - - trx_rollback_to_savepoint(trx, NULL); - break; - - case DB_MUST_GET_MORE_FILE_SPACE: - fputs("InnoDB: The database cannot continue" - " operation because of\n" - "InnoDB: lack of space. 
You must add" - " a new data file to\n" - "InnoDB: my.cnf and restart the database.\n", stderr); - abort(); - - case DB_CORRUPTION: - case DB_PAGE_CORRUPTED: - fputs("InnoDB: We detected index corruption" - " in an InnoDB type table.\n" - "InnoDB: You have to dump + drop + reimport" - " the table or, in\n" - "InnoDB: a case of widespread corruption," - " dump all InnoDB\n" - "InnoDB: tables and recreate the" - " whole InnoDB tablespace.\n" - "InnoDB: If the mysqld server crashes" - " after the startup or when\n" - "InnoDB: you dump the tables, look at\n" - "InnoDB: " REFMAN "forcing-innodb-recovery.html" - " for help.\n", stderr); - break; - case DB_FOREIGN_EXCEED_MAX_CASCADE: - fprintf(stderr, "InnoDB: Cannot delete/update rows with" - " cascading foreign key constraints that exceed max" - " depth of %lu\n" - "Please drop excessive foreign constraints" - " and try again\n", (ulong) DICT_FK_MAX_RECURSIVE_LOAD); - break; - default: - fprintf(stderr, "InnoDB: unknown error code %lu\n", - (ulong) err); - ut_error; - } - - if (trx->error_state != DB_SUCCESS) { - *new_err = trx->error_state; - } else { - *new_err = err; - } - - trx->error_state = DB_SUCCESS; - - return(false); -} - -/********************************************************************//** -Create a prebuilt struct for a MySQL table handle. 
-@return own: a prebuilt struct */ -UNIV_INTERN -row_prebuilt_t* -row_create_prebuilt( -/*================*/ - dict_table_t* table, /*!< in: Innobase table handle */ - ulint mysql_row_len) /*!< in: length in bytes of a row in - the MySQL format */ -{ - row_prebuilt_t* prebuilt; - mem_heap_t* heap; - dict_index_t* clust_index; - dict_index_t* temp_index; - dtuple_t* ref; - ulint ref_len; - uint srch_key_len = 0; - ulint search_tuple_n_fields; - - search_tuple_n_fields = 2 * dict_table_get_n_cols(table); - - clust_index = dict_table_get_first_index(table); - - /* Make sure that search_tuple is long enough for clustered index */ - ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); - - ref_len = dict_index_get_n_unique(clust_index); - - - /* Maximum size of the buffer needed for conversion of INTs from - little endian format to big endian format in an index. An index - can have maximum 16 columns (MAX_REF_PARTS) in it. Therfore - Max size for PK: 16 * 8 bytes (BIGINT's size) = 128 bytes - Max size Secondary index: 16 * 8 bytes + PK = 256 bytes. */ -#define MAX_SRCH_KEY_VAL_BUFFER 2* (8 * MAX_REF_PARTS) - -#define PREBUILT_HEAP_INITIAL_SIZE \ - ( \ - sizeof(*prebuilt) \ - /* allocd in this function */ \ - + DTUPLE_EST_ALLOC(search_tuple_n_fields) \ - + DTUPLE_EST_ALLOC(ref_len) \ - /* allocd in row_prebuild_sel_graph() */ \ - + sizeof(sel_node_t) \ - + sizeof(que_fork_t) \ - + sizeof(que_thr_t) \ - /* allocd in row_get_prebuilt_update_vector() */ \ - + sizeof(upd_node_t) \ - + sizeof(upd_t) \ - + sizeof(upd_field_t) \ - * dict_table_get_n_cols(table) \ - + sizeof(que_fork_t) \ - + sizeof(que_thr_t) \ - /* allocd in row_get_prebuilt_insert_row() */ \ - + sizeof(ins_node_t) \ - /* mysql_row_len could be huge and we are not \ - sure if this prebuilt instance is going to be \ - used in inserts */ \ - + (mysql_row_len < 256 ? 
mysql_row_len : 0) \ - + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table)) \ - + sizeof(que_fork_t) \ - + sizeof(que_thr_t) \ - ) - - /* Calculate size of key buffer used to store search key in - InnoDB format. MySQL stores INTs in little endian format and - InnoDB stores INTs in big endian format with the sign bit - flipped. All other field types are stored/compared the same - in MySQL and InnoDB, so we must create a buffer containing - the INT key parts in InnoDB format.We need two such buffers - since both start and end keys are used in records_in_range(). */ - - for (temp_index = dict_table_get_first_index(table); temp_index; - temp_index = dict_table_get_next_index(temp_index)) { - DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value", - ut_a(temp_index->n_user_defined_cols - == MAX_REF_PARTS);); - uint temp_len = 0; - for (uint i = 0; i < temp_index->n_uniq; i++) { - if (temp_index->fields[i].col->mtype == DATA_INT) { - temp_len += - temp_index->fields[i].fixed_len; - } - } - srch_key_len = max(srch_key_len,temp_len); - } - - ut_a(srch_key_len <= MAX_SRCH_KEY_VAL_BUFFER); - - DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value", - ut_a(srch_key_len == MAX_SRCH_KEY_VAL_BUFFER);); - - /* We allocate enough space for the objects that are likely to - be created later in order to minimize the number of malloc() - calls */ - heap = mem_heap_create(PREBUILT_HEAP_INITIAL_SIZE + 2 * srch_key_len); - - prebuilt = static_cast<row_prebuilt_t*>( - mem_heap_zalloc(heap, sizeof(*prebuilt))); - - prebuilt->magic_n = ROW_PREBUILT_ALLOCATED; - prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED; - - prebuilt->table = table; - - prebuilt->sql_stat_start = TRUE; - prebuilt->heap = heap; - - prebuilt->srch_key_val_len = srch_key_len; - if (prebuilt->srch_key_val_len) { - prebuilt->srch_key_val1 = static_cast<byte*>( - mem_heap_alloc(prebuilt->heap, - 2 * prebuilt->srch_key_val_len)); - prebuilt->srch_key_val2 = prebuilt->srch_key_val1 + - prebuilt->srch_key_val_len; - } else { - 
prebuilt->srch_key_val1 = NULL; - prebuilt->srch_key_val2 = NULL; - } - - btr_pcur_reset(&prebuilt->pcur); - btr_pcur_reset(&prebuilt->clust_pcur); - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE_UNSET; - - prebuilt->search_tuple = dtuple_create(heap, search_tuple_n_fields); - - ref = dtuple_create(heap, ref_len); - - dict_index_copy_types(ref, clust_index, ref_len); - - prebuilt->clust_ref = ref; - - prebuilt->autoinc_error = DB_SUCCESS; - prebuilt->autoinc_offset = 0; - - /* Default to 1, we will set the actual value later in - ha_innobase::get_auto_increment(). */ - prebuilt->autoinc_increment = 1; - - prebuilt->autoinc_last_value = 0; - - /* During UPDATE and DELETE we need the doc id. */ - prebuilt->fts_doc_id = 0; - - prebuilt->mysql_row_len = mysql_row_len; - - return(prebuilt); -} - -/********************************************************************//** -Free a prebuilt struct for a MySQL table handle. */ -UNIV_INTERN -void -row_prebuilt_free( -/*==============*/ - row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */ - ibool dict_locked) /*!< in: TRUE=data dictionary locked */ -{ - ulint i; - - if (UNIV_UNLIKELY - (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED - || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) { - - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. 
Magic n %lu," - " magic n2 %lu, table name ", - (ulong) prebuilt->magic_n, - (ulong) prebuilt->magic_n2); - ut_print_name(stderr, NULL, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - prebuilt->magic_n = ROW_PREBUILT_FREED; - prebuilt->magic_n2 = ROW_PREBUILT_FREED; - - btr_pcur_reset(&prebuilt->pcur); - btr_pcur_reset(&prebuilt->clust_pcur); - - if (prebuilt->mysql_template) { - mem_free(prebuilt->mysql_template); - } - - if (prebuilt->ins_graph) { - que_graph_free_recursive(prebuilt->ins_graph); - } - - if (prebuilt->sel_graph) { - que_graph_free_recursive(prebuilt->sel_graph); - } - - if (prebuilt->upd_graph) { - que_graph_free_recursive(prebuilt->upd_graph); - } - - if (prebuilt->blob_heap) { - mem_heap_free(prebuilt->blob_heap); - } - - if (prebuilt->old_vers_heap) { - mem_heap_free(prebuilt->old_vers_heap); - } - - if (prebuilt->fetch_cache[0] != NULL) { - byte* base = prebuilt->fetch_cache[0] - 4; - byte* ptr = base; - - for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) { - byte* row; - ulint magic1; - ulint magic2; - - magic1 = mach_read_from_4(ptr); - ptr += 4; - - row = ptr; - ptr += prebuilt->mysql_row_len; - - magic2 = mach_read_from_4(ptr); - ptr += 4; - - if (ROW_PREBUILT_FETCH_MAGIC_N != magic1 - || row != prebuilt->fetch_cache[i] - || ROW_PREBUILT_FETCH_MAGIC_N != magic2) { - - fputs("InnoDB: Error: trying to free" - " a corrupt fetch buffer.\n", stderr); - - mem_analyze_corruption(base); - ut_error; - } - } - - mem_free(base); - } - - dict_table_close(prebuilt->table, dict_locked, TRUE); - - mem_heap_free(prebuilt->heap); -} - -/*********************************************************************//** -Updates the transaction pointers in query graphs stored in the prebuilt -struct. 
*/ -UNIV_INTERN -void -row_update_prebuilt_trx( -/*====================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct - in MySQL handle */ - trx_t* trx) /*!< in: transaction handle */ -{ - if (trx->magic_n != TRX_MAGIC_N) { - fprintf(stderr, - "InnoDB: Error: trying to use a corrupt\n" - "InnoDB: trx handle. Magic n %lu\n", - (ulong) trx->magic_n); - - mem_analyze_corruption(trx); - - ut_error; - } - - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to use a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name ", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, trx, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - prebuilt->trx = trx; - - if (prebuilt->ins_graph) { - prebuilt->ins_graph->trx = trx; - } - - if (prebuilt->upd_graph) { - prebuilt->upd_graph->trx = trx; - } - - if (prebuilt->sel_graph) { - prebuilt->sel_graph->trx = trx; - } -} - -/*********************************************************************//** -Gets pointer to a prebuilt dtuple used in insertions. If the insert graph -has not yet been built in the prebuilt struct, then this function first -builds it. -@return prebuilt dtuple; the column type information is also set in it */ -static -dtuple_t* -row_get_prebuilt_insert_row( -/*========================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ -{ - dict_table_t* table = prebuilt->table; - - ut_ad(prebuilt && table && prebuilt->trx); - - if (prebuilt->ins_node != 0) { - - /* Check if indexes have been dropped or added and we - may need to rebuild the row insert template. 
*/ - - if (prebuilt->trx_id == table->def_trx_id - && UT_LIST_GET_LEN(prebuilt->ins_node->entry_list) - == UT_LIST_GET_LEN(table->indexes)) { - - return(prebuilt->ins_node->row); - } - - ut_ad(prebuilt->trx_id < table->def_trx_id); - - que_graph_free_recursive(prebuilt->ins_graph); - - prebuilt->ins_graph = 0; - } - - /* Create an insert node and query graph to the prebuilt struct */ - - ins_node_t* node; - - node = ins_node_create(INS_DIRECT, table, prebuilt->heap); - - prebuilt->ins_node = node; - - if (prebuilt->ins_upd_rec_buff == 0) { - prebuilt->ins_upd_rec_buff = static_cast<byte*>( - mem_heap_alloc( - prebuilt->heap, - prebuilt->mysql_row_len)); - } - - dtuple_t* row; - - row = dtuple_create(prebuilt->heap, dict_table_get_n_cols(table)); - - dict_table_copy_types(row, table); - - ins_node_set_new_row(node, row); - - prebuilt->ins_graph = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec( - node, - prebuilt->trx, prebuilt->heap))); - - prebuilt->ins_graph->state = QUE_FORK_ACTIVE; - - prebuilt->trx_id = table->def_trx_id; - - return(prebuilt->ins_node->row); -} - -/*********************************************************************//** -Updates the table modification counter and calculates new estimates -for table and index statistics if necessary. 
*/ -UNIV_INLINE -void -row_update_statistics_if_needed( -/*============================*/ - dict_table_t* table) /*!< in: table */ -{ - ib_uint64_t counter; - ib_uint64_t n_rows; - - if (!table->stat_initialized) { - DBUG_EXECUTE_IF( - "test_upd_stats_if_needed_not_inited", - fprintf(stderr, "test_upd_stats_if_needed_not_inited " - "was executed\n"); - ); - return; - } - - counter = table->stat_modified_counter++; - n_rows = dict_table_get_n_rows(table); - - if (dict_stats_is_persistent_enabled(table)) { - if (counter > n_rows / 10 /* 10% */ - && dict_stats_auto_recalc_is_enabled(table)) { - - dict_stats_recalc_pool_add(table); - table->stat_modified_counter = 0; - } - return; - } - - /* Calculate new statistics if 1 / 16 of table has been modified - since the last time a statistics batch was run. - We calculate statistics at most every 16th round, since we may have - a counter table which is very small and updated very often. */ - ib_uint64_t threshold= 16 + n_rows / 16; /* 6.25% */ - if (srv_stats_modified_counter) - threshold= ut_min(srv_stats_modified_counter, threshold); - - if (counter > threshold) { - - ut_ad(!mutex_own(&dict_sys->mutex)); - /* this will reset table->stat_modified_counter to 0 */ - dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT); - } -} - -/*********************************************************************//** -Sets an AUTO_INC type lock on the table mentioned in prebuilt. The -AUTO_INC lock gives exclusive access to the auto-inc counter of the -table. The lock is reserved only for the duration of an SQL statement. -It is not compatible with another AUTO_INC or exclusive lock on the -table. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_lock_table_autoinc_for_mysql( -/*=============================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL - table handle */ -{ - trx_t* trx = prebuilt->trx; - ins_node_t* node = prebuilt->ins_node; - const dict_table_t* table = prebuilt->table; - que_thr_t* thr; - dberr_t err; - ibool was_lock_wait; - - ut_ad(trx); - - /* If we already hold an AUTOINC lock on the table then do nothing. - Note: We peek at the value of the current owner without acquiring - the lock mutex. **/ - if (trx == table->autoinc_trx) { - - return(DB_SUCCESS); - } - - trx->op_info = "setting auto-inc lock"; - - row_get_prebuilt_insert_row(prebuilt); - node = prebuilt->ins_node; - - /* We use the insert query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr(prebuilt->ins_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = node; - thr->prev_node = node; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started_xa(trx); - - err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr); - - trx->error_state = err; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return(err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Sets a table lock on the table mentioned in prebuilt. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_lock_table_for_mysql( -/*=====================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL - table handle */ - dict_table_t* table, /*!< in: table to lock, or NULL - if prebuilt->table should be - locked as - prebuilt->select_lock_type */ - ulint mode) /*!< in: lock mode of table - (ignored if table==NULL) */ -{ - trx_t* trx = prebuilt->trx; - que_thr_t* thr; - dberr_t err; - ibool was_lock_wait; - - ut_ad(trx); - - trx->op_info = "setting table lock"; - - if (prebuilt->sel_graph == NULL) { - /* Build a dummy select query graph */ - row_prebuild_sel_graph(prebuilt); - } - - /* We use the select query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr(prebuilt->sel_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started_xa(trx); - - if (table) { - err = lock_table( - 0, table, - static_cast<enum lock_mode>(mode), thr); - } else { - err = lock_table( - 0, prebuilt->table, - static_cast<enum lock_mode>( - prebuilt->select_lock_type), - thr); - } - - trx->error_state = err; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return(err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Determine is tablespace encrypted but decryption failed, is table corrupted -or is tablespace .ibd file missing. 
-@param[in] table Table -@param[in] trx Transaction -@param[in] push_warning true if we should push warning to user -@return DB_DECRYPTION_FAILED table is encrypted but decryption failed -DB_CORRUPTION table is corrupted -DB_TABLESPACE_NOT_FOUND tablespace .ibd file not found */ -static -dberr_t -row_mysql_get_table_status( - const dict_table_t* table, - trx_t* trx, - bool push_warning = true) -{ - dberr_t err = DB_SUCCESS; - FilSpace space(table->space, true); - char buf[MAX_FULL_NAME_LEN]; - ut_format_name(table->name, TRUE, buf, sizeof(buf)); - - if (space()) { - - if (space()->crypt_data && space()->crypt_data->is_encrypted()) { - // maybe we cannot access the table due to failing - // to decrypt - if (push_warning) { - ib_push_warning(trx,HA_ERR_DECRYPTION_FAILED, - "Table %s in file %s is encrypted but encryption service or" - " used key_id %u is not available. " - " Can't continue reading table.", - buf, space()->chain.start->name, - space()->crypt_data->key_id); - } - - err = DB_DECRYPTION_FAILED; - } else { - if (push_warning) { - ib_push_warning(trx, DB_CORRUPTION, - "Table %s in file %s corrupted.", - buf, space()->chain.start->name); - } - - err = DB_CORRUPTION; - } - - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for" - " table %s does not exist." - " Have you deleted the .ibd file" - " from the database directory under" - " the MySQL datadir, or have you" - " used DISCARD TABLESPACE?" - " Look from " REFMAN "innodb-troubleshooting.html" - " how you can resolve the problem.", - buf); - - err = DB_TABLESPACE_NOT_FOUND; - } - - return (err); -} - -/*********************************************************************//** -Does an insert for MySQL. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_insert_for_mysql( -/*=================*/ - byte* mysql_rec, /*!< in: row in the MySQL format */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ -{ - trx_savept_t savept; - que_thr_t* thr; - dberr_t err; - ibool was_lock_wait; - trx_t* trx = prebuilt->trx; - ins_node_t* node = prebuilt->ins_node; - dict_table_t* table = prebuilt->table; - - ut_ad(trx); - - if (dict_table_is_discarded(prebuilt->table)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "The table %s doesn't have a corresponding " - "tablespace, it was discarded.", - prebuilt->table->name); - - return(DB_TABLESPACE_DELETED); - - } else if (!prebuilt->table->is_readable()) { - return (row_mysql_get_table_status(prebuilt->table, trx, true)); - } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name ", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, trx, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } else if (srv_force_recovery) { - fputs("InnoDB: innodb_force_recovery is on: we do not allow\n" - "InnoDB: database modifications by the user. Shut down\n" - "InnoDB: mysqld and edit my.cnf so that" - "InnoDB: innodb_force_... 
is removed.\n", - stderr); - - return(DB_READ_ONLY); - } - - trx->op_info = "inserting"; - - row_mysql_delay_if_needed(); - - trx_start_if_not_started_xa(trx); - - row_get_prebuilt_insert_row(prebuilt); - node = prebuilt->ins_node; - - row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec); - - savept = trx_savept_take(trx); - - thr = que_fork_get_first_thr(prebuilt->ins_graph); - - if (prebuilt->sql_stat_start) { - node->state = INS_NODE_SET_IX_LOCK; - prebuilt->sql_stat_start = FALSE; - } else { - node->state = INS_NODE_ALLOC_ROW_ID; - } - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = node; - thr->prev_node = node; - - row_ins_step(thr); - - DEBUG_SYNC_C("ib_after_row_insert_step"); - - err = trx->error_state; - - if (err != DB_SUCCESS) { -error_exit: - que_thr_stop_for_mysql(thr); - - /* FIXME: What's this ? */ - thr->lock_state = QUE_THR_LOCK_ROW; - - was_lock_wait = row_mysql_handle_errors( - &err, trx, thr, &savept); - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - - if (was_lock_wait) { - ut_ad(node->state == INS_NODE_INSERT_ENTRIES - || node->state == INS_NODE_ALLOC_ROW_ID); - goto run_again; - } - - trx->op_info = ""; - - return(err); - } - - if (dict_table_has_fts_index(table) - && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) { - doc_id_t doc_id; - - /* Extract the doc id from the hidden FTS column */ - doc_id = fts_get_doc_id_from_row(table, node->row); - - if (doc_id <= 0) { - fprintf(stderr, - "InnoDB: FTS Doc ID must be large than 0 \n"); - err = DB_FTS_INVALID_DOCID; - trx->error_state = DB_FTS_INVALID_DOCID; - goto error_exit; - } - - if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - doc_id_t next_doc_id - = table->fts->cache->next_doc_id; - - if (doc_id < next_doc_id) { - fprintf(stderr, - "InnoDB: FTS Doc ID must be large than" - " " UINT64PF " for table", - next_doc_id - 1); - ut_print_name(stderr, trx, TRUE, table->name); - putc('\n', stderr); - - err = DB_FTS_INVALID_DOCID; - 
trx->error_state = DB_FTS_INVALID_DOCID; - goto error_exit; - } - - /* Difference between Doc IDs are restricted within - 4 bytes integer. See fts_get_encoded_len(). Consecutive - doc_ids difference should not exceed - FTS_DOC_ID_MAX_STEP value. */ - - if (next_doc_id > 1 - && doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) { - fprintf(stderr, - "InnoDB: Doc ID " UINT64PF " is too" - " big. Its difference with largest" - " used Doc ID " UINT64PF " cannot" - " exceed or equal to %d\n", - doc_id, next_doc_id - 1, - FTS_DOC_ID_MAX_STEP); - err = DB_FTS_INVALID_DOCID; - trx->error_state = DB_FTS_INVALID_DOCID; - goto error_exit; - } - } - - /* Pass NULL for the columns affected, since an INSERT affects - all FTS indexes. */ - fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - if (UNIV_LIKELY(!(trx->fake_changes))) { - if (table->is_system_db) { - srv_stats.n_system_rows_inserted.inc(size_t(trx->id)); - } else { - srv_stats.n_rows_inserted.inc(size_t(trx->id)); - } - - if (prebuilt->clust_index_was_generated) { - /* set row id to prebuilt */ - ut_memcpy(prebuilt->row_id, node->row_id_buf, DATA_ROW_ID_LEN); - } - - /* Not protected by dict_table_stats_lock() for performance - reasons, we would rather get garbage in stat_n_rows (which is - just an estimate anyway) than protecting the following code - with a latch. */ - dict_table_n_rows_inc(table); - - row_update_statistics_if_needed(table); - } - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Builds a dummy query graph used in selects. 
*/ -UNIV_INTERN -void -row_prebuild_sel_graph( -/*===================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ -{ - sel_node_t* node; - - ut_ad(prebuilt && prebuilt->trx); - - if (prebuilt->sel_graph == NULL) { - - node = sel_node_create(prebuilt->heap); - - prebuilt->sel_graph = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec( - static_cast<sel_node_t*>(node), - prebuilt->trx, prebuilt->heap))); - - prebuilt->sel_graph->state = QUE_FORK_ACTIVE; - } -} - -/*********************************************************************//** -Creates an query graph node of 'update' type to be used in the MySQL -interface. -@return own: update node */ -UNIV_INTERN -upd_node_t* -row_create_update_node_for_mysql( -/*=============================*/ - dict_table_t* table, /*!< in: table to update */ - mem_heap_t* heap) /*!< in: mem heap from which allocated */ -{ - upd_node_t* node; - - node = upd_node_create(heap); - - node->in_mysql_interface = TRUE; - node->is_delete = FALSE; - node->searched_update = FALSE; - node->select = NULL; - node->pcur = btr_pcur_create_for_mysql(); - node->table = table; - - node->update = upd_create(dict_table_get_n_cols(table), heap); - - node->update_n_fields = dict_table_get_n_cols(table); - - UT_LIST_INIT(node->columns); - node->has_clust_rec_x_lock = TRUE; - node->cmpl_info = 0; - - node->table_sym = NULL; - node->col_assign_list = NULL; - - return(node); -} - -/*********************************************************************//** -Gets pointer to a prebuilt update vector used in updates. If the update -graph has not yet been built in the prebuilt struct, then this function -first builds it. 
-@return prebuilt update vector */ -UNIV_INTERN -upd_t* -row_get_prebuilt_update_vector( -/*===========================*/ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ -{ - dict_table_t* table = prebuilt->table; - upd_node_t* node; - - ut_ad(prebuilt && table && prebuilt->trx); - - if (prebuilt->upd_node == NULL) { - - /* Not called before for this handle: create an update node - and query graph to the prebuilt struct */ - - node = row_create_update_node_for_mysql(table, prebuilt->heap); - - prebuilt->upd_node = node; - - prebuilt->upd_graph = static_cast<que_fork_t*>( - que_node_get_parent( - pars_complete_graph_for_exec( - static_cast<upd_node_t*>(node), - prebuilt->trx, prebuilt->heap))); - - prebuilt->upd_graph->state = QUE_FORK_ACTIVE; - } - - return(prebuilt->upd_node->update); -} - -/******************************************************************** -Handle an update of a column that has an FTS index. */ -static -void -row_fts_do_update( -/*==============*/ - trx_t* trx, /* in: transaction */ - dict_table_t* table, /* in: Table with FTS index */ - doc_id_t old_doc_id, /* in: old document id */ - doc_id_t new_doc_id) /* in: new document id */ -{ - if (trx->fts_next_doc_id) { - fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL); - fts_trx_add_op(trx, table, new_doc_id, FTS_INSERT, NULL); - } -} - -/************************************************************************ -Handles FTS matters for an update or a delete. -NOTE: should not be called if the table does not have an FTS index. .*/ -static -dberr_t -row_fts_update_or_delete( -/*=====================*/ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - trx_t* trx = prebuilt->trx; - dict_table_t* table = prebuilt->table; - upd_node_t* node = prebuilt->upd_node; - doc_id_t old_doc_id = prebuilt->fts_doc_id; - - ut_a(dict_table_has_fts_index(prebuilt->table)); - - /* Deletes are simple; get them out of the way first. 
*/ - if (node->is_delete) { - /* A delete affects all FTS indexes, so we pass NULL */ - fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL); - } else { - doc_id_t new_doc_id; - - new_doc_id = fts_read_doc_id((byte*) &trx->fts_next_doc_id); - - if (new_doc_id == 0) { - fprintf(stderr, " InnoDB FTS: Doc ID cannot be 0 \n"); - return(DB_FTS_INVALID_DOCID); - } - - row_fts_do_update(trx, table, old_doc_id, new_doc_id); - } - - return(DB_SUCCESS); -} - -/*********************************************************************//** -Initialize the Doc ID system for FK table with FTS index */ -static -void -init_fts_doc_id_for_ref( -/*====================*/ - dict_table_t* table, /*!< in: table */ - ulint* depth) /*!< in: recusive call depth */ -{ - dict_foreign_t* foreign; - - table->fk_max_recusive_level = 0; - - (*depth)++; - - /* Limit on tables involved in cascading delete/update */ - if (*depth > FK_MAX_CASCADE_DEL) { - return; - } - - /* Loop through this table's referenced list and also - recursively traverse each table's foreign table list */ - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - if (foreign->foreign_table == NULL) { - break; - } - - if (foreign->foreign_table->fts != NULL) { - fts_init_doc_id(foreign->foreign_table); - } - - if (!foreign->foreign_table->referenced_set.empty() - && foreign->foreign_table != table) { - init_fts_doc_id_for_ref( - foreign->foreign_table, depth); - } - } -} - -/*********************************************************************//** -Does an update or delete of a row for MySQL. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_update_for_mysql( -/*=================*/ - byte* mysql_rec, /*!< in: the row to be updated, in - the MySQL format */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL - handle */ -{ - trx_savept_t savept; - dberr_t err; - que_thr_t* thr; - ibool was_lock_wait; - dict_index_t* clust_index; - /* ulint ref_len; */ - upd_node_t* node; - dict_table_t* table = prebuilt->table; - trx_t* trx = prebuilt->trx; - ulint fk_depth = 0; - - ut_ad(prebuilt != NULL); - ut_ad(trx != NULL); - UT_NOT_USED(mysql_rec); - - if (!table->is_readable()) { - return (row_mysql_get_table_status(table, trx, true)); - } - - if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name ", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, trx, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - if (UNIV_UNLIKELY(srv_force_recovery)) { - fputs("InnoDB: innodb_force_recovery is on: we do not allow\n" - "InnoDB: database modifications by the user. Shut down\n" - "InnoDB: mysqld and edit my.cnf so that" - "InnoDB: innodb_force_... is removed.\n", - stderr); - - return(DB_READ_ONLY); - } - - DEBUG_SYNC_C("innodb_row_update_for_mysql_begin"); - - trx->op_info = "updating or deleting"; - - row_mysql_delay_if_needed(); - - trx_start_if_not_started_xa(trx); - - if (dict_table_is_referenced_by_foreign_key(table)) { - /* Share lock the data dictionary to prevent any - table dictionary (for foreign constraint) change. - This is similar to row_ins_check_foreign_constraint - check protect by the dictionary lock as well. 
- In the future, this can be removed once the Foreign - key MDL is implemented */ - row_mysql_freeze_data_dictionary(trx); - init_fts_doc_id_for_ref(table, &fk_depth); - row_mysql_unfreeze_data_dictionary(trx); - } - - node = prebuilt->upd_node; - - clust_index = dict_table_get_first_index(table); - - if (prebuilt->pcur.btr_cur.index == clust_index) { - btr_pcur_copy_stored_position(node->pcur, &prebuilt->pcur); - } else { - btr_pcur_copy_stored_position(node->pcur, - &prebuilt->clust_pcur); - } - - ut_a(node->pcur->rel_pos == BTR_PCUR_ON); - - /* MySQL seems to call rnd_pos before updating each row it - has cached: we can get the correct cursor position from - prebuilt->pcur; NOTE that we cannot build the row reference - from mysql_rec if the clustered index was automatically - generated for the table: MySQL does not know anything about - the row id used as the clustered index key */ - - savept = trx_savept_take(trx); - - thr = que_fork_get_first_thr(prebuilt->upd_graph); - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - ut_ad(!prebuilt->sql_stat_start); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = node; - thr->prev_node = node; - thr->fk_cascade_depth = 0; - - row_upd_step(thr); - - err = trx->error_state; - - /* Reset fk_cascade_depth back to 0 */ - thr->fk_cascade_depth = 0; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - if (err == DB_RECORD_NOT_FOUND) { - trx->error_state = DB_SUCCESS; - trx->op_info = ""; - - return(err); - } - - thr->lock_state= QUE_THR_LOCK_ROW; - - DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error"); - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, - &savept); - thr->lock_state= QUE_THR_LOCK_NOLOCK; - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return(err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - if (UNIV_UNLIKELY(trx->fake_changes)) { - - trx->op_info = ""; - return(err); - } - - if (dict_table_has_fts_index(table) - && 
trx->fts_next_doc_id != UINT64_UNDEFINED) { - err = row_fts_update_or_delete(prebuilt); - if (err != DB_SUCCESS) { - trx->op_info = ""; - return(err); - } - } - - if (node->is_delete) { - /* Not protected by dict_table_stats_lock() for performance - reasons, we would rather get garbage in stat_n_rows (which is - just an estimate anyway) than protecting the following code - with a latch. */ - dict_table_n_rows_dec(prebuilt->table); - - if (table->is_system_db) { - srv_stats.n_system_rows_deleted.inc(size_t(trx->id)); - } else { - srv_stats.n_rows_deleted.inc(size_t(trx->id)); - } - } else { - if (table->is_system_db) { - srv_stats.n_system_rows_updated.inc(size_t(trx->id)); - } else { - srv_stats.n_rows_updated.inc(size_t(trx->id)); - } - } - - /* We update table statistics only if it is a DELETE or UPDATE - that changes indexed columns, UPDATEs that change only non-indexed - columns would not affect statistics. */ - if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - row_update_statistics_if_needed(prebuilt->table); - } else { - /* Update the table modification counter even when - non-indexed columns change if statistics is initialized. */ - if (prebuilt->table->stat_initialized) { - prebuilt->table->stat_modified_counter++; - } - } - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -This can only be used when srv_locks_unsafe_for_binlog is TRUE or this -session is using a READ COMMITTED or READ UNCOMMITTED isolation level. -Before calling this function row_search_for_mysql() must have -initialized prebuilt->new_rec_locks to store the information which new -record locks really were set. This function removes a newly set -clustered index record lock under prebuilt->pcur or -prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that -releases the latest clustered index record lock we set. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -void -row_unlock_for_mysql( -/*=================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL - handle */ - ibool has_latches_on_recs)/*!< in: TRUE if called so - that we have the latches on - the records under pcur and - clust_pcur, and we do not need - to reposition the cursors. */ -{ - btr_pcur_t* pcur = &prebuilt->pcur; - btr_pcur_t* clust_pcur = &prebuilt->clust_pcur; - trx_t* trx = prebuilt->trx; - - ut_ad(prebuilt != NULL); - ut_ad(trx != NULL); - - if (UNIV_UNLIKELY - (!srv_locks_unsafe_for_binlog - && trx->isolation_level > TRX_ISO_READ_COMMITTED)) { - - fprintf(stderr, - "InnoDB: Error: calling row_unlock_for_mysql though\n" - "InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n" - "InnoDB: this session is not using" - " READ COMMITTED isolation level.\n"); - return; - } - - trx->op_info = "unlock_row"; - - if (prebuilt->new_rec_locks >= 1) { - - const rec_t* rec; - dict_index_t* index; - trx_id_t rec_trx_id; - mtr_t mtr; - - mtr_start_trx(&mtr, trx); - - /* Restore the cursor position and find the record */ - - if (!has_latches_on_recs) { - btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr); - } - - rec = btr_pcur_get_rec(pcur); - index = btr_pcur_get_btr_cur(pcur)->index; - - if (prebuilt->new_rec_locks >= 2) { - /* Restore the cursor position and find the record - in the clustered index. */ - - if (!has_latches_on_recs) { - btr_pcur_restore_position(BTR_SEARCH_LEAF, - clust_pcur, &mtr); - } - - rec = btr_pcur_get_rec(clust_pcur); - index = btr_pcur_get_btr_cur(clust_pcur)->index; - } - - if (!dict_index_is_clust(index)) { - /* This is not a clustered index record. We - do not know how to unlock the record. */ - goto no_unlock; - } - - /* If the record has been modified by this - transaction, do not unlock it. 
*/ - - if (index->trx_id_offset) { - rec_trx_id = trx_read_trx_id(rec - + index->trx_id_offset); - } else { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - rec_offs_init(offsets_); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - rec_trx_id = row_get_rec_trx_id(rec, index, offsets); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - if (rec_trx_id != trx->id) { - /* We did not update the record: unlock it */ - - rec = btr_pcur_get_rec(pcur); - - lock_rec_unlock( - trx, - btr_pcur_get_block(pcur), - rec, - static_cast<enum lock_mode>( - prebuilt->select_lock_type)); - - if (prebuilt->new_rec_locks >= 2) { - rec = btr_pcur_get_rec(clust_pcur); - - lock_rec_unlock( - trx, - btr_pcur_get_block(clust_pcur), - rec, - static_cast<enum lock_mode>( - prebuilt->select_lock_type)); - } - } -no_unlock: - mtr_commit(&mtr); - } - - trx->op_info = ""; -} - -/**********************************************************************//** -Does a cascaded delete or set null in a foreign key operation. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_update_cascade_for_mysql( -/*=========================*/ - que_thr_t* thr, /*!< in: query thread */ - upd_node_t* node, /*!< in: update node used in the cascade - or set null operation */ - dict_table_t* table) /*!< in: table where we do the operation */ -{ - dberr_t err; - trx_t* trx; - - trx = thr_get_trx(thr); - - /* Increment fk_cascade_depth to record the recursive call depth on - a single update/delete that affects multiple tables chained - together with foreign key relations. 
*/ - thr->fk_cascade_depth++; - - if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) { - return(DB_FOREIGN_EXCEED_MAX_CASCADE); - } -run_again: - thr->run_node = node; - thr->prev_node = node; - - DEBUG_SYNC_C("foreign_constraint_update_cascade"); - - row_upd_step(thr); - - /* The recursive call for cascading update/delete happens - in above row_upd_step(), reset the counter once we come - out of the recursive call, so it does not accumulate for - different row deletes */ - thr->fk_cascade_depth = 0; - - err = trx->error_state; - - /* Note that the cascade node is a subnode of another InnoDB - query graph node. We do a normal lock wait in this node, but - all errors are handled by the parent node. */ - - if (err == DB_LOCK_WAIT) { - /* Handle lock wait here */ - - que_thr_stop_for_mysql(thr); - - lock_wait_suspend_thread(thr); - - /* Note that a lock wait may also end in a lock wait timeout, - or this transaction is picked as a victim in selective - deadlock resolution */ - - if (trx->error_state != DB_SUCCESS) { - - return(trx->error_state); - } - - /* Retry operation after a normal lock wait */ - - goto run_again; - } - - if (err != DB_SUCCESS) { - - return(err); - } - - if (UNIV_UNLIKELY((trx->fake_changes))) { - - return(err); - } - - if (node->is_delete) { - /* Not protected by dict_table_stats_lock() for performance - reasons, we would rather get garbage in stat_n_rows (which is - just an estimate anyway) than protecting the following code - with a latch. 
*/ - dict_table_n_rows_dec(table); - - if (table->is_system_db) { - srv_stats.n_system_rows_deleted.inc(size_t(trx->id)); - } else { - srv_stats.n_rows_deleted.inc(size_t(trx->id)); - } - } else { - if (table->is_system_db) { - srv_stats.n_system_rows_updated.inc(size_t(trx->id)); - } else { - srv_stats.n_rows_updated.inc(size_t(trx->id)); - } - } - - row_update_statistics_if_needed(table); - - return(err); -} - -/*********************************************************************//** -Checks if a table is such that we automatically created a clustered -index on it (on row id). -@return TRUE if the clustered index was generated automatically */ -UNIV_INTERN -ibool -row_table_got_default_clust_index( -/*==============================*/ - const dict_table_t* table) /*!< in: table */ -{ - const dict_index_t* clust_index; - - clust_index = dict_table_get_first_index(table); - - return(dict_index_get_nth_col(clust_index, 0)->mtype == DATA_SYS); -} - -/*********************************************************************//** -Locks the data dictionary in shared mode from modifications, for performing -foreign key check, rollback, or other operation invisible to MySQL. */ -UNIV_INTERN -void -row_mysql_freeze_data_dictionary_func( -/*==================================*/ - trx_t* trx, /*!< in/out: transaction */ - const char* file, /*!< in: file name */ - ulint line) /*!< in: line number */ -{ - ut_a(trx->dict_operation_lock_mode == 0); - - rw_lock_s_lock_inline(&dict_operation_lock, 0, file, line); - - trx->dict_operation_lock_mode = RW_S_LATCH; -} - -/*********************************************************************//** -Unlocks the data dictionary shared lock. 
*/ -UNIV_INTERN -void -row_mysql_unfreeze_data_dictionary( -/*===============================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_ad(lock_trx_has_sys_table_locks(trx) == NULL); - - ut_a(trx->dict_operation_lock_mode == RW_S_LATCH); - - rw_lock_s_unlock(&dict_operation_lock); - - trx->dict_operation_lock_mode = 0; -} - -/*********************************************************************//** -Locks the data dictionary exclusively for performing a table create or other -data dictionary modification operation. */ -UNIV_INTERN -void -row_mysql_lock_data_dictionary_func( -/*================================*/ - trx_t* trx, /*!< in/out: transaction */ - const char* file, /*!< in: file name */ - ulint line) /*!< in: line number */ -{ - ut_a(trx->dict_operation_lock_mode == 0 - || trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks or lock waits can occur then in these operations */ - - rw_lock_x_lock_inline(&dict_operation_lock, 0, file, line); - trx->dict_operation_lock_mode = RW_X_LATCH; - - mutex_enter(&(dict_sys->mutex)); -} - -/*********************************************************************//** -Unlocks the data dictionary exclusive lock. */ -UNIV_INTERN -void -row_mysql_unlock_data_dictionary( -/*=============================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_ad(lock_trx_has_sys_table_locks(trx) == NULL); - - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&dict_operation_lock); - - trx->dict_operation_lock_mode = 0; -} - -/*********************************************************************//** -Creates a table for MySQL. 
If the name of the table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also start the printing of monitor -output by the master thread. If the table name ends in "innodb_mem_validate", -InnoDB will try to invoke mem_validate(). On failure the transaction will -be rolled back and the 'table' object will be freed. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_create_table_for_mysql( -/*=======================*/ - dict_table_t* table, /*!< in, own: table definition - (will be freed, or on DB_SUCCESS - added to the data dictionary cache) */ - trx_t* trx, /*!< in/out: transaction */ - bool commit, /*!< in: if true, commit the transaction */ - fil_encryption_t mode, /*!< in: encryption mode */ - ulint key_id) /*!< in: encryption key_id */ -{ - tab_node_t* node; - mem_heap_t* heap; - que_thr_t* thr; - const char* table_name; - ulint table_name_len; - dberr_t err; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - - DBUG_EXECUTE_IF( - "ib_create_table_fail_at_start_of_row_create_table_for_mysql", - goto err_exit; - ); - - trx->op_info = "creating table"; - - if (row_mysql_is_system_table(table->name)) { - - fprintf(stderr, - "InnoDB: Error: trying to create a MySQL system" - " table %s of type InnoDB.\n" - "InnoDB: MySQL system tables must be" - " of the MyISAM type!\n", - table->name); - -#ifndef DBUG_OFF -err_exit: -#endif /* !DBUG_OFF */ - dict_mem_table_free(table); - - if (commit) { - trx_commit_for_mysql(trx); - } - - trx->op_info = ""; - - return(DB_ERROR); - } - - trx_start_if_not_started_xa(trx); - - /* The table name is prefixed with the database name and a '/'. - Certain table names starting with 'innodb_' have their special - meaning regardless of the database name. 
Thus, we need to - ignore the database name prefix in the comparisons. */ - table_name = dict_remove_db_name(table->name); - table_name_len = strlen(table_name) + 1; - - if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) { - - /* Table equals "innodb_monitor": - start monitor prints */ - - srv_print_innodb_monitor = TRUE; - - /* The lock timeout monitor thread also takes care - of InnoDB monitor prints */ - - os_event_set(srv_monitor_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_lock_monitor)) { - - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - os_event_set(srv_monitor_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_tablespace_monitor)) { - - srv_print_innodb_tablespace_monitor = TRUE; - os_event_set(srv_monitor_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_table_monitor)) { - - srv_print_innodb_table_monitor = TRUE; - os_event_set(srv_monitor_event); -#ifdef UNIV_MEM_DEBUG - } else if (STR_EQ(table_name, table_name_len, - S_innodb_mem_validate)) { - /* We define here a debugging feature intended for - developers */ - - fputs("Validating InnoDB memory:\n" - "to use this feature you must compile InnoDB with\n" - "UNIV_MEM_DEBUG defined in univ.i and" - " the server must be\n" - "quiet because allocation from a mem heap" - " is not protected\n" - "by any semaphore.\n", stderr); - ut_a(mem_validate()); - fputs("Memory validated\n", stderr); -#endif /* UNIV_MEM_DEBUG */ - } - - heap = mem_heap_create(512); - - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - case TRX_DICT_OP_TABLE: - break; - case TRX_DICT_OP_INDEX: - /* If the transaction was previously flagged as - TRX_DICT_OP_INDEX, we should be creating auxiliary - tables for full-text indexes. 
*/ - ut_ad(strstr(table->name, "/FTS_") != NULL); - } - - node = tab_create_graph_create(table, heap, commit, mode, key_id); - - thr = pars_complete_graph_for_exec(node, trx, heap); - - ut_a(thr == que_fork_start_command( - static_cast<que_fork_t*>(que_node_get_parent(thr)))); - - que_run_threads(thr); - - err = trx->error_state; - - if (table->space != TRX_SYS_SPACE) { - ut_a(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE)); - - /* Update SYS_TABLESPACES and SYS_DATAFILES if a new - tablespace was created. */ - if (err == DB_SUCCESS) { - char* path; - path = fil_space_get_first_path(table->space); - - err = dict_create_add_tablespace_to_dictionary( - table->space, table->name, - fil_space_get_flags(table->space), - path, trx, commit); - - mem_free(path); - } - - if (err != DB_SUCCESS) { - /* We must delete the link file. */ - fil_delete_link_file(table->name); - } - } - - switch (err) { - case DB_SUCCESS: - break; - case DB_OUT_OF_FILE_SPACE: - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: cannot create table ", - stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" because tablespace full\n", stderr); - - if (dict_table_open_on_name(table->name, TRUE, FALSE, - DICT_ERR_IGNORE_NONE)) { - - /* Make things easy for the drop table code. */ - - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } - - dict_table_close(table, TRUE, FALSE); - - row_drop_table_for_mysql(table->name, trx, FALSE, TRUE); - - if (commit) { - trx_commit_for_mysql(trx); - } - } else { - dict_mem_table_free(table); - } - - break; - - case DB_TOO_MANY_CONCURRENT_TRXS: - /* We already have .ibd file here. it should be deleted. 
*/ - - if (table->space - && fil_delete_tablespace( - table->space, - BUF_REMOVE_FLUSH_NO_WRITE) - != DB_SUCCESS) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: not able to" - " delete tablespace %lu of table ", - (ulong) table->space); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("!\n", stderr); - } - /* fall through */ - - case DB_DUPLICATE_KEY: - case DB_TABLESPACE_EXISTS: - default: - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - dict_mem_table_free(table); - break; - } - - que_graph_free((que_t*) que_node_get_parent(thr)); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Does an index creation operation for MySQL. TODO: currently failure -to create an index results in dropping the whole table! This is no problem -currently as all indexes must be created at the same time as the table. -@return error number or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_create_index_for_mysql( -/*=======================*/ - dict_index_t* index, /*!< in, own: index definition - (will be freed) */ - trx_t* trx, /*!< in: transaction handle */ - const ulint* field_lengths) /*!< in: if not NULL, must contain - dict_index_get_n_fields(index) - actual field lengths for the - index columns, which are - then checked for not being too - large. */ -{ - ind_node_t* node; - mem_heap_t* heap; - que_thr_t* thr; - dberr_t err; - ulint i; - ulint len; - char* table_name; - char* index_name; - dict_table_t* table; - ibool is_fts; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx->op_info = "creating index"; - - /* Copy the table name because we may want to drop the - table later, after the index object is freed (inside - que_run_threads()) and thus index->table_name is not available. 
*/ - table_name = mem_strdup(index->table_name); - index_name = mem_strdup(index->name); - - is_fts = (index->type == DICT_FTS); - - table = dict_table_open_on_name(table_name, TRUE, TRUE, - DICT_ERR_IGNORE_NONE); - - trx_start_if_not_started_xa(trx); - - for (i = 0; i < index->n_def; i++) { - /* Check that prefix_len and actual length - < DICT_MAX_INDEX_COL_LEN */ - - len = dict_index_get_nth_field(index, i)->prefix_len; - - if (field_lengths && field_lengths[i]) { - len = ut_max(len, field_lengths[i]); - } - - DBUG_EXECUTE_IF( - "ib_create_table_fail_at_create_index", - len = DICT_MAX_FIELD_LEN_BY_FORMAT(table) + 1; - ); - - /* Column or prefix length exceeds maximum column length */ - if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) { - err = DB_TOO_BIG_INDEX_COL; - - dict_mem_index_free(index); - goto error_handling; - } - } - - heap = mem_heap_create(512); - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - /* Note that the space id where we store the index is inherited from - the table in dict_build_index_def_step() in dict0crea.cc. */ - - node = ind_create_graph_create(index, heap, true); - - thr = pars_complete_graph_for_exec(node, trx, heap); - - ut_a(thr == que_fork_start_command( - static_cast<que_fork_t*>(que_node_get_parent(thr)))); - - que_run_threads(thr); - - err = trx->error_state; - - que_graph_free((que_t*) que_node_get_parent(thr)); - - /* Create the index specific FTS auxiliary tables. 
*/ - if (err == DB_SUCCESS && is_fts) { - dict_index_t* idx; - - idx = dict_table_get_index_on_name(table, index_name); - - ut_ad(idx); - err = fts_create_index_tables(trx, idx); - } - -error_handling: - dict_table_close(table, TRUE, FALSE); - - if (err != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - row_drop_table_for_mysql(table_name, trx, FALSE, TRUE); - - trx_commit_for_mysql(trx); - - trx->error_state = DB_SUCCESS; - } - - trx->op_info = ""; - - mem_free(table_name); - mem_free(index_name); - - return(err); -} - -/*********************************************************************//** -Scans a table create SQL string and adds to the data dictionary -the foreign key constraints declared in the string. This function -should be called after the indexes for a table have been created. -Each foreign key constraint must be accompanied with indexes in -both participating tables. The indexes are allowed to contain more -fields than mentioned in the constraint. Check also that foreign key -constraints which reference this table are ok. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_table_add_foreign_constraints( -/*==============================*/ - trx_t* trx, /*!< in: transaction */ - const char* sql_string, /*!< in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the - database name before it: test.table2 */ - size_t sql_length, /*!< in: length of sql_string */ - const char* name, /*!< in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /*!< in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. 
*/ -{ - dberr_t err; - - ut_ad(mutex_own(&(dict_sys->mutex))); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_a(sql_string); - - trx->op_info = "adding foreign keys"; - - trx_start_if_not_started_xa(trx); - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - err = dict_create_foreign_constraints(trx, sql_string, sql_length, - name, reject_fks); - - DBUG_EXECUTE_IF("ib_table_add_foreign_fail", - err = DB_DUPLICATE_KEY;); - - DEBUG_SYNC_C("table_add_foreign_constraints"); - - if (err == DB_SUCCESS) { - /* Check that also referencing constraints are ok */ - err = dict_load_foreigns(name, NULL, false, true, - DICT_ERR_IGNORE_NONE); - } - - if (err != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - row_drop_table_for_mysql(name, trx, FALSE, TRUE); - - trx_commit_for_mysql(trx); - - trx->error_state = DB_SUCCESS; - } - - return(err); -} - -/*********************************************************************//** -Drops a table for MySQL as a background operation. MySQL relies on Unix -in ALTER TABLE to the fact that the table handler does not remove the -table before all handles to it has been removed. Furhermore, the MySQL's -call to drop table must be non-blocking. Therefore we do the drop table -as a background operation, which is taken care of by the master thread -in srv0srv.cc. 
-@return error code or DB_SUCCESS */ -static -dberr_t -row_drop_table_for_mysql_in_background( -/*===================================*/ - const char* name) /*!< in: table name */ -{ - dberr_t error; - trx_t* trx; - - trx = trx_allocate_for_background(); - - /* If the original transaction was dropping a table referenced by - foreign keys, we must set the following to be able to drop the - table: */ - - trx->check_foreigns = FALSE; - - /* fputs("InnoDB: Error: Dropping table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs(" in background drop list\n", stderr); */ - - /* Try to drop the table in InnoDB */ - - error = row_drop_table_for_mysql(name, trx, FALSE, FALSE); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - trx_commit_for_mysql(trx); - - trx_free_for_background(trx); - - return(error); -} - -/*********************************************************************//** -The master thread in srv0srv.cc calls this regularly to drop tables which -we must drop in background after queries to them have ended. Such lazy -dropping of tables is needed in ALTER TABLE on Unix. 
-@return how many tables dropped + remaining tables in list */ -UNIV_INTERN -ulint -row_drop_tables_for_mysql_in_background(void) -/*=========================================*/ -{ - row_mysql_drop_t* drop; - dict_table_t* table; - ulint n_tables; - ulint n_tables_dropped = 0; -loop: - mutex_enter(&row_drop_list_mutex); - - ut_a(row_mysql_drop_list_inited); - - drop = UT_LIST_GET_FIRST(row_mysql_drop_list); - - n_tables = UT_LIST_GET_LEN(row_mysql_drop_list); - - mutex_exit(&row_drop_list_mutex); - - if (drop == NULL) { - /* All tables dropped */ - - return(n_tables + n_tables_dropped); - } - - DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep", - os_thread_sleep(5000000); - ); - - table = dict_table_open_on_name(drop->table_name, FALSE, FALSE, - DICT_ERR_IGNORE_NONE); - - if (table == NULL) { - /* If for some reason the table has already been dropped - through some other mechanism, do not try to drop it */ - - goto already_dropped; - } - - if (!table->to_be_dropped) { - /* There is a scenario: the old table is dropped - just after it's added into drop list, and new - table with the same name is created, then we try - to drop the new table in background. 
*/ - dict_table_close(table, FALSE, FALSE); - - goto already_dropped; - } - - ut_a(!table->can_be_evicted); - - dict_table_close(table, FALSE, FALSE); - - if (DB_SUCCESS != row_drop_table_for_mysql_in_background( - drop->table_name)) { - /* If the DROP fails for some table, we return, and let the - main thread retry later */ - - return(n_tables + n_tables_dropped); - } - - n_tables_dropped++; - -already_dropped: - mutex_enter(&row_drop_list_mutex); - - UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop); - - MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE); - - ut_print_timestamp(stderr); - fputs(" InnoDB: Dropped table ", stderr); - ut_print_name(stderr, NULL, TRUE, drop->table_name); - fputs(" in background drop queue.\n", stderr); - - mem_free(drop->table_name); - - mem_free(drop); - - mutex_exit(&row_drop_list_mutex); - - goto loop; -} - -/*********************************************************************//** -Get the background drop list length. NOTE: the caller must own the -drop list mutex! -@return how many tables in list */ -UNIV_INTERN -ulint -row_get_background_drop_list_len_low(void) -/*======================================*/ -{ - ulint len; - - mutex_enter(&row_drop_list_mutex); - - ut_a(row_mysql_drop_list_inited); - - len = UT_LIST_GET_LEN(row_mysql_drop_list); - - mutex_exit(&row_drop_list_mutex); - - return(len); -} - -/*********************************************************************//** -If a table is not yet in the drop list, adds the table to the list of tables -which the master thread drops in background. We need this on Unix because in -ALTER TABLE MySQL may call drop table even if the table has running queries on -it. Also, if there are running foreign key checks on the table, we drop the -table lazily. 
-@return TRUE if the table was not yet in the drop list, and was added there */ -static -ibool -row_add_table_to_background_drop_list( -/*==================================*/ - const char* name) /*!< in: table name */ -{ - row_mysql_drop_t* drop; - - mutex_enter(&row_drop_list_mutex); - - ut_a(row_mysql_drop_list_inited); - - /* Look if the table already is in the drop list */ - for (drop = UT_LIST_GET_FIRST(row_mysql_drop_list); - drop != NULL; - drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop)) { - - if (strcmp(drop->table_name, name) == 0) { - /* Already in the list */ - - mutex_exit(&row_drop_list_mutex); - - return(FALSE); - } - } - - drop = static_cast<row_mysql_drop_t*>( - mem_alloc(sizeof(row_mysql_drop_t))); - - drop->table_name = mem_strdup(name); - - UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, drop); - - MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE); - - /* fputs("InnoDB: Adding table ", stderr); - ut_print_name(stderr, trx, TRUE, drop->table_name); - fputs(" to background drop list\n", stderr); */ - - mutex_exit(&row_drop_list_mutex); - - return(TRUE); -} - -/*********************************************************************//** -Reassigns the table identifier of a table. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_mysql_table_id_reassign( -/*========================*/ - dict_table_t* table, /*!< in/out: table */ - trx_t* trx, /*!< in/out: transaction */ - table_id_t* new_id) /*!< out: new table id */ -{ - dberr_t err; - pars_info_t* info = pars_info_create(); - - dict_hdr_get_new_id(new_id, NULL, NULL); - - /* Remove all locks except the table-level S and X locks. 
*/ - lock_remove_all_on_table(table, FALSE); - - pars_info_add_ull_literal(info, "old_id", table->id); - pars_info_add_ull_literal(info, "new_id", *new_id); - - /* As micro-SQL does not support int4 == int8 comparisons, - old and new IDs are added again under different names as - int4 values*/ - pars_info_add_int4_literal(info, "old_id_narrow", table->id); - pars_info_add_int4_literal(info, "new_id_narrow", *new_id); - - err = que_eval_sql( - info, - "PROCEDURE RENUMBER_TABLE_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES SET ID = :new_id\n" - " WHERE ID = :old_id;\n" - "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "END;\n", FALSE, trx); - - return(err); -} - -/*********************************************************************//** -Setup the pre-requisites for DISCARD TABLESPACE. It will start the transaction, -acquire the data dictionary lock in X mode and open the table. -@return table instance or 0 if not found. */ -static -dict_table_t* -row_discard_tablespace_begin( -/*=========================*/ - const char* name, /*!< in: table name */ - trx_t* trx) /*!< in: transaction handle */ -{ - trx->op_info = "discarding tablespace"; - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - trx_start_if_not_started_xa(trx); - - /* Serialize data dictionary operations with dictionary mutex: - this is to avoid deadlocks during data dictionary operations */ - - row_mysql_lock_data_dictionary(trx); - - dict_table_t* table; - - table = dict_table_open_on_name( - name, TRUE, FALSE, DICT_ERR_IGNORE_NONE); - - if (table) { - dict_stats_wait_bg_to_stop_using_table(table, trx); - ut_a(table->space != TRX_SYS_SPACE); - ut_a(table->n_foreign_key_checks_running == 0); - } - - return(table); -} - -/*********************************************************************//** -Do the foreign key constraint checks. -@return DB_SUCCESS or error code. 
*/ -static -dberr_t -row_discard_tablespace_foreign_key_checks( -/*======================================*/ - const trx_t* trx, /*!< in: transaction handle */ - const dict_table_t* table) /*!< in: table to be discarded */ -{ - - if (srv_read_only_mode || !trx->check_foreigns) { - return(DB_SUCCESS); - } - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - dict_foreign_set::const_iterator it - = std::find_if(table->referenced_set.begin(), - table->referenced_set.end(), - dict_foreign_different_tables()); - - if (it == table->referenced_set.end()) { - return(DB_SUCCESS); - } - - const dict_foreign_t* foreign = *it; - FILE* ef = dict_foreign_err_file; - - ut_ad(foreign->foreign_table != table); - ut_ad(foreign->referenced_table == table); - - /* We only allow discarding a referenced table if - FOREIGN_KEY_CHECKS is set to 0 */ - - mutex_enter(&dict_foreign_err_mutex); - - rewind(ef); - - ut_print_timestamp(ef); - - fputs(" Cannot DISCARD table ", ef); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "because it is referenced by ", ef); - ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name); - putc('\n', ef); - - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_DROP_CONSTRAINT); -} - -/*********************************************************************//** -Cleanup after the DISCARD TABLESPACE operation. -@return error code. 
*/ -static -dberr_t -row_discard_tablespace_end( -/*=======================*/ - trx_t* trx, /*!< in/out: transaction handle */ - dict_table_t* table, /*!< in/out: table to be discarded */ - dberr_t err) /*!< in: error code */ -{ - if (table != 0) { - dict_table_close(table, TRUE, FALSE); - } - - DBUG_EXECUTE_IF("ib_discard_before_commit_crash", - log_make_checkpoint_at(LSN_MAX, TRUE); - DBUG_SUICIDE();); - - trx_commit_for_mysql(trx); - - DBUG_EXECUTE_IF("ib_discard_after_commit_crash", - log_make_checkpoint_at(LSN_MAX, TRUE); - DBUG_SUICIDE();); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Do the DISCARD TABLESPACE operation. -@return DB_SUCCESS or error code. */ -static -dberr_t -row_discard_tablespace( -/*===================*/ - trx_t* trx, /*!< in/out: transaction handle */ - dict_table_t* table) /*!< in/out: table to be discarded */ -{ - dberr_t err; - - /* How do we prevent crashes caused by ongoing operations on - the table? Old operations could try to access non-existent - pages. MySQL will block all DML on the table using MDL and a - DISCARD will not start unless all existing operations on the - table to be discarded are completed. - - 1) Acquire the data dictionary latch in X mode. To prevent any - internal operations that MySQL is not aware off and also for - the internal SQL parser. - - 2) Purge and rollback: we assign a new table id for the - table. Since purge and rollback look for the table based on - the table id, they see the table as 'dropped' and discard - their operations. - - 3) Insert buffer: we remove all entries for the tablespace in - the insert buffer tree. - - 4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0, - we do not allow the discard. 
*/ - - /* Play safe and remove all insert buffer entries, though we should - have removed them already when DISCARD TABLESPACE was called */ - - ibuf_delete_for_discarded_space(table->space); - - table_id_t new_id; - - /* Set the TABLESPACE DISCARD flag in the table definition on disk. */ - - err = row_import_update_discarded_flag(trx, table->id, true, true); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Update the index root pages in the system tables, on disk */ - - err = row_import_update_index_root(trx, table, true, true); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Drop all the FTS auxiliary tables. */ - if (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - - fts_drop_tables(trx, table); - } - - /* Assign a new space ID to the table definition so that purge - can ignore the changes. Update the system table on disk. */ - - err = row_mysql_table_id_reassign(table, trx, &new_id); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Discard the physical file that is used for the tablespace. */ - - err = fil_discard_tablespace(table->space); - - switch(err) { - case DB_SUCCESS: - case DB_IO_ERROR: - case DB_TABLESPACE_NOT_FOUND: - /* All persistent operations successful, update the - data dictionary memory cache. */ - - table->file_unreadable = true; - - table->flags2 |= DICT_TF2_DISCARDED; - - dict_table_change_id_in_cache(table, new_id); - - /* Reset the root page numbers. */ - - for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != 0; - index = UT_LIST_GET_NEXT(indexes, index)) { - - index->page = FIL_NULL; - index->space = FIL_NULL; - } - - /* If the tablespace did not already exist or we couldn't - write to it, we treat that as a successful DISCARD. It is - unusable anyway. */ - - err = DB_SUCCESS; - break; - - default: - /* We need to rollback the disk changes, something failed. 
*/ - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - trx->error_state = DB_SUCCESS; - } - - return(err); -} - -/*********************************************************************//** -Discards the tablespace of a table which stored in an .ibd file. Discarding -means that this function renames the .ibd file and assigns a new table id for -the table. Also the flag table->ibd_file_missing is set to TRUE. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_discard_tablespace_for_mysql( -/*=============================*/ - const char* name, /*!< in: table name */ - trx_t* trx) /*!< in: transaction handle */ -{ - dberr_t err; - dict_table_t* table; - - /* Open the table and start the transaction if not started. */ - - table = row_discard_tablespace_begin(name, trx); - - if (table == 0) { - err = DB_TABLE_NOT_FOUND; - } else if (table->space == TRX_SYS_SPACE) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_TABLE_IN_SYSTEM_TABLESPACE, table_name); - - err = DB_ERROR; - - } else if (table->n_foreign_key_checks_running > 0) { - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR, - ER_DISCARD_FK_CHECKS_RUNNING, table_name); - - err = DB_ERROR; - - } else { - /* Do foreign key constraint checks. */ - - err = row_discard_tablespace_foreign_key_checks(trx, table); - - if (err == DB_SUCCESS) { - err = row_discard_tablespace(trx, table); - } - } - - return(row_discard_tablespace_end(trx, table, err)); -} - -/*********************************************************************//** -Sets an exclusive lock on a table. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_mysql_lock_table( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_table_t* table, /*!< in: table to lock */ - enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */ - const char* op_info) /*!< in: string for trx->op_info */ -{ - mem_heap_t* heap; - que_thr_t* thr; - dberr_t err; - sel_node_t* node; - - ut_ad(trx); - ut_ad(mode == LOCK_X || mode == LOCK_S); - - heap = mem_heap_create(512); - - trx->op_info = op_info; - - node = sel_node_create(heap); - thr = pars_complete_graph_for_exec(node, trx, heap); - thr->graph->state = QUE_FORK_ACTIVE; - - /* We use the select query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr( - static_cast<que_fork_t*>(que_node_get_parent(thr))); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - err = lock_table(0, table, mode, thr); - - trx->error_state = err; - - if (err == DB_SUCCESS) { - que_thr_stop_for_mysql_no_error(thr, trx); - } else { - que_thr_stop_for_mysql(thr); - - if (err != DB_QUE_THR_SUSPENDED) { - ibool was_lock_wait; - - was_lock_wait = row_mysql_handle_errors( - &err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - } else { - que_thr_t* run_thr; - que_node_t* parent; - - parent = que_node_get_parent(thr); - - run_thr = que_fork_start_command( - static_cast<que_fork_t*>(parent)); - - ut_a(run_thr == thr); - - /* There was a lock wait but the thread was not - in a ready to run or running state. 
*/ - trx->error_state = DB_LOCK_WAIT; - - goto run_again; - } - } - - que_graph_free(thr->graph); - trx->op_info = ""; - - return(err); -} - -static -void -fil_wait_crypt_bg_threads( - dict_table_t* table) -{ - time_t start = time(0); - time_t last = start; - - while (table->n_ref_count > 0) { - dict_mutex_exit_for_mysql(); - os_thread_sleep(20000); - dict_mutex_enter_for_mysql(); - time_t now = time(0); - if (now >= last + 30) { - fprintf(stderr, - "WARNING: waited %ld seconds " - "for ref-count on table: %s space: %u\n", - now - start, table->name, table->space); - last = now; - } - - if (now >= start + 300) { - fprintf(stderr, - "WARNING: after %ld seconds, gave up waiting " - "for ref-count on table: %s space: %u\n", - now - start, table->name, table->space); - break; - } - } -} - -/*********************************************************************//** -Truncates a table for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_truncate_table_for_mysql( -/*=========================*/ - dict_table_t* table, /*!< in: table handle */ - trx_t* trx) /*!< in: transaction handle */ -{ - dberr_t err; - mem_heap_t* heap; - byte* buf; - dtuple_t* tuple; - dfield_t* dfield; - dict_index_t* sys_index; - btr_pcur_t pcur; - mtr_t mtr; - table_id_t new_id; - ulint recreate_space = 0; - pars_info_t* info = NULL; - ibool has_internal_doc_id; - ulint old_space = table->space; - - /* How do we prevent crashes caused by ongoing operations on - the table? Old operations could try to access non-existent - pages. - - 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive - InnoDB table lock on the table before we can do TRUNCATE - TABLE. Then there are no running queries on the table. - - 2) Purge and rollback: we assign a new table id for the - table. Since purge and rollback look for the table based on - the table id, they see the table as 'dropped' and discard - their operations. 
- - 3) Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE, - so we do not have to remove insert buffer records, as the - insert buffer works at a low level. If a freed page is later - reallocated, the allocator will remove the ibuf entries for - it. - - When we truncate *.ibd files by recreating them (analogous to - DISCARD TABLESPACE), we remove all entries for the table in the - insert buffer tree. This is not strictly necessary, because - in 6) we will assign a new tablespace identifier, but we can - free up some space in the system tablespace. - - 4) Linear readahead and random readahead: we use the same - method as in 3) to discard ongoing operations. (This is only - relevant for TRUNCATE TABLE by DISCARD TABLESPACE.) - - 5) FOREIGN KEY operations: if - table->n_foreign_key_checks_running > 0, we do not allow the - TRUNCATE. We also reserve the data dictionary latch. - - 6) Crash recovery: To prevent the application of pre-truncation - redo log records on the truncated tablespace, we will assign - a new tablespace identifier to the truncated tablespace. */ - - ut_ad(table); - - if (dict_table_is_discarded(table)) { - return(DB_TABLESPACE_DELETED); - } else if (!table->is_readable()) { - return (row_mysql_get_table_status(table, trx, true)); - } - - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - - trx->op_info = "truncating table"; - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - ut_a(trx->dict_operation_lock_mode == 0); - /* Prevent foreign key checks etc. 
while we are truncating the - table */ - row_mysql_lock_data_dictionary(trx); - - ut_ad(mutex_own(&(dict_sys->mutex))); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - dict_stats_wait_bg_to_stop_using_table(table, trx); - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - dict_foreign_set::iterator it - = std::find_if(table->referenced_set.begin(), - table->referenced_set.end(), - dict_foreign_different_tables()); - - if (!srv_read_only_mode - && it != table->referenced_set.end() - && trx->check_foreigns) { - - FILE* ef = dict_foreign_err_file; - dict_foreign_t* foreign = *it; - - /* We only allow truncating a referenced table if - FOREIGN_KEY_CHECKS is set to 0 */ - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - - fputs(" Cannot truncate table ", ef); - ut_print_name(ef, trx, TRUE, table->name); - fputs(" by DROP+CREATE\n" - "InnoDB: because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); - - err = DB_ERROR; - goto funct_exit; - } - - /* TODO: could we replace the counter n_foreign_key_checks_running - with lock checks on the table? Acquire here an exclusive lock on the - table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that - they can cope with the table having been truncated here? Foreign key - checks take an IS or IX lock on the table. */ - - if (table->n_foreign_key_checks_running > 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Cannot truncate table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" by DROP+CREATE\n" - "InnoDB: because there is a foreign key check" - " running on it.\n", - stderr); - err = DB_ERROR; - - goto funct_exit; - } - - /* Check if memcached plugin is running on this table. if is, we don't - allow truncate this table. 
*/ - if (table->memcached_sync_count != 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Cannot truncate table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" by DROP+CREATE\n" - "InnoDB: because there are memcached operations" - " running on it.\n", - stderr); - err = DB_ERROR; - - goto funct_exit; - } else { - /* We need to set this counter to -1 for blocking - memcached operations. */ - table->memcached_sync_count = DICT_TABLE_IN_DDL; - } - - /* Remove all locks except the table-level X lock. */ - - lock_remove_all_on_table(table, FALSE); - - /* Ensure that the table will be dropped by - trx_rollback_active() in case of a crash. */ - - trx->table_id = table->id; - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - /* Assign an undo segment for the transaction, so that the - transaction will be recovered after a crash. */ - - mutex_enter(&trx->undo_mutex); - - err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); - - mutex_exit(&trx->undo_mutex); - - if (err != DB_SUCCESS) { - - goto funct_exit; - } - - if (table->space && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) { - /* Discard and create the single-table tablespace. */ - ulint space_id = table->space; - ulint flags = ULINT_UNDEFINED; - ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY; - fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT; - - dict_get_and_save_data_dir_path(table, true); - - if (fil_space_t* space = fil_space_acquire(space_id)) { - fil_space_crypt_t* crypt_data = space->crypt_data; - - if (crypt_data) { - key_id = crypt_data->key_id; - mode = crypt_data->encryption; - } - - flags = space->flags; - fil_space_release(space); - } - - if (flags != ULINT_UNDEFINED - && fil_discard_tablespace(space_id) == DB_SUCCESS) { - - dict_index_t* index; - - dict_hdr_get_new_id(NULL, NULL, &space_id); - - /* Lock all index trees for this table. 
We must - do so after dict_hdr_get_new_id() to preserve - the latch order */ - dict_table_x_lock_indexes(table); - - if (space_id == ULINT_UNDEFINED - || fil_create_new_single_table_tablespace( - space_id, table->name, - table->data_dir_path, - flags, table->flags2, - FIL_IBD_FILE_INITIAL_SIZE, - mode, key_id) - != DB_SUCCESS) { - dict_table_x_unlock_indexes(table); - - ib_logf(IB_LOG_LEVEL_ERROR, - "TRUNCATE TABLE %s failed to " - "create a new tablespace", - table->name); - - table->file_unreadable = true; - err = DB_ERROR; - goto funct_exit; - } - - recreate_space = space_id; - - /* Replace the space_id in the data dictionary cache. - The persisent data dictionary (SYS_TABLES.SPACE - and SYS_INDEXES.SPACE) are updated later in this - function. */ - table->space = space_id; - index = dict_table_get_first_index(table); - do { - index->space = space_id; - index = dict_table_get_next_index(index); - } while (index); - - mtr_start_trx(&mtr, trx); - fsp_header_init(space_id, - FIL_IBD_FILE_INITIAL_SIZE, &mtr); - mtr_commit(&mtr); - } - } else { - /* Lock all index trees for this table, as we will - truncate the table/index and possibly change their metadata. 
- All DML/DDL are blocked by table level lock, with - a few exceptions such as queries into information schema - about the table, MySQL could try to access index stats - for this kind of query, we need to use index locks to - sync up */ - dict_table_x_lock_indexes(table); - } - - /* scan SYS_INDEXES for all indexes of the table */ - heap = mem_heap_create(800); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - sys_index = dict_table_get_first_index(dict_sys->sys_indexes); - dict_index_copy_types(tuple, sys_index, 1); - - mtr_start_trx(&mtr, trx); - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &pcur, &mtr); - for (;;) { - rec_t* rec; - const byte* field; - ulint len; - ulint root_page_no; - - if (!btr_pcur_is_on_user_rec(&pcur)) { - /* The end of SYS_INDEXES has been reached. */ - break; - } - - rec = btr_pcur_get_rec(&pcur); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len); - ut_ad(len == 8); - - if (memcmp(buf, field, len) != 0) { - /* End of indexes for the table (TABLE_ID mismatch). */ - break; - } - - if (rec_get_deleted_flag(rec, FALSE)) { - /* The index has been dropped. */ - goto next_rec; - } - - /* This call may commit and restart mtr - and reposition pcur. */ - root_page_no = dict_truncate_index_tree(table, recreate_space, - &pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - if (root_page_no != FIL_NULL) { - page_rec_write_field( - rec, DICT_FLD__SYS_INDEXES__PAGE_NO, - root_page_no, &mtr); - /* We will need to commit and restart the - mini-transaction in order to avoid deadlocks. - The dict_truncate_index_tree() call has allocated - a page in this mini-transaction, and the rest of - this loop could latch another index page. 
*/ - mtr_commit(&mtr); - mtr_start_trx(&mtr, trx); - btr_pcur_restore_position(BTR_MODIFY_LEAF, - &pcur, &mtr); - } - -next_rec: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - mem_heap_free(heap); - /* Done with index truncation, release index tree locks, - subsequent work relates to table level metadata change */ - dict_table_x_unlock_indexes(table); - - dict_hdr_get_new_id(&new_id, NULL, NULL); - - /* Create new FTS auxiliary tables with the new_id, and - drop the old index later, only if everything runs successful. */ - has_internal_doc_id = dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET( - table, DICT_TF2_FTS_HAS_DOC_ID); - if (has_internal_doc_id) { - dict_table_t fts_table; - ulint i; - - fts_table.name = table->name; - fts_table.id = new_id; - fts_table.flags2 = table->flags2; - - err = fts_create_common_tables( - trx, &fts_table, table->name, TRUE); - - for (i = 0; - i < ib_vector_size(table->fts->indexes) - && err == DB_SUCCESS; - i++) { - - dict_index_t* fts_index; - - fts_index = static_cast<dict_index_t*>( - ib_vector_getp(table->fts->indexes, i)); - - err = fts_create_index_tables_low( - trx, fts_index, table->name, new_id); - } - - if (err != DB_SUCCESS) { - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - ut_print_timestamp(stderr); - fputs(" InnoDB: Unable to truncate FTS index for" - " table", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n", stderr); - - goto funct_exit; - } else { - ut_ad(trx->state != TRX_STATE_NOT_STARTED); - } - } - - info = pars_info_create(); - - pars_info_add_int4_literal(info, "new_space", (lint) table->space); - pars_info_add_ull_literal(info, "old_id", table->id); - pars_info_add_ull_literal(info, "new_id", new_id); - - /* As micro-SQL does not support int4 == int8 comparisons, - old and new IDs are added again under different names as - int4 values*/ - 
pars_info_add_int4_literal(info, "old_id_narrow", table->id); - pars_info_add_int4_literal(info, "new_id_narrow", new_id); - - err = que_eval_sql(info, - "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES" - " SET ID = :new_id, SPACE = :new_space\n" - " WHERE ID = :old_id;\n" - "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "UPDATE SYS_INDEXES" - " SET TABLE_ID = :new_id, SPACE = :new_space\n" - " WHERE TABLE_ID = :old_id;\n" - "END;\n" - , FALSE, trx); - - if (err == DB_SUCCESS && old_space != table->space) { - info = pars_info_create(); - - pars_info_add_int4_literal(info, "old_space", (lint) old_space); - - pars_info_add_int4_literal( - info, "new_space", (lint) table->space); - - err = que_eval_sql(info, - "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLESPACES" - " SET SPACE = :new_space\n" - " WHERE SPACE = :old_space;\n" - "UPDATE SYS_DATAFILES" - " SET SPACE = :new_space" - " WHERE SPACE = :old_space;\n" - "END;\n" - , FALSE, trx); - } - DBUG_EXECUTE_IF("ib_ddl_crash_before_fts_truncate", err = DB_ERROR;); - - if (err != DB_SUCCESS) { - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - - /* Update system table failed. Table in memory metadata - could be in an inconsistent state, mark the in-memory - table->corrupted to be true. In the long run, this should - be fixed by atomic truncate table */ - table->corrupted = true; - - ut_print_timestamp(stderr); - fputs(" InnoDB: Unable to assign a new identifier to table ", - stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: after truncating it. 
Background processes" - " may corrupt the table!\n", stderr); - - /* Failed to update the table id, so drop the new - FTS auxiliary tables */ - if (has_internal_doc_id) { - ut_ad(trx->state == TRX_STATE_NOT_STARTED); - - table_id_t id = table->id; - - table->id = new_id; - - fts_drop_tables(trx, table); - - table->id = id; - - ut_ad(trx->state != TRX_STATE_NOT_STARTED); - } - - err = DB_ERROR; - } else { - /* Drop the old FTS index */ - if (has_internal_doc_id) { - ut_ad(trx->state != TRX_STATE_NOT_STARTED); - fts_drop_tables(trx, table); - ut_ad(trx->state != TRX_STATE_NOT_STARTED); - } - - DBUG_EXECUTE_IF("ib_truncate_crash_after_fts_drop", - DBUG_SUICIDE();); - - dict_table_change_id_in_cache(table, new_id); - - /* Reset the Doc ID in cache to 0 */ - if (has_internal_doc_id && table->fts->cache) { - table->fts->fts_status |= TABLE_DICT_LOCKED; - fts_update_next_doc_id(trx, table, NULL, 0); - fts_cache_clear(table->fts->cache); - fts_cache_init(table->fts->cache); - table->fts->fts_status &= ~TABLE_DICT_LOCKED; - } - } - - /* Reset auto-increment. */ - dict_table_autoinc_lock(table); - dict_table_autoinc_initialize(table, 1); - dict_table_autoinc_unlock(table); - - trx_commit_for_mysql(trx); - -funct_exit: - - if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { - /* We need to set the memcached sync back to 0, unblock - memcached operationse. */ - table->memcached_sync_count = 0; - } - - row_mysql_unlock_data_dictionary(trx); - - dict_stats_update(table, DICT_STATS_EMPTY_TABLE); - - trx->op_info = ""; - - srv_wake_master_thread(); - - return(err); -} - -/*********************************************************************//** -Drops a table for MySQL. If the name of the dropped table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also stop the printing of monitor -output by the master thread. 
If the data dictionary was not already locked -by the transaction, the transaction will be committed. Otherwise, the -data dictionary will remain locked. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_drop_table_for_mysql( -/*=====================*/ - const char* name, /*!< in: table name */ - trx_t* trx, /*!< in: transaction handle */ - bool drop_db,/*!< in: true=dropping whole database */ - ibool create_failed,/*!<in: TRUE=create table failed - because e.g. foreign key column - type mismatch. */ - bool nonatomic) - /*!< in: whether it is permitted - to release and reacquire dict_operation_lock */ -{ - dberr_t err; - dict_foreign_t* foreign; - dict_table_t* table; - ibool print_msg; - ulint space_id; - char* filepath = NULL; - const char* tablename_minus_db; - char* tablename = NULL; - bool ibd_file_missing; - ulint namelen; - bool locked_dictionary = false; - pars_info_t* info = NULL; - mem_heap_t* heap = NULL; - - DBUG_ENTER("row_drop_table_for_mysql"); - - DBUG_PRINT("row_drop_table_for_mysql", ("table: %s", name)); - - ut_a(name != NULL); - - /* The table name is prefixed with the database name and a '/'. - Certain table names starting with 'innodb_' have their special - meaning regardless of the database name. Thus, we need to - ignore the database name prefix in the comparisons. */ - tablename_minus_db = strchr(name, '/'); - - if (tablename_minus_db) { - tablename_minus_db++; - } else { - /* Ancillary FTS tables don't have '/' characters. 
*/ - tablename_minus_db = name; - } - - namelen = strlen(tablename_minus_db) + 1; - - if (namelen == sizeof S_innodb_monitor - && !memcmp(tablename_minus_db, S_innodb_monitor, - sizeof S_innodb_monitor)) { - - /* Table name equals "innodb_monitor": - stop monitor prints */ - - srv_print_innodb_monitor = FALSE; - srv_print_innodb_lock_monitor = FALSE; - } else if (namelen == sizeof S_innodb_lock_monitor - && !memcmp(tablename_minus_db, S_innodb_lock_monitor, - sizeof S_innodb_lock_monitor)) { - srv_print_innodb_monitor = FALSE; - srv_print_innodb_lock_monitor = FALSE; - } else if (namelen == sizeof S_innodb_tablespace_monitor - && !memcmp(tablename_minus_db, S_innodb_tablespace_monitor, - sizeof S_innodb_tablespace_monitor)) { - - srv_print_innodb_tablespace_monitor = FALSE; - } else if (namelen == sizeof S_innodb_table_monitor - && !memcmp(tablename_minus_db, S_innodb_table_monitor, - sizeof S_innodb_table_monitor)) { - - srv_print_innodb_table_monitor = FALSE; - } - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - trx->op_info = "dropping table"; - - /* This function is called recursively via fts_drop_tables(). */ - if (trx->state == TRX_STATE_NOT_STARTED) { - trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); - } - - if (trx->dict_operation_lock_mode != RW_X_LATCH) { - /* Prevent foreign key checks etc. 
while we are dropping the - table */ - - row_mysql_lock_data_dictionary(trx); - - locked_dictionary = true; - nonatomic = true; - } - - ut_ad(mutex_own(&(dict_sys->mutex))); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - table = dict_table_open_on_name( - name, TRUE, FALSE, - static_cast<dict_err_ignore_t>( - DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT)); - - if (!table) { - err = DB_TABLE_NOT_FOUND; - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs(" does not exist in the InnoDB internal\n" - "InnoDB: data dictionary though MySQL is" - " trying to drop it.\n" - "InnoDB: Have you copied the .frm file" - " of the table to the\n" - "InnoDB: MySQL database directory" - " from another database?\n" - "InnoDB: You can look for further help from\n" - "InnoDB: " REFMAN "innodb-troubleshooting.html\n", - stderr); - goto funct_exit; - } - - /* Turn on this drop bit before we could release the dictionary - latch */ - table->to_be_dropped = true; - - if (nonatomic) { - /* This trx did not acquire any locks on dictionary - table records yet. Thus it is safe to release and - reacquire the data dictionary latches. */ - if (table->fts) { - ut_ad(!table->fts->add_wq); - ut_ad(lock_trx_has_sys_table_locks(trx) == 0); - - row_mysql_unlock_data_dictionary(trx); - fts_optimize_remove_table(table); - row_mysql_lock_data_dictionary(trx); - } - - /* Do not bother to deal with persistent stats for temp - tables since we know temp tables do not use persistent - stats. */ - if (!dict_table_is_temporary(table)) { - dict_stats_wait_bg_to_stop_using_table( - table, trx); - } - } - - /* make sure background stats thread is not running on the table */ - ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)); - - /* Delete the link file if used. 
*/ - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - fil_delete_link_file(name); - } - - if (!dict_table_is_temporary(table)) { - - dict_stats_recalc_pool_del(table); - dict_stats_defrag_pool_del(table, NULL); - btr_defragment_remove_table(table); - - /* Remove stats for this table and all of its indexes from the - persistent storage if it exists and if there are stats for this - table in there. This function creates its own trx and commits - it. */ - char errstr[1024]; - err = dict_stats_drop_table(name, errstr, sizeof(errstr)); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_WARN, "%s", errstr); - } - } - - /* Move the table the the non-LRU list so that it isn't - considered for eviction. */ - - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } - - dict_table_close(table, TRUE, FALSE); - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - if (!srv_read_only_mode && trx->check_foreigns) { - - for (dict_foreign_set::iterator it - = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - const bool ref_ok = drop_db - && dict_tables_have_same_db( - name, - foreign->foreign_table_name_lookup); - - /* We should allow dropping a referenced table if creating - that referenced table has failed for some reason. For example - if referenced table is created but it column types that are - referenced do not match. 
*/ - if (foreign->foreign_table != table && - !create_failed && !ref_ok) { - - FILE* ef = dict_foreign_err_file; - - /* We only allow dropping a referenced table - if FOREIGN_KEY_CHECKS is set to 0 */ - - err = DB_CANNOT_DROP_CONSTRAINT; - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - - fputs(" Cannot drop table ", ef); - ut_print_name(ef, trx, TRUE, name); - fputs("\n" - "because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, - foreign->foreign_table_name); - putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); - - goto funct_exit; - } - } - } - - - DBUG_EXECUTE_IF("row_drop_table_add_to_background", - row_add_table_to_background_drop_list(table->name); - err = DB_SUCCESS; - goto funct_exit; - ); - - /* TODO: could we replace the counter n_foreign_key_checks_running - with lock checks on the table? Acquire here an exclusive lock on the - table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that - they can cope with the table having been dropped here? Foreign key - checks take an IS or IX lock on the table. */ - - if (table->n_foreign_key_checks_running > 0) { - - const char* save_tablename = table->name; - ibool added; - - added = row_add_table_to_background_drop_list(save_tablename); - - if (added) { - ut_print_timestamp(stderr); - fputs(" InnoDB: You are trying to drop table ", - stderr); - ut_print_name(stderr, trx, TRUE, save_tablename); - fputs("\n" - "InnoDB: though there is a" - " foreign key check running on it.\n" - "InnoDB: Adding the table to" - " the background drop queue.\n", - stderr); - - /* We return DB_SUCCESS to MySQL though the drop will - happen lazily later */ - - err = DB_SUCCESS; - } else { - /* The table is already in the background drop list */ - err = DB_ERROR; - } - - goto funct_exit; - } - - /* Remove all locks that are on the table or its records, if there - are no refernces to the table but it has record locks, we release - the record locks unconditionally. 
One use case is: - - CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1; - - If after the user transaction has done the SELECT and there is a - problem in completing the CREATE TABLE operation, MySQL will drop - the table. InnoDB will create a new background transaction to do the - actual drop, the trx instance that is passed to this function. To - preserve existing behaviour we remove the locks but ideally we - shouldn't have to. There should never be record locks on a table - that is going to be dropped. */ - - /* Wait on background threads to stop using table */ - fil_wait_crypt_bg_threads(table); - - if (table->n_ref_count == 0) { - lock_remove_all_on_table(table, TRUE); - ut_a(table->n_rec_locks == 0); - } else if (table->n_ref_count > 0 || table->n_rec_locks > 0) { - ibool added; - - added = row_add_table_to_background_drop_list(table->name); - - if (added) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: MySQL is" - " trying to drop table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: though there are still" - " open handles to it.\n" - "InnoDB: Adding the table to the" - " background drop queue.\n", - stderr); - - /* We return DB_SUCCESS to MySQL though the drop will - happen lazily later */ - err = DB_SUCCESS; - } else { - /* The table is already in the background drop list */ - err = DB_ERROR; - } - - goto funct_exit; - } - - /* The "to_be_dropped" marks table that is to be dropped, but - has not been dropped, instead, was put in the background drop - list due to being used by concurrent DML operations. Clear it - here since there are no longer any concurrent activities on it, - and it is free to be dropped */ - table->to_be_dropped = false; - - /* If we get this far then the table to be dropped must not have - any table or record locks on it. 
*/ - - ut_a(!lock_table_has_locks(table)); - - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - trx->table_id = table->id; - case TRX_DICT_OP_TABLE: - break; - case TRX_DICT_OP_INDEX: - /* If the transaction was previously flagged as - TRX_DICT_OP_INDEX, we should be dropping auxiliary - tables for full-text indexes. */ - ut_ad(strstr(table->name, "/FTS_") != NULL); - } - - /* Mark all indexes unavailable in the data dictionary cache - before starting to drop the table. */ - - unsigned* page_no; - unsigned* page_nos; - heap = mem_heap_create( - 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos); - tablename = mem_heap_strdup(heap, name); - - page_no = page_nos = static_cast<unsigned*>( - mem_heap_alloc( - heap, - UT_LIST_GET_LEN(table->indexes) * sizeof *page_no)); - - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - rw_lock_x_lock(dict_index_get_lock(index)); - /* Save the page numbers so that we can restore them - if the operation fails. */ - *page_no++ = index->page; - /* Mark the index unusable. */ - index->page = FIL_NULL; - rw_lock_x_unlock(dict_index_get_lock(index)); - } - - /* We use the private SQL parser of Innobase to generate the - query graphs needed in deleting the dictionary data from system - tables in Innobase. Deleting a row from SYS_INDEXES table also - frees the file segments of the B-tree associated with the index. 
*/ - - info = pars_info_create(); - - pars_info_add_str_literal(info, "table_name", name); - - err = que_eval_sql(info, - "PROCEDURE DROP_TABLE_PROC () IS\n" - "sys_foreign_id CHAR;\n" - "table_id CHAR;\n" - "index_id CHAR;\n" - "foreign_id CHAR;\n" - "space_id INT;\n" - "found INT;\n" - - "DECLARE CURSOR cur_fk IS\n" - "SELECT ID FROM SYS_FOREIGN\n" - "WHERE FOR_NAME = :table_name\n" - "AND TO_BINARY(FOR_NAME)\n" - " = TO_BINARY(:table_name)\n" - "LOCK IN SHARE MODE;\n" - - "DECLARE CURSOR cur_idx IS\n" - "SELECT ID FROM SYS_INDEXES\n" - "WHERE TABLE_ID = table_id\n" - "LOCK IN SHARE MODE;\n" - - "BEGIN\n" - "SELECT ID INTO table_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = :table_name\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " RETURN;\n" - "END IF;\n" - "SELECT SPACE INTO space_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = :table_name;\n" - "IF (SQL % NOTFOUND) THEN\n" - " RETURN;\n" - "END IF;\n" - "found := 1;\n" - "SELECT ID INTO sys_foreign_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = 'SYS_FOREIGN'\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - "END IF;\n" - "IF (:table_name = 'SYS_FOREIGN') THEN\n" - " found := 0;\n" - "END IF;\n" - "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n" - " found := 0;\n" - "END IF;\n" - "OPEN cur_fk;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur_fk INTO foreign_id;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FOREIGN_COLS\n" - " WHERE ID = foreign_id;\n" - " DELETE FROM SYS_FOREIGN\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur_fk;\n" - "found := 1;\n" - "OPEN cur_idx;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur_idx INTO index_id;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FIELDS\n" - " WHERE INDEX_ID = index_id;\n" - " DELETE FROM SYS_INDEXES\n" - " WHERE ID = index_id\n" - " AND TABLE_ID = table_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur_idx;\n" - "DELETE FROM 
SYS_TABLESPACES\n" - "WHERE SPACE = space_id;\n" - "DELETE FROM SYS_DATAFILES\n" - "WHERE SPACE = space_id;\n" - "DELETE FROM SYS_COLUMNS\n" - "WHERE TABLE_ID = table_id;\n" - "DELETE FROM SYS_TABLES\n" - "WHERE NAME = :table_name;\n" - "END;\n" - , FALSE, trx); - - switch (err) { - ibool is_temp; - ulint table_flags; - - case DB_SUCCESS: - /* Clone the name, in case it has been allocated - from table->heap, which will be freed by - dict_table_remove_from_cache(table) below. */ - space_id = table->space; - ibd_file_missing = table->file_unreadable; - - table_flags = table->flags; - is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY); - - /* If there is a temp path then the temp flag is set. - However, during recovery or reloading the table object - after eviction from data dictionary cache, we might - have a temp flag but not know the temp path */ - ut_a(table->dir_path_of_temp_table == NULL || is_temp); - if (dict_table_is_discarded(table) - || (!table->is_readable() - && fil_space_get(table->space) == NULL)) { - /* Do not attempt to drop known-to-be-missing - tablespaces. */ - space_id = 0; - } - - /* We do not allow temporary tables with a remote path. 
*/ - ut_a(!(is_temp && DICT_TF_HAS_DATA_DIR(table_flags))); - - if (space_id && DICT_TF_HAS_DATA_DIR(table_flags)) { - dict_get_and_save_data_dir_path(table, true); - ut_a(table->data_dir_path); - - filepath = os_file_make_remote_pathname( - table->data_dir_path, table->name, "ibd"); - } else if (table->dir_path_of_temp_table) { - filepath = fil_make_ibd_name( - table->dir_path_of_temp_table, true); - } else { - filepath = fil_make_ibd_name(tablename, false); - } - - if (dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - ut_ad(table->n_ref_count == 0); - ut_ad(trx->state != TRX_STATE_NOT_STARTED); - err = fts_drop_tables(trx, table); - - if (err != DB_SUCCESS) { - ut_print_timestamp(stderr); - fprintf(stderr," InnoDB: Error: (%s) not " - "able to remove ancillary FTS tables " - "for table ", ut_strerr(err)); - ut_print_name(stderr, trx, TRUE, tablename); - fputs("\n", stderr); - - goto funct_exit; - } - } - - /* The table->fts flag can be set on the table for which - the cluster index is being rebuilt. Such table might not have - DICT_TF2_FTS flag set. So keep this out of above - dict_table_has_fts_index condition */ - if (table->fts) { - /* Need to set TABLE_DICT_LOCKED bit, since - fts_que_graph_free_check_lock would try to acquire - dict mutex lock */ - table->fts->fts_status |= TABLE_DICT_LOCKED; - - fts_free(table); - } - - dict_table_remove_from_cache(table); - - if (dict_load_table(tablename, TRUE, - DICT_ERR_IGNORE_NONE) != NULL) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: not able to remove table ", - stderr); - ut_print_name(stderr, trx, TRUE, tablename); - fputs(" from the dictionary cache!\n", stderr); - err = DB_ERROR; - } - - /* Do not drop possible .ibd tablespace if something went - wrong: we do not want to delete valuable data of the user */ - - /* Don't spam the log if we can't find the tablespace of - a temp table or if the tablesace has been discarded. 
*/ - print_msg = !(is_temp || ibd_file_missing); - - if (err == DB_SUCCESS && space_id > TRX_SYS_SPACE) { - if (!is_temp - && !fil_space_for_table_exists_in_mem( - space_id, tablename, - print_msg, IS_XTRABACKUP() && print_msg, false, NULL, 0, - table_flags)) { - - /* This might happen if we are dropping a - discarded tablespace */ - err = DB_SUCCESS; - - if (print_msg) { - char msg_tablename[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - msg_tablename, sizeof(tablename), - tablename, FALSE); - - ib_logf(IB_LOG_LEVEL_INFO, - "Removed the table %s from " - "InnoDB's data dictionary", - msg_tablename); - } - - /* Force a delete of any discarded - or temporary files. */ - - fil_delete_file(filepath); - - } else if (fil_delete_tablespace( - space_id, - BUF_REMOVE_FLUSH_NO_WRITE) - != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: We removed now the InnoDB" - " internal data dictionary entry\n" - "InnoDB: of table "); - ut_print_name(stderr, trx, TRUE, tablename); - fprintf(stderr, ".\n"); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: not able to" - " delete tablespace %lu of table ", - (ulong) space_id); - ut_print_name(stderr, trx, TRUE, tablename); - fputs("!\n", stderr); - err = DB_ERROR; - } - } - - break; - - case DB_OUT_OF_FILE_SPACE: - err = DB_MUST_GET_MORE_FILE_SPACE; - - trx->error_state = err; - row_mysql_handle_errors(&err, trx, NULL, NULL); - - /* raise error */ - ut_error; - break; - - case DB_TOO_MANY_CONCURRENT_TRXS: - /* Cannot even find a free slot for the - the undo log. We can directly exit here - and return the DB_TOO_MANY_CONCURRENT_TRXS - error. */ - - default: - /* This is some error we do not expect. 
Print - the error number and rollback transaction */ - ut_print_timestamp(stderr); - - fprintf(stderr, "InnoDB: unknown error code %lu" - " while dropping table:", (ulong) err); - ut_print_name(stderr, trx, TRUE, tablename); - fprintf(stderr, ".\n"); - - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - - /* Mark all indexes available in the data dictionary - cache again. */ - - page_no = page_nos; - - for (dict_index_t* index = dict_table_get_first_index(table); - index != NULL; - index = dict_table_get_next_index(index)) { - rw_lock_x_lock(dict_index_get_lock(index)); - ut_a(index->page == FIL_NULL); - index->page = *page_no++; - rw_lock_x_unlock(dict_index_get_lock(index)); - } - } - -funct_exit: - if (heap) { - mem_heap_free(heap); - } - if (filepath) { - mem_free(filepath); - } - - if (locked_dictionary) { - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - } - - trx->op_info = ""; - - srv_wake_master_thread(); - - DBUG_RETURN(err); -} - -/*********************************************************************//** -Drop all temporary tables during crash recovery. */ -UNIV_INTERN -void -row_mysql_drop_temp_tables(void) -/*============================*/ -{ - trx_t* trx; - btr_pcur_t pcur; - mtr_t mtr; - mem_heap_t* heap; - - trx = trx_allocate_for_background(); - trx->op_info = "dropping temporary tables"; - row_mysql_lock_data_dictionary(trx); - - heap = mem_heap_create(200); - - mtr_start(&mtr); - - btr_pcur_open_at_index_side( - true, - dict_table_get_first_index(dict_sys->sys_tables), - BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - for (;;) { - const rec_t* rec; - const byte* field; - ulint len; - const char* table_name; - dict_table_t* table; - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - if (!btr_pcur_is_on_user_rec(&pcur)) { - break; - } - - /* The high order bit of N_COLS is set unless - ROW_FORMAT=REDUNDANT. 
*/ - rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__N_COLS, &len); - if (len != 4 - || !(mach_read_from_4(field) & DICT_N_COLS_COMPACT)) { - continue; - } - - /* Older versions of InnoDB, which only supported tables - in ROW_FORMAT=REDUNDANT could write garbage to - SYS_TABLES.MIX_LEN, where we now store the is_temp flag. - Above, we assumed is_temp=0 if ROW_FORMAT=REDUNDANT. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); - if (len != 4 - || !(mach_read_from_4(field) & DICT_TF2_TEMPORARY)) { - continue; - } - - /* This is a temporary table. */ - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - if (len == UNIV_SQL_NULL || len == 0) { - /* Corrupted SYS_TABLES.NAME */ - continue; - } - - table_name = mem_heap_strdupl(heap, (const char*) field, len); - - btr_pcur_store_position(&pcur, &mtr); - btr_pcur_commit_specify_mtr(&pcur, &mtr); - - table = dict_table_get_low(table_name); - - if (table) { - row_drop_table_for_mysql(table_name, trx, FALSE, FALSE); - trx_commit_for_mysql(trx); - } - - mtr_start(&mtr); - btr_pcur_restore_position(BTR_SEARCH_LEAF, - &pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - row_mysql_unlock_data_dictionary(trx); - trx_free_for_background(trx); -} - -/*******************************************************************//** -Drop all foreign keys in a database, see Bug#18942. -Called at the end of row_drop_database_for_mysql(). 
-@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -drop_all_foreign_keys_in_db( -/*========================*/ - const char* name, /*!< in: database name which ends to '/' */ - trx_t* trx) /*!< in: transaction handle */ -{ - pars_info_t* pinfo; - dberr_t err; - - ut_a(name[strlen(name) - 1] == '/'); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "dbname", name); - -/** true if for_name is not prefixed with dbname */ -#define TABLE_NOT_IN_THIS_DB \ -"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname" - - err = que_eval_sql(pinfo, - "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n" - "foreign_id CHAR;\n" - "for_name CHAR;\n" - "found INT;\n" - "DECLARE CURSOR cur IS\n" - "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n" - "WHERE FOR_NAME >= :dbname\n" - "LOCK IN SHARE MODE\n" - "ORDER BY FOR_NAME;\n" - "BEGIN\n" - "found := 1;\n" - "OPEN cur;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur INTO foreign_id, for_name;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n" - " found := 0;\n" - " ELSIF (1=1) THEN\n" - " DELETE FROM SYS_FOREIGN_COLS\n" - " WHERE ID = foreign_id;\n" - " DELETE FROM SYS_FOREIGN\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur;\n" - "COMMIT WORK;\n" - "END;\n", - FALSE, /* do not reserve dict mutex, - we are already holding it */ - trx); - - return(err); -} - -/*********************************************************************//** -Drops a database for MySQL. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_drop_database_for_mysql( -/*========================*/ - const char* name, /*!< in: database name which ends to '/' */ - trx_t* trx) /*!< in: transaction handle */ -{ - dict_table_t* table; - char* table_name; - dberr_t err = DB_SUCCESS; - ulint namelen = strlen(name); - - ut_a(name != NULL); - ut_a(name[namelen - 1] == '/'); - - trx->op_info = "dropping database"; - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - trx_start_if_not_started_xa(trx); -loop: - row_mysql_lock_data_dictionary(trx); - - while ((table_name = dict_get_first_table_name_in_db(name))) { - /* Drop parent table if it is a fts aux table, to - avoid accessing dropped fts aux tables in information - scheam when parent table still exists. - Note: Drop parent table will drop fts aux tables. */ - char* parent_table_name; - parent_table_name = fts_get_parent_table_name( - table_name, strlen(table_name)); - - if (parent_table_name != NULL) { - mem_free(table_name); - table_name = parent_table_name; - } - - ut_a(memcmp(table_name, name, namelen) == 0); - - table = dict_table_open_on_name( - table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>( - DICT_ERR_IGNORE_INDEX_ROOT - | DICT_ERR_IGNORE_CORRUPT)); - - if (!table) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot load table %s from InnoDB internal " - "data dictionary during drop database", - table_name); - mem_free(table_name); - err = DB_TABLE_NOT_FOUND; - break; - - } - - if (!row_is_mysql_tmp_table_name(table->name)) { - /* There could be orphan temp tables left from - interrupted alter table. Leave them, and handle - the rest.*/ - if (table->can_be_evicted) { - ib_logf(IB_LOG_LEVEL_WARN, - "Orphan table encountered during " - "DROP DATABASE. 
This is possible if " - "'%s.frm' was lost.", table->name); - } - - if (!table->is_readable() - && fil_space_get(table->space) == NULL) { - ib_logf(IB_LOG_LEVEL_WARN, - "Missing %s.ibd file for table %s.", - table->name, table->name); - } - } - - dict_table_close(table, TRUE, FALSE); - - /* The dict_table_t object must not be accessed before - dict_table_open() or after dict_table_close(). But this is OK - if we are holding, the dict_sys->mutex. */ - ut_ad(mutex_own(&dict_sys->mutex)); - - /* Wait until MySQL does not have any queries running on - the table */ - - if (table->n_ref_count > 0) { - row_mysql_unlock_data_dictionary(trx); - - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: MySQL is trying to" - " drop database ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: though there are still" - " open handles to table ", stderr); - ut_print_name(stderr, trx, TRUE, table_name); - fputs(".\n", stderr); - - os_thread_sleep(1000000); - - mem_free(table_name); - - goto loop; - } - - err = row_drop_table_for_mysql(table_name, trx, TRUE, FALSE); - trx_commit_for_mysql(trx); - - if (err != DB_SUCCESS) { - fputs("InnoDB: DROP DATABASE ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fprintf(stderr, " failed with error (%s) for table ", - ut_strerr(err)); - ut_print_name(stderr, trx, TRUE, table_name); - putc('\n', stderr); - mem_free(table_name); - break; - } - - mem_free(table_name); - } - - if (err == DB_SUCCESS) { - /* after dropping all tables try to drop all leftover - foreign keys in case orphaned ones exist */ - err = drop_all_foreign_keys_in_db(name, trx); - - if (err != DB_SUCCESS) { - fputs("InnoDB: DROP DATABASE ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fprintf(stderr, " failed with error %d while " - "dropping all foreign keys", err); - } - } - - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - return(err); -} - 
-/*********************************************************************//** -Checks if a table name contains the string "/#sql" which denotes temporary -tables in MySQL. -@return true if temporary table */ -UNIV_INTERN MY_ATTRIBUTE((warn_unused_result)) -bool -row_is_mysql_tmp_table_name( -/*========================*/ - const char* name) /*!< in: table name in the form - 'database/tablename' */ -{ - return(strstr(name, "/#sql") != NULL); - /* return(strstr(name, "/@0023sql") != NULL); */ -} - -/****************************************************************//** -Delete a single constraint. -@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_delete_constraint_low( -/*======================*/ - const char* id, /*!< in: constraint id */ - trx_t* trx) /*!< in: transaction handle */ -{ - pars_info_t* info = pars_info_create(); - - pars_info_add_str_literal(info, "id", id); - - return(que_eval_sql(info, - "PROCEDURE DELETE_CONSTRAINT () IS\n" - "BEGIN\n" - "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n" - "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n" - "END;\n" - , FALSE, trx)); -} - -/****************************************************************//** -Delete a single constraint. -@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_delete_constraint( -/*==================*/ - const char* id, /*!< in: constraint id */ - const char* database_name, /*!< in: database name, with the - trailing '/' */ - mem_heap_t* heap, /*!< in: memory heap */ - trx_t* trx) /*!< in: transaction handle */ -{ - dberr_t err; - - /* New format constraints have ids <databasename>/<constraintname>. */ - err = row_delete_constraint_low( - mem_heap_strcat(heap, database_name, id), trx); - - if ((err == DB_SUCCESS) && !strchr(id, '/')) { - /* Old format < 4.0.18 constraints have constraint ids - NUMBER_NUMBER. 
We only try deleting them if the - constraint name does not contain a '/' character, otherwise - deleting a new format constraint named 'foo/bar' from - database 'baz' would remove constraint 'bar' from database - 'foo', if it existed. */ - - err = row_delete_constraint_low(id, trx); - } - - return(err); -} - -/*********************************************************************//** -Renames a table for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -row_rename_table_for_mysql( -/*=======================*/ - const char* old_name, /*!< in: old table name */ - const char* new_name, /*!< in: new table name */ - trx_t* trx, /*!< in/out: transaction */ - bool commit) /*!< in: whether to commit trx */ -{ - dict_table_t* table = NULL; - ibool dict_locked = FALSE; - dberr_t err = DB_ERROR; - mem_heap_t* heap = NULL; - const char** constraints_to_drop = NULL; - ulint n_constraints_to_drop = 0; - ibool old_is_tmp, new_is_tmp; - pars_info_t* info = NULL; - int retry; - bool aux_fts_rename = false; - char* is_part = NULL; - - ut_a(old_name != NULL); - ut_a(new_name != NULL); - ut_ad(trx->state == TRX_STATE_ACTIVE); - - if (srv_force_recovery) { - fputs("InnoDB: innodb_force_recovery is on: we do not allow\n" - "InnoDB: database modifications by the user. Shut down\n" - "InnoDB: mysqld and edit my.cnf so that" - "InnoDB: innodb_force_... 
is removed.\n", - stderr); - - err = DB_READ_ONLY; - goto funct_exit; - - } else if (row_mysql_is_system_table(new_name)) { - - fprintf(stderr, - "InnoDB: Error: trying to create a MySQL" - " system table %s of type InnoDB.\n" - "InnoDB: MySQL system tables must be" - " of the MyISAM type!\n", - new_name); - - goto funct_exit; - } - - trx->op_info = "renaming table"; - - old_is_tmp = row_is_mysql_tmp_table_name(old_name); - new_is_tmp = row_is_mysql_tmp_table_name(new_name); - - dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH; - - table = dict_table_open_on_name(old_name, dict_locked, FALSE, - DICT_ERR_IGNORE_NONE); - - /* We look for pattern #P# to see if the table is partitioned - MySQL table. */ -#ifdef __WIN__ - is_part = strstr((char *)old_name, (char *)"#p#"); -#else - is_part = strstr((char *)old_name, (char *)"#P#"); -#endif /* __WIN__ */ - - /* MySQL partition engine hard codes the file name - separator as "#P#". The text case is fixed even if - lower_case_table_names is set to 1 or 2. This is true - for sub-partition names as well. InnoDB always - normalises file names to lower case on Windows, this - can potentially cause problems when copying/moving - tables between platforms. - - 1) If boot against an installation from Windows - platform, then its partition table name could - be all be in lower case in system tables. So we - will need to check lower case name when load table. - - 2) If we boot an installation from other case - sensitive platform in Windows, we might need to - check the existence of table name without lowering - case them in the system table. 
*/ - if (!table && - is_part && - innobase_get_lower_case_table_names() == 1) { - char par_case_name[MAX_FULL_NAME_LEN + 1]; -#ifndef __WIN__ - /* Check for the table using lower - case name, including the partition - separator "P" */ - memcpy(par_case_name, old_name, - strlen(old_name)); - par_case_name[strlen(old_name)] = 0; - innobase_casedn_str(par_case_name); -#else - /* On Windows platfrom, check - whether there exists table name in - system table whose name is - not being normalized to lower case */ - normalize_table_name_low( - par_case_name, old_name, FALSE); -#endif - table = dict_table_open_on_name(par_case_name, dict_locked, FALSE, - DICT_ERR_IGNORE_NONE); - } - - if (!table) { - err = DB_TABLE_NOT_FOUND; - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fputs(" does not exist in the InnoDB internal\n" - "InnoDB: data dictionary though MySQL is" - " trying to rename the table.\n" - "InnoDB: Have you copied the .frm file" - " of the table to the\n" - "InnoDB: MySQL database directory" - " from another database?\n" - "InnoDB: You can look for further help from\n" - "InnoDB: " REFMAN "innodb-troubleshooting.html\n", - stderr); - goto funct_exit; - - } else if (!table->is_readable() - && fil_space_get(table->space) == NULL - && !dict_table_is_discarded(table)) { - - err = DB_TABLE_NOT_FOUND; - - ib_logf(IB_LOG_LEVEL_ERROR, - "Table %s does not have an .ibd file in the database " - "directory. See " REFMAN "innodb-troubleshooting.html", - old_name); - - goto funct_exit; - - } else if (new_is_tmp) { - /* MySQL is doing an ALTER TABLE command and it renames the - original table to a temporary table name. We want to preserve - the original foreign key constraint definitions despite the - name change. 
An exception is those constraints for which - the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/ - - heap = mem_heap_create(100); - - err = dict_foreign_parse_drop_constraints( - heap, trx, table, &n_constraints_to_drop, - &constraints_to_drop); - - if (err != DB_SUCCESS) { - goto funct_exit; - } - } - - /* Is a foreign key check running on this table? */ - for (retry = 0; retry < 100 - && table->n_foreign_key_checks_running > 0; ++retry) { - row_mysql_unlock_data_dictionary(trx); - os_thread_yield(); - row_mysql_lock_data_dictionary(trx); - } - - if (table->n_foreign_key_checks_running > 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: in ALTER TABLE ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fprintf(stderr, "\n" - "InnoDB: a FOREIGN KEY check is running.\n" - "InnoDB: Cannot rename table.\n"); - err = DB_TABLE_IN_FK_CHECK; - goto funct_exit; - } - - /* We use the private SQL parser of Innobase to generate the query - graphs needed in updating the dictionary data from system tables. */ - - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_table_name", new_name); - pars_info_add_str_literal(info, "old_table_name", old_name); - - err = que_eval_sql(info, - "PROCEDURE RENAME_TABLE () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES" - " SET NAME = :new_table_name\n" - " WHERE NAME = :old_table_name;\n" - "END;\n" - , FALSE, trx); - - /* SYS_TABLESPACES and SYS_DATAFILES track non-system tablespaces - which have space IDs > 0. */ - if (err == DB_SUCCESS - && table->space != TRX_SYS_SPACE - && table->is_readable()) { - /* Make a new pathname to update SYS_DATAFILES. 
*/ - char* new_path = row_make_new_pathname(table, new_name); - - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_table_name", new_name); - pars_info_add_str_literal(info, "new_path_name", new_path); - pars_info_add_int4_literal(info, "space_id", table->space); - - err = que_eval_sql(info, - "PROCEDURE RENAME_SPACE () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLESPACES" - " SET NAME = :new_table_name\n" - " WHERE SPACE = :space_id;\n" - "UPDATE SYS_DATAFILES" - " SET PATH = :new_path_name\n" - " WHERE SPACE = :space_id;\n" - "END;\n" - , FALSE, trx); - - mem_free(new_path); - } - if (err != DB_SUCCESS) { - goto end; - } - - if (!new_is_tmp) { - /* Rename all constraints. */ - char new_table_name[MAX_TABLE_NAME_LEN] = ""; - char old_table_utf8[MAX_TABLE_NAME_LEN] = ""; - uint errors = 0; - - strncpy(old_table_utf8, old_name, MAX_TABLE_NAME_LEN); - innobase_convert_to_system_charset( - strchr(old_table_utf8, '/') + 1, - strchr(old_name, '/') +1, - MAX_TABLE_NAME_LEN, &errors); - - if (errors) { - /* Table name could not be converted from charset - my_charset_filename to UTF-8. This means that the - table name is already in UTF-8 (#mysql#50). */ - strncpy(old_table_utf8, old_name, MAX_TABLE_NAME_LEN); - } - - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_table_name", new_name); - pars_info_add_str_literal(info, "old_table_name", old_name); - pars_info_add_str_literal(info, "old_table_name_utf8", - old_table_utf8); - - strncpy(new_table_name, new_name, MAX_TABLE_NAME_LEN); - innobase_convert_to_system_charset( - strchr(new_table_name, '/') + 1, - strchr(new_name, '/') +1, - MAX_TABLE_NAME_LEN, &errors); - - if (errors) { - /* Table name could not be converted from charset - my_charset_filename to UTF-8. This means that the - table name is already in UTF-8 (#mysql#50). 
*/ - strncpy(new_table_name, new_name, MAX_TABLE_NAME_LEN); - } - - pars_info_add_str_literal(info, "new_table_utf8", new_table_name); - - err = que_eval_sql( - info, - "PROCEDURE RENAME_CONSTRAINT_IDS () IS\n" - "gen_constr_prefix CHAR;\n" - "new_db_name CHAR;\n" - "foreign_id CHAR;\n" - "new_foreign_id CHAR;\n" - "old_db_name_len INT;\n" - "old_t_name_len INT;\n" - "new_db_name_len INT;\n" - "id_len INT;\n" - "offset INT;\n" - "found INT;\n" - "BEGIN\n" - "found := 1;\n" - "old_db_name_len := INSTR(:old_table_name, '/')-1;\n" - "new_db_name_len := INSTR(:new_table_name, '/')-1;\n" - "new_db_name := SUBSTR(:new_table_name, 0,\n" - " new_db_name_len);\n" - "old_t_name_len := LENGTH(:old_table_name);\n" - "gen_constr_prefix := CONCAT(:old_table_name_utf8,\n" - " '_ibfk_');\n" - "WHILE found = 1 LOOP\n" - " SELECT ID INTO foreign_id\n" - " FROM SYS_FOREIGN\n" - " WHERE FOR_NAME = :old_table_name\n" - " AND TO_BINARY(FOR_NAME)\n" - " = TO_BINARY(:old_table_name)\n" - " LOCK IN SHARE MODE;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " UPDATE SYS_FOREIGN\n" - " SET FOR_NAME = :new_table_name\n" - " WHERE ID = foreign_id;\n" - " id_len := LENGTH(foreign_id);\n" - " IF (INSTR(foreign_id, '/') > 0) THEN\n" - " IF (INSTR(foreign_id,\n" - " gen_constr_prefix) > 0)\n" - " THEN\n" - " offset := INSTR(foreign_id, '_ibfk_') - 1;\n" - " new_foreign_id :=\n" - " CONCAT(:new_table_utf8,\n" - " SUBSTR(foreign_id, offset,\n" - " id_len - offset));\n" - " ELSE\n" - " new_foreign_id :=\n" - " CONCAT(new_db_name,\n" - " SUBSTR(foreign_id,\n" - " old_db_name_len,\n" - " id_len - old_db_name_len));\n" - " END IF;\n" - " UPDATE SYS_FOREIGN\n" - " SET ID = new_foreign_id\n" - " WHERE ID = foreign_id;\n" - " UPDATE SYS_FOREIGN_COLS\n" - " SET ID = new_foreign_id\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - " END IF;\n" - "END LOOP;\n" - "UPDATE SYS_FOREIGN SET REF_NAME = :new_table_name\n" - "WHERE REF_NAME = :old_table_name\n" - " AND TO_BINARY(REF_NAME)\n" - " 
= TO_BINARY(:old_table_name);\n" - "END;\n" - , FALSE, trx); - - } else if (n_constraints_to_drop > 0) { - /* Drop some constraints of tmp tables. */ - - ulint db_name_len = dict_get_db_name_len(old_name) + 1; - char* db_name = mem_heap_strdupl(heap, old_name, - db_name_len); - ulint i; - - for (i = 0; i < n_constraints_to_drop; i++) { - err = row_delete_constraint(constraints_to_drop[i], - db_name, heap, trx); - - if (err != DB_SUCCESS) { - break; - } - } - } - - if (dict_table_has_fts_index(table) - && !dict_tables_have_same_db(old_name, new_name)) { - err = fts_rename_aux_tables(table, new_name, trx); - if (err != DB_TABLE_NOT_FOUND) { - aux_fts_rename = true; - } - } - -end: - if (err != DB_SUCCESS) { - if (err == DB_DUPLICATE_KEY) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error; possible reasons:\n" - "InnoDB: 1) Table rename would cause" - " two FOREIGN KEY constraints\n" - "InnoDB: to have the same internal name" - " in case-insensitive comparison.\n" - "InnoDB: 2) table ", stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs(" exists in the InnoDB internal data\n" - "InnoDB: dictionary though MySQL is" - " trying to rename table ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fputs(" to it.\n" - "InnoDB: Have you deleted the .frm file" - " and not used DROP TABLE?\n" - "InnoDB: You can look for further help from\n" - "InnoDB: " REFMAN "innodb-troubleshooting.html\n" - "InnoDB: If table ", stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs(" is a temporary table #sql..., then" - " it can be that\n" - "InnoDB: there are still queries running" - " on the table, and it will be\n" - "InnoDB: dropped automatically when" - " the queries end.\n" - "InnoDB: You can drop the orphaned table" - " inside InnoDB by\n" - "InnoDB: creating an InnoDB table with" - " the same name in another\n" - "InnoDB: database and copying the .frm file" - " to the current database.\n" - "InnoDB: Then MySQL thinks the table exists," - " and DROP 
TABLE will\n" - "InnoDB: succeed.\n", stderr); - } - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - } else { - /* The following call will also rename the .ibd data file if - the table is stored in a single-table tablespace */ - - err = dict_table_rename_in_cache( - table, new_name, !new_is_tmp); - if (err != DB_SUCCESS) { - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - goto funct_exit; - } - - /* We only want to switch off some of the type checking in - an ALTER, not in a RENAME. */ - - err = dict_load_foreigns( - new_name, NULL, - false, !old_is_tmp || trx->check_foreigns, - DICT_ERR_IGNORE_NONE); - - if (err != DB_SUCCESS) { - ut_print_timestamp(stderr); - - if (old_is_tmp) { - fputs(" InnoDB: Error: in ALTER TABLE ", - stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs("\n" - "InnoDB: has or is referenced" - " in foreign key constraints\n" - "InnoDB: which are not compatible" - " with the new table definition.\n", - stderr); - } else { - fputs(" InnoDB: Error: in RENAME TABLE" - " table ", - stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs("\n" - "InnoDB: is referenced in" - " foreign key constraints\n" - "InnoDB: which are not compatible" - " with the new table definition.\n", - stderr); - } - - ut_a(DB_SUCCESS == dict_table_rename_in_cache( - table, old_name, FALSE)); - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - } - } - -funct_exit: - if (aux_fts_rename && err != DB_SUCCESS - && table != NULL && (table->space != 0)) { - - char* orig_name = table->name; - trx_t* trx_bg = trx_allocate_for_background(); - - /* If the first fts_rename fails, the trx would - be rolled back and committed, we can't use it any more, - so we have to start a new background trx here. 
*/ - ut_a(trx_state_eq(trx_bg, TRX_STATE_NOT_STARTED)); - trx_bg->op_info = "Revert the failing rename " - "for fts aux tables"; - trx_bg->dict_operation_lock_mode = RW_X_LATCH; - trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); - - /* If rename fails and table has its own tablespace, - we need to call fts_rename_aux_tables again to - revert the ibd file rename, which is not under the - control of trx. Also notice the parent table name - in cache is not changed yet. If the reverting fails, - the ibd data may be left in the new database, which - can be fixed only manually. */ - table->name = const_cast<char*>(new_name); - fts_rename_aux_tables(table, old_name, trx_bg); - table->name = orig_name; - - trx_bg->dict_operation_lock_mode = 0; - trx_commit_for_mysql(trx_bg); - trx_free_for_background(trx_bg); - } - - if (table != NULL) { - dict_table_close(table, dict_locked, FALSE); - } - - if (commit) { - trx_commit_for_mysql(trx); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************//** -Checks that the index contains entries in an ascending order, unique -constraint is not broken, and calculates the number of index entries -in the read view of the current transaction. 
-@return true if ok */ -UNIV_INTERN -bool -row_check_index_for_mysql( -/*======================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct - in MySQL handle */ - const dict_index_t* index, /*!< in: index */ - ulint* n_rows) /*!< out: number of entries - seen in the consistent read */ -{ - dtuple_t* prev_entry = NULL; - ulint matched_fields; - ulint matched_bytes; - byte* buf; - ulint ret; - rec_t* rec; - bool is_ok = true; - int cmp; - ibool contains_null; - ulint i; - ulint cnt; - mem_heap_t* heap = NULL; - ulint n_ext; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets; - rec_offs_init(offsets_); - - *n_rows = 0; - - if (dict_index_is_clust(index)) { - /* The clustered index of a table is always available. - During online ALTER TABLE that rebuilds the table, the - clustered index in the old table will have - index->online_log pointing to the new table. All - indexes of the old table will remain valid and the new - table will be unaccessible to MySQL until the - completion of the ALTER TABLE. */ - } else if (dict_index_is_online_ddl(index) - || (index->type & DICT_FTS)) { - /* Full Text index are implemented by auxiliary tables, - not the B-tree. We also skip secondary indexes that are - being created online. 
*/ - return(true); - } - - buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE)); - heap = mem_heap_create(100); - - cnt = 1000; - - ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0); -loop: - /* Check thd->killed every 1,000 scanned rows */ - if (--cnt == 0) { - if (trx_is_interrupted(prebuilt->trx)) { - goto func_exit; - } - cnt = 1000; - } - - switch (ret) { - case DB_SUCCESS: - break; - default: - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: CHECK TABLE on ", stderr); - dict_index_name_print(stderr, prebuilt->trx, index); - fprintf(stderr, " returned %lu\n", ret); - /* (this error is ignored by CHECK TABLE) */ - /* fall through */ - case DB_END_OF_INDEX: -func_exit: - mem_free(buf); - mem_heap_free(heap); - - return(is_ok); - } - - *n_rows = *n_rows + 1; - - /* row_search... returns the index record in buf, record origin offset - within buf stored in the first 4 bytes, because we have built a dummy - template */ - - rec = buf + mach_read_from_4(buf); - - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - - if (prev_entry != NULL) { - matched_fields = 0; - matched_bytes = 0; - - cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets, - &matched_fields, - &matched_bytes); - contains_null = FALSE; - - /* In a unique secondary index we allow equal key values if - they contain SQL NULLs */ - - for (i = 0; - i < dict_index_get_n_ordering_defined_by_user(index); - i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(prev_entry, i))) { - - contains_null = TRUE; - break; - } - } - - if (cmp > 0) { - fputs("InnoDB: index records in a wrong order in ", - stderr); -not_ok: - dict_index_name_print(stderr, - prebuilt->trx, index); - fputs("\n" - "InnoDB: prev record ", stderr); - dtuple_print(stderr, prev_entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - is_ok = false; - } else if (dict_index_is_unique(index) - && !contains_null - && matched_fields - 
>= dict_index_get_n_ordering_defined_by_user( - index)) { - - fputs("InnoDB: duplicate key in ", stderr); - goto not_ok; - } - } - - { - mem_heap_t* tmp_heap = NULL; - - /* Empty the heap on each round. But preserve offsets[] - for the row_rec_to_index_entry() call, by copying them - into a separate memory heap when needed. */ - if (UNIV_UNLIKELY(offsets != offsets_)) { - ulint size = rec_offs_get_n_alloc(offsets) - * sizeof *offsets; - - tmp_heap = mem_heap_create(size); - - offsets = static_cast<ulint*>( - mem_heap_dup(tmp_heap, offsets, size)); - } - - mem_heap_empty(heap); - - prev_entry = row_rec_to_index_entry( - rec, index, offsets, &n_ext, heap); - - if (UNIV_LIKELY_NULL(tmp_heap)) { - mem_heap_free(tmp_heap); - } - } - - ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT); - - goto loop; -} - -/*********************************************************************//** -Determines if a table is a magic monitor table. -@return true if monitor table */ -UNIV_INTERN -bool -row_is_magic_monitor_table( -/*=======================*/ - const char* table_name) /*!< in: name of the table, in the - form database/table_name */ -{ - const char* name; /* table_name without database/ */ - ulint len; - - name = dict_remove_db_name(table_name); - len = strlen(name) + 1; - - return(STR_EQ(name, len, S_innodb_monitor) - || STR_EQ(name, len, S_innodb_lock_monitor) - || STR_EQ(name, len, S_innodb_tablespace_monitor) - || STR_EQ(name, len, S_innodb_table_monitor) -#ifdef UNIV_MEM_DEBUG - || STR_EQ(name, len, S_innodb_mem_validate) -#endif /* UNIV_MEM_DEBUG */ - ); -} - -/*********************************************************************//** -Initialize this module */ -UNIV_INTERN -void -row_mysql_init(void) -/*================*/ -{ - mutex_create( - row_drop_list_mutex_key, - &row_drop_list_mutex, SYNC_NO_ORDER_CHECK); - - UT_LIST_INIT(row_mysql_drop_list); - - row_mysql_drop_list_inited = TRUE; -} - 
-/*********************************************************************//** -Close this module: asserts that row_mysql_drop_list is empty, frees -row_drop_list_mutex and clears row_mysql_drop_list_inited. */ -UNIV_INTERN -void -row_mysql_close(void) -/*================*/ -{ - ut_a(UT_LIST_GET_LEN(row_mysql_drop_list) == 0); - - mutex_free(&row_drop_list_mutex); - - row_mysql_drop_list_inited = FALSE; -} diff --git a/storage/xtradb/row/row0purge.cc b/storage/xtradb/row/row0purge.cc deleted file mode 100644 index 333677edf21..00000000000 --- a/storage/xtradb/row/row0purge.cc +++ /dev/null @@ -1,1057 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0purge.cc -Purge obsolete records - -Created 3/14/1997 Heikki Tuuri -*******************************************************/ - -#include "row0purge.h" - -#ifdef UNIV_NONINL -#include "row0purge.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0upd.h" -#include "row0vers.h" -#include "row0mysql.h" -#include "row0log.h" -#include "log0log.h" -#include "srv0mon.h" -#include "srv0start.h" - -/************************************************************************* -IMPORTANT NOTE: Any operation that generates redo MUST check that there -is enough space in the redo log before that operation. This is -done by calling log_free_check(). The reason for checking the -availability of the redo log space before the start of the operation is -that we MUST not hold any synchronization objects when performing the -check. -If you make a change in this module make sure that no codepath is -introduced where a call to log_free_check() is bypassed. */ - -/********************************************************************//** -Creates a purge node to a query graph. 
-@return own: purge node */ -UNIV_INTERN -purge_node_t* -row_purge_node_create( -/*==================*/ - que_thr_t* parent, /*!< in: parent node */ - mem_heap_t* heap) /*!< in: memory heap where created */ -{ - purge_node_t* node; - - ut_ad(parent != NULL); - ut_ad(heap != NULL); - - node = static_cast<purge_node_t*>( - mem_heap_zalloc(heap, sizeof(*node))); - - node->common.type = QUE_NODE_PURGE; - node->common.parent = parent; - node->done = TRUE; - node->heap = mem_heap_create(256); - - return(node); -} - -/***********************************************************//** -Repositions the pcur in the purge node on the clustered index record, -if found. If the record is not found, close pcur. -@return TRUE if the record was found */ -static -ibool -row_purge_reposition_pcur( -/*======================*/ - ulint mode, /*!< in: latching mode */ - purge_node_t* node, /*!< in: row purge node */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (node->found_clust) { - ut_ad(node->validate_pcur()); - - node->found_clust = btr_pcur_restore_position(mode, &node->pcur, mtr); - - } else { - node->found_clust = row_search_on_row_ref( - &node->pcur, mode, node->table, node->ref, mtr); - - if (node->found_clust) { - btr_pcur_store_position(&node->pcur, mtr); - } - } - - /* Close the current cursor if we fail to position it correctly. */ - if (!node->found_clust) { - btr_pcur_close(&node->pcur); - } - - return(node->found_clust); -} - -/***********************************************************//** -Removes a delete marked clustered index record if possible. 
-@retval true if the row was not found, or it was successfully removed -@retval false if the row was modified after the delete marking */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_purge_remove_clust_if_poss_low( -/*===============================*/ - purge_node_t* node, /*!< in/out: row purge node */ - ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ -{ - dict_index_t* index; - bool success = true; - mtr_t mtr; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint* offsets; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - index = dict_table_get_first_index(node->table); - - log_free_check(); - mtr_start(&mtr); - - if (!row_purge_reposition_pcur(mode, node, &mtr)) { - /* The record was already removed. */ - goto func_exit; - } - - rec = btr_pcur_get_rec(&node->pcur); - - offsets = rec_get_offsets( - rec, index, offsets_, ULINT_UNDEFINED, &heap); - - if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) { - /* Someone else has modified the record later: do not remove */ - goto func_exit; - } - - if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete( - btr_pcur_get_btr_cur(&node->pcur), 0, &mtr); - } else { - dberr_t err; - ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete( - &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0, - RB_NONE, &mtr); - - switch (err) { - case DB_SUCCESS: - break; - case DB_OUT_OF_FILE_SPACE: - success = false; - break; - default: - ut_error; - } - } - -func_exit: - if (heap) { - mem_heap_free(heap); - } - - /* Persistent cursor is closed if reposition fails. 
*/ - if (node->found_clust) { - btr_pcur_commit_specify_mtr(&node->pcur, &mtr); - } else { - mtr_commit(&mtr); - } - - return(success); -} - -/***********************************************************//** -Removes a clustered index record if it has not been modified after the delete -marking. -@retval true if the row was not found, or it was successfully removed -@retval false the purge needs to be suspended because of running out -of file space. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_purge_remove_clust_if_poss( -/*===========================*/ - purge_node_t* node) /*!< in/out: row purge node */ -{ - if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) { - return(true); - } - - for (ulint n_tries = 0; - n_tries < BTR_CUR_RETRY_DELETE_N_TIMES; - n_tries++) { - if (row_purge_remove_clust_if_poss_low( - node, BTR_MODIFY_TREE)) { - return(true); - } - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - } - - return(false); -} - -/***********************************************************//** -Determines if it is possible to remove a secondary index entry. -Removal is possible if the secondary index entry does not refer to any -not delete marked version of a clustered index record where DB_TRX_ID -is newer than the purge view. - -NOTE: This function should only be called by the purge thread, only -while holding a latch on the leaf page of the secondary index entry -(or keeping the buffer pool watch on the page). It is possible that -this function first returns true and then false, if a user transaction -inserts a record that the secondary index entry would refer to. -However, in that case, the user transaction would also re-insert the -secondary index entry after purge has removed it and released the leaf -page latch. 
-@return true if the secondary index record can be purged */ -UNIV_INTERN -bool -row_purge_poss_sec( -/*===============*/ - purge_node_t* node, /*!< in/out: row purge node */ - dict_index_t* index, /*!< in: secondary index */ - const dtuple_t* entry) /*!< in: secondary index entry */ -{ - bool can_delete; - mtr_t mtr; - - ut_ad(!dict_index_is_clust(index)); - mtr_start(&mtr); - - can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr) - || !row_vers_old_has_index_entry(TRUE, - btr_pcur_get_rec(&node->pcur), - &mtr, index, entry); - - /* Persistent cursor is closed if reposition fails. */ - if (node->found_clust) { - btr_pcur_commit_specify_mtr(&node->pcur, &mtr); - } else { - mtr_commit(&mtr); - } - - return(can_delete); -} - -/*************************************************************** -Removes a secondary index entry if possible, by modifying the -index tree. Does not try to buffer the delete. -@return TRUE if success or if not found */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ibool -row_purge_remove_sec_if_poss_tree( -/*==============================*/ - purge_node_t* node, /*!< in: row purge node */ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry) /*!< in: index entry */ -{ - btr_pcur_t pcur; - btr_cur_t* btr_cur; - ibool success = TRUE; - dberr_t err; - mtr_t mtr; - enum row_search_result search_result; - - log_free_check(); - mtr_start(&mtr); - - if (*index->name == TEMP_INDEX_PREFIX) { - /* The index->online_status may change if the - index->name starts with TEMP_INDEX_PREFIX (meaning - that the index is or was being created online). It is - protected by index->lock. */ - mtr_x_lock(dict_index_get_lock(index), &mtr); - - if (dict_index_is_online_ddl(index)) { - /* Online secondary index creation will not - copy any delete-marked records. Therefore - there is nothing to be purged. We must also - skip the purge when a completed index is - dropped by rollback_inplace_alter_table(). 
*/ - goto func_exit_no_pcur; - } - } else { - /* For secondary indexes, - index->online_status==ONLINE_INDEX_CREATION unless - index->name starts with TEMP_INDEX_PREFIX. */ - ut_ad(!dict_index_is_online_ddl(index)); - } - - search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE, - &pcur, &mtr); - - switch (search_result) { - case ROW_NOT_FOUND: - /* Not found. This is a legitimate condition. In a - rollback, InnoDB will remove secondary recs that would - be purged anyway. Then the actual purge will not find - the secondary index record. Also, the purge itself is - eager: if it comes to consider a secondary index - record, and notices it does not need to exist in the - index, it will remove it. Then if/when the purge - comes to consider the secondary index record a second - time, it will not exist any more in the index. */ - - /* fputs("PURGE:........sec entry not found\n", stderr); */ - /* dtuple_print(stderr, entry); */ - goto func_exit; - case ROW_FOUND: - break; - case ROW_BUFFERED: - case ROW_NOT_DELETED_REF: - /* These are invalid outcomes, because the mode passed - to row_search_index_entry() did not include any of the - flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ - ut_error; - } - - btr_cur = btr_pcur_get_btr_cur(&pcur); - - /* We should remove the index record if no later version of the row, - which cannot be purged yet, requires its existence. If some requires, - we should do nothing. */ - - if (row_purge_poss_sec(node, index, entry)) { - /* Remove the index record, which should have been - marked for deletion. 
*/ - if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur), - dict_table_is_comp(index->table))) { - fputs("InnoDB: tried to purge sec index entry not" - " marked for deletion in\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, NULL, index); - fputs("\n" - "InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, btr_cur_get_rec(btr_cur), index); - putc('\n', stderr); - - ut_ad(0); - - goto func_exit; - } - - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - RB_NONE, &mtr); - switch (UNIV_EXPECT(err, DB_SUCCESS)) { - case DB_SUCCESS: - break; - case DB_OUT_OF_FILE_SPACE: - success = FALSE; - break; - default: - ut_error; - } - } - -func_exit: - btr_pcur_close(&pcur); -func_exit_no_pcur: - mtr_commit(&mtr); - - return(success); -} - -/*************************************************************** -Removes a secondary index entry without modifying the index tree, -if possible. -@retval true if success or if not found -@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_purge_remove_sec_if_poss_leaf( -/*==============================*/ - purge_node_t* node, /*!< in: row purge node */ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry) /*!< in: index entry */ -{ - mtr_t mtr; - btr_pcur_t pcur; - ulint mode; - enum row_search_result search_result; - bool success = true; - - log_free_check(); - - mtr_start(&mtr); - - if (*index->name == TEMP_INDEX_PREFIX) { - /* The index->online_status may change if the - index->name starts with TEMP_INDEX_PREFIX (meaning - that the index is or was being created online). It is - protected by index->lock. */ - mtr_s_lock(dict_index_get_lock(index), &mtr); - - if (dict_index_is_online_ddl(index)) { - /* Online secondary index creation will not - copy any delete-marked records. Therefore - there is nothing to be purged. 
We must also - skip the purge when a completed index is - dropped by rollback_inplace_alter_table(). */ - goto func_exit_no_pcur; - } - - mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED | BTR_DELETE; - } else { - /* For secondary indexes, - index->online_status==ONLINE_INDEX_CREATION unless - index->name starts with TEMP_INDEX_PREFIX. */ - ut_ad(!dict_index_is_online_ddl(index)); - - mode = BTR_MODIFY_LEAF | BTR_DELETE; - } - - /* Set the purge node for the call to row_purge_poss_sec(). */ - pcur.btr_cur.purge_node = node; - /* Set the query thread, so that ibuf_insert_low() will be - able to invoke thd_get_trx(). */ - pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node)); - - search_result = row_search_index_entry( - index, entry, mode, &pcur, &mtr); - - switch (search_result) { - case ROW_FOUND: - /* Before attempting to purge a record, check - if it is safe to do so. */ - if (row_purge_poss_sec(node, index, entry)) { - btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); - - /* Only delete-marked records should be purged. */ - if (!rec_get_deleted_flag( - btr_cur_get_rec(btr_cur), - dict_table_is_comp(index->table))) { - - fputs("InnoDB: tried to purge sec index" - " entry not marked for deletion in\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, NULL, index); - fputs("\n" - "InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, btr_cur_get_rec(btr_cur), - index); - putc('\n', stderr); - - ut_ad(0); - - btr_pcur_close(&pcur); - - goto func_exit_no_pcur; - } - - if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) { - - /* The index entry could not be deleted. */ - success = false; - } - } - /* (The index entry is still needed, - or the deletion succeeded) */ - /* fall through */ - case ROW_NOT_DELETED_REF: - /* The index entry is still needed. */ - case ROW_BUFFERED: - /* The deletion was buffered. */ - case ROW_NOT_FOUND: - /* The index entry does not exist, nothing to do. 
*/ - btr_pcur_close(&pcur); - func_exit_no_pcur: - mtr_commit(&mtr); - return(success); - } - - ut_error; - return(FALSE); -} - -/***********************************************************//** -Removes a secondary index entry if possible. */ -UNIV_INLINE MY_ATTRIBUTE((nonnull(1,2))) -void -row_purge_remove_sec_if_poss( -/*=========================*/ - purge_node_t* node, /*!< in: row purge node */ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry) /*!< in: index entry */ -{ - ibool success; - ulint n_tries = 0; - - /* fputs("Purge: Removing secondary record\n", stderr); */ - - if (!entry) { - /* The node->row must have lacked some fields of this - index. This is possible when the undo log record was - written before this index was created. */ - return; - } - - if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) { - - return; - } -retry: - success = row_purge_remove_sec_if_poss_tree(node, index, entry); - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - - n_tries++; - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - - ut_a(success); -} - -/***********************************************************//** -Purges a delete marking of a record. 
-@retval true if the row was not found, or it was successfully removed -@retval false the purge needs to be suspended because of -running out of file space */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_purge_del_mark( -/*===============*/ - purge_node_t* node) /*!< in/out: row purge node */ -{ - mem_heap_t* heap; - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - /* skip corrupted secondary index */ - dict_table_skip_corrupt_index(node->index); - - if (!node->index) { - break; - } - - if (node->index->type != DICT_FTS) { - dtuple_t* entry = row_build_index_entry_low( - node->row, NULL, node->index, heap); - row_purge_remove_sec_if_poss(node, node->index, entry); - mem_heap_empty(heap); - } - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - - return(row_purge_remove_clust_if_poss(node)); -} - -/***********************************************************//** -Purges an update of an existing record. Also purges an update of a delete -marked record if that record contained an externally stored field. 
*/ -static -void -row_purge_upd_exist_or_extern_func( -/*===============================*/ -#ifdef UNIV_DEBUG - const que_thr_t*thr, /*!< in: query thread */ -#endif /* UNIV_DEBUG */ - purge_node_t* node, /*!< in: row purge node */ - trx_undo_rec_t* undo_rec) /*!< in: record to purge */ -{ - mem_heap_t* heap; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - if (node->rec_type == TRX_UNDO_UPD_DEL_REC - || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - - goto skip_secondaries; - } - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - dict_table_skip_corrupt_index(node->index); - - if (!node->index) { - break; - } - - if (row_upd_changes_ord_field_binary(node->index, node->update, - thr, NULL, NULL)) { - /* Build the older version of the index entry */ - dtuple_t* entry = row_build_index_entry_low( - node->row, NULL, node->index, heap); - row_purge_remove_sec_if_poss(node, node->index, entry); - mem_heap_empty(heap); - } - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - -skip_secondaries: - /* Free possible externally stored fields */ - for (ulint i = 0; i < upd_get_n_fields(node->update); i++) { - - const upd_field_t* ufield - = upd_get_nth_field(node->update, i); - - if (dfield_is_ext(&ufield->new_val)) { - trx_rseg_t* rseg; - buf_block_t* block; - ulint internal_offset; - byte* data_field; - dict_index_t* index; - ibool is_insert; - ulint rseg_id; - ulint page_no; - ulint offset; - mtr_t mtr; - - /* We use the fact that new_val points to - undo_rec and get thus the offset of - dfield data inside the undo record. 
Then we - can calculate from node->roll_ptr the file - address of the new_val data */ - - internal_offset - = ((const byte*) - dfield_get_data(&ufield->new_val)) - - undo_rec; - - ut_a(internal_offset < UNIV_PAGE_SIZE); - - trx_undo_decode_roll_ptr(node->roll_ptr, - &is_insert, &rseg_id, - &page_no, &offset); - - rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id); - ut_a(rseg != NULL); - ut_a(rseg->id == rseg_id); - - mtr_start(&mtr); - - /* We have to acquire an X-latch to the clustered - index tree */ - - index = dict_table_get_first_index(node->table); - mtr_x_lock(dict_index_get_lock(index), &mtr); - - /* NOTE: we must also acquire an X-latch to the - root page of the tree. We will need it when we - free pages from the tree. If the tree is of height 1, - the tree X-latch does NOT protect the root page, - because it is also a leaf page. Since we will have a - latch on an undo log page, we would break the - latching order if we would only later latch the - root page of such a tree! */ - - btr_root_get(index, &mtr); - - block = buf_page_get( - rseg->space, 0, page_no, RW_X_LATCH, &mtr); - - buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); - - data_field = buf_block_get_frame(block) - + offset + internal_offset; - - ut_a(dfield_get_len(&ufield->new_val) - >= BTR_EXTERN_FIELD_REF_SIZE); - btr_free_externally_stored_field( - index, - data_field + dfield_get_len(&ufield->new_val) - - BTR_EXTERN_FIELD_REF_SIZE, - NULL, NULL, NULL, 0, RB_NONE, &mtr); - mtr_commit(&mtr); - } - } -} - -#ifdef UNIV_DEBUG -# define row_purge_upd_exist_or_extern(thr,node,undo_rec) \ - row_purge_upd_exist_or_extern_func(thr,node,undo_rec) -#else /* UNIV_DEBUG */ -# define row_purge_upd_exist_or_extern(thr,node,undo_rec) \ - row_purge_upd_exist_or_extern_func(node,undo_rec) -#endif /* UNIV_DEBUG */ - -/***********************************************************//** -Parses the row reference and other info in a modify undo log record. 
-@return true if purge operation required */ -static -bool -row_purge_parse_undo_rec( -/*=====================*/ - purge_node_t* node, /*!< in: row undo node */ - trx_undo_rec_t* undo_rec, /*!< in: record to purge */ - bool* updated_extern, /*!< out: true if an externally - stored field was updated */ - que_thr_t* thr) /*!< in: query thread */ -{ - dict_index_t* clust_index; - byte* ptr; - trx_t* trx; - undo_no_t undo_no; - table_id_t table_id; - trx_id_t trx_id; - roll_ptr_t roll_ptr; - ulint info_bits; - ulint type; - - ut_ad(node != NULL); - ut_ad(thr != NULL); - - ptr = trx_undo_rec_get_pars( - undo_rec, &type, &node->cmpl_info, - updated_extern, &undo_no, &table_id); - - node->rec_type = type; - - if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) { - - return(false); - } - - ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); - node->table = NULL; - - /* Prevent DROP TABLE etc. from running when we are doing the purge - for this row */ - - rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__); - - node->table = dict_table_open_on_id( - table_id, FALSE, DICT_TABLE_OP_NORMAL); - - if (node->table == NULL) { - /* The table has been dropped: no need to do purge */ - goto err_exit; - } - - if (node->table->file_unreadable) { - /* We skip purge of missing .ibd files */ - - dict_table_close(node->table, FALSE, FALSE); - - node->table = NULL; - - goto err_exit; - } - - clust_index = dict_table_get_first_index(node->table); - - if (clust_index == NULL) { - /* The table was corrupt in the data dictionary. - dict_set_corrupted() works on an index, and - we do not have an index to call it with. 
*/ -close_exit: - dict_table_close(node->table, FALSE, FALSE); -err_exit: - rw_lock_s_unlock(&dict_operation_lock); - return(false); - } - - if (type == TRX_UNDO_UPD_EXIST_REC - && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) - && !*updated_extern) { - - /* Purge requires no changes to indexes: we may return */ - goto close_exit; - } - - ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), - node->heap); - - trx = thr_get_trx(thr); - - ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, - roll_ptr, info_bits, trx, - node->heap, &(node->update)); - - /* Read to the partial row the fields that occur in indexes */ - - if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - ptr = trx_undo_rec_get_partial_row( - ptr, clust_index, &node->row, - type == TRX_UNDO_UPD_DEL_REC, - node->heap); - } - - return(true); -} - -/***********************************************************//** -Purges the parsed record. -@return true if purged, false if skipped */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_purge_record_func( -/*==================*/ - purge_node_t* node, /*!< in: row purge node */ - trx_undo_rec_t* undo_rec, /*!< in: record to purge */ -#ifdef UNIV_DEBUG - const que_thr_t*thr, /*!< in: query thread */ -#endif /* UNIV_DEBUG */ - bool updated_extern) /*!< in: whether external columns - were updated */ -{ - dict_index_t* clust_index; - bool purged = true; - - ut_ad(!node->found_clust); - - clust_index = dict_table_get_first_index(node->table); - - node->index = dict_table_get_next_index(clust_index); - - switch (node->rec_type) { - case TRX_UNDO_DEL_MARK_REC: - purged = row_purge_del_mark(node); - if (!purged) { - break; - } - MONITOR_INC(MONITOR_N_DEL_ROW_PURGE); - break; - default: - if (!updated_extern) { - break; - } - /* fall through */ - case TRX_UNDO_UPD_EXIST_REC: - row_purge_upd_exist_or_extern(thr, node, undo_rec); - MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN); - break; - } - - if (node->found_clust) { - 
btr_pcur_close(&node->pcur); - node->found_clust = FALSE; - } - - if (node->table != NULL) { - dict_table_close(node->table, FALSE, FALSE); - node->table = NULL; - } - - return(purged); -} - -#ifdef UNIV_DEBUG -# define row_purge_record(node,undo_rec,thr,updated_extern) \ - row_purge_record_func(node,undo_rec,thr,updated_extern) -#else /* UNIV_DEBUG */ -# define row_purge_record(node,undo_rec,thr,updated_extern) \ - row_purge_record_func(node,undo_rec,updated_extern) -#endif /* UNIV_DEBUG */ - -/***********************************************************//** -Fetches an undo log record and does the purge for the recorded operation. -If none left, or the current purge completed, returns the control to the -parent node, which is always a query thread node. */ -static -void -row_purge( -/*======*/ - purge_node_t* node, /*!< in: row purge node */ - trx_undo_rec_t* undo_rec, /*!< in: record to purge */ - que_thr_t* thr) /*!< in: query thread */ -{ - if (undo_rec != &trx_purge_dummy_rec) { - bool updated_extern; - - while (row_purge_parse_undo_rec( - node, undo_rec, &updated_extern, thr)) { - - bool purged = row_purge_record( - node, undo_rec, thr, updated_extern); - - rw_lock_s_unlock(&dict_operation_lock); - - if (purged - || srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - /* Retry the purge in a second. */ - os_thread_sleep(1000000); - } - } -} - -/***********************************************************//** -Reset the purge query thread. 
*/ -UNIV_INLINE -void -row_purge_end( -/*==========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - purge_node_t* node; - - ut_ad(thr); - - node = static_cast<purge_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); - - thr->run_node = que_node_get_parent(node); - - node->undo_recs = NULL; - - node->done = TRUE; - - ut_a(thr->run_node != NULL); - - mem_heap_empty(node->heap); -} - -/***********************************************************//** -Does the purge operation for a single undo log record. This is a high-level -function used in an SQL execution graph. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_purge_step( -/*===========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - purge_node_t* node; - - ut_ad(thr); - - node = static_cast<purge_node_t*>(thr->run_node); - - node->table = NULL; - node->row = NULL; - node->ref = NULL; - node->index = NULL; - node->update = NULL; - node->found_clust = FALSE; - node->rec_type = ULINT_UNDEFINED; - node->cmpl_info = ULINT_UNDEFINED; - - ut_a(!node->done); - - ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); - - if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) { - trx_purge_rec_t*purge_rec; - - purge_rec = static_cast<trx_purge_rec_t*>( - ib_vector_pop(node->undo_recs)); - - node->roll_ptr = purge_rec->roll_ptr; - - row_purge(node, purge_rec->undo_rec, thr); - - if (ib_vector_is_empty(node->undo_recs)) { - row_purge_end(thr); - } else { - thr->run_node = node; - } - } else { - row_purge_end(thr); - } - - return(thr); -} - -#ifdef UNIV_DEBUG -/***********************************************************//** -Validate the persisent cursor. The purge node has two references -to the clustered index record - one via the ref member, and the -other via the persistent cursor. These two references must match -each other if the found_clust flag is set. 
-@return true if the stored copy of persistent cursor is consistent -with the ref member.*/ -bool -purge_node_t::validate_pcur() -{ - if (!found_clust) { - return(true); - } - - if (index == NULL) { - return(true); - } - - if (index->type == DICT_FTS) { - return(true); - } - - if (pcur.old_stored != BTR_PCUR_OLD_STORED) { - return(true); - } - - dict_index_t* clust_index = pcur.btr_cur.index; - - ulint* offsets = rec_get_offsets( - pcur.old_rec, clust_index, NULL, pcur.old_n_fields, &heap); - - /* Here we are comparing the purge ref record and the stored initial - part in persistent cursor. Both cases we store n_uniq fields of the - cluster index and so it is fine to do the comparison. We note this - dependency here as pcur and ref belong to different modules. */ - int st = cmp_dtuple_rec(ref, pcur.old_rec, offsets); - - if (st != 0) { - fprintf(stderr, "Purge node pcur validation failed\n"); - dtuple_print(stderr, ref); - rec_print(stderr, pcur.old_rec, clust_index); - return(false); - } - - return(true); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/xtradb/row/row0quiesce.cc b/storage/xtradb/row/row0quiesce.cc deleted file mode 100644 index 583fbe60fb3..00000000000 --- a/storage/xtradb/row/row0quiesce.cc +++ /dev/null @@ -1,700 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0quiesce.cc -Quiesce a tablespace. - -Created 2012-02-08 by Sunny Bains. -*******************************************************/ - -#include "row0quiesce.h" -#include "row0mysql.h" - -#ifdef UNIV_NONINL -#include "row0quiesce.ic" -#endif - -#include "ibuf0ibuf.h" -#include "srv0start.h" -#include "trx0purge.h" - -/*********************************************************************//** -Write the meta data (index user fields) config file. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_quiesce_write_index_fields( -/*===========================*/ - const dict_index_t* index, /*!< in: write the meta data for - this index */ - FILE* file, /*!< in: file to write to */ - THD* thd) /*!< in/out: session */ -{ - byte row[sizeof(ib_uint32_t) * 2]; - - for (ulint i = 0; i < index->n_fields; ++i) { - byte* ptr = row; - const dict_field_t* field = &index->fields[i]; - - mach_write_to_4(ptr, field->prefix_len); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, field->fixed_len); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_9", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing index fields."); - - return(DB_IO_ERROR); - } - - /* Include the NUL byte in the length. 
*/ - ib_uint32_t len = static_cast<ib_uint32_t>(strlen(field->name) + 1); - ut_a(len > 1); - - mach_write_to_4(row, len); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_10", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(len), file) != sizeof(len) - || fwrite(field->name, 1, len, file) != len) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing index column."); - - return(DB_IO_ERROR); - } - } - - return(DB_SUCCESS); -} - -/*********************************************************************//** -Write the meta data config file index information. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_quiesce_write_indexes( -/*======================*/ - const dict_table_t* table, /*!< in: write the meta data for - this table */ - FILE* file, /*!< in: file to write to */ - THD* thd) /*!< in/out: session */ -{ - { - byte row[sizeof(ib_uint32_t)]; - - /* Write the number of indexes in the table. */ - mach_write_to_4(row, UT_LIST_GET_LEN(table->indexes)); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_11", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing index count."); - - return(DB_IO_ERROR); - } - } - - dberr_t err = DB_SUCCESS; - - /* Write the index meta data. 
*/ - for (const dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != 0 && err == DB_SUCCESS; - index = UT_LIST_GET_NEXT(indexes, index)) { - - byte* ptr; - byte row[sizeof(index_id_t) - + sizeof(ib_uint32_t) * 8]; - - ptr = row; - - ut_ad(sizeof(index_id_t) == 8); - mach_write_to_8(ptr, index->id); - ptr += sizeof(index_id_t); - - mach_write_to_4(ptr, index->space); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->page); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->type); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->trx_id_offset); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->n_user_defined_cols); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->n_uniq); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->n_nullable); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, index->n_fields); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_12", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing index meta-data."); - - return(DB_IO_ERROR); - } - - /* Write the length of the index name. - NUL byte is included in the length. */ - ib_uint32_t len = static_cast<ib_uint32_t>(strlen(index->name) + 1); - ut_a(len > 1); - - mach_write_to_4(row, len); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_1", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(len), file) != sizeof(len) - || fwrite(index->name, 1, len, file) != len) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing index name."); - - return(DB_IO_ERROR); - } - - err = row_quiesce_write_index_fields(index, file, thd); - } - - return(err); -} - -/*********************************************************************//** -Write the meta data (table columns) config file. 
Serialise the contents of -dict_col_t structure, along with the column name. All fields are serialized -as ib_uint32_t. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_quiesce_write_table( -/*====================*/ - const dict_table_t* table, /*!< in: write the meta data for - this table */ - FILE* file, /*!< in: file to write to */ - THD* thd) /*!< in/out: session */ -{ - dict_col_t* col; - byte row[sizeof(ib_uint32_t) * 7]; - - col = table->cols; - - for (ulint i = 0; i < table->n_cols; ++i, ++col) { - byte* ptr = row; - - mach_write_to_4(ptr, col->prtype); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, col->mtype); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, col->len); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, col->mbminmaxlen); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, col->ind); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, col->ord_part); - ptr += sizeof(ib_uint32_t); - - mach_write_to_4(ptr, col->max_prefix); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_2", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing table column data."); - - return(DB_IO_ERROR); - } - - /* Write out the column name as [len, byte array]. The len - includes the NUL byte. */ - ib_uint32_t len; - const char* col_name; - - col_name = dict_table_get_col_name(table, dict_col_get_no(col)); - - /* Include the NUL byte in the length. 
*/ - len = static_cast<ib_uint32_t>(strlen(col_name) + 1); - ut_a(len > 1); - - mach_write_to_4(row, len); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_3", - close(fileno(file));); - - if (fwrite(row, 1, sizeof(len), file) != sizeof(len) - || fwrite(col_name, 1, len, file) != len) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing column name."); - - return(DB_IO_ERROR); - } - } - - return(DB_SUCCESS); -} - -/*********************************************************************//** -Write the meta data config file header. -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_quiesce_write_header( -/*=====================*/ - const dict_table_t* table, /*!< in: write the meta data for - this table */ - FILE* file, /*!< in: file to write to */ - THD* thd) /*!< in/out: session */ -{ - byte value[sizeof(ib_uint32_t)]; - - /* Write the meta-data version number. */ - mach_write_to_4(value, IB_EXPORT_CFG_VERSION_V1); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_4", close(fileno(file));); - - if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)) { - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing meta-data version number."); - - return(DB_IO_ERROR); - } - - /* Write the server hostname. */ - ib_uint32_t len; - const char* hostname = server_get_hostname(); - - /* Play it safe and check for NULL. */ - if (hostname == 0) { - static const char NullHostname[] = "Hostname unknown"; - - ib_logf(IB_LOG_LEVEL_WARN, - "Unable to determine server hostname."); - - hostname = NullHostname; - } - - /* The server hostname includes the NUL byte. 
*/ - len = static_cast<ib_uint32_t>(strlen(hostname) + 1); - mach_write_to_4(value, len); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_5", close(fileno(file));); - - if (fwrite(&value, 1, sizeof(value), file) != sizeof(value) - || fwrite(hostname, 1, len, file) != len) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing hostname."); - - return(DB_IO_ERROR); - } - - /* The table name includes the NUL byte. */ - ut_a(table->name != 0); - len = static_cast<ib_uint32_t>(strlen(table->name) + 1); - - /* Write the table name. */ - mach_write_to_4(value, len); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_6", close(fileno(file));); - - if (fwrite(&value, 1, sizeof(value), file) != sizeof(value) - || fwrite(table->name, 1, len, file) != len) { - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing table name."); - - return(DB_IO_ERROR); - } - - byte row[sizeof(ib_uint32_t) * 3]; - - /* Write the next autoinc value. */ - mach_write_to_8(row, table->autoinc); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_7", close(fileno(file));); - - if (fwrite(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) { - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing table autoinc value."); - - return(DB_IO_ERROR); - } - - byte* ptr = row; - - /* Write the system page size. */ - mach_write_to_4(ptr, UNIV_PAGE_SIZE); - ptr += sizeof(ib_uint32_t); - - /* Write the table->flags. */ - mach_write_to_4(ptr, table->flags); - ptr += sizeof(ib_uint32_t); - - /* Write the number of columns in the table. 
*/ - mach_write_to_4(ptr, table->n_cols); - - DBUG_EXECUTE_IF("ib_export_io_write_failure_8", close(fileno(file));); - - if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) { - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), - "while writing table meta-data."); - - return(DB_IO_ERROR); - } - - return(DB_SUCCESS); -} - -/*********************************************************************//** -Write the table meta data after quiesce. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_quiesce_write_cfg( -/*==================*/ - dict_table_t* table, /*!< in: write the meta data for - this table */ - THD* thd) /*!< in/out: session */ -{ - dberr_t err; - char name[OS_FILE_MAX_PATH]; - - srv_get_meta_data_filename(table, name, sizeof(name)); - - ib_logf(IB_LOG_LEVEL_INFO, "Writing table metadata to '%s'", name); - - FILE* file = fopen(name, "w+b"); - - if (file == NULL) { - ib_errf(thd, IB_LOG_LEVEL_WARN, ER_CANT_CREATE_FILE, - name, errno, strerror(errno)); - - err = DB_IO_ERROR; - } else { - err = row_quiesce_write_header(table, file, thd); - - if (err == DB_SUCCESS) { - err = row_quiesce_write_table(table, file, thd); - } - - if (err == DB_SUCCESS) { - err = row_quiesce_write_indexes(table, file, thd); - } - - if (fflush(file) != 0) { - - char msg[BUFSIZ]; - - ut_snprintf(msg, sizeof(msg), "%s flush() failed", - name); - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), msg); - } - - if (fclose(file) != 0) { - char msg[BUFSIZ]; - - ut_snprintf(msg, sizeof(msg), "%s flose() failed", - name); - - ib_senderrf( - thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR, - errno, strerror(errno), msg); - } - } - - return(err); -} - -/*********************************************************************//** -Check whether a table has an FTS index defined on it. 
-@return true if an FTS index exists on the table */ -static -bool -row_quiesce_table_has_fts_index( -/*============================*/ - const dict_table_t* table) /*!< in: quiesce this table */ -{ - bool exists = false; - - dict_mutex_enter_for_mysql(); - - for (const dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != 0; - index = UT_LIST_GET_NEXT(indexes, index)) { - - if (index->type & DICT_FTS) { - exists = true; - break; - } - } - - dict_mutex_exit_for_mysql(); - - return(exists); -} - -/*********************************************************************//** -Quiesce the tablespace that the table resides in. */ -UNIV_INTERN -void -row_quiesce_table_start( -/*====================*/ - dict_table_t* table, /*!< in: quiesce this table */ - trx_t* trx) /*!< in/out: transaction/session */ -{ - ut_a(trx->mysql_thd != 0); - ut_a(srv_n_purge_threads > 0); - ut_ad(!srv_read_only_mode); - - char table_name[MAX_FULL_NAME_LEN + 1]; - - ut_a(trx->mysql_thd != 0); - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - ib_logf(IB_LOG_LEVEL_INFO, - "Sync to disk of '%s' started.", table_name); - - if (trx_purge_state() != PURGE_STATE_DISABLED) { - trx_purge_stop(); - } - - for (ulint count = 0; - ibuf_merge_space(table->space) != 0 - && !trx_is_interrupted(trx); - ++count) { - if (!(count % 20)) { - ib_logf(IB_LOG_LEVEL_INFO, - "Merging change buffer entries for '%s'", - table_name); - } - } - - if (!trx_is_interrupted(trx)) { - buf_LRU_flush_or_remove_pages( - table->space, BUF_REMOVE_FLUSH_WRITE, trx); - - if (trx_is_interrupted(trx)) { - - ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!"); - - } else if (row_quiesce_write_cfg(table, trx->mysql_thd) - != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_WARN, - "There was an error writing to the " - "meta data file"); - } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Table '%s' flushed to disk", table_name); - } - } else { - ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!"); - } - - dberr_t err = 
row_quiesce_set_state(table, QUIESCE_COMPLETE, trx); - ut_a(err == DB_SUCCESS); -} - -/*********************************************************************//** -Cleanup after table quiesce. */ -UNIV_INTERN -void -row_quiesce_table_complete( -/*=======================*/ - dict_table_t* table, /*!< in: quiesce this table */ - trx_t* trx) /*!< in/out: transaction/session */ -{ - ulint count = 0; - char table_name[MAX_FULL_NAME_LEN + 1]; - - ut_a(trx->mysql_thd != 0); - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - /* We need to wait for the operation to complete if the - transaction has been killed. */ - - while (table->quiesce != QUIESCE_COMPLETE) { - - /* Print a warning after every minute. */ - if (!(count % 60)) { - ib_logf(IB_LOG_LEVEL_WARN, - "Waiting for quiesce of '%s' to complete", - table_name); - } - - /* Sleep for a second. */ - os_thread_sleep(1000000); - - ++count; - } - - /* Remove the .cfg file now that the user has resumed - normal operations. Otherwise it will cause problems when - the user tries to drop the database (remove directory). */ - char cfg_name[OS_FILE_MAX_PATH]; - - srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name)); - - os_file_delete_if_exists(innodb_file_data_key, cfg_name); - - ib_logf(IB_LOG_LEVEL_INFO, - "Deleting the meta-data file '%s'", cfg_name); - - if (trx_purge_state() != PURGE_STATE_DISABLED) { - trx_purge_run(); - } - - dberr_t err = row_quiesce_set_state(table, QUIESCE_NONE, trx); - ut_a(err == DB_SUCCESS); -} - -/*********************************************************************//** -Set a table's quiesce state. -@return DB_SUCCESS or error code. 
*/ -UNIV_INTERN -dberr_t -row_quiesce_set_state( -/*==================*/ - dict_table_t* table, /*!< in: quiesce this table */ - ib_quiesce_t state, /*!< in: quiesce state to set */ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_a(srv_n_purge_threads > 0); - - if (srv_read_only_mode) { - - ib_senderrf(trx->mysql_thd, - IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); - - return(DB_UNSUPPORTED); - - } else if (table->space == TRX_SYS_SPACE) { - - char table_name[MAX_FULL_NAME_LEN + 1]; - - innobase_format_name( - table_name, sizeof(table_name), table->name, FALSE); - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN, - ER_TABLE_IN_SYSTEM_TABLESPACE, table_name); - - return(DB_UNSUPPORTED); - } else if (row_quiesce_table_has_fts_index(table)) { - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN, - ER_NOT_SUPPORTED_YET, - "FLUSH TABLES on tables that have an FTS index. " - "FTS auxiliary tables will not be flushed."); - - } else if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) { - /* If this flag is set then the table may not have any active - FTS indexes but it will still have the auxiliary tables. */ - - ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN, - ER_NOT_SUPPORTED_YET, - "FLUSH TABLES on a table that had an FTS index, " - "created on a hidden column, the " - "auxiliary tables haven't been dropped as yet. 
" - "FTS auxiliary tables will not be flushed."); - } - - row_mysql_lock_data_dictionary(trx); - - dict_table_x_lock_indexes(table); - - switch (state) { - case QUIESCE_START: - break; - - case QUIESCE_COMPLETE: - ut_a(table->quiesce == QUIESCE_START); - break; - - case QUIESCE_NONE: - ut_a(table->quiesce == QUIESCE_COMPLETE); - break; - } - - table->quiesce = state; - - dict_table_x_unlock_indexes(table); - - row_mysql_unlock_data_dictionary(trx); - - return(DB_SUCCESS); -} - diff --git a/storage/xtradb/row/row0row.cc b/storage/xtradb/row/row0row.cc deleted file mode 100644 index 96d25e15777..00000000000 --- a/storage/xtradb/row/row0row.cc +++ /dev/null @@ -1,1260 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0row.cc -General row routines - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#include "row0row.h" - -#ifdef UNIV_NONINL -#include "row0row.ic" -#endif - -#include "data0type.h" -#include "dict0dict.h" -#include "btr0btr.h" -#include "ha_prototypes.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0ext.h" -#include "row0upd.h" -#include "rem0cmp.h" -#include "read0read.h" -#include "ut0mem.h" - -/*****************************************************************//** -When an insert or purge to a table is performed, this function builds -the entry to be inserted into or purged from an index on the table. -@return index entry which should be inserted or purged -@retval NULL if the externally stored columns in the clustered index record -are unavailable and ext != NULL, or row is missing some needed columns. 
*/ -UNIV_INTERN -dtuple_t* -row_build_index_entry_low( -/*======================*/ - const dtuple_t* row, /*!< in: row which should be - inserted or purged */ - const row_ext_t* ext, /*!< in: externally stored column - prefixes, or NULL */ - dict_index_t* index, /*!< in: index on the table */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory for the index entry - is allocated */ -{ - dtuple_t* entry; - ulint entry_len; - ulint i; - - entry_len = dict_index_get_n_fields(index); - entry = dtuple_create(heap, entry_len); - - if (dict_index_is_univ(index)) { - dtuple_set_n_fields_cmp(entry, entry_len); - /* There may only be externally stored columns - in a clustered index B-tree of a user table. */ - ut_a(!ext); - } else { - dtuple_set_n_fields_cmp( - entry, dict_index_get_n_unique_in_tree(index)); - } - - for (i = 0; i < entry_len; i++) { - const dict_field_t* ind_field - = dict_index_get_nth_field(index, i); - const dict_col_t* col - = ind_field->col; - ulint col_no - = dict_col_get_no(col); - dfield_t* dfield - = dtuple_get_nth_field(entry, i); - const dfield_t* dfield2 - = dtuple_get_nth_field(row, col_no); - ulint len; - -#if DATA_MISSING != 0 -# error "DATA_MISSING != 0" -#endif - if (UNIV_UNLIKELY(dfield_get_type(dfield2)->mtype - == DATA_MISSING)) { - /* The field has not been initialized in the row. - This should be from trx_undo_rec_get_partial_row(). */ - return(NULL); - } - - len = dfield_get_len(dfield2); - - dfield_copy(dfield, dfield2); - - if (dfield_is_null(dfield)) { - continue; - } - - if (ind_field->prefix_len == 0 - && (!dfield_is_ext(dfield) - || dict_index_is_clust(index))) { - /* The dfield_copy() above suffices for - columns that are stored in-page, or for - clustered index record columns that are not - part of a column prefix in the PRIMARY KEY. */ - continue; - } - - /* If the column is stored externally (off-page) in - the clustered index, it must be an ordering field in - the secondary index. 
In the Antelope format, only - prefix-indexed columns may be stored off-page in the - clustered index record. In the Barracuda format, also - fully indexed long CHAR or VARCHAR columns may be - stored off-page. */ - ut_ad(col->ord_part); - - if (ext) { - /* See if the column is stored externally. */ - const byte* buf = row_ext_lookup(ext, col_no, - &len); - if (UNIV_LIKELY_NULL(buf)) { - if (UNIV_UNLIKELY(buf == field_ref_zero)) { - return(NULL); - } - dfield_set_data(dfield, buf, len); - } - - if (ind_field->prefix_len == 0) { - /* In the Barracuda format - (ROW_FORMAT=DYNAMIC or - ROW_FORMAT=COMPRESSED), we can have a - secondary index on an entire column - that is stored off-page in the - clustered index. As this is not a - prefix index (prefix_len == 0), - include the entire off-page column in - the secondary index record. */ - continue; - } - } else if (dfield_is_ext(dfield)) { - /* This table is either in Antelope format - (ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT) - or a purge record where the ordered part of - the field is not external. - In Antelope, the maximum column prefix - index length is 767 bytes, and the clustered - index record contains a 768-byte prefix of - each off-page column. */ - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - len -= BTR_EXTERN_FIELD_REF_SIZE; - dfield_set_len(dfield, len); - } - - /* If a column prefix index, take only the prefix. */ - if (ind_field->prefix_len) { - len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, - ind_field->prefix_len, len, - static_cast<char*>(dfield_get_data(dfield))); - dfield_set_len(dfield, len); - } - } - - return(entry); -} - -/*******************************************************************//** -An inverse function to row_build_index_entry. Builds a row from a -record in a clustered index. -@return own: row built; see the NOTE below! 
*/ -UNIV_INTERN -dtuple_t* -row_build( -/*======*/ - ulint type, /*!< in: ROW_COPY_POINTERS or - ROW_COPY_DATA; the latter - copies also the data fields to - heap while the first only - places pointers to data fields - on the index page, and thus is - more efficient */ - const dict_index_t* index, /*!< in: clustered index */ - const rec_t* rec, /*!< in: record in the clustered - index; NOTE: in the case - ROW_COPY_POINTERS the data - fields in the row will point - directly into this record, - therefore, the buffer page of - this record must be at least - s-latched and the latch held - as long as the row dtuple is used! */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index) - or NULL, in which case this function - will invoke rec_get_offsets() */ - const dict_table_t* col_table, - /*!< in: table, to check which - externally stored columns - occur in the ordering columns - of an index, or NULL if - index->table should be - consulted instead */ - const dtuple_t* add_cols, - /*!< in: default values of - added columns, or NULL */ - const ulint* col_map,/*!< in: mapping of old column - numbers to new ones, or NULL */ - row_ext_t** ext, /*!< out, own: cache of - externally stored column - prefixes, or NULL */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory needed is allocated */ -{ - const byte* copy; - dtuple_t* row; - ulint n_ext_cols; - ulint* ext_cols = NULL; /* remove warning */ - ulint len; - byte* buf; - ulint j; - mem_heap_t* tmp_heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - - ut_ad(index != NULL); - ut_ad(rec != NULL); - ut_ad(heap != NULL); - ut_ad(dict_index_is_clust(index)); - ut_ad(!mutex_own(&trx_sys->mutex)); - ut_ad(!col_map || col_table); - - if (!offsets) { - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &tmp_heap); - } else { - ut_ad(rec_offs_validate(rec, index, offsets)); - } - -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - if (rec_offs_any_null_extern(rec, 
offsets)) { - /* This condition can occur during crash recovery - before trx_rollback_active() has completed execution, - or when a concurrently executing - row_ins_index_entry_low() has committed the B-tree - mini-transaction but has not yet managed to restore - the cursor position for writing the big_rec. */ - ut_a(trx_undo_roll_ptr_is_insert( - row_get_rec_roll_ptr(rec, index, offsets))); - } -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - if (type != ROW_COPY_POINTERS) { - /* Take a copy of rec to heap */ - buf = static_cast<byte*>( - mem_heap_alloc(heap, rec_offs_size(offsets))); - - copy = rec_copy(buf, rec, offsets); - } else { - copy = rec; - } - - n_ext_cols = rec_offs_n_extern(offsets); - if (n_ext_cols) { - ext_cols = static_cast<ulint*>( - mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols)); - } - - /* Avoid a debug assertion in rec_offs_validate(). */ - rec_offs_make_valid(copy, index, const_cast<ulint*>(offsets)); - - if (!col_table) { - ut_ad(!col_map); - ut_ad(!add_cols); - col_table = index->table; - } - - if (add_cols) { - ut_ad(col_map); - row = dtuple_copy(add_cols, heap); - /* dict_table_copy_types() would set the fields to NULL */ - for (ulint i = 0; i < dict_table_get_n_cols(col_table); i++) { - dict_col_copy_type( - dict_table_get_nth_col(col_table, i), - dfield_get_type(dtuple_get_nth_field(row, i))); - } - } else { - row = dtuple_create(heap, dict_table_get_n_cols(col_table)); - dict_table_copy_types(row, col_table); - } - - dtuple_set_info_bits(row, rec_get_info_bits( - copy, rec_offs_comp(offsets))); - - j = 0; - - for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) { - const dict_field_t* ind_field - = dict_index_get_nth_field(index, i); - - if (ind_field->prefix_len) { - /* Column prefixes can only occur in key - fields, which cannot be stored externally. For - a column prefix, there should also be the full - field in the clustered index tuple. The row - tuple comprises full fields, not prefixes. 
*/ - ut_ad(!rec_offs_nth_extern(offsets, i)); - continue; - } - - const dict_col_t* col - = dict_field_get_col(ind_field); - ulint col_no - = dict_col_get_no(col); - - if (col_map) { - col_no = col_map[col_no]; - - if (col_no == ULINT_UNDEFINED) { - /* dropped column */ - continue; - } - } - - dfield_t* dfield = dtuple_get_nth_field(row, col_no); - - const byte* field = rec_get_nth_field( - copy, offsets, i, &len); - - dfield_set_data(dfield, field, len); - - if (rec_offs_nth_extern(offsets, i)) { - dfield_set_ext(dfield); - - col = dict_table_get_nth_col(col_table, col_no); - - if (col->ord_part) { - /* We will have to fetch prefixes of - externally stored columns that are - referenced by column prefixes. */ - ext_cols[j++] = col_no; - } - } - } - - rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets)); - - ut_ad(dtuple_check_typed(row)); - - if (!ext) { - /* REDUNDANT and COMPACT formats store a local - 768-byte prefix of each externally stored - column. No cache is needed. - - During online table rebuild, - row_log_table_apply_delete_low() - may use a cache that was set up by - row_log_table_delete(). */ - - } else if (j) { - *ext = row_ext_create(j, ext_cols, index->table->flags, row, - heap); - } else { - *ext = NULL; - } - - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - return(row); -} - -/*******************************************************************//** -Converts an index record to a typed data tuple. 
-@return index entry built; does not set info_bits, and the data fields -in the entry will point directly to rec */ -UNIV_INTERN -dtuple_t* -row_rec_to_index_entry_low( -/*=======================*/ - const rec_t* rec, /*!< in: record in the index */ - const dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint* n_ext, /*!< out: number of externally - stored columns */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory needed is allocated */ -{ - dtuple_t* entry; - dfield_t* dfield; - ulint i; - const byte* field; - ulint len; - ulint rec_len; - - ut_ad(rec != NULL); - ut_ad(heap != NULL); - ut_ad(index != NULL); - /* Because this function may be invoked by row0merge.cc - on a record whose header is in different format, the check - rec_offs_validate(rec, index, offsets) must be avoided here. */ - ut_ad(n_ext); - *n_ext = 0; - - rec_len = rec_offs_n_fields(offsets); - - entry = dtuple_create(heap, rec_len); - - dtuple_set_n_fields_cmp(entry, - dict_index_get_n_unique_in_tree(index)); - ut_ad(rec_len == dict_index_get_n_fields(index)); - - dict_index_copy_types(entry, index, rec_len); - - for (i = 0; i < rec_len; i++) { - - dfield = dtuple_get_nth_field(entry, i); - field = rec_get_nth_field(rec, offsets, i, &len); - - dfield_set_data(dfield, field, len); - - if (rec_offs_nth_extern(offsets, i)) { - dfield_set_ext(dfield); - (*n_ext)++; - } - } - - ut_ad(dtuple_check_typed(entry)); - - return(entry); -} - -/*******************************************************************//** -Converts an index record to a typed data tuple. NOTE that externally -stored (often big) fields are NOT copied to heap. 
-@return own: index entry built */ -UNIV_INTERN -dtuple_t* -row_rec_to_index_entry( -/*===================*/ - const rec_t* rec, /*!< in: record in the index */ - const dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec) */ - ulint* n_ext, /*!< out: number of externally - stored columns */ - mem_heap_t* heap) /*!< in: memory heap from which - the memory needed is allocated */ -{ - dtuple_t* entry; - byte* buf; - const rec_t* copy_rec; - - ut_ad(rec != NULL); - ut_ad(heap != NULL); - ut_ad(index != NULL); - ut_ad(rec_offs_validate(rec, index, offsets)); - - /* Take a copy of rec to heap */ - buf = static_cast<byte*>( - mem_heap_alloc(heap, rec_offs_size(offsets))); - - copy_rec = rec_copy(buf, rec, offsets); - - rec_offs_make_valid(copy_rec, index, const_cast<ulint*>(offsets)); - entry = row_rec_to_index_entry_low( - copy_rec, index, offsets, n_ext, heap); - rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets)); - - dtuple_set_info_bits(entry, - rec_get_info_bits(rec, rec_offs_comp(offsets))); - - return(entry); -} - -/*******************************************************************//** -Builds from a secondary index record a row reference with which we can -search the clustered index record. -@return own: row reference built; see the NOTE below! */ -UNIV_INTERN -dtuple_t* -row_build_row_ref( -/*==============*/ - ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap, whereas the latter only places pointers - to data fields on the index page */ - dict_index_t* index, /*!< in: secondary index */ - const rec_t* rec, /*!< in: record in the index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row reference is used! 
*/ - mem_heap_t* heap) /*!< in: memory heap from which the memory - needed is allocated */ -{ - dict_table_t* table; - dict_index_t* clust_index; - dfield_t* dfield; - dtuple_t* ref; - const byte* field; - ulint len; - ulint ref_len; - ulint pos; - byte* buf; - ulint clust_col_prefix_len; - ulint i; - mem_heap_t* tmp_heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(index != NULL); - ut_ad(rec != NULL); - ut_ad(heap != NULL); - ut_ad(!dict_index_is_clust(index)); - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &tmp_heap); - /* Secondary indexes must not contain externally stored columns. */ - ut_ad(!rec_offs_any_extern(offsets)); - - if (type == ROW_COPY_DATA) { - /* Take a copy of rec to heap */ - - buf = static_cast<byte*>( - mem_heap_alloc(heap, rec_offs_size(offsets))); - - rec = rec_copy(buf, rec, offsets); - /* Avoid a debug assertion in rec_offs_validate(). */ - rec_offs_make_valid(rec, index, offsets); - } - - table = index->table; - - clust_index = dict_table_get_first_index(table); - - ref_len = dict_index_get_n_unique(clust_index); - - ref = dtuple_create(heap, ref_len); - - dict_index_copy_types(ref, clust_index, ref_len); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(ref, i); - - pos = dict_index_get_nth_field_pos(index, clust_index, i); - - ut_a(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, offsets, pos, &len); - - dfield_set_data(dfield, field, len); - - /* If the primary key contains a column prefix, then the - secondary index may contain a longer prefix of the same - column, or the full column, and we must adjust the length - accordingly. 
*/ - - clust_col_prefix_len = dict_index_get_nth_field( - clust_index, i)->prefix_len; - - if (clust_col_prefix_len > 0) { - if (len != UNIV_SQL_NULL) { - - const dtype_t* dtype - = dfield_get_type(dfield); - - dfield_set_len(dfield, - dtype_get_at_most_n_mbchars( - dtype->prtype, - dtype->mbminmaxlen, - clust_col_prefix_len, - len, (char*) field)); - } - } - } - - ut_ad(dtuple_check_typed(ref)); - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - return(ref); -} - -/*******************************************************************//** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ -UNIV_INTERN -void -row_build_row_ref_in_tuple( -/*=======================*/ - dtuple_t* ref, /*!< in/out: row reference built; - see the NOTE below! */ - const rec_t* rec, /*!< in: record in the index; - NOTE: the data fields in ref - will point directly into this - record, therefore, the buffer - page of this record must be at - least s-latched and the latch - held as long as the row - reference is used! 
*/ - const dict_index_t* index, /*!< in: secondary index */ - ulint* offsets,/*!< in: rec_get_offsets(rec, index) - or NULL */ - trx_t* trx) /*!< in: transaction */ -{ - const dict_index_t* clust_index; - dfield_t* dfield; - const byte* field; - ulint len; - ulint ref_len; - ulint pos; - ulint clust_col_prefix_len; - ulint i; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - - ut_a(ref); - ut_a(index); - ut_a(rec); - ut_ad(!dict_index_is_clust(index)); - - if (UNIV_UNLIKELY(!index->table)) { - fputs("InnoDB: table ", stderr); -notfound: - ut_print_name(stderr, trx, TRUE, index->table_name); - fputs(" for index ", stderr); - ut_print_name(stderr, trx, FALSE, index->name); - fputs(" not found\n", stderr); - ut_error; - } - - clust_index = dict_table_get_first_index(index->table); - - if (UNIV_UNLIKELY(!clust_index)) { - fputs("InnoDB: clust index for table ", stderr); - goto notfound; - } - - if (!offsets) { - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - } else { - ut_ad(rec_offs_validate(rec, index, offsets)); - } - - /* Secondary indexes must not contain externally stored columns. */ - ut_ad(!rec_offs_any_extern(offsets)); - ref_len = dict_index_get_n_unique(clust_index); - - ut_ad(ref_len == dtuple_get_n_fields(ref)); - - dict_index_copy_types(ref, clust_index, ref_len); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(ref, i); - - pos = dict_index_get_nth_field_pos(index, clust_index, i); - - ut_a(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, offsets, pos, &len); - - dfield_set_data(dfield, field, len); - - /* If the primary key contains a column prefix, then the - secondary index may contain a longer prefix of the same - column, or the full column, and we must adjust the length - accordingly. 
*/ - - clust_col_prefix_len = dict_index_get_nth_field( - clust_index, i)->prefix_len; - - if (clust_col_prefix_len > 0) { - if (len != UNIV_SQL_NULL) { - - const dtype_t* dtype - = dfield_get_type(dfield); - - dfield_set_len(dfield, - dtype_get_at_most_n_mbchars( - dtype->prtype, - dtype->mbminmaxlen, - clust_col_prefix_len, - len, (char*) field)); - } - } - } - - ut_ad(dtuple_check_typed(ref)); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/***************************************************************//** -Searches the clustered index record for a row, if we have the row reference. -@return TRUE if found */ -UNIV_INTERN -ibool -row_search_on_row_ref( -/*==================*/ - btr_pcur_t* pcur, /*!< out: persistent cursor, which must - be closed by the caller */ - ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */ - const dict_table_t* table, /*!< in: table */ - const dtuple_t* ref, /*!< in: row reference */ - mtr_t* mtr) /*!< in/out: mtr */ -{ - ulint low_match; - rec_t* rec; - dict_index_t* index; - - ut_ad(dtuple_check_typed(ref)); - - index = dict_table_get_first_index(table); - - ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index)); - - btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr); - - low_match = btr_pcur_get_low_match(pcur); - - rec = btr_pcur_get_rec(pcur); - - if (page_rec_is_infimum(rec)) { - - return(FALSE); - } - - if (low_match != dtuple_get_n_fields(ref)) { - - return(FALSE); - } - - return(TRUE); -} - -/*********************************************************************//** -Fetches the clustered index record for a secondary index record. The latches -on the secondary index record are preserved. -@return record or NULL, if no record found */ -UNIV_INTERN -rec_t* -row_get_clust_rec( -/*==============*/ - ulint mode, /*!< in: BTR_MODIFY_LEAF, ... 
*/ - const rec_t* rec, /*!< in: record in a secondary index */ - dict_index_t* index, /*!< in: secondary index */ - dict_index_t** clust_index,/*!< out: clustered index */ - mtr_t* mtr) /*!< in: mtr */ -{ - mem_heap_t* heap; - dtuple_t* ref; - dict_table_t* table; - btr_pcur_t pcur; - ibool found; - rec_t* clust_rec; - - ut_ad(!dict_index_is_clust(index)); - - table = index->table; - - heap = mem_heap_create(256); - - ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap); - - found = row_search_on_row_ref(&pcur, mode, table, ref, mtr); - - clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL; - - mem_heap_free(heap); - - btr_pcur_close(&pcur); - - *clust_index = dict_table_get_first_index(table); - - return(clust_rec); -} - -/***************************************************************//** -Searches an index record. -@return whether the record was found or buffered */ -UNIV_INTERN -enum row_search_result -row_search_index_entry( -/*===================*/ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry, /*!< in: index entry */ - ulint mode, /*!< in: BTR_MODIFY_LEAF, ... 
*/ - btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must - be closed by the caller */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint n_fields; - ulint low_match; - rec_t* rec; - - ut_ad(dtuple_check_typed(entry)); - - btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr); - - switch (btr_pcur_get_btr_cur(pcur)->flag) { - case BTR_CUR_DELETE_REF: - ut_a(mode & BTR_DELETE); - return(ROW_NOT_DELETED_REF); - - case BTR_CUR_DEL_MARK_IBUF: - case BTR_CUR_DELETE_IBUF: - case BTR_CUR_INSERT_TO_IBUF: - return(ROW_BUFFERED); - - case BTR_CUR_HASH: - case BTR_CUR_HASH_FAIL: - case BTR_CUR_BINARY: - break; - } - - low_match = btr_pcur_get_low_match(pcur); - - rec = btr_pcur_get_rec(pcur); - - n_fields = dtuple_get_n_fields(entry); - - if (page_rec_is_infimum(rec)) { - - return(ROW_NOT_FOUND); - } else if (low_match != n_fields) { - - return(ROW_NOT_FOUND); - } - - return(ROW_FOUND); -} - -/*******************************************************************//** -Formats the raw data in "data" (in InnoDB on-disk format) that is of -type DATA_INT using "prtype" and writes the result to "buf". -If the data is in unknown format, then nothing is written to "buf", -0 is returned and "format_in_hex" is set to TRUE, otherwise -"format_in_hex" is left untouched. -Not more than "buf_size" bytes are written to "buf". -The result is always '\0'-terminated (provided buf_size > 0) and the -number of bytes that were written to "buf" is returned (including the -terminating '\0'). 
-@return number of bytes that were written */ -static -ulint -row_raw_format_int( -/*===============*/ - const char* data, /*!< in: raw data */ - ulint data_len, /*!< in: raw data length - in bytes */ - ulint prtype, /*!< in: precise type */ - char* buf, /*!< out: output buffer */ - ulint buf_size, /*!< in: output buffer size - in bytes */ - ibool* format_in_hex) /*!< out: should the data be - formated in hex */ -{ - ulint ret; - - if (data_len <= sizeof(ib_uint64_t)) { - - ib_uint64_t value; - ibool unsigned_type = prtype & DATA_UNSIGNED; - - value = mach_read_int_type( - (const byte*) data, data_len, unsigned_type); - - ret = ut_snprintf( - buf, buf_size, - unsigned_type ? UINT64PF : INT64PF, value) + 1; - } else { - - *format_in_hex = TRUE; - ret = 0; - } - - return(ut_min(ret, buf_size)); -} - -/*******************************************************************//** -Formats the raw data in "data" (in InnoDB on-disk format) that is of -type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "prtype" and writes the -result to "buf". -If the data is in binary format, then nothing is written to "buf", -0 is returned and "format_in_hex" is set to TRUE, otherwise -"format_in_hex" is left untouched. -Not more than "buf_size" bytes are written to "buf". -The result is always '\0'-terminated (provided buf_size > 0) and the -number of bytes that were written to "buf" is returned (including the -terminating '\0'). 
-@return number of bytes that were written */ -static -ulint -row_raw_format_str( -/*===============*/ - const char* data, /*!< in: raw data */ - ulint data_len, /*!< in: raw data length - in bytes */ - ulint prtype, /*!< in: precise type */ - char* buf, /*!< out: output buffer */ - ulint buf_size, /*!< in: output buffer size - in bytes */ - ibool* format_in_hex) /*!< out: should the data be - formated in hex */ -{ - ulint charset_coll; - - if (buf_size == 0) { - - return(0); - } - - /* we assume system_charset_info is UTF-8 */ - - charset_coll = dtype_get_charset_coll(prtype); - - if (UNIV_LIKELY(dtype_is_utf8(prtype))) { - - return(ut_str_sql_format(data, data_len, buf, buf_size)); - } - /* else */ - - if (charset_coll == DATA_MYSQL_BINARY_CHARSET_COLL) { - - *format_in_hex = TRUE; - return(0); - } - /* else */ - - return(innobase_raw_format(data, data_len, charset_coll, - buf, buf_size)); -} - -/*******************************************************************//** -Formats the raw data in "data" (in InnoDB on-disk format) using -"dict_field" and writes the result to "buf". -Not more than "buf_size" bytes are written to "buf". -The result is always NUL-terminated (provided buf_size is positive) and the -number of bytes that were written to "buf" is returned (including the -terminating NUL). 
-@return number of bytes that were written */ -UNIV_INTERN -ulint -row_raw_format( -/*===========*/ - const char* data, /*!< in: raw data */ - ulint data_len, /*!< in: raw data length - in bytes */ - const dict_field_t* dict_field, /*!< in: index field */ - char* buf, /*!< out: output buffer */ - ulint buf_size) /*!< in: output buffer size - in bytes */ -{ - ulint mtype; - ulint prtype; - ulint ret; - ibool format_in_hex; - - if (buf_size == 0) { - - return(0); - } - - if (data_len == UNIV_SQL_NULL) { - - ret = ut_snprintf((char*) buf, buf_size, "NULL") + 1; - - return(ut_min(ret, buf_size)); - } - - mtype = dict_field->col->mtype; - prtype = dict_field->col->prtype; - - format_in_hex = FALSE; - - switch (mtype) { - case DATA_INT: - - ret = row_raw_format_int(data, data_len, prtype, - buf, buf_size, &format_in_hex); - if (format_in_hex) { - - goto format_in_hex; - } - break; - case DATA_CHAR: - case DATA_VARCHAR: - case DATA_MYSQL: - case DATA_VARMYSQL: - - ret = row_raw_format_str(data, data_len, prtype, - buf, buf_size, &format_in_hex); - if (format_in_hex) { - - goto format_in_hex; - } - - break; - /* XXX support more data types */ - default: - format_in_hex: - - if (UNIV_LIKELY(buf_size > 2)) { - - memcpy(buf, "0x", 2); - buf += 2; - buf_size -= 2; - ret = 2 + ut_raw_to_hex(data, data_len, - buf, buf_size); - } else { - - buf[0] = '\0'; - ret = 1; - } - } - - return(ret); -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -#include "ut0dbg.h" - -void -test_row_raw_format_int() -{ - ulint ret; - char buf[128]; - ibool format_in_hex; - speedo_t speedo; - ulint i; - -#define CALL_AND_TEST(data, data_len, prtype, buf, buf_size,\ - ret_expected, buf_expected, format_in_hex_expected)\ - do {\ - ibool ok = TRUE;\ - ulint i;\ - memset(buf, 'x', 10);\ - buf[10] = '\0';\ - format_in_hex = FALSE;\ - fprintf(stderr, "TESTING \"\\x");\ - for (i = 0; i < data_len; i++) {\ - fprintf(stderr, "%02hhX", data[i]);\ - }\ - fprintf(stderr, "\", %lu, %lu, %lu\n",\ - (ulint) data_len, (ulint) 
prtype,\ - (ulint) buf_size);\ - ret = row_raw_format_int(data, data_len, prtype,\ - buf, buf_size, &format_in_hex);\ - if (ret != ret_expected) {\ - fprintf(stderr, "expected ret %lu, got %lu\n",\ - (ulint) ret_expected, ret);\ - ok = FALSE;\ - }\ - if (strcmp((char*) buf, buf_expected) != 0) {\ - fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\ - buf_expected, buf);\ - ok = FALSE;\ - }\ - if (format_in_hex != format_in_hex_expected) {\ - fprintf(stderr, "expected format_in_hex %d, got %d\n",\ - (int) format_in_hex_expected,\ - (int) format_in_hex);\ - ok = FALSE;\ - }\ - if (ok) {\ - fprintf(stderr, "OK: %lu, \"%s\" %d\n\n",\ - (ulint) ret, buf, (int) format_in_hex);\ - } else {\ - return;\ - }\ - } while (0) - -#if 1 - /* min values for signed 1-8 byte integers */ - - CALL_AND_TEST("\x00", 1, 0, - buf, sizeof(buf), 5, "-128", 0); - - CALL_AND_TEST("\x00\x00", 2, 0, - buf, sizeof(buf), 7, "-32768", 0); - - CALL_AND_TEST("\x00\x00\x00", 3, 0, - buf, sizeof(buf), 9, "-8388608", 0); - - CALL_AND_TEST("\x00\x00\x00\x00", 4, 0, - buf, sizeof(buf), 12, "-2147483648", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, 0, - buf, sizeof(buf), 14, "-549755813888", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, 0, - buf, sizeof(buf), 17, "-140737488355328", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, 0, - buf, sizeof(buf), 19, "-36028797018963968", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, 0, - buf, sizeof(buf), 21, "-9223372036854775808", 0); - - /* min values for unsigned 1-8 byte integers */ - - CALL_AND_TEST("\x00", 1, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - CALL_AND_TEST("\x00\x00", 2, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - CALL_AND_TEST("\x00\x00\x00", 3, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - CALL_AND_TEST("\x00\x00\x00\x00", 4, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - 
CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, DATA_UNSIGNED, - buf, sizeof(buf), 2, "0", 0); - - /* max values for signed 1-8 byte integers */ - - CALL_AND_TEST("\xFF", 1, 0, - buf, sizeof(buf), 4, "127", 0); - - CALL_AND_TEST("\xFF\xFF", 2, 0, - buf, sizeof(buf), 6, "32767", 0); - - CALL_AND_TEST("\xFF\xFF\xFF", 3, 0, - buf, sizeof(buf), 8, "8388607", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, 0, - buf, sizeof(buf), 11, "2147483647", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, 0, - buf, sizeof(buf), 13, "549755813887", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, 0, - buf, sizeof(buf), 16, "140737488355327", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, 0, - buf, sizeof(buf), 18, "36028797018963967", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, 0, - buf, sizeof(buf), 20, "9223372036854775807", 0); - - /* max values for unsigned 1-8 byte integers */ - - CALL_AND_TEST("\xFF", 1, DATA_UNSIGNED, - buf, sizeof(buf), 4, "255", 0); - - CALL_AND_TEST("\xFF\xFF", 2, DATA_UNSIGNED, - buf, sizeof(buf), 6, "65535", 0); - - CALL_AND_TEST("\xFF\xFF\xFF", 3, DATA_UNSIGNED, - buf, sizeof(buf), 9, "16777215", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, DATA_UNSIGNED, - buf, sizeof(buf), 11, "4294967295", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, DATA_UNSIGNED, - buf, sizeof(buf), 14, "1099511627775", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, DATA_UNSIGNED, - buf, sizeof(buf), 16, "281474976710655", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, DATA_UNSIGNED, - buf, sizeof(buf), 18, "72057594037927935", 0); - - CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, DATA_UNSIGNED, - buf, sizeof(buf), 21, "18446744073709551615", 0); - - /* some random values */ - - CALL_AND_TEST("\x52", 1, 0, - buf, sizeof(buf), 4, 
"-46", 0); - - CALL_AND_TEST("\x0E", 1, DATA_UNSIGNED, - buf, sizeof(buf), 3, "14", 0); - - CALL_AND_TEST("\x62\xCE", 2, 0, - buf, sizeof(buf), 6, "-7474", 0); - - CALL_AND_TEST("\x29\xD6", 2, DATA_UNSIGNED, - buf, sizeof(buf), 6, "10710", 0); - - CALL_AND_TEST("\x7F\xFF\x90", 3, 0, - buf, sizeof(buf), 5, "-112", 0); - - CALL_AND_TEST("\x00\xA1\x16", 3, DATA_UNSIGNED, - buf, sizeof(buf), 6, "41238", 0); - - CALL_AND_TEST("\x7F\xFF\xFF\xF7", 4, 0, - buf, sizeof(buf), 3, "-9", 0); - - CALL_AND_TEST("\x00\x00\x00\x5C", 4, DATA_UNSIGNED, - buf, sizeof(buf), 3, "92", 0); - - CALL_AND_TEST("\x7F\xFF\xFF\xFF\xFF\xFF\xDC\x63", 8, 0, - buf, sizeof(buf), 6, "-9117", 0); - - CALL_AND_TEST("\x00\x00\x00\x00\x00\x01\x64\x62", 8, DATA_UNSIGNED, - buf, sizeof(buf), 6, "91234", 0); -#endif - - /* speed test */ - - speedo_reset(&speedo); - - for (i = 0; i < 1000000; i++) { - row_raw_format_int("\x23", 1, - 0, buf, sizeof(buf), - &format_in_hex); - row_raw_format_int("\x23", 1, - DATA_UNSIGNED, buf, sizeof(buf), - &format_in_hex); - - row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8, - 0, buf, sizeof(buf), - &format_in_hex); - row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8, - DATA_UNSIGNED, buf, sizeof(buf), - &format_in_hex); - } - - speedo_show(&speedo); -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc deleted file mode 100644 index 8e3ed3d1a4e..00000000000 --- a/storage/xtradb/row/row0sel.cc +++ /dev/null @@ -1,5521 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2015, 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. 
The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/***************************************************//** -@file row/row0sel.cc -Select - -Created 12/19/1997 Heikki Tuuri -*******************************************************/ - -#include "row0sel.h" - -#ifdef UNIV_NONINL -#include "row0sel.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "trx0undo.h" -#include "trx0trx.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "mach0data.h" -#include "que0que.h" -#include "row0upd.h" -#include "row0row.h" -#include "row0vers.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "eval0eval.h" -#include "pars0sym.h" -#include "pars0pars.h" -#include "row0mysql.h" -#include "read0read.h" -#include "buf0lru.h" -#include "srv0srv.h" -#include "ha_prototypes.h" -#include "srv0start.h" -#include "m_string.h" /* for my_sys.h */ -#include "my_sys.h" /* DEBUG_SYNC_C */ -#include "fil0fil.h" - -#include "my_compare.h" /* enum icp_result */ - -#include <vector> - -/* Maximum number of rows to prefetch; MySQL interface has another parameter */ -#define SEL_MAX_N_PREFETCH 16 - -/* Number of rows fetched, after which to start 
prefetching; MySQL interface -has another parameter */ -#define SEL_PREFETCH_LIMIT 1 - -/* When a select has accessed about this many pages, it returns control back -to que_run_threads: this is to allow canceling runaway queries */ - -#define SEL_COST_LIMIT 100 - -/* Flags for search shortcut */ -#define SEL_FOUND 0 -#define SEL_EXHAUSTED 1 -#define SEL_RETRY 2 - -/********************************************************************//** -Returns TRUE if the user-defined column in a secondary index record -is alphabetically the same as the corresponding BLOB column in the clustered -index record. -NOTE: the comparison is NOT done as a binary comparison, but character -fields are compared with collation! -@return TRUE if the columns are equal */ -static -ibool -row_sel_sec_rec_is_for_blob( -/*========================*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type */ - ulint mbminmaxlen, /*!< in: minimum and maximum length of - a multi-byte character */ - const byte* clust_field, /*!< in: the locally stored part of - the clustered index column, including - the BLOB pointer; the clustered - index record must be covered by - a lock or a page latch to protect it - against deletion (rollback or purge) */ - ulint clust_len, /*!< in: length of clust_field */ - const byte* sec_field, /*!< in: column in secondary index */ - ulint sec_len, /*!< in: length of sec_field */ - ulint prefix_len, /*!< in: index column prefix length - in bytes */ - dict_table_t* table) /*!< in: table */ -{ - ulint len; - byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN]; - ulint zip_size = dict_tf_get_zip_size(table->flags); - - /* This function should never be invoked on an Antelope format - table, because they should always contain enough prefix in the - clustered index record. 
*/ - ut_ad(dict_table_get_format(table) >= UNIV_FORMAT_B); - ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE); - ut_ad(prefix_len >= sec_len); - ut_ad(prefix_len > 0); - ut_a(prefix_len <= sizeof buf); - - if (UNIV_UNLIKELY - (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE, - field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) { - /* The externally stored field was not written yet. - This record should only be seen by - recv_recovery_rollback_active() or any - TRX_ISO_READ_UNCOMMITTED transactions. */ - return(FALSE); - } - - len = btr_copy_externally_stored_field_prefix(buf, prefix_len, - zip_size, - clust_field, clust_len, - NULL); - - if (UNIV_UNLIKELY(len == 0)) { - /* The BLOB was being deleted as the server crashed. - There should not be any secondary index records - referring to this clustered index record, because - btr_free_externally_stored_field() is called after all - secondary index entries of the row have been purged. */ - return(FALSE); - } - - len = dtype_get_at_most_n_mbchars(prtype, mbminmaxlen, - prefix_len, len, (const char*) buf); - - return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len)); -} - -/********************************************************************//** -Returns TRUE if the user-defined column values in a secondary index record -are alphabetically the same as the corresponding columns in the clustered -index record. -NOTE: the comparison is NOT done as a binary comparison, but character -fields are compared with collation! 
-@return TRUE if the secondary record is equal to the corresponding -fields in the clustered record, when compared with collation; -FALSE if not equal or if the clustered record has been marked for deletion */ -static -ibool -row_sel_sec_rec_is_for_clust_rec( -/*=============================*/ - const rec_t* sec_rec, /*!< in: secondary index record */ - dict_index_t* sec_index, /*!< in: secondary index */ - const rec_t* clust_rec, /*!< in: clustered index record; - must be protected by a lock or - a page latch against deletion - in rollback or purge */ - dict_index_t* clust_index) /*!< in: clustered index */ -{ - const byte* sec_field; - ulint sec_len; - const byte* clust_field; - ulint n; - ulint i; - mem_heap_t* heap = NULL; - ulint clust_offsets_[REC_OFFS_NORMAL_SIZE]; - ulint sec_offsets_[REC_OFFS_SMALL_SIZE]; - ulint* clust_offs = clust_offsets_; - ulint* sec_offs = sec_offsets_; - ibool is_equal = TRUE; - - rec_offs_init(clust_offsets_); - rec_offs_init(sec_offsets_); - - if (rec_get_deleted_flag(clust_rec, - dict_table_is_comp(clust_index->table))) { - - /* The clustered index record is delete-marked; - it is not visible in the read view. Besides, - if there are any externally stored columns, - some of them may have already been purged. 
*/ - return(FALSE); - } - - clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs, - ULINT_UNDEFINED, &heap); - sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs, - ULINT_UNDEFINED, &heap); - - n = dict_index_get_n_ordering_defined_by_user(sec_index); - - for (i = 0; i < n; i++) { - const dict_field_t* ifield; - const dict_col_t* col; - ulint clust_pos; - ulint clust_len; - ulint len; - - ifield = dict_index_get_nth_field(sec_index, i); - col = dict_field_get_col(ifield); - clust_pos = dict_col_get_clust_pos(col, clust_index); - - clust_field = rec_get_nth_field( - clust_rec, clust_offs, clust_pos, &clust_len); - sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len); - - len = clust_len; - - if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL - && sec_len != UNIV_SQL_NULL) { - - if (rec_offs_nth_extern(clust_offs, clust_pos)) { - len -= BTR_EXTERN_FIELD_REF_SIZE; - } - - len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminmaxlen, - ifield->prefix_len, len, (char*) clust_field); - - if (rec_offs_nth_extern(clust_offs, clust_pos) - && len < sec_len) { - if (!row_sel_sec_rec_is_for_blob( - col->mtype, col->prtype, - col->mbminmaxlen, - clust_field, clust_len, - sec_field, sec_len, - ifield->prefix_len, - clust_index->table)) { - goto inequal; - } - - continue; - } - } - - if (0 != cmp_data_data(col->mtype, col->prtype, - clust_field, len, - sec_field, sec_len)) { -inequal: - is_equal = FALSE; - goto func_exit; - } - } - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(is_equal); -} - -/*********************************************************************//** -Creates a select node struct. 
-@return own: select node struct */ -UNIV_INTERN -sel_node_t* -sel_node_create( -/*============*/ - mem_heap_t* heap) /*!< in: memory heap where created */ -{ - sel_node_t* node; - - node = static_cast<sel_node_t*>( - mem_heap_alloc(heap, sizeof(sel_node_t))); - - node->common.type = QUE_NODE_SELECT; - node->state = SEL_NODE_OPEN; - - node->plans = NULL; - - return(node); -} - -/*********************************************************************//** -Frees the memory private to a select node when a query graph is freed, -does not free the heap where the node was originally created. */ -UNIV_INTERN -void -sel_node_free_private( -/*==================*/ - sel_node_t* node) /*!< in: select node struct */ -{ - ulint i; - plan_t* plan; - - if (node->plans != NULL) { - for (i = 0; i < node->n_tables; i++) { - plan = sel_node_get_nth_plan(node, i); - - btr_pcur_close(&(plan->pcur)); - btr_pcur_close(&(plan->clust_pcur)); - - if (plan->old_vers_heap) { - mem_heap_free(plan->old_vers_heap); - } - } - } -} - -/*********************************************************************//** -Evaluates the values in a select list. If there are aggregate functions, -their argument value is added to the aggregate total. */ -UNIV_INLINE -void -sel_eval_select_list( -/*=================*/ - sel_node_t* node) /*!< in: select node */ -{ - que_node_t* exp; - - exp = node->select_list; - - while (exp) { - eval_exp(exp); - - exp = que_node_get_next(exp); - } -} - -/*********************************************************************//** -Assigns the values in the select list to the possible into-variables in -SELECT ... INTO ... 
*/ -UNIV_INLINE -void -sel_assign_into_var_values( -/*=======================*/ - sym_node_t* var, /*!< in: first variable in a list of - variables */ - sel_node_t* node) /*!< in: select node */ -{ - que_node_t* exp; - - if (var == NULL) { - - return; - } - - for (exp = node->select_list; - var != 0; - var = static_cast<sym_node_t*>(que_node_get_next(var))) { - - ut_ad(exp); - - eval_node_copy_val(var->alias, exp); - - exp = que_node_get_next(exp); - } -} - -/*********************************************************************//** -Resets the aggregate value totals in the select list of an aggregate type -query. */ -UNIV_INLINE -void -sel_reset_aggregate_vals( -/*=====================*/ - sel_node_t* node) /*!< in: select node */ -{ - func_node_t* func_node; - - ut_ad(node->is_aggregate); - - for (func_node = static_cast<func_node_t*>(node->select_list); - func_node != 0; - func_node = static_cast<func_node_t*>( - que_node_get_next(func_node))) { - - eval_node_set_int_val(func_node, 0); - } - - node->aggregate_already_fetched = FALSE; -} - -/*********************************************************************//** -Copies the input variable values when an explicit cursor is opened. */ -UNIV_INLINE -void -row_sel_copy_input_variable_vals( -/*=============================*/ - sel_node_t* node) /*!< in: select node */ -{ - sym_node_t* var; - - var = UT_LIST_GET_FIRST(node->copy_variables); - - while (var) { - eval_node_copy_val(var, var->alias); - - var->indirection = NULL; - - var = UT_LIST_GET_NEXT(col_var_list, var); - } -} - -/*********************************************************************//** -Fetches the column values from a record. 
*/ -static -void -row_sel_fetch_columns( -/*==================*/ - dict_index_t* index, /*!< in: record index */ - const rec_t* rec, /*!< in: record in a clustered or non-clustered - index; must be protected by a page latch */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - sym_node_t* column) /*!< in: first column in a column list, or - NULL */ -{ - dfield_t* val; - ulint index_type; - ulint field_no; - const byte* data; - ulint len; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (dict_index_is_clust(index)) { - index_type = SYM_CLUST_FIELD_NO; - } else { - index_type = SYM_SEC_FIELD_NO; - } - - while (column) { - mem_heap_t* heap = NULL; - ibool needs_copy; - - field_no = column->field_nos[index_type]; - - if (field_no != ULINT_UNDEFINED) { - - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, - field_no))) { - - /* Copy an externally stored field to the - temporary heap, if possible. */ - - heap = mem_heap_create(1); - - data = btr_rec_copy_externally_stored_field( - rec, offsets, - dict_table_zip_size(index->table), - field_no, &len, heap, NULL); - - /* data == NULL means that the - externally stored field was not - written yet. This record - should only be seen by - recv_recovery_rollback_active() or any - TRX_ISO_READ_UNCOMMITTED - transactions. The InnoDB SQL parser - (the sole caller of this function) - does not implement READ UNCOMMITTED, - and it is not involved during rollback. 
*/ - ut_a(data); - ut_a(len != UNIV_SQL_NULL); - - needs_copy = TRUE; - } else { - data = rec_get_nth_field(rec, offsets, - field_no, &len); - - needs_copy = column->copy_val; - } - - if (needs_copy) { - eval_node_copy_and_alloc_val(column, data, - len); - } else { - val = que_node_get_val(column); - dfield_set_data(val, data, len); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - column = UT_LIST_GET_NEXT(col_var_list, column); - } -} - -/*********************************************************************//** -Allocates a prefetch buffer for a column when prefetch is first time done. */ -static -void -sel_col_prefetch_buf_alloc( -/*=======================*/ - sym_node_t* column) /*!< in: symbol table node for a column */ -{ - sel_buf_t* sel_buf; - ulint i; - - ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL); - - column->prefetch_buf = static_cast<sel_buf_t*>( - mem_alloc(SEL_MAX_N_PREFETCH * sizeof(sel_buf_t))); - - for (i = 0; i < SEL_MAX_N_PREFETCH; i++) { - sel_buf = column->prefetch_buf + i; - - sel_buf->data = NULL; - sel_buf->len = 0; - sel_buf->val_buf_size = 0; - } -} - -/*********************************************************************//** -Frees a prefetch buffer for a column, including the dynamically allocated -memory for data stored there. */ -UNIV_INTERN -void -sel_col_prefetch_buf_free( -/*======================*/ - sel_buf_t* prefetch_buf) /*!< in, own: prefetch buffer */ -{ - sel_buf_t* sel_buf; - ulint i; - - for (i = 0; i < SEL_MAX_N_PREFETCH; i++) { - sel_buf = prefetch_buf + i; - - if (sel_buf->val_buf_size > 0) { - - mem_free(sel_buf->data); - } - } - - mem_free(prefetch_buf); -} - -/*********************************************************************//** -Pops the column values for a prefetched, cached row from the column prefetch -buffers and places them to the val fields in the column nodes. 
*/
static
void
sel_dequeue_prefetched_row(
/*=======================*/
	plan_t*	plan)	/*!< in: plan node for a table */
{
	ut_ad(plan->n_rows_prefetched > 0);

	for (sym_node_t* column = UT_LIST_GET_FIRST(plan->columns);
	     column != NULL;
	     column = UT_LIST_GET_NEXT(col_var_list, column)) {

		dfield_t*	val = que_node_get_val(column);

		if (!column->copy_val) {
			/* We did not really push any value for the
			column */

			ut_ad(!column->prefetch_buf);
			ut_ad(que_node_get_val_buf_size(column) == 0);
			ut_d(dfield_set_null(val));

			continue;
		}

		ut_ad(column->prefetch_buf);
		ut_ad(!dfield_is_ext(val));

		sel_buf_t*	slot
			= column->prefetch_buf + plan->first_prefetched;

		/* Remember the cached value before the slot is overwritten. */
		byte* const	cached_data = slot->data;
		const ulint	cached_len = slot->len;
		const ulint	cached_buf_size = slot->val_buf_size;

		/* We must keep track of the allocated memory for
		column values to be able to free it later: therefore
		we swap the values for sel_buf and val */

		slot->data = static_cast<byte*>(dfield_get_data(val));
		slot->len = dfield_get_len(val);
		slot->val_buf_size = que_node_get_val_buf_size(column);

		dfield_set_data(val, cached_data, cached_len);
		que_node_set_val_buf_size(column, cached_buf_size);
	}

	plan->n_rows_prefetched--;

	plan->first_prefetched++;
}

/*********************************************************************//**
Pushes the column values for a prefetched, cached row to the column prefetch
buffers from the val fields in the column nodes.
*/ -UNIV_INLINE -void -sel_enqueue_prefetched_row( -/*=======================*/ - plan_t* plan) /*!< in: plan node for a table */ -{ - sym_node_t* column; - sel_buf_t* sel_buf; - dfield_t* val; - byte* data; - ulint len; - ulint pos; - ulint val_buf_size; - - if (plan->n_rows_prefetched == 0) { - pos = 0; - plan->first_prefetched = 0; - } else { - pos = plan->n_rows_prefetched; - - /* We have the convention that pushing new rows starts only - after the prefetch stack has been emptied: */ - - ut_ad(plan->first_prefetched == 0); - } - - plan->n_rows_prefetched++; - - ut_ad(pos < SEL_MAX_N_PREFETCH); - - for (column = UT_LIST_GET_FIRST(plan->columns); - column != 0; - column = UT_LIST_GET_NEXT(col_var_list, column)) { - - if (!column->copy_val) { - /* There is no sense to push pointers to database - page fields when we do not keep latch on the page! */ - continue; - } - - if (!column->prefetch_buf) { - /* Allocate a new prefetch buffer */ - - sel_col_prefetch_buf_alloc(column); - } - - sel_buf = column->prefetch_buf + pos; - - val = que_node_get_val(column); - - data = static_cast<byte*>(dfield_get_data(val)); - len = dfield_get_len(val); - val_buf_size = que_node_get_val_buf_size(column); - - /* We must keep track of the allocated memory for - column values to be able to free it later: therefore - we swap the values for sel_buf and val */ - - dfield_set_data(val, sel_buf->data, sel_buf->len); - que_node_set_val_buf_size(column, sel_buf->val_buf_size); - - sel_buf->data = data; - sel_buf->len = len; - sel_buf->val_buf_size = val_buf_size; - } -} - -/*********************************************************************//** -Builds a previous version of a clustered index record for a consistent read -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_sel_build_prev_vers( -/*====================*/ - read_view_t* read_view, /*!< in: read view */ - dict_index_t* index, /*!< in: plan node for table */ - rec_t* rec, /*!< in: 
record in a clustered index */ - ulint** offsets, /*!< in/out: offsets returned by - rec_get_offsets(rec, plan->index) */ - mem_heap_t** offset_heap, /*!< in/out: memory heap from which - the offsets are allocated */ - mem_heap_t** old_vers_heap, /*!< out: old version heap to use */ - rec_t** old_vers, /*!< out: old version, or NULL if the - record does not exist in the view: - i.e., it was freshly inserted - afterwards */ - mtr_t* mtr) /*!< in: mtr */ -{ - dberr_t err; - - if (*old_vers_heap) { - mem_heap_empty(*old_vers_heap); - } else { - *old_vers_heap = mem_heap_create(512); - } - - err = row_vers_build_for_consistent_read( - rec, mtr, index, offsets, read_view, offset_heap, - *old_vers_heap, old_vers); - return(err); -} - -/*********************************************************************//** -Builds the last committed version of a clustered index record for a -semi-consistent read. */ -static MY_ATTRIBUTE((nonnull)) -void -row_sel_build_committed_vers_for_mysql( -/*===================================*/ - dict_index_t* clust_index, /*!< in: clustered index */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: record in a clustered index */ - ulint** offsets, /*!< in/out: offsets returned by - rec_get_offsets(rec, clust_index) */ - mem_heap_t** offset_heap, /*!< in/out: memory heap from which - the offsets are allocated */ - const rec_t** old_vers, /*!< out: old version, or NULL if the - record does not exist in the view: - i.e., it was freshly inserted - afterwards */ - mtr_t* mtr) /*!< in: mtr */ -{ - if (prebuilt->old_vers_heap) { - mem_heap_empty(prebuilt->old_vers_heap); - } else { - prebuilt->old_vers_heap = mem_heap_create( - rec_offs_size(*offsets)); - } - - row_vers_build_for_semi_consistent_read( - rec, mtr, clust_index, offsets, offset_heap, - prebuilt->old_vers_heap, old_vers); -} - -/*********************************************************************//** -Tests the conditions which determine when the index 
segment we are searching -through has been exhausted. -@return TRUE if row passed the tests */ -UNIV_INLINE -ibool -row_sel_test_end_conds( -/*===================*/ - plan_t* plan) /*!< in: plan for the table; the column values must - already have been retrieved and the right sides of - comparisons evaluated */ -{ - func_node_t* cond; - - /* All conditions in end_conds are comparisons of a column to an - expression */ - - for (cond = UT_LIST_GET_FIRST(plan->end_conds); - cond != 0; - cond = UT_LIST_GET_NEXT(cond_list, cond)) { - - /* Evaluate the left side of the comparison, i.e., get the - column value if there is an indirection */ - - eval_sym(static_cast<sym_node_t*>(cond->args)); - - /* Do the comparison */ - - if (!eval_cmp(cond)) { - - return(FALSE); - } - } - - return(TRUE); -} - -/*********************************************************************//** -Tests the other conditions. -@return TRUE if row passed the tests */ -UNIV_INLINE -ibool -row_sel_test_other_conds( -/*=====================*/ - plan_t* plan) /*!< in: plan for the table; the column values must - already have been retrieved */ -{ - func_node_t* cond; - - cond = UT_LIST_GET_FIRST(plan->other_conds); - - while (cond) { - eval_exp(cond); - - if (!eval_node_get_ibool_val(cond)) { - - return(FALSE); - } - - cond = UT_LIST_GET_NEXT(cond_list, cond); - } - - return(TRUE); -} - -/*********************************************************************//** -Retrieves the clustered index record corresponding to a record in a -non-clustered index. Does the necessary locking. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_sel_get_clust_rec( -/*==================*/ - sel_node_t* node, /*!< in: select_node */ - plan_t* plan, /*!< in: plan node for table */ - rec_t* rec, /*!< in: record in a non-clustered index */ - que_thr_t* thr, /*!< in: query thread */ - rec_t** out_rec,/*!< out: clustered record or an old version of - it, NULL if the old version did not exist - in the read view, i.e., it was a fresh - inserted version */ - mtr_t* mtr) /*!< in: mtr used to get access to the - non-clustered record; the same mtr is used to - access the clustered index */ -{ - dict_index_t* index; - rec_t* clust_rec; - rec_t* old_vers; - dberr_t err; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - *out_rec = NULL; - - offsets = rec_get_offsets(rec, - btr_pcur_get_btr_cur(&plan->pcur)->index, - offsets, ULINT_UNDEFINED, &heap); - - row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets); - - index = dict_table_get_first_index(plan->table); - - btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE, - BTR_SEARCH_LEAF, &plan->clust_pcur, - 0, mtr); - - clust_rec = btr_pcur_get_rec(&(plan->clust_pcur)); - - /* Note: only if the search ends up on a non-infimum record is the - low_match value the real match to the search tuple */ - - if (!page_rec_is_user_rec(clust_rec) - || btr_pcur_get_low_match(&(plan->clust_pcur)) - < dict_index_get_n_unique(index)) { - - ut_a(rec_get_deleted_flag(rec, - dict_table_is_comp(plan->table))); - ut_a(node->read_view); - - /* In a rare case it is possible that no clust rec is found - for a delete-marked secondary index record: if in row0umod.cc - in row_undo_mod_remove_clust_low() we have already removed - the clust rec, while purge is still cleaning and removing - secondary index records associated with earlier versions of - the clustered index record. 
In that case we know that the - clustered index record did not exist in the read view of - trx. */ - - goto func_exit; - } - - offsets = rec_get_offsets(clust_rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (!node->read_view) { - /* Try to place a lock on the index record */ - ulint lock_type; - trx_t* trx; - - trx = thr_get_trx(thr); - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED or lower isolation level - we lock only the record, i.e., next-key locking is - not used. */ - if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - - err = lock_clust_rec_read_check_and_lock( - 0, btr_pcur_get_block(&plan->clust_pcur), - clust_rec, index, offsets, - static_cast<enum lock_mode>(node->row_lock_mode), - lock_type, - thr); - - switch (err) { - case DB_SUCCESS: - case DB_SUCCESS_LOCKED_REC: - /* Declare the variable uninitialized in Valgrind. - It should be set to DB_SUCCESS at func_exit. */ - UNIV_MEM_INVALID(&err, sizeof err); - break; - default: - goto err_exit; - } - } else { - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - old_vers = NULL; - - if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets, - node->read_view)) { - - err = row_sel_build_prev_vers( - node->read_view, index, clust_rec, - &offsets, &heap, &plan->old_vers_heap, - &old_vers, mtr); - - if (err != DB_SUCCESS) { - - goto err_exit; - } - - clust_rec = old_vers; - - if (clust_rec == NULL) { - goto func_exit; - } - } - - /* If we had to go to an earlier version of row or the - secondary index record is delete marked, then it may be that - the secondary index record corresponding to clust_rec - (or old_vers) is not rec; in that case we must ignore - such row because in our snapshot rec would not have existed. 
- Remember that from rec we cannot see directly which transaction - id corresponds to it: we have to go to the clustered index - record. A query where we want to fetch all rows where - the secondary index value is in some interval would return - a wrong result if we would not drop rows which we come to - visit through secondary index records that would not really - exist in our snapshot. */ - - if ((old_vers - || rec_get_deleted_flag(rec, dict_table_is_comp( - plan->table))) - && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index, - clust_rec, index)) { - goto func_exit; - } - } - - /* Fetch the columns needed in test conditions. The clustered - index record is protected by a page latch that was acquired - when plan->clust_pcur was positioned. The latch will not be - released until mtr_commit(mtr). */ - - ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets))); - row_sel_fetch_columns(index, clust_rec, offsets, - UT_LIST_GET_FIRST(plan->columns)); - *out_rec = clust_rec; -func_exit: - err = DB_SUCCESS; -err_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); -} - -/*********************************************************************//** -Sets a lock on a record. 
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ -UNIV_INLINE -dberr_t -sel_set_rec_lock( -/*=============*/ - const buf_block_t* block, /*!< in: buffer block of rec */ - const rec_t* rec, /*!< in: record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - ulint mode, /*!< in: lock mode */ - ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or - LOC_REC_NOT_GAP */ - que_thr_t* thr) /*!< in: query thread */ -{ - trx_t* trx; - dberr_t err; - - trx = thr_get_trx(thr); - - if (UT_LIST_GET_LEN(trx->lock.trx_locks) > 10000) { - if (buf_LRU_buf_pool_running_out()) { - - return(DB_LOCK_TABLE_FULL); - } - } - - if (dict_index_is_clust(index)) { - err = lock_clust_rec_read_check_and_lock( - 0, block, rec, index, offsets, - static_cast<enum lock_mode>(mode), type, thr); - } else { - err = lock_sec_rec_read_check_and_lock( - 0, block, rec, index, offsets, - static_cast<enum lock_mode>(mode), type, thr); - } - - return(err); -} - -/*********************************************************************//** -Opens a pcur to a table index. 
*/
static
void
row_sel_open_pcur(
/*==============*/
	plan_t*		plan,		/*!< in: table plan */
	ibool		search_latch_locked,
					/*!< in: TRUE if the thread currently
					has the search latch locked in
					s-mode */
	mtr_t*		mtr)		/*!< in: mtr */
{
	/* RW_S_LATCH or 0 */
	const ulint	has_search_latch = search_latch_locked
		? RW_S_LATCH
		: 0;

	dict_index_t*	index = plan->index;

	/* Calculate the value of the search tuple: the exact match columns
	get their expressions evaluated when we evaluate the right sides of
	end_conds */

	for (func_node_t* cond = UT_LIST_GET_FIRST(plan->end_conds);
	     cond != NULL;
	     cond = UT_LIST_GET_NEXT(cond_list, cond)) {

		eval_exp(que_node_get_next(cond->args));
	}

	if (plan->tuple == NULL) {
		/* Open the cursor to the start or the end of the index
		(FALSE: no init) */

		btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
					    &plan->pcur, false, 0, mtr);
	} else {
		const ulint	n_fields = dtuple_get_n_fields(plan->tuple);

		if (plan->n_exact_match < n_fields) {
			/* There is a non-exact match field which must be
			evaluated separately */

			eval_exp(plan->tuple_exps[n_fields - 1]);
		}

		/* Copy the evaluated expression values into the tuple. */
		for (ulint i = 0; i < n_fields; i++) {
			que_node_t*	exp = plan->tuple_exps[i];

			dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
					 que_node_get_val(exp));
		}

		/* Open pcur to the index */

		btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
					   BTR_SEARCH_LEAF, &plan->pcur,
					   has_search_latch, mtr);
	}

	ut_ad(plan->n_rows_prefetched == 0);
	ut_ad(plan->n_rows_fetched == 0);
	ut_ad(plan->cursor_at_end == FALSE);

	plan->pcur_is_open = TRUE;
}

/*********************************************************************//**
Restores a stored pcur position to a table index.
-@return TRUE if the cursor should be moved to the next record after we -return from this function (moved to the previous, in the case of a -descending cursor) without processing again the current cursor -record */ -static -ibool -row_sel_restore_pcur_pos( -/*=====================*/ - plan_t* plan, /*!< in: table plan */ - mtr_t* mtr) /*!< in: mtr */ -{ - ibool equal_position; - ulint relative_position; - - ut_ad(!plan->cursor_at_end); - - relative_position = btr_pcur_get_rel_pos(&(plan->pcur)); - - equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF, - &(plan->pcur), mtr); - - /* If the cursor is traveling upwards, and relative_position is - - (1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock - yet on the successor of the page infimum; - (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the - first record GREATER than the predecessor of a page supremum; we have - not yet processed the cursor record: no need to move the cursor to the - next record; - (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the - last record LESS or EQUAL to the old stored user record; (a) if - equal_position is FALSE, this means that the cursor is now on a record - less than the old user record, and we must move to the next record; - (b) if equal_position is TRUE, then if - plan->stored_cursor_rec_processed is TRUE, we must move to the next - record, else there is no need to move the cursor. 
*/ - - if (plan->asc) { - if (relative_position == BTR_PCUR_ON) { - - if (equal_position) { - - return(plan->stored_cursor_rec_processed); - } - - return(TRUE); - } - - ut_ad(relative_position == BTR_PCUR_AFTER - || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE); - - return(FALSE); - } - - /* If the cursor is traveling downwards, and relative_position is - - (1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on - the last record LESS than the successor of a page infimum; we have not - processed the cursor record: no need to move the cursor; - (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the - first record GREATER than the predecessor of a page supremum; we have - processed the cursor record: we should move the cursor to the previous - record; - (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the - last record LESS or EQUAL to the old stored user record; (a) if - equal_position is FALSE, this means that the cursor is now on a record - less than the old user record, and we need not move to the previous - record; (b) if equal_position is TRUE, then if - plan->stored_cursor_rec_processed is TRUE, we must move to the previous - record, else there is no need to move the cursor. */ - - if (relative_position == BTR_PCUR_BEFORE - || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) { - - return(FALSE); - } - - if (relative_position == BTR_PCUR_ON) { - - if (equal_position) { - - return(plan->stored_cursor_rec_processed); - } - - return(FALSE); - } - - ut_ad(relative_position == BTR_PCUR_AFTER - || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE); - - return(TRUE); -} - -/*********************************************************************//** -Resets a plan cursor to a closed state. 
*/ -UNIV_INLINE -void -plan_reset_cursor( -/*==============*/ - plan_t* plan) /*!< in: plan */ -{ - plan->pcur_is_open = FALSE; - plan->cursor_at_end = FALSE; - plan->n_rows_fetched = 0; - plan->n_rows_prefetched = 0; -} - -/*********************************************************************//** -Tries to do a shortcut to fetch a clustered index record with a unique key, -using the hash index if possible (not always). -@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */ -static -ulint -row_sel_try_search_shortcut( -/*========================*/ - sel_node_t* node, /*!< in: select node for a consistent read */ - plan_t* plan, /*!< in: plan for a unique search in clustered - index */ - ibool search_latch_locked, - /*!< in: whether the search holds - btr_search_latch */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_index_t* index; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ulint ret; - rec_offs_init(offsets_); - - index = plan->index; - - ut_ad(node->read_view); - ut_ad(plan->unique_search); - ut_ad(!plan->must_get_clust); -#ifdef UNIV_SYNC_DEBUG - if (search_latch_locked) { - ut_ad(rw_lock_own(btr_search_get_latch(index), - RW_LOCK_SHARED)); - } -#endif /* UNIV_SYNC_DEBUG */ - - row_sel_open_pcur(plan, search_latch_locked, mtr); - - rec = btr_pcur_get_rec(&(plan->pcur)); - - if (!page_rec_is_user_rec(rec)) { - - return(SEL_RETRY); - } - - ut_ad(plan->mode == PAGE_CUR_GE); - - /* As the cursor is now placed on a user record after a search with - the mode PAGE_CUR_GE, the up_match field in the cursor tells how many - fields in the user record matched to the search tuple */ - - if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) { - - return(SEL_EXHAUSTED); - } - - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (dict_index_is_clust(index)) { - if 
(!lock_clust_rec_cons_read_sees(rec, index, offsets, - node->read_view)) { - ret = SEL_RETRY; - goto func_exit; - } - } else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) { - - ret = SEL_RETRY; - goto func_exit; - } - - /* Test the deleted flag. */ - - if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) { - - ret = SEL_EXHAUSTED; - goto func_exit; - } - - /* Fetch the columns needed in test conditions. The index - record is protected by a page latch that was acquired when - plan->pcur was positioned. The latch will not be released - until mtr_commit(mtr). */ - - row_sel_fetch_columns(index, rec, offsets, - UT_LIST_GET_FIRST(plan->columns)); - - /* Test the rest of search conditions */ - - if (!row_sel_test_other_conds(plan)) { - - ret = SEL_EXHAUSTED; - goto func_exit; - } - - ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF); - - plan->n_rows_fetched++; - ret = SEL_FOUND; -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(ret); -} - -/*********************************************************************//** -Performs a select step. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_sel( -/*====*/ - sel_node_t* node, /*!< in: select node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dict_index_t* index; - plan_t* plan; - mtr_t mtr; - ibool moved; - rec_t* rec; - rec_t* old_vers; - rec_t* clust_rec; - ibool search_latch_locked; - ibool consistent_read; - - /* The following flag becomes TRUE when we are doing a - consistent read from a non-clustered index and we must look - at the clustered index to find out the previous delete mark - state of the non-clustered record: */ - - ibool cons_read_requires_clust_rec = FALSE; - ulint cost_counter = 0; - ibool cursor_just_opened; - ibool must_go_to_next; - ibool mtr_has_extra_clust_latch = FALSE; - /* TRUE if the search was made using - a non-clustered index, and we had to - access the clustered record: now &mtr - contains a clustered index latch, and - &mtr must be committed before we move - to the next non-clustered record */ - ulint found_flag; - dberr_t err; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - ut_ad(thr->run_node == node); - - search_latch_locked = FALSE; - - if (node->read_view) { - /* In consistent reads, we try to do with the hash index and - not to use the buffer page get. This is to reduce memory bus - load resulting from semaphore operations. The search latch - will be s-locked when we access an index with a unique search - condition, but not locked when we access an index with a - less selective search condition. */ - - consistent_read = TRUE; - } else { - consistent_read = FALSE; - } - -table_loop: - /* TABLE LOOP - ---------- - This is the outer major loop in calculating a join. We come here when - node->fetch_table changes, and after adding a row to aggregate totals - and, of course, when this function is called. 
*/ - - ut_ad(mtr_has_extra_clust_latch == FALSE); - - plan = sel_node_get_nth_plan(node, node->fetch_table); - index = plan->index; - - if (plan->n_rows_prefetched > 0) { - sel_dequeue_prefetched_row(plan); - - goto next_table_no_mtr; - } - - if (plan->cursor_at_end) { - /* The cursor has already reached the result set end: no more - rows to process for this table cursor, as also the prefetch - stack was empty */ - - ut_ad(plan->pcur_is_open); - - goto table_exhausted_no_mtr; - } - - /* Open a cursor to index, or restore an open cursor position */ - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - if (consistent_read && plan->unique_search && !plan->pcur_is_open - && !plan->must_get_clust - && !plan->table->big_rows) { - if (!search_latch_locked) { - rw_lock_s_lock(btr_search_get_latch(index)); - - search_latch_locked = TRUE; - } else if (rw_lock_get_writer(btr_search_get_latch(index)) - == RW_LOCK_WAIT_EX) { - - /* There is an x-latch request waiting: release the - s-latch for a moment; as an s-latch here is often - kept for some 10 searches before being released, - a waiting x-latch request would block other threads - from acquiring an s-latch for a long time, lowering - performance significantly in multiprocessors. 
*/ - - rw_lock_s_unlock(btr_search_get_latch(index)); - rw_lock_s_lock(btr_search_get_latch(index)); - } - - found_flag = row_sel_try_search_shortcut(node, plan, - search_latch_locked, - &mtr); - - if (found_flag == SEL_FOUND) { - - goto next_table; - - } else if (found_flag == SEL_EXHAUSTED) { - - goto table_exhausted; - } - - ut_ad(found_flag == SEL_RETRY); - - plan_reset_cursor(plan); - - mtr_commit(&mtr); - mtr_start_trx(&mtr, thr_get_trx(thr)); - } - - if (search_latch_locked) { - rw_lock_s_unlock(btr_search_get_latch(index)); - - search_latch_locked = FALSE; - } - - if (!plan->pcur_is_open) { - /* Evaluate the expressions to build the search tuple and - open the cursor */ - - row_sel_open_pcur(plan, search_latch_locked, &mtr); - - cursor_just_opened = TRUE; - - /* A new search was made: increment the cost counter */ - cost_counter++; - } else { - /* Restore pcur position to the index */ - - must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr); - - cursor_just_opened = FALSE; - - if (must_go_to_next) { - /* We have already processed the cursor record: move - to the next */ - - goto next_rec; - } - } - -rec_loop: - /* RECORD LOOP - ----------- - In this loop we use pcur and try to fetch a qualifying row, and - also fill the prefetch buffer for this table if n_rows_fetched has - exceeded a threshold. While we are inside this loop, the following - holds: - (1) &mtr is started, - (2) pcur is positioned and open. - - NOTE that if cursor_just_opened is TRUE here, it means that we came - to this point right after row_sel_open_pcur. 
*/ - - ut_ad(mtr_has_extra_clust_latch == FALSE); - - rec = btr_pcur_get_rec(&(plan->pcur)); - - /* PHASE 1: Set a lock if specified */ - - if (!node->asc && cursor_just_opened - && !page_rec_is_supremum(rec)) { - - /* When we open a cursor for a descending search, we must set - a next-key lock on the successor record: otherwise it would - be possible to insert new records next to the cursor position, - and it might be that these new records should appear in the - search result set, resulting in the phantom problem. */ - - if (!consistent_read) { - rec_t* next_rec = page_rec_get_next(rec); - ulint lock_type; - trx_t* trx; - - trx = thr_get_trx(thr); - - offsets = rec_get_offsets(next_rec, index, offsets, - ULINT_UNDEFINED, &heap); - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED or lower isolation - level, we lock only the record, i.e., next-key - locking is not used. */ - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) { - - if (page_rec_is_supremum(next_rec)) { - - goto skip_lock; - } - - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - - err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur), - next_rec, index, offsets, - node->row_lock_mode, - lock_type, thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - err = DB_SUCCESS; - case DB_SUCCESS: - break; - default: - /* Note that in this case we will store in pcur - the PREDECESSOR of the record we are waiting - the lock for */ - goto lock_wait_or_error; - } - } - } - -skip_lock: - if (page_rec_is_infimum(rec)) { - - /* The infimum record on a page cannot be in the result set, - and neither can a record lock be placed on it: we skip such - a record. We also increment the cost counter as we may have - processed yet another page of index. 
*/ - - cost_counter++; - - goto next_rec; - } - - if (!consistent_read) { - /* Try to place a lock on the index record */ - ulint lock_type; - trx_t* trx; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - trx = thr_get_trx(thr); - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED or lower isolation level, - we lock only the record, i.e., next-key locking is - not used. */ - if (srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) { - - if (page_rec_is_supremum(rec)) { - - goto next_rec; - } - - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - - err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur), - rec, index, offsets, - node->row_lock_mode, lock_type, thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - err = DB_SUCCESS; - case DB_SUCCESS: - break; - default: - goto lock_wait_or_error; - } - } - - if (page_rec_is_supremum(rec)) { - - /* A page supremum record cannot be in the result set: skip - it now when we have placed a possible lock on it */ - - goto next_rec; - } - - ut_ad(page_rec_is_user_rec(rec)); - - if (cost_counter > SEL_COST_LIMIT) { - - /* Now that we have placed the necessary locks, we can stop - for a while and store the cursor position; NOTE that if we - would store the cursor position BEFORE placing a record lock, - it might happen that the cursor would jump over some records - that another transaction could meanwhile insert adjacent to - the cursor: this would result in the phantom problem. 
*/ - - goto stop_for_a_while; - } - - /* PHASE 2: Check a mixed index mix id if needed */ - - if (plan->unique_search && cursor_just_opened) { - - ut_ad(plan->mode == PAGE_CUR_GE); - - /* As the cursor is now placed on a user record after a search - with the mode PAGE_CUR_GE, the up_match field in the cursor - tells how many fields in the user record matched to the search - tuple */ - - if (btr_pcur_get_up_match(&(plan->pcur)) - < plan->n_exact_match) { - goto table_exhausted; - } - - /* Ok, no need to test end_conds or mix id */ - - } - - /* We are ready to look at a possible new index entry in the result - set: the cursor is now placed on a user record */ - - /* PHASE 3: Get previous version in a consistent read */ - - cons_read_requires_clust_rec = FALSE; - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (consistent_read) { - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - if (dict_index_is_clust(index)) { - - if (!lock_clust_rec_cons_read_sees(rec, index, offsets, - node->read_view)) { - - err = row_sel_build_prev_vers( - node->read_view, index, rec, - &offsets, &heap, &plan->old_vers_heap, - &old_vers, &mtr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - if (old_vers == NULL) { - /* The record does not exist - in our read view. Skip it, but - first attempt to determine - whether the index segment we - are searching through has been - exhausted. */ - - offsets = rec_get_offsets( - rec, index, offsets, - ULINT_UNDEFINED, &heap); - - /* Fetch the columns needed in - test conditions. The clustered - index record is protected by a - page latch that was acquired - by row_sel_open_pcur() or - row_sel_restore_pcur_pos(). - The latch will not be released - until mtr_commit(mtr). 
*/ - - row_sel_fetch_columns( - index, rec, offsets, - UT_LIST_GET_FIRST( - plan->columns)); - - if (!row_sel_test_end_conds(plan)) { - - goto table_exhausted; - } - - goto next_rec; - } - - rec = old_vers; - } - } else if (!lock_sec_rec_cons_read_sees(rec, - node->read_view)) { - cons_read_requires_clust_rec = TRUE; - } - } - - /* PHASE 4: Test search end conditions and deleted flag */ - - /* Fetch the columns needed in test conditions. The record is - protected by a page latch that was acquired by - row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch - will not be released until mtr_commit(mtr). */ - - row_sel_fetch_columns(index, rec, offsets, - UT_LIST_GET_FIRST(plan->columns)); - - /* Test the selection end conditions: these can only contain columns - which already are found in the index, even though the index might be - non-clustered */ - - if (plan->unique_search && cursor_just_opened) { - - /* No test necessary: the test was already made above */ - - } else if (!row_sel_test_end_conds(plan)) { - - goto table_exhausted; - } - - if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table)) - && !cons_read_requires_clust_rec) { - - /* The record is delete marked: we can skip it if this is - not a consistent read which might see an earlier version - of a non-clustered index record */ - - if (plan->unique_search) { - - goto table_exhausted; - } - - goto next_rec; - } - - /* PHASE 5: Get the clustered index record, if needed and if we did - not do the search using the clustered index */ - - if (plan->must_get_clust || cons_read_requires_clust_rec) { - - /* It was a non-clustered index and we must fetch also the - clustered index record */ - - err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec, - &mtr); - mtr_has_extra_clust_latch = TRUE; - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - /* Retrieving the clustered record required a search: - increment the cost counter */ - - cost_counter++; - - if (clust_rec == NULL) { - /* The 
record did not exist in the read view */ - ut_ad(consistent_read); - - goto next_rec; - } - - if (rec_get_deleted_flag(clust_rec, - dict_table_is_comp(plan->table))) { - - /* The record is delete marked: we can skip it */ - - goto next_rec; - } - - if (node->can_get_updated) { - - btr_pcur_store_position(&(plan->clust_pcur), &mtr); - } - } - - /* PHASE 6: Test the rest of search conditions */ - - if (!row_sel_test_other_conds(plan)) { - - if (plan->unique_search) { - - goto table_exhausted; - } - - goto next_rec; - } - - /* PHASE 7: We found a new qualifying row for the current table; push - the row if prefetch is on, or move to the next table in the join */ - - plan->n_rows_fetched++; - - ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF); - - if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT) - || plan->unique_search || plan->no_prefetch - || plan->table->big_rows) { - - /* No prefetch in operation: go to the next table */ - - goto next_table; - } - - sel_enqueue_prefetched_row(plan); - - if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) { - - /* The prefetch buffer is now full */ - - sel_dequeue_prefetched_row(plan); - - goto next_table; - } - -next_rec: - ut_ad(!search_latch_locked); - - if (mtr_has_extra_clust_latch) { - - /* We must commit &mtr if we are moving to the next - non-clustered index record, because we could break the - latching order if we would access a different clustered - index page right away without releasing the previous. 
*/ - - goto commit_mtr_for_a_while; - } - - if (node->asc) { - moved = btr_pcur_move_to_next(&(plan->pcur), &mtr); - } else { - moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr); - } - - if (!moved) { - - goto table_exhausted; - } - - cursor_just_opened = FALSE; - - /* END OF RECORD LOOP - ------------------ */ - goto rec_loop; - -next_table: - /* We found a record which satisfies the conditions: we can move to - the next table or return a row in the result set */ - - ut_ad(btr_pcur_is_on_user_rec(&plan->pcur)); - - if (plan->unique_search && !node->can_get_updated) { - - plan->cursor_at_end = TRUE; - } else { - ut_ad(!search_latch_locked); - - plan->stored_cursor_rec_processed = TRUE; - - btr_pcur_store_position(&(plan->pcur), &mtr); - } - - mtr_commit(&mtr); - - mtr_has_extra_clust_latch = FALSE; - -next_table_no_mtr: - /* If we use 'goto' to this label, it means that the row was popped - from the prefetched rows stack, and &mtr is already committed */ - - if (node->fetch_table + 1 == node->n_tables) { - - sel_eval_select_list(node); - - if (node->is_aggregate) { - - goto table_loop; - } - - sel_assign_into_var_values(node->into_list, node); - - thr->run_node = que_node_get_parent(node); - - err = DB_SUCCESS; - goto func_exit; - } - - node->fetch_table++; - - /* When we move to the next table, we first reset the plan cursor: - we do not care about resetting it when we backtrack from a table */ - - plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table)); - - goto table_loop; - -table_exhausted: - /* The table cursor pcur reached the result set end: backtrack to the - previous table in the join if we do not have cached prefetched rows */ - - plan->cursor_at_end = TRUE; - - mtr_commit(&mtr); - - mtr_has_extra_clust_latch = FALSE; - - if (plan->n_rows_prefetched > 0) { - /* The table became exhausted during a prefetch */ - - sel_dequeue_prefetched_row(plan); - - goto next_table_no_mtr; - } - -table_exhausted_no_mtr: - if (node->fetch_table == 0) { - err = 
DB_SUCCESS; - - if (node->is_aggregate && !node->aggregate_already_fetched) { - - node->aggregate_already_fetched = TRUE; - - sel_assign_into_var_values(node->into_list, node); - - thr->run_node = que_node_get_parent(node); - } else { - node->state = SEL_NODE_NO_MORE_ROWS; - - thr->run_node = que_node_get_parent(node); - } - - goto func_exit; - } - - node->fetch_table--; - - goto table_loop; - -stop_for_a_while: - /* Return control for a while to que_run_threads, so that runaway - queries can be canceled. NOTE that when we come here, we must, in a - locking read, have placed the necessary (possibly waiting request) - record lock on the cursor record or its successor: when we reposition - the cursor, this record lock guarantees that nobody can meanwhile have - inserted new records which should have appeared in the result set, - which would result in the phantom problem. */ - - ut_ad(!search_latch_locked); - - plan->stored_cursor_rec_processed = FALSE; - btr_pcur_store_position(&(plan->pcur), &mtr); - - mtr_commit(&mtr); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_except_dict()); -#endif /* UNIV_SYNC_DEBUG */ - err = DB_SUCCESS; - goto func_exit; - -commit_mtr_for_a_while: - /* Stores the cursor position and commits &mtr; this is used if - &mtr may contain latches which would break the latching order if - &mtr would not be committed and the latches released. 
*/ - - plan->stored_cursor_rec_processed = TRUE; - - ut_ad(!search_latch_locked); - btr_pcur_store_position(&(plan->pcur), &mtr); - - mtr_commit(&mtr); - - mtr_has_extra_clust_latch = FALSE; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_except_dict()); -#endif /* UNIV_SYNC_DEBUG */ - - goto table_loop; - -lock_wait_or_error: - /* See the note at stop_for_a_while: the same holds for this case */ - - ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc); - ut_ad(!search_latch_locked); - - plan->stored_cursor_rec_processed = FALSE; - btr_pcur_store_position(&(plan->pcur), &mtr); - - mtr_commit(&mtr); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_except_dict()); -#endif /* UNIV_SYNC_DEBUG */ - -func_exit: - if (search_latch_locked) { - rw_lock_s_unlock(btr_search_get_latch(index)); - } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); -} - -/**********************************************************************//** -Performs a select step. This is a high-level function used in SQL execution -graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_sel_step( -/*=========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - sel_node_t* node; - - ut_ad(thr); - - node = static_cast<sel_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_SELECT); - - /* If this is a new time this node is executed (or when execution - resumes after wait for a table intention lock), set intention locks - on the tables, or assign a read view */ - - if (node->into_list && (thr->prev_node == que_node_get_parent(node))) { - - node->state = SEL_NODE_OPEN; - } - - if (node->state == SEL_NODE_OPEN) { - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started_xa(thr_get_trx(thr)); - - plan_reset_cursor(sel_node_get_nth_plan(node, 0)); - - if (node->consistent_read) { - /* Assign a read view for the query */ - node->read_view = trx_assign_read_view( - thr_get_trx(thr)); - } else { - sym_node_t* table_node; - enum lock_mode i_lock_mode; - - if (node->set_x_locks) { - i_lock_mode = LOCK_IX; - } else { - i_lock_mode = LOCK_IS; - } - - for (table_node = node->table_list; - table_node != 0; - table_node = static_cast<sym_node_t*>( - que_node_get_next(table_node))) { - - dberr_t err = lock_table( - 0, table_node->table, i_lock_mode, - thr); - - if (err != DB_SUCCESS) { - trx_t* trx; - - trx = thr_get_trx(thr); - trx->error_state = err; - - return(NULL); - } - } - } - - /* If this is an explicit cursor, copy stored procedure - variable values, so that the values cannot change between - fetches (currently, we copy them also for non-explicit - cursors) */ - - if (node->explicit_cursor - && UT_LIST_GET_FIRST(node->copy_variables)) { - - row_sel_copy_input_variable_vals(node); - } - - node->state = SEL_NODE_FETCH; - node->fetch_table = 0; - - if (node->is_aggregate) { - /* Reset the aggregate total values */ - sel_reset_aggregate_vals(node); - } - } - - dberr_t err = row_sel(node, 
thr); - - /* NOTE! if queries are parallelized, the following assignment may - have problems; the assignment should be made only if thr is the - only top-level thr in the graph: */ - - thr->graph->last_sel_node = node; - - if (err != DB_SUCCESS) { - thr_get_trx(thr)->error_state = err; - - return(NULL); - } - - return(thr); -} - -/**********************************************************************//** -Performs a fetch for a cursor. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -fetch_step( -/*=======*/ - que_thr_t* thr) /*!< in: query thread */ -{ - sel_node_t* sel_node; - fetch_node_t* node; - - ut_ad(thr); - - node = static_cast<fetch_node_t*>(thr->run_node); - sel_node = node->cursor_def; - - ut_ad(que_node_get_type(node) == QUE_NODE_FETCH); - - if (thr->prev_node != que_node_get_parent(node)) { - - if (sel_node->state != SEL_NODE_NO_MORE_ROWS) { - - if (node->into_list) { - sel_assign_into_var_values(node->into_list, - sel_node); - } else { - ibool ret = (*node->func->func)( - sel_node, node->func->arg); - - if (!ret) { - sel_node->state - = SEL_NODE_NO_MORE_ROWS; - } - } - } - - thr->run_node = que_node_get_parent(node); - - return(thr); - } - - /* Make the fetch node the parent of the cursor definition for - the time of the fetch, so that execution knows to return to this - fetch node after a row has been selected or we know that there is - no row left */ - - sel_node->common.parent = node; - - if (sel_node->state == SEL_NODE_CLOSED) { - fprintf(stderr, - "InnoDB: Error: fetch called on a closed cursor\n"); - - thr_get_trx(thr)->error_state = DB_ERROR; - - return(NULL); - } - - thr->run_node = sel_node; - - return(thr); -} - -/****************************************************************//** -Sample callback function for fetch that prints each row. 
-@return always returns non-NULL */ -UNIV_INTERN -void* -row_fetch_print( -/*============*/ - void* row, /*!< in: sel_node_t* */ - void* user_arg) /*!< in: not used */ -{ - que_node_t* exp; - ulint i = 0; - sel_node_t* node = static_cast<sel_node_t*>(row); - - UT_NOT_USED(user_arg); - - fprintf(stderr, "row_fetch_print: row %p\n", row); - - for (exp = node->select_list; - exp != 0; - exp = que_node_get_next(exp), i++) { - - dfield_t* dfield = que_node_get_val(exp); - const dtype_t* type = dfield_get_type(dfield); - - fprintf(stderr, " column %lu:\n", (ulong) i); - - dtype_print(type); - putc('\n', stderr); - - if (dfield_get_len(dfield) != UNIV_SQL_NULL) { - ut_print_buf(stderr, dfield_get_data(dfield), - dfield_get_len(dfield)); - putc('\n', stderr); - } else { - fputs(" <NULL>;\n", stderr); - } - } - - return((void*)42); -} - -/***********************************************************//** -Prints a row in a select result. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_printf_step( -/*============*/ - que_thr_t* thr) /*!< in: query thread */ -{ - row_printf_node_t* node; - sel_node_t* sel_node; - que_node_t* arg; - - ut_ad(thr); - - node = static_cast<row_printf_node_t*>(thr->run_node); - - sel_node = node->sel_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF); - - if (thr->prev_node == que_node_get_parent(node)) { - - /* Reset the cursor */ - sel_node->state = SEL_NODE_OPEN; - - /* Fetch next row to print */ - - thr->run_node = sel_node; - - return(thr); - } - - if (sel_node->state != SEL_NODE_FETCH) { - - ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); - - /* No more rows to print */ - - thr->run_node = que_node_get_parent(node); - - return(thr); - } - - arg = sel_node->select_list; - - while (arg) { - dfield_print_also_hex(que_node_get_val(arg)); - - fputs(" ::: ", stderr); - - arg = que_node_get_next(arg); - } - - putc('\n', stderr); - - /* Fetch next row to print */ - - thr->run_node = sel_node; - - return(thr); -} 
- -/****************************************************************//** -Converts a key value stored in MySQL format to an Innobase dtuple. The last -field of the key value may be just a prefix of a fixed length field: hence -the parameter key_len. But currently we do not allow search keys where the -last field is only a prefix of the full key field len and print a warning if -such appears. A counterpart of this function is -ha_innobase::store_key_val_for_row() in ha_innodb.cc. */ -UNIV_INTERN -void -row_sel_convert_mysql_key_to_innobase( -/*==================================*/ - dtuple_t* tuple, /*!< in/out: tuple where to build; - NOTE: we assume that the type info - in the tuple is already according - to index! */ - byte* buf, /*!< in: buffer to use in field - conversions; NOTE that dtuple->data - may end up pointing inside buf so - do not discard that buffer while - the tuple is being used. See - row_mysql_store_col_in_innobase_format() - in the case of DATA_INT */ - ulint buf_len, /*!< in: buffer length */ - dict_index_t* index, /*!< in: index of the key value */ - const byte* key_ptr, /*!< in: MySQL key value */ - ulint key_len, /*!< in: MySQL key value length */ - trx_t* trx) /*!< in: transaction */ -{ - byte* original_buf = buf; - const byte* original_key_ptr = key_ptr; - dict_field_t* field; - dfield_t* dfield; - ulint data_offset; - ulint data_len; - ulint data_field_len; - ibool is_null; - const byte* key_end; - ulint n_fields = 0; - - /* For documentation of the key value storage format in MySQL, see - ha_innobase::store_key_val_for_row() in ha_innodb.cc. 
*/ - - key_end = key_ptr + key_len; - - /* Permit us to access any field in the tuple (ULINT_MAX): */ - - dtuple_set_n_fields(tuple, ULINT_MAX); - - dfield = dtuple_get_nth_field(tuple, 0); - field = dict_index_get_nth_field(index, 0); - - if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) { - /* A special case: we are looking for a position in the - generated clustered index which InnoDB automatically added - to a table with no primary key: the first and the only - ordering column is ROW_ID which InnoDB stored to the key_ptr - buffer. */ - - ut_a(key_len == DATA_ROW_ID_LEN); - - dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN); - - dtuple_set_n_fields(tuple, 1); - - return; - } - - while (key_ptr < key_end) { - - ulint type = dfield_get_type(dfield)->mtype; - ut_a(field->col->mtype == type); - - data_offset = 0; - is_null = FALSE; - - if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) { - /* The first byte in the field tells if this is - an SQL NULL value */ - - data_offset = 1; - - if (*key_ptr != 0) { - dfield_set_null(dfield); - - is_null = TRUE; - } - } - - /* Calculate data length and data field total length */ - - if (type == DATA_BLOB) { - /* The key field is a column prefix of a BLOB or - TEXT */ - - ut_a(field->prefix_len > 0); - - /* MySQL stores the actual data length to the first 2 - bytes after the optional SQL NULL marker byte. The - storage format is little-endian, that is, the most - significant byte at a higher address. In UTF-8, MySQL - seems to reserve field->prefix_len bytes for - storing this field in the key value buffer, even - though the actual value only takes data_len bytes - from the start. 
*/ - - data_len = key_ptr[data_offset] - + 256 * key_ptr[data_offset + 1]; - data_field_len = data_offset + 2 + field->prefix_len; - - data_offset += 2; - - /* Now that we know the length, we store the column - value like it would be a fixed char field */ - - } else if (field->prefix_len > 0) { - /* Looks like MySQL pads unused end bytes in the - prefix with space. Therefore, also in UTF-8, it is ok - to compare with a prefix containing full prefix_len - bytes, and no need to take at most prefix_len / 3 - UTF-8 characters from the start. - If the prefix is used as the upper end of a LIKE - 'abc%' query, then MySQL pads the end with chars - 0xff. TODO: in that case does it any harm to compare - with the full prefix_len bytes. How do characters - 0xff in UTF-8 behave? */ - - data_len = field->prefix_len; - data_field_len = data_offset + data_len; - } else { - data_len = dfield_get_type(dfield)->len; - data_field_len = data_offset + data_len; - } - - if (UNIV_UNLIKELY - (dtype_get_mysql_type(dfield_get_type(dfield)) - == DATA_MYSQL_TRUE_VARCHAR) - && UNIV_LIKELY(type != DATA_INT)) { - /* In a MySQL key value format, a true VARCHAR is - always preceded by 2 bytes of a length field. - dfield_get_type(dfield)->len returns the maximum - 'payload' len in bytes. That does not include the - 2 bytes that tell the actual data length. - - We added the check != DATA_INT to make sure we do - not treat MySQL ENUM or SET as a true VARCHAR! */ - - data_len += 2; - data_field_len += 2; - } - - /* Storing may use at most data_len bytes of buf */ - - if (UNIV_LIKELY(!is_null)) { - buf = row_mysql_store_col_in_innobase_format( - dfield, buf, - /* MySQL key value format col */ - FALSE, - key_ptr + data_offset, data_len, - dict_table_is_comp(index->table)); - ut_a(buf <= original_buf + buf_len); - } - - key_ptr += data_field_len; - - if (UNIV_UNLIKELY(key_ptr > key_end)) { - /* The last field in key was not a complete key field - but a prefix of it. - - Print a warning about this! 
HA_READ_PREFIX_LAST does - not currently work in InnoDB with partial-field key - value prefixes. Since MySQL currently uses a padding - trick to calculate LIKE 'abc%' type queries there - should never be partial-field prefixes in searches. */ - - ut_print_timestamp(stderr); - - fputs(" InnoDB: Warning: using a partial-field" - " key prefix in search.\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, ". Last data field length %lu bytes,\n" - "InnoDB: key ptr now exceeds" - " key end by %lu bytes.\n" - "InnoDB: Key value in the MySQL format:\n", - (ulong) data_field_len, - (ulong) (key_ptr - key_end)); - fflush(stderr); - ut_print_buf(stderr, original_key_ptr, key_len); - putc('\n', stderr); - - if (!is_null) { - ulint len = dfield_get_len(dfield); - dfield_set_len(dfield, len - - (ulint) (key_ptr - key_end)); - } - ut_ad(0); - } - - n_fields++; - field++; - dfield++; - } - - ut_a(buf <= original_buf + buf_len); - - /* We set the length of tuple to n_fields: we assume that the memory - area allocated for it is big enough (usually bigger than n_fields). */ - - dtuple_set_n_fields(tuple, n_fields); -} - -/**************************************************************//** -Stores the row id to the prebuilt struct. 
*/ -static -void -row_sel_store_row_id_to_prebuilt( -/*=============================*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */ - const rec_t* index_rec, /*!< in: record */ - const dict_index_t* index, /*!< in: index of the record */ - const ulint* offsets) /*!< in: rec_get_offsets - (index_rec, index) */ -{ - const byte* data; - ulint len; - - ut_ad(rec_offs_validate(index_rec, index, offsets)); - - data = rec_get_nth_field( - index_rec, offsets, - dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len); - - if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) { - fprintf(stderr, - "InnoDB: Error: Row id field is" - " wrong length %lu in ", (ulong) len); - dict_index_name_print(stderr, prebuilt->trx, index); - fprintf(stderr, "\n" - "InnoDB: Field number %lu, record:\n", - (ulong) dict_index_get_sys_col_pos(index, - DATA_ROW_ID)); - rec_print_new(stderr, index_rec, offsets); - putc('\n', stderr); - ut_error; - } - - ut_memcpy(prebuilt->row_id, data, len); -} - -#ifdef UNIV_DEBUG -/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */ -# define row_sel_field_store_in_mysql_format( \ - dest,templ,idx,field,src,len) \ - row_sel_field_store_in_mysql_format_func \ - (dest,templ,idx,field,src,len) -#else /* UNIV_DEBUG */ -/** Convert a non-SQL-NULL field from Innobase format to MySQL format. */ -# define row_sel_field_store_in_mysql_format( \ - dest,templ,idx,field,src,len) \ - row_sel_field_store_in_mysql_format_func \ - (dest,templ,src,len) -#endif /* UNIV_DEBUG */ - -/**************************************************************//** -Stores a non-SQL-NULL field in the MySQL format. The counterpart of this -function is row_mysql_store_col_in_innobase_format() in row0mysql.cc. 
*/ -static MY_ATTRIBUTE((nonnull)) -void -row_sel_field_store_in_mysql_format_func( -/*=====================================*/ - byte* dest, /*!< in/out: buffer where to store; NOTE - that BLOBs are not in themselves - stored here: the caller must allocate - and copy the BLOB into buffer before, - and pass the pointer to the BLOB in - 'data' */ - const mysql_row_templ_t* templ, - /*!< in: MySQL column template. - Its following fields are referenced: - type, is_unsigned, mysql_col_len, - mbminlen, mbmaxlen */ -#ifdef UNIV_DEBUG - const dict_index_t* index, - /*!< in: InnoDB index */ - ulint field_no, - /*!< in: templ->rec_field_no or - templ->clust_rec_field_no or - templ->icp_rec_field_no */ -#endif /* UNIV_DEBUG */ - const byte* data, /*!< in: data to store */ - ulint len) /*!< in: length of the data */ -{ - byte* ptr; -#ifdef UNIV_DEBUG - const dict_field_t* field - = dict_index_get_nth_field(index, field_no); -#endif /* UNIV_DEBUG */ - - ut_ad(len != UNIV_SQL_NULL); - UNIV_MEM_ASSERT_RW(data, len); - UNIV_MEM_ASSERT_W(dest, templ->mysql_col_len); - UNIV_MEM_INVALID(dest, templ->mysql_col_len); - - switch (templ->type) { - const byte* field_end; - byte* pad; - case DATA_INT: - /* Convert integer data from Innobase to a little-endian - format, sign bit restored to normal */ - - ptr = dest + len; - - for (;;) { - ptr--; - *ptr = *data; - if (ptr == dest) { - break; - } - data++; - } - - if (!templ->is_unsigned) { - dest[len - 1] = (byte) (dest[len - 1] ^ 128); - } - - ut_ad(templ->mysql_col_len == len); - break; - - case DATA_VARCHAR: - case DATA_VARMYSQL: - case DATA_BINARY: - field_end = dest + templ->mysql_col_len; - - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - /* This is a >= 5.0.3 type true VARCHAR. Store the - length of the data to the first byte or the first - two bytes of dest. */ - - dest = row_mysql_store_true_var_len( - dest, len, templ->mysql_length_bytes); - /* Copy the actual data. Leave the rest of the - buffer uninitialized. 
*/ - memcpy(dest, data, len); - break; - } - - /* Copy the actual data */ - ut_memcpy(dest, data, len); - - /* Pad with trailing spaces. */ - - pad = dest + len; - - ut_ad(templ->mbminlen <= templ->mbmaxlen); - - /* We treat some Unicode charset strings specially. */ - switch (templ->mbminlen) { - case 4: - /* InnoDB should never have stripped partial - UTF-32 characters. */ - ut_a(!(len & 3)); - break; - case 2: - /* A space char is two bytes, - 0x0020 in UCS2 and UTF-16 */ - - if (UNIV_UNLIKELY(len & 1)) { - /* A 0x20 has been stripped from the column. - Pad it back. */ - - if (pad < field_end) { - *pad++ = 0x20; - } - } - } - - row_mysql_pad_col(templ->mbminlen, pad, field_end - pad); - break; - - case DATA_BLOB: - /* Store a pointer to the BLOB buffer to dest: the BLOB was - already copied to the buffer in row_sel_store_mysql_rec */ - - row_mysql_store_blob_ref(dest, templ->mysql_col_len, data, - len); - break; - - case DATA_MYSQL: - memcpy(dest, data, len); - - ut_ad(templ->mysql_col_len >= len); - ut_ad(templ->mbmaxlen >= templ->mbminlen); - - /* If field_no equals to templ->icp_rec_field_no, - we are examining a row pointed by "icp_rec_field_no". - There is possibility that icp_rec_field_no refers to - a field in a secondary index while templ->rec_field_no - points to field in a primary index. The length - should still be equal, unless the field pointed - by icp_rec_field_no has a prefix */ - ut_ad(templ->mbmaxlen > templ->mbminlen - || templ->mysql_col_len == len - || (field_no == templ->icp_rec_field_no - && field->prefix_len > 0)); - - /* The following assertion would fail for old tables - containing UTF-8 ENUM columns due to Bug #9526. 
*/ - ut_ad(!templ->mbmaxlen - || !(templ->mysql_col_len % templ->mbmaxlen)); - ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len - || (field_no == templ->icp_rec_field_no - && field->prefix_len > 0) - || templ->rec_field_is_prefix); - ut_ad(!(field->prefix_len % templ->mbmaxlen)); - - if (templ->mbminlen == 1 && templ->mbmaxlen != 1) { - /* Pad with spaces. This undoes the stripping - done in row0mysql.cc, function - row_mysql_store_col_in_innobase_format(). */ - - memset(dest + len, 0x20, templ->mysql_col_len - len); - } - break; - - default: -#ifdef UNIV_DEBUG - case DATA_SYS_CHILD: - case DATA_SYS: - /* These column types should never be shipped to MySQL. */ - ut_ad(0); - - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_FLOAT: - case DATA_DOUBLE: - case DATA_DECIMAL: - /* Above are the valid column types for MySQL data. */ -#endif /* UNIV_DEBUG */ - ut_ad(field->prefix_len - ? field->prefix_len == len - : templ->mysql_col_len == len); - memcpy(dest, data, len); - } -} - -#ifdef UNIV_DEBUG -/** Convert a field from Innobase format to MySQL format. */ -# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ - row_sel_store_mysql_field_func(m,p,r,i,o,f,t) -#else /* UNIV_DEBUG */ -/** Convert a field from Innobase format to MySQL format. */ -# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ - row_sel_store_mysql_field_func(m,p,r,o,f,t) -#endif /* UNIV_DEBUG */ -/** Convert a field in the Innobase format to a field in the MySQL format. 
-@param[out] mysql_rec record in the MySQL format -@param[in,out] prebuilt prebuilt struct -@param[in] rec InnoDB record; must be protected - by a page latch -@param[in] index index of rec -@param[in] offsets array returned by rec_get_offsets() -@param[in] field_no templ->rec_field_no or - templ->clust_rec_field_no - or templ->icp_rec_field_no - or sec field no if clust_templ_for_sec - is TRUE -@param[in] templ row template -*/ -static MY_ATTRIBUTE((warn_unused_result)) -ibool -row_sel_store_mysql_field_func( - byte* mysql_rec, - row_prebuilt_t* prebuilt, - const rec_t* rec, -#ifdef UNIV_DEBUG - const dict_index_t* index, -#endif - const ulint* offsets, - ulint field_no, - const mysql_row_templ_t*templ) -{ - const byte* data; - ulint len; - - ut_ad(prebuilt->default_rec); - ut_ad(templ); - ut_ad(templ >= prebuilt->mysql_template); - ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]); - ut_ad(field_no == templ->clust_rec_field_no - || field_no == templ->rec_field_no - || field_no == templ->icp_rec_field_no); - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { - - mem_heap_t* heap; - /* Copy an externally stored field to a temporary heap */ - - ut_a(!prebuilt->trx->has_search_latch); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!btr_search_own_any()); -#endif - ut_ad(field_no == templ->clust_rec_field_no); - - if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } - - heap = prebuilt->blob_heap; - } else { - heap = mem_heap_create(UNIV_PAGE_SIZE); - } - - /* NOTE: if we are retrieving a big BLOB, we may - already run out of memory in the next call, which - causes an assert */ - - data = btr_rec_copy_externally_stored_field( - rec, offsets, - dict_table_zip_size(prebuilt->table), - field_no, &len, heap, NULL); - - if (UNIV_UNLIKELY(!data)) { - /* The externally stored field was not written - yet. 
This record should only be seen by - recv_recovery_rollback_active() or any - TRX_ISO_READ_UNCOMMITTED transactions. */ - - if (heap != prebuilt->blob_heap) { - mem_heap_free(heap); - } - - ut_a(prebuilt->trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); - return(FALSE); - } - - ut_a(len != UNIV_SQL_NULL); - - row_sel_field_store_in_mysql_format( - mysql_rec + templ->mysql_col_offset, - templ, index, field_no, data, len); - - if (heap != prebuilt->blob_heap) { - mem_heap_free(heap); - } - } else { - /* Field is stored in the row. */ - - data = rec_get_nth_field(rec, offsets, field_no, &len); - - if (len == UNIV_SQL_NULL) { - /* MySQL assumes that the field for an SQL - NULL value is set to the default value. */ - ut_ad(templ->mysql_null_bit_mask); - - UNIV_MEM_ASSERT_RW(prebuilt->default_rec - + templ->mysql_col_offset, - templ->mysql_col_len); - mysql_rec[templ->mysql_null_byte_offset] - |= (byte) templ->mysql_null_bit_mask; - memcpy(mysql_rec + templ->mysql_col_offset, - (const byte*) prebuilt->default_rec - + templ->mysql_col_offset, - templ->mysql_col_len); - return(TRUE); - } - - if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { - - /* It is a BLOB field locally stored in the - InnoDB record: we MUST copy its contents to - prebuilt->blob_heap here because - row_sel_field_store_in_mysql_format() stores a - pointer to the data, and the data passed to us - will be invalid as soon as the - mini-transaction is committed and the page - latch on the clustered index page is - released. 
*/ - - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } - - data = static_cast<byte*>( - mem_heap_dup(prebuilt->blob_heap, data, len)); - } - - row_sel_field_store_in_mysql_format( - mysql_rec + templ->mysql_col_offset, - templ, index, field_no, data, len); - } - - ut_ad(len != UNIV_SQL_NULL); - - if (templ->mysql_null_bit_mask) { - /* It is a nullable column with a non-NULL - value */ - mysql_rec[templ->mysql_null_byte_offset] - &= ~(byte) templ->mysql_null_bit_mask; - } - - return(TRUE); -} - -/** Convert a row in the Innobase format to a row in the MySQL format. -Note that the template in prebuilt may advise us to copy only a few -columns to mysql_rec, other columns are left blank. All columns may not -be needed in the query. -@param[out] mysql_rec row in the MySQL format -@param[in] prebuilt prebuilt structure -@param[in] rec Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch -@param[in] rec_clust TRUE if the rec in the clustered index -@param[in] index index of rec -@param[in] offsets array returned by rec_get_offsets(rec) -@return TRUE on success, FALSE if not all columns could be retrieved */ -static MY_ATTRIBUTE((warn_unused_result)) -ibool -row_sel_store_mysql_rec( - byte* mysql_rec, - row_prebuilt_t* prebuilt, - const rec_t* rec, - ibool rec_clust, - const dict_index_t* index, - const ulint* offsets) -{ - ulint i; - ut_ad(rec_clust || index == prebuilt->index); - ut_ad(!rec_clust || dict_index_is_clust(index)); - - if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { - mem_heap_free(prebuilt->blob_heap); - prebuilt->blob_heap = NULL; - } - - for (i = 0; i < prebuilt->n_template; i++) { - const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; - const ulint field_no - = rec_clust - ? 
templ->clust_rec_field_no - : templ->rec_field_no; - /* We should never deliver column prefixes to MySQL, - except for evaluating innobase_index_cond() and if the prefix - index is longer than the actual row data. */ - /* ...actually, we do want to do this in order to - support the prefix query optimization. - - ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len - == 0 || templ->rec_field_is_prefix); - - - ...so we disable this assert. */ - - if (!row_sel_store_mysql_field(mysql_rec, prebuilt, - rec, index, offsets, - field_no, templ)) { - return(FALSE); - } - } - - /* FIXME: We only need to read the doc_id if an FTS indexed - column is being updated. - NOTE, the record must be cluster index record. Secondary index - might not have the Doc ID */ - if (dict_table_has_fts_index(prebuilt->table) - && dict_index_is_clust(index)) { - - prebuilt->fts_doc_id = fts_get_doc_id_from_rec( - prebuilt->table, rec, NULL); - } - - return(TRUE); -} - -/*********************************************************************//** -Builds a previous version of a clustered index record for a consistent read -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_sel_build_prev_vers_for_mysql( -/*==============================*/ - read_view_t* read_view, /*!< in: read view */ - dict_index_t* clust_index, /*!< in: clustered index */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: record in a clustered index */ - ulint** offsets, /*!< in/out: offsets returned by - rec_get_offsets(rec, clust_index) */ - mem_heap_t** offset_heap, /*!< in/out: memory heap from which - the offsets are allocated */ - rec_t** old_vers, /*!< out: old version, or NULL if the - record does not exist in the view: - i.e., it was freshly inserted - afterwards */ - mtr_t* mtr) /*!< in: mtr */ -{ - dberr_t err; - - if (prebuilt->old_vers_heap) { - mem_heap_empty(prebuilt->old_vers_heap); - } else { - prebuilt->old_vers_heap = 
mem_heap_create(200); - } - - err = row_vers_build_for_consistent_read( - rec, mtr, clust_index, offsets, read_view, offset_heap, - prebuilt->old_vers_heap, old_vers); - return(err); -} - -/*********************************************************************//** -Retrieves the clustered index record corresponding to a record in a -non-clustered index. Does the necessary locking. Used in the MySQL -interface. -@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_sel_get_clust_rec_for_mysql( -/*============================*/ - row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */ - dict_index_t* sec_index,/*!< in: secondary index where rec resides */ - const rec_t* rec, /*!< in: record in a non-clustered index; if - this is a locking read, then rec is not - allowed to be delete-marked, and that would - not make sense either */ - que_thr_t* thr, /*!< in: query thread */ - const rec_t** out_rec,/*!< out: clustered record or an old version of - it, NULL if the old version did not exist - in the read view, i.e., it was a fresh - inserted version */ - ulint** offsets,/*!< in: offsets returned by - rec_get_offsets(rec, sec_index); - out: offsets returned by - rec_get_offsets(out_rec, clust_index) */ - mem_heap_t** offset_heap,/*!< in/out: memory heap from which - the offsets are allocated */ - mtr_t* mtr) /*!< in: mtr used to get access to the - non-clustered record; the same mtr is used to - access the clustered index */ -{ - dict_index_t* clust_index; - const rec_t* clust_rec; - rec_t* old_vers; - dberr_t err; - trx_t* trx; - - *out_rec = NULL; - trx = thr_get_trx(thr); - - srv_stats.n_sec_rec_cluster_reads.inc( - thd_get_thread_id(trx->mysql_thd)); - - row_build_row_ref_in_tuple(prebuilt->clust_ref, rec, - sec_index, *offsets, trx); - - clust_index = dict_table_get_first_index(sec_index->table); - - btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref, - PAGE_CUR_LE, 
BTR_SEARCH_LEAF, - &prebuilt->clust_pcur, 0, mtr); - - clust_rec = btr_pcur_get_rec(&prebuilt->clust_pcur); - - prebuilt->clust_pcur.trx_if_known = trx; - - /* Note: only if the search ends up on a non-infimum record is the - low_match value the real match to the search tuple */ - - if (!page_rec_is_user_rec(clust_rec) - || btr_pcur_get_low_match(&prebuilt->clust_pcur) - < dict_index_get_n_unique(clust_index)) { - - /* In a rare case it is possible that no clust rec is found - for a delete-marked secondary index record: if in row0umod.cc - in row_undo_mod_remove_clust_low() we have already removed - the clust rec, while purge is still cleaning and removing - secondary index records associated with earlier versions of - the clustered index record. In that case we know that the - clustered index record did not exist in the read view of - trx. */ - - if (!rec_get_deleted_flag(rec, - dict_table_is_comp(sec_index->table)) - || prebuilt->select_lock_type != LOCK_NONE) { - ut_print_timestamp(stderr); - fputs(" InnoDB: error clustered record" - " for sec rec not found\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, sec_index); - fputs("\n" - "InnoDB: sec index record ", stderr); - rec_print(stderr, rec, sec_index); - fputs("\n" - "InnoDB: clust index record ", stderr); - rec_print(stderr, clust_rec, clust_index); - putc('\n', stderr); - trx_print(stderr, trx, 600); - fputs("\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - ut_ad(0); - } - - clust_rec = NULL; - - err = DB_SUCCESS; - goto func_exit; - } - - *offsets = rec_get_offsets(clust_rec, clust_index, *offsets, - ULINT_UNDEFINED, offset_heap); - - if (prebuilt->select_lock_type != LOCK_NONE) { - /* Try to place a lock on the index record; we are searching - the clust rec with a unique condition, hence - we set a LOCK_REC_NOT_GAP type lock */ - - err = lock_clust_rec_read_check_and_lock( - 0, btr_pcur_get_block(&prebuilt->clust_pcur), - clust_rec, clust_index, 
*offsets, - static_cast<enum lock_mode>(prebuilt->select_lock_type), - LOCK_REC_NOT_GAP, - thr); - - switch (err) { - case DB_SUCCESS: - case DB_SUCCESS_LOCKED_REC: - break; - default: - goto err_exit; - } - } else { - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - old_vers = NULL; - - /* If the isolation level allows reading of uncommitted data, - then we never look for an earlier version */ - - if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED - && !lock_clust_rec_cons_read_sees( - clust_rec, clust_index, *offsets, - trx->read_view)) { - - /* The following call returns 'offsets' associated with - 'old_vers' */ - err = row_sel_build_prev_vers_for_mysql( - trx->read_view, clust_index, prebuilt, - clust_rec, offsets, offset_heap, &old_vers, - mtr); - - if (err != DB_SUCCESS || old_vers == NULL) { - - goto err_exit; - } - - clust_rec = old_vers; - } - - /* If we had to go to an earlier version of row or the - secondary index record is delete marked, then it may be that - the secondary index record corresponding to clust_rec - (or old_vers) is not rec; in that case we must ignore - such row because in our snapshot rec would not have existed. - Remember that from rec we cannot see directly which transaction - id corresponds to it: we have to go to the clustered index - record. A query where we want to fetch all rows where - the secondary index value is in some interval would return - a wrong result if we would not drop rows which we come to - visit through secondary index records that would not really - exist in our snapshot. 
*/ - - if (clust_rec - && (old_vers - || trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED - || rec_get_deleted_flag(rec, dict_table_is_comp( - sec_index->table))) - && !row_sel_sec_rec_is_for_clust_rec( - rec, sec_index, clust_rec, clust_index)) { - clust_rec = NULL; -#ifdef UNIV_SEARCH_DEBUG - } else { - ut_a(clust_rec == NULL - || row_sel_sec_rec_is_for_clust_rec( - rec, sec_index, clust_rec, clust_index)); -#endif - } - - err = DB_SUCCESS; - } - -func_exit: - *out_rec = clust_rec; - - /* Store the current position if select_lock_type is not - LOCK_NONE or if we are scanning using InnoDB APIs */ - if (prebuilt->select_lock_type != LOCK_NONE - || prebuilt->innodb_api) { - /* We may use the cursor in update or in unlock_row(): - store its position */ - - btr_pcur_store_position(&prebuilt->clust_pcur, mtr); - } - -err_exit: - return(err); -} - -/********************************************************************//** -Restores cursor position after it has been stored. We have to take into -account that the record cursor was positioned on may have been deleted. -Then we may have to move the cursor one step up or down. -@return TRUE if we may need to process the record the cursor is now -positioned on (i.e. we should not go to the next record yet) */ -static -ibool -sel_restore_position_for_mysql( -/*===========================*/ - ibool* same_user_rec, /*!< out: TRUE if we were able to restore - the cursor on a user record with the - same ordering prefix in in the - B-tree index */ - ulint latch_mode, /*!< in: latch mode wished in - restoration */ - btr_pcur_t* pcur, /*!< in: cursor whose position - has been stored */ - ibool moves_up, /*!< in: TRUE if the cursor moves up - in the index */ - mtr_t* mtr) /*!< in: mtr; CAUTION: may commit - mtr temporarily! 
*/ -{ - ibool success; - - success = btr_pcur_restore_position(latch_mode, pcur, mtr); - - *same_user_rec = success; - - ut_ad(!success || pcur->rel_pos == BTR_PCUR_ON); -#ifdef UNIV_DEBUG - if (pcur->pos_state == BTR_PCUR_IS_POSITIONED_OPTIMISTIC) { - ut_ad(pcur->rel_pos == BTR_PCUR_BEFORE - || pcur->rel_pos == BTR_PCUR_AFTER); - } else { - ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad((pcur->rel_pos == BTR_PCUR_ON) - == btr_pcur_is_on_user_rec(pcur)); - } -#endif - - /* The position may need be adjusted for rel_pos and moves_up. */ - - switch (pcur->rel_pos) { - case BTR_PCUR_ON: - if (!success && moves_up) { -next: - btr_pcur_move_to_next(pcur, mtr); - return(TRUE); - } - return(!success); - case BTR_PCUR_AFTER_LAST_IN_TREE: - case BTR_PCUR_BEFORE_FIRST_IN_TREE: - return(TRUE); - case BTR_PCUR_AFTER: - /* positioned to record after pcur->old_rec. */ - pcur->pos_state = BTR_PCUR_IS_POSITIONED; -prev: - if (btr_pcur_is_on_user_rec(pcur) && !moves_up) { - btr_pcur_move_to_prev(pcur, mtr); - } - return(TRUE); - case BTR_PCUR_BEFORE: - /* For non optimistic restoration: - The position is now set to the record before pcur->old_rec. - - For optimistic restoration: - The position also needs to take the previous search_mode into - consideration. */ - - switch (pcur->pos_state) { - case BTR_PCUR_IS_POSITIONED_OPTIMISTIC: - pcur->pos_state = BTR_PCUR_IS_POSITIONED; - if (pcur->search_mode == PAGE_CUR_GE) { - /* Positioned during Greater or Equal search - with BTR_PCUR_BEFORE. Optimistic restore to - the same record. If scanning for lower then - we must move to previous record. 
- This can happen with: - HANDLER READ idx a = (const); - HANDLER READ idx PREV; */ - goto prev; - } - return(TRUE); - case BTR_PCUR_IS_POSITIONED: - if (moves_up && btr_pcur_is_on_user_rec(pcur)) { - goto next; - } - return(TRUE); - case BTR_PCUR_WAS_POSITIONED: - case BTR_PCUR_NOT_POSITIONED: - break; - } - } - ut_ad(0); - return(TRUE); -} - -/********************************************************************//** -Copies a cached field for MySQL from the fetch cache. */ -static -void -row_sel_copy_cached_field_for_mysql( -/*================================*/ - byte* buf, /*!< in/out: row buffer */ - const byte* cache, /*!< in: cached row */ - const mysql_row_templ_t*templ) /*!< in: column template */ -{ - ulint len; - - buf += templ->mysql_col_offset; - cache += templ->mysql_col_offset; - - UNIV_MEM_ASSERT_W(buf, templ->mysql_col_len); - - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR - && templ->type != DATA_INT) { - /* Check for != DATA_INT to make sure we do - not treat MySQL ENUM or SET as a true VARCHAR! - Find the actual length of the true VARCHAR field. */ - row_mysql_read_true_varchar( - &len, cache, templ->mysql_length_bytes); - len += templ->mysql_length_bytes; - UNIV_MEM_INVALID(buf, templ->mysql_col_len); - } else { - len = templ->mysql_col_len; - } - - ut_memcpy(buf, cache, len); -} - -/********************************************************************//** -Pops a cached row for MySQL from the fetch cache. 
*/ -UNIV_INLINE -void -row_sel_dequeue_cached_row_for_mysql( -/*=================================*/ - byte* buf, /*!< in/out: buffer where to copy the - row */ - row_prebuilt_t* prebuilt) /*!< in: prebuilt struct */ -{ - ulint i; - const mysql_row_templ_t*templ; - const byte* cached_rec; - ut_ad(prebuilt->n_fetch_cached > 0); - ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len); - - UNIV_MEM_ASSERT_W(buf, prebuilt->mysql_row_len); - - cached_rec = prebuilt->fetch_cache[prebuilt->fetch_cache_first]; - - if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) { - /* Copy cache record field by field, don't touch fields that - are not covered by current key */ - - for (i = 0; i < prebuilt->n_template; i++) { - templ = prebuilt->mysql_template + i; - row_sel_copy_cached_field_for_mysql( - buf, cached_rec, templ); - /* Copy NULL bit of the current field from cached_rec - to buf */ - if (templ->mysql_null_bit_mask) { - buf[templ->mysql_null_byte_offset] - ^= (buf[templ->mysql_null_byte_offset] - ^ cached_rec[templ->mysql_null_byte_offset]) - & (byte) templ->mysql_null_bit_mask; - } - } - } else if (prebuilt->mysql_prefix_len > 63) { - /* The record is long. Copy it field by field, in case - there are some long VARCHAR column of which only a - small length is being used. */ - UNIV_MEM_INVALID(buf, prebuilt->mysql_prefix_len); - - /* First copy the NULL bits. */ - ut_memcpy(buf, cached_rec, prebuilt->null_bitmap_len); - /* Then copy the requested fields. */ - - for (i = 0; i < prebuilt->n_template; i++) { - row_sel_copy_cached_field_for_mysql( - buf, cached_rec, prebuilt->mysql_template + i); - } - } else { - ut_memcpy(buf, cached_rec, prebuilt->mysql_prefix_len); - } - - prebuilt->n_fetch_cached--; - prebuilt->fetch_cache_first++; - - if (prebuilt->n_fetch_cached == 0) { - prebuilt->fetch_cache_first = 0; - } -} - -/********************************************************************//** -Initialise the prefetch cache. 
*/ -UNIV_INLINE -void -row_sel_prefetch_cache_init( -/*========================*/ - row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ -{ - ulint i; - ulint sz; - byte* ptr; - - /* Reserve space for the magic number. */ - sz = UT_ARR_SIZE(prebuilt->fetch_cache) * (prebuilt->mysql_row_len + 8); - ptr = static_cast<byte*>(mem_alloc(sz)); - - for (i = 0; i < UT_ARR_SIZE(prebuilt->fetch_cache); i++) { - - /* A user has reported memory corruption in these - buffers in Linux. Put magic numbers there to help - to track a possible bug. */ - - mach_write_to_4(ptr, ROW_PREBUILT_FETCH_MAGIC_N); - ptr += 4; - - prebuilt->fetch_cache[i] = ptr; - ptr += prebuilt->mysql_row_len; - - mach_write_to_4(ptr, ROW_PREBUILT_FETCH_MAGIC_N); - ptr += 4; - } -} - -/********************************************************************//** -Get the last fetch cache buffer from the queue. -@return pointer to buffer. */ -UNIV_INLINE -byte* -row_sel_fetch_last_buf( -/*===================*/ - row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ -{ - ut_ad(!prebuilt->templ_contains_blob); - ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); - - if (prebuilt->fetch_cache[0] == NULL) { - /* Allocate memory for the fetch cache */ - ut_ad(prebuilt->n_fetch_cached == 0); - - row_sel_prefetch_cache_init(prebuilt); - } - - ut_ad(prebuilt->fetch_cache_first == 0); - UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], - prebuilt->mysql_row_len); - - return(prebuilt->fetch_cache[prebuilt->n_fetch_cached]); -} - -/********************************************************************//** -Pushes a row for MySQL to the fetch cache. */ -UNIV_INLINE -void -row_sel_enqueue_cache_row_for_mysql( -/*================================*/ - byte* mysql_rec, /*!< in/out: MySQL record */ - row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ -{ - /* For non ICP code path the row should already exist in the - next fetch cache slot. 
*/ - - if (prebuilt->idx_cond != NULL) { - byte* dest = row_sel_fetch_last_buf(prebuilt); - - ut_memcpy(dest, mysql_rec, prebuilt->mysql_row_len); - } - - ++prebuilt->n_fetch_cached; -} - -/*********************************************************************//** -Tries to do a shortcut to fetch a clustered index record with a unique key, -using the hash index if possible (not always). We assume that the search -mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx, -btr search latch has been locked in S-mode if AHI is enabled. -@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */ -static -ulint -row_sel_try_search_shortcut_for_mysql( -/*==================================*/ - const rec_t** out_rec,/*!< out: record if found */ - row_prebuilt_t* prebuilt,/*!< in: prebuilt struct */ - ulint** offsets,/*!< in/out: for rec_get_offsets(*out_rec) */ - mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */ - mtr_t* mtr) /*!< in: started mtr */ -{ - dict_index_t* index = prebuilt->index; - const dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = &prebuilt->pcur; - trx_t* trx = prebuilt->trx; - const rec_t* rec; - - ut_ad(dict_index_is_clust(index)); - ut_ad(!prebuilt->templ_contains_blob); - -#ifndef UNIV_SEARCH_DEBUG - ut_ad(trx->has_search_latch); - - btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, pcur, - RW_S_LATCH, - mtr); -#else /* UNIV_SEARCH_DEBUG */ - btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, pcur, - 0, - mtr); -#endif /* UNIV_SEARCH_DEBUG */ - rec = btr_pcur_get_rec(pcur); - - if (!page_rec_is_user_rec(rec)) { - - return(SEL_RETRY); - } - - /* As the cursor is now placed on a user record after a search with - the mode PAGE_CUR_GE, the up_match field in the cursor tells how many - fields in the user record matched to the search tuple */ - - if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) { - - return(SEL_EXHAUSTED); - } - - /* This is a 
non-locking consistent read: if necessary, fetch - a previous version of the record */ - - *offsets = rec_get_offsets(rec, index, *offsets, - ULINT_UNDEFINED, heap); - - if (!lock_clust_rec_cons_read_sees(rec, index, - *offsets, trx->read_view)) { - - return(SEL_RETRY); - } - - if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) { - - return(SEL_EXHAUSTED); - } - - *out_rec = rec; - - return(SEL_FOUND); -} - -/*********************************************************************//** -Check a pushed-down index condition. -@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ -static -enum icp_result -row_search_idx_cond_check( -/*======================*/ - byte* mysql_rec, /*!< out: record - in MySQL format (invalid unless - prebuilt->idx_cond!=NULL and - we return ICP_MATCH) */ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct - for the table handle */ - const rec_t* rec, /*!< in: InnoDB record */ - const ulint* offsets) /*!< in: rec_get_offsets() */ -{ - enum icp_result result; - ulint i; - - ut_ad(rec_offs_validate(rec, prebuilt->index, offsets)); - - if (!prebuilt->idx_cond) { - return(ICP_MATCH); - } - - MONITOR_INC(MONITOR_ICP_ATTEMPTS); - - /* Convert to MySQL format those fields that are needed for - evaluating the index condition. */ - - if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { - mem_heap_empty(prebuilt->blob_heap); - } - - for (i = 0; i < prebuilt->idx_cond_n_cols; i++) { - const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; - - if (!row_sel_store_mysql_field(mysql_rec, prebuilt, - rec, prebuilt->index, offsets, - templ->icp_rec_field_no, - templ)) { - return(ICP_NO_MATCH); - } - } - - /* We assume that the index conditions on - case-insensitive columns are case-insensitive. The - case of such columns may be wrong in a secondary - index, if the case of the column has been updated in - the past, or a record has been deleted and a record - inserted in a different case. 
*/ - result = innobase_index_cond(prebuilt->idx_cond); - switch (result) { - case ICP_MATCH: - /* Convert the remaining fields to MySQL format. - If this is a secondary index record, we must defer - this until we have fetched the clustered index record. */ - if (!prebuilt->need_to_access_clustered - || dict_index_is_clust(prebuilt->index)) { - if (!row_sel_store_mysql_rec( - mysql_rec, prebuilt, rec, FALSE, - prebuilt->index, offsets)) { - ut_ad(dict_index_is_clust(prebuilt->index)); - return(ICP_NO_MATCH); - } - } - MONITOR_INC(MONITOR_ICP_MATCH); - return(result); - case ICP_NO_MATCH: - MONITOR_INC(MONITOR_ICP_NO_MATCH); - return(result); - case ICP_OUT_OF_RANGE: - MONITOR_INC(MONITOR_ICP_OUT_OF_RANGE); - return(result); - case ICP_ERROR: - case ICP_ABORTED_BY_USER: - return(result); - } - - ut_error; - return(result); -} - -/********************************************************************//** -Searches for rows in the database. This is used in the interface to -MySQL. This function opens a cursor, and also implements fetch next -and fetch prev. NOTE that if we do a search with a full key value -from a unique index (ROW_SEL_EXACT), then we will not store the cursor -position and fetch next or fetch prev must not be tried to the cursor! -@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK, -DB_LOCK_TABLE_FULL, DB_CORRUPTION, DB_SEARCH_ABORTED_BY_USER or -DB_TOO_BIG_RECORD */ -UNIV_INTERN -dberr_t -row_search_for_mysql( -/*=================*/ - byte* buf, /*!< in/out: buffer for the fetched - row in the MySQL format */ - ulint mode, /*!< in: search mode PAGE_CUR_L, ... 
*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the - table handle; this contains the info - of search_tuple, index; if search - tuple contains 0 fields then we - position the cursor at the start or - the end of the index, depending on - 'mode' */ - ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or - ROW_SEL_EXACT_PREFIX */ - ulint direction) /*!< in: 0 or ROW_SEL_NEXT or - ROW_SEL_PREV; NOTE: if this is != 0, - then prebuilt must have a pcur - with stored position! In opening of a - cursor 'direction' should be 0. */ -{ - dict_index_t* index = prebuilt->index; - ibool comp = dict_table_is_comp(index->table); - const dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = &prebuilt->pcur; - trx_t* trx = prebuilt->trx; - dict_index_t* clust_index; - que_thr_t* thr; - const rec_t* rec = NULL; - const rec_t* result_rec = NULL; - const rec_t* clust_rec; - dberr_t err = DB_SUCCESS; - ibool unique_search = FALSE; - ibool mtr_has_extra_clust_latch = FALSE; - ibool moves_up = FALSE; - ibool set_also_gap_locks = TRUE; - /* if the query is a plain locking SELECT, and the isolation level - is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ - ibool did_semi_consistent_read = FALSE; - /* if the returned record was locked and we did a semi-consistent - read (fetch the newest committed version), then this is set to - TRUE */ -#ifdef UNIV_SEARCH_DEBUG - ulint cnt = 0; -#endif /* UNIV_SEARCH_DEBUG */ - ulint next_offs; - ibool same_user_rec; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ibool table_lock_waited = FALSE; - byte* next_buf = 0; - bool use_clustered_index = false; - - rec_offs_init(offsets_); - - ut_ad(index && pcur && search_tuple); - - /* We don't support FTS queries from the HANDLER interfaces, because - we implemented FTS as reversed inverted index with auxiliary tables. - So anything related to traditional index query would not apply to - it. 
*/ - if (index->type & DICT_FTS) { - return(DB_END_OF_INDEX); - } - - ut_ad(!trx->has_search_latch); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!btr_search_own_any()); - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ - - if (dict_table_is_discarded(prebuilt->table)) { - - return(DB_TABLESPACE_DELETED); - - } else if (!prebuilt->table->is_readable()) { - if (fil_space_get(prebuilt->table->space) == NULL) { - return(DB_TABLESPACE_NOT_FOUND); - } else { - return(DB_DECRYPTION_FAILED); - } - } else if (!prebuilt->index_usable) { - - return(DB_MISSING_HISTORY); - - } else if (dict_index_is_corrupted(index)) { - - return(DB_CORRUPTION); - - } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name ", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, trx, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - -#if 0 - /* August 19, 2005 by Heikki: temporarily disable this error - print until the cursor lock count is done correctly. - See bugs #12263 and #12456!*/ - - if (trx->n_mysql_tables_in_use == 0 - && UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) { - /* Note that if MySQL uses an InnoDB temp table that it - created inside LOCK TABLES, then n_mysql_tables_in_use can - be zero; in that case select_lock_type is set to LOCK_X in - ::start_stmt. 
*/ - - fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n" - "InnoDB: but it has not locked" - " any tables in ::external_lock()!\n", - stderr); - trx_print(stderr, trx, 600); - fputc('\n', stderr); - } -#endif - -#if 0 - fprintf(stderr, "Match mode %lu\n search tuple ", - (ulong) match_mode); - dtuple_print(search_tuple); - fprintf(stderr, "N tables locked %lu\n", - (ulong) trx->mysql_n_tables_locked); -#endif - /* Reset the new record lock info if srv_locks_unsafe_for_binlog - is set or session is using a READ COMMITED isolation level. Then - we are able to remove the record locks set here on an individual - row. */ - prebuilt->new_rec_locks = 0; - - /*-------------------------------------------------------------*/ - /* PHASE 1: Try to pop the row from the prefetch cache */ - - if (UNIV_UNLIKELY(direction == 0)) { - trx->op_info = "starting index read"; - - prebuilt->n_rows_fetched = 0; - prebuilt->n_fetch_cached = 0; - prebuilt->fetch_cache_first = 0; - - if (prebuilt->sel_graph == NULL) { - /* Build a dummy select query graph */ - row_prebuild_sel_graph(prebuilt); - } - } else { - trx->op_info = "fetching rows"; - - if (prebuilt->n_rows_fetched == 0) { - prebuilt->fetch_direction = direction; - } - - if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) { - if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) { - ut_error; - /* TODO: scrollable cursor: restore cursor to - the place of the latest returned row, - or better: prevent caching for a scroll - cursor! 
*/ - } - - prebuilt->n_rows_fetched = 0; - prebuilt->n_fetch_cached = 0; - prebuilt->fetch_cache_first = 0; - - } else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) { - row_sel_dequeue_cached_row_for_mysql(buf, prebuilt); - - prebuilt->n_rows_fetched++; - - err = DB_SUCCESS; - goto func_exit; - } - - if (prebuilt->fetch_cache_first > 0 - && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) { - - /* The previous returned row was popped from the fetch - cache, but the cache was not full at the time of the - popping: no more rows can exist in the result set */ - - err = DB_RECORD_NOT_FOUND; - goto func_exit; - } - - prebuilt->n_rows_fetched++; - - if (prebuilt->n_rows_fetched > 1000000000) { - /* Prevent wrap-over */ - prebuilt->n_rows_fetched = 500000000; - } - - mode = pcur->search_mode; - } - - /* In a search where at most one record in the index may match, we - can use a LOCK_REC_NOT_GAP type record lock when locking a - non-delete-marked matching record. - - Note that in a unique secondary index there may be different - delete-marked versions of a record where only the primary key - values differ: thus in a secondary index we must use next-key - locks when locking delete-marked records. */ - - if (match_mode == ROW_SEL_EXACT - && dict_index_is_unique(index) - && dtuple_get_n_fields(search_tuple) - == dict_index_get_n_unique(index) - && (dict_index_is_clust(index) - || !dtuple_contains_null(search_tuple))) { - - /* Note above that a UNIQUE secondary index can contain many - rows with the same key value if one of the columns is the SQL - null. A clustered index under MySQL can never contain null - columns because we demand that all the columns in primary key - are non-null. */ - - unique_search = TRUE; - - /* Even if the condition is unique, MySQL seems to try to - retrieve also a second row if a primary key contains more than - 1 column. Return immediately if this is not a HANDLER - command. 
*/ - - if (UNIV_UNLIKELY(direction != 0 - && !prebuilt->used_in_HANDLER)) { - - err = DB_RECORD_NOT_FOUND; - goto func_exit; - } - } - - mtr_start_trx(&mtr, trx); - - /*-------------------------------------------------------------*/ - /* PHASE 2: Try fast adaptive hash index search if possible */ - - /* Next test if this is the special case where we can use the fast - adaptive hash index to try the search. Since we must release the - search system latch when we retrieve an externally stored field, we - cannot use the adaptive hash index in a search in the case the row - may be long and there may be externally stored fields */ - - if (UNIV_UNLIKELY(direction == 0) - && unique_search - && dict_index_is_clust(index) - && !prebuilt->templ_contains_blob - && !prebuilt->used_in_HANDLER - && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8) - && !prebuilt->innodb_api) { - - mode = PAGE_CUR_GE; - - if (trx->mysql_n_tables_locked == 0 - && prebuilt->select_lock_type == LOCK_NONE - && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED - && trx->read_view) { - - /* This is a SELECT query done as a consistent read, - and the read view has already been allocated: - let us try a search shortcut through the hash - index. - NOTE that we must also test that - mysql_n_tables_locked == 0, because this might - also be INSERT INTO ... SELECT ... or - CREATE TABLE ... SELECT ... . Our algorithm is - NOT prepared to inserts interleaved with the SELECT, - and if we try that, we can deadlock on the adaptive - hash index semaphore! 
*/ - -#ifndef UNIV_SEARCH_DEBUG - ut_ad(!trx->has_search_latch); - rw_lock_s_lock(btr_search_get_latch(index)); - trx->has_search_latch = TRUE; -#endif - switch (row_sel_try_search_shortcut_for_mysql( - &rec, prebuilt, &offsets, &heap, - &mtr)) { - case SEL_FOUND: -#ifdef UNIV_SEARCH_DEBUG - ut_a(0 == cmp_dtuple_rec(search_tuple, - rec, offsets)); -#endif - /* At this point, rec is protected by - a page latch that was acquired by - row_sel_try_search_shortcut_for_mysql(). - The latch will not be released until - mtr_commit(&mtr). */ - ut_ad(!rec_get_deleted_flag(rec, comp)); - - if (prebuilt->idx_cond) { - switch (row_search_idx_cond_check( - buf, prebuilt, - rec, offsets)) { - case ICP_NO_MATCH: - case ICP_OUT_OF_RANGE: - case ICP_ABORTED_BY_USER: - case ICP_ERROR: - goto shortcut_mismatch; - case ICP_MATCH: - goto shortcut_match; - } - } - - if (!row_sel_store_mysql_rec( - buf, prebuilt, - rec, FALSE, index, - offsets)) { - /* Only fresh inserts may contain - incomplete externally stored - columns. Pretend that such - records do not exist. Such - records may only be accessed - at the READ UNCOMMITTED - isolation level or when - rolling back a recovered - transaction. Rollback happens - at a lower level, not here. */ - - /* Proceed as in case SEL_RETRY. 
*/ - break; - } - - shortcut_match: - mtr_commit(&mtr); - - /* ut_print_name(stderr, index->name); - fputs(" shortcut\n", stderr); */ - - err = DB_SUCCESS; - goto release_search_latch; - - case SEL_EXHAUSTED: - shortcut_mismatch: - mtr_commit(&mtr); - - /* ut_print_name(stderr, index->name); - fputs(" record not found 2\n", stderr); */ - - err = DB_RECORD_NOT_FOUND; -release_search_latch: - rw_lock_s_unlock( - btr_search_get_latch(index)); - trx->has_search_latch = FALSE; - - /* NOTE that we do NOT store the cursor - position */ - goto func_exit; - - case SEL_RETRY: - break; - - default: - ut_ad(0); - } - - mtr_commit(&mtr); - mtr_start(&mtr); - - rw_lock_s_unlock(btr_search_get_latch(index)); - trx->has_search_latch = FALSE; - } - } - - /*-------------------------------------------------------------*/ - /* PHASE 3: Open or restore index cursor position */ - - ut_ad(!trx->has_search_latch); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!btr_search_own_any()); -#endif - - /* The state of a running trx can only be changed by the - thread that is currently serving the transaction. Because we - are that thread, we can read trx->state without holding any - mutex. 
*/ - ut_ad(prebuilt->sql_stat_start || trx->state == TRX_STATE_ACTIVE); - - ut_ad(trx->state == TRX_STATE_NOT_STARTED - || trx->state == TRX_STATE_ACTIVE); - - ut_ad(prebuilt->sql_stat_start - || prebuilt->select_lock_type != LOCK_NONE - || trx->read_view); - - trx_start_if_not_started(trx); - - if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && prebuilt->select_lock_type != LOCK_NONE - && trx->mysql_thd != NULL - && thd_is_select(trx->mysql_thd)) { - /* It is a plain locking SELECT and the isolation - level is low: do not lock gaps */ - - set_also_gap_locks = FALSE; - } - - /* Note that if the search mode was GE or G, then the cursor - naturally moves upward (in fetch next) in alphabetical order, - otherwise downward */ - - if (UNIV_UNLIKELY(direction == 0)) { - if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) { - moves_up = TRUE; - } - } else if (direction == ROW_SEL_NEXT) { - moves_up = TRUE; - } - - thr = que_fork_get_first_thr(prebuilt->sel_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - - clust_index = dict_table_get_first_index(index->table); - - /* Do some start-of-statement preparations */ - - if (!prebuilt->sql_stat_start) { - /* No need to set an intention lock or assign a read view */ - - if (UNIV_UNLIKELY - (trx->read_view == NULL - && prebuilt->select_lock_type == LOCK_NONE)) { - - fputs("InnoDB: Error: MySQL is trying to" - " perform a consistent read\n" - "InnoDB: but the read view is not assigned!\n", - stderr); - trx_print(stderr, trx, 600); - fputc('\n', stderr); - ut_error; - } - } else if (prebuilt->select_lock_type == LOCK_NONE) { - /* This is a consistent read */ - /* Assign a read view for the query */ - - trx_assign_read_view(trx); - prebuilt->sql_stat_start = FALSE; - } else { -wait_table_again: - err = lock_table(0, index->table, - prebuilt->select_lock_type == LOCK_S - ? 
LOCK_IS : LOCK_IX, thr); - - if (err != DB_SUCCESS) { - - table_lock_waited = TRUE; - goto lock_table_wait; - } - prebuilt->sql_stat_start = FALSE; - } - - /* Open or restore index cursor position */ - - if (UNIV_LIKELY(direction != 0)) { - ibool need_to_process = sel_restore_position_for_mysql( - &same_user_rec, BTR_SEARCH_LEAF, - pcur, moves_up, &mtr); - - if (UNIV_UNLIKELY(need_to_process)) { - if (UNIV_UNLIKELY(prebuilt->row_read_type - == ROW_READ_DID_SEMI_CONSISTENT)) { - /* We did a semi-consistent read, - but the record was removed in - the meantime. */ - prebuilt->row_read_type - = ROW_READ_TRY_SEMI_CONSISTENT; - } - } else if (UNIV_LIKELY(prebuilt->row_read_type - != ROW_READ_DID_SEMI_CONSISTENT)) { - - /* The cursor was positioned on the record - that we returned previously. If we need - to repeat a semi-consistent read as a - pessimistic locking read, the record - cannot be skipped. */ - - goto next_rec; - } - - } else if (dtuple_get_n_fields(search_tuple) > 0) { - - err = btr_pcur_open_with_no_init(index, search_tuple, mode, - BTR_SEARCH_LEAF, - pcur, 0, &mtr); - - if (err != DB_SUCCESS) { - rec = NULL; - goto lock_wait_or_error; - } - - pcur->trx_if_known = trx; - - rec = btr_pcur_get_rec(pcur); - - if (!moves_up - && !page_rec_is_supremum(rec) - && set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a gap lock on the next index record - to prevent phantoms in ORDER BY ... 
DESC queries */ - const rec_t* next_rec = page_rec_get_next_const(rec); - - offsets = rec_get_offsets(next_rec, index, offsets, - ULINT_UNDEFINED, &heap); - err = sel_set_rec_lock(btr_pcur_get_block(pcur), - next_rec, index, offsets, - prebuilt->select_lock_type, - LOCK_GAP, thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - err = DB_SUCCESS; - case DB_SUCCESS: - break; - default: - goto lock_wait_or_error; - } - } - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_L) { - err = btr_pcur_open_at_index_side( - mode == PAGE_CUR_G, index, BTR_SEARCH_LEAF, - pcur, false, 0, &mtr); - - if (err != DB_SUCCESS) { - if (err == DB_DECRYPTION_FAILED) { - ib_push_warning(trx->mysql_thd, - DB_DECRYPTION_FAILED, - "Table %s is encrypted but encryption service or" - " used key_id is not available. " - " Can't continue reading table.", - prebuilt->table->name); - index->table->file_unreadable = true; - } - rec = NULL; - goto lock_wait_or_error; - } - } - -rec_loop: - DEBUG_SYNC_C("row_search_rec_loop"); - if (trx_is_interrupted(trx)) { - btr_pcur_store_position(pcur, &mtr); - err = DB_INTERRUPTED; - goto normal_return; - } - - /*-------------------------------------------------------------*/ - /* PHASE 4: Look for matching records in a loop */ - - rec = btr_pcur_get_rec(pcur); - - if (!index->table->is_readable()) { - err = DB_DECRYPTION_FAILED; - goto lock_wait_or_error; - } - - SRV_CORRUPT_TABLE_CHECK(rec, - { - err = DB_CORRUPTION; - goto lock_wait_or_error; - }); - - ut_ad(!!page_rec_is_comp(rec) == comp); -#ifdef UNIV_SEARCH_DEBUG - /* - fputs("Using ", stderr); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt, - page_get_page_no(page_align(rec))); - rec_print(stderr, rec, index); - printf("delete-mark: %lu\n", - rec_get_deleted_flag(rec, page_rec_is_comp(rec))); - */ -#endif /* UNIV_SEARCH_DEBUG */ - - if (page_rec_is_infimum(rec)) { - - /* The infimum record on a page cannot be in the result set, - and neither can a record 
lock be placed on it: we skip such - a record. */ - - goto next_rec; - } - - if (page_rec_is_supremum(rec)) { - if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a lock on the index record */ - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using a READ COMMITTED or lower isolation - level we do not lock gaps. Supremum record is really - a gap and therefore we do not set locks there. */ - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - err = sel_set_rec_lock(btr_pcur_get_block(pcur), - rec, index, offsets, - prebuilt->select_lock_type, - LOCK_ORDINARY, thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - err = DB_SUCCESS; - case DB_SUCCESS: - break; - default: - goto lock_wait_or_error; - } - } - /* A page supremum record cannot be in the result set: skip - it now that we have placed a possible lock on it */ - - goto next_rec; - } - - /*-------------------------------------------------------------*/ - /* Do sanity checks in case our cursor has bumped into page - corruption */ - - if (comp) { - next_offs = rec_get_next_offs(rec, TRUE); - if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) { - - goto wrong_offs; - } - } else { - next_offs = rec_get_next_offs(rec, FALSE); - if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) { - - goto wrong_offs; - } - } - - if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) { - -wrong_offs: - if (srv_pass_corrupt_table && index->table->space != 0 && - index->table->space < SRV_LOG_SPACE_FIRST_ID) { - index->table->file_unreadable = TRUE; - fil_space_set_corrupt(index->table->space); - } - - if ((srv_force_recovery == 0 || moves_up == FALSE) - && srv_pass_corrupt_table <= 1) { - ut_print_timestamp(stderr); - buf_page_print(page_align(rec), 0, - BUF_PAGE_PRINT_NO_CRASH); - fprintf(stderr, - "\nInnoDB: rec address %p," - " buf block fix count 
%lu\n", - (void*) rec, (ulong) - btr_cur_get_block(btr_pcur_get_btr_cur(pcur)) - ->page.buf_fix_count); - fprintf(stderr, - "InnoDB: Index corruption: rec offs %lu" - " next offs %lu, page no %lu,\n" - "InnoDB: ", - (ulong) page_offset(rec), - (ulong) next_offs, - (ulong) page_get_page_no(page_align(rec))); - dict_index_name_print(stderr, trx, index); - fputs(". Run CHECK TABLE. You may need to\n" - "InnoDB: restore from a backup, or" - " dump + drop + reimport the table.\n", - stderr); - ut_ad(0); - err = DB_CORRUPTION; - - goto lock_wait_or_error; - } else { - /* The user may be dumping a corrupt table. Jump - over the corruption to recover as much as possible. */ - - fprintf(stderr, - "InnoDB: Index corruption: rec offs %lu" - " next offs %lu, page no %lu,\n" - "InnoDB: ", - (ulong) page_offset(rec), - (ulong) next_offs, - (ulong) page_get_page_no(page_align(rec))); - dict_index_name_print(stderr, trx, index); - fputs(". We try to skip the rest of the page.\n", - stderr); - - btr_pcur_move_to_last_on_page(pcur, &mtr); - - goto next_rec; - } - } - /*-------------------------------------------------------------*/ - - /* Calculate the 'offsets' associated with 'rec' */ - - ut_ad(fil_page_get_type(btr_pcur_get_page(pcur)) == FIL_PAGE_INDEX); - ut_ad(btr_page_get_index_id(btr_pcur_get_page(pcur)) == index->id); - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (UNIV_UNLIKELY(srv_force_recovery > 0 - || (!index->table->is_readable() && - srv_pass_corrupt_table == 2))) { - if (!rec_validate(rec, offsets) - || !btr_index_rec_validate(rec, index, FALSE)) { - char buf[MAX_FULL_NAME_LEN]; - ut_format_name(index->table->name, FALSE, buf, sizeof(buf)); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Index %s corrupted: rec offs " ULINTPF - " next offs " ULINTPF - ", page no " ULINTPF " ." 
- " We try to skip the record.", - buf, - page_offset(rec), - next_offs, - page_get_page_no(page_align(rec))); - - goto next_rec; - } - } - - /* Note that we cannot trust the up_match value in the cursor at this - place because we can arrive here after moving the cursor! Thus - we have to recompare rec and search_tuple to determine if they - match enough. */ - - if (match_mode == ROW_SEL_EXACT) { - /* Test if the index record matches completely to search_tuple - in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */ - - /* fputs("Comparing rec and search tuple\n", stderr); */ - - if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) { - - if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a gap lock on the index - record only if innodb_locks_unsafe_for_binlog - option is not set or this session is not - using a READ COMMITTED or lower isolation level. */ - - err = sel_set_rec_lock( - btr_pcur_get_block(pcur), - rec, index, offsets, - prebuilt->select_lock_type, LOCK_GAP, - thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - break; - default: - goto lock_wait_or_error; - } - } - - btr_pcur_store_position(pcur, &mtr); - - /* The found record was not a match, but may be used - as NEXT record (index_next). Set the relative position - to BTR_PCUR_BEFORE, to reflect that the position of - the persistent cursor is before the found/stored row - (pcur->old_rec). 
*/ - ut_ad(pcur->rel_pos == BTR_PCUR_ON); - pcur->rel_pos = BTR_PCUR_BEFORE; - - err = DB_RECORD_NOT_FOUND; -#if 0 - ut_print_name(stderr, trx, FALSE, index->name); - fputs(" record not found 3\n", stderr); -#endif - - goto normal_return; - } - - } else if (match_mode == ROW_SEL_EXACT_PREFIX) { - - if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) { - - if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a gap lock on the index - record only if innodb_locks_unsafe_for_binlog - option is not set or this session is not - using a READ COMMITTED or lower isolation level. */ - - err = sel_set_rec_lock( - btr_pcur_get_block(pcur), - rec, index, offsets, - prebuilt->select_lock_type, LOCK_GAP, - thr); - - switch (err) { - case DB_SUCCESS_LOCKED_REC: - case DB_SUCCESS: - break; - default: - goto lock_wait_or_error; - } - } - - btr_pcur_store_position(pcur, &mtr); - - /* The found record was not a match, but may be used - as NEXT record (index_next). Set the relative position - to BTR_PCUR_BEFORE, to reflect that the position of - the persistent cursor is before the found/stored row - (pcur->old_rec). */ - ut_ad(pcur->rel_pos == BTR_PCUR_ON); - pcur->rel_pos = BTR_PCUR_BEFORE; - - err = DB_RECORD_NOT_FOUND; -#if 0 - ut_print_name(stderr, trx, FALSE, index->name); - fputs(" record not found 4\n", stderr); -#endif - - goto normal_return; - } - } - - /* We are ready to look at a possible new index entry in the result - set: the cursor is now placed on a user record */ - - if (prebuilt->select_lock_type != LOCK_NONE) { - /* Try to place a lock on the index record; note that delete - marked records are a special case in a unique search. If there - is a non-delete marked record, then it is enough to lock its - existence with LOCK_REC_NOT_GAP. 
*/ - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using a READ COMMITED isolation - level we lock only the record, i.e., next-key locking is - not used. */ - - ulint lock_type; - - if (!set_also_gap_locks - || srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED - || (unique_search && !rec_get_deleted_flag(rec, comp))) { - - goto no_gap_lock; - } else { - lock_type = LOCK_ORDINARY; - } - - /* If we are doing a 'greater or equal than a primary key - value' search from a clustered index, and we find a record - that has that exact primary key value, then there is no need - to lock the gap before the record, because no insert in the - gap can be in our search range. That is, no phantom row can - appear that way. - - An example: if col1 is the primary key, the search is WHERE - col1 >= 100, and we find a record where col1 = 100, then no - need to lock the gap before that record. */ - - if (index == clust_index - && mode == PAGE_CUR_GE - && direction == 0 - && dtuple_get_n_fields_cmp(search_tuple) - == dict_index_get_n_unique(index) - && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) { -no_gap_lock: - lock_type = LOCK_REC_NOT_GAP; - } - - err = sel_set_rec_lock(btr_pcur_get_block(pcur), - rec, index, offsets, - prebuilt->select_lock_type, - lock_type, thr); - - switch (err) { - const rec_t* old_vers; - case DB_SUCCESS_LOCKED_REC: - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) { - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; - } - err = DB_SUCCESS; - case DB_SUCCESS: - break; - case DB_LOCK_WAIT: - /* Never unlock rows that were part of a conflict. 
*/ - prebuilt->new_rec_locks = 0; - - if (UNIV_LIKELY(prebuilt->row_read_type - != ROW_READ_TRY_SEMI_CONSISTENT) - || unique_search - || index != clust_index) { - - goto lock_wait_or_error; - } - - /* The following call returns 'offsets' - associated with 'old_vers' */ - row_sel_build_committed_vers_for_mysql( - clust_index, prebuilt, rec, - &offsets, &heap, &old_vers, &mtr); - - /* Check whether it was a deadlock or not, if not - a deadlock and the transaction had to wait then - release the lock it is waiting on. */ - - err = lock_trx_handle_wait(trx); - - switch (err) { - case DB_SUCCESS: - /* The lock was granted while we were - searching for the last committed version. - Do a normal locking read. */ - - offsets = rec_get_offsets( - rec, index, offsets, ULINT_UNDEFINED, - &heap); - goto locks_ok; - case DB_DEADLOCK: - goto lock_wait_or_error; - case DB_LOCK_WAIT: - err = DB_SUCCESS; - break; - default: - ut_error; - } - - if (old_vers == NULL) { - /* The row was not yet committed */ - - goto next_rec; - } - - did_semi_consistent_read = TRUE; - rec = old_vers; - break; - default: - - goto lock_wait_or_error; - } - } else { - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) { - - /* Do nothing: we let a non-locking SELECT read the - latest version of the record */ - - } else if (index == clust_index) { - - /* Fetch a previous version of the row if the current - one is not visible in the snapshot; if we have a very - high force recovery level set, we try to avoid crashes - by skipping this lookup */ - - if (UNIV_LIKELY(srv_force_recovery < 5) - && !lock_clust_rec_cons_read_sees( - rec, index, offsets, trx->read_view)) { - - rec_t* old_vers; - /* The following call returns 'offsets' - associated with 'old_vers' */ - err = row_sel_build_prev_vers_for_mysql( - trx->read_view, clust_index, - prebuilt, rec, &offsets, &heap, - &old_vers, &mtr); - - if (err != 
DB_SUCCESS) { - - goto lock_wait_or_error; - } - - if (old_vers == NULL) { - /* The row did not exist yet in - the read view */ - - goto next_rec; - } - - rec = old_vers; - } - } else { - /* We are looking into a non-clustered index, - and to get the right version of the record we - have to look also into the clustered index: this - is necessary, because we can only get the undo - information via the clustered index record. */ - - ut_ad(!dict_index_is_clust(index)); - - if (!lock_sec_rec_cons_read_sees( - rec, trx->read_view)) { - /* We should look at the clustered index. - However, as this is a non-locking read, - we can skip the clustered index lookup if - the condition does not match the secondary - index entry. */ - switch (row_search_idx_cond_check( - buf, prebuilt, rec, offsets)) { - case ICP_NO_MATCH: - goto next_rec; - case ICP_OUT_OF_RANGE: - err = DB_RECORD_NOT_FOUND; - goto idx_cond_failed; - case ICP_ABORTED_BY_USER: - err = DB_SEARCH_ABORTED_BY_USER; - goto idx_cond_failed; - case ICP_ERROR: - err = DB_ERROR; - goto idx_cond_failed; - case ICP_MATCH: - goto requires_clust_rec; - } - - ut_error; - } - } - } - -locks_ok: - /* NOTE that at this point rec can be an old version of a clustered - index record built for a consistent read. We cannot assume after this - point that rec is on a buffer pool page. Functions like - page_rec_is_comp() cannot be used! */ - - if (rec_get_deleted_flag(rec, comp)) { - - /* The record is delete-marked: we can skip it */ - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE - && !did_semi_consistent_read) { - - /* No need to keep a lock on a delete-marked record - if we do not want to use next-key locking. */ - - row_unlock_for_mysql(prebuilt, TRUE); - } - - /* This is an optimization to skip setting the next key lock - on the record that follows this delete-marked record. 
This - optimization works because of the unique search criteria - which precludes the presence of a range lock between this - delete marked record and the record following it. - - For now this is applicable only to clustered indexes while - doing a unique search except for HANDLER queries because - HANDLER allows NEXT and PREV even in unique search on - clustered index. There is scope for further optimization - applicable to unique secondary indexes. Current behaviour is - to widen the scope of a lock on an already delete marked record - if the same record is deleted twice by the same transaction */ - if (index == clust_index && unique_search - && !prebuilt->used_in_HANDLER) { - - err = DB_RECORD_NOT_FOUND; - - goto normal_return; - } - - goto next_rec; - } - - /* Check if the record matches the index condition. */ - switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) { - case ICP_NO_MATCH: - if (did_semi_consistent_read) { - row_unlock_for_mysql(prebuilt, TRUE); - } - goto next_rec; - case ICP_ABORTED_BY_USER: - err = DB_SEARCH_ABORTED_BY_USER; - goto idx_cond_failed; - case ICP_ERROR: - err = DB_ERROR; - goto idx_cond_failed; - case ICP_OUT_OF_RANGE: - err = DB_RECORD_NOT_FOUND; - goto idx_cond_failed; - case ICP_MATCH: - break; - } - - /* Get the clustered index record if needed, if we did not do the - search using the clustered index... */ - - use_clustered_index = - (index != clust_index && prebuilt->need_to_access_clustered); - - if (use_clustered_index && srv_prefix_index_cluster_optimization - && prebuilt->n_template <= index->n_fields) { - /* ...but, perhaps avoid the clustered index lookup if - all of the following are true: - 1) all columns are in the secondary index - 2) all values for columns that are prefix-only - indexes are shorter than the prefix size - This optimization can avoid many IOs for certain schemas. 
- */ - bool row_contains_all_values = true; - unsigned int i; - for (i = 0; i < prebuilt->n_template; i++) { - /* Condition (1) from above: is the field in the - index (prefix or not)? */ - const mysql_row_templ_t* templ = - prebuilt->mysql_template + i; - ulint secondary_index_field_no = - templ->rec_prefix_field_no; - if (secondary_index_field_no == ULINT_UNDEFINED) { - row_contains_all_values = false; - break; - } - /* Condition (2) from above: if this is a - prefix, is this row's value size shorter - than the prefix? */ - if (templ->rec_field_is_prefix) { - ulint record_size = rec_offs_nth_size( - offsets, - secondary_index_field_no); - const dict_field_t *field = - dict_index_get_nth_field( - index, - secondary_index_field_no); - ut_a(field->prefix_len > 0); - if (record_size >= field->prefix_len - / templ->mbmaxlen) { - row_contains_all_values = false; - break; - } - } - } - /* If (1) and (2) were true for all columns above, use - rec_prefix_field_no instead of rec_field_no, and skip - the clustered lookup below. */ - if (row_contains_all_values) { - for (i = 0; i < prebuilt->n_template; i++) { - mysql_row_templ_t* templ = - prebuilt->mysql_template + i; - templ->rec_field_no = - templ->rec_prefix_field_no; - ut_a(templ->rec_field_no != ULINT_UNDEFINED); - } - use_clustered_index = false; - srv_stats.n_sec_rec_cluster_reads_avoided.inc(); - } - } - - if (use_clustered_index) { - -requires_clust_rec: - ut_ad(index != clust_index); - /* We use a 'goto' to the preceding label if a consistent - read of a secondary index record requires us to look up old - versions of the associated clustered index record. */ - - ut_ad(rec_offs_validate(rec, index, offsets)); - - /* It was a non-clustered index and we must fetch also the - clustered index record */ - - mtr_has_extra_clust_latch = TRUE; - - /* The following call returns 'offsets' associated with - 'clust_rec'. Note that 'clust_rec' can be an old version - built for a consistent read. 
*/ - - err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, - thr, &clust_rec, - &offsets, &heap, &mtr); - switch (err) { - case DB_SUCCESS: - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - break; - case DB_SUCCESS_LOCKED_REC: - ut_a(clust_rec != NULL); - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - <= TRX_ISO_READ_COMMITTED) { - /* Note that the clustered index record - was locked. */ - prebuilt->new_rec_locks = 2; - } - err = DB_SUCCESS; - break; - default: - goto lock_wait_or_error; - } - - if (rec_get_deleted_flag(clust_rec, comp)) { - - /* The record is delete marked: we can skip it */ - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* No need to keep a lock on a delete-marked - record if we do not want to use next-key - locking. */ - - row_unlock_for_mysql(prebuilt, TRUE); - } - - goto next_rec; - } - - result_rec = clust_rec; - ut_ad(rec_offs_validate(result_rec, clust_index, offsets)); - - if (prebuilt->idx_cond) { - /* Convert the record to MySQL format. We were - unable to do this in row_search_idx_cond_check(), - because the condition is on the secondary index - and the requested column is in the clustered index. - We convert all fields, including those that - may have been used in ICP, because the - secondary index may contain a column prefix - rather than the full column. Also, as noted - in Bug #56680, the column in the secondary - index may be in the wrong case, and the - authoritative case is in result_rec, the - appropriate version of the clustered index record. */ - if (!row_sel_store_mysql_rec( - buf, prebuilt, result_rec, - TRUE, clust_index, offsets)) { - goto next_rec; - } - } - } else { - result_rec = rec; - } - - /* We found a qualifying record 'result_rec'. At this point, - 'offsets' are associated with 'result_rec'. 
*/ - - ut_ad(rec_offs_validate(result_rec, - result_rec != rec ? clust_index : index, - offsets)); - ut_ad(!rec_get_deleted_flag(result_rec, comp)); - - /* At this point, the clustered index record is protected - by a page latch that was acquired when pcur was positioned. - The latch will not be released until mtr_commit(&mtr). */ - - if ((match_mode == ROW_SEL_EXACT - || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD) - && prebuilt->select_lock_type == LOCK_NONE - && !prebuilt->templ_contains_blob - && !prebuilt->clust_index_was_generated - && !prebuilt->used_in_HANDLER - && !prebuilt->innodb_api - && prebuilt->template_type - != ROW_MYSQL_DUMMY_TEMPLATE - && !prebuilt->in_fts_query) { - - /* Inside an update, for example, we do not cache rows, - since we may use the cursor position to do the actual - update, that is why we require ...lock_type == LOCK_NONE. - Since we keep space in prebuilt only for the BLOBs of - a single row, we cannot cache rows in the case there - are BLOBs in the fields to be fetched. In HANDLER we do - not cache rows because there the cursor is a scrollable - cursor. */ - - ut_a(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); - - /* We only convert from InnoDB row format to MySQL row - format when ICP is disabled. */ - - if (!prebuilt->idx_cond) { - - /* We use next_buf to track the allocation of buffers - where we store and enqueue the buffers for our - pre-fetch optimisation. - - If next_buf == 0 then we store the converted record - directly into the MySQL record buffer (buf). If it is - != 0 then we allocate a pre-fetch buffer and store the - converted record there. - - If the conversion fails and the MySQL record buffer - was not written to then we reset next_buf so that - we can re-use the MySQL record buffer in the next - iteration. */ - - next_buf = next_buf - ? row_sel_fetch_last_buf(prebuilt) : buf; - - if (!row_sel_store_mysql_rec( - next_buf, prebuilt, result_rec, - result_rec != rec, - result_rec != rec ? 
clust_index : index, - offsets)) { - - if (next_buf == buf) { - ut_a(prebuilt->n_fetch_cached == 0); - next_buf = 0; - } - - /* Only fresh inserts may contain incomplete - externally stored columns. Pretend that such - records do not exist. Such records may only be - accessed at the READ UNCOMMITTED isolation - level or when rolling back a recovered - transaction. Rollback happens at a lower - level, not here. */ - goto next_rec; - } - - if (next_buf != buf) { - row_sel_enqueue_cache_row_for_mysql( - next_buf, prebuilt); - } - } else { - row_sel_enqueue_cache_row_for_mysql(buf, prebuilt); - } - - if (prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) { - goto next_rec; - } - - } else { - if (UNIV_UNLIKELY - (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) { - /* CHECK TABLE: fetch the row */ - - if (result_rec != rec - && !prebuilt->need_to_access_clustered) { - /* We used 'offsets' for the clust - rec, recalculate them for 'rec' */ - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, - &heap); - result_rec = rec; - } - - memcpy(buf + 4, result_rec - - rec_offs_extra_size(offsets), - rec_offs_size(offsets)); - mach_write_to_4(buf, - rec_offs_extra_size(offsets) + 4); - } else if (!prebuilt->idx_cond && !prebuilt->innodb_api) { - /* The record was not yet converted to MySQL format. */ - if (!row_sel_store_mysql_rec( - buf, prebuilt, result_rec, - result_rec != rec, - result_rec != rec ? clust_index : index, - offsets)) { - /* Only fresh inserts may contain - incomplete externally stored - columns. Pretend that such records do - not exist. Such records may only be - accessed at the READ UNCOMMITTED - isolation level or when rolling back a - recovered transaction. Rollback - happens at a lower level, not here. */ - goto next_rec; - } - } - - if (prebuilt->clust_index_was_generated) { - row_sel_store_row_id_to_prebuilt( - prebuilt, result_rec, - result_rec == rec ? 
index : clust_index, - offsets); - } - } - - /* From this point on, 'offsets' are invalid. */ - - /* We have an optimization to save CPU time: if this is a consistent - read on a unique condition on the clustered index, then we do not - store the pcur position, because any fetch next or prev will anyway - return 'end of file'. Exceptions are locking reads and the MySQL - HANDLER command where the user can move the cursor with PREV or NEXT - even after a unique search. */ - - err = DB_SUCCESS; - -idx_cond_failed: - if (!unique_search - || !dict_index_is_clust(index) - || direction != 0 - || prebuilt->select_lock_type != LOCK_NONE - || prebuilt->used_in_HANDLER - || prebuilt->innodb_api) { - - /* Inside an update always store the cursor position */ - - btr_pcur_store_position(pcur, &mtr); - - if (prebuilt->innodb_api) { - prebuilt->innodb_api_rec = result_rec; - } - } - - goto normal_return; - -next_rec: - /* Reset the old and new "did semi-consistent read" flags. */ - if (UNIV_UNLIKELY(prebuilt->row_read_type - == ROW_READ_DID_SEMI_CONSISTENT)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } - did_semi_consistent_read = FALSE; - prebuilt->new_rec_locks = 0; - - /*-------------------------------------------------------------*/ - /* PHASE 5: Move the cursor to the next index record */ - - /* NOTE: For moves_up==FALSE, the mini-transaction will be - committed and restarted every time when switching b-tree - pages. For moves_up==TRUE in index condition pushdown, we can - scan an entire secondary index tree within a single - mini-transaction. As long as the prebuilt->idx_cond does not - match, we do not need to consult the clustered index or - return records to MySQL, and thus we can avoid repositioning - the cursor. What prevents us from buffer-fixing all leaf pages - within the mini-transaction is the btr_leaf_page_release() - call in btr_pcur_move_to_next_page(). Only the leaf page where - the cursor is positioned will remain buffer-fixed. 
*/ - - if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { - /* We must commit mtr if we are moving to the next - non-clustered index record, because we could break the - latching order if we would access a different clustered - index page right away without releasing the previous. */ - - btr_pcur_store_position(pcur, &mtr); - - mtr_commit(&mtr); - mtr_has_extra_clust_latch = FALSE; - - mtr_start_trx(&mtr, trx); - if (sel_restore_position_for_mysql(&same_user_rec, - BTR_SEARCH_LEAF, - pcur, moves_up, &mtr)) { -#ifdef UNIV_SEARCH_DEBUG - cnt++; -#endif /* UNIV_SEARCH_DEBUG */ - - goto rec_loop; - } - } - - if (moves_up) { - if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) { -not_moved: - btr_pcur_store_position(pcur, &mtr); - - if (match_mode != 0) { - err = DB_RECORD_NOT_FOUND; - } else { - err = DB_END_OF_INDEX; - } - - goto normal_return; - } - } else { - if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) { - goto not_moved; - } - } - -#ifdef UNIV_SEARCH_DEBUG - cnt++; -#endif /* UNIV_SEARCH_DEBUG */ - - goto rec_loop; - -lock_wait_or_error: - /* Reset the old and new "did semi-consistent read" flags. 
*/ - if (UNIV_UNLIKELY(prebuilt->row_read_type - == ROW_READ_DID_SEMI_CONSISTENT)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } - did_semi_consistent_read = FALSE; - - /*-------------------------------------------------------------*/ - - if (rec) { - btr_pcur_store_position(pcur, &mtr); - } - -lock_table_wait: - mtr_commit(&mtr); - mtr_has_extra_clust_latch = FALSE; - - trx->error_state = err; - - /* The following is a patch for MySQL */ - - que_thr_stop_for_mysql(thr); - - thr->lock_state = QUE_THR_LOCK_ROW; - - if (row_mysql_handle_errors(&err, trx, thr, NULL)) { - /* It was a lock wait, and it ended */ - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - mtr_start_trx(&mtr, trx); - - /* Table lock waited, go try to obtain table lock - again */ - if (table_lock_waited) { - table_lock_waited = FALSE; - - goto wait_table_again; - } - - sel_restore_position_for_mysql(&same_user_rec, - BTR_SEARCH_LEAF, pcur, - moves_up, &mtr); - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && !same_user_rec) { - - /* Since we were not able to restore the cursor - on the same user record, we cannot use - row_unlock_for_mysql() to unlock any records, and - we must thus reset the new rec lock info. Since - in lock0lock.cc we have blocked the inheriting of gap - X-locks, we actually do not have any new record locks - set in this case. - - Note that if we were able to restore on the 'same' - user record, it is still possible that we were actually - waiting on a delete-marked record, and meanwhile - it was removed by purge and inserted again by some - other user. But that is no problem, because in - rec_loop we will again try to set a lock, and - new_rec_lock_info in trx will be right at the end. 
*/ - - prebuilt->new_rec_locks = 0; - } - - mode = pcur->search_mode; - - goto rec_loop; - } - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - -#ifdef UNIV_SEARCH_DEBUG - /* fputs("Using ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */ -#endif /* UNIV_SEARCH_DEBUG */ - goto func_exit; - -normal_return: - /*-------------------------------------------------------------*/ - que_thr_stop_for_mysql_no_error(thr, trx); - - mtr_commit(&mtr); - - if (prebuilt->idx_cond != 0) { - - /* When ICP is active we don't write to the MySQL buffer - directly, only to buffers that are enqueued in the pre-fetch - queue. We need to dequeue the first buffer and copy the contents - to the record buffer that was passed in by MySQL. */ - - if (prebuilt->n_fetch_cached > 0) { - row_sel_dequeue_cached_row_for_mysql(buf, prebuilt); - err = DB_SUCCESS; - } - - } else if (next_buf != 0) { - - /* We may or may not have enqueued some buffers to the - pre-fetch queue, but we definitely wrote to the record - buffer passed to use by MySQL. */ - - DEBUG_SYNC_C("row_search_cached_row"); - err = DB_SUCCESS; - } - -#ifdef UNIV_SEARCH_DEBUG - /* fputs("Using ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */ -#endif /* UNIV_SEARCH_DEBUG */ - -func_exit: - trx->op_info = ""; - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - /* Set or reset the "did semi-consistent read" flag on return. - The flag did_semi_consistent_read is set if and only if - the record being returned was fetched with a semi-consistent read. 
*/ - ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS - || !did_semi_consistent_read); - - if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) { - if (UNIV_UNLIKELY(did_semi_consistent_read)) { - prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT; - } else { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } - } - - ut_ad(!trx->has_search_latch); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!btr_search_own_any()); - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ - - DEBUG_SYNC_C("innodb_row_search_for_mysql_exit"); - - return(err); -} - -/*******************************************************************//** -Checks if MySQL at the moment is allowed for this table to retrieve a -consistent read result, or store it to the query cache. -@return TRUE if storing or retrieving from the query cache is permitted */ -UNIV_INTERN -ibool -row_search_check_if_query_cache_permitted( -/*======================================*/ - trx_t* trx, /*!< in: transaction object */ - const char* norm_name) /*!< in: concatenation of database name, - '/' char, table name */ -{ - dict_table_t* table; - ibool ret = FALSE; - - /* Disable query cache altogether for all tables if recovered XA - transactions in prepared state exist. This is because we do not - restore the table locks for those transactions and we may wrongly - set ret=TRUE above if "lock_table_get_n_locks(table) == 0". See - "Bug#14658648 XA ROLLBACK (DISTRIBUTED DATABASE) NOT WORKING WITH - QUERY CACHE ENABLED". - Read trx_sys->n_prepared_recovered_trx without mutex protection, - not possible to end up with a torn read since n_prepared_recovered_trx - is word size. 
*/ - if (trx_sys->n_prepared_recovered_trx > 0) { - - return(FALSE); - } - - table = dict_table_open_on_name(norm_name, FALSE, FALSE, - DICT_ERR_IGNORE_NONE); - - if (table == NULL) { - - return(FALSE); - } - - /* Start the transaction if it is not started yet */ - - trx_start_if_not_started(trx); - - /* If there are locks on the table or some trx has invalidated the - cache up to our trx id, then ret = FALSE. - We do not check what type locks there are on the table, though only - IX type locks actually would require ret = FALSE. */ - - if (lock_table_get_n_locks(table) == 0 - && trx->id >= table->query_cache_inv_trx_id) { - - ret = TRUE; - - /* If the isolation level is high, assign a read view for the - transaction if it does not yet have one */ - - if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ - && !trx->read_view) { - - trx->read_view = - read_view_open_now(trx->id, - trx->prebuilt_view); - trx->global_read_view = trx->read_view; - } - } - - dict_table_close(table, FALSE, FALSE); - - return(ret); -} - -/*******************************************************************//** -Read the AUTOINC column from the current row. If the value is less than -0 and the type is not unsigned then we reset the value to 0. -@return value read from the column */ -static -ib_uint64_t -row_search_autoinc_read_column( -/*===========================*/ - dict_index_t* index, /*!< in: index to read from */ - const rec_t* rec, /*!< in: current rec */ - ulint col_no, /*!< in: column number */ - ulint mtype, /*!< in: column main type */ - ibool unsigned_type) /*!< in: signed or unsigned flag */ -{ - ulint len; - const byte* data; - ib_uint64_t value; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - rec_offs_init(offsets_); - - offsets = rec_get_offsets(rec, index, offsets, col_no + 1, &heap); - - if (rec_offs_nth_sql_null(offsets, col_no)) { - /* There is no non-NULL value in the auto-increment column. 
*/ - value = 0; - goto func_exit; - } - - data = rec_get_nth_field(rec, offsets, col_no, &len); - - switch (mtype) { - case DATA_INT: - ut_a(len <= sizeof value); - value = mach_read_int_type(data, len, unsigned_type); - break; - - case DATA_FLOAT: - ut_a(len == sizeof(float)); - value = (ib_uint64_t) mach_float_read(data); - break; - - case DATA_DOUBLE: - ut_a(len == sizeof(double)); - value = (ib_uint64_t) mach_double_read(data); - break; - - default: - ut_error; - } - - if (!unsigned_type && (ib_int64_t) value < 0) { - value = 0; - } - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(value); -} - -/** Get the maximum and non-delete-marked record in an index. -@param[in] index index tree -@param[in,out] mtr mini-transaction (may be committed and restarted) -@return maximum record, page s-latched in mtr -@retval NULL if there are no records, or if all of them are delete-marked */ -static -const rec_t* -row_search_get_max_rec( - dict_index_t* index, - mtr_t* mtr) -{ - btr_pcur_t pcur; - const rec_t* rec; - /* Open at the high/right end (false), and init cursor */ - btr_pcur_open_at_index_side( - false, index, BTR_SEARCH_LEAF, &pcur, true, 0, mtr); - - do { - const page_t* page; - - page = btr_pcur_get_page(&pcur); - rec = page_find_rec_max_not_deleted(page); - - if (page_rec_is_user_rec(rec)) { - break; - } else { - rec = NULL; - } - btr_pcur_move_before_first_on_page(&pcur); - } while (btr_pcur_move_to_prev(&pcur, mtr)); - - btr_pcur_close(&pcur); - - return(rec); -} - -/*******************************************************************//** -Read the max AUTOINC value from an index. 
-@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if -column name can't be found in index */ -UNIV_INTERN -dberr_t -row_search_max_autoinc( -/*===================*/ - dict_index_t* index, /*!< in: index to search */ - const char* col_name, /*!< in: name of autoinc column */ - ib_uint64_t* value) /*!< out: AUTOINC value read */ -{ - dict_field_t* dfield = dict_index_get_nth_field(index, 0); - dberr_t error = DB_SUCCESS; - *value = 0; - - if (strcmp(col_name, dfield->name) != 0) { - error = DB_RECORD_NOT_FOUND; - } else { - mtr_t mtr; - const rec_t* rec; - - mtr_start(&mtr); - - rec = row_search_get_max_rec(index, &mtr); - - if (rec != NULL) { - ibool unsigned_type = ( - dfield->col->prtype & DATA_UNSIGNED); - - *value = row_search_autoinc_read_column( - index, rec, 0, - dfield->col->mtype, unsigned_type); - } - - mtr_commit(&mtr); - } - - return(error); -} diff --git a/storage/xtradb/row/row0uins.cc b/storage/xtradb/row/row0uins.cc deleted file mode 100644 index f14a4ef9bcf..00000000000 --- a/storage/xtradb/row/row0uins.cc +++ /dev/null @@ -1,475 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0uins.cc -Fresh insert undo - -Created 2/25/1997 Heikki Tuuri -*******************************************************/ - -#include "row0uins.h" - -#ifdef UNIV_NONINL -#include "row0uins.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "dict0crea.h" -#include "trx0undo.h" -#include "trx0roll.h" -#include "btr0btr.h" -#include "mach0data.h" -#include "row0undo.h" -#include "row0vers.h" -#include "row0log.h" -#include "trx0trx.h" -#include "trx0rec.h" -#include "row0row.h" -#include "row0upd.h" -#include "que0que.h" -#include "ibuf0ibuf.h" -#include "log0log.h" - -/************************************************************************* -IMPORTANT NOTE: Any operation that generates redo MUST check that there -is enough space in the redo log before for that operation. This is -done by calling log_free_check(). The reason for checking the -availability of the redo log space before the start of the operation is -that we MUST not hold any synchonization objects when performing the -check. -If you make a change in this module make sure that no codepath is -introduced where a call to log_free_check() is bypassed. */ - -/***************************************************************//** -Removes a clustered index record. The pcur in node was positioned on the -record, now it is detached. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_ins_remove_clust_rec( -/*==========================*/ - undo_node_t* node) /*!< in: undo node */ -{ - btr_cur_t* btr_cur; - ibool success; - dberr_t err; - ulint n_tries = 0; - mtr_t mtr; - dict_index_t* index = node->pcur.btr_cur.index; - bool online; - - ut_ad(dict_index_is_clust(index)); - - mtr_start(&mtr); - - /* This is similar to row_undo_mod_clust(). The DDL thread may - already have copied this row from the log to the new table. - We must log the removal, so that the row will be correctly - purged. However, we can log the removal out of sync with the - B-tree modification. */ - - online = dict_index_is_online_ddl(index); - if (online) { - ut_ad(node->trx->dict_operation_lock_mode - != RW_X_LATCH); - ut_ad(node->table->id != DICT_INDEXES_ID); - mtr_s_lock(dict_index_get_lock(index), &mtr); - } - - success = btr_pcur_restore_position( - online - ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED - : BTR_MODIFY_LEAF, &node->pcur, &mtr); - ut_a(success); - - btr_cur = btr_pcur_get_btr_cur(&node->pcur); - - ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index) - == node->trx->id); - - if (online && dict_index_is_online_ddl(index)) { - const rec_t* rec = btr_cur_get_rec(btr_cur); - mem_heap_t* heap = NULL; - const ulint* offsets = rec_get_offsets( - rec, index, NULL, ULINT_UNDEFINED, &heap); - row_log_table_delete(rec, index, offsets, NULL); - mem_heap_free(heap); - } - - if (node->table->id == DICT_INDEXES_ID) { - ut_ad(!online); - ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Drop the index tree associated with the row in - SYS_INDEXES table: */ - - dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr); - - mtr_commit(&mtr); - - mtr_start(&mtr); - - success = btr_pcur_restore_position( - BTR_MODIFY_LEAF, &node->pcur, &mtr); - ut_a(success); - } - - if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) { - err = DB_SUCCESS; - 
goto func_exit; - } - - btr_pcur_commit_specify_mtr(&node->pcur, &mtr); -retry: - /* If did not succeed, try pessimistic descent to tree */ - mtr_start(&mtr); - - success = btr_pcur_restore_position(BTR_MODIFY_TREE, - &(node->pcur), &mtr); - ut_a(success); - - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - trx_is_recv(node->trx) - ? RB_RECOVERY - : RB_NORMAL, &mtr); - - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (err == DB_OUT_OF_FILE_SPACE - && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); - - n_tries++; - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - -func_exit: - btr_pcur_commit_specify_mtr(&node->pcur, &mtr); - trx_undo_rec_release(node->trx, node->undo_no); - - return(err); -} - -/***************************************************************//** -Removes a secondary index entry if found. -@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_ins_remove_sec_low( -/*========================*/ - ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry) /*!< in: index entry to remove */ -{ - btr_pcur_t pcur; - btr_cur_t* btr_cur; - dberr_t err = DB_SUCCESS; - mtr_t mtr; - enum row_search_result search_result; - - log_free_check(); - - mtr_start(&mtr); - - if (mode == BTR_MODIFY_LEAF) { - mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; - mtr_s_lock(dict_index_get_lock(index), &mtr); - } else { - ut_ad(mode == BTR_MODIFY_TREE); - mtr_x_lock(dict_index_get_lock(index), &mtr); - } - - if (row_log_online_op_try(index, entry, 0)) { - goto func_exit_no_pcur; - } - - search_result = row_search_index_entry(index, entry, mode, - &pcur, &mtr); - - switch (search_result) { 
- case ROW_NOT_FOUND: - goto func_exit; - case ROW_FOUND: - break; - case ROW_BUFFERED: - case ROW_NOT_DELETED_REF: - /* These are invalid outcomes, because the mode passed - to row_search_index_entry() did not include any of the - flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ - ut_error; - } - - btr_cur = btr_pcur_get_btr_cur(&pcur); - - if (mode != BTR_MODIFY_TREE) { - err = btr_cur_optimistic_delete(btr_cur, 0, &mtr) - ? DB_SUCCESS : DB_FAIL; - } else { - /* No need to distinguish RB_RECOVERY here, because we - are deleting a secondary index record: the distinction - between RB_NORMAL and RB_RECOVERY only matters when - deleting a record that contains externally stored - columns. */ - ut_ad(!dict_index_is_clust(index)); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - RB_NORMAL, &mtr); - } -func_exit: - btr_pcur_close(&pcur); -func_exit_no_pcur: - mtr_commit(&mtr); - - return(err); -} - -/***************************************************************//** -Removes a secondary index entry from the index if found. Tries first -optimistic, then pessimistic descent down the tree. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_ins_remove_sec( -/*====================*/ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry) /*!< in: index entry to insert */ -{ - dberr_t err; - ulint n_tries = 0; - - /* Try first optimistic descent to the B-tree */ - - err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry); - - if (err == DB_SUCCESS) { - - return(err); - } - - /* Try then pessimistic descent to the B-tree */ -retry: - err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry); - - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - - n_tries++; - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - - return(err); -} - -/***********************************************************//** -Parses the row reference and other info in a fresh insert undo record. */ -static -void -row_undo_ins_parse_undo_rec( -/*========================*/ - undo_node_t* node, /*!< in/out: row undo node */ - ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ -{ - dict_index_t* clust_index; - byte* ptr; - undo_no_t undo_no; - table_id_t table_id; - ulint type; - ulint dummy; - bool dummy_extern; - - ut_ad(node); - - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, - &dummy_extern, &undo_no, &table_id); - ut_ad(type == TRX_UNDO_INSERT_REC); - node->rec_type = type; - - node->update = NULL; - node->table = dict_table_open_on_id( - table_id, dict_locked, DICT_TABLE_OP_NORMAL); - - /* Skip the UNDO if we can't find the table or the .ibd file. 
*/ - if (UNIV_UNLIKELY(node->table == NULL)) { - } else if (UNIV_UNLIKELY(node->table->file_unreadable)) { -close_table: - dict_table_close(node->table, dict_locked, FALSE); - node->table = NULL; - } else { - clust_index = dict_table_get_first_index(node->table); - - if (clust_index != NULL) { - trx_undo_rec_get_row_ref( - ptr, clust_index, &node->ref, node->heap); - - if (!row_undo_search_clust_to_pcur(node)) { - goto close_table; - } - - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: table "); - ut_print_name(stderr, node->trx, TRUE, - node->table->name); - fprintf(stderr, " has no indexes, " - "ignoring the table\n"); - goto close_table; - } - } -} - -/***************************************************************//** -Removes secondary index records. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_ins_remove_sec_rec( -/*========================*/ - undo_node_t* node) /*!< in/out: row undo node */ -{ - dberr_t err = DB_SUCCESS; - dict_index_t* index = node->index; - mem_heap_t* heap; - - heap = mem_heap_create(1024); - - while (index != NULL) { - dtuple_t* entry; - - if (index->type & DICT_FTS) { - dict_table_next_uncorrupted_index(index); - continue; - } - - /* An insert undo record TRX_UNDO_INSERT_REC will - always contain all fields of the index. It does not - matter if any indexes were created afterwards; all - index entries can be reconstructed from the row. */ - entry = row_build_index_entry( - node->row, node->ext, index, heap); - if (UNIV_UNLIKELY(!entry)) { - /* The database must have crashed after - inserting a clustered index record but before - writing all the externally stored columns of - that record, or a statement is being rolled - back because an error occurred while storing - off-page columns. - - Because secondary index entries are inserted - after the clustered index record, we may - assume that the secondary index record does - not exist. 
*/ - } else { - err = row_undo_ins_remove_sec(index, entry); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - goto func_exit; - } - } - - mem_heap_empty(heap); - dict_table_next_uncorrupted_index(index); - } - -func_exit: - node->index = index; - mem_heap_free(heap); - return(err); -} - -/***********************************************************//** -Undoes a fresh insert of a row to a table. A fresh insert means that -the same clustered index unique key did not have any record, even delete -marked, at the time of the insert. InnoDB is eager in a rollback: -if it figures out that an index record will be removed in the purge -anyway, it will remove it in the rollback. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -UNIV_INTERN -dberr_t -row_undo_ins( -/*=========*/ - undo_node_t* node) /*!< in: row undo node */ -{ - dberr_t err; - ibool dict_locked; - - ut_ad(node->state == UNDO_NODE_INSERT); - - dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH; - - row_undo_ins_parse_undo_rec(node, dict_locked); - - if (node->table == NULL) { - trx_undo_rec_release(node->trx, node->undo_no); - - return(DB_SUCCESS); - } - - /* Iterate over all the indexes and undo the insert.*/ - - node->index = dict_table_get_first_index(node->table); - ut_ad(dict_index_is_clust(node->index)); - /* Skip the clustered index (the first index) */ - node->index = dict_table_get_next_index(node->index); - - dict_table_skip_corrupt_index(node->index); - - err = row_undo_ins_remove_sec_rec(node); - - if (err == DB_SUCCESS) { - - log_free_check(); - - if (node->table->id == DICT_INDEXES_ID) { - - if (!dict_locked) { - mutex_enter(&dict_sys->mutex); - } - } - - // FIXME: We need to update the dict_index_t::space and - // page number fields too. 
- err = row_undo_ins_remove_clust_rec(node); - - if (node->table->id == DICT_INDEXES_ID - && !dict_locked) { - - mutex_exit(&dict_sys->mutex); - } - } - - dict_table_close(node->table, dict_locked, FALSE); - - node->table = NULL; - - return(err); -} diff --git a/storage/xtradb/row/row0umod.cc b/storage/xtradb/row/row0umod.cc deleted file mode 100644 index 8deba4f00a5..00000000000 --- a/storage/xtradb/row/row0umod.cc +++ /dev/null @@ -1,1168 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0umod.cc -Undo modify of a row - -Created 2/27/1997 Heikki Tuuri -*******************************************************/ - -#include "row0umod.h" - -#ifdef UNIV_NONINL -#include "row0umod.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "trx0undo.h" -#include "trx0roll.h" -#include "btr0btr.h" -#include "mach0data.h" -#include "row0undo.h" -#include "row0vers.h" -#include "row0log.h" -#include "trx0trx.h" -#include "trx0rec.h" -#include "row0row.h" -#include "row0upd.h" -#include "que0que.h" -#include "log0log.h" - -/* Considerations on undoing a modify operation. -(1) Undoing a delete marking: all index records should be found. Some of -them may have delete mark already FALSE, if the delete mark operation was -stopped underway, or if the undo operation ended prematurely because of a -system crash. -(2) Undoing an update of a delete unmarked record: the newer version of -an updated secondary index entry should be removed if no prior version -of the clustered index record requires its existence. Otherwise, it should -be delete marked. -(3) Undoing an update of a delete marked record. In this kind of update a -delete marked clustered index record was delete unmarked and possibly also -some of its fields were changed. Now, it is possible that the delete marked -version has become obsolete at the time the undo is started. */ - -/************************************************************************* -IMPORTANT NOTE: Any operation that generates redo MUST check that there -is enough space in the redo log before for that operation. This is -done by calling log_free_check(). 
The reason for checking the -availability of the redo log space before the start of the operation is -that we MUST not hold any synchonization objects when performing the -check. -If you make a change in this module make sure that no codepath is -introduced where a call to log_free_check() is bypassed. */ - -/***********************************************************//** -Undoes a modify in a clustered index record. -@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_clust_low( -/*===================*/ - undo_node_t* node, /*!< in: row undo node */ - ulint** offsets,/*!< out: rec_get_offsets() on the record */ - mem_heap_t** offsets_heap, - /*!< in/out: memory heap that can be emptied */ - mem_heap_t* heap, /*!< in/out: memory heap */ - const dtuple_t**rebuilt_old_pk, - /*!< out: row_log_table_get_pk() - before the update, or NULL if - the table is not being rebuilt online or - the PRIMARY KEY definition does not change */ - byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR - for row_log_table_delete() */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr, /*!< in: mtr; must be committed before - latching any further pages */ - ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ -{ - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - dberr_t err; -#ifdef UNIV_DEBUG - ibool success; -#endif /* UNIV_DEBUG */ - - pcur = &node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - -#ifdef UNIV_DEBUG - success = -#endif /* UNIV_DEBUG */ - btr_pcur_restore_position(mode, pcur, mtr); - - ut_ad(success); - ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), - btr_cur_get_index(btr_cur)) - == thr_get_trx(thr)->id); - - if (mode != BTR_MODIFY_LEAF - && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) { - *rebuilt_old_pk = row_log_table_get_pk( - btr_cur_get_rec(btr_cur), - btr_cur_get_index(btr_cur), NULL, sys, &heap); - } else { - *rebuilt_old_pk = NULL; - } - - if (mode != 
BTR_MODIFY_TREE) { - ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF); - - err = btr_cur_optimistic_update( - BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG - | BTR_KEEP_SYS_FLAG, - btr_cur, offsets, offsets_heap, - node->update, node->cmpl_info, - thr, thr_get_trx(thr)->id, mtr); - } else { - big_rec_t* dummy_big_rec; - - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG - | BTR_KEEP_SYS_FLAG, - btr_cur, offsets, offsets_heap, heap, - &dummy_big_rec, node->update, - node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); - - ut_a(!dummy_big_rec); - } - - return(err); -} - -/***********************************************************//** -Purges a clustered index record after undo if possible. -This is attempted when the record was inserted by updating a -delete-marked record and there no longer exist transactions -that would see the delete-marked record. -@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_remove_clust_low( -/*==========================*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr, /*!< in/out: mini-transaction */ - ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ -{ - btr_cur_t* btr_cur; - dberr_t err; - ulint trx_id_offset; - - ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - - /* Find out if the record has been purged already - or if we can remove it. 
*/ - - if (!btr_pcur_restore_position(mode, &node->pcur, mtr) - || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) { - - return(DB_SUCCESS); - } - - btr_cur = btr_pcur_get_btr_cur(&node->pcur); - - trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset; - - if (!trx_id_offset) { - mem_heap_t* heap = NULL; - ulint trx_id_col; - const ulint* offsets; - ulint len; - - trx_id_col = dict_index_get_sys_col_pos( - btr_cur_get_index(btr_cur), DATA_TRX_ID); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); - - offsets = rec_get_offsets( - btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur), - NULL, trx_id_col + 1, &heap); - - trx_id_offset = rec_get_nth_field_offs( - offsets, trx_id_col, &len); - ut_ad(len == DATA_TRX_ID_LEN); - mem_heap_free(heap); - } - - if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset) - != node->new_trx_id) { - /* The record must have been purged and then replaced - with a different one. */ - return(DB_SUCCESS); - } - - /* We are about to remove an old, delete-marked version of the - record that may have been delete-marked by a different transaction - than the rolling-back one. */ - ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur), - dict_table_is_comp(node->table))); - - if (mode == BTR_MODIFY_LEAF) { - err = btr_cur_optimistic_delete(btr_cur, 0, mtr) - ? DB_SUCCESS - : DB_FAIL; - } else { - ut_ad(mode == BTR_MODIFY_TREE); - - /* This operation is analogous to purge, we can free also - inherited externally stored fields */ - - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - thr_is_recv(thr) - ? RB_RECOVERY_PURGE_REC - : RB_NONE, mtr); - - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - } - - return(err); -} - -/***********************************************************//** -Undoes a modify in a clustered index record. Sets also the node state for the -next round of undo. 
-@return DB_SUCCESS or error code: we may run out of file space */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_clust( -/*===============*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ -{ - btr_pcur_t* pcur; - mtr_t mtr; - dberr_t err; - dict_index_t* index; - bool online; - - ut_ad(thr_get_trx(thr) == node->trx); - ut_ad(node->trx->dict_operation_lock_mode); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED) - || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - log_free_check(); - pcur = &node->pcur; - index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur)); - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - online = dict_index_is_online_ddl(index); - if (online) { - ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH); - mtr_s_lock(dict_index_get_lock(index), &mtr); - } - - mem_heap_t* heap = mem_heap_create(1024); - mem_heap_t* offsets_heap = NULL; - ulint* offsets = NULL; - const dtuple_t* rebuilt_old_pk; - byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]; - - /* Try optimistic processing of the record, keeping changes within - the index page */ - - err = row_undo_mod_clust_low(node, &offsets, &offsets_heap, - heap, &rebuilt_old_pk, sys, - thr, &mtr, online - ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED - : BTR_MODIFY_LEAF); - - if (err != DB_SUCCESS) { - btr_pcur_commit_specify_mtr(pcur, &mtr); - - /* We may have to modify tree structure: do a pessimistic - descent down the index tree */ - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - err = row_undo_mod_clust_low( - node, &offsets, &offsets_heap, - heap, &rebuilt_old_pk, sys, - thr, &mtr, BTR_MODIFY_TREE); - ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE); - } - - /* Online rebuild cannot be initiated while we are holding - dict_operation_lock and index->lock. (It can be aborted.) 
*/ - ut_ad(online || !dict_index_is_online_ddl(index)); - - if (err == DB_SUCCESS && online) { -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED) - || rw_lock_own(&index->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - switch (node->rec_type) { - case TRX_UNDO_DEL_MARK_REC: - row_log_table_insert( - btr_pcur_get_rec(pcur), index, offsets); - break; - case TRX_UNDO_UPD_EXIST_REC: - row_log_table_update( - btr_pcur_get_rec(pcur), index, offsets, - rebuilt_old_pk); - break; - case TRX_UNDO_UPD_DEL_REC: - row_log_table_delete( - btr_pcur_get_rec(pcur), index, offsets, sys); - break; - default: - ut_ad(0); - break; - } - } - - /** - * when scrubbing, and records gets cleared, - * the transaction id is not present afterwards. - * this is safe as: since the record is on free-list - * it can be reallocated at any time after this mtr-commits - * which is just below - */ - ut_ad(srv_immediate_scrub_data_uncompressed || - rec_get_trx_id(btr_pcur_get_rec(pcur), index) == node->new_trx_id); - - btr_pcur_commit_specify_mtr(pcur, &mtr); - - if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) { - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - /* It is not necessary to call row_log_table, - because the record is delete-marked and would thus - be omitted from the rebuilt copy of the table. 
*/ - err = row_undo_mod_remove_clust_low( - node, thr, &mtr, BTR_MODIFY_LEAF); - if (err != DB_SUCCESS) { - btr_pcur_commit_specify_mtr(pcur, &mtr); - - /* We may have to modify tree structure: do a - pessimistic descent down the index tree */ - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - err = row_undo_mod_remove_clust_low(node, thr, &mtr, - BTR_MODIFY_TREE); - - ut_ad(err == DB_SUCCESS - || err == DB_OUT_OF_FILE_SPACE); - } - - btr_pcur_commit_specify_mtr(pcur, &mtr); - } - - node->state = UNDO_NODE_FETCH_NEXT; - - trx_undo_rec_release(node->trx, node->undo_no); - - if (offsets_heap) { - mem_heap_free(offsets_heap); - } - mem_heap_free(heap); - return(err); -} - -/***********************************************************//** -Delete marks or removes a secondary index entry if found. -@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_del_mark_or_remove_sec_low( -/*====================================*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr, /*!< in: query thread */ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry, /*!< in: index entry */ - ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or - BTR_MODIFY_TREE */ -{ - btr_pcur_t pcur; - btr_cur_t* btr_cur; - ibool success; - ibool old_has; - dberr_t err = DB_SUCCESS; - mtr_t mtr; - mtr_t mtr_vers; - enum row_search_result search_result; - - log_free_check(); - mtr_start_trx(&mtr, thr_get_trx(thr)); - - if (*index->name == TEMP_INDEX_PREFIX) { - /* The index->online_status may change if the - index->name starts with TEMP_INDEX_PREFIX (meaning - that the index is or was being created online). It is - protected by index->lock. 
*/ - if (mode == BTR_MODIFY_LEAF) { - mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; - mtr_s_lock(dict_index_get_lock(index), &mtr); - } else { - ut_ad(mode == BTR_MODIFY_TREE); - mtr_x_lock(dict_index_get_lock(index), &mtr); - } - - if (row_log_online_op_try(index, entry, 0)) { - goto func_exit_no_pcur; - } - } else { - /* For secondary indexes, - index->online_status==ONLINE_INDEX_CREATION unless - index->name starts with TEMP_INDEX_PREFIX. */ - ut_ad(!dict_index_is_online_ddl(index)); - } - - btr_cur = btr_pcur_get_btr_cur(&pcur); - - search_result = row_search_index_entry(index, entry, mode, - &pcur, &mtr); - - switch (UNIV_EXPECT(search_result, ROW_FOUND)) { - case ROW_NOT_FOUND: - /* In crash recovery, the secondary index record may - be missing if the UPDATE did not have time to insert - the secondary index records before the crash. When we - are undoing that UPDATE in crash recovery, the record - may be missing. - - In normal processing, if an update ends in a deadlock - before it has inserted all updated secondary index - records, then the undo will not find those records. */ - goto func_exit; - case ROW_FOUND: - break; - case ROW_BUFFERED: - case ROW_NOT_DELETED_REF: - /* These are invalid outcomes, because the mode passed - to row_search_index_entry() did not include any of the - flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ - ut_error; - } - - /* We should remove the index record if no prior version of the row, - which cannot be purged yet, requires its existence. If some requires, - we should delete mark the record. 
*/ - - mtr_start_trx(&mtr_vers, thr_get_trx(thr)); - - success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur), - &mtr_vers); - ut_a(success); - - old_has = row_vers_old_has_index_entry(FALSE, - btr_pcur_get_rec(&(node->pcur)), - &mtr_vers, index, entry); - if (old_has) { - err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, &mtr); - ut_ad(err == DB_SUCCESS); - } else { - /* Remove the index record */ - - if (mode != BTR_MODIFY_TREE) { - success = btr_cur_optimistic_delete(btr_cur, 0, &mtr); - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } - } else { - /* No need to distinguish RB_RECOVERY_PURGE here, - because we are deleting a secondary index record: - the distinction between RB_NORMAL and - RB_RECOVERY_PURGE only matters when deleting a - record that contains externally stored - columns. */ - ut_ad(!dict_index_is_clust(index)); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, - RB_NORMAL, &mtr); - - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - } - } - - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers); - -func_exit: - btr_pcur_close(&pcur); -func_exit_no_pcur: - mtr_commit(&mtr); - - return(err); -} - -/***********************************************************//** -Delete marks or removes a secondary index entry if found. -NOTE that if we updated the fields of a delete-marked secondary index record -so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot -return to the original values because we do not know them. But this should -not cause problems because in row0sel.cc, in queries we always retrieve the -clustered index record or an earlier version of it, if the secondary index -record through which we do the search is delete-marked. 
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_del_mark_or_remove_sec( -/*================================*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr, /*!< in: query thread */ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry) /*!< in: index entry */ -{ - dberr_t err; - - err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, - entry, BTR_MODIFY_LEAF); - if (err == DB_SUCCESS) { - - return(err); - } - - err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, - entry, BTR_MODIFY_TREE); - return(err); -} - -/***********************************************************//** -Delete unmarks a secondary index entry which must be found. It might not be -delete-marked at the moment, but it does not harm to unmark it anyway. We also -need to update the fields of the secondary index record if we updated its -fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. 
-@retval DB_SUCCESS on success -@retval DB_FAIL if BTR_MODIFY_TREE should be tried -@retval DB_OUT_OF_FILE_SPACE when running out of tablespace -@retval DB_DUPLICATE_KEY if the value was missing - and an insert would lead to a duplicate exists */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_del_unmark_sec_and_undo_update( -/*========================================*/ - ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or - BTR_MODIFY_TREE */ - que_thr_t* thr, /*!< in: query thread */ - dict_index_t* index, /*!< in: index */ - dtuple_t* entry) /*!< in: index entry */ -{ - btr_pcur_t pcur; - btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); - upd_t* update; - dberr_t err = DB_SUCCESS; - big_rec_t* dummy_big_rec; - mtr_t mtr; - trx_t* trx = thr_get_trx(thr); - const ulint flags - = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; - enum row_search_result search_result; - - ut_ad(trx->id); - - log_free_check(); - mtr_start_trx(&mtr, thr_get_trx(thr)); - - if (*index->name == TEMP_INDEX_PREFIX) { - /* The index->online_status may change if the - index->name starts with TEMP_INDEX_PREFIX (meaning - that the index is or was being created online). It is - protected by index->lock. */ - if (mode == BTR_MODIFY_LEAF) { - mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; - mtr_s_lock(dict_index_get_lock(index), &mtr); - } else { - ut_ad(mode == BTR_MODIFY_TREE); - mtr_x_lock(dict_index_get_lock(index), &mtr); - } - - if (row_log_online_op_try(index, entry, trx->id)) { - goto func_exit_no_pcur; - } - } else { - /* For secondary indexes, - index->online_status==ONLINE_INDEX_CREATION unless - index->name starts with TEMP_INDEX_PREFIX. 
*/ - ut_ad(!dict_index_is_online_ddl(index)); - } - - search_result = row_search_index_entry(index, entry, mode, - &pcur, &mtr); - - switch (search_result) { - mem_heap_t* heap; - mem_heap_t* offsets_heap; - ulint* offsets; - case ROW_BUFFERED: - case ROW_NOT_DELETED_REF: - /* These are invalid outcomes, because the mode passed - to row_search_index_entry() did not include any of the - flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ - ut_error; - case ROW_NOT_FOUND: - if (*index->name != TEMP_INDEX_PREFIX) { - /* During online secondary index creation, it - is possible that MySQL is waiting for a - meta-data lock upgrade before invoking - ha_innobase::commit_inplace_alter_table() - while this ROLLBACK is executing. InnoDB has - finished building the index, but it does not - yet exist in MySQL. In this case, we suppress - the printout to the error log. */ - fputs("InnoDB: error in sec index entry del undo in\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fputs("\n" - "InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, btr_pcur_get_rec(&pcur), index); - putc('\n', stderr); - trx_print(stderr, trx, 0); - fputs("\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - - ib_logf(IB_LOG_LEVEL_WARN, - "record in index %s was not found" - " on rollback, trying to insert", - index->name); - } - - if (btr_cur->up_match >= dict_index_get_n_unique(index) - || btr_cur->low_match >= dict_index_get_n_unique(index)) { - if (*index->name != TEMP_INDEX_PREFIX) { - ib_logf(IB_LOG_LEVEL_WARN, - "record in index %s was not found on" - " rollback, and a duplicate exists", - index->name); - } - err = DB_DUPLICATE_KEY; - break; - } - - /* Insert the missing record that we were trying to - delete-unmark. 
*/ - big_rec_t* big_rec; - rec_t* insert_rec; - offsets = NULL; - offsets_heap = NULL; - - err = btr_cur_optimistic_insert( - flags, btr_cur, &offsets, &offsets_heap, - entry, &insert_rec, &big_rec, - 0, thr, &mtr); - ut_ad(!big_rec); - - if (err == DB_FAIL && mode == BTR_MODIFY_TREE) { - err = btr_cur_pessimistic_insert( - flags, btr_cur, - &offsets, &offsets_heap, - entry, &insert_rec, &big_rec, - 0, thr, &mtr); - /* There are no off-page columns in - secondary indexes. */ - ut_ad(!big_rec); - } - - if (err == DB_SUCCESS) { - page_update_max_trx_id( - btr_cur_get_block(btr_cur), - btr_cur_get_page_zip(btr_cur), - trx->id, &mtr); - } - - if (offsets_heap) { - mem_heap_free(offsets_heap); - } - - break; - case ROW_FOUND: - err = btr_cur_del_mark_set_sec_rec( - BTR_NO_LOCKING_FLAG, - btr_cur, FALSE, thr, &mtr); - ut_a(err == DB_SUCCESS); - heap = mem_heap_create( - sizeof(upd_t) - + dtuple_get_n_fields(entry) * sizeof(upd_field_t)); - offsets_heap = NULL; - offsets = rec_get_offsets( - btr_cur_get_rec(btr_cur), - index, NULL, ULINT_UNDEFINED, &offsets_heap); - update = row_upd_build_sec_rec_difference_binary( - btr_cur_get_rec(btr_cur), index, offsets, entry, heap); - if (upd_get_n_fields(update) == 0) { - - /* Do nothing */ - - } else if (mode != BTR_MODIFY_TREE) { - /* Try an optimistic updating of the record, keeping - changes within the page */ - - /* TODO: pass offsets, not &offsets */ - err = btr_cur_optimistic_update( - flags, btr_cur, &offsets, &offsets_heap, - update, 0, thr, thr_get_trx(thr)->id, &mtr); - switch (err) { - case DB_OVERFLOW: - case DB_UNDERFLOW: - case DB_ZIP_OVERFLOW: - err = DB_FAIL; - default: - break; - } - } else { - err = btr_cur_pessimistic_update( - flags, btr_cur, &offsets, &offsets_heap, - heap, &dummy_big_rec, - update, 0, thr, thr_get_trx(thr)->id, &mtr); - ut_a(!dummy_big_rec); - } - - mem_heap_free(heap); - mem_heap_free(offsets_heap); - } - - btr_pcur_close(&pcur); -func_exit_no_pcur: - mtr_commit(&mtr); - - return(err); -} - 
-/***********************************************************//** -Flags a secondary index corrupted. */ -static MY_ATTRIBUTE((nonnull)) -void -row_undo_mod_sec_flag_corrupted( -/*============================*/ - trx_t* trx, /*!< in/out: transaction */ - dict_index_t* index) /*!< in: secondary index */ -{ - ut_ad(!dict_index_is_clust(index)); - - switch (trx->dict_operation_lock_mode) { - case RW_S_LATCH: - /* Because row_undo() is holding an S-latch - on the data dictionary during normal rollback, - we can only mark the index corrupted in the - data dictionary cache. TODO: fix this somehow.*/ - mutex_enter(&dict_sys->mutex); - dict_set_corrupted_index_cache_only(index, index->table); - mutex_exit(&dict_sys->mutex); - break; - default: - ut_ad(0); - /* fall through */ - case RW_X_LATCH: - /* This should be the rollback of a data dictionary - transaction. */ - dict_set_corrupted(index, trx, "rollback"); - } -} - -/***********************************************************//** -Undoes a modify in secondary indexes when undo record type is UPD_DEL. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_upd_del_sec( -/*=====================*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ -{ - mem_heap_t* heap; - dberr_t err = DB_SUCCESS; - - ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - ut_ad(!node->undo_row); - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - dict_index_t* index = node->index; - dtuple_t* entry; - - if (index->type & DICT_FTS) { - dict_table_next_uncorrupted_index(node->index); - continue; - } - - /* During online index creation, - HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should - guarantee that any active transaction has not modified - indexed columns such that col->ord_part was 0 at the - time when the undo log record was written. 
When we get - to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, - it should always cover all affected indexes. */ - entry = row_build_index_entry( - node->row, node->ext, index, heap); - - if (UNIV_UNLIKELY(!entry)) { - /* The database must have crashed after - inserting a clustered index record but before - writing all the externally stored columns of - that record. Because secondary index entries - are inserted after the clustered index record, - we may assume that the secondary index record - does not exist. However, this situation may - only occur during the rollback of incomplete - transactions. */ - ut_a(thr_is_recv(thr)); - } else { - err = row_undo_mod_del_mark_or_remove_sec( - node, thr, index, entry); - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - - break; - } - } - - mem_heap_empty(heap); - dict_table_next_uncorrupted_index(node->index); - } - - mem_heap_free(heap); - - return(err); -} - -/***********************************************************//** -Undoes a modify in secondary indexes when undo record type is DEL_MARK. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_del_mark_sec( -/*======================*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ -{ - mem_heap_t* heap; - dberr_t err = DB_SUCCESS; - - ut_ad(!node->undo_row); - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - dict_index_t* index = node->index; - dtuple_t* entry; - - if (index->type == DICT_FTS) { - dict_table_next_uncorrupted_index(node->index); - continue; - } - - /* During online index creation, - HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should - guarantee that any active transaction has not modified - indexed columns such that col->ord_part was 0 at the - time when the undo log record was written. When we get - to roll back an undo log entry TRX_UNDO_DEL_MARK_REC, - it should always cover all affected indexes. 
*/ - entry = row_build_index_entry( - node->row, node->ext, index, heap); - - ut_a(entry); - - err = row_undo_mod_del_unmark_sec_and_undo_update( - BTR_MODIFY_LEAF, thr, index, entry); - if (err == DB_FAIL) { - err = row_undo_mod_del_unmark_sec_and_undo_update( - BTR_MODIFY_TREE, thr, index, entry); - } - - if (err == DB_DUPLICATE_KEY) { - row_undo_mod_sec_flag_corrupted( - thr_get_trx(thr), index); - err = DB_SUCCESS; - /* Do not return any error to the caller. The - duplicate will be reported by ALTER TABLE or - CREATE UNIQUE INDEX. Unfortunately we cannot - report the duplicate key value to the DDL - thread, because the altered_table object is - private to its call stack. */ - } else if (err != DB_SUCCESS) { - break; - } - - mem_heap_empty(heap); - dict_table_next_uncorrupted_index(node->index); - } - - mem_heap_free(heap); - - return(err); -} - -/***********************************************************//** -Undoes a modify in secondary indexes when undo record type is UPD_EXIST. -@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo_mod_upd_exist_sec( -/*=======================*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ -{ - mem_heap_t* heap; - dberr_t err = DB_SUCCESS; - - if (node->index == NULL - || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) { - /* No change in secondary indexes */ - - return(err); - } - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - dict_index_t* index = node->index; - dtuple_t* entry; - - if (index->type == DICT_FTS - || !row_upd_changes_ord_field_binary( - index, node->update, thr, node->row, node->ext)) { - dict_table_next_uncorrupted_index(node->index); - continue; - } - - /* Build the newest version of the index entry */ - entry = row_build_index_entry(node->row, node->ext, - index, heap); - if (UNIV_UNLIKELY(!entry)) { - /* The server must have crashed in - row_upd_clust_rec_by_insert() before - 
the updated externally stored columns (BLOBs) - of the new clustered index entry were written. */ - - /* The table must be in DYNAMIC or COMPRESSED - format. REDUNDANT and COMPACT formats - store a local 768-byte prefix of each - externally stored column. */ - ut_a(dict_table_get_format(index->table) - >= UNIV_FORMAT_B); - - /* This is only legitimate when - rolling back an incomplete transaction - after crash recovery. */ - ut_a(thr_get_trx(thr)->is_recovered); - - /* The server must have crashed before - completing the insert of the new - clustered index entry and before - inserting to the secondary indexes. - Because node->row was not yet written - to this index, we can ignore it. But - we must restore node->undo_row. */ - } else { - /* NOTE that if we updated the fields of a - delete-marked secondary index record so that - alphabetically they stayed the same, e.g., - 'abc' -> 'aBc', we cannot return to the - original values because we do not know them. - But this should not cause problems because - in row0sel.cc, in queries we always retrieve - the clustered index record or an earlier - version of it, if the secondary index record - through which we do the search is - delete-marked. */ - - err = row_undo_mod_del_mark_or_remove_sec( - node, thr, index, entry); - if (err != DB_SUCCESS) { - break; - } - } - - mem_heap_empty(heap); - /* We may have to update the delete mark in the - secondary index record of the previous version of - the row. We also need to update the fields of - the secondary index record if we updated its fields - but alphabetically they stayed the same, e.g., - 'abc' -> 'aBc'. 
*/ - entry = row_build_index_entry(node->undo_row, - node->undo_ext, - index, heap); - ut_a(entry); - - err = row_undo_mod_del_unmark_sec_and_undo_update( - BTR_MODIFY_LEAF, thr, index, entry); - if (err == DB_FAIL) { - err = row_undo_mod_del_unmark_sec_and_undo_update( - BTR_MODIFY_TREE, thr, index, entry); - } - - if (err == DB_DUPLICATE_KEY) { - row_undo_mod_sec_flag_corrupted( - thr_get_trx(thr), index); - err = DB_SUCCESS; - } else if (err != DB_SUCCESS) { - break; - } - - mem_heap_empty(heap); - dict_table_next_uncorrupted_index(node->index); - } - - mem_heap_free(heap); - - return(err); -} - -/***********************************************************//** -Parses the row reference and other info in a modify undo log record. */ -static MY_ATTRIBUTE((nonnull)) -void -row_undo_mod_parse_undo_rec( -/*========================*/ - undo_node_t* node, /*!< in: row undo node */ - ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */ -{ - dict_index_t* clust_index; - byte* ptr; - undo_no_t undo_no; - table_id_t table_id; - trx_id_t trx_id; - roll_ptr_t roll_ptr; - ulint info_bits; - ulint type; - ulint cmpl_info; - bool dummy_extern; - - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, - &dummy_extern, &undo_no, &table_id); - node->rec_type = type; - - node->table = dict_table_open_on_id( - table_id, dict_locked, DICT_TABLE_OP_NORMAL); - - /* TODO: other fixes associated with DROP TABLE + rollback in the - same table by another user */ - - if (node->table == NULL) { - /* Table was dropped */ - return; - } - - if (node->table->file_unreadable) { - dict_table_close(node->table, dict_locked, FALSE); - - /* We skip undo operations to missing .ibd files */ - node->table = NULL; - - return; - } - - clust_index = dict_table_get_first_index(node->table); - - ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); - - ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), - node->heap); - - trx_undo_update_rec_get_update(ptr, 
clust_index, type, trx_id, - roll_ptr, info_bits, node->trx, - node->heap, &(node->update)); - node->new_trx_id = trx_id; - node->cmpl_info = cmpl_info; - - if (!row_undo_search_clust_to_pcur(node)) { - - dict_table_close(node->table, dict_locked, FALSE); - - node->table = NULL; - } -} - -/***********************************************************//** -Undoes a modify operation on a row of a table. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -row_undo_mod( -/*=========*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - ibool dict_locked; - - ut_ad(node != NULL); - ut_ad(thr != NULL); - ut_ad(node->state == UNDO_NODE_MODIFY); - - dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH; - - ut_ad(thr_get_trx(thr) == node->trx); - - row_undo_mod_parse_undo_rec(node, dict_locked); - - if (node->table == NULL) { - /* It is already undone, or will be undone by another query - thread, or table was dropped */ - - trx_undo_rec_release(node->trx, node->undo_no); - node->state = UNDO_NODE_FETCH_NEXT; - - return(DB_SUCCESS); - } - - node->index = dict_table_get_first_index(node->table); - ut_ad(dict_index_is_clust(node->index)); - /* Skip the clustered index (the first index) */ - node->index = dict_table_get_next_index(node->index); - - /* Skip all corrupted secondary index */ - dict_table_skip_corrupt_index(node->index); - - switch (node->rec_type) { - case TRX_UNDO_UPD_EXIST_REC: - err = row_undo_mod_upd_exist_sec(node, thr); - break; - case TRX_UNDO_DEL_MARK_REC: - err = row_undo_mod_del_mark_sec(node, thr); - break; - case TRX_UNDO_UPD_DEL_REC: - err = row_undo_mod_upd_del_sec(node, thr); - break; - default: - ut_error; - err = DB_ERROR; - } - - if (err == DB_SUCCESS) { - - err = row_undo_mod_clust(node, thr); - } - - dict_table_close(node->table, dict_locked, FALSE); - - node->table = NULL; - - return(err); -} diff --git a/storage/xtradb/row/row0undo.cc 
b/storage/xtradb/row/row0undo.cc deleted file mode 100644 index 82b1ab049fa..00000000000 --- a/storage/xtradb/row/row0undo.cc +++ /dev/null @@ -1,375 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0undo.cc -Row undo - -Created 1/8/1997 Heikki Tuuri -*******************************************************/ - -#include "row0undo.h" - -#ifdef UNIV_NONINL -#include "row0undo.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0uins.h" -#include "row0umod.h" -#include "row0upd.h" -#include "row0mysql.h" -#include "srv0srv.h" - -/* How to undo row operations? -(1) For an insert, we have stored a prefix of the clustered index record -in the undo log. Using it, we look for the clustered record, and using -that we look for the records in the secondary indexes. The insert operation -may have been left incomplete, if the database crashed, for example. 
-We may have look at the trx id and roll ptr to make sure the record in the -clustered index is really the one for which the undo log record was -written. We can use the framework we get from the original insert op. -(2) Delete marking: We can use the framework we get from the original -delete mark op. We only have to check the trx id. -(3) Update: This may be the most complicated. We have to use the framework -we get from the original update op. - -What if the same trx repeatedly deletes and inserts an identical row. -Then the row id changes and also roll ptr. What if the row id was not -part of the ordering fields in the clustered index? Maybe we have to write -it to undo log. Well, maybe not, because if we order the row id and trx id -in descending order, then the only undeleted copy is the first in the -index. Our searches in row operations always position the cursor before -the first record in the result set. But, if there is no key defined for -a table, then it would be desirable that row id is in ascending order. -So, lets store row id in descending order only if it is not an ordering -field in the clustered index. - -NOTE: Deletes and inserts may lead to situation where there are identical -records in a secondary index. Is that a problem in the B-tree? Yes. -Also updates can lead to this, unless trx id and roll ptr are included in -ord fields. -(1) Fix in clustered indexes: include row id, trx id, and roll ptr -in node pointers of B-tree. -(2) Fix in secondary indexes: include all fields in node pointers, and -if an entry is inserted, check if it is equal to the right neighbor, -in which case update the right neighbor: the neighbor must be delete -marked, set it unmarked and write the trx id of the current transaction. - -What if the same trx repeatedly updates the same row, updating a secondary -index field or not? Updating a clustered index ordering field? - -(1) If it does not update the secondary index and not the clustered index -ord field. 
Then the secondary index record stays unchanged, but the -trx id in the secondary index record may be smaller than in the clustered -index record. This is no problem? -(2) If it updates secondary index ord field but not clustered: then in -secondary index there are delete marked records, which differ in an -ord field. No problem. -(3) Updates clustered ord field but not secondary, and secondary index -is unique. Then the record in secondary index is just updated at the -clustered ord field. -(4) - -Problem with duplicate records: -Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a -bigger trx id has inserted and delete marked a similar row, our trx inserts -again a similar row, and a trx with an even bigger id delete marks it. Then -the position of the row should change in the index if the trx id affects -the alphabetical ordering. - -Fix 2: If an insert encounters a similar row marked deleted, we turn the -insert into an 'update' of the row marked deleted. Then we must write undo -info on the update. A problem: what if a purge operation tries to remove -the delete marked row? - -We can think of the database row versions as a linked list which starts -from the record in the clustered index, and is linked by roll ptrs -through undo logs. The secondary index records are references which tell -what kinds of records can be found in this linked list for a record -in the clustered index. - -How to do the purge? A record can be removed from the clustered index -if its linked list becomes empty, i.e., the row has been marked deleted -and its roll ptr points to the record in the undo log we are going through, -doing the purge. Similarly, during a rollback, a record can be removed -if the stored roll ptr in the undo log points to a trx already (being) purged, -or if the roll ptr is NULL, i.e., it was a fresh insert. */ - -/********************************************************************//** -Creates a row undo node to a query graph. 
-@return own: undo node */ -UNIV_INTERN -undo_node_t* -row_undo_node_create( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - que_thr_t* parent, /*!< in: parent node, i.e., a thr node */ - mem_heap_t* heap) /*!< in: memory heap where created */ -{ - undo_node_t* undo; - - ut_ad(trx && parent && heap); - - undo = static_cast<undo_node_t*>( - mem_heap_alloc(heap, sizeof(undo_node_t))); - - undo->common.type = QUE_NODE_UNDO; - undo->common.parent = parent; - - undo->state = UNDO_NODE_FETCH_NEXT; - undo->trx = trx; - - btr_pcur_init(&(undo->pcur)); - - undo->heap = mem_heap_create(256); - - return(undo); -} - -/***********************************************************//** -Looks for the clustered index record when node has the row reference. -The pcur in node is used in the search. If found, stores the row to node, -and stores the position of pcur, and detaches it. The pcur must be closed -by the caller in any case. -@return TRUE if found; NOTE the node->pcur must be closed by the -caller, regardless of the return value */ -UNIV_INTERN -ibool -row_undo_search_clust_to_pcur( -/*==========================*/ - undo_node_t* node) /*!< in: row undo node */ -{ - dict_index_t* clust_index; - ibool found; - mtr_t mtr; - ibool ret; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - rec_offs_init(offsets_); - - mtr_start(&mtr); - - clust_index = dict_table_get_first_index(node->table); - - found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF, - node->table, node->ref, &mtr); - - rec = btr_pcur_get_rec(&(node->pcur)); - - offsets = rec_get_offsets(rec, clust_index, offsets, - ULINT_UNDEFINED, &heap); - - if (!found || node->roll_ptr - != row_get_rec_roll_ptr(rec, clust_index, offsets)) { - - /* We must remove the reservation on the undo log record - BEFORE releasing the latch on the clustered index page: this - is to make sure that some thread will eventually undo the - modification corresponding 
to node->roll_ptr. */ - - /* fputs("--------------------undoing a previous version\n", - stderr); */ - - ret = FALSE; - } else { - row_ext_t** ext; - - if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) { - /* In DYNAMIC or COMPRESSED format, there is - no prefix of externally stored columns in the - clustered index record. Build a cache of - column prefixes. */ - ext = &node->ext; - } else { - /* REDUNDANT and COMPACT formats store a local - 768-byte prefix of each externally stored - column. No cache is needed. */ - ext = NULL; - node->ext = NULL; - } - - node->row = row_build(ROW_COPY_DATA, clust_index, rec, - offsets, NULL, - NULL, NULL, ext, node->heap); - if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - node->undo_row = dtuple_copy(node->row, node->heap); - row_upd_replace(node->undo_row, &node->undo_ext, - clust_index, node->update, node->heap); - } else { - node->undo_row = NULL; - node->undo_ext = NULL; - } - - btr_pcur_store_position(&(node->pcur), &mtr); - - ret = TRUE; - } - - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(ret); -} - -/***********************************************************//** -Fetches an undo log record and does the undo for the recorded operation. -If none left, or a partial rollback completed, returns control to the -parent node, which is always a query thread node. 
-@return DB_SUCCESS if operation successfully completed, else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_undo( -/*=====*/ - undo_node_t* node, /*!< in: row undo node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - trx_t* trx; - roll_ptr_t roll_ptr; - ibool locked_data_dict; - - ut_ad(node != NULL); - ut_ad(thr != NULL); - - trx = node->trx; - - if (node->state == UNDO_NODE_FETCH_NEXT) { - - node->undo_rec = trx_roll_pop_top_rec_of_trx(trx, - trx->roll_limit, - &roll_ptr, - node->heap); - if (!node->undo_rec) { - /* Rollback completed for this query thread */ - - thr->run_node = que_node_get_parent(node); - - return(DB_SUCCESS); - } - - node->roll_ptr = roll_ptr; - node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); - - if (trx_undo_roll_ptr_is_insert(roll_ptr)) { - - node->state = UNDO_NODE_INSERT; - } else { - node->state = UNDO_NODE_MODIFY; - } - } - - /* Prevent DROP TABLE etc. while we are rolling back this row. - If we are doing a TABLE CREATE or some other dictionary operation, - then we already have dict_operation_lock locked in x-mode. Do not - try to lock again, because that would cause a hang. */ - - locked_data_dict = (trx->dict_operation_lock_mode == 0); - - if (locked_data_dict) { - - row_mysql_freeze_data_dictionary(trx); - } - - if (node->state == UNDO_NODE_INSERT) { - - err = row_undo_ins(node); - - node->state = UNDO_NODE_FETCH_NEXT; - } else { - ut_ad(node->state == UNDO_NODE_MODIFY); - err = row_undo_mod(node, thr); - } - - if (locked_data_dict) { - - row_mysql_unfreeze_data_dictionary(trx); - } - - /* Do some cleanup */ - btr_pcur_close(&(node->pcur)); - - mem_heap_empty(node->heap); - - thr->run_node = node; - - return(err); -} - -/***********************************************************//** -Undoes a row operation in a table. This is a high-level function used -in SQL execution graphs. 
-@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_undo_step( -/*==========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err; - undo_node_t* node; - trx_t* trx; - - ut_ad(thr); - - srv_inc_activity_count(); - - trx = thr_get_trx(thr); - - node = static_cast<undo_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_UNDO); - - err = row_undo(node, thr); - - trx->error_state = err; - - if (err != DB_SUCCESS) { - /* SQL error detected */ - - fprintf(stderr, "InnoDB: Fatal error (%s) in rollback.\n", - ut_strerr(err)); - - if (err == DB_OUT_OF_FILE_SPACE) { - fprintf(stderr, - "InnoDB: Out of tablespace.\n" - "InnoDB: Consider increasing" - " your tablespace.\n"); - abort(); - } - - ut_error; - - return(NULL); - } - - return(thr); -} diff --git a/storage/xtradb/row/row0upd.cc b/storage/xtradb/row/row0upd.cc deleted file mode 100644 index 1156cbe4b4c..00000000000 --- a/storage/xtradb/row/row0upd.cc +++ /dev/null @@ -1,3017 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0upd.cc -Update of a row - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#include "m_string.h" /* for my_sys.h */ -#include "my_sys.h" /* DEBUG_SYNC_C */ -#include "row0upd.h" - -#ifdef UNIV_NONINL -#include "row0upd.ic" -#endif - -#include "ha_prototypes.h" -#include "dict0dict.h" -#include "trx0undo.h" -#include "rem0rec.h" -#ifndef UNIV_HOTBACKUP -#include "dict0boot.h" -#include "dict0crea.h" -#include "mach0data.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "que0que.h" -#include "row0ext.h" -#include "row0ins.h" -#include "row0log.h" -#include "row0row.h" -#include "row0sel.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "log0log.h" -#include "pars0sym.h" -#include "eval0eval.h" -#include "buf0lru.h" -#include <algorithm> - -#include <mysql/plugin.h> -#include <mysql/service_wsrep.h> - -/* What kind of latch and lock can we assume when the control comes to - ------------------------------------------------------------------- -an update node? --------------- -Efficiency of massive updates would require keeping an x-latch on a -clustered index page through many updates, and not setting an explicit -x-lock on clustered index records, as they anyway will get an implicit -x-lock when they are updated. A problem is that the read nodes in the -graph should know that they must keep the latch when passing the control -up to the update node, and not set any record lock on the record which -will be updated. Another problem occurs if the execution is stopped, -as the kernel switches to another query thread, or the transaction must -wait for a lock. 
Then we should be able to release the latch and, maybe, -acquire an explicit x-lock on the record. - Because this seems too complicated, we conclude that the less -efficient solution of releasing all the latches when the control is -transferred to another node, and acquiring explicit x-locks, is better. */ - -/* How is a delete performed? If there is a delete without an -explicit cursor, i.e., a searched delete, there are at least -two different situations: -the implicit select cursor may run on (1) the clustered index or -on (2) a secondary index. The delete is performed by setting -the delete bit in the record and substituting the id of the -deleting transaction for the original trx id, and substituting a -new roll ptr for previous roll ptr. The old trx id and roll ptr -are saved in the undo log record. Thus, no physical changes occur -in the index tree structure at the time of the delete. Only -when the undo log is purged, the index records will be physically -deleted from the index trees. - -The query graph executing a searched delete would consist of -a delete node which has as a subtree a select subgraph. -The select subgraph should return a (persistent) cursor -in the clustered index, placed on page which is x-latched. -The delete node should look for all secondary index records for -this clustered index entry and mark them as deleted. When is -the x-latch freed? The most efficient way for performing a -searched delete is obviously to keep the x-latch for several -steps of query graph execution. */ - -/************************************************************************* -IMPORTANT NOTE: Any operation that generates redo MUST check that there -is enough space in the redo log before for that operation. This is -done by calling log_free_check(). The reason for checking the -availability of the redo log space before the start of the operation is -that we MUST not hold any synchonization objects when performing the -check. 
-If you make a change in this module make sure that no codepath is -introduced where a call to log_free_check() is bypassed. */ - -/***********************************************************//** -Checks if an update vector changes some of the first ordering fields of an -index record. This is only used in foreign key checks and we can assume -that index does not contain column prefixes. -@return TRUE if changes */ -static -ibool -row_upd_changes_first_fields_binary( -/*================================*/ - dtuple_t* entry, /*!< in: old value of index entry */ - dict_index_t* index, /*!< in: index of entry */ - const upd_t* update, /*!< in: update vector for the row */ - ulint n); /*!< in: how many first fields to check */ - - -/*********************************************************************//** -Checks if index currently is mentioned as a referenced index in a foreign -key constraint. - -NOTE that since we do not hold dict_operation_lock when leaving the -function, it may be that the referencing table has been dropped when -we leave this function: this function is only for heuristic use! 
- -@return TRUE if referenced */ -static -ibool -row_upd_index_is_referenced( -/*========================*/ - dict_index_t* index, /*!< in: index */ - trx_t* trx) /*!< in: transaction */ -{ - dict_table_t* table = index->table; - ibool froze_data_dict = FALSE; - ibool is_referenced = FALSE; - - if (table->referenced_set.empty()) { - return(FALSE); - } - - if (trx->dict_operation_lock_mode == 0) { - row_mysql_freeze_data_dictionary(trx); - froze_data_dict = TRUE; - } - - dict_foreign_set::iterator it - = std::find_if(table->referenced_set.begin(), - table->referenced_set.end(), - dict_foreign_with_index(index)); - - is_referenced = (it != table->referenced_set.end()); - - if (froze_data_dict) { - row_mysql_unfreeze_data_dictionary(trx); - } - - return(is_referenced); -} - -#ifdef WITH_WSREP -static -ibool -wsrep_row_upd_index_is_foreign( -/*========================*/ - dict_index_t* index, /*!< in: index */ - trx_t* trx) /*!< in: transaction */ -{ - dict_table_t* table = index->table; - dict_foreign_t* foreign; - ibool froze_data_dict = FALSE; - ibool is_referenced = FALSE; - - if (table->foreign_set.empty()) { - - return(FALSE); - } - - if (trx->dict_operation_lock_mode == 0) { - row_mysql_freeze_data_dictionary(trx); - froze_data_dict = TRUE; - } - - for (dict_foreign_set::iterator it= table->foreign_set.begin(); - it != table->foreign_set.end(); - ++ it) - { - foreign= *it; - - if (foreign->foreign_index == index) { - - is_referenced = TRUE; - goto func_exit; - } - - } - -func_exit: - if (froze_data_dict) { - row_mysql_unfreeze_data_dictionary(trx); - } - - return(is_referenced); -} -#endif /* WITH_WSREP */ - -/*********************************************************************//** -Checks if possible foreign key constraints hold after a delete of the record -under pcur. - -NOTE that this function will temporarily commit mtr and lose the -pcur position! 
- -@return DB_SUCCESS or an error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_check_references_constraints( -/*=================================*/ - upd_node_t* node, /*!< in: row update node */ - btr_pcur_t* pcur, /*!< in: cursor positioned on a record; NOTE: the - cursor position is lost in this function! */ - dict_table_t* table, /*!< in: table in question */ - dict_index_t* index, /*!< in: index of the cursor */ - ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_foreign_t* foreign; - mem_heap_t* heap; - dtuple_t* entry; - trx_t* trx; - const rec_t* rec; - ulint n_ext; - dberr_t err; - ibool got_s_lock = FALSE; - - if (table->referenced_set.empty()) { - - return(DB_SUCCESS); - } - - trx = thr_get_trx(thr); - - rec = btr_pcur_get_rec(pcur); - ut_ad(rec_offs_validate(rec, index, offsets)); - - heap = mem_heap_create(500); - - entry = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap); - - mtr_commit(mtr); - - DEBUG_SYNC_C("foreign_constraint_check_for_update"); - - mtr_start_trx(mtr, trx); - - if (trx->dict_operation_lock_mode == 0) { - got_s_lock = TRUE; - - row_mysql_freeze_data_dictionary(trx); - } - -run_again: - - for (dict_foreign_set::iterator it = table->referenced_set.begin(); - it != table->referenced_set.end(); - ++it) { - - foreign = *it; - - /* Note that we may have an update which updates the index - record, but does NOT update the first fields which are - referenced in a foreign key constraint. Then the update does - NOT break the constraint. 
*/ - - if (foreign->referenced_index == index - && (node->is_delete - || row_upd_changes_first_fields_binary( - entry, index, node->update, - foreign->n_fields))) { - dict_table_t* foreign_table = foreign->foreign_table; - - dict_table_t* ref_table = NULL; - - if (foreign_table == NULL) { - - ref_table = dict_table_open_on_name( - foreign->foreign_table_name_lookup, - FALSE, FALSE, DICT_ERR_IGNORE_NONE); - } - - if (foreign_table) { - os_inc_counter(dict_sys->mutex, - foreign_table - ->n_foreign_key_checks_running); - } - - /* NOTE that if the thread ends up waiting for a lock - we will release dict_operation_lock temporarily! - But the counter on the table protects 'foreign' from - being dropped while the check is running. */ - - err = row_ins_check_foreign_constraint( - FALSE, foreign, table, entry, thr); - - if (foreign_table) { - os_dec_counter(dict_sys->mutex, - foreign_table - ->n_foreign_key_checks_running); - } - - if (ref_table != NULL) { - dict_table_close(ref_table, FALSE, FALSE); - } - - /* Some table foreign key dropped, try again */ - if (err == DB_DICT_CHANGED) { - goto run_again; - } else if (err != DB_SUCCESS) { - goto func_exit; - } - } - } - - err = DB_SUCCESS; -func_exit: - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); - } - - mem_heap_free(heap); - - return(err); -} -#ifdef WITH_WSREP -static -dberr_t -wsrep_row_upd_check_foreign_constraints( -/*=================================*/ - upd_node_t* node, /*!< in: row update node */ - btr_pcur_t* pcur, /*!< in: cursor positioned on a record; NOTE: the - cursor position is lost in this function! 
*/ - dict_table_t* table, /*!< in: table in question */ - dict_index_t* index, /*!< in: index of the cursor */ - ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_foreign_t* foreign; - mem_heap_t* heap; - dtuple_t* entry; - trx_t* trx; - const rec_t* rec; - ulint n_ext; - dberr_t err; - ibool got_s_lock = FALSE; - ibool opened = FALSE; - - if (table->foreign_set.empty()) { - - return(DB_SUCCESS); - } - - trx = thr_get_trx(thr); - - /* TODO: make native slave thread bail out here */ - - rec = btr_pcur_get_rec(pcur); - ut_ad(rec_offs_validate(rec, index, offsets)); - - heap = mem_heap_create(500); - - entry = row_rec_to_index_entry(rec, index, offsets, - &n_ext, heap); - - mtr_commit(mtr); - - mtr_start(mtr); - - if (trx->dict_operation_lock_mode == 0) { - got_s_lock = TRUE; - - row_mysql_freeze_data_dictionary(trx); - } - - for (dict_foreign_set::iterator it= table->foreign_set.begin(); - it != table->foreign_set.end(); - ++ it) - { - foreign= *it; - - - /* Note that we may have an update which updates the index - record, but does NOT update the first fields which are - referenced in a foreign key constraint. Then the update does - NOT break the constraint. */ - - if (foreign->foreign_index == index - && (node->is_delete - || row_upd_changes_first_fields_binary( - entry, index, node->update, - foreign->n_fields))) { - - if (foreign->referenced_table == NULL) { - foreign->referenced_table = - dict_table_open_on_name( - foreign->referenced_table_name_lookup, - FALSE, FALSE, DICT_ERR_IGNORE_NONE); - opened = (foreign->referenced_table) ? TRUE : FALSE; - } - - if (foreign->referenced_table) { - os_inc_counter(dict_sys->mutex, - foreign->referenced_table - ->n_foreign_key_checks_running); - } - - /* NOTE that if the thread ends up waiting for a lock - we will release dict_operation_lock temporarily! 
- But the counter on the table protects 'foreign' from - being dropped while the check is running. */ - - err = row_ins_check_foreign_constraint( - TRUE, foreign, table, entry, thr); - - if (foreign->referenced_table) { - os_dec_counter(dict_sys->mutex, - foreign->referenced_table - ->n_foreign_key_checks_running); - - if (opened == TRUE) { - dict_table_close(foreign->referenced_table, FALSE, FALSE); - opened = FALSE; - } - } - - if (err != DB_SUCCESS) { - - goto func_exit; - } - } - - } - - err = DB_SUCCESS; -func_exit: - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); - } - - mem_heap_free(heap); - - DEBUG_SYNC_C("foreign_constraint_check_for_update_done"); - - return(err); -} -#endif /* WITH_WSREP */ - -/*********************************************************************//** -Creates an update node for a query graph. -@return own: update node */ -UNIV_INTERN -upd_node_t* -upd_node_create( -/*============*/ - mem_heap_t* heap) /*!< in: mem heap where created */ -{ - upd_node_t* node; - - node = static_cast<upd_node_t*>( - mem_heap_alloc(heap, sizeof(upd_node_t))); - - node->common.type = QUE_NODE_UPDATE; - - node->state = UPD_NODE_UPDATE_CLUSTERED; - node->in_mysql_interface = FALSE; - - node->row = NULL; - node->ext = NULL; - node->upd_row = NULL; - node->upd_ext = NULL; - node->index = NULL; - node->update = NULL; - - node->foreign = NULL; - node->cascade_heap = NULL; - node->cascade_node = NULL; - - node->select = NULL; - - node->heap = mem_heap_create(128); - node->magic_n = UPD_NODE_MAGIC_N; - - node->cmpl_info = 0; - - return(node); -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Updates the trx id and roll ptr field in a clustered index record in database -recovery. 
*/ -UNIV_INTERN -void -row_upd_rec_sys_fields_in_recovery( -/*===============================*/ - rec_t* rec, /*!< in/out: record */ - page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - ulint pos, /*!< in: TRX_ID position in rec */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */ -{ - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - if (page_zip) { - page_zip_write_trx_id_and_roll_ptr( - page_zip, rec, offsets, pos, trx_id, roll_ptr); - } else { - byte* field; - ulint len; - - field = rec_get_nth_field(rec, offsets, pos, &len); - ut_ad(len == DATA_TRX_ID_LEN); -#if DATA_TRX_ID + 1 != DATA_ROLL_PTR -# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR" -#endif - trx_write_trx_id(field, trx_id); - trx_write_roll_ptr(field + DATA_TRX_ID_LEN, roll_ptr); - } -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Sets the trx id or roll ptr field of a clustered index entry. 
*/ -UNIV_INTERN -void -row_upd_index_entry_sys_field( -/*==========================*/ - dtuple_t* entry, /*!< in/out: index entry, where the memory - buffers for sys fields are already allocated: - the function just copies the new values to - them */ - dict_index_t* index, /*!< in: clustered index */ - ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */ - ib_uint64_t val) /*!< in: value to write */ -{ - dfield_t* dfield; - byte* field; - ulint pos; - - ut_ad(dict_index_is_clust(index)); - - pos = dict_index_get_sys_col_pos(index, type); - - dfield = dtuple_get_nth_field(entry, pos); - field = static_cast<byte*>(dfield_get_data(dfield)); - - if (type == DATA_TRX_ID) { - trx_write_trx_id(field, val); - } else { - ut_ad(type == DATA_ROLL_PTR); - trx_write_roll_ptr(field, val); - } -} - -/***********************************************************//** -Returns TRUE if row update changes size of some field in index or if some -field to be updated is stored externally in rec or update. -@return TRUE if the update changes the size of some field in index or -the field is external in rec or update */ -UNIV_INTERN -ibool -row_upd_changes_field_size_or_external( -/*===================================*/ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const upd_t* update) /*!< in: update vector */ -{ - const upd_field_t* upd_field; - const dfield_t* new_val; - ulint old_len; - ulint new_len; - ulint n_fields; - ulint i; - - ut_ad(rec_offs_validate(NULL, index, offsets)); - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - upd_field = upd_get_nth_field(update, i); - - new_val = &(upd_field->new_val); - new_len = dfield_get_len(new_val); - - if (dfield_is_null(new_val) && !rec_offs_comp(offsets)) { - /* A bug fixed on Dec 31st, 2004: we looked at the - SQL NULL size from the wrong field! We may backport - this fix also to 4.0. 
The merge to 5.0 will be made - manually immediately after we commit this to 4.1. */ - - new_len = dict_col_get_sql_null_size( - dict_index_get_nth_col(index, - upd_field->field_no), - 0); - } - - old_len = rec_offs_nth_size(offsets, upd_field->field_no); - - if (rec_offs_comp(offsets) - && rec_offs_nth_sql_null(offsets, - upd_field->field_no)) { - /* Note that in the compact table format, for a - variable length field, an SQL NULL will use zero - bytes in the offset array at the start of the physical - record, but a zero-length value (empty string) will - use one byte! Thus, we cannot use update-in-place - if we update an SQL NULL varchar to an empty string! */ - - old_len = UNIV_SQL_NULL; - } - - if (dfield_is_ext(new_val) || old_len != new_len - || rec_offs_nth_extern(offsets, upd_field->field_no)) { - - return(TRUE); - } - } - - return(FALSE); -} - -/***********************************************************//** -Returns true if row update contains disowned external fields. -@return true if the update contains disowned external fields. 
*/ -UNIV_INTERN -bool -row_upd_changes_disowned_external( -/*==============================*/ - const upd_t* update) /*!< in: update vector */ -{ - const upd_field_t* upd_field; - const dfield_t* new_val; - ulint new_len; - ulint n_fields; - ulint i; - - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - const byte* field_ref; - - upd_field = upd_get_nth_field(update, i); - new_val = &(upd_field->new_val); - new_len = dfield_get_len(new_val); - - if (!dfield_is_ext(new_val)) { - continue; - } - - ut_ad(new_len >= BTR_EXTERN_FIELD_REF_SIZE); - - field_ref = static_cast<const byte*>(dfield_get_data(new_val)) - + new_len - BTR_EXTERN_FIELD_REF_SIZE; - - if (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) { - return(true); - } - } - - return(false); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Replaces the new column values stored in the update vector to the -record given. No field size changes are allowed. This function is -usually invoked on a clustered index. The only use case for a -secondary index is row_ins_sec_index_entry_by_modify() or its -counterpart in ibuf_insert_to_index_page(). 
*/ -UNIV_INTERN -void -row_upd_rec_in_place( -/*=================*/ - rec_t* rec, /*!< in/out: record where replaced */ - dict_index_t* index, /*!< in: the index the record belongs to */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - const upd_t* update, /*!< in: update vector */ - page_zip_des_t* page_zip)/*!< in: compressed page with enough space - available, or NULL */ -{ - const upd_field_t* upd_field; - const dfield_t* new_val; - ulint n_fields; - ulint i; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (rec_offs_comp(offsets)) { - rec_set_info_bits_new(rec, update->info_bits); - } else { - rec_set_info_bits_old(rec, update->info_bits); - } - - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { -#ifdef UNIV_BLOB_DEBUG - btr_blob_dbg_t b; - const byte* field_ref = NULL; -#endif /* UNIV_BLOB_DEBUG */ - - upd_field = upd_get_nth_field(update, i); - new_val = &(upd_field->new_val); - ut_ad(!dfield_is_ext(new_val) == - !rec_offs_nth_extern(offsets, upd_field->field_no)); -#ifdef UNIV_BLOB_DEBUG - if (dfield_is_ext(new_val)) { - ulint len; - field_ref = rec_get_nth_field(rec, offsets, i, &len); - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; - - b.ref_page_no = page_get_page_no(page_align(rec)); - b.ref_heap_no = page_rec_get_heap_no(rec); - b.ref_field_no = i; - b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - ut_a(b.ref_field_no >= index->n_uniq); - btr_blob_dbg_rbt_delete(index, &b, "upd_in_place"); - } -#endif /* UNIV_BLOB_DEBUG */ - - rec_set_nth_field(rec, offsets, upd_field->field_no, - dfield_get_data(new_val), - dfield_get_len(new_val)); - -#ifdef UNIV_BLOB_DEBUG - if (dfield_is_ext(new_val)) { - b.blob_page_no = mach_read_from_4( - field_ref + BTR_EXTERN_PAGE_NO); - b.always_owner = b.owner = !(field_ref[BTR_EXTERN_LEN] - & BTR_EXTERN_OWNER_FLAG); - b.del = rec_get_deleted_flag( - rec, 
rec_offs_comp(offsets)); - - btr_blob_dbg_rbt_insert(index, &b, "upd_in_place"); - } -#endif /* UNIV_BLOB_DEBUG */ - } - - if (page_zip) { - page_zip_write_rec(page_zip, rec, index, offsets, 0); - } -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Writes into the redo log the values of trx id and roll ptr and enough info -to determine their positions within a clustered index record. -@return new pointer to mlog */ -UNIV_INTERN -byte* -row_upd_write_sys_vals_to_log( -/*==========================*/ - dict_index_t* index, /*!< in: clustered index */ - trx_id_t trx_id, /*!< in: transaction id */ - roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */ - byte* log_ptr,/*!< pointer to a buffer of size > 20 opened - in mlog */ - mtr_t* mtr MY_ATTRIBUTE((unused))) /*!< in: mtr */ -{ - ut_ad(dict_index_is_clust(index)); - ut_ad(mtr); - - log_ptr += mach_write_compressed(log_ptr, - dict_index_get_sys_col_pos( - index, DATA_TRX_ID)); - - trx_write_roll_ptr(log_ptr, roll_ptr); - log_ptr += DATA_ROLL_PTR_LEN; - - log_ptr += mach_ull_write_compressed(log_ptr, trx_id); - - return(log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Parses the log data of system field values. 
-@return log data end or NULL */ -UNIV_INTERN -byte* -row_upd_parse_sys_vals( -/*===================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - ulint* pos, /*!< out: TRX_ID position in record */ - trx_id_t* trx_id, /*!< out: trx id */ - roll_ptr_t* roll_ptr)/*!< out: roll ptr */ -{ - ptr = mach_parse_compressed(ptr, end_ptr, pos); - - if (ptr == NULL) { - - return(NULL); - } - - if (end_ptr < ptr + DATA_ROLL_PTR_LEN) { - - return(NULL); - } - - *roll_ptr = trx_read_roll_ptr(ptr); - ptr += DATA_ROLL_PTR_LEN; - - ptr = mach_ull_parse_compressed(ptr, end_ptr, trx_id); - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************//** -Writes to the redo log the new values of the fields occurring in the index. */ -UNIV_INTERN -void -row_upd_index_write_log( -/*====================*/ - const upd_t* update, /*!< in: update vector */ - byte* log_ptr,/*!< in: pointer to mlog buffer: must - contain at least MLOG_BUF_MARGIN bytes - of free space; the buffer is closed - within this function */ - mtr_t* mtr) /*!< in: mtr into whose log to write */ -{ - const upd_field_t* upd_field; - const dfield_t* new_val; - ulint len; - ulint n_fields; - byte* buf_end; - ulint i; - - n_fields = upd_get_n_fields(update); - - buf_end = log_ptr + MLOG_BUF_MARGIN; - - mach_write_to_1(log_ptr, update->info_bits); - log_ptr++; - log_ptr += mach_write_compressed(log_ptr, n_fields); - - for (i = 0; i < n_fields; i++) { - -#if MLOG_BUF_MARGIN <= 30 -# error "MLOG_BUF_MARGIN <= 30" -#endif - - if (log_ptr + 30 > buf_end) { - mlog_close(mtr, log_ptr); - - log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN); - buf_end = log_ptr + MLOG_BUF_MARGIN; - } - - upd_field = upd_get_nth_field(update, i); - - new_val = &(upd_field->new_val); - - len = dfield_get_len(new_val); - - log_ptr += mach_write_compressed(log_ptr, upd_field->field_no); - log_ptr += mach_write_compressed(log_ptr, len); - - if (len != UNIV_SQL_NULL) { - if (log_ptr + len < 
buf_end) { - memcpy(log_ptr, dfield_get_data(new_val), len); - - log_ptr += len; - } else { - mlog_close(mtr, log_ptr); - - mlog_catenate_string( - mtr, - static_cast<byte*>( - dfield_get_data(new_val)), - len); - - log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN); - buf_end = log_ptr + MLOG_BUF_MARGIN; - } - } - } - - mlog_close(mtr, log_ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Parses the log data written by row_upd_index_write_log. -@return log data end or NULL */ -UNIV_INTERN -byte* -row_upd_index_parse( -/*================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - mem_heap_t* heap, /*!< in: memory heap where update vector is - built */ - upd_t** update_out)/*!< out: update vector */ -{ - upd_t* update; - upd_field_t* upd_field; - dfield_t* new_val; - ulint len; - ulint n_fields; - ulint info_bits; - ulint i; - - if (end_ptr < ptr + 1) { - - return(NULL); - } - - info_bits = mach_read_from_1(ptr); - ptr++; - ptr = mach_parse_compressed(ptr, end_ptr, &n_fields); - - if (ptr == NULL) { - - return(NULL); - } - - update = upd_create(n_fields, heap); - update->info_bits = info_bits; - - for (i = 0; i < n_fields; i++) { - ulint field_no; - upd_field = upd_get_nth_field(update, i); - new_val = &(upd_field->new_val); - - ptr = mach_parse_compressed(ptr, end_ptr, &field_no); - - if (ptr == NULL) { - - return(NULL); - } - - upd_field->field_no = field_no; - - ptr = mach_parse_compressed(ptr, end_ptr, &len); - - if (ptr == NULL) { - - return(NULL); - } - - if (len != UNIV_SQL_NULL) { - - if (end_ptr < ptr + len) { - - return(NULL); - } - - dfield_set_data(new_val, - mem_heap_dup(heap, ptr, len), len); - ptr += len; - } else { - dfield_set_null(new_val); - } - } - - *update_out = update; - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/***************************************************************//** -Builds an update vector from those fields which in a secondary index 
entry -differ from a record that has the equal ordering fields. NOTE: we compare -the fields as binary strings! -@return own: update vector of differing fields */ -UNIV_INTERN -upd_t* -row_upd_build_sec_rec_difference_binary( -/*====================================*/ - const rec_t* rec, /*!< in: secondary index record */ - dict_index_t* index, /*!< in: index */ - const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ - const dtuple_t* entry, /*!< in: entry to insert */ - mem_heap_t* heap) /*!< in: memory heap from which allocated */ -{ - upd_field_t* upd_field; - const dfield_t* dfield; - const byte* data; - ulint len; - upd_t* update; - ulint n_diff; - ulint i; - - /* This function is used only for a secondary index */ - ut_a(!dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry)); - ut_ad(!rec_offs_any_extern(offsets)); - - update = upd_create(dtuple_get_n_fields(entry), heap); - - n_diff = 0; - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - data = rec_get_nth_field(rec, offsets, i, &len); - - dfield = dtuple_get_nth_field(entry, i); - - /* NOTE that it may be that len != dfield_get_len(dfield) if we - are updating in a character set and collation where strings of - different length can be equal in an alphabetical comparison, - and also in the case where we have a column prefix index - and the last characters in the index field are spaces; the - latter case probably caused the assertion failures reported at - row0upd.cc line 713 in versions 4.0.14 - 4.0.16. */ - - /* NOTE: we compare the fields as binary strings! 
- (No collation) */ - - if (!dfield_data_is_binary_equal(dfield, len, data)) { - - upd_field = upd_get_nth_field(update, n_diff); - - dfield_copy(&(upd_field->new_val), dfield); - - upd_field_set_field_no(upd_field, i, index, NULL); - - n_diff++; - } - } - - update->n_fields = n_diff; - - return(update); -} - -/***************************************************************//** -Builds an update vector from those fields, excluding the roll ptr and -trx id fields, which in an index entry differ from a record that has -the equal ordering fields. NOTE: we compare the fields as binary strings! -@return own: update vector of differing fields, excluding roll ptr and -trx id */ -UNIV_INTERN -const upd_t* -row_upd_build_difference_binary( -/*============================*/ - dict_index_t* index, /*!< in: clustered index */ - const dtuple_t* entry, /*!< in: entry to insert */ - const rec_t* rec, /*!< in: clustered index record */ - const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */ - bool no_sys, /*!< in: skip the system columns - DB_TRX_ID and DB_ROLL_PTR */ - trx_t* trx, /*!< in: transaction */ - mem_heap_t* heap) /*!< in: memory heap from which allocated */ -{ - upd_field_t* upd_field; - const dfield_t* dfield; - const byte* data; - ulint len; - upd_t* update; - ulint n_diff; - ulint trx_id_pos; - ulint i; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs_init(offsets_); - - /* This function is used only for a clustered index */ - ut_a(dict_index_is_clust(index)); - - update = upd_create(dtuple_get_n_fields(entry), heap); - - n_diff = 0; - - trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - ut_ad(dict_index_get_sys_col_pos(index, DATA_ROLL_PTR) - == trx_id_pos + 1); - - if (!offsets) { - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - } else { - ut_ad(rec_offs_validate(rec, index, offsets)); - } - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - data = rec_get_nth_field(rec, offsets, i, &len); - - dfield 
= dtuple_get_nth_field(entry, i); - - /* NOTE: we compare the fields as binary strings! - (No collation) */ - - if (no_sys && (i == trx_id_pos || i == trx_id_pos + 1)) { - - continue; - } - - if (!dfield_is_ext(dfield) - != !rec_offs_nth_extern(offsets, i) - || !dfield_data_is_binary_equal(dfield, len, data)) { - - upd_field = upd_get_nth_field(update, n_diff); - - dfield_copy(&(upd_field->new_val), dfield); - - upd_field_set_field_no(upd_field, i, index, trx); - - n_diff++; - } - } - - update->n_fields = n_diff; - - return(update); -} - -/***********************************************************//** -Fetch a prefix of an externally stored column. This is similar -to row_ext_lookup(), but the row_ext_t holds the old values -of the column and must not be poisoned with the new values. -@return BLOB prefix */ -static -byte* -row_upd_ext_fetch( -/*==============*/ - const byte* data, /*!< in: 'internally' stored part of the - field containing also the reference to - the external part */ - ulint local_len, /*!< in: length of data, in bytes */ - ulint zip_size, /*!< in: nonzero=compressed BLOB - page size, zero for uncompressed - BLOBs */ - ulint* len, /*!< in: length of prefix to fetch; - out: fetched length of the prefix */ - mem_heap_t* heap) /*!< in: heap where to allocate */ -{ - byte* buf = static_cast<byte*>(mem_heap_alloc(heap, *len)); - - *len = btr_copy_externally_stored_field_prefix( - buf, *len, zip_size, data, local_len, NULL); - - /* We should never update records containing a half-deleted BLOB. */ - ut_a(*len); - - return(buf); -} - -/***********************************************************//** -Replaces the new column value stored in the update vector in -the given index entry field. 
*/ -static -void -row_upd_index_replace_new_col_val( -/*==============================*/ - dfield_t* dfield, /*!< in/out: data field - of the index entry */ - const dict_field_t* field, /*!< in: index field */ - const dict_col_t* col, /*!< in: field->col */ - const upd_field_t* uf, /*!< in: update field */ - mem_heap_t* heap, /*!< in: memory heap for allocating - and copying the new value */ - ulint zip_size)/*!< in: compressed page - size of the table, or 0 */ -{ - ulint len; - const byte* data; - - dfield_copy_data(dfield, &uf->new_val); - - if (dfield_is_null(dfield)) { - return; - } - - len = dfield_get_len(dfield); - data = static_cast<const byte*>(dfield_get_data(dfield)); - - if (field->prefix_len > 0) { - ibool fetch_ext = dfield_is_ext(dfield) - && len < (ulint) field->prefix_len - + BTR_EXTERN_FIELD_REF_SIZE; - - if (fetch_ext) { - ulint l = len; - - len = field->prefix_len; - - data = row_upd_ext_fetch(data, l, zip_size, - &len, heap); - } - - len = dtype_get_at_most_n_mbchars(col->prtype, - col->mbminmaxlen, - field->prefix_len, len, - (const char*) data); - - dfield_set_data(dfield, data, len); - - if (!fetch_ext) { - dfield_dup(dfield, heap); - } - - return; - } - - switch (uf->orig_len) { - byte* buf; - case BTR_EXTERN_FIELD_REF_SIZE: - /* Restore the original locally stored - part of the column. In the undo log, - InnoDB writes a longer prefix of externally - stored columns, so that column prefixes - in secondary indexes can be reconstructed. */ - dfield_set_data(dfield, - data + len - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - dfield_set_ext(dfield); - /* fall through */ - case 0: - dfield_dup(dfield, heap); - break; - default: - /* Reconstruct the original locally - stored part of the column. The data - will have to be copied. */ - ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE); - buf = static_cast<byte*>(mem_heap_alloc(heap, uf->orig_len)); - - /* Copy the locally stored prefix. 
*/ - memcpy(buf, data, - uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE); - - /* Copy the BLOB pointer. */ - memcpy(buf + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE, - data + len - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - - dfield_set_data(dfield, buf, uf->orig_len); - dfield_set_ext(dfield); - break; - } -} - -/***********************************************************//** -Replaces the new column values stored in the update vector to the index entry -given. */ -UNIV_INTERN -void -row_upd_index_replace_new_col_vals_index_pos( -/*=========================================*/ - dtuple_t* entry, /*!< in/out: index entry where replaced; - the clustered index record must be - covered by a lock or a page latch to - prevent deletion (rollback or purge) */ - dict_index_t* index, /*!< in: index; NOTE that this may also be a - non-clustered index */ - const upd_t* update, /*!< in: an update vector built for the index so - that the field number in an upd_field is the - index position */ - ibool order_only, - /*!< in: if TRUE, limit the replacement to - ordering fields of index; note that this - does not work for non-clustered indexes. 
*/ - mem_heap_t* heap) /*!< in: memory heap for allocating and - copying the new values */ -{ - ulint i; - ulint n_fields; - const ulint zip_size = dict_table_zip_size(index->table); - - dtuple_set_info_bits(entry, update->info_bits); - - if (order_only) { - n_fields = dict_index_get_n_unique(index); - } else { - n_fields = dict_index_get_n_fields(index); - } - - for (i = 0; i < n_fields; i++) { - const dict_field_t* field; - const dict_col_t* col; - const upd_field_t* uf; - - field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(field); - uf = upd_get_field_by_field_no(update, i); - - if (uf) { - row_upd_index_replace_new_col_val( - dtuple_get_nth_field(entry, i), - field, col, uf, heap, zip_size); - } - } -} - -/***********************************************************//** -Replaces the new column values stored in the update vector to the index entry -given. */ -UNIV_INTERN -void -row_upd_index_replace_new_col_vals( -/*===============================*/ - dtuple_t* entry, /*!< in/out: index entry where replaced; - the clustered index record must be - covered by a lock or a page latch to - prevent deletion (rollback or purge) */ - dict_index_t* index, /*!< in: index; NOTE that this may also be a - non-clustered index */ - const upd_t* update, /*!< in: an update vector built for the - CLUSTERED index so that the field number in - an upd_field is the clustered index position */ - mem_heap_t* heap) /*!< in: memory heap for allocating and - copying the new values */ -{ - ulint i; - const dict_index_t* clust_index - = dict_table_get_first_index(index->table); - const ulint zip_size - = dict_table_zip_size(index->table); - - dtuple_set_info_bits(entry, update->info_bits); - - for (i = 0; i < dict_index_get_n_fields(index); i++) { - const dict_field_t* field; - const dict_col_t* col; - const upd_field_t* uf; - - field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(field); - uf = upd_get_field_by_field_no( - update, 
dict_col_get_clust_pos(col, clust_index)); - - if (uf) { - row_upd_index_replace_new_col_val( - dtuple_get_nth_field(entry, i), - field, col, uf, heap, zip_size); - } - } -} - -/***********************************************************//** -Replaces the new column values stored in the update vector. */ -UNIV_INTERN -void -row_upd_replace( -/*============*/ - dtuple_t* row, /*!< in/out: row where replaced, - indexed by col_no; - the clustered index record must be - covered by a lock or a page latch to - prevent deletion (rollback or purge) */ - row_ext_t** ext, /*!< out, own: NULL, or externally - stored column prefixes */ - const dict_index_t* index, /*!< in: clustered index */ - const upd_t* update, /*!< in: an update vector built for the - clustered index */ - mem_heap_t* heap) /*!< in: memory heap */ -{ - ulint col_no; - ulint i; - ulint n_cols; - ulint n_ext_cols; - ulint* ext_cols; - const dict_table_t* table; - - ut_ad(row); - ut_ad(ext); - ut_ad(index); - ut_ad(dict_index_is_clust(index)); - ut_ad(update); - ut_ad(heap); - - n_cols = dtuple_get_n_fields(row); - table = index->table; - ut_ad(n_cols == dict_table_get_n_cols(table)); - - ext_cols = static_cast<ulint*>( - mem_heap_alloc(heap, n_cols * sizeof *ext_cols)); - - n_ext_cols = 0; - - dtuple_set_info_bits(row, update->info_bits); - - for (col_no = 0; col_no < n_cols; col_no++) { - - const dict_col_t* col - = dict_table_get_nth_col(table, col_no); - const ulint clust_pos - = dict_col_get_clust_pos(col, index); - dfield_t* dfield; - - if (UNIV_UNLIKELY(clust_pos == ULINT_UNDEFINED)) { - - continue; - } - - dfield = dtuple_get_nth_field(row, col_no); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - const upd_field_t* upd_field - = upd_get_nth_field(update, i); - - if (upd_field->field_no != clust_pos) { - - continue; - } - - dfield_copy_data(dfield, &upd_field->new_val); - break; - } - - if (dfield_is_ext(dfield) && col->ord_part) { - ext_cols[n_ext_cols++] = col_no; - } - } - - if (n_ext_cols) 
{ - *ext = row_ext_create(n_ext_cols, ext_cols, table->flags, row, - heap); - } else { - *ext = NULL; - } -} - -/***********************************************************//** -Checks if an update vector changes an ordering field of an index record. - -This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. -NOTE: we compare the fields as binary strings! -@return TRUE if update vector changes an ordering field in the index record */ -UNIV_INTERN -ibool -row_upd_changes_ord_field_binary_func( -/*==================================*/ - dict_index_t* index, /*!< in: index of the record */ - const upd_t* update, /*!< in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ -#ifdef UNIV_DEBUG - const que_thr_t*thr, /*!< in: query thread */ -#endif /* UNIV_DEBUG */ - const dtuple_t* row, /*!< in: old value of row, or NULL if the - row and the data values in update are not - known when this function is called, e.g., at - compile time */ - const row_ext_t*ext) /*!< NULL, or prefixes of the externally - stored columns in the old row */ -{ - ulint n_unique; - ulint i; - const dict_index_t* clust_index; - - ut_ad(thr); - ut_ad(thr->graph); - ut_ad(thr->graph->trx); - - n_unique = dict_index_get_n_unique(index); - - clust_index = dict_table_get_first_index(index->table); - - for (i = 0; i < n_unique; i++) { - - const dict_field_t* ind_field; - const dict_col_t* col; - ulint col_no; - const upd_field_t* upd_field; - const dfield_t* dfield; - dfield_t dfield_ext; - ulint dfield_len= 0; - const byte* buf; - - ind_field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(ind_field); - col_no = dict_col_get_no(col); - - upd_field = upd_get_field_by_field_no( - update, dict_col_get_clust_pos(col, clust_index)); - - if (upd_field == NULL) { - continue; - } - - if (row == NULL) { - ut_ad(ext == NULL); - return(TRUE); - } - - dfield = 
dtuple_get_nth_field(row, col_no); - - /* This treatment of column prefix indexes is loosely - based on row_build_index_entry(). */ - - if (UNIV_LIKELY(ind_field->prefix_len == 0) - || dfield_is_null(dfield)) { - /* do nothing special */ - } else if (ext) { - /* Silence a compiler warning without - silencing a Valgrind error. */ - dfield_len = 0; - UNIV_MEM_INVALID(&dfield_len, sizeof dfield_len); - /* See if the column is stored externally. */ - buf = row_ext_lookup(ext, col_no, &dfield_len); - - ut_ad(col->ord_part); - - if (UNIV_LIKELY_NULL(buf)) { - if (UNIV_UNLIKELY(buf == field_ref_zero)) { - /* The externally stored field - was not written yet. This - record should only be seen by - recv_recovery_rollback_active(), - when the server had crashed before - storing the field. */ - ut_ad(thr->graph->trx->is_recovered); - ut_ad(trx_is_recv(thr->graph->trx)); - return(TRUE); - } - - goto copy_dfield; - } - } else if (dfield_is_ext(dfield)) { - dfield_len = dfield_get_len(dfield); - ut_a(dfield_len > BTR_EXTERN_FIELD_REF_SIZE); - dfield_len -= BTR_EXTERN_FIELD_REF_SIZE; - ut_a(dict_index_is_clust(index) - || ind_field->prefix_len <= dfield_len); - - buf = static_cast<byte*>(dfield_get_data(dfield)); -copy_dfield: - ut_a(dfield_len > 0); - dfield_copy(&dfield_ext, dfield); - dfield_set_data(&dfield_ext, buf, dfield_len); - dfield = &dfield_ext; - } - - if (!dfield_datas_are_binary_equal( - dfield, &upd_field->new_val, - ind_field->prefix_len)) { - - return(TRUE); - } - } - - return(FALSE); -} - -/***********************************************************//** -Checks if an update vector changes an ordering field of an index record. -NOTE: we compare the fields as binary strings! 
-@return TRUE if update vector may change an ordering field in an index -record */ -UNIV_INTERN -ibool -row_upd_changes_some_index_ord_field_binary( -/*========================================*/ - const dict_table_t* table, /*!< in: table */ - const upd_t* update) /*!< in: update vector for the row */ -{ - upd_field_t* upd_field; - dict_index_t* index; - ulint i; - - index = dict_table_get_first_index(table); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - upd_field = upd_get_nth_field(update, i); - - if (dict_field_get_col(dict_index_get_nth_field( - index, upd_field->field_no)) - ->ord_part) { - - return(TRUE); - } - } - - return(FALSE); -} - -/***********************************************************//** -Checks if an FTS Doc ID column is affected by an UPDATE. -@return whether the Doc ID column is changed */ -UNIV_INTERN -bool -row_upd_changes_doc_id( -/*===================*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* upd_field) /*!< in: field to check */ -{ - ulint col_no; - dict_index_t* clust_index; - fts_t* fts = table->fts; - - clust_index = dict_table_get_first_index(table); - - /* Convert from index-specific column number to table-global - column number. */ - col_no = dict_index_get_nth_col_no(clust_index, upd_field->field_no); - - return(col_no == fts->doc_col); -} -/***********************************************************//** -Checks if an FTS indexed column is affected by an UPDATE. -@return offset within fts_t::indexes if FTS indexed column updated else -ULINT_UNDEFINED */ -UNIV_INTERN -ulint -row_upd_changes_fts_column( -/*=======================*/ - dict_table_t* table, /*!< in: table */ - upd_field_t* upd_field) /*!< in: field to check */ -{ - ulint col_no; - dict_index_t* clust_index; - fts_t* fts = table->fts; - - clust_index = dict_table_get_first_index(table); - - /* Convert from index-specific column number to table-global - column number. 
*/ - col_no = dict_index_get_nth_col_no(clust_index, upd_field->field_no); - - return(dict_table_is_fts_column(fts->indexes, col_no)); -} - -/***********************************************************//** -Checks if an update vector changes some of the first ordering fields of an -index record. This is only used in foreign key checks and we can assume -that index does not contain column prefixes. -@return TRUE if changes */ -static -ibool -row_upd_changes_first_fields_binary( -/*================================*/ - dtuple_t* entry, /*!< in: index entry */ - dict_index_t* index, /*!< in: index of entry */ - const upd_t* update, /*!< in: update vector for the row */ - ulint n) /*!< in: how many first fields to check */ -{ - ulint n_upd_fields; - ulint i, j; - dict_index_t* clust_index; - - ut_ad(update && index); - ut_ad(n <= dict_index_get_n_fields(index)); - - n_upd_fields = upd_get_n_fields(update); - clust_index = dict_table_get_first_index(index->table); - - for (i = 0; i < n; i++) { - - const dict_field_t* ind_field; - const dict_col_t* col; - ulint col_pos; - - ind_field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(ind_field); - col_pos = dict_col_get_clust_pos(col, clust_index); - - ut_a(ind_field->prefix_len == 0); - - for (j = 0; j < n_upd_fields; j++) { - - upd_field_t* upd_field - = upd_get_nth_field(update, j); - - if (col_pos == upd_field->field_no - && !dfield_datas_are_binary_equal( - dtuple_get_nth_field(entry, i), - &upd_field->new_val, 0)) { - - return(TRUE); - } - } - } - - return(FALSE); -} - -/*********************************************************************//** -Copies the column values from a record. 
*/ -UNIV_INLINE -void -row_upd_copy_columns( -/*=================*/ - rec_t* rec, /*!< in: record in a clustered index */ - const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ - sym_node_t* column) /*!< in: first column in a column list, or - NULL */ -{ - byte* data; - ulint len; - - while (column) { - data = rec_get_nth_field(rec, offsets, - column->field_nos[SYM_CLUST_FIELD_NO], - &len); - eval_node_copy_and_alloc_val(column, data, len); - - column = UT_LIST_GET_NEXT(col_var_list, column); - } -} - -/*********************************************************************//** -Calculates the new values for fields to update. Note that row_upd_copy_columns -must have been called first. */ -UNIV_INLINE -void -row_upd_eval_new_vals( -/*==================*/ - upd_t* update) /*!< in/out: update vector */ -{ - que_node_t* exp; - upd_field_t* upd_field; - ulint n_fields; - ulint i; - - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - upd_field = upd_get_nth_field(update, i); - - exp = upd_field->exp; - - eval_exp(exp); - - dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp)); - } -} - -/***********************************************************//** -Stores to the heap the row on which the node->pcur is positioned. 
*/ -static -void -row_upd_store_row( -/*==============*/ - upd_node_t* node) /*!< in: row update node */ -{ - dict_index_t* clust_index; - rec_t* rec; - mem_heap_t* heap = NULL; - row_ext_t** ext; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - rec_offs_init(offsets_); - - ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES); - - if (node->row != NULL) { - mem_heap_empty(node->heap); - } - - clust_index = dict_table_get_first_index(node->table); - - rec = btr_pcur_get_rec(node->pcur); - - offsets = rec_get_offsets(rec, clust_index, offsets_, - ULINT_UNDEFINED, &heap); - - if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) { - /* In DYNAMIC or COMPRESSED format, there is no prefix - of externally stored columns in the clustered index - record. Build a cache of column prefixes. */ - ext = &node->ext; - } else { - /* REDUNDANT and COMPACT formats store a local - 768-byte prefix of each externally stored column. - No cache is needed. */ - ext = NULL; - node->ext = NULL; - } - - node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, - NULL, NULL, NULL, ext, node->heap); - if (node->is_delete) { - node->upd_row = NULL; - node->upd_ext = NULL; - } else { - node->upd_row = dtuple_copy(node->row, node->heap); - row_upd_replace(node->upd_row, &node->upd_ext, - clust_index, node->update, node->heap); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/***********************************************************//** -Updates a secondary index entry of a row. 
-@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_sec_index_entry( -/*====================*/ - upd_node_t* node, /*!< in: row update node */ - que_thr_t* thr) /*!< in: query thread */ -{ - mtr_t mtr; - const rec_t* rec; - btr_pcur_t pcur; - mem_heap_t* heap; - dtuple_t* entry; - dict_index_t* index; - btr_cur_t* btr_cur; - ibool referenced; - dberr_t err = DB_SUCCESS; - trx_t* trx = thr_get_trx(thr); - ulint mode; - enum row_search_result search_result; - - ut_ad(trx->id); - - index = node->index; - - referenced = row_upd_index_is_referenced(index, trx); -#ifdef WITH_WSREP - ibool foreign = wsrep_row_upd_index_is_foreign(index, trx); -#endif /* WITH_WSREP */ - - heap = mem_heap_create(1024); - - /* Build old index entry */ - entry = row_build_index_entry(node->row, node->ext, index, heap); - ut_a(entry); - - log_free_check(); - -#ifdef UNIV_DEBUG - /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). - Once it is fixed, remove the 'ifdef', 'if' and this comment. */ - if (!trx->ddl) { - DEBUG_SYNC_C_IF_THD(trx->mysql_thd, - "before_row_upd_sec_index_entry"); - } -#endif /* UNIV_DEBUG */ - - mtr_start_trx(&mtr, trx); - - if (*index->name == TEMP_INDEX_PREFIX) { - /* The index->online_status may change if the - index->name starts with TEMP_INDEX_PREFIX (meaning - that the index is or was being created online). It is - protected by index->lock. */ - - mtr_s_lock(dict_index_get_lock(index), &mtr); - - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_COMPLETE: - /* This is a normal index. Do not log anything. - Perform the update on the index tree directly. */ - break; - case ONLINE_INDEX_CREATION: - /* Log a DELETE and optionally INSERT. 
*/ - row_log_online_op(index, entry, 0); - - if (!node->is_delete) { - mem_heap_empty(heap); - entry = row_build_index_entry( - node->upd_row, node->upd_ext, - index, heap); - ut_a(entry); - row_log_online_op(index, entry, trx->id); - } - /* fall through */ - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - mtr_commit(&mtr); - goto func_exit; - } - - /* We can only buffer delete-mark operations if there - are no foreign key constraints referring to the index. */ - mode = referenced - ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED - : BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED - | BTR_DELETE_MARK; - } else { - /* For secondary indexes, - index->online_status==ONLINE_INDEX_CREATION unless - index->name starts with TEMP_INDEX_PREFIX. */ - ut_ad(!dict_index_is_online_ddl(index)); - - /* We can only buffer delete-mark operations if there - are no foreign key constraints referring to the index. */ - mode = referenced - ? BTR_MODIFY_LEAF - : BTR_MODIFY_LEAF | BTR_DELETE_MARK; - } - - /* Set the query thread, so that ibuf_insert_low() will be - able to invoke thd_get_trx(). */ - btr_pcur_get_btr_cur(&pcur)->thr = thr; - - search_result = row_search_index_entry(index, entry, - UNIV_UNLIKELY(trx->fake_changes) - ? BTR_SEARCH_LEAF - : (btr_latch_mode)mode, - &pcur, &mtr); - - btr_cur = btr_pcur_get_btr_cur(&pcur); - - rec = btr_cur_get_rec(btr_cur); - - switch (search_result) { - case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */ - ut_error; - break; - case ROW_BUFFERED: - /* Entry was delete marked already. */ - break; - - case ROW_NOT_FOUND: - if (*index->name == TEMP_INDEX_PREFIX) { - /* When online CREATE INDEX copied the update - that we already made to the clustered index, - and completed the secondary index creation - before we got here, the old secondary index - record would not exist. The CREATE INDEX - should be waiting for a MySQL meta-data lock - upgrade at least until this UPDATE - returns. 
After that point, the - TEMP_INDEX_PREFIX would be dropped from the - index name in commit_inplace_alter_table(). */ - break; - } - - fputs("InnoDB: error in sec index entry update in\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fputs("\n" - "InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, rec, index); - putc('\n', stderr); - trx_print(stderr, trx, 0); - fputs("\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - ut_ad(0); - break; - case ROW_FOUND: - /* Delete mark the old index record; it can already be - delete marked if we return after a lock wait in - row_ins_sec_index_entry() below */ - if (!rec_get_deleted_flag( - rec, dict_table_is_comp(index->table))) { -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif /* WITH_WSREP */ - err = btr_cur_del_mark_set_sec_rec( - 0, btr_cur, TRUE, thr, &mtr); - - if (err == DB_SUCCESS && referenced) { - - ulint* offsets; - - offsets = rec_get_offsets( - rec, index, NULL, ULINT_UNDEFINED, - &heap); - - /* NOTE that the following call loses - the position of pcur ! 
*/ - err = row_upd_check_references_constraints( - node, &pcur, index->table, - index, offsets, thr, &mtr); - } -#ifdef WITH_WSREP - if (err == DB_SUCCESS && !referenced && - !(parent && que_node_get_type(parent) == - QUE_NODE_UPDATE && - ((upd_node_t*)parent)->cascade_node == node) && - foreign - ) { - ulint* offsets = - rec_get_offsets( - rec, index, NULL, ULINT_UNDEFINED, - &heap); - err = wsrep_row_upd_check_foreign_constraints( - node, &pcur, index->table, - index, offsets, thr, &mtr); - switch (err) { - case DB_SUCCESS: - case DB_NO_REFERENCED_ROW: - err = DB_SUCCESS; - break; - case DB_DEADLOCK: - if (wsrep_debug) fprintf (stderr, - "WSREP: sec index FK check fail for deadlock"); - break; - default: - fprintf (stderr, - "WSREP: referenced FK check fail: %d", - (int)err); - break; - } - } -#endif /* WITH_WSREP */ - } - break; - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - if (node->is_delete || err != DB_SUCCESS) { - - goto func_exit; - } - - mem_heap_empty(heap); - - /* Build a new index entry */ - entry = row_build_index_entry(node->upd_row, node->upd_ext, - index, heap); - ut_a(entry); - - /* Insert new index entry */ - err = row_ins_sec_index_entry(index, entry, thr); - -func_exit: - mem_heap_free(heap); - - return(err); -} - -/***********************************************************//** -Updates the secondary index record if it is changed in the row update or -deletes it if this is a delete. 
-@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_sec_step( -/*=============*/ - upd_node_t* node, /*!< in: row update node */ - que_thr_t* thr) /*!< in: query thread */ -{ - ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC) - || (node->state == UPD_NODE_UPDATE_SOME_SEC)); - ut_ad(!dict_index_is_clust(node->index)); - - if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->index, node->update, - thr, node->row, node->ext)) { - return(row_upd_sec_index_entry(node, thr)); - } - - return(DB_SUCCESS); -} - -#ifdef UNIV_DEBUG -# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ - row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update) -#else /* UNIV_DEBUG */ -# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update) \ - row_upd_clust_rec_by_insert_inherit_func(entry,update) -#endif /* UNIV_DEBUG */ -/*******************************************************************//** -Mark non-updated off-page columns inherited when the primary key is -updated. We must mark them as inherited in entry, so that they are not -freed in a rollback. A limited version of this function used to be -called btr_cur_mark_dtuple_inherited_extern(). 
-@return TRUE if any columns were inherited */ -static MY_ATTRIBUTE((warn_unused_result)) -ibool -row_upd_clust_rec_by_insert_inherit_func( -/*=====================================*/ -#ifdef UNIV_DEBUG - const rec_t* rec, /*!< in: old record, or NULL */ - const ulint* offsets,/*!< in: rec_get_offsets(rec), or NULL */ -#endif /* UNIV_DEBUG */ - dtuple_t* entry, /*!< in/out: updated entry to be - inserted into the clustered index */ - const upd_t* update) /*!< in: update vector */ -{ - ibool inherit = FALSE; - ulint i; - - ut_ad(!rec == !offsets); - ut_ad(!rec || rec_offs_any_extern(offsets)); - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - dfield_t* dfield = dtuple_get_nth_field(entry, i); - byte* data; - ulint len; - - ut_ad(!offsets - || !rec_offs_nth_extern(offsets, i) - == !dfield_is_ext(dfield) - || upd_get_field_by_field_no(update, i)); - if (!dfield_is_ext(dfield) - || upd_get_field_by_field_no(update, i)) { - continue; - } - -#ifdef UNIV_DEBUG - if (UNIV_LIKELY(rec != NULL)) { - const byte* rec_data - = rec_get_nth_field(rec, offsets, i, &len); - ut_ad(len == dfield_get_len(dfield)); - ut_ad(len != UNIV_SQL_NULL); - ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE); - - rec_data += len - BTR_EXTERN_FIELD_REF_SIZE; - - /* The pointer must not be zero. */ - ut_ad(memcmp(rec_data, field_ref_zero, - BTR_EXTERN_FIELD_REF_SIZE)); - /* The BLOB must be owned. */ - ut_ad(!(rec_data[BTR_EXTERN_LEN] - & BTR_EXTERN_OWNER_FLAG)); - } -#endif /* UNIV_DEBUG */ - - len = dfield_get_len(dfield); - ut_a(len != UNIV_SQL_NULL); - ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); - - data = static_cast<byte*>(dfield_get_data(dfield)); - - data += len - BTR_EXTERN_FIELD_REF_SIZE; - /* The pointer must not be zero. */ - ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)); - data[BTR_EXTERN_LEN] &= ~BTR_EXTERN_OWNER_FLAG; - data[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG; - /* The BTR_EXTERN_INHERITED_FLAG only matters in - rollback. 
Purge will always free the extern fields of - a delete-marked row. */ - - inherit = TRUE; - } - - return(inherit); -} - -/***********************************************************//** -Marks the clustered index record deleted and inserts the updated version -of the record to the index. This function should be used when the ordering -fields of the clustered index record change. This should be quite rare in -database applications. -@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_clust_rec_by_insert( -/*========================*/ - upd_node_t* node, /*!< in/out: row update node */ - dict_index_t* index, /*!< in: clustered index of the record */ - que_thr_t* thr, /*!< in: query thread */ - ibool referenced,/*!< in: TRUE if index may be referenced in - a foreign key constraint */ -#ifdef WITH_WSREP - ibool foreign, /*!< in: TRUE if index is foreign key index */ -#endif /* WITH_WSREP */ - mtr_t* mtr) /*!< in/out: mtr; gets committed here */ -{ - mem_heap_t* heap; - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - trx_t* trx; - dict_table_t* table; - dtuple_t* entry; - dberr_t err; - ibool change_ownership = FALSE; - rec_t* rec; - ulint* offsets = NULL; - -#ifdef WITH_WSREP - que_node_t *parent = que_node_get_parent(node); -#endif /* WITH_WSREP */ - ut_ad(node); - ut_ad(dict_index_is_clust(index)); - - trx = thr_get_trx(thr); - table = node->table; - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - heap = mem_heap_create(1000); - - entry = row_build_index_entry(node->upd_row, node->upd_ext, - index, heap); - ut_a(entry); - - row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); - - switch (node->state) { - default: - ut_error; - case UPD_NODE_INSERT_BLOB: - /* A lock wait occurred in row_ins_clust_index_entry() in - the previous invocation of this function. Mark the - off-page columns in the entry inherited. 
*/ - - if (UNIV_LIKELY(!trx->fake_changes)) { - change_ownership = row_upd_clust_rec_by_insert_inherit( - NULL, NULL, entry, node->update); - ut_a(change_ownership); - } - /* fall through */ - case UPD_NODE_INSERT_CLUSTERED: - /* A lock wait occurred in row_ins_clust_index_entry() in - the previous invocation of this function. */ - break; - case UPD_NODE_UPDATE_CLUSTERED: - /* This is the first invocation of the function where - we update the primary key. Delete-mark the old record - in the clustered index and prepare to insert a new entry. */ - rec = btr_cur_get_rec(btr_cur); - offsets = rec_get_offsets(rec, index, NULL, - ULINT_UNDEFINED, &heap); - ut_ad(page_rec_is_user_rec(rec)); - - err = btr_cur_del_mark_set_clust_rec( - btr_cur_get_block(btr_cur), rec, index, offsets, - thr, mtr); - if (err != DB_SUCCESS) { -err_exit: - mtr_commit(mtr); - mem_heap_free(heap); - return(err); - } - - /* If the the new row inherits externally stored - fields (off-page columns a.k.a. BLOBs) from the - delete-marked old record, mark them disowned by the - old record and owned by the new entry. */ - - if (rec_offs_any_extern(offsets) - && UNIV_LIKELY(!(trx->fake_changes))) { - change_ownership = row_upd_clust_rec_by_insert_inherit( - rec, offsets, entry, node->update); - - if (change_ownership) { - /* The blobs are disowned here, expecting the - insert down below to inherit them. But if the - insert fails, then this disown will be undone - when the operation is rolled back. */ - btr_cur_disown_inherited_fields( - btr_cur_get_page_zip(btr_cur), - rec, index, offsets, node->update, mtr); - } - } - - if (referenced) { - /* NOTE that the following call loses - the position of pcur ! 
*/ - - err = row_upd_check_references_constraints( - node, pcur, table, index, offsets, thr, mtr); - - if (err != DB_SUCCESS) { - goto err_exit; - } - } -#ifdef WITH_WSREP - if (!referenced && - !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE && - ((upd_node_t*)parent)->cascade_node == node) && - foreign - ) { - err = wsrep_row_upd_check_foreign_constraints( - node, pcur, table, index, offsets, thr, mtr); - switch (err) { - case DB_SUCCESS: - case DB_NO_REFERENCED_ROW: - err = DB_SUCCESS; - break; - case DB_DEADLOCK: - if (wsrep_debug) fprintf (stderr, - "WSREP: insert FK check fail for deadlock"); - break; - default: - fprintf (stderr, - "WSREP: referenced FK check fail: %d", - (int)err); - break; - } - if (err != DB_SUCCESS) { - goto err_exit; - } - } -#endif /* WITH_WSREP */ - } - - mtr_commit(mtr); - - err = row_ins_clust_index_entry( - index, entry, thr, - node->upd_ext ? node->upd_ext->n_ext : 0); - node->state = change_ownership - ? UPD_NODE_INSERT_BLOB - : UPD_NODE_INSERT_CLUSTERED; - - mem_heap_free(heap); - - return(err); -} - -/***********************************************************//** -Updates a clustered index record of a row when the ordering fields do -not change. 
-@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_clust_rec( -/*==============*/ - upd_node_t* node, /*!< in: row update node */ - dict_index_t* index, /*!< in: clustered index */ - ulint* offsets,/*!< in: rec_get_offsets() on node->pcur */ - mem_heap_t** offsets_heap, - /*!< in/out: memory heap, can be emptied */ - que_thr_t* thr, /*!< in: query thread */ - mtr_t* mtr) /*!< in: mtr; gets committed here */ -{ - mem_heap_t* heap = NULL; - big_rec_t* big_rec = NULL; - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - dberr_t err; - const dtuple_t* rebuilt_old_pk = NULL; - - ut_ad(node); - ut_ad(dict_index_is_clust(index)); - - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - ut_ad(btr_cur_get_index(btr_cur) == index); - ut_ad(!rec_get_deleted_flag(btr_cur_get_rec(btr_cur), - dict_table_is_comp(index->table))); - ut_ad(rec_offs_validate(btr_cur_get_rec(btr_cur), index, offsets)); - - if (dict_index_is_online_ddl(index)) { - rebuilt_old_pk = row_log_table_get_pk( - btr_cur_get_rec(btr_cur), index, offsets, NULL, &heap); - } - - /* Try optimistic updating of the record, keeping changes within - the page; we do not check locks because we assume the x-lock on the - record to update */ - - if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) { - err = btr_cur_update_in_place( - BTR_NO_LOCKING_FLAG, btr_cur, - offsets, node->update, - node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); - } else { - err = btr_cur_optimistic_update( - BTR_NO_LOCKING_FLAG, btr_cur, - &offsets, offsets_heap, node->update, - node->cmpl_info, thr, thr_get_trx(thr)->id, mtr); - } - - if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) { - row_log_table_update(btr_cur_get_rec(btr_cur), - index, offsets, rebuilt_old_pk); - } - - mtr_commit(mtr); - - if (UNIV_LIKELY(err == DB_SUCCESS)) { - - goto func_exit; - } - - if (buf_LRU_buf_pool_running_out()) { - - err = DB_LOCK_TABLE_FULL; - 
goto func_exit; - } - /* We may have to modify the tree structure: do a pessimistic descent - down the index tree */ - - mtr_start_trx(mtr, thr_get_trx(thr)); - - /* NOTE: this transaction has an s-lock or x-lock on the record and - therefore other transactions cannot modify the record when we have no - latch on the page. In addition, we assume that other query threads of - the same transaction do not modify the record in the meantime. - Therefore we can assert that the restoration of the cursor succeeds. */ - - ut_a(btr_pcur_restore_position( - UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes) - ? BTR_SEARCH_TREE : BTR_MODIFY_TREE, - pcur, mtr)); - - ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), - dict_table_is_comp(index->table))); - - if (!heap) { - heap = mem_heap_create(1024); - } - - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, - &offsets, offsets_heap, heap, &big_rec, - node->update, node->cmpl_info, - thr, thr_get_trx(thr)->id, mtr); - if (big_rec && UNIV_LIKELY(!(thr_get_trx(thr)->fake_changes))) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns while still x-latching - index->lock and block->lock. Allocate - pages for big_rec in the mtr that - modified the B-tree, but be sure to skip - any pages that were freed in mtr. We will - write out the big_rec pages before - committing the B-tree mini-transaction. If - the system crashes so that crash recovery - will not replay the mtr_commit(&mtr), the - big_rec pages will be left orphaned until - the pages are allocated for something else. - - TODO: If the allocation extends the tablespace, it - will not be redo logged, in either mini-transaction. - Tablespace extension should be redo-logged in the - big_rec mini-transaction, so that recovery will not - fail when the big_rec was written to the extended - portion of the file, in case the file was somehow - truncated in the crash. 
*/ - - DEBUG_SYNC_C("before_row_upd_extern"); - err = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), offsets, - big_rec, mtr, BTR_STORE_UPDATE); - DEBUG_SYNC_C("after_row_upd_extern"); - /* If writing big_rec fails (for example, because of - DB_OUT_OF_FILE_SPACE), the record will be corrupted. - Even if we did not update any externally stored - columns, our update could cause the record to grow so - that a non-updated column was selected for external - storage. This non-update would not have been written - to the undo log, and thus the record cannot be rolled - back. - - However, because we have not executed mtr_commit(mtr) - yet, the update will not be replayed in crash - recovery, and the following assertion failure will - effectively "roll back" the operation. */ - ut_a(err == DB_SUCCESS); - } - - if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) { - row_log_table_update(btr_cur_get_rec(btr_cur), - index, offsets, rebuilt_old_pk); - } - - mtr_commit(mtr); -func_exit: - if (heap) { - mem_heap_free(heap); - } - - if (big_rec) { - dtuple_big_rec_free(big_rec); - } - - return(err); -} - -/***********************************************************//** -Delete marks a clustered index record. 
-@return DB_SUCCESS if operation successfully completed, else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_del_mark_clust_rec( -/*=======================*/ - upd_node_t* node, /*!< in: row update node */ - dict_index_t* index, /*!< in: clustered index */ - ulint* offsets,/*!< in/out: rec_get_offsets() for the - record under the cursor */ - que_thr_t* thr, /*!< in: query thread */ - ibool referenced, - /*!< in: TRUE if index may be referenced in - a foreign key constraint */ -#ifdef WITH_WSREP - ibool foreign,/*!< in: TRUE if index is foreign key index */ -#endif /* WITH_WSREP */ - mtr_t* mtr) /*!< in: mtr; gets committed here */ -{ - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - dberr_t err; -#ifdef WITH_WSREP - rec_t* rec; - que_node_t *parent = que_node_get_parent(node); -#endif /* WITH_WSREP */ - - ut_ad(node); - ut_ad(dict_index_is_clust(index)); - ut_ad(node->is_delete); - - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - /* Store row because we have to build also the secondary index - entries */ - - row_upd_store_row(node); - - /* Mark the clustered index record deleted; we do not have to check - locks, because we assume that we have an x-lock on the record */ - -#ifdef WITH_WSREP - rec = btr_cur_get_rec(btr_cur); -#endif /* WITH_WSREP */ - - err = btr_cur_del_mark_set_clust_rec( -#ifdef WITH_WSREP - btr_cur_get_block(btr_cur), rec, -#else - btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), -#endif /* WITH_WSREP */ - index, offsets, thr, mtr); - if (err == DB_SUCCESS && referenced) { - /* NOTE that the following call loses the position of pcur ! 
*/ - - err = row_upd_check_references_constraints( - node, pcur, index->table, index, offsets, thr, mtr); - } -#ifdef WITH_WSREP - if (err == DB_SUCCESS && !referenced && - !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE && - ((upd_node_t*)parent)->cascade_node == node) && - thr_get_trx(thr) && - foreign - ) { - err = wsrep_row_upd_check_foreign_constraints( - node, pcur, index->table, index, offsets, thr, mtr); - switch (err) { - case DB_SUCCESS: - case DB_NO_REFERENCED_ROW: - err = DB_SUCCESS; - break; - case DB_DEADLOCK: - if (wsrep_debug) fprintf (stderr, - "WSREP: clust rec FK check fail for deadlock"); - break; - default: - fprintf (stderr, - "WSREP: clust rec referenced FK check fail: %d", - (int)err); - break; - } - } -#endif /* WITH_WSREP */ - - mtr_commit(mtr); - - return(err); -} - -/***********************************************************//** -Updates the clustered index record. -@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT -in case of a lock wait, else error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd_clust_step( -/*===============*/ - upd_node_t* node, /*!< in: row update node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dict_index_t* index; - btr_pcur_t* pcur; - ibool success; - dberr_t err; - mtr_t mtr; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets; - ibool referenced; - rec_offs_init(offsets_); - - index = dict_table_get_first_index(node->table); - - referenced = row_upd_index_is_referenced(index, thr_get_trx(thr)); -#ifdef WITH_WSREP - ibool foreign = wsrep_row_upd_index_is_foreign( - index, thr_get_trx(thr)); -#endif /* WITH_WSREP */ - - pcur = node->pcur; - - /* We have to restore the cursor to its position */ - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - /* If the restoration does not succeed, then the same - transaction has deleted the record on which the cursor was, - and that is an SQL error. 
If the restoration succeeds, it may - still be that the same transaction has successively deleted - and inserted a record with the same ordering fields, but in - that case we know that the transaction has at least an - implicit x-lock on the record. */ - - ut_a(pcur->rel_pos == BTR_PCUR_ON); - - ulint mode; - ulint search_mode; - -#ifdef UNIV_DEBUG - /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). - Once it is fixed, remove the 'ifdef', 'if' and this comment. */ - if (!thr_get_trx(thr)->ddl) { - DEBUG_SYNC_C_IF_THD( - thr_get_trx(thr)->mysql_thd, - "innodb_row_upd_clust_step_enter"); - } -#endif /* UNIV_DEBUG */ - - /* If running with fake_changes mode on then switch from modify to - search so that code takes only s-latch and not x-latch. - For dry-run (fake-changes) s-latch is acceptable. Taking x-latch will - make it more restrictive and will block real changes/workflow. */ - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - mode = BTR_MODIFY_LEAF; - search_mode = BTR_SEARCH_LEAF; - } else { - mode = BTR_MODIFY_LEAF; - search_mode = BTR_MODIFY_LEAF; - } - - if (dict_index_is_online_ddl(index)) { - - ut_ad(node->table->id != DICT_INDEXES_ID); - - mode |= BTR_ALREADY_S_LATCHED; - search_mode |= BTR_ALREADY_S_LATCHED; - - mtr_s_lock(dict_index_get_lock(index), &mtr); - } - - success = btr_pcur_restore_position(search_mode, pcur, &mtr); - - if (!success) { - err = DB_RECORD_NOT_FOUND; - - mtr_commit(&mtr); - - return(err); - } - - /* If this is a row in SYS_INDEXES table of the data dictionary, - then we have to free the file segments of the index tree associated - with the index */ - - if (node->is_delete && node->table->id == DICT_INDEXES_ID) { - - ut_ad(!dict_index_is_online_ddl(index)); - - /* Action in fake change mode shouldn't cause changes - in system tables. 
*/ - ut_ad(UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)); - - dict_drop_index_tree(btr_pcur_get_rec(pcur), &mtr); - - mtr_commit(&mtr); - - mtr_start_trx(&mtr, thr_get_trx(thr)); - - success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, - &mtr); - if (!success) { - err = DB_ERROR; - - mtr_commit(&mtr); - - return(err); - } - } - - rec = btr_pcur_get_rec(pcur); - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - - if (!node->has_clust_rec_x_lock) { - err = lock_clust_rec_modify_check_and_lock( - 0, btr_pcur_get_block(pcur), - rec, index, offsets, thr); - if (err != DB_SUCCESS) { - mtr_commit(&mtr); - goto exit_func; - } - } - - /* This check passes as the function manipulates x-lock to s-lock - if operating in fake-change mode. */ - ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table, - btr_pcur_get_block(pcur), - page_rec_get_heap_no(rec))); - - /* NOTE: the following function calls will also commit mtr */ - - if (node->is_delete) { - err = row_upd_del_mark_clust_rec( -#ifdef WITH_WSREP - node, index, offsets, thr, referenced, foreign, &mtr); -#else - node, index, offsets, thr, referenced, &mtr); -#endif /* WITH_WSREP */ - - if (err == DB_SUCCESS) { - node->state = UPD_NODE_UPDATE_ALL_SEC; - node->index = dict_table_get_next_index(index); - } - - goto exit_func; - } - - /* If the update is made for MySQL, we already have the update vector - ready, else we have to do some evaluation: */ - - if (UNIV_UNLIKELY(!node->in_mysql_interface)) { - /* Copy the necessary columns from clust_rec and calculate the - new values to set */ - row_upd_copy_columns(rec, offsets, - UT_LIST_GET_FIRST(node->columns)); - row_upd_eval_new_vals(node->update); - } - - if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { - - err = row_upd_clust_rec( - node, index, offsets, &heap, thr, &mtr); - goto exit_func; - } - - row_upd_store_row(node); - - if (row_upd_changes_ord_field_binary(index, node->update, thr, - node->row, node->ext)) { - - /* Update causes an 
ordering field (ordering fields within - the B-tree) of the clustered index record to change: perform - the update by delete marking and inserting. - - TODO! What to do to the 'Halloween problem', where an update - moves the record forward in index so that it is again - updated when the cursor arrives there? Solution: the - read operation must check the undo record undo number when - choosing records to update. MySQL solves now the problem - externally! */ - - err = row_upd_clust_rec_by_insert( -#ifdef WITH_WSREP - node, index, thr, referenced, foreign, &mtr); -#else - node, index, thr, referenced, &mtr); -#endif /* WITH_WSREP */ - - if (err != DB_SUCCESS) { - - goto exit_func; - } - - node->state = UPD_NODE_UPDATE_ALL_SEC; - } else { - err = row_upd_clust_rec( - node, index, offsets, &heap, thr, &mtr); - - if (err != DB_SUCCESS) { - - goto exit_func; - } - - node->state = UPD_NODE_UPDATE_SOME_SEC; - } - - node->index = dict_table_get_next_index(index); - -exit_func: - if (heap) { - mem_heap_free(heap); - } - return(err); -} - -/***********************************************************//** -Updates the affected index records of a row. When the control is transferred -to this node, we assume that we have a persistent cursor which was on a -record, and the position of the cursor is stored in the cursor. 
-@return DB_SUCCESS if operation successfully completed, else error -code or DB_LOCK_WAIT */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -row_upd( -/*====*/ - upd_node_t* node, /*!< in: row update node */ - que_thr_t* thr) /*!< in: query thread */ -{ - dberr_t err = DB_SUCCESS; - - if (UNIV_LIKELY(node->in_mysql_interface)) { - - /* We do not get the cmpl_info value from the MySQL - interpreter: we must calculate it on the fly: */ - - if (node->is_delete - || row_upd_changes_some_index_ord_field_binary( - node->table, node->update)) { - node->cmpl_info = 0; - } else { - node->cmpl_info = UPD_NODE_NO_ORD_CHANGE; - } - } - - switch (node->state) { - case UPD_NODE_UPDATE_CLUSTERED: - case UPD_NODE_INSERT_CLUSTERED: - case UPD_NODE_INSERT_BLOB: - log_free_check(); - err = row_upd_clust_step(node, thr); - - if (err != DB_SUCCESS) { - - return(err); - } - } - - if (node->index == NULL - || (!node->is_delete - && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) { - - return(DB_SUCCESS); - } - -#ifdef UNIV_DEBUG - /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). - Once it is fixed, remove the 'ifdef', 'if' and this comment. 
*/ - if (!thr_get_trx(thr)->ddl) { - DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd, - "after_row_upd_clust"); - } -#endif /* UNIV_DEBUG */ - - DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;); - - do { - /* Skip corrupted index */ - dict_table_skip_corrupt_index(node->index); - - if (!node->index) { - break; - } - - if (node->index->type != DICT_FTS) { - err = row_upd_sec_step(node, thr); - - if (err != DB_SUCCESS) { - - return(err); - } - } - - node->index = dict_table_get_next_index(node->index); - } while (node->index != NULL); - - ut_ad(err == DB_SUCCESS); - - /* Do some cleanup */ - - if (node->row != NULL) { - node->row = NULL; - node->ext = NULL; - node->upd_row = NULL; - node->upd_ext = NULL; - mem_heap_empty(node->heap); - } - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - return(err); -} - -/***********************************************************//** -Updates a row in a table. This is a high-level function used in SQL execution -graphs. -@return query thread to run next or NULL */ -UNIV_INTERN -que_thr_t* -row_upd_step( -/*=========*/ - que_thr_t* thr) /*!< in: query thread */ -{ - upd_node_t* node; - sel_node_t* sel_node; - que_node_t* parent; - dberr_t err = DB_SUCCESS; - trx_t* trx; - - ut_ad(thr); - - trx = thr_get_trx(thr); - - trx_start_if_not_started_xa(trx); - - node = static_cast<upd_node_t*>(thr->run_node); - - sel_node = node->select; - - parent = que_node_get_parent(node); - - ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE); - - if (thr->prev_node == parent) { - node->state = UPD_NODE_SET_IX_LOCK; - } - - if (node->state == UPD_NODE_SET_IX_LOCK) { - - if (!node->has_clust_rec_x_lock) { - /* It may be that the current session has not yet - started its transaction, or it has been committed: */ - - err = lock_table(0, node->table, LOCK_IX, thr); - - if (err != DB_SUCCESS) { - - goto error_handling; - } - } - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - if (node->searched_update) { - /* Reset the cursor */ - sel_node->state = 
SEL_NODE_OPEN; - - /* Fetch a row to update */ - - thr->run_node = sel_node; - - return(thr); - } - } - - /* sel_node is NULL if we are in the MySQL interface */ - - if (sel_node && (sel_node->state != SEL_NODE_FETCH)) { - - if (!node->searched_update) { - /* An explicit cursor should be positioned on a row - to update */ - - ut_error; - - err = DB_ERROR; - - goto error_handling; - } - - ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); - - /* No more rows to update, or the select node performed the - updates directly in-place */ - - thr->run_node = parent; - - return(thr); - } - - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - - err = row_upd(node, thr); - -error_handling: - trx->error_state = err; - - if (err != DB_SUCCESS) { - return(NULL); - } - - /* DO THE TRIGGER ACTIONS HERE */ - - if (node->searched_update) { - /* Fetch next row to update */ - - thr->run_node = sel_node; - } else { - /* It was an explicit cursor update */ - - thr->run_node = parent; - } - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - return(thr); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/row/row0vers.cc b/storage/xtradb/row/row0vers.cc deleted file mode 100644 index 9f1fc13ee09..00000000000 --- a/storage/xtradb/row/row0vers.cc +++ /dev/null @@ -1,770 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file row/row0vers.cc -Row versions - -Created 2/6/1997 Heikki Tuuri -*******************************************************/ - -#include "row0vers.h" - -#ifdef UNIV_NONINL -#include "row0vers.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "btr0btr.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0upd.h" -#include "rem0cmp.h" -#include "read0read.h" -#include "lock0lock.h" - -/*****************************************************************//** -Finds out if an active transaction has inserted or modified a secondary -index record. -@return 0 if committed, else the active transaction id; -NOTE that this function can return false positives but never false -negatives. The caller must confirm all positive results by calling -trx_is_active() while holding lock_sys->mutex. 
*/ -UNIV_INLINE -trx_id_t -row_vers_impl_x_locked_low( -/*=======================*/ - const rec_t* clust_rec, /*!< in: clustered index record */ - dict_index_t* clust_index, /*!< in: the clustered index */ - const rec_t* rec, /*!< in: secondary index record */ - dict_index_t* index, /*!< in: the secondary index */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - trx_id_t trx_id; - ibool corrupt; - ulint comp; - ulint rec_del; - const rec_t* version; - rec_t* prev_version = NULL; - ulint* clust_offsets; - mem_heap_t* heap; - - DBUG_ENTER("row_vers_impl_x_locked_low"); - - ut_ad(rec_offs_validate(rec, index, offsets)); - - heap = mem_heap_create(1024); - - clust_offsets = rec_get_offsets( - clust_rec, clust_index, NULL, ULINT_UNDEFINED, &heap); - - trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets); - corrupt = FALSE; - - if (!trx_rw_is_active(trx_id, &corrupt)) { - /* The transaction that modified or inserted clust_rec is no - longer active, or it is corrupt: no implicit lock on rec */ - if (corrupt) { - lock_report_trx_id_insanity( - trx_id, clust_rec, clust_index, clust_offsets, - trx_sys_get_max_trx_id()); - } - mem_heap_free(heap); - DBUG_RETURN(0); - } - - comp = page_rec_is_comp(rec); - ut_ad(index->table == clust_index->table); - ut_ad(!!comp == dict_table_is_comp(index->table)); - ut_ad(!comp == !page_rec_is_comp(clust_rec)); - - rec_del = rec_get_deleted_flag(rec, comp); - - /* We look up if some earlier version, which was modified by - the trx_id transaction, of the clustered index record would - require rec to be in a different state (delete marked or - unmarked, or have different field values, or not existing). If - there is such a version, then rec was modified by the trx_id - transaction, and it has an implicit x-lock on rec. 
Note that - if clust_rec itself would require rec to be in a different - state, then the trx_id transaction has not yet had time to - modify rec, and does not necessarily have an implicit x-lock - on rec. */ - - for (version = clust_rec;; version = prev_version) { - row_ext_t* ext; - const dtuple_t* row; - dtuple_t* entry; - ulint vers_del; - trx_id_t prev_trx_id; - mem_heap_t* old_heap = heap; - - /* We keep the semaphore in mtr on the clust_rec page, so - that no other transaction can update it and get an - implicit x-lock on rec until mtr_commit(mtr). */ - - heap = mem_heap_create(1024); - - trx_undo_prev_version_build( - clust_rec, mtr, version, clust_index, clust_offsets, - heap, &prev_version); - - /* The oldest visible clustered index version must not be - delete-marked, because we never start a transaction by - inserting a delete-marked record. */ - ut_ad(prev_version - || !rec_get_deleted_flag(version, comp) - || !trx_rw_is_active(trx_id, NULL)); - - /* Free version and clust_offsets. */ - mem_heap_free(old_heap); - - if (prev_version == NULL) { - - /* We reached the oldest visible version without - finding an older version of clust_rec that would - match the secondary index record. If the secondary - index record is not delete marked, then clust_rec - is considered the correct match of the secondary - index record and hence holds the implicit lock. */ - - if (rec_del) { - /* The secondary index record is del marked. - So, the implicit lock holder of clust_rec - did not modify the secondary index record yet, - and is not holding an implicit lock on it. - - This assumes that whenever a row is inserted - or updated, the leaf page record always is - created with a clear delete-mark flag. - (We never insert a delete-marked record.) 
*/ - trx_id = 0; - } - - break; - } - - clust_offsets = rec_get_offsets( - prev_version, clust_index, NULL, ULINT_UNDEFINED, - &heap); - - vers_del = rec_get_deleted_flag(prev_version, comp); - - prev_trx_id = row_get_rec_trx_id(prev_version, clust_index, - clust_offsets); - - /* The stack of versions is locked by mtr. Thus, it - is safe to fetch the prefixes for externally stored - columns. */ - - row = row_build(ROW_COPY_POINTERS, clust_index, prev_version, - clust_offsets, - NULL, NULL, NULL, &ext, heap); - - entry = row_build_index_entry(row, ext, index, heap); - - /* entry may be NULL if a record was inserted in place - of a deleted record, and the BLOB pointers of the new - record were not initialized yet. But in that case, - prev_version should be NULL. */ - - ut_a(entry != NULL); - - /* If we get here, we know that the trx_id transaction - modified prev_version. Let us check if prev_version - would require rec to be in a different state. */ - - /* The previous version of clust_rec must be - accessible, because clust_rec was not a fresh insert. - There is no guarantee that the transaction is still - active. */ - - /* We check if entry and rec are identified in the alphabetical - ordering */ - - if (!trx_rw_is_active(trx_id, &corrupt)) { - /* Transaction no longer active: no implicit - x-lock. This situation should only be possible - because we are not holding lock_sys->mutex. */ - ut_ad(!lock_mutex_own()); - if (corrupt) { - lock_report_trx_id_insanity( - trx_id, - prev_version, clust_index, - clust_offsets, - trx_sys_get_max_trx_id()); - } - trx_id = 0; - break; - } else if (0 == cmp_dtuple_rec(entry, rec, offsets)) { - /* The delete marks of rec and prev_version should be - equal for rec to be in the state required by - prev_version */ - - if (rec_del != vers_del) { - - break; - } - - /* It is possible that the row was updated so that the - secondary index record remained the same in - alphabetical ordering, but the field values changed - still. 
For example, 'abc' -> 'ABC'. Check also that. */ - - dtuple_set_types_binary( - entry, dtuple_get_n_fields(entry)); - - if (0 != cmp_dtuple_rec(entry, rec, offsets)) { - - break; - } - - } else if (!rec_del) { - /* The delete mark should be set in rec for it to be - in the state required by prev_version */ - - break; - } - - if (trx_id != prev_trx_id) { - /* prev_version was the first version modified by - the trx_id transaction: no implicit x-lock */ - - trx_id = 0; - break; - } - } - - DBUG_PRINT("info", ("Implicit lock is held by trx:%lu", - static_cast<unsigned long>(trx_id))); - - mem_heap_free(heap); - DBUG_RETURN(trx_id); -} - -/*****************************************************************//** -Finds out if an active transaction has inserted or modified a secondary -index record. -@return 0 if committed, else the active transaction id; -NOTE that this function can return false positives but never false -negatives. The caller must confirm all positive results by calling -trx_is_active() while holding lock_sys->mutex. */ -UNIV_INTERN -trx_id_t -row_vers_impl_x_locked( -/*===================*/ - const rec_t* rec, /*!< in: record in a secondary index */ - dict_index_t* index, /*!< in: the secondary index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - dict_index_t* clust_index; - const rec_t* clust_rec; - trx_id_t trx_id; - mtr_t mtr; - - ut_ad(!lock_mutex_own()); - ut_ad(!mutex_own(&trx_sys->mutex)); - - mtr_start(&mtr); - - /* Search for the clustered index record. The latch on the - page of clust_rec locks the top of the stack of versions. The - bottom of the version stack is not locked; oldest versions may - disappear by the fact that transactions may be committed and - collected by the purge. This is not a problem, because we are - only interested in active transactions. 
*/ - - clust_rec = row_get_clust_rec( - BTR_SEARCH_LEAF, rec, index, &clust_index, &mtr); - - if (UNIV_UNLIKELY(!clust_rec)) { - /* In a rare case it is possible that no clust rec is found - for a secondary index record: if in row0umod.cc - row_undo_mod_remove_clust_low() we have already removed the - clust rec, while purge is still cleaning and removing - secondary index records associated with earlier versions of - the clustered index record. In that case there cannot be - any implicit lock on the secondary index record, because - an active transaction which has modified the secondary index - record has also modified the clustered index record. And in - a rollback we always undo the modifications to secondary index - records before the clustered index record. */ - - trx_id = 0; - } else { - trx_id = row_vers_impl_x_locked_low( - clust_rec, clust_index, rec, index, offsets, &mtr); - } - - mtr_commit(&mtr); - - return(trx_id); -} - -/*****************************************************************//** -Finds out if we must preserve a delete marked earlier version of a clustered -index record, because it is >= the purge view. -@return TRUE if earlier version should be preserved */ -UNIV_INTERN -ibool -row_vers_must_preserve_del_marked( -/*==============================*/ - trx_id_t trx_id, /*!< in: transaction id in the version */ - mtr_t* mtr) /*!< in: mtr holding the latch on the - clustered index record; it will also - hold the latch on purge_view */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - mtr_s_lock(&(purge_sys->latch), mtr); - - return(!read_view_sees_trx_id(purge_sys->view, trx_id)); -} - -/*****************************************************************//** -Finds out if a version of the record, where the version >= the current -purge view, should have ientry as its secondary index entry. 
We check -if there is any not delete marked version of the record where the trx -id >= purge view, and the secondary index entry and ientry are identified in -the alphabetical ordering; exactly in this case we return TRUE. -@return TRUE if earlier version should have */ -UNIV_INTERN -ibool -row_vers_old_has_index_entry( -/*=========================*/ - ibool also_curr,/*!< in: TRUE if also rec is included in the - versions to search; otherwise only versions - prior to it are searched */ - const rec_t* rec, /*!< in: record in the clustered index; the - caller must have a latch on the page */ - mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will - also hold the latch on purge_view */ - dict_index_t* index, /*!< in: the secondary index */ - const dtuple_t* ientry) /*!< in: the secondary index entry */ -{ - const rec_t* version; - rec_t* prev_version; - dict_index_t* clust_index; - ulint* clust_offsets; - mem_heap_t* heap; - mem_heap_t* heap2; - const dtuple_t* row; - const dtuple_t* entry; - ulint comp; - - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - clust_index = dict_table_get_first_index(index->table); - - comp = page_rec_is_comp(rec); - ut_ad(!dict_table_is_comp(index->table) == !comp); - heap = mem_heap_create(1024); - clust_offsets = rec_get_offsets(rec, clust_index, NULL, - ULINT_UNDEFINED, &heap); - - if (also_curr && !rec_get_deleted_flag(rec, comp)) { - row_ext_t* ext; - - /* The top of the stack of versions is locked by the - mtr holding a latch on the page containing the - clustered index record. The bottom of the stack is - locked by the fact that the purge_sys->view must - 'overtake' any read view of an active transaction. - Thus, it is safe to fetch the prefixes for - externally stored columns. 
*/ - row = row_build(ROW_COPY_POINTERS, clust_index, - rec, clust_offsets, - NULL, NULL, NULL, &ext, heap); - entry = row_build_index_entry(row, ext, index, heap); - - /* If entry == NULL, the record contains unset BLOB - pointers. This must be a freshly inserted record. If - this is called from - row_purge_remove_sec_if_poss_low(), the thread will - hold latches on the clustered index and the secondary - index. Because the insert works in three steps: - - (1) insert the record to clustered index - (2) store the BLOBs and update BLOB pointers - (3) insert records to secondary indexes - - the purge thread can safely ignore freshly inserted - records and delete the secondary index record. The - thread that inserted the new record will be inserting - the secondary index records. */ - - /* NOTE that we cannot do the comparison as binary - fields because the row is maybe being modified so that - the clustered index record has already been updated to - a different binary value in a char field, but the - collation identifies the old and new value anyway! */ - if (entry && !dtuple_coll_cmp(ientry, entry)) { - - mem_heap_free(heap); - - return(TRUE); - } - } - - version = rec; - - for (;;) { - heap2 = heap; - heap = mem_heap_create(1024); - trx_undo_prev_version_build(rec, mtr, version, - clust_index, clust_offsets, - heap, &prev_version); - mem_heap_free(heap2); /* free version and clust_offsets */ - - if (!prev_version) { - /* Versions end here */ - - mem_heap_free(heap); - - return(FALSE); - } - - clust_offsets = rec_get_offsets(prev_version, clust_index, - NULL, ULINT_UNDEFINED, &heap); - - if (!rec_get_deleted_flag(prev_version, comp)) { - row_ext_t* ext; - - /* The stack of versions is locked by mtr. - Thus, it is safe to fetch the prefixes for - externally stored columns. 
*/ - row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, clust_offsets, - NULL, NULL, NULL, &ext, heap); - entry = row_build_index_entry(row, ext, index, heap); - - /* If entry == NULL, the record contains unset - BLOB pointers. This must be a freshly - inserted record that we can safely ignore. - For the justification, see the comments after - the previous row_build_index_entry() call. */ - - /* NOTE that we cannot do the comparison as binary - fields because maybe the secondary index record has - already been updated to a different binary value in - a char field, but the collation identifies the old - and new value anyway! */ - - if (entry && !dtuple_coll_cmp(ientry, entry)) { - - mem_heap_free(heap); - - return(TRUE); - } - } - - version = prev_version; - } -} - -/*****************************************************************//** -Constructs the version of a clustered index record which a consistent -read should see. We assume that the trx id stored in rec is such that -the consistent read should not see rec in its present version. 
-@return DB_SUCCESS or DB_MISSING_HISTORY */ -UNIV_INTERN -dberr_t -row_vers_build_for_consistent_read( -/*===============================*/ - const rec_t* rec, /*!< in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this records */ - mtr_t* mtr, /*!< in: mtr holding the latch on rec */ - dict_index_t* index, /*!< in: the clustered index */ - ulint** offsets,/*!< in/out: offsets returned by - rec_get_offsets(rec, index) */ - read_view_t* view, /*!< in: the consistent read view */ - mem_heap_t** offset_heap,/*!< in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/*!< in: memory heap from which the memory for - *old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - rec_t** old_vers)/*!< out, own: old version, or NULL - if the history is missing or the record - does not exist in the view, that is, - it was freshly inserted afterwards */ -{ - const rec_t* version; - rec_t* prev_version; - trx_id_t trx_id; - mem_heap_t* heap = NULL; - byte* buf; - dberr_t err; - - ut_ad(dict_index_is_clust(index)); - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(rec_offs_validate(rec, index, *offsets)); - - trx_id = row_get_rec_trx_id(rec, index, *offsets); - - ut_ad(!read_view_sees_trx_id(view, trx_id)); - - version = rec; - - for (;;) { - mem_heap_t* heap2 = heap; - trx_undo_rec_t* undo_rec; - roll_ptr_t roll_ptr; - undo_no_t undo_no; - heap = mem_heap_create(1024); - - /* If we have high-granularity consistent read view and - creating transaction of the view is the same as trx_id in - the record we see this record only in the case when - undo_no of the record is < undo_no in the view. 
*/ - - if (view->type == VIEW_HIGH_GRANULARITY - && view->creator_trx_id == trx_id) { - - roll_ptr = row_get_rec_roll_ptr(version, index, - *offsets); - undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); - undo_no = trx_undo_rec_get_undo_no(undo_rec); - mem_heap_empty(heap); - - if (view->undo_no > undo_no) { - /* The view already sees this version: we can - copy it to in_heap and return */ - -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern( - version, *offsets)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - buf = static_cast<byte*>(mem_heap_alloc( - in_heap, rec_offs_size(*offsets))); - - *old_vers = rec_copy(buf, version, *offsets); - rec_offs_make_valid(*old_vers, index, - *offsets); - err = DB_SUCCESS; - break; - } - } - - err = trx_undo_prev_version_build(rec, mtr, version, index, - *offsets, heap, - &prev_version) - ? DB_SUCCESS : DB_MISSING_HISTORY; - if (heap2) { - mem_heap_free(heap2); /* free version */ - } - - if (prev_version == NULL) { - /* It was a freshly inserted version */ - *old_vers = NULL; - break; - } - - *offsets = rec_get_offsets(prev_version, index, *offsets, - ULINT_UNDEFINED, offset_heap); - -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(prev_version, *offsets)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - trx_id = row_get_rec_trx_id(prev_version, index, *offsets); - - if (read_view_sees_trx_id(view, trx_id)) { - - /* The view already sees this version: we can copy - it to in_heap and return */ - - buf = static_cast<byte*>( - mem_heap_alloc( - in_heap, rec_offs_size(*offsets))); - - *old_vers = rec_copy(buf, prev_version, *offsets); - rec_offs_make_valid(*old_vers, index, *offsets); - break; - } - - version = prev_version; - }/* for (;;) */ - - mem_heap_free(heap); - - return(err); -} - -/*****************************************************************//** -Constructs the last committed version of a clustered index record, -which 
should be seen by a semi-consistent read. */ -UNIV_INTERN -void -row_vers_build_for_semi_consistent_read( -/*====================================*/ - const rec_t* rec, /*!< in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this records */ - mtr_t* mtr, /*!< in: mtr holding the latch on rec */ - dict_index_t* index, /*!< in: the clustered index */ - ulint** offsets,/*!< in/out: offsets returned by - rec_get_offsets(rec, index) */ - mem_heap_t** offset_heap,/*!< in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/*!< in: memory heap from which the memory for - *old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - const rec_t** old_vers)/*!< out: rec, old version, or NULL if the - record does not exist in the view, that is, - it was freshly inserted afterwards */ -{ - const rec_t* version; - mem_heap_t* heap = NULL; - byte* buf; - trx_id_t rec_trx_id = 0; - - ut_ad(dict_index_is_clust(index)); - ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(rec_offs_validate(rec, index, *offsets)); - - version = rec; - - for (;;) { - trx_id_t* version_trx_descr; - mem_heap_t* heap2; - rec_t* prev_version; - trx_id_t version_trx_id; - - version_trx_id = row_get_rec_trx_id(version, index, *offsets); - if (rec == version) { - rec_trx_id = version_trx_id; - } - - mutex_enter(&trx_sys->mutex); - version_trx_descr = trx_find_descriptor(trx_sys->descriptors, - trx_sys->descr_n_used, - version_trx_id); - /* Because version_trx is a read-write transaction, - its state cannot change from or to NOT_STARTED while - we are holding the trx_sys->mutex. It may change from - ACTIVE to PREPARED or COMMITTED. 
*/ - mutex_exit(&trx_sys->mutex); - - if (!version_trx_descr) { -committed_version_trx: - /* We found a version that belongs to a - committed transaction: return it. */ - -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(version, *offsets)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - if (rec == version) { - *old_vers = rec; - break; - } - - /* We assume that a rolled-back transaction stays in - TRX_STATE_ACTIVE state until all the changes have been - rolled back and the transaction is removed from - the global list of transactions. */ - - if (rec_trx_id == version_trx_id) { - /* The transaction was committed while - we searched for earlier versions. - Return the current version as a - semi-consistent read. */ - - version = rec; - *offsets = rec_get_offsets(version, - index, *offsets, - ULINT_UNDEFINED, - offset_heap); - } - - buf = static_cast<byte*>( - mem_heap_alloc( - in_heap, rec_offs_size(*offsets))); - - *old_vers = rec_copy(buf, version, *offsets); - rec_offs_make_valid(*old_vers, index, *offsets); - break; - } - - DEBUG_SYNC_C("after_row_vers_check_trx_active"); - - heap2 = heap; - heap = mem_heap_create(1024); - - if (!trx_undo_prev_version_build(rec, mtr, version, index, - *offsets, heap, - &prev_version)) { - mem_heap_free(heap); - heap = heap2; - heap2 = NULL; - goto committed_version_trx; - } - - if (heap2) { - mem_heap_free(heap2); /* free version */ - } - - if (prev_version == NULL) { - /* It was a freshly inserted version */ - *old_vers = NULL; - break; - } - - version = prev_version; - *offsets = rec_get_offsets(version, index, *offsets, - ULINT_UNDEFINED, offset_heap); -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(version, *offsets)); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - }/* for (;;) */ - - if (heap) { - mem_heap_free(heap); - } -} diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc deleted file mode 100644 index 
e90f744cfa4..00000000000 --- a/storage/xtradb/srv/srv0conc.cc +++ /dev/null @@ -1,713 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file srv/srv0conc.cc - -InnoDB concurrency manager - -Created 2011/04/18 Sunny Bains -*******************************************************/ - -#include "srv0srv.h" -#include "sync0sync.h" -#include "btr0types.h" -#include "trx0trx.h" - -#include <mysql/plugin.h> -#include <mysql/service_wsrep.h> - -/** Number of times a thread is allowed to enter InnoDB within the same -SQL query after it has once got the ticket. */ -UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; - -#ifdef HAVE_ATOMIC_BUILTINS -/** Maximum sleep delay (in micro-seconds), value of 0 disables it. */ -UNIV_INTERN ulong srv_adaptive_max_sleep_delay = 150000; -#endif /* HAVE_ATOMIC_BUILTINS */ - -UNIV_INTERN ulong srv_thread_sleep_delay = 10000; - - -/** We are prepared for a situation that we have this many threads waiting for -a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the -value. */ - -UNIV_INTERN ulint srv_max_n_threads = 0; - -/** The following controls how many threads we let inside InnoDB concurrently: -threads waiting for locks are not counted into the number because otherwise -we could get a deadlock. Value of 0 will disable the concurrency check. */ - -UNIV_INTERN ulong srv_thread_concurrency = 0; - -#ifndef HAVE_ATOMIC_BUILTINS - -/** This mutex protects srv_conc data structures */ -static os_fast_mutex_t srv_conc_mutex; - -/** Concurrency list node */ -typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t; - -/** Slot for a thread waiting in the concurrency control queue. 
*/ -struct srv_conc_slot_t{ - os_event_t event; /*!< event to wait for; - os_event_set() and os_event_reset() - are protected by srv_conc_mutex */ - ibool reserved; /*!< TRUE if slot - reserved */ - ibool wait_ended; /*!< TRUE when another thread has - already set the event and the thread - in this slot is free to proceed; but - reserved may still be TRUE at that - point */ - srv_conc_node_t srv_conc_queue; /*!< queue node */ -#ifdef WITH_WSREP - void *thd; /*!< to see priority */ -#endif -}; - -/** Queue of threads waiting to get in */ -typedef UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue_t; - -static srv_conc_queue_t srv_conc_queue; - -/** Array of wait slots */ -static srv_conc_slot_t* srv_conc_slots; - -#if defined(UNIV_PFS_MUTEX) -/* Key to register srv_conc_mutex_key with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#endif /* !HAVE_ATOMIC_BUILTINS */ - -/** Variables tracking the active and waiting threads. */ -struct srv_conc_t { - char pad[CACHE_LINE_SIZE - (sizeof(ulint) + sizeof(lint))]; - - /** Number of transactions that have declared_to_be_inside_innodb set. - It used to be a non-error for this value to drop below zero temporarily. - This is no longer true. We'll, however, keep the lint datatype to add - assertions to catch any corner cases that we may have missed. */ - - volatile lint n_active; - - /** Number of OS threads waiting in the FIFO for permission to - enter InnoDB */ - volatile lint n_waiting; -}; - -/* Control variables for tracking concurrency. 
*/ -static srv_conc_t srv_conc; - -/*********************************************************************//** -Initialise the concurrency management data structures */ -void -srv_conc_init(void) -/*===============*/ -{ -#ifndef HAVE_ATOMIC_BUILTINS - ulint i; - - /* Init the server concurrency restriction data structures */ - - os_fast_mutex_init(srv_conc_mutex_key, &srv_conc_mutex); - - UT_LIST_INIT(srv_conc_queue); - - srv_conc_slots = static_cast<srv_conc_slot_t*>( - mem_zalloc(OS_THREAD_MAX_N * sizeof(*srv_conc_slots))); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - srv_conc_slot_t* conc_slot = &srv_conc_slots[i]; - - conc_slot->event = os_event_create(); - ut_a(conc_slot->event); -#ifdef WITH_WSREP - conc_slot->thd = NULL; -#endif /* WITH_WSREP */ - } -#endif /* !HAVE_ATOMIC_BUILTINS */ -} - -/*********************************************************************//** -Free the concurrency management data structures */ -void -srv_conc_free(void) -/*===============*/ -{ -#ifndef HAVE_ATOMIC_BUILTINS - os_fast_mutex_free(&srv_conc_mutex); - - for (ulint i = 0; i < OS_THREAD_MAX_N; i++) - os_event_free(srv_conc_slots[i].event); - - mem_free(srv_conc_slots); - srv_conc_slots = NULL; -#endif /* !HAVE_ATOMIC_BUILTINS */ -} - -#ifdef HAVE_ATOMIC_BUILTINS -/*********************************************************************//** -Note that a user thread is entering InnoDB. */ -static -void -srv_enter_innodb_with_tickets( -/*==========================*/ - trx_t* trx) /*!< in/out: transaction that wants - to enter InnoDB */ -{ - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; -} - -/*********************************************************************//** -Handle the scheduling of a user thread that wants to enter InnoDB. Setting -srv_adaptive_max_sleep_delay > 0 switches the adaptive sleep calibration to -ON. When set, we want to wait in the queue for as little time as possible. 
-However, very short waits will result in a lot of context switches and that -is also not desirable. When threads need to sleep multiple times we increment -os_thread_sleep_delay by one. When we see threads getting a slot without -waiting and there are no other threads waiting in the queue, we try and reduce -the wait as much as we can. Currently we reduce it by half each time. If the -thread only had to wait for one turn before it was able to enter InnoDB we -decrement it by one. This is to try and keep the sleep time stable around the -"optimum" sleep time. */ -static -void -srv_conc_enter_innodb_with_atomics( -/*===============================*/ - trx_t* trx) /*!< in/out: transaction that wants - to enter InnoDB */ -{ - ulint n_sleeps = 0; - ibool notified_mysql = FALSE; - - ut_a(!trx->declared_to_be_inside_innodb); - - for (;;) { - ulint sleep_in_us; -#ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_trx_is_aborting(trx->mysql_thd)) { - if (wsrep_debug) - fprintf(stderr, - "srv_conc_enter due to MUST_ABORT"); - srv_conc_force_enter_innodb(trx); - return; - } -#endif /* WITH_WSREP */ - - if (srv_conc.n_active < (lint) srv_thread_concurrency) { - ulint n_active; - - /* Check if there are any free tickets. */ - n_active = os_atomic_increment_lint( - &srv_conc.n_active, 1); - - if (n_active <= srv_thread_concurrency) { - - srv_enter_innodb_with_tickets(trx); - - if (notified_mysql) { - - (void) os_atomic_decrement_lint( - &srv_conc.n_waiting, 1); - - thd_wait_end(trx->mysql_thd); - } - - if (srv_adaptive_max_sleep_delay > 0) { - if (srv_thread_sleep_delay > 20 - && n_sleeps == 1) { - - --srv_thread_sleep_delay; - } - - if (srv_conc.n_waiting == 0) { - srv_thread_sleep_delay >>= 1; - } - } - - return; - } - - /* Since there were no free seats, we relinquish - the overbooked ticket. 
*/ - - (void) os_atomic_decrement_lint( - &srv_conc.n_active, 1); - } - - if (!notified_mysql) { - (void) os_atomic_increment_lint( - &srv_conc.n_waiting, 1); - - /* Release possible search system latch this - thread has */ - - if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } - - thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK); - - notified_mysql = TRUE; - } - - trx->op_info = "sleeping before entering InnoDB"; - - sleep_in_us = srv_thread_sleep_delay; - - /* Guard against overflow when adaptive sleep delay is on. */ - - if (srv_adaptive_max_sleep_delay > 0 - && sleep_in_us > srv_adaptive_max_sleep_delay) { - - sleep_in_us = srv_adaptive_max_sleep_delay; - srv_thread_sleep_delay = static_cast<ulong>(sleep_in_us); - } - - os_thread_sleep(sleep_in_us); - trx->innodb_que_wait_timer += sleep_in_us; - - trx->op_info = ""; - - ++n_sleeps; - - if (srv_adaptive_max_sleep_delay > 0 && n_sleeps > 1) { - ++srv_thread_sleep_delay; - } - } -} - -/*********************************************************************//** -Note that a user thread is leaving InnoDB code. */ -static -void -srv_conc_exit_innodb_with_atomics( -/*==============================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - trx->n_tickets_to_enter_innodb = 0; - trx->declared_to_be_inside_innodb = FALSE; - - (void) os_atomic_decrement_lint(&srv_conc.n_active, 1); -} -#else -/*********************************************************************//** -Note that a user thread is leaving InnoDB code. 
*/ -static -void -srv_conc_exit_innodb_without_atomics( -/*=================================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - srv_conc_slot_t* slot; - - os_fast_mutex_lock(&srv_conc_mutex); - - ut_ad(srv_conc.n_active > 0); - srv_conc.n_active--; - trx->declared_to_be_inside_innodb = FALSE; - trx->n_tickets_to_enter_innodb = 0; - - slot = NULL; - - if (srv_conc.n_active < (lint) srv_thread_concurrency) { -#ifdef WITH_WSREP - srv_conc_slot_t* wsrep_slot; -#endif - /* Look for a slot where a thread is waiting and no other - thread has yet released the thread */ - - for (slot = UT_LIST_GET_FIRST(srv_conc_queue); - slot != NULL && slot->wait_ended == TRUE; - slot = UT_LIST_GET_NEXT(srv_conc_queue, slot)) { - - /* No op */ - } - -#ifdef WITH_WSREP - /* look for aborting trx, they must be released asap */ - wsrep_slot= slot; - while (wsrep_slot && (wsrep_slot->wait_ended == TRUE || - !wsrep_trx_is_aborting(wsrep_slot->thd))) { - wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot); - } - if (wsrep_slot) { - slot = wsrep_slot; - if (wsrep_debug) - fprintf(stderr, "WSREP: releasing aborting thd\n"); - } -#endif - if (slot != NULL) { - slot->wait_ended = TRUE; - - /* We increment the count on behalf of the released - thread */ - - srv_conc.n_active++; - } - } - - if (slot != NULL) { - os_event_set(slot->event); - } - - os_fast_mutex_unlock(&srv_conc_mutex); -} - -/*********************************************************************//** -Handle the scheduling of a user thread that wants to enter InnoDB. 
*/ -static -void -srv_conc_enter_innodb_without_atomics( -/*==================================*/ - trx_t* trx) /*!< in/out: transaction that wants - to enter InnoDB */ -{ - ulint i; - srv_conc_slot_t* slot = NULL; - ibool has_slept = FALSE; - ib_uint64_t start_time = 0L; - ib_uint64_t finish_time = 0L; - ulint sec; - ulint ms; - - os_fast_mutex_lock(&srv_conc_mutex); -retry: - if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) { - os_fast_mutex_unlock(&srv_conc_mutex); - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: trying to declare trx" - " to enter InnoDB, but\n" - "InnoDB: it already is declared.\n", stderr); - trx_print(stderr, trx, 0); - putc('\n', stderr); - return; - } - - ut_ad(srv_conc.n_active >= 0); - - if (srv_conc.n_active < (lint) srv_thread_concurrency) { - - srv_conc.n_active++; - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; - - os_fast_mutex_unlock(&srv_conc_mutex); - - return; - } -#ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_thd_is_brute_force(trx->mysql_thd)) { - srv_conc_force_enter_innodb(trx); - return; - } -#endif - - /* If the transaction is not holding resources, let it sleep - for srv_thread_sleep_delay microseconds, and try again then */ - - if (!has_slept && !trx->has_search_latch - && NULL == UT_LIST_GET_FIRST(trx->lock.trx_locks)) { - - has_slept = TRUE; /* We let it sleep only once to avoid - starvation */ - - srv_conc.n_waiting++; - - os_fast_mutex_unlock(&srv_conc_mutex); - - trx->op_info = "sleeping before joining InnoDB queue"; - - /* Peter Zaitsev suggested that we take the sleep away - altogether. But the sleep may be good in pathological - situations of lots of thread switches. Simply put some - threads aside for a while to reduce the number of thread - switches. 
*/ - if (srv_thread_sleep_delay > 0) { - os_thread_sleep(srv_thread_sleep_delay); - trx->innodb_que_wait_timer += sleep_in_us; - } - - trx->op_info = ""; - - os_fast_mutex_lock(&srv_conc_mutex); - - srv_conc.n_waiting--; - - goto retry; - } - - /* Too many threads inside: put the current thread to a queue */ - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - slot = srv_conc_slots + i; - - if (!slot->reserved) { - - break; - } - } - - if (i == OS_THREAD_MAX_N) { - /* Could not find a free wait slot, we must let the - thread enter */ - - srv_conc.n_active++; - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = 0; - - os_fast_mutex_unlock(&srv_conc_mutex); - - return; - } - - /* Release possible search system latch this thread has */ - if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } - - /* Add to the queue */ - slot->reserved = TRUE; - slot->wait_ended = FALSE; -#ifdef WITH_WSREP - slot->thd = trx->mysql_thd; -#endif - - UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); - - os_event_reset(slot->event); - - srv_conc.n_waiting++; - -#ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - wsrep_trx_is_aborting(trx->mysql_thd)) { - os_fast_mutex_unlock(&srv_conc_mutex); - if (wsrep_debug) - fprintf(stderr, "srv_conc_enter due to MUST_ABORT"); - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; - return; - } - trx->wsrep_event = slot->event; -#endif /* WITH_WSREP */ - os_fast_mutex_unlock(&srv_conc_mutex); - - /* Go to wait for the event; when a thread leaves InnoDB it will - release this thread */ - - ut_ad(!trx->has_search_latch); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ - - if (UNIV_UNLIKELY(trx->take_stats)) { - ut_usectime(&sec, &ms); - start_time = (ib_uint64_t)sec * 1000000 + ms; - } else { - start_time = 0; - } - - trx->op_info = "waiting in InnoDB queue"; - - 
thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK); - - os_event_wait(slot->event); - thd_wait_end(trx->mysql_thd); -#ifdef WITH_WSREP - trx->wsrep_event = NULL; -#endif /* WITH_WSREP */ - - trx->op_info = ""; - - if (UNIV_UNLIKELY(start_time != 0)) { - ut_usectime(&sec, &ms); - finish_time = (ib_uint64_t)sec * 1000000 + ms; - trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); - } - - os_fast_mutex_lock(&srv_conc_mutex); - - srv_conc.n_waiting--; - - /* NOTE that the thread which released this thread already - incremented the thread counter on behalf of this thread */ - - slot->reserved = FALSE; -#ifdef WITH_WSREP - slot->thd = NULL; -#endif - - UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); - - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; - - os_fast_mutex_unlock(&srv_conc_mutex); -} -#endif /* HAVE_ATOMIC_BUILTINS */ - -/*********************************************************************//** -Puts an OS thread to wait if there are too many concurrent threads -(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ -UNIV_INTERN -void -srv_conc_enter_innodb( -/*==================*/ - trx_t* trx) /*!< in: transaction object associated with the - thread */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ - -#ifdef HAVE_ATOMIC_BUILTINS - srv_conc_enter_innodb_with_atomics(trx); -#else - srv_conc_enter_innodb_without_atomics(trx); -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/*********************************************************************//** -This lets a thread enter InnoDB regardless of the number of threads inside -InnoDB. This must be called when a thread ends a lock wait. 
*/ -UNIV_INTERN -void -srv_conc_force_enter_innodb( -/*========================*/ - trx_t* trx) /*!< in: transaction object associated with the - thread */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!srv_thread_concurrency) { - - return; - } - - ut_ad(srv_conc.n_active >= 0); - -#ifdef HAVE_ATOMIC_BUILTINS - (void) os_atomic_increment_lint(&srv_conc.n_active, 1); -#else - os_fast_mutex_lock(&srv_conc_mutex); - ++srv_conc.n_active; - os_fast_mutex_unlock(&srv_conc_mutex); -#endif /* HAVE_ATOMIC_BUILTINS */ - - trx->n_tickets_to_enter_innodb = 1; - trx->declared_to_be_inside_innodb = TRUE; -} - -/*********************************************************************//** -This must be called when a thread exits InnoDB in a lock wait or at the -end of an SQL statement. */ -UNIV_INTERN -void -srv_conc_force_exit_innodb( -/*=======================*/ - trx_t* trx) /*!< in: transaction object associated with the - thread */ -{ - if ((trx->mysql_thd != NULL - && thd_is_replication_slave_thread(trx->mysql_thd)) - || trx->declared_to_be_inside_innodb == FALSE) { - - return; - } - -#ifdef HAVE_ATOMIC_BUILTINS - srv_conc_exit_innodb_with_atomics(trx); -#else - srv_conc_exit_innodb_without_atomics(trx); -#endif /* HAVE_ATOMIC_BUILTINS */ - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); -#endif /* UNIV_SYNC_DEBUG */ -} - -/*********************************************************************//** -Get the count of threads waiting inside InnoDB. */ -UNIV_INTERN -ulint -srv_conc_get_waiting_threads(void) -/*==============================*/ -{ - return(srv_conc.n_waiting); -} - -/*********************************************************************//** -Get the count of threads active inside InnoDB. 
*/ -UNIV_INTERN -ulint -srv_conc_get_active_threads(void) -/*==============================*/ -{ - return(srv_conc.n_active); -} - -#ifdef WITH_WSREP -UNIV_INTERN -void -wsrep_srv_conc_cancel_wait( -/*==================*/ - trx_t* trx) /*!< in: transaction object associated with the - thread */ -{ -#ifdef HAVE_ATOMIC_BUILTINS - /* aborting transactions will enter innodb by force in - srv_conc_enter_innodb_with_atomics(). No need to cancel here, - thr will wake up after os_sleep and let to enter innodb - */ - if (wsrep_debug) - fprintf(stderr, "WSREP: conc slot cancel, no atomics\n"); -#else - os_fast_mutex_lock(&srv_conc_mutex); - if (trx->wsrep_event) { - if (wsrep_debug) - fprintf(stderr, "WSREP: conc slot cancel\n"); - os_event_set(trx->wsrep_event); - } - os_fast_mutex_unlock(&srv_conc_mutex); -#endif -} -#endif /* WITH_WSREP */ - diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc deleted file mode 100644 index 47abae66192..00000000000 --- a/storage/xtradb/srv/srv0mon.cc +++ /dev/null @@ -1,2177 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file srv/srv0mon.cc -Database monitor counter interfaces - -Created 12/9/2009 Jimmy Yang -*******************************************************/ - -#ifndef UNIV_HOTBACKUP -#include "os0file.h" -#include "mach0data.h" -#include "srv0mon.h" -#include "srv0srv.h" -#include "buf0buf.h" -#include "trx0sys.h" -#include "trx0rseg.h" -#include "lock0lock.h" -#include "ibuf0ibuf.h" -#ifdef UNIV_NONINL -#include "srv0mon.ic" -#endif - -/* Macro to standardize the counter names for counters in the -"monitor_buf_page" module as they have very structured defines */ -#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \ - {"buffer_page_" op "_" name, "buffer_page_io", \ - "Number of " description " Pages " op, \ - MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \ - MONITOR_##code##_##op_code} - -#define MONITOR_BUF_PAGE_READ(name, description, code) \ - MONITOR_BUF_PAGE(name, description, code, "read", PAGE_READ) - -#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \ - MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN) - - -/** This array defines basic static information of monitor counters, -including each monitor's name, module it belongs to, a short -description and its property/type and corresponding monitor_id. -Please note: If you add a monitor here, please add its corresponding -monitor_id to "enum monitor_id_value" structure in srv0mon.h file. */ - -static monitor_info_t innodb_counter_info[] = -{ - /* A dummy item to mark the module start, this is - to accomodate the default value (0) set for the - global variables with the control system. 
*/ - {"module_start", "module_start", "module_start", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_DEFAULT_START}, - - /* ========== Counters for Server Metadata ========== */ - {"module_metadata", "metadata", "Server Metadata", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_METADATA}, - - {"metadata_table_handles_opened", "metadata", - "Number of table handles opened", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TABLE_OPEN}, - - {"metadata_table_handles_closed", "metadata", - "Number of table handles closed", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TABLE_CLOSE}, - - {"metadata_table_reference_count", "metadata", - "Table reference counter", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TABLE_REFERENCE}, - - {"metadata_mem_pool_size", "metadata", - "Size of a memory pool InnoDB uses to store data dictionary" - " and internal data structures in bytes", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_META_MEM_POOL}, - - /* ========== Counters for Lock Module ========== */ - {"module_lock", "lock", "Lock Module", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_LOCK}, - - {"lock_deadlocks", "lock", "Number of deadlocks", - MONITOR_DEFAULT_ON, - MONITOR_DEFAULT_START, MONITOR_DEADLOCK}, - - {"lock_timeouts", "lock", "Number of lock timeouts", - MONITOR_DEFAULT_ON, - MONITOR_DEFAULT_START, MONITOR_TIMEOUT}, - - {"lock_rec_lock_waits", "lock", - "Number of times enqueued into record lock wait queue", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LOCKREC_WAIT}, - - {"lock_table_lock_waits", "lock", - "Number of times enqueued into table lock wait queue", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TABLELOCK_WAIT}, - - {"lock_rec_lock_requests", "lock", - "Number of record locks requested", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK_REQ}, - - {"lock_rec_lock_created", "lock", "Number of record locks created", - 
MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_RECLOCK_CREATED}, - - {"lock_rec_lock_removed", "lock", - "Number of record locks removed from the lock queue", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_RECLOCK_REMOVED}, - - {"lock_rec_locks", "lock", - "Current number of record locks on tables", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK}, - - {"lock_table_lock_created", "lock", "Number of table locks created", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TABLELOCK_CREATED}, - - {"lock_table_lock_removed", "lock", - "Number of table locks removed from the lock queue", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TABLELOCK_REMOVED}, - - {"lock_table_locks", "lock", - "Current number of table locks on tables", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_NUM_TABLELOCK}, - - {"lock_row_lock_current_waits", "lock", - "Number of row locks currently being waited for" - " (innodb_row_lock_current_waits)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT}, - - {"lock_row_lock_time", "lock", - "Time spent in acquiring row locks, in milliseconds" - " (innodb_row_lock_time)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_WAIT_TIME}, - - {"lock_row_lock_time_max", "lock", - "The maximum time to acquire a row lock, in milliseconds" - " (innodb_row_lock_time_max)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_MAX_WAIT_TIME}, - - {"lock_row_lock_waits", "lock", - "Number of times a row lock had to be waited for" - " (innodb_row_lock_waits)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_WAIT}, - - {"lock_row_lock_time_avg", "lock", - "The average time to acquire a row lock, in milliseconds" - " (innodb_row_lock_time_avg)", - 
static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_AVG_WAIT_TIME}, - - /* ========== Counters for Buffer Manager and I/O ========== */ - {"module_buffer", "buffer", "Buffer Manager Module", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_BUFFER}, - - {"buffer_pool_size", "server", - "Server buffer pool size (all buffer pools) in bytes", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUFFER_POOL_SIZE}, - - {"buffer_pool_reads", "buffer", - "Number of reads directly from disk (innodb_buffer_pool_reads)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READS}, - - {"buffer_pool_read_requests", "buffer", - "Number of logical read requests (innodb_buffer_pool_read_requests)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_REQUESTS}, - - {"buffer_pool_write_requests", "buffer", - "Number of write requests (innodb_buffer_pool_write_requests)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST}, - - {"buffer_pool_wait_free", "buffer", - "Number of times waited for free buffer" - " (innodb_buffer_pool_wait_free)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WAIT_FREE}, - - {"buffer_pool_read_ahead", "buffer", - "Number of pages read as read ahead (innodb_buffer_pool_read_ahead)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD}, - - {"buffer_pool_read_ahead_evicted", "buffer", - "Read-ahead pages evicted without being accessed" - " (innodb_buffer_pool_read_ahead_evicted)", - static_cast<monitor_type_t>( 
- MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED}, - - {"buffer_pool_pages_total", "buffer", - "Total buffer pool size in pages (innodb_buffer_pool_pages_total)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_TOTAL}, - - {"buffer_pool_pages_misc", "buffer", - "Buffer pages for misc use such as row locks or the adaptive" - " hash index (innodb_buffer_pool_pages_misc)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_MISC}, - - {"buffer_pool_pages_data", "buffer", - "Buffer pages containing data (innodb_buffer_pool_pages_data)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA}, - - {"buffer_pool_bytes_data", "buffer", - "Buffer bytes containing data (innodb_buffer_pool_bytes_data)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA}, - - {"buffer_pool_pages_dirty", "buffer", - "Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY}, - - {"buffer_pool_bytes_dirty", "buffer", - "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY}, - - {"buffer_pool_pages_free", "buffer", - "Buffer pages currently free (innodb_buffer_pool_pages_free)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, 
MONITOR_OVLD_BUF_POOL_PAGES_FREE}, - - {"buffer_pages_created", "buffer", - "Number of pages created (innodb_pages_created)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_CREATED}, - - {"buffer_pages_written", "buffer", - "Number of pages written (innodb_pages_written)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN}, - - {"buffer_index_pages_written", "buffer", - "Number of index pages written (innodb_index_pages_written)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, - - {"buffer_non_index_pages_written", "buffer", - "Number of non index pages written (innodb_non_index_pages_written)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN}, - - {"buffer_pages_read", "buffer", - "Number of pages read (innodb_pages_read)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ}, - - {"buffer_pages0_read", "buffer", - "Number of page 0 read (innodb_pages0_read)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES0_READ}, - - {"buffer_index_sec_rec_cluster_reads", "buffer", - "Number of secondary record reads triggered cluster read", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS}, - - {"buffer_index_sec_rec_cluster_reads_avoided", "buffer", - "Number of secondary record reads avoided triggering cluster read", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED}, - - {"buffer_data_reads", "buffer", - "Amount of data read in bytes 
(innodb_data_reads)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_READ}, - - {"buffer_data_written", "buffer", - "Amount of data written in bytes (innodb_data_written)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_WRITTEN}, - - /* Cumulative counter for scanning in flush batches */ - {"buffer_flush_batch_scanned", "buffer", - "Total pages scanned as part of flush batch", - MONITOR_SET_OWNER, - MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, - MONITOR_FLUSH_BATCH_SCANNED}, - - {"buffer_flush_batch_num_scan", "buffer", - "Number of times buffer flush list flush is called", - MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED, - MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL}, - - {"buffer_flush_batch_scanned_per_call", "buffer", - "Pages scanned per flush batch scan", - MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED, - MONITOR_FLUSH_BATCH_SCANNED_PER_CALL}, - - {"buffer_flush_batch_rescan", "buffer", - "Number of times rescan of flush list forced", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_HP_RESCAN}, - - /* Cumulative counter for pages flushed in flush batches */ - {"buffer_flush_batch_total_pages", "buffer", - "Total pages flushed as part of flush batch", - MONITOR_SET_OWNER, MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_TOTAL_PAGE}, - - {"buffer_flush_batches", "buffer", - "Number of flush batches", - MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT}, - - {"buffer_flush_batch_pages", "buffer", - "Pages queued as a flush batch", - MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_PAGES}, - - /* Cumulative counter for flush batches because of neighbor */ - {"buffer_flush_neighbor_total_pages", "buffer", - "Total neighbors flushed as part of neighbor flush", - MONITOR_SET_OWNER, MONITOR_FLUSH_NEIGHBOR_COUNT, - MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE}, - - {"buffer_flush_neighbor", 
"buffer", - "Number of times neighbors flushing is invoked", - MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, - MONITOR_FLUSH_NEIGHBOR_COUNT}, - - {"buffer_flush_neighbor_pages", "buffer", - "Pages queued as a neighbor batch", - MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, - MONITOR_FLUSH_NEIGHBOR_PAGES}, - - {"buffer_flush_n_to_flush_requested", "buffer", - "Number of pages requested for flushing.", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED}, - - {"buffer_flush_avg_page_rate", "buffer", - "Average number of pages at which flushing is happening", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE}, - - {"buffer_flush_lsn_avg_rate", "buffer", - "Average redo generation rate", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE}, - - {"buffer_flush_pct_for_dirty", "buffer", - "Percent of IO capacity used to avoid max dirty page limit", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY}, - - {"buffer_flush_pct_for_lsn", "buffer", - "Percent of IO capacity used to avoid reusable redo space limit", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN}, - - {"buffer_flush_sync_waits", "buffer", - "Number of times a wait happens due to sync flushing", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS}, - - /* Cumulative counter for flush batches for adaptive flushing */ - {"buffer_flush_adaptive_total_pages", "buffer", - "Total pages flushed as part of adaptive flushing", - MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT, - MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE}, - - {"buffer_flush_adaptive", "buffer", - "Number of adaptive batches", - MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, - MONITOR_FLUSH_ADAPTIVE_COUNT}, - - {"buffer_flush_adaptive_pages", "buffer", - "Pages queued as an adaptive batch", - MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, - MONITOR_FLUSH_ADAPTIVE_PAGES}, - - /* Cumulative counter for flush batches 
because of sync */ - {"buffer_flush_sync_total_pages", "buffer", - "Total pages flushed as part of sync batches", - MONITOR_SET_OWNER, MONITOR_FLUSH_SYNC_COUNT, - MONITOR_FLUSH_SYNC_TOTAL_PAGE}, - - {"buffer_flush_sync", "buffer", - "Number of sync batches", - MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE, - MONITOR_FLUSH_SYNC_COUNT}, - - {"buffer_flush_sync_pages", "buffer", - "Pages queued as a sync batch", - MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE, - MONITOR_FLUSH_SYNC_PAGES}, - - /* Cumulative counter for flush batches because of background */ - {"buffer_flush_background_total_pages", "buffer", - "Total pages flushed as part of background batches", - MONITOR_SET_OWNER, MONITOR_FLUSH_BACKGROUND_COUNT, - MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE}, - - {"buffer_flush_background", "buffer", - "Number of background batches", - MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, - MONITOR_FLUSH_BACKGROUND_COUNT}, - - {"buffer_flush_background_pages", "buffer", - "Pages queued as a background batch", - MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, - MONITOR_FLUSH_BACKGROUND_PAGES}, - - /* Cumulative counter for LRU batch scan */ - {"buffer_LRU_batch_scanned", "buffer", - "Total pages scanned as part of LRU batch", - MONITOR_SET_OWNER, MONITOR_LRU_BATCH_SCANNED_NUM_CALL, - MONITOR_LRU_BATCH_SCANNED}, - - {"buffer_LRU_batch_num_scan", "buffer", - "Number of times LRU batch is called", - MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED, - MONITOR_LRU_BATCH_SCANNED_NUM_CALL}, - - {"buffer_LRU_batch_scanned_per_call", "buffer", - "Pages scanned per LRU batch call", - MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED, - MONITOR_LRU_BATCH_SCANNED_PER_CALL}, - - /* Cumulative counter for LRU batch pages flushed */ - {"buffer_LRU_batch_flush_total_pages", "buffer", - "Total pages flushed as part of LRU batches", - MONITOR_SET_OWNER, MONITOR_LRU_BATCH_FLUSH_COUNT, - MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE}, - - {"buffer_LRU_batches_flush", "buffer", - "Number of LRU 
batches", - MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_COUNT}, - - {"buffer_LRU_batch_flush_pages", "buffer", - "Pages queued as an LRU batch", - MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE, - MONITOR_LRU_BATCH_FLUSH_PAGES}, - - /* Cumulative counter for LRU batch pages flushed */ - {"buffer_LRU_batch_evict_total_pages", "buffer", - "Total pages evicted as part of LRU batches", - MONITOR_SET_OWNER, MONITOR_LRU_BATCH_EVICT_COUNT, - MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE}, - - {"buffer_LRU_batches_evict", "buffer", - "Number of LRU batches", - MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_COUNT}, - - {"buffer_LRU_batch_evict_pages", "buffer", - "Pages queued as an LRU batch", - MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE, - MONITOR_LRU_BATCH_EVICT_PAGES}, - - /* Cumulative counter for single page LRU scans */ - {"buffer_LRU_single_flush_scanned", "buffer", - "Total pages scanned as part of single page LRU flush", - MONITOR_SET_OWNER, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL, - MONITOR_LRU_SINGLE_FLUSH_SCANNED}, - - {"buffer_LRU_single_flush_num_scan", "buffer", - "Number of times single page LRU flush is called", - MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL}, - - {"buffer_LRU_single_flush_scanned_per_call", "buffer", - "Page scanned per single LRU flush", - MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED, - MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL}, - - {"buffer_LRU_single_flush_failure_count", "Buffer", - "Number of times attempt to flush a single page from LRU failed", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT}, - - {"buffer_LRU_get_free_search", "Buffer", - "Number of searches performed for a clean page", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_SEARCH}, - - /* Cumulative counter for LRU search scans */ - {"buffer_LRU_search_scanned", "buffer", - 
"Total pages scanned as part of LRU search", - MONITOR_SET_OWNER, - MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, - MONITOR_LRU_SEARCH_SCANNED}, - - {"buffer_LRU_search_num_scan", "buffer", - "Number of times LRU search is performed", - MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED, - MONITOR_LRU_SEARCH_SCANNED_NUM_CALL}, - - {"buffer_LRU_search_scanned_per_call", "buffer", - "Page scanned per single LRU search", - MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED, - MONITOR_LRU_SEARCH_SCANNED_PER_CALL}, - - /* Cumulative counter for LRU unzip search scans */ - {"buffer_LRU_unzip_search_scanned", "buffer", - "Total pages scanned as part of LRU unzip search", - MONITOR_SET_OWNER, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL, - MONITOR_LRU_UNZIP_SEARCH_SCANNED}, - - {"buffer_LRU_unzip_search_num_scan", "buffer", - "Number of times LRU unzip search is performed", - MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL}, - - {"buffer_LRU_unzip_search_scanned_per_call", "buffer", - "Page scanned per single LRU unzip search", - MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED, - MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL}, - - /* ========== Counters for Buffer Page I/O ========== */ - {"module_buffer_page", "buffer_page_io", "Buffer Page I/O Module", - static_cast<monitor_type_t>( - MONITOR_MODULE | MONITOR_GROUP_MODULE), - MONITOR_DEFAULT_START, MONITOR_MODULE_BUF_PAGE}, - - MONITOR_BUF_PAGE_READ("index_leaf","Index Leaf", INDEX_LEAF), - - MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf", - INDEX_NON_LEAF), - - MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf", - INDEX_IBUF_LEAF), - - MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf", - "Insert Buffer Index Non-Leaf", - INDEX_IBUF_NON_LEAF), - - MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG), - - MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE), - - MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List", - IBUF_FREELIST), - - 
MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap", - IBUF_BITMAP), - - MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM), - - MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM), - - MONITOR_BUF_PAGE_READ("fsp_hdr", "File Space Header", FSP_HDR), - - MONITOR_BUF_PAGE_READ("xdes", "Extent Descriptor", XDES), - - MONITOR_BUF_PAGE_READ("blob", "Uncompressed BLOB", BLOB), - - MONITOR_BUF_PAGE_READ("zblob", "First Compressed BLOB", ZBLOB), - - MONITOR_BUF_PAGE_READ("zblob2", "Subsequent Compressed BLOB", ZBLOB2), - - MONITOR_BUF_PAGE_READ("other", "other/unknown (old version of InnoDB)", - OTHER), - - MONITOR_BUF_PAGE_WRITTEN("index_leaf","Index Leaf", INDEX_LEAF), - - MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf", - INDEX_NON_LEAF), - - MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf", - INDEX_IBUF_LEAF), - - MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf", - "Insert Buffer Index Non-Leaf", - INDEX_IBUF_NON_LEAF), - - MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG), - - MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE), - - MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List", - IBUF_FREELIST), - - MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap", - IBUF_BITMAP), - - MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM), - - MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System", - TRX_SYSTEM), - - MONITOR_BUF_PAGE_WRITTEN("fsp_hdr", "File Space Header", FSP_HDR), - - MONITOR_BUF_PAGE_WRITTEN("xdes", "Extent Descriptor", XDES), - - MONITOR_BUF_PAGE_WRITTEN("blob", "Uncompressed BLOB", BLOB), - - MONITOR_BUF_PAGE_WRITTEN("zblob", "First Compressed BLOB", ZBLOB), - - MONITOR_BUF_PAGE_WRITTEN("zblob2", "Subsequent Compressed BLOB", - ZBLOB2), - - MONITOR_BUF_PAGE_WRITTEN("other", "other/unknown (old version InnoDB)", - OTHER), - - /* ========== Counters for OS level operations ========== */ - {"module_os", "os", "OS Level Operation", - 
MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_OS}, - - {"os_data_reads", "os", - "Number of reads initiated (innodb_data_reads)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_READ}, - - {"os_data_writes", "os", - "Number of writes initiated (innodb_data_writes)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_WRITE}, - - {"os_data_fsyncs", "os", - "Number of fsync() calls (innodb_data_fsyncs)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FSYNC}, - - {"os_pending_reads", "os", "Number of reads pending", - MONITOR_DEFAULT_ON, - MONITOR_DEFAULT_START, MONITOR_OS_PENDING_READS}, - - {"os_pending_writes", "os", "Number of writes pending", - MONITOR_DEFAULT_ON, - MONITOR_DEFAULT_START, MONITOR_OS_PENDING_WRITES}, - - {"os_log_bytes_written", "os", - "Bytes of log written (innodb_os_log_written)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN}, - - {"os_log_fsyncs", "os", - "Number of fsync log writes (innodb_os_log_fsyncs)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_FSYNC}, - - {"os_log_pending_fsyncs", "os", - "Number of pending fsync write (innodb_os_log_pending_fsyncs)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_FSYNC}, - - {"os_log_pending_writes", "os", - "Number of pending log file writes (innodb_os_log_pending_writes)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES}, - - /* ========== Counters for Transaction Module ========== */ - {"module_trx", "transaction", "Transaction Manager", - MONITOR_MODULE, - 
MONITOR_DEFAULT_START, MONITOR_MODULE_TRX}, - - {"trx_rw_commits", "transaction", "Number of read-write transactions " - "committed", - MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT}, - - {"trx_ro_commits", "transaction", "Number of read-only transactions " - "committed", - MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT}, - - {"trx_nl_ro_commits", "transaction", "Number of non-locking " - "auto-commit read-only transactions committed", - MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT}, - - {"trx_commits_insert_update", "transaction", - "Number of transactions committed with inserts and updates", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TRX_COMMIT_UNDO}, - - {"trx_rollbacks", "transaction", - "Number of transactions rolled back", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK}, - - {"trx_rollbacks_savepoint", "transaction", - "Number of transactions rolled back to savepoint", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT}, - - {"trx_rollback_active", "transaction", - "Number of resurrected active transactions rolled back", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_ACTIVE}, - - {"trx_active_transactions", "transaction", - "Number of active transactions", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_TRX_ACTIVE}, - - {"trx_rseg_history_len", "transaction", - "Length of the TRX_RSEG_HISTORY list", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN}, - - {"trx_undo_slots_used", "transaction", "Number of undo slots used", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED}, - - {"trx_undo_slots_cached", "transaction", - "Number of undo slots cached", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_CACHED}, - - {"trx_rseg_current_size", "transaction", - "Current rollback segment size in pages", - static_cast<monitor_type_t>( - 
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_RSEG_CUR_SIZE}, - - /* ========== Counters for Purge Module ========== */ - {"module_purge", "purge", "Purge Module", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_PURGE}, - - {"purge_del_mark_records", "purge", - "Number of delete-marked rows purged", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE}, - - {"purge_upd_exist_or_extern_records", "purge", - "Number of purges on updates of existing records and " - " updates on delete marked record with externally stored field", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN}, - - {"purge_invoked", "purge", - "Number of times purge was invoked", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PURGE_INVOKED}, - - {"purge_undo_log_pages", "purge", - "Number of undo log pages handled by the purge", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PURGE_N_PAGE_HANDLED}, - - {"purge_dml_delay_usec", "purge", - "Microseconds DML to be delayed due to purge lagging", - MONITOR_DISPLAY_CURRENT, - MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY}, - - {"purge_stop_count", "purge", - "Number of times purge was stopped", - MONITOR_DISPLAY_CURRENT, - MONITOR_DEFAULT_START, MONITOR_PURGE_STOP_COUNT}, - - {"purge_resume_count", "purge", - "Number of times purge was resumed", - MONITOR_DISPLAY_CURRENT, - MONITOR_DEFAULT_START, MONITOR_PURGE_RESUME_COUNT}, - - /* ========== Counters for Recovery Module ========== */ - {"module_log", "recovery", "Recovery Module", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_RECOVERY}, - - {"log_checkpoints", "recovery", "Number of checkpoints", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_NUM_CHECKPOINT}, - - {"log_lsn_last_flush", "recovery", "LSN of Last flush", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_FLUSHDISK}, - - {"log_lsn_last_checkpoint", "recovery", "LSN at last checkpoint", 
- static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CHECKPOINT}, - - {"log_lsn_current", "recovery", "Current LSN value", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CURRENT}, - - {"log_lsn_checkpoint_age", "recovery", - "Current LSN value minus LSN at last checkpoint", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LSN_CHECKPOINT_AGE}, - - {"log_lsn_buf_pool_oldest", "recovery", - "The oldest modified block LSN in the buffer pool", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_OLDEST_LSN}, - - {"log_max_modified_age_async", "recovery", - "Maximum LSN difference; when exceeded, start asynchronous preflush", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC}, - - {"log_max_modified_age_sync", "recovery", - "Maximum LSN difference; when exceeded, start synchronous preflush", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_SYNC}, - - {"log_pending_log_writes", "recovery", "Pending log writes", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_WRITE}, - - {"log_pending_checkpoint_writes", "recovery", "Pending checkpoints", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PENDING_CHECKPOINT_WRITE}, - - {"log_num_log_io", "recovery", "Number of log I/Os", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_LOG_IO}, - - {"log_waits", "recovery", - "Number of log waits due to small log buffer (innodb_log_waits)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WAITS}, - - {"log_write_requests", "recovery", - "Number of log write requests (innodb_log_write_requests)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | 
MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITE_REQUEST}, - - {"log_writes", "recovery", - "Number of log writes (innodb_log_writes)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES}, - - /* ========== Counters for Page Compression ========== */ - {"module_compress", "compression", "Page Compression Info", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_PAGE}, - - {"compress_pages_compressed", "compression", - "Number of pages compressed", MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PAGE_COMPRESS}, - - {"compress_pages_decompressed", "compression", - "Number of pages decompressed", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS}, - - {"compression_pad_increments", "compression", - "Number of times padding is incremented to avoid compression failures", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS}, - - {"compression_pad_decrements", "compression", - "Number of times padding is decremented due to good compressibility", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS}, - - {"compress_saved", "compression", - "Number of bytes saved by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED}, - - {"compress_trim_sect512", "compression", - "Number of sect-512 TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512}, - - {"compress_trim_sect1024", "compression", - "Number of sect-1024 TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024}, - - {"compress_trim_sect2048", "compression", - "Number of sect-2048 TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048}, - - {"compress_trim_sect4096", "compression", - "Number of sect-4K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, 
MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096}, - - {"compress_trim_sect8192", "compression", - "Number of sect-8K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192}, - - {"compress_trim_sect16384", "compression", - "Number of sect-16K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384}, - - {"compress_trim_sect32768", "compression", - "Number of sect-32K TRIMed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768}, - - {"compress_pages_page_compressed", "compression", - "Number of pages compressed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED}, - - {"compress_page_compressed_trim_op", "compression", - "Number of TRIM operation performed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP}, - - {"compress_page_compressed_trim_op_saved", "compression", - "Number of TRIM operation saved by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED}, - - {"compress_pages_page_decompressed", "compression", - "Number of pages decompressed by page compression", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, - - {"compress_pages_page_compression_error", "compression", - "Number of page compression errors", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, - - {"compress_pages_encrypted", "compression", - "Number of pages encrypted", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_ENCRYPTED}, - - {"compress_pages_decrypted", "compression", - "Number of pages decrypted", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_DECRYPTED}, - - /* ========== Counters for Index ========== */ - {"module_index", "index", "Index Manager", - MONITOR_MODULE, - MONITOR_DEFAULT_START, 
MONITOR_MODULE_INDEX}, - - {"index_page_splits", "index", "Number of index page splits", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT}, - - {"index_page_merge_attempts", "index", - "Number of index page merge attempts", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_ATTEMPTS}, - - {"index_page_merge_successful", "index", - "Number of successful index page merges", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_SUCCESSFUL}, - - {"index_page_reorg_attempts", "index", - "Number of index page reorganization attempts", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_ATTEMPTS}, - - {"index_page_reorg_successful", "index", - "Number of successful index page reorganizations", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_SUCCESSFUL}, - - {"index_page_discards", "index", "Number of index pages discarded", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_INDEX_DISCARD}, - - /* ========== Counters for Adaptive Hash Index ========== */ - {"module_adaptive_hash", "adaptive_hash_index", "Adpative Hash Index", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH}, - - {"adaptive_hash_searches", "adaptive_hash_index", - "Number of successful searches using Adaptive Hash Index", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH}, - - {"adaptive_hash_searches_btree", "adaptive_hash_index", - "Number of searches using B-tree on an index search", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE}, - - {"adaptive_hash_pages_added", "adaptive_hash_index", - "Number of index pages on which the Adaptive Hash Index is built", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_ADDED}, - - {"adaptive_hash_pages_removed", "adaptive_hash_index", - "Number of index pages whose corresponding Adaptive Hash Index" - " 
entries were removed", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_REMOVED}, - - {"adaptive_hash_rows_added", "adaptive_hash_index", - "Number of Adaptive Hash Index rows added", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_ADDED}, - - {"adaptive_hash_rows_removed", "adaptive_hash_index", - "Number of Adaptive Hash Index rows removed", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVED}, - - {"adaptive_hash_rows_deleted_no_hash_entry", "adaptive_hash_index", - "Number of rows deleted that did not have corresponding Adaptive Hash" - " Index entries", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND}, - - {"adaptive_hash_rows_updated", "adaptive_hash_index", - "Number of Adaptive Hash Index rows updated", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED}, - - /* ========== Counters for tablespace ========== */ - {"module_file", "file_system", "Tablespace and File System Manager", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_FIL_SYSTEM}, - - {"file_num_open_files", "file_system", - "Number of files currently open (innodb_num_open_files)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED}, - - /* ========== Counters for Change Buffer ========== */ - {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM}, - - {"ibuf_merges_insert", "change_buffer", - "Number of inserted records merged by change buffering", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT}, - - {"ibuf_merges_delete_mark", "change_buffer", - "Number of deleted records merged by change buffering", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, 
MONITOR_OVLD_IBUF_MERGE_DELETE}, - - {"ibuf_merges_delete", "change_buffer", - "Number of purge records merged by change buffering", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE}, - - {"ibuf_merges_discard_insert", "change_buffer", - "Number of insert merged operations discarded", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT}, - - {"ibuf_merges_discard_delete_mark", "change_buffer", - "Number of deleted merged operations discarded", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE}, - - {"ibuf_merges_discard_delete", "change_buffer", - "Number of purge merged operations discarded", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE}, - - {"ibuf_merges", "change_buffer", "Number of change buffer merges", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES}, - - {"ibuf_size", "change_buffer", "Change buffer size in pages", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE}, - - /* ========== Counters for server operations ========== */ - {"module_innodb", "innodb", - "Counter for general InnoDB server wide operations and properties", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_SERVER}, - - {"innodb_master_thread_sleeps", "server", - "Number of times (seconds) master thread sleeps", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_MASTER_THREAD_SLEEP}, - - {"innodb_activity_count", "server", "Current server activity count", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_SERVER_ACTIVITY}, - - 
{"innodb_master_active_loops", "server", - "Number of times master thread performs its tasks when" - " server is active", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_MASTER_ACTIVE_LOOPS}, - - {"innodb_master_idle_loops", "server", - "Number of times master thread performs its tasks when server is idle", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_MASTER_IDLE_LOOPS}, - - {"innodb_background_drop_table_usec", "server", - "Time (in microseconds) spent to process drop table list", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND}, - - {"innodb_ibuf_merge_usec", "server", - "Time (in microseconds) spent to process change buffer merge", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_IBUF_MERGE_MICROSECOND}, - - {"innodb_log_flush_usec", "server", - "Time (in microseconds) spent to flush log records", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_LOG_FLUSH_MICROSECOND}, - - {"innodb_mem_validate_usec", "server", - "Time (in microseconds) spent to do memory validation", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_MEM_VALIDATE_MICROSECOND}, - - {"innodb_master_purge_usec", "server", - "Time (in microseconds) spent by master thread to purge records", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_PURGE_MICROSECOND}, - - {"innodb_dict_lru_usec", "server", - "Time (in microseconds) spent to process DICT LRU list", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_MICROSECOND}, - - {"innodb_dict_lru_count_active", "server", - "Number of tables evicted from DICT LRU list in the active loop", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE}, - - {"innodb_dict_lru_count_idle", "server", - "Number of tables evicted from DICT LRU list in the idle loop", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE}, - - {"innodb_checkpoint_usec", "server", - "Time (in microseconds) spent by master thread to do checkpoint", - MONITOR_NONE, - 
MONITOR_DEFAULT_START, MONITOR_SRV_CHECKPOINT_MICROSECOND}, - - {"innodb_dblwr_writes", "server", - "Number of doublewrite operations that have been performed" - " (innodb_dblwr_writes)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_WRITES}, - - {"innodb_dblwr_pages_written", "server", - "Number of pages that have been written for doublewrite operations" - " (innodb_dblwr_pages_written)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN}, - - {"innodb_page_size", "server", - "InnoDB page size in bytes (innodb_page_size)", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), - MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_PAGE_SIZE}, - - {"innodb_rwlock_s_spin_waits", "server", - "Number of rwlock spin waits due to shared latch request", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_WAITS}, - - {"innodb_rwlock_x_spin_waits", "server", - "Number of rwlock spin waits due to exclusive latch request", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_WAITS}, - - {"innodb_rwlock_s_spin_rounds", "server", - "Number of rwlock spin loop rounds due to shared latch request", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS}, - - {"innodb_rwlock_x_spin_rounds", "server", - "Number of rwlock spin loop rounds due to exclusive latch request", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS}, - - {"innodb_rwlock_s_os_waits", "server", - "Number of OS waits due to shared latch request", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - 
MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_OS_WAITS}, - - {"innodb_rwlock_x_os_waits", "server", - "Number of OS waits due to exclusive latch request", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_OS_WAITS}, - - /* ========== Counters for DML operations ========== */ - {"module_dml", "dml", "Statistics for DMLs", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_DML_STATS}, - - {"dml_reads", "dml", "Number of rows read", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_READ}, - - {"dml_inserts", "dml", "Number of rows inserted", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_INSERTED}, - - {"dml_deletes", "dml", "Number of rows deleted", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_DELETED}, - - {"dml_updates", "dml", "Number of rows updated", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_UPDTATED}, - - {"dml_system_reads", "dml", "Number of system rows read", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_READ}, - - {"dml_system_inserts", "dml", "Number of system rows inserted", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_INSERTED}, - - {"dml_system_deletes", "dml", "Number of system rows deleted", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_DELETED}, - - {"dml_system_updates", "dml", "Number of system rows updated", - static_cast<monitor_type_t>( - MONITOR_EXISTING | MONITOR_DEFAULT_ON), - MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_UPDATED}, - - /* ========== Counters for DDL 
operations ========== */ - {"module_ddl", "ddl", "Statistics for DDLs", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS}, - - {"ddl_background_drop_indexes", "ddl", - "Number of indexes waiting to be dropped after failed index creation", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX}, - - {"ddl_background_drop_tables", "ddl", - "Number of tables in background drop table list", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE}, - - {"ddl_online_create_index", "ddl", - "Number of indexes being created online", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX}, - - {"ddl_pending_alter_table", "ddl", - "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE}, - - /* ===== Counters for ICP (Index Condition Pushdown) Module ===== */ - {"module_icp", "icp", "Index Condition Pushdown", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_MODULE_ICP}, - - {"icp_attempts", "icp", - "Number of attempts for index push-down condition checks", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ICP_ATTEMPTS}, - - {"icp_no_match", "icp", "Index push-down condition does not match", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ICP_NO_MATCH}, - - {"icp_out_of_range", "icp", "Index push-down condition out of range", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ICP_OUT_OF_RANGE}, - - {"icp_match", "icp", "Index push-down condition matches", - MONITOR_NONE, - MONITOR_DEFAULT_START, MONITOR_ICP_MATCH}, - - /* ========== To turn on/off reset all counters ========== */ - {"all", "All Counters", "Turn on/off and reset all counters", - MONITOR_MODULE, - MONITOR_DEFAULT_START, MONITOR_ALL_COUNTER} -}; - -/* The "innodb_counter_value" array stores actual counter values */ -UNIV_INTERN monitor_value_t innodb_counter_value[NUM_MONITOR]; - -/* monitor_set_tbl is used to record and determine whether a monitor -has been turned on/off. 
*/ -UNIV_INTERN ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - - 1) / NUM_BITS_ULINT]; - -#ifndef HAVE_ATOMIC_BUILTINS_64 -/** Mutex protecting atomic operations on platforms that lack -built-in operations for atomic memory access */ -ib_mutex_t monitor_mutex; - -/** Key to register monitor_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t monitor_mutex_key; - -/****************************************************************//** -Initialize the monitor subsystem. */ -UNIV_INTERN -void -srv_mon_create(void) -/*================*/ -{ - mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH); -} -/****************************************************************//** -Close the monitor subsystem. */ -UNIV_INTERN -void -srv_mon_free(void) -/*==============*/ -{ - mutex_free(&monitor_mutex); -} -#endif /* !HAVE_ATOMIC_BUILTINS_64 */ - -/****************************************************************//** -Get a monitor's "monitor_info" by its monitor id (index into the -innodb_counter_info array. -@return Point to corresponding monitor_info_t, or NULL if no such -monitor */ -UNIV_INTERN -monitor_info_t* -srv_mon_get_info( -/*=============*/ - monitor_id_t monitor_id) /*!< id indexing into the - innodb_counter_info array */ -{ - ut_a(monitor_id < NUM_MONITOR); - - return((monitor_id < NUM_MONITOR) - ? &innodb_counter_info[monitor_id] - : NULL); -} - -/****************************************************************//** -Get monitor's name by its monitor id (indexing into the -innodb_counter_info array. -@return corresponding monitor name, or NULL if no such -monitor */ -UNIV_INTERN -const char* -srv_mon_get_name( -/*=============*/ - monitor_id_t monitor_id) /*!< id index into the - innodb_counter_info array */ -{ - ut_a(monitor_id < NUM_MONITOR); - - return((monitor_id < NUM_MONITOR) - ? 
innodb_counter_info[monitor_id].monitor_name - : NULL); -} - -/****************************************************************//** -Turn on/off, reset monitor counters in a module. If module_id -is MONITOR_ALL_COUNTER then turn on all monitor counters. -turned on because it has already been turned on. */ -UNIV_INTERN -void -srv_mon_set_module_control( -/*=======================*/ - monitor_id_t module_id, /*!< in: Module ID as in - monitor_counter_id. If it is - set to MONITOR_ALL_COUNTER, this means - we shall turn on all the counters */ - mon_option_t set_option) /*!< in: Turn on/off reset the - counter */ -{ - ulint ix; - ulint start_id; - ibool set_current_module = FALSE; - - ut_a(module_id <= NUM_MONITOR); - ut_a(UT_ARR_SIZE(innodb_counter_info) == NUM_MONITOR); - - /* The module_id must be an ID of MONITOR_MODULE type */ - ut_a(innodb_counter_info[module_id].monitor_type & MONITOR_MODULE); - - /* start with the first monitor in the module. If module_id - is MONITOR_ALL_COUNTER, this means we need to turn on all - monitor counters. */ - if (module_id == MONITOR_ALL_COUNTER) { - start_id = 1; - } else if (innodb_counter_info[module_id].monitor_type - & MONITOR_GROUP_MODULE) { - /* Counters in this module are set as a group together - and cannot be turned on/off individually. Need to set - the on/off bit in the module counter */ - start_id = module_id; - set_current_module = TRUE; - - } else { - start_id = module_id + 1; - } - - for (ix = start_id; ix < NUM_MONITOR; ix++) { - /* if we hit the next module counter, we will - continue if we want to turn on all monitor counters, - and break if just turn on the counters in the - current module. 
*/ - if (innodb_counter_info[ix].monitor_type & MONITOR_MODULE) { - - if (set_current_module) { - /* Continue to set on/off bit on current - module */ - set_current_module = FALSE; - } else if (module_id == MONITOR_ALL_COUNTER) { - if (!(innodb_counter_info[ix].monitor_type - & MONITOR_GROUP_MODULE)) { - continue; - } - } else { - /* Hitting the next module, stop */ - break; - } - } - - /* Cannot turn on a monitor already been turned on. User - should be aware some counters are already on before - turn them on again (which could reset counter value) */ - if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) { - fprintf(stderr, "Monitor '%s' is already enabled.\n", - srv_mon_get_name((monitor_id_t) ix)); - continue; - } - - /* For some existing counters (server status variables), - we will get its counter value at the start/stop time - to calculate the actual value during the time. */ - if (innodb_counter_info[ix].monitor_type & MONITOR_EXISTING) { - srv_mon_process_existing_counter( - static_cast<monitor_id_t>(ix), set_option); - } - - /* Currently support 4 operations on the monitor counters: - turn on, turn off, reset and reset all operations. */ - switch (set_option) { - case MONITOR_TURN_ON: - MONITOR_ON(ix); - MONITOR_INIT(ix); - MONITOR_SET_START(ix); - break; - - case MONITOR_TURN_OFF: - MONITOR_OFF(ix); - MONITOR_SET_OFF(ix); - break; - - case MONITOR_RESET_VALUE: - srv_mon_reset(static_cast<monitor_id_t>(ix)); - break; - - case MONITOR_RESET_ALL_VALUE: - srv_mon_reset_all(static_cast<monitor_id_t>(ix)); - break; - - default: - ut_error; - } - } -} - -/****************************************************************//** -Get transaction system's rollback segment size in pages -@return size in pages */ -static -ulint -srv_mon_get_rseg_size(void) -/*=======================*/ -{ - ulint i; - ulint value = 0; - - /* rseg_array is a static array, so we can go through it without - mutex protection. 
In addition, we provide an estimate of the - total rollback segment size and to avoid mutex contention we - don't acquire the rseg->mutex" */ - for (i = 0; i < TRX_SYS_N_RSEGS; ++i) { - const trx_rseg_t* rseg = trx_sys->rseg_array[i]; - - if (rseg != NULL) { - value += rseg->curr_size; - } - } - - return(value); -} - -/****************************************************************//** -This function consolidates some existing server counters used -by "system status variables". These existing system variables do not have -mechanism to start/stop and reset the counters, so we simulate these -controls by remembering the corresponding counter values when the -corresponding monitors are turned on/off/reset, and do appropriate -mathematics to deduct the actual value. Please also refer to -srv_export_innodb_status() for related global counters used by -the existing status variables.*/ -UNIV_INTERN -void -srv_mon_process_existing_counter( -/*=============================*/ - monitor_id_t monitor_id, /*!< in: the monitor's ID as in - monitor_counter_id */ - mon_option_t set_option) /*!< in: Turn on/off reset the - counter */ -{ - mon_type_t value; - monitor_info_t* monitor_info; - ibool update_min = FALSE; - buf_pool_stat_t stat; - buf_pools_list_size_t buf_pools_list_size; - ulint LRU_len; - ulint free_len; - ulint flush_list_len; - - monitor_info = srv_mon_get_info(monitor_id); - - ut_a(monitor_info->monitor_type & MONITOR_EXISTING); - ut_a(monitor_id < NUM_MONITOR); - - /* Get the value from corresponding global variable */ - switch (monitor_id) { - case MONITOR_OVLD_META_MEM_POOL: - value = srv_mem_pool_size; - break; - - /* export_vars.innodb_buffer_pool_reads. 
Num Reads from - disk (page not in buffer) */ - case MONITOR_OVLD_BUF_POOL_READS: - value = srv_stats.buf_pool_reads; - break; - - /* innodb_buffer_pool_read_requests, the number of logical - read requests */ - case MONITOR_OVLD_BUF_POOL_READ_REQUESTS: - buf_get_total_stat(&stat); - value = stat.n_page_gets; - break; - - /* innodb_buffer_pool_write_requests, the number of - write request */ - case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST: - value = srv_stats.buf_pool_write_requests; - break; - - /* innodb_buffer_pool_wait_free */ - case MONITOR_OVLD_BUF_POOL_WAIT_FREE: - value = srv_stats.buf_pool_wait_free; - break; - - /* innodb_buffer_pool_read_ahead */ - case MONITOR_OVLD_BUF_POOL_READ_AHEAD: - buf_get_total_stat(&stat); - value = stat.n_ra_pages_read; - break; - - /* innodb_buffer_pool_read_ahead_evicted */ - case MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED: - buf_get_total_stat(&stat); - value = stat.n_ra_pages_evicted; - break; - - /* innodb_buffer_pool_pages_total */ - case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL: - value = buf_pool_get_n_pages(); - break; - - /* innodb_buffer_pool_pages_misc */ - case MONITOR_OVLD_BUF_POOL_PAGE_MISC: - buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); - value = buf_pool_get_n_pages() - LRU_len - free_len; - break; - - /* innodb_buffer_pool_pages_data */ - case MONITOR_OVLD_BUF_POOL_PAGES_DATA: - buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); - value = LRU_len; - break; - - /* innodb_buffer_pool_bytes_data */ - case MONITOR_OVLD_BUF_POOL_BYTES_DATA: - buf_get_total_list_size_in_bytes(&buf_pools_list_size); - value = buf_pools_list_size.LRU_bytes - + buf_pools_list_size.unzip_LRU_bytes; - break; - - /* innodb_buffer_pool_pages_dirty */ - case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY: - buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); - value = flush_list_len; - break; - - /* innodb_buffer_pool_bytes_dirty */ - case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY: - buf_get_total_list_size_in_bytes(&buf_pools_list_size); 
- value = buf_pools_list_size.flush_list_bytes; - break; - - /* innodb_buffer_pool_pages_free */ - case MONITOR_OVLD_BUF_POOL_PAGES_FREE: - buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); - value = free_len; - break; - - /* innodb_pages_created, the number of pages created */ - case MONITOR_OVLD_PAGE_CREATED: - buf_get_total_stat(&stat); - value = stat.n_pages_created; - break; - - /* innodb_pages_written, the number of page written */ - case MONITOR_OVLD_PAGES_WRITTEN: - buf_get_total_stat(&stat); - value = stat.n_pages_written; - break; - - /* innodb_index_pages_written, the number of index pages written */ - case MONITOR_OVLD_INDEX_PAGES_WRITTEN: - value = srv_stats.index_pages_written; - break; - - /* innodb_non_index_pages_written, the number of non index pages written */ - case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN: - value = srv_stats.non_index_pages_written; - break; - - /* innodb_pages_read */ - case MONITOR_OVLD_PAGES_READ: - buf_get_total_stat(&stat); - value = stat.n_pages_read; - break; - - /* innodb_pages0_read */ - case MONITOR_OVLD_PAGES0_READ: - value = srv_stats.page0_read; - break; - - /* Number of times secondary index lookup triggered cluster lookup */ - case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS: - value = srv_stats.n_sec_rec_cluster_reads; - break; - /* Number of times prefix optimization avoided triggering cluster - lookup */ - case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED: - value = srv_stats.n_sec_rec_cluster_reads_avoided; - break; - - /* innodb_data_reads, the total number of data reads */ - case MONITOR_OVLD_BYTE_READ: - value = srv_stats.data_read; - break; - - /* innodb_data_writes, the total number of data writes. */ - case MONITOR_OVLD_BYTE_WRITTEN: - value = srv_stats.data_written; - break; - - /* innodb_data_reads, the total number of data reads. 
*/ - case MONITOR_OVLD_OS_FILE_READ: - value = os_n_file_reads; - break; - - /* innodb_data_writes, the total number of data writes*/ - case MONITOR_OVLD_OS_FILE_WRITE: - value = os_n_file_writes; - break; - - /* innodb_data_fsyncs, number of fsync() operations so far. */ - case MONITOR_OVLD_OS_FSYNC: - value = os_n_fsyncs; - break; - - /* innodb_os_log_written */ - case MONITOR_OVLD_OS_LOG_WRITTEN: - value = (mon_type_t) srv_stats.os_log_written; - break; - - /* innodb_os_log_fsyncs */ - case MONITOR_OVLD_OS_LOG_FSYNC: - value = fil_n_log_flushes; - break; - - /* innodb_os_log_pending_fsyncs */ - case MONITOR_OVLD_OS_LOG_PENDING_FSYNC: - value = fil_n_pending_log_flushes; - update_min = TRUE; - break; - - /* innodb_os_log_pending_writes */ - case MONITOR_OVLD_OS_LOG_PENDING_WRITES: - value = srv_stats.os_log_pending_writes; - update_min = TRUE; - break; - - /* innodb_log_waits */ - case MONITOR_OVLD_LOG_WAITS: - value = srv_stats.log_waits; - break; - - /* innodb_log_write_requests */ - case MONITOR_OVLD_LOG_WRITE_REQUEST: - value = srv_stats.log_write_requests; - break; - - /* innodb_log_writes */ - case MONITOR_OVLD_LOG_WRITES: - value = srv_stats.log_writes; - break; - - /* innodb_dblwr_writes */ - case MONITOR_OVLD_SRV_DBLWR_WRITES: - value = srv_stats.dblwr_writes; - break; - - /* innodb_dblwr_pages_written */ - case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN: - value = srv_stats.dblwr_pages_written; - break; - - /* innodb_page_size */ - case MONITOR_OVLD_SRV_PAGE_SIZE: - value = UNIV_PAGE_SIZE; - break; - - case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS: - value = rw_lock_stats.rw_s_spin_wait_count; - break; - - case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS: - value = rw_lock_stats.rw_x_spin_wait_count; - break; - - case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS: - value = rw_lock_stats.rw_s_spin_round_count; - break; - - case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS: - value = rw_lock_stats.rw_x_spin_round_count; - break; - - case MONITOR_OVLD_RWLOCK_S_OS_WAITS: - value = 
rw_lock_stats.rw_s_os_wait_count; - break; - - case MONITOR_OVLD_RWLOCK_X_OS_WAITS: - value = rw_lock_stats.rw_x_os_wait_count; - break; - - case MONITOR_OVLD_BUFFER_POOL_SIZE: - value = srv_buf_pool_size; - break; - - /* innodb_rows_read */ - case MONITOR_OLVD_ROW_READ: - value = srv_stats.n_rows_read; - break; - - /* innodb_rows_inserted */ - case MONITOR_OLVD_ROW_INSERTED: - value = srv_stats.n_rows_inserted; - break; - - /* innodb_rows_deleted */ - case MONITOR_OLVD_ROW_DELETED: - value = srv_stats.n_rows_deleted; - break; - - /* innodb_rows_updated */ - case MONITOR_OLVD_ROW_UPDTATED: - value = srv_stats.n_rows_updated; - break; - - /* innodb_system_rows_read */ - case MONITOR_OLVD_SYSTEM_ROW_READ: - value = srv_stats.n_system_rows_read; - break; - - /* innodb_system_rows_inserted */ - case MONITOR_OLVD_SYSTEM_ROW_INSERTED: - value = srv_stats.n_system_rows_inserted; - break; - - /* innodb_system_rows_deleted */ - case MONITOR_OLVD_SYSTEM_ROW_DELETED: - value = srv_stats.n_system_rows_deleted; - break; - - /* innodb_system_rows_updated */ - case MONITOR_OLVD_SYSTEM_ROW_UPDATED: - value = srv_stats.n_system_rows_updated; - break; - - /* innodb_row_lock_current_waits */ - case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT: - value = srv_stats.n_lock_wait_current_count; - break; - - /* innodb_row_lock_time */ - case MONITOR_OVLD_LOCK_WAIT_TIME: - value = srv_stats.n_lock_wait_time / 1000; - break; - - /* innodb_row_lock_time_max */ - case MONITOR_OVLD_LOCK_MAX_WAIT_TIME: - value = lock_sys->n_lock_max_wait_time / 1000; - break; - - /* innodb_row_lock_time_avg */ - case MONITOR_OVLD_LOCK_AVG_WAIT_TIME: - if (srv_stats.n_lock_wait_count > 0) { - value = srv_stats.n_lock_wait_time / 1000 - / srv_stats.n_lock_wait_count; - } else { - value = 0; - } - break; - - /* innodb_row_lock_waits */ - case MONITOR_OVLD_ROW_LOCK_WAIT: - value = srv_stats.n_lock_wait_count; - break; - - case MONITOR_RSEG_HISTORY_LEN: - value = trx_sys->rseg_history_len; - break; - - case 
MONITOR_RSEG_CUR_SIZE: - value = srv_mon_get_rseg_size(); - break; - - case MONITOR_OVLD_N_FILE_OPENED: - value = fil_n_file_opened; - break; - - case MONITOR_OVLD_IBUF_MERGE_INSERT: - value = ibuf->n_merged_ops[IBUF_OP_INSERT]; - break; - - case MONITOR_OVLD_IBUF_MERGE_DELETE: - value = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK]; - break; - - case MONITOR_OVLD_IBUF_MERGE_PURGE: - value = ibuf->n_merged_ops[IBUF_OP_DELETE]; - break; - - case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT: - value = ibuf->n_discarded_ops[IBUF_OP_INSERT]; - break; - - case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE: - value = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK]; - break; - - case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE: - value = ibuf->n_discarded_ops[IBUF_OP_DELETE]; - break; - - case MONITOR_OVLD_IBUF_MERGES: - value = ibuf->n_merges; - break; - - case MONITOR_OVLD_IBUF_SIZE: - value = ibuf->size; - break; - - case MONITOR_OVLD_SERVER_ACTIVITY: - value = srv_get_activity_count(); - break; - - case MONITOR_OVLD_LSN_FLUSHDISK: - value = (mon_type_t) log_sys->flushed_to_disk_lsn; - break; - - case MONITOR_OVLD_LSN_CURRENT: - value = (mon_type_t) log_sys->lsn; - break; - - case MONITOR_OVLD_BUF_OLDEST_LSN: - value = (mon_type_t) buf_pool_get_oldest_modification(); - break; - - case MONITOR_OVLD_LSN_CHECKPOINT: - value = (mon_type_t) log_sys->last_checkpoint_lsn; - break; - - case MONITOR_OVLD_MAX_AGE_ASYNC: - value = log_sys->max_modified_age_async; - break; - - case MONITOR_OVLD_MAX_AGE_SYNC: - value = log_sys->max_modified_age_sync; - break; - - case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH: - value = btr_cur_n_sea; - break; - - case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE: - value = btr_cur_n_non_sea; - break; - - case MONITOR_OVLD_PAGE_COMPRESS_SAVED: - value = srv_stats.page_compression_saved; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512: - value = srv_stats.page_compression_trim_sect512; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024: - value = 
srv_stats.page_compression_trim_sect1024; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048: - value = srv_stats.page_compression_trim_sect2048; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096: - value = srv_stats.page_compression_trim_sect4096; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192: - value = srv_stats.page_compression_trim_sect8192; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384: - value = srv_stats.page_compression_trim_sect16384; - break; - case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768: - value = srv_stats.page_compression_trim_sect32768; - break; - case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: - value = srv_stats.pages_page_compressed; - break; - case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: - value = srv_stats.page_compressed_trim_op; - break; - case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED: - value = srv_stats.page_compressed_trim_op_saved; - break; - case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: - value = srv_stats.pages_page_decompressed; - break; - case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: - value = srv_stats.pages_page_compression_error; - break; - case MONITOR_OVLD_PAGES_ENCRYPTED: - value = srv_stats.pages_encrypted; - break; - case MONITOR_OVLD_PAGES_DECRYPTED: - value = srv_stats.pages_decrypted; - break; - - default: - ut_error; - } - - switch (set_option) { - case MONITOR_TURN_ON: - /* Save the initial counter value in mon_start_value - field */ - MONITOR_SAVE_START(monitor_id, value); - return; - - case MONITOR_TURN_OFF: - /* Save the counter value to mon_last_value when we - turn off the monitor but not yet reset. Note the - counter has not yet been set to off in the bitmap - table for normal turn off. 
We need to check the - count status (on/off) to avoid reset the value - for an already off conte */ - if (MONITOR_IS_ON(monitor_id)) { - srv_mon_process_existing_counter(monitor_id, - MONITOR_GET_VALUE); - MONITOR_SAVE_LAST(monitor_id); - } - return; - - case MONITOR_GET_VALUE: - if (MONITOR_IS_ON(monitor_id)) { - - /* If MONITOR_DISPLAY_CURRENT bit is on, we - only record the current value, rather than - incremental value over a period. Most of -` this type of counters are resource related - counters such as number of buffer pages etc. */ - if (monitor_info->monitor_type - & MONITOR_DISPLAY_CURRENT) { - MONITOR_SET(monitor_id, value); - } else { - /* Most status counters are montonically - increasing, no need to update their - minimum values. Only do so - if "update_min" set to TRUE */ - MONITOR_SET_DIFF(monitor_id, value); - - if (update_min - && (MONITOR_VALUE(monitor_id) - < MONITOR_MIN_VALUE(monitor_id))) { - MONITOR_MIN_VALUE(monitor_id) = - MONITOR_VALUE(monitor_id); - } - } - } - return; - - case MONITOR_RESET_VALUE: - if (!MONITOR_IS_ON(monitor_id)) { - MONITOR_LAST_VALUE(monitor_id) = 0; - } - return; - - /* Nothing special for reset all operation for these existing - counters */ - case MONITOR_RESET_ALL_VALUE: - return; - } -} - -/*************************************************************//** -Reset a monitor, create a new base line with the current monitor -value. 
This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
-UNIV_INTERN
-void
-srv_mon_reset(
-/*==========*/
-	monitor_id_t	monitor)	/*!< in: monitor id */
-{
-	/* Note whether the counter is on at entry, so that the same
-	state can be put back once the reset is done */
-	const ibool	was_enabled = MONITOR_IS_ON(monitor);
-
-	/* The counter is kept off while it is being reset */
-	if (was_enabled) {
-		MONITOR_OFF(monitor);
-	}
-
-	/* The max/min values since monitor start are calculated and
-	stored first, before the current value is cleared below */
-	srv_mon_calc_max_since_start(monitor);
-	srv_mon_calc_min_since_start(monitor);
-
-	if (!(innodb_counter_info[monitor].monitor_type
-	      & MONITOR_DISPLAY_CURRENT)) {
-		/* Incremental counter: fold the current value into
-		the baseline */
-		MONITOR_VALUE_RESET(monitor) += MONITOR_VALUE(monitor);
-	} else {
-		/* Counters flagged MONITOR_DISPLAY_CURRENT are not
-		incremental, so there is no baseline to remember */
-		MONITOR_VALUE_RESET(monitor) = 0;
-	}
-
-	/* Clear the counter value and set max/min back to
-	MAX_RESERVED / MIN_RESERVED */
-	MONITOR_VALUE(monitor) = 0;
-	MONITOR_MAX_VALUE(monitor) = MAX_RESERVED;
-	MONITOR_MIN_VALUE(monitor) = MIN_RESERVED;
-
-	/* Record the time of this reset */
-	MONITOR_FIELD((monitor), mon_reset_time) = time(NULL);
-
-	/* Switch the counter back on if it was on at entry */
-	if (was_enabled) {
-		MONITOR_ON(monitor);
-	}
-}
-
-/*************************************************************//**
-Turn on monitor counters that are marked as default ON.
*/ -UNIV_INTERN -void -srv_mon_default_on(void) -/*====================*/ -{ - ulint ix; - - for (ix = 0; ix < NUM_MONITOR; ix++) { - if (innodb_counter_info[ix].monitor_type - & MONITOR_DEFAULT_ON) { - /* Turn on monitor counters that are default on */ - MONITOR_ON(ix); - MONITOR_INIT(ix); - MONITOR_SET_START(ix); - } - } -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc deleted file mode 100644 index cc5d1320142..00000000000 --- a/storage/xtradb/srv/srv0srv.cc +++ /dev/null @@ -1,3693 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, 2009 Google Inc. -Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file srv/srv0srv.cc -The database server main program - -Created 10/8/1995 Heikki Tuuri -*******************************************************/ - -/* Dummy comment */ -#include "srv0srv.h" - -#include "ut0mem.h" -#include "ut0ut.h" -#include "os0proc.h" -#include "mem0mem.h" -#include "mem0pool.h" -#include "sync0sync.h" -#include "que0que.h" -#include "log0online.h" -#include "log0recv.h" -#include "pars0pars.h" -#include "usr0sess.h" -#include "lock0lock.h" -#include "trx0purge.h" -#include "ibuf0ibuf.h" -#include "buf0flu.h" -#include "buf0lru.h" -#include "btr0sea.h" -#include "dict0load.h" -#include "dict0boot.h" -#include "srv0start.h" -#include "row0mysql.h" -#include "row0log.h" -#include "ha_prototypes.h" -#include "trx0i_s.h" -#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ -#include "srv0mon.h" -#include "ut0crc32.h" -#include "os0file.h" -#include "btr0defragment.h" -#include "mysql/plugin.h" -#include "mysql/service_thd_wait.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#include "fil0pagecompress.h" -#include <my_rdtsc.h> -#include "btr0scrub.h" - -/* prototypes for new functions added to ha_innodb.cc */ -ibool innobase_get_slow_log(); - -#ifdef WITH_WSREP -extern int wsrep_debug; -extern int wsrep_trx_is_aborting(void *thd_ptr); -#endif -/* The following counter is incremented whenever there is some user activity -in the server */ -UNIV_INTERN ulint srv_activity_count = 0; - -/* The following is the maximum allowed duration of a lock wait. 
*/ -UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT; - -/**/ -UNIV_INTERN long long srv_kill_idle_transaction = 0; - -/* How much data manipulation language (DML) statements need to be delayed, -in microseconds, in order to reduce the lagging of the purge thread. */ -UNIV_INTERN ulint srv_dml_needed_delay = 0; - -UNIV_INTERN bool srv_monitor_active; -UNIV_INTERN bool srv_error_monitor_active; - -UNIV_INTERN bool srv_buf_dump_thread_active; - -UNIV_INTERN bool srv_dict_stats_thread_active; - -UNIV_INTERN my_bool srv_scrub_log; - -UNIV_INTERN const char* srv_main_thread_op_info = ""; - -/** Prefix used by MySQL to indicate pre-5.1 table name encoding */ -const char srv_mysql50_table_name_prefix[10] = "#mysql50#"; - -/* Server parameters which are read from the initfile */ - -/* The following three are dir paths which are catenated before file -names, where the file name itself may also contain a path */ - -UNIV_INTERN char* srv_data_home = NULL; - -/** Rollback files directory, can be absolute. */ -UNIV_INTERN char* srv_undo_dir = NULL; - -/** The number of tablespaces to use for rollback segments. */ -UNIV_INTERN ulong srv_undo_tablespaces = 8; - -/** The number of UNDO tablespaces that are open and ready to use. */ -UNIV_INTERN ulint srv_undo_tablespaces_open = 8; - -/* The number of rollback segments to use */ -UNIV_INTERN ulong srv_undo_logs = 1; - -#ifdef UNIV_LOG_ARCHIVE -UNIV_INTERN char* srv_arch_dir = NULL; -UNIV_INTERN ulong srv_log_arch_expire_sec = 0; -#endif /* UNIV_LOG_ARCHIVE */ - -/** Set if InnoDB must operate in read-only mode. We don't do any -recovery and open all tables in RO mode instead of RW mode. We don't -sync the max trx id to disk either. */ -UNIV_INTERN my_bool srv_read_only_mode; -/** store to its own file each table created by an user; data -dictionary tables are in the system tablespace 0 */ -UNIV_INTERN my_bool srv_file_per_table; -/** The file format to use on new *.ibd files. 
*/ -UNIV_INTERN ulint srv_file_format = 0; -/** Whether to check file format during startup. A value of -UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to -set it to the highest format we support. */ -UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX; -/** Set if InnoDB operates in read-only mode or innodb-force-recovery -is greater than SRV_FORCE_NO_TRX_UNDO. */ -UNIV_INTERN my_bool high_level_read_only; - -#if UNIV_FORMAT_A -# error "UNIV_FORMAT_A must be 0!" -#endif - -/** Place locks to records only i.e. do not use next-key locking except -on duplicate key checking and foreign key checking */ -UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; -/** Sort buffer size in index creation */ -UNIV_INTERN ulong srv_sort_buf_size = 1048576; -/** Maximum modification log file size for online index creation */ -UNIV_INTERN unsigned long long srv_online_max_size; - -/* If this flag is TRUE, then we will use the native aio of the -OS (provided we compiled Innobase with it in), otherwise we will -use simulated aio we build below with threads. 
-Currently we support native aio on windows and linux */ -/* make srv_use_native_aio to be visible for other plugins */ -my_bool srv_use_native_aio = TRUE; -UNIV_INTERN my_bool srv_numa_interleave = FALSE; - -/* Default compression level if page compression is used and no compression -level is set for the table*/ -UNIV_INTERN long srv_compress_zlib_level = 6; -/* If this flag is TRUE, then we will use fallocate(PUNCH_HOLE) -to the pages */ -UNIV_INTERN my_bool srv_use_trim = FALSE; -/* If this flag is TRUE, then we will use posix fallocate for file extension */ -UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; -/* If this flag is TRUE, then we disable doublewrite buffer */ -UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; -/* If this flag IS TRUE, then we use this algorithm for page compressing the pages */ -UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM; -/* Number of threads used for multi-threaded flush */ -UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; -/* If this flag is TRUE, then we will use multi threaded flush. */ -UNIV_INTERN my_bool srv_use_mtflush = FALSE; - -#ifdef __WIN__ -/* Windows native condition variables. We use runtime loading / function -pointers, because they are not available on Windows Server 2003 and -Windows XP/2000. - -We use condition for events on Windows if possible, even if os_event -resembles Windows kernel event object well API-wise. The reason is -performance, kernel objects are heavyweights and WaitForSingleObject() is a -performance killer causing calling thread to context switch. Besides, Innodb -is preallocating large number (often millions) of os_events. With kernel event -objects it takes a big chunk out of non-paged pool, which is better suited -for tasks like IO than for storing idle event objects.
*/ -UNIV_INTERN ibool srv_use_native_conditions = TRUE; -#endif /* __WIN__ */ - -UNIV_INTERN ulint srv_n_data_files = 0; -UNIV_INTERN char** srv_data_file_names = NULL; -/* size in database pages */ -UNIV_INTERN ulint* srv_data_file_sizes = NULL; - -/** Whether the redo log tracking is currently enabled. Note that it is -possible for the log tracker thread to be running and the tracking to be -disabled */ -UNIV_INTERN my_bool srv_track_changed_pages = FALSE; - -UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024; - -UNIV_INTERN ulonglong srv_max_changed_pages = 0; - -/** When TRUE, fake change transactions take S rather than X row locks. - When FALSE, row locks are not taken at all. */ -UNIV_INTERN my_bool srv_fake_changes_locks = TRUE; - -/* if TRUE, then we auto-extend the last data file */ -UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; -/* if != 0, this tells the max size auto-extending may increase the -last data file size */ -UNIV_INTERN ulint srv_last_file_size_max = 0; -/* If the last data file is auto-extended, we add this -many pages to it at a time */ -UNIV_INTERN ulong srv_auto_extend_increment = 8; -UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL; - -/* If the following is TRUE we do not allow inserts etc.
This protects -the user from forgetting the 'newraw' keyword to my.cnf */ - -UNIV_INTERN ibool srv_created_new_raw = FALSE; - -UNIV_INTERN char* srv_log_group_home_dir = NULL; - -UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX; -/* size in database pages */ -UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX; -UNIV_INTERN ib_uint64_t srv_log_file_size_requested; -/* size in database pages */ -UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX; -UNIV_INTERN uint srv_flush_log_at_timeout = 1; -UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF; -UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF; -UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE; - -/* Try to flush dirty pages so as to avoid IO bursts at -the checkpoints. */ -UNIV_INTERN char srv_adaptive_flushing = TRUE; - -UNIV_INTERN ulong srv_show_locks_held = 10; -UNIV_INTERN ulong srv_show_verbose_locks = 0; - -/** Maximum number of times allowed to conditionally acquire -mutex before switching to blocking wait on the mutex */ -#define MAX_MUTEX_NOWAIT 20 - -/** Check whether the number of failed nonblocking mutex -acquisition attempts exceeds maximum allowed value. If so, -srv_printf_innodb_monitor() will request mutex acquisition -with mutex_enter(), which will wait until it gets the mutex. 
*/ -#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) - -#ifdef WITH_INNODB_DISALLOW_WRITES -UNIV_INTERN os_event_t srv_allow_writes_event; -#endif /* WITH_INNODB_DISALLOW_WRITES */ - -/** The sort order table of the MySQL latin1_swedish_ci character set -collation */ -UNIV_INTERN const byte* srv_latin1_ordering; - -/* use os/external memory allocator */ -UNIV_INTERN my_bool srv_use_sys_malloc = TRUE; -/* requested size in kilobytes */ -UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX; -/* requested number of buffer pool instances */ -UNIV_INTERN ulint srv_buf_pool_instances = 1; -/* number of locks to protect buf_pool->page_hash */ -UNIV_INTERN ulong srv_n_page_hash_locks = 16; -/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/ -UNIV_INTERN ulong srv_LRU_scan_depth = 1024; -/** whether or not to flush neighbors of a block */ -UNIV_INTERN ulong srv_flush_neighbors = 1; -/* previously requested size */ -UNIV_INTERN ulint srv_buf_pool_old_size; -/* current size in kilobytes */ -UNIV_INTERN ulint srv_buf_pool_curr_size = 0; -/* dump that may % of each buffer pool during BP dump */ -UNIV_INTERN ulong srv_buf_pool_dump_pct; -/* size in bytes */ -UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; -UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; - -/* Defragmentation */ -UNIV_INTERN my_bool srv_defragment = FALSE; -UNIV_INTERN uint srv_defragment_n_pages = 7; -UNIV_INTERN uint srv_defragment_stats_accuracy = 0; -UNIV_INTERN uint srv_defragment_fill_factor_n_recs = 20; -UNIV_INTERN double srv_defragment_fill_factor = 0.9; -UNIV_INTERN uint srv_defragment_frequency = - SRV_DEFRAGMENT_FREQUENCY_DEFAULT; -UNIV_INTERN ulonglong srv_defragment_interval = 0; - -/** Query thread preflush algorithm */ -UNIV_INTERN ulong srv_foreground_preflush - = SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF; - -/** The maximum time limit for a single LRU tail flush iteration by the page -cleaner thread */ -UNIV_INTERN ulint srv_cleaner_max_lru_time = 1000; - -/** 
The maximum time limit for a single flush list flush iteration by the page -cleaner thread */ -UNIV_INTERN ulint srv_cleaner_max_flush_time = 1000; - -/** Page cleaner flush list flush batches are further divided into this chunk -size */ -UNIV_INTERN ulint srv_cleaner_flush_chunk_size = 100; - -/** Page cleaner LRU list flush batches are further divided into this chunk -size */ -UNIV_INTERN ulint srv_cleaner_lru_chunk_size = 100; - -/** If free list length is lower than this percentage of srv_LRU_scan_depth, -page cleaner LRU flushes will issue flush batches to the same instance in a -row */ -UNIV_INTERN ulint srv_cleaner_free_list_lwm = 10; - -/** If TRUE, page cleaner heuristics use evicted instead of flushed page counts -for its heuristics */ -UNIV_INTERN my_bool srv_cleaner_eviction_factor = FALSE; - -/** Page cleaner LSN age factor formula option */ -UNIV_INTERN ulong srv_cleaner_lsn_age_factor - = SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT; - -/** Empty free list for a query thread handling algorithm option */ -UNIV_INTERN ulong srv_empty_free_list_algorithm - = SRV_EMPTY_FREE_LIST_BACKOFF; - -UNIV_INTERN ulong srv_idle_flush_pct = 100; - -/* This parameter is deprecated. Use srv_n_io_[read|write]_threads -instead. */ -UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; -UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX; -UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX; - -/* Switch to enable random read ahead. */ -UNIV_INTERN my_bool srv_random_read_ahead = FALSE; - -/* The log block size */ -UNIV_INTERN ulint srv_log_block_size = 0; - -/* User settable value of the number of pages that must be present -in the buffer cache and accessed sequentially for InnoDB to trigger a -readahead request. 
*/ -UNIV_INTERN ulong srv_read_ahead_threshold = 56; - -#ifdef UNIV_LOG_ARCHIVE -UNIV_INTERN bool srv_log_archive_on; -UNIV_INTERN bool srv_archive_recovery; -UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - -/* This parameter is used to throttle the number of insert buffers that are -merged in a batch. By increasing this parameter on a faster disk you can -possibly reduce the number of I/O operations performed to complete the -merge operation. The value of this parameter is used as is by the -background loop when the system is idle (low load), on a busy system -the parameter is scaled down by a factor of 4, this is to avoid putting -a heavier load on the I/O sub system. */ - -UNIV_INTERN ulong srv_insert_buffer_batch_size = 20; - -UNIV_INTERN char* srv_file_flush_method_str = NULL; -UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC; -UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - -UNIV_INTERN ulint srv_max_n_open_files = 300; - -/* Number of IO operations per second the server can do */ -UNIV_INTERN ulong srv_io_capacity = 200; -UNIV_INTERN ulong srv_max_io_capacity = 400; - -/* The InnoDB main thread tries to keep the ratio of modified pages -in the buffer pool to all database pages in the buffer pool smaller than -the following number. But it is not guaranteed that the value stays below -that during a time of heavy update/insert activity. */ - -UNIV_INTERN double srv_max_buf_pool_modified_pct = 75.0; -UNIV_INTERN double srv_max_dirty_pages_pct_lwm = 50.0; - -/* This is the percentage of log capacity at which adaptive flushing, -if enabled, will kick in. */ -UNIV_INTERN double srv_adaptive_flushing_lwm = 10.0; - -/* Number of iterations over which adaptive flushing is averaged. 
*/ -UNIV_INTERN ulong srv_flushing_avg_loops = 30; - -/* The tid of the cleaner thread */ -UNIV_INTERN os_tid_t srv_cleaner_tid; - -/* The tid of the LRU manager thread */ -UNIV_INTERN os_tid_t srv_lru_manager_tid; - -/* The tids of the purge threads */ -UNIV_INTERN os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS]; - -/* The tids of the I/O threads */ -UNIV_INTERN os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS]; - -/* The tid of the master thread */ -UNIV_INTERN os_tid_t srv_master_tid; - -/* The relative scheduling priority of the cleaner and LRU manager threads */ -UNIV_INTERN ulint srv_sched_priority_cleaner = 19; - -/* The relative scheduling priority of the purge threads */ -UNIV_INTERN ulint srv_sched_priority_purge = 19; - -/* The relative scheduling priority of the I/O threads */ -UNIV_INTERN ulint srv_sched_priority_io = 19; - -/* The relative scheduling priority of the master thread */ -UNIV_INTERN ulint srv_sched_priority_master = 19; - -/* The relative priority of the current thread. If 0, low priority; if 1, high -priority. */ -UNIV_INTERN UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0; - -/* The relative priority of the purge coordinator and worker threads. */ -UNIV_INTERN my_bool srv_purge_thread_priority = FALSE; - -/* The relative priority of the I/O threads. */ -UNIV_INTERN my_bool srv_io_thread_priority = FALSE; - -/* The relative priority of the cleaner thread. */ -UNIV_INTERN my_bool srv_cleaner_thread_priority = FALSE; - -/* The relative priority of the master thread. */ -UNIV_INTERN my_bool srv_master_thread_priority = FALSE; - -/* The number of purge threads to use.*/ -UNIV_INTERN ulong srv_n_purge_threads; - -/* the number of pages to purge in one batch */ -UNIV_INTERN ulong srv_purge_batch_size = 20; - -/* Internal setting for "innodb_stats_method". Decides how InnoDB treats -NULL value when collecting statistics. By default, it is set to -SRV_STATS_NULLS_EQUAL(0), ie. 
all NULL value are treated equal */ -UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; - -UNIV_INTERN srv_stats_t srv_stats; - -/* structure to pass status variables to MySQL */ -UNIV_INTERN export_var_t export_vars; - -/** Normally 0. When nonzero, skip some phases of crash recovery, -starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered -by SELECT or mysqldump. When this is nonzero, we do not allow any user -modifications to the data. */ -UNIV_INTERN ulong srv_force_recovery; - -/** Print all user-level transactions deadlocks to mysqld stderr */ - -UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE; - -/* Produce a stacktrace on long semaphore wait */ -UNIV_INTERN my_bool srv_use_stacktrace = FALSE; - -/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */ -UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE; - -/* If the following is set to 1 then we do not run purge and insert buffer -merge to completion before shutdown. If it is set to 2, do not even flush the -buffer pool to data files at the shutdown: we effectively 'crash' -InnoDB (but lose no committed transactions). */ -UNIV_INTERN ulint srv_fast_shutdown = 0; - -/* Generate a innodb_status.<pid> file */ -UNIV_INTERN ibool srv_innodb_status = FALSE; - -/* Optimize prefix index queries to skip cluster index lookup when possible */ -/* Enables or disables this prefix optimization. Disabled by default. */ -UNIV_INTERN my_bool srv_prefix_index_cluster_optimization = 0; - -/* When estimating number of different key values in an index, sample -this many index pages, there are 2 ways to calculate statistics: -* persistent stats that are calculated by ANALYZE TABLE and saved - in the innodb database. 
-* quick transient stats, that are used if persistent stats for the given - table/index are not found in the innodb database */ -UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8; -UNIV_INTERN my_bool srv_stats_persistent = TRUE; -UNIV_INTERN my_bool srv_stats_include_delete_marked = FALSE; -UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20; -UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE; - -/* The number of rows modified before we calculate new statistics (default 0 -= current limits) */ -UNIV_INTERN unsigned long long srv_stats_modified_counter = 0; - -/* Enable traditional statistic calculation based on number of configured -pages default true. */ -UNIV_INTERN my_bool srv_stats_sample_traditional = TRUE; - -UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; - -/** doublewrite buffer is 1MB in size i.e.: it can hold 128 16K pages. -The following parameter is the size of the buffer that is used for -batch flushing i.e.: LRU flushing and flush_list flushing. The rest -of the pages are used for single page flushing. 
*/ -UNIV_INTERN ulong srv_doublewrite_batch_size = 120; - -UNIV_INTERN ulong srv_replication_delay = 0; - -UNIV_INTERN bool srv_apply_log_only; - -UNIV_INTERN bool srv_backup_mode; -UNIV_INTERN bool srv_close_files; -UNIV_INTERN bool srv_xtrabackup; - -UNIV_INTERN ulong srv_pass_corrupt_table = 0; /* 0:disable 1:enable */ - -UNIV_INTERN ulong srv_log_checksum_algorithm = - SRV_CHECKSUM_ALGORITHM_INNODB; - -/*-------------------------------------------*/ -#ifdef HAVE_MEMORY_BARRIER -/* No idea to wait long with memory barriers */ -UNIV_INTERN ulong srv_n_spin_wait_rounds = 15; -#else -UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; -#endif -UNIV_INTERN ulong srv_spin_wait_delay = 6; -UNIV_INTERN ibool srv_priority_boost = TRUE; - -#ifdef UNIV_DEBUG -UNIV_INTERN ibool srv_print_thread_releases = FALSE; -UNIV_INTERN ibool srv_print_lock_waits = FALSE; -UNIV_INTERN ibool srv_print_buf_io = FALSE; -UNIV_INTERN ibool srv_print_log_io = FALSE; -UNIV_INTERN ibool srv_print_latch_waits = FALSE; -#endif /* UNIV_DEBUG */ - -static ulint srv_n_rows_inserted_old = 0; -static ulint srv_n_rows_updated_old = 0; -static ulint srv_n_rows_deleted_old = 0; -static ulint srv_n_rows_read_old = 0; -static ulint srv_n_system_rows_inserted_old = 0; -static ulint srv_n_system_rows_updated_old = 0; -static ulint srv_n_system_rows_deleted_old = 0; -static ulint srv_n_system_rows_read_old = 0; - -UNIV_INTERN ulint srv_truncated_status_writes = 0; -UNIV_INTERN ulint srv_available_undo_logs = 0; - -UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; -UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; -UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; -UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; -UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0; -UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; -UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; -UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; -UNIV_INTERN ib_uint64_t 
srv_index_page_decompressed = 0; - -/* Ensure status variables are on separate cache lines */ - -#define CACHE_ALIGNED MY_ATTRIBUTE((aligned (CACHE_LINE_SIZE))) - -UNIV_INTERN byte -counters_pad_start[CACHE_LINE_SIZE] MY_ATTRIBUTE((unused)) = {0}; - -UNIV_INTERN ulint srv_read_views_memory CACHE_ALIGNED = 0; -UNIV_INTERN ulint srv_descriptors_memory CACHE_ALIGNED = 0; - -UNIV_INTERN byte -counters_pad_end[CACHE_LINE_SIZE] MY_ATTRIBUTE((unused)) = {0}; - -/* Set the following to 0 if you want InnoDB to write messages on -stderr on startup/shutdown. */ -UNIV_INTERN ibool srv_print_verbose_log = TRUE; -UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE; -UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE; -UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE; -UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE; - -/** If this flag is set tables without primary key are not allowed */ -UNIV_INTERN my_bool srv_force_primary_key = FALSE; - -/* Array of English strings describing the current state of an -i/o handler thread */ - -UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; -UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; - -UNIV_INTERN time_t srv_last_monitor_time; - -static ib_mutex_t srv_innodb_monitor_mutex; - -/* Mutex for locking srv_monitor_file. 
Not created if srv_read_only_mode */ -UNIV_INTERN ib_mutex_t srv_monitor_file_mutex; - -#ifdef UNIV_PFS_MUTEX -# ifndef HAVE_ATOMIC_BUILTINS -/* Key to register server_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t server_mutex_key; -# endif /* !HAVE_ATOMIC_BUILTINS */ -/** Key to register srv_innodb_monitor_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key; -/** Key to register srv_monitor_file_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key; -/** Key to register srv_dict_tmpfile_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key; -/** Key to register srv_misc_tmpfile_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key; -/** Key to register srv_sys_t::mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key; -/** Key to register srv_sys_t::tasks_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/** Temporary file for innodb monitor output */ -UNIV_INTERN FILE* srv_monitor_file; -/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode. -This mutex has a very high rank; threads reserving it should not -be holding any InnoDB latches. */ -UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex; -/** Temporary file for output from the data dictionary */ -UNIV_INTERN FILE* srv_dict_tmpfile; -/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode. -This mutex has a very low rank; threads reserving it should not -acquire any further latches or sleep before releasing this one. */ -UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex; -/** Temporary file for miscellaneous diagnostic output */ -UNIV_INTERN FILE* srv_misc_tmpfile; - -UNIV_INTERN ulint srv_main_thread_process_no = 0; -UNIV_INTERN ulint srv_main_thread_id = 0; - -/* The following counts are used by the srv_master_thread. 
*/ - -/** Iterations of the loop bounded by 'srv_active' label. */ -static ulint srv_main_active_loops = 0; -/** Iterations of the loop bounded by the 'srv_idle' label. */ -static ulint srv_main_idle_loops = 0; -/** Iterations of the loop bounded by the 'srv_shutdown' label. */ -static ulint srv_main_shutdown_loops = 0; -/** Log writes involving flush. */ -static ulint srv_log_writes_and_flush = 0; - -/* This is only ever touched by the master thread. It records the -time when the last flush of log file has happened. The master -thread ensures that we flush the log files at least once per -second. */ -static time_t srv_last_log_flush_time; - -/** Enable semaphore request instrumentation */ -UNIV_INTERN my_bool srv_instrument_semaphores = FALSE; - -/* Interval in seconds at which various tasks are performed by the -master thread when server is active. In order to balance the workload, -we should try to keep intervals such that they are not multiples of -each other. For example, if we have intervals for various tasks -defined as 5, 10, 15, 60 then all tasks will be performed when -current_time % 60 == 0 and no tasks will be performed when -current_time % 5 != 0. */ - -# define SRV_MASTER_CHECKPOINT_INTERVAL (7) -# define SRV_MASTER_PURGE_INTERVAL (10) -#ifdef MEM_PERIODIC_CHECK -# define SRV_MASTER_MEM_VALIDATE_INTERVAL (13) -#endif /* MEM_PERIODIC_CHECK */ -# define SRV_MASTER_DICT_LRU_INTERVAL (47) - -/** Buffer pool dump status frequency in percentages */ -UNIV_INTERN ulong srv_buf_dump_status_frequency = 0; - -/** Acquire the system_mutex. */ -#define srv_sys_mutex_enter() do { \ - mutex_enter(&srv_sys.mutex); \ -} while (0) - -/** Test if the system mutex is owned. */ -#define srv_sys_mutex_own() (mutex_own(&srv_sys.mutex) \ - && !srv_read_only_mode) - -/** Release the system mutex. */ -#define srv_sys_mutex_exit() do { \ - mutex_exit(&srv_sys.mutex); \ -} while (0) - -#define fetch_lock_wait_timeout(trx) \ - ((trx)->lock.allowed_to_wait \ - ? 
thd_lock_wait_timeout((trx)->mysql_thd) \ - : 0) - -/** Simulate compression failures. */ -UNIV_INTERN uint srv_simulate_comp_failures = 0; - -/* - IMPLEMENTATION OF THE SERVER MAIN PROGRAM - ========================================= - -There is the following analogue between this database -server and an operating system kernel: - -DB concept equivalent OS concept ----------- --------------------- -transaction -- process; - -query thread -- thread; - -lock -- semaphore; - -kernel -- kernel; - -query thread execution: -(a) without lock mutex -reserved -- process executing in user mode; -(b) with lock mutex reserved - -- process executing in kernel mode; - -The server has several background threads all running at the same -priority as user threads. It periodically checks if there is anything -happening in the server which requires intervention of the master -thread. Such situations may be, for example, when flushing of dirty -blocks is needed in the buffer pool or old versions of database rows -have to be cleaned away (purged). The user can configure a separate -dedicated purge thread(s) too, in which case the master thread does not -do any purging. - -The threads which we call user threads serve the queries of the MySQL -server. They run at normal priority. - -When there is no activity in the system, also the master thread -suspends itself to wait for an event making the server totally silent. - -There is still one complication in our server design. If a -background utility thread obtains a resource (e.g., mutex) needed by a user -thread, and there is also some other user activity in the system, -the user thread may have to wait indefinitely long for the -resource, as the OS does not schedule a background thread if -there is some other runnable user thread. This problem is called -priority inversion in real-time programming. 

One solution to the priority inversion problem would be to keep record
of which thread owns which resource and in the above case boost the
priority of the background thread so that it will be scheduled and it
can release the resource. This solution is called priority inheritance
in real-time programming. A drawback of this solution is that the overhead
of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
that the thread cannot store the information in the resource, say a mutex,
itself, because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it is stored afterwards, the
information is outdated for the time of one machine instruction, at least.
(To be precise, the information could be stored to lock_word in mutex if
the machine supports atomic swap.)

The above solution with priority inheritance may become relevant in the
future; currently we do not implement any priority twiddling solution.
Our general aim is to reduce the contention of all mutexes by making
them more fine grained.

The thread table contains information of the current status of each
thread existing in the system, and also the event semaphores used in
suspending the master thread and utility threads when they have nothing
to do. The thread table can be seen as an analogue to the process table
in a traditional Unix implementation. */

/** The server system struct: the task queue and the thread table, each
guarded by its own mutex. */
struct srv_sys_t{
	ib_mutex_t	tasks_mutex;		/*!< mutex protecting the
						tasks queue */
	UT_LIST_BASE_NODE_T(que_thr_t)
			tasks;			/*!< task queue */

	ib_mutex_t	mutex;			/*!< mutex protecting the
						fields below. */
	ulint		n_sys_threads;		/*!< number of slots of the
						sys_threads array that are
						in use; set in srv_init() */

	srv_slot_t	sys_threads[32 + 1];	/*!< server thread table;
						slot SRV_MASTER_SLOT belongs
						to the master thread, slot
						SRV_PURGE_SLOT to the purge
						thread, and worker threads
						take slots from index 2 on;
						os_event_set() and
						os_event_reset() on
						sys_threads[]->event are
						covered by srv_sys_t::mutex */

	ulint		n_threads_active[SRV_MASTER + 1];
						/*!< number of threads active
						in a thread class, indexed
						by srv_thread_type */

	srv_stats_t::ulint_ctr_1_t
			activity_count;		/*!< For tracking server
						activity */
	srv_stats_t::ulint_ctr_1_t
			ibuf_merge_activity_count;/*!< For tracking change
						buffer merge activity, a subset
						of overall server activity */
};

#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. */
UNIV_INTERN ib_mutex_t	server_mutex;
#endif /* !HAVE_ATOMIC_BUILTINS */

/** The single instance of the server system struct. */
static srv_sys_t	srv_sys;

/** Event to signal srv_monitor_thread. Not protected by a mutex.
Set after setting srv_print_innodb_monitor. */
UNIV_INTERN os_event_t	srv_monitor_event;

/** Event to signal the shutdown of srv_error_monitor_thread.
Not protected by a mutex. */
UNIV_INTERN os_event_t	srv_error_event;

/** Event for waking up buf_dump_thread. Not protected by a mutex.
Set on shutdown or by buf_dump_start() or buf_load_start(). */
UNIV_INTERN os_event_t	srv_buf_dump_event;

/** The buffer pool dump/load file name */
UNIV_INTERN char*	srv_buf_dump_filename;

/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
and/or load it during startup. */
UNIV_INTERN char	srv_buffer_pool_dump_at_shutdown = FALSE;
UNIV_INTERN char	srv_buffer_pool_load_at_startup = FALSE;

/** Slot index in the srv_sys.sys_threads array for the purge thread. */
static const ulint	SRV_PURGE_SLOT = 1;

/** Slot index in the srv_sys.sys_threads array for the master thread. */
static const ulint	SRV_MASTER_SLOT = 0;

/* Created in srv_init() and freed in srv_free(), unless
srv_read_only_mode is set. */
UNIV_INTERN os_event_t	srv_checkpoint_completed_event;

/* Created in srv_init() and freed in srv_free(), unless
srv_read_only_mode is set; srv_init() also sets it right away when
srv_track_changed_pages is enabled. */
UNIV_INTERN os_event_t	srv_redo_log_tracked_event;

/** Whether the redo log tracker thread has been started.
Does not take into -account whether the tracking is currently enabled (see srv_track_changed_pages -for that) */ -UNIV_INTERN bool srv_redo_log_thread_started = false; - -/*********************************************************************//** -Prints counters for work done by srv_master_thread. */ -static -void -srv_print_master_thread_info( -/*=========================*/ - FILE *file) /* in: output stream */ -{ - fprintf(file, "srv_master_thread loops: %lu srv_active, " - "%lu srv_shutdown, %lu srv_idle\n", - srv_main_active_loops, - srv_main_shutdown_loops, - srv_main_idle_loops); - fprintf(file, "srv_master_thread log flush and writes: %lu\n", - srv_log_writes_and_flush); -} - -/*********************************************************************//** -Sets the info describing an i/o thread current state. */ -UNIV_INTERN -void -srv_set_io_thread_op_info( -/*======================*/ - ulint i, /*!< in: the 'segment' of the i/o thread */ - const char* str) /*!< in: constant char string describing the - state */ -{ - ut_a(i < SRV_MAX_N_IO_THREADS); - - srv_io_thread_op_info[i] = str; -} - -/*********************************************************************//** -Resets the info describing an i/o thread current state. */ -UNIV_INTERN -void -srv_reset_io_thread_op_info() -/*=========================*/ -{ - for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) { - srv_io_thread_op_info[i] = "not started yet"; - } -} - -#ifdef UNIV_DEBUG -/*********************************************************************//** -Validates the type of a thread table slot. 
-@return TRUE if ok */ -static -ibool -srv_thread_type_validate( -/*=====================*/ - srv_thread_type type) /*!< in: thread type */ -{ - switch (type) { - case SRV_NONE: - break; - case SRV_WORKER: - case SRV_PURGE: - case SRV_MASTER: - return(TRUE); - } - ut_error; - return(FALSE); -} -#endif /* UNIV_DEBUG */ - -/*********************************************************************//** -Gets the type of a thread table slot. -@return thread type */ -static -srv_thread_type -srv_slot_get_type( -/*==============*/ - const srv_slot_t* slot) /*!< in: thread slot */ -{ - srv_thread_type type = slot->type; - ut_ad(srv_thread_type_validate(type)); - return(type); -} - -/*********************************************************************//** -Reserves a slot in the thread table for the current thread. -@return reserved slot */ -static -srv_slot_t* -srv_reserve_slot( -/*=============*/ - srv_thread_type type) /*!< in: type of the thread */ -{ - srv_slot_t* slot = 0; - - srv_sys_mutex_enter(); - - ut_ad(srv_thread_type_validate(type)); - - switch (type) { - case SRV_MASTER: - slot = &srv_sys.sys_threads[SRV_MASTER_SLOT]; - break; - - case SRV_PURGE: - slot = &srv_sys.sys_threads[SRV_PURGE_SLOT]; - break; - - case SRV_WORKER: - /* Find an empty slot, skip the master and purge slots. */ - for (slot = &srv_sys.sys_threads[2]; - slot->in_use; - ++slot) { - - ut_a(slot < &srv_sys.sys_threads[ - srv_sys.n_sys_threads]); - } - break; - - case SRV_NONE: - ut_error; - } - - ut_a(!slot->in_use); - - slot->in_use = TRUE; - slot->suspended = FALSE; - slot->type = type; - - ut_ad(srv_slot_get_type(slot) == type); - - ++srv_sys.n_threads_active[type]; - - srv_sys_mutex_exit(); - - return(slot); -} - -/*********************************************************************//** -Suspends the calling thread to wait for the event in its thread slot. -@return the current signal count of the event. 
*/ -static -ib_int64_t -srv_suspend_thread_low( -/*===================*/ - srv_slot_t* slot) /*!< in/out: thread slot */ -{ - ut_ad(!srv_read_only_mode); - ut_ad(srv_sys_mutex_own()); - - ut_ad(slot->in_use); - - srv_thread_type type = srv_slot_get_type(slot); - - switch (type) { - case SRV_NONE: - ut_error; - - case SRV_MASTER: - /* We have only one master thread and it - should be the first entry always. */ - ut_a(srv_sys.n_threads_active[type] == 1); - break; - - case SRV_PURGE: - /* We have only one purge coordinator thread - and it should be the second entry always. */ - ut_a(srv_sys.n_threads_active[type] == 1); - break; - - case SRV_WORKER: - ut_a(srv_n_purge_threads > 1); - ut_a(srv_sys.n_threads_active[type] > 0); - break; - } - - ut_a(!slot->suspended); - slot->suspended = TRUE; - - ut_a(srv_sys.n_threads_active[type] > 0); - - srv_sys.n_threads_active[type]--; - - return(os_event_reset(slot->event)); -} - -/*********************************************************************//** -Suspends the calling thread to wait for the event in its thread slot. -@return the current signal count of the event. */ -static -ib_int64_t -srv_suspend_thread( -/*===============*/ - srv_slot_t* slot) /*!< in/out: thread slot */ -{ - srv_sys_mutex_enter(); - - ib_int64_t sig_count = srv_suspend_thread_low(slot); - - srv_sys_mutex_exit(); - - return(sig_count); -} - -/** Resume the calling thread. 
-@param[in,out] slot thread slot -@param[in] sig_count signal count (if wait) -@param[in] wait whether to wait for the event -@param[in] timeout_usec timeout in microseconds (0=infinite) -@return whether the wait timed out */ -static -bool -srv_resume_thread(srv_slot_t* slot, ib_int64_t sig_count = 0, bool wait = true, - ulint timeout_usec = 0) -{ - bool timeout; - - ut_ad(!srv_read_only_mode); - ut_ad(slot->in_use); - ut_ad(slot->suspended); - - if (!wait) { - timeout = false; - } else if (timeout_usec) { - timeout = OS_SYNC_TIME_EXCEEDED == os_event_wait_time_low( - slot->event, timeout_usec, sig_count); - } else { - timeout = false; - os_event_wait_low(slot->event, sig_count); - } - - srv_sys_mutex_enter(); - ut_ad(slot->in_use); - ut_ad(slot->suspended); - - slot->suspended = FALSE; - ++srv_sys.n_threads_active[slot->type]; - srv_sys_mutex_exit(); - return(timeout); -} - -/** Ensure that a given number of threads of the type given are running -(or are already terminated). -@param[in] type thread type -@param[in] n number of threads that have to run */ -void -srv_release_threads(enum srv_thread_type type, ulint n) -{ - ulint running; - - ut_ad(srv_thread_type_validate(type)); - ut_ad(n > 0); - - do { - running = 0; - - srv_sys_mutex_enter(); - - for (ulint i = 0; i < srv_sys.n_sys_threads; i++) { - srv_slot_t* slot = &srv_sys.sys_threads[i]; - - if (!slot->in_use || srv_slot_get_type(slot) != type) { - continue; - } else if (!slot->suspended) { - if (++running >= n) { - break; - } - continue; - } - - switch (type) { - case SRV_NONE: - ut_error; - - case SRV_MASTER: - /* We have only one master thread and it - should be the first entry always. */ - ut_a(n == 1); - ut_a(i == SRV_MASTER_SLOT); - ut_a(srv_sys.n_threads_active[type] == 0); - break; - - case SRV_PURGE: - /* We have only one purge coordinator thread - and it should be the second entry always. 
*/ - ut_a(n == 1); - ut_a(i == SRV_PURGE_SLOT); - ut_a(srv_n_purge_threads > 0); - ut_a(srv_sys.n_threads_active[type] == 0); - break; - - case SRV_WORKER: - ut_a(srv_n_purge_threads > 1); - ut_a(srv_sys.n_threads_active[type] - < srv_n_purge_threads - 1); - break; - } - - os_event_set(slot->event); - } - - srv_sys_mutex_exit(); - } while (running && running < n); -} - -/*********************************************************************//** -Release a thread's slot. */ -static -void -srv_free_slot( -/*==========*/ - srv_slot_t* slot) /*!< in/out: thread slot */ -{ - srv_sys_mutex_enter(); - - /* Mark the thread as inactive. */ - srv_suspend_thread_low(slot); - /* Free the slot for reuse. */ - ut_ad(slot->in_use); - slot->in_use = FALSE; - - srv_sys_mutex_exit(); -} - -/*********************************************************************//** -Initializes the server. */ -UNIV_INTERN -void -srv_init(void) -/*==========*/ -{ -#ifndef HAVE_ATOMIC_BUILTINS - mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH); -#endif /* !HAVE_ATOMIC_BUILTINS */ - - mutex_create(srv_innodb_monitor_mutex_key, - &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); - - srv_sys.n_sys_threads = srv_read_only_mode - ? 
0 - : srv_n_purge_threads + 1/* purge coordinator */; - - if (!srv_read_only_mode) { - - mutex_create(srv_sys_mutex_key, &srv_sys.mutex, SYNC_THREADS); - - mutex_create(srv_sys_tasks_mutex_key, - &srv_sys.tasks_mutex, SYNC_ANY_LATCH); - - for (ulint i = 0; i < srv_sys.n_sys_threads; ++i) { - srv_slot_t* slot = &srv_sys.sys_threads[i]; - - slot->event = os_event_create(); - - ut_a(slot->event); - } - - srv_error_event = os_event_create(); - - srv_monitor_event = os_event_create(); - - srv_buf_dump_event = os_event_create(); - - srv_checkpoint_completed_event = os_event_create(); - - srv_redo_log_tracked_event = os_event_create(); - - if (srv_track_changed_pages) { - os_event_set(srv_redo_log_tracked_event); - } - } - - /* page_zip_stat_per_index_mutex is acquired from: - 1. page_zip_compress() (after SYNC_FSP) - 2. page_zip_decompress() - 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired) - 4. innodb_cmp_per_index_update(), no other latches - since we do not acquire any other latches while holding this mutex, - it can have very low level. We pick SYNC_ANY_LATCH for it. */ - - mutex_create( - page_zip_stat_per_index_mutex_key, - &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH); - - /* Create dummy indexes for infimum and supremum records */ - - dict_ind_init(); - - srv_conc_init(); -#ifdef WITH_INNODB_DISALLOW_WRITES - /* Writes have to be enabled on init or else we hang. Thus, we - always set the event here regardless of innobase_disallow_writes. - That flag will always be 0 at this point because it isn't settable - via my.cnf or command line arg. */ - srv_allow_writes_event = os_event_create(); - os_event_set(srv_allow_writes_event); -#endif /* WITH_INNODB_DISALLOW_WRITES */ - - /* Initialize some INFORMATION SCHEMA internal structures */ - trx_i_s_cache_init(trx_i_s_cache); - - ut_crc32_init(); - - dict_mem_init(); -} - -/*********************************************************************//** -Frees the data structures created in srv_init(). 
*/ -UNIV_INTERN -void -srv_free(void) -/*==========*/ -{ - srv_conc_free(); - - if (!srv_read_only_mode) { - - for (ulint i = 0; i < srv_sys.n_sys_threads; i++) - os_event_free(srv_sys.sys_threads[i].event); - - os_event_free(srv_error_event); - srv_error_event = NULL; - os_event_free(srv_monitor_event); - srv_monitor_event = NULL; - os_event_free(srv_buf_dump_event); - srv_buf_dump_event = NULL; - os_event_free(srv_checkpoint_completed_event); - srv_checkpoint_completed_event = NULL; - os_event_free(srv_redo_log_tracked_event); - srv_redo_log_tracked_event = NULL; - mutex_free(&srv_sys.mutex); - mutex_free(&srv_sys.tasks_mutex); - } - -#ifdef WITH_INNODB_DISALLOW_WRITES - os_event_free(srv_allow_writes_event); - srv_allow_writes_event = NULL; -#endif /* WITH_INNODB_DISALLOW_WRITES */ - -#ifndef HAVE_ATOMIC_BUILTINS - mutex_free(&server_mutex); -#endif - mutex_free(&srv_innodb_monitor_mutex); - mutex_free(&page_zip_stat_per_index_mutex); - - trx_i_s_cache_free(trx_i_s_cache); - - /* This is needed for Mariabackup. */ - memset(&srv_sys, 0, sizeof srv_sys); -} - -/*********************************************************************//** -Initializes the synchronization primitives, memory system, and the thread -local storage. */ -UNIV_INTERN -void -srv_general_init(void) -/*==================*/ -{ - ut_mem_init(); - /* Reset the system variables in the recovery module. */ - recv_sys_var_init(); - os_sync_init(); - sync_init(); - mem_init(srv_mem_pool_size); - que_init(); - row_mysql_init(); -} - -/*********************************************************************//** -Normalizes init parameter values to use units we use inside InnoDB. 
*/ -static -void -srv_normalize_init_values(void) -/*===========================*/ -{ - ulint n; - ulint i; - - n = srv_n_data_files; - - for (i = 0; i < n; i++) { - srv_data_file_sizes[i] = srv_data_file_sizes[i] - * ((1024 * 1024) / UNIV_PAGE_SIZE); - } - - srv_last_file_size_max = srv_last_file_size_max - * ((1024 * 1024) / UNIV_PAGE_SIZE); - - srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE; - - srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; - - srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE); -} - -/*********************************************************************//** -Boots the InnoDB server. */ -UNIV_INTERN -void -srv_boot(void) -/*==========*/ -{ - /* Transform the init parameter values given by MySQL to - use units we use inside InnoDB: */ - - srv_normalize_init_values(); - - /* Initialize synchronization primitives, memory management, and thread - local storage */ - - srv_general_init(); - - /* Initialize this module */ - - srv_init(); - srv_mon_create(); -} - -/******************************************************************//** -Refreshes the values used to calculate per-second averages. 
*/ -static -void -srv_refresh_innodb_monitor_stats(void) -/*==================================*/ -{ - mutex_enter(&srv_innodb_monitor_mutex); - - srv_last_monitor_time = time(NULL); - - os_aio_refresh_stats(); - - btr_cur_n_sea_old = btr_cur_n_sea; - btr_cur_n_non_sea_old = btr_cur_n_non_sea; - - log_refresh_stats(); - - buf_refresh_io_stats_all(); - - srv_n_rows_inserted_old = srv_stats.n_rows_inserted; - srv_n_rows_updated_old = srv_stats.n_rows_updated; - srv_n_rows_deleted_old = srv_stats.n_rows_deleted; - srv_n_rows_read_old = srv_stats.n_rows_read; - - srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted; - srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated; - srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted; - srv_n_system_rows_read_old = srv_stats.n_system_rows_read; - - mutex_exit(&srv_innodb_monitor_mutex); -} - -/******************************************************************//** -Outputs to a file the output of the InnoDB Monitor. -@return FALSE if not all information printed -due to failure to obtain necessary mutex */ -UNIV_INTERN -ibool -srv_printf_innodb_monitor( -/*======================*/ - FILE* file, /*!< in: output stream */ - ibool nowait, /*!< in: whether to wait for the - lock_sys_t:: mutex */ - ulint* trx_start_pos, /*!< out: file position of the start of - the list of active transactions */ - ulint* trx_end) /*!< out: file position of the end of - the list of active transactions */ -{ - double time_elapsed; - time_t current_time; - ulint n_reserved; - ibool ret; - - ulong btr_search_sys_constant; - ulong btr_search_sys_variable; - ulint lock_sys_subtotal; - ulint recv_sys_subtotal; - - ulint i; - trx_t* trx; - - mutex_enter(&srv_innodb_monitor_mutex); - - current_time = time(NULL); - - /* We add 0.001 seconds to time_elapsed to prevent division - by zero if two users happen to call SHOW ENGINE INNODB STATUS at the - same time */ - - time_elapsed = difftime(current_time, srv_last_monitor_time) - + 
0.001; - - srv_last_monitor_time = time(NULL); - - fputs("\n=====================================\n", file); - - ut_print_timestamp(file); - fprintf(file, - " INNODB MONITOR OUTPUT\n" - "=====================================\n" - "Per second averages calculated from the last %lu seconds\n", - (ulong) time_elapsed); - - fputs("-----------------\n" - "BACKGROUND THREAD\n" - "-----------------\n", file); - srv_print_master_thread_info(file); - - fputs("----------\n" - "SEMAPHORES\n" - "----------\n", file); - sync_print(file); - - /* Conceptually, srv_innodb_monitor_mutex has a very high latching - order level in sync0sync.h, while dict_foreign_err_mutex has a very - low level 135. Therefore we can reserve the latter mutex here without - a danger of a deadlock of threads. */ - - if (!recv_recovery_on) { - - mutex_enter(&dict_foreign_err_mutex); - - if (!srv_read_only_mode - && ftell(dict_foreign_err_file) != 0L) { - fputs("------------------------\n" - "LATEST FOREIGN KEY ERROR\n" - "------------------------\n", file); - ut_copy_file(file, dict_foreign_err_file); - } - - mutex_exit(&dict_foreign_err_mutex); - } - - /* Only if lock_print_info_summary proceeds correctly, - before we call the lock_print_info_all_transactions - to print all the lock information. IMPORTANT NOTE: This - function acquires the lock mutex on success. */ - ret = recv_recovery_on ? FALSE : lock_print_info_summary(file, nowait); - - if (ret) { - if (trx_start_pos) { - long t = ftell(file); - if (t < 0) { - *trx_start_pos = ULINT_UNDEFINED; - } else { - *trx_start_pos = (ulint) t; - } - } - - /* NOTE: If we get here then we have the lock mutex. This - function will release the lock mutex that we acquired when - we called the lock_print_info_summary() function earlier. 
*/ - - lock_print_info_all_transactions(file); - - if (trx_end) { - long t = ftell(file); - if (t < 0) { - *trx_end = ULINT_UNDEFINED; - } else { - *trx_end = (ulint) t; - } - } - } - - fputs("--------\n" - "FILE I/O\n" - "--------\n", file); - os_aio_print(file); - - if (!recv_recovery_on) { - - fputs("-------------------------------------\n" - "INSERT BUFFER AND ADAPTIVE HASH INDEX\n" - "-------------------------------------\n", file); - ibuf_print(file); - } - - - fprintf(file, - "%.2f hash searches/s, %.2f non-hash searches/s\n", - (btr_cur_n_sea - btr_cur_n_sea_old) - / time_elapsed, - (btr_cur_n_non_sea - btr_cur_n_non_sea_old) - / time_elapsed); - btr_cur_n_sea_old = btr_cur_n_sea; - btr_cur_n_non_sea_old = btr_cur_n_non_sea; - - if (!recv_recovery_on) { - - fputs("---\n" - "LOG\n" - "---\n", file); - log_print(file); - } - - fputs("----------------------\n" - "BUFFER POOL AND MEMORY\n" - "----------------------\n", file); - fprintf(file, - "Total memory allocated " ULINTPF - "; in additional pool allocated " ULINTPF "\n", - ut_total_allocated_memory, - mem_pool_get_reserved(mem_comm_pool)); - - fprintf(file, - "Total memory allocated by read views " ULINTPF "\n", - os_atomic_increment_ulint(&srv_read_views_memory, 0)); - - /* Calculate AHI constant and variable memory allocations */ - - btr_search_sys_constant = 0; - btr_search_sys_variable = 0; - - ut_ad(btr_search_sys->hash_tables); - - for (i = 0; i < btr_search_index_num; i++) { - hash_table_t* ht = btr_search_sys->hash_tables[i]; - - ut_ad(ht); - ut_ad(ht->heap); - - /* Multiple mutexes/heaps are currently never used for adaptive - hash index tables. 
*/ - ut_ad(!ht->n_sync_obj); - ut_ad(!ht->heaps); - - btr_search_sys_variable += mem_heap_get_size(ht->heap); - btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t); - } - - lock_sys_subtotal = 0; - if (trx_sys) { - mutex_enter(&trx_sys->mutex); - trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); - while (trx) { - lock_sys_subtotal - += ((trx->lock.lock_heap) - ? mem_heap_get_size(trx->lock.lock_heap) - : 0); - trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); - } - mutex_exit(&trx_sys->mutex); - } - - recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash) - ? mem_heap_get_size(recv_sys->heap) : 0); - - fprintf(file, - "Internal hash tables (constant factor + variable factor)\n" - " Adaptive hash index %lu \t(%lu + " ULINTPF ")\n" - " Page hash %lu (buffer pool 0 only)\n" - " Dictionary cache %lu \t(%lu + " ULINTPF ")\n" - " File system %lu \t(%lu + " ULINTPF ")\n" - " Lock system %lu \t(%lu + " ULINTPF ")\n" - " Recovery system %lu \t(%lu + " ULINTPF ")\n", - - btr_search_sys_constant + btr_search_sys_variable, - btr_search_sys_constant, - btr_search_sys_variable, - - (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)), - - (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells - + dict_sys->table_id_hash->n_cells - ) * sizeof(hash_cell_t) - + dict_sys->size) : 0), - (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells - + dict_sys->table_id_hash->n_cells - ) * sizeof(hash_cell_t)) : 0), - dict_sys ? (dict_sys->size) : 0, - - (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t) - + fil_system_hash_nodes()), - (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)), - fil_system_hash_nodes(), - - (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0) - + lock_sys_subtotal), - (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0), - lock_sys_subtotal, - - (ulong) (((recv_sys && recv_sys->addr_hash) - ? 
(recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0) - + recv_sys_subtotal), - (ulong) ((recv_sys && recv_sys->addr_hash) - ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0), - recv_sys_subtotal); - - - fprintf(file, "Dictionary memory allocated " ULINTPF "\n", - dict_sys ? dict_sys->size : 0); - - buf_print_io(file); - - fputs("--------------\n" - "ROW OPERATIONS\n" - "--------------\n", file); - fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n", - (long) srv_conc_get_active_threads(), - srv_conc_get_waiting_threads()); - - mutex_enter(&trx_sys->mutex); - - fprintf(file, "%lu read views open inside InnoDB\n", - UT_LIST_GET_LEN(trx_sys->view_list)); - - fprintf(file, "%lu RW transactions active inside InnoDB\n", - UT_LIST_GET_LEN(trx_sys->rw_trx_list)); - - fprintf(file, "%lu RO transactions active inside InnoDB\n", - UT_LIST_GET_LEN(trx_sys->ro_trx_list)); - - fprintf(file, "%lu out of %lu descriptors used\n", - trx_sys->descr_n_used, trx_sys->descr_n_max); - - if (UT_LIST_GET_LEN(trx_sys->view_list)) { - read_view_t* view = UT_LIST_GET_LAST(trx_sys->view_list); - - if (view) { - fprintf(file, "---OLDEST VIEW---\n"); - read_view_print(file, view); - fprintf(file, "-----------------\n"); - } - } - - mutex_exit(&trx_sys->mutex); - - n_reserved = fil_space_get_n_reserved_extents(0); - if (n_reserved > 0) { - fprintf(file, - "%lu tablespace extents now reserved for" - " B-tree split operations\n", - (ulong) n_reserved); - } - -#ifdef UNIV_LINUX - fprintf(file, "Main thread process no. 
%lu, id %lu, state: %s\n", - (ulong) srv_main_thread_process_no, - (ulong) srv_main_thread_id, - srv_main_thread_op_info); -#else - fprintf(file, "Main thread id %lu, state: %s\n", - (ulong) srv_main_thread_id, - srv_main_thread_op_info); -#endif - fprintf(file, - "Number of rows inserted " ULINTPF - ", updated " ULINTPF ", deleted " ULINTPF - ", read " ULINTPF "\n", - (ulint) srv_stats.n_rows_inserted, - (ulint) srv_stats.n_rows_updated, - (ulint) srv_stats.n_rows_deleted, - (ulint) srv_stats.n_rows_read); - fprintf(file, - "%.2f inserts/s, %.2f updates/s," - " %.2f deletes/s, %.2f reads/s\n", - ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old) - / time_elapsed, - ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old) - / time_elapsed, - ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old) - / time_elapsed, - ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old) - / time_elapsed); - fprintf(file, - "Number of system rows inserted " ULINTPF - ", updated " ULINTPF ", deleted " ULINTPF - ", read " ULINTPF "\n", - (ulint) srv_stats.n_system_rows_inserted, - (ulint) srv_stats.n_system_rows_updated, - (ulint) srv_stats.n_system_rows_deleted, - (ulint) srv_stats.n_system_rows_read); - fprintf(file, - "%.2f inserts/s, %.2f updates/s," - " %.2f deletes/s, %.2f reads/s\n", - ((ulint) srv_stats.n_system_rows_inserted - - srv_n_system_rows_inserted_old) / time_elapsed, - ((ulint) srv_stats.n_system_rows_updated - - srv_n_system_rows_updated_old) / time_elapsed, - ((ulint) srv_stats.n_system_rows_deleted - - srv_n_system_rows_deleted_old) / time_elapsed, - ((ulint) srv_stats.n_system_rows_read - - srv_n_system_rows_read_old) / time_elapsed); - srv_n_rows_inserted_old = srv_stats.n_rows_inserted; - srv_n_rows_updated_old = srv_stats.n_rows_updated; - srv_n_rows_deleted_old = srv_stats.n_rows_deleted; - srv_n_rows_read_old = srv_stats.n_rows_read; - srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted; - srv_n_system_rows_updated_old = 
srv_stats.n_system_rows_updated; - srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted; - srv_n_system_rows_read_old = srv_stats.n_system_rows_read; - - fputs("----------------------------\n" - "END OF INNODB MONITOR OUTPUT\n" - "============================\n", file); - mutex_exit(&srv_innodb_monitor_mutex); - fflush(file); - -#ifndef DBUG_OFF - srv_debug_monitor_printed = true; -#endif - - return(ret); -} - -/******************************************************************//** -Function to pass InnoDB status variables to MySQL */ -UNIV_INTERN -void -srv_export_innodb_status(void) -/*==========================*/ -{ - buf_pool_stat_t stat; - buf_pools_list_size_t buf_pools_list_size; - ulint LRU_len; - ulint free_len; - ulint flush_list_len; - ulint mem_adaptive_hash, mem_dictionary; - read_view_t* oldest_view; - ulint i; - fil_crypt_stat_t crypt_stat; - btr_scrub_stat_t scrub_stat; - - buf_get_total_stat(&stat); - buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); - buf_get_total_list_size_in_bytes(&buf_pools_list_size); - if (!srv_read_only_mode) { - fil_crypt_total_stat(&crypt_stat); - btr_scrub_total_stat(&scrub_stat); - } - - mem_adaptive_hash = 0; - - ut_ad(btr_search_sys->hash_tables); - - for (i = 0; i < btr_search_index_num; i++) { - hash_table_t* ht = btr_search_sys->hash_tables[i]; - - ut_ad(ht); - ut_ad(ht->heap); - /* Multiple mutexes/heaps are currently never used for adaptive - hash index tables. */ - ut_ad(!ht->n_sync_obj); - ut_ad(!ht->heaps); - - mem_adaptive_hash += mem_heap_get_size(ht->heap); - mem_adaptive_hash += ht->n_cells * sizeof(hash_cell_t); - } - - mem_dictionary = (dict_sys ? 
((dict_sys->table_hash->n_cells - + dict_sys->table_id_hash->n_cells - ) * sizeof(hash_cell_t) - + dict_sys->size) : 0); - - mutex_enter(&srv_innodb_monitor_mutex); - - export_vars.innodb_data_pending_reads = - ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS)); - - export_vars.innodb_data_pending_writes = - ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES)); - - export_vars.innodb_data_pending_fsyncs = - fil_n_pending_log_flushes - + fil_n_pending_tablespace_flushes; - export_vars.innodb_adaptive_hash_hash_searches - = btr_cur_n_sea; - export_vars.innodb_adaptive_hash_non_hash_searches - = btr_cur_n_non_sea; - export_vars.innodb_background_log_sync - = srv_log_writes_and_flush; - - export_vars.innodb_data_fsyncs = os_n_fsyncs; - - export_vars.innodb_data_read = srv_stats.data_read; - - export_vars.innodb_data_reads = os_n_file_reads; - - export_vars.innodb_data_writes = os_n_file_writes; - - export_vars.innodb_data_written = srv_stats.data_written; - - export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets; - - export_vars.innodb_buffer_pool_write_requests = - srv_stats.buf_pool_write_requests; - - export_vars.innodb_buffer_pool_wait_free = - srv_stats.buf_pool_wait_free; - - export_vars.innodb_buffer_pool_pages_flushed = - srv_stats.buf_pool_flushed; - - export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads; - - export_vars.innodb_buffer_pool_read_ahead_rnd = - stat.n_ra_pages_read_rnd; - - export_vars.innodb_buffer_pool_read_ahead = - stat.n_ra_pages_read; - - export_vars.innodb_buffer_pool_read_ahead_evicted = - stat.n_ra_pages_evicted; - - export_vars.innodb_buffer_pool_pages_LRU_flushed = - stat.buf_lru_flush_page_count; - - export_vars.innodb_buffer_pool_pages_data = LRU_len; - - export_vars.innodb_buffer_pool_bytes_data = - buf_pools_list_size.LRU_bytes - + buf_pools_list_size.unzip_LRU_bytes; - - export_vars.innodb_buffer_pool_pages_dirty = flush_list_len; - - export_vars.innodb_buffer_pool_bytes_dirty = - buf_pools_list_size.flush_list_bytes; 
- - export_vars.innodb_buffer_pool_pages_free = free_len; - - export_vars.innodb_deadlocks = srv_stats.lock_deadlock_count; - -#ifdef UNIV_DEBUG - export_vars.innodb_buffer_pool_pages_latched = - buf_get_latched_pages_number(); -#endif /* UNIV_DEBUG */ - export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages(); - - export_vars.innodb_buffer_pool_pages_misc = - buf_pool_get_n_pages() - LRU_len - free_len; - - export_vars.innodb_buffer_pool_pages_made_young - = stat.n_pages_made_young; - export_vars.innodb_buffer_pool_pages_made_not_young - = stat.n_pages_not_made_young; - export_vars.innodb_buffer_pool_pages_old = 0; - for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool = buf_pool_from_array(i); - export_vars.innodb_buffer_pool_pages_old - += buf_pool->LRU_old_len; - } - export_vars.innodb_checkpoint_age - = (log_sys->lsn - log_sys->last_checkpoint_lsn); - export_vars.innodb_checkpoint_max_age - = log_sys->max_checkpoint_age; - export_vars.innodb_history_list_length - = trx_sys->rseg_history_len; - ibuf_export_ibuf_status( - &export_vars.innodb_ibuf_size, - &export_vars.innodb_ibuf_free_list, - &export_vars.innodb_ibuf_segment_size, - &export_vars.innodb_ibuf_merges, - &export_vars.innodb_ibuf_merged_inserts, - &export_vars.innodb_ibuf_merged_delete_marks, - &export_vars.innodb_ibuf_merged_deletes, - &export_vars.innodb_ibuf_discarded_inserts, - &export_vars.innodb_ibuf_discarded_delete_marks, - &export_vars.innodb_ibuf_discarded_deletes); - export_vars.innodb_lsn_current - = log_sys->lsn; - export_vars.innodb_lsn_flushed - = log_sys->flushed_to_disk_lsn; - export_vars.innodb_lsn_last_checkpoint - = log_sys->last_checkpoint_lsn; - export_vars.innodb_master_thread_active_loops - = srv_main_active_loops; - export_vars.innodb_master_thread_idle_loops - = srv_main_idle_loops; - export_vars.innodb_max_trx_id - = trx_sys->max_trx_id; - export_vars.innodb_mem_adaptive_hash - = mem_adaptive_hash; - export_vars.innodb_mem_dictionary - = 
mem_dictionary; - export_vars.innodb_mem_total - = ut_total_allocated_memory; - export_vars.innodb_mutex_os_waits - = mutex_os_wait_count; - export_vars.innodb_mutex_spin_rounds - = mutex_spin_round_count; - export_vars.innodb_mutex_spin_waits - = mutex_spin_wait_count; - export_vars.innodb_s_lock_os_waits - = rw_lock_stats.rw_s_os_wait_count; - export_vars.innodb_s_lock_spin_rounds - = rw_lock_stats.rw_s_spin_round_count; - export_vars.innodb_s_lock_spin_waits - = rw_lock_stats.rw_s_spin_wait_count; - export_vars.innodb_x_lock_os_waits - = rw_lock_stats.rw_x_os_wait_count; - export_vars.innodb_x_lock_spin_rounds - = rw_lock_stats.rw_x_spin_round_count; - export_vars.innodb_x_lock_spin_waits - = rw_lock_stats.rw_x_spin_wait_count; - - oldest_view = UT_LIST_GET_LAST(trx_sys->view_list); - export_vars.innodb_oldest_view_low_limit_trx_id - = oldest_view ? oldest_view->low_limit_id : 0; - - export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no; - export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no; - export_vars.innodb_current_row_locks - = lock_sys->rec_num; - -#ifdef HAVE_ATOMIC_BUILTINS - export_vars.innodb_have_atomic_builtins = 1; -#else - export_vars.innodb_have_atomic_builtins = 0; -#endif - export_vars.innodb_page_size = UNIV_PAGE_SIZE; - - export_vars.innodb_log_waits = srv_stats.log_waits; - - export_vars.innodb_os_log_written = srv_stats.os_log_written; - - export_vars.innodb_os_log_fsyncs = fil_n_log_flushes; - - export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes; - - export_vars.innodb_os_log_pending_writes = - srv_stats.os_log_pending_writes; - - export_vars.innodb_log_write_requests = srv_stats.log_write_requests; - - export_vars.innodb_log_writes = srv_stats.log_writes; - - export_vars.innodb_dblwr_pages_written = - srv_stats.dblwr_pages_written; - - export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes; - - export_vars.innodb_pages_created = stat.n_pages_created; - - export_vars.innodb_pages_read = stat.n_pages_read; - 
export_vars.innodb_page0_read = srv_stats.page0_read; - - export_vars.innodb_pages_written = stat.n_pages_written; - - export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count; - - export_vars.innodb_row_lock_current_waits = - srv_stats.n_lock_wait_current_count; - - export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000; - - if (srv_stats.n_lock_wait_count > 0) { - - export_vars.innodb_row_lock_time_avg = (ulint) - (srv_stats.n_lock_wait_time - / 1000 / srv_stats.n_lock_wait_count); - - } else { - export_vars.innodb_row_lock_time_avg = 0; - } - - export_vars.innodb_row_lock_time_max = - lock_sys->n_lock_max_wait_time / 1000; - - export_vars.innodb_rows_read = srv_stats.n_rows_read; - - export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted; - - export_vars.innodb_rows_updated = srv_stats.n_rows_updated; - - export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted; - - export_vars.innodb_system_rows_read = srv_stats.n_system_rows_read; - - export_vars.innodb_system_rows_inserted = - srv_stats.n_system_rows_inserted; - - export_vars.innodb_system_rows_updated = - srv_stats.n_system_rows_updated; - - export_vars.innodb_system_rows_deleted = - srv_stats.n_system_rows_deleted; - - export_vars.innodb_num_open_files = fil_n_file_opened; - - export_vars.innodb_truncated_status_writes = - srv_truncated_status_writes; - - export_vars.innodb_available_undo_logs = srv_available_undo_logs; - export_vars.innodb_read_views_memory - = os_atomic_increment_ulint(&srv_read_views_memory, 0); - export_vars.innodb_descriptors_memory - = os_atomic_increment_ulint(&srv_descriptors_memory, 0); - - export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; - export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; - export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; - export_vars.innodb_index_pages_written = srv_stats.index_pages_written; - 
export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; - export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; - export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; - export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; - export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; - export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error; - export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted; - export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted; - - export_vars.innodb_defragment_compression_failures = - btr_defragment_compression_failures; - export_vars.innodb_defragment_failures = btr_defragment_failures; - export_vars.innodb_defragment_count = btr_defragment_count; - - export_vars.innodb_onlineddl_rowlog_rows = onlineddl_rowlog_rows; - export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used; - export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress; - -#ifdef UNIV_DEBUG - rw_lock_s_lock(&purge_sys->latch); - trx_id_t done_trx_no = purge_sys->done.trx_no; - trx_id_t up_limit_id = purge_sys->view - ? 
purge_sys->view->up_limit_id - : 0; - rw_lock_s_unlock(&purge_sys->latch); - - mutex_enter(&trx_sys->mutex); - trx_id_t max_trx_id = trx_sys->rw_max_trx_id; - mutex_exit(&trx_sys->mutex); - - if (!done_trx_no || max_trx_id < done_trx_no - 1) { - export_vars.innodb_purge_trx_id_age = 0; - } else { - export_vars.innodb_purge_trx_id_age = - (ulint) (max_trx_id - done_trx_no + 1); - } - - if (!up_limit_id - || max_trx_id < up_limit_id) { - export_vars.innodb_purge_view_trx_id_age = 0; - } else { - export_vars.innodb_purge_view_trx_id_age = - (ulint) (max_trx_id - up_limit_id); - } -#endif /* UNIV_DEBUG */ - - export_vars.innodb_sec_rec_cluster_reads = - srv_stats.n_sec_rec_cluster_reads; - export_vars.innodb_sec_rec_cluster_reads_avoided = - srv_stats.n_sec_rec_cluster_reads_avoided; - - if (!srv_read_only_mode) { - export_vars.innodb_encryption_rotation_pages_read_from_cache = - crypt_stat.pages_read_from_cache; - export_vars.innodb_encryption_rotation_pages_read_from_disk = - crypt_stat.pages_read_from_disk; - export_vars.innodb_encryption_rotation_pages_modified = - crypt_stat.pages_modified; - export_vars.innodb_encryption_rotation_pages_flushed = - crypt_stat.pages_flushed; - export_vars.innodb_encryption_rotation_estimated_iops = - crypt_stat.estimated_iops; - export_vars.innodb_encryption_key_requests = - srv_stats.n_key_requests; - export_vars.innodb_key_rotation_list_length = - srv_stats.key_rotation_list_length; - - export_vars.innodb_scrub_page_reorganizations = - scrub_stat.page_reorganizations; - export_vars.innodb_scrub_page_splits = - scrub_stat.page_splits; - export_vars.innodb_scrub_page_split_failures_underflow = - scrub_stat.page_split_failures_underflow; - export_vars.innodb_scrub_page_split_failures_out_of_filespace = - scrub_stat.page_split_failures_out_of_filespace; - export_vars.innodb_scrub_page_split_failures_missing_index = - scrub_stat.page_split_failures_missing_index; - export_vars.innodb_scrub_page_split_failures_unknown = - 
scrub_stat.page_split_failures_unknown; - } - - mutex_exit(&srv_innodb_monitor_mutex); -} - -#ifndef DBUG_OFF -/** false before InnoDB monitor has been printed at least once, true -afterwards */ -bool srv_debug_monitor_printed = false; -#endif - -/*********************************************************************//** -A thread which prints the info output by various InnoDB monitors. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_monitor_thread)(void*) -{ - ib_int64_t sig_count; - double time_elapsed; - time_t current_time; - time_t last_table_monitor_time; - time_t last_tablespace_monitor_time; - time_t last_monitor_time; - ulint mutex_skipped; - ibool last_srv_print_monitor; - - ut_ad(!srv_read_only_mode); - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Lock timeout thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(srv_monitor_thread_key); -#endif /* UNIV_PFS_THREAD */ - - srv_last_monitor_time = ut_time(); - last_table_monitor_time = ut_time(); - last_tablespace_monitor_time = ut_time(); - last_monitor_time = ut_time(); - mutex_skipped = 0; - last_srv_print_monitor = srv_print_innodb_monitor; -loop: - /* Wake up every 5 seconds to see if we need to print - monitor information or if signalled at shutdown. */ - - sig_count = os_event_reset(srv_monitor_event); - - os_event_wait_time_low(srv_monitor_event, 5000000, sig_count); - - current_time = ut_time(); - - time_elapsed = difftime(current_time, last_monitor_time); - - if (time_elapsed > 15) { - last_monitor_time = ut_time(); - - if (srv_print_innodb_monitor) { - /* Reset mutex_skipped counter everytime - srv_print_innodb_monitor changes. 
This is to - ensure we will not be blocked by lock_sys->mutex - for short duration information printing, - such as requested by sync_array_print_long_waits() */ - if (!last_srv_print_monitor) { - mutex_skipped = 0; - last_srv_print_monitor = TRUE; - } - - if (!srv_printf_innodb_monitor(stderr, - MUTEX_NOWAIT(mutex_skipped), - NULL, NULL)) { - mutex_skipped++; - } else { - /* Reset the counter */ - mutex_skipped = 0; - } - } else { - last_srv_print_monitor = FALSE; - } - - - /* We don't create the temp files or associated - mutexes in read-only-mode */ - - if (!srv_read_only_mode && srv_innodb_status) { - mutex_enter(&srv_monitor_file_mutex); - rewind(srv_monitor_file); - if (!srv_printf_innodb_monitor(srv_monitor_file, - MUTEX_NOWAIT(mutex_skipped), - NULL, NULL)) { - mutex_skipped++; - } else { - mutex_skipped = 0; - } - - os_file_set_eof(srv_monitor_file); - mutex_exit(&srv_monitor_file_mutex); - } - - if (srv_print_innodb_tablespace_monitor - && difftime(current_time, - last_tablespace_monitor_time) > 60) { - last_tablespace_monitor_time = ut_time(); - - fputs("========================" - "========================\n", - stderr); - - ut_print_timestamp(stderr); - - fputs(" INNODB TABLESPACE MONITOR OUTPUT\n" - "========================" - "========================\n", - stderr); - - fsp_print(0); - fputs("Validating tablespace\n", stderr); - fsp_validate(0); - fputs("Validation ok\n" - "---------------------------------------\n" - "END OF INNODB TABLESPACE MONITOR OUTPUT\n" - "=======================================\n", - stderr); - } - - if (srv_print_innodb_table_monitor - && difftime(current_time, last_table_monitor_time) > 60) { - - last_table_monitor_time = ut_time(); - - fprintf(stderr, "Warning: %s\n", - DEPRECATED_MSG_INNODB_TABLE_MONITOR); - - fputs("===========================================\n", - stderr); - - ut_print_timestamp(stderr); - - fputs(" INNODB TABLE MONITOR OUTPUT\n" - "===========================================\n", - stderr); - 
dict_print(); - - fputs("-----------------------------------\n" - "END OF INNODB TABLE MONITOR OUTPUT\n" - "==================================\n", - stderr); - - fprintf(stderr, "Warning: %s\n", - DEPRECATED_MSG_INNODB_TABLE_MONITOR); - } - } - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - goto exit_func; - } - - if (srv_print_innodb_monitor - || srv_print_innodb_lock_monitor - || srv_print_innodb_tablespace_monitor - || srv_print_innodb_table_monitor) { - goto loop; - } - - goto loop; - -exit_func: - srv_monitor_active = false; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/*********************************************************************//** -A thread which prints warnings about semaphore waits which have lasted -too long. These can be used to track bugs which cause hangs. -Note: In order to make sync_arr_wake_threads_if_sema_free work as expected, -we should avoid waiting any mutexes in this function! 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_error_monitor_thread)(void*) -{ - /* number of successive fatal timeouts observed */ - ulint fatal_cnt = 0; - lsn_t old_lsn; - lsn_t new_lsn; - ib_int64_t sig_count; - /* longest waiting thread for a semaphore */ - os_thread_id_t waiter = os_thread_get_curr_id(); - os_thread_id_t old_waiter = waiter; - /* the semaphore that is being waited for */ - const void* sema = NULL; - const void* old_sema = NULL; - - ut_ad(!srv_read_only_mode); - - old_lsn = srv_start_lsn; - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Error monitor thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(srv_error_monitor_thread_key); -#endif /* UNIV_PFS_THREAD */ - -loop: - /* Try to track a strange bug reported by Harald Fuchs and others, - where the lsn seems to decrease at times */ - - if (log_peek_lsn(&new_lsn)) { - if (new_lsn < old_lsn) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: old log sequence number " LSN_PF - " was greater\n" - "InnoDB: than the new log sequence number " LSN_PF "!\n" - "InnoDB: Please submit a bug report" - " to http://bugs.mysql.com\n", - old_lsn, new_lsn); - ut_ad(0); - } - - old_lsn = new_lsn; - } - - if (difftime(time(NULL), srv_last_monitor_time) > 60) { - /* We referesh InnoDB Monitor values so that averages are - printed from at most 60 last seconds */ - - srv_refresh_innodb_monitor_stats(); - } - - /* Update the statistics collected for deciding LRU - eviction policy. */ - buf_LRU_stat_update(); - - /* In case mutex_exit is not a memory barrier, it is - theoretically possible some threads are left waiting though - the semaphore is already released. 
Wake up those threads: */ - - sync_arr_wake_threads_if_sema_free(); - - if (sync_array_print_long_waits(&waiter, &sema) - && sema == old_sema && os_thread_eq(waiter, old_waiter)) { -#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) - if (srv_allow_writes_event->is_set()) { -#endif /* WITH_WSREP */ - fatal_cnt++; -#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES) - } else { - fprintf(stderr, - "WSREP: avoiding InnoDB self crash due to long " - "semaphore wait of > %lu seconds\n" - "Server is processing SST donor operation, " - "fatal_cnt now: %lu", - (ulong) srv_fatal_semaphore_wait_threshold, fatal_cnt); - } -#endif /* WITH_WSREP */ - if (fatal_cnt > 10) { - - fprintf(stderr, - "InnoDB: Error: semaphore wait has lasted" - " > %lu seconds\n" - "InnoDB: We intentionally crash the server," - " because it appears to be hung.\n", - (ulong) srv_fatal_semaphore_wait_threshold); - - ut_error; - } - } else { - fatal_cnt = 0; - old_waiter = waiter; - old_sema = sema; - } - - /* Flush stderr so that a database user gets the output - to possible MySQL error file */ - - fflush(stderr); - - sig_count = os_event_reset(srv_error_event); - - os_event_wait_time_low(srv_error_event, 1000000, sig_count); - - if (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - - goto loop; - } - - srv_error_monitor_active = false; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/******************************************************************//** -Increment the server activity count. 
*/ -UNIV_INTERN -void -srv_inc_activity_count( -/*===================*/ - bool ibuf_merge_activity) /*!< whether this activity bump - is caused by the background - change buffer merge */ -{ - srv_sys.activity_count.inc(); - if (ibuf_merge_activity) - srv_sys.ibuf_merge_activity_count.inc(); -} - -/**********************************************************************//** -Check whether any background thread is active. If so return the thread -type. -@return SRV_NONE if all are suspended or have exited, thread -type if any are still active. */ -UNIV_INTERN -srv_thread_type -srv_get_active_thread_type(void) -/*============================*/ -{ - srv_thread_type ret = SRV_NONE; - - if (srv_read_only_mode) { - return(SRV_NONE); - } - - srv_sys_mutex_enter(); - - for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) { - if (srv_sys.n_threads_active[i] != 0) { - ret = static_cast<srv_thread_type>(i); - break; - } - } - - srv_sys_mutex_exit(); - - /* Check only on shutdown. */ - if (ret == SRV_NONE - && srv_shutdown_state != SRV_SHUTDOWN_NONE - && trx_purge_state() != PURGE_STATE_DISABLED - && trx_purge_state() != PURGE_STATE_EXIT) { - - ret = SRV_PURGE; - } - - return(ret); -} - -/******************************************************************//** -A thread which follows the redo log and outputs the changed page bitmap. 
-@return a dummy value */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_redo_log_follow_thread)( -/*=======================================*/ - void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter - required by - os_thread_create */ -{ - ut_ad(!srv_read_only_mode); - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Redo log follower thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(srv_log_tracking_thread_key); -#endif - - my_thread_init(); - srv_redo_log_thread_started = true; - - do { - os_event_wait(srv_checkpoint_completed_event); - os_event_reset(srv_checkpoint_completed_event); - - if (srv_track_changed_pages - && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { - if (!log_online_follow_redo_log()) { - /* TODO: sync with I_S log tracking status? */ - ib_logf(IB_LOG_LEVEL_ERROR, - "log tracking bitmap write failed, " - "stopping log tracking thread!"); - break; - } - os_event_set(srv_redo_log_tracked_event); - } - - } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE); - - log_online_read_shutdown(); - os_event_set(srv_redo_log_tracked_event); - - my_thread_end(); - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/*************************************************************//** -Removes old archived transaction log files. 
-Both parameters couldn't be provided at the same time */ -dberr_t -purge_archived_logs( - time_t before_date, /*!< in: all files modified - before timestamp should be removed */ - lsn_t before_no) /*!< in: files with this number in name - and earler should be removed */ -{ - log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups); - - os_file_dir_t dir; - os_file_stat_t fileinfo; - char archived_log_filename[OS_FILE_MAX_PATH]; - char namegen[OS_FILE_MAX_PATH]; - ulint dirnamelen; - - if (srv_arch_dir) { - dir = os_file_opendir(srv_arch_dir, FALSE); - if (!dir) { - ib_logf(IB_LOG_LEVEL_WARN, - "opening archived log directory %s failed. " - "Purge archived logs are not available", - srv_arch_dir); - /* failed to open directory */ - return(DB_ERROR); - } - } else { - /* log archive directory is not specified */ - return(DB_ERROR); - } - - dirnamelen = strlen(srv_arch_dir); - - memcpy(archived_log_filename, srv_arch_dir, dirnamelen); - if (dirnamelen && - archived_log_filename[dirnamelen - 1] != SRV_PATH_SEPARATOR) { - archived_log_filename[dirnamelen++] = SRV_PATH_SEPARATOR; - } - - memset(&fileinfo, 0, sizeof(fileinfo)); - while(!os_file_readdir_next_file(srv_arch_dir, dir, - &fileinfo) ) { - if (strncmp(fileinfo.name, - IB_ARCHIVED_LOGS_PREFIX, IB_ARCHIVED_LOGS_PREFIX_LEN)) { - continue; - } - if (dirnamelen + strlen(fileinfo.name) + 2 > OS_FILE_MAX_PATH) - continue; - - snprintf(archived_log_filename + dirnamelen, - OS_FILE_MAX_PATH - dirnamelen - 1, - "%s", fileinfo.name); - - if (before_no) { - ib_uint64_t log_file_no = strtoull(fileinfo.name + - IB_ARCHIVED_LOGS_PREFIX_LEN, - NULL, 10); - if (log_file_no == 0 || before_no <= log_file_no) { - continue; - } - } else { - fileinfo.mtime = 0; - if (os_file_get_status(archived_log_filename, - &fileinfo, false) != DB_SUCCESS || - fileinfo.mtime == 0) { - continue; - } - - if (before_date == 0 || fileinfo.mtime > before_date) { - continue; - } - } - - /* We are going to delete archived file. 
Acquire log_sys->mutex - to make sure that we are the only who try to delete file. This - also prevents log system from using this file. Do not delete - file if it is currently in progress of writting or have - pending IO. This is enforced by checking: - 1. fil_space_contains_node. - 2. group->archived_offset % group->file_size != 0, i.e. - there is archive in progress and we are going to delete it. - This covers 3 cases: - a. Usual case when we have one archive in progress, - both 1 and 2 are TRUE - b. When we have more then 1 archive in fil_space, - this can happen when flushed LSN range crosses file - boundary - c. When we have empty fil_space, but existing file will be - opened once archiving operation is requested. This usually - happens on startup. - */ - - mutex_enter(&log_sys->mutex); - - log_archived_file_name_gen(namegen, sizeof(namegen), - group->id, group->archived_file_no); - - if (fil_space_contains_node(group->archive_space_id, - archived_log_filename) || - (group->archived_offset % group->file_size != 0 && - strcmp(namegen, archived_log_filename) == 0)) { - - mutex_exit(&log_sys->mutex); - continue; - } - - if (!os_file_delete_if_exists(innodb_file_data_key, - archived_log_filename)) { - - ib_logf(IB_LOG_LEVEL_WARN, - "can't delete archived log file %s.", - archived_log_filename); - - mutex_exit(&log_sys->mutex); - os_file_closedir(dir); - - return(DB_ERROR); - } - - mutex_exit(&log_sys->mutex); - } - - os_file_closedir(dir); - - return(DB_SUCCESS); -} - -/*******************************************************************//** -Tells the InnoDB server that there has been activity in the database -and wakes up the master thread if it is suspended (not sleeping). Used -in the MySQL interface. Note that there is a small chance that the master -thread stays suspended (we do not protect our operation with the -srv_sys_t->mutex, for performance reasons). 
*/ -UNIV_INTERN -void -srv_active_wake_master_thread(void) -/*===============================*/ -{ - if (srv_read_only_mode) { - return; - } - - ut_ad(!srv_sys_mutex_own()); - - srv_inc_activity_count(); - - if (srv_sys.n_threads_active[SRV_MASTER] == 0) { - srv_slot_t* slot; - - srv_sys_mutex_enter(); - - slot = &srv_sys.sys_threads[SRV_MASTER_SLOT]; - - /* Only if the master thread has been started. */ - - if (slot->in_use) { - ut_a(srv_slot_get_type(slot) == SRV_MASTER); - os_event_set(slot->event); - } - - srv_sys_mutex_exit(); - } -} - -/*******************************************************************//** -Tells the purge thread that there has been activity in the database -and wakes up the purge thread if it is suspended (not sleeping). Note -that there is a small chance that the purge thread stays suspended -(we do not protect our check with the srv_sys_t:mutex and the -purge_sys->latch, for performance reasons). */ -UNIV_INTERN -void -srv_wake_purge_thread_if_not_active(void) -/*=====================================*/ -{ - ut_ad(!srv_sys_mutex_own()); - - if (purge_sys->state == PURGE_STATE_RUN - && srv_sys.n_threads_active[SRV_PURGE] == 0) { - - srv_release_threads(SRV_PURGE, 1); - } -} - -/*******************************************************************//** -Wakes up the master thread if it is suspended or being suspended. */ -UNIV_INTERN -void -srv_wake_master_thread(void) -/*========================*/ -{ - ut_ad(!srv_sys_mutex_own()); - - srv_inc_activity_count(); - - srv_release_threads(SRV_MASTER, 1); -} - -/*******************************************************************//** -Get current server activity count. We don't hold srv_sys::mutex while -reading this value as it is only used in heuristics. -@return activity count. */ -UNIV_INTERN -ulint -srv_get_activity_count(void) -/*========================*/ -{ - return(srv_sys.activity_count); -} - -/** Get current server ibuf merge activity count. 
-@return ibuf merge activity count */ -static -ulint -srv_get_ibuf_merge_activity_count(void) -{ - return(srv_sys.ibuf_merge_activity_count); -} - -/*******************************************************************//** -Check if there has been any activity. Considers background change buffer -merge as regular server activity unless a non-default -old_ibuf_merge_activity_count value is passed, in which case the merge will be -treated as keeping server idle. -@return FALSE if no change in activity counter. */ -UNIV_INTERN -ibool -srv_check_activity( -/*===============*/ - ulint old_activity_count, /*!< in: old activity count */ - /*!< old change buffer merge - activity count, or - ULINT_UNDEFINED */ - ulint old_ibuf_merge_activity_count) -{ - ulint new_activity_count = srv_sys.activity_count; - if (old_ibuf_merge_activity_count == ULINT_UNDEFINED) - return(new_activity_count != old_activity_count); - - /* If we care about ibuf merge activity, then the server is considered - idle if all activity, if any, was due to ibuf merge. */ - ulint new_ibuf_merge_activity_count - = srv_sys.ibuf_merge_activity_count; - - ut_ad(new_ibuf_merge_activity_count <= new_activity_count); - ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count); - ut_ad(new_activity_count >= old_activity_count); - - ulint ibuf_merge_activity_delta = - new_ibuf_merge_activity_count - old_ibuf_merge_activity_count; - ulint activity_delta = new_activity_count - old_activity_count; - - return (activity_delta > ibuf_merge_activity_delta); -} - -/********************************************************************//** -The master thread is tasked to ensure that flush of log file happens -once every second in the background. 
This is to ensure that not more -than one second of trxs are lost in case of crash when -innodb_flush_logs_at_trx_commit != 1 */ -static -void -srv_sync_log_buffer_in_background(void) -/*===================================*/ -{ - time_t current_time = time(NULL); - - srv_main_thread_op_info = "flushing log"; - if (difftime(current_time, srv_last_log_flush_time) - >= srv_flush_log_at_timeout) { - log_buffer_sync_in_background(TRUE); - srv_last_log_flush_time = current_time; - srv_log_writes_and_flush++; - } -} - -/********************************************************************//** -Make room in the table cache by evicting an unused table. -@return number of tables evicted. */ -static -ulint -srv_master_evict_from_table_cache( -/*==============================*/ - ulint pct_check) /*!< in: max percent to check */ -{ - ulint n_tables_evicted = 0; - - rw_lock_x_lock(&dict_operation_lock); - - dict_mutex_enter_for_mysql(); - - n_tables_evicted = dict_make_room_in_cache( - innobase_get_table_cache_size(), pct_check); - - dict_mutex_exit_for_mysql(); - - rw_lock_x_unlock(&dict_operation_lock); - - return(n_tables_evicted); -} - -/*********************************************************************//** -This function prints progress message every 60 seconds during server -shutdown, for any activities that master thread is pending on. 
*/ -static -void -srv_shutdown_print_master_pending( -/*==============================*/ - ib_time_t* last_print_time, /*!< last time the function - print the message */ - ulint n_tables_to_drop, /*!< number of tables to - be dropped */ - ulint n_bytes_merged) /*!< number of change buffer - just merged */ -{ - ib_time_t current_time; - double time_elapsed; - - current_time = ut_time(); - time_elapsed = ut_difftime(current_time, *last_print_time); - - if (time_elapsed > 60) { - *last_print_time = current_time; - - if (n_tables_to_drop) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Waiting for " - "%lu table(s) to be dropped\n", - (ulong) n_tables_to_drop); - } - - /* Check change buffer merge, we only wait for change buffer - merge if it is a slow shutdown */ - if (!srv_fast_shutdown && n_bytes_merged) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Waiting for change " - "buffer merge to complete\n" - " InnoDB: number of bytes of change buffer " - "just merged: %lu\n", - n_bytes_merged); - } - } -} - -/*********************************************************************//** -Perform the tasks that the master thread is supposed to do when the -server is active. There are two types of tasks. The first category is -of such tasks which are performed at each inovcation of this function. -We assume that this function is called roughly every second when the -server is active. The second category is of such tasks which are -performed at some interval e.g.: purge, dict_LRU cleanup etc. */ -static -void -srv_master_do_active_tasks(void) -/*============================*/ -{ - ib_time_t cur_time = ut_time(); - ullint counter_time = ut_time_us(NULL); - ulint n_evicted = 0; - - /* First do the tasks that we are suppose to do at each - invocation of this function. 
*/ - - ++srv_main_active_loops; - - MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS); - - /* ALTER TABLE in MySQL requires on Unix that the table handler - can drop tables lazily after there no longer are SELECT - queries to them. */ - srv_main_thread_op_info = "doing background drop tables"; - row_drop_tables_for_mysql_in_background(); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - /* make sure that there is enough reusable space in the redo - log files */ - srv_main_thread_op_info = "checking free log space"; - log_free_check(); - - /* Do an ibuf merge */ - srv_main_thread_op_info = "doing insert buffer merge"; - counter_time = ut_time_us(NULL); - ibuf_merge_in_background(false); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time); - - /* Flush logs if needed */ - srv_main_thread_op_info = "flushing log"; - srv_sync_log_buffer_in_background(); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time); - - /* Now see if various tasks that are performed at defined - intervals need to be performed. 
*/ - -#ifdef MEM_PERIODIC_CHECK - /* Check magic numbers of every allocated mem block once in - SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */ - if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) { - mem_validate_all_blocks(); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time); - } -#endif - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) { - srv_main_thread_op_info = "enforcing dict cache limit"; - n_evicted = srv_master_evict_from_table_cache(50); - MONITOR_INC_VALUE( - MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, n_evicted); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); - } - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - /* Make a new checkpoint */ - if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) { - srv_main_thread_op_info = "making checkpoint"; - log_checkpoint(TRUE, FALSE, TRUE); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time); - } -} - -/*********************************************************************//** -Perform the tasks that the master thread is supposed to do whenever the -server is idle. We do check for the server state during this function -and if the server has entered the shutdown phase we may return from -the function without completing the required tasks. -Note that the server can move to active state when we are executing this -function but we don't check for that as we are suppose to perform more -or less same tasks when server is active. */ -static -void -srv_master_do_idle_tasks(void) -/*==========================*/ -{ - ullint counter_time; - ulint n_evicted = 0; - - ++srv_main_idle_loops; - - MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS); - - - /* ALTER TABLE in MySQL requires on Unix that the table handler - can drop tables lazily after there no longer are SELECT - queries to them. 
*/ - counter_time = ut_time_us(NULL); - srv_main_thread_op_info = "doing background drop tables"; - row_drop_tables_for_mysql_in_background(); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, - counter_time); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - /* make sure that there is enough reusable space in the redo - log files */ - srv_main_thread_op_info = "checking free log space"; - log_free_check(); - - /* Do an ibuf merge */ - counter_time = ut_time_us(NULL); - srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_merge_in_background(true); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - srv_main_thread_op_info = "enforcing dict cache limit"; - n_evicted = srv_master_evict_from_table_cache(100); - MONITOR_INC_VALUE( - MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, n_evicted); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); - - /* Flush logs if needed */ - srv_sync_log_buffer_in_background(); - MONITOR_INC_TIME_IN_MICRO_SECS( - MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } - - /* Make a new checkpoint */ - srv_main_thread_op_info = "making checkpoint"; - log_checkpoint(TRUE, FALSE, TRUE); - MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND, - counter_time); - - if (srv_shutdown_state > 0) { - return; - } - - if (srv_log_arch_expire_sec) { - srv_main_thread_op_info = "purging archived logs"; - purge_archived_logs(ut_time() - srv_log_arch_expire_sec, - 0); - } -} - -/** Perform shutdown tasks. 
-@param[in] ibuf_merge whether to complete the change buffer merge */ -static -void -srv_shutdown(bool ibuf_merge) -{ - ulint n_bytes_merged = 0; - ulint n_tables_to_drop; - ib_time_t now = ut_time(); - - do { - ut_ad(!srv_read_only_mode); - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); - ++srv_main_shutdown_loops; - - /* FIXME: Remove the background DROP TABLE queue; it is not - crash-safe and breaks ACID. */ - srv_main_thread_op_info = "doing background drop tables"; - n_tables_to_drop = row_drop_tables_for_mysql_in_background(); - - if (ibuf_merge) { - srv_main_thread_op_info = "checking free log space"; - log_free_check(); - srv_main_thread_op_info = "doing insert buffer merge"; - n_bytes_merged = ibuf_merge_in_background(true); - - /* Flush logs if needed */ - srv_sync_log_buffer_in_background(); - } - - /* Print progress message every 60 seconds during shutdown */ - if (srv_print_verbose_log) { - srv_shutdown_print_master_pending( - &now, n_tables_to_drop, n_bytes_merged); - } - } while (n_bytes_merged || n_tables_to_drop); -} - -/*********************************************************************//** -Puts master thread to sleep. At this point we are using polling to -service various activities. Master thread sleeps for one second before -checking the state of the server again */ -static -void -srv_master_sleep(void) -/*==================*/ -{ - srv_main_thread_op_info = "sleeping"; - os_thread_sleep(1000000); - srv_main_thread_op_info = ""; -} - -/*********************************************************************//** -The master thread controlling the server. 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_master_thread)( -/*==============================*/ - void* arg MY_ATTRIBUTE((unused))) - /*!< in: a dummy parameter required by - os_thread_create */ -{ - my_thread_init(); - - srv_slot_t* slot; - ulint old_activity_count = srv_get_activity_count(); - ulint old_ibuf_merge_activity_count - = srv_get_ibuf_merge_activity_count(); - - ut_ad(!srv_read_only_mode); - - srv_master_tid = os_thread_get_tid(); - - os_thread_set_priority(srv_master_tid, srv_sched_priority_master); - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Master thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(srv_master_thread_key); -#endif /* UNIV_PFS_THREAD */ - - srv_main_thread_process_no = os_proc_get_number(); - srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); - - slot = srv_reserve_slot(SRV_MASTER); - ut_a(slot == srv_sys.sys_threads); - -loop: - if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { - goto suspend_thread; - } - - while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - - srv_master_sleep(); - - MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP); - - srv_current_thread_priority = srv_master_thread_priority; - - if (srv_check_activity(old_activity_count, - old_ibuf_merge_activity_count)) { - - old_activity_count = srv_get_activity_count(); - old_ibuf_merge_activity_count - = srv_get_ibuf_merge_activity_count(); - srv_master_do_active_tasks(); - } else { - srv_master_do_idle_tasks(); - } - } - -suspend_thread: - switch (srv_shutdown_state) { - case SRV_SHUTDOWN_NONE: - break; - case SRV_SHUTDOWN_FLUSH_PHASE: - case SRV_SHUTDOWN_LAST_PHASE: - ut_ad(0); - /* fall through */ - case SRV_SHUTDOWN_EXIT_THREADS: - /* srv_init_abort() must have been invoked */ - case SRV_SHUTDOWN_CLEANUP: - if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP - && srv_fast_shutdown < 2) { - 
srv_shutdown(srv_fast_shutdown == 0); - } - srv_suspend_thread(slot); - my_thread_end(); - os_thread_exit(NULL); - } - - srv_main_thread_op_info = "suspending"; - - srv_suspend_thread(slot); - - /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql() - waits for database activity to die down when converting < 4.1.x - databases, and relies on this string being exactly as it is. InnoDB - manual also mentions this string in several places. */ - srv_main_thread_op_info = "waiting for server activity"; - - srv_resume_thread(slot); - goto loop; -} - -/** Check if purge should stop. -@param[in] n_purged pages purged in the last batch -@return whether purge should exit */ -static -bool -srv_purge_should_exit(ulint n_purged) -{ - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_NONE - || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); - - if (srv_undo_sources) { - return(false); - } - if (srv_fast_shutdown) { - return(true); - } - /* Slow shutdown was requested. */ - if (n_purged) { - /* The previous round still did some work. */ - return(false); - } - /* Exit if there are no active transactions to roll back. */ - return(trx_sys_any_active_transactions() == 0); -} - -/*********************************************************************//** -Fetch and execute a task from the work queue. 
-@return true if a task was executed */ -static -bool -srv_task_execute(void) -/*==================*/ -{ - que_thr_t* thr = NULL; - - ut_ad(!srv_read_only_mode); - ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); - - mutex_enter(&srv_sys.tasks_mutex); - - if (UT_LIST_GET_LEN(srv_sys.tasks) > 0) { - - thr = UT_LIST_GET_FIRST(srv_sys.tasks); - - ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE); - - UT_LIST_REMOVE(queue, srv_sys.tasks, thr); - } - - mutex_exit(&srv_sys.tasks_mutex); - - if (thr != NULL) { - - que_run_threads(thr); - - os_atomic_inc_ulint( - &purge_sys->bh_mutex, &purge_sys->n_completed, 1); - - srv_inc_activity_count(); - } - - return(thr != NULL); -} - -static ulint purge_tid_i = 0; - -/*********************************************************************//** -Worker thread that reads tasks from the work queue and executes them. -@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_worker_thread)( -/*==============================*/ - void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter - required by os_thread_create */ -{ - my_thread_init(); - - srv_slot_t* slot; - ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1); - - ut_ad(tid_i < srv_n_purge_threads); - ut_ad(!srv_read_only_mode); - ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); - - srv_purge_tids[tid_i] = os_thread_get_tid(); - os_thread_set_priority(srv_purge_tids[tid_i], - srv_sched_priority_purge); - -#ifdef UNIV_DEBUG_THREAD_CREATION - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: worker thread starting, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - slot = srv_reserve_slot(SRV_WORKER); - - ut_a(srv_n_purge_threads > 1); - - srv_sys_mutex_enter(); - - ut_a(srv_sys.n_threads_active[SRV_WORKER] < srv_n_purge_threads); - - srv_sys_mutex_exit(); - - /* We need to ensure that the worker threads exit after the - purge coordinator thread. 
Otherwise the purge coordinaor can - end up waiting forever in trx_purge_wait_for_workers_to_complete() */ - - do { - srv_suspend_thread(slot); - srv_resume_thread(slot); - - srv_current_thread_priority = srv_purge_thread_priority; - - if (srv_task_execute()) { - - /* If there are tasks in the queue, wakeup - the purge coordinator thread. */ - - srv_wake_purge_thread_if_not_active(); - } - - /* Note: we are checking the state without holding the - purge_sys->latch here. */ - } while (purge_sys->state != PURGE_STATE_EXIT); - - srv_free_slot(slot); - - rw_lock_x_lock(&purge_sys->latch); - - ut_a(!purge_sys->running); - ut_a(purge_sys->state == PURGE_STATE_EXIT); - ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE); - - rw_lock_x_unlock(&purge_sys->latch); - -#ifdef UNIV_DEBUG_THREAD_CREATION - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - my_thread_end(); - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ -} - -/*********************************************************************//** -Do the actual purge operation. -@return length of history list before the last purge batch. */ -static -ulint -srv_do_purge( -/*=========*/ - ulint n_threads, /*!< in: number of threads to use */ - ulint* n_total_purged) /*!< in/out: total pages purged */ -{ - ulint n_pages_purged; - - static ulint count = 0; - static ulint n_use_threads = 0; - static ulint rseg_history_len = 0; - ulint old_activity_count = srv_get_activity_count(); - - ut_a(n_threads > 0); - ut_ad(!srv_read_only_mode); - - /* Purge until there are no more records to purge and there is - no change in configuration or server state. 
If the user has - configured more than one purge thread then we treat that as a - pool of threads and only use the extra threads if purge can't - keep up with updates. */ - - if (n_use_threads == 0) { - n_use_threads = n_threads; - } - - do { - srv_current_thread_priority = srv_purge_thread_priority; - - if (trx_sys->rseg_history_len > rseg_history_len - || (srv_max_purge_lag > 0 - && rseg_history_len > srv_max_purge_lag)) { - - /* History length is now longer than what it was - when we took the last snapshot. Use more threads. */ - - if (n_use_threads < n_threads) { - ++n_use_threads; - } - - } else if (srv_check_activity(old_activity_count) - && n_use_threads > 1) { - - /* History length same or smaller since last snapshot, - use fewer threads. */ - - --n_use_threads; - - old_activity_count = srv_get_activity_count(); - } - - /* Ensure that the purge threads are less than what - was configured. */ - - ut_a(n_use_threads > 0); - ut_a(n_use_threads <= n_threads); - - /* Take a snapshot of the history list before purge. */ - if ((rseg_history_len = trx_sys->rseg_history_len) == 0) { - break; - } - - n_pages_purged = trx_purge( - n_use_threads, srv_purge_batch_size, - (++count % TRX_SYS_N_RSEGS) == 0); - - *n_total_purged += n_pages_purged; - - } while (!srv_purge_should_exit(n_pages_purged) - && n_pages_purged > 0 - && purge_sys->state == PURGE_STATE_RUN); - - return(rseg_history_len); -} - -/*********************************************************************//** -Suspend the purge coordinator thread. */ -static -void -srv_purge_coordinator_suspend( -/*==========================*/ - srv_slot_t* slot, /*!< in/out: Purge coordinator - thread slot */ - ulint rseg_history_len) /*!< in: history list length - before last purge */ -{ - ut_ad(!srv_read_only_mode); - ut_a(slot->type == SRV_PURGE); - - bool stop = false; - - /** Maximum wait time on the purge event, in micro-seconds. 
*/ - static const ulint SRV_PURGE_MAX_TIMEOUT = 10000; - - ib_int64_t sig_count = srv_suspend_thread(slot); - - do { - rw_lock_x_lock(&purge_sys->latch); - - purge_sys->running = false; - - rw_lock_x_unlock(&purge_sys->latch); - - /* We don't wait right away on the the non-timed wait because - we want to signal the thread that wants to suspend purge. */ - const bool wait = stop - || rseg_history_len <= trx_sys->rseg_history_len; - const bool timeout = srv_resume_thread( - slot, sig_count, wait, - stop ? 0 : SRV_PURGE_MAX_TIMEOUT); - - sig_count = srv_suspend_thread(slot); - - rw_lock_x_lock(&purge_sys->latch); - - stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE - && purge_sys->state == PURGE_STATE_STOP); - - if (!stop) { - ut_a(purge_sys->n_stop == 0); - purge_sys->running = true; - - if (timeout - && rseg_history_len == trx_sys->rseg_history_len - && trx_sys->rseg_history_len < 5000) { - /* No new records were added since the - wait started. Simply wait for new - records. The magic number 5000 is an - approximation for the case where we - have cached UNDO log records which - prevent truncate of the UNDO - segments. */ - stop = true; - } - } else { - ut_a(purge_sys->n_stop > 0); - - /* Signal that we are suspended. */ - os_event_set(purge_sys->event); - } - - rw_lock_x_unlock(&purge_sys->latch); - } while (stop && srv_undo_sources); - - srv_resume_thread(slot, 0, false); -} - -/*********************************************************************//** -Purge coordinator thread that schedules the purge tasks. 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(srv_purge_coordinator_thread)( -/*=========================================*/ - void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter - required by os_thread_create */ -{ - my_thread_init(); - - srv_slot_t* slot; - ulint n_total_purged = ULINT_UNDEFINED; - - ut_ad(!srv_read_only_mode); - ut_a(srv_n_purge_threads >= 1); - ut_a(trx_purge_state() == PURGE_STATE_INIT); - ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); - - srv_purge_tids[0] = os_thread_get_tid(); - os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge); - - rw_lock_x_lock(&purge_sys->latch); - - purge_sys->running = true; - purge_sys->state = PURGE_STATE_RUN; - - rw_lock_x_unlock(&purge_sys->latch); - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(srv_purge_thread_key); -#endif /* UNIV_PFS_THREAD */ - -#ifdef UNIV_DEBUG_THREAD_CREATION - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - slot = srv_reserve_slot(SRV_PURGE); - - ulint rseg_history_len = trx_sys->rseg_history_len; - - do { - /* If there are no records to purge or the last - purge didn't purge any records then wait for activity. */ - - if (srv_shutdown_state == SRV_SHUTDOWN_NONE - && srv_undo_sources - && (purge_sys->state == PURGE_STATE_STOP - || n_total_purged == 0)) { - - srv_purge_coordinator_suspend(slot, rseg_history_len); - } - - ut_ad(!slot->suspended); - - if (srv_purge_should_exit(n_total_purged)) { - break; - } - - n_total_purged = 0; - - srv_current_thread_priority = srv_purge_thread_priority; - - rseg_history_len = srv_do_purge( - srv_n_purge_threads, &n_total_purged); - - srv_inc_activity_count(); - } while (!srv_purge_should_exit(n_total_purged)); - - /* The task queue should always be empty, independent of fast - shutdown state. 
*/ - ut_a(srv_get_task_queue_length() == 0); - - srv_free_slot(slot); - - /* Note that we are shutting down. */ - rw_lock_x_lock(&purge_sys->latch); - - purge_sys->state = PURGE_STATE_EXIT; - - purge_sys->running = false; - - rw_lock_x_unlock(&purge_sys->latch); - -#ifdef UNIV_DEBUG_THREAD_CREATION - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif /* UNIV_DEBUG_THREAD_CREATION */ - - /* Ensure that all the worker threads quit. */ - if (srv_n_purge_threads > 1) { - srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1); - } - - my_thread_end(); - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ -} - -/**********************************************************************//** -Enqueues a task to server task queue and releases a worker thread, if there -is a suspended one. */ -UNIV_INTERN -void -srv_que_task_enqueue_low( -/*=====================*/ - que_thr_t* thr) /*!< in: query thread */ -{ - ut_ad(!srv_read_only_mode); - mutex_enter(&srv_sys.tasks_mutex); - - UT_LIST_ADD_LAST(queue, srv_sys.tasks, thr); - - mutex_exit(&srv_sys.tasks_mutex); - - srv_release_threads(SRV_WORKER, 1); -} - -/**********************************************************************//** -Get count of tasks in the queue. -@return number of tasks in queue */ -UNIV_INTERN -ulint -srv_get_task_queue_length(void) -/*===========================*/ -{ - ulint n_tasks; - - ut_ad(!srv_read_only_mode); - - mutex_enter(&srv_sys.tasks_mutex); - - n_tasks = UT_LIST_GET_LEN(srv_sys.tasks); - - mutex_exit(&srv_sys.tasks_mutex); - - return(n_tasks); -} - -/** Wake up the purge threads. 
*/ -UNIV_INTERN -void -srv_purge_wakeup() -{ - ut_ad(!srv_read_only_mode); - - if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { - - srv_release_threads(SRV_PURGE, 1); - - if (srv_n_purge_threads > 1) { - ulint n_workers = srv_n_purge_threads - 1; - - srv_release_threads(SRV_WORKER, n_workers); - } - } -} - -/** Check whether given space id is undo tablespace id -@param[in] space_id space id to check -@return true if it is undo tablespace else false. */ -bool -srv_is_undo_tablespace( - ulint space_id) -{ - if (srv_undo_space_id_start == 0) { - return (false); - } - - return(space_id >= srv_undo_space_id_start - && space_id < (srv_undo_space_id_start - + srv_undo_tablespaces_open)); -} diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc deleted file mode 100644 index fd129c3e55f..00000000000 --- a/storage/xtradb/srv/srv0start.cc +++ /dev/null @@ -1,3430 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -Portions of this file contain modifications contributed and copyrighted -by Percona Inc.. Those modifications are -gratefully acknowledged and are described briefly in the InnoDB -documentation. The contributions by Percona Inc. are incorporated with -their permission, and subject to the conditions contained in the file -COPYING.Percona. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file srv/srv0start.cc -Starts the InnoDB database server - -Created 2/16/1996 Heikki Tuuri -*************************************************************************/ - -#include "mysqld.h" -#include "pars0pars.h" -#include "row0ftsort.h" -#include "ut0mem.h" -#include "mem0mem.h" -#include "data0data.h" -#include "data0type.h" -#include "dict0dict.h" -#include "buf0buf.h" -#include "buf0dump.h" -#include "os0file.h" -#include "os0thread.h" -#include "fil0fil.h" -#include "fil0crypt.h" -#include "fsp0fsp.h" -#include "rem0rec.h" -#include "mtr0mtr.h" -#include "log0log.h" -#include "log0online.h" -#include "log0recv.h" -#include "page0page.h" -#include "page0cur.h" -#include "trx0trx.h" -#include "trx0sys.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "rem0rec.h" -#include "ibuf0ibuf.h" -#include "srv0start.h" -#include "srv0srv.h" -#include "buf0flu.h" -#include "btr0defragment.h" -#include "ut0timer.h" -#include "btr0scrub.h" - -#include <mysql/service_wsrep.h> - -#ifndef UNIV_HOTBACKUP -# include "trx0rseg.h" -# include "os0proc.h" -# include "sync0sync.h" -# include "buf0flu.h" -# include "buf0mtflu.h" -# include "buf0rea.h" -# include "dict0boot.h" -# include 
"dict0load.h" -# include "dict0stats_bg.h" -# include "que0que.h" -# include "usr0sess.h" -# include "lock0lock.h" -# include "trx0roll.h" -# include "trx0purge.h" -# include "lock0lock.h" -# include "pars0pars.h" -# include "btr0sea.h" -# include "rem0cmp.h" -# include "dict0crea.h" -# include "row0ins.h" -# include "row0sel.h" -# include "row0upd.h" -# include "row0row.h" -# include "row0mysql.h" -# include "btr0pcur.h" -# include "os0sync.h" -# include "zlib.h" -# include "ut0crc32.h" -# include "os0stacktrace.h" - -/** Log sequence number immediately after startup */ -UNIV_INTERN lsn_t srv_start_lsn; -/** Log sequence number at shutdown */ -UNIV_INTERN lsn_t srv_shutdown_lsn; - -#ifdef HAVE_DARWIN_THREADS -# include <sys/utsname.h> -/** TRUE if the F_FULLFSYNC option is available */ -UNIV_INTERN ibool srv_have_fullfsync = FALSE; -#endif - -/** TRUE if a raw partition is in use */ -UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE; - -/** UNDO tablespaces starts with space id. */ -ulint srv_undo_space_id_start; - -/** TRUE if the server is being started, before rolling back any -incomplete transactions */ -UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE; -/** TRUE if the server is being started */ -UNIV_INTERN ibool srv_is_being_started = FALSE; -/** TRUE if the server was successfully started */ -UNIV_INTERN ibool srv_was_started = FALSE; -/** TRUE if innobase_start_or_create_for_mysql() has been called */ -static ibool srv_start_has_been_called; - -/** Whether any undo log records can be generated */ -UNIV_INTERN bool srv_undo_sources; - -#ifdef UNIV_DEBUG -/** InnoDB system tablespace to set during recovery */ -UNIV_INTERN uint srv_sys_space_size_debug; -#endif /* UNIV_DEBUG */ - -/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to -SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ -UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE; - -/** Files comprising the system tablespace. 
Also used by Mariabackup. */ -UNIV_INTERN pfs_os_file_t files[1000]; - -/** io_handler_thread parameters for thread identification */ -static ulint n[SRV_MAX_N_IO_THREADS]; -/** io_handler_thread identifiers, 32 is the maximum number of purge threads. -The extra elements at the end are allocated as follows: -SRV_MAX_N_IO_THREADS + 1: srv_master_thread -SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread -SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread -SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread -SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread -SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread -SRV_MAX_N_IO_THREADS + 7: srv_worker_thread -... -SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 - + SRV_MAX_N_PURGE_THREADS - + MTFLUSH_MAX_WORKER]; -/* Thread contex data for multi-threaded flush */ -void *mtflush_ctx=NULL; - -/** Thead handles */ -static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS]; -static os_thread_t buf_flush_page_cleaner_thread_handle; -static os_thread_t buf_dump_thread_handle; -static os_thread_t dict_stats_thread_handle; -static os_thread_t buf_flush_lru_manager_thread_handle; -static os_thread_t srv_redo_log_follow_thread_handle; -/** Status variables, is thread started ?*/ -static bool thread_started[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS] = {false}; -static bool buf_flush_page_cleaner_thread_started = false; -static bool buf_dump_thread_started = false; -static bool dict_stats_thread_started = false; -static bool buf_flush_lru_manager_thread_started = false; -static bool srv_redo_log_follow_thread_started = false; - -/** We use this mutex to test the return value of pthread_mutex_trylock - on successful locking. HP-UX does NOT return 0, though Linux et al do. 
*/ -static os_fast_mutex_t srv_os_test_mutex; - -/** Name of srv_monitor_file */ -static char* srv_monitor_file_name; -#endif /* !UNIV_HOTBACKUP */ - -/** Default undo tablespace size in UNIV_PAGEs count (10MB). */ -static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES = - ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF; - -/** */ -#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD -#define SRV_MAX_N_PENDING_SYNC_IOS 100 - -#ifdef UNIV_PFS_THREAD -/* Keys to register InnoDB threads with performance schema */ -UNIV_INTERN mysql_pfs_key_t io_handler_thread_key; -UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key; -UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key; -UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key; -UNIV_INTERN mysql_pfs_key_t srv_master_thread_key; -UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key; -UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key; -#endif /* UNIV_PFS_THREAD */ - -/** Innobase start-up aborted. Perform cleanup actions. -@param[in] create_new_db TRUE if new db is being created -@param[in] file File name -@param[in] line Line number -@param[in] err Reason for aborting InnoDB startup -@return DB_SUCCESS or error code. */ -static -dberr_t -srv_init_abort( - bool create_new_db, - const char* file, - ulint line, - dberr_t err) -{ - if (create_new_db) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Database creation was aborted" - " at %s [" ULINTPF "]" - " with error %s. You may need" - " to delete the ibdata1 file before trying to start" - " up again.", - file, line, ut_strerr(err)); - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Plugin initialization aborted" - " at %s [" ULINTPF "]" - " with error %s.", - file, line, ut_strerr(err)); - } - - return(err); -} - -/*********************************************************************//** -Convert a numeric string that optionally ends in G or M or K, to a number -containing megabytes. 
-@return next character in string */ -static -char* -srv_parse_megabytes( -/*================*/ - char* str, /*!< in: string containing a quantity in bytes */ - ulint* megs) /*!< out: the number in megabytes */ -{ - char* endp; - ulint size; - - size = strtoul(str, &endp, 10); - - str = endp; - - switch (*str) { - case 'G': case 'g': - size *= 1024; - /* fall through */ - case 'M': case 'm': - str++; - break; - case 'K': case 'k': - size /= 1024; - str++; - break; - default: - size /= 1024 * 1024; - break; - } - - *megs = size; - return(str); -} - -/*********************************************************************//** -Check if a file can be opened in read-write mode. -@return true if it doesn't exist or can be opened in rw mode. */ -static -bool -srv_file_check_mode( -/*================*/ - const char* name) /*!< in: filename to check */ -{ - os_file_stat_t stat; - - memset(&stat, 0x0, sizeof(stat)); - - dberr_t err = os_file_get_status(name, &stat, true); - - if (err == DB_FAIL) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "os_file_get_status() failed on '%s'. Can't determine " - "file permissions", name); - - return(false); - - } else if (err == DB_SUCCESS) { - - /* Note: stat.rw_perm is only valid of files */ - - if (stat.type == OS_FILE_TYPE_FILE) { - - if (!stat.rw_perm) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "%s can't be opened in %s mode", - name, - srv_read_only_mode - ? "read" : "read-write"); - - return(false); - } - } else { - /* Not a regular file, bail out. */ - - ib_logf(IB_LOG_LEVEL_ERROR, - "'%s' not a regular file.", name); - - return(false); - } - } else { - - /* This is OK. If the file create fails on RO media, there - is nothing we can do. */ - - ut_a(err == DB_NOT_FOUND); - } - - return(true); -} - -/*********************************************************************//** -Reads the data files and their sizes from a character string given in -the .cnf file. 
-@return TRUE if ok, FALSE on parse error */ -UNIV_INTERN -ibool -srv_parse_data_file_paths_and_sizes( -/*================================*/ - char* str) /*!< in/out: the data file path string */ -{ - char* input_str; - char* path; - ulint size; - ulint i = 0; - - srv_auto_extend_last_data_file = FALSE; - srv_last_file_size_max = 0; - srv_data_file_names = NULL; - srv_data_file_sizes = NULL; - srv_data_file_is_raw_partition = NULL; - - input_str = str; - - /* First calculate the number of data files and check syntax: - path:size[M | G];path:size[M | G]... . Note that a Windows path may - contain a drive name and a ':'. */ - - while (*str != '\0') { - path = str; - - while ((*str != ':' && *str != '\0') - || (*str == ':' - && (*(str + 1) == '\\' || *(str + 1) == '/' - || *(str + 1) == ':'))) { - str++; - } - - if (*str == '\0') { - return(FALSE); - } - - str++; - - str = srv_parse_megabytes(str, &size); - - if (0 == strncmp(str, ":autoextend", - (sizeof ":autoextend") - 1)) { - - str += (sizeof ":autoextend") - 1; - - if (0 == strncmp(str, ":max:", - (sizeof ":max:") - 1)) { - - str += (sizeof ":max:") - 1; - - str = srv_parse_megabytes(str, &size); - } - - if (*str != '\0') { - - return(FALSE); - } - } - - if (strlen(str) >= 6 - && *str == 'n' - && *(str + 1) == 'e' - && *(str + 2) == 'w') { - str += 3; - } - - if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { - str += 3; - } - - if (size == 0) { - return(FALSE); - } - - i++; - - if (*str == ';') { - str++; - } else if (*str != '\0') { - - return(FALSE); - } - } - - if (i == 0) { - /* If innodb_data_file_path was defined it must contain - at least one data file definition */ - - return(FALSE); - } - - srv_data_file_names = static_cast<char**>( - malloc(i * sizeof *srv_data_file_names)); - - srv_data_file_sizes = static_cast<ulint*>( - malloc(i * sizeof *srv_data_file_sizes)); - - srv_data_file_is_raw_partition = static_cast<ulint*>( - malloc(i * sizeof *srv_data_file_is_raw_partition)); - - 
srv_n_data_files = i; - - /* Then store the actual values to our arrays */ - - str = input_str; - i = 0; - - while (*str != '\0') { - path = str; - - /* Note that we must step over the ':' in a Windows path; - a Windows path normally looks like C:\ibdata\ibdata1:1G, but - a Windows raw partition may have a specification like - \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */ - - while ((*str != ':' && *str != '\0') - || (*str == ':' - && (*(str + 1) == '\\' || *(str + 1) == '/' - || *(str + 1) == ':'))) { - str++; - } - - if (*str == ':') { - /* Make path a null-terminated string */ - *str = '\0'; - str++; - } - - str = srv_parse_megabytes(str, &size); - - srv_data_file_names[i] = path; - srv_data_file_sizes[i] = size; - - if (0 == strncmp(str, ":autoextend", - (sizeof ":autoextend") - 1)) { - - srv_auto_extend_last_data_file = TRUE; - - str += (sizeof ":autoextend") - 1; - - if (0 == strncmp(str, ":max:", - (sizeof ":max:") - 1)) { - - str += (sizeof ":max:") - 1; - - str = srv_parse_megabytes( - str, &srv_last_file_size_max); - } - - if (*str != '\0') { - - return(FALSE); - } - } - - (srv_data_file_is_raw_partition)[i] = 0; - - if (strlen(str) >= 6 - && *str == 'n' - && *(str + 1) == 'e' - && *(str + 2) == 'w') { - str += 3; - /* Initialize new raw device only during bootstrap */ - (srv_data_file_is_raw_partition)[i] = - opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW; - } - - if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { - str += 3; - - /* Initialize new raw device only during bootstrap */ - if ((srv_data_file_is_raw_partition)[i] == 0) { - (srv_data_file_is_raw_partition)[i] = - opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW; - } - } - - i++; - - if (*str == ';') { - str++; - } - } - - return(TRUE); -} - -/*********************************************************************//** -Frees the memory allocated by srv_parse_data_file_paths_and_sizes() -and srv_parse_log_group_home_dirs(). 
*/ -UNIV_INTERN -void -srv_free_paths_and_sizes(void) -/*==========================*/ -{ - free(srv_data_file_names); - srv_data_file_names = NULL; - free(srv_data_file_sizes); - srv_data_file_sizes = NULL; - free(srv_data_file_is_raw_partition); - srv_data_file_is_raw_partition = NULL; -} - -#ifndef UNIV_HOTBACKUP - -static ulint io_tid_i = 0; - -/********************************************************************//** -I/o-handler thread function. -@return OS_THREAD_DUMMY_RETURN */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(io_handler_thread)( -/*==============================*/ - void* arg) /*!< in: pointer to the number of the segment in - the aio array */ -{ - ulint segment; - ulint tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1; - - ut_ad(tid_i < srv_n_file_io_threads); - - segment = *((ulint*) arg); - - srv_io_tids[tid_i] = os_thread_get_tid(); - os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io); - -#ifdef UNIV_DEBUG_THREAD_CREATION - ib_logf(IB_LOG_LEVEL_INFO, - "Io handler thread %lu starts, id %lu\n", segment, - os_thread_pf(os_thread_get_curr_id())); -#endif - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(io_handler_thread_key); -#endif /* UNIV_PFS_THREAD */ - - while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { - srv_current_thread_priority = srv_io_thread_priority; - fil_aio_wait(segment); - } - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. - The thread actually never comes here because it is exited in an - os_event_wait(). */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************//** -Normalizes a directory path for Windows: converts slashes to backslashes. 
*/ -UNIV_INTERN -void -srv_normalize_path_for_win( -/*=======================*/ - char* str MY_ATTRIBUTE((unused))) /*!< in/out: null-terminated - character string */ -{ -#ifdef __WIN__ - for (; *str; str++) { - - if (*str == '/') { - *str = '\\'; - } - } -#endif -} - -#ifndef UNIV_HOTBACKUP -/*********************************************************************//** -Creates a log file. -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -create_log_file( -/*============*/ - pfs_os_file_t* file, /*!< out: file handle */ - const char* name) /*!< in: log file name */ -{ - ibool ret; - - *file = os_file_create( - innodb_file_log_key, name, - OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, - OS_LOG_FILE, &ret, FALSE); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name); - return(DB_ERROR); - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Setting log file %s size to %lu MB", - name, (ulong) srv_log_file_size - >> (20 - UNIV_PAGE_SIZE_SHIFT)); - - ret = os_file_set_size(name, *file, - (os_offset_t) srv_log_file_size - << UNIV_PAGE_SIZE_SHIFT); - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file" - " %s to size %lu MB", name, (ulong) srv_log_file_size - >> (20 - UNIV_PAGE_SIZE_SHIFT)); - return(DB_ERROR); - } - - ret = os_file_close(*file); - ut_a(ret); - - return(DB_SUCCESS); -} - -/** Initial number of the first redo log file */ -#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1) - -/*********************************************************************//** -Creates all log files. 
-@return DB_SUCCESS or error code */ -static -dberr_t -create_log_files( -/*=============*/ - bool create_new_db, /*!< in: TRUE if new database is being - created */ - char* logfilename, /*!< in/out: buffer for log file name */ - size_t dirnamelen, /*!< in: length of the directory path */ - lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ - char*& logfile0) /*!< out: name of the first log file */ -{ - if (srv_read_only_mode) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create log files in read-only mode"); - return(DB_READ_ONLY); - } - - /* We prevent system tablespace creation with existing files in - data directory. So we do not delete log files when creating new system - tablespace */ - if (!create_new_db) { - /* Remove any old log files. */ - for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) { - sprintf(logfilename + dirnamelen, "ib_logfile%u", i); - - /* Ignore errors about non-existent files or files - that cannot be removed. The create_log_file() will - return an error when the file exists. */ -#ifdef __WIN__ - DeleteFile((LPCTSTR) logfilename); -#else - unlink(logfilename); -#endif - /* Crashing after deleting the first - file should be recoverable. The buffer - pool was clean, and we can simply create - all log files from the scratch. */ - DBUG_EXECUTE_IF("innodb_log_abort_6", - return(DB_ERROR);); - } - } - - ut_ad(!buf_pool_check_no_pending_io()); - - DBUG_EXECUTE_IF("innodb_log_abort_7", return(DB_ERROR);); - - for (unsigned i = 0; i < srv_n_log_files; i++) { - sprintf(logfilename + dirnamelen, - "ib_logfile%u", i ? i : INIT_LOG_FILE0); - - dberr_t err = create_log_file(&files[i], logfilename); - - if (err != DB_SUCCESS) { - return(err); - } - } - - DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR);); - - /* We did not create the first log file initially as - ib_logfile0, so that crash recovery cannot find it until it - has been completed and renamed. 
*/ - sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0); - - fil_space_create( - logfilename, SRV_LOG_SPACE_FIRST_ID, 0, - FIL_LOG, - NULL /* no encryption yet */, - true /* this is create */); - - ut_a(fil_validate()); - - logfile0 = fil_node_create( - logfilename, (ulint) srv_log_file_size, - SRV_LOG_SPACE_FIRST_ID, FALSE); - ut_a(logfile0); - - for (unsigned i = 1; i < srv_n_log_files; i++) { - sprintf(logfilename + dirnamelen, "ib_logfile%u", i); - - if (!fil_node_create( - logfilename, - (ulint) srv_log_file_size, - SRV_LOG_SPACE_FIRST_ID, FALSE)) { - ut_error; - } - } - -#ifdef UNIV_LOG_ARCHIVE - /* Create the file space object for archived logs. */ - fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1, - 0, FIL_LOG, NULL /* no encryption yet */, true /* create */); -#endif - log_group_init(0, srv_n_log_files, - srv_log_file_size * UNIV_PAGE_SIZE, - SRV_LOG_SPACE_FIRST_ID, - SRV_LOG_SPACE_FIRST_ID + 1); - - fil_open_log_and_system_tablespace_files(); - - /* Create a log checkpoint. */ - mutex_enter(&log_sys->mutex); - ut_d(recv_no_log_write = FALSE); - recv_reset_logs( -#ifdef UNIV_LOG_ARCHIVE - UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no, - TRUE, -#endif - lsn); - mutex_exit(&log_sys->mutex); - - return(DB_SUCCESS); -} - -/** Rename the first redo log file. 
-@param[in,out] logfilename buffer for the log file name -@param[in] dirnamelen length of the directory path -@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value -@param[in,out] logfile0 name of the first log file -@return error code -@retval DB_SUCCESS on successful operation */ -MY_ATTRIBUTE((warn_unused_result, nonnull)) -static -dberr_t -create_log_files_rename( -/*====================*/ - char* logfilename, /*!< in/out: buffer for log file name */ - size_t dirnamelen, /*!< in: length of the directory path */ - lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ - char* logfile0) /*!< in/out: name of the first log file */ -{ - /* If innodb_flush_method=O_DSYNC, - we need to explicitly flush the log buffers. */ - fil_flush(SRV_LOG_SPACE_FIRST_ID); - - DBUG_EXECUTE_IF("innodb_log_abort_9", return(DB_ERROR);); - - /* Close the log files, so that we can rename - the first one. */ - fil_close_log_files(false); - - /* Rename the first log file, now that a log - checkpoint has been created. */ - sprintf(logfilename + dirnamelen, "ib_logfile%u", 0); - - ib_logf(IB_LOG_LEVEL_INFO, - "Renaming log file %s to %s", logfile0, logfilename); - - mutex_enter(&log_sys->mutex); - ut_ad(strlen(logfile0) == 2 + strlen(logfilename)); - dberr_t err = os_file_rename( - innodb_file_log_key, logfile0, logfilename) - ? DB_SUCCESS : DB_ERROR; - - /* Replace the first file with ib_logfile0. */ - strcpy(logfile0, logfilename); - mutex_exit(&log_sys->mutex); - - DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;); - - if (err == DB_SUCCESS) { - fil_open_log_and_system_tablespace_files(); - ib_logf(IB_LOG_LEVEL_WARN, - "New log files created, LSN=" LSN_PF, lsn); - } - - return(err); -} - -/*********************************************************************//** -Opens a log file. 
-@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -open_log_file( -/*==========*/ - pfs_os_file_t* file, /*!< out: file handle */ - const char* name, /*!< in: log file name */ - os_offset_t* size) /*!< out: file size */ -{ - ibool ret; - - *file = os_file_create(innodb_file_log_key, name, - OS_FILE_OPEN, OS_FILE_AIO, - OS_LOG_FILE, &ret, FALSE); - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name); - return(DB_ERROR); - } - - *size = os_file_get_size(*file); - - ret = os_file_close(*file); - ut_a(ret); - return(DB_SUCCESS); -} - - -/** Creates or opens database data files and closes them. -@param[out] create_new_db true = create new database -@param[out] min_arch_log_no min of archived log numbers in - data files -@param[out] max_arch_log_no max of archived log numbers in - data files -@param[out] flushed_lsn flushed lsn in fist datafile -@param[out] sum_of_new_sizes sum of sizes of the new files - added -@return DB_SUCCESS or error code */ -MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -open_or_create_data_files( - bool* create_new_db, -#ifdef UNIV_LOG_ARCHIVE - lsn_t* min_arch_log_no, - lsn_t* max_arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - lsn_t* flushed_lsn, - ulint* sum_of_new_sizes) -{ - ibool ret; - ulint i; - bool one_opened = false; - bool one_created = false; - os_offset_t size; - ulint flags; - ulint space; - ulint rounded_size_pages; - char name[10000]; - fil_space_crypt_t* crypt_data=NULL; - - if (srv_n_data_files >= 1000) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Can only have < 1000 data files, you have " - "defined %lu", (ulong) srv_n_data_files); - - return(DB_ERROR); - } - - *sum_of_new_sizes = 0; - - *create_new_db = false; - - srv_normalize_path_for_win(srv_data_home); - - for (i = 0; i < srv_n_data_files; i++) { - ulint dirnamelen; - - srv_normalize_path_for_win(srv_data_file_names[i]); - dirnamelen = strlen(srv_data_home); - - ut_a(dirnamelen + 
strlen(srv_data_file_names[i]) - < (sizeof name) - 1); - - memcpy(name, srv_data_home, dirnamelen); - - /* Add a path separator if needed. */ - if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) { - name[dirnamelen++] = SRV_PATH_SEPARATOR; - } - - strcpy(name + dirnamelen, srv_data_file_names[i]); - - /* Note: It will return true if the file doesn' exist. */ - - if (!srv_file_check_mode(name)) { - - return(DB_FAIL); - - } else if (srv_data_file_is_raw_partition[i] == 0) { - - /* First we try to create the file: if it already - exists, ret will get value FALSE */ - - files[i] = os_file_create( - innodb_file_data_key, name, OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); - - if (srv_read_only_mode) { - - if (ret) { - goto size_check; - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "Opening %s failed!", name); - - return(DB_ERROR); - - } else if (!ret - && os_file_get_last_error(false) - != OS_FILE_ALREADY_EXISTS -#ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have - errno set to 0 here, which causes our - function to return 100; work around that - AIX problem */ - && os_file_get_last_error(false) != 100 -#endif /* UNIV_AIX */ - ) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Creating or opening %s failed!", - name); - - return(DB_ERROR); - } - - } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) { - - ut_a(!srv_read_only_mode); - - /* The partition is opened, not created; then it is - written over */ - - srv_start_raw_disk_in_use = TRUE; - srv_created_new_raw = TRUE; - - files[i] = os_file_create( - innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error in opening %s", name); - - return(DB_ERROR); - } - - const char* check_msg; - - check_msg = fil_read_first_page( - files[i], FALSE, &flags, &space, - flushed_lsn, NULL); - - /* If first page is valid, don't overwrite DB. 
- It prevents overwriting DB when mysql_install_db - starts mysqld multiple times during bootstrap. */ - if (check_msg == NULL) { - - srv_created_new_raw = FALSE; - ret = FALSE; - } - - } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - srv_start_raw_disk_in_use = TRUE; - - ret = FALSE; - } else { - ut_a(0); - } - - if (ret == FALSE) { - const char* check_msg; - /* We open the data file */ - - if (one_created) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Data files can only be added at " - "the end of a tablespace, but " - "data file %s existed beforehand.", - name); - return(DB_ERROR); - } - - if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - ut_a(!srv_read_only_mode); - files[i] = os_file_create( - innodb_file_data_key, - name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); - } else if (i == 0) { - files[i] = os_file_create( - innodb_file_data_key, - name, OS_FILE_OPEN_RETRY, - OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); - } else { - files[i] = os_file_create( - innodb_file_data_key, - name, OS_FILE_OPEN, OS_FILE_NORMAL, - OS_DATA_FILE, &ret, FALSE); - } - - if (!ret) { - os_file_get_last_error(true); - - ib_logf(IB_LOG_LEVEL_ERROR, - "Can't open '%s'", name); - - return(DB_ERROR); - } - - if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - goto skip_size_check; - } - -size_check: - size = os_file_get_size(files[i]); - ut_a(size != (os_offset_t) -1); - - /* If InnoDB encountered an error or was killed - while extending the data file, the last page - could be incomplete. 
*/ - - rounded_size_pages = static_cast<ulint>( - size >> UNIV_PAGE_SIZE_SHIFT); - - if (i == srv_n_data_files - 1 - && srv_auto_extend_last_data_file) { - - if (srv_data_file_sizes[i] > rounded_size_pages - || (srv_last_file_size_max > 0 - && srv_last_file_size_max - < rounded_size_pages)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "auto-extending " - "data file %s is " - "of a different size " - ULINTPF " pages (rounded " - "down to MB) than specified " - "in the .cnf file: " - "initial " ULINTPF " pages, " - "max " ULINTPF " (relevant if " - "non-zero) pages!", - name, - rounded_size_pages, - srv_data_file_sizes[i], - srv_last_file_size_max); - - return(DB_ERROR); - } - - srv_data_file_sizes[i] = rounded_size_pages; - } - - if (rounded_size_pages != srv_data_file_sizes[i]) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Data file %s is of a different " - "size " ULINTPF " pages (rounded down to MB) " - "than specified in the .cnf file " - ULINTPF " pages!", - name, - rounded_size_pages, - srv_data_file_sizes[i]); - - return(DB_ERROR); - } -skip_size_check: - - /* This is the earliest location where we can load - the double write buffer. */ - if (i == 0) { - /* XtraBackup never loads corrupted pages from - the doublewrite buffer */ - buf_dblwr_init_or_load_pages( - files[i], srv_data_file_names[i], !IS_XTRABACKUP()); - } - - bool retry = true; -check_first_page: - check_msg = fil_read_first_page( - files[i], one_opened, &flags, &space, - flushed_lsn, &crypt_data); - - if (check_msg) { - - if (retry) { - fsp_open_info fsp; - const ulint page_no = 0; - - retry = false; - fsp.id = 0; - fsp.filepath = srv_data_file_names[i]; - fsp.file = files[i]; - - if (fil_user_tablespace_restore_page( - &fsp, page_no)) { - goto check_first_page; - } - } - - ib_logf(IB_LOG_LEVEL_ERROR, - "%s in data file %s", - check_msg, name); - return(DB_ERROR); - } - - /* The first file of the system tablespace must - have space ID = TRX_SYS_SPACE. 
The FSP_SPACE_ID - field in files greater than ibdata1 are unreliable. */ - ut_a(one_opened || space == TRX_SYS_SPACE); - - /* Check the flags for the first system tablespace - file only. */ - if (!one_opened - && UNIV_PAGE_SIZE - != fsp_flags_get_page_size(flags)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Data file \"%s\" uses page size " ULINTPF " ," - "but the start-up parameter " - "is --innodb-page-size=" ULINTPF " .", - name, - fsp_flags_get_page_size(flags), - UNIV_PAGE_SIZE); - - return(DB_ERROR); - } - - one_opened = TRUE; - } else if (!srv_read_only_mode) { - /* We created the data file and now write it full of - zeros */ - - one_created = TRUE; - - if (i > 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "Data file %s did not" - " exist: new to be created", - name); - } else { - ib_logf(IB_LOG_LEVEL_INFO, - "The first specified " - "data file %s did not exist: " - "a new database to be created!", - name); - - *create_new_db = TRUE; - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Setting file %s size to " ULINTPF " MB", - name, - (srv_data_file_sizes[i] - >> (20 - UNIV_PAGE_SIZE_SHIFT))); - - ret = os_file_set_size( - name, files[i], - (os_offset_t) srv_data_file_sizes[i] - << UNIV_PAGE_SIZE_SHIFT - /* TODO: enable page_compression on the - system tablespace and add - , FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)*/); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Error in creating %s: " - "probably out of disk space", - name); - - return(DB_ERROR); - } - - *sum_of_new_sizes += srv_data_file_sizes[i]; - } - - ret = os_file_close(files[i]); - ut_a(ret); - - if (i == 0) { - if (!crypt_data) { - crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT, - FIL_DEFAULT_ENCRYPTION_KEY); - } - - flags = FSP_FLAGS_PAGE_SSIZE(); - - fil_space_create(name, 0, flags, FIL_TABLESPACE, - crypt_data, (*create_new_db) == true); - } - - ut_a(fil_validate()); - - if (!fil_node_create(name, srv_data_file_sizes[i], 0, - srv_data_file_is_raw_partition[i] != 0)) { - return(DB_ERROR); - } - } - - 
return(DB_SUCCESS); -} - -/*********************************************************************//** -Create undo tablespace. -@return DB_SUCCESS or error code */ -static -dberr_t -srv_undo_tablespace_create( -/*=======================*/ - const char* name, /*!< in: tablespace name */ - ulint size) /*!< in: tablespace size in pages */ -{ - pfs_os_file_t fh; - ibool ret; - dberr_t err = DB_SUCCESS; - - os_file_create_subdirs_if_needed(name); - - fh = os_file_create( - innodb_file_data_key, - name, - srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); - - if (srv_read_only_mode && ret) { - ib_logf(IB_LOG_LEVEL_INFO, - "%s opened in read-only mode", name); - } else if (ret == FALSE) { - if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS -#ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have - errno set to 0 here, which causes our function - to return 100; work around that AIX problem */ - && os_file_get_last_error(false) != 100 -#endif /* UNIV_AIX */ - ) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Can't create UNDO tablespace %s", name); - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Creating system tablespace with" - " existing undo tablespaces is not" - " supported. 
Please delete all undo" - " tablespaces before creating new" - " system tablespace."); - } - err = DB_ERROR; - } else { - ut_a(!srv_read_only_mode); - - /* We created the data file and now write it full of zeros */ - - ib_logf(IB_LOG_LEVEL_INFO, - "Data file %s did not exist: new to be created", - name); - - ib_logf(IB_LOG_LEVEL_INFO, - "Setting file %s size to %lu MB", - name, size >> (20 - UNIV_PAGE_SIZE_SHIFT)); - - ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT - /* TODO: enable page_compression on the - system tablespace and add - FSP_FLAGS_HAS_PAGE_COMPRESSION(flags) - */); - - if (!ret) { - ib_logf(IB_LOG_LEVEL_INFO, - "Error in creating %s: probably out of " - "disk space", name); - - err = DB_ERROR; - } - - os_file_close(fh); - } - - return(err); -} - -/*********************************************************************//** -Open an undo tablespace. -@return DB_SUCCESS or error code */ -static -dberr_t -srv_undo_tablespace_open( -/*=====================*/ - const char* name, /*!< in: tablespace name */ - ulint space) /*!< in: tablespace id */ -{ - pfs_os_file_t fh; - dberr_t err = DB_ERROR; - ibool ret; - ulint flags; - - if (!srv_file_check_mode(name)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "UNDO tablespaces must be %s!", - srv_read_only_mode ? "writable" : "readable"); - - return(DB_ERROR); - } - - fh = os_file_create( - innodb_file_data_key, name, - OS_FILE_OPEN_RETRY - | OS_FILE_ON_ERROR_NO_EXIT - | OS_FILE_ON_ERROR_SILENT, - OS_FILE_NORMAL, - OS_DATA_FILE, - &ret, - FALSE); - - /* If the file open was successful then load the tablespace. */ - - if (ret) { - os_offset_t size; - - size = os_file_get_size(fh); - ut_a(size != (os_offset_t) -1); - - ret = os_file_close(fh); - ut_a(ret); - - /* Load the tablespace into InnoDB's internal - data structures. */ - - /* We set the biggest space id to the undo tablespace - because InnoDB hasn't opened any other tablespace apart - from the system tablespace. 
*/ - - fil_set_max_space_id_if_bigger(space); - - /* Set the compressed page size to 0 (non-compressed) */ - flags = FSP_FLAGS_PAGE_SSIZE(); - fil_space_create(name, space, flags, FIL_TABLESPACE, - NULL /* no encryption */, - true /* create */); - - ut_a(fil_validate()); - - os_offset_t n_pages = size / UNIV_PAGE_SIZE; - - /* On 64 bit Windows ulint can be 32 bit and os_offset_t - is 64 bit. It is OK to cast the n_pages to ulint because - the unit has been scaled to pages and they are always - 32 bit. */ - if (fil_node_create(name, (ulint) n_pages, space, FALSE)) { - err = DB_SUCCESS; - } - } - - return(err); -} - -/******************************************************************** -Opens the configured number of undo tablespaces. -@return DB_SUCCESS or error code */ -dberr_t -srv_undo_tablespaces_init( -/*======================*/ - ibool create_new_db, /*!< in: TRUE if new db being - created */ - ibool backup_mode, /*!< in: TRUE disables reading - the system tablespace (used in - XtraBackup), FALSE is passed on - recovery. */ - const ulint n_conf_tablespaces, /*!< in: configured undo - tablespaces */ - ulint* n_opened) /*!< out: number of UNDO - tablespaces successfully - discovered and opened */ -{ - ulint i; - dberr_t err = DB_SUCCESS; - ulint prev_space_id = 0; - ulint n_undo_tablespaces; - ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1]; - - *n_opened = 0; - - ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS); - - memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids)); - - /* Create the undo spaces only if we are creating a new - instance. We don't allow creating of new undo tablespaces - in an existing instance (yet). This restriction exists because - we check in several places for SYSTEM tablespaces to be less than - the min of user defined tablespace ids. Once we implement saving - the location of the undo tablespaces and their space ids this - restriction will/should be lifted. 
*/ - - for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) { - char name[OS_FILE_MAX_PATH]; - ulint space_id = i + 1; - - DBUG_EXECUTE_IF("innodb_undo_upgrade", - space_id = i + 3;); - - ut_snprintf( - name, sizeof(name), - "%s%cundo%03lu", - srv_undo_dir, SRV_PATH_SEPARATOR, space_id); - - if (i == 0) { - srv_undo_space_id_start = space_id; - prev_space_id = srv_undo_space_id_start - 1; - } - - undo_tablespace_ids[i] = space_id; - - err = srv_undo_tablespace_create( - name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); - - if (err != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Could not create undo tablespace '%s'.", - name); - - return(err); - } - } - - /* Get the tablespace ids of all the undo segments excluding - the system tablespace (0). If we are creating a new instance then - we build the undo_tablespace_ids ourselves since they don't - already exist. */ - - if (!create_new_db && !backup_mode) { - n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces( - undo_tablespace_ids); - - if (n_undo_tablespaces != 0) { - srv_undo_space_id_start = undo_tablespace_ids[0]; - prev_space_id = srv_undo_space_id_start - 1; - } - - } else { - n_undo_tablespaces = n_conf_tablespaces; - - undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED; - } - - /* Open all the undo tablespaces that are currently in use. If we - fail to open any of these it is a fatal error. The tablespace ids - should be contiguous. It is a fatal error because they are required - for recovery and are referenced by the UNDO logs (a.k.a RBS). */ - - for (i = 0; i < n_undo_tablespaces; ++i) { - char name[OS_FILE_MAX_PATH]; - - ut_snprintf( - name, sizeof(name), - "%s%cundo%03lu", - srv_undo_dir, SRV_PATH_SEPARATOR, - undo_tablespace_ids[i]); - - /* Should be no gaps in undo tablespace ids. */ - ut_a(prev_space_id + 1 == undo_tablespace_ids[i]); - - /* The system space id should not be in this array. 
*/ - ut_a(undo_tablespace_ids[i] != 0); - ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); - - err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]); - - if (err != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to open undo tablespace '%s'.", name); - - return(err); - } - - prev_space_id = undo_tablespace_ids[i]; - - ++*n_opened; - } - - /* Open any extra unused undo tablespaces. These must be contiguous. - We stop at the first failure. These are undo tablespaces that are - not in use and therefore not required by recovery. We only check - that there are no gaps. */ - - for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) { - char name[OS_FILE_MAX_PATH]; - - ut_snprintf( - name, sizeof(name), - "%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i); - - /* Undo space ids start from 1. */ - err = srv_undo_tablespace_open(name, i); - - if (err != DB_SUCCESS) { - break; - } - - /** Note the first undo tablespace id in case of - no active undo tablespace. */ - if (n_undo_tablespaces == 0) { - srv_undo_space_id_start = i; - } - - ++n_undo_tablespaces; - - ++*n_opened; - } - - /** Explictly specify the srv_undo_space_id_start - as zero when there are no undo tablespaces. */ - if (n_undo_tablespaces == 0) { - srv_undo_space_id_start = 0; - } - - /* If the user says that there are fewer than what we find we - tolerate that discrepancy but not the inverse. Because there could - be unused undo tablespaces for future use. */ - - if (n_conf_tablespaces > n_undo_tablespaces) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Expected to open %lu undo " - "tablespaces but was able\n", - n_conf_tablespaces); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: to find only %lu undo " - "tablespaces.\n", n_undo_tablespaces); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Set the " - "innodb_undo_tablespaces parameter to " - "the\n"); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: correct value and retry. 
Suggested " - "value is %lu\n", n_undo_tablespaces); - - return(err != DB_SUCCESS ? err : DB_ERROR); - - } else if (n_undo_tablespaces > 0) { - - ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces", - n_undo_tablespaces); - - if (n_conf_tablespaces == 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Using the system tablespace for all UNDO " - "logging because innodb_undo_tablespaces=0"); - } - } - - if (create_new_db) { - mtr_t mtr; - - mtr_start(&mtr); - - /* The undo log tablespace */ - for (i = 0; i < n_undo_tablespaces; ++i) { - - fsp_header_init( - undo_tablespace_ids[i], - SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); - } - - mtr_commit(&mtr); - } - - return(DB_SUCCESS); -} - -/******************************************************************** -Wait for the purge thread(s) to start up. */ -static -void -srv_start_wait_for_purge_to_start() -/*===============================*/ -{ - /* Wait for the purge coordinator and master thread to startup. */ - - purge_state_t state = trx_purge_state(); - - ut_a(state != PURGE_STATE_DISABLED); - - while (srv_shutdown_state == SRV_SHUTDOWN_NONE - && srv_force_recovery < SRV_FORCE_NO_BACKGROUND - && state == PURGE_STATE_INIT) { - - switch (state = trx_purge_state()) { - case PURGE_STATE_RUN: - case PURGE_STATE_STOP: - break; - - case PURGE_STATE_INIT: - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for purge to start"); - - os_thread_sleep(50000); - break; - - case PURGE_STATE_EXIT: - case PURGE_STATE_DISABLED: - ut_error; - } - } -} - -/*********************************************************************//** -Initializes the log tracking subsystem and starts its thread. 
*/ -static -void -init_log_online(void) -/*=================*/ -{ - if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) { - srv_track_changed_pages = FALSE; - return; - } - - if (srv_track_changed_pages) { - - log_online_read_init(); - - /* Create the thread that follows the redo log to output the - changed page bitmap */ - srv_redo_log_follow_thread_handle = os_thread_create(&srv_redo_log_follow_thread, NULL, - thread_ids + 5 + SRV_MAX_N_IO_THREADS); - srv_redo_log_follow_thread_started = true; - } -} - -/******************************************************************** -Starts InnoDB and creates a new database if database files -are not found and the user wants. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -innobase_start_or_create_for_mysql() -{ - bool create_new_db; - lsn_t flushed_lsn; -#ifdef UNIV_LOG_ARCHIVE - lsn_t min_arch_log_no = LSN_MAX; - lsn_t max_arch_log_no = LSN_MAX; -#endif /* UNIV_LOG_ARCHIVE */ - ulint sum_of_new_sizes; - dberr_t err; - unsigned i; - ulint srv_n_log_files_found = srv_n_log_files; - ulint io_limit; - mtr_t mtr; - ib_bh_t* ib_bh; - ulint n_recovered_trx; - char logfilename[10000]; - char* logfile0 = NULL; - size_t dirnamelen; - bool sys_datafiles_created = false; - - /* Check that os_fast_mutexes work as expected */ - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex); - - ut_a(0 == os_fast_mutex_trylock(&srv_os_test_mutex)); - - os_fast_mutex_unlock(&srv_os_test_mutex); - - os_fast_mutex_lock(&srv_os_test_mutex); - - os_fast_mutex_unlock(&srv_os_test_mutex); - - os_fast_mutex_free(&srv_os_test_mutex); - - /* This should be initialized early */ - ut_init_timer(); - - if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { - srv_read_only_mode = 1; - } - - high_level_read_only = srv_read_only_mode - || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; - - if (srv_read_only_mode) { - ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode"); - } - -#ifdef HAVE_DARWIN_THREADS -# ifdef F_FULLFSYNC - /* This 
executable has been compiled on Mac OS X 10.3 or later. - Assume that F_FULLFSYNC is available at run-time. */ - srv_have_fullfsync = TRUE; -# else /* F_FULLFSYNC */ - /* This executable has been compiled on Mac OS X 10.2 - or earlier. Determine if the executable is running - on Mac OS X 10.3 or later. */ - struct utsname utsname; - if (uname(&utsname)) { - ut_print_timestamp(stderr); - fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr); - } else { - srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0; - } - if (!srv_have_fullfsync) { - ut_print_timestamp(stderr); - fputs(" InnoDB: On Mac OS X, fsync() may be " - "broken on internal drives,\n", stderr); - ut_print_timestamp(stderr); - fputs(" InnoDB: making transactions unsafe!\n", stderr); - } -# endif /* F_FULLFSYNC */ -#endif /* HAVE_DARWIN_THREADS */ - - ib_logf(IB_LOG_LEVEL_INFO, - "Using %s to ref count buffer pool pages", -#ifdef PAGE_ATOMIC_REF_COUNT - "atomics" -#else - "mutexes" -#endif /* PAGE_ATOMIC_REF_COUNT */ - ); - - compile_time_assert(sizeof(ulint) == sizeof(void*)); - - /* If stacktrace is used we set up signal handler for SIGUSR2 signal - here. If signal handler set fails we report that and disable - stacktrace feature. */ - - if (srv_use_stacktrace) { -#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS - struct sigaction sigact; - - sigact.sa_sigaction = os_stacktrace_print; - sigact.sa_flags = SA_RESTART | SA_SIGINFO; - - if (sigaction(SIGUSR2, &sigact, (struct sigaction *)NULL) != 0) - { - fprintf(stderr, " InnoDB:error setting signal handler for %d (%s)\n", - SIGUSR2, strsignal(SIGUSR2)); - srv_use_stacktrace = FALSE; - - } -#endif /* defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS */ - } - -#ifdef UNIV_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!"); -#endif - -#ifdef UNIV_IBUF_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! 
UNIV_IBUF_DEBUG switched on !!!!!!!!!"); -# ifdef UNIV_IBUF_COUNT_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on " - "!!!!!!!!!"); - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG"); -# endif -#endif - -#ifdef UNIV_BLOB_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n" - "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG"); -#endif /* UNIV_BLOB_DEBUG */ - -#ifdef UNIV_SYNC_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!"); -#endif - -#ifdef UNIV_SEARCH_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!"); -#endif - -#ifdef UNIV_LOG_LSN_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!"); -#endif /* UNIV_LOG_LSN_DEBUG */ -#ifdef UNIV_MEM_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!"); -#endif - - if (srv_use_sys_malloc) { - ib_logf(IB_LOG_LEVEL_INFO, - "The InnoDB memory heap is disabled"); - } - -#if defined(COMPILER_HINTS_ENABLED) - ib_logf(IB_LOG_LEVEL_INFO, - " InnoDB: Compiler hints enabled."); -#endif /* defined(COMPILER_HINTS_ENABLED) */ - - ib_logf(IB_LOG_LEVEL_INFO, - "" IB_ATOMICS_STARTUP_MSG ""); - - ib_logf(IB_LOG_LEVEL_INFO, - "" IB_MEMORY_BARRIER_STARTUP_MSG ""); - -#ifndef HAVE_MEMORY_BARRIER -#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ -#else - ib_logf(IB_LOG_LEVEL_WARN, - "MySQL was built without a memory barrier capability on this" - " architecture, which might allow a mutex/rw_lock violation" - " under high thread concurrency. 
This may cause a hang."); -#endif /* IA32 or AMD64 */ -#endif /* HAVE_MEMORY_BARRIER */ - - ib_logf(IB_LOG_LEVEL_INFO, - "Compressed tables use zlib " ZLIB_VERSION -#ifdef UNIV_ZIP_DEBUG - " with validation" -#endif /* UNIV_ZIP_DEBUG */ - ); -#ifdef UNIV_ZIP_COPY - ib_logf(IB_LOG_LEVEL_INFO, "and extra copying"); -#endif /* UNIV_ZIP_COPY */ - - - /* Since InnoDB does not currently clean up all its internal data - structures in MySQL Embedded Server Library server_end(), we - print an error message if someone tries to start up InnoDB a - second time during the process lifetime. */ - - if (srv_start_has_been_called) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: startup called second time " - "during the process\n"); - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded " - "Server Library you\n"); - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: cannot call server_init() more " - "than once during the\n"); - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: process lifetime.\n"); - } - - srv_start_has_been_called = TRUE; - -#ifdef UNIV_DEBUG - log_do_write = TRUE; -#endif /* UNIV_DEBUG */ - /* yydebug = TRUE; */ - - srv_is_being_started = TRUE; - srv_startup_is_before_trx_rollback_phase = TRUE; - -#ifdef __WIN__ - srv_use_native_aio = TRUE; - -#elif defined(LINUX_NATIVE_AIO) - - if (srv_use_native_aio) { - ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO"); - } -#else - /* Currently native AIO is supported only on windows and linux - and that also when the support is compiled in. In all other - cases, we ignore the setting of innodb_use_native_aio. 
*/ - srv_use_native_aio = FALSE; -#endif /* __WIN__ */ - - if (srv_file_flush_method_str == NULL) { - /* These are the default options */ - - srv_unix_file_flush_method = SRV_UNIX_FSYNC; - - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { - srv_unix_file_flush_method = SRV_UNIX_FSYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) { - srv_unix_file_flush_method = SRV_UNIX_O_DSYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { - srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { - srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) { - srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { - srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) { - srv_unix_file_flush_method = SRV_UNIX_NOSYNC; -#ifdef _WIN32 - } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) { - srv_win_file_flush_method = SRV_WIN_IO_NORMAL; - srv_use_native_aio = FALSE; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - srv_use_native_aio = FALSE; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, - "async_unbuffered")) { - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - srv_use_native_aio = TRUE; -#endif /* __WIN__ */ - } else { - ib_logf(IB_LOG_LEVEL_ERROR, - "Unrecognized value %s for innodb_flush_method", - srv_file_flush_method_str); - return(DB_ERROR); - } - - /* Note that the call srv_boot() also changes the values of - some variables to the units used by InnoDB internally */ - - /* Set the maximum number of threads which can wait for a semaphore - inside InnoDB: this is the 'sync wait 
array' size, as well as the - maximum number of threads that can wait in the 'srv_conc array' for - their time to enter InnoDB. */ - -#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024) - srv_max_n_threads = 1 /* io_ibuf_thread */ - + 1 /* io_log_thread */ - + 1 /* lock_wait_timeout_thread */ - + 1 /* srv_error_monitor_thread */ - + 1 /* srv_monitor_thread */ - + 1 /* srv_master_thread */ - + 1 /* srv_redo_log_follow_thread */ - + 1 /* srv_purge_coordinator_thread */ - + 1 /* buf_dump_thread */ - + 1 /* dict_stats_thread */ - + 1 /* fts_optimize_thread */ - + 1 /* recv_writer_thread */ - + 1 /* buf_flush_page_cleaner_thread */ - + 1 /* trx_rollback_or_clean_all_recovered */ - + 128 /* added as margin, for use of - InnoDB Memcached etc. */ - + max_connections - + srv_n_read_io_threads - + srv_n_write_io_threads - + srv_n_purge_threads - /* FTS Parallel Sort */ - + fts_sort_pll_degree * FTS_NUM_AUX_INDEX - * max_connections; - - if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) { - /* If buffer pool is less than 1 GB, - use only one buffer pool instance */ - srv_buf_pool_instances = 1; - } - - srv_boot(); - - ib_logf(IB_LOG_LEVEL_INFO, ut_crc32_implementation); - - if (!srv_read_only_mode) { - - mutex_create(srv_monitor_file_mutex_key, - &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK); - - if (srv_innodb_status) { - - srv_monitor_file_name = static_cast<char*>( - mem_alloc( - strlen(fil_path_to_mysql_datadir) - + 20 + sizeof "/innodb_status.")); - - sprintf(srv_monitor_file_name, "%s/innodb_status.%lu", - fil_path_to_mysql_datadir, - os_proc_get_number()); - - srv_monitor_file = fopen(srv_monitor_file_name, "w+"); - - if (!srv_monitor_file) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Unable to create %s: %s", - srv_monitor_file_name, - strerror(errno)); - - return(DB_ERROR); - } - } else { - srv_monitor_file_name = NULL; - srv_monitor_file = os_file_create_tmpfile(NULL); - - if (!srv_monitor_file) { - return(DB_ERROR); - } - } - - mutex_create(srv_dict_tmpfile_mutex_key, - 
&srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION); - - srv_dict_tmpfile = os_file_create_tmpfile(NULL); - - if (!srv_dict_tmpfile) { - return(DB_ERROR); - } - - mutex_create(srv_misc_tmpfile_mutex_key, - &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH); - - srv_misc_tmpfile = os_file_create_tmpfile(NULL); - - if (!srv_misc_tmpfile) { - return(DB_ERROR); - } - } - - /* If user has set the value of innodb_file_io_threads then - we'll emit a message telling the user that this parameter - is now deprecated. */ - if (srv_n_file_io_threads != 4) { - ib_logf(IB_LOG_LEVEL_WARN, - "innodb_file_io_threads is deprecated. Please use " - "innodb_read_io_threads and innodb_write_io_threads " - "instead"); - } - - /* Now overwrite the value on srv_n_file_io_threads */ - srv_n_file_io_threads = srv_n_read_io_threads; - - if (!srv_read_only_mode) { - /* Add the log and ibuf IO threads. */ - srv_n_file_io_threads += 2; - srv_n_file_io_threads += srv_n_write_io_threads; - } else { - ib_logf(IB_LOG_LEVEL_INFO, - "Disabling background IO write threads."); - - srv_n_write_io_threads = 0; - } - - ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS); - - io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD; - - /* On Windows when using native aio the number of aio requests - that a thread can handle at a given time is limited to 32 - i.e.: SRV_N_PENDING_IOS_PER_THREAD */ -# ifdef __WIN__ - if (srv_use_native_aio) { - io_limit = SRV_N_PENDING_IOS_PER_THREAD; - } -# endif /* __WIN__ */ - - if (!os_aio_init(io_limit, - srv_n_read_io_threads, - srv_n_write_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS)) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Fatal : Cannot initialize AIO sub-system"); -#if defined(LINUX_NATIVE_AIO) - ib_logf(IB_LOG_LEVEL_INFO, - "You can try increasing system fs.aio-max-nr to 1048576 " - "or larger or setting innodb_use_native_aio = 0 in my.cnf"); -#endif - - return(DB_ERROR); - } - - fil_init(srv_file_per_table ? 
50000 : 5000, srv_max_n_open_files); - - double size; - char unit; - - if (srv_buf_pool_size >= 1024 * 1024 * 1024) { - size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024); - unit = 'G'; - } else { - size = ((double) srv_buf_pool_size) / (1024 * 1024); - unit = 'M'; - } - - /* Print time to initialize the buffer pool */ - ib_logf(IB_LOG_LEVEL_INFO, - "Initializing buffer pool, size = %.1f%c", size, unit); - - err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances); - - if (err != DB_SUCCESS) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot allocate memory for the buffer pool"); - - return(DB_ERROR); - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Completed initialization of buffer pool"); - -#ifdef UNIV_DEBUG - /* We have observed deadlocks with a 5MB buffer pool but - the actual lower limit could very well be a little higher. */ - - if (srv_buf_pool_size <= 5 * 1024 * 1024) { - - ib_logf(IB_LOG_LEVEL_INFO, - "Small buffer pool size (%luM), the flst_validate() " - "debug function can cause a deadlock if the " - "buffer pool fills up.", - srv_buf_pool_size / 1024 / 1024); - } -#endif /* UNIV_DEBUG */ - - fsp_init(); - log_init(); - log_online_init(); - - lock_sys_create(srv_lock_table_size); - - /* Create i/o-handler threads: */ - - for (i = 0; i < srv_n_file_io_threads; ++i) { - - n[i] = i; - - thread_handles[i] = os_thread_create(io_handler_thread, n + i, thread_ids + i); - thread_started[i] = true; - } - - if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE - >= 512ULL * 1024ULL * 1024ULL * 1024ULL) { - /* log_block_convert_lsn_to_no() limits the returned block - number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512 - bytes, then we have a limit of 512 GB. If that limit is to - be raised, then log_block_convert_lsn_to_no() must be - modified. 
*/ - ib_logf(IB_LOG_LEVEL_ERROR, - "Combined size of log files must be < 512 GB"); - - return(DB_ERROR); - } - - if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) { - /* fil_io() takes ulint as an argument and we are passing - (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf(). - So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX. - So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This - means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which - is 64 TB on 32 bit systems. */ - fprintf(stderr, - " InnoDB: Error: combined size of log files" - " must be < %lu GB\n", - ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE); - - return(DB_ERROR); - } - - sum_of_new_sizes = 0; - - for (i = 0; i < srv_n_data_files; i++) { -#ifndef __WIN__ - if (sizeof(off_t) < 5 - && srv_data_file_sizes[i] - >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: file size must be < 4 GB" - " with this MySQL binary\n"); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: and operating system combination," - " in some OS's < 2 GB\n"); - - return(DB_ERROR); - } -#endif - sum_of_new_sizes += srv_data_file_sizes[i]; - } - - if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Combined size in innodb_data_file_path" - " must be at least %u MiB", - 640 >> (20 - UNIV_PAGE_SIZE_SHIFT)); - - return(DB_ERROR); - } - - recv_sys_create(); - recv_sys_init(buf_pool_get_curr_size()); - - err = open_or_create_data_files(&create_new_db, -#ifdef UNIV_LOG_ARCHIVE - &min_arch_log_no, &max_arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - &flushed_lsn, - &sum_of_new_sizes); - if (err == DB_FAIL) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "The system tablespace must be writable!"); - - return(DB_ERROR); - - } else if (err != DB_SUCCESS) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Could not open or create the system tablespace. 
If " - "you tried to add new data files to the system " - "tablespace, and it failed here, you should now " - "edit innodb_data_file_path in my.cnf back to what " - "it was, and remove the new ibdata files InnoDB " - "created in this failed attempt. InnoDB only wrote " - "those files full of zeros, but did not yet use " - "them in any way. But be careful: do not remove " - "old data files which contain your precious data!"); - - return(err); - } - -#ifdef UNIV_LOG_ARCHIVE - srv_normalize_path_for_win(srv_arch_dir); -#endif /* UNIV_LOG_ARCHIVE */ - - dirnamelen = strlen(srv_log_group_home_dir); - ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile"); - memcpy(logfilename, srv_log_group_home_dir, dirnamelen); - - /* Add a path separator if needed. */ - if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) { - logfilename[dirnamelen++] = SRV_PATH_SEPARATOR; - } - - srv_log_file_size_requested = srv_log_file_size; - - if (create_new_db) { - bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); - ut_a(success); - - flushed_lsn = log_get_lsn(); - - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - err = create_log_files(create_new_db, logfilename, dirnamelen, - flushed_lsn, logfile0); - - if (err != DB_SUCCESS) { - return(err); - } - } else { - ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug); - - for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { - os_offset_t size; - os_file_stat_t stat_info; - - sprintf(logfilename + dirnamelen, - "ib_logfile%u", i); - - err = os_file_get_status( - logfilename, &stat_info, false); - - if (err == DB_NOT_FOUND) { - if (i == 0) { - - if (flushed_lsn < (lsn_t) 1000) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create" - " log files because" - " data files are" - " corrupt or the" - " database was not" - " shut down cleanly" - " after creating" - " the data files."); - return(DB_ERROR); - } - - err = create_log_files( - create_new_db, logfilename, - dirnamelen, flushed_lsn, - logfile0); - - if (err == 
DB_SUCCESS) { - err = create_log_files_rename( - logfilename, - dirnamelen, - flushed_lsn, - logfile0); - } - - if (err != DB_SUCCESS) { - return(err); - } - - /* Suppress the message about - crash recovery. */ - flushed_lsn = log_get_lsn(); - goto files_checked; - } else if (i < 2 && !IS_XTRABACKUP()) { - /* must have at least 2 log files */ - ib_logf(IB_LOG_LEVEL_ERROR, - "Only one log file found."); - return(err); - } - - /* opened all files */ - break; - } - - if (!srv_file_check_mode(logfilename)) { - return(DB_ERROR); - } - - err = open_log_file(&files[i], logfilename, &size); - - if (err != DB_SUCCESS) { - return(err); - } - - ut_a(size != (os_offset_t) -1); - - if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Log file %s size " - UINT64PF " is not a multiple of" - " innodb_page_size", - logfilename, size); - return(DB_ERROR); - } - - size >>= UNIV_PAGE_SIZE_SHIFT; - - if (i == 0) { - srv_log_file_size = size; - } else if (size != srv_log_file_size) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Log file %s is" - " of different size " UINT64PF " bytes" - " than other log" - " files " UINT64PF " bytes!", - logfilename, - size << UNIV_PAGE_SIZE_SHIFT, - (os_offset_t) srv_log_file_size - << UNIV_PAGE_SIZE_SHIFT); - return(DB_ERROR); - } - } - - srv_n_log_files_found = i; - - /* Create the in-memory file space objects. 
*/ - - sprintf(logfilename + dirnamelen, "ib_logfile%u", 0); - - fil_space_create(logfilename, - SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG, - NULL /* no encryption yet */, - true /* create */); - - ut_a(fil_validate()); - - /* srv_log_file_size is measured in pages; if page size is 16KB, - then we have a limit of 64TB on 32 bit systems */ - ut_a(srv_log_file_size <= ULINT_MAX); - - for (unsigned j = 0; j < i; j++) { - sprintf(logfilename + dirnamelen, "ib_logfile%u", j); - - if (!fil_node_create(logfilename, - (ulint) srv_log_file_size, - SRV_LOG_SPACE_FIRST_ID, FALSE)) { - return(DB_ERROR); - } - } - -#ifdef UNIV_LOG_ARCHIVE - /* Create the file space object for archived logs. Under - MySQL, no archiving ever done. */ - fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1, - 0, FIL_LOG, NULL /* no encryption yet */, - true /* create */); -#endif /* UNIV_LOG_ARCHIVE */ - log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE, - SRV_LOG_SPACE_FIRST_ID, - SRV_LOG_SPACE_FIRST_ID + 1); - } - -files_checked: - /* Open all log files and data files in the system - tablespace: we keep them open until database - shutdown */ - - fil_open_log_and_system_tablespace_files(); - - err = srv_undo_tablespaces_init( - create_new_db, - FALSE, - srv_undo_tablespaces, - &srv_undo_tablespaces_open); - - /* If the force recovery is set very high then we carry on regardless - of all errors. Basically this is fingers crossed mode. 
*/ - - if (err != DB_SUCCESS - && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { - - return(err); - } - - /* Initialize objects used by dict stats gathering thread, which - can also be used by recovery if it tries to drop some table */ - if (!srv_read_only_mode) { - dict_stats_thread_init(); - } - - trx_sys_file_format_init(); - - trx_sys_create(); - - if (create_new_db) { - ut_a(!srv_read_only_mode); - init_log_online(); - - mtr_start(&mtr); - - fsp_header_init(0, sum_of_new_sizes, &mtr); - compile_time_assert(TRX_SYS_SPACE == 0); - compile_time_assert(IBUF_SPACE_ID == 0); - - ulint ibuf_root = btr_create( - DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, - 0, 0, DICT_IBUF_ID_MIN, - dict_ind_redundant, &mtr); - - mtr_commit(&mtr); - - if (ibuf_root == FIL_NULL) { - return(srv_init_abort(true, __FILE__, __LINE__, - DB_ERROR)); - } - - ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO); - - /* To maintain backward compatibility we create only - the first rollback segment before the double write buffer. - All the remaining rollback segments will be created later, - after the double write buffer has been created. */ - trx_sys_create_sys_pages(); - - ib_bh = trx_sys_init_at_db_start(); - n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - - /* The purge system needs to create the purge view and - therefore requires that the trx_sys is inited. */ - - trx_purge_sys_create(srv_n_purge_threads, ib_bh); - - err = dict_create(); - - if (err != DB_SUCCESS) { - return(err); - } - - srv_startup_is_before_trx_rollback_phase = FALSE; - - bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); - ut_a(success); - - flushed_lsn = log_get_lsn(); - - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - /* Stamp the LSN to the data files. 
*/ - err = fil_write_flushed_lsn(flushed_lsn); - - if (err != DB_SUCCESS) { - return(err); - } - - err = create_log_files_rename(logfilename, dirnamelen, - flushed_lsn, logfile0); - - if (err != DB_SUCCESS) { - return(err); - } - } else { - - /* Check if we support the max format that is stamped - on the system tablespace. - Note: We are NOT allowed to make any modifications to - the TRX_SYS_PAGE_NO page before recovery because this - page also contains the max_trx_id etc. important system - variables that are required for recovery. We need to - ensure that we return the system to a state where normal - recovery is guaranteed to work. We do this by - invalidating the buffer cache, this will force the - reread of the page and restoration to its last known - consistent state, this is REQUIRED for the recovery - process to work. */ - err = trx_sys_file_format_max_check( - srv_max_file_format_at_startup); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Invalidate the buffer pool to ensure that we reread - the page that we read above, during recovery. - Note that this is not as heavy weight as it seems. At - this point there will be only ONE page in the buf_LRU - and there must be no page in the buf_flush list. */ - buf_pool_invalidate(); - - /* Start monitor thread early enough so that e.g. crash - recovery failing to find free pages in the buffer pool is - diagnosed. 
*/ - if (!srv_read_only_mode) - { - /* Create the thread which prints InnoDB monitor - info */ - srv_monitor_active = true; - thread_handles[4 + SRV_MAX_N_IO_THREADS] = - os_thread_create( - srv_monitor_thread, - NULL, - thread_ids + 4 + SRV_MAX_N_IO_THREADS); - - thread_started[4 + SRV_MAX_N_IO_THREADS] = true; - } - - /* We always try to do a recovery, even if the database had - been shut down normally: this is the normal startup path */ - - err = recv_recovery_from_checkpoint_start( - LOG_CHECKPOINT, LSN_MAX, - flushed_lsn); - - if (err != DB_SUCCESS) { - return(err); - } - - init_log_online(); - - /* Initialize the change buffer. */ - err = dict_boot(); - - if (err != DB_SUCCESS) { - return(err); - } - - /* This must precede recv_apply_hashed_log_recs(true). */ - ib_bh = trx_sys_init_at_db_start(); - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - /* Apply the hashed log records to the - respective file pages, for the last batch of - recv_group_scan_log_recs(). */ - - recv_apply_hashed_log_recs(true); - - if (recv_sys->found_corrupt_log) { - return (DB_CORRUPTION); - } - - DBUG_PRINT("ib_log", ("apply completed")); - } - - if (!srv_read_only_mode) { - const ulint flags = FSP_FLAGS_PAGE_SSIZE(); - for (ulint id = 0; id <= srv_undo_tablespaces; id++) { - if (fil_space_get(id)) { - fsp_flags_try_adjust(id, flags); - } - } - - if (sum_of_new_sizes > 0) { - /* New data file(s) were added */ - mtr_start(&mtr); - fsp_header_inc_size(0, sum_of_new_sizes, &mtr); - mtr_commit(&mtr); - /* Immediately write the log record about - increased tablespace size to disk, so that it - is durable even if mysqld would crash - quickly */ - log_buffer_flush_to_disk(); - } - } - - const ulint tablespace_size_in_header - = fsp_header_get_tablespace_size(); - -#ifdef UNIV_DEBUG - /* buf_debug_prints = TRUE; */ -#endif /* UNIV_DEBUG */ - ulint sum_of_data_file_sizes = 0; - - for (ulint d = 0; d < srv_n_data_files; d++) { - sum_of_data_file_sizes += srv_data_file_sizes[d]; - } - - /* 
Compare the system tablespace file size to what is - stored in FSP_SIZE. In open_or_create_data_files() - we already checked that the file sizes match the - innodb_data_file_path specification. */ - if (srv_read_only_mode - || sum_of_data_file_sizes == tablespace_size_in_header) { - /* Do not complain about the size. */ - } else if (!srv_auto_extend_last_data_file - || sum_of_data_file_sizes - < tablespace_size_in_header) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace size stored in header is " ULINTPF - " pages, but the sum of data file sizes is " - ULINTPF " pages", - tablespace_size_in_header, - sum_of_data_file_sizes); - - if (srv_force_recovery == 0 - && sum_of_data_file_sizes - < tablespace_size_in_header) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot start InnoDB. The tail of" - " the system tablespace is" - " missing. Have you edited" - " innodb_data_file_path in my.cnf" - " in an inappropriate way, removing" - " data files from there?" - " You can set innodb_force_recovery=1" - " in my.cnf to force" - " a startup if you are trying to" - " recover a badly corrupt database."); - - return(DB_ERROR); - } - } - - n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - - /* The purge system needs to create the purge view and - therefore requires that the trx_sys is inited. */ - - trx_purge_sys_create(srv_n_purge_threads, ib_bh); - - /* recv_recovery_from_checkpoint_finish needs trx lists which - are initialized in trx_sys_init_at_db_start(). */ - - recv_recovery_from_checkpoint_finish(); - - if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { - /* The following call is necessary for the insert - buffer to work with multiple tablespaces. We must - know the mapping between space id's and .ibd file - names. - - In a crash recovery, we check that the info in data - dictionary is consistent with what we already know - about space id's from the call of - fil_load_single_table_tablespaces(). 
- - In a normal startup, we create the space objects for - every table in the InnoDB data dictionary that has - an .ibd file. - - We also determine the maximum tablespace id used. */ - dict_check_t dict_check; - - if (recv_needed_recovery) { - dict_check = DICT_CHECK_ALL_LOADED; - } else if (n_recovered_trx) { - dict_check = DICT_CHECK_SOME_LOADED; - } else { - dict_check = DICT_CHECK_NONE_LOADED; - } - - /* Create the SYS_TABLESPACES and SYS_DATAFILES system table */ - err = dict_create_or_check_sys_tablespace(); - if (err != DB_SUCCESS) { - return(err); - } - - sys_datafiles_created = true; - - /* This function assumes that SYS_DATAFILES exists */ - dict_check_tablespaces_and_store_max_id(dict_check); - } - - if (IS_XTRABACKUP() - && !srv_backup_mode - && srv_read_only_mode - && srv_log_file_size_requested != srv_log_file_size) { - - ib_logf(IB_LOG_LEVEL_WARN, - "Log files size mismatch, ignored in readonly mode"); - srv_log_file_size_requested = srv_log_file_size; - } - - - if (!srv_force_recovery - && !recv_sys->found_corrupt_log - && (srv_log_file_size_requested != srv_log_file_size - || srv_n_log_files_found != srv_n_log_files)) { - /* Prepare to replace the redo log files. */ - - if (srv_read_only_mode) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot resize log files " - "in read-only mode."); - return(DB_READ_ONLY); - } - - /* Clean the buffer pool. */ - bool success = buf_flush_list( - ULINT_MAX, LSN_MAX, NULL); - ut_a(success); - - DBUG_EXECUTE_IF("innodb_log_abort_1", - return(DB_ERROR);); - - flushed_lsn = log_get_lsn(); - - ib_logf(IB_LOG_LEVEL_WARN, - "Resizing redo log from %u*%u to %u*%u pages" - ", LSN=" LSN_PF, - (unsigned) i, - (unsigned) srv_log_file_size, - (unsigned) srv_n_log_files, - (unsigned) srv_log_file_size_requested, - flushed_lsn); - - buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); - - /* Flush the old log files. */ - log_buffer_flush_to_disk(); - /* If innodb_flush_method=O_DSYNC, - we need to explicitly flush the log buffers. 
*/ - fil_flush(SRV_LOG_SPACE_FIRST_ID); - - ut_ad(flushed_lsn == log_get_lsn()); - - /* Prohibit redo log writes from any other - threads until creating a log checkpoint at the - end of create_log_files(). */ - ut_d(recv_no_log_write = TRUE); - ut_ad(!buf_pool_check_no_pending_io()); - - DBUG_EXECUTE_IF("innodb_log_abort_3", - return(DB_ERROR);); - - /* Stamp the LSN to the data files. */ - err = fil_write_flushed_lsn(flushed_lsn); - - DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Close and free the redo log files, so that - we can replace them. */ - fil_close_log_files(true); - - DBUG_EXECUTE_IF("innodb_log_abort_5", - return(DB_ERROR);); - - /* Free the old log file space. */ - log_group_close_all(); - - ib_logf(IB_LOG_LEVEL_WARN, - "Starting to delete and rewrite log files."); - - srv_log_file_size = srv_log_file_size_requested; - - err = create_log_files(create_new_db, logfilename, - dirnamelen, flushed_lsn, - logfile0); - - if (err != DB_SUCCESS) { - return(err); - } - - err = create_log_files_rename(logfilename, dirnamelen, - log_get_lsn(), logfile0); - - if (err != DB_SUCCESS) { - return(err); - } - } - - recv_recovery_rollback_active(); - srv_startup_is_before_trx_rollback_phase = FALSE; - - /* It is possible that file_format tag has never - been set. In this case we initialize it to minimum - value. Important to note that we can do it ONLY after - we have finished the recovery process so that the - image of TRX_SYS_PAGE_NO is not stale. 
*/ - trx_sys_file_format_tag_init(); - } - - ut_ad(err == DB_SUCCESS); - ut_a(sum_of_new_sizes != ULINT_UNDEFINED); - -#ifdef UNIV_LOG_ARCHIVE - if (!srv_read_only_mode) { - if (!srv_log_archive_on) { - ut_a(DB_SUCCESS == log_archive_noarchivelog()); - } else { - bool start_archive; - - mutex_enter(&(log_sys->mutex)); - - start_archive = false; - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - start_archive = true; - } - - mutex_exit(&(log_sys->mutex)); - - if (start_archive) { - ut_a(DB_SUCCESS == log_archive_archivelog()); - } - } - } -#endif /* UNIV_LOG_ARCHIVE */ - - /* fprintf(stderr, "Max allowed record size %lu\n", - page_get_free_space_of_empty() / 2); */ - - if (!buf_dblwr_create()) { - return(srv_init_abort(create_new_db, __FILE__, __LINE__, - DB_ERROR)); - } - - /* Here the double write buffer has already been created and so - any new rollback segments will be allocated after the double - write buffer. The default segment should already exist. - We create the new segments only if it's a new database or - the database was shutdown cleanly. */ - - /* Note: When creating the extra rollback segments during an upgrade - we violate the latching order, even if the change buffer is empty. - We make an exception in sync0sync.cc and check srv_is_being_started - for that violation. It cannot create a deadlock because we are still - running in single threaded mode essentially. Only the IO threads - should be running at this stage. */ - - ut_a(srv_undo_logs > 0); - ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS); - - /* The number of rsegs that exist in InnoDB is given by status - variable srv_available_undo_logs. The number of rsegs to use can - be set using the dynamic global variable srv_undo_logs. */ - - srv_available_undo_logs = trx_sys_create_rsegs( - srv_undo_tablespaces, srv_undo_logs); - - if (srv_available_undo_logs == ULINT_UNDEFINED) { - /* Can only happen if server is read only. 
*/ - ut_a(srv_read_only_mode); - srv_undo_logs = ULONG_UNDEFINED; - } else if (srv_available_undo_logs < srv_undo_logs) { - /* Should due to out of file space. */ - return (srv_init_abort(create_new_db, __FILE__, __LINE__, DB_ERROR)); - } - - if (!srv_read_only_mode) { - /* Create the thread which watches the timeouts - for lock waits */ - thread_handles[2 + SRV_MAX_N_IO_THREADS] = os_thread_create( - lock_wait_timeout_thread, - NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); - thread_started[2 + SRV_MAX_N_IO_THREADS] = true; - lock_sys->timeout_thread_active = true; - - /* Create the thread which warns of long semaphore waits */ - srv_error_monitor_active = true; - thread_handles[3 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_error_monitor_thread, - NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS); - thread_started[3 + SRV_MAX_N_IO_THREADS] = true; - - /* Create the thread which prints InnoDB monitor info */ - if (!thread_started[4 + SRV_MAX_N_IO_THREADS]) { - /* srv_monitor_thread not yet started */ - srv_monitor_active = true; - thread_handles[4 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_monitor_thread, - NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS); - thread_started[4 + SRV_MAX_N_IO_THREADS] = true; - } - } - - /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */ - err = dict_create_or_check_foreign_constraint_tables(); - if (err != DB_SUCCESS) { - return(err); - } - - /* Create the SYS_TABLESPACES and SYS_DATAFILES system tables if we - have not done that already on crash recovery. 
*/ - if (sys_datafiles_created == false) { - err = dict_create_or_check_sys_tablespace(); - if (err != DB_SUCCESS) { - return(err); - } - } - - srv_is_being_started = FALSE; - - ut_a(trx_purge_state() == PURGE_STATE_INIT); - - /* Create the master thread which does purge and other utility - operations */ - - if (!srv_read_only_mode) { - - thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_master_thread, - NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS)); - thread_started[1 + SRV_MAX_N_IO_THREADS] = true; - - srv_undo_sources = true; - /* Create the dict stats gathering thread */ - srv_dict_stats_thread_active = true; - dict_stats_thread_handle = os_thread_create( - dict_stats_thread, NULL, NULL); - dict_stats_thread_started = true; - - /* Create the thread that will optimize the FTS sub-system. */ - fts_optimize_init(); - } - - if (!srv_read_only_mode - && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { - - thread_handles[6 + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_purge_coordinator_thread, - NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS); - - thread_started[6 + SRV_MAX_N_IO_THREADS] = true; - - ut_a(UT_ARR_SIZE(thread_ids) - > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS); - - /* We've already created the purge coordinator thread above. 
*/ - for (i = 1; i < srv_n_purge_threads; ++i) { - thread_handles[6 + i + SRV_MAX_N_IO_THREADS] = os_thread_create( - srv_worker_thread, NULL, - thread_ids + 6 + i + SRV_MAX_N_IO_THREADS); - thread_started[6 + i + SRV_MAX_N_IO_THREADS] = true; - } - - srv_start_wait_for_purge_to_start(); - - } else { - purge_sys->state = PURGE_STATE_DISABLED; - } - - if (!srv_read_only_mode) { - - if (srv_use_mtflush) { - /* Start multi-threaded flush threads */ - mtflush_ctx = buf_mtflu_handler_init( - srv_mtflush_threads, - srv_buf_pool_instances); - - /* Set up the thread ids */ - buf_mtflu_set_thread_ids( - srv_mtflush_threads, - mtflush_ctx, - (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); - } - - - buf_page_cleaner_is_active = true; - buf_flush_page_cleaner_thread_handle = os_thread_create( - buf_flush_page_cleaner_thread, NULL, NULL); - buf_flush_page_cleaner_thread_started = true; - - buf_lru_manager_is_active = true; - buf_flush_lru_manager_thread_handle = os_thread_create( - buf_flush_lru_manager_thread, NULL, NULL); - buf_flush_lru_manager_thread_started = true; - } - - if (!srv_file_per_table && srv_pass_corrupt_table) { - fprintf(stderr, "InnoDB: Warning:" - " The option innodb_file_per_table is disabled," - " so using the option innodb_pass_corrupt_table doesn't make sense.\n"); - } - - if (srv_print_verbose_log) { - ib_logf(IB_LOG_LEVEL_INFO, - " Percona XtraDB (http://www.percona.com) %s started; " - "log sequence number " LSN_PF "", - INNODB_VERSION_STR, srv_start_lsn); - } - - if (srv_force_recovery > 0) { - ib_logf(IB_LOG_LEVEL_INFO, - "!!! innodb_force_recovery is set to %lu !!!", - (ulong) srv_force_recovery); - } - - if (!srv_read_only_mode) { - /* - Create a checkpoint before logging anything new, so that - the current encryption key in use is definitely logged - before any log blocks encrypted with that key. 
- */ - log_make_checkpoint_at(LSN_MAX, TRUE); - } - - if (srv_force_recovery == 0) { - /* In the insert buffer we may have even bigger tablespace - id's, because we may have dropped those tablespaces, but - insert buffer merge has not had time to clean the records from - the ibuf tree. */ - - ibuf_update_max_tablespace_id(); - } - - if (!srv_read_only_mode) { -#ifdef WITH_WSREP - /* - Create the dump/load thread only when not running with - --wsrep-recover. - */ - if (!wsrep_recovery) { -#endif /* WITH_WSREP */ - /* Create the buffer pool dump/load thread */ - srv_buf_dump_thread_active = true; - buf_dump_thread_handle= - os_thread_create(buf_dump_thread, NULL, NULL); - - buf_dump_thread_started = true; -#ifdef WITH_WSREP - } else { - ib_logf(IB_LOG_LEVEL_WARN, - "Skipping buffer pool dump/restore during " - "wsrep recovery."); - } -#endif /* WITH_WSREP */ - - /* Create thread(s) that handles key rotation */ - fil_system_enter(); - fil_crypt_threads_init(); - fil_system_exit(); - - /* Init data for datafile scrub threads */ - btr_scrub_init(); - - /* Initialize online defragmentation. */ - btr_defragment_init(); - btr_defragment_thread_active = true; - os_thread_create(btr_defragment_thread, NULL, NULL); - } - - srv_was_started = TRUE; - - return(DB_SUCCESS); -} - -#if 0 -/******************************************************************** -Sync all FTS cache before shutdown */ -static -void -srv_fts_close(void) -/*===============*/ -{ - dict_table_t* table; - - for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - table; table = UT_LIST_GET_NEXT(table_LRU, table)) { - fts_t* fts = table->fts; - - if (fts != NULL) { - fts_sync_table(table); - } - } - - for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); - table; table = UT_LIST_GET_NEXT(table_LRU, table)) { - fts_t* fts = table->fts; - - if (fts != NULL) { - fts_sync_table(table); - } - } -} -#endif - -/** Shut down InnoDB. 
*/ -UNIV_INTERN -void -innodb_shutdown() -{ - ulint i; - - if (!srv_was_started) { - if (srv_is_being_started) { - ib_logf(IB_LOG_LEVEL_WARN, - "Shutting down an improperly started, " - "or created database!"); - } - } - - if (srv_undo_sources) { - ut_ad(!srv_read_only_mode); - /* Shutdown the FTS optimize sub system. */ - fts_optimize_start_shutdown(); - - fts_optimize_end(); - dict_stats_shutdown(); - while (row_get_background_drop_list_len_low()) { - srv_wake_master_thread(); - os_thread_yield(); - } - srv_undo_sources = false; - } - - /* 1. Flush the buffer pool to disk, write the current lsn to - the tablespace header(s), and copy all log data to archive. - The step 1 is the real InnoDB shutdown. The remaining steps 2 - ... - just free data structures after the shutdown. */ - - logs_empty_and_mark_files_at_shutdown(); - - if (srv_conc_get_active_threads() != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Query counter shows %ld queries still " - "inside InnoDB at shutdown", - srv_conc_get_active_threads()); - } - - /* 2. Make all threads created by InnoDB to exit */ - - srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; - - /* All threads end up waiting for certain events. Put those events - to the signaled state. Then the threads will exit themselves after - os_event_wait(). */ - - for (i = 0; i < 1000; i++) { - /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM - HERE OR EARLIER */ - - if (!srv_read_only_mode) { - /* a. Let the lock timeout thread exit */ - os_event_set(lock_sys->timeout_event); - - /* b. srv error monitor thread exits automatically, - no need to do anything here */ - - /* c. We wake the master thread so that it exits */ - srv_wake_master_thread(); - - /* d. Wakeup purge threads. */ - srv_purge_wakeup(); - } - - /* e. Exit the i/o threads */ - - os_aio_wake_all_threads_at_shutdown(); - - /* f. dict_stats_thread is signaled from - logs_empty_and_mark_files_at_shutdown() and should have - already quit or is quitting right now. 
*/ - - if (srv_use_mtflush) { - /* g. Exit the multi threaded flush threads */ - - buf_mtflu_io_thread_exit(); - } - - os_rmb; - if (os_thread_count == 0) { - /* All the threads have exited or are just exiting; - NOTE that the threads may not have completed their - exit yet. Should we use pthread_join() to make sure - they have exited? If we did, we would have to - remove the pthread_detach() from - os_thread_exit(). Now we just sleep 0.1 - seconds and hope that is enough! */ - - os_thread_sleep(100000); - - break; - } - - os_thread_sleep(100000); - } - - if (i == 1000) { - ib_logf(IB_LOG_LEVEL_WARN, - "%lu threads created by InnoDB" - " had not exited at shutdown!", - (ulong) os_thread_count); - } - - if (srv_monitor_file) { - fclose(srv_monitor_file); - srv_monitor_file = 0; - if (srv_monitor_file_name) { - unlink(srv_monitor_file_name); - mem_free(srv_monitor_file_name); - } - } - - if (srv_dict_tmpfile) { - fclose(srv_dict_tmpfile); - srv_dict_tmpfile = 0; - } - - if (srv_misc_tmpfile) { - fclose(srv_misc_tmpfile); - srv_misc_tmpfile = 0; - } - - if (!srv_read_only_mode) { - dict_stats_thread_deinit(); - fil_crypt_threads_cleanup(); - btr_scrub_cleanup(); - btr_defragment_shutdown(); - } - -#ifdef __WIN__ - /* MDEV-361: ha_innodb.dll leaks handles on Windows - MDEV-7403: should not pass recv_writer_thread_handle to - CloseHandle(). - - On Windows we should call CloseHandle() for all - open thread handles. 
*/ - if (os_thread_count == 0) { - for (i = 0; i < SRV_MAX_N_IO_THREADS + 6 + 32; ++i) { - if (thread_started[i]) { - CloseHandle(thread_handles[i]); - } - } - - if (buf_flush_page_cleaner_thread_started) { - CloseHandle(buf_flush_page_cleaner_thread_handle); - } - - if (buf_dump_thread_started) { - CloseHandle(buf_dump_thread_handle); - } - - if (dict_stats_thread_started) { - CloseHandle(dict_stats_thread_handle); - } - - if (buf_flush_lru_manager_thread_started) { - CloseHandle(buf_flush_lru_manager_thread_handle); - } - - if (srv_redo_log_follow_thread_started) { - CloseHandle(srv_redo_log_follow_thread_handle); - } - } -#endif /* __WIN __ */ - - /* This must be disabled before closing the buffer pool - and closing the data dictionary. */ - btr_search_disable(); - - ibuf_close(); - log_online_shutdown(); - log_shutdown(); - trx_sys_file_format_close(); - trx_sys_close(); - lock_sys_close(); - - /* We don't create these mutexes in RO mode because we don't create - the temp files that the cover. */ - if (!srv_read_only_mode) { - mutex_free(&srv_monitor_file_mutex); - mutex_free(&srv_dict_tmpfile_mutex); - mutex_free(&srv_misc_tmpfile_mutex); - } - - dict_close(); - btr_search_sys_free(); - - /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside - them */ - os_aio_free(); - que_close(); - row_mysql_close(); - srv_mon_free(); - srv_free(); - fil_close(); - - /* 4. 
Free all allocated memory */ - - pars_lexer_close(); - log_mem_free(); - buf_pool_free(srv_buf_pool_instances); - mem_close(); - sync_close(); - - /* ut_free_all_mem() frees all allocated memory not freed yet - in shutdown, and it will also free the ut_list_mutex, so it - should be the last one for all operation */ - ut_free_all_mem(); - - os_rmb; - if (os_thread_count != 0 - || os_event_count != 0 - || os_mutex_count != 0 - || os_fast_mutex_count != 0) { - ib_logf(IB_LOG_LEVEL_WARN, - "Some resources were not cleaned up in shutdown: " - "threads %lu, events %lu, os_mutexes %lu, " - "os_fast_mutexes %lu", - (ulong) os_thread_count, (ulong) os_event_count, - (ulong) os_mutex_count, (ulong) os_fast_mutex_count); - } - - if (dict_foreign_err_file) { - fclose(dict_foreign_err_file); - } - - if (srv_print_verbose_log) { - ib_logf(IB_LOG_LEVEL_INFO, - "Shutdown completed; log sequence number " LSN_PF "", - srv_shutdown_lsn); - } - - srv_was_started = FALSE; - srv_start_has_been_called = FALSE; - /* reset io_tid_i, in case current process does second innodb start (xtrabackup might do that).*/ - io_tid_i = 0; -} -#endif /* !UNIV_HOTBACKUP */ - -/*****************************************************************//** -Get the meta-data filename from the table name. 
*/ -UNIV_INTERN -void -srv_get_meta_data_filename( -/*=======================*/ - dict_table_t* table, /*!< in: table */ - char* filename, /*!< out: filename */ - ulint max_len) /*!< in: filename max length */ -{ - ulint len; - char* path; - char* suffix; - static const ulint suffix_len = strlen(".cfg"); - - if (DICT_TF_HAS_DATA_DIR(table->flags)) { - dict_get_and_save_data_dir_path(table, false); - ut_a(table->data_dir_path); - - path = os_file_make_remote_pathname( - table->data_dir_path, table->name, "cfg"); - } else { - path = fil_make_ibd_name(table->name, false); - } - - ut_a(path); - len = ut_strlen(path); - ut_a(max_len >= len); - - suffix = path + (len - suffix_len); - if (strncmp(suffix, ".cfg", suffix_len) == 0) { - strcpy(filename, path); - } else { - ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0); - - strncpy(filename, path, len - suffix_len); - suffix = filename + (len - suffix_len); - strcpy(suffix, ".cfg"); - } - - mem_free(path); - - srv_normalize_path_for_win(filename); -} diff --git a/storage/xtradb/sync/sync0arr.cc b/storage/xtradb/sync/sync0arr.cc deleted file mode 100644 index 134d16ae58e..00000000000 --- a/storage/xtradb/sync/sync0arr.cc +++ /dev/null @@ -1,1564 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file sync/sync0arr.cc -The wait array used in synchronization primitives - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#include "univ.i" - -#include "sync0arr.h" -#ifdef UNIV_NONINL -#include "sync0arr.ic" -#endif - -#include <mysqld_error.h> -#include <mysql/plugin.h> -#include <hash.h> -#include <myisampack.h> -#include <sql_acl.h> -#include <mysys_err.h> -#include <my_sys.h> -#include "srv0srv.h" -#include "srv0start.h" -#include "i_s.h" -#include <sql_plugin.h> -#include <innodb_priv.h> - -#include "sync0sync.h" -#include "sync0rw.h" -#include "os0sync.h" -#include "os0file.h" -#include "lock0lock.h" -#include "srv0srv.h" -#include "ha_prototypes.h" - -/* - WAIT ARRAY - ========== - -The wait array consists of cells each of which has an -an operating system event object created for it. The threads -waiting for a mutex, for example, can reserve a cell -in the array and suspend themselves to wait for the event -to become signaled. When using the wait array, remember to make -sure that some thread holding the synchronization object -will eventually know that there is a waiter in the array and -signal the object, to prevent infinite wait. 
-Why did we choose to implement a wait array? First, to make -mutexes fast, we had to code our own implementation of them, -which only in uncommon cases resort to using -slow operating system primitives. Then we had the choice of -assigning a unique OS event for each mutex, which would -be simpler, or using a global wait array. In some operating systems, -the global wait array solution is more efficient and flexible, -because we can do with a very small number of OS events, -say 200. In NT 3.51, allocating events seems to be a quadratic -algorithm, because 10 000 events are created fast, but -100 000 events take a couple of minutes to create. - -As of 5.0.30 the above mentioned design is changed. Since the -OS can now handle millions of wait events efficiently, we no longer -have this concept of each cell of wait array having one event. -Instead, now the event that a thread wants to wait on is embedded -in the wait object (mutex or rw_lock). We still keep the global -wait array for the sake of diagnostics and also to avoid infinite -wait. The error_monitor thread scans the global wait array to signal -any waiting threads that have missed the signal. */ - -/** A cell where an individual thread may wait suspended -until a resource is released. The suspending is implemented -using an operating system event semaphore. 
*/ -struct sync_cell_t { - void* wait_object; /*!< pointer to the object the - thread is waiting for; if NULL - the cell is free for use */ - void* old_wait_mutex; /*!< the latest regular or priority - wait mutex in cell */ - void* old_wait_rw_lock; - /*!< the latest regular or priority - wait rw-lock in cell */ - ulint request_type; /*!< lock type requested on the - object */ - const char* file; /*!< in debug version file where - requested */ - ulint line; /*!< in debug version line where - requested */ - os_thread_id_t thread; /*!< thread id of this waiting - thread */ - ibool waiting; /*!< TRUE if the thread has already - called sync_array_event_wait - on this cell */ - ib_int64_t signal_count; /*!< We capture the signal_count - of the wait_object when we - reset the event. This value is - then passed on to os_event_wait - and we wait only if the event - has not been signalled in the - period between the reset and - wait call. */ - time_t reservation_time;/*!< time when the thread reserved - the wait cell */ -}; - -/* NOTE: It is allowed for a thread to wait -for an event allocated for the array without owning the -protecting mutex (depending on the case: OS or database mutex), but -all changes (set or reset) to the state of the event must be made -while owning the mutex. */ -/** Synchronization array */ -struct sync_array_t { - ulint n_reserved; /*!< number of currently reserved - cells in the wait array */ - ulint n_cells; /*!< number of cells in the - wait array */ - sync_cell_t* array; /*!< pointer to wait array */ - ib_mutex_t mutex; /*!< possible database mutex - protecting this data structure */ - os_ib_mutex_t os_mutex; /*!< Possible operating system mutex - protecting the data structure. - As this data structure is used in - constructing the database mutex, - to prevent infinite recursion - in implementation, we fall back to - an OS mutex. 
*/ - ulint res_count; /*!< count of cell reservations - since creation of the array */ -}; - -/** User configured sync array size */ -UNIV_INTERN ulong srv_sync_array_size = 32; - -/** Locally stored copy of srv_sync_array_size */ -static ulint sync_array_size; - -/** The global array of wait cells for implementation of the database's own -mutexes and read-write locks */ -static sync_array_t** sync_wait_array; - -/** count of how many times an object has been signalled */ -static ulint sg_count; - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -This function is called only in the debug version. Detects a deadlock -of one or more threads because of waits of semaphores. -@return TRUE if deadlock detected */ -static -ibool -sync_array_detect_deadlock( -/*=======================*/ - sync_array_t* arr, /*!< in: wait array; NOTE! the caller must - own the mutex to array */ - sync_cell_t* start, /*!< in: cell where recursive search started */ - sync_cell_t* cell, /*!< in: cell to search */ - ulint depth); /*!< in: recursion depth */ -#endif /* UNIV_SYNC_DEBUG */ - -/*****************************************************************//** -Gets the nth cell in array. -@return cell */ -sync_cell_t* -sync_array_get_nth_cell( -/*====================*/ - sync_array_t* arr, /*!< in: sync array */ - ulint n) /*!< in: index */ -{ - ut_a(arr); - ut_a(n < arr->n_cells); - - return(arr->array + n); -} - -/******************************************************************//** -Looks for a cell with the given thread id. 
-@return pointer to cell or NULL if not found */ -static -sync_cell_t* -sync_array_find_thread( -/*===================*/ - sync_array_t* arr, /*!< in: wait array */ - os_thread_id_t thread) /*!< in: thread id */ -{ - ulint i; - sync_cell_t* cell; - - for (i = 0; i < arr->n_cells; i++) { - - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object != NULL - && os_thread_eq(cell->thread, thread)) { - - return(cell); /* Found */ - } - } - - return(NULL); /* Not found */ -} - -/******************************************************************//** -Reserves the mutex semaphore protecting a sync array. */ -static -void -sync_array_enter( -/*=============*/ - sync_array_t* arr) /*!< in: sync wait array */ -{ - os_mutex_enter(arr->os_mutex); -} - -/******************************************************************//** -Releases the mutex semaphore protecting a sync array. */ -static -void -sync_array_exit( -/*============*/ - sync_array_t* arr) /*!< in: sync wait array */ -{ - os_mutex_exit(arr->os_mutex); -} - -/*******************************************************************//** -Creates a synchronization wait array. It is protected by a mutex -which is automatically reserved when the functions operating on it -are called. 
-@return own: created wait array */ -static -sync_array_t* -sync_array_create( -/*==============*/ - ulint n_cells) /*!< in: number of cells in the array - to create */ -{ - ulint sz; - sync_array_t* arr; - - ut_a(n_cells > 0); - - /* Allocate memory for the data structures */ - arr = static_cast<sync_array_t*>(ut_malloc(sizeof(*arr))); - memset(arr, 0x0, sizeof(*arr)); - - sz = sizeof(sync_cell_t) * n_cells; - arr->array = static_cast<sync_cell_t*>(ut_malloc(sz)); - memset(arr->array, 0x0, sz); - - arr->n_cells = n_cells; - - /* Then create the mutex to protect the wait array complex */ - arr->os_mutex = os_mutex_create(); - - return(arr); -} - -/******************************************************************//** -Frees the resources in a wait array. */ -static -void -sync_array_free( -/*============*/ - sync_array_t* arr) /*!< in, own: sync wait array */ -{ - ut_a(arr->n_reserved == 0); - - sync_array_validate(arr); - - /* Release the mutex protecting the wait array complex */ - - os_mutex_free(arr->os_mutex); - - ut_free(arr->array); - ut_free(arr); -} - -/********************************************************************//** -Validates the integrity of the wait array. Checks -that the number of reserved cells equals the count variable. */ -UNIV_INTERN -void -sync_array_validate( -/*================*/ - sync_array_t* arr) /*!< in: sync wait array */ -{ - ulint i; - sync_cell_t* cell; - ulint count = 0; - - sync_array_enter(arr); - - for (i = 0; i < arr->n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); - if (cell->wait_object != NULL) { - count++; - } - } - - ut_a(count == arr->n_reserved); - - sync_array_exit(arr); -} - -/*******************************************************************//** -Returns the event that the thread owning the cell waits for. 
*/ -static -os_event_t -sync_cell_get_event( -/*================*/ - sync_cell_t* cell) /*!< in: non-empty sync array cell */ -{ - ulint type = cell->request_type; - - if (type == SYNC_MUTEX) { - return(&((ib_mutex_t*) cell->wait_object)->event); - } else if (type == SYNC_PRIO_MUTEX) { - return(&((ib_prio_mutex_t*) cell->wait_object) - ->high_priority_event); - } else if (type == RW_LOCK_WAIT_EX) { - return(&((rw_lock_t*) cell->wait_object)->wait_ex_event); - } else if (type == PRIO_RW_LOCK_SHARED) { - return(&((prio_rw_lock_t *) cell->wait_object) - ->high_priority_s_event); - } else if (type == PRIO_RW_LOCK_EX) { - return(&((prio_rw_lock_t *) cell->wait_object) - ->high_priority_x_event); - } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */ - ut_ad(type == RW_LOCK_SHARED || type == RW_LOCK_EX); - return(&((rw_lock_t*) cell->wait_object)->event); - } -} - -/******************************************************************//** -Reserves a wait array cell for waiting for an object. -The event of the cell is reset to nonsignalled state. -The mutex protecting the array is acquired on entry and is always -released again before returning, whether or not a cell was reserved. -@return true if free cell is found, otherwise false */ -UNIV_INTERN -bool -sync_array_reserve_cell( -/*====================*/ - sync_array_t* arr, /*!< in: wait array */ - void* object, /*!< in: pointer to the object to wait for */ - ulint type, /*!< in: lock request type */ - const char* file, /*!< in: file where requested */ - ulint line, /*!< in: line where requested */ - ulint* index) /*!< out: index of the reserved cell */ -{ - sync_cell_t* cell; - os_event_t event; - ulint i; - - ut_a(object); - ut_a(index); - - sync_array_enter(arr); - - arr->res_count++; - - /* Reserve a new cell. 
*/ - for (i = 0; i < arr->n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object == NULL) { - - cell->waiting = FALSE; - cell->wait_object = object; - - if (type == SYNC_MUTEX || type == SYNC_PRIO_MUTEX) { - cell->old_wait_mutex = object; - } else { - cell->old_wait_rw_lock = object; - } - - cell->request_type = type; - - cell->file = file; - cell->line = line; - - arr->n_reserved++; - - *index = i; - - sync_array_exit(arr); - - /* Make sure the event is reset and also store - the value of signal_count at which the event - was reset. */ - event = sync_cell_get_event(cell); - cell->signal_count = os_event_reset(event); - - cell->reservation_time = ut_time(); - - cell->thread = os_thread_get_curr_id(); - - return(true); - } - } - - /* No free cell found. Release the array mutex acquired above - before reporting the failure; returning with it still held - would block the next sync_array_enter() on this array. */ - sync_array_exit(arr); - - return false; -} - -/******************************************************************//** -This function should be called when a thread starts to wait on -a wait array cell. In the debug version this function checks -if the wait for a semaphore will result in a deadlock, in which -case prints info and asserts. */ -UNIV_INTERN -void -sync_array_wait_event( -/*==================*/ - sync_array_t* arr, /*!< in: wait array */ - ulint index) /*!< in: index of the reserved cell */ -{ - sync_cell_t* cell; - os_event_t event; - - ut_a(arr); - - sync_array_enter(arr); - - cell = sync_array_get_nth_cell(arr, index); - - ut_a(cell->wait_object); - ut_a(!cell->waiting); - ut_ad(os_thread_get_curr_id() == cell->thread); - - event = sync_cell_get_event(cell); - cell->waiting = TRUE; - -#ifdef UNIV_SYNC_DEBUG - - /* We use simple enter to the mutex below, because if - we cannot acquire it at once, mutex_enter would call - recursively sync_array routines, leading to trouble. - rw_lock_debug_mutex freezes the debug lists. 
*/ - - rw_lock_debug_mutex_enter(); - - if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) { - - fputs("########################################\n", stderr); - ut_error; - } - - rw_lock_debug_mutex_exit(); -#endif - sync_array_exit(arr); - - os_event_wait_low(event, cell->signal_count); - - sync_array_free_cell(arr, index); -} - -/******************************************************************//** -Reports info of a wait array cell. */ -static -void -sync_array_cell_print( -/*==================*/ - FILE* file, /*!< in: file where to print */ - sync_cell_t* cell, /*!< in: sync cell */ - os_thread_id_t* reserver) /*!< out: write reserver or - 0 */ -{ - ib_mutex_t* mutex; - ib_prio_mutex_t* prio_mutex; - rw_lock_t* rwlock; - prio_rw_lock_t* prio_rwlock = NULL; - ulint type; - ulint writer; - - type = cell->request_type; - - fprintf(file, - "--Thread %lu has waited at %s line %lu" - " for %#.5g seconds the semaphore:\n", - (ulong) os_thread_pf(cell->thread), - innobase_basename(cell->file), (ulong) cell->line, - difftime(time(NULL), cell->reservation_time)); - - - if (type == SYNC_MUTEX || type == SYNC_PRIO_MUTEX) { - - /* We use old_wait_mutex in case the cell has already - been freed meanwhile */ - if (type == SYNC_MUTEX) { - - mutex = static_cast<ib_mutex_t*>(cell->old_wait_mutex); - } else { - - prio_mutex = static_cast<ib_prio_mutex_t*> - (cell->old_wait_mutex); - mutex = &prio_mutex->base_mutex; - } - - - if (mutex) { - fprintf(file, - "Mutex at %p '%s', lock var %lu\n" - "Last time reserved by thread " ULINTPF - " in file %s line " ULINTPF ", " - "waiters flag " ULINTPF "\n", - (void*) mutex, mutex->cmutex_name, - (ulong) mutex->lock_word, - os_thread_pf(mutex->thread_id), - mutex->file_name, mutex->line, - mutex->waiters); - } - - /* If stacktrace feature is enabled we will send a SIGUSR2 - signal to thread waiting for the semaphore. Signal handler - will then dump the current stack to error log. 
*/ - if (srv_use_stacktrace && cell && cell->thread) { -#ifdef __linux__ - pthread_kill(cell->thread, SIGUSR2); -#endif - } - - if (type == SYNC_PRIO_MUTEX) { - - fprintf(file, - "high-priority waiters count %lu\n", - (ulong) prio_mutex->high_priority_waiters); - } - - } else if (type == RW_LOCK_EX - || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED - || type == PRIO_RW_LOCK_SHARED - || type == PRIO_RW_LOCK_EX) { - - fputs((type == RW_LOCK_EX || type == PRIO_RW_LOCK_EX) - ? "X-lock on" - : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on" - : "S-lock on", file); - - /* Currently we are unable to tell high priority - RW_LOCK_WAIT_EX waiter from a regular priority one. Assume - it's a regular one. */ - if (type == RW_LOCK_EX || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED) { - - rwlock = static_cast<rw_lock_t *> - (cell->old_wait_rw_lock); - } else { - - prio_rwlock = static_cast<prio_rw_lock_t *> - (cell->old_wait_rw_lock); - rwlock = &prio_rwlock->base_lock; - } - - if (rwlock) { - fprintf(file, - " RW-latch at %p '%s'\n", - (void*) rwlock, rwlock->lock_name); - - writer = rw_lock_get_writer(rwlock); - - if (writer && writer != RW_LOCK_NOT_LOCKED) { - fprintf(file, - "a writer (thread id " ULINTPF ") has" - " reserved it in mode %s", - os_thread_pf(rwlock->writer_thread), - writer == RW_LOCK_EX - ? 
" exclusive\n" - : " wait exclusive\n"); - - *reserver = rwlock->writer_thread; - } - - fprintf(file, - "number of readers " ULINTPF - ", waiters flag " ULINTPF ", " - "lock_word: %lx\n" - "Last time read locked in file %s line %u\n" - "Last time write locked in file %s line %u\n" - "Holder thread " ULINTPF - " file %s line " ULINTPF "\n", - rw_lock_get_reader_count(rwlock), - rwlock->waiters, - rwlock->lock_word, - innobase_basename(rwlock->last_s_file_name), - rwlock->last_s_line, - innobase_basename(rwlock->last_x_file_name), - rwlock->last_x_line, - os_thread_pf(rwlock->thread_id), - innobase_basename(rwlock->file_name), - rwlock->line); - - /* If stacktrace feature is enabled we will send a SIGUSR2 - signal to thread that has locked RW-latch with write mode. - Signal handler will then dump the current stack to error log. */ - if (writer != RW_LOCK_NOT_LOCKED && srv_use_stacktrace && - rwlock && rwlock->writer_thread) { -#ifdef __linux__ - pthread_kill(rwlock->writer_thread, SIGUSR2); -#endif - } - } - - if (prio_rwlock) { - fprintf(file, "high priority S waiters count %lu, " - "high priority X waiters count %lu, " - "wait-exclusive waiter is " - "high priority if exists: %lu\n", - prio_rwlock->high_priority_s_waiters, - prio_rwlock->high_priority_x_waiters, - prio_rwlock->high_priority_wait_ex_waiter); - } - } else { - ut_error; - } - - if (!cell->waiting) { - fputs("wait has ended\n", file); - } -} - -#ifdef UNIV_SYNC_DEBUG - -/******************************************************************//** -Recursion step for deadlock detection. -@return TRUE if deadlock detected */ -static -ibool -sync_array_deadlock_step( -/*=====================*/ - sync_array_t* arr, /*!< in: wait array; NOTE! 
the caller must - own the mutex to array */ - sync_cell_t* start, /*!< in: cell where recursive search - started */ - os_thread_id_t thread, /*!< in: thread to look at */ - ulint pass, /*!< in: pass value */ - ulint depth) /*!< in: recursion depth */ -{ - sync_cell_t* new_cell; - - if (pass != 0) { - /* If pass != 0, then we do not know which threads are - responsible of releasing the lock, and no deadlock can - be detected. */ - - return(FALSE); - } - - new_cell = sync_array_find_thread(arr, thread); - - if (new_cell == start) { - /* Deadlock */ - fputs("########################################\n" - "DEADLOCK of threads detected!\n", stderr); - - return(TRUE); - - } else if (new_cell) { - return(sync_array_detect_deadlock( - arr, start, new_cell, depth + 1)); - } - return(FALSE); -} - -/******************************************************************//** -This function is called only in the debug version. Detects a deadlock -of one or more threads because of waits of semaphores. -@return TRUE if deadlock detected */ -static -ibool -sync_array_detect_deadlock( -/*=======================*/ - sync_array_t* arr, /*!< in: wait array; NOTE! 
the caller must - own the mutex to array */ - sync_cell_t* start, /*!< in: cell where recursive search started */ - sync_cell_t* cell, /*!< in: cell to search */ - ulint depth) /*!< in: recursion depth */ -{ - ib_mutex_t* mutex; - rw_lock_t* lock; - os_thread_id_t thread; - ibool ret; - rw_lock_debug_t*debug; - os_thread_id_t r = 0; - - ut_a(arr); - ut_a(start); - ut_a(cell); - ut_ad(cell->wait_object); - ut_ad(os_thread_get_curr_id() == start->thread); - ut_ad(depth < 100); - - depth++; - - if (!cell->waiting) { - - return(FALSE); /* No deadlock here */ - } - - if (cell->request_type == SYNC_MUTEX - || cell->request_type == SYNC_PRIO_MUTEX) { - - if (cell->request_type == SYNC_MUTEX) { - mutex = static_cast<ib_mutex_t*>(cell->wait_object); - } else { - mutex = &(static_cast<ib_prio_mutex_t*>( - cell->wait_object))->base_mutex; - } - - if (mutex_get_lock_word(mutex) != 0) { - - thread = mutex->thread_id; - - /* Note that mutex->thread_id above may be - also OS_THREAD_ID_UNDEFINED, because the - thread which held the mutex maybe has not - yet updated the value, or it has already - released the mutex: in this case no deadlock - can occur, as the wait array cannot contain - a thread with ID_UNDEFINED value. 
*/ - - ret = sync_array_deadlock_step(arr, start, thread, 0, - depth); - if (ret) { - fprintf(stderr, - "Mutex %p owned by thread %lu file %s line %lu\n", - mutex, (ulong) os_thread_pf(mutex->thread_id), - mutex->file_name, (ulong) mutex->line); - sync_array_cell_print(stderr, cell, &r); - - return(TRUE); - } - } - - return(FALSE); /* No deadlock */ - - } else if (cell->request_type == RW_LOCK_EX - || cell->request_type == PRIO_RW_LOCK_EX - || cell->request_type == RW_LOCK_WAIT_EX) { - - lock = static_cast<rw_lock_t*>(cell->wait_object); - - for (debug = UT_LIST_GET_FIRST(lock->debug_list); - debug != 0; - debug = UT_LIST_GET_NEXT(list, debug)) { - - thread = debug->thread_id; - - if (((debug->lock_type == RW_LOCK_EX) - && !os_thread_eq(thread, cell->thread)) - || ((debug->lock_type == RW_LOCK_WAIT_EX) - && !os_thread_eq(thread, cell->thread)) - || (debug->lock_type == RW_LOCK_SHARED)) { - - /* The (wait) x-lock request can block - infinitely only if someone (can be also cell - thread) is holding s-lock, or someone - (cannot be cell thread) (wait) x-lock, and - he is blocked by start thread */ - - ret = sync_array_deadlock_step( - arr, start, thread, debug->pass, - depth); - if (ret) { -print: - fprintf(stderr, "rw-lock %p ", - (void*) lock); - sync_array_cell_print(stderr, cell, &r); - rw_lock_debug_print(stderr, debug); - return(TRUE); - } - } - } - - return(FALSE); - - } else if (cell->request_type == RW_LOCK_SHARED - || cell->request_type == PRIO_RW_LOCK_SHARED) { - - lock = static_cast<rw_lock_t*>(cell->wait_object); - - for (debug = UT_LIST_GET_FIRST(lock->debug_list); - debug != 0; - debug = UT_LIST_GET_NEXT(list, debug)) { - - thread = debug->thread_id; - - if ((debug->lock_type == RW_LOCK_EX) - || (debug->lock_type == RW_LOCK_WAIT_EX)) { - - /* The s-lock request can block infinitely - only if someone (can also be cell thread) is - holding (wait) x-lock, and he is blocked by - start thread */ - - ret = sync_array_deadlock_step( - arr, start, thread, 
debug->pass, - depth); - if (ret) { - goto print; - } - } - } - - return(FALSE); - - } else { - ut_error; - } - - return(TRUE); /* Execution never reaches this line: for compiler - fooling only */ -} -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Determines if we can wake up the thread waiting for a sempahore. */ -static -ibool -sync_arr_cell_can_wake_up( -/*======================*/ - sync_cell_t* cell) /*!< in: cell to search */ -{ - ib_mutex_t* mutex; - rw_lock_t* lock; - - if (cell->request_type == SYNC_MUTEX - || cell->request_type == SYNC_PRIO_MUTEX) { - - if (cell->request_type == SYNC_MUTEX) { - mutex = static_cast<ib_mutex_t*>(cell->wait_object); - } else { - mutex = &(static_cast<ib_prio_mutex_t*>( - cell->wait_object))->base_mutex; - } - - os_rmb; - if (mutex_get_lock_word(mutex) == 0) { - - return(TRUE); - } - - } else if (cell->request_type == RW_LOCK_EX - || cell->request_type == PRIO_RW_LOCK_EX) { - - lock = static_cast<rw_lock_t*>(cell->wait_object); - - os_rmb; - if (lock->lock_word > 0) { - /* Either unlocked or only read locked. */ - - return(TRUE); - } - - } else if (cell->request_type == RW_LOCK_WAIT_EX) { - - lock = static_cast<rw_lock_t*>(cell->wait_object); - - /* lock_word == 0 means all readers have left */ - os_rmb; - if (lock->lock_word == 0) { - - return(TRUE); - } - } else if (cell->request_type == RW_LOCK_SHARED - || cell->request_type == PRIO_RW_LOCK_SHARED) { - lock = static_cast<rw_lock_t*>(cell->wait_object); - - /* lock_word > 0 means no writer or reserved writer */ - os_rmb; - if (lock->lock_word > 0) { - - return(TRUE); - } - } else { - - ut_error; - } - - return(FALSE); -} - -/******************************************************************//** -Frees the cell. NOTE! sync_array_wait_event frees the cell -automatically! 
*/ -UNIV_INTERN -void -sync_array_free_cell( -/*=================*/ - sync_array_t* arr, /*!< in: wait array */ - ulint index) /*!< in: index of the cell in array */ -{ - sync_cell_t* cell; - - sync_array_enter(arr); - - cell = sync_array_get_nth_cell(arr, index); - - ut_a(cell->wait_object != NULL); - - cell->waiting = FALSE; - cell->wait_object = NULL; - cell->signal_count = 0; - - ut_a(arr->n_reserved > 0); - arr->n_reserved--; - - sync_array_exit(arr); -} - -/**********************************************************************//** -Increments the signalled count. */ -UNIV_INTERN -void -sync_array_object_signalled(void) -/*=============================*/ -{ -#ifdef HAVE_ATOMIC_BUILTINS - (void) os_atomic_increment_ulint(&sg_count, 1); -#else - ++sg_count; -#endif /* HAVE_ATOMIC_BUILTINS */ -} - -/**********************************************************************//** -If the wakeup algorithm does not work perfectly at semaphore relases, -this function will do the waking (see the comment in mutex_exit). This -function should be called about every 1 second in the server. - -Note that there's a race condition between this thread and mutex_exit -changing the lock_word and calling signal_object, so sometimes this finds -threads to wake up even when nothing has gone wrong. 
*/ -static -void -sync_array_wake_threads_if_sema_free_low( -/*=====================================*/ - sync_array_t* arr) /* in/out: wait array */ -{ - ulint i = 0; - ulint count; - - sync_array_enter(arr); - - for (count = 0; count < arr->n_reserved; ++i) { - sync_cell_t* cell; - - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object != NULL) { - - count++; - - if (sync_arr_cell_can_wake_up(cell)) { - os_event_t event; - - event = sync_cell_get_event(cell); - - os_event_set(event); - } - } - } - - sync_array_exit(arr); -} - -/**********************************************************************//** -If the wakeup algorithm does not work perfectly at semaphore relases, -this function will do the waking (see the comment in mutex_exit). This -function should be called about every 1 second in the server. - -Note that there's a race condition between this thread and mutex_exit -changing the lock_word and calling signal_object, so sometimes this finds -threads to wake up even when nothing has gone wrong. */ -UNIV_INTERN -void -sync_arr_wake_threads_if_sema_free(void) -/*====================================*/ -{ - ulint i; - - for (i = 0; i < sync_array_size; ++i) { - - sync_array_wake_threads_if_sema_free_low( - sync_wait_array[i]); - } -} - -/**********************************************************************//** -Prints warnings of long semaphore waits to stderr. -@return TRUE if fatal semaphore wait threshold was exceeded */ -static -ibool -sync_array_print_long_waits_low( -/*============================*/ - sync_array_t* arr, /*!< in: sync array instance */ - os_thread_id_t* waiter, /*!< out: longest waiting thread */ - const void** sema, /*!< out: longest-waited-for semaphore */ - ibool* noticed)/*!< out: TRUE if long wait noticed */ -{ - ulint i; - ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; - ibool fatal = FALSE; - double longest_diff = 0; - - /* For huge tables, skip the check during CHECK TABLE etc... 
*/ - if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) { - return(FALSE); - } - -#ifdef UNIV_DEBUG_VALGRIND - /* Increase the timeouts if running under valgrind because it executes - extremely slowly. UNIV_DEBUG_VALGRIND does not necessary mean that - we are running under valgrind but we have no better way to tell. - See Bug#58432 innodb.innodb_bug56143 fails under valgrind - for an example */ -# define SYNC_ARRAY_TIMEOUT 2400 - fatal_timeout *= 10; -#else -# define SYNC_ARRAY_TIMEOUT 240 -#endif - - for (i = 0; i < arr->n_cells; i++) { - - double diff; - sync_cell_t* cell; - void* wait_object; - os_thread_id_t reserver=0; - - cell = sync_array_get_nth_cell(arr, i); - - wait_object = cell->wait_object; - - if (wait_object == NULL || !cell->waiting) { - - continue; - } - - diff = difftime(time(NULL), cell->reservation_time); - - if (diff > SYNC_ARRAY_TIMEOUT) { - fputs("InnoDB: Warning: a long semaphore wait:\n", - stderr); - sync_array_cell_print(stderr, cell, &reserver); - *noticed = TRUE; - } - - if (diff > fatal_timeout) { - fatal = TRUE; - } - - if (diff > longest_diff) { - longest_diff = diff; - *sema = wait_object; - *waiter = cell->thread; - } - } - - /* We found a long semaphore wait, wait all threads that are - waiting for a semaphore. 
*/ - if (*noticed) { - for (i = 0; i < arr->n_cells; i++) { - void* wait_object; - sync_cell_t* cell; - os_thread_id_t reserver=(os_thread_id_t)ULINT_UNDEFINED; - ulint loop=0; - - cell = sync_array_get_nth_cell(arr, i); - - wait_object = cell->wait_object; - - if (wait_object == NULL || !cell->waiting) { - - continue; - } - - fputs("InnoDB: Warning: semaphore wait:\n", - stderr); - sync_array_cell_print(stderr, cell, &reserver); - - /* Try to output cell information for writer recursive way */ - while (reserver != (os_thread_id_t)ULINT_UNDEFINED) { - sync_cell_t* reserver_wait; - - reserver_wait = sync_array_find_thread(arr, reserver); - - if (reserver_wait && - reserver_wait->wait_object != NULL && - reserver_wait->waiting) { - fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n", - stderr); - sync_array_cell_print(stderr, reserver_wait, &reserver); - - if (reserver_wait->thread == reserver) { - reserver = (os_thread_id_t)ULINT_UNDEFINED; - } - } else { - reserver = (os_thread_id_t)ULINT_UNDEFINED; - } - - /* This is protection against loop */ - if (loop > 100) { - fputs("InnoDB: Warning: Too many waiting threads.\n", stderr); - break; - } - } - } - } - -#undef SYNC_ARRAY_TIMEOUT - - return(fatal); -} - -/**********************************************************************//** -Prints warnings of long semaphore waits to stderr. 
-@return TRUE if fatal semaphore wait threshold was exceeded */ -UNIV_INTERN -ibool -sync_array_print_long_waits( -/*========================*/ - os_thread_id_t* waiter, /*!< out: longest waiting thread */ - const void** sema) /*!< out: longest-waited-for semaphore */ -{ - ulint i; - ibool fatal = FALSE; - ibool noticed = FALSE; - - for (i = 0; i < sync_array_size; ++i) { - - sync_array_t* arr = sync_wait_array[i]; - - sync_array_enter(arr); - - if (sync_array_print_long_waits_low( - arr, waiter, sema, ¬iced)) { - - fatal = TRUE; - } - - sync_array_exit(arr); - } - - if (noticed) { - ibool old_val; - - fprintf(stderr, - "InnoDB: ###### Starts InnoDB Monitor" - " for 30 secs to print diagnostic info:\n"); - - old_val = srv_print_innodb_monitor; - - /* If some crucial semaphore is reserved, then also the InnoDB - Monitor can hang, and we do not get diagnostics. Since in - many cases an InnoDB hang is caused by a pwrite() or a pread() - call hanging inside the operating system, let us print right - now the values of pending calls of these. */ - - fprintf(stderr, - "InnoDB: Pending reads " UINT64PF - ", writes " UINT64PF "\n", - MONITOR_VALUE(MONITOR_OS_PENDING_READS), - MONITOR_VALUE(MONITOR_OS_PENDING_WRITES)); - - srv_print_innodb_monitor = TRUE; - os_event_set(srv_monitor_event); - - os_thread_sleep(30000000); - - srv_print_innodb_monitor = static_cast<my_bool>(old_val); - fprintf(stderr, - "InnoDB: ###### Diagnostic info printed" - " to the standard error stream\n"); - } - - return(fatal); -} - -/**********************************************************************//** -Prints info of the wait array. 
*/ -static -void -sync_array_print_info_low( -/*======================*/ - FILE* file, /*!< in: file where to print */ - sync_array_t* arr) /*!< in: wait array */ -{ - ulint i; - ulint count = 0; - - fprintf(file, - "OS WAIT ARRAY INFO: reservation count " ULINTPF "\n", - arr->res_count); - - for (i = 0; count < arr->n_reserved; ++i) { - sync_cell_t* cell; - os_thread_id_t r = 0; - - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object != NULL) { - count++; - sync_array_cell_print(file, cell, &r); - } - } -} - -/**********************************************************************//** -Prints info of the wait array. */ -static -void -sync_array_print_info( -/*==================*/ - FILE* file, /*!< in: file where to print */ - sync_array_t* arr) /*!< in: wait array */ -{ - sync_array_enter(arr); - - sync_array_print_info_low(file, arr); - - sync_array_exit(arr); -} - -/**********************************************************************//** -Create the primary system wait array(s), they are protected by an OS mutex */ -UNIV_INTERN -void -sync_array_init( -/*============*/ - ulint n_threads) /*!< in: Number of slots to - create in all arrays */ -{ - ulint i; - ulint n_slots; - - ut_a(sync_wait_array == NULL); - ut_a(srv_sync_array_size > 0); - ut_a(n_threads > 0); - - sync_array_size = srv_sync_array_size; - - /* We have to use ut_malloc() because the mutex infrastructure - hasn't been initialised yet. It is required by mem_alloc() and - the heap functions. */ - - sync_wait_array = static_cast<sync_array_t**>( - ut_malloc(sizeof(*sync_wait_array) * sync_array_size)); - - n_slots = 1 + (n_threads - 1) / sync_array_size; - - for (i = 0; i < sync_array_size; ++i) { - - sync_wait_array[i] = sync_array_create(n_slots); - } -} - -/**********************************************************************//** -Close sync array wait sub-system. 
*/ -UNIV_INTERN -void -sync_array_close(void) -/*==================*/ -{ - ulint i; - - for (i = 0; i < sync_array_size; ++i) { - sync_array_free(sync_wait_array[i]); - } - - ut_free(sync_wait_array); - sync_wait_array = NULL; -} - -/**********************************************************************//** -Print info about the sync array(s). */ -UNIV_INTERN -void -sync_array_print( -/*=============*/ - FILE* file) /*!< in/out: Print to this stream */ -{ - ulint i; - - for (i = 0; i < sync_array_size; ++i) { - sync_array_print_info(file, sync_wait_array[i]); - } - - fprintf(file, - "OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count); - -} - -/**********************************************************************//** -Get an instance of the sync wait array. */ -UNIV_INTERN -sync_array_t* -sync_array_get(void) -/*================*/ -{ - ulint i; - static ulint count; - -#ifdef HAVE_ATOMIC_BUILTINS - i = os_atomic_increment_ulint(&count, 1); -#else - i = count++; -#endif /* HAVE_ATOMIC_BUILTINS */ - - return(sync_wait_array[i % sync_array_size]); -} - -/**********************************************************************//** -Prints info of the wait array without using any mutexes/semaphores. 
*/ -UNIV_INTERN -void -sync_array_print_xtradb(void) -/*=========================*/ -{ - ulint i; - sync_array_t* arr = sync_array_get(); - - fputs("InnoDB: Semaphore wait debug output started for XtraDB:\n", stderr); - - for (i = 0; i < arr->n_cells; i++) { - void* wait_object; - sync_cell_t* cell; - os_thread_id_t reserver=(os_thread_id_t)ULINT_UNDEFINED; - ulint loop=0; - - cell = sync_array_get_nth_cell(arr, i); - - wait_object = cell->wait_object; - - if (wait_object == NULL || !cell->waiting) { - - continue; - } - - fputs("InnoDB: Warning: semaphore wait:\n", - stderr); - sync_array_cell_print(stderr, cell, &reserver); - - /* Try to output cell information for writer recursive way */ - while (reserver != (os_thread_id_t)ULINT_UNDEFINED) { - sync_cell_t* reserver_wait; - - reserver_wait = sync_array_find_thread(arr, reserver); - - if (reserver_wait && - reserver_wait->wait_object != NULL && - reserver_wait->waiting) { - fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n", - stderr); - sync_array_cell_print(stderr, reserver_wait, &reserver); - - if (reserver_wait->thread == reserver) { - reserver = (os_thread_id_t)ULINT_UNDEFINED; - } - } else { - reserver = (os_thread_id_t)ULINT_UNDEFINED; - } - - /* This is protection against loop */ - if (loop > 100) { - fputs("InnoDB: Warning: Too many waiting threads.\n", stderr); - break; - } - } - } - - fputs("InnoDB: Semaphore wait debug output ended:\n", stderr); -} - -/**********************************************************************//** -Get number of items on sync array. */ -UNIV_INTERN -ulint -sync_arr_get_n_items(void) -/*======================*/ -{ - sync_array_t* sync_arr = sync_array_get(); - return (ulint) sync_arr->n_cells; -} - -/******************************************************************//** -Get specified item from sync array if it is reserved. Set given -pointer to array item if it is reserved. 
-@return true if item is reserved, false othervise */ -UNIV_INTERN -ibool -sync_arr_get_item( -/*==============*/ - ulint i, /*!< in: requested item */ - sync_cell_t **cell) /*!< out: cell contents if item - reserved */ -{ - sync_array_t* sync_arr; - sync_cell_t* wait_cell; - void* wait_object; - ibool found = FALSE; - - sync_arr = sync_array_get(); - wait_cell = sync_array_get_nth_cell(sync_arr, i); - - if (wait_cell) { - wait_object = wait_cell->wait_object; - - if(wait_object != NULL && wait_cell->waiting) { - found = TRUE; - *cell = wait_cell; - } - } - - return found; -} - -/*******************************************************************//** -Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table. -Loop through each item on sync array, and extract the column -information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table. -@return 0 on success */ -UNIV_INTERN -int -sync_arr_fill_sys_semphore_waits_table( -/*===================================*/ - THD* thd, /*!< in: thread */ - TABLE_LIST* tables, /*!< in/out: tables to fill */ - Item* ) /*!< in: condition (not used) */ -{ - Field** fields; - ulint n_items; - - DBUG_ENTER("i_s_sys_semaphore_waits_fill_table"); - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - - /* deny access to user without PROCESS_ACL privilege */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(0); - } - - fields = tables->table->field; - n_items = sync_arr_get_n_items(); - ulint type; - - for(ulint i=0; i < n_items;i++) { - sync_cell_t *cell=NULL; - if (sync_arr_get_item(i, &cell)) { - ib_prio_mutex_t* prio_mutex; - ib_mutex_t* mutex; - type = cell->request_type; - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID], (longlong)os_thread_pf(cell->thread))); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LINE], cell->line)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME], 
(longlong)difftime(time(NULL), cell->reservation_time))); - - if (type == SYNC_MUTEX || type == SYNC_PRIO_MUTEX) { - if (type == SYNC_MUTEX) { - mutex = static_cast<ib_mutex_t*>(cell->old_wait_mutex); - } else { - - prio_mutex = static_cast<ib_prio_mutex_t*> - (cell->old_wait_mutex); - mutex = &prio_mutex->base_mutex; - } - - if (mutex) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)mutex)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX")); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], mutex->line)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE], mutex->cline)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], mutex->line)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait)); - } - } else if (type == RW_LOCK_EX - || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED - || type == PRIO_RW_LOCK_SHARED - || type == PRIO_RW_LOCK_EX) { - rw_lock_t* rwlock=NULL; - prio_rw_lock_t* prio_rwlock=NULL; - - if (type == RW_LOCK_EX || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED) { - - rwlock = static_cast<rw_lock_t *> - (cell->old_wait_rw_lock); - } else { - - prio_rwlock = static_cast<prio_rw_lock_t *> - (cell->old_wait_rw_lock); - rwlock = 
&prio_rwlock->base_lock; - } - - if (rwlock) { - ulint writer = rw_lock_get_writer(rwlock); - - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)rwlock)); - if (type == RW_LOCK_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_EX")); - } else if (type == RW_LOCK_WAIT_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_WAIT_EX")); - } else if (type == RW_LOCK_SHARED) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SHARED")); - } - - if (writer != RW_LOCK_NOT_LOCKED) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD], (longlong)os_thread_pf(rwlock->writer_thread))); - - if (writer == RW_LOCK_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_EX")); - } else if (writer == RW_LOCK_WAIT_EX) { - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_WAIT_EX")); - } - - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], rwlock->line)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_READERS], rw_lock_get_reader_count(rwlock))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)rwlock->waiters)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)rwlock->lock_word)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_READER_FILE], innobase_basename(rwlock->last_s_file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE], rwlock->last_s_line)); - OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name))); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], 
rwlock->last_x_line)); - OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], rwlock->count_os_wait)); - } - } - } - - OK(schema_table_store_record(thd, tables->table)); - } - } - - DBUG_RETURN(0); -} diff --git a/storage/xtradb/sync/sync0rw.cc b/storage/xtradb/sync/sync0rw.cc deleted file mode 100644 index 729f510013d..00000000000 --- a/storage/xtradb/sync/sync0rw.cc +++ /dev/null @@ -1,1297 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file sync/sync0rw.cc -The read-write lock (for thread synchronization) - -Created 9/11/1995 Heikki Tuuri -*******************************************************/ - -#include "sync0rw.h" -#ifdef UNIV_NONINL -#include "sync0rw.ic" -#include "sync0arr.ic" -#endif - -#include "os0thread.h" -#include "mem0mem.h" -#include "srv0srv.h" -#include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */ -#include "ha_prototypes.h" -#include "my_cpu.h" - -/* - IMPLEMENTATION OF THE RW_LOCK - ============================= -The status of a rw_lock is held in lock_word. The initial value of lock_word is -X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR -for each x-lock. This describes the lock state for each value of lock_word: - -lock_word == X_LOCK_DECR: Unlocked. -0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers. - (X_LOCK_DECR - lock_word) is the - number of readers that hold the lock. -lock_word == 0: Write locked --X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer. - (-lock_word) is the number of readers - that hold the lock. -lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been - decremented by X_LOCK_DECR for the first lock - and the first recursive lock, then by 1 for - each recursive lock thereafter. - So the number of locks is: - (lock_copy == 0) ? 1 : 2 - (lock_copy + X_LOCK_DECR) - -The lock_word is always read and updated atomically and consistently, so that -it always represents the state of the lock, and the state of the lock changes -with a single atomic operation. 
This lock_word holds all of the information -that a thread needs in order to determine if it is eligible to gain the lock -or if it must spin or sleep. The one exception to this is that writer_thread -must be verified before recursive write locks: to solve this scenario, we make -writer_thread readable by all threads, but only writeable by the x-lock holder. - -The other members of the lock obey the following rules to remain consistent: - -recursive: This and the writer_thread field together control the - behaviour of recursive x-locking. - lock->recursive must be FALSE in following states: - 1) The writer_thread contains garbage i.e.: the - lock has just been initialized. - 2) The lock is not x-held and there is no - x-waiter waiting on WAIT_EX event. - 3) The lock is x-held or there is an x-waiter - waiting on WAIT_EX event but the 'pass' value - is non-zero. - lock->recursive is TRUE iff: - 1) The lock is x-held or there is an x-waiter - waiting on WAIT_EX event and the 'pass' value - is zero. - This flag must be set after the writer_thread field - has been updated with a memory ordering barrier. - It is unset before the lock_word has been incremented. -writer_thread: Is used only in recursive x-locking. Can only be safely - read iff lock->recursive flag is TRUE. - This field is uninitialized at lock creation time and - is updated atomically when x-lock is acquired or when - move_ownership is called. A thread is only allowed to - set the value of this field to its thread_id i.e.: a - thread cannot set writer_thread to some other thread's - id. -waiters: May be set to 1 anytime, but to avoid unnecessary wake-up - signals, it should only be set to 1 when there are threads - waiting on event. Must be 1 when a writer starts waiting to - ensure the current x-locking thread sends a wake-up signal - during unlock. May only be reset to 0 immediately before a - wake-up signal is sent to event. 
On most platforms, a - memory barrier is required after waiters is set, and before - verifying lock_word is still held, to ensure some unlocker - really does see the flag's new value. -event: Threads wait on event for read or writer lock when another - thread has an x-lock or an x-lock reservation (wait_ex). A - thread may only wait on event after performing the following - actions in order: - (1) Record the counter value of event (with os_event_reset). - (2) Set waiters to 1. - (3) Verify lock_word <= 0. - (1) must come before (2) to ensure signal is not missed. - (2) must come before (3) to ensure a signal is sent. - These restrictions force the above ordering. - Immediately before sending the wake-up signal, we should: - (1) Verify lock_word == X_LOCK_DECR (unlocked) - (2) Reset waiters to 0. -wait_ex_event: A thread may only wait on the wait_ex_event after it has - performed the following actions in order: - (1) Decrement lock_word by X_LOCK_DECR. - (2) Record counter value of wait_ex_event (os_event_reset, - called from sync_array_reserve_cell). - (3) Verify that lock_word < 0. - (1) must come first to ensure no other threads become reader - or next writer, and to notify unlocker that signal must be sent. - (2) must come before (3) to ensure the signal is not missed. - These restrictions force the above ordering. - Immediately before sending the wake-up signal, we should: - Verify lock_word == 0 (waiting thread holds x_lock) -*/ - -UNIV_INTERN rw_lock_stats_t rw_lock_stats; - -/* The global list of rw-locks */ -UNIV_INTERN rw_lock_list_t rw_lock_list; -UNIV_INTERN ib_mutex_t rw_lock_list_mutex; - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t rw_lock_list_mutex_key; -UNIV_INTERN mysql_pfs_key_t rw_lock_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_SYNC_DEBUG -/* The global mutex which protects debug info lists of all rw-locks. -To modify the debug info list of an rw-lock, this mutex has to be -acquired in addition to the mutex protecting the lock. 
*/ - -UNIV_INTERN os_fast_mutex_t rw_lock_debug_mutex; - -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key; -# endif - -/******************************************************************//** -Creates a debug info struct. */ -static -rw_lock_debug_t* -rw_lock_debug_create(void); -/*======================*/ -/******************************************************************//** -Frees a debug info struct. */ -static -void -rw_lock_debug_free( -/*===============*/ - rw_lock_debug_t* info); - -/******************************************************************//** -Creates a debug info struct. -@return own: debug info struct */ -static -rw_lock_debug_t* -rw_lock_debug_create(void) -/*======================*/ -{ - return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t))); -} - -/******************************************************************//** -Frees a debug info struct. */ -static -void -rw_lock_debug_free( -/*===============*/ - rw_lock_debug_t* info) -{ - mem_free(info); -} -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Creates, or rather, initializes an rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. */ -UNIV_INTERN -void -rw_lock_create_func( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ -{ - /* If this is the very first time a synchronization object is - created, then the following call initializes the sync system. 
*/ - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex_create(rw_lock_mutex_key, rw_lock_get_mutex(lock), - SYNC_NO_ORDER_CHECK); - - lock->mutex.cfile_name = cfile_name; - lock->mutex.cline = cline; - lock->mutex.lock_name = cmutex_name; - ut_d(lock->mutex.ib_mutex_type = 1); - -#else /* INNODB_RW_LOCKS_USE_ATOMICS */ -# ifdef UNIV_DEBUG - UT_NOT_USED(cmutex_name); -# endif -#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ - - lock->lock_word = X_LOCK_DECR; - lock->waiters = 0; - - /* We set this value to signify that lock->writer_thread - contains garbage at initialization and cannot be used for - recursive x-locking. */ - lock->recursive = FALSE; - /* Silence Valgrind when UNIV_DEBUG_VALGRIND is not enabled. */ - memset((void*) &lock->writer_thread, 0, sizeof lock->writer_thread); - UNIV_MEM_INVALID(&lock->writer_thread, sizeof lock->writer_thread); - -#ifdef UNIV_SYNC_DEBUG - UT_LIST_INIT(lock->debug_list); - - lock->level = level; -#endif /* UNIV_SYNC_DEBUG */ - - ut_d(lock->magic_n = RW_LOCK_MAGIC_N); - - lock->cfile_name = cfile_name; - lock->cline = (unsigned int) cline; - lock->lock_name = cmutex_name; - lock->count_os_wait = 0; - lock->file_name = "not yet reserved"; - lock->line = 0; - lock->last_s_file_name = "not yet reserved"; - lock->last_x_file_name = "not yet reserved"; - lock->last_s_line = 0; - lock->last_x_line = 0; - os_event_create(&lock->event); - os_event_create(&lock->wait_ex_event); - - mutex_enter(&rw_lock_list_mutex); - - ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL - || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N); - - UT_LIST_ADD_FIRST(list, rw_lock_list, lock); - - mutex_exit(&rw_lock_list_mutex); -} - -/******************************************************************//** -Creates, or rather, initializes a priority rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. 
Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. */ -UNIV_INTERN -void -rw_lock_create_func( -/*================*/ - prio_rw_lock_t* lock, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline) /*!< in: file line where created */ -{ - rw_lock_create_func(&lock->base_lock, -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - level, -# endif -#endif - cmutex_name, - cfile_name, - cline); - - lock->high_priority_s_waiters = 0; - os_event_create(&lock->high_priority_s_event); - lock->high_priority_x_waiters = 0; - os_event_create(&lock->high_priority_x_event); - lock->high_priority_wait_ex_waiter = 0; -} - -/******************************************************************//** -Calling this function is obligatory only if the memory buffer containing -the rw-lock is freed. Removes an rw-lock object from the global list. The -rw-lock is checked to be in the non-locked state. 
*/ -UNIV_INTERN -void -rw_lock_free_func( -/*==============*/ - rw_lock_t* lock) /*!< in: rw-lock */ -{ -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - ib_mutex_t* mutex; -#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ - - os_rmb; - ut_ad(rw_lock_validate(lock)); - ut_a(lock->lock_word == X_LOCK_DECR); - - mutex_enter(&rw_lock_list_mutex); - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex = rw_lock_get_mutex(lock); -#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ - - os_event_free(&lock->event, false); - - os_event_free(&lock->wait_ex_event, false); - - ut_ad(UT_LIST_GET_PREV(list, lock) == NULL - || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL - || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - - UT_LIST_REMOVE(list, rw_lock_list, lock); - - mutex_exit(&rw_lock_list_mutex); - - ut_d(lock->magic_n = 0); - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - /* We have merely removed the rw_lock from the list, the memory - has not been freed. Therefore the pointer to mutex is valid. */ - mutex_free(mutex); -#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ -} - -/******************************************************************//** -Calling this function is obligatory only if the memory buffer containing -the priority rw-lock is freed. Removes an rw-lock object from the global list. -The rw-lock is checked to be in the non-locked state. */ -UNIV_INTERN -void -rw_lock_free_func( -/*==============*/ - prio_rw_lock_t* lock) /*!< in: rw-lock */ -{ - os_event_free(&lock->high_priority_s_event, false); - os_event_free(&lock->high_priority_x_event, false); - rw_lock_free_func(&lock->base_lock); -} - -#ifdef UNIV_DEBUG -/******************************************************************//** -Checks that the rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. 
-@return TRUE */ -UNIV_INTERN -ibool -rw_lock_validate( -/*=============*/ - rw_lock_t* lock) /*!< in: rw-lock */ -{ - ulint waiters; - lint lock_word; - - ut_ad(lock); - - waiters = rw_lock_get_waiters(lock); - lock_word = lock->lock_word; - - ut_ad(lock->magic_n == RW_LOCK_MAGIC_N); - ut_ad(waiters == 0 || waiters == 1); - ut_ad(lock_word > -(2 * X_LOCK_DECR)); - ut_ad(lock_word <= X_LOCK_DECR); - - return(TRUE); -} - -/******************************************************************//** -Checks that the priority rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. -@return TRUE */ -UNIV_INTERN -ibool -rw_lock_validate( -/*=============*/ - prio_rw_lock_t* lock) /*!< in: rw-lock */ -{ - return(rw_lock_validate(&lock->base_lock)); -} - -#endif /* UNIV_DEBUG */ - -/******************************************************************//** -Lock a regular or priority rw-lock in shared mode for the current thread. If -the rw-lock is locked in exclusive mode, or there is an exclusive lock request -waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), -waiting for the lock, before suspending the thread. */ -UNIV_INTERN -void -rw_lock_s_lock_spin( -/*================*/ - void* _lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock - will be passed to another thread to unlock */ - bool priority_lock, - /*!< in: whether the lock is a priority lock */ - bool high_priority, - /*!< in: whether we are acquiring a priority - lock with high priority */ - const char* file_name, /*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - ulint index; /* index of the reserved wait cell */ - ulint i = 0; /* spin round count */ - sync_array_t* sync_arr; - size_t counter_index; - rw_lock_t* lock = (rw_lock_t *) _lock; - - /* We reuse the thread id to index into the counter, cache - it here for efficiency. 
*/ - - counter_index = (size_t) os_thread_get_curr_id(); - - ut_ad(rw_lock_validate(lock)); - - rw_lock_stats.rw_s_spin_wait_count.add(counter_index, 1); -lock_loop: - - if (!rw_lock_higher_prio_waiters_exist(priority_lock, high_priority, - lock)) { - - /* Spin waiting for the writer field to become free */ - os_rmb; - HMT_low(); - while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, - srv_spin_wait_delay)); - } - - i++; - os_rmb; - } - - HMT_medium(); - if (i >= SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread " ULINTPF " spin wait rw-s-lock at %p" - " '%s' rnds " ULINTPF "\n", - os_thread_pf(os_thread_get_curr_id()), - (void*) lock, lock->lock_name, i); - } - } else { - - /* In case of higher priority waiters already present, perform - only this part of the spinning code path. */ - os_thread_yield(); - } - - /* We try once again to obtain the lock */ - if (!rw_lock_higher_prio_waiters_exist(priority_lock, high_priority, - lock) - && (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line))) { - rw_lock_stats.rw_s_spin_round_count.add(counter_index, i); - - return; /* Success */ - } else { - - prio_rw_lock_t* prio_rw_lock = NULL; - - if (i > 0 && i < SYNC_SPIN_ROUNDS) { - goto lock_loop; - } - - rw_lock_stats.rw_s_spin_round_count.add(counter_index, i); - - sync_arr = sync_array_get_and_reserve_cell(lock, - high_priority - ? PRIO_RW_LOCK_SHARED - : RW_LOCK_SHARED, - file_name, - line, &index); - - /* Set waiters before checking lock_word to ensure wake-up - signal is sent. This may lead to some unnecessary signals. 
*/ - if (high_priority) { - - prio_rw_lock = reinterpret_cast<prio_rw_lock_t *> - (_lock); - os_atomic_increment_ulint( - &prio_rw_lock->high_priority_s_waiters, - 1); - } else { - - rw_lock_set_waiter_flag(lock); - } - - if (!rw_lock_higher_prio_waiters_exist(priority_lock, - high_priority, lock) - && (TRUE == rw_lock_s_lock_low(lock, pass, - file_name, line))) { - sync_array_free_cell(sync_arr, index); - if (prio_rw_lock) { - - os_atomic_decrement_ulint( - &prio_rw_lock->high_priority_s_waiters, - 1); - } - return; /* Success */ - } - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread " ULINTPF " OS wait rw-s-lock at %p" - " '%s'\n", - os_thread_pf(os_thread_get_curr_id()), - (void*) lock, lock->lock_name); - } - - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_lock_stats.rw_s_os_wait_count.add(counter_index, 1); - - sync_array_wait_event(sync_arr, index); - - if (prio_rw_lock) { - - os_atomic_decrement_ulint( - &prio_rw_lock->high_priority_s_waiters, - 1); - } - - i = 0; - goto lock_loop; - } -} - -/******************************************************************//** -This function is used in the insert buffer to move the ownership of an -x-latch on a buffer frame to the current thread. The x-latch was set by -the buffer read operation and it protected the buffer frame while the -read was done. The ownership is moved because we want that the current -thread is able to acquire a second x-latch which is stored in an mtr. -This, in turn, is needed to pass the debug checks of index page -operations. */ -UNIV_INTERN -void -rw_lock_x_lock_move_ownership( -/*==========================*/ - rw_lock_t* lock) /*!< in: lock which was x-locked in the - buffer read */ -{ - ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); - - rw_lock_set_writer_id_and_recursion_flag(lock, TRUE); -} - -/******************************************************************//** -Function for the next writer to call. Waits for readers to exit. 
-The caller must have already decremented lock_word by X_LOCK_DECR. */ -UNIV_INLINE -void -rw_lock_x_lock_wait( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - bool high_priority, - /*!< in: if true, the rw lock is a priority - lock and is being acquired with high - priority */ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ -#endif - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - ulint index; - ulint i = 0; - sync_array_t* sync_arr; - size_t counter_index; - prio_rw_lock_t* prio_rw_lock = NULL; - - /* We reuse the thread id to index into the counter, cache - it here for efficiency. */ - - counter_index = (size_t) os_thread_get_curr_id(); - - os_rmb; - ut_ad(lock->lock_word <= 0); - - HMT_low(); - if (high_priority) { - - prio_rw_lock = reinterpret_cast<prio_rw_lock_t *>(lock); - prio_rw_lock->high_priority_wait_ex_waiter = 1; - } - - while (lock->lock_word < 0) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - } - if(i < SYNC_SPIN_ROUNDS) { - i++; - os_rmb; - continue; - } - HMT_medium(); - - /* If there is still a reader, then go to sleep.*/ - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); - - sync_arr = sync_array_get_and_reserve_cell(lock, - RW_LOCK_WAIT_EX, - file_name, - line, &index); - - i = 0; - - /* Check lock_word to ensure wake-up isn't missed.*/ - if (lock->lock_word < 0) { - - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1); - - /* Add debug info as it is needed to detect possible - deadlock. We must add info for WAIT_EX thread for - deadlock detection to work properly. 
*/ -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, - file_name, line); -#endif - - if (srv_instrument_semaphores) { - lock->thread_id = os_thread_get_curr_id(); - lock->file_name = file_name; - lock->line = line; - } - - sync_array_wait_event(sync_arr, index); -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info( - lock, pass, RW_LOCK_WAIT_EX); -#endif - /* It is possible to wake when lock_word < 0. - We must pass the while-loop check to proceed.*/ - } else { - sync_array_free_cell(sync_arr, index); - } - HMT_low(); - } - HMT_medium(); - - if (prio_rw_lock) { - - prio_rw_lock->high_priority_wait_ex_waiter = 0; - } - - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); -} - -/******************************************************************//** -Low-level function for acquiring an exclusive lock. -@return FALSE if did not succeed, TRUE if success. */ -UNIV_INLINE -ibool -rw_lock_x_lock_low( -/*===============*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - bool high_priority, - /*!< in: if true, the rw lock is a priority - lock and is being acquired with high - priority */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - ibool local_recursive= lock->recursive; - - if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) { - - /* lock->recursive also tells us if the writer_thread - field is stale or active. As we are going to write - our own thread id in that field it must be that the - current writer_thread value is not active. */ - ut_a(!lock->recursive); - - /* Decrement occurred: we are writer or next-writer. */ - rw_lock_set_writer_id_and_recursion_flag( - lock, pass ? 
FALSE : TRUE); - - rw_lock_x_lock_wait(lock, high_priority, -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - file_name, line); - - } else { - os_thread_id_t thread_id = os_thread_get_curr_id(); - - /* Decrement failed: relock or failed lock - Note: recursive must be loaded before writer_thread see - comment for rw_lock_set_writer_id_and_recursion_flag(). - To achieve this we load it before rw_lock_lock_word_decr(), - which implies full memory barrier in current implementation. */ - if (!pass && local_recursive - && os_thread_eq(lock->writer_thread, thread_id)) { - /* Relock */ - if (lock->lock_word == 0) { - lock->lock_word -= X_LOCK_DECR; - } else { - --lock->lock_word; - } - - } else { - /* Another thread locked before us */ - return(FALSE); - } - } -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line); -#endif - lock->last_x_file_name = file_name; - lock->last_x_line = (unsigned int) line; - - if (srv_instrument_semaphores) { - lock->thread_id = os_thread_get_curr_id(); - lock->file_name = file_name; - lock->line = line; - } - - return(TRUE); -} - -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread. If the rw-lock is locked -in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock before suspending the thread. If the same thread has an x-lock -on the rw-lock, locking succeed, with the following exception: if pass != 0, -only a single x-lock may be taken on the lock. NOTE: If the same thread has -an s-lock, locking does not succeed! 
*/ -UNIV_INTERN -void -rw_lock_x_lock_func( -/*================*/ - rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line, /*!< in: line where requested */ - bool priority_lock, - /*!< in: whether the lock is a priority lock */ - bool high_priority) - /*!< in: whether we are acquiring a priority - lock with high priority */ -{ - ulint i; /*!< spin round count */ - ulint index; /*!< index of the reserved wait cell */ - sync_array_t* sync_arr; - ibool spinning = FALSE; - size_t counter_index; - prio_rw_lock_t* prio_lock = NULL; - - /* We reuse the thread id to index into the counter, cache - it here for efficiency. */ - - counter_index = (size_t) os_thread_get_curr_id(); - - ut_ad(rw_lock_validate(lock)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - i = 0; - - ut_ad(priority_lock || !high_priority); - -lock_loop: - - if (!rw_lock_higher_prio_waiters_exist(priority_lock, high_priority, - lock) - && rw_lock_x_lock_low(lock, high_priority, pass, - file_name, line)) { - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); - - return; /* Locking succeeded */ - - } else if (!rw_lock_higher_prio_waiters_exist(priority_lock, - high_priority, lock)) { - - if (!spinning) { - spinning = TRUE; - - rw_lock_stats.rw_x_spin_wait_count.add( - counter_index, 1); - } - - /* Spin waiting for the lock_word to become free */ - os_rmb; - HMT_low(); - while (i < SYNC_SPIN_ROUNDS - && lock->lock_word <= 0) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, - srv_spin_wait_delay)); - } - - i++; - os_rmb; - } - HMT_medium(); - if (i >= SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } else { - goto lock_loop; - } - } else { - - /* In case we skipped spinning because of higher-priority - waiters already waiting, perform only this bit of the spinning - 
code path. */ - os_thread_yield(); - } - - if (spinning) { - - rw_lock_stats.rw_x_spin_round_count.add(counter_index, i); - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread " ULINTPF " spin wait rw-x-lock at %p" - " '%s' rnds " ULINTPF "\n", - os_thread_pf(os_thread_get_curr_id()), - (void*) lock,lock->lock_name, i); - } - } - - sync_arr = sync_array_get_and_reserve_cell(lock, - high_priority - ? PRIO_RW_LOCK_EX - : RW_LOCK_EX, - file_name, line, &index); - - /* Waiters must be set before checking lock_word, to ensure signal - is sent. This could lead to a few unnecessary wake-up signals. */ - if (high_priority) { - - prio_lock = reinterpret_cast<prio_rw_lock_t *>(lock); - os_atomic_increment_ulint(&prio_lock->high_priority_x_waiters, - 1); - } else { - rw_lock_set_waiter_flag(lock); - } - - if (rw_lock_x_lock_low(lock, high_priority, pass, file_name, line)) { - sync_array_free_cell(sync_arr, index); - if (prio_lock) { - - os_atomic_decrement_ulint( - &prio_lock->high_priority_x_waiters, - 1); - } - return; /* Locking succeeded */ - } - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread " ULINTPF " OS wait for rw-x-lock at %p" - " '%s'\n", - os_thread_pf(os_thread_get_curr_id()), (void*) lock, - lock->lock_name); - } - - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1); - - sync_array_wait_event(sync_arr, index); - - if (prio_lock) { - - os_atomic_decrement_ulint(&prio_lock->high_priority_x_waiters, - 1); - } - - i = 0; - goto lock_loop; -} - -/******************************************************************//** -NOTE! Use the corresponding macro, not directly this function! Lock a priority -rw-lock in exclusive mode for the current thread. If the rw-lock is locked -in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock, before suspending the thread. 
If the same thread has an x-lock -on the rw-lock, locking succeed, with the following exception: if pass != 0, -only a single x-lock may be taken on the lock. NOTE: If the same thread has -an s-lock, locking does not succeed! */ -UNIV_INTERN -void -rw_lock_x_lock_func( -/*================*/ - prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */ - ulint pass, /*!< in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/*!< in: file name where lock requested */ - ulint line) /*!< in: line where requested */ -{ - rw_lock_x_lock_func(&lock->base_lock, pass, file_name, line, true, - srv_current_thread_priority > 0); -} - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Acquires the debug mutex. We cannot use the mutex defined in sync0sync, -because the debug mutex is also acquired in sync0arr while holding the OS -mutex protecting the sync array, and the ordinary mutex_enter might -recursively call routines in sync0arr, leading to a deadlock on the OS -mutex. */ -UNIV_INTERN -void -rw_lock_debug_mutex_enter(void) -/*===========================*/ -{ - os_fast_mutex_lock(&rw_lock_debug_mutex); -} - -/******************************************************************//** -Releases the debug mutex. */ -UNIV_INTERN -void -rw_lock_debug_mutex_exit(void) -/*==========================*/ -{ - os_fast_mutex_unlock(&rw_lock_debug_mutex); -} - -/******************************************************************//** -Inserts the debug information for an rw-lock. 
*/ -UNIV_INTERN -void -rw_lock_add_debug_info( -/*===================*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint pass, /*!< in: pass value */ - ulint lock_type, /*!< in: lock type */ - const char* file_name, /*!< in: file where requested */ - ulint line) /*!< in: line where requested */ -{ - rw_lock_debug_t* info; - - ut_ad(lock); - ut_ad(file_name); - - info = rw_lock_debug_create(); - - rw_lock_debug_mutex_enter(); - - info->file_name = file_name; - info->line = line; - info->lock_type = lock_type; - info->thread_id = os_thread_get_curr_id(); - info->pass = pass; - - UT_LIST_ADD_FIRST(list, lock->debug_list, info); - - rw_lock_debug_mutex_exit(); - - if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_add_level(lock, lock->level, - lock_type == RW_LOCK_EX - && lock->lock_word < 0); - } -} - -/******************************************************************//** -Removes a debug information struct for an rw-lock. */ -UNIV_INTERN -void -rw_lock_remove_debug_info( -/*======================*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint pass, /*!< in: pass value */ - ulint lock_type) /*!< in: lock type */ -{ - rw_lock_debug_t* info; - - ut_ad(lock); - - if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_reset_level(lock); - } - - rw_lock_debug_mutex_enter(); - - info = UT_LIST_GET_FIRST(lock->debug_list); - - while (info != NULL) { - if ((pass == info->pass) - && ((pass != 0) - || os_thread_eq(info->thread_id, - os_thread_get_curr_id())) - && (info->lock_type == lock_type)) { - - /* Found! */ - UT_LIST_REMOVE(list, lock->debug_list, info); - rw_lock_debug_mutex_exit(); - - rw_lock_debug_free(info); - - return; - } - - info = UT_LIST_GET_NEXT(list, info); - } - - ut_error; -} -#endif /* UNIV_SYNC_DEBUG */ - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Checks if the thread has locked the rw-lock in the specified mode, with -the pass value == 0. 
-@return TRUE if locked */ -UNIV_INTERN -ibool -rw_lock_own( -/*========*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -{ - rw_lock_debug_t* info; - - ut_ad(lock); - ut_ad(rw_lock_validate(lock)); - - rw_lock_debug_mutex_enter(); - - info = UT_LIST_GET_FIRST(lock->debug_list); - - while (info != NULL) { - - if (os_thread_eq(info->thread_id, os_thread_get_curr_id()) - && (info->pass == 0) - && (info->lock_type == lock_type)) { - - rw_lock_debug_mutex_exit(); - /* Found! */ - - return(TRUE); - } - - info = UT_LIST_GET_NEXT(list, info); - } - rw_lock_debug_mutex_exit(); - - return(FALSE); -} - -/******************************************************************//** -Checks if the thread has locked the priority rw-lock in the specified mode, -with the pass value == 0. */ -UNIV_INTERN -ibool -rw_lock_own( -/*========*/ - prio_rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -{ - return(rw_lock_own(&lock->base_lock, lock_type)); -} - -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Checks if somebody has locked the rw-lock in the specified mode. -@return TRUE if locked */ -UNIV_INTERN -ibool -rw_lock_is_locked( -/*==============*/ - rw_lock_t* lock, /*!< in: rw-lock */ - ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -{ - ibool ret = FALSE; - - ut_ad(lock); - ut_ad(rw_lock_validate(lock)); - - if (lock_type == RW_LOCK_SHARED) { - if (rw_lock_get_reader_count(lock) > 0) { - ret = TRUE; - } - } else if (lock_type == RW_LOCK_EX) { - if (rw_lock_get_writer(lock) == RW_LOCK_EX) { - ret = TRUE; - } - } else { - ut_error; - } - - return(ret); -} - -#ifdef UNIV_SYNC_DEBUG -/***************************************************************//** -Prints debug info of currently locked rw-locks. 
*/ -UNIV_INTERN -void -rw_lock_list_print_info( -/*====================*/ - FILE* file) /*!< in: file where to print */ -{ - rw_lock_t* lock; - ulint count = 0; - rw_lock_debug_t* info; - - mutex_enter(&rw_lock_list_mutex); - - fputs("-------------\n" - "RW-LATCH INFO\n" - "-------------\n", file); - - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { - - count++; - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex_enter(&(lock->mutex)); -#endif - if (lock->lock_word != X_LOCK_DECR) { - - fprintf(file, "RW-LOCK: %p ", (void*) lock); - - if (rw_lock_get_waiters(lock)) { - fputs(" Waiters for the lock exist\n", file); - } else { - putc('\n', file); - } - - rw_lock_debug_mutex_enter(); - info = UT_LIST_GET_FIRST(lock->debug_list); - while (info != NULL) { - rw_lock_debug_print(file, info); - info = UT_LIST_GET_NEXT(list, info); - } - rw_lock_debug_mutex_exit(); - } -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - mutex_exit(&(lock->mutex)); -#endif - - lock = UT_LIST_GET_NEXT(list, lock); - } - - fprintf(file, "Total number of rw-locks %ld\n", count); - mutex_exit(&rw_lock_list_mutex); -} - -/***************************************************************//** -Prints debug info of an rw-lock. */ -UNIV_INTERN -void -rw_lock_print( -/*==========*/ - rw_lock_t* lock) /*!< in: rw-lock */ -{ - rw_lock_debug_t* info; - - fprintf(stderr, - "-------------\n" - "RW-LATCH INFO\n" - "RW-LATCH: %p ", (void*) lock); - -#ifndef INNODB_RW_LOCKS_USE_ATOMICS - /* We used to acquire lock->mutex here, but it would cause a - recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG - is defined. Since this function is only invoked from - sync_thread_levels_g(), let us choose the smaller evil: - performing dirty reads instead of causing bogus deadlocks or - assertion failures. 
*/ -#endif - if (lock->lock_word != X_LOCK_DECR) { - - if (rw_lock_get_waiters(lock)) { - fputs(" Waiters for the lock exist\n", stderr); - } else { - putc('\n', stderr); - } - - rw_lock_debug_mutex_enter(); - info = UT_LIST_GET_FIRST(lock->debug_list); - while (info != NULL) { - rw_lock_debug_print(stderr, info); - info = UT_LIST_GET_NEXT(list, info); - } - rw_lock_debug_mutex_exit(); - } -} - -/*********************************************************************//** -Prints info of a debug struct. */ -UNIV_INTERN -void -rw_lock_debug_print( -/*================*/ - FILE* f, /*!< in: output stream */ - rw_lock_debug_t* info) /*!< in: debug struct */ -{ - ulint rwt; - - rwt = info->lock_type; - - fprintf(f, "Locked: thread %lu file %s line %lu ", - (ulong) os_thread_pf(info->thread_id), info->file_name, - (ulong) info->line); - if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", f); - } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", f); - } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", f); - } else { - ut_error; - } - if (info->pass != 0) { - fprintf(f, " pass value %lu", (ulong) info->pass); - } - putc('\n', f); -} - -/***************************************************************//** -Returns the number of currently locked rw-locks. Works only in the debug -version. 
-@return number of locked rw-locks */ -UNIV_INTERN -ulint -rw_lock_n_locked(void) -/*==================*/ -{ - rw_lock_t* lock; - ulint count = 0; - - mutex_enter(&rw_lock_list_mutex); - - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { - - if (lock->lock_word != X_LOCK_DECR) { - count++; - } - - lock = UT_LIST_GET_NEXT(list, lock); - } - - mutex_exit(&rw_lock_list_mutex); - - return(count); -} -#endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/xtradb/sync/sync0sync.cc b/storage/xtradb/sync/sync0sync.cc deleted file mode 100644 index 37ac3c56fff..00000000000 --- a/storage/xtradb/sync/sync0sync.cc +++ /dev/null @@ -1,1705 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2008, Google Inc. -Copyright (c) 2017, MariaDB Corporation. - -Portions of this file contain modifications contributed and copyrighted by -Google, Inc. Those modifications are gratefully acknowledged and are described -briefly in the InnoDB documentation. The contributions by Google are -incorporated with their permission, and subject to the conditions contained in -the file COPYING.Google. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file sync/sync0sync.cc -Mutex, the basic synchronization primitive - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#include "sync0sync.h" -#ifdef UNIV_NONINL -#include "sync0sync.ic" -#include "sync0arr.ic" -#endif - -#include "sync0rw.h" -#include "buf0buf.h" -#include "srv0srv.h" -#include "btr0types.h" -#include "buf0types.h" -#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ -#ifdef UNIV_SYNC_DEBUG -# include "srv0start.h" /* srv_is_being_started */ -#endif /* UNIV_SYNC_DEBUG */ -#include "ha_prototypes.h" -#include "my_cpu.h" - -#include <vector> - -/* - REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX - ============================================ - -Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc -takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995 -Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to -implement our own efficient spin lock mutex. Future operating systems may -provide efficient spin locks, but we cannot count on that. - -Another reason for implementing a spin lock is that on multiprocessor systems -it can be more efficient for a processor to run a loop waiting for the -semaphore to be released than to switch to a different thread. A thread switch -takes 25 us on both platforms mentioned above. See Gray and Reuter's book -Transaction processing for background. - -How long should the spin loop last before suspending the thread? On a -uniprocessor, spinning does not help at all, because if the thread owning the -mutex is not executing, it cannot be released. 
Spinning actually wastes -resources. - -On a multiprocessor, we do not know if the thread owning the mutex is -executing or not. Thus it would make sense to spin as long as the operation -guarded by the mutex would typically last assuming that the thread is -executing. If the mutex is not released by that time, we may assume that the -thread owning the mutex is not executing and suspend the waiting thread. - -A typical operation (where no i/o is involved) guarded by a mutex or a read-write -lock may last 1 - 20 us on the current Pentium platform. The longest -operations are the binary searches on an index node. - -We conclude that the best choice is to set the spin time at 20 us. Then the -system should work well on a multiprocessor. On a uniprocessor we have to -make sure that thread switches due to mutex collisions are not frequent, -i.e., they do not happen every 100 us or so, because that wastes too much -resources. If the thread switches are not frequent, the 20 us wasted in spin -loop is not too much. - -Empirical studies on the effect of spin time should be done for different -platforms. - - - IMPLEMENTATION OF THE MUTEX - =========================== - -For background, see Curt Schimmel's book on Unix implementation on modern -architectures. The key points in the implementation are atomicity and -serialization of memory accesses. The test-and-set instruction (XCHG in -Pentium) must be atomic. As new processors may have weak memory models, also -serialization of memory references may be necessary. The successor of Pentium, -P6, has at least one mode where the memory model is weak. As far as we know, -in Pentium all memory accesses are serialized in the program order and we do -not have to worry about the memory model. On other processors there are -special machine instructions called a fence, memory barrier, or storage -barrier (STBAR in Sparc), which can be used to serialize the memory accesses -to happen in program order relative to the fence instruction.
- -Leslie Lamport has devised a "bakery algorithm" to implement a mutex without -the atomic test-and-set, but his algorithm should be modified for weak memory -models. We do not use Lamport's algorithm, because we guess it is slower than -the atomic test-and-set. - -Our mutex implementation works as follows: We perform the atomic -test-and-set instruction on the memory word. If the test returns zero, we -know we got the lock first. If the test returns not zero, some other thread -was quicker and got the lock: then we spin in a loop reading the memory word, -waiting for it to become zero. It is wise to just read the word in the loop, not -perform numerous test-and-set instructions, because they generate memory -traffic between the cache and the main memory. The read loop can just access -the cache, saving bus bandwidth. - -If we cannot acquire the mutex lock in the specified time, we reserve a cell -in the wait array, set the waiters byte in the mutex to 1. To avoid a race -condition, after setting the waiters byte and before suspending the waiting -thread, we still have to check that the mutex is reserved, because it may -have happened that the thread which was holding the mutex has just released -it and did not see the waiters byte set to 1, a case which would lead the -other thread to an infinite wait. - -LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some -====== -thread will eventually call os_event_set() on that particular event. -Thus no infinite wait is possible in this case. - -Proof: After making the reservation the thread sets the waiters field in the -mutex to 1. Then it checks that the mutex is still reserved by some thread, -or it reserves the mutex for itself. In any case, some thread (which may be -also some earlier thread, not necessarily the one currently holding the mutex) -will set the waiters field to 0 in mutex_exit, and then call -os_event_set() with the mutex as an argument. -Q.E.D.
- -LEMMA 2: If an os_event_set() call is made after some thread has called -====== -the os_event_reset() and before it starts wait on that event, the call -will not be lost to the second thread. This is true even if there is an -intervening call to os_event_reset() by another thread. -Thus no infinite wait is possible in this case. - -Proof (non-windows platforms): os_event_reset() returns a monotonically -increasing value of signal_count. This value is increased at every -call of os_event_set(). If thread A has called os_event_reset() followed -by thread B calling os_event_set() and then some other thread C calling -os_event_reset(), the is_set flag of the event will be set to FALSE; -but now if thread A calls os_event_wait_low() with the signal_count -value returned from the earlier call of os_event_reset(), it will -return immediately without waiting. -Q.E.D. - -Proof (windows): If there is a writer thread which is forced to wait for -the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX -The design of rw_lock ensures that there is one and only one thread -that is able to change the state to RW_LOCK_WAIT_EX and this thread is -guaranteed to acquire the lock after it is released by the current -holders and before any other waiter gets the lock. -On windows this thread waits on a separate event i.e.: wait_ex_event. -Since only one thread can wait on this event there is no chance -of this event getting reset before the writer starts wait on it. -Therefore, this thread is guaranteed to catch the os_event_set() -signalled unconditionally at the release of the lock. -Q.E.D. */ - -/* Number of spin waits on mutexes: for performance monitoring */ - -/** The number of iterations in the mutex_spin_wait() spin loop. -Intended for performance monitoring. */ -UNIV_INTERN ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count; -/** The number of mutex_spin_wait() calls. Intended for -performance monitoring.
*/ -UNIV_INTERN ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count; -/** The number of OS waits in mutex_spin_wait(). Intended for -performance monitoring. */ -UNIV_INTERN ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count; -/** The number of mutex_exit() calls. Intended for performance -monitoring. */ -UNIV_INTERN ib_int64_t mutex_exit_count; - -/** This variable is set to TRUE when sync_init is called */ -UNIV_INTERN ibool sync_initialized = FALSE; - -#ifdef UNIV_SYNC_DEBUG -/** An acquired mutex or rw-lock and its level in the latching order */ -struct sync_level_t; -/** Mutexes or rw-locks held by a thread */ -struct sync_thread_t; - -/** The latch levels currently owned by threads are stored in this data -structure; the size of this array is OS_THREAD_MAX_N */ - -UNIV_INTERN sync_thread_t* sync_thread_level_arrays; - -/** Mutex protecting sync_thread_level_arrays */ -UNIV_INTERN ib_mutex_t sync_thread_mutex; - -# ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key; -# endif /* UNIV_PFS_MUTEX */ -#endif /* UNIV_SYNC_DEBUG */ - -/** Global list of database mutexes (not OS mutexes) created. */ -UNIV_INTERN ut_list_base_node_t mutex_list; - -/** Mutex protecting the mutex_list variable */ -UNIV_INTERN ib_mutex_t mutex_list_mutex; - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_SYNC_DEBUG -/** Latching order checks start when this is set TRUE */ -UNIV_INTERN ibool sync_order_checks_on = FALSE; - -/** Array for tracking sync levels per thread. 
*/ -typedef std::vector<sync_level_t> sync_arr_t; - - -/** Mutexes or rw-locks held by a thread */ -struct sync_thread_t{ - os_thread_id_t id; /*!< OS thread id */ - sync_arr_t* levels; /*!< level array for this thread; if - this is NULL this slot is unused */ -}; - -/** An acquired mutex or rw-lock and its level in the latching order */ -struct sync_level_t{ - void* latch; /*!< pointer to a mutex or an - rw-lock; NULL means that - the slot is empty */ - ulint level; /*!< level of the latch in the - latching order. This field is - overloaded to serve as a node in a - linked list of free nodes too. When - latch == NULL then this will contain - the ordinal value of the next free - element */ -}; -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Creates, or rather, initializes a mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. 
*/ -UNIV_INTERN -void -mutex_create_func( -/*==============*/ - ib_mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where created */ - ulint cline, /*!< in: file line where created */ - const char* cmutex_name) /*!< in: mutex name */ -{ -#if defined(HAVE_ATOMIC_BUILTINS) - mutex_reset_lock_word(mutex); -#else - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex); - mutex->lock_word = 0; -#endif - os_event_create(&mutex->event); - mutex_set_waiters(mutex, 0); -#ifdef UNIV_DEBUG - mutex->magic_n = MUTEX_MAGIC_N; - mutex->level = level; -#endif /* UNIV_DEBUG */ - - mutex->line = 0; - mutex->file_name = "not yet reserved"; - mutex->cfile_name = cfile_name; - mutex->cline = cline; - mutex->count_os_wait = 0; - mutex->cmutex_name= cmutex_name; - - /* Check that lock_word is aligned; this is important on Intel */ - ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0); - - /* NOTE! The very first mutexes are not put to the mutex list */ - - if (mutex == &mutex_list_mutex -#ifdef UNIV_SYNC_DEBUG - || mutex == &sync_thread_mutex -#endif /* UNIV_SYNC_DEBUG */ - ) { - - return; - } - - mutex_enter(&mutex_list_mutex); - - ut_ad(UT_LIST_GET_LEN(mutex_list) == 0 - || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N); - - UT_LIST_ADD_FIRST(list, mutex_list, mutex); - - mutex_exit(&mutex_list_mutex); -} - -/******************************************************************//** -Creates, or rather, initializes a priority mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. 
*/ -UNIV_INTERN -void -mutex_create_func( -/*==============*/ - ib_prio_mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /*!< in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /*!< in: file name where - created */ - ulint cline, /*!< in: file line where - created */ - const char* cmutex_name) /*!< in: mutex name */ -{ - mutex_create_func(&mutex->base_mutex, -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - level, -#endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - cfile_name, - cline, - cmutex_name); - mutex->high_priority_waiters = 0; - os_event_create(&mutex->high_priority_event); -} - -/******************************************************************//** -NOTE! Use the corresponding macro mutex_free(), not directly this function! -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a mutex object from the mutex list. The mutex -is checked to be in the reset state. 
*/ -UNIV_INTERN -void -mutex_free_func( -/*============*/ - ib_mutex_t* mutex) /*!< in: mutex */ -{ - ut_ad(mutex_validate(mutex)); - ut_a(mutex_get_lock_word(mutex) == 0); - ut_a(mutex_get_waiters(mutex) == 0); - -#ifdef UNIV_MEM_DEBUG - if (mutex == &mem_hash_mutex) { - ut_ad(UT_LIST_GET_LEN(mutex_list) == 1); - ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex); - UT_LIST_REMOVE(list, mutex_list, mutex); - goto func_exit; - } -#endif /* UNIV_MEM_DEBUG */ - - if (mutex != &mutex_list_mutex -#ifdef UNIV_SYNC_DEBUG - && mutex != &sync_thread_mutex -#endif /* UNIV_SYNC_DEBUG */ - ) { - - mutex_enter(&mutex_list_mutex); - - ut_ad(!UT_LIST_GET_PREV(list, mutex) - || UT_LIST_GET_PREV(list, mutex)->magic_n - == MUTEX_MAGIC_N); - ut_ad(!UT_LIST_GET_NEXT(list, mutex) - || UT_LIST_GET_NEXT(list, mutex)->magic_n - == MUTEX_MAGIC_N); - - UT_LIST_REMOVE(list, mutex_list, mutex); - - mutex_exit(&mutex_list_mutex); - } - - os_event_free(&mutex->event, false); -#ifdef UNIV_MEM_DEBUG -func_exit: -#endif /* UNIV_MEM_DEBUG */ -#if !defined(HAVE_ATOMIC_BUILTINS) - os_fast_mutex_free(&(mutex->os_fast_mutex)); -#endif - /* If we free the mutex protecting the mutex list (freeing is - not necessary), we have to reset the magic number AFTER removing - it from the list. */ -#ifdef UNIV_DEBUG - mutex->magic_n = 0; -#endif /* UNIV_DEBUG */ - return; -} - -/******************************************************************//** -NOTE! Use the corresponding macro mutex_free(), not directly this function! -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a priority mutex object from the mutex list. The -mutex is checked to be in the reset state. 
*/ -UNIV_INTERN -void -mutex_free_func( -/*============*/ - ib_prio_mutex_t* mutex) /*!< in: mutex */ -{ - ut_a(mutex->high_priority_waiters == 0); - os_event_free(&mutex->high_priority_event, false); - mutex_free_func(&mutex->base_mutex); -} - -/********************************************************************//** -NOTE! Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. If the lock is not -acquired immediately, returns with return value 1. -@return 0 if succeed, 1 if not */ -UNIV_INTERN -ulint -mutex_enter_nowait_func( -/*====================*/ - ib_mutex_t* mutex, /*!< in: pointer to mutex */ - const char* file_name MY_ATTRIBUTE((unused)), - /*!< in: file name where mutex - requested */ - ulint line MY_ATTRIBUTE((unused))) - /*!< in: line where requested */ -{ - ut_ad(mutex_validate(mutex)); - - if (!ib_mutex_test_and_set(mutex)) { - - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } - - return(0); /* Succeeded! */ - } - - return(1); -} - -#ifdef UNIV_DEBUG -/******************************************************************//** -Checks that the mutex has been initialized. -@return TRUE */ -UNIV_INTERN -ibool -mutex_validate( -/*===========*/ - const ib_mutex_t* mutex) /*!< in: mutex */ -{ - ut_a(mutex); - - if (mutex->magic_n != MUTEX_MAGIC_N) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Mutex %p not initialized file %s line %lu.", - mutex, mutex->cfile_name, mutex->cline); - } - ut_ad(mutex->magic_n == MUTEX_MAGIC_N); - - return(TRUE); -} - -/******************************************************************//** -Checks that the current thread owns the mutex. Works only in the debug -version. 
-@return TRUE if owns */ -UNIV_INTERN -ibool -mutex_own( -/*======*/ - const ib_mutex_t* mutex) /*!< in: mutex */ -{ - ut_ad(mutex_validate(mutex)); - - return(mutex_get_lock_word(mutex) == 1 - && os_thread_eq(mutex->thread_id, os_thread_get_curr_id())); -} - -/******************************************************************//** -Checks that the current thread owns the priority mutex. Works only -in the debug version. -@return TRUE if owns */ -UNIV_INTERN -ibool -mutex_own( -/*======*/ - const ib_prio_mutex_t* mutex) /*!< in: priority mutex */ -{ - return mutex_own(&mutex->base_mutex); -} - -#endif /* UNIV_DEBUG */ - -/******************************************************************//** -Sets the waiters field in a mutex. */ -UNIV_INTERN -void -mutex_set_waiters( -/*==============*/ - ib_mutex_t* mutex, /*!< in: mutex */ - ulint n) /*!< in: value to set */ -{ - volatile ulint* ptr; /* declared volatile to ensure that - the value is stored to memory */ - ut_ad(mutex); - - ptr = &(mutex->waiters); - - *ptr = n; /* Here we assume that the write of a single - word in memory is atomic */ -} - -/******************************************************************//** -Reserves a mutex or a priority mutex for the current thread. If the mutex is -reserved, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), -waiting for the mutex before suspending the thread. */ -UNIV_INTERN -void -mutex_spin_wait( -/*============*/ - void* _mutex, /*!< in: pointer to mutex */ - bool high_priority, /*!< in: whether the mutex is a - priority mutex with high priority - specified */ - const char* file_name, /*!< in: file name where mutex - requested */ - ulint line) /*!< in: line where requested */ -{ - ulint i; /* spin round count */ - ulint index; /* index of the reserved wait cell */ - sync_array_t* sync_arr; - size_t counter_index; - /* The typecast below is performed for some of the priority mutexes - too, when !high_priority. 
This exploits the fact that regular mutex is - a prefix of the priority mutex in memory. */ - ib_mutex_t* mutex = (ib_mutex_t *) _mutex; - ib_prio_mutex_t* prio_mutex = NULL; - - counter_index = (size_t) os_thread_get_curr_id(); - - ut_ad(mutex); - - /* This update is not thread safe, but we don't mind if the count - isn't exact. Moved out of ifdef that follows because we are willing - to sacrifice the cost of counting this as the data is valuable. - Count the number of calls to mutex_spin_wait. */ - mutex_spin_wait_count.add(counter_index, 1); - -mutex_loop: - - i = 0; - - /* Spin waiting for the lock word to become zero. Note that we do - not have to assume that the read access to the lock word is atomic, - as the actual locking is always committed with atomic test-and-set. - In reality, however, all processors probably have an atomic read of - a memory word. */ - -spin_loop: - - HMT_low(); - os_rmb; - while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - } - i++; - } - HMT_medium(); - - if (i >= SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } - - mutex_spin_round_count.add(counter_index, i); - - if (ib_mutex_test_and_set(mutex) == 0) { - /* Succeeded! */ - - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } - - return; - } - - /* We may end up with a situation where lock_word is 0 but the OS - fast mutex is still reserved. On FreeBSD the OS does not seem to - schedule a thread which is constantly calling pthread_mutex_trylock - (in ib_mutex_test_and_set implementation). Then we could end up - spinning here indefinitely. The following 'i++' stops this infinite - spin. */ - - i++; - - if (i < SYNC_SPIN_ROUNDS) { - goto spin_loop; - } - - sync_arr = sync_array_get_and_reserve_cell(mutex, - high_priority - ? 
SYNC_PRIO_MUTEX - : SYNC_MUTEX, - file_name, line, &index); - - /* The memory order of the array reservation and the change in the - waiters field is important: when we suspend a thread, we first - reserve the cell and then set waiters field to 1. When threads are - released in mutex_exit, the waiters field is first set to zero and - then the event is set to the signaled state. */ - - if (high_priority) { - - prio_mutex = reinterpret_cast<ib_prio_mutex_t *>(_mutex); - os_atomic_increment_ulint(&prio_mutex->high_priority_waiters, - 1); - } else { - - mutex_set_waiters(mutex, 1); - } - - /* Make sure waiters store won't pass over mutex_test_and_set */ -#ifdef __powerpc__ - os_mb; -#endif - - /* Try to reserve still a few times */ - for (i = 0; i < 4; i++) { - if (ib_mutex_test_and_set(mutex) == 0) { - /* Succeeded! Free the reserved wait cell */ - - sync_array_free_cell(sync_arr, index); - - mutex->thread_id = os_thread_get_curr_id(); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - if (srv_instrument_semaphores) { - mutex->file_name = file_name; - mutex->line = line; - } - - if (prio_mutex) { - os_atomic_decrement_ulint( - &prio_mutex->high_priority_waiters, - 1); - } - return; - - /* Note that in this case we leave the waiters field - set to 1. We cannot reset it to zero, as we do not - know if there are other waiters. */ - } - } - - /* Now we know that there has been some thread holding the mutex - after the change in the wait array and the waiters field was made. - Now there is no risk of infinite wait on the event. */ - - mutex_os_wait_count.add(counter_index, 1); - - mutex->count_os_wait++; - - sync_array_wait_event(sync_arr, index); - - if (prio_mutex) { - - os_atomic_decrement_ulint(&prio_mutex->high_priority_waiters, - 1); - } - - goto mutex_loop; -} - -/******************************************************************//** -Releases the threads waiting in the primary wait array for this mutex. 
*/ -UNIV_INTERN -void -mutex_signal_object( -/*================*/ - ib_mutex_t* mutex) /*!< in: mutex */ -{ - mutex_set_waiters(mutex, 0); - - /* The memory order of resetting the waiters field and - signaling the object is important. See LEMMA 1 above. */ - os_event_set(&mutex->event); - sync_array_object_signalled(); -} - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Sets the debug information for a reserved mutex. */ -UNIV_INTERN -void -mutex_set_debug_info( -/*=================*/ - ib_mutex_t* mutex, /*!< in: mutex */ - const char* file_name, /*!< in: file where requested */ - ulint line) /*!< in: line where requested */ -{ - ut_ad(mutex); - ut_ad(file_name); - - sync_thread_add_level(mutex, mutex->level, FALSE); - - mutex->file_name = file_name; - mutex->line = line; -} - -/******************************************************************//** -Gets the debug information for a reserved mutex. */ -UNIV_INTERN -void -mutex_get_debug_info( -/*=================*/ - ib_mutex_t* mutex, /*!< in: mutex */ - const char** file_name, /*!< out: file where requested */ - ulint* line, /*!< out: line where requested */ - os_thread_id_t* thread_id) /*!< out: id of the thread which owns - the mutex */ -{ - ut_ad(mutex); - - *file_name = mutex->file_name; - *line = mutex->line; - *thread_id = mutex->thread_id; -} - -/******************************************************************//** -Prints debug info of currently reserved mutexes. 
*/ -static -void -mutex_list_print_info( -/*==================*/ - FILE* file) /*!< in: file where to print */ -{ - ib_mutex_t* mutex; - const char* file_name; - ulint line; - os_thread_id_t thread_id; - ulint count = 0; - - fputs("----------\n" - "MUTEX INFO\n" - "----------\n", file); - - mutex_enter(&mutex_list_mutex); - - mutex = UT_LIST_GET_FIRST(mutex_list); - - while (mutex != NULL) { - count++; - - if (mutex_get_lock_word(mutex) != 0) { - mutex_get_debug_info(mutex, &file_name, &line, - &thread_id); - fprintf(file, - "Locked mutex: addr %p thread %ld" - " file %s line %ld\n", - (void*) mutex, os_thread_pf(thread_id), - file_name, line); - } - - mutex = UT_LIST_GET_NEXT(list, mutex); - } - - fprintf(file, "Total number of mutexes %ld\n", count); - - mutex_exit(&mutex_list_mutex); -} - -/******************************************************************//** -Counts currently reserved mutexes. Works only in the debug version. -@return number of reserved mutexes */ -UNIV_INTERN -ulint -mutex_n_reserved(void) -/*==================*/ -{ - ib_mutex_t* mutex; - ulint count = 0; - - mutex_enter(&mutex_list_mutex); - - for (mutex = UT_LIST_GET_FIRST(mutex_list); - mutex != NULL; - mutex = UT_LIST_GET_NEXT(list, mutex)) { - - if (mutex_get_lock_word(mutex) != 0) { - - count++; - } - } - - mutex_exit(&mutex_list_mutex); - - ut_a(count >= 1); - - /* Subtract one, because this function itself was holding - one mutex (mutex_list_mutex) */ - - return(count - 1); -} - -/******************************************************************//** -Returns TRUE if no mutex or rw-lock is currently locked. Works only in -the debug version. -@return TRUE if no mutexes and rw-locks reserved */ -UNIV_INTERN -ibool -sync_all_freed(void) -/*================*/ -{ - return(mutex_n_reserved() + rw_lock_n_locked() == 0); -} - -/******************************************************************//** -Looks for the thread slot for the calling thread. 
-@return pointer to thread slot, NULL if not found */ -static -sync_thread_t* -sync_thread_level_arrays_find_slot(void) -/*====================================*/ - -{ - ulint i; - os_thread_id_t id; - - id = os_thread_get_curr_id(); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - sync_thread_t* slot; - - slot = &sync_thread_level_arrays[i]; - - if (slot->levels && os_thread_eq(slot->id, id)) { - - return(slot); - } - } - - return(NULL); -} - -/******************************************************************//** -Looks for an unused thread slot. -@return pointer to thread slot */ -static -sync_thread_t* -sync_thread_level_arrays_find_free(void) -/*====================================*/ - -{ - ulint i; - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - sync_thread_t* slot; - - slot = &sync_thread_level_arrays[i]; - - if (slot->levels == NULL) { - - return(slot); - } - } - - return(NULL); -} - -/******************************************************************//** -Print warning. */ -static -void -sync_print_warning( -/*===============*/ - const sync_level_t* slot) /*!< in: slot for which to - print warning */ -{ - ib_mutex_t* mutex; - - mutex = static_cast<ib_mutex_t*>(slot->latch); - - if (mutex->magic_n == MUTEX_MAGIC_N) { - fprintf(stderr, - "Mutex '%s'\n", - mutex->cmutex_name); - - if (mutex_get_lock_word(mutex) != 0) { - ulint line; - const char* file_name; - os_thread_id_t thread_id; - - mutex_get_debug_info( - mutex, &file_name, &line, &thread_id); - - fprintf(stderr, - "InnoDB: Locked mutex:" - " addr %p thread %ld file %s line %ld\n", - (void*) mutex, os_thread_pf(thread_id), - file_name, (ulong) line); - } else { - fputs("Not locked\n", stderr); - } - } else { - rw_lock_t* lock; - - lock = static_cast<rw_lock_t*>(slot->latch); - - rw_lock_print(lock); - } -} - -/******************************************************************//** -Checks if all the level values stored in the level array are greater than -the given limit. 
-@return TRUE if all greater */ -static -ibool -sync_thread_levels_g( -/*=================*/ - sync_arr_t* arr, /*!< in: pointer to level array for an OS - thread */ - ulint limit, /*!< in: level limit */ - ulint warn) /*!< in: TRUE=display a diagnostic message */ -{ - ulint i; - - for (i = 0; i < arr->size(); i++) { - const sync_level_t* slot; - - slot = (const sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL && slot->level <= limit) { - if (warn) { - fprintf(stderr, - "InnoDB: sync levels should be" - " > %lu but a level is %lu\n", - (ulong) limit, (ulong) slot->level); - - sync_print_warning(slot); - } - - return(FALSE); - } - } - - return(TRUE); -} - -/******************************************************************//** -Checks if the level value is stored in the level array. -@return slot if found or NULL */ -static -const sync_level_t* -sync_thread_levels_contain( -/*=======================*/ - sync_arr_t* arr, /*!< in: pointer to level array for an OS - thread */ - ulint level) /*!< in: level */ -{ - ulint i; - - for (i = 0; i < arr->size(); i++) { - const sync_level_t* slot; - - slot = (const sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL && slot->level == level) { - - return(slot); - } - } - - return(NULL); -} - -/******************************************************************//** -Checks if the level array for the current thread contains a -mutex or rw-latch at the specified level. 
-@return a matching latch, or NULL if not found */ -UNIV_INTERN -void* -sync_thread_levels_contains( -/*========================*/ - ulint level) /*!< in: latching order level - (SYNC_DICT, ...)*/ -{ - ulint i; - sync_arr_t* arr; - sync_thread_t* thread_slot; - - if (!sync_order_checks_on) { - - return(NULL); - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - mutex_exit(&sync_thread_mutex); - - return(NULL); - } - - arr = thread_slot->levels; - - for (i = 0; i < arr->size(); i++) { - sync_level_t* slot; - - slot = (sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL && slot->level == level) { - - mutex_exit(&sync_thread_mutex); - return(slot->latch); - } - } - - mutex_exit(&sync_thread_mutex); - - return(NULL); -} - -/******************************************************************//** -Checks that the level array for the current thread is empty. -@return a latch, or NULL if empty except the exceptions specified below */ -UNIV_INTERN -void* -sync_thread_levels_nonempty_gen( -/*============================*/ - ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is - allowed to be owned by the thread */ -{ - ulint i; - sync_arr_t* arr; - sync_thread_t* thread_slot; - - if (!sync_order_checks_on) { - - return(NULL); - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - mutex_exit(&sync_thread_mutex); - - return(NULL); - } - - arr = thread_slot->levels; - - for (i = 0; i < arr->size(); ++i) { - const sync_level_t* slot; - - slot = (const sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL - && (!dict_mutex_allowed - || (slot->level != SYNC_DICT - && slot->level != SYNC_DICT_OPERATION - && slot->level != SYNC_FTS_CACHE))) { - - mutex_exit(&sync_thread_mutex); - ut_error; - - return(slot->latch); - } - } - - mutex_exit(&sync_thread_mutex); - - return(NULL); -} - 
-/******************************************************************//** -Checks if the level array for the current thread is empty, -except for the btr_search_latch. -@return a latch, or NULL if empty except the exceptions specified below */ -UNIV_INTERN -void* -sync_thread_levels_nonempty_trx( -/*============================*/ - ibool has_search_latch) - /*!< in: TRUE if and only if the thread - is supposed to hold btr_search_latch */ -{ - ulint i; - sync_arr_t* arr; - sync_thread_t* thread_slot; - - if (!sync_order_checks_on) { - - return(NULL); - } - - ut_a(!has_search_latch - || sync_thread_levels_contains(SYNC_SEARCH_SYS)); - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - mutex_exit(&sync_thread_mutex); - - return(NULL); - } - - arr = thread_slot->levels; - - for (i = 0; i < arr->size(); ++i) { - const sync_level_t* slot; - - slot = (const sync_level_t*)&(arr->at(i)); - - if (slot->latch != NULL - && (!has_search_latch - || slot->level != SYNC_SEARCH_SYS)) { - - mutex_exit(&sync_thread_mutex); - ut_error; - - return(slot->latch); - } - } - - mutex_exit(&sync_thread_mutex); - - return(NULL); -} - -/******************************************************************//** -Adds a latch and its level in the thread level array. Allocates the memory -for the array if called first time for this OS thread. Makes the checks -against other latch levels stored in the array for this thread. 
*/ -UNIV_INTERN -void -sync_thread_add_level( -/*==================*/ - void* latch, /*!< in: pointer to a mutex or an rw-lock */ - ulint level, /*!< in: level in the latching order; if - SYNC_LEVEL_VARYING, nothing is done */ - ibool relock) /*!< in: TRUE if re-entering an x-lock */ -{ - sync_arr_t* array; - sync_thread_t* thread_slot; - sync_level_t sync_level; - - if (!sync_order_checks_on) { - - return; - } - - if ((latch == (void*) &sync_thread_mutex) - || (latch == (void*) &mutex_list_mutex) - || (latch == (void*) &rw_lock_debug_mutex) - || (latch == (void*) &rw_lock_list_mutex)) { - - return; - } - - if (level == SYNC_LEVEL_VARYING) { - - return; - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - /* We have to allocate the level array for a new thread */ - array = new sync_arr_t(); - ut_a(array != NULL); - thread_slot = sync_thread_level_arrays_find_free(); - thread_slot->levels = array; - thread_slot->id = os_thread_get_curr_id(); - } - - array = thread_slot->levels; - - if (relock) { - goto levels_ok; - } - - /* NOTE that there is a problem with _NODE and _LEAF levels: if the - B-tree height changes, then a leaf can change to an internal node - or the other way around. We do not know at present if this can cause - unnecessary assertion failures below. */ - - switch (level) { - case SYNC_NO_ORDER_CHECK: - case SYNC_EXTERN_STORAGE: - case SYNC_TREE_NODE_FROM_HASH: - /* Do no order checking */ - break; - case SYNC_TRX_SYS_HEADER: - if (srv_is_being_started) { - /* This is violated during trx_sys_create_rsegs() - when creating additional rollback segments when - upgrading in innobase_start_or_create_for_mysql(). 
*/ - break; - } - /* fall through */ - case SYNC_MEM_POOL: - case SYNC_MEM_HASH: - case SYNC_RECV: - case SYNC_FTS_BG_THREADS: - case SYNC_WORK_QUEUE: - case SYNC_FTS_TOKENIZE: - case SYNC_FTS_OPTIMIZE: - case SYNC_FTS_CACHE: - case SYNC_FTS_CACHE_INIT: - case SYNC_LOG_ONLINE: - case SYNC_LOG: - case SYNC_LOG_FLUSH_ORDER: - case SYNC_ANY_LATCH: - case SYNC_FILE_FORMAT_TAG: - case SYNC_DOUBLEWRITE: - case SYNC_THREADS: - case SYNC_LOCK_SYS: - case SYNC_LOCK_WAIT_SYS: - case SYNC_TRX_SYS: - case SYNC_IBUF_BITMAP_MUTEX: - case SYNC_RSEG: - case SYNC_TRX_UNDO: - case SYNC_PURGE_LATCH: - case SYNC_PURGE_QUEUE: - case SYNC_DICT_AUTOINC_MUTEX: - case SYNC_DICT_OPERATION: - case SYNC_DICT_HEADER: - case SYNC_TRX_I_S_RWLOCK: - case SYNC_TRX_I_S_LAST_READ: - case SYNC_IBUF_MUTEX: - case SYNC_INDEX_ONLINE_LOG: - case SYNC_STATS_AUTO_RECALC: - case SYNC_STATS_DEFRAG: - if (!sync_thread_levels_g(array, level, TRUE)) { - fprintf(stderr, - "InnoDB: sync_thread_levels_g(array, %lu)" - " does not hold!\n", level); - ut_error; - } - break; - case SYNC_TRX: - /* Either the thread must own the lock_sys->mutex, or - it is allowed to own only ONE trx->mutex. 
*/ - if (!sync_thread_levels_g(array, level, FALSE)) { - ut_a(sync_thread_levels_g(array, level - 1, TRUE)); - ut_a(sync_thread_levels_contain(array, SYNC_LOCK_SYS)); - } - break; - case SYNC_SEARCH_SYS: { - /* Verify the lock order inside the split btr_search_latch - array */ - bool found_current = false; - for (ulint i = 0; i < btr_search_index_num; i++) { - if (&btr_search_latch_arr[i] == latch) { - found_current = true; - } else if (found_current) { - ut_ad(!rw_lock_own(&btr_search_latch_arr[i], - RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch_arr[i], - RW_LOCK_EX)); - } - } - ut_ad(found_current); - } - - /* fall through */ - case SYNC_BUF_FLUSH_LIST: - case SYNC_BUF_LRU_LIST: - case SYNC_BUF_FREE_LIST: - case SYNC_BUF_ZIP_FREE: - case SYNC_BUF_ZIP_HASH: - case SYNC_BUF_FLUSH_STATE: - /* We can have multiple mutexes of this type therefore we - can only check whether the greater than condition holds. */ - if (!sync_thread_levels_g(array, level-1, TRUE)) { - fprintf(stderr, - "InnoDB: sync_thread_levels_g(array, %lu)" - " does not hold!\n", level-1); - ut_error; - } - break; - - - case SYNC_BUF_PAGE_HASH: - /* Multiple page_hash locks are only allowed during - buf_validate. */ - /* Fall through */ - - case SYNC_BUF_BLOCK: - if (!sync_thread_levels_g(array, level, FALSE)) { - ut_a(sync_thread_levels_g(array, level - 1, TRUE)); - } - break; - case SYNC_REC_LOCK: - if (sync_thread_levels_contain(array, SYNC_LOCK_SYS)) { - ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1, - TRUE)); - } else { - ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE)); - } - break; - case SYNC_IBUF_BITMAP: - /* Either the thread must own the master mutex to all - the bitmap pages, or it is allowed to latch only ONE - bitmap page. 
*/ - if (sync_thread_levels_contain(array, - SYNC_IBUF_BITMAP_MUTEX)) { - ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1, - TRUE)); - } else { - /* This is violated during trx_sys_create_rsegs() - when creating additional rollback segments when - upgrading in innobase_start_or_create_for_mysql(). */ - ut_a(srv_is_being_started - || sync_thread_levels_g(array, SYNC_IBUF_BITMAP, - TRUE)); - } - break; - case SYNC_FSP_PAGE: - ut_a(sync_thread_levels_contain(array, SYNC_FSP)); - break; - case SYNC_FSP: - ut_a(sync_thread_levels_contain(array, SYNC_FSP) - || sync_thread_levels_g(array, SYNC_FSP, TRUE)); - break; - case SYNC_TRX_UNDO_PAGE: - /* Purge is allowed to read in as many UNDO pages as it likes, - there was a bogus rule here earlier that forced the caller to - acquire the trx_purge_t::mutex. The purge mutex did not really - protect anything because it was only ever acquired by the - single purge thread. The purge thread can read the UNDO pages - without any covering mutex. */ - - ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO) - || sync_thread_levels_contain(array, SYNC_RSEG) - || sync_thread_levels_g(array, level - 1, TRUE)); - break; - case SYNC_RSEG_HEADER: - ut_a(sync_thread_levels_contain(array, SYNC_RSEG)); - break; - case SYNC_RSEG_HEADER_NEW: - ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)); - break; - case SYNC_TREE_NODE: - ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE) - || sync_thread_levels_contain(array, SYNC_DICT_OPERATION) - || sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE)); - break; - case SYNC_TREE_NODE_NEW: - ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)); - break; - case SYNC_INDEX_TREE: - ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE)); - break; - case SYNC_IBUF_TREE_NODE: - ut_a(sync_thread_levels_contain(array, SYNC_IBUF_INDEX_TREE) - || sync_thread_levels_g(array, SYNC_IBUF_TREE_NODE - 1, - TRUE)); - break; - case SYNC_IBUF_TREE_NODE_NEW: - /* ibuf_add_free_page() allocates new pages 
for the - change buffer while only holding the tablespace - x-latch. These pre-allocated new pages may only be - taken in use while holding ibuf_mutex, in - btr_page_alloc_for_ibuf(). */ - ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX) - || sync_thread_levels_contain(array, SYNC_FSP)); - break; - case SYNC_IBUF_INDEX_TREE: - if (sync_thread_levels_contain(array, SYNC_FSP)) { - ut_a(sync_thread_levels_g(array, level - 1, TRUE)); - } else { - ut_a(sync_thread_levels_g( - array, SYNC_IBUF_TREE_NODE - 1, TRUE)); - } - break; - case SYNC_IBUF_PESS_INSERT_MUTEX: - ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE)); - ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)); - break; - case SYNC_IBUF_HEADER: - ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE)); - ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)); - ut_a(!sync_thread_levels_contain(array, - SYNC_IBUF_PESS_INSERT_MUTEX)); - break; - case SYNC_DICT: -#ifdef UNIV_DEBUG - ut_a(buf_debug_prints - || sync_thread_levels_g(array, SYNC_DICT, TRUE)); -#else /* UNIV_DEBUG */ - ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE)); -#endif /* UNIV_DEBUG */ - break; - default: - ut_error; - } - -levels_ok: - - sync_level.latch = latch; - sync_level.level = level; - array->push_back(sync_level); - - mutex_exit(&sync_thread_mutex); -} - -/******************************************************************//** -Removes a latch from the thread level array if it is found there. 
-@return TRUE if found in the array; it is no error if the latch is -not found, as we presently are not able to determine the level for -every latch reservation the program does */ -UNIV_INTERN -ibool -sync_thread_reset_level( -/*====================*/ - void* latch) /*!< in: pointer to a mutex or an rw-lock */ -{ - sync_arr_t* array; - sync_thread_t* thread_slot; - - if (!sync_order_checks_on) { - - return(FALSE); - } - - if ((latch == (void*) &sync_thread_mutex) - || (latch == (void*) &mutex_list_mutex) - || (latch == (void*) &rw_lock_debug_mutex) - || (latch == (void*) &rw_lock_list_mutex)) { - - return(FALSE); - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - ut_error; - - mutex_exit(&sync_thread_mutex); - return(FALSE); - } - - array = thread_slot->levels; - - for (std::vector<sync_level_t>::iterator it = array->begin(); it != array->end(); ++it) { - sync_level_t level = *it; - - if (level.latch != latch) { - continue; - } - - array->erase(it); - mutex_exit(&sync_thread_mutex); - return(TRUE); - } - - if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) { - rw_lock_t* rw_lock; - - rw_lock = (rw_lock_t*) latch; - - if (rw_lock->level == SYNC_LEVEL_VARYING) { - mutex_exit(&sync_thread_mutex); - - return(TRUE); - } - } - - ut_error; - - mutex_exit(&sync_thread_mutex); - - return(FALSE); -} -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Initializes the synchronization data structures. 
*/ -UNIV_INTERN -void -sync_init(void) -/*===========*/ -{ - ut_a(sync_initialized == FALSE); - - sync_initialized = TRUE; - - sync_array_init(OS_THREAD_MAX_N); - -#ifdef UNIV_SYNC_DEBUG - /* Create the thread latch level array where the latch levels - are stored for each OS thread */ - - sync_thread_level_arrays = static_cast<sync_thread_t*>( - calloc(sizeof(sync_thread_t), OS_THREAD_MAX_N)); - - ut_a(sync_thread_level_arrays != NULL); - -#endif /* UNIV_SYNC_DEBUG */ - /* Init the mutex list and create the mutex to protect it. */ - - UT_LIST_INIT(mutex_list); - mutex_create(mutex_list_mutex_key, &mutex_list_mutex, - SYNC_NO_ORDER_CHECK); -#ifdef UNIV_SYNC_DEBUG - mutex_create(sync_thread_mutex_key, &sync_thread_mutex, - SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - /* Init the rw-lock list and create the mutex to protect it. */ - - UT_LIST_INIT(rw_lock_list); - mutex_create(rw_lock_list_mutex_key, &rw_lock_list_mutex, - SYNC_NO_ORDER_CHECK); - -#ifdef UNIV_SYNC_DEBUG - os_fast_mutex_init(rw_lock_debug_mutex_key, &rw_lock_debug_mutex); -#endif /* UNIV_SYNC_DEBUG */ -} - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************//** -Frees all debug memory. */ -static -void -sync_thread_level_arrays_free(void) -/*===============================*/ - -{ - ulint i; - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - sync_thread_t* slot; - - slot = &sync_thread_level_arrays[i]; - - /* If this slot was allocated then free the slot memory too. */ - if (slot->levels != NULL) { - delete slot->levels; - } - } - - free(sync_thread_level_arrays); - sync_thread_level_arrays = NULL; -} -#endif /* UNIV_SYNC_DEBUG */ - -/******************************************************************//** -Frees the resources in InnoDB's own synchronization data structures. 
*/ -UNIV_INTERN -void -sync_close(void) -/*===========*/ -{ - ib_mutex_t* mutex; - - sync_array_close(); - - mutex_free(&rw_lock_list_mutex); - - for (mutex = UT_LIST_GET_FIRST(mutex_list); - mutex != NULL; - /* No op */) { - -#ifdef UNIV_MEM_DEBUG - if (mutex == &mem_hash_mutex) { - mutex = UT_LIST_GET_NEXT(list, mutex); - continue; - } -#endif /* UNIV_MEM_DEBUG */ - - mutex_free(mutex); - - mutex = UT_LIST_GET_FIRST(mutex_list); - } - -#ifdef UNIV_SYNC_DEBUG - mutex_free(&sync_thread_mutex); - - /* Switch latching order checks on in sync0sync.cc */ - sync_order_checks_on = FALSE; - - sync_thread_level_arrays_free(); - os_fast_mutex_free(&rw_lock_debug_mutex); -#endif /* UNIV_SYNC_DEBUG */ - - mutex_free(&mutex_list_mutex); - - sync_initialized = FALSE; -} - -/*******************************************************************//** -Prints wait info of the sync system. */ -UNIV_INTERN -void -sync_print_wait_info( -/*=================*/ - FILE* file) /*!< in: file where to print */ -{ - // Sum counter values once - ib_int64_t mutex_spin_wait_count_val - = static_cast<ib_int64_t>(mutex_spin_wait_count); - ib_int64_t mutex_spin_round_count_val - = static_cast<ib_int64_t>(mutex_spin_round_count); - ib_int64_t mutex_os_wait_count_val - = static_cast<ib_int64_t>(mutex_os_wait_count); - ib_int64_t rw_s_spin_wait_count_val - = static_cast<ib_int64_t>(rw_lock_stats.rw_s_spin_wait_count); - ib_int64_t rw_s_spin_round_count_val - = static_cast<ib_int64_t>(rw_lock_stats.rw_s_spin_round_count); - ib_int64_t rw_s_os_wait_count_val - = static_cast<ib_int64_t>(rw_lock_stats.rw_s_os_wait_count); - ib_int64_t rw_x_spin_wait_count_val - = static_cast<ib_int64_t>(rw_lock_stats.rw_x_spin_wait_count); - ib_int64_t rw_x_spin_round_count_val - = static_cast<ib_int64_t>(rw_lock_stats.rw_x_spin_round_count); - ib_int64_t rw_x_os_wait_count_val - = static_cast<ib_int64_t>(rw_lock_stats.rw_x_os_wait_count); - - fprintf(file, - "Mutex spin waits " INT64PF ", rounds " INT64PF ", " - "OS waits " 
INT64PF "\n" - "RW-shared spins " INT64PF ", rounds " INT64PF ", " - "OS waits " INT64PF "\n" - "RW-excl spins " INT64PF ", rounds " INT64PF ", " - "OS waits " INT64PF "\n", - mutex_spin_wait_count_val, mutex_spin_round_count_val, - mutex_os_wait_count_val, - rw_s_spin_wait_count_val, rw_s_spin_round_count_val, - rw_s_os_wait_count_val, - rw_x_spin_wait_count_val, rw_x_spin_round_count_val, - rw_x_os_wait_count_val); - - fprintf(file, - "Spin rounds per wait: %.2f mutex, %.2f RW-shared, " - "%.2f RW-excl\n", - (double) mutex_spin_round_count_val / - (mutex_spin_wait_count_val ? mutex_spin_wait_count_val : 1LL), - (double) rw_s_spin_round_count_val / - (rw_s_spin_wait_count_val ? rw_s_spin_wait_count_val : 1LL), - (double) rw_x_spin_round_count_val / - (rw_x_spin_wait_count_val ? rw_x_spin_wait_count_val : 1LL)); -} - -/*******************************************************************//** -Prints info of the sync system. */ -UNIV_INTERN -void -sync_print( -/*=======*/ - FILE* file) /*!< in: file where to print */ -{ -#ifdef UNIV_SYNC_DEBUG - mutex_list_print_info(file); - - rw_lock_list_print_info(file); -#endif /* UNIV_SYNC_DEBUG */ - - sync_array_print(file); - - sync_print_wait_info(file); -} diff --git a/storage/xtradb/trx/trx0i_s.cc b/storage/xtradb/trx/trx0i_s.cc deleted file mode 100644 index 0c9618d98eb..00000000000 --- a/storage/xtradb/trx/trx0i_s.cc +++ /dev/null @@ -1,1692 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0i_s.cc -INFORMATION SCHEMA innodb_trx, innodb_locks and -innodb_lock_waits tables fetch code. - -The code below fetches information needed to fill those -3 dynamic tables and uploads it into a "transactions -table cache" for later retrieval. - -Created July 17, 2007 Vasil Dimov -*******************************************************/ - -/* Found during the build of 5.5.3 on Linux 2.4 and early 2.6 kernels: - The includes "univ.i" -> "my_global.h" cause a different path - to be taken further down with pthread functions and types, - so they must come first. - From the symptoms, this is related to bug#46587 in the MySQL bug DB. -*/ -#include "univ.i" - -#include <mysql/plugin.h> - -#include "buf0buf.h" -#include "dict0dict.h" -#include "ha0storage.h" -#include "ha_prototypes.h" -#include "hash0hash.h" -#include "lock0iter.h" -#include "lock0lock.h" -#include "mem0mem.h" -#include "page0page.h" -#include "rem0rec.h" -#include "row0row.h" -#include "srv0srv.h" -#include "sync0rw.h" -#include "sync0sync.h" -#include "sync0types.h" -#include "trx0i_s.h" -#include "trx0sys.h" -#include "trx0trx.h" -#include "ut0mem.h" -#include "ut0ut.h" - -/** Initial number of rows in the table cache */ -#define TABLE_CACHE_INITIAL_ROWSNUM 1024 - -/** @brief The maximum number of chunks to allocate for a table cache. - -The rows of a table cache are stored in a set of chunks. When a new -row is added a new chunk is allocated if necessary. 
Assuming that the -first one is 1024 rows (TABLE_CACHE_INITIAL_ROWSNUM) and each -subsequent is N/2 where N is the number of rows we have allocated till -now, then 39th chunk would accommodate 1677416425 rows and all chunks -would accommodate 3354832851 rows. */ -#define MEM_CHUNKS_IN_TABLE_CACHE 39 - -/** The following are some testing auxiliary macros. Do not enable them -in a production environment. */ -/* @{ */ - -#if 0 -/** If this is enabled then lock folds will always be different -resulting in equal rows being put in a different cells of the hash -table. Checking for duplicates will be flawed because different -fold will be calculated when a row is searched in the hash table. */ -#define TEST_LOCK_FOLD_ALWAYS_DIFFERENT -#endif - -#if 0 -/** This effectively kills the search-for-duplicate-before-adding-a-row -function, but searching in the hash is still performed. It will always -be assumed that lock is not present and insertion will be performed in -the hash table. */ -#define TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T -#endif - -#if 0 -/** This aggressively repeats adding each row many times. Depending on -the above settings this may be noop or may result in lots of rows being -added. */ -#define TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES -#endif - -#if 0 -/** Very similar to TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T but hash -table search is not performed at all. */ -#define TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS -#endif - -#if 0 -/** Do not insert each row into the hash table, duplicates may appear -if this is enabled, also if this is enabled searching into the hash is -noop because it will be empty. */ -#define TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE -#endif -/* @} */ - -/** Memory limit passed to ha_storage_put_memlim(). -@param cache hash storage -@return maximum allowed allocation size */ -#define MAX_ALLOWED_FOR_STORAGE(cache) \ - (TRX_I_S_MEM_LIMIT \ - - (cache)->mem_allocd) - -/** Memory limit in table_cache_create_empty_row(). 
-@param cache hash storage -@return maximum allowed allocation size */ -#define MAX_ALLOWED_FOR_ALLOC(cache) \ - (TRX_I_S_MEM_LIMIT \ - - (cache)->mem_allocd \ - - ha_storage_get_size((cache)->storage)) - -/** Memory for each table in the intermediate buffer is allocated in -separate chunks. These chunks are considered to be concatenated to -represent one flat array of rows. */ -struct i_s_mem_chunk_t { - ulint offset; /*!< offset, in number of rows */ - ulint rows_allocd; /*!< the size of this chunk, in number - of rows */ - void* base; /*!< start of the chunk */ -}; - -/** This represents one table's cache. */ -struct i_s_table_cache_t { - ulint rows_used; /*!< number of used rows */ - ulint rows_allocd; /*!< number of allocated rows */ - ulint row_size; /*!< size of a single row */ - i_s_mem_chunk_t chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of - memory chunks that stores the - rows */ -}; - -/** This structure describes the intermediate buffer */ -struct trx_i_s_cache_t { - rw_lock_t rw_lock; /*!< read-write lock protecting - the rest of this structure */ - ullint last_read; /*!< last time the cache was read; - measured in microseconds since - epoch */ - ib_mutex_t last_read_mutex;/*!< mutex protecting the - last_read member - it is updated - inside a shared lock of the - rw_lock member */ - i_s_table_cache_t innodb_trx; /*!< innodb_trx table */ - i_s_table_cache_t innodb_locks; /*!< innodb_locks table */ - i_s_table_cache_t innodb_lock_waits;/*!< innodb_lock_waits table */ -/** the hash table size is LOCKS_HASH_CELLS_NUM * sizeof(void*) bytes */ -#define LOCKS_HASH_CELLS_NUM 10000 - hash_table_t* locks_hash; /*!< hash table used to eliminate - duplicate entries in the - innodb_locks table */ -/** Initial size of the cache storage */ -#define CACHE_STORAGE_INITIAL_SIZE 1024 -/** Number of hash cells in the cache storage */ -#define CACHE_STORAGE_HASH_CELLS 2048 - ha_storage_t* storage; /*!< storage for external volatile - data that may become unavailable - 
when we release - lock_sys->mutex or trx_sys->mutex */ - ulint mem_allocd; /*!< the amount of memory - allocated with mem_alloc*() */ - ibool is_truncated; /*!< this is TRUE if the memory - limit was hit and thus the data - in the cache is truncated */ -}; - -/** This is the intermediate buffer where data needed to fill the -INFORMATION SCHEMA tables is fetched and later retrieved by the C++ -code in handler/i_s.cc. */ -static trx_i_s_cache_t trx_i_s_cache_static; -/** This is the intermediate buffer where data needed to fill the -INFORMATION SCHEMA tables is fetched and later retrieved by the C++ -code in handler/i_s.cc. */ -UNIV_INTERN trx_i_s_cache_t* trx_i_s_cache = &trx_i_s_cache_static; - -/* Key to register the lock/mutex with performance schema */ -#ifdef UNIV_PFS_RWLOCK -UNIV_INTERN mysql_pfs_key_t trx_i_s_cache_lock_key; -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -UNIV_INTERN mysql_pfs_key_t cache_last_read_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/*******************************************************************//** -For a record lock that is in waiting state retrieves the only bit that -is set, for a table lock returns ULINT_UNDEFINED. -@return record number within the heap */ -static -ulint -wait_lock_get_heap_no( -/*==================*/ - const lock_t* lock) /*!< in: lock */ -{ - ulint ret; - - switch (lock_get_type(lock)) { - case LOCK_REC: - ret = lock_rec_find_set_bit(lock); - ut_a(ret != ULINT_UNDEFINED); - break; - case LOCK_TABLE: - ret = ULINT_UNDEFINED; - break; - default: - ut_error; - } - - return(ret); -} - -/*******************************************************************//** -Initializes the members of a table cache. 
*/ -static -void -table_cache_init( -/*=============*/ - i_s_table_cache_t* table_cache, /*!< out: table cache */ - size_t row_size) /*!< in: the size of a - row */ -{ - ulint i; - - table_cache->rows_used = 0; - table_cache->rows_allocd = 0; - table_cache->row_size = row_size; - - for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) { - - /* the memory is actually allocated in - table_cache_create_empty_row() */ - table_cache->chunks[i].base = NULL; - } -} - -/*******************************************************************//** -Frees a table cache. */ -static -void -table_cache_free( -/*=============*/ - i_s_table_cache_t* table_cache) /*!< in/out: table cache */ -{ - ulint i; - - for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) { - - /* the memory is actually allocated in - table_cache_create_empty_row() */ - if (table_cache->chunks[i].base) { - mem_free(table_cache->chunks[i].base); - table_cache->chunks[i].base = NULL; - } - } -} - -/*******************************************************************//** -Returns an empty row from a table cache. The row is allocated if no more -empty rows are available. The number of used rows is incremented. -If the memory limit is hit then NULL is returned and nothing is -allocated. 
-@return empty row, or NULL if out of memory */ -static -void* -table_cache_create_empty_row( -/*=========================*/ - i_s_table_cache_t* table_cache, /*!< in/out: table cache */ - trx_i_s_cache_t* cache) /*!< in/out: cache to record - how many bytes are - allocated */ -{ - ulint i; - void* row; - - ut_a(table_cache->rows_used <= table_cache->rows_allocd); - - if (table_cache->rows_used == table_cache->rows_allocd) { - - /* rows_used == rows_allocd means that new chunk needs - to be allocated: either no more empty rows in the - last allocated chunk or nothing has been allocated yet - (rows_num == rows_allocd == 0); */ - - i_s_mem_chunk_t* chunk; - ulint req_bytes; - ulint got_bytes; - ulint req_rows; - ulint got_rows; - - /* find the first not allocated chunk */ - for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) { - - if (table_cache->chunks[i].base == NULL) { - - break; - } - } - - /* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks - have been allocated :-X */ - ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE); - - /* allocate the chunk we just found */ - - if (i == 0) { - - /* first chunk, nothing is allocated yet */ - req_rows = TABLE_CACHE_INITIAL_ROWSNUM; - } else { - - /* Memory is increased by the formula - new = old + old / 2; We are trying not to be - aggressive here (= using the common new = old * 2) - because the allocated memory will not be freed - until InnoDB exit (it is reused). So it is better - to once allocate the memory in more steps, but - have less unused/wasted memory than to use less - steps in allocation (which is done once in a - lifetime) but end up with lots of unused/wasted - memory. 
*/ - req_rows = table_cache->rows_allocd / 2; - } - req_bytes = req_rows * table_cache->row_size; - - if (req_bytes > MAX_ALLOWED_FOR_ALLOC(cache)) { - - return(NULL); - } - - chunk = &table_cache->chunks[i]; - - chunk->base = mem_alloc2(req_bytes, &got_bytes); - - got_rows = got_bytes / table_cache->row_size; - - cache->mem_allocd += got_bytes; - -#if 0 - printf("allocating chunk %d req bytes=%lu, got bytes=%lu, " - "row size=%lu, " - "req rows=%lu, got rows=%lu\n", - i, req_bytes, got_bytes, - table_cache->row_size, - req_rows, got_rows); -#endif - - chunk->rows_allocd = got_rows; - - table_cache->rows_allocd += got_rows; - - /* adjust the offset of the next chunk */ - if (i < MEM_CHUNKS_IN_TABLE_CACHE - 1) { - - table_cache->chunks[i + 1].offset - = chunk->offset + chunk->rows_allocd; - } - - /* return the first empty row in the newly allocated - chunk */ - row = chunk->base; - } else { - - char* chunk_start; - ulint offset; - - /* there is an empty row, no need to allocate new - chunks */ - - /* find the first chunk that contains allocated but - empty/unused rows */ - for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) { - - if (table_cache->chunks[i].offset - + table_cache->chunks[i].rows_allocd - > table_cache->rows_used) { - - break; - } - } - - /* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks - are full, but - table_cache->rows_used != table_cache->rows_allocd means - exactly the opposite - there are allocated but - empty/unused rows :-X */ - ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE); - - chunk_start = (char*) table_cache->chunks[i].base; - offset = table_cache->rows_used - - table_cache->chunks[i].offset; - - row = chunk_start + offset * table_cache->row_size; - } - - table_cache->rows_used++; - - return(row); -} - -#ifdef UNIV_DEBUG -/*******************************************************************//** -Validates a row in the locks cache. 
-@return TRUE if valid */ -static -ibool -i_s_locks_row_validate( -/*===================*/ - const i_s_locks_row_t* row) /*!< in: row to validate */ -{ - ut_ad(row->lock_trx_id != 0); - ut_ad(row->lock_mode != NULL); - ut_ad(row->lock_type != NULL); - ut_ad(row->lock_table != NULL); - ut_ad(row->lock_table_id != 0); - - if (row->lock_space == ULINT_UNDEFINED) { - /* table lock */ - ut_ad(!strcmp("TABLE", row->lock_type)); - ut_ad(row->lock_index == NULL); - ut_ad(row->lock_data == NULL); - ut_ad(row->lock_page == ULINT_UNDEFINED); - ut_ad(row->lock_rec == ULINT_UNDEFINED); - } else { - /* record lock */ - ut_ad(!strcmp("RECORD", row->lock_type)); - ut_ad(row->lock_index != NULL); - /* row->lock_data == NULL if buf_page_try_get() == NULL */ - ut_ad(row->lock_page != ULINT_UNDEFINED); - ut_ad(row->lock_rec != ULINT_UNDEFINED); - } - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/*******************************************************************//** -Fills i_s_trx_row_t object. -If memory can not be allocated then FALSE is returned. 
-@return FALSE if allocation fails */ -static -ibool -fill_trx_row( -/*=========*/ - i_s_trx_row_t* row, /*!< out: result object - that's filled */ - const trx_t* trx, /*!< in: transaction to - get data from */ - const i_s_locks_row_t* requested_lock_row,/*!< in: pointer to the - corresponding row in - innodb_locks if trx is - waiting or NULL if trx - is not waiting */ - trx_i_s_cache_t* cache) /*!< in/out: cache into - which to copy volatile - strings */ -{ - const char* stmt; - size_t stmt_len; - const char* s; - - ut_ad(lock_mutex_own()); - - row->trx_id = trx->id; - row->trx_started = (ib_time_t) trx->start_time; - row->trx_state = trx_get_que_state_str(trx); - row->requested_lock_row = requested_lock_row; - ut_ad(requested_lock_row == NULL - || i_s_locks_row_validate(requested_lock_row)); - - if (trx->lock.wait_lock != NULL) { - - ut_a(requested_lock_row != NULL); - row->trx_wait_started = (ib_time_t) trx->lock.wait_started; - } else { - ut_a(requested_lock_row == NULL); - row->trx_wait_started = 0; - } - - row->trx_weight = (ullint) TRX_WEIGHT(trx); - - if (trx->mysql_thd == NULL) { - /* For internal transactions e.g., purge and transactions - being recovered at startup there is no associated MySQL - thread data structure. */ - row->trx_mysql_thread_id = 0; - row->trx_query = NULL; - goto thd_done; - } - - row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); - - stmt = trx->mysql_thd - ? 
innobase_get_stmt(trx->mysql_thd, &stmt_len) - : NULL; - - if (stmt != NULL) { - char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; - - if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) { - stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN; - } - - memcpy(query, stmt, stmt_len); - query[stmt_len] = '\0'; - - row->trx_query = static_cast<const char*>( - ha_storage_put_memlim( - cache->storage, query, stmt_len + 1, - MAX_ALLOWED_FOR_STORAGE(cache))); - - row->trx_query_cs = innobase_get_charset(trx->mysql_thd); - - if (row->trx_query == NULL) { - - return(FALSE); - } - } else { - - row->trx_query = NULL; - } - -thd_done: - s = trx->op_info; - - if (s != NULL && s[0] != '\0') { - - TRX_I_S_STRING_COPY(s, row->trx_operation_state, - TRX_I_S_TRX_OP_STATE_MAX_LEN, cache); - - if (row->trx_operation_state == NULL) { - - return(FALSE); - } - } else { - - row->trx_operation_state = NULL; - } - - row->trx_tables_in_use = trx->n_mysql_tables_in_use; - - row->trx_tables_locked = trx->mysql_n_tables_locked; - - /* These are protected by both trx->mutex or lock_sys->mutex, - or just lock_sys->mutex. For reading, it suffices to hold - lock_sys->mutex. 
*/ - - row->trx_lock_structs = UT_LIST_GET_LEN(trx->lock.trx_locks); - - row->trx_lock_memory_bytes = mem_heap_get_size(trx->lock.lock_heap); - - row->trx_rows_locked = lock_number_of_rows_locked(&trx->lock); - - row->trx_rows_modified = trx->undo_no; - - row->trx_concurrency_tickets = trx->n_tickets_to_enter_innodb; - - switch (trx->isolation_level) { - case TRX_ISO_READ_UNCOMMITTED: - row->trx_isolation_level = "READ UNCOMMITTED"; - break; - case TRX_ISO_READ_COMMITTED: - row->trx_isolation_level = "READ COMMITTED"; - break; - case TRX_ISO_REPEATABLE_READ: - row->trx_isolation_level = "REPEATABLE READ"; - break; - case TRX_ISO_SERIALIZABLE: - row->trx_isolation_level = "SERIALIZABLE"; - break; - /* Should not happen as TRX_ISO_READ_COMMITTED is default */ - default: - row->trx_isolation_level = "UNKNOWN"; - } - - row->trx_unique_checks = (ibool) trx->check_unique_secondary; - - row->trx_foreign_key_checks = (ibool) trx->check_foreigns; - - s = trx->detailed_error; - - if (s != NULL && s[0] != '\0') { - - TRX_I_S_STRING_COPY(s, - row->trx_foreign_key_error, - TRX_I_S_TRX_FK_ERROR_MAX_LEN, cache); - - if (row->trx_foreign_key_error == NULL) { - - return(FALSE); - } - } else { - row->trx_foreign_key_error = NULL; - } - - row->trx_has_search_latch = (ibool) trx->has_search_latch; - - row->trx_search_latch_timeout = trx->search_latch_timeout; - - row->trx_is_read_only = trx->read_only; - - row->trx_is_autocommit_non_locking = trx_is_autocommit_non_locking(trx); - - return(TRUE); -} - -/*******************************************************************//** -Format the nth field of "rec" and put it in "buf". The result is always -NUL-terminated. Returns the number of bytes that were written to "buf" -(including the terminating NUL). 
-@return end of the result */ -static -ulint -put_nth_field( -/*==========*/ - char* buf, /*!< out: buffer */ - ulint buf_size,/*!< in: buffer size in bytes */ - ulint n, /*!< in: number of field */ - const dict_index_t* index, /*!< in: index */ - const rec_t* rec, /*!< in: record */ - const ulint* offsets)/*!< in: record offsets, returned - by rec_get_offsets() */ -{ - const byte* data; - ulint data_len; - dict_field_t* dict_field; - ulint ret; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - if (buf_size == 0) { - - return(0); - } - - ret = 0; - - if (n > 0) { - /* we must append ", " before the actual data */ - - if (buf_size < 3) { - - buf[0] = '\0'; - return(1); - } - - memcpy(buf, ", ", 3); - - buf += 2; - buf_size -= 2; - ret += 2; - } - - /* now buf_size >= 1 */ - - data = rec_get_nth_field(rec, offsets, n, &data_len); - - dict_field = dict_index_get_nth_field(index, n); - - ret += row_raw_format((const char*) data, data_len, - dict_field, buf, buf_size); - - return(ret); -} - -/*******************************************************************//** -Fills the "lock_data" member of i_s_locks_row_t object. -If memory can not be allocated then FALSE is returned. 
-@return FALSE if allocation fails */ -static -ibool -fill_lock_data( -/*===========*/ - const char** lock_data,/*!< out: "lock_data" to fill */ - const lock_t* lock, /*!< in: lock used to find the data */ - ulint heap_no,/*!< in: rec num used to find the data */ - trx_i_s_cache_t* cache) /*!< in/out: cache where to store - volatile data */ -{ - mtr_t mtr; - - const buf_block_t* block; - const page_t* page; - const rec_t* rec; - - ut_a(lock_get_type(lock) == LOCK_REC); - - mtr_start(&mtr); - - block = buf_page_try_get(lock_rec_get_space_id(lock), - lock_rec_get_page_no(lock), - &mtr); - - if (block == NULL) { - - *lock_data = NULL; - - mtr_commit(&mtr); - - return(TRUE); - } - - page = (const page_t*) buf_block_get_frame(block); - - rec = page_find_rec_with_heap_no(page, heap_no); - - if (page_rec_is_infimum(rec)) { - - *lock_data = ha_storage_put_str_memlim( - cache->storage, "infimum pseudo-record", - MAX_ALLOWED_FOR_STORAGE(cache)); - } else if (page_rec_is_supremum(rec)) { - - *lock_data = ha_storage_put_str_memlim( - cache->storage, "supremum pseudo-record", - MAX_ALLOWED_FOR_STORAGE(cache)); - } else { - - const dict_index_t* index; - ulint n_fields; - mem_heap_t* heap; - ulint offsets_onstack[REC_OFFS_NORMAL_SIZE]; - ulint* offsets; - char buf[TRX_I_S_LOCK_DATA_MAX_LEN]; - ulint buf_used; - ulint i; - - rec_offs_init(offsets_onstack); - offsets = offsets_onstack; - - index = lock_rec_get_index(lock); - - n_fields = dict_index_get_n_unique(index); - - ut_a(n_fields > 0); - - heap = NULL; - offsets = rec_get_offsets(rec, index, offsets, n_fields, - &heap); - - /* format and store the data */ - - buf_used = 0; - for (i = 0; i < n_fields; i++) { - - buf_used += put_nth_field( - buf + buf_used, sizeof(buf) - buf_used, - i, index, rec, offsets) - 1; - } - - *lock_data = (const char*) ha_storage_put_memlim( - cache->storage, buf, buf_used + 1, - MAX_ALLOWED_FOR_STORAGE(cache)); - - if (UNIV_UNLIKELY(heap != NULL)) { - - /* this means that rec_get_offsets() has 
created a new - heap and has stored offsets in it; check that this is - really the case and free the heap */ - ut_a(offsets != offsets_onstack); - mem_heap_free(heap); - } - } - - mtr_commit(&mtr); - - if (*lock_data == NULL) { - - return(FALSE); - } - - return(TRUE); -} - -/*******************************************************************//** -Fills i_s_locks_row_t object. Returns its first argument. -If memory can not be allocated then FALSE is returned. -@return FALSE if allocation fails */ -static -ibool -fill_locks_row( -/*===========*/ - i_s_locks_row_t* row, /*!< out: result object that's filled */ - const lock_t* lock, /*!< in: lock to get data from */ - ulint heap_no,/*!< in: lock's record number - or ULINT_UNDEFINED if the lock - is a table lock */ - trx_i_s_cache_t* cache) /*!< in/out: cache into which to copy - volatile strings */ -{ - row->lock_trx_id = lock_get_trx_id(lock); - row->lock_mode = lock_get_mode_str(lock); - row->lock_type = lock_get_type_str(lock); - - row->lock_table = ha_storage_put_str_memlim( - cache->storage, lock_get_table_name(lock), - MAX_ALLOWED_FOR_STORAGE(cache)); - - /* memory could not be allocated */ - if (row->lock_table == NULL) { - - return(FALSE); - } - - switch (lock_get_type(lock)) { - case LOCK_REC: - row->lock_index = ha_storage_put_str_memlim( - cache->storage, lock_rec_get_index_name(lock), - MAX_ALLOWED_FOR_STORAGE(cache)); - - /* memory could not be allocated */ - if (row->lock_index == NULL) { - - return(FALSE); - } - - row->lock_space = lock_rec_get_space_id(lock); - row->lock_page = lock_rec_get_page_no(lock); - row->lock_rec = heap_no; - - if (!fill_lock_data(&row->lock_data, lock, heap_no, cache)) { - - /* memory could not be allocated */ - return(FALSE); - } - - break; - case LOCK_TABLE: - row->lock_index = NULL; - - row->lock_space = ULINT_UNDEFINED; - row->lock_page = ULINT_UNDEFINED; - row->lock_rec = ULINT_UNDEFINED; - - row->lock_data = NULL; - - break; - default: - ut_error; - } - - 
row->lock_table_id = lock_get_table_id(lock); - - row->hash_chain.value = row; - ut_ad(i_s_locks_row_validate(row)); - - return(TRUE); -} - -/*******************************************************************//** -Fills i_s_lock_waits_row_t object. Returns its first argument. -@return result object that's filled */ -static -i_s_lock_waits_row_t* -fill_lock_waits_row( -/*================*/ - i_s_lock_waits_row_t* row, /*!< out: result object - that's filled */ - const i_s_locks_row_t* requested_lock_row,/*!< in: pointer to the - relevant requested lock - row in innodb_locks */ - const i_s_locks_row_t* blocking_lock_row)/*!< in: pointer to the - relevant blocking lock - row in innodb_locks */ -{ - ut_ad(i_s_locks_row_validate(requested_lock_row)); - ut_ad(i_s_locks_row_validate(blocking_lock_row)); - - row->requested_lock_row = requested_lock_row; - row->blocking_lock_row = blocking_lock_row; - - return(row); -} - -/*******************************************************************//** -Calculates a hash fold for a lock. For a record lock the fold is -calculated from 4 elements, which uniquely identify a lock at a given -point in time: transaction id, space id, page number, record number. -For a table lock the fold is table's id. 
-@return fold */ -static -ulint -fold_lock( -/*======*/ - const lock_t* lock, /*!< in: lock object to fold */ - ulint heap_no)/*!< in: lock's record number - or ULINT_UNDEFINED if the lock - is a table lock */ -{ -#ifdef TEST_LOCK_FOLD_ALWAYS_DIFFERENT - static ulint fold = 0; - - return(fold++); -#else - ulint ret; - - switch (lock_get_type(lock)) { - case LOCK_REC: - ut_a(heap_no != ULINT_UNDEFINED); - - ret = ut_fold_ulint_pair((ulint) lock_get_trx_id(lock), - lock_rec_get_space_id(lock)); - - ret = ut_fold_ulint_pair(ret, - lock_rec_get_page_no(lock)); - - ret = ut_fold_ulint_pair(ret, heap_no); - - break; - case LOCK_TABLE: - /* this check is actually not necessary for continuing - correct operation, but something must have gone wrong if - it fails. */ - ut_a(heap_no == ULINT_UNDEFINED); - - ret = (ulint) lock_get_table_id(lock); - - break; - default: - ut_error; - } - - return(ret); -#endif -} - -/*******************************************************************//** -Checks whether i_s_locks_row_t object represents a lock_t object. -@return TRUE if they match */ -static -ibool -locks_row_eq_lock( -/*==============*/ - const i_s_locks_row_t* row, /*!< in: innodb_locks row */ - const lock_t* lock, /*!< in: lock object */ - ulint heap_no)/*!< in: lock's record number - or ULINT_UNDEFINED if the lock - is a table lock */ -{ - ut_ad(i_s_locks_row_validate(row)); -#ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T - return(0); -#else - switch (lock_get_type(lock)) { - case LOCK_REC: - ut_a(heap_no != ULINT_UNDEFINED); - - return(row->lock_trx_id == lock_get_trx_id(lock) - && row->lock_space == lock_rec_get_space_id(lock) - && row->lock_page == lock_rec_get_page_no(lock) - && row->lock_rec == heap_no); - - case LOCK_TABLE: - /* this check is actually not necessary for continuing - correct operation, but something must have gone wrong if - it fails. 
*/ - ut_a(heap_no == ULINT_UNDEFINED); - - return(row->lock_trx_id == lock_get_trx_id(lock) - && row->lock_table_id == lock_get_table_id(lock)); - - default: - ut_error; - return(FALSE); - } -#endif -} - -/*******************************************************************//** -Searches for a row in the innodb_locks cache that has a specified id. -This happens in O(1) time since a hash table is used. Returns pointer to -the row or NULL if none is found. -@return row or NULL */ -static -i_s_locks_row_t* -search_innodb_locks( -/*================*/ - trx_i_s_cache_t* cache, /*!< in: cache */ - const lock_t* lock, /*!< in: lock to search for */ - ulint heap_no)/*!< in: lock's record number - or ULINT_UNDEFINED if the lock - is a table lock */ -{ - i_s_hash_chain_t* hash_chain; - - HASH_SEARCH( - /* hash_chain->"next" */ - next, - /* the hash table */ - cache->locks_hash, - /* fold */ - fold_lock(lock, heap_no), - /* the type of the next variable */ - i_s_hash_chain_t*, - /* auxiliary variable */ - hash_chain, - /* assertion on every traversed item */ - ut_ad(i_s_locks_row_validate(hash_chain->value)), - /* this determines if we have found the lock */ - locks_row_eq_lock(hash_chain->value, lock, heap_no)); - - if (hash_chain == NULL) { - - return(NULL); - } - /* else */ - - return(hash_chain->value); -} - -/*******************************************************************//** -Adds new element to the locks cache, enlarging it if necessary. -Returns a pointer to the added row. If the row is already present then -no row is added and a pointer to the existing row is returned. -If row can not be allocated then NULL is returned. 
-@return row */ -static -i_s_locks_row_t* -add_lock_to_cache( -/*==============*/ - trx_i_s_cache_t* cache, /*!< in/out: cache */ - const lock_t* lock, /*!< in: the element to add */ - ulint heap_no)/*!< in: lock's record number - or ULINT_UNDEFINED if the lock - is a table lock */ -{ - i_s_locks_row_t* dst_row; - -#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES - ulint i; - for (i = 0; i < 10000; i++) { -#endif -#ifndef TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS - /* quit if this lock is already present */ - dst_row = search_innodb_locks(cache, lock, heap_no); - if (dst_row != NULL) { - - ut_ad(i_s_locks_row_validate(dst_row)); - return(dst_row); - } -#endif - - dst_row = (i_s_locks_row_t*) - table_cache_create_empty_row(&cache->innodb_locks, cache); - - /* memory could not be allocated */ - if (dst_row == NULL) { - - return(NULL); - } - - if (!fill_locks_row(dst_row, lock, heap_no, cache)) { - - /* memory could not be allocated */ - cache->innodb_locks.rows_used--; - return(NULL); - } - -#ifndef TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE - HASH_INSERT( - /* the type used in the hash chain */ - i_s_hash_chain_t, - /* hash_chain->"next" */ - next, - /* the hash table */ - cache->locks_hash, - /* fold */ - fold_lock(lock, heap_no), - /* add this data to the hash */ - &dst_row->hash_chain); -#endif -#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES - } /* for()-loop */ -#endif - - ut_ad(i_s_locks_row_validate(dst_row)); - return(dst_row); -} - -/*******************************************************************//** -Adds new pair of locks to the lock waits cache. -If memory can not be allocated then FALSE is returned. 
-@return FALSE if allocation fails */ -static -ibool -add_lock_wait_to_cache( -/*===================*/ - trx_i_s_cache_t* cache, /*!< in/out: cache */ - const i_s_locks_row_t* requested_lock_row,/*!< in: pointer to the - relevant requested lock - row in innodb_locks */ - const i_s_locks_row_t* blocking_lock_row)/*!< in: pointer to the - relevant blocking lock - row in innodb_locks */ -{ - i_s_lock_waits_row_t* dst_row; - - dst_row = (i_s_lock_waits_row_t*) - table_cache_create_empty_row(&cache->innodb_lock_waits, - cache); - - /* memory could not be allocated */ - if (dst_row == NULL) { - - return(FALSE); - } - - fill_lock_waits_row(dst_row, requested_lock_row, blocking_lock_row); - - return(TRUE); -} - -/*******************************************************************//** -Adds transaction's relevant (important) locks to cache. -If the transaction is waiting, then the wait lock is added to -innodb_locks and a pointer to the added row is returned in -requested_lock_row, otherwise requested_lock_row is set to NULL. -If rows can not be allocated then FALSE is returned and the value of -requested_lock_row is undefined. -@return FALSE if allocation fails */ -static -ibool -add_trx_relevant_locks_to_cache( -/*============================*/ - trx_i_s_cache_t* cache, /*!< in/out: cache */ - const trx_t* trx, /*!< in: transaction */ - i_s_locks_row_t** requested_lock_row)/*!< out: pointer to the - requested lock row, or NULL or - undefined */ -{ - ut_ad(lock_mutex_own()); - - /* If transaction is waiting we add the wait lock and all locks - from another transactions that are blocking the wait lock. 
*/ - if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - - const lock_t* curr_lock; - ulint wait_lock_heap_no; - i_s_locks_row_t* blocking_lock_row; - lock_queue_iterator_t iter; - - ut_a(trx->lock.wait_lock != NULL); - - wait_lock_heap_no - = wait_lock_get_heap_no(trx->lock.wait_lock); - - /* add the requested lock */ - *requested_lock_row - = add_lock_to_cache(cache, trx->lock.wait_lock, - wait_lock_heap_no); - - /* memory could not be allocated */ - if (*requested_lock_row == NULL) { - - return(FALSE); - } - - /* then iterate over the locks before the wait lock and - add the ones that are blocking it */ - - lock_queue_iterator_reset(&iter, trx->lock.wait_lock, - ULINT_UNDEFINED); - - for (curr_lock = lock_queue_iterator_get_prev(&iter); - curr_lock != NULL; - curr_lock = lock_queue_iterator_get_prev(&iter)) { - - if (lock_has_to_wait(trx->lock.wait_lock, - curr_lock)) { - - /* add the lock that is - blocking trx->lock.wait_lock */ - blocking_lock_row - = add_lock_to_cache( - cache, curr_lock, - /* heap_no is the same - for the wait and waited - locks */ - wait_lock_heap_no); - - /* memory could not be allocated */ - if (blocking_lock_row == NULL) { - - return(FALSE); - } - - /* add the relation between both locks - to innodb_lock_waits */ - if (!add_lock_wait_to_cache( - cache, *requested_lock_row, - blocking_lock_row)) { - - /* memory could not be allocated */ - return(FALSE); - } - } - } - } else { - - *requested_lock_row = NULL; - } - - return(TRUE); -} - -/** The minimum time that a cache must not be updated after it has been -read for the last time; measured in microseconds. We use this technique -to ensure that SELECTs which join several INFORMATION SCHEMA tables read -the same version of the cache. */ -#define CACHE_MIN_IDLE_TIME_US 100000 /* 0.1 sec */ - -/*******************************************************************//** -Checks if the cache can safely be updated. 
-@return TRUE if can be updated */ -static -ibool -can_cache_be_updated( -/*=================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ -{ - ullint now; - - /* Here we read cache->last_read without acquiring its mutex - because last_read is only updated when a shared rw lock on the - whole cache is being held (see trx_i_s_cache_end_read()) and - we are currently holding an exclusive rw lock on the cache. - So it is not possible for last_read to be updated while we are - reading it. */ - -#ifdef UNIV_SYNC_DEBUG - ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX)); -#endif - - now = ut_time_us(NULL); - if (now - cache->last_read > CACHE_MIN_IDLE_TIME_US) { - - return(TRUE); - } - - return(FALSE); -} - -/*******************************************************************//** -Declare a cache empty, preparing it to be filled up. Not all resources -are freed because they can be reused. */ -static -void -trx_i_s_cache_clear( -/*================*/ - trx_i_s_cache_t* cache) /*!< out: cache to clear */ -{ - cache->innodb_trx.rows_used = 0; - cache->innodb_locks.rows_used = 0; - cache->innodb_lock_waits.rows_used = 0; - - hash_table_clear(cache->locks_hash); - - ha_storage_empty(&cache->storage); -} - -/*******************************************************************//** -Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the -table cache buffer. Cache must be locked for write. */ -static -void -fetch_data_into_cache_low( -/*======================*/ - trx_i_s_cache_t* cache, /*!< in/out: cache */ - ibool only_ac_nl, /*!< in: only select non-locking - autocommit transactions */ - trx_list_t* trx_list) /*!< in: trx list */ -{ - const trx_t* trx; - - ut_ad(trx_list == &trx_sys->rw_trx_list - || trx_list == &trx_sys->ro_trx_list - || trx_list == &trx_sys->mysql_trx_list); - - ut_ad(only_ac_nl == (trx_list == &trx_sys->mysql_trx_list)); - - /* Iterate over the transaction list and add each one - to innodb_trx's cache. 
We also add all locks that are relevant - to each transaction into innodb_locks' and innodb_lock_waits' - caches. */ - - for (trx = UT_LIST_GET_FIRST(*trx_list); - trx != NULL; - trx = - (trx_list == &trx_sys->mysql_trx_list - ? UT_LIST_GET_NEXT(mysql_trx_list, trx) - : UT_LIST_GET_NEXT(trx_list, trx))) { - - i_s_trx_row_t* trx_row; - i_s_locks_row_t* requested_lock_row; - - if (trx->state == TRX_STATE_NOT_STARTED - || (only_ac_nl && !trx_is_autocommit_non_locking(trx))) { - - continue; - } - - assert_trx_nonlocking_or_in_list(trx); - - ut_ad(trx->in_ro_trx_list - == (trx_list == &trx_sys->ro_trx_list)); - - ut_ad(trx->in_rw_trx_list - == (trx_list == &trx_sys->rw_trx_list)); - - if (!add_trx_relevant_locks_to_cache(cache, trx, - &requested_lock_row)) { - - cache->is_truncated = TRUE; - return; - } - - trx_row = (i_s_trx_row_t*) - table_cache_create_empty_row(&cache->innodb_trx, - cache); - - /* memory could not be allocated */ - if (trx_row == NULL) { - - cache->is_truncated = TRUE; - return; - } - - if (!fill_trx_row(trx_row, trx, requested_lock_row, cache)) { - - /* memory could not be allocated */ - cache->innodb_trx.rows_used--; - cache->is_truncated = TRUE; - return; - } - } -} - -/*******************************************************************//** -Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the -table cache buffer. Cache must be locked for write. */ -static -void -fetch_data_into_cache( -/*==================*/ - trx_i_s_cache_t* cache) /*!< in/out: cache */ -{ - ut_ad(lock_mutex_own()); - ut_ad(mutex_own(&trx_sys->mutex)); - - trx_i_s_cache_clear(cache); - - fetch_data_into_cache_low(cache, FALSE, &trx_sys->rw_trx_list); - fetch_data_into_cache_low(cache, FALSE, &trx_sys->ro_trx_list); - - /* Only select autocommit non-locking selects because they can - only be on the MySQL transaction list (TRUE). 
*/ - fetch_data_into_cache_low(cache, TRUE, &trx_sys->mysql_trx_list); - - cache->is_truncated = FALSE; -} - -/*******************************************************************//** -Update the transactions cache if it has not been read for some time. -Called from handler/i_s.cc. -@return 0 - fetched, 1 - not */ -UNIV_INTERN -int -trx_i_s_possibly_fetch_data_into_cache( -/*===================================*/ - trx_i_s_cache_t* cache) /*!< in/out: cache */ -{ - if (!can_cache_be_updated(cache)) { - - return(1); - } - - /* We need to read trx_sys and record/table lock queues */ - - lock_mutex_enter(); - - mutex_enter(&trx_sys->mutex); - - fetch_data_into_cache(cache); - - mutex_exit(&trx_sys->mutex); - - lock_mutex_exit(); - - return(0); -} - -/*******************************************************************//** -Returns TRUE if the data in the cache is truncated due to the memory -limit posed by TRX_I_S_MEM_LIMIT. -@return TRUE if truncated */ -UNIV_INTERN -ibool -trx_i_s_cache_is_truncated( -/*=======================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ -{ - return(cache->is_truncated); -} - -/*******************************************************************//** -Initialize INFORMATION SCHEMA trx related cache. 
*/ -UNIV_INTERN -void -trx_i_s_cache_init( -/*===============*/ - trx_i_s_cache_t* cache) /*!< out: cache to init */ -{ - /* The latching is done in the following order: - acquire trx_i_s_cache_t::rw_lock, X - acquire lock mutex - release lock mutex - release trx_i_s_cache_t::rw_lock - acquire trx_i_s_cache_t::rw_lock, S - acquire trx_i_s_cache_t::last_read_mutex - release trx_i_s_cache_t::last_read_mutex - release trx_i_s_cache_t::rw_lock */ - - rw_lock_create(trx_i_s_cache_lock_key, &cache->rw_lock, - SYNC_TRX_I_S_RWLOCK); - - cache->last_read = 0; - - mutex_create(cache_last_read_mutex_key, - &cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ); - - table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t)); - table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t)); - table_cache_init(&cache->innodb_lock_waits, - sizeof(i_s_lock_waits_row_t)); - - cache->locks_hash = hash_create(LOCKS_HASH_CELLS_NUM); - - cache->storage = ha_storage_create(CACHE_STORAGE_INITIAL_SIZE, - CACHE_STORAGE_HASH_CELLS); - - cache->mem_allocd = 0; - - cache->is_truncated = FALSE; -} - -/*******************************************************************//** -Free the INFORMATION SCHEMA trx related cache. */ -UNIV_INTERN -void -trx_i_s_cache_free( -/*===============*/ - trx_i_s_cache_t* cache) /*!< in, own: cache to free */ -{ - rw_lock_free(&cache->rw_lock); - mutex_free(&cache->last_read_mutex); - hash_table_free(cache->locks_hash); - ha_storage_free(cache->storage); - table_cache_free(&cache->innodb_trx); - table_cache_free(&cache->innodb_locks); - table_cache_free(&cache->innodb_lock_waits); - memset(cache, 0, sizeof *cache); -} - -/*******************************************************************//** -Issue a shared/read lock on the tables cache. 
*/ -UNIV_INTERN -void -trx_i_s_cache_start_read( -/*=====================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ -{ - rw_lock_s_lock(&cache->rw_lock); -} - -/*******************************************************************//** -Release a shared/read lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_end_read( -/*===================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ -{ - ullint now; - -#ifdef UNIV_SYNC_DEBUG - ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED)); -#endif - - /* update cache last read time */ - now = ut_time_us(NULL); - mutex_enter(&cache->last_read_mutex); - cache->last_read = now; - mutex_exit(&cache->last_read_mutex); - - rw_lock_s_unlock(&cache->rw_lock); -} - -/*******************************************************************//** -Issue an exclusive/write lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_start_write( -/*======================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ -{ - rw_lock_x_lock(&cache->rw_lock); -} - -/*******************************************************************//** -Release an exclusive/write lock on the tables cache. */ -UNIV_INTERN -void -trx_i_s_cache_end_write( -/*====================*/ - trx_i_s_cache_t* cache) /*!< in: cache */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX)); -#endif - - rw_lock_x_unlock(&cache->rw_lock); -} - -/*******************************************************************//** -Selects a INFORMATION SCHEMA table cache from the whole cache. 
-@return table cache */ -static -i_s_table_cache_t* -cache_select_table( -/*===============*/ - trx_i_s_cache_t* cache, /*!< in: whole cache */ - enum i_s_table table) /*!< in: which table */ -{ - i_s_table_cache_t* table_cache; - -#ifdef UNIV_SYNC_DEBUG - ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED) - || rw_lock_own(&cache->rw_lock, RW_LOCK_EX)); -#endif - - switch (table) { - case I_S_INNODB_TRX: - table_cache = &cache->innodb_trx; - break; - case I_S_INNODB_LOCKS: - table_cache = &cache->innodb_locks; - break; - case I_S_INNODB_LOCK_WAITS: - table_cache = &cache->innodb_lock_waits; - break; - default: - ut_error; - } - - return(table_cache); -} - -/*******************************************************************//** -Retrieves the number of used rows in the cache for a given -INFORMATION SCHEMA table. -@return number of rows */ -UNIV_INTERN -ulint -trx_i_s_cache_get_rows_used( -/*========================*/ - trx_i_s_cache_t* cache, /*!< in: cache */ - enum i_s_table table) /*!< in: which table */ -{ - i_s_table_cache_t* table_cache; - - table_cache = cache_select_table(cache, table); - - return(table_cache->rows_used); -} - -/*******************************************************************//** -Retrieves the nth row (zero-based) in the cache for a given -INFORMATION SCHEMA table. 
-@return row */ -UNIV_INTERN -void* -trx_i_s_cache_get_nth_row( -/*======================*/ - trx_i_s_cache_t* cache, /*!< in: cache */ - enum i_s_table table, /*!< in: which table */ - ulint n) /*!< in: row number */ -{ - i_s_table_cache_t* table_cache; - ulint i; - void* row; - - table_cache = cache_select_table(cache, table); - - ut_a(n < table_cache->rows_used); - - row = NULL; - - for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) { - - if (table_cache->chunks[i].offset - + table_cache->chunks[i].rows_allocd > n) { - - row = (char*) table_cache->chunks[i].base - + (n - table_cache->chunks[i].offset) - * table_cache->row_size; - break; - } - } - - ut_a(row != NULL); - - return(row); -} - -/*******************************************************************//** -Crafts a lock id string from a i_s_locks_row_t object. Returns its -second argument. This function aborts if there is not enough space in -lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you -want to be 100% sure that it will not abort. 
-@return resulting lock id */ -UNIV_INTERN -char* -trx_i_s_create_lock_id( -/*===================*/ - const i_s_locks_row_t* row, /*!< in: innodb_locks row */ - char* lock_id,/*!< out: resulting lock_id */ - ulint lock_id_size)/*!< in: size of the lock id - buffer */ -{ - int res_len; - - /* please adjust TRX_I_S_LOCK_ID_MAX_LEN if you change this */ - - if (row->lock_space != ULINT_UNDEFINED) { - /* record lock */ - res_len = ut_snprintf(lock_id, lock_id_size, - TRX_ID_FMT ":%lu:%lu:%lu", - row->lock_trx_id, row->lock_space, - row->lock_page, row->lock_rec); - } else { - /* table lock */ - res_len = ut_snprintf(lock_id, lock_id_size, - TRX_ID_FMT ":" UINT64PF, - row->lock_trx_id, - row->lock_table_id); - } - - /* the typecast is safe because snprintf(3) never returns - negative result */ - ut_a(res_len >= 0); - ut_a((ulint) res_len < lock_id_size); - - return(lock_id); -} - -UNIV_INTERN -void -trx_i_s_get_lock_sys_memory_usage(ulint *constant, ulint *variable) -{ - trx_t* trx; - - *constant = lock_sys->rec_hash->n_cells * sizeof(hash_cell_t); - *variable = 0; - - if (trx_sys) { - mutex_enter(&trx_sys->mutex); - trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); - while (trx) { - *variable += ((trx->lock.lock_heap) ? mem_heap_get_size(trx->lock.lock_heap) : 0); - trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); - } - mutex_exit(&trx_sys->mutex); - } - -} diff --git a/storage/xtradb/trx/trx0purge.cc b/storage/xtradb/trx/trx0purge.cc deleted file mode 100644 index df4a3217820..00000000000 --- a/storage/xtradb/trx/trx0purge.cc +++ /dev/null @@ -1,1409 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0purge.cc -Purge old versions - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0purge.h" - -#ifdef UNIV_NONINL -#include "trx0purge.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "read0read.h" -#include "fut0fut.h" -#include "que0que.h" -#include "row0purge.h" -#include "row0upd.h" -#include "trx0rec.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "os0thread.h" -#include "srv0mon.h" -#include "mtr0log.h" - -/** Maximum allowable purge history length. <=0 means 'infinite'. */ -UNIV_INTERN ulong srv_max_purge_lag = 0; - -/** Max DML user threads delay in micro-seconds. 
*/ -UNIV_INTERN ulong srv_max_purge_lag_delay = 0; - -/** The global data structure coordinating a purge */ -UNIV_INTERN trx_purge_t* purge_sys = NULL; - -/** A dummy undo record used as a return value when we have a whole undo log -which needs no purge */ -UNIV_INTERN trx_undo_rec_t trx_purge_dummy_rec; - -#ifdef UNIV_PFS_RWLOCK -/* Key to register trx_purge_latch with performance schema */ -UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key; -#endif /* UNIV_PFS_RWLOCK */ - -#ifdef UNIV_PFS_MUTEX -/* Key to register purge_sys_bh_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifdef UNIV_DEBUG -UNIV_INTERN my_bool srv_purge_view_update_only_debug; -#endif /* UNIV_DEBUG */ - -/****************************************************************//** -Builds a purge 'query' graph. The actual purge is performed by executing -this query graph. -@return own: the query graph */ -static -que_t* -trx_purge_graph_build( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - ulint n_purge_threads) /*!< in: number of purge - threads */ -{ - ulint i; - mem_heap_t* heap; - que_fork_t* fork; - - heap = mem_heap_create(512); - fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap); - fork->trx = trx; - - for (i = 0; i < n_purge_threads; ++i) { - que_thr_t* thr; - - thr = que_thr_create(fork, heap); - - thr->child = row_purge_node_create(thr, heap); - } - - return(fork); -} - -/********************************************************************//** -Creates the global purge system control structure and inits the history -mutex. 
*/
UNIV_INTERN
void
trx_purge_sys_create(
/*=================*/
	ulint		n_purge_threads,	/*!< in: number of purge
						threads */
	ib_bh_t*	ib_bh)			/*!< in, own: UNDO log min
						binary heap */
{
	purge_sys = static_cast<trx_purge_t*>(mem_zalloc(sizeof(*purge_sys)));

	purge_sys->state = PURGE_STATE_INIT;
	purge_sys->event = os_event_create();

	/* From here on purge_sys owns ib_bh and must free it. */
	purge_sys->ib_bh = ib_bh;

	/* Synchronisation objects: the purge latch and the mutex that
	protects the binary heap. */
	rw_lock_create(trx_purge_latch_key,
		       &purge_sys->latch, SYNC_PURGE_LATCH);

	mutex_create(
		purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
		SYNC_PURGE_QUEUE);

	purge_sys->heap = mem_heap_create(256);

	ut_a(n_purge_threads > 0);

	/* The purge transaction is the one that belongs to a freshly
	opened session. */
	purge_sys->sess = sess_open();

	trx_t*	purge_trx = purge_sys->sess->trx;

	purge_sys->trx = purge_trx;

	ut_a(purge_trx->sess == purge_sys->sess);

	/* The purge transaction is not a real transaction; it exists only
	because the query thread code needs a transaction object. */
	purge_trx->id = 0;
	purge_trx->start_time = ut_time();
	purge_trx->state = TRX_STATE_ACTIVE;
	purge_trx->op_info = "purge trx";

	purge_sys->query = trx_purge_graph_build(purge_trx, n_purge_threads);

	purge_sys->view = read_view_purge_open(purge_sys->prebuilt_clone,
					       purge_sys->prebuilt_view);
}

/************************************************************************
Frees the global purge system control structure.
*/ -UNIV_INTERN -void -trx_purge_sys_close(void) -/*======================*/ -{ - que_graph_free(purge_sys->query); - - ut_a(purge_sys->trx->id == 0); - ut_a(purge_sys->sess->trx == purge_sys->trx); - - purge_sys->trx->state = TRX_STATE_NOT_STARTED; - - sess_close(purge_sys->sess); - - read_view_free(purge_sys->prebuilt_view); - read_view_free(purge_sys->prebuilt_clone); - - rw_lock_free(&purge_sys->latch); - mutex_free(&purge_sys->bh_mutex); - - mem_heap_free(purge_sys->heap); - - ib_bh_free(purge_sys->ib_bh); - - os_event_free(purge_sys->event); - mem_free(purge_sys); - - purge_sys = NULL; -} - -/*================ UNDO LOG HISTORY LIST =============================*/ - -/********************************************************************//** -Adds the update undo log as the first log in the history list. Removes the -update undo log segment from the rseg slot if it is too big for reuse. */ -UNIV_INTERN -void -trx_purge_add_update_undo_to_history( -/*=================================*/ - trx_t* trx, /*!< in: transaction */ - page_t* undo_page, /*!< in: update undo log header page, - x-latched */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_undo_t* undo; - trx_rseg_t* rseg; - trx_rsegf_t* rseg_header; - trx_ulogf_t* undo_header; - - undo = trx->update_undo; - rseg = undo->rseg; - - rseg_header = trx_rsegf_get( - undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no, - mtr); - - undo_header = undo_page + undo->hdr_offset; - - if (undo->state != TRX_UNDO_CACHED) { - ulint hist_size; -#ifdef UNIV_DEBUG - trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR; -#endif /* UNIV_DEBUG */ - - /* The undo log segment will not be reused */ - - if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - ut_error; - } - - trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); - - hist_size = mtr_read_ulint( - rseg_header + TRX_RSEG_HISTORY_SIZE, 
MLOG_4BYTES, mtr); - - ut_ad(undo->size == flst_get_len( - seg_header + TRX_UNDO_PAGE_LIST, mtr)); - - mlog_write_ulint( - rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size + undo->size, MLOG_4BYTES, mtr); - } - - /* Before any transaction-generating background threads or the - purge have been started, recv_recovery_rollback_active() can - start transactions in row_merge_drop_temp_indexes() and - fts_drop_orphaned_tables(), and roll back recovered transactions. - After the purge thread has been given permission to exit, - in fast shutdown, we may roll back transactions (trx->undo_no==0) - in THD::cleanup() invoked from unlink_thd(). */ - ut_ad(srv_undo_sources - || ((srv_startup_is_before_trx_rollback_phase - || trx_rollback_or_clean_is_active) - && purge_sys->state == PURGE_STATE_INIT) - || (trx->undo_no == 0 && srv_fast_shutdown)); - - /* Add the log as the first in the history list */ - flst_add_first(rseg_header + TRX_RSEG_HISTORY, - undo_header + TRX_UNDO_HISTORY_NODE, mtr); - -#ifdef HAVE_ATOMIC_BUILTINS - os_atomic_increment_ulint(&trx_sys->rseg_history_len, 1); -#else - mutex_enter(&trx_sys->mutex); - ++trx_sys->rseg_history_len; - mutex_exit(&trx_sys->mutex); -#endif /* HAVE_ATOMIC_BUILTINS */ - - srv_wake_purge_thread_if_not_active(); - - /* Write the trx number to the undo log header */ - mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr); - - /* Write information about delete markings to the undo log header */ - - if (!undo->del_marks) { - mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE, - MLOG_2BYTES, mtr); - } - - if (rseg->last_page_no == FIL_NULL) { - rseg->last_page_no = undo->hdr_page_no; - rseg->last_offset = undo->hdr_offset; - rseg->last_trx_no = trx->no; - rseg->last_del_marks = undo->del_marks; - } -} - -/** Remove undo log header from the history list. -@param[in,out] rseg_hdr rollback segment header -@param[in] log_hdr undo log segment header -@param[in,out] mtr mini transaction. 
*/ -static -void -trx_purge_remove_log_hdr( - trx_rsegf_t* rseg_hdr, - trx_ulogf_t* log_hdr, - mtr_t* mtr) -{ - flst_remove(rseg_hdr + TRX_RSEG_HISTORY, - log_hdr + TRX_UNDO_HISTORY_NODE, mtr); - - os_atomic_decrement_ulint(&trx_sys->rseg_history_len, 1); -} - -/** Frees an undo log segment which is in the history list. Removes the -undo log hdr from the history list. -@param[in,out] rseg rollback segment -@param[in] hdr_addr file address of log_hdr -@param[in] noredo skip redo logging. */ -static -void -trx_purge_free_segment( - trx_rseg_t* rseg, - fil_addr_t hdr_addr) -{ - mtr_t mtr; - trx_rsegf_t* rseg_hdr; - trx_ulogf_t* log_hdr; - trx_usegf_t* seg_hdr; - ulint seg_size; - ulint hist_size; - ibool marked = FALSE; - - /* fputs("Freeing an update undo log segment\n", stderr); */ - - for (;;) { - page_t* undo_page; - - mtr_start(&mtr); - - mutex_enter(&rseg->mutex); - - rseg_hdr = trx_rsegf_get( - rseg->space, rseg->zip_size, rseg->page_no, &mtr); - - undo_page = trx_undo_page_get( - rseg->space, rseg->zip_size, hdr_addr.page, &mtr); - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - log_hdr = undo_page + hdr_addr.boffset; - - /* Mark the last undo log totally purged, so that if the - system crashes, the tail of the undo log will not get accessed - again. The list of pages in the undo log tail gets inconsistent - during the freeing of the segment, and therefore purge should - not try to access them again. */ - - if (!marked) { - mlog_write_ulint( - log_hdr + TRX_UNDO_DEL_MARKS, FALSE, - MLOG_2BYTES, &mtr); - - marked = TRUE; - } - - if (fseg_free_step_not_header( - seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr)) { - - break; - } - - mutex_exit(&rseg->mutex); - - mtr_commit(&mtr); - } - - /* The page list may now be inconsistent, but the length field - stored in the list base node tells us how big it was before we - started the freeing. 
*/ - - seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr); - - /* We may free the undo log segment header page; it must be freed - within the same mtr as the undo log header is removed from the - history list: otherwise, in case of a database crash, the segment - could become inaccessible garbage in the file space. */ - - trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr); - - do { - - /* Here we assume that a file segment with just the header - page can be freed in a few steps, so that the buffer pool - is not flooded with bufferfixed pages: see the note in - fsp0fsp.cc. */ - - } while(!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr)); - - hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, &mtr); - ut_ad(hist_size >= seg_size); - - mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE, - hist_size - seg_size, MLOG_4BYTES, &mtr); - - ut_ad(rseg->curr_size >= seg_size); - - rseg->curr_size -= seg_size; - - mutex_exit(&(rseg->mutex)); - - mtr_commit(&mtr); -} - -/********************************************************************//** -Removes unnecessary history data from a rollback segment. 
*/ -static -void -trx_purge_truncate_rseg_history( -/*============================*/ - trx_rseg_t* rseg, /*!< in: rollback segment */ - const purge_iter_t* limit) /*!< in: truncate offset */ -{ - fil_addr_t hdr_addr; - fil_addr_t prev_hdr_addr; - trx_rsegf_t* rseg_hdr; - page_t* undo_page; - trx_ulogf_t* log_hdr; - trx_usegf_t* seg_hdr; - mtr_t mtr; - trx_id_t undo_trx_no; - - mtr_start(&mtr); - mutex_enter(&(rseg->mutex)); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size, - rseg->page_no, &mtr); - - hdr_addr = trx_purge_get_log_from_hist( - flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr)); -loop: - if (hdr_addr.page == FIL_NULL) { - - mutex_exit(&(rseg->mutex)); - - mtr_commit(&mtr); - - return; - } - - undo_page = trx_undo_page_get(rseg->space, rseg->zip_size, - hdr_addr.page, &mtr); - - log_hdr = undo_page + hdr_addr.boffset; - - undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); - - if (undo_trx_no >= limit->trx_no) { - - if (undo_trx_no == limit->trx_no) { - - trx_undo_truncate_start( - rseg, rseg->space, hdr_addr.page, - hdr_addr.boffset, limit->undo_no); - } - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - return; - } - - prev_hdr_addr = trx_purge_get_log_from_hist( - flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr)); - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE) - && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) { - - /* We can free the whole log segment */ - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - /* calls the trx_purge_remove_log_hdr() - inside trx_purge_free_segment(). */ - trx_purge_free_segment(rseg, hdr_addr); - - } else { - /* Remove the log hdr from the rseg history. 
*/ - trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr); - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - } - - mtr_start(&mtr); - mutex_enter(&(rseg->mutex)); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size, - rseg->page_no, &mtr); - - hdr_addr = prev_hdr_addr; - - goto loop; -} - -/********************************************************************//** -Removes unnecessary history data from rollback segments. NOTE that when this -function is called, the caller must not have any latches on undo log pages! */ -static -void -trx_purge_truncate_history( -/*========================*/ - purge_iter_t* limit, /*!< in: truncate limit */ - const read_view_t* view) /*!< in: purge view */ -{ - ulint i; - - /* We play safe and set the truncate limit at most to the purge view - low_limit number, though this is not necessary */ - - if (limit->trx_no >= view->low_limit_no) { - limit->trx_no = view->low_limit_no; - limit->undo_no = 0; - } - - ut_ad(limit->trx_no <= purge_sys->view->low_limit_no); - - for (i = 0; i < TRX_SYS_N_RSEGS; ++i) { - trx_rseg_t* rseg = trx_sys->rseg_array[i]; - - if (rseg != NULL) { - ut_a(rseg->id == i); - trx_purge_truncate_rseg_history(rseg, limit); - } - } -} - -/***********************************************************************//** -Updates the last not yet purged history log info in rseg when we have purged -a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. 
*/ -static -void -trx_purge_rseg_get_next_history_log( -/*================================*/ - trx_rseg_t* rseg, /*!< in: rollback segment */ - ulint* n_pages_handled)/*!< in/out: number of UNDO pages - handled */ -{ - const void* ptr; - page_t* undo_page; - trx_ulogf_t* log_hdr; - fil_addr_t prev_log_addr; - trx_id_t trx_no; - ibool del_marks; - mtr_t mtr; - rseg_queue_t rseg_queue; - - mutex_enter(&(rseg->mutex)); - - ut_a(rseg->last_page_no != FIL_NULL); - - purge_sys->iter.trx_no = rseg->last_trx_no + 1; - purge_sys->iter.undo_no = 0; - purge_sys->next_stored = FALSE; - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched( - rseg->space, rseg->zip_size, rseg->last_page_no, &mtr); - - log_hdr = undo_page + rseg->last_offset; - - /* Increase the purge page count by one for every handled log */ - - (*n_pages_handled)++; - - prev_log_addr = trx_purge_get_log_from_hist( - flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr)); - - if (prev_log_addr.page == FIL_NULL) { - /* No logs left in the history list */ - - rseg->last_page_no = FIL_NULL; - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - mutex_enter(&trx_sys->mutex); - - /* Add debug code to track history list corruption reported - on the MySQL mailing list on Nov 9, 2004. The fut0lst.cc - file-based list was corrupt. The prev node pointer was - FIL_NULL, even though the list length was over 8 million nodes! - We assume that purge truncates the history list in large - size pieces, and if we here reach the head of the list, the - list cannot be longer than 2000 000 undo logs now. */ - - if (trx_sys->rseg_history_len > 2000000) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: purge reached the" - " head of the history list,\n" - "InnoDB: but its length is still" - " reported as %lu! 
Make a detailed bug\n" - "InnoDB: report, and submit it" - " to http://bugs.mysql.com\n", - (ulong) trx_sys->rseg_history_len); - ut_ad(0); - } - - mutex_exit(&trx_sys->mutex); - - return; - } - - mutex_exit(&rseg->mutex); - - mtr_commit(&mtr); - - /* Read the trx number and del marks from the previous log header */ - mtr_start(&mtr); - - log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size, - prev_log_addr.page, &mtr) - + prev_log_addr.boffset; - - trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); - - del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS); - - mtr_commit(&mtr); - - mutex_enter(&(rseg->mutex)); - - rseg->last_page_no = prev_log_addr.page; - rseg->last_offset = prev_log_addr.boffset; - rseg->last_trx_no = trx_no; - rseg->last_del_marks = del_marks; - - rseg_queue.rseg = rseg; - rseg_queue.trx_no = rseg->last_trx_no; - - /* Purge can also produce events, however these are already ordered - in the rollback segment and any user generated event will be greater - than the events that Purge produces. ie. Purge can never produce - events from an empty rollback segment. */ - - mutex_enter(&purge_sys->bh_mutex); - - ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); - ut_a(ptr != NULL); - - mutex_exit(&purge_sys->bh_mutex); - - mutex_exit(&rseg->mutex); -} - -/***********************************************************************//** -Chooses the rollback segment with the smallest trx_id. -@return zip_size if log is for a compressed table, ULINT_UNDEFINED if - no rollback segments to purge, 0 for non compressed tables. */ -static -ulint -trx_purge_get_rseg_with_min_trx_id( -/*===============================*/ - trx_purge_t* purge_sys) /*!< in/out: purge instance */ - -{ - ulint zip_size = 0; - - mutex_enter(&purge_sys->bh_mutex); - - /* Only purge consumes events from the binary heap, user - threads only produce the events. 
*/ - - if (!ib_bh_is_empty(purge_sys->ib_bh)) { - trx_rseg_t* rseg; - - rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg; - ib_bh_pop(purge_sys->ib_bh); - - mutex_exit(&purge_sys->bh_mutex); - - purge_sys->rseg = rseg; - } else { - mutex_exit(&purge_sys->bh_mutex); - - purge_sys->rseg = NULL; - - return(ULINT_UNDEFINED); - } - - ut_a(purge_sys->rseg != NULL); - - mutex_enter(&purge_sys->rseg->mutex); - - ut_a(purge_sys->rseg->last_page_no != FIL_NULL); - - /* We assume in purge of externally stored fields that space id is - in the range of UNDO tablespace space ids */ - ut_a(purge_sys->rseg->space == 0 - || srv_is_undo_tablespace(purge_sys->rseg->space)); - - zip_size = purge_sys->rseg->zip_size; - - ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no); - - purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no; - purge_sys->hdr_offset = purge_sys->rseg->last_offset; - purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; - - mutex_exit(&purge_sys->rseg->mutex); - - return(zip_size); -} - -/***********************************************************************//** -Position the purge sys "iterator" on the undo record to use for purging. 
*/ -static -void -trx_purge_read_undo_rec( -/*====================*/ - trx_purge_t* purge_sys, /*!< in/out: purge instance */ - ulint zip_size) /*!< in: block size or 0 */ -{ - ulint offset; - ulint page_no; - ib_uint64_t undo_no; - - purge_sys->hdr_offset = purge_sys->rseg->last_offset; - page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; - - if (purge_sys->rseg->last_del_marks) { - mtr_t mtr; - trx_undo_rec_t* undo_rec = NULL; - - mtr_start(&mtr); - - undo_rec = trx_undo_get_first_rec( - purge_sys->rseg->space, - zip_size, - purge_sys->hdr_page_no, - purge_sys->hdr_offset, RW_S_LATCH, &mtr); - - if (undo_rec != NULL) { - offset = page_offset(undo_rec); - undo_no = trx_undo_rec_get_undo_no(undo_rec); - page_no = page_get_page_no(page_align(undo_rec)); - } else { - offset = 0; - undo_no = 0; - } - - mtr_commit(&mtr); - } else { - offset = 0; - undo_no = 0; - } - - purge_sys->offset = offset; - purge_sys->page_no = page_no; - purge_sys->iter.undo_no = undo_no; - - purge_sys->next_stored = TRUE; -} - -/***********************************************************************//** -Chooses the next undo log to purge and updates the info in purge_sys. This -function is used to initialize purge_sys when the next record to purge is -not known, and also to update the purge system info on the next record when -purge has handled the whole undo log for a transaction. */ -static -void -trx_purge_choose_next_log(void) -/*===========================*/ -{ - ulint zip_size; - - ut_ad(purge_sys->next_stored == FALSE); - - zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys); - - if (purge_sys->rseg != NULL) { - trx_purge_read_undo_rec(purge_sys, zip_size); - } else { - /* There is nothing to do yet. */ - os_thread_yield(); - } -} - -/***********************************************************************//** -Gets the next record to purge and updates the info in the purge system. 
-@return copy of an undo log record or pointer to the dummy undo log record */ -static -trx_undo_rec_t* -trx_purge_get_next_rec( -/*===================*/ - ulint* n_pages_handled,/*!< in/out: number of UNDO pages - handled */ - mem_heap_t* heap) /*!< in: memory heap where copied */ -{ - trx_undo_rec_t* rec; - trx_undo_rec_t* rec_copy; - trx_undo_rec_t* rec2; - page_t* undo_page; - page_t* page; - ulint offset; - ulint page_no; - ulint space; - ulint zip_size; - mtr_t mtr; - - ut_ad(purge_sys->next_stored); - ut_ad(purge_sys->iter.trx_no < purge_sys->view->low_limit_no); - - space = purge_sys->rseg->space; - zip_size = purge_sys->rseg->zip_size; - page_no = purge_sys->page_no; - offset = purge_sys->offset; - - if (offset == 0) { - /* It is the dummy undo log record, which means that there is - no need to purge this undo log */ - - trx_purge_rseg_get_next_history_log( - purge_sys->rseg, n_pages_handled); - - /* Look for the next undo log and record to purge */ - - trx_purge_choose_next_log(); - - return(&trx_purge_dummy_rec); - } - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr); - - rec = undo_page + offset; - - rec2 = rec; - - for (;;) { - ulint type; - trx_undo_rec_t* next_rec; - ulint cmpl_info; - - /* Try first to find the next record which requires a purge - operation from the same page of the same undo log */ - - next_rec = trx_undo_page_get_next_rec( - rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset); - - if (next_rec == NULL) { - rec2 = trx_undo_get_next_rec( - rec2, purge_sys->hdr_page_no, - purge_sys->hdr_offset, &mtr); - break; - } - - rec2 = next_rec; - - type = trx_undo_rec_get_type(rec2); - - if (type == TRX_UNDO_DEL_MARK_REC) { - - break; - } - - cmpl_info = trx_undo_rec_get_cmpl_info(rec2); - - if (trx_undo_rec_get_extern_storage(rec2)) { - break; - } - - if ((type == TRX_UNDO_UPD_EXIST_REC) - && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - break; - } - } - - if (rec2 == NULL) { - mtr_commit(&mtr); - - 
trx_purge_rseg_get_next_history_log( - purge_sys->rseg, n_pages_handled); - - /* Look for the next undo log and record to purge */ - - trx_purge_choose_next_log(); - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched( - space, zip_size, page_no, &mtr); - - rec = undo_page + offset; - } else { - page = page_align(rec2); - - purge_sys->offset = rec2 - page; - purge_sys->page_no = page_get_page_no(page); - purge_sys->iter.undo_no = trx_undo_rec_get_undo_no(rec2); - - if (undo_page != page) { - /* We advance to a new page of the undo log: */ - (*n_pages_handled)++; - } - } - - rec_copy = trx_undo_rec_copy(rec, heap); - - mtr_commit(&mtr); - - return(rec_copy); -} - -/********************************************************************//** -Fetches the next undo log record from the history list to purge. It must be -released with the corresponding release function. -@return copy of an undo log record or pointer to trx_purge_dummy_rec, -if the whole undo log can skipped in purge; NULL if none left */ -static MY_ATTRIBUTE((warn_unused_result, nonnull)) -trx_undo_rec_t* -trx_purge_fetch_next_rec( -/*=====================*/ - roll_ptr_t* roll_ptr, /*!< out: roll pointer to undo record */ - ulint* n_pages_handled,/*!< in/out: number of UNDO log pages - handled */ - mem_heap_t* heap) /*!< in: memory heap where copied */ -{ - if (!purge_sys->next_stored) { - trx_purge_choose_next_log(); - - if (!purge_sys->next_stored) { - - if (srv_print_thread_releases) { - fprintf(stderr, - "Purge: No logs left in the" - " history list\n"); - } - - return(NULL); - } - } - - if (purge_sys->iter.trx_no >= purge_sys->view->low_limit_no) { - - return(NULL); - } - - /* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n", - os_thread_get_curr_id(), iter->trx_no, iter->undo_no); */ - - *roll_ptr = trx_undo_build_roll_ptr( - FALSE, purge_sys->rseg->id, - purge_sys->page_no, purge_sys->offset); - - /* The following call will advance the stored values of the - purge iterator. 
*/ - - return(trx_purge_get_next_rec(n_pages_handled, heap)); -} - -/*******************************************************************//** -This function runs a purge batch. -@return number of undo log pages handled in the batch */ -static -ulint -trx_purge_attach_undo_recs( -/*=======================*/ - ulint n_purge_threads,/*!< in: number of purge threads */ - trx_purge_t* purge_sys, /*!< in/out: purge instance */ - purge_iter_t* limit, /*!< out: records read up to */ - ulint batch_size) /*!< in: no. of pages to purge */ -{ - que_thr_t* thr; - ulint i = 0; - ulint n_pages_handled = 0; - ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs); - - ut_a(n_purge_threads > 0); - - *limit = purge_sys->iter; - - /* Debug code to validate some pre-requisites and reset done flag. */ - for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs); - thr != NULL && i < n_purge_threads; - thr = UT_LIST_GET_NEXT(thrs, thr), ++i) { - - purge_node_t* node; - - /* Get the purge node. */ - node = (purge_node_t*) thr->child; - - ut_a(que_node_get_type(node) == QUE_NODE_PURGE); - ut_a(node->undo_recs == NULL); - ut_a(node->done); - - node->done = FALSE; - } - - /* There should never be fewer nodes than threads, the inverse - however is allowed because we only use purge threads as needed. */ - ut_a(i == n_purge_threads); - - /* Fetch and parse the UNDO records. The UNDO records are added - to a per purge node vector. */ - thr = UT_LIST_GET_FIRST(purge_sys->query->thrs); - ut_a(n_thrs > 0 && thr != NULL); - - ut_ad(trx_purge_check_limit()); - - i = 0; - - for (;;) { - purge_node_t* node; - trx_purge_rec_t* purge_rec; - - ut_a(!thr->is_active); - - /* Get the purge node. */ - node = (purge_node_t*) thr->child; - ut_a(que_node_get_type(node) == QUE_NODE_PURGE); - - purge_rec = static_cast<trx_purge_rec_t*>( - mem_heap_zalloc(node->heap, sizeof(*purge_rec))); - - /* Track the max {trx_id, undo_no} for truncating the - UNDO logs once we have purged the records. 
*/ - - if (purge_sys->iter.trx_no > limit->trx_no - || (purge_sys->iter.trx_no == limit->trx_no - && purge_sys->iter.undo_no >= limit->undo_no)) { - - *limit = purge_sys->iter; - } - - /* Fetch the next record, and advance the purge_sys->iter. */ - purge_rec->undo_rec = trx_purge_fetch_next_rec( - &purge_rec->roll_ptr, &n_pages_handled, node->heap); - - if (purge_rec->undo_rec != NULL) { - - if (node->undo_recs == NULL) { - node->undo_recs = ib_vector_create( - ib_heap_allocator_create(node->heap), - sizeof(trx_purge_rec_t), - batch_size); - } else { - ut_a(!ib_vector_is_empty(node->undo_recs)); - } - - ib_vector_push(node->undo_recs, purge_rec); - - if (n_pages_handled >= batch_size) { - - break; - } - } else { - break; - } - - thr = UT_LIST_GET_NEXT(thrs, thr); - - if (!(++i % n_purge_threads)) { - thr = UT_LIST_GET_FIRST(purge_sys->query->thrs); - } - - ut_a(thr != NULL); - } - - ut_ad(trx_purge_check_limit()); - - return(n_pages_handled); -} - -/*******************************************************************//** -Calculate the DML delay required. -@return delay in microseconds or ULINT_MAX */ -static -ulint -trx_purge_dml_delay(void) -/*=====================*/ -{ - /* Determine how much data manipulation language (DML) statements - need to be delayed in order to reduce the lagging of the purge - thread. */ - ulint delay = 0; /* in microseconds; default: no delay */ - - /* If purge lag is set (ie. > 0) then calculate the new DML delay. - Note: we do a dirty read of the trx_sys_t data structure here, - without holding trx_sys->mutex. */ - - if (srv_max_purge_lag > 0) { - float ratio; - - ratio = float(trx_sys->rseg_history_len) / srv_max_purge_lag; - - if (ratio > 1.0) { - /* If the history list length exceeds the - srv_max_purge_lag, the data manipulation - statements are delayed by at least 5000 - microseconds. 
*/ - delay = (ulint) ((ratio - .5) * 10000); - } - - if (delay > srv_max_purge_lag_delay) { - delay = srv_max_purge_lag_delay; - } - - MONITOR_SET(MONITOR_DML_PURGE_DELAY, delay); - } - - return(delay); -} - -/*******************************************************************//** -Wait for pending purge jobs to complete. */ -static -void -trx_purge_wait_for_workers_to_complete( -/*===================================*/ - trx_purge_t* purge_sys) /*!< in: purge instance */ -{ - ulint n_submitted = purge_sys->n_submitted; - -#ifdef HAVE_ATOMIC_BUILTINS - /* Ensure that the work queue empties out. */ - while (!os_compare_and_swap_ulint( - &purge_sys->n_completed, n_submitted, n_submitted)) { -#else - mutex_enter(&purge_sys->bh_mutex); - - while (purge_sys->n_completed < n_submitted) { -#endif /* HAVE_ATOMIC_BUILTINS */ - -#ifndef HAVE_ATOMIC_BUILTINS - mutex_exit(&purge_sys->bh_mutex); -#endif /* !HAVE_ATOMIC_BUILTINS */ - - if (srv_get_task_queue_length() > 0) { - srv_release_threads(SRV_WORKER, 1); - } - - os_thread_yield(); - -#ifndef HAVE_ATOMIC_BUILTINS - mutex_enter(&purge_sys->bh_mutex); -#endif /* !HAVE_ATOMIC_BUILTINS */ - } - -#ifndef HAVE_ATOMIC_BUILTINS - mutex_exit(&purge_sys->bh_mutex); -#endif /* !HAVE_ATOMIC_BUILTINS */ - - /* None of the worker threads should be doing any work. */ - ut_a(purge_sys->n_submitted == purge_sys->n_completed); - - /* There should be no outstanding tasks as long - as the worker threads are active. */ - ut_a(srv_get_task_queue_length() == 0); -} - -/******************************************************************//** -Remove old historical changes from the rollback segments. 
*/ -static -void -trx_purge_truncate(void) -/*====================*/ -{ - ut_ad(trx_purge_check_limit()); - - if (purge_sys->limit.trx_no == 0) { - trx_purge_truncate_history(&purge_sys->iter, purge_sys->view); - } else { - trx_purge_truncate_history(&purge_sys->limit, purge_sys->view); - } -} - -/*******************************************************************//** -This function runs a purge batch. -@return number of undo log pages handled in the batch */ -UNIV_INTERN -ulint -trx_purge( -/*======*/ - ulint n_purge_threads, /*!< in: number of purge tasks - to submit to the work queue */ - ulint batch_size, /*!< in: the maximum number of records - to purge in one batch */ - bool truncate) /*!< in: truncate history if true */ -{ - que_thr_t* thr = NULL; - ulint n_pages_handled; - - ut_a(n_purge_threads > 0); - - srv_dml_needed_delay = trx_purge_dml_delay(); - - /* The number of tasks submitted should be completed. */ - ut_a(purge_sys->n_submitted == purge_sys->n_completed); - - rw_lock_x_lock(&purge_sys->latch); - - purge_sys->view = NULL; - - mem_heap_empty(purge_sys->heap); - - purge_sys->view = read_view_purge_open(purge_sys->prebuilt_clone, - purge_sys->prebuilt_view); - - rw_lock_x_unlock(&purge_sys->latch); - -#ifdef UNIV_DEBUG - if (srv_purge_view_update_only_debug) { - return(0); - } -#endif - - /* Fetch the UNDO recs that need to be purged. */ - n_pages_handled = trx_purge_attach_undo_recs( - n_purge_threads, purge_sys, &purge_sys->limit, batch_size); - - /* Do we do an asynchronous purge or not ? */ - if (n_purge_threads > 1) { - ulint i = 0; - - /* Submit the tasks to the work queue. */ - for (i = 0; i < n_purge_threads - 1; ++i) { - thr = que_fork_scheduler_round_robin( - purge_sys->query, thr); - - ut_a(thr != NULL); - - srv_que_task_enqueue_low(thr); - } - - thr = que_fork_scheduler_round_robin(purge_sys->query, thr); - ut_a(thr != NULL); - - purge_sys->n_submitted += n_purge_threads - 1; - - goto run_synchronously; - - /* Do it synchronously. 
*/ - } else { - thr = que_fork_scheduler_round_robin(purge_sys->query, NULL); - ut_ad(thr); - -run_synchronously: - ++purge_sys->n_submitted; - - que_run_threads(thr); - - os_atomic_inc_ulint( - &purge_sys->bh_mutex, &purge_sys->n_completed, 1); - - if (n_purge_threads > 1) { - trx_purge_wait_for_workers_to_complete(purge_sys); - } - } - - ut_a(purge_sys->n_submitted == purge_sys->n_completed); - -#ifdef UNIV_DEBUG - rw_lock_x_lock(&purge_sys->latch); - if (purge_sys->limit.trx_no == 0) { - purge_sys->done = purge_sys->iter; - } else { - purge_sys->done = purge_sys->limit; - } - rw_lock_x_unlock(&purge_sys->latch); -#endif /* UNIV_DEBUG */ - - if (truncate) { - trx_purge_truncate(); - } - - MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1); - MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled); - - return(n_pages_handled); -} - -/*******************************************************************//** -Get the purge state. -@return purge state. */ -UNIV_INTERN -purge_state_t -trx_purge_state(void) -/*=================*/ -{ - purge_state_t state; - - rw_lock_x_lock(&purge_sys->latch); - - state = purge_sys->state; - - rw_lock_x_unlock(&purge_sys->latch); - - return(state); -} - -/*******************************************************************//** -Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */ -UNIV_INTERN -void -trx_purge_stop(void) -/*================*/ -{ - ut_a(srv_n_purge_threads > 0); - - rw_lock_x_lock(&purge_sys->latch); - - const ib_int64_t sig_count = os_event_reset(purge_sys->event); - const purge_state_t state = purge_sys->state; - - ut_a(state == PURGE_STATE_RUN || state == PURGE_STATE_STOP); - - ++purge_sys->n_stop; - - if (state == PURGE_STATE_RUN) { - ib_logf(IB_LOG_LEVEL_INFO, "Stopping purge"); - - /* We need to wakeup the purge thread in case it is suspended, - so that it can acknowledge the state change. 
*/ - - srv_purge_wakeup(); - } - - purge_sys->state = PURGE_STATE_STOP; - - rw_lock_x_unlock(&purge_sys->latch); - - if (state != PURGE_STATE_STOP) { - - /* Wait for purge coordinator to signal that it - is suspended. */ - os_event_wait_low(purge_sys->event, sig_count); - } else { - bool once = true; - - rw_lock_x_lock(&purge_sys->latch); - - /* Wait for purge to signal that it has actually stopped. */ - while (purge_sys->running) { - - if (once) { - ib_logf(IB_LOG_LEVEL_INFO, - "Waiting for purge to stop"); - once = false; - } - - rw_lock_x_unlock(&purge_sys->latch); - - os_thread_sleep(10000); - - rw_lock_x_lock(&purge_sys->latch); - } - - rw_lock_x_unlock(&purge_sys->latch); - } - - MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1); -} - -/*******************************************************************//** -Resume purge, move to PURGE_STATE_RUN. */ -UNIV_INTERN -void -trx_purge_run(void) -/*===============*/ -{ - rw_lock_x_lock(&purge_sys->latch); - - switch(purge_sys->state) { - case PURGE_STATE_INIT: - case PURGE_STATE_EXIT: - case PURGE_STATE_DISABLED: - ut_error; - - case PURGE_STATE_RUN: - case PURGE_STATE_STOP: - break; - } - - if (purge_sys->n_stop > 0) { - - ut_a(purge_sys->state == PURGE_STATE_STOP); - - --purge_sys->n_stop; - - if (purge_sys->n_stop == 0) { - - ib_logf(IB_LOG_LEVEL_INFO, "Resuming purge"); - - purge_sys->state = PURGE_STATE_RUN; - } - - MONITOR_INC_VALUE(MONITOR_PURGE_RESUME_COUNT, 1); - } else { - ut_a(purge_sys->state == PURGE_STATE_RUN); - } - - rw_lock_x_unlock(&purge_sys->latch); - - srv_purge_wakeup(); -} diff --git a/storage/xtradb/trx/trx0rec.cc b/storage/xtradb/trx/trx0rec.cc deleted file mode 100644 index 8c0904dd57b..00000000000 --- a/storage/xtradb/trx/trx0rec.cc +++ /dev/null @@ -1,1633 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0rec.cc -Transaction undo log record - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0rec.h" - -#ifdef UNIV_NONINL -#include "trx0rec.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0undo.h" -#include "mtr0log.h" -#ifndef UNIV_HOTBACKUP -#include "dict0dict.h" -#include "ut0mem.h" -#include "read0read.h" -#include "row0ext.h" -#include "row0upd.h" -#include "que0que.h" -#include "trx0purge.h" -#include "trx0rseg.h" -#include "row0row.h" - -/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/ - -/**********************************************************************//** -Writes the mtr log entry of the inserted undo log record on the undo log -page. 
*/ -UNIV_INLINE -void -trx_undof_page_add_undo_rec_log( -/*============================*/ - page_t* undo_page, /*!< in: undo log page */ - ulint old_free, /*!< in: start offset of the inserted entry */ - ulint new_free, /*!< in: end offset of the entry */ - mtr_t* mtr) /*!< in: mtr */ -{ - byte* log_ptr; - const byte* log_end; - ulint len; - - log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN); - - if (log_ptr == NULL) { - - return; - } - - log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN]; - log_ptr = mlog_write_initial_log_record_fast( - undo_page, MLOG_UNDO_INSERT, log_ptr, mtr); - len = new_free - old_free - 4; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - - if (log_ptr + len <= log_end) { - memcpy(log_ptr, undo_page + old_free + 2, len); - mlog_close(mtr, log_ptr + len); - } else { - mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, undo_page + old_free + 2, len); - } -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses a redo log record of adding an undo log record. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_add_undo_rec( -/*========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page) /*!< in: page or NULL */ -{ - ulint len; - byte* rec; - ulint first_free; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - len = mach_read_from_2(ptr); - ptr += 2; - - if (end_ptr < ptr + len) { - - return(NULL); - } - - if (page == NULL) { - - return(ptr + len); - } - - first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - rec = page + first_free; - - mach_write_to_2(rec, first_free + 4 + len); - mach_write_to_2(rec + 2 + len, first_free); - - mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, - first_free + 4 + len); - ut_memcpy(rec + 2, ptr, len); - - return(ptr + len); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Calculates the free space left for extending an undo log record. -@return bytes left */ -UNIV_INLINE -ulint -trx_undo_left( -/*==========*/ - const page_t* page, /*!< in: undo log page */ - const byte* ptr) /*!< in: pointer to page */ -{ - /* The '- 10' is a safety margin, in case we have some small - calculation error below */ - - return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END); -} - -/**********************************************************************//** -Set the next and previous pointers in the undo page for the undo record -that was written to ptr. Update the first free value by the number of bytes -written for this undo record. -@return offset of the inserted entry on the page if succeeded, 0 if fail */ -static -ulint -trx_undo_page_set_next_prev_and_add( -/*================================*/ - page_t* undo_page, /*!< in/out: undo log page */ - byte* ptr, /*!< in: ptr up to where data has been - written on this undo page. 
*/ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint first_free; /*!< offset within undo_page */ - ulint end_of_rec; /*!< offset within undo_page */ - byte* ptr_to_first_free; - /* pointer within undo_page - that points to the next free - offset value within undo_page.*/ - - ut_ad(ptr > undo_page); - ut_ad(ptr < undo_page + UNIV_PAGE_SIZE); - - if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) { - - return(0); - } - - ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE; - - first_free = mach_read_from_2(ptr_to_first_free); - - /* Write offset of the previous undo log record */ - mach_write_to_2(ptr, first_free); - ptr += 2; - - end_of_rec = ptr - undo_page; - - /* Write offset of the next undo log record */ - mach_write_to_2(undo_page + first_free, end_of_rec); - - /* Update the offset to first free undo record */ - mach_write_to_2(ptr_to_first_free, end_of_rec); - - /* Write this log entry to the UNDO log */ - trx_undof_page_add_undo_rec_log(undo_page, first_free, - end_of_rec, mtr); - - return(first_free); -} - -/**********************************************************************//** -Reports in the undo log of an insert of a clustered index record. 
-@return offset of the inserted entry on the page if succeed, 0 if fail */ -static -ulint -trx_undo_page_report_insert( -/*========================*/ - page_t* undo_page, /*!< in: undo log page */ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: clustered index */ - const dtuple_t* clust_entry, /*!< in: index entry which will be - inserted to the clustered index */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint first_free; - byte* ptr; - ulint i; - - ut_ad(dict_index_is_clust(index)); - ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); - - first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - ptr = undo_page + first_free; - - ut_ad(first_free <= UNIV_PAGE_SIZE); - - if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) { - - /* Not enough space for writing the general parameters */ - - return(0); - } - - /* Reserve 2 bytes for the pointer to the next undo log record */ - ptr += 2; - - /* Store first some general parameters to the undo log */ - *ptr++ = TRX_UNDO_INSERT_REC; - ptr += mach_ull_write_much_compressed(ptr, trx->undo_no); - ptr += mach_ull_write_much_compressed(ptr, index->table->id); - /*----------------------------------------*/ - /* Store then the fields required to uniquely determine the record - to be inserted in the clustered index */ - - for (i = 0; i < dict_index_get_n_unique(index); i++) { - - const dfield_t* field = dtuple_get_nth_field(clust_entry, i); - ulint flen = dfield_get_len(field); - - if (trx_undo_left(undo_page, ptr) < 5) { - - return(0); - } - - ptr += mach_write_compressed(ptr, flen); - - if (flen != UNIV_SQL_NULL) { - if (trx_undo_left(undo_page, ptr) < flen) { - - return(0); - } - - ut_memcpy(ptr, dfield_get_data(field), flen); - ptr += flen; - } - } - - return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); -} - -/**********************************************************************//** -Reads from an undo log record the 
general parameters. -@return remaining part of undo log record after reading these values */ -UNIV_INTERN -byte* -trx_undo_rec_get_pars( -/*==================*/ - trx_undo_rec_t* undo_rec, /*!< in: undo log record */ - ulint* type, /*!< out: undo record type: - TRX_UNDO_INSERT_REC, ... */ - ulint* cmpl_info, /*!< out: compiler info, relevant only - for update type records */ - bool* updated_extern, /*!< out: true if we updated an - externally stored fild */ - undo_no_t* undo_no, /*!< out: undo log record number */ - table_id_t* table_id) /*!< out: table id */ -{ - byte* ptr; - ulint type_cmpl; - - ptr = undo_rec + 2; - - type_cmpl = mach_read_from_1(ptr); - ptr++; - - *updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN); - type_cmpl &= ~TRX_UNDO_UPD_EXTERN; - - *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1); - *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT; - - *undo_no = mach_ull_read_much_compressed(ptr); - ptr += mach_ull_get_much_compressed_size(*undo_no); - - *table_id = mach_ull_read_much_compressed(ptr); - ptr += mach_ull_get_much_compressed_size(*table_id); - - return(ptr); -} - -/**********************************************************************//** -Reads from an undo log record a stored column value. 
-@return remaining part of undo log record after reading these values */ -static -byte* -trx_undo_rec_get_col_val( -/*=====================*/ - byte* ptr, /*!< in: pointer to remaining part of undo log record */ - byte** field, /*!< out: pointer to stored field */ - ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */ - ulint* orig_len)/*!< out: original length of the locally - stored part of an externally stored column, or 0 */ -{ - *len = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*len); - - *orig_len = 0; - - switch (*len) { - case UNIV_SQL_NULL: - *field = NULL; - break; - case UNIV_EXTERN_STORAGE_FIELD: - *orig_len = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*orig_len); - *len = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*len); - *field = ptr; - ptr += *len; - - ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE); - ut_ad(*len > *orig_len); - /* @see dtuple_convert_big_rec() */ - ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE); - - /* we do not have access to index->table here - ut_ad(dict_table_get_format(index->table) >= UNIV_FORMAT_B - || *len >= col->max_prefix - + BTR_EXTERN_FIELD_REF_SIZE); - */ - - *len += UNIV_EXTERN_STORAGE_FIELD; - break; - default: - *field = ptr; - if (*len >= UNIV_EXTERN_STORAGE_FIELD) { - ptr += *len - UNIV_EXTERN_STORAGE_FIELD; - } else { - ptr += *len; - } - } - - return(ptr); -} - -/*******************************************************************//** -Builds a row reference from an undo log record. -@return pointer to remaining part of undo record */ -UNIV_INTERN -byte* -trx_undo_rec_get_row_ref( -/*=====================*/ - byte* ptr, /*!< in: remaining part of a copy of an undo log - record, at the start of the row reference; - NOTE that this copy of the undo log record must - be preserved as long as the row reference is - used, as we do NOT copy the data in the - record! 
*/ - dict_index_t* index, /*!< in: clustered index */ - dtuple_t** ref, /*!< out, own: row reference */ - mem_heap_t* heap) /*!< in: memory heap from which the memory - needed is allocated */ -{ - ulint ref_len; - ulint i; - - ut_ad(index && ptr && ref && heap); - ut_a(dict_index_is_clust(index)); - - ref_len = dict_index_get_n_unique(index); - - *ref = dtuple_create(heap, ref_len); - - dict_index_copy_types(*ref, index, ref_len); - - for (i = 0; i < ref_len; i++) { - dfield_t* dfield; - byte* field; - ulint len; - ulint orig_len; - - dfield = dtuple_get_nth_field(*ref, i); - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len); - - dfield_set_data(dfield, field, len); - } - - return(ptr); -} - -/*******************************************************************//** -Skips a row reference from an undo log record. -@return pointer to remaining part of undo record */ -UNIV_INTERN -byte* -trx_undo_rec_skip_row_ref( -/*======================*/ - byte* ptr, /*!< in: remaining part in update undo log - record, at the start of the row reference */ - dict_index_t* index) /*!< in: clustered index */ -{ - ulint ref_len; - ulint i; - - ut_ad(index && ptr); - ut_a(dict_index_is_clust(index)); - - ref_len = dict_index_get_n_unique(index); - - for (i = 0; i < ref_len; i++) { - byte* field; - ulint len; - ulint orig_len; - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len); - } - - return(ptr); -} - -/**********************************************************************//** -Fetch a prefix of an externally stored column, for writing to the undo log -of an update or delete marking of a clustered index record. 
-@return ext_buf */ -static -byte* -trx_undo_page_fetch_ext( -/*====================*/ - byte* ext_buf, /*!< in: buffer to hold the prefix - data and BLOB pointer */ - ulint prefix_len, /*!< in: prefix size to store - in the undo log */ - ulint zip_size, /*!< compressed page size in bytes, - or 0 for uncompressed BLOB */ - const byte* field, /*!< in: an externally stored column */ - ulint* len) /*!< in: length of field; - out: used length of ext_buf */ -{ - /* Fetch the BLOB. */ - ulint ext_len = btr_copy_externally_stored_field_prefix( - ext_buf, prefix_len, zip_size, field, *len, NULL); - /* BLOBs should always be nonempty. */ - ut_a(ext_len); - /* Append the BLOB pointer to the prefix. */ - memcpy(ext_buf + ext_len, - field + *len - BTR_EXTERN_FIELD_REF_SIZE, - BTR_EXTERN_FIELD_REF_SIZE); - *len = ext_len + BTR_EXTERN_FIELD_REF_SIZE; - return(ext_buf); -} - -/**********************************************************************//** -Writes to the undo log a prefix of an externally stored column. -@return undo log position */ -static -byte* -trx_undo_page_report_modify_ext( -/*============================*/ - byte* ptr, /*!< in: undo log position, - at least 15 bytes must be available */ - byte* ext_buf, /*!< in: a buffer of - DICT_MAX_FIELD_LEN_BY_FORMAT() size, - or NULL when should not fetch - a longer prefix */ - ulint prefix_len, /*!< prefix size to store in the - undo log */ - ulint zip_size, /*!< compressed page size in bytes, - or 0 for uncompressed BLOB */ - const byte** field, /*!< in/out: the locally stored part of - the externally stored column */ - ulint* len) /*!< in/out: length of field, in bytes */ -{ - if (ext_buf) { - ut_a(prefix_len > 0); - - /* If an ordering column is externally stored, we will - have to store a longer prefix of the field. In this - case, write to the log a marker followed by the - original length and the real length of the field. 
*/ - ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD); - - ptr += mach_write_compressed(ptr, *len); - - *field = trx_undo_page_fetch_ext(ext_buf, prefix_len, zip_size, - *field, len); - - ptr += mach_write_compressed(ptr, *len); - } else { - ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD - + *len); - } - - return(ptr); -} - -/**********************************************************************//** -Reports in the undo log of an update or delete marking of a clustered index -record. -@return byte offset of the inserted undo log entry on the page if -succeed, 0 if fail */ -static -ulint -trx_undo_page_report_modify( -/*========================*/ - page_t* undo_page, /*!< in: undo log page */ - trx_t* trx, /*!< in: transaction */ - dict_index_t* index, /*!< in: clustered index where update or - delete marking is done */ - const rec_t* rec, /*!< in: clustered index record which - has NOT yet been modified */ - const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - const upd_t* update, /*!< in: update vector which tells the - columns to be updated; in the case of - a delete, this should be set to NULL */ - ulint cmpl_info, /*!< in: compiler info on secondary - index updates */ - mtr_t* mtr) /*!< in: mtr */ -{ - dict_table_t* table; - ulint first_free; - byte* ptr; - const byte* field; - ulint flen; - ulint col_no; - ulint type_cmpl; - byte* type_cmpl_ptr; - ulint i; - trx_id_t trx_id; - ibool ignore_prefix = FALSE; - byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE]; - - ut_a(dict_index_is_clust(index)); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); - table = index->table; - - first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - ptr = undo_page + first_free; - - ut_ad(first_free <= UNIV_PAGE_SIZE); - - if (trx_undo_left(undo_page, ptr) < 50) { - - /* NOTE: the value 50 must 
be big enough so that the general - fields written below fit on the undo log page */ - - return(0); - } - - /* Reserve 2 bytes for the pointer to the next undo log record */ - ptr += 2; - - /* Store first some general parameters to the undo log */ - - if (!update) { - ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table))); - type_cmpl = TRX_UNDO_DEL_MARK_REC; - } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) { - type_cmpl = TRX_UNDO_UPD_DEL_REC; - /* We are about to update a delete marked record. - We don't typically need the prefix in this case unless - the delete marking is done by the same transaction - (which we check below). */ - ignore_prefix = TRUE; - } else { - type_cmpl = TRX_UNDO_UPD_EXIST_REC; - } - - type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT; - type_cmpl_ptr = ptr; - - *ptr++ = (byte) type_cmpl; - ptr += mach_ull_write_much_compressed(ptr, trx->undo_no); - - ptr += mach_ull_write_much_compressed(ptr, table->id); - - /*----------------------------------------*/ - /* Store the state of the info bits */ - - *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table)); - - /* Store the values of the system columns */ - field = rec_get_nth_field(rec, offsets, - dict_index_get_sys_col_pos( - index, DATA_TRX_ID), &flen); - ut_ad(flen == DATA_TRX_ID_LEN); - - trx_id = trx_read_trx_id(field); - - /* If it is an update of a delete marked record, then we are - allowed to ignore blob prefixes if the delete marking was done - by some other trx as it must have committed by now for us to - allow an over-write. 
*/ - if (ignore_prefix) { - ignore_prefix = (trx_id != trx->id); - } - ptr += mach_ull_write_compressed(ptr, trx_id); - - field = rec_get_nth_field(rec, offsets, - dict_index_get_sys_col_pos( - index, DATA_ROLL_PTR), &flen); - ut_ad(flen == DATA_ROLL_PTR_LEN); - - ptr += mach_ull_write_compressed(ptr, trx_read_roll_ptr(field)); - - /*----------------------------------------*/ - /* Store then the fields required to uniquely determine the - record which will be modified in the clustered index */ - - for (i = 0; i < dict_index_get_n_unique(index); i++) { - - field = rec_get_nth_field(rec, offsets, i, &flen); - - /* The ordering columns must not be stored externally. */ - ut_ad(!rec_offs_nth_extern(offsets, i)); - ut_ad(dict_index_get_nth_col(index, i)->ord_part); - - if (trx_undo_left(undo_page, ptr) < 5) { - - return(0); - } - - ptr += mach_write_compressed(ptr, flen); - - if (flen != UNIV_SQL_NULL) { - if (trx_undo_left(undo_page, ptr) < flen) { - - return(0); - } - - ut_memcpy(ptr, field, flen); - ptr += flen; - } - } - - /*----------------------------------------*/ - /* Save to the undo log the old values of the columns to be updated. 
*/ - - if (update) { - if (trx_undo_left(undo_page, ptr) < 5) { - - return(0); - } - - ptr += mach_write_compressed(ptr, upd_get_n_fields(update)); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - ulint pos = upd_get_nth_field(update, i)->field_no; - - /* Write field number to undo log */ - if (trx_undo_left(undo_page, ptr) < 5) { - - return(0); - } - - ptr += mach_write_compressed(ptr, pos); - - /* Save the old value of field */ - field = rec_get_nth_field(rec, offsets, pos, &flen); - - if (trx_undo_left(undo_page, ptr) < 15) { - - return(0); - } - - if (rec_offs_nth_extern(offsets, pos)) { - const dict_col_t* col - = dict_index_get_nth_col(index, pos); - ulint prefix_len - = dict_max_field_len_store_undo( - table, col); - - ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE - <= sizeof ext_buf); - - ptr = trx_undo_page_report_modify_ext( - ptr, - col->ord_part - && !ignore_prefix - && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN - ? ext_buf : NULL, prefix_len, - dict_table_zip_size(table), - &field, &flen); - - /* Notify purge that it eventually has to - free the old externally stored field */ - - trx->update_undo->del_marks = TRUE; - - *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN; - } else { - ptr += mach_write_compressed(ptr, flen); - } - - if (flen != UNIV_SQL_NULL) { - if (trx_undo_left(undo_page, ptr) < flen) { - - return(0); - } - - ut_memcpy(ptr, field, flen); - ptr += flen; - } - } - } - - /*----------------------------------------*/ - /* In the case of a delete marking, and also in the case of an update - where any ordering field of any index changes, store the values of all - columns which occur as ordering fields in any index. This info is used - in the purge of old versions where we use it to build and search the - delete marked index records, to look if we can remove them from the - index tree. Note that starting from 4.0.14 also externally stored - fields can be ordering in some index. 
Starting from 5.2, we no longer - store REC_MAX_INDEX_COL_LEN first bytes to the undo log record, - but we can construct the column prefix fields in the index by - fetching the first page of the BLOB that is pointed to by the - clustered index. This works also in crash recovery, because all pages - (including BLOBs) are recovered before anything is rolled back. */ - - if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - byte* old_ptr = ptr; - - trx->update_undo->del_marks = TRUE; - - if (trx_undo_left(undo_page, ptr) < 5) { - - return(0); - } - - /* Reserve 2 bytes to write the number of bytes the stored - fields take in this undo record */ - - ptr += 2; - - for (col_no = 0; col_no < dict_table_get_n_cols(table); - col_no++) { - - const dict_col_t* col - = dict_table_get_nth_col(table, col_no); - - if (col->ord_part) { - ulint pos; - - /* Write field number to undo log */ - if (trx_undo_left(undo_page, ptr) < 5 + 15) { - - return(0); - } - - pos = dict_index_get_nth_col_pos(index, - col_no, - NULL); - ptr += mach_write_compressed(ptr, pos); - - /* Save the old value of field */ - field = rec_get_nth_field(rec, offsets, pos, - &flen); - - if (rec_offs_nth_extern(offsets, pos)) { - const dict_col_t* col = - dict_index_get_nth_col( - index, pos); - ulint prefix_len = - dict_max_field_len_store_undo( - table, col); - - ut_a(prefix_len < sizeof ext_buf); - - ptr = trx_undo_page_report_modify_ext( - ptr, - flen < REC_ANTELOPE_MAX_INDEX_COL_LEN - && !ignore_prefix - ? 
ext_buf : NULL, prefix_len, - dict_table_zip_size(table), - &field, &flen); - } else { - ptr += mach_write_compressed( - ptr, flen); - } - - if (flen != UNIV_SQL_NULL) { - if (trx_undo_left(undo_page, ptr) - < flen) { - - return(0); - } - - ut_memcpy(ptr, field, flen); - ptr += flen; - } - } - } - - mach_write_to_2(old_ptr, ptr - old_ptr); - } - - /*----------------------------------------*/ - /* Write pointers to the previous and the next undo log records */ - if (trx_undo_left(undo_page, ptr) < 2) { - - return(0); - } - - mach_write_to_2(ptr, first_free); - ptr += 2; - mach_write_to_2(undo_page + first_free, ptr - undo_page); - - mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, - ptr - undo_page); - - /* Write to the REDO log about this change in the UNDO log */ - - trx_undof_page_add_undo_rec_log(undo_page, first_free, - ptr - undo_page, mtr); - return(first_free); -} - -/**********************************************************************//** -Reads from an undo log update record the system field values of the old -version. -@return remaining part of undo log record after reading these values */ -UNIV_INTERN -byte* -trx_undo_update_rec_get_sys_cols( -/*=============================*/ - byte* ptr, /*!< in: remaining part of undo - log record after reading - general parameters */ - trx_id_t* trx_id, /*!< out: trx id */ - roll_ptr_t* roll_ptr, /*!< out: roll ptr */ - ulint* info_bits) /*!< out: info bits state */ -{ - /* Read the state of the info bits */ - *info_bits = mach_read_from_1(ptr); - ptr += 1; - - /* Read the values of the system columns */ - - *trx_id = mach_ull_read_compressed(ptr); - ptr += mach_ull_get_compressed_size(*trx_id); - - *roll_ptr = mach_ull_read_compressed(ptr); - ptr += mach_ull_get_compressed_size(*roll_ptr); - - return(ptr); -} - -/**********************************************************************//** -Reads from an update undo log record the number of updated fields. 
-@return remaining part of undo log record after reading this value */ -UNIV_INLINE -byte* -trx_undo_update_rec_get_n_upd_fields( -/*=================================*/ - byte* ptr, /*!< in: pointer to remaining part of undo log record */ - ulint* n) /*!< out: number of fields */ -{ - *n = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*n); - - return(ptr); -} - -/**********************************************************************//** -Reads from an update undo log record a stored field number. -@return remaining part of undo log record after reading this value */ -UNIV_INLINE -byte* -trx_undo_update_rec_get_field_no( -/*=============================*/ - byte* ptr, /*!< in: pointer to remaining part of undo log record */ - ulint* field_no)/*!< out: field number */ -{ - *field_no = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*field_no); - - return(ptr); -} - -/*******************************************************************//** -Builds an update vector based on a remaining part of an undo log record. -@return remaining part of the record, NULL if an error detected, which -means that the record is corrupted */ -UNIV_INTERN -byte* -trx_undo_update_rec_get_update( -/*===========================*/ - byte* ptr, /*!< in: remaining part in update undo log - record, after reading the row reference - NOTE that this copy of the undo log record must - be preserved as long as the update vector is - used, as we do NOT copy the data in the - record! 
*/ - dict_index_t* index, /*!< in: clustered index */ - ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC, - TRX_UNDO_UPD_DEL_REC, or - TRX_UNDO_DEL_MARK_REC; in the last case, - only trx id and roll ptr fields are added to - the update vector */ - trx_id_t trx_id, /*!< in: transaction id from this undo record */ - roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */ - ulint info_bits,/*!< in: info bits from this undo record */ - trx_t* trx, /*!< in: transaction */ - mem_heap_t* heap, /*!< in: memory heap from which the memory - needed is allocated */ - upd_t** upd) /*!< out, own: update vector */ -{ - upd_field_t* upd_field; - upd_t* update; - ulint n_fields; - byte* buf; - ulint i; - - ut_a(dict_index_is_clust(index)); - - if (type != TRX_UNDO_DEL_MARK_REC) { - ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields); - } else { - n_fields = 0; - } - - update = upd_create(n_fields + 2, heap); - - update->info_bits = info_bits; - - /* Store first trx id and roll ptr to update vector */ - - upd_field = upd_get_nth_field(update, n_fields); - - buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN)); - - trx_write_trx_id(buf, trx_id); - - upd_field_set_field_no(upd_field, - dict_index_get_sys_col_pos(index, DATA_TRX_ID), - index, trx); - dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN); - - upd_field = upd_get_nth_field(update, n_fields + 1); - - buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_ROLL_PTR_LEN)); - - trx_write_roll_ptr(buf, roll_ptr); - - upd_field_set_field_no( - upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR), - index, trx); - dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN); - - /* Store then the updated ordinary columns to the update vector */ - - for (i = 0; i < n_fields; i++) { - - byte* field; - ulint len; - ulint field_no; - ulint orig_len; - - ptr = trx_undo_update_rec_get_field_no(ptr, &field_no); - - if (field_no >= dict_index_get_n_fields(index)) { - fprintf(stderr, - "InnoDB: Error: 
trying to access" - " update undo rec field %lu in ", - (ulong) field_no); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, "\n" - "InnoDB: but index has only %lu fields\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n" - "InnoDB: Run also CHECK TABLE ", - (ulong) dict_index_get_n_fields(index)); - ut_print_name(stderr, trx, TRUE, index->table_name); - fprintf(stderr, "\n" - "InnoDB: n_fields = %lu, i = %lu, ptr %p\n", - (ulong) n_fields, (ulong) i, ptr); - ut_ad(0); - *upd = NULL; - return(NULL); - } - - upd_field = upd_get_nth_field(update, i); - - upd_field_set_field_no(upd_field, field_no, index, trx); - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len); - - upd_field->orig_len = orig_len; - - if (len == UNIV_SQL_NULL) { - dfield_set_null(&upd_field->new_val); - } else if (len < UNIV_EXTERN_STORAGE_FIELD) { - dfield_set_data(&upd_field->new_val, field, len); - } else { - len -= UNIV_EXTERN_STORAGE_FIELD; - - dfield_set_data(&upd_field->new_val, field, len); - dfield_set_ext(&upd_field->new_val); - } - } - - *upd = update; - - return(ptr); -} - -/*******************************************************************//** -Builds a partial row from an update undo log record, for purge. -It contains the columns which occur as ordering in any index of the table. -Any missing columns are indicated by col->mtype == DATA_MISSING. -@return pointer to remaining part of undo record */ -UNIV_INTERN -byte* -trx_undo_rec_get_partial_row( -/*=========================*/ - byte* ptr, /*!< in: remaining part in update undo log - record of a suitable type, at the start of - the stored index columns; - NOTE that this copy of the undo log record must - be preserved as long as the partial row is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /*!< in: clustered index */ - dtuple_t** row, /*!< out, own: partial row */ - ibool ignore_prefix, /*!< in: flag to indicate if we - expect blob prefixes in undo. 
Used - only in the assertion. */ - mem_heap_t* heap) /*!< in: memory heap from which the memory - needed is allocated */ -{ - const byte* end_ptr; - ulint row_len; - - ut_ad(index); - ut_ad(ptr); - ut_ad(row); - ut_ad(heap); - ut_ad(dict_index_is_clust(index)); - - row_len = dict_table_get_n_cols(index->table); - - *row = dtuple_create(heap, row_len); - - /* Mark all columns in the row uninitialized, so that - we can distinguish missing fields from fields that are SQL NULL. */ - for (ulint i = 0; i < row_len; i++) { - dfield_get_type(dtuple_get_nth_field(*row, i)) - ->mtype = DATA_MISSING; - } - - end_ptr = ptr + mach_read_from_2(ptr); - ptr += 2; - - while (ptr != end_ptr) { - dfield_t* dfield; - byte* field; - ulint field_no; - const dict_col_t* col; - ulint col_no; - ulint len; - ulint orig_len; - - ptr = trx_undo_update_rec_get_field_no(ptr, &field_no); - - col = dict_index_get_nth_col(index, field_no); - col_no = dict_col_get_no(col); - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len); - - dfield = dtuple_get_nth_field(*row, col_no); - dict_col_copy_type( - dict_table_get_nth_col(index->table, col_no), - dfield_get_type(dfield)); - dfield_set_data(dfield, field, len); - - if (len != UNIV_SQL_NULL - && len >= UNIV_EXTERN_STORAGE_FIELD) { - dfield_set_len(dfield, - len - UNIV_EXTERN_STORAGE_FIELD); - dfield_set_ext(dfield); - /* If the prefix of this column is indexed, - ensure that enough prefix is stored in the - undo log record. */ - if (!ignore_prefix && col->ord_part) { - ut_a(dfield_get_len(dfield) - >= BTR_EXTERN_FIELD_REF_SIZE); - ut_a(dict_table_get_format(index->table) - >= UNIV_FORMAT_B - || dfield_get_len(dfield) - >= REC_ANTELOPE_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE); - } - } - } - - return(ptr); -} -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************************//** -Erases the unused undo log page end. 
-@return TRUE if the page contained something, FALSE if it was empty */ -static MY_ATTRIBUTE((nonnull)) -ibool -trx_undo_erase_page_end( -/*====================*/ - page_t* undo_page, /*!< in/out: undo page whose end to erase */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - ulint first_free; - - first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - memset(undo_page + first_free, 0xff, - (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free); - - mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr); - return(first_free != TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); -} - -/***********************************************************//** -Parses a redo log record of erasing of an undo page end. -@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_erase_page_end( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - if (page == NULL) { - - return(ptr); - } - - trx_undo_erase_page_end(page, mtr); - - return(ptr); -} - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Writes information to an undo log about an insert, update, or a delete marking -of a clustered index record. This information is used in a rollback of the -transaction and in consistent reads that must look to the history of this -transaction. 
-@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -trx_undo_report_row_operation( -/*==========================*/ - que_thr_t* thr, /*!< in: query thread */ - dict_index_t* index, /*!< in: clustered index */ - const dtuple_t* clust_entry, /*!< in: in the case of an insert, - index entry to insert into the - clustered index, otherwise NULL */ - const upd_t* update, /*!< in: in the case of an update, - the update vector, otherwise NULL */ - ulint cmpl_info, /*!< in: compiler info on secondary - index updates */ - const rec_t* rec, /*!< in: in case of an update or delete - marking, the record in the clustered - index, otherwise NULL */ - const ulint* offsets, /*!< in: rec_get_offsets(rec) */ - roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the - inserted undo log record, - 0 if BTR_NO_UNDO_LOG - flag was specified */ -{ - trx_t* trx; - trx_undo_t* undo; - ulint page_no; - buf_block_t* undo_block; - trx_rseg_t* rseg; - mtr_t mtr; - dberr_t err = DB_SUCCESS; -#ifdef UNIV_DEBUG - int loop_count = 0; -#endif /* UNIV_DEBUG */ - - ut_ad(!srv_read_only_mode); - ut_a(dict_index_is_clust(index)); - ut_ad(!rec || rec_offs_validate(rec, index, offsets)); - - ut_ad(thr); - ut_ad(!clust_entry || (!update && !rec)); - - trx = thr_get_trx(thr); - - /* This table is visible only to the session that created it. */ - if (trx->read_only) { - ut_ad(!srv_read_only_mode); - /* MySQL should block writes to non-temporary tables. 
*/ - ut_a(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_TEMPORARY)); - if (trx->rseg == 0) { - trx_assign_rseg(trx); - } - } - - rseg = trx->rseg; - - mtr_start_trx(&mtr, trx); - mutex_enter(&trx->undo_mutex); - - /* If the undo log is not assigned yet, assign one */ - - if (clust_entry) { - undo = trx->insert_undo; - - if (undo == NULL) { - - err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT); - undo = trx->insert_undo; - - if (undo == NULL) { - /* Did not succeed */ - ut_ad(err != DB_SUCCESS); - goto err_exit; - } - - ut_ad(err == DB_SUCCESS); - } - } else { - undo = trx->update_undo; - - if (undo == NULL) { - err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); - undo = trx->update_undo; - - if (undo == NULL) { - /* Did not succeed */ - ut_ad(err != DB_SUCCESS); - goto err_exit; - } - } - - ut_ad(err == DB_SUCCESS); - } - - page_no = undo->last_page_no; - undo_block = buf_page_get_gen( - undo->space, undo->zip_size, page_no, RW_X_LATCH, - undo->guess_block, BUF_GET, __FILE__, __LINE__, &mtr); - buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE); - - do { - ut_ad(page_no == buf_block_get_page_no(undo_block)); - - page_t* undo_page = buf_block_get_frame(undo_block); - ulint offset = clust_entry - ? trx_undo_page_report_insert( - undo_page, trx, index, clust_entry, &mtr) - : trx_undo_page_report_modify( - undo_page, trx, index, rec, offsets, update, - cmpl_info, &mtr); - - if (UNIV_UNLIKELY(offset == 0)) { - /* The record did not fit on the page. We erase the - end segment of the undo log page and write a log - record of it: this is to ensure that in the debug - version the replicate page constructed using the log - records stays identical to the original page */ - - if (!trx_undo_erase_page_end(undo_page, &mtr)) { - /* The record did not fit on an empty - undo page. Discard the freshly allocated - page and return an error. 
*/ - - /* When we remove a page from an undo - log, this is analogous to a - pessimistic insert in a B-tree, and we - must reserve the counterpart of the - tree latch, which is the rseg - mutex. We must commit the mini-transaction - first, because it may be holding lower-level - latches, such as SYNC_FSP and SYNC_FSP_PAGE. */ - - mtr_commit(&mtr); - mtr_start_trx(&mtr, trx); - - mutex_enter(&rseg->mutex); - trx_undo_free_last_page(trx, undo, &mtr); - mutex_exit(&rseg->mutex); - - err = DB_UNDO_RECORD_TOO_BIG; - goto err_exit; - } - - mtr_commit(&mtr); - } else { - /* Success */ - - mtr_commit(&mtr); - - undo->empty = FALSE; - undo->top_page_no = page_no; - undo->top_offset = offset; - undo->top_undo_no = trx->undo_no; - undo->guess_block = undo_block; - - trx->undo_no++; - - mutex_exit(&trx->undo_mutex); - - *roll_ptr = trx_undo_build_roll_ptr( - clust_entry != NULL, - rseg->id, page_no, offset); - return(DB_SUCCESS); - } - - ut_ad(page_no == undo->last_page_no); - - /* We have to extend the undo log by one page */ - - ut_ad(++loop_count < 2); - mtr_start_trx(&mtr, trx); - - /* When we add a page to an undo log, this is analogous to - a pessimistic insert in a B-tree, and we must reserve the - counterpart of the tree latch, which is the rseg mutex. */ - - mutex_enter(&rseg->mutex); - undo_block = trx_undo_add_page(trx, undo, &mtr); - mutex_exit(&rseg->mutex); - - page_no = undo->last_page_no; - } while (undo_block != NULL); - - /* Did not succeed: out of space */ - err = DB_OUT_OF_FILE_SPACE; - -err_exit: - mutex_exit(&trx->undo_mutex); - mtr_commit(&mtr); - return(err); -} - -/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/ - -/******************************************************************//** -Copies an undo record to heap. This function can be called if we know that -the undo log record exists. 
-@return own: copy of the record */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_undo_rec_low( -/*======================*/ - roll_ptr_t roll_ptr, /*!< in: roll pointer to record */ - mem_heap_t* heap) /*!< in: memory heap where copied */ -{ - trx_undo_rec_t* undo_rec; - ulint rseg_id; - ulint page_no; - ulint offset; - const page_t* undo_page; - trx_rseg_t* rseg; - ibool is_insert; - mtr_t mtr; - - trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, - &offset); - rseg = trx_rseg_get_on_id(rseg_id); - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size, - page_no, &mtr); - - undo_rec = trx_undo_rec_copy(undo_page + offset, heap); - - mtr_commit(&mtr); - - return(undo_rec); -} - -/******************************************************************//** -Copies an undo record to heap. - -NOTE: the caller must have latches on the clustered index page. - -@retval true if the undo log has been -truncated and we cannot fetch the old version -@retval false if the undo log record is available */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -trx_undo_get_undo_rec( -/*==================*/ - roll_ptr_t roll_ptr, /*!< in: roll pointer to record */ - trx_id_t trx_id, /*!< in: id of the trx that generated - the roll pointer: it points to an - undo log of this transaction */ - trx_undo_rec_t**undo_rec, /*!< out, own: copy of the record */ - mem_heap_t* heap) /*!< in: memory heap where copied */ -{ - bool missing_history; - - rw_lock_s_lock(&purge_sys->latch); - missing_history = read_view_sees_trx_id(purge_sys->view, trx_id); - - if (!missing_history) { - *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); - } - - rw_lock_s_unlock(&purge_sys->latch); - - return(missing_history); -} - -#ifdef UNIV_DEBUG -#define ATTRIB_USED_ONLY_IN_DEBUG -#else /* UNIV_DEBUG */ -#define ATTRIB_USED_ONLY_IN_DEBUG MY_ATTRIBUTE((unused)) -#endif /* UNIV_DEBUG */ - 
-/*******************************************************************//** -Build a previous version of a clustered index record. The caller must -hold a latch on the index page of the clustered index record. -@retval true if previous version was built, or if it was an insert -or the table has been rebuilt -@retval false if the previous version is earlier than purge_view, -which means that it may have been removed */ -UNIV_INTERN -bool -trx_undo_prev_version_build( -/*========================*/ - const rec_t* index_rec ATTRIB_USED_ONLY_IN_DEBUG, - /*!< in: clustered index record in the - index tree */ - mtr_t* index_mtr ATTRIB_USED_ONLY_IN_DEBUG, - /*!< in: mtr which contains the latch to - index_rec page and purge_view */ - const rec_t* rec, /*!< in: version of a clustered index record */ - dict_index_t* index, /*!< in: clustered index */ - ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mem_heap_t* heap, /*!< in: memory heap from which the memory - needed is allocated */ - rec_t** old_vers)/*!< out, own: previous version, or NULL if - rec is the first inserted version, or if - history data has been deleted (an error), - or if the purge COULD have removed the version - though it has not yet done so */ -{ - trx_undo_rec_t* undo_rec = NULL; - dtuple_t* entry; - trx_id_t rec_trx_id; - ulint type; - undo_no_t undo_no; - table_id_t table_id; - trx_id_t trx_id; - roll_ptr_t roll_ptr; - upd_t* update; - byte* ptr; - ulint info_bits; - ulint cmpl_info; - bool dummy_extern; - byte* buf; -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains_page(index_mtr, index_rec, - MTR_MEMO_PAGE_X_FIX)); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_a(dict_index_is_clust(index)); - - roll_ptr = row_get_rec_roll_ptr(rec, index, offsets); - - *old_vers = NULL; - - if (trx_undo_roll_ptr_is_insert(roll_ptr)) { - /* The 
record rec is the first inserted version */ - return(true); - } - - rec_trx_id = row_get_rec_trx_id(rec, index, offsets); - - if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap)) { - /* The undo record may already have been purged, - during purge or semi-consistent read. */ - return(false); - } - - ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, - &dummy_extern, &undo_no, &table_id); - - if (table_id != index->table->id) { - /* The table should have been rebuilt, but purge has - not yet removed the undo log records for the - now-dropped old table (table_id). */ - return(true); - } - - ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); - - /* (a) If a clustered index record version is such that the - trx id stamp in it is bigger than purge_sys->view, then the - BLOBs in that version are known to exist (the purge has not - progressed that far); - - (b) if the version is the first version such that trx id in it - is less than purge_sys->view, and it is not delete-marked, - then the BLOBs in that version are known to exist (the purge - cannot have purged the BLOBs referenced by that version - yet). - - This function does not fetch any BLOBs. The callers might, by - possibly invoking row_ext_create() via row_build(). However, - they should have all needed information in the *old_vers - returned by this function. This is because *old_vers is based - on the transaction undo log records. The function - trx_undo_page_fetch_ext() will write BLOB prefixes to the - transaction undo log that are at least as long as the longest - possible column prefix in a secondary index. Thus, secondary - index entries for *old_vers can be constructed without - dereferencing any BLOB pointers. 
*/ - - ptr = trx_undo_rec_skip_row_ref(ptr, index); - - ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id, - roll_ptr, info_bits, - NULL, heap, &update); - ut_a(ptr); - -# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(rec, offsets)); -# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - - if (row_upd_changes_field_size_or_external(index, offsets, update)) { - ulint n_ext; - - /* We should confirm the existence of disowned external data, - if the previous version record is delete marked. If the trx_id - of the previous record is seen by purge view, we should treat - it as missing history, because the disowned external data - might be purged already. - - The inherited external data (BLOBs) can be freed (purged) - after trx_id was committed, provided that no view was started - before trx_id. If the purge view can see the committed - delete-marked record by trx_id, no transactions need to access - the BLOB. */ - - /* the row_upd_changes_disowned_external(update) call could be - omitted, but the synchronization on purge_sys->latch is likely - more expensive. */ - - if ((update->info_bits & REC_INFO_DELETED_FLAG) - && row_upd_changes_disowned_external(update)) { - bool missing_extern; - - rw_lock_s_lock(&purge_sys->latch); - missing_extern = read_view_sees_trx_id(purge_sys->view, - trx_id); - rw_lock_s_unlock(&purge_sys->latch); - - if (missing_extern) { - /* treat as a fresh insert, not to - cause assertion error at the caller. */ - return(true); - } - } - - /* We have to set the appropriate extern storage bits in the - old version of the record: the extern bits in rec for those - fields that update does NOT update, as well as the bits for - those fields that update updates to become externally stored - fields. 
Store the info: */ - - entry = row_rec_to_index_entry( - rec, index, offsets, &n_ext, heap); - n_ext += btr_push_update_extern_fields(entry, update, heap); - /* The page containing the clustered index record - corresponding to entry is latched in mtr. Thus the - following call is safe. */ - row_upd_index_replace_new_col_vals(entry, index, update, heap); - - buf = static_cast<byte*>( - mem_heap_alloc( - heap, - rec_get_converted_size(index, entry, n_ext))); - - *old_vers = rec_convert_dtuple_to_rec(buf, index, - entry, n_ext); - } else { - buf = static_cast<byte*>( - mem_heap_alloc(heap, rec_offs_size(offsets))); - - *old_vers = rec_copy(buf, rec, offsets); - rec_offs_make_valid(*old_vers, index, offsets); - row_upd_rec_in_place(*old_vers, index, offsets, update, NULL); - } - - return(true); -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc deleted file mode 100644 index 335ef8859c4..00000000000 --- a/storage/xtradb/trx/trx0roll.cc +++ /dev/null @@ -1,1417 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0roll.cc -Transaction rollback - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0roll.h" - -#ifdef UNIV_NONINL -#include "trx0roll.ic" -#endif - -#include <mysql/service_wsrep.h> - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0undo.h" -#include "trx0rec.h" -#include "que0que.h" -#include "usr0sess.h" -#include "srv0start.h" -#include "read0read.h" -#include "row0undo.h" -#include "row0mysql.h" -#include "lock0lock.h" -#include "pars0pars.h" -#include "srv0mon.h" -#include "trx0sys.h" -#ifdef WITH_WSREP -#include "ha_prototypes.h" -#endif /* WITH_WSREP */ - -/** This many pages must be undone before a truncate is tried within -rollback */ -#define TRX_ROLL_TRUNC_THRESHOLD 1 - -/** true if trx_rollback_or_clean_all_recovered() thread is active */ -bool trx_rollback_or_clean_is_active; - -/** In crash recovery, the current trx to be rolled back; NULL otherwise */ -static const trx_t* trx_roll_crash_recv_trx = NULL; - -/** In crash recovery we set this to the undo n:o of the current trx to be -rolled back. Then we can print how many % the rollback has progressed. */ -static undo_no_t trx_roll_max_undo_no; - -/** Auxiliary variable which tells the previous progress % we printed */ -static ulint trx_roll_progress_printed_pct; - -/****************************************************************//** -Finishes a transaction rollback. 
*/ -static -void -trx_rollback_finish( -/*================*/ - trx_t* trx); /*!< in: transaction */ - -/*******************************************************************//** -Rollback a transaction used in MySQL. */ -static -void -trx_rollback_to_savepoint_low( -/*==========================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if - partial rollback requested, or NULL for - complete rollback */ -{ - que_thr_t* thr; - mem_heap_t* heap; - roll_node_t* roll_node; - - heap = mem_heap_create(512); - - roll_node = roll_node_create(heap); - - if (savept != NULL) { - roll_node->partial = TRUE; - roll_node->savept = *savept; - assert_trx_in_list(trx); - } else { - assert_trx_nonlocking_or_in_list(trx); - } - - trx->error_state = DB_SUCCESS; - - if (trx->insert_undo || trx->update_undo) { - thr = pars_complete_graph_for_exec(roll_node, trx, heap); - - ut_a(thr == que_fork_start_command( - static_cast<que_fork_t*>(que_node_get_parent(thr)))); - - que_run_threads(thr); - - ut_a(roll_node->undo_thr != NULL); - que_run_threads(roll_node->undo_thr); - - /* Free the memory reserved by the undo graph. */ - que_graph_free(static_cast<que_t*>( - roll_node->undo_thr->common.parent)); - } - - if (savept == NULL) { - trx_rollback_finish(trx); - MONITOR_INC(MONITOR_TRX_ROLLBACK); - } else { - trx->lock.que_state = TRX_QUE_RUNNING; - MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT); - } - - ut_a(trx->error_state == DB_SUCCESS); - ut_a(trx->lock.que_state == TRX_QUE_RUNNING); - - mem_heap_free(heap); - - /* There might be work for utility threads.*/ - srv_active_wake_master_thread(); - - MONITOR_DEC(MONITOR_TRX_ACTIVE); -} - -/*******************************************************************//** -Rollback a transaction to a given savepoint or do a complete rollback. 
-@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_to_savepoint( -/*======================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if - partial rollback requested, or NULL for - complete rollback */ -{ - ut_ad(!trx_mutex_own(trx)); - - trx_start_if_not_started_xa(trx); - - trx_rollback_to_savepoint_low(trx, savept); - - return(trx->error_state); -} - -/*******************************************************************//** -Rollback a transaction used in MySQL. -@return error code or DB_SUCCESS */ -static -dberr_t -trx_rollback_for_mysql_low( -/*=======================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - trx->op_info = "rollback"; - - /* If we are doing the XA recovery of prepared transactions, - then the transaction object does not have an InnoDB session - object, and we set a dummy session that we use for all MySQL - transactions. */ - - trx_rollback_to_savepoint_low(trx, NULL); - - trx->op_info = ""; - - ut_a(trx->error_state == DB_SUCCESS); - - return(trx->error_state); -} - -/*******************************************************************//** -Rollback a transaction used in MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_for_mysql( -/*===================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - /* We are reading trx->state without holding trx_sys->mutex - here, because the rollback should be invoked for a running - active MySQL transaction (or recovered prepared transaction) - that is associated with the current thread. 
*/ - - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - ut_ad(trx->in_mysql_trx_list); - return(DB_SUCCESS); - - case TRX_STATE_ACTIVE: - ut_ad(trx->in_mysql_trx_list); - assert_trx_nonlocking_or_in_list(trx); - return(trx_rollback_for_mysql_low(trx)); - - case TRX_STATE_PREPARED: - ut_ad(!trx_is_autocommit_non_locking(trx)); - return(trx_rollback_for_mysql_low(trx)); - - case TRX_STATE_COMMITTED_IN_MEMORY: - assert_trx_in_list(trx); - break; - } - - ut_error; - return(DB_CORRUPTION); -} - -/*******************************************************************//** -Rollback the latest SQL statement for MySQL. -@return error code or DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_last_sql_stat_for_mysql( -/*=================================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - dberr_t err; - - /* We are reading trx->state without holding trx_sys->mutex - here, because the statement rollback should be invoked for a - running active MySQL transaction that is associated with the - current thread. */ - ut_ad(trx->in_mysql_trx_list); - - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - return(DB_SUCCESS); - case TRX_STATE_ACTIVE: - assert_trx_nonlocking_or_in_list(trx); - - trx->op_info = "rollback of SQL statement"; - - err = trx_rollback_to_savepoint( - trx, &trx->last_sql_stat_start); - - if (trx->fts_trx) { - fts_savepoint_rollback_last_stmt(trx); - } - - /* The following call should not be needed, - but we play it safe: */ - trx_mark_sql_stat_end(trx); - - trx->op_info = ""; - - return(err); - case TRX_STATE_PREPARED: - case TRX_STATE_COMMITTED_IN_MEMORY: - /* The statement rollback is only allowed on an ACTIVE - transaction, not a PREPARED or COMMITTED one. */ - break; - } - - ut_error; - return(DB_CORRUPTION); -} - -/*******************************************************************//** -Search for a savepoint using name. 
-@return savepoint if found else NULL */ -static -trx_named_savept_t* -trx_savepoint_find( -/*===============*/ - trx_t* trx, /*!< in: transaction */ - const char* name) /*!< in: savepoint name */ -{ - trx_named_savept_t* savep; - - for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - savep != NULL; - savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) { - - if (0 == ut_strcmp(savep->name, name)) { - return(savep); - } - } - - return(NULL); -} - -/*******************************************************************//** -Frees a single savepoint struct. */ -static -void -trx_roll_savepoint_free( -/*=====================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_named_savept_t* savep) /*!< in: savepoint to free */ -{ - UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); - mem_free(savep->name); - mem_free(savep); -} - -/*******************************************************************//** -Frees savepoint structs starting from savep. */ -UNIV_INTERN -void -trx_roll_savepoints_free( -/*=====================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_named_savept_t* savep) /*!< in: free all savepoints starting - with this savepoint i*/ -{ - while (savep != NULL) { - trx_named_savept_t* next_savep; - - next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep); - - trx_roll_savepoint_free(trx, savep); - - savep = next_savep; - } -} - -/*******************************************************************//** -Rolls back a transaction back to a named savepoint. Modifications after the -savepoint are undone but InnoDB does NOT release the corresponding locks -which are stored in memory. If a lock is 'implicit', that is, a new inserted -row holds a lock where the lock information is carried by the trx id stored in -the row, these locks are naturally released in the rollback. Savepoints which -were set after this savepoint are deleted. 
-@return if no savepoint of the name found then DB_NO_SAVEPOINT, -otherwise DB_SUCCESS */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -trx_rollback_to_savepoint_for_mysql_low( -/*====================================*/ - trx_t* trx, /*!< in/out: transaction */ - trx_named_savept_t* savep, /*!< in/out: savepoint */ - ib_int64_t* mysql_binlog_cache_pos) - /*!< out: the MySQL binlog - cache position corresponding - to this savepoint; MySQL needs - this information to remove the - binlog entries of the queries - executed after the savepoint */ -{ - dberr_t err; - - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); - ut_ad(trx->in_mysql_trx_list); - - /* Free all savepoints strictly later than savep. */ - - trx_roll_savepoints_free( - trx, UT_LIST_GET_NEXT(trx_savepoints, savep)); - - *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos; - - trx->op_info = "rollback to a savepoint"; - - err = trx_rollback_to_savepoint(trx, &savep->savept); - - /* Store the current undo_no of the transaction so that - we know where to roll back if we have to roll back the - next SQL statement: */ - - trx_mark_sql_stat_end(trx); - - trx->op_info = ""; - -#ifdef WITH_WSREP - if (wsrep_on(trx->mysql_thd) && - trx->lock.was_chosen_as_deadlock_victim) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif - - return(err); -} - -/*******************************************************************//** -Rolls back a transaction back to a named savepoint. Modifications after the -savepoint are undone but InnoDB does NOT release the corresponding locks -which are stored in memory. If a lock is 'implicit', that is, a new inserted -row holds a lock where the lock information is carried by the trx id stored in -the row, these locks are naturally released in the rollback. Savepoints which -were set after this savepoint are deleted. 
-@return if no savepoint of the name found then DB_NO_SAVEPOINT, -otherwise DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_rollback_to_savepoint_for_mysql( -/*================================*/ - trx_t* trx, /*!< in: transaction handle */ - const char* savepoint_name, /*!< in: savepoint name */ - ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache - position corresponding to this - savepoint; MySQL needs this - information to remove the - binlog entries of the queries - executed after the savepoint */ -{ - trx_named_savept_t* savep; - - /* We are reading trx->state without holding trx_sys->mutex - here, because the savepoint rollback should be invoked for a - running active MySQL transaction that is associated with the - current thread. */ - ut_ad(trx->in_mysql_trx_list); - - savep = trx_savepoint_find(trx, savepoint_name); - - if (savep == NULL) { - return(DB_NO_SAVEPOINT); - } - - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: transaction has a savepoint ", stderr); - ut_print_name(stderr, trx, FALSE, savep->name); - fputs(" though it is not started\n", stderr); - return(DB_ERROR); - case TRX_STATE_ACTIVE: - return(trx_rollback_to_savepoint_for_mysql_low( - trx, savep, mysql_binlog_cache_pos)); - case TRX_STATE_PREPARED: - case TRX_STATE_COMMITTED_IN_MEMORY: - /* The savepoint rollback is only allowed on an ACTIVE - transaction, not a PREPARED or COMMITTED one. */ - break; - } - - ut_error; - return(DB_CORRUPTION); -} - -/*******************************************************************//** -Creates a named savepoint. If the transaction is not yet started, starts it. -If there is already a savepoint of the same name, this call erases that old -savepoint and replaces it with a new. Savepoints are deleted in a transaction -commit or rollback. 
-@return always DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_savepoint_for_mysql( -/*====================*/ - trx_t* trx, /*!< in: transaction handle */ - const char* savepoint_name, /*!< in: savepoint name */ - ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache - position corresponding to this - connection at the time of the - savepoint */ -{ - trx_named_savept_t* savep; - - trx_start_if_not_started_xa(trx); - - savep = trx_savepoint_find(trx, savepoint_name); - - if (savep) { - /* There is a savepoint with the same name: free that */ - - UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); - - mem_free(savep->name); - mem_free(savep); - } - - /* Create a new savepoint and add it as the last in the list */ - - savep = static_cast<trx_named_savept_t*>(mem_alloc(sizeof(*savep))); - - savep->name = mem_strdup(savepoint_name); - - savep->savept = trx_savept_take(trx); - - savep->mysql_binlog_cache_pos = binlog_cache_pos; - - UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep); - - return(DB_SUCCESS); -} - -/*******************************************************************//** -Releases only the named savepoint. Savepoints which were set after this -savepoint are left as is. -@return if no savepoint of the name found then DB_NO_SAVEPOINT, -otherwise DB_SUCCESS */ -UNIV_INTERN -dberr_t -trx_release_savepoint_for_mysql( -/*============================*/ - trx_t* trx, /*!< in: transaction handle */ - const char* savepoint_name) /*!< in: savepoint name */ -{ - trx_named_savept_t* savep; - - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true) - || trx_state_eq(trx, TRX_STATE_PREPARED, true)); - ut_ad(trx->in_mysql_trx_list); - - savep = trx_savepoint_find(trx, savepoint_name); - - if (savep != NULL) { - trx_roll_savepoint_free(trx, savep); - } - - return(savep != NULL ? 
DB_SUCCESS : DB_NO_SAVEPOINT); -} - -/*******************************************************************//** -Determines if this transaction is rolling back an incomplete transaction -in crash recovery. -@return TRUE if trx is an incomplete transaction that is being rolled -back in crash recovery */ -UNIV_INTERN -ibool -trx_is_recv( -/*========*/ - const trx_t* trx) /*!< in: transaction */ -{ - return(trx == trx_roll_crash_recv_trx); -} - -/*******************************************************************//** -Returns a transaction savepoint taken at this point in time. -@return savepoint */ -UNIV_INTERN -trx_savept_t -trx_savept_take( -/*============*/ - trx_t* trx) /*!< in: transaction */ -{ - trx_savept_t savept; - - savept.least_undo_no = trx->undo_no; - - return(savept); -} - -/*******************************************************************//** -Roll back an active transaction. */ -static -void -trx_rollback_active( -/*================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - mem_heap_t* heap; - que_fork_t* fork; - que_thr_t* thr; - roll_node_t* roll_node; - dict_table_t* table; - ib_int64_t rows_to_undo; - const char* unit = ""; - ibool dictionary_locked = FALSE; - - heap = mem_heap_create(512); - - fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap); - fork->trx = trx; - - thr = que_thr_create(fork, heap); - - roll_node = roll_node_create(heap); - - thr->child = roll_node; - roll_node->common.parent = thr; - - trx->graph = fork; - - ut_a(thr == que_fork_start_command(fork)); - - mutex_enter(&trx_sys->mutex); - - trx_roll_crash_recv_trx = trx; - - trx_roll_max_undo_no = trx->undo_no; - - trx_roll_progress_printed_pct = 0; - - rows_to_undo = trx_roll_max_undo_no; - - mutex_exit(&trx_sys->mutex); - - if (rows_to_undo > 1000000000) { - rows_to_undo = rows_to_undo / 1000000; - unit = "M"; - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s" - " rows to undo\n", - trx->id, - 
(ulong) rows_to_undo, unit); - - if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { - row_mysql_lock_data_dictionary(trx); - dictionary_locked = TRUE; - } - - que_run_threads(thr); - ut_a(roll_node->undo_thr != NULL); - - que_run_threads(roll_node->undo_thr); - - trx_rollback_finish(thr_get_trx(roll_node->undo_thr)); - - /* Free the memory reserved by the undo graph */ - que_graph_free(static_cast<que_t*>( - roll_node->undo_thr->common.parent)); - - ut_a(trx->lock.que_state == TRX_QUE_RUNNING); - - if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE - && trx->table_id != 0) { - - /* If the transaction was for a dictionary operation, - we drop the relevant table only if it is not flagged - as DISCARDED. If it still exists. */ - - table = dict_table_open_on_id( - trx->table_id, dictionary_locked, - DICT_TABLE_OP_NORMAL); - - if (table && !dict_table_is_discarded(table)) { - - dberr_t err; - - /* Ensure that the table doesn't get evicted from the - cache, keeps things simple for drop. */ - - if (table->can_be_evicted) { - dict_table_move_from_lru_to_non_lru(table); - } - - dict_table_close(table, dictionary_locked, FALSE); - - ib_logf(IB_LOG_LEVEL_WARN, - "Dropping table '%s', with id " UINT64PF " " - "in recovery", - table->name, trx->table_id); - - err = row_drop_table_for_mysql(table->name, trx, TRUE, FALSE); - trx_commit_for_mysql(trx); - - ut_a(err == DB_SUCCESS); - } - } - - if (dictionary_locked) { - row_mysql_unlock_data_dictionary(trx); - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Rollback of trx with id " TRX_ID_FMT " completed", trx->id); - - mem_heap_free(heap); - - trx_roll_crash_recv_trx = NULL; -} - -/*******************************************************************//** -Rollback or clean up any resurrected incomplete transactions. It assumes -that the caller holds the trx_sys_t::mutex and it will release the -lock if it does a clean up or rollback. -@return TRUE if the transaction was cleaned up or rolled back -and trx_sys->mutex was released. 
*/ -static -ibool -trx_rollback_resurrected( -/*=====================*/ - trx_t* trx, /*!< in: transaction to rollback or clean */ - ibool all) /*!< in: FALSE=roll back dictionary transactions; - TRUE=roll back all non-PREPARED transactions */ -{ - ut_ad(mutex_own(&trx_sys->mutex)); - - /* The trx->is_recovered flag and trx->state are set - atomically under the protection of the trx->mutex (and - lock_sys->mutex) in lock_trx_release_locks(). We do not want - to accidentally clean up a non-recovered transaction here. */ - - trx_mutex_enter(trx); - bool is_recovered = trx->is_recovered; - trx_state_t state = trx->state; - trx_mutex_exit(trx); - - if (!is_recovered) { - return(FALSE); - } - - switch (state) { - case TRX_STATE_COMMITTED_IN_MEMORY: - mutex_exit(&trx_sys->mutex); - fprintf(stderr, - "InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n", - trx->id); - trx_cleanup_at_db_startup(trx); - trx_free_for_background(trx); - return(TRUE); - case TRX_STATE_ACTIVE: - if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { - mutex_exit(&trx_sys->mutex); - trx_rollback_active(trx); - trx_free_for_background(trx); - return(TRUE); - } - return(FALSE); - case TRX_STATE_PREPARED: - return(FALSE); - case TRX_STATE_NOT_STARTED: - break; - } - - ut_error; - return(FALSE); -} - -/*******************************************************************//** -Rollback or clean up any incomplete transactions which were -encountered in crash recovery. If the transaction already was -committed, then we clean up a possible insert undo log. If the -transaction was not yet committed, then we roll it back. 
*/ -UNIV_INTERN -void -trx_rollback_or_clean_recovered( -/*============================*/ - ibool all) /*!< in: FALSE=roll back dictionary transactions; - TRUE=roll back all non-PREPARED transactions */ -{ - trx_t* trx; - - ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO); - - if (trx_sys_get_n_rw_trx() == 0) { - - return; - } - - if (all) { - ib_logf(IB_LOG_LEVEL_INFO, - "Starting in background the rollback" - " of recovered transactions"); - } - - /* Note: For XA recovered transactions, we rely on MySQL to - do rollback. They will be in TRX_STATE_PREPARED state. If the server - is shutdown and they are still lingering in trx_sys_t::trx_list - then the shutdown will hang. */ - - /* Loop over the transaction list as long as there are - recovered transactions to clean up or recover. */ - - do { - mutex_enter(&trx_sys->mutex); - - for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); - trx != NULL; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - - assert_trx_in_rw_list(trx); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE - && srv_fast_shutdown != 0) { - all = FALSE; - break; - } - - /* If this function does a cleanup or rollback - then it will release the trx_sys->mutex, therefore - we need to reacquire it before retrying the loop. */ - - if (trx_rollback_resurrected(trx, all)) { - - mutex_enter(&trx_sys->mutex); - - break; - } - } - - mutex_exit(&trx_sys->mutex); - - } while (trx != NULL); - - if (all) { - ib_logf(IB_LOG_LEVEL_INFO, - "Rollback of non-prepared transactions completed"); - } -} - -/*******************************************************************//** -Rollback or clean up any incomplete transactions which were -encountered in crash recovery. If the transaction already was -committed, then we clean up a possible insert undo log. If the -transaction was not yet committed, then we roll it back. -Note: this is done in a background thread. 
-@return a dummy parameter */ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( -/*================================================*/ - void* arg MY_ATTRIBUTE((unused))) - /*!< in: a dummy parameter required by - os_thread_create */ -{ - my_thread_init(); - ut_ad(!srv_read_only_mode); - -#ifdef UNIV_PFS_THREAD - pfs_register_thread(trx_rollback_clean_thread_key); -#endif /* UNIV_PFS_THREAD */ - - trx_rollback_or_clean_recovered(TRUE); - - trx_rollback_or_clean_is_active = false; - - my_thread_end(); - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/*******************************************************************//** -Creates an undo number array. -@return own: undo number array */ -static -trx_undo_arr_t* -trx_undo_arr_create( -/*================*/ - ulint n_cells) /*!< Number of cells */ -{ - trx_undo_arr_t* arr; - mem_heap_t* heap; - ulint sz = sizeof(*arr) + sizeof(*arr->infos) * n_cells; - - heap = mem_heap_create(sz); - - arr = static_cast<trx_undo_arr_t*>(mem_heap_zalloc(heap, sz)); - - arr->n_cells = n_cells; - - arr->infos = (trx_undo_inf_t*) (arr + 1); - - arr->heap = heap; - - return(arr); -} - -/*******************************************************************//** -Frees an undo number array. */ -UNIV_INTERN -void -trx_undo_arr_free( -/*==============*/ - trx_undo_arr_t* arr) /*!< in: undo number array */ -{ - mem_heap_free(arr->heap); -} - -/*******************************************************************//** -Stores info of an undo log record to the array if it is not stored yet. 
-@return FALSE if the record already existed in the array */ -static -ibool -trx_undo_arr_store_info( -/*====================*/ - trx_t* trx, /*!< in: transaction */ - undo_no_t undo_no)/*!< in: undo number */ -{ - ulint i; - trx_undo_arr_t* arr; - ulint n = 0; - ulint n_used; - trx_undo_inf_t* stored_here = NULL; - - arr = trx->undo_no_arr; - n_used = arr->n_used; - - for (i = 0; i < arr->n_cells; i++) { - trx_undo_inf_t* cell; - - cell = trx_undo_arr_get_nth_info(arr, i); - - if (!cell->in_use) { - if (!stored_here) { - /* Not in use, we may store here */ - cell->undo_no = undo_no; - cell->in_use = TRUE; - - arr->n_used++; - - stored_here = cell; - } - } else { - n++; - - if (cell->undo_no == undo_no) { - - if (stored_here) { - stored_here->in_use = FALSE; - ut_ad(arr->n_used > 0); - arr->n_used--; - } - - ut_ad(arr->n_used == n_used); - - return(FALSE); - } - } - - if (n == n_used && stored_here) { - - ut_ad(arr->n_used == 1 + n_used); - - return(TRUE); - } - } - - ut_error; - - return(FALSE); -} - -/*******************************************************************//** -Removes an undo number from the array. */ -static -void -trx_undo_arr_remove_info( -/*=====================*/ - trx_undo_arr_t* arr, /*!< in: undo number array */ - undo_no_t undo_no)/*!< in: undo number */ -{ - ulint i; - - for (i = 0; i < arr->n_cells; i++) { - - trx_undo_inf_t* cell; - - cell = trx_undo_arr_get_nth_info(arr, i); - - if (cell->in_use && cell->undo_no == undo_no) { - cell->in_use = FALSE; - ut_ad(arr->n_used > 0); - --arr->n_used; - break; - } - } -} - -/*******************************************************************//** -Gets the biggest undo number in an array. 
-@return biggest value, 0 if the array is empty */ -static -undo_no_t -trx_undo_arr_get_biggest( -/*=====================*/ - const trx_undo_arr_t* arr) /*!< in: undo number array */ -{ - ulint i; - undo_no_t biggest = 0; - ulint n_checked = 0; - - for (i = 0; i < arr->n_cells && n_checked < arr->n_used; ++i) { - - const trx_undo_inf_t* cell = &arr->infos[i]; - - if (cell->in_use) { - - ++n_checked; - - if (cell->undo_no > biggest) { - - biggest = cell->undo_no; - } - } - } - - return(biggest); -} - -/***********************************************************************//** -Tries truncate the undo logs. */ -static -void -trx_roll_try_truncate( -/*==================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - undo_no_t limit; - const trx_undo_arr_t* arr; - - ut_ad(mutex_own(&(trx->undo_mutex))); - ut_ad(mutex_own(&((trx->rseg)->mutex))); - - trx->pages_undone = 0; - - arr = trx->undo_no_arr; - - limit = trx->undo_no; - - if (arr->n_used > 0) { - undo_no_t biggest; - - biggest = trx_undo_arr_get_biggest(arr); - - if (biggest >= limit) { - - limit = biggest + 1; - } - } - - if (trx->insert_undo) { - trx_undo_truncate_end(trx, trx->insert_undo, limit); - } - - if (trx->update_undo) { - trx_undo_truncate_end(trx, trx->update_undo, limit); - } - -#ifdef WITH_WSREP_OUT - if (wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif /* WITH_WSREP */ -} - -/***********************************************************************//** -Pops the topmost undo log record in a single undo log and updates the info -about the topmost record in the undo log memory struct. 
-@return undo log record, the page s-latched */ -static -trx_undo_rec_t* -trx_roll_pop_top_rec( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* undo_page; - ulint offset; - trx_undo_rec_t* prev_rec; - page_t* prev_rec_page; - - ut_ad(mutex_own(&trx->undo_mutex)); - - undo_page = trx_undo_page_get_s_latched( - undo->space, undo->zip_size, undo->top_page_no, mtr); - - offset = undo->top_offset; - - /* fprintf(stderr, "Thread %lu undoing trx " TRX_ID_FMT - " undo record " TRX_ID_FMT "\n", - os_thread_get_curr_id(), trx->id, undo->top_undo_no); */ - - prev_rec = trx_undo_get_prev_rec( - undo_page + offset, undo->hdr_page_no, undo->hdr_offset, - true, mtr); - - if (prev_rec == NULL) { - - undo->empty = TRUE; - } else { - prev_rec_page = page_align(prev_rec); - - if (prev_rec_page != undo_page) { - - trx->pages_undone++; - } - - undo->top_page_no = page_get_page_no(prev_rec_page); - undo->top_offset = prev_rec - prev_rec_page; - undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); - } - - return(undo_page + offset); -} - -/********************************************************************//** -Pops the topmost record when the two undo logs of a transaction are seen -as a single stack of records ordered by their undo numbers. Inserts the -undo number of the popped undo record to the array of currently processed -undo numbers in the transaction. When the query thread finishes processing -of this undo record, it must be released with trx_undo_rec_release. 
-@return undo log record copied to heap, NULL if none left, or if the -undo number of the top record would be less than the limit */ -UNIV_INTERN -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx( -/*========================*/ - trx_t* trx, /*!< in: transaction */ - undo_no_t limit, /*!< in: least undo number we need */ - roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */ - mem_heap_t* heap) /*!< in: memory heap where copied */ -{ - trx_undo_t* undo; - trx_undo_t* ins_undo; - trx_undo_t* upd_undo; - trx_undo_rec_t* undo_rec; - trx_undo_rec_t* undo_rec_copy; - undo_no_t undo_no; - ibool is_insert; - trx_rseg_t* rseg; - ulint progress_pct; - mtr_t mtr; - - rseg = trx->rseg; -try_again: - mutex_enter(&(trx->undo_mutex)); - - if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { - mutex_enter(&rseg->mutex); - - trx_roll_try_truncate(trx); - - mutex_exit(&rseg->mutex); - } - - ins_undo = trx->insert_undo; - upd_undo = trx->update_undo; - - if (!ins_undo || ins_undo->empty) { - undo = upd_undo; - } else if (!upd_undo || upd_undo->empty) { - undo = ins_undo; - } else if (upd_undo->top_undo_no > ins_undo->top_undo_no) { - undo = upd_undo; - } else { - undo = ins_undo; - } - - if (!undo || undo->empty || limit > undo->top_undo_no) { - - if ((trx->undo_no_arr)->n_used == 0) { - /* Rollback is ending */ - - mutex_enter(&(rseg->mutex)); - - trx_roll_try_truncate(trx); - - mutex_exit(&(rseg->mutex)); - } - - mutex_exit(&(trx->undo_mutex)); - - return(NULL); - } - - is_insert = (undo == ins_undo); - - *roll_ptr = trx_undo_build_roll_ptr( - is_insert, undo->rseg->id, undo->top_page_no, undo->top_offset); - - mtr_start(&mtr); - - undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); - - undo_no = trx_undo_rec_get_undo_no(undo_rec); - - ut_ad(undo_no + 1 == trx->undo_no); - - /* We print rollback progress info if we are in a crash recovery - and the transaction has at least 1000 row operations to undo. 
*/ - - if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) { - - progress_pct = 100 - (ulint) - ((undo_no * 100) / trx_roll_max_undo_no); - if (progress_pct != trx_roll_progress_printed_pct) { - if (trx_roll_progress_printed_pct == 0) { - fprintf(stderr, - "\nInnoDB: Progress in percents:" - " %lu", (ulong) progress_pct); - } else { - fprintf(stderr, - " %lu", (ulong) progress_pct); - } - fflush(stderr); - trx_roll_progress_printed_pct = progress_pct; - } - } - - trx->undo_no = undo_no; - - if (!trx_undo_arr_store_info(trx, undo_no)) { - /* A query thread is already processing this undo log record */ - - mutex_exit(&(trx->undo_mutex)); - - mtr_commit(&mtr); - - goto try_again; - } - - undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); - - mutex_exit(&(trx->undo_mutex)); - - mtr_commit(&mtr); - - return(undo_rec_copy); -} - -/********************************************************************//** -Reserves an undo log record for a query thread to undo. This should be -called if the query thread gets the undo log record not using the pop -function above. -@return TRUE if succeeded */ -UNIV_INTERN -ibool -trx_undo_rec_reserve( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - undo_no_t undo_no)/*!< in: undo number of the record */ -{ - ibool ret; - - mutex_enter(&(trx->undo_mutex)); - - ret = trx_undo_arr_store_info(trx, undo_no); - - mutex_exit(&(trx->undo_mutex)); - - return(ret); -} - -/*******************************************************************//** -Releases a reserved undo record. */ -UNIV_INTERN -void -trx_undo_rec_release( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - undo_no_t undo_no)/*!< in: undo number */ -{ - trx_undo_arr_t* arr; - - mutex_enter(&(trx->undo_mutex)); - - arr = trx->undo_no_arr; - - trx_undo_arr_remove_info(arr, undo_no); - - mutex_exit(&(trx->undo_mutex)); -} - -/****************************************************************//** -Builds an undo 'query' graph for a transaction. 
The actual rollback is -performed by executing this query graph like a query subprocedure call. -The reply about the completion of the rollback will be sent by this -graph. -@return own: the query graph */ -static -que_t* -trx_roll_graph_build( -/*=================*/ - trx_t* trx) /*!< in: trx handle */ -{ - mem_heap_t* heap; - que_fork_t* fork; - que_thr_t* thr; - - ut_ad(trx_mutex_own(trx)); - - heap = mem_heap_create(512); - fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap); - fork->trx = trx; - - thr = que_thr_create(fork, heap); - - thr->child = row_undo_node_create(trx, thr, heap); - - return(fork); -} - -/*********************************************************************//** -Starts a rollback operation, creates the UNDO graph that will do the -actual undo operation. -@return query graph thread that will perform the UNDO operations. */ -static -que_thr_t* -trx_rollback_start( -/*===============*/ - trx_t* trx, /*!< in: transaction */ - ib_id_t roll_limit) /*!< in: rollback to undo no (for - partial undo), 0 if we are rolling back - the entire transaction */ -{ - que_t* roll_graph; - - ut_ad(trx_mutex_own(trx)); - - ut_ad(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0); - - /* Initialize the rollback field in the transaction */ - - trx->roll_limit = roll_limit; - - ut_a(trx->roll_limit <= trx->undo_no); - - trx->pages_undone = 0; - - if (trx->undo_no_arr == NULL) { - /* Single query thread -> 1 */ - trx->undo_no_arr = trx_undo_arr_create(1); - } - - /* Build a 'query' graph which will perform the undo operations */ - - roll_graph = trx_roll_graph_build(trx); - - trx->graph = roll_graph; - - trx->lock.que_state = TRX_QUE_ROLLING_BACK; - - return(que_fork_start_command(roll_graph)); -} - -/****************************************************************//** -Finishes a transaction rollback. 
*/ -static -void -trx_rollback_finish( -/*================*/ - trx_t* trx) /*!< in: transaction */ -{ - ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0); - - trx_commit(trx); - - trx->lock.que_state = TRX_QUE_RUNNING; -} - -/*********************************************************************//** -Creates a rollback command node struct. -@return own: rollback node struct */ -UNIV_INTERN -roll_node_t* -roll_node_create( -/*=============*/ - mem_heap_t* heap) /*!< in: mem heap where created */ -{ - roll_node_t* node; - - node = static_cast<roll_node_t*>(mem_heap_zalloc(heap, sizeof(*node))); - - node->state = ROLL_NODE_SEND; - - node->common.type = QUE_NODE_ROLLBACK; - - return(node); -} - -/***********************************************************//** -Performs an execution step for a rollback command node in a query graph. -@return query thread to run next, or NULL */ -UNIV_INTERN -que_thr_t* -trx_rollback_step( -/*==============*/ - que_thr_t* thr) /*!< in: query thread */ -{ - roll_node_t* node; - - node = static_cast<roll_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = ROLL_NODE_SEND; - } - - if (node->state == ROLL_NODE_SEND) { - trx_t* trx; - ib_id_t roll_limit = 0; - - trx = thr_get_trx(thr); - - trx_mutex_enter(trx); - - node->state = ROLL_NODE_WAIT; - - ut_a(node->undo_thr == NULL); - - roll_limit = node->partial ? 
node->savept.least_undo_no : 0; - - trx_commit_or_rollback_prepare(trx); - - node->undo_thr = trx_rollback_start(trx, roll_limit); - - trx_mutex_exit(trx); - - } else { - ut_ad(node->state == ROLL_NODE_WAIT); - - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} diff --git a/storage/xtradb/trx/trx0rseg.cc b/storage/xtradb/trx/trx0rseg.cc deleted file mode 100644 index 16fa334872b..00000000000 --- a/storage/xtradb/trx/trx0rseg.cc +++ /dev/null @@ -1,424 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0rseg.cc -Rollback segment - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0rseg.h" - -#ifdef UNIV_NONINL -#include "trx0rseg.ic" -#endif - -#include "trx0undo.h" -#include "fut0lst.h" -#include "srv0srv.h" -#include "trx0purge.h" -#include "ut0bh.h" -#include "srv0mon.h" - -#ifdef UNIV_PFS_MUTEX -/* Key to register rseg_mutex_key with performance schema */ -UNIV_INTERN mysql_pfs_key_t rseg_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/****************************************************************//** -Creates a rollback segment header. This function is called only when -a new rollback segment is created in the database. 
-@return page number of the created segment, FIL_NULL if fail */ -UNIV_INTERN -ulint -trx_rseg_header_create( -/*===================*/ - ulint space, /*!< in: space id */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint max_size, /*!< in: max size in pages */ - ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint page_no; - trx_rsegf_t* rsegf; - trx_sysf_t* sys_header; - ulint i; - buf_block_t* block; - - ut_ad(mtr); - ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL), - MTR_MEMO_X_LOCK)); - - /* Allocate a new file segment for the rollback segment */ - block = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr); - - if (block == NULL) { - /* No space left */ - - return(FIL_NULL); - } - - buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW); - - page_no = buf_block_get_page_no(block); - - /* Get the rollback segment file page */ - rsegf = trx_rsegf_get_new(space, zip_size, page_no, mtr); - - /* Initialize max size field */ - mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size, - MLOG_4BYTES, mtr); - - /* Initialize the history list */ - - mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr); - flst_init(rsegf + TRX_RSEG_HISTORY, mtr); - - /* Reset the undo log slots */ - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { - - trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr); - } - - /* Add the rollback segment info to the free slot in - the trx system header */ - - sys_header = trx_sysf_get(mtr); - - trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr); - trx_sysf_rseg_set_page_no(sys_header, rseg_slot_no, page_no, mtr); - - return(page_no); -} - -/***********************************************************************//** -Free's an instance of the rollback segment in memory. 
*/ -UNIV_INTERN -void -trx_rseg_mem_free( -/*==============*/ - trx_rseg_t* rseg) /* in, own: instance to free */ -{ - trx_undo_t* undo; - trx_undo_t* next_undo; - - mutex_free(&rseg->mutex); - - /* There can't be any active transactions. */ - ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0); - ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0); - - for (undo = UT_LIST_GET_FIRST(rseg->update_undo_cached); - undo != NULL; - undo = next_undo) { - - next_undo = UT_LIST_GET_NEXT(undo_list, undo); - - UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - - trx_undo_mem_free(undo); - } - - for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached); - undo != NULL; - undo = next_undo) { - - next_undo = UT_LIST_GET_NEXT(undo_list, undo); - - UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - - trx_undo_mem_free(undo); - } - - /* const_cast<trx_rseg_t*>() because this function is - like a destructor. */ - - *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = NULL; - - mem_free(rseg); -} - -/*************************************************************************** -Creates and initializes a rollback segment object. The values for the -fields are read from the header. The object is inserted to the rseg -list of the trx system object and a pointer is inserted in the rseg -array in the trx system object. 
-@return own: rollback segment object */ -static -trx_rseg_t* -trx_rseg_mem_create( -/*================*/ - ulint id, /*!< in: rollback segment id */ - ulint space, /*!< in: space where the segment - placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the segment - header */ - ib_bh_t* ib_bh, /*!< in/out: rseg queue */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint len; - trx_rseg_t* rseg; - fil_addr_t node_addr; - trx_rsegf_t* rseg_header; - trx_ulogf_t* undo_log_hdr; - ulint sum_of_undo_sizes; - - rseg = static_cast<trx_rseg_t*>(mem_zalloc(sizeof(trx_rseg_t))); - - rseg->id = id; - rseg->space = space; - rseg->zip_size = zip_size; - rseg->page_no = page_no; - - mutex_create(rseg_mutex_key, &rseg->mutex, SYNC_RSEG); - - /* const_cast<trx_rseg_t*>() because this function is - like a constructor. */ - *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = rseg; - - rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr); - - rseg->max_size = mtr_read_ulint( - rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr); - - /* Initialize the undo log lists according to the rseg header */ - - sum_of_undo_sizes = trx_undo_lists_init(rseg); - - rseg->curr_size = mtr_read_ulint( - rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr) - + 1 + sum_of_undo_sizes; - - len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr); - - if (len > 0) { - rseg_queue_t rseg_queue; - - trx_sys->rseg_history_len += len; - - node_addr = trx_purge_get_log_from_hist( - flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr)); - - rseg->last_page_no = node_addr.page; - rseg->last_offset = node_addr.boffset; - - undo_log_hdr = trx_undo_page_get( - rseg->space, rseg->zip_size, node_addr.page, - mtr) + node_addr.boffset; - - rseg->last_trx_no = mach_read_from_8( - undo_log_hdr + TRX_UNDO_TRX_NO); - - rseg->last_del_marks = mtr_read_ulint( - undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr); - - rseg_queue.rseg = rseg; - 
rseg_queue.trx_no = rseg->last_trx_no; - - if (rseg->last_page_no != FIL_NULL) { - const void* ptr; - - /* There is no need to cover this operation by the purge - mutex because we are still bootstrapping. */ - - ptr = ib_bh_push(ib_bh, &rseg_queue); - ut_a(ptr != NULL); - } - } else { - rseg->last_page_no = FIL_NULL; - } - - return(rseg); -} - -/******************************************************************** -Creates the memory copies for the rollback segments and initializes the -rseg array in trx_sys at a database startup. */ -static -void -trx_rseg_create_instance( -/*=====================*/ - trx_sysf_t* sys_header, /*!< in: trx system header */ - ib_bh_t* ib_bh, /*!< in/out: rseg queue */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint i; - - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { - ulint page_no; - - page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); - - if (page_no != FIL_NULL) { - ulint space; - ulint zip_size; - trx_rseg_t* rseg = NULL; - - ut_a(!trx_rseg_get_on_id(i)); - - space = trx_sysf_rseg_get_space(sys_header, i, mtr); - - zip_size = space ? fil_space_get_zip_size(space) : 0; - - rseg = trx_rseg_mem_create( - i, space, zip_size, page_no, ib_bh, mtr); - - ut_a(rseg->id == i); - } else { - ut_a(trx_sys->rseg_array[i] == NULL); - } - } -} - -/** Create a rollback segment. -@param[in] space undo tablespace ID -@return pointer to new rollback segment -@retval NULL on failure */ -UNIV_INTERN -trx_rseg_t* -trx_rseg_create(ulint space) -{ - mtr_t mtr; - ulint slot_no; - trx_rseg_t* rseg = NULL; - - mtr_start(&mtr); - - /* To obey the latching order, acquire the file space - x-latch before the trx_sys->mutex. 
*/ - mtr_x_lock(fil_space_get_latch(space, NULL), &mtr); - - slot_no = trx_sysf_rseg_find_free(&mtr); - - if (slot_no != ULINT_UNDEFINED) { - ulint id; - ulint page_no; - ulint zip_size; - trx_sysf_t* sys_header; - - page_no = trx_rseg_header_create( - space, 0, ULINT_MAX, slot_no, &mtr); - - if (page_no != FIL_NULL) { - sys_header = trx_sysf_get(&mtr); - - id = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr); - ut_a(id == space); - - zip_size = space ? fil_space_get_zip_size(space) : 0; - - rseg = trx_rseg_mem_create( - slot_no, space, zip_size, page_no, - purge_sys->ib_bh, &mtr); - } - } - - mtr_commit(&mtr); - return(rseg); -} - -/*********************************************************************//** -Creates the memory copies for rollback segments and initializes the -rseg array in trx_sys at a database startup. */ -UNIV_INTERN -void -trx_rseg_array_init( -/*================*/ - trx_sysf_t* sys_header, /* in/out: trx system header */ - ib_bh_t* ib_bh, /*!< in: rseg queue */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_sys->rseg_history_len = 0; - - trx_rseg_create_instance(sys_header, ib_bh, mtr); -} - -/******************************************************************** -Get the number of unique rollback tablespaces in use except space id 0. -The last space id will be the sentinel value ULINT_UNDEFINED. The array -will be sorted on space id. Note: space_ids should have have space for -TRX_SYS_N_RSEGS + 1 elements. -@return number of unique rollback tablespaces in use. 
*/ -UNIV_INTERN -ulint -trx_rseg_get_n_undo_tablespaces( -/*============================*/ - ulint* space_ids) /*!< out: array of space ids of - UNDO tablespaces */ -{ - ulint i; - mtr_t mtr; - trx_sysf_t* sys_header; - ulint n_undo_tablespaces = 0; - ulint space_ids_aux[TRX_SYS_N_RSEGS + 1]; - - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { - ulint page_no; - ulint space; - - page_no = trx_sysf_rseg_get_page_no(sys_header, i, &mtr); - - if (page_no == FIL_NULL) { - continue; - } - - space = trx_sysf_rseg_get_space(sys_header, i, &mtr); - - if (space != 0) { - ulint j; - ibool found = FALSE; - - for (j = 0; j < n_undo_tablespaces; ++j) { - if (space_ids[j] == space) { - found = TRUE; - break; - } - } - - if (!found) { - ut_a(n_undo_tablespaces <= i); - space_ids[n_undo_tablespaces++] = space; - } - } - } - - mtr_commit(&mtr); - - ut_a(n_undo_tablespaces <= TRX_SYS_N_RSEGS); - - space_ids[n_undo_tablespaces] = ULINT_UNDEFINED; - - if (n_undo_tablespaces > 0) { - ut_ulint_sort(space_ids, space_ids_aux, 0, n_undo_tablespaces); - } - - return(n_undo_tablespaces); -} diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc deleted file mode 100644 index 9accb4ef303..00000000000 --- a/storage/xtradb/trx/trx0sys.cc +++ /dev/null @@ -1,1523 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0sys.cc -Transaction system - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0sys.h" - -#ifdef UNIV_NONINL -#include "trx0sys.ic" -#endif - -#ifdef UNIV_HOTBACKUP -#include "fsp0types.h" - -#else /* !UNIV_HOTBACKUP */ -#include "fsp0fsp.h" -#include "mtr0log.h" -#include "mtr0log.h" -#include "trx0trx.h" -#include "trx0rseg.h" -#include "trx0undo.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "trx0purge.h" -#include "log0log.h" -#include "log0recv.h" -#include "os0file.h" -#include "read0read.h" - -#ifdef WITH_WSREP -#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */ -#endif /* */ - -#include <mysql/service_wsrep.h> - -/** The file format tag structure with id and name. */ -struct file_format_t { - ulint id; /*!< id of the file format */ - const char* name; /*!< text representation of the - file format */ - ib_mutex_t mutex; /*!< covers changes to the above - fields */ -}; - -/** The transaction system */ -UNIV_INTERN trx_sys_t* trx_sys = NULL; - -/** In a MySQL replication slave, in crash recovery we store the master log -file name and position here. */ -/* @{ */ -/** Master binlog file name */ -UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN]; -/** Master binlog file position. We have successfully got the updates -up to this position. 
-1 means that no crash recovery was needed, or -there was no master log position info inside InnoDB.*/ -UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1; -/* @} */ - -/** If this MySQL server uses binary logging, after InnoDB has been inited -and if it has done a crash recovery, we store the binlog file name and position -here. */ -/* @{ */ -/** Binlog file name */ -UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN]; -/** Binlog file position, or -1 if unknown */ -UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1; -/* @} */ -#endif /* !UNIV_HOTBACKUP */ - -/** List of animal names representing file format. The array index is the file format id. */ -static const char* file_format_name_map[] = { - "Antelope", - "Barracuda", - "Cheetah", - "Dragon", - "Elk", - "Fox", - "Gazelle", - "Hornet", - "Impala", - "Jaguar", - "Kangaroo", - "Leopard", - "Moose", - "Nautilus", - "Ocelot", - "Porpoise", - "Quail", - "Rabbit", - "Shark", - "Tiger", - "Urchin", - "Viper", - "Whale", - "Xenops", - "Yak", - "Zebra" -}; - -/** The number of elements in the file format name array. */ -static const ulint FILE_FORMAT_NAME_N - = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]); - -#ifdef UNIV_PFS_MUTEX -/* Key to register the mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key; -UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -#ifndef UNIV_HOTBACKUP -#ifdef UNIV_DEBUG -/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */ -UNIV_INTERN uint trx_rseg_n_slots_debug = 0; -#endif /* UNIV_DEBUG */ - -/** This is used to track the maximum file format id known to InnoDB. It's -updated via SET GLOBAL innodb_file_format_max = 'x' or when we open -or create a table. */ -static file_format_t file_format_max; - -#ifdef UNIV_DEBUG -/****************************************************************//** -Checks whether a trx is in one of rw_trx_list or ro_trx_list. 
-@return TRUE if is in */ -UNIV_INTERN -ibool -trx_in_trx_list( -/*============*/ - const trx_t* in_trx) /*!< in: transaction */ -{ - const trx_t* trx; - trx_list_t* trx_list; - - /* Non-locking autocommits should not hold any locks. */ - assert_trx_in_list(in_trx); - - trx_list = in_trx->read_only - ? &trx_sys->ro_trx_list : &trx_sys->rw_trx_list; - - ut_ad(mutex_own(&trx_sys->mutex)); - - ut_ad(trx_assert_started(in_trx)); - - for (trx = UT_LIST_GET_FIRST(*trx_list); - trx != NULL && trx != in_trx; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - - assert_trx_in_list(trx); - ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); - } - - return(trx != NULL); -} -#endif /* UNIV_DEBUG */ - -/*****************************************************************//** -Writes the value of max_trx_id to the file based trx system header. */ -UNIV_INTERN -void -trx_sys_flush_max_trx_id(void) -/*==========================*/ -{ - mtr_t mtr; - trx_sysf_t* sys_header; - -#ifndef WITH_WSREP - /* wsrep_fake_trx_id violates this assert - * Copied from trx_sys_get_new_trx_id - */ - ut_ad(mutex_own(&trx_sys->mutex)); -#endif /* WITH_WSREP */ - - if (!srv_read_only_mode) { - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - mlog_write_ull( - sys_header + TRX_SYS_TRX_ID_STORE, - trx_sys->max_trx_id, &mtr); - - mtr_commit(&mtr); - } -} - -/*****************************************************************//** -Updates the offset information about the end of the MySQL binlog entry -which corresponds to the transaction just being committed. In a MySQL -replication slave updates the latest master binlog position up to which -replication has proceeded. 
*/ -UNIV_INTERN -void -trx_sys_update_mysql_binlog_offset( -/*===============================*/ - const char* file_name,/*!< in: MySQL log file name */ - ib_int64_t offset, /*!< in: position in that log file */ - ulint field, /*!< in: offset of the MySQL log info field in - the trx sys header */ -#ifdef WITH_WSREP - trx_sysf_t* sys_header, /*!< in: trx sys header */ -#endif /* WITH_WSREP */ - mtr_t* mtr) /*!< in: mtr */ -{ -#ifndef WITH_WSREP - trx_sysf_t* sys_header; -#endif /* !WITH_WSREP */ - - if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) { - - /* We cannot fit the name to the 512 bytes we have reserved */ - - return; - } - -#ifndef WITH_WSREP - sys_header = trx_sysf_get(mtr); -#endif /* !WITH_WSREP */ - - if (mach_read_from_4(sys_header + field - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - != TRX_SYS_MYSQL_LOG_MAGIC_N) { - - mlog_write_ulint(sys_header + field - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD, - TRX_SYS_MYSQL_LOG_MAGIC_N, - MLOG_4BYTES, mtr); - } - - if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME), - file_name)) { - - mlog_write_string(sys_header + field - + TRX_SYS_MYSQL_LOG_NAME, - (byte*) file_name, 1 + ut_strlen(file_name), - mtr); - } - - if (mach_read_from_4(sys_header + field - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0 - || (offset >> 32) > 0) { - - mlog_write_ulint(sys_header + field - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH, - (ulint)(offset >> 32), - MLOG_4BYTES, mtr); - } - - mlog_write_ulint(sys_header + field - + TRX_SYS_MYSQL_LOG_OFFSET_LOW, - (ulint)(offset & 0xFFFFFFFFUL), - MLOG_4BYTES, mtr); -} - -/*****************************************************************//** -Stores the MySQL binlog offset info in the trx system header if -the magic number shows it valid, and print the info to stderr */ -UNIV_INTERN -void -trx_sys_print_mysql_binlog_offset(void) -/*===================================*/ -{ - trx_sysf_t* sys_header; - mtr_t mtr; - ulint trx_sys_mysql_bin_log_pos_high; - ulint trx_sys_mysql_bin_log_pos_low; - - 
mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - != TRX_SYS_MYSQL_LOG_MAGIC_N) { - - mtr_commit(&mtr); - - return; - } - - trx_sys_mysql_bin_log_pos_high = mach_read_from_4( - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH); - trx_sys_mysql_bin_log_pos_low = mach_read_from_4( - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW); - - trx_sys_mysql_bin_log_pos - = (((ib_int64_t) trx_sys_mysql_bin_log_pos_high) << 32) - + (ib_int64_t) trx_sys_mysql_bin_log_pos_low; - - ut_memcpy(trx_sys_mysql_bin_log_name, - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN); - - fprintf(stderr, - "InnoDB: Last MySQL binlog file position %lu %lu," - " file name %s\n", - trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low, - trx_sys_mysql_bin_log_name); - - mtr_commit(&mtr); -} - -#ifdef WITH_WSREP - -#ifdef UNIV_DEBUG -static long long trx_sys_cur_xid_seqno = -1; -static unsigned char trx_sys_cur_xid_uuid[16]; - -long long read_wsrep_xid_seqno(const XID* xid) -{ - long long seqno; - memcpy(&seqno, xid->data + 24, sizeof(long long)); - return seqno; -} - -void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf) -{ - memcpy(buf, xid->data + 8, 16); -} - -#endif /* UNIV_DEBUG */ - -void -trx_sys_update_wsrep_checkpoint( - const XID* xid, /*!< in: transaction XID */ - trx_sysf_t* sys_header, /*!< in: sys_header */ - mtr_t* mtr) /*!< in: mtr */ -{ -#ifdef UNIV_DEBUG - { - /* Check that seqno is monotonically increasing */ - unsigned char xid_uuid[16]; - long long xid_seqno = read_wsrep_xid_seqno(xid); - read_wsrep_xid_uuid(xid, xid_uuid); - if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 16)) - { - /* - This check is a protection against the initial seqno (-1) - assigned in read_wsrep_xid_uuid(), which, if not checked, - would cause the following assertion to fail. 
- */ - if (xid_seqno > -1 ) - { - ut_ad(xid_seqno > trx_sys_cur_xid_seqno); - } - } - else - { - memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16); - } - trx_sys_cur_xid_seqno = xid_seqno; - } -#endif /* UNIV_DEBUG */ - - ut_ad(xid && mtr); - ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid)); - - if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD) - != TRX_SYS_WSREP_XID_MAGIC_N) { - mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD, - TRX_SYS_WSREP_XID_MAGIC_N, - MLOG_4BYTES, mtr); - } - - mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_FORMAT, - (int)xid->formatID, - MLOG_4BYTES, mtr); - mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_GTRID_LEN, - (int)xid->gtrid_length, - MLOG_4BYTES, mtr); - mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_BQUAL_LEN, - (int)xid->bqual_length, - MLOG_4BYTES, mtr); - mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_DATA, - (const unsigned char*) xid->data, - XIDDATASIZE, mtr); - -} - -bool -trx_sys_read_wsrep_checkpoint(XID* xid) -/*===================================*/ -{ - trx_sysf_t* sys_header; - mtr_t mtr; - ulint magic; - - ut_ad(xid); - - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO - + TRX_SYS_WSREP_XID_MAGIC_N_FLD)) - != TRX_SYS_WSREP_XID_MAGIC_N) { - memset(xid, 0, sizeof(*xid)); - long long seqno= -1; - memcpy(xid->data + 24, &seqno, sizeof(long long)); - xid->formatID = -1; - trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr); - mtr_commit(&mtr); - return false; - } - - xid->formatID = (int)mach_read_from_4( - sys_header - + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT); - xid->gtrid_length = (int)mach_read_from_4( - sys_header - + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN); - xid->bqual_length = (int)mach_read_from_4( - sys_header - + 
TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN); - ut_memcpy(xid->data, - sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA, - XIDDATASIZE); - - mtr_commit(&mtr); - return true; -} - -#endif /* WITH_WSREP */ - -/*****************************************************************//** -Prints to stderr the MySQL master log offset info in the trx system header if -the magic number shows it valid. */ -UNIV_INTERN -void -trx_sys_print_mysql_master_log_pos(void) -/*====================================*/ -{ - trx_sysf_t* sys_header; - mtr_t mtr; - - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - != TRX_SYS_MYSQL_LOG_MAGIC_N) { - - mtr_commit(&mtr); - - return; - } - - fprintf(stderr, - "InnoDB: In a MySQL replication slave the last" - " master binlog file\n" - "InnoDB: position %lu %lu, file name %s\n", - (ulong) mach_read_from_4(sys_header - + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH), - (ulong) mach_read_from_4(sys_header - + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW), - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME); - /* Copy the master log position info to global variables we can - use in ha_innobase.cc to initialize glob_mi to right values */ - - ut_memcpy(trx_sys_mysql_master_log_name, - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME, - TRX_SYS_MYSQL_LOG_NAME_LEN); - - trx_sys_mysql_master_log_pos - = (((ib_int64_t) mach_read_from_4( - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32) - + ((ib_int64_t) mach_read_from_4( - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW)); - mtr_commit(&mtr); -} - -/****************************************************************//** -Looks for a free slot for a rollback segment in the trx system file copy. 
-@return slot index or ULINT_UNDEFINED if not found */ -UNIV_INTERN -ulint -trx_sysf_rseg_find_free( -/*====================*/ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint i; - trx_sysf_t* sys_header; - - sys_header = trx_sysf_get(mtr); - - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { - ulint page_no; - - page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); - - if (page_no == FIL_NULL) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/*****************************************************************//** -Creates the file page for the transaction system. This function is called only -at the database creation, before trx_sys_init. */ -static -void -trx_sysf_create( -/*============*/ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_sysf_t* sys_header; - ulint slot_no; - buf_block_t* block; - page_t* page; - ulint page_no; - byte* ptr; - ulint len; - - ut_ad(mtr); - - /* Note that below we first reserve the file space x-latch, and - then enter the kernel: we must do it in this order to conform - to the latching order rules. */ - - mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr); - - /* Create the trx sys file block in a new allocated file segment */ - block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER, - mtr); - buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER); - - ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO); - - page = buf_block_get_frame(block); - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS, - MLOG_2BYTES, mtr); - - /* Reset the doublewrite buffer magic number to zero so that we - know that the doublewrite buffer has not yet been created (this - suppresses a Valgrind warning) */ - - mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE - + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr); - - sys_header = trx_sysf_get(mtr); - - /* Start counting transaction ids from number 1 up */ - mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1); - - /* Reset the rollback segment slots. 
Old versions of InnoDB - define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect - that the whole array is initialized. */ - ptr = TRX_SYS_RSEGS + sys_header; - len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS) - * TRX_SYS_RSEG_SLOT_SIZE; - memset(ptr, 0xff, len); - ptr += len; - ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END)); - - /* Initialize all of the page. This part used to be uninitialized. */ - memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr); - - mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END - + page - sys_header, mtr); - - /* Create the first rollback segment in the SYSTEM tablespace */ - slot_no = trx_sysf_rseg_find_free(mtr); - page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no, - mtr); - - ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); - ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO); -} - -/*****************************************************************//** -Compare two trx_rseg_t instances on last_trx_no. */ -static -int -trx_rseg_compare_last_trx_no( -/*=========================*/ - const void* p1, /*!< in: elem to compare */ - const void* p2) /*!< in: elem to compare */ -{ - ib_int64_t cmp; - - const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1; - const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2; - - cmp = rseg_q1->trx_no - rseg_q2->trx_no; - - if (cmp < 0) { - return(-1); - } else if (cmp > 0) { - return(1); - } - - return(0); -} - -/*****************************************************************//** -Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. -@return min binary heap of rsegs to purge */ -UNIV_INTERN -ib_bh_t* -trx_sys_init_at_db_start(void) -/*==========================*/ -{ - mtr_t mtr; - ib_bh_t* ib_bh; - trx_sysf_t* sys_header; - ib_uint64_t rows_to_undo = 0; - const char* unit = ""; - - /* We create the min binary heap here and pass ownership to - purge when we init the purge sub-system. 
Purge is responsible - for freeing the binary heap. */ - - ib_bh = ib_bh_create( - trx_rseg_compare_last_trx_no, - sizeof(rseg_queue_t), TRX_SYS_N_RSEGS); - - mtr_start(&mtr); - - /* Allocate the trx descriptors array */ - trx_sys->descriptors = static_cast<trx_id_t*>( - ut_malloc(sizeof(trx_id_t) * - TRX_DESCR_ARRAY_INITIAL_SIZE)); - trx_sys->descr_n_max = TRX_DESCR_ARRAY_INITIAL_SIZE; - trx_sys->descr_n_used = 0; - srv_descriptors_memory = TRX_DESCR_ARRAY_INITIAL_SIZE * - sizeof(trx_id_t); - - sys_header = trx_sysf_get(&mtr); - - if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { - trx_rseg_array_init(sys_header, ib_bh, &mtr); - } - - /* VERY important: after the database is started, max_trx_id value is - divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in - trx_sys_get_new_trx_id will evaluate to TRUE when the function - is first time called, and the value for trx id will be written - to the disk-based header! Thus trx id values will not overlap when - the database is repeatedly started! */ - - trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN - + ut_uint64_align_up(mach_read_from_8(sys_header - + TRX_SYS_TRX_ID_STORE), - TRX_SYS_TRX_ID_WRITE_MARGIN); - - ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id); - - UT_LIST_INIT(trx_sys->mysql_trx_list); - - trx_dummy_sess = sess_open(); - - trx_lists_init_at_db_start(); - - /* This S lock is not strictly required, it is here only to satisfy - the debug code (assertions). We are still running in single threaded - bootstrap mode. 
*/ - - mutex_enter(&trx_sys->mutex); - - ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0); - - if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) { - const trx_t* trx; - - for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); - trx != NULL; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - - ut_ad(trx->is_recovered); - assert_trx_in_rw_list(trx); - - if (trx_state_eq(trx, TRX_STATE_ACTIVE)) { - rows_to_undo += trx->undo_no; - } - } - - if (rows_to_undo > 1000000000) { - unit = "M"; - rows_to_undo = rows_to_undo / 1000000; - } - - fprintf(stderr, - "InnoDB: %lu transaction(s) which must be" - " rolled back or cleaned up\n" - "InnoDB: in total %lu%s row operations to undo\n", - (ulong) UT_LIST_GET_LEN(trx_sys->rw_trx_list), - (ulong) rows_to_undo, unit); - - fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n", - trx_sys->max_trx_id); - } - - mutex_exit(&trx_sys->mutex); - - UT_LIST_INIT(trx_sys->view_list); - - mtr_commit(&mtr); - - return(ib_bh); -} - -/*****************************************************************//** -Creates the trx_sys instance and initializes ib_bh and mutex. */ -UNIV_INTERN -void -trx_sys_create(void) -/*================*/ -{ - ut_ad(trx_sys == NULL); - - trx_sys = static_cast<trx_sys_t*>(mem_zalloc(sizeof(*trx_sys))); - - mutex_create(trx_sys_mutex_key, &trx_sys->mutex, SYNC_TRX_SYS); -} - -/*****************************************************************//** -Creates and initializes the transaction system at the database creation. */ -UNIV_INTERN -void -trx_sys_create_sys_pages(void) -/*==========================*/ -{ - mtr_t mtr; - - mtr_start(&mtr); - - trx_sysf_create(&mtr); - - mtr_commit(&mtr); -} - -/*****************************************************************//** -Update the file format tag. 
-@return always TRUE */ -static -ibool -trx_sys_file_format_max_write( -/*==========================*/ - ulint format_id, /*!< in: file format id */ - const char** name) /*!< out: max file format name, can - be NULL */ -{ - mtr_t mtr; - byte* ptr; - buf_block_t* block; - ib_uint64_t tag_value; - - mtr_start(&mtr); - - block = buf_page_get( - TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr); - - file_format_max.id = format_id; - file_format_max.name = trx_sys_file_format_id_to_name(format_id); - - ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG; - tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N; - - if (name) { - *name = file_format_max.name; - } - - mlog_write_ull(ptr, tag_value, &mtr); - - mtr_commit(&mtr); - - return(TRUE); -} - -/*****************************************************************//** -Read the file format tag. -@return the file format or ULINT_UNDEFINED if not set. */ -static -ulint -trx_sys_file_format_max_read(void) -/*==============================*/ -{ - mtr_t mtr; - const byte* ptr; - const buf_block_t* block; - ib_id_t file_format_id; - - /* Since this is called during the startup phase it's safe to - read the value without a covering mutex. */ - mtr_start(&mtr); - - block = buf_page_get( - TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr); - - ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG; - file_format_id = mach_read_from_8(ptr); - - mtr_commit(&mtr); - - file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N; - - if (file_format_id >= FILE_FORMAT_NAME_N) { - - /* Either it has never been tagged, or garbage in it. */ - return(ULINT_UNDEFINED); - } - - return((ulint) file_format_id); -} - -/*****************************************************************//** -Get the name representation of the file format from its id. 
-@return pointer to the name */ -UNIV_INTERN -const char* -trx_sys_file_format_id_to_name( -/*===========================*/ - const ulint id) /*!< in: id of the file format */ -{ - ut_a(id < FILE_FORMAT_NAME_N); - - return(file_format_name_map[id]); -} - -/*****************************************************************//** -Check for the max file format tag stored on disk. Note: If max_format_id -is == UNIV_FORMAT_MAX + 1 then we only print a warning. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -trx_sys_file_format_max_check( -/*==========================*/ - ulint max_format_id) /*!< in: max format id to check */ -{ - ulint format_id; - - /* Check the file format in the tablespace. Do not try to - recover if the file format is not supported by the engine - unless forced by the user. */ - format_id = trx_sys_file_format_max_read(); - if (format_id == ULINT_UNDEFINED) { - /* Format ID was not set. Set it to minimum possible - value. */ - format_id = UNIV_FORMAT_MIN; - } - - ib_logf(IB_LOG_LEVEL_INFO, - "Highest supported file format is %s.", - trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX)); - - if (format_id > UNIV_FORMAT_MAX) { - - ut_a(format_id < FILE_FORMAT_NAME_N); - - ib_logf(max_format_id <= UNIV_FORMAT_MAX - ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN, - "The system tablespace is in a file " - "format that this version doesn't support - %s.", - trx_sys_file_format_id_to_name(format_id)); - - if (max_format_id <= UNIV_FORMAT_MAX) { - return(DB_ERROR); - } - } - - format_id = (format_id > max_format_id) ? format_id : max_format_id; - - /* We don't need a mutex here, as this function should only - be called once at start up. */ - file_format_max.id = format_id; - file_format_max.name = trx_sys_file_format_id_to_name(format_id); - - return(DB_SUCCESS); -} - -/*****************************************************************//** -Set the file format id unconditionally except if it's already the -same value. 
-@return TRUE if value updated */ -UNIV_INTERN -ibool -trx_sys_file_format_max_set( -/*========================*/ - ulint format_id, /*!< in: file format id */ - const char** name) /*!< out: max file format name or - NULL if not needed. */ -{ - ibool ret = FALSE; - - ut_a(format_id <= UNIV_FORMAT_MAX); - - mutex_enter(&file_format_max.mutex); - - /* Only update if not already same value. */ - if (format_id != file_format_max.id) { - - ret = trx_sys_file_format_max_write(format_id, name); - } - - mutex_exit(&file_format_max.mutex); - - return(ret); -} - -/********************************************************************//** -Tags the system table space with minimum format id if it has not been -tagged yet. -WARNING: This function is only called during the startup and AFTER the -redo log application during recovery has finished. */ -UNIV_INTERN -void -trx_sys_file_format_tag_init(void) -/*==============================*/ -{ - ulint format_id; - - format_id = trx_sys_file_format_max_read(); - - /* If format_id is not set then set it to the minimum. */ - if (format_id == ULINT_UNDEFINED) { - trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL); - } -} - -/********************************************************************//** -Update the file format tag in the system tablespace only if the given -format id is greater than the known max id. 
-@return TRUE if format_id was bigger than the known max id */ -UNIV_INTERN -ibool -trx_sys_file_format_max_upgrade( -/*============================*/ - const char** name, /*!< out: max file format name */ - ulint format_id) /*!< in: file format identifier */ -{ - ibool ret = FALSE; - - ut_a(name); - ut_a(file_format_max.name != NULL); - ut_a(format_id <= UNIV_FORMAT_MAX); - - mutex_enter(&file_format_max.mutex); - - if (format_id > file_format_max.id) { - - ret = trx_sys_file_format_max_write(format_id, name); - } - - mutex_exit(&file_format_max.mutex); - - return(ret); -} - -/*****************************************************************//** -Get the name representation of the file format from its id. -@return pointer to the max format name */ -UNIV_INTERN -const char* -trx_sys_file_format_max_get(void) -/*=============================*/ -{ - return(file_format_max.name); -} - -/*****************************************************************//** -Initializes the tablespace tag system. */ -UNIV_INTERN -void -trx_sys_file_format_init(void) -/*==========================*/ -{ - mutex_create(file_format_max_mutex_key, - &file_format_max.mutex, SYNC_FILE_FORMAT_TAG); - - /* We don't need a mutex here, as this function should only - be called once at start up. */ - file_format_max.id = UNIV_FORMAT_MIN; - - file_format_max.name = trx_sys_file_format_id_to_name( - file_format_max.id); -} - -/*****************************************************************//** -Closes the tablespace tag system. */ -UNIV_INTERN -void -trx_sys_file_format_close(void) -/*===========================*/ -{ - /* Does nothing at the moment */ -} - -/********************************************************************* -Creates the rollback segments. -@return number of rollback segments that are active. 
*/ -UNIV_INTERN -ulint -trx_sys_create_rsegs( -/*=================*/ - ulint n_spaces, /*!< number of tablespaces for UNDO logs */ - ulint n_rsegs) /*!< number of rollback segments to create */ -{ - mtr_t mtr; - ulint n_used; - - ut_a(n_spaces < TRX_SYS_N_RSEGS); - ut_a(n_rsegs <= TRX_SYS_N_RSEGS); - - if (srv_read_only_mode) { - return(ULINT_UNDEFINED); - } - - /* This is executed in single-threaded mode therefore it is not - necessary to use the same mtr in trx_rseg_create(). n_used cannot - change while the function is executing. */ - - mtr_start(&mtr); - n_used = trx_sysf_rseg_find_free(&mtr); - mtr_commit(&mtr); - - if (n_used == ULINT_UNDEFINED) { - n_used = TRX_SYS_N_RSEGS; - } - - /* Do not create additional rollback segments if innodb_force_recovery - has been set and the database was not shutdown cleanly. */ - - if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) { - ulint i; - ulint new_rsegs = n_rsegs - n_used; - - for (i = 0; i < new_rsegs; ++i) { - ulint space_id; - space_id = (n_spaces == 0) ? 0 - : (srv_undo_space_id_start + i % n_spaces); - - /* Tablespace 0 is the system tablespace. */ - if (trx_rseg_create(space_id) != NULL) { - ++n_used; - } else { - break; - } - } - } - - ib_logf(IB_LOG_LEVEL_INFO, - "%lu rollback segment(s) are active.", n_used); - - return(n_used); -} - -#else /* !UNIV_HOTBACKUP */ -/*****************************************************************//** -Prints to stderr the MySQL binlog info in the system header if the -magic number shows it valid. 
*/
UNIV_INTERN
void
trx_sys_print_mysql_binlog_offset_from_page(
/*========================================*/
	const byte*	page)	/*!< in: buffer containing the trx
				system header page, i.e., page number
				TRX_SYS_PAGE_NO in the tablespace */
{
	/* Start of the MySQL binlog info inside the trx system header. */
	const trx_sysf_t* const	sys_header = page + TRX_SYS;
	const byte* const	log_info
		= sys_header + TRX_SYS_MYSQL_LOG_INFO;

	if (mach_read_from_4(log_info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
		/* No valid binlog info on the page: print nothing. */
		return;
	}

	const ulong	pos_high = (ulong) mach_read_from_4(
		log_info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
	const ulong	pos_low = (ulong) mach_read_from_4(
		log_info + TRX_SYS_MYSQL_LOG_OFFSET_LOW);

	fprintf(stderr,
		"mysqlbackup: Last MySQL binlog file position %lu %lu,"
		" file name %s\n",
		pos_high, pos_low,
		log_info + TRX_SYS_MYSQL_LOG_NAME);
}

/*****************************************************************//**
Reads the file format id from the first system table space file.
Even if the call succeeds and returns TRUE, the returned format id
may be ULINT_UNDEFINED signalling that the format id was not present
in the data file. 
-@return TRUE if call succeeds */ -UNIV_INTERN -ibool -trx_sys_read_file_format_id( -/*========================*/ - const char *pathname, /*!< in: pathname of the first system - table space file */ - ulint *format_id) /*!< out: file format of the system table - space */ -{ - os_file_t file; - ibool success; - byte buf[UNIV_PAGE_SIZE * 2]; - page_t* page = ut_align(buf, UNIV_PAGE_SIZE); - const byte* ptr; - ib_id_t file_format_id; - - *format_id = ULINT_UNDEFINED; - - file = os_file_create_simple_no_error_handling( - innodb_file_data_key, - pathname, - OS_FILE_OPEN, - OS_FILE_READ_ONLY, - &success - ); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " mysqlbackup: Error: trying to read system " - "tablespace file format,\n" - " mysqlbackup: but could not open the tablespace " - "file %s!\n", pathname); - return(FALSE); - } - - /* Read the page on which file format is stored */ - - success = os_file_read_no_error_handling( - file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE); - - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " mysqlbackup: Error: trying to read system " - "tablespace file format,\n" - " mysqlbackup: but failed to read the tablespace " - "file %s!\n", pathname); - - os_file_close(file); - return(FALSE); - } - os_file_close(file); - - /* get the file format from the page */ - ptr = page + TRX_SYS_FILE_FORMAT_TAG; - file_format_id = mach_read_from_8(ptr); - file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N; - - if (file_format_id >= FILE_FORMAT_NAME_N) { - - /* Either it has never been tagged, or garbage in it. */ - return(TRUE); - } - - *format_id = (ulint) file_format_id; - - return(TRUE); -} - -/*****************************************************************//** -Reads the file format id from the given per-table data file. 
-@return TRUE if call succeeds */ -UNIV_INTERN -ibool -trx_sys_read_pertable_file_format_id( -/*=================================*/ - const char *pathname, /*!< in: pathname of a per-table - datafile */ - ulint *format_id) /*!< out: file format of the per-table - data file */ -{ - os_file_t file; - ibool success; - byte buf[UNIV_PAGE_SIZE * 2]; - page_t* page = ut_align(buf, UNIV_PAGE_SIZE); - const byte* ptr; - ib_uint32_t flags; - - *format_id = ULINT_UNDEFINED; - - file = os_file_create_simple_no_error_handling( - innodb_file_data_key, - pathname, - OS_FILE_OPEN, - OS_FILE_READ_ONLY, - &success - ); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " mysqlbackup: Error: trying to read per-table " - "tablespace format,\n" - " mysqlbackup: but could not open the tablespace " - "file %s!\n", pathname); - - return(FALSE); - } - - /* Read the first page of the per-table datafile */ - - success = os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE); - - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(true); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " mysqlbackup: Error: trying to per-table data file " - "format,\n" - " mysqlbackup: but failed to read the tablespace " - "file %s!\n", pathname); - - os_file_close(file); - return(FALSE); - } - os_file_close(file); - - /* get the file format from the page */ - ptr = page + 54; - flags = mach_read_from_4(ptr); - - if (!fsp_flags_is_valid(flags) { - /* bad tablespace flags */ - return(FALSE); - } - - *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags); - - return(TRUE); -} - - -/*****************************************************************//** -Get the name representation of the file format from its id. 
-@return pointer to the name */ -UNIV_INTERN -const char* -trx_sys_file_format_id_to_name( -/*===========================*/ - const ulint id) /*!< in: id of the file format */ -{ - if (!(id < FILE_FORMAT_NAME_N)) { - /* unknown id */ - return("Unknown"); - } - - return(file_format_name_map[id]); -} - -#endif /* !UNIV_HOTBACKUP */ - -#ifndef UNIV_HOTBACKUP -/********************************************************************* -Shutdown/Close the transaction system. */ -UNIV_INTERN -void -trx_sys_close(void) -/*===============*/ -{ - ulint i; - trx_t* trx; - read_view_t* view; - - ut_ad(trx_sys != NULL); - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); - - /* Check that all read views are closed except read view owned - by a purge. */ - - mutex_enter(&trx_sys->mutex); - - if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) { - fprintf(stderr, - "InnoDB: Error: all read views were not closed" - " before shutdown:\n" - "InnoDB: %lu read views open \n", - UT_LIST_GET_LEN(trx_sys->view_list) - 1); - } - - mutex_exit(&trx_sys->mutex); - - sess_close(trx_dummy_sess); - trx_dummy_sess = NULL; - - trx_purge_sys_close(); - - /* Free the double write data structures. */ - if (buf_dblwr) { - buf_dblwr_free(); - } - - - /* Only prepared transactions may be left in the system. Free them. */ - ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx - || srv_read_only_mode - || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO - || (IS_XTRABACKUP() && srv_apply_log_only)); - - - while ((trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) != NULL) { - trx_free_prepared(trx); - } - - /* There can't be any active transactions. 
*/ - for (i = 0; i < TRX_SYS_N_RSEGS; ++i) { - trx_rseg_t* rseg; - - rseg = trx_sys->rseg_array[i]; - - if (rseg != NULL) { - trx_rseg_mem_free(rseg); - } else { - break; - } - } - - view = UT_LIST_GET_FIRST(trx_sys->view_list); - - while (view != NULL) { - read_view_t* prev_view = view; - - view = UT_LIST_GET_NEXT(view_list, prev_view); - - /* Views are allocated from the trx_sys->global_read_view_heap. - So, we simply remove the element here. */ - UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view); - } - - if (!IS_XTRABACKUP() || !srv_apply_log_only) { - ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0); - ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0); - ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0); - ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0); - } - - mutex_free(&trx_sys->mutex); - - ut_ad(trx_sys->descr_n_used == 0); - ut_free(trx_sys->descriptors); - - mem_free(trx_sys); - - trx_sys = NULL; -} - -/** @brief Convert an undo log to TRX_UNDO_PREPARED state on shutdown. - -If any prepared ACTIVE transactions exist, and their rollback was -prevented by innodb_force_recovery, we convert these transactions to -XA PREPARE state in the main-memory data structures, so that shutdown -will proceed normally. These transactions will again recover as ACTIVE -on the next restart, and they will be rolled back unless -innodb_force_recovery prevents it again. - -@param[in] trx transaction -@param[in,out] undo undo log to convert to TRX_UNDO_PREPARED */ -static -void -trx_undo_fake_prepared( - const trx_t* trx, - trx_undo_t* undo) -{ - ut_ad(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); - ut_ad(trx->is_recovered); - - if (undo != NULL) { - ut_ad(undo->state == TRX_UNDO_ACTIVE); - undo->state = TRX_UNDO_PREPARED; - } -} - -/********************************************************************* -Check if there are any active (non-prepared) transactions. 
-@return total number of active transactions or 0 if none */ -UNIV_INTERN -ulint -trx_sys_any_active_transactions(void) -/*=================================*/ -{ - if (IS_XTRABACKUP() && srv_apply_log_only) { - return(0); - } - mutex_enter(&trx_sys->mutex); - - ulint total_trx = UT_LIST_GET_LEN(trx_sys->mysql_trx_list); - - if (total_trx == 0) { - total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - ut_a(total_trx >= trx_sys->n_prepared_trx); - - if (total_trx > trx_sys->n_prepared_trx - && srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) { - for (trx_t* trx = UT_LIST_GET_FIRST( - trx_sys->rw_trx_list); - trx != NULL; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - if (!trx_state_eq(trx, TRX_STATE_ACTIVE) - || !trx->is_recovered) { - continue; - } - /* This was a recovered transaction - whose rollback was disabled by - the innodb_force_recovery setting. - Pretend that it is in XA PREPARE - state so that shutdown will work. */ - trx_undo_fake_prepared( - trx, trx->insert_undo); - trx_undo_fake_prepared( - trx, trx->update_undo); - trx->state = TRX_STATE_PREPARED; - trx_sys->n_prepared_trx++; - trx_sys->n_prepared_recovered_trx++; - } - } - - ut_a(total_trx >= trx_sys->n_prepared_trx); - total_trx -= trx_sys->n_prepared_trx; - } - - mutex_exit(&trx_sys->mutex); - - return(total_trx); -} - -#ifdef UNIV_DEBUG -/*************************************************************//** -Validate the trx_list_t. -@return TRUE if valid. 
*/ -static -ibool -trx_sys_validate_trx_list_low( -/*===========================*/ - trx_list_t* trx_list) /*!< in: &trx_sys->ro_trx_list - or &trx_sys->rw_trx_list */ -{ - const trx_t* trx; - const trx_t* prev_trx = NULL; - - ut_ad(mutex_own(&trx_sys->mutex)); - - ut_ad(trx_list == &trx_sys->ro_trx_list - || trx_list == &trx_sys->rw_trx_list); - - for (trx = UT_LIST_GET_FIRST(*trx_list); - trx != NULL; - prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) { - - assert_trx_in_list(trx); - ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list)); - - ut_a(prev_trx == NULL || prev_trx->id > trx->id); - } - - return(TRUE); -} - -/*************************************************************//** -Validate the trx_sys_t::ro_trx_list and trx_sys_t::rw_trx_list. -@return TRUE if lists are valid. */ -UNIV_INTERN -ibool -trx_sys_validate_trx_list(void) -/*===========================*/ -{ - ut_ad(mutex_own(&trx_sys->mutex)); - - ut_a(trx_sys_validate_trx_list_low(&trx_sys->ro_trx_list)); - ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list)); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc deleted file mode 100644 index 1d2f7ada54e..00000000000 --- a/storage/xtradb/trx/trx0trx.cc +++ /dev/null @@ -1,2748 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0trx.cc -The transaction - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "btr0types.h" -#include "trx0trx.h" - -#ifdef UNIV_NONINL -#include "trx0trx.ic" -#endif - -#include <mysql/service_wsrep.h> - -#include "trx0undo.h" -#include "trx0rseg.h" -#include "log0log.h" -#include "que0que.h" -#include "lock0lock.h" -#include "trx0roll.h" -#include "usr0sess.h" -#include "read0read.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "btr0sea.h" -#include "os0proc.h" -#include "trx0xa.h" -#include "trx0rec.h" -#include "trx0purge.h" -#include "ha_prototypes.h" -#include "srv0mon.h" -#include "ut0vec.h" - -#include<set> - -extern "C" -int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2); - -/** Set of table_id */ -typedef std::set<table_id_t> table_id_set; - -/** Dummy session used currently in MySQL interface */ -UNIV_INTERN sess_t* trx_dummy_sess = NULL; - -#ifdef UNIV_PFS_MUTEX -/* Key to register the mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t trx_mutex_key; -/* Key to register the mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/*************************************************************//** -Set detailed error message for the transaction. 
*/ -UNIV_INTERN -void -trx_set_detailed_error( -/*===================*/ - trx_t* trx, /*!< in: transaction struct */ - const char* msg) /*!< in: detailed error message */ -{ - ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error)); -} - -/*************************************************************//** -Set detailed error message for the transaction from a file. Note that the -file is rewinded before reading from it. */ -UNIV_INTERN -void -trx_set_detailed_error_from_file( -/*=============================*/ - trx_t* trx, /*!< in: transaction struct */ - FILE* file) /*!< in: file to read message from */ -{ - os_file_read_string(file, trx->detailed_error, - sizeof(trx->detailed_error)); -} - -/*************************************************************//** -Callback function for trx_find_descriptor() to compare trx IDs. */ -UNIV_INTERN -int -trx_descr_cmp( -/*==========*/ - const void *a, /*!< in: pointer to first comparison argument */ - const void *b) /*!< in: pointer to second comparison argument */ -{ - const trx_id_t* da = (const trx_id_t*) a; - const trx_id_t* db = (const trx_id_t*) b; - - if (*da < *db) { - return -1; - } else if (*da > *db) { - return 1; - } - - return 0; -} - -/*************************************************************//** -Reserve a slot for a given trx in the global descriptors array. 
*/ -UNIV_INLINE -void -trx_reserve_descriptor( -/*===================*/ - const trx_t* trx) /*!< in: trx pointer */ -{ - ulint n_used; - ulint n_max; - trx_id_t* descr; - - ut_ad(mutex_own(&trx_sys->mutex) || srv_is_being_started); - ut_ad(srv_is_being_started || - !trx_find_descriptor(trx_sys->descriptors, - trx_sys->descr_n_used, - trx->id)); - - n_used = trx_sys->descr_n_used + 1; - n_max = trx_sys->descr_n_max; - - if (UNIV_UNLIKELY(n_used > n_max)) { - - n_max = n_max * 2; - - trx_sys->descriptors = static_cast<trx_id_t*>( - ut_realloc(trx_sys->descriptors, - n_max * sizeof(trx_id_t))); - - trx_sys->descr_n_max = n_max; - srv_descriptors_memory = n_max * sizeof(trx_id_t); - } - - descr = trx_sys->descriptors + n_used - 1; - - if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) { - - /* Find the slot where it should be inserted. We could use a - binary search, but in reality linear search should be faster, - because the slot we are looking for is near the array end. */ - - trx_id_t* tdescr; - - for (tdescr = descr - 1; - tdescr >= trx_sys->descriptors && *tdescr > trx->id; - tdescr--) { - } - - tdescr++; - - ut_memmove(tdescr + 1, tdescr, (descr - tdescr) * - sizeof(trx_id_t)); - - descr = tdescr; - } - - *descr = trx->id; - - trx_sys->descr_n_used = n_used; -} - -/*************************************************************//** -Release a slot for a given trx in the global descriptors array. 
*/ -UNIV_INTERN -void -trx_release_descriptor( -/*===================*/ - trx_t* trx) /*!< in: trx pointer */ -{ - ulint size; - trx_id_t* descr; - - ut_ad(mutex_own(&trx_sys->mutex)); - - if (UNIV_LIKELY(trx->in_trx_serial_list)) { - - UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list, - trx); - trx->in_trx_serial_list = false; - } - - descr = trx_find_descriptor(trx_sys->descriptors, - trx_sys->descr_n_used, - trx->id); - - if (UNIV_UNLIKELY(descr == NULL)) { - - return; - } - - size = (trx_sys->descriptors + trx_sys->descr_n_used - 1 - descr) * - sizeof(trx_id_t); - - if (UNIV_LIKELY(size > 0)) { - - ut_memmove(descr, descr + 1, size); - } - - trx_sys->descr_n_used--; -} - -/****************************************************************//** -Creates and initializes a transaction object. It must be explicitly -started with trx_start_if_not_started() before using it. The default -isolation level is TRX_ISO_REPEATABLE_READ. -@return transaction instance, should never be NULL */ -static -trx_t* -trx_create(void) -/*============*/ -{ - trx_t* trx; - mem_heap_t* heap; - ib_alloc_t* heap_alloc; - - trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx))); - - mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX); - - trx->magic_n = TRX_MAGIC_N; - - trx->active_commit_ordered = 0; - trx->state = TRX_STATE_NOT_STARTED; - - trx->isolation_level = TRX_ISO_REPEATABLE_READ; - - trx->no = TRX_ID_MAX; - trx->in_trx_serial_list = false; - - trx->support_xa = TRUE; - - trx->fake_changes = FALSE; - - trx->check_foreigns = TRUE; - trx->check_unique_secondary = TRUE; - - trx->dict_operation = TRX_DICT_OP_NONE; - - trx->idle_start = 0; - trx->last_stmt_start = 0; - - mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO); - - trx->error_state = DB_SUCCESS; - - trx->lock.que_state = TRX_QUE_RUNNING; - - trx->lock.lock_heap = mem_heap_create_typed( - 256, MEM_HEAP_FOR_LOCK_HEAP); - - trx->search_latch_timeout = BTR_SEA_TIMEOUT; - - trx->io_reads = 0; - trx->io_read = 0; - 
trx->io_reads_wait_timer = 0; - trx->lock_que_wait_timer = 0; - trx->innodb_que_wait_timer = 0; - trx->distinct_page_access = 0; - trx->distinct_page_access_hash = NULL; - trx->take_stats = FALSE; - - trx->xid.formatID = -1; - - trx->op_info = ""; - - trx->api_trx = false; - - trx->api_auto_commit = false; - - trx->read_write = true; - - heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8); - heap_alloc = ib_heap_allocator_create(heap); - - /* Remember to free the vector explicitly in trx_free(). */ - trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4); - - /* Remember to free the vector explicitly in trx_free(). */ - heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128); - heap_alloc = ib_heap_allocator_create(heap); - - trx->lock.table_locks = ib_vector_create( - heap_alloc, sizeof(void**), 32); -#ifdef WITH_WSREP - trx->wsrep_event = NULL; -#endif /* WITH_WSREP */ - - return(trx); -} - -/********************************************************************//** -Creates a transaction object for background operations by the master thread. -@return own: transaction object */ -UNIV_INTERN -trx_t* -trx_allocate_for_background(void) -/*=============================*/ -{ - trx_t* trx; - - trx = trx_create(); - - trx->sess = trx_dummy_sess; - - return(trx); -} - -/********************************************************************//** -Creates a transaction object for MySQL. 
-@return own: transaction object */ -UNIV_INTERN -trx_t* -trx_allocate_for_mysql(void) -/*========================*/ -{ - trx_t* trx; - - trx = trx_allocate_for_background(); - - mutex_enter(&trx_sys->mutex); - - ut_d(trx->in_mysql_trx_list = TRUE); - UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx); - - mutex_exit(&trx_sys->mutex); - - if (UNIV_UNLIKELY(trx->take_stats)) { - trx->distinct_page_access_hash - = static_cast<byte *>(mem_alloc(DPAH_SIZE)); - memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); - } - - return(trx); -} - -/********************************************************************//** -Frees a transaction object without releasing the corresponding descriptor. -Should be used by callers that already own trx_sys->mutex. */ -static -void -trx_free_low( -/*=========*/ - trx_t* trx) /*!< in, own: trx object */ -{ - ut_a(trx->magic_n == TRX_MAGIC_N); - ut_ad(!trx->in_ro_trx_list); - ut_ad(!trx->in_rw_trx_list); - ut_ad(!trx->in_mysql_trx_list); - - mutex_free(&trx->undo_mutex); - - if (trx->undo_no_arr != NULL) { - trx_undo_arr_free(trx->undo_no_arr); - } - - ut_a(trx->lock.wait_lock == NULL); - ut_a(trx->lock.wait_thr == NULL); - - ut_a(!trx->has_search_latch); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!btr_search_own_any()); -#endif - - ut_a(trx->dict_operation_lock_mode == 0); - - if (trx->lock.lock_heap) { - mem_heap_free(trx->lock.lock_heap); - } - - ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); - - ut_a(ib_vector_is_empty(trx->autoinc_locks)); - /* We allocated a dedicated heap for the vector. */ - ib_vector_free(trx->autoinc_locks); - - if (trx->lock.table_locks != NULL) { - /* We allocated a dedicated heap for the vector. */ - ib_vector_free(trx->lock.table_locks); - } - - mutex_free(&trx->mutex); - - read_view_free(trx->prebuilt_view); - - mem_free(trx); -} - -/********************************************************************//** -Frees a transaction object. 
*/ -static -void -trx_free( -/*=========*/ - trx_t* trx) /*!< in, own: trx object */ -{ - mutex_enter(&trx_sys->mutex); - trx_release_descriptor(trx); - mutex_exit(&trx_sys->mutex); - - trx_free_low(trx); -} - -/********************************************************************//** -Frees a transaction object of a background operation of the master thread. */ -UNIV_INTERN -void -trx_free_for_background( -/*====================*/ - trx_t* trx) /*!< in, own: trx object */ -{ - - if (trx->distinct_page_access_hash) - { - mem_free(trx->distinct_page_access_hash); - trx->distinct_page_access_hash= NULL; - } - - if (trx->declared_to_be_inside_innodb) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "Freeing a trx (%p, " TRX_ID_FMT ") which is declared " - "to be processing inside InnoDB", trx, trx->id); - - trx_print(stderr, trx, 600); - putc('\n', stderr); - - /* This is an error but not a fatal error. We must keep - the counters like srv_conc_n_threads accurate. */ - srv_conc_force_exit_innodb(trx); - } - - if (trx->n_mysql_tables_in_use != 0 - || trx->mysql_n_tables_locked != 0) { - - ib_logf(IB_LOG_LEVEL_ERROR, - "MySQL is freeing a thd though " - "trx->n_mysql_tables_in_use is %lu and " - "trx->mysql_n_tables_locked is %lu.", - (ulong) trx->n_mysql_tables_in_use, - (ulong) trx->mysql_n_tables_locked); - - trx_print(stderr, trx, 600); - ut_print_buf(stderr, trx, sizeof(trx_t)); - putc('\n', stderr); - } - - ut_a(trx->state == TRX_STATE_NOT_STARTED); - ut_a(trx->insert_undo == NULL); - ut_a(trx->update_undo == NULL); - ut_a(trx->read_view == NULL); - - trx_free(trx); -} - -/********************************************************************//** -At shutdown, frees a transaction object that is in the PREPARED state. 
*/ -UNIV_INTERN -void -trx_free_prepared( -/*==============*/ - trx_t* trx) /*!< in, own: trx object */ -{ - ut_a(trx_state_eq(trx, TRX_STATE_PREPARED) - || (trx_state_eq(trx, TRX_STATE_ACTIVE) - && trx->is_recovered - && (srv_read_only_mode - || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO))); - ut_a(trx->magic_n == TRX_MAGIC_N); - - lock_trx_release_locks(trx); - trx_undo_free_prepared(trx); - - assert_trx_in_rw_list(trx); - - ut_a(!trx->read_only); - - UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx); - ut_d(trx->in_rw_trx_list = FALSE); - - mutex_enter(&trx_sys->mutex); - trx_release_descriptor(trx); - mutex_exit(&trx_sys->mutex); - - /* Undo trx_resurrect_table_locks(). */ - UT_LIST_INIT(trx->lock.trx_locks); - - trx_free_low(trx); - - ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list)); -} - -/********************************************************************//** -Frees a transaction object for MySQL. */ -UNIV_INTERN -void -trx_free_for_mysql( -/*===============*/ - trx_t* trx) /*!< in, own: trx object */ -{ - if (trx->distinct_page_access_hash) - { - mem_free(trx->distinct_page_access_hash); - trx->distinct_page_access_hash= NULL; - } - - mutex_enter(&trx_sys->mutex); - - ut_ad(trx->in_mysql_trx_list); - ut_d(trx->in_mysql_trx_list = FALSE); - UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); - - ut_ad(trx_sys_validate_trx_list()); - - mutex_exit(&trx_sys->mutex); - - trx_free_for_background(trx); -} - -/****************************************************************//** -Inserts the trx handle in the trx system trx list in the right position. -The list is sorted on the trx id so that the biggest id is at the list -start. This function is used at the database startup to insert incomplete -transactions to the list. 
*/ -static -void -trx_list_rw_insert_ordered( -/*=======================*/ - trx_t* trx) /*!< in: trx handle */ -{ - trx_t* trx2; - - ut_ad(!trx->read_only); - - ut_d(trx->start_file = __FILE__); - ut_d(trx->start_line = __LINE__); - - ut_a(srv_is_being_started); - ut_ad(!trx->in_ro_trx_list); - ut_ad(!trx->in_rw_trx_list); - ut_ad(trx->state != TRX_STATE_NOT_STARTED); - ut_ad(trx->is_recovered); - - for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); - trx2 != NULL; - trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) { - - assert_trx_in_rw_list(trx2); - - if (trx->id >= trx2->id) { - - ut_ad(trx->id > trx2->id); - break; - } - } - - if (trx2 != NULL) { - trx2 = UT_LIST_GET_PREV(trx_list, trx2); - - if (trx2 == NULL) { - UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx); - } else { - UT_LIST_INSERT_AFTER( - trx_list, trx_sys->rw_trx_list, trx2, trx); - } - } else { - UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx); - } - -#ifdef UNIV_DEBUG - if (trx->id > trx_sys->rw_max_trx_id) { - trx_sys->rw_max_trx_id = trx->id; - } -#endif /* UNIV_DEBUG */ - - ut_ad(!trx->in_rw_trx_list); - ut_d(trx->in_rw_trx_list = TRUE); -} - -/****************************************************************//** -Resurrect the table locks for a resurrected transaction. */ -static -void -trx_resurrect_table_locks( -/*======================*/ - trx_t* trx, /*!< in/out: transaction */ - const trx_undo_t* undo) /*!< in: undo log */ -{ - mtr_t mtr; - page_t* undo_page; - trx_undo_rec_t* undo_rec; - table_id_set tables; - - ut_ad(undo == trx->insert_undo || undo == trx->update_undo); - - if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) - || undo->empty) { - return; - } - - mtr_start(&mtr); - /* trx_rseg_mem_create() may have acquired an X-latch on this - page, so we cannot acquire an S-latch. 
*/ - undo_page = trx_undo_page_get( - undo->space, undo->zip_size, undo->top_page_no, &mtr); - undo_rec = undo_page + undo->top_offset; - - do { - ulint type; - ulint cmpl_info; - bool updated_extern; - undo_no_t undo_no; - table_id_t table_id; - - page_t* undo_rec_page = page_align(undo_rec); - - if (undo_rec_page != undo_page) { - if (!mtr_memo_release(&mtr, - buf_block_align(undo_page), - MTR_MEMO_PAGE_X_FIX)) { - /* The page of the previous undo_rec - should have been latched by - trx_undo_page_get() or - trx_undo_get_prev_rec(). */ - ut_ad(0); - } - - undo_page = undo_rec_page; - } - - trx_undo_rec_get_pars( - undo_rec, &type, &cmpl_info, - &updated_extern, &undo_no, &table_id); - tables.insert(table_id); - - undo_rec = trx_undo_get_prev_rec( - undo_rec, undo->hdr_page_no, - undo->hdr_offset, false, &mtr); - } while (undo_rec); - - mtr_commit(&mtr); - - for (table_id_set::const_iterator i = tables.begin(); - i != tables.end(); i++) { - if (dict_table_t* table = dict_table_open_on_id( - *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) { - if (table->file_unreadable - || dict_table_is_temporary(table)) { - mutex_enter(&dict_sys->mutex); - dict_table_close(table, TRUE, FALSE); - dict_table_remove_from_cache(table); - mutex_exit(&dict_sys->mutex); - continue; - } - - lock_table_ix_resurrect(table, trx); - - DBUG_PRINT("ib_trx", - ("resurrect" TRX_ID_FMT - " table '%s' IX lock from %s undo", - trx->id, table->name, - undo == trx->insert_undo - ? "insert" : "update")); - - dict_table_close(table, FALSE, FALSE); - } - } -} - -/****************************************************************//** -Resurrect the transactions that were doing inserts the time of the -crash, they need to be undone. 
-@return trx_t instance */ -static -trx_t* -trx_resurrect_insert( -/*=================*/ - trx_undo_t* undo, /*!< in: entry to UNDO */ - trx_rseg_t* rseg) /*!< in: rollback segment */ -{ - trx_t* trx; - - trx = trx_allocate_for_background(); - - trx->rseg = rseg; - trx->xid = undo->xid; - trx->id = undo->trx_id; - trx->insert_undo = undo; - trx->is_recovered = TRUE; - - /* This is single-threaded startup code, we do not need the - protection of trx->mutex or trx_sys->mutex here. */ - - if (undo->state != TRX_UNDO_ACTIVE) { - - /* Prepared transactions are left in the prepared state - waiting for a commit or abort decision from MySQL */ - - if (undo->state == TRX_UNDO_PREPARED) { - - fprintf(stderr, - "InnoDB: Transaction " TRX_ID_FMT " was in the" - " XA prepared state.\n", trx->id); - - if (srv_force_recovery == 0) { - - /* XtraBackup should rollback prepared XA - transactions */ - if (IS_XTRABACKUP()) { - trx->state = TRX_STATE_ACTIVE; - } - else { - trx->state = TRX_STATE_PREPARED; - trx_sys->n_prepared_trx++; - trx_sys->n_prepared_recovered_trx++; - } - } else { - fprintf(stderr, - "InnoDB: Since innodb_force_recovery" - " > 0, we will rollback it anyway.\n"); - - trx->state = TRX_STATE_ACTIVE; - } - } else { - trx->state = TRX_STATE_COMMITTED_IN_MEMORY; - } - - /* We give a dummy value for the trx no; this should have no - relevance since purge is not interested in committed - transaction numbers, unless they are in the history - list, in which case it looks the number from the disk based - undo log structure */ - - trx->no = trx->id; - } else { - trx->state = TRX_STATE_ACTIVE; - - /* A running transaction always has the number - field inited to TRX_ID_MAX */ - - trx->no = TRX_ID_MAX; - } - - /* trx_start_low() is not called with resurrect, so need to initialize - start time here.*/ - if (trx->state == TRX_STATE_ACTIVE - || trx->state == TRX_STATE_PREPARED) { - trx->start_time = ut_time(); - } - - if (undo->dict_operation) { - trx_set_dict_operation(trx, 
TRX_DICT_OP_TABLE); - trx->table_id = undo->table_id; - } - - if (!undo->empty) { - trx->undo_no = undo->top_undo_no + 1; - } - - return(trx); -} - -/****************************************************************//** -Prepared transactions are left in the prepared state waiting for a -commit or abort decision from MySQL */ -static -void -trx_resurrect_update_in_prepared_state( -/*===================================*/ - trx_t* trx, /*!< in,out: transaction */ - const trx_undo_t* undo) /*!< in: update UNDO record */ -{ - /* This is single-threaded startup code, we do not need the - protection of trx->mutex or trx_sys->mutex here. */ - - if (undo->state == TRX_UNDO_PREPARED) { - fprintf(stderr, - "InnoDB: Transaction " TRX_ID_FMT - " was in the XA prepared state.\n", trx->id); - - if (srv_force_recovery == 0) { - if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) { - if (!IS_XTRABACKUP()) { - trx_sys->n_prepared_trx++; - trx_sys->n_prepared_recovered_trx++; - } - } else { - ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED)); - } - /* XtraBackup should rollback prepared XA - transactions */ - trx->state = IS_XTRABACKUP()?TRX_STATE_ACTIVE: TRX_STATE_PREPARED; - } else { - fprintf(stderr, - "InnoDB: Since innodb_force_recovery" - " > 0, we will rollback it anyway.\n"); - - trx->state = TRX_STATE_ACTIVE; - } - } else { - trx->state = TRX_STATE_COMMITTED_IN_MEMORY; - } -} - -/****************************************************************//** -Resurrect the transactions that were doing updates the time of the -crash, they need to be undone. 
*/ -static -void -trx_resurrect_update( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - trx_undo_t* undo, /*!< in/out: update UNDO record */ - trx_rseg_t* rseg) /*!< in/out: rollback segment */ -{ - trx->rseg = rseg; - trx->xid = undo->xid; - trx->id = undo->trx_id; - trx->update_undo = undo; - trx->is_recovered = TRUE; - - /* This is single-threaded startup code, we do not need the - protection of trx->mutex or trx_sys->mutex here. */ - - if (undo->state != TRX_UNDO_ACTIVE) { - trx_resurrect_update_in_prepared_state(trx, undo); - - /* We give a dummy value for the trx number */ - - trx->no = trx->id; - - } else { - trx->state = TRX_STATE_ACTIVE; - - /* A running transaction always has the number field inited to - TRX_ID_MAX */ - - trx->no = TRX_ID_MAX; - } - - /* trx_start_low() is not called with resurrect, so need to initialize - start time here.*/ - if (trx->state == TRX_STATE_ACTIVE - || trx->state == TRX_STATE_PREPARED) { - trx->start_time = ut_time(); - } - - if (undo->dict_operation) { - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - trx->table_id = undo->table_id; - } - - if (!undo->empty && undo->top_undo_no >= trx->undo_no) { - - trx->undo_no = undo->top_undo_no + 1; - } -} - -/****************************************************************//** -Creates trx objects for transactions and initializes the trx list of -trx_sys at database start. Rollback segment and undo log lists must -already exist when this function is called, because the lists of -transactions to be rolled back or cleaned up are built based on the -undo log lists. 
*/ -UNIV_INTERN -void -trx_lists_init_at_db_start(void) -/*============================*/ -{ - ulint i; - - ut_a(srv_is_being_started); - - UT_LIST_INIT(trx_sys->ro_trx_list); - UT_LIST_INIT(trx_sys->rw_trx_list); - UT_LIST_INIT(trx_sys->trx_serial_list); - - /* Look from the rollback segments if there exist undo logs for - transactions */ - - for (i = 0; i < TRX_SYS_N_RSEGS; ++i) { - trx_undo_t* undo; - trx_rseg_t* rseg; - - rseg = trx_sys->rseg_array[i]; - - if (rseg == NULL) { - continue; - } - - /* Resurrect transactions that were doing inserts. */ - for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list); - undo != NULL; - undo = UT_LIST_GET_NEXT(undo_list, undo)) { - trx_t* trx; - - trx = trx_resurrect_insert(undo, rseg); - - if (trx->state == TRX_STATE_ACTIVE || - trx->state == TRX_STATE_PREPARED) { - - trx_reserve_descriptor(trx); - } - trx_list_rw_insert_ordered(trx); - - trx_resurrect_table_locks(trx, undo); - } - - /* Ressurrect transactions that were doing updates. */ - for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list); - undo != NULL; - undo = UT_LIST_GET_NEXT(undo_list, undo)) { - trx_t* trx; - ibool trx_created; - - /* Check the trx_sys->rw_trx_list first. */ - mutex_enter(&trx_sys->mutex); - trx = trx_get_rw_trx_by_id(undo->trx_id); - mutex_exit(&trx_sys->mutex); - - if (trx == NULL) { - trx = trx_allocate_for_background(); - trx_created = TRUE; - } else { - trx_created = FALSE; - } - - trx_resurrect_update(trx, undo, rseg); - - if (trx_created) { - if (trx->state == TRX_STATE_ACTIVE || - trx->state == TRX_STATE_PREPARED) { - - trx_reserve_descriptor(trx); - } - trx_list_rw_insert_ordered(trx); - } - - trx_resurrect_table_locks(trx, undo); - } - } -} - -/******************************************************************//** -Assigns a rollback segment to a transaction in a round-robin fashion. 
-@return assigned rollback segment instance */ -static -trx_rseg_t* -trx_assign_rseg_low( -/*================*/ - ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */ - ulint n_tablespaces) /*!< in: number of rollback tablespaces */ -{ - ulint i; - trx_rseg_t* rseg; - static ulint latest_rseg = 0; - - if (srv_read_only_mode) { - ut_a(max_undo_logs == ULONG_UNDEFINED); - return(NULL); - } - - /* This breaks true round robin but that should be OK. */ - - ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS); - - i = latest_rseg++; - i %= max_undo_logs; - - /* Note: The assumption here is that there can't be any gaps in - the array. Once we implement more flexible rollback segment - management this may not hold. The assertion checks for that case. */ - - if (trx_sys->rseg_array[0] == NULL) { - return(NULL); - } - - /* Skip the system tablespace if we have more than one tablespace - defined for rollback segments. We want all UNDO records to be in - the non-system tablespaces. */ - - do { - rseg = trx_sys->rseg_array[i]; - ut_a(rseg == NULL || i == rseg->id); - - i = (rseg == NULL) ? 0 : i + 1; - - } while (rseg == NULL - || (rseg->space == 0 - && n_tablespaces > 0 - && trx_sys->rseg_array[1] != NULL)); - - return(rseg); -} - -/****************************************************************//** -Assign a read-only transaction a rollback-segment, if it is attempting -to write to a TEMPORARY table. */ -UNIV_INTERN -void -trx_assign_rseg( -/*============*/ - trx_t* trx) /*!< A read-only transaction that - needs to be assigned a RBS. */ -{ - ut_a(trx->rseg == 0); - ut_a(trx->read_only); - ut_a(!srv_read_only_mode); - ut_a(!trx_is_autocommit_non_locking(trx)); - - trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces); -} - -/****************************************************************//** -Starts a transaction. 
*/ -static -void -trx_start_low( -/*==========*/ - trx_t* trx) /*!< in: transaction */ -{ - ut_ad(trx->rseg == NULL); - - ut_ad(trx->start_file != 0); - ut_ad(trx->start_line != 0); - ut_ad(!trx->is_recovered); - ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)); - ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); - - /* Check whether it is an AUTOCOMMIT SELECT */ - trx->auto_commit = (trx->api_trx && trx->api_auto_commit) - || thd_trx_is_auto_commit(trx->mysql_thd); - - trx->read_only = - (trx->api_trx && !trx->read_write) - || (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd)) - || srv_read_only_mode; - - if (!trx->auto_commit) { - ++trx->will_lock; - } else if (trx->will_lock == 0) { - trx->read_only = TRUE; - } - - if (!trx->read_only) { - trx->rseg = trx_assign_rseg_low( - srv_undo_logs, srv_undo_tablespaces); - } - -#ifdef WITH_WSREP - memset(&trx->xid, 0, sizeof(trx->xid)); - trx->xid.formatID = -1; -#endif /* WITH_WSREP */ - - /* The initial value for trx->no: TRX_ID_MAX is used in - read_view_open_now: */ - - trx->no = TRX_ID_MAX; - - ut_a(ib_vector_is_empty(trx->autoinc_locks)); - ut_a(ib_vector_is_empty(trx->lock.table_locks)); - - mutex_enter(&trx_sys->mutex); - - /* If this transaction came from trx_allocate_for_mysql(), - trx->in_mysql_trx_list would hold. In that case, the trx->state - change must be protected by the trx_sys->mutex, so that - lock_print_info_all_transactions() will have a consistent view. */ - - trx->state = TRX_STATE_ACTIVE; - - trx->id = trx_sys_get_new_trx_id(); - - /* Cache the state of fake_changes that transaction will use for - lifetime. Any change in session/global fake_changes configuration during - lifetime of transaction will not be honored by already started - transaction. 
*/ - trx->fake_changes = thd_fake_changes(trx->mysql_thd); - - ut_ad(!trx->in_rw_trx_list); - ut_ad(!trx->in_ro_trx_list); - - if (trx->read_only) { - - /* Note: The trx_sys_t::ro_trx_list doesn't really need to - be ordered, we should exploit this using a list type that - doesn't need a list wide lock to increase concurrency. */ - - if (!trx_is_autocommit_non_locking(trx)) { - UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx); - ut_d(trx->in_ro_trx_list = TRUE); - } - } else { - - ut_ad(trx->rseg != NULL - || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); - - ut_ad(!trx_is_autocommit_non_locking(trx)); - UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx); - ut_d(trx->in_rw_trx_list = TRUE); - -#ifdef UNIV_DEBUG - if (trx->id > trx_sys->rw_max_trx_id) { - trx_sys->rw_max_trx_id = trx->id; - } -#endif /* UNIV_DEBUG */ - - trx_reserve_descriptor(trx); - } - - ut_ad(trx_sys_validate_trx_list()); - - mutex_exit(&trx_sys->mutex); - - trx->start_time = ut_time(); - - trx->start_time_micro = - trx->mysql_thd ? thd_query_start_micro(trx->mysql_thd) : 0; - - MONITOR_INC(MONITOR_TRX_ACTIVE); -} - -/****************************************************************//** -Set the transaction serialisation number. */ -static -void -trx_serialisation_number_get( -/*=========================*/ - trx_t* trx) /*!< in: transaction */ -{ - trx_rseg_t* rseg; - - rseg = trx->rseg; - - ut_ad(mutex_own(&rseg->mutex)); - - mutex_enter(&trx_sys->mutex); - - trx->no = trx_sys_get_new_trx_id(); - - if (UNIV_LIKELY(!trx->in_trx_serial_list)) { - - UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list, - trx); - - trx->in_trx_serial_list = true; - } - - /* If the rollack segment is not empty then the - new trx_t::no can't be less than any trx_t::no - already in the rollback segment. User threads only - produce events when a rollback segment is empty. 
*/ - - if (rseg->last_page_no == FIL_NULL) { - void* ptr; - rseg_queue_t rseg_queue; - - rseg_queue.rseg = rseg; - rseg_queue.trx_no = trx->no; - - mutex_enter(&purge_sys->bh_mutex); - - /* This is to reduce the pressure on the trx_sys_t::mutex - though in reality it should make very little (read no) - difference because this code path is only taken when the - rbs is empty. */ - - mutex_exit(&trx_sys->mutex); - - ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); - ut_a(ptr); - - mutex_exit(&purge_sys->bh_mutex); - } else { - mutex_exit(&trx_sys->mutex); - } -} - -/****************************************************************//** -Assign the transaction its history serialisation number and write the -update UNDO log record to the assigned rollback segment. */ -static MY_ATTRIBUTE((nonnull)) -void -trx_write_serialisation_history( -/*============================*/ - trx_t* trx, /*!< in/out: transaction */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ -#ifdef WITH_WSREP - trx_sysf_t* sys_header; -#endif /* WITH_WSREP */ - trx_rseg_t* rseg; - - rseg = trx->rseg; - - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to some other state: these modifications to the file data - structure define the transaction as committed in the file - based domain, at the serialization point of the log sequence - number lsn obtained below. */ - - if (trx->update_undo != NULL) { - page_t* undo_hdr_page; - trx_undo_t* undo = trx->update_undo; - - /* We have to hold the rseg mutex because update - log headers have to be put to the history list in the - (serialisation) order of the UNDO trx number. This is - required for the purge in-memory data structures too. */ - - mutex_enter(&rseg->mutex); - - /* Assign the transaction serialisation number and also - update the purge min binary heap if this is the first - UNDO log being written to the assigned rollback segment. 
*/ - - trx_serialisation_number_get(trx); - - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction commit for this transaction. */ - - undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr); - - trx_undo_update_cleanup(trx, undo_hdr_page, mtr); - } else { - mutex_enter(&rseg->mutex); - } - - if (trx->insert_undo != NULL) { - trx_undo_set_state_at_finish(trx->insert_undo, mtr); - } - - mutex_exit(&rseg->mutex); - - MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); - -#ifdef WITH_WSREP - sys_header = trx_sysf_get(mtr); - /* Update latest MySQL wsrep XID in trx sys header. */ - if (wsrep_is_wsrep_xid(&trx->xid)) - { - trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr); - } -#endif /* WITH_WSREP */ - - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ - - if (trx->mysql_log_file_name - && trx->mysql_log_file_name[0] != '\0') { - - trx_sys_update_mysql_binlog_offset( - trx->mysql_log_file_name, - trx->mysql_log_offset, - TRX_SYS_MYSQL_LOG_INFO, -#ifdef WITH_WSREP - sys_header, -#endif /* WITH_WSREP */ - mtr); - - trx->mysql_log_file_name = NULL; - } -} - -/******************************************************************** -Finalize a transaction containing updates for a FTS table. */ -static MY_ATTRIBUTE((nonnull)) -void -trx_finalize_for_fts_table( -/*=======================*/ - fts_trx_table_t* ftt) /* in: FTS trx table */ -{ - fts_t* fts = ftt->table->fts; - fts_doc_ids_t* doc_ids = ftt->added_doc_ids; - - mutex_enter(&fts->bg_threads_mutex); - - if (fts->fts_status & BG_THREAD_STOP) { - /* The table is about to be dropped, no use - adding anything to its work queue. 
*/ - - mutex_exit(&fts->bg_threads_mutex); - } else { - mem_heap_t* heap; - mutex_exit(&fts->bg_threads_mutex); - - ut_a(fts->add_wq); - - heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg); - - ib_wqueue_add(fts->add_wq, doc_ids, heap); - - /* fts_trx_table_t no longer owns the list. */ - ftt->added_doc_ids = NULL; - } -} - -/******************************************************************//** -Finalize a transaction containing updates to FTS tables. */ -static MY_ATTRIBUTE((nonnull)) -void -trx_finalize_for_fts( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - bool is_commit) /*!< in: true if the transaction was - committed, false if it was rolled back. */ -{ - if (is_commit) { - const ib_rbt_node_t* node; - ib_rbt_t* tables; - fts_savepoint_t* savepoint; - - savepoint = static_cast<fts_savepoint_t*>( - ib_vector_last(trx->fts_trx->savepoints)); - - tables = savepoint->tables; - - for (node = rbt_first(tables); - node; - node = rbt_next(tables, node)) { - fts_trx_table_t** ftt; - - ftt = rbt_value(fts_trx_table_t*, node); - - if ((*ftt)->added_doc_ids) { - trx_finalize_for_fts_table(*ftt); - } - } - } - - fts_trx_free(trx->fts_trx); - trx->fts_trx = NULL; -} - -/**********************************************************************//** -If required, flushes the log to disk based on the value of -innodb_flush_log_at_trx_commit. */ -static -void -trx_flush_log_if_needed_low( -/*========================*/ - lsn_t lsn, /*!< in: lsn up to which logs are to be - flushed. */ - trx_t* trx) /*!< in: transaction */ -{ - ulint flush_log_at_trx_commit; - - flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit - ? 
thd_flush_log_at_trx_commit(NULL) - : thd_flush_log_at_trx_commit(trx->mysql_thd); - - switch (flush_log_at_trx_commit) { - case 0: - /* Do nothing */ - break; - case 1: - case 3: - /* Write the log and optionally flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, - srv_unix_file_flush_method != SRV_UNIX_NOSYNC); - break; - case 2: - /* Write the log but do not flush it to disk */ - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); - - break; - default: - ut_error; - } -} - -/**********************************************************************//** -If required, flushes the log to disk based on the value of -innodb_flush_log_at_trx_commit. */ -static MY_ATTRIBUTE((nonnull)) -void -trx_flush_log_if_needed( -/*====================*/ - lsn_t lsn, /*!< in: lsn up to which logs are to be - flushed. */ - trx_t* trx) /*!< in/out: transaction */ -{ - trx->op_info = "flushing log"; - trx_flush_log_if_needed_low(lsn, trx); - trx->op_info = ""; -} - -/****************************************************************//** -Commits a transaction in memory. */ -static MY_ATTRIBUTE((nonnull)) -void -trx_commit_in_memory( -/*=================*/ - trx_t* trx, /*!< in/out: transaction */ - lsn_t lsn) /*!< in: log sequence number of the mini-transaction - commit of trx_write_serialisation_history(), or 0 - if the transaction did not modify anything */ -{ - trx->must_flush_log_later = FALSE; - - if (trx_is_autocommit_non_locking(trx)) { - ut_ad(trx->read_only); - ut_a(!trx->is_recovered); - ut_ad(trx->rseg == NULL); - ut_ad(!trx->in_ro_trx_list); - ut_ad(!trx->in_rw_trx_list); - - /* Note: We are asserting without holding the lock mutex. But - that is OK because this transaction is not waiting and cannot - be rolled back and no new locks can (or should not) be added - becuase it is flagged as a non-locking read-only transaction. 
*/ - - ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); - - /* This state change is not protected by any mutex, therefore - there is an inherent race here around state transition during - printouts. We ignore this race for the sake of efficiency. - However, the trx_sys_t::mutex will protect the trx_t instance - and it cannot be removed from the mysql_trx_list and freed - without first acquiring the trx_sys_t::mutex. */ - - ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)); - - trx->state = TRX_STATE_NOT_STARTED; - - read_view_remove(trx->global_read_view, false); - - MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT); - } else { - lock_trx_release_locks(trx); - - /* Remove the transaction from the list of active - transactions now that it no longer holds any user locks. */ - - ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)); - - mutex_enter(&trx_sys->mutex); - - assert_trx_in_list(trx); - - if (trx->read_only) { - UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx); - ut_d(trx->in_ro_trx_list = FALSE); - MONITOR_INC(MONITOR_TRX_RO_COMMIT); - } else { - UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx); - ut_d(trx->in_rw_trx_list = FALSE); - ut_ad(trx_sys->descr_n_used <= - UT_LIST_GET_LEN(trx_sys->rw_trx_list)); - MONITOR_INC(MONITOR_TRX_RW_COMMIT); - } - - /* If this transaction came from trx_allocate_for_mysql(), - trx->in_mysql_trx_list would hold. In that case, the - trx->state change must be protected by trx_sys->mutex, so that - lock_print_info_all_transactions() will have a consistent - view. */ - - trx->state = TRX_STATE_NOT_STARTED; - - /* We already own the trx_sys_t::mutex, by doing it here we - avoid a potential context switch later. 
*/ - read_view_remove(trx->global_read_view, true); - - ut_ad(trx_sys_validate_trx_list()); - - mutex_exit(&trx_sys->mutex); - } - - if (trx->global_read_view != NULL) { - - trx->global_read_view = NULL; - } - - trx->read_view = NULL; - - if (lsn) { - ulint flush_log_at_trx_commit; - - if (trx->insert_undo != NULL) { - - trx_undo_insert_cleanup(trx); - } - - if (srv_use_global_flush_log_at_trx_commit) { - flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL); - } else { - flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd); - } - - /* NOTE that we could possibly make a group commit more - efficient here: call os_thread_yield here to allow also other - trxs to come to commit! */ - - /*-------------------------------------*/ - - /* Depending on the my.cnf options, we may now write the log - buffer to the log files, making the transaction durable if - the OS does not crash. We may also flush the log files to - disk, making the transaction durable also at an OS crash or a - power outage. - - The idea in InnoDB's group commit is that a group of - transactions gather behind a trx doing a physical disk write - to log files, and when that physical write has been completed, - one of those transactions does a write which commits the whole - group. Note that this group commit will only bring benefit if - there are > 2 users in the database. Then at least 2 users can - gather behind one doing the physical log write to disk. - - If we are calling trx_commit() under prepare_commit_mutex, we - will delay possible log write and flush to a separate function - trx_commit_complete_for_mysql(), which is only called when the - thread has released the mutex. This is to make the - group commit algorithm to work. Otherwise, the prepare_commit - mutex would serialize all commits and prevent a group of - transactions from gathering. 
*/ - - if (trx->flush_log_later) { - /* Do nothing yet */ - trx->must_flush_log_later = TRUE; - } else if (flush_log_at_trx_commit == 0 - || thd_requested_durability(trx->mysql_thd) - == HA_IGNORE_DURABILITY) { - /* Do nothing */ - } else { - trx_flush_log_if_needed(lsn, trx); - } - - trx->commit_lsn = lsn; - - /* Tell server some activity has happened, since the trx - does changes something. Background utility threads like - master thread, purge thread or page_cleaner thread might - have some work to do. */ - srv_active_wake_master_thread(); - } - - /* undo_no is non-zero if we're doing the final commit. */ - bool not_rollback = trx->undo_no != 0; - /* Free all savepoints, starting from the first. */ - trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - trx_roll_savepoints_free(trx, savep); - - trx->rseg = NULL; - trx->undo_no = 0; - trx->last_sql_stat_start.least_undo_no = 0; - - trx->ddl = false; -#ifdef UNIV_DEBUG - ut_ad(trx->start_file != 0); - ut_ad(trx->start_line != 0); - trx->start_file = 0; - trx->start_line = 0; -#endif /* UNIV_DEBUG */ - - trx->will_lock = 0; - trx->read_only = FALSE; - trx->auto_commit = FALSE; - - if (trx->fts_trx) { - trx_finalize_for_fts(trx, not_rollback); - } - - ut_ad(trx->lock.wait_thr == NULL); - ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); - ut_ad(!trx->in_ro_trx_list); - ut_ad(!trx->in_rw_trx_list); - -#ifdef WITH_WSREP - if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) { - trx->lock.was_chosen_as_deadlock_victim = FALSE; - } -#endif - trx->dict_operation = TRX_DICT_OP_NONE; - - trx->error_state = DB_SUCCESS; - - /* trx->in_mysql_trx_list would hold between - trx_allocate_for_mysql() and trx_free_for_mysql(). It does not - hold for recovered transactions or system transactions. */ -} - -/****************************************************************//** -Commits a transaction and a mini-transaction. 
*/ -UNIV_INTERN -void -trx_commit_low( -/*===========*/ - trx_t* trx, /*!< in/out: transaction */ - mtr_t* mtr) /*!< in/out: mini-transaction (will be committed), - or NULL if trx made no modifications */ -{ - lsn_t lsn; - - assert_trx_nonlocking_or_in_list(trx); - ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)); - ut_ad(!mtr || mtr->state == MTR_ACTIVE); - ut_ad(!mtr == !(trx->insert_undo || trx->update_undo)); - - /* undo_no is non-zero if we're doing the final commit. */ - if (trx->fts_trx && trx->undo_no != 0) { - dberr_t error; - - ut_a(!trx_is_autocommit_non_locking(trx)); - - error = fts_commit(trx); - - /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY - instead of dying. This is a possible scenario if there - is a crash between insert to DELETED table committing - and transaction committing. The fix would be able to - return error from this function */ - if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) { - /* FTS-FIXME: once we can return values from this - function, we should do so and signal an error - instead of just dying. */ - - ut_error; - } - } - - if (mtr) { - trx_write_serialisation_history(trx, mtr); - /* The following call commits the mini-transaction, making the - whole transaction committed in the file-based world, at this - log sequence number. The transaction becomes 'durable' when - we write the log to disk, but in the logical sense the commit - in the file-based data structures (undo logs etc.) happens - here. - - NOTE that transaction numbers, which are assigned only to - transactions with an update undo log, do not necessarily come - in exactly the same order as commit lsn's, if the transactions - have different rollback segments. To get exactly the same - order we should hold the kernel mutex up to this point, - adding to the contention of the kernel mutex. 
However, if - a transaction T2 is able to see modifications made by - a transaction T1, T2 will always get a bigger transaction - number and a bigger commit lsn than T1. */ - - /*--------------*/ - mtr_commit(mtr); - /*--------------*/ - lsn = mtr->end_lsn; - } else { - lsn = 0; - } - - trx_commit_in_memory(trx, lsn); -} - -/****************************************************************//** -Commits a transaction. */ -UNIV_INTERN -void -trx_commit( -/*=======*/ - trx_t* trx) /*!< in/out: transaction */ -{ - mtr_t local_mtr; - mtr_t* mtr; - - if (trx->insert_undo || trx->update_undo) { - mtr = &local_mtr; - mtr_start(mtr); - } else { - mtr = NULL; - } - - trx_commit_low(trx, mtr); -} - -/****************************************************************//** -Cleans up a transaction at database startup. The cleanup is needed if -the transaction already got to the middle of a commit when the database -crashed, and we cannot roll it back. */ -UNIV_INTERN -void -trx_cleanup_at_db_startup( -/*======================*/ - trx_t* trx) /*!< in: transaction */ -{ - ut_ad(trx->is_recovered); - - if (trx->insert_undo != NULL) { - - trx_undo_insert_cleanup(trx); - } - - trx->rseg = NULL; - trx->undo_no = 0; - trx->last_sql_stat_start.least_undo_no = 0; - - mutex_enter(&trx_sys->mutex); - - ut_a(!trx->read_only); - - UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx); - ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list)); - - assert_trx_in_rw_list(trx); - ut_d(trx->in_rw_trx_list = FALSE); - - trx->state = TRX_STATE_NOT_STARTED; - trx_release_descriptor(trx); - - mutex_exit(&trx_sys->mutex); - - /* Change the transaction state without mutex protection, now - that it no longer is in the trx_list. Recovered transactions - are never placed in the mysql_trx_list. 
*/ - ut_ad(trx->is_recovered); - ut_ad(!trx->in_ro_trx_list); - ut_ad(!trx->in_rw_trx_list); - ut_ad(!trx->in_mysql_trx_list); -} - -/********************************************************************//** -Assigns a read view for a consistent read query. All the consistent reads -within the same transaction will get the same read view, which is created -when this function is first called for a new started transaction. -@return consistent read view */ -UNIV_INTERN -read_view_t* -trx_assign_read_view( -/*=================*/ - trx_t* trx) /*!< in: active transaction */ -{ - ut_ad(trx->state == TRX_STATE_ACTIVE); - - if (trx->read_view != NULL) { - return(trx->read_view); - } - - trx->read_view = read_view_open_now(trx->id, trx->prebuilt_view); - trx->global_read_view = trx->read_view; - - return(trx->read_view); -} - -/********************************************************************//** -Clones the read view from another transaction. All consistent reads within -the receiver transaction will get the same read view as the donor transaction -@return read view clone */ -UNIV_INTERN -read_view_t* -trx_clone_read_view( -/*================*/ - trx_t* trx, /*!< in: receiver transaction */ - trx_t* from_trx) /*!< in: donor transaction */ -{ - ut_ad(lock_mutex_own()); - ut_ad(mutex_own(&trx_sys->mutex)); - ut_ad(trx_mutex_own(from_trx)); - ut_ad(trx->read_view == NULL); - - if (from_trx->state != TRX_STATE_ACTIVE || - from_trx->read_view == NULL) { - - return(NULL); - } - - trx->read_view = read_view_clone(from_trx->read_view, - trx->prebuilt_view); - - read_view_add(trx->read_view); - - trx->global_read_view = trx->read_view; - - return(trx->read_view); -} - -/****************************************************************//** -Prepares a transaction for commit/rollback. 
*/ -UNIV_INTERN -void -trx_commit_or_rollback_prepare( -/*===========================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - /* We are reading trx->state without holding trx_sys->mutex - here, because the commit or rollback should be invoked for a - running (or recovered prepared) transaction that is associated - with the current thread. */ - - switch (trx->state) { - case TRX_STATE_NOT_STARTED: -#ifdef WITH_WSREP - ut_d(trx->start_file = __FILE__); - ut_d(trx->start_line = __LINE__); -#endif /* WITH_WSREP */ - trx_start_low(trx); - /* fall through */ - case TRX_STATE_ACTIVE: - case TRX_STATE_PREPARED: - /* If the trx is in a lock wait state, moves the waiting - query thread to the suspended state */ - - if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - - ulint sec; - ulint ms; - ib_uint64_t now; - - ut_a(trx->lock.wait_thr != NULL); - trx->lock.wait_thr->state = QUE_THR_SUSPENDED; - trx->lock.wait_thr = NULL; - - if (UNIV_UNLIKELY(trx->take_stats)) { - ut_usectime(&sec, &ms); - now = (ib_uint64_t)sec * 1000000 + ms; - trx->lock_que_wait_timer - += (ulint) - (now - trx->lock_que_wait_ustarted); - } - - trx->lock.que_state = TRX_QUE_RUNNING; - } - - ut_a(trx->lock.n_active_thrs == 1); - return; - case TRX_STATE_COMMITTED_IN_MEMORY: - break; - } - - ut_error; -} - -/*********************************************************************//** -Creates a commit command node struct. -@return own: commit node struct */ -UNIV_INTERN -commit_node_t* -trx_commit_node_create( -/*===================*/ - mem_heap_t* heap) /*!< in: mem heap where created */ -{ - commit_node_t* node; - - node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node))); - node->common.type = QUE_NODE_COMMIT; - node->state = COMMIT_NODE_SEND; - - return(node); -} - -/***********************************************************//** -Performs an execution step for a commit type node in a query graph. 
-@return query thread to run next, or NULL */ -UNIV_INTERN -que_thr_t* -trx_commit_step( -/*============*/ - que_thr_t* thr) /*!< in: query thread */ -{ - commit_node_t* node; - - node = static_cast<commit_node_t*>(thr->run_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = COMMIT_NODE_SEND; - } - - if (node->state == COMMIT_NODE_SEND) { - trx_t* trx; - - node->state = COMMIT_NODE_WAIT; - - trx = thr_get_trx(thr); - - ut_a(trx->lock.wait_thr == NULL); - ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT); - - trx_commit_or_rollback_prepare(trx); - - trx->lock.que_state = TRX_QUE_COMMITTING; - - trx_commit(trx); - - ut_ad(trx->lock.wait_thr == NULL); - - trx->lock.que_state = TRX_QUE_RUNNING; - - thr = NULL; - } else { - ut_ad(node->state == COMMIT_NODE_WAIT); - - node->state = COMMIT_NODE_SEND; - - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/**********************************************************************//** -Does the transaction commit for MySQL. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -trx_commit_for_mysql( -/*=================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - /* Because we do not do the commit by sending an Innobase - sig to the transaction, we must here make sure that trx has been - started. */ - - ut_a(trx); - - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - /* Update the info whether we should skip XA steps that eat - CPU time. - - For the duration of the transaction trx->support_xa is - not reread from thd so any changes in the value take - effect in the next transaction. This is to avoid a - scenario where some undo log records generated by a - transaction contain XA information and other undo log - records, generated by the same transaction do not. 
*/ - trx->support_xa = thd_supports_xa(trx->mysql_thd); - - ut_d(trx->start_file = __FILE__); - ut_d(trx->start_line = __LINE__); - - trx_start_low(trx); - /* fall through */ - case TRX_STATE_ACTIVE: - case TRX_STATE_PREPARED: - trx->op_info = "committing"; - trx_commit(trx); - MONITOR_DEC(MONITOR_TRX_ACTIVE); - trx->op_info = ""; - return(DB_SUCCESS); - case TRX_STATE_COMMITTED_IN_MEMORY: - break; - } - ut_error; - return(DB_CORRUPTION); -} - -/**********************************************************************//** -If required, flushes the log to disk if we called trx_commit_for_mysql() -with trx->flush_log_later == TRUE. */ -UNIV_INTERN -void -trx_commit_complete_for_mysql( -/*==========================*/ - trx_t* trx) /*!< in/out: transaction */ -{ - ut_a(trx); - - if (!trx->must_flush_log_later - || thd_requested_durability(trx->mysql_thd) - == HA_IGNORE_DURABILITY) { - return; - } - - ulint flush_log_at_trx_commit; - - flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit - ? thd_flush_log_at_trx_commit(NULL) - : thd_flush_log_at_trx_commit(trx->mysql_thd); - - if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) { - return; - } - - trx_flush_log_if_needed(trx->commit_lsn, trx); - - trx->must_flush_log_later = FALSE; -} - -/**********************************************************************//** -Marks the latest SQL statement ended. */ -UNIV_INTERN -void -trx_mark_sql_stat_end( -/*==================*/ - trx_t* trx) /*!< in: trx handle */ -{ - ut_a(trx); - - switch (trx->state) { - case TRX_STATE_PREPARED: - case TRX_STATE_COMMITTED_IN_MEMORY: - break; - case TRX_STATE_NOT_STARTED: - trx->undo_no = 0; - /* fall through */ - case TRX_STATE_ACTIVE: - trx->last_sql_stat_start.least_undo_no = trx->undo_no; - - if (trx->fts_trx) { - fts_savepoint_laststmt_refresh(trx); - } - - return; - } - - ut_error; -} - -/**********************************************************************//** -Prints info about a transaction. 
-Caller must hold trx_sys->mutex. */ -UNIV_INTERN -void -trx_print_low( -/*==========*/ - FILE* f, - /*!< in: output stream */ - const trx_t* trx, - /*!< in: transaction */ - ulint max_query_len, - /*!< in: max query length to print, - or 0 to use the default max length */ - ulint n_rec_locks, - /*!< in: lock_number_of_rows_locked(&trx->lock) */ - ulint n_trx_locks, - /*!< in: length of trx->lock.trx_locks */ - ulint heap_size) - /*!< in: mem_heap_get_size(trx->lock.lock_heap) */ -{ - ibool newline; - const char* op_info; - - ut_ad(mutex_own(&trx_sys->mutex)); - - fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id); - - /* trx->state cannot change from or to NOT_STARTED while we - are holding the trx_sys->mutex. It may change from ACTIVE to - PREPARED or COMMITTED. */ - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - fputs(", not started", f); - goto state_ok; - case TRX_STATE_ACTIVE: - fprintf(f, ", ACTIVE %lu sec", - (ulong) difftime(time(NULL), trx->start_time)); - goto state_ok; - case TRX_STATE_PREPARED: - fprintf(f, ", ACTIVE (PREPARED) %lu sec", - (ulong) difftime(time(NULL), trx->start_time)); - goto state_ok; - case TRX_STATE_COMMITTED_IN_MEMORY: - fputs(", COMMITTED IN MEMORY", f); - goto state_ok; - } - fprintf(f, ", state %lu", (ulong) trx->state); - ut_ad(0); -state_ok: - - /* prevent a race condition */ - op_info = trx->op_info; - - if (*op_info) { - putc(' ', f); - fputs(op_info, f); - } - - if (trx->is_recovered) { - fputs(" recovered trx", f); - } - - if (trx->declared_to_be_inside_innodb) { - fprintf(f, ", thread declared inside InnoDB %lu", - (ulong) trx->n_tickets_to_enter_innodb); - } - - putc('\n', f); - - if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { - fprintf(f, "mysql tables in use %lu, locked %lu\n", - (ulong) trx->n_mysql_tables_in_use, - (ulong) trx->mysql_n_tables_locked); - } - - newline = TRUE; - - /* trx->lock.que_state of an ACTIVE transaction may change - while we are not holding trx->mutex. 
We perform a dirty read - for performance reasons. */ - - switch (trx->lock.que_state) { - case TRX_QUE_RUNNING: - newline = FALSE; break; - case TRX_QUE_LOCK_WAIT: - fputs("LOCK WAIT ", f); break; - case TRX_QUE_ROLLING_BACK: - fputs("ROLLING BACK ", f); break; - case TRX_QUE_COMMITTING: - fputs("COMMITTING ", f); break; - default: - fprintf(f, "que state %lu ", (ulong) trx->lock.que_state); - } - - if (n_trx_locks > 0 || heap_size > 400) { - newline = TRUE; - - fprintf(f, "%lu lock struct(s), heap size %lu," - " %lu row lock(s)", - (ulong) n_trx_locks, - (ulong) heap_size, - (ulong) n_rec_locks); - } - - if (trx->has_search_latch) { - newline = TRUE; - fputs(", holds adaptive hash latch", f); - } - - if (trx->undo_no != 0) { - newline = TRUE; - fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no); - } - - if (newline) { - putc('\n', f); - } - - if (trx->mysql_thd != NULL) { - innobase_mysql_print_thd( - f, trx->mysql_thd, static_cast<uint>(max_query_len)); - } -} - -/**********************************************************************//** -Prints info about a transaction. -The caller must hold lock_sys->mutex and trx_sys->mutex. -When possible, use trx_print() instead. */ -UNIV_INTERN -void -trx_print_latched( -/*==============*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ -{ - ut_ad(lock_mutex_own()); - ut_ad(mutex_own(&trx_sys->mutex)); - - trx_print_low(f, trx, max_query_len, - lock_number_of_rows_locked(&trx->lock), - UT_LIST_GET_LEN(trx->lock.trx_locks), - mem_heap_get_size(trx->lock.lock_heap)); -} - -#ifdef WITH_WSREP -/**********************************************************************//** -Prints info about a transaction. -Transaction information may be retrieved without having trx_sys->mutex acquired -so it may not be completely accurate. 
The caller must own lock_sys->mutex -and the trx must have some locks to make sure that it does not escape -without locking lock_sys->mutex. */ -UNIV_INTERN -void -wsrep_trx_print_locking( -/*==========*/ - FILE* f, - /*!< in: output stream */ - const trx_t* trx, - /*!< in: transaction */ - ulint max_query_len) - /*!< in: max query length to print, - or 0 to use the default max length */ -{ - ibool newline; - const char* op_info; - - ut_ad(lock_mutex_own()); - ut_ad(trx->lock.trx_locks.count > 0); - - fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id); - - /* trx->state may change since trx_sys->mutex is not required */ - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - fputs(", not started", f); - goto state_ok; - case TRX_STATE_ACTIVE: - fprintf(f, ", ACTIVE %lu sec", - (ulong) difftime(time(NULL), trx->start_time)); - goto state_ok; - case TRX_STATE_PREPARED: - fprintf(f, ", ACTIVE (PREPARED) %lu sec", - (ulong) difftime(time(NULL), trx->start_time)); - goto state_ok; - case TRX_STATE_COMMITTED_IN_MEMORY: - fputs(", COMMITTED IN MEMORY", f); - goto state_ok; - } - fprintf(f, ", state %lu", (ulong) trx->state); - ut_ad(0); -state_ok: - - /* prevent a race condition */ - op_info = trx->op_info; - - if (*op_info) { - putc(' ', f); - fputs(op_info, f); - } - - if (trx->is_recovered) { - fputs(" recovered trx", f); - } - - if (trx->declared_to_be_inside_innodb) { - fprintf(f, ", thread declared inside InnoDB %lu", - (ulong) trx->n_tickets_to_enter_innodb); - } - - putc('\n', f); - - if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { - fprintf(f, "mysql tables in use %lu, locked %lu\n", - (ulong) trx->n_mysql_tables_in_use, - (ulong) trx->mysql_n_tables_locked); - } - - newline = TRUE; - - /* trx->lock.que_state of an ACTIVE transaction may change - while we are not holding trx->mutex. We perform a dirty read - for performance reasons. 
*/ - - switch (trx->lock.que_state) { - case TRX_QUE_RUNNING: - newline = FALSE; break; - case TRX_QUE_LOCK_WAIT: - fputs("LOCK WAIT ", f); break; - case TRX_QUE_ROLLING_BACK: - fputs("ROLLING BACK ", f); break; - case TRX_QUE_COMMITTING: - fputs("COMMITTING ", f); break; - default: - fprintf(f, "que state %lu ", (ulong) trx->lock.que_state); - } - - if (trx->has_search_latch) { - newline = TRUE; - fputs(", holds adaptive hash latch", f); - } - - if (trx->undo_no != 0) { - newline = TRUE; - fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no); - } - - if (newline) { - putc('\n', f); - } - - if (trx->mysql_thd != NULL) { - innobase_mysql_print_thd( - f, trx->mysql_thd, static_cast<uint>(max_query_len)); - } -} -#endif /* WITH_WSREP */ - -/**********************************************************************//** -Prints info about a transaction. -Acquires and releases lock_sys->mutex and trx_sys->mutex. */ -UNIV_INTERN -void -trx_print( -/*======*/ - FILE* f, /*!< in: output stream */ - const trx_t* trx, /*!< in: transaction */ - ulint max_query_len) /*!< in: max query length to print, - or 0 to use the default max length */ -{ - ulint n_rec_locks; - ulint n_trx_locks; - ulint heap_size; - - lock_mutex_enter(); - n_rec_locks = lock_number_of_rows_locked(&trx->lock); - n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks); - heap_size = mem_heap_get_size(trx->lock.lock_heap); - lock_mutex_exit(); - - mutex_enter(&trx_sys->mutex); - trx_print_low(f, trx, max_query_len, - n_rec_locks, n_trx_locks, heap_size); - mutex_exit(&trx_sys->mutex); -} - -#ifdef UNIV_DEBUG -/**********************************************************************//** -Asserts that a transaction has been started. -The caller must hold trx_sys->mutex. 
-@return TRUE if started */ -UNIV_INTERN -ibool -trx_assert_started( -/*===============*/ - const trx_t* trx) /*!< in: transaction */ -{ - ut_ad(mutex_own(&trx_sys->mutex)); - - /* Non-locking autocommits should not hold any locks and this - function is only called from the locking code. */ - assert_trx_in_list(trx); - - /* trx->state can change from or to NOT_STARTED while we are holding - trx_sys->mutex for non-locking autocommit selects but not for other - types of transactions. It may change from ACTIVE to PREPARED. Unless - we are holding lock_sys->mutex, it may also change to COMMITTED. */ - - switch (trx->state) { - case TRX_STATE_PREPARED: - return(TRUE); - - case TRX_STATE_ACTIVE: - case TRX_STATE_COMMITTED_IN_MEMORY: - return(TRUE); - - case TRX_STATE_NOT_STARTED: - break; - } - - ut_error; - return(FALSE); -} -#endif /* UNIV_DEBUG */ - -/*******************************************************************//** -Compares the "weight" (or size) of two transactions. The heavier the weight, -the more reluctant we will be to choose the transaction as a deadlock victim. -@return TRUE if weight(a) >= weight(b) */ -UNIV_INTERN -ibool -trx_weight_ge( -/*==========*/ - const trx_t* a, /*!< in: the first transaction to be compared */ - const trx_t* b) /*!< in: the second transaction to be compared */ -{ - int pref; - - /* First ask the upper server layer if it has any preference for which - to prefer as a deadlock victim. */ - pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd); - if (pref < 0) { - return FALSE; - } else if (pref > 0) { - return TRUE; - } - - /* Upper server layer had no preference, we fall back to comparing the - number of altered/locked rows. 
*/ - -#if 0 - fprintf(stderr, - "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n", - __func__, - a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks), - b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks)); -#endif - - return(TRX_WEIGHT(a) >= TRX_WEIGHT(b)); -} - -/****************************************************************//** -Prepares a transaction. */ -static -void -trx_prepare( -/*========*/ - trx_t* trx) /*!< in/out: transaction */ -{ - trx_rseg_t* rseg; - lsn_t lsn; - mtr_t mtr; - - rseg = trx->rseg; - /* Only fresh user transactions can be prepared. - Recovered transactions cannot. */ - ut_a(!trx->is_recovered); - - if (trx->insert_undo != NULL || trx->update_undo != NULL) { - - mtr_start(&mtr); - - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to TRX_UNDO_PREPARED: these modifications to the file data - structure define the transaction as prepared in the - file-based world, at the serialization point of lsn. */ - - mutex_enter(&rseg->mutex); - - if (trx->insert_undo != NULL) { - - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction prepare for this transaction. 
*/ - - trx_undo_set_state_at_prepare(trx, trx->insert_undo, - &mtr); - } - - if (trx->update_undo) { - trx_undo_set_state_at_prepare( - trx, trx->update_undo, &mtr); - } - - mutex_exit(&rseg->mutex); - - /*--------------*/ - mtr_commit(&mtr); /* This mtr commit makes the - transaction prepared in the file-based - world */ - /*--------------*/ - lsn = mtr.end_lsn; - ut_ad(lsn); - } else { - lsn = 0; - } - - /*--------------------------------------*/ - ut_a(trx->state == TRX_STATE_ACTIVE); - mutex_enter(&trx_sys->mutex); - trx->state = TRX_STATE_PREPARED; - trx_sys->n_prepared_trx++; - mutex_exit(&trx_sys->mutex); - /*--------------------------------------*/ - - if (lsn) { - /* Depending on the my.cnf options, we may now write the log - buffer to the log files, making the prepared state of the - transaction durable if the OS does not crash. We may also - flush the log files to disk, making the prepared state of the - transaction durable also at an OS crash or a power outage. - - The idea in InnoDB's group prepare is that a group of - transactions gather behind a trx doing a physical disk write - to log files, and when that physical write has been completed, - one of those transactions does a write which prepares the whole - group. Note that this group prepare will only bring benefit if - there are > 2 users in the database. Then at least 2 users can - gather behind one doing the physical log write to disk. - - TODO: find out if MySQL holds some mutex when calling this. - That would spoil our group prepare algorithm. */ - - trx_flush_log_if_needed(lsn, trx); - } -} - -/**********************************************************************//** -Does the transaction prepare for MySQL. 
*/ -UNIV_INTERN -void -trx_prepare_for_mysql( -/*==================*/ - trx_t* trx) /*!< in/out: trx handle */ -{ - trx_start_if_not_started_xa(trx); - - trx->op_info = "preparing"; - - trx_prepare(trx); - - trx->op_info = ""; -} - -/**********************************************************************//** -This function is used to find number of prepared transactions and -their transaction objects for a recovery. -@return number of prepared transactions stored in xid_list */ -UNIV_INTERN -int -trx_recover_for_mysql( -/*==================*/ - XID* xid_list, /*!< in/out: prepared transactions */ - ulint len) /*!< in: number of slots in xid_list */ -{ - const trx_t* trx; - ulint count = 0; - - ut_ad(xid_list); - ut_ad(len); - - /* We should set those transactions which are in the prepared state - to the xid_list */ - - mutex_enter(&trx_sys->mutex); - - for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); - trx != NULL; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - - assert_trx_in_rw_list(trx); - - /* The state of a read-write transaction cannot change - from or to NOT_STARTED while we are holding the - trx_sys->mutex. It may change to PREPARED, but not if - trx->is_recovered. It may also change to COMMITTED. 
*/ - if (trx_state_eq(trx, TRX_STATE_PREPARED)) { - xid_list[count] = trx->xid; - - if (count == 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Starting recovery for" - " XA transactions...\n"); - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Transaction " TRX_ID_FMT " in" - " prepared state after recovery\n", - trx->id); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Transaction contains changes" - " to " TRX_ID_FMT " rows\n", - trx->undo_no); - - count++; - - if (count == len) { - break; - } - } - } - - mutex_exit(&trx_sys->mutex); - - if (count > 0){ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: %d transactions in prepared state" - " after recovery\n", - int (count)); - } - - return(int (count)); -} - -/*******************************************************************//** -This function is used to find one X/Open XA distributed transaction -which is in the prepared state -@return trx on match, the trx->xid will be invalidated; -note that the trx may have been committed, unless the caller is -holding lock_sys->mutex */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -trx_t* -trx_get_trx_by_xid_low( -/*===================*/ - const XID* xid) /*!< in: X/Open XA transaction - identifier */ -{ - trx_t* trx; - - ut_ad(mutex_own(&trx_sys->mutex)); - - for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); - trx != NULL; - trx = UT_LIST_GET_NEXT(trx_list, trx)) { - - assert_trx_in_rw_list(trx); - - /* Compare two X/Open XA transaction id's: their - length should be the same and binary comparison - of gtrid_length+bqual_length bytes should be - the same */ - - if (trx->is_recovered - && trx_state_eq(trx, TRX_STATE_PREPARED) - && xid->gtrid_length == trx->xid.gtrid_length - && xid->bqual_length == trx->xid.bqual_length - && memcmp(xid->data, trx->xid.data, - xid->gtrid_length + xid->bqual_length) == 0) { - - /* Invalidate the XID, so that subsequent calls - will not find it. 
*/ - memset(&trx->xid, 0, sizeof(trx->xid)); - trx->xid.formatID = -1; - break; - } - } - - return(trx); -} - -/*******************************************************************//** -This function is used to find one X/Open XA distributed transaction -which is in the prepared state -@return trx or NULL; on match, the trx->xid will be invalidated; -note that the trx may have been committed, unless the caller is -holding lock_sys->mutex */ -UNIV_INTERN -trx_t* -trx_get_trx_by_xid( -/*===============*/ - const XID* xid) /*!< in: X/Open XA transaction identifier */ -{ - trx_t* trx; - - if (xid == NULL) { - - return(NULL); - } - - mutex_enter(&trx_sys->mutex); - - /* Recovered/Resurrected transactions are always only on the - trx_sys_t::rw_trx_list. */ - trx = trx_get_trx_by_xid_low(xid); - - mutex_exit(&trx_sys->mutex); - - return(trx); -} - -/*************************************************************//** -Starts the transaction if it is not yet started. */ -UNIV_INTERN -void -trx_start_if_not_started_xa_low( -/*============================*/ - trx_t* trx) /*!< in: transaction */ -{ - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - - /* Update the info whether we should skip XA steps - that eat CPU time. - - For the duration of the transaction trx->support_xa is - not reread from thd so any changes in the value take - effect in the next transaction. This is to avoid a - scenario where some undo generated by a transaction, - has XA stuff, and other undo, generated by the same - transaction, doesn't. */ - trx->support_xa = thd_supports_xa(trx->mysql_thd); - - trx_start_low(trx); - /* fall through */ - case TRX_STATE_ACTIVE: - return; - case TRX_STATE_PREPARED: - case TRX_STATE_COMMITTED_IN_MEMORY: - break; - } - - ut_error; -} - -/*************************************************************//** -Starts the transaction if it is not yet started. 
*/ -UNIV_INTERN -void -trx_start_if_not_started_low( -/*=========================*/ - trx_t* trx) /*!< in: transaction */ -{ - switch (trx->state) { - case TRX_STATE_NOT_STARTED: -#ifdef WITH_WSREP - ut_d(trx->start_file = __FILE__); - ut_d(trx->start_line = __LINE__); -#endif /* WITH_WSREP */ - trx_start_low(trx); - /* fall through */ - case TRX_STATE_ACTIVE: - return; - case TRX_STATE_PREPARED: - case TRX_STATE_COMMITTED_IN_MEMORY: - break; - } - - ut_error; -} - -/*************************************************************//** -Starts the transaction for a DDL operation. */ -UNIV_INTERN -void -trx_start_for_ddl_low( -/*==================*/ - trx_t* trx, /*!< in/out: transaction */ - trx_dict_op_t op) /*!< in: dictionary operation type */ -{ - switch (trx->state) { - case TRX_STATE_NOT_STARTED: - /* Flag this transaction as a dictionary operation, so that - the data dictionary will be locked in crash recovery. */ - - trx_set_dict_operation(trx, op); - - /* Ensure it is not flagged as an auto-commit-non-locking - transation. */ - trx->will_lock = 1; - - trx->ddl = true; - -#ifdef WITH_WSREP - ut_d(trx->start_file = __FILE__); - ut_d(trx->start_line = __LINE__); -#endif /* WITH_WSREP */ - trx_start_low(trx); - return; - - case TRX_STATE_ACTIVE: - /* We have this start if not started idiom, therefore we - can't add stronger checks here. */ - trx->ddl = true; - - ut_ad(trx->dict_operation != TRX_DICT_OP_NONE); - ut_ad(trx->will_lock > 0); - return; - case TRX_STATE_PREPARED: - case TRX_STATE_COMMITTED_IN_MEMORY: - break; - } - - ut_error; -} diff --git a/storage/xtradb/trx/trx0undo.cc b/storage/xtradb/trx/trx0undo.cc deleted file mode 100644 index 220589dd9ff..00000000000 --- a/storage/xtradb/trx/trx0undo.cc +++ /dev/null @@ -1,2051 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file trx/trx0undo.cc -Transaction undo log - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0undo.h" - -#ifdef UNIV_NONINL -#include "trx0undo.ic" -#endif - -#include "fsp0fsp.h" -#ifndef UNIV_HOTBACKUP -#include "mach0data.h" -#include "mtr0log.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "trx0rec.h" -#include "trx0purge.h" -#include "srv0mon.h" - -/* How should the old versions in the history list be managed? - ---------------------------------------------------------- -If each transaction is given a whole page for its update undo log, file -space consumption can be 10 times higher than necessary. Therefore, -partly filled update undo log pages should be reusable. But then there -is no way individual pages can be ordered so that the ordering agrees -with the serialization numbers of the transactions on the pages. Thus, -the history list must be formed of undo logs, not their header pages as -it was in the old implementation. - However, on a single header page the transactions are placed in -the order of their serialization numbers. 
As old versions are purged, we -may free the page when the last transaction on the page has been purged. - A problem is that the purge has to go through the transactions -in the serialization order. This means that we have to look through all -rollback segments for the one that has the smallest transaction number -in its history list. - When should we do a purge? A purge is necessary when space is -running out in any of the rollback segments. Then we may have to purge -also old version which might be needed by some consistent read. How do -we trigger the start of a purge? When a transaction writes to an undo log, -it may notice that the space is running out. When a read view is closed, -it may make some history superfluous. The server can have an utility which -periodically checks if it can purge some history. - In a parallellized purge we have the problem that a query thread -can remove a delete marked clustered index record before another query -thread has processed an earlier version of the record, which cannot then -be done because the row cannot be constructed from the clustered index -record. To avoid this problem, we will store in the update and delete mark -undo record also the columns necessary to construct the secondary index -entries which are modified. - We can latch the stack of versions of a single clustered index record -by taking a latch on the clustered index page. As long as the latch is held, -no new versions can be added and no versions removed by undo. But, a purge -can still remove old versions from the bottom of the stack. */ - -/* How to protect rollback segments, undo logs, and history lists with - ------------------------------------------------------------------- -latches? -------- -The contention of the trx_sys_t::mutex should be minimized. When a transaction -does its first insert or modify in an index, an undo log is assigned for it. -Then we must have an x-latch to the rollback segment header. 
- When the transaction does more modifys or rolls back, the undo log is -protected with undo_mutex in the transaction. - When the transaction commits, its insert undo log is either reset and -cached for a fast reuse, or freed. In these cases we must have an x-latch on -the rollback segment page. The update undo log is put to the history list. If -it is not suitable for reuse, its slot in the rollback segment is reset. In -both cases, an x-latch must be acquired on the rollback segment. - The purge operation steps through the history list without modifying -it until a truncate operation occurs, which can remove undo logs from the end -of the list and release undo log segments. In stepping through the list, -s-latches on the undo log pages are enough, but in a truncate, x-latches must -be obtained on the rollback segment and individual pages. */ -#endif /* !UNIV_HOTBACKUP */ - -/********************************************************************//** -Initializes the fields in an undo log segment page. */ -static -void -trx_undo_page_init( -/*===============*/ - page_t* undo_page, /*!< in: undo log segment page */ - ulint type, /*!< in: undo log segment type */ - mtr_t* mtr); /*!< in: mtr */ - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Creates and initializes an undo log memory object. 
-@return own: the undo log memory object */ -static -trx_undo_t* -trx_undo_mem_create( -/*================*/ - trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - ulint id, /*!< in: slot index within rseg */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is created */ - const XID* xid, /*!< in: X/Open XA transaction identification*/ - ulint page_no,/*!< in: undo log header page number */ - ulint offset);/*!< in: undo log header byte offset on page */ -#endif /* !UNIV_HOTBACKUP */ -/***************************************************************//** -Initializes a cached insert undo log header page for new use. NOTE that this -function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change -the operation of this function! -@return undo log header byte offset on page */ -static -ulint -trx_undo_insert_header_reuse( -/*=========================*/ - page_t* undo_page, /*!< in/out: insert undo log segment - header page, x-latched */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr); /*!< in: mtr */ -/**********************************************************************//** -If an update undo log can be discarded immediately, this function frees the -space, resetting the page to the proper state for caching. */ -static -void -trx_undo_discard_latest_update_undo( -/*================================*/ - page_t* undo_page, /*!< in: header page of an undo log of size 1 */ - mtr_t* mtr); /*!< in: mtr */ - -#ifndef UNIV_HOTBACKUP -/***********************************************************************//** -Gets the previous record in an undo log from the previous page. 
-@return undo log record, the page s-latched, NULL if none */ -static -trx_undo_rec_t* -trx_undo_get_prev_rec_from_prev_page( -/*=================================*/ - trx_undo_rec_t* rec, /*!< in: undo record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - bool shared, /*!< in: true=S-latch, false=X-latch */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint space; - ulint zip_size; - ulint prev_page_no; - page_t* prev_page; - page_t* undo_page; - - undo_page = page_align(rec); - - prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_NODE, mtr) - .page; - - if (prev_page_no == FIL_NULL) { - - return(NULL); - } - - space = page_get_space_id(undo_page); - zip_size = fil_space_get_zip_size(space); - - buf_block_t* block = buf_page_get(space, zip_size, prev_page_no, - shared ? RW_S_LATCH : RW_X_LATCH, - mtr); - buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); - - prev_page = buf_block_get_frame(block); - - return(trx_undo_page_get_last_rec(prev_page, page_no, offset)); -} - -/***********************************************************************//** -Gets the previous record in an undo log. 
-@return undo log record, the page s-latched, NULL if none */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_prev_rec( -/*==================*/ - trx_undo_rec_t* rec, /*!< in: undo record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - bool shared, /*!< in: true=S-latch, false=X-latch */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_undo_rec_t* prev_rec; - - prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset); - - if (prev_rec) { - - return(prev_rec); - } - - /* We have to go to the previous undo log page to look for the - previous record */ - - return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset, - shared, mtr)); -} - -/***********************************************************************//** -Gets the next record in an undo log from the next page. -@return undo log record, the page latched, NULL if none */ -static -trx_undo_rec_t* -trx_undo_get_next_rec_from_next_page( -/*=================================*/ - ulint space, /*!< in: undo log header space */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - page_t* undo_page, /*!< in: undo log page */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - ulint mode, /*!< in: latch mode: RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_ulogf_t* log_hdr; - ulint next_page_no; - page_t* next_page; - ulint next; - - if (page_no == page_get_page_no(undo_page)) { - - log_hdr = undo_page + offset; - next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); - - if (next != 0) { - - return(NULL); - } - } - - next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_NODE, mtr) - .page; - if (next_page_no == FIL_NULL) { - - return(NULL); - } - - if (mode == RW_S_LATCH) { - next_page = trx_undo_page_get_s_latched(space, zip_size, - next_page_no, mtr); - } else { - ut_ad(mode == RW_X_LATCH); - next_page = 
trx_undo_page_get(space, zip_size, - next_page_no, mtr); - } - - return(trx_undo_page_get_first_rec(next_page, page_no, offset)); -} - -/***********************************************************************//** -Gets the next record in an undo log. -@return undo log record, the page s-latched, NULL if none */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_next_rec( -/*==================*/ - trx_undo_rec_t* rec, /*!< in: undo record */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint space; - ulint zip_size; - trx_undo_rec_t* next_rec; - - next_rec = trx_undo_page_get_next_rec(rec, page_no, offset); - - if (next_rec) { - return(next_rec); - } - - space = page_get_space_id(page_align(rec)); - zip_size = fil_space_get_zip_size(space); - - return(trx_undo_get_next_rec_from_next_page(space, zip_size, - page_align(rec), - page_no, offset, - RW_S_LATCH, mtr)); -} - -/***********************************************************************//** -Gets the first record in an undo log. 
-@return undo log record, the page latched, NULL if none */ -UNIV_INTERN -trx_undo_rec_t* -trx_undo_get_first_rec( -/*===================*/ - ulint space, /*!< in: undo log header space */ - ulint zip_size,/*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset, /*!< in: undo log header offset on page */ - ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* undo_page; - trx_undo_rec_t* rec; - - if (mode == RW_S_LATCH) { - undo_page = trx_undo_page_get_s_latched(space, zip_size, - page_no, mtr); - } else { - undo_page = trx_undo_page_get(space, zip_size, page_no, mtr); - } - - rec = trx_undo_page_get_first_rec(undo_page, page_no, offset); - - if (rec) { - return(rec); - } - - return(trx_undo_get_next_rec_from_next_page(space, zip_size, - undo_page, page_no, offset, - mode, mtr)); -} - -/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/ - -/**********************************************************************//** -Writes the mtr log entry of an undo log page initialization. */ -UNIV_INLINE -void -trx_undo_page_init_log( -/*===================*/ - page_t* undo_page, /*!< in: undo log page */ - ulint type, /*!< in: undo log type */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr); - - mlog_catenate_ulint_compressed(mtr, type); -} -#else /* !UNIV_HOTBACKUP */ -# define trx_undo_page_init_log(undo_page,type,mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses the redo log entry of an undo log page initialization. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_page_init( -/*=====================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ulint type; - - ptr = mach_parse_compressed(ptr, end_ptr, &type); - - if (ptr == NULL) { - - return(NULL); - } - - if (page) { - trx_undo_page_init(page, type, mtr); - } - - return(ptr); -} - -/********************************************************************//** -Initializes the fields in an undo log segment page. */ -static -void -trx_undo_page_init( -/*===============*/ - page_t* undo_page, /*!< in: undo log segment page */ - ulint type, /*!< in: undo log segment type */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_upagef_t* page_hdr; - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, - TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, - TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); - - fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG); - - trx_undo_page_init_log(undo_page, type, mtr); -} - -#ifndef UNIV_HOTBACKUP -/***************************************************************//** -Creates a new undo log segment in file. 
-@return DB_SUCCESS if page creation OK possible error codes are: -DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -trx_undo_seg_create( -/*================*/ - trx_rseg_t* rseg MY_ATTRIBUTE((unused)),/*!< in: rollback segment */ - trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page - x-latched */ - ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - ulint* id, /*!< out: slot index within rseg header */ - page_t** undo_page, - /*!< out: segment header page x-latched, NULL - if there was an error */ - mtr_t* mtr) /*!< in: mtr */ -{ - ulint slot_no; - ulint space; - buf_block_t* block; - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - ulint n_reserved; - ibool success; - dberr_t err = DB_SUCCESS; - - ut_ad(mtr != NULL); - ut_ad(id != NULL); - ut_ad(rseg_hdr != NULL); - ut_ad(mutex_own(&(rseg->mutex))); - - /* fputs(type == TRX_UNDO_INSERT - ? "Creating insert undo log segment\n" - : "Creating update undo log segment\n", stderr); */ - slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr); - - if (slot_no == ULINT_UNDEFINED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: cannot find a free slot for" - " an undo log. 
Do you have too\n" - "InnoDB: many active transactions" - " running concurrently?\n"); - - return(DB_TOO_MANY_CONCURRENT_TRXS); - } - - space = page_get_space_id(page_align(rseg_hdr)); - - success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, - mtr); - if (!success) { - - return(DB_OUT_OF_FILE_SPACE); - } - - /* Allocate a new file segment for the undo log */ - block = fseg_create_general(space, 0, - TRX_UNDO_SEG_HDR - + TRX_UNDO_FSEG_HEADER, TRUE, mtr); - - fil_space_release_free_extents(space, n_reserved); - - if (block == NULL) { - /* No space left */ - - return(DB_OUT_OF_FILE_SPACE); - } - - buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); - - *undo_page = buf_block_get_frame(block); - - page_hdr = *undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = *undo_page + TRX_UNDO_SEG_HDR; - - trx_undo_page_init(*undo_page, type, mtr); - - mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, - TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE, - MLOG_2BYTES, mtr); - - mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr); - - flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr); - - flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST, - page_hdr + TRX_UNDO_PAGE_NODE, mtr); - - trx_rsegf_set_nth_undo(rseg_hdr, slot_no, - page_get_page_no(*undo_page), mtr); - *id = slot_no; - - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED); - - return(err); -} - -/**********************************************************************//** -Writes the mtr log entry of an undo log header initialization. 
*/ -UNIV_INLINE -void -trx_undo_header_create_log( -/*=======================*/ - const page_t* undo_page, /*!< in: undo log header page */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr); - - mlog_catenate_ull_compressed(mtr, trx_id); -} -#else /* !UNIV_HOTBACKUP */ -# define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************************//** -Creates a new undo log header in file. NOTE that this function has its own -log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of -this function! -@return header byte offset on page */ -static -ulint -trx_undo_header_create( -/*===================*/ - page_t* undo_page, /*!< in/out: undo log segment - header page, x-latched; it is - assumed that there is - TRX_UNDO_LOG_XA_HDR_SIZE bytes - free space on it */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - trx_ulogf_t* log_hdr; - trx_ulogf_t* prev_log_hdr; - ulint prev_log; - ulint free; - ulint new_free; - - ut_ad(mtr && undo_page); - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE); - - log_hdr = undo_page + free; - - new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE; - - ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE); - - prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - - if (prev_log != 0) { - prev_log_hdr = undo_page + prev_log; - - mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free); - } - - mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free); - - log_hdr = undo_page + free; 
- - mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE); - - mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); - mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - - mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); - mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); - - mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0); - mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log); - - /* Write the log record about the header creation */ - trx_undo_header_create_log(undo_page, trx_id, mtr); - - return(free); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Write X/Open XA Transaction Identification (XID) to undo log header */ -static -void -trx_undo_write_xid( -/*===============*/ - trx_ulogf_t* log_hdr,/*!< in: undo log header */ - const XID* xid, /*!< in: X/Open XA Transaction Identification */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, - (ulint) xid->formatID, MLOG_4BYTES, mtr); - - mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, - (ulint) xid->gtrid_length, MLOG_4BYTES, mtr); - - mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, - (ulint) xid->bqual_length, MLOG_4BYTES, mtr); - - mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data, - XIDDATASIZE, mtr); -} - -/********************************************************************//** -Read X/Open XA Transaction Identification (XID) from undo log header */ -static -void -trx_undo_read_xid( -/*==============*/ - trx_ulogf_t* log_hdr,/*!< in: undo log header */ - XID* xid) /*!< out: X/Open XA Transaction Identification */ -{ - xid->formatID = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); - - xid->gtrid_length - = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); - xid->bqual_length - = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); - - memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE); -} - 
-/***************************************************************//** -Adds space for the XA XID after an undo log old-style header. */ -static -void -trx_undo_header_add_space_for_xid( -/*==============================*/ - page_t* undo_page,/*!< in: undo log segment header page */ - trx_ulogf_t* log_hdr,/*!< in: undo log header */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_upagef_t* page_hdr; - ulint free; - ulint new_free; - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE); - - /* free is now the end offset of the old style undo log header */ - - ut_a(free == (ulint)(log_hdr - undo_page) + TRX_UNDO_LOG_OLD_HDR_SIZE); - - new_free = free + (TRX_UNDO_LOG_XA_HDR_SIZE - - TRX_UNDO_LOG_OLD_HDR_SIZE); - - /* Add space for a XID after the header, update the free offset - fields on the undo log page and in the undo log header */ - - mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_START, new_free, - MLOG_2BYTES, mtr); - - mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, new_free, - MLOG_2BYTES, mtr); - - mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, new_free, - MLOG_2BYTES, mtr); -} - -/**********************************************************************//** -Writes the mtr log entry of an undo log header reuse. */ -UNIV_INLINE -void -trx_undo_insert_header_reuse_log( -/*=============================*/ - const page_t* undo_page, /*!< in: undo log header page */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr); - - mlog_catenate_ull_compressed(mtr, trx_id); -} -#else /* !UNIV_HOTBACKUP */ -# define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses the redo log entry of an undo log page header create or reuse. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_page_header( -/*=======================*/ - ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - trx_id_t trx_id; - /* Silence a GCC warning about possibly uninitialized variable - when mach_ull_parse_compressed() is not inlined. */ - ut_d(trx_id = 0); - /* Declare the variable uninitialized in Valgrind, so that the - above initialization will not mask any bugs. */ - UNIV_MEM_INVALID(&trx_id, sizeof trx_id); - - ptr = mach_ull_parse_compressed(ptr, end_ptr, &trx_id); - - if (ptr == NULL) { - - return(NULL); - } - - if (page) { - if (type == MLOG_UNDO_HDR_CREATE) { - trx_undo_header_create(page, trx_id, mtr); - } else { - ut_ad(type == MLOG_UNDO_HDR_REUSE); - trx_undo_insert_header_reuse(page, trx_id, mtr); - } - } - - return(ptr); -} - -/***************************************************************//** -Initializes a cached insert undo log header page for new use. NOTE that this -function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change -the operation of this function! 
-@return undo log header byte offset on page */ -static -ulint -trx_undo_insert_header_reuse( -/*=========================*/ - page_t* undo_page, /*!< in/out: insert undo log segment - header page, x-latched */ - trx_id_t trx_id, /*!< in: transaction id */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - trx_ulogf_t* log_hdr; - ulint free; - ulint new_free; - - ut_ad(mtr && undo_page); - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE; - - ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100); - - log_hdr = undo_page + free; - - new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE; - - /* Insert undo data is not needed after commit: we may free all - the space on the page */ - - ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_INSERT); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE); - - log_hdr = undo_page + free; - - mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); - mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - - mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); - mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); - - /* Write the log record MLOG_UNDO_HDR_REUSE */ - trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr); - - return(free); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Writes the redo log entry of an update undo log header discard. 
*/ -UNIV_INLINE -void -trx_undo_discard_latest_log( -/*========================*/ - page_t* undo_page, /*!< in: undo log header page */ - mtr_t* mtr) /*!< in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr); -} -#else /* !UNIV_HOTBACKUP */ -# define trx_undo_discard_latest_log(undo_page, mtr) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses the redo log entry of an undo log page header discard. -@return end of log record or NULL */ -UNIV_INTERN -byte* -trx_undo_parse_discard_latest( -/*==========================*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ - page_t* page, /*!< in: page or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ -{ - ut_ad(end_ptr); - - if (page) { - trx_undo_discard_latest_update_undo(page, mtr); - } - - return(ptr); -} - -/**********************************************************************//** -If an update undo log can be discarded immediately, this function frees the -space, resetting the page to the proper state for caching. 
*/ -static -void -trx_undo_discard_latest_update_undo( -/*================================*/ - page_t* undo_page, /*!< in: header page of an undo log of size 1 */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; - trx_ulogf_t* log_hdr; - trx_ulogf_t* prev_log_hdr; - ulint free; - ulint prev_hdr_offset; - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - log_hdr = undo_page + free; - - prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG); - - if (prev_hdr_offset != 0) { - prev_log_hdr = undo_page + prev_hdr_offset; - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, - mach_read_from_2(prev_log_hdr - + TRX_UNDO_LOG_START)); - mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0); - } - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED); - mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset); - - trx_undo_discard_latest_log(undo_page, mtr); -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************//** -Tries to add a page to the undo log segment where the undo log is placed. 
-@return X-latched block if success, else NULL */ -UNIV_INTERN -buf_block_t* -trx_undo_add_page( -/*==============*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log memory object */ - mtr_t* mtr) /*!< in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ -{ - page_t* header_page; - buf_block_t* new_block; - page_t* new_page; - trx_rseg_t* rseg; - ulint n_reserved; - - ut_ad(mutex_own(&(trx->undo_mutex))); - ut_ad(mutex_own(&(trx->rseg->mutex))); - - rseg = trx->rseg; - - if (rseg->curr_size == rseg->max_size) { - - return(NULL); - } - - header_page = trx_undo_page_get(undo->space, undo->zip_size, - undo->hdr_page_no, mtr); - - if (!fsp_reserve_free_extents(&n_reserved, undo->space, 1, - FSP_UNDO, mtr)) { - - return(NULL); - } - - new_block = fseg_alloc_free_page_general( - TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER - + header_page, - undo->top_page_no + 1, FSP_UP, TRUE, mtr, mtr); - - fil_space_release_free_extents(undo->space, n_reserved); - - if (new_block == NULL) { - - /* No space left */ - - return(NULL); - } - - ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1); - buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE); - undo->last_page_no = buf_block_get_page_no(new_block); - - new_page = buf_block_get_frame(new_block); - - trx_undo_page_init(new_page, undo->type, mtr); - - flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); - undo->size++; - rseg->curr_size++; - - return(new_block); -} - -/********************************************************************//** -Frees an undo log page that is not the header page. 
-@return last page number in remaining log */ -static -ulint -trx_undo_free_page( -/*===============*/ - trx_rseg_t* rseg, /*!< in: rollback segment */ - ibool in_history, /*!< in: TRUE if the undo log is in the history - list */ - ulint space, /*!< in: space */ - ulint hdr_page_no, /*!< in: header page number */ - ulint page_no, /*!< in: page number to free: must not be the - header page */ - mtr_t* mtr) /*!< in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ -{ - page_t* header_page; - page_t* undo_page; - fil_addr_t last_addr; - trx_rsegf_t* rseg_header; - ulint hist_size; - ulint zip_size; - - ut_a(hdr_page_no != page_no); - ut_ad(mutex_own(&(rseg->mutex))); - - zip_size = rseg->zip_size; - - undo_page = trx_undo_page_get(space, zip_size, page_no, mtr); - - header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr); - - flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); - - fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, - space, page_no, mtr); - - last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR - + TRX_UNDO_PAGE_LIST, mtr); - rseg->curr_size--; - - if (in_history) { - rseg_header = trx_rsegf_get(space, zip_size, - rseg->page_no, mtr); - - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); - ut_ad(hist_size > 0); - mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size - 1, MLOG_4BYTES, mtr); - } - - return(last_addr.page); -} - -/********************************************************************//** -Frees the last undo log page. -The caller must hold the rollback segment mutex. 
*/ -UNIV_INTERN -void -trx_undo_free_last_page_func( -/*==========================*/ -#ifdef UNIV_DEBUG - const trx_t* trx, /*!< in: transaction */ -#endif /* UNIV_DEBUG */ - trx_undo_t* undo, /*!< in/out: undo log memory copy */ - mtr_t* mtr) /*!< in/out: mini-transaction which does not - have a latch to any undo log page or which - has allocated the undo log page */ -{ - ut_ad(mutex_own(&trx->undo_mutex)); - ut_ad(undo->hdr_page_no != undo->last_page_no); - ut_ad(undo->size > 0); - - undo->last_page_no = trx_undo_free_page( - undo->rseg, FALSE, undo->space, - undo->hdr_page_no, undo->last_page_no, mtr); - - undo->size--; -} - -/********************************************************************//** -Empties an undo log header page of undo records for that undo log. Other -undo logs may still have records on that page, if it is an update undo log. */ -static -void -trx_undo_empty_header_page( -/*=======================*/ - ulint space, /*!< in: space */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint hdr_page_no, /*!< in: header page number */ - ulint hdr_offset, /*!< in: header offset */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* header_page; - trx_ulogf_t* log_hdr; - ulint end; - - header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr); - - log_hdr = header_page + hdr_offset; - - end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset); - - mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr); -} - -/***********************************************************************//** -Truncates an undo log from the end. This function is used during a rollback -to free space from an undo log. 
*/ -UNIV_INTERN -void -trx_undo_truncate_end( -/*=======================*/ - trx_t* trx, /*!< in: transaction whose undo log it is */ - trx_undo_t* undo, /*!< in: undo log */ - undo_no_t limit) /*!< in: all undo records with undo number - >= this value should be truncated */ -{ - page_t* undo_page; - ulint last_page_no; - trx_undo_rec_t* rec; - trx_undo_rec_t* trunc_here; - mtr_t mtr; - - ut_ad(mutex_own(&(trx->undo_mutex))); - ut_ad(mutex_own(&(trx->rseg->mutex))); - - for (;;) { - mtr_start_trx(&mtr, trx); - - trunc_here = NULL; - - last_page_no = undo->last_page_no; - - undo_page = trx_undo_page_get(undo->space, undo->zip_size, - last_page_no, &mtr); - - rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no, - undo->hdr_offset); - while (rec) { - if (trx_undo_rec_get_undo_no(rec) >= limit) { - /* Truncate at least this record off, maybe - more */ - trunc_here = rec; - } else { - goto function_exit; - } - - rec = trx_undo_page_get_prev_rec(rec, - undo->hdr_page_no, - undo->hdr_offset); - } - - if (last_page_no == undo->hdr_page_no) { - - goto function_exit; - } - - ut_ad(last_page_no == undo->last_page_no); - trx_undo_free_last_page(trx, undo, &mtr); - - mtr_commit(&mtr); - } - -function_exit: - if (trunc_here) { - mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE, - trunc_here - undo_page, MLOG_2BYTES, &mtr); - } - - mtr_commit(&mtr); -} - -/***********************************************************************//** -Truncates an undo log from the start. This function is used during a purge -operation. 
*/ -UNIV_INTERN -void -trx_undo_truncate_start( -/*====================*/ - trx_rseg_t* rseg, /*!< in: rollback segment */ - ulint space, /*!< in: space id of the log */ - ulint hdr_page_no, /*!< in: header page number */ - ulint hdr_offset, /*!< in: header offset on the page */ - undo_no_t limit) /*!< in: all undo pages with - undo numbers < this value - should be truncated; NOTE that - the function only frees whole - pages; the header page is not - freed, but emptied, if all the - records there are < limit */ -{ - page_t* undo_page; - trx_undo_rec_t* rec; - trx_undo_rec_t* last_rec; - ulint page_no; - mtr_t mtr; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (!limit) { - - return; - } -loop: - mtr_start(&mtr); - - rec = trx_undo_get_first_rec(space, rseg->zip_size, - hdr_page_no, hdr_offset, - RW_X_LATCH, &mtr); - if (rec == NULL) { - /* Already empty */ - - mtr_commit(&mtr); - - return; - } - - undo_page = page_align(rec); - - last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no, - hdr_offset); - if (trx_undo_rec_get_undo_no(last_rec) >= limit) { - - mtr_commit(&mtr); - - return; - } - - page_no = page_get_page_no(undo_page); - - if (page_no == hdr_page_no) { - trx_undo_empty_header_page(space, rseg->zip_size, - hdr_page_no, hdr_offset, - &mtr); - } else { - trx_undo_free_page(rseg, TRUE, space, hdr_page_no, - page_no, &mtr); - } - - mtr_commit(&mtr); - - goto loop; -} - -/**********************************************************************//** -Frees an undo log segment which is not in the history list. 
*/ -static -void -trx_undo_seg_free( -/*==============*/ - trx_undo_t* undo) /*!< in: undo log */ -{ - trx_rseg_t* rseg; - fseg_header_t* file_seg; - trx_rsegf_t* rseg_header; - trx_usegf_t* seg_header; - ibool finished; - mtr_t mtr; - - rseg = undo->rseg; - - do { - - mtr_start(&mtr); - - mutex_enter(&(rseg->mutex)); - - seg_header = trx_undo_page_get(undo->space, undo->zip_size, - undo->hdr_page_no, - &mtr) + TRX_UNDO_SEG_HDR; - - file_seg = seg_header + TRX_UNDO_FSEG_HEADER; - - finished = fseg_free_step(file_seg, &mtr); - - if (finished) { - /* Update the rseg header */ - rseg_header = trx_rsegf_get( - rseg->space, rseg->zip_size, rseg->page_no, - &mtr); - trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, - &mtr); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); - } - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - } while (!finished); -} - -/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/ - -/********************************************************************//** -Creates and initializes an undo log memory object according to the values -in the header in file, when the database is started. The memory object is -inserted in the appropriate list of rseg. 
-@return own: the undo log memory object */ -static -trx_undo_t* -trx_undo_mem_create_at_db_start( -/*============================*/ - trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - ulint id, /*!< in: slot index within rseg */ - ulint page_no,/*!< in: undo log segment page number */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* undo_page; - trx_upagef_t* page_header; - trx_usegf_t* seg_header; - trx_ulogf_t* undo_header; - trx_undo_t* undo; - ulint type; - ulint state; - trx_id_t trx_id; - ulint offset; - fil_addr_t last_addr; - page_t* last_page; - trx_undo_rec_t* rec; - XID xid; - ibool xid_exists = FALSE; - - if (id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", (ulong) id); - ut_error; - } - - undo_page = trx_undo_page_get(rseg->space, rseg->zip_size, - page_no, mtr); - - page_header = undo_page + TRX_UNDO_PAGE_HDR; - - type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES, - mtr); - seg_header = undo_page + TRX_UNDO_SEG_HDR; - - state = mach_read_from_2(seg_header + TRX_UNDO_STATE); - - offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG); - - undo_header = undo_page + offset; - - trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID); - - xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS, - MLOG_1BYTE, mtr); - - /* Read X/Open XA transaction identification if it exists, or - set it to NULL. 
*/ - - memset(&xid, 0, sizeof(xid)); - xid.formatID = -1; - - if (xid_exists == TRUE) { - trx_undo_read_xid(undo_header, &xid); - } - - mutex_enter(&(rseg->mutex)); - - undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid, - page_no, offset); - mutex_exit(&(rseg->mutex)); - - undo->dict_operation = mtr_read_ulint( - undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr); - - undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID); - undo->state = state; - undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr); - - /* If the log segment is being freed, the page list is inconsistent! */ - if (state == TRX_UNDO_TO_FREE) { - - goto add_to_list; - } - - last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr); - - undo->last_page_no = last_addr.page; - undo->top_page_no = last_addr.page; - - last_page = trx_undo_page_get(rseg->space, rseg->zip_size, - undo->last_page_no, mtr); - - rec = trx_undo_page_get_last_rec(last_page, page_no, offset); - - if (rec == NULL) { - undo->empty = TRUE; - } else { - undo->empty = FALSE; - undo->top_offset = rec - last_page; - undo->top_undo_no = trx_undo_rec_get_undo_no(rec); - } -add_to_list: - if (type == TRX_UNDO_INSERT) { - if (state != TRX_UNDO_CACHED) { - UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list, - undo); - } else { - UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached, - undo); - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); - } - } else { - ut_ad(type == TRX_UNDO_UPDATE); - if (state != TRX_UNDO_CACHED) { - UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list, - undo); - } else { - UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached, - undo); - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); - } - } - - return(undo); -} - -/********************************************************************//** -Initializes the undo log lists for a rollback segment memory copy. This -function is only called when the database is started or a new rollback -segment is created. 
-@return the combined size of undo log segments in pages */ -UNIV_INTERN -ulint -trx_undo_lists_init( -/*================*/ - trx_rseg_t* rseg) /*!< in: rollback segment memory object */ -{ - ulint size = 0; - trx_rsegf_t* rseg_header; - ulint i; - mtr_t mtr; - - UT_LIST_INIT(rseg->update_undo_list); - UT_LIST_INIT(rseg->update_undo_cached); - UT_LIST_INIT(rseg->insert_undo_list); - UT_LIST_INIT(rseg->insert_undo_cached); - - mtr_start(&mtr); - - rseg_header = trx_rsegf_get_new( - rseg->space, rseg->zip_size, rseg->page_no, &mtr); - - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { - ulint page_no; - - page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr); - - /* In forced recovery: try to avoid operations which look - at database pages; undo logs are rapidly changing data, and - the probability that they are in an inconsistent state is - high */ - - if (page_no != FIL_NULL - && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { - - trx_undo_t* undo; - - undo = trx_undo_mem_create_at_db_start( - rseg, i, page_no, &mtr); - - size += undo->size; - - mtr_commit(&mtr); - - mtr_start(&mtr); - - rseg_header = trx_rsegf_get( - rseg->space, rseg->zip_size, rseg->page_no, - &mtr); - - /* Found a used slot */ - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED); - } - } - - mtr_commit(&mtr); - - return(size); -} - -/********************************************************************//** -Creates and initializes an undo log memory object. 
-@return own: the undo log memory object */ -static -trx_undo_t* -trx_undo_mem_create( -/*================*/ - trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - ulint id, /*!< in: slot index within rseg */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is created */ - const XID* xid, /*!< in: X/Open transaction identification */ - ulint page_no,/*!< in: undo log header page number */ - ulint offset) /*!< in: undo log header byte offset on page */ -{ - trx_undo_t* undo; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", (ulong) id); - ut_error; - } - - undo = static_cast<trx_undo_t*>(mem_alloc(sizeof(*undo))); - - if (undo == NULL) { - - return(NULL); - } - - undo->id = id; - undo->type = type; - undo->state = TRX_UNDO_ACTIVE; - undo->del_marks = FALSE; - undo->trx_id = trx_id; - undo->xid = *xid; - - undo->dict_operation = FALSE; - - undo->rseg = rseg; - - undo->space = rseg->space; - undo->zip_size = rseg->zip_size; - undo->hdr_page_no = page_no; - undo->hdr_offset = offset; - undo->last_page_no = page_no; - undo->size = 1; - - undo->empty = TRUE; - undo->top_page_no = page_no; - undo->guess_block = NULL; - - return(undo); -} - -/********************************************************************//** -Initializes a cached undo log object for new use. 
*/ -static -void -trx_undo_mem_init_for_reuse( -/*========================*/ - trx_undo_t* undo, /*!< in: undo log to init */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is created */ - const XID* xid, /*!< in: X/Open XA transaction identification*/ - ulint offset) /*!< in: undo log header byte offset on page */ -{ - ut_ad(mutex_own(&((undo->rseg)->mutex))); - - if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - - mem_analyze_corruption(undo); - ut_error; - } - - undo->state = TRX_UNDO_ACTIVE; - undo->del_marks = FALSE; - undo->trx_id = trx_id; - undo->xid = *xid; - - undo->dict_operation = FALSE; - - undo->hdr_offset = offset; - undo->empty = TRUE; -} - -/********************************************************************//** -Frees an undo log memory copy. */ -UNIV_INTERN -void -trx_undo_mem_free( -/*==============*/ - trx_undo_t* undo) /*!< in: the undo object to be freed */ -{ - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id); - ut_error; - } - - mem_free(undo); -} - -/**********************************************************************//** -Creates a new undo log. 
-@return DB_SUCCESS if successful in creating the new undo lob object, -possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS -DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -dberr_t -trx_undo_create( -/*============*/ - trx_t* trx, /*!< in: transaction */ - trx_rseg_t* rseg, /*!< in: rollback segment memory copy */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is created */ - const XID* xid, /*!< in: X/Open transaction identification*/ - trx_undo_t** undo, /*!< out: the new undo log object, undefined - * if did not succeed */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_rsegf_t* rseg_header; - ulint page_no; - ulint offset; - ulint id; - page_t* undo_page; - dberr_t err; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (rseg->curr_size == rseg->max_size) { - - return(DB_OUT_OF_FILE_SPACE); - } - - rseg->curr_size++; - - rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, rseg->page_no, - mtr); - - err = trx_undo_seg_create(rseg, rseg_header, type, &id, - &undo_page, mtr); - - if (err != DB_SUCCESS) { - /* Did not succeed */ - - rseg->curr_size--; - - return(err); - } - - page_no = page_get_page_no(undo_page); - - offset = trx_undo_header_create(undo_page, trx_id, mtr); - - if (trx->support_xa) { - trx_undo_header_add_space_for_xid(undo_page, - undo_page + offset, mtr); - } - - *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, - page_no, offset); - if (*undo == NULL) { - - err = DB_OUT_OF_MEMORY; - } - - return(err); -} - -/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/ - -/********************************************************************//** -Reuses a cached undo log. 
-@return the undo log memory object, NULL if none cached */ -static -trx_undo_t* -trx_undo_reuse_cached( -/*==================*/ - trx_t* trx, /*!< in: transaction */ - trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is used */ - const XID* xid, /*!< in: X/Open XA transaction identification */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_undo_t* undo; - page_t* undo_page; - ulint offset; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (type == TRX_UNDO_INSERT) { - - undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached); - if (undo == NULL) { - - return(NULL); - } - - UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - } else { - ut_ad(type == TRX_UNDO_UPDATE); - - undo = UT_LIST_GET_FIRST(rseg->update_undo_cached); - if (undo == NULL) { - - return(NULL); - } - - UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo); - - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - } - - ut_ad(undo->size == 1); - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - mem_analyze_corruption(undo); - ut_error; - } - - undo_page = trx_undo_page_get(undo->space, undo->zip_size, - undo->hdr_page_no, mtr); - - if (type == TRX_UNDO_INSERT) { - offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); - - if (trx->support_xa) { - trx_undo_header_add_space_for_xid( - undo_page, undo_page + offset, mtr); - } - } else { - ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_UPDATE); - - offset = trx_undo_header_create(undo_page, trx_id, mtr); - - if (trx->support_xa) { - trx_undo_header_add_space_for_xid( - undo_page, undo_page + offset, mtr); - } - } - - trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); - - return(undo); -} - 
-/**********************************************************************//** -Marks an undo log header as a header of a data dictionary operation -transaction. */ -static -void -trx_undo_mark_as_dict_operation( -/*============================*/ - trx_t* trx, /*!< in: dict op transaction */ - trx_undo_t* undo, /*!< in: assigned undo log */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* hdr_page; - - hdr_page = trx_undo_page_get(undo->space, undo->zip_size, - undo->hdr_page_no, mtr); - - switch (trx_get_dict_operation(trx)) { - case TRX_DICT_OP_NONE: - ut_error; - case TRX_DICT_OP_INDEX: - /* Do not discard the table on recovery. */ - undo->table_id = 0; - break; - case TRX_DICT_OP_TABLE: - undo->table_id = trx->table_id; - break; - } - - mlog_write_ulint(hdr_page + undo->hdr_offset - + TRX_UNDO_DICT_TRANS, - TRUE, MLOG_1BYTE, mtr); - - mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, - undo->table_id, mtr); - - undo->dict_operation = TRUE; -} - -/**********************************************************************//** -Assigns an undo log for a transaction. A new undo log is created or a cached -undo log reused. 
-@return DB_SUCCESS if undo log assign successful, possible error codes -are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY -DB_OUT_OF_MEMORY */ -UNIV_INTERN -dberr_t -trx_undo_assign_undo( -/*=================*/ - trx_t* trx, /*!< in: transaction */ - ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ -{ - trx_rseg_t* rseg; - trx_undo_t* undo; - mtr_t mtr; - dberr_t err = DB_SUCCESS; - - ut_ad(trx); - - if (trx->rseg == NULL) { - return(DB_READ_ONLY); - } - - rseg = trx->rseg; - - ut_ad(mutex_own(&(trx->undo_mutex))); - - mtr_start_trx(&mtr, trx); - - mutex_enter(&rseg->mutex); - - DBUG_EXECUTE_IF( - "ib_create_table_fail_too_many_trx", - err = DB_TOO_MANY_CONCURRENT_TRXS; - goto func_exit; - ); - - undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid, - &mtr); - if (undo == NULL) { - err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid, - &undo, &mtr); - if (err != DB_SUCCESS) { - - goto func_exit; - } - } - - if (type == TRX_UNDO_INSERT) { - UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo); - ut_ad(trx->insert_undo == NULL); - trx->insert_undo = undo; - } else { - UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo); - ut_ad(trx->update_undo == NULL); - trx->update_undo = undo; - } - - if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { - trx_undo_mark_as_dict_operation(trx, undo, &mtr); - } - -func_exit: - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - return(err); -} - -/******************************************************************//** -Sets the state of the undo log segment at a transaction finish. 
-@return undo log segment header page, x-latched */ -UNIV_INTERN -page_t* -trx_undo_set_state_at_finish( -/*=========================*/ - trx_undo_t* undo, /*!< in: undo log memory copy */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; - page_t* undo_page; - ulint state; - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - mem_analyze_corruption(undo); - ut_error; - } - - undo_page = trx_undo_page_get(undo->space, undo->zip_size, - undo->hdr_page_no, mtr); - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - if (undo->size == 1 - && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE) - < TRX_UNDO_PAGE_REUSE_LIMIT) { - - state = TRX_UNDO_CACHED; - - } else if (undo->type == TRX_UNDO_INSERT) { - - state = TRX_UNDO_TO_FREE; - } else { - state = TRX_UNDO_TO_PURGE; - } - - undo->state = state; - - mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr); - - return(undo_page); -} - -/******************************************************************//** -Sets the state of the undo log segment at a transaction prepare. 
-@return undo log segment header page, x-latched */ -UNIV_INTERN -page_t* -trx_undo_set_state_at_prepare( -/*==========================*/ - trx_t* trx, /*!< in: transaction */ - trx_undo_t* undo, /*!< in: undo log memory copy */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_ulogf_t* undo_header; - page_t* undo_page; - ulint offset; - - ut_ad(trx && undo && mtr); - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - mem_analyze_corruption(undo); - ut_error; - } - - undo_page = trx_undo_page_get(undo->space, undo->zip_size, - undo->hdr_page_no, mtr); - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - /*------------------------------*/ - undo->state = TRX_UNDO_PREPARED; - undo->xid = trx->xid; - /*------------------------------*/ - - mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state, - MLOG_2BYTES, mtr); - - offset = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - undo_header = undo_page + offset; - - mlog_write_ulint(undo_header + TRX_UNDO_XID_EXISTS, - TRUE, MLOG_1BYTE, mtr); - - trx_undo_write_xid(undo_header, &undo->xid, mtr); - - return(undo_page); -} - -/**********************************************************************//** -Adds the update undo log header as the first in the history list, and -frees the memory object, or puts it to the list of cached update undo log -segments. 
*/ -UNIV_INTERN -void -trx_undo_update_cleanup( -/*====================*/ - trx_t* trx, /*!< in: trx owning the update undo log */ - page_t* undo_page, /*!< in: update undo log header page, - x-latched */ - mtr_t* mtr) /*!< in: mtr */ -{ - trx_rseg_t* rseg; - trx_undo_t* undo; - - undo = trx->update_undo; - rseg = trx->rseg; - - ut_ad(mutex_own(&(rseg->mutex))); - - trx_purge_add_update_undo_to_history(trx, undo_page, mtr); - - UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo); - - trx->update_undo = NULL; - - if (undo->state == TRX_UNDO_CACHED) { - - UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo); - - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); - } else { - ut_ad(undo->state == TRX_UNDO_TO_PURGE); - - trx_undo_mem_free(undo); - } -} - -/******************************************************************//** -Frees or caches an insert undo log after a transaction commit or rollback. -Knowledge of inserts is not needed after a commit or rollback, therefore -the data can be discarded. 
*/ -UNIV_INTERN -void -trx_undo_insert_cleanup( -/*====================*/ - trx_t* trx) /*!< in: transaction handle */ -{ - trx_undo_t* undo; - trx_rseg_t* rseg; - - undo = trx->insert_undo; - ut_ad(undo); - - rseg = trx->rseg; - - mutex_enter(&(rseg->mutex)); - - UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo); - trx->insert_undo = NULL; - - if (undo->state == TRX_UNDO_CACHED) { - - UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo); - - MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED); - } else { - ut_ad(undo->state == TRX_UNDO_TO_FREE); - - /* Delete first the undo log segment in the file */ - - mutex_exit(&(rseg->mutex)); - - trx_undo_seg_free(undo); - - mutex_enter(&(rseg->mutex)); - - ut_ad(rseg->curr_size > undo->size); - - rseg->curr_size -= undo->size; - - trx_undo_mem_free(undo); - } - - mutex_exit(&(rseg->mutex)); -} - -/********************************************************************//** -At shutdown, frees the undo logs of a PREPARED transaction. */ -UNIV_INTERN -void -trx_undo_free_prepared( -/*===================*/ - trx_t* trx) /*!< in/out: PREPARED transaction */ -{ - ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); - - if (trx->update_undo) { - switch (trx->update_undo->state) { - case TRX_UNDO_PREPARED: - break; - case TRX_UNDO_ACTIVE: - /* lock_trx_release_locks() assigns - trx->is_recovered=false */ - ut_a(srv_read_only_mode - || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); - break; - default: - ut_error; - } - - UT_LIST_REMOVE(undo_list, trx->rseg->update_undo_list, - trx->update_undo); - trx_undo_mem_free(trx->update_undo); - } - if (trx->insert_undo) { - switch (trx->insert_undo->state) { - case TRX_UNDO_PREPARED: - break; - case TRX_UNDO_ACTIVE: - /* lock_trx_release_locks() assigns - trx->is_recovered=false */ - ut_a(srv_read_only_mode - || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO); - break; - default: - ut_error; - } - - UT_LIST_REMOVE(undo_list, trx->rseg->insert_undo_list, - trx->insert_undo); - 
trx_undo_mem_free(trx->insert_undo); - } -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/usr/usr0sess.cc b/storage/xtradb/usr/usr0sess.cc deleted file mode 100644 index e1bd71ff1a0..00000000000 --- a/storage/xtradb/usr/usr0sess.cc +++ /dev/null @@ -1,67 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file usr/usr0sess.cc -Sessions - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ - -#include "usr0sess.h" - -#ifdef UNIV_NONINL -#include "usr0sess.ic" -#endif - -#include "trx0trx.h" - -/*********************************************************************//** -Opens a session. 
-@return own: session object */ -UNIV_INTERN -sess_t* -sess_open(void) -/*===========*/ -{ - sess_t* sess; - - sess = static_cast<sess_t*>(mem_zalloc(sizeof(*sess))); - - sess->state = SESS_ACTIVE; - - sess->trx = trx_allocate_for_background(); - sess->trx->sess = sess; - - return(sess); -} - -/*********************************************************************//** -Closes a session, freeing the memory occupied by it. */ -UNIV_INTERN -void -sess_close( -/*=======*/ - sess_t* sess) /*!< in, own: session object */ -{ - ut_a(UT_LIST_GET_LEN(sess->graphs) == 0); - - trx_free_for_background(sess->trx); - mem_free(sess); -} diff --git a/storage/xtradb/ut/ut0bh.cc b/storage/xtradb/ut/ut0bh.cc deleted file mode 100644 index 1a3038a0d71..00000000000 --- a/storage/xtradb/ut/ut0bh.cc +++ /dev/null @@ -1,159 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/******************************************************************//** -@file ut/ut0bh.cc -Binary min-heap implementation. 
- -Created 2010-05-28 by Sunny Bains -*******************************************************/ - -#include "ut0bh.h" -#include "ut0mem.h" - -#ifdef UNIV_NONINL -#include "ut0bh.ic" -#endif - -#include <string.h> - -/**********************************************************************//** -Create a binary heap. -@return a new binary heap */ -UNIV_INTERN -ib_bh_t* -ib_bh_create( -/*=========*/ - ib_bh_cmp_t compare, /*!< in: comparator */ - ulint sizeof_elem, /*!< in: size of one element */ - ulint max_elems) /*!< in: max elements allowed */ -{ - ulint sz; - ib_bh_t* ib_bh; - - sz = sizeof(*ib_bh) + (sizeof_elem * max_elems); - - ib_bh = (ib_bh_t*) ut_malloc(sz); - memset(ib_bh, 0x0, sz); - - ib_bh->compare = compare; - ib_bh->max_elems = max_elems; - ib_bh->sizeof_elem = sizeof_elem; - - return(ib_bh); -} - -/**********************************************************************//** -Free a binary heap. -@return a new binary heap */ -UNIV_INTERN -void -ib_bh_free( -/*=======*/ - ib_bh_t* ib_bh) /*!< in/own: instance */ -{ - ut_free(ib_bh); -} - -/**********************************************************************//** -Add an element to the binary heap. Note: The element is copied. -@return pointer to added element or NULL if full. */ -UNIV_INTERN -void* -ib_bh_push( -/*=======*/ - ib_bh_t* ib_bh, /*!< in/out: instance */ - const void* elem) /*!< in: element to add */ -{ - void* ptr; - - if (ib_bh_is_full(ib_bh)) { - return(NULL); - } else if (ib_bh_is_empty(ib_bh)) { - ++ib_bh->n_elems; - return(ib_bh_set(ib_bh, 0, elem)); - } else { - ulint i; - - i = ib_bh->n_elems; - - ++ib_bh->n_elems; - - for (ptr = ib_bh_get(ib_bh, i >> 1); - i > 0 && ib_bh->compare(ptr, elem) > 0; - i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) { - - ib_bh_set(ib_bh, i, ptr); - } - - ptr = ib_bh_set(ib_bh, i, elem); - } - - return(ptr); -} - -/**********************************************************************//** -Remove the first element from the binary heap. 
*/ -UNIV_INTERN -void -ib_bh_pop( -/*======*/ - ib_bh_t* ib_bh) /*!< in/out: instance */ -{ - byte* ptr; - byte* last; - ulint parent = 0; - - if (ib_bh_is_empty(ib_bh)) { - return; - } else if (ib_bh_size(ib_bh) == 1) { - --ib_bh->n_elems; - return; - } - - last = (byte*) ib_bh_last(ib_bh); - - /* Start from the child node */ - ptr = (byte*) ib_bh_get(ib_bh, 1); - - while (ptr < last) { - /* If the "right" child node is < "left" child node */ - if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) { - ptr += ib_bh->sizeof_elem; - } - - if (ib_bh->compare(last, ptr) <= 0) { - break; - } - - ib_bh_set(ib_bh, parent, ptr); - - parent = (ptr - (byte*) ib_bh_first(ib_bh)) - / ib_bh->sizeof_elem; - - if ((parent << 1) >= ib_bh_size(ib_bh)) { - break; - } - - ptr = (byte*) ib_bh_get(ib_bh, parent << 1); - } - - --ib_bh->n_elems; - - ib_bh_set(ib_bh, parent, last); -} diff --git a/storage/xtradb/ut/ut0byte.cc b/storage/xtradb/ut/ut0byte.cc deleted file mode 100644 index bc592edc6bf..00000000000 --- a/storage/xtradb/ut/ut0byte.cc +++ /dev/null @@ -1,30 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/***************************************************************//** -@file ut/ut0byte.cc -Byte utilities - -Created 5/11/1994 Heikki Tuuri -********************************************************************/ - -#include "ut0byte.h" - -#ifdef UNIV_NONINL -#include "ut0byte.ic" -#endif diff --git a/storage/xtradb/ut/ut0crc32.cc b/storage/xtradb/ut/ut0crc32.cc deleted file mode 100644 index 15ed6bfadee..00000000000 --- a/storage/xtradb/ut/ut0crc32.cc +++ /dev/null @@ -1,342 +0,0 @@ -/***************************************************************************** - -Copyright (C) 2009, 2010 Facebook, Inc. All Rights Reserved. -Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/***************************************************************//** -@file ut/ut0crc32.cc -CRC32 implementation from Facebook, based on the zlib implementation. 
- -Created Aug 8, 2011, Vasil Dimov, based on mysys/my_crc32.c and -mysys/my_perf.c, contributed by Facebook under the following license. -********************************************************************/ - -/* Copyright (C) 2009-2010 Facebook, Inc. All Rights Reserved. - - Dual licensed under BSD license and GPLv2. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY FACEBOOK, INC. ``AS IS'' AND ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - EVENT SHALL FACEBOOK, INC. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - This program is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. 
 - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */ - -/* The below CRC32 implementation is based on the implementation included with - * zlib with modifications to process 8 bytes at a time and using SSE 4.2 - * extensions when available. The polynomial constant has been changed to - * match the one used by SSE 4.2 and does not return the same value as the - * version used by zlib. This implementation only supports 64-bit - * little-endian processors. The original zlib copyright notice follows. */ - -/* crc32.c -- compute the CRC-32 of a buf stream - * Copyright (C) 1995-2005 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - * - * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster - * CRC methods: exclusive-oring 32 bits of buf at a time, and pre-computing - * tables for updating the shift register in one step with three exclusive-ors - * instead of four steps with four exclusive-ors. This results in about a - * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
- */ - -#include "univ.i" -#include "ut0crc32.h" - -#if defined(__linux__) && defined(__powerpc__) -/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */ -#include <sys/auxv.h> -#include <bits/hwcap.h> -#endif /* defined(__linux__) && defined(__powerpc__) */ - -#include <string.h> - -ib_ut_crc32_t ut_crc32; - -/* Precalculated table used to generate the CRC32 if the CPU does not -have support for it */ -static ib_uint32_t ut_crc32_slice8_table[8][256]; -static ibool ut_crc32_slice8_table_initialized = FALSE; - -/** Text description of CRC32 implementation */ -const char *ut_crc32_implementation = NULL; - -/********************************************************************//** -Initializes the table that is used to generate the CRC32 if the CPU does -not have support for it. */ -#ifndef HAVE_CRC32_VPMSUM -static -void -ut_crc32_slice8_table_init() -/*========================*/ -{ - /* bit-reversed poly 0x1EDC6F41 (from SSE42 crc32 instruction) */ - static const ib_uint32_t poly = 0x82f63b78; - ib_uint32_t n; - ib_uint32_t k; - ib_uint32_t c; - - for (n = 0; n < 256; n++) { - c = n; - for (k = 0; k < 8; k++) { - c = (c & 1) ? 
(poly ^ (c >> 1)) : (c >> 1); - } - ut_crc32_slice8_table[0][n] = c; - } - - for (n = 0; n < 256; n++) { - c = ut_crc32_slice8_table[0][n]; - for (k = 1; k < 8; k++) { - c = ut_crc32_slice8_table[0][c & 0xFF] ^ (c >> 8); - ut_crc32_slice8_table[k][n] = c; - } - } - - ut_crc32_slice8_table_initialized = TRUE; -} -#endif - -#if defined(__GNUC__) && defined(__x86_64__) -/********************************************************************//** -Fetches CPU info */ -static -void -ut_cpuid( -/*=====*/ - ib_uint32_t vend[3], /*!< out: CPU vendor */ - ib_uint32_t* model, /*!< out: CPU model */ - ib_uint32_t* family, /*!< out: CPU family */ - ib_uint32_t* stepping, /*!< out: CPU stepping */ - ib_uint32_t* features_ecx, /*!< out: CPU features ecx */ - ib_uint32_t* features_edx) /*!< out: CPU features edx */ -{ - ib_uint32_t sig; - asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0)); - asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx) - : "a" (1) - : "ebx"); - - *model = ((sig >> 4) & 0xF); - *family = ((sig >> 8) & 0xF); - *stepping = (sig & 0xF); - - if (memcmp(vend, "GenuineIntel", 12) == 0 - || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) { - - *model += (((sig >> 16) & 0xF) << 4); - *family += ((sig >> 20) & 0xFF); - } -} - -/* opcodes taken from objdump of "crc32b (%%rdx), %%rcx" -for RHEL4 support (GCC 3 doesn't support this instruction) */ -#define ut_crc32_sse42_byte \ - asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf0, 0x0a" \ - : "=c"(crc) : "c"(crc), "d"(buf)); \ - len--, buf++ - -/* opcodes taken from objdump of "crc32q (%%rdx), %%rcx" -for RHEL4 support (GCC 3 doesn't support this instruction) */ -#define ut_crc32_sse42_quadword \ - asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf1, 0x0a" \ - : "=c"(crc) : "c"(crc), "d"(buf)); \ - len -= 8, buf += 8 -#endif /* defined(__GNUC__) && defined(__x86_64__) */ - - -#ifdef HAVE_CRC32_VPMSUM -extern "C" { -unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned long 
len); -}; - -UNIV_INLINE -ib_uint32_t -ut_crc32_power8( -/*===========*/ - const byte* buf, /*!< in: data over which to calculate CRC32 */ - ulint len) /*!< in: data length */ -{ - return crc32c_vpmsum(0, buf, len); -} -#endif - -/********************************************************************//** -Calculates CRC32 using CPU instructions. -@return CRC-32C (polynomial 0x11EDC6F41) */ -UNIV_INLINE -ib_uint32_t -ut_crc32_sse42( -/*===========*/ - const byte* buf, /*!< in: data over which to calculate CRC32 */ - ulint len) /*!< in: data length */ -{ -#if defined(__GNUC__) && defined(__x86_64__) - ib_uint64_t crc = (ib_uint32_t) (-1); - - while (len && ((ulint) buf & 7)) { - ut_crc32_sse42_byte; - } - - while (len >= 32) { - ut_crc32_sse42_quadword; - ut_crc32_sse42_quadword; - ut_crc32_sse42_quadword; - ut_crc32_sse42_quadword; - } - - while (len >= 8) { - ut_crc32_sse42_quadword; - } - - while (len) { - ut_crc32_sse42_byte; - } - - return((ib_uint32_t) ((~crc) & 0xFFFFFFFF)); -#else - ut_error; - /* silence compiler warning about unused parameters */ - return((ib_uint32_t) buf[len]); -#endif /* defined(__GNUC__) && defined(__x86_64__) */ -} - -#define ut_crc32_slice8_byte \ - crc = (crc >> 8) ^ ut_crc32_slice8_table[0][(crc ^ *buf++) & 0xFF]; \ - len-- - -#define ut_crc32_slice8_quadword \ - crc ^= *(ib_uint64_t*) buf; \ - crc = ut_crc32_slice8_table[7][(crc ) & 0xFF] ^ \ - ut_crc32_slice8_table[6][(crc >> 8) & 0xFF] ^ \ - ut_crc32_slice8_table[5][(crc >> 16) & 0xFF] ^ \ - ut_crc32_slice8_table[4][(crc >> 24) & 0xFF] ^ \ - ut_crc32_slice8_table[3][(crc >> 32) & 0xFF] ^ \ - ut_crc32_slice8_table[2][(crc >> 40) & 0xFF] ^ \ - ut_crc32_slice8_table[1][(crc >> 48) & 0xFF] ^ \ - ut_crc32_slice8_table[0][(crc >> 56)]; \ - len -= 8, buf += 8 - -/********************************************************************//** -Calculates CRC32 manually. 
-@return CRC-32C (polynomial 0x11EDC6F41) */ -UNIV_INLINE -ib_uint32_t -ut_crc32_slice8( -/*============*/ - const byte* buf, /*!< in: data over which to calculate CRC32 */ - ulint len) /*!< in: data length */ -{ - ib_uint64_t crc = (ib_uint32_t) (-1); - - ut_a(ut_crc32_slice8_table_initialized); - - while (len && ((ulint) buf & 7)) { - ut_crc32_slice8_byte; - } - - while (len >= 32) { - ut_crc32_slice8_quadword; - ut_crc32_slice8_quadword; - ut_crc32_slice8_quadword; - ut_crc32_slice8_quadword; - } - - while (len >= 8) { - ut_crc32_slice8_quadword; - } - - while (len) { - ut_crc32_slice8_byte; - } - - return((ib_uint32_t) ((~crc) & 0xFFFFFFFF)); -} - -/********************************************************************//** -Initializes the data structures used by ut_crc32(). Does not do any -allocations, would not hurt if called twice, but would be pointless. */ -UNIV_INTERN -void -ut_crc32_init() -/*===========*/ -{ - ut_crc32_slice8_table_init(); - ut_crc32 = ut_crc32_slice8; - ut_crc32_implementation = "Using generic crc32 instructions"; - -#if defined(__GNUC__) && defined(__x86_64__) - ib_uint32_t vend[3]; - ib_uint32_t model; - ib_uint32_t family; - ib_uint32_t stepping; - ib_uint32_t features_ecx; - ib_uint32_t features_edx; - - ut_cpuid(vend, &model, &family, &stepping, - &features_ecx, &features_edx); - - /* Valgrind does not understand the CRC32 instructions: - - vex amd64->IR: unhandled instruction bytes: 0xF2 0x48 0xF 0x38 0xF0 0xA - valgrind: Unrecognised instruction at address 0xad3db5. - Your program just tried to execute an instruction that Valgrind - did not recognise. There are two possible reasons for this. - 1. Your program has a bug and erroneously jumped to a non-code - location. If you are running Memcheck and you just saw a - warning about a bad jump, it's probably your program's fault. - 2. The instruction is legitimate but Valgrind doesn't handle it, - i.e. it's Valgrind's fault. 
If you think this is the case or - you are not sure, please let us know and we'll try to fix it. - Either way, Valgrind will now raise a SIGILL signal which will - probably kill your program. - - */ - if ((features_ecx >> 20) & 1) { - ut_crc32 = ut_crc32_sse42; - ut_crc32_implementation = "Using SSE2 crc32 instructions"; - } - -#elif defined(HAVE_CRC32_VPMSUM) - ut_crc32 = ut_crc32_power8; - ut_crc32_implementation = "Using POWER8 crc32 instructions"; -#endif -} diff --git a/storage/xtradb/ut/ut0dbg.cc b/storage/xtradb/ut/ut0dbg.cc deleted file mode 100644 index a1cad144da4..00000000000 --- a/storage/xtradb/ut/ut0dbg.cc +++ /dev/null @@ -1,139 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*****************************************************************//** -@file ut/ut0dbg.cc -Debug utilities for Innobase. 
- -Created 1/30/1994 Heikki Tuuri -**********************************************************************/ - -#include "univ.i" -#include "ut0dbg.h" -#ifndef UNIV_HOTBACKUP -# include "ha_prototypes.h" -#endif /* !UNIV_HOTBACKUP */ - -#if defined(__GNUC__) && (__GNUC__ > 2) -#else -/** This is used to eliminate compiler warnings */ -UNIV_INTERN ulint ut_dbg_zero = 0; -#endif - -/*************************************************************//** -Report a failed assertion. */ -UNIV_INTERN -void -ut_dbg_assertion_failed( -/*====================*/ - const char* expr, /*!< in: the failed assertion (optional) */ - const char* file, /*!< in: source file containing the assertion */ - ulint line) /*!< in: line number of the assertion */ -{ - ut_print_timestamp(stderr); -#ifdef UNIV_HOTBACKUP - fprintf(stderr, " InnoDB: Assertion failure in file %s line %lu\n", - file, line); -#else /* UNIV_HOTBACKUP */ - fprintf(stderr, - " InnoDB: Assertion failure in thread %lu" - " in file %s line %lu\n", - os_thread_pf(os_thread_get_curr_id()), - innobase_basename(file), line); -#endif /* UNIV_HOTBACKUP */ - if (expr) { - fprintf(stderr, - "InnoDB: Failing assertion: %s\n", expr); - } - - fputs("InnoDB: We intentionally generate a memory trap.\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com.\n" - "InnoDB: If you get repeated assertion failures" - " or crashes, even\n" - "InnoDB: immediately after the mysqld startup, there may be\n" - "InnoDB: corruption in the InnoDB tablespace. 
Please refer to\n" - "InnoDB: " REFMAN "forcing-innodb-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -#include <sys/types.h> -#include <sys/time.h> -#include <sys/resource.h> - -#include <unistd.h> - -#ifndef timersub -#define timersub(a, b, r) \ - do { \ - (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((r)->tv_usec < 0) { \ - (r)->tv_sec--; \ - (r)->tv_usec += 1000000; \ - } \ - } while (0) -#endif /* timersub */ - -/*******************************************************************//** -Resets a speedo (records the current time in it). */ -UNIV_INTERN -void -speedo_reset( -/*=========*/ - speedo_t* speedo) /*!< out: speedo */ -{ - gettimeofday(&speedo->tv, NULL); - - getrusage(RUSAGE_SELF, &speedo->ru); -} - -/*******************************************************************//** -Shows the time elapsed and usage statistics since the last reset of a -speedo. */ -UNIV_INTERN -void -speedo_show( -/*========*/ - const speedo_t* speedo) /*!< in: speedo */ -{ - struct rusage ru_now; - struct timeval tv_now; - struct timeval tv_diff; - - getrusage(RUSAGE_SELF, &ru_now); - - gettimeofday(&tv_now, NULL); - -#define PRINT_TIMEVAL(prefix, tvp) \ - fprintf(stderr, "%s% 5ld.%06ld sec\n", \ - prefix, (tvp)->tv_sec, (tvp)->tv_usec) - - timersub(&tv_now, &speedo->tv, &tv_diff); - PRINT_TIMEVAL("real", &tv_diff); - - timersub(&ru_now.ru_utime, &speedo->ru.ru_utime, &tv_diff); - PRINT_TIMEVAL("user", &tv_diff); - - timersub(&ru_now.ru_stime, &speedo->ru.ru_stime, &tv_diff); - PRINT_TIMEVAL("sys ", &tv_diff); -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ diff --git a/storage/xtradb/ut/ut0list.cc b/storage/xtradb/ut/ut0list.cc deleted file mode 100644 index f906061d185..00000000000 --- a/storage/xtradb/ut/ut0list.cc +++ /dev/null @@ -1,203 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2011, Oracle and/or its 
affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file ut/ut0list.cc -A double-linked list - -Created 4/26/2006 Osku Salerma -************************************************************************/ - -#include "ut0list.h" -#ifdef UNIV_NONINL -#include "ut0list.ic" -#endif - -/****************************************************************//** -Create a new list. -@return list */ -UNIV_INTERN -ib_list_t* -ib_list_create(void) -/*=================*/ -{ - ib_list_t* list; - - list = static_cast<ib_list_t*>(mem_alloc(sizeof(*list))); - - list->first = NULL; - list->last = NULL; - list->is_heap_list = FALSE; - - return(list); -} - -/****************************************************************//** -Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for -lists created with this function. 
-@return list */ -UNIV_INTERN -ib_list_t* -ib_list_create_heap( -/*================*/ - mem_heap_t* heap) /*!< in: memory heap to use */ -{ - ib_list_t* list; - - list = static_cast<ib_list_t*>(mem_heap_alloc(heap, sizeof(*list))); - - list->first = NULL; - list->last = NULL; - list->is_heap_list = TRUE; - - return(list); -} - -/****************************************************************//** -Free a list. */ -UNIV_INTERN -void -ib_list_free( -/*=========*/ - ib_list_t* list) /*!< in: list */ -{ - ut_a(!list->is_heap_list); - - /* We don't check that the list is empty because it's entirely valid - to e.g. have all the nodes allocated from a single heap that is then - freed after the list itself is freed. */ - - mem_free(list); -} - -/****************************************************************//** -Add the data to the start of the list. -@return new list node */ -UNIV_INTERN -ib_list_node_t* -ib_list_add_first( -/*==============*/ - ib_list_t* list, /*!< in: list */ - void* data, /*!< in: data */ - mem_heap_t* heap) /*!< in: memory heap to use */ -{ - return(ib_list_add_after(list, ib_list_get_first(list), data, heap)); -} - -/****************************************************************//** -Add the data to the end of the list. -@return new list node */ -UNIV_INTERN -ib_list_node_t* -ib_list_add_last( -/*=============*/ - ib_list_t* list, /*!< in: list */ - void* data, /*!< in: data */ - mem_heap_t* heap) /*!< in: memory heap to use */ -{ - return(ib_list_add_after(list, ib_list_get_last(list), data, heap)); -} - -/****************************************************************//** -Add the data after the indicated node. 
-@return new list node */ -UNIV_INTERN -ib_list_node_t* -ib_list_add_after( -/*==============*/ - ib_list_t* list, /*!< in: list */ - ib_list_node_t* prev_node, /*!< in: node preceding new node (can - be NULL) */ - void* data, /*!< in: data */ - mem_heap_t* heap) /*!< in: memory heap to use */ -{ - ib_list_node_t* node; - - node = static_cast<ib_list_node_t*>( - mem_heap_alloc(heap, sizeof(*node))); - - node->data = data; - - if (!list->first) { - /* Empty list. */ - - ut_a(!prev_node); - - node->prev = NULL; - node->next = NULL; - - list->first = node; - list->last = node; - } else if (!prev_node) { - /* Start of list. */ - - node->prev = NULL; - node->next = list->first; - - list->first->prev = node; - - list->first = node; - } else { - /* Middle or end of list. */ - - node->prev = prev_node; - node->next = prev_node->next; - - prev_node->next = node; - - if (node->next) { - node->next->prev = node; - } else { - list->last = node; - } - } - - return(node); -} - -/****************************************************************//** -Remove the node from the list. */ -UNIV_INTERN -void -ib_list_remove( -/*===========*/ - ib_list_t* list, /*!< in: list */ - ib_list_node_t* node) /*!< in: node to remove */ -{ - if (node->prev) { - node->prev->next = node->next; - } else { - /* First item in list. */ - - ut_ad(list->first == node); - - list->first = node->next; - } - - if (node->next) { - node->next->prev = node->prev; - } else { - /* Last item in list. */ - - ut_ad(list->last == node); - - list->last = node->prev; - } - - node->prev = node->next = NULL; -} diff --git a/storage/xtradb/ut/ut0mem.cc b/storage/xtradb/ut/ut0mem.cc deleted file mode 100644 index 2bb5d9ce332..00000000000 --- a/storage/xtradb/ut/ut0mem.cc +++ /dev/null @@ -1,609 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file ut/ut0mem.cc -Memory primitives - -Created 5/11/1994 Heikki Tuuri -*************************************************************************/ - -#include "ut0mem.h" - -#ifdef UNIV_NONINL -#include "ut0mem.ic" -#endif - -#ifndef UNIV_HOTBACKUP -# include "os0thread.h" -# include "srv0srv.h" - -#include <stdlib.h> - -/** The total amount of memory currently allocated from the operating -system with os_mem_alloc_large() or malloc(). Does not count malloc() -if srv_use_sys_malloc is set. Protected by ut_list_mutex. */ -UNIV_INTERN ulint ut_total_allocated_memory = 0; - -/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */ -UNIV_INTERN os_fast_mutex_t ut_list_mutex; - -#ifdef UNIV_PFS_MUTEX -/* Key to register ut_list_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t ut_list_mutex_key; -#endif - -/** Dynamically allocated memory block; this header is stored immediately -in front of the memory handed out by ut_malloc_low() */ -struct ut_mem_block_t{ - UT_LIST_NODE_T(ut_mem_block_t) mem_block_list; - /*!< mem block list node */ - ulint size; /*!< size of the whole allocation in bytes, including this header */ - ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */ -}; - -/** The value of ut_mem_block_t::magic_n. Used in detecting -memory corruption.
*/ -#define UT_MEM_MAGIC_N 1601650166 - -/** List of all memory blocks allocated from the operating system -with malloc. Protected by ut_list_mutex. */ -static UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list; - -/** Flag: has ut_mem_block_list been initialized? */ -static ibool ut_mem_block_list_inited = FALSE; - -/** A dummy pointer for generating a null pointer exception in -ut_malloc_low() */ -static ulint* ut_mem_null_ptr = NULL; - -/**********************************************************************//** -Initializes the mem block list at database startup. */ -UNIV_INTERN -void -ut_mem_init(void) -/*=============*/ -{ - ut_a(!ut_mem_block_list_inited); - os_fast_mutex_init(ut_list_mutex_key, &ut_list_mutex); - UT_LIST_INIT(ut_mem_block_list); - ut_mem_block_list_inited = TRUE; -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Allocates memory. -@return own: allocated memory */ -UNIV_INTERN -void* -ut_malloc_low( -/*==========*/ - ulint n, /*!< in: number of bytes to allocate */ - ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the - memory cannot be allocated */ -{ -#ifndef UNIV_HOTBACKUP - ulint retry_count; - void* ret; - - if (UNIV_LIKELY(srv_use_sys_malloc)) { - ret = malloc(n); - ut_a(ret || !assert_on_error); - - return(ret); - } - - ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */ - ut_a(ut_mem_block_list_inited); - - retry_count = 0; -retry: - os_fast_mutex_lock(&ut_list_mutex); - - ret = malloc(n + sizeof(ut_mem_block_t)); - - if (ret == NULL && retry_count < 60) { - if (retry_count == 0) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: cannot allocate" - " %lu bytes of\n" - "InnoDB: memory with malloc!" - " Total allocated memory\n" - "InnoDB: by InnoDB %lu bytes." 
- " Operating system errno: %lu\n" - "InnoDB: Check if you should" - " increase the swap file or\n" - "InnoDB: ulimits of your operating system.\n" - "InnoDB: On FreeBSD check you" - " have compiled the OS with\n" - "InnoDB: a big enough maximum process size.\n" - "InnoDB: Note that in most 32-bit" - " computers the process\n" - "InnoDB: memory space is limited" - " to 2 GB or 4 GB.\n" - "InnoDB: We keep retrying" - " the allocation for 60 seconds...\n", - (ulong) n, (ulong) ut_total_allocated_memory, -#ifdef __WIN__ - (ulong) GetLastError() -#else - (ulong) errno -#endif - ); - } - - os_fast_mutex_unlock(&ut_list_mutex); - - /* Sleep for a second and retry the allocation; maybe this is - just a temporary shortage of memory */ - - os_thread_sleep(1000000); - - retry_count++; - - goto retry; - } - - if (ret == NULL) { - /* Flush stderr to make more probable that the error - message gets in the error file before we generate a seg - fault */ - - fflush(stderr); - - os_fast_mutex_unlock(&ut_list_mutex); - - /* Make an intentional seg fault so that we get a stack - trace */ - if (assert_on_error) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: We now intentionally" - " generate a seg fault so that\n" - "InnoDB: on Linux we get a stack trace.\n"); - - if (*ut_mem_null_ptr) ut_mem_null_ptr = 0; - } else { - return(NULL); - } - } - - UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t)); - - ((ut_mem_block_t*) ret)->size = n + sizeof(ut_mem_block_t); - ((ut_mem_block_t*) ret)->magic_n = UT_MEM_MAGIC_N; - - ut_total_allocated_memory += n + sizeof(ut_mem_block_t); - - UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list, - ((ut_mem_block_t*) ret)); - os_fast_mutex_unlock(&ut_list_mutex); - - return((void*)((byte*) ret + sizeof(ut_mem_block_t))); -#else /* !UNIV_HOTBACKUP */ - void* ret = malloc(n); - ut_a(ret || !assert_on_error); - - return(ret); -#endif /* !UNIV_HOTBACKUP */ -} - -/**********************************************************************//** -Frees a 
memory block allocated with ut_malloc. Freeing a NULL pointer is -a nop. */ -UNIV_INTERN -void -ut_free( -/*====*/ - void* ptr) /*!< in, own: memory block, can be NULL */ -{ -#ifndef UNIV_HOTBACKUP - ut_mem_block_t* block; - - if (ptr == NULL) { - return; - } else if (UNIV_LIKELY(srv_use_sys_malloc)) { - free(ptr); - return; - } - - block = (ut_mem_block_t*)((byte*) ptr - sizeof(ut_mem_block_t)); - - os_fast_mutex_lock(&ut_list_mutex); - - ut_a(block->magic_n == UT_MEM_MAGIC_N); - ut_a(ut_total_allocated_memory >= block->size); - - ut_total_allocated_memory -= block->size; - - UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); - free(block); - - os_fast_mutex_unlock(&ut_list_mutex); -#else /* !UNIV_HOTBACKUP */ - free(ptr); -#endif /* !UNIV_HOTBACKUP */ -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not -use this function because the allocation functions in mem0mem.h are the -recommended ones in InnoDB. - -man realloc in Linux, 2004: - - realloc() changes the size of the memory block pointed to - by ptr to size bytes. The contents will be unchanged to - the minimum of the old and new sizes; newly allocated mem- - ory will be uninitialized. If ptr is NULL, the call is - equivalent to malloc(size); if size is equal to zero, the - call is equivalent to free(ptr). Unless ptr is NULL, it - must have been returned by an earlier call to malloc(), - calloc() or realloc(). - -RETURN VALUE - realloc() returns a pointer to the newly allocated memory, - which is suitably aligned for any kind of variable and may - be different from ptr, or NULL if the request fails. If - size was equal to 0, either NULL or a pointer suitable to - be passed to free() is returned. If realloc() fails the - original block is left untouched - it is not freed or - moved. 
-@return own: pointer to new mem block or NULL */ -UNIV_INTERN -void* -ut_realloc( -/*=======*/ - void* ptr, /*!< in: pointer to old block or NULL */ - ulint size) /*!< in: desired size */ -{ - ut_mem_block_t* block; - ulint old_size; - ulint min_size; - void* new_ptr; - - if (UNIV_LIKELY(srv_use_sys_malloc)) { - return(realloc(ptr, size)); - } - - if (ptr == NULL) { - - return(ut_malloc(size)); - } - - if (size == 0) { - ut_free(ptr); - - return(NULL); - } - - block = (ut_mem_block_t*)((byte*) ptr - sizeof(ut_mem_block_t)); - - ut_a(block->magic_n == UT_MEM_MAGIC_N); - - old_size = block->size - sizeof(ut_mem_block_t); - - if (size < old_size) { - min_size = size; - } else { - min_size = old_size; - } - - new_ptr = ut_malloc(size); - - if (new_ptr == NULL) { - - return(NULL); - } - - /* Copy the old data from ptr */ - ut_memcpy(new_ptr, ptr, min_size); - - ut_free(ptr); - - return(new_ptr); -} - -/**********************************************************************//** -Frees in shutdown all allocated memory not freed yet. */ -UNIV_INTERN -void -ut_free_all_mem(void) -/*=================*/ -{ - ut_mem_block_t* block; - - ut_a(ut_mem_block_list_inited); - ut_mem_block_list_inited = FALSE; - os_fast_mutex_free(&ut_list_mutex); - - while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) { - - ut_a(block->magic_n == UT_MEM_MAGIC_N); - ut_a(ut_total_allocated_memory >= block->size); - - ut_total_allocated_memory -= block->size; - - UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); - free(block); - } - - if (ut_total_allocated_memory != 0) { - fprintf(stderr, - "InnoDB: Warning: after shutdown" - " total allocated memory is %lu\n", - (ulong) ut_total_allocated_memory); - } - - ut_mem_block_list_inited = FALSE; -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************************//** -Copies up to size - 1 characters from the NUL-terminated string src to -dst, NUL-terminating the result. 
Returns strlen(src), so truncation -occurred if the return value >= size. -@return strlen(src) */ -UNIV_INTERN -ulint -ut_strlcpy( -/*=======*/ - char* dst, /*!< in: destination buffer */ - const char* src, /*!< in: source buffer */ - ulint size) /*!< in: size of destination buffer */ -{ - ulint src_size = strlen(src); - - if (size != 0) { - ulint n = ut_min(src_size, size - 1); - - memcpy(dst, src, n); - dst[n] = '\0'; - } - - return(src_size); -} - -/**********************************************************************//** -Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last -(size - 1) bytes of src, not the first. -@return strlen(src) */ -UNIV_INTERN -ulint -ut_strlcpy_rev( -/*===========*/ - char* dst, /*!< in: destination buffer */ - const char* src, /*!< in: source buffer */ - ulint size) /*!< in: size of destination buffer */ -{ - ulint src_size = strlen(src); - - if (size != 0) { - ulint n = ut_min(src_size, size - 1); - - memcpy(dst, src + src_size - n, n + 1); - } - - return(src_size); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Return the number of times s2 occurs in s1. Overlapping instances of s2 -are only counted once. 
-@return the number of times s2 occurs in s1 */ -UNIV_INTERN -ulint -ut_strcount( -/*========*/ - const char* s1, /*!< in: string to search in */ - const char* s2) /*!< in: string to search for */ -{ - ulint count = 0; - ulint len = strlen(s2); - - if (len == 0) { - - return(0); - } - - for (;;) { - s1 = strstr(s1, s2); - - if (!s1) { - - break; - } - - count++; - s1 += len; - } - - return(count); -} - -/******************************************************************** -Concatenate 3 strings.*/ - -char* -ut_str3cat( -/*=======*/ - /* out, own: concatenated string, must be - freed with mem_free() */ - const char* s1, /* in: string 1 */ - const char* s2, /* in: string 2 */ - const char* s3) /* in: string 3 */ -{ - char* s; - ulint s1_len = strlen(s1); - ulint s2_len = strlen(s2); - ulint s3_len = strlen(s3); - - s = static_cast<char*>(mem_alloc(s1_len + s2_len + s3_len + 1)); - - memcpy(s, s1, s1_len); - memcpy(s + s1_len, s2, s2_len); - memcpy(s + s1_len + s2_len, s3, s3_len); - - s[s1_len + s2_len + s3_len] = '\0'; - - return(s); -} -/**********************************************************************//** -Replace every occurrence of s1 in str with s2. Overlapping instances of s1 -are only replaced once. 
-@return own: modified string, must be freed with mem_free() */ -UNIV_INTERN -char* -ut_strreplace( -/*==========*/ - const char* str, /*!< in: string to operate on */ - const char* s1, /*!< in: string to replace */ - const char* s2) /*!< in: string to replace s1 with */ -{ - char* new_str; - char* ptr; - const char* str_end; - ulint str_len = strlen(str); - ulint s1_len = strlen(s1); - ulint s2_len = strlen(s2); - ulint count = 0; - int len_delta = (int) s2_len - (int) s1_len; - - str_end = str + str_len; - - if (len_delta <= 0) { - len_delta = 0; - } else { - count = ut_strcount(str, s1); - } - - new_str = static_cast<char*>( - mem_alloc(str_len + count * len_delta + 1)); - - ptr = new_str; - - while (str) { - const char* next = strstr(str, s1); - - if (!next) { - next = str_end; - } - - memcpy(ptr, str, next - str); - ptr += next - str; - - if (next == str_end) { - - break; - } - - memcpy(ptr, s2, s2_len); - ptr += s2_len; - - str = next + s1_len; - } - - *ptr = '\0'; - - return(new_str); -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -void -test_ut_str_sql_format() -{ - char buf[128]; - ulint ret; - -#define CALL_AND_TEST(str, str_len, buf, buf_size, ret_expected, buf_expected)\ - do {\ - ibool ok = TRUE;\ - memset(buf, 'x', 10);\ - buf[10] = '\0';\ - fprintf(stderr, "TESTING \"%s\", %lu, %lu\n",\ - str, (ulint) str_len, (ulint) buf_size);\ - ret = ut_str_sql_format(str, str_len, buf, buf_size);\ - if (ret != ret_expected) {\ - fprintf(stderr, "expected ret %lu, got %lu\n",\ - (ulint) ret_expected, ret);\ - ok = FALSE;\ - }\ - if (strcmp((char*) buf, buf_expected) != 0) {\ - fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\ - buf_expected, buf);\ - ok = FALSE;\ - }\ - if (ok) {\ - fprintf(stderr, "OK: %lu, \"%s\"\n\n",\ - (ulint) ret, buf);\ - } else {\ - return;\ - }\ - } while (0) - - CALL_AND_TEST("abcd", 4, buf, 0, 0, "xxxxxxxxxx"); - - CALL_AND_TEST("abcd", 4, buf, 1, 1, ""); - - CALL_AND_TEST("abcd", 4, buf, 2, 1, ""); - - CALL_AND_TEST("abcd", 0, buf, 3, 3, 
"''"); - CALL_AND_TEST("abcd", 1, buf, 3, 1, ""); - CALL_AND_TEST("abcd", 2, buf, 3, 1, ""); - CALL_AND_TEST("abcd", 3, buf, 3, 1, ""); - CALL_AND_TEST("abcd", 4, buf, 3, 1, ""); - - CALL_AND_TEST("abcd", 0, buf, 4, 3, "''"); - CALL_AND_TEST("abcd", 1, buf, 4, 4, "'a'"); - CALL_AND_TEST("abcd", 2, buf, 4, 4, "'a'"); - CALL_AND_TEST("abcd", 3, buf, 4, 4, "'a'"); - CALL_AND_TEST("abcd", 4, buf, 4, 4, "'a'"); - CALL_AND_TEST("abcde", 5, buf, 4, 4, "'a'"); - CALL_AND_TEST("'", 1, buf, 4, 3, "''"); - CALL_AND_TEST("''", 2, buf, 4, 3, "''"); - CALL_AND_TEST("a'", 2, buf, 4, 4, "'a'"); - CALL_AND_TEST("'a", 2, buf, 4, 3, "''"); - CALL_AND_TEST("ab", 2, buf, 4, 4, "'a'"); - - CALL_AND_TEST("abcdef", 0, buf, 5, 3, "''"); - CALL_AND_TEST("abcdef", 1, buf, 5, 4, "'a'"); - CALL_AND_TEST("abcdef", 2, buf, 5, 5, "'ab'"); - CALL_AND_TEST("abcdef", 3, buf, 5, 5, "'ab'"); - CALL_AND_TEST("abcdef", 4, buf, 5, 5, "'ab'"); - CALL_AND_TEST("abcdef", 5, buf, 5, 5, "'ab'"); - CALL_AND_TEST("abcdef", 6, buf, 5, 5, "'ab'"); - CALL_AND_TEST("'", 1, buf, 5, 5, "''''"); - CALL_AND_TEST("''", 2, buf, 5, 5, "''''"); - CALL_AND_TEST("a'", 2, buf, 5, 4, "'a'"); - CALL_AND_TEST("'a", 2, buf, 5, 5, "''''"); - CALL_AND_TEST("ab", 2, buf, 5, 5, "'ab'"); - CALL_AND_TEST("abc", 3, buf, 5, 5, "'ab'"); - - CALL_AND_TEST("ab", 2, buf, 6, 5, "'ab'"); - - CALL_AND_TEST("a'b'c", 5, buf, 32, 10, "'a''b''c'"); - CALL_AND_TEST("a'b'c'", 6, buf, 32, 12, "'a''b''c'''"); -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/ut/ut0rbt.cc b/storage/xtradb/ut/ut0rbt.cc deleted file mode 100644 index a6c02a8514a..00000000000 --- a/storage/xtradb/ut/ut0rbt.cc +++ /dev/null @@ -1,1353 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved. 
- -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ -/********************************************************************//** -Red-Black tree implementation - -(c) 2007 Oracle/Innobase Oy - -Created 2007-03-20 Sunny Bains -***********************************************************************/ - -#include "ut0rbt.h" - -/**********************************************************************//** -Definition of a red-black tree -============================== - -A red-black tree is a binary search tree which has the following -red-black properties: - - 1. Every node is either red or black. - 2. Every leaf (NULL - in our case tree->nil) is black. - 3. If a node is red, then both its children are black. - 4. Every simple path from a node to a descendant leaf contains the - same number of black nodes. - - from (3) above, the implication is that on any path from the root - to a leaf, red nodes must not be adjacent. - - However, any number of black nodes may appear in a sequence. - */ - -#if defined(IB_RBT_TESTING) -#warning "Testing enabled!" -#endif - -#define ROOT(t) (t->root->left) - -/**********************************************************************//** -Print out the sub-tree recursively. 
*/ -static -void -rbt_print_subtree( -/*==============*/ - const ib_rbt_t* tree, /*!< in: tree to traverse */ - const ib_rbt_node_t* node, /*!< in: node to print */ - ib_rbt_print_node print) /*!< in: print key function */ -{ - /* FIXME: Doesn't do anything yet */ - if (node != tree->nil) { - print(node); - rbt_print_subtree(tree, node->left, print); - rbt_print_subtree(tree, node->right, print); - } -} - -/**********************************************************************//** -Verify that the keys are in order. -@return TRUE of OK. FALSE if not ordered */ -static -ibool -rbt_check_ordering( -/*===============*/ - const ib_rbt_t* tree) /*!< in: tree to verfify */ -{ - const ib_rbt_node_t* node; - const ib_rbt_node_t* prev = NULL; - - /* Iterate over all the nodes, comparing each node with the prev */ - for (node = rbt_first(tree); node; node = rbt_next(tree, prev)) { - - if (prev) { - int result; - - if (tree->cmp_arg) { - result = tree->compare_with_arg( - tree->cmp_arg, prev->value, - node->value); - } else { - result = tree->compare( - prev->value, node->value); - } - - if (result >= 0) { - return(FALSE); - } - } - - prev = node; - } - - return(TRUE); -} - -/**********************************************************************//** -Check that every path from the root to the leaves has the same count. -Count is expressed in the number of black nodes. 
-@return 0 on failure else black height of the subtree; note that the -height is returned through the ibool return type */ -static -ibool -rbt_count_black_nodes( -/*==================*/ - const ib_rbt_t* tree, /*!< in: tree to verify */ - const ib_rbt_node_t* node) /*!< in: start of sub-tree */ -{ - ulint result; - - if (node != tree->nil) { - ulint left_height = rbt_count_black_nodes(tree, node->left); - - ulint right_height = rbt_count_black_nodes(tree, node->right); - - if (left_height == 0 - || right_height == 0 - || left_height != right_height) { - - result = 0; - } else if (node->color == IB_RBT_RED) { - - /* Property 3: a red node must have - two black children. */ - if (node->left->color != IB_RBT_BLACK - || node->right->color != IB_RBT_BLACK) { - - result = 0; - } else { - result = left_height; - } - /* Check if it's anything other than RED or BLACK. */ - } else if (node->color != IB_RBT_BLACK) { - - result = 0; - } else { - - result = right_height + 1; - } - } else { - result = 1; - } - - return(result); -} - -/**********************************************************************//** -Turn the node's right child's left sub-tree into node's right sub-tree. -This will also make node's right child its parent. */ -static -void -rbt_rotate_left( -/*============*/ - const ib_rbt_node_t* nil, /*!< in: nil node of the tree */ - ib_rbt_node_t* node) /*!< in: node to rotate */ -{ - ib_rbt_node_t* right = node->right; - - node->right = right->left; - - if (right->left != nil) { - right->left->parent = node; - } - - /* Right's new parent was node's parent. */ - right->parent = node->parent; - - /* Since root's parent is tree->nil and root->parent->left points - back to root, we can avoid the check. */ - if (node == node->parent->left) { - /* Node was on the left of its parent. */ - node->parent->left = right; - } else { - /* Node must have been on the right. */ - node->parent->right = right; - } - - /* Finally, put node on right's left.
*/ - right->left = node; - node->parent = right; -} - -/**********************************************************************//** -Turn the node's left child's right sub-tree into node's left sub-tree. -This also makes node's left child its parent. */ -static -void -rbt_rotate_right( -/*=============*/ - const ib_rbt_node_t* nil, /*!< in: nil node of tree */ - ib_rbt_node_t* node) /*!< in: node to rotate */ -{ - ib_rbt_node_t* left = node->left; - - node->left = left->right; - - if (left->right != nil) { - left->right->parent = node; - } - - /* Left's new parent was node's parent. */ - left->parent = node->parent; - - /* Since root's parent is tree->nil and root->parent->left points - back to root, we can avoid the check. */ - if (node == node->parent->right) { - /* Node was on the right of its parent. */ - node->parent->right = left; - } else { - /* Node must have been on the left. */ - node->parent->left = left; - } - - /* Finally, put node on left's right. */ - left->right = node; - node->parent = left; -} - -/**********************************************************************//** -Append a node to the tree as a child of parent->last: on the left when -parent->last is tree->root or parent->result < 0, otherwise on the right -(parent->result must then be non-zero). -@return the appended node */ -static -ib_rbt_node_t* -rbt_tree_add_child( -/*===============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in: last node visited by the search and the result of the last comparison */ - ib_rbt_node_t* node) /*!< in: node to attach */ -{ - /* Cast away the const. */ - ib_rbt_node_t* last = (ib_rbt_node_t*) parent->last; - - if (last == tree->root || parent->result < 0) { - last->left = node; - } else { - /* FIXME: We don't handle duplicates (yet)! */ - ut_a(parent->result != 0); - - last->right = node; - } - - node->parent = last; - - return(node); -} - -/**********************************************************************//** -Generic binary tree insert: searches for the position of key and attaches -node there with rbt_tree_add_child(). No rebalancing is done here. -@return the inserted node */ -static -ib_rbt_node_t* -rbt_tree_insert( -/*============*/ - ib_rbt_t* tree, /*!< in: rb tree */ - const void* key, /*!< in: key used to find the position */ - ib_rbt_node_t* node) /*!< in: node to insert */ -{ - ib_rbt_bound_t parent; - ib_rbt_node_t* current = ROOT(tree); - - parent.result = 0; - parent.last = tree->root; - - /* Regular binary search.
*/ - while (current != tree->nil) { - - parent.last = current; - - if (tree->cmp_arg) { - parent.result = tree->compare_with_arg( - tree->cmp_arg, key, current->value); - } else { - parent.result = tree->compare(key, current->value); - } - - if (parent.result < 0) { - current = current->left; - } else { - current = current->right; - } - } - - ut_a(current == tree->nil); - - rbt_tree_add_child(tree, &parent, node); - - return(node); -} - -/**********************************************************************//** -Balance a tree after inserting a node. */ -static -void -rbt_balance_tree( -/*=============*/ - const ib_rbt_t* tree, /*!< in: tree to balance */ - ib_rbt_node_t* node) /*!< in: node that was inserted */ -{ - const ib_rbt_node_t* nil = tree->nil; - ib_rbt_node_t* parent = node->parent; - - /* The inserted node starts out red; the loop below repairs - any red node with a red parent. */ - node->color = IB_RBT_RED; - - while (node != ROOT(tree) && parent->color == IB_RBT_RED) { - ib_rbt_node_t* grand_parent = parent->parent; - - if (parent == grand_parent->left) { - ib_rbt_node_t* uncle = grand_parent->right; - - if (uncle->color == IB_RBT_RED) { - - /* Case 1 - change the colors. */ - uncle->color = IB_RBT_BLACK; - parent->color = IB_RBT_BLACK; - grand_parent->color = IB_RBT_RED; - - /* Move node up the tree. */ - node = grand_parent; - - } else { - - if (node == parent->right) { - /* The uncle is black and node is - a right child, case 2 - move node - up and rotate. */ - node = parent; - rbt_rotate_left(nil, node); - } - - grand_parent = node->parent->parent; - - /* Case 3. */ - node->parent->color = IB_RBT_BLACK; - grand_parent->color = IB_RBT_RED; - - rbt_rotate_right(nil, grand_parent); - } - - } else { - ib_rbt_node_t* uncle = grand_parent->left; - - if (uncle->color == IB_RBT_RED) { - - /* Case 1 - change the colors. */ - uncle->color = IB_RBT_BLACK; - parent->color = IB_RBT_BLACK; - grand_parent->color = IB_RBT_RED; - - /* Move node up the tree.
*/ - node = grand_parent; - - } else { - - if (node == parent->left) { - /* The uncle is black and node is a - left child, case 2 - move node up and - rotate. */ - node = parent; - rbt_rotate_right(nil, node); - } - - grand_parent = node->parent->parent; - - /* Case 3. */ - node->parent->color = IB_RBT_BLACK; - grand_parent->color = IB_RBT_RED; - - rbt_rotate_left(nil, grand_parent); - } - } - - parent = node->parent; - } - - /* Color the root black. */ - ROOT(tree)->color = IB_RBT_BLACK; -} - -/**********************************************************************//** -Find the given node's successor. -@return successor node or NULL if no successor */ -static -ib_rbt_node_t* -rbt_find_successor( -/*===============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* current) /*!< in: this is declared const - because it can be called via - rbt_next() */ -{ - const ib_rbt_node_t* nil = tree->nil; - ib_rbt_node_t* next = current->right; - - /* Is there a sub-tree to the right that we can follow. */ - if (next != nil) { - - /* Follow the left most links of the current right child. */ - while (next->left != nil) { - next = next->left; - } - - } else { /* We will have to go up the tree to find the successor. */ - ib_rbt_node_t* parent = current->parent; - - /* Cast away the const. */ - next = (ib_rbt_node_t*) current; - - while (parent != tree->root && next == parent->right) { - next = parent; - parent = next->parent; - } - - next = (parent == tree->root) ? NULL : parent; - } - - return(next); -} - -/**********************************************************************//** -Find the given node's predecessor.
-@return predecessor node or NULL if no predecesor */ -static -ib_rbt_node_t* -rbt_find_predecessor( -/*=================*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* current) /*!< in: this is declared const - because it can be called via - rbt_prev() */ -{ - const ib_rbt_node_t* nil = tree->nil; - ib_rbt_node_t* prev = current->left; - - /* Is there a sub-tree to the left that we can follow. */ - if (prev != nil) { - - /* Follow the right most links of the current left child. */ - while (prev->right != nil) { - prev = prev->right; - } - - } else { /* We will have to go up the tree to find the precedecessor. */ - ib_rbt_node_t* parent = current->parent; - - /* Cast away the const. */ - prev = (ib_rbt_node_t*) current; - - while (parent != tree->root && prev == parent->left) { - prev = parent; - parent = prev->parent; - } - - prev = (parent == tree->root) ? NULL : parent; - } - - return(prev); -} - -/**********************************************************************//** -Replace node with child. After applying transformations eject becomes -an orphan. */ -static -void -rbt_eject_node( -/*===========*/ - ib_rbt_node_t* eject, /*!< in: node to eject */ - ib_rbt_node_t* node) /*!< in: node to replace with */ -{ - /* Update the to be ejected node's parent's child pointers. */ - if (eject->parent->left == eject) { - eject->parent->left = node; - } else if (eject->parent->right == eject) { - eject->parent->right = node; - } else { - ut_a(0); - } - /* eject is now an orphan but otherwise its pointers - and color are left intact. */ - - node->parent = eject->parent; -} - -/**********************************************************************//** -Replace a node with another node. */ -static -void -rbt_replace_node( -/*=============*/ - ib_rbt_node_t* replace, /*!< in: node to replace */ - ib_rbt_node_t* node) /*!< in: node to replace with */ -{ - ib_rbt_color_t color = node->color; - - /* Update the node pointers. 
*/ - node->left = replace->left; - node->right = replace->right; - - /* Update the child node pointers. */ - node->left->parent = node; - node->right->parent = node; - - /* Make the parent of replace point to node. */ - rbt_eject_node(replace, node); - - /* Swap the colors: node takes over the color of replace, replace receives the former color of node. */ - node->color = replace->color; - replace->color = color; -} - -/**********************************************************************//** -Detach node from the tree. A node with at most one child is replaced by -that child (possibly nil); a node with two children is replaced by its -successor, whose own place is taken by the successor's right child. -@return the child that took the place of the node that was physically -unlinked (node itself, or its successor in the two children case) */ -static -ib_rbt_node_t* -rbt_detach_node( -/*============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_node_t* node) /*!< in: node to detach */ -{ - ib_rbt_node_t* child; - const ib_rbt_node_t* nil = tree->nil; - - if (node->left != nil && node->right != nil) { - /* Case where the node to be deleted has two children. */ - ib_rbt_node_t* successor = rbt_find_successor(tree, node); - - ut_a(successor != nil); - ut_a(successor->parent != nil); - ut_a(successor->left == nil); - - child = successor->right; - - /* Remove the successor node and replace with its child. */ - rbt_eject_node(successor, child); - - /* Replace the node to delete with its successor node. */ - rbt_replace_node(node, successor); - } else { - ut_a(node->left == nil || node->right == nil); - - child = (node->left != nil) ? node->left : node->right; - - /* Replace the node to delete with one of its children. */ - rbt_eject_node(node, child); - } - - /* Reset the node links, the color of node is kept. */ - node->parent = node->right = node->left = tree->nil; - - return(child); -} - -/**********************************************************************//** -Rebalance the right sub-tree after deletion.
-@return node to rebalance if more rebalancing required else NULL */ -static -ib_rbt_node_t* -rbt_balance_right( -/*==============*/ - const ib_rbt_node_t* nil, /*!< in: rb tree nil node */ - ib_rbt_node_t* parent, /*!< in: parent node */ - ib_rbt_node_t* sibling) /*!< in: sibling node */ -{ - ib_rbt_node_t* node = NULL; - - ut_a(sibling != nil); - - /* Case 3: the sibling is red. Recolor, rotate left around parent and continue with the new right child of parent as the sibling. */ - if (sibling->color == IB_RBT_RED) { - - parent->color = IB_RBT_RED; - sibling->color = IB_RBT_BLACK; - - rbt_rotate_left(nil, parent); - - sibling = parent->right; - - ut_a(sibling != nil); - } - - /* Both children of the sibling are black: color the sibling red and let the caller continue at parent. */ - if (sibling->left->color == IB_RBT_BLACK - && sibling->right->color == IB_RBT_BLACK) { - - node = parent; /* Parent needs to be rebalanced too. */ - sibling->color = IB_RBT_RED; - - } else { - if (sibling->right->color == IB_RBT_BLACK) { - - ut_a(sibling->left->color == IB_RBT_RED); - - sibling->color = IB_RBT_RED; - sibling->left->color = IB_RBT_BLACK; - - rbt_rotate_right(nil, sibling); - - sibling = parent->right; - ut_a(sibling != nil); - } - - sibling->color = parent->color; - sibling->right->color = IB_RBT_BLACK; - - parent->color = IB_RBT_BLACK; - - rbt_rotate_left(nil, parent); - } - - return(node); -} - -/**********************************************************************//** -Rebalance the left sub-tree after deletion. This is the mirror image of -rbt_balance_right(): left and right are swapped throughout. -@return node to rebalance if more rebalancing required else NULL */ -static -ib_rbt_node_t* -rbt_balance_left( -/*=============*/ - const ib_rbt_node_t* nil, /*!< in: rb tree nil node */ - ib_rbt_node_t* parent, /*!< in: parent node */ - ib_rbt_node_t* sibling) /*!< in: sibling node */ -{ - ib_rbt_node_t* node = NULL; - - ut_a(sibling != nil); - - /* Case 3.
*/ - if (sibling->color == IB_RBT_RED) { - - parent->color = IB_RBT_RED; - sibling->color = IB_RBT_BLACK; - - rbt_rotate_right(nil, parent); - sibling = parent->left; - - ut_a(sibling != nil); - } - - /* Both children of the sibling are black: color the sibling red and let the caller continue at parent. */ - if (sibling->right->color == IB_RBT_BLACK - && sibling->left->color == IB_RBT_BLACK) { - - node = parent; /* Parent needs to be rebalanced too. */ - sibling->color = IB_RBT_RED; - - } else { - if (sibling->left->color == IB_RBT_BLACK) { - - ut_a(sibling->right->color == IB_RBT_RED); - - sibling->color = IB_RBT_RED; - sibling->right->color = IB_RBT_BLACK; - - rbt_rotate_left(nil, sibling); - - sibling = parent->left; - - ut_a(sibling != nil); - } - - sibling->color = parent->color; - sibling->left->color = IB_RBT_BLACK; - - parent->color = IB_RBT_BLACK; - - rbt_rotate_right(nil, parent); - } - - return(node); -} - -/**********************************************************************//** -Unlink the node from the tree and rebalance the tree if necessary. The -node itself is not freed here; tree->n_nodes is decremented. */ -static -void -rbt_remove_node_and_rebalance( -/*==========================*/ - ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_node_t* node) /*!< in: node to remove */ -{ - /* Detach node and get the node that will be used - as rebalance start. */ - ib_rbt_node_t* child = rbt_detach_node(tree, node); - /* Rebalancing is only done when the color left in node by rbt_detach_node() is black. */ - if (node->color == IB_RBT_BLACK) { - ib_rbt_node_t* last = child; - - ROOT(tree)->color = IB_RBT_RED; - - while (child && child->color == IB_RBT_BLACK) { - ib_rbt_node_t* parent = child->parent; - - /* Did the deletion cause an imbalance in the - parents left sub-tree.
*/ - if (parent->left == child) { - - child = rbt_balance_right( - tree->nil, parent, parent->right); - - } else if (parent->right == child) { - - child = rbt_balance_left( - tree->nil, parent, parent->left); - - } else { - ut_error; - } - - if (child) { - last = child; - } - } - - ut_a(last); - - last->color = IB_RBT_BLACK; - ROOT(tree)->color = IB_RBT_BLACK; - } - - /* Note that we have removed a node from the tree. */ - --tree->n_nodes; -} - -/**********************************************************************//** -Recursively free node and everything below it, children first. The nil -node terminates the recursion and is never freed here. */ -static -void -rbt_free_node( -/*==========*/ - ib_rbt_node_t* node, /*!< in: node to free */ - ib_rbt_node_t* nil) /*!< in: rb tree nil node */ -{ - if (node != nil) { - rbt_free_node(node->left, nil); - rbt_free_node(node->right, nil); - - ut_free(node); - } -} - -/**********************************************************************//** -Free all the nodes starting from tree->root, then the nil node and -finally the tree itself. */ -UNIV_INTERN -void -rbt_free( -/*=====*/ - ib_rbt_t* tree) /*!< in: rb tree to free */ -{ - rbt_free_node(tree->root, tree->nil); - ut_free(tree->nil); - ut_free(tree); -} - -/**********************************************************************//** -Create an instance of a red black tree, whose comparison function takes -an argument. The tree is created with rbt_create() without a plain -comparison function; cmp_arg and compare are stored in the tree afterwards. -@return an empty rb tree */ -UNIV_INTERN -ib_rbt_t* -rbt_create_arg_cmp( -/*===============*/ - size_t sizeof_value, /*!< in: sizeof data item */ - ib_rbt_arg_compare - compare, /*!< in: fn to compare items */ - void* cmp_arg) /*!< in: compare fn arg, must not be NULL (asserted) */ -{ - ib_rbt_t* tree; - - ut_a(cmp_arg); - - tree = rbt_create(sizeof_value, NULL); - tree->cmp_arg = cmp_arg; - tree->compare_with_arg = compare; - - return(tree); -} - -/**********************************************************************//** -Create an instance of a red black tree.
-@return an empty rb tree */ -UNIV_INTERN -ib_rbt_t* -rbt_create( -/*=======*/ - size_t sizeof_value, /*!< in: sizeof data item */ - ib_rbt_compare compare) /*!< in: fn to compare items; rbt_create_arg_cmp() passes NULL here */ -{ - ib_rbt_t* tree; - ib_rbt_node_t* node; - - tree = (ib_rbt_t*) ut_malloc(sizeof(*tree)); - memset(tree, 0, sizeof(*tree)); - - tree->sizeof_value = sizeof_value; - - /* Create the sentinel (NIL) node. It is black and all of its links point to itself. */ - node = tree->nil = (ib_rbt_node_t*) ut_malloc(sizeof(*node)); - memset(node, 0, sizeof(*node)); - - node->color = IB_RBT_BLACK; - node->parent = node->left = node->right = node; - - /* Create the "fake" root, the real root node will be the - left child of this node. */ - node = tree->root = (ib_rbt_node_t*) ut_malloc(sizeof(*node)); - memset(node, 0, sizeof(*node)); - - node->color = IB_RBT_BLACK; - node->parent = node->left = node->right = tree->nil; - - tree->compare = compare; - - return(tree); -} - -/**********************************************************************//** -Generic insert of a value in the rb tree. A new node is allocated, -tree->sizeof_value bytes of value are copied into it, and the node is -inserted and the tree rebalanced. -@return inserted node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_insert( -/*=======*/ - ib_rbt_t* tree, /*!< in: rb tree */ - const void* key, /*!< in: key for ordering */ - const void* value) /*!< in: value of key, this value - is copied to the node */ -{ - ib_rbt_node_t* node; - - /* Create the node that will hold the value data. */ - node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree)); - - memcpy(node->value, value, tree->sizeof_value); - node->parent = node->left = node->right = tree->nil; - - /* Insert in the tree in the usual way. */ - rbt_tree_insert(tree, key, node); - rbt_balance_tree(tree, node); - - ++tree->n_nodes; - - return(node); -} - -/**********************************************************************//** -Add a new node to the tree, useful for data that is pre-sorted.
-@return appended node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_add_node( -/*=========*/ - ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in/out: insert position, passed on to rbt_add_preallocated_node() */ - const void* value) /*!< in: this value is copied - to the node */ -{ - ib_rbt_node_t* node; - - /* Create the node that will hold the value data */ - node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree)); - - memcpy(node->value, value, tree->sizeof_value); - return(rbt_add_preallocated_node(tree, parent, node)); -} - -/****************************************************************//** -Add a new caller-provided node to tree at the specified position. -The node must have its key fields initialized correctly. The links of -the node are reset to nil here and tree->n_nodes is incremented. -@return added node */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_add_preallocated_node( -/*======================*/ - ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< in/out: insert position; parent->last is set to tree->root if it was NULL */ - ib_rbt_node_t* node) /*!< in: node */ -{ - node->parent = node->left = node->right = tree->nil; - - /* parent->last is NULL when the search that filled in parent visited no node, i.e. the tree is empty: attach below the fake root. */ - if (parent->last == NULL) { - parent->last = tree->root; - } - - /* Append the node, the hope here is that the caller knows - what s/he is doing. */ - rbt_tree_add_child(tree, parent, node); - rbt_balance_tree(tree, node); - - ++tree->n_nodes; - -#if defined(IB_RBT_TESTING) - ut_a(rbt_validate(tree)); -#endif - return(node); -} - - -/**********************************************************************//** -Find a matching node in the rb tree. The comparison function with -argument is used when tree->cmp_arg is set, tree->compare otherwise. -@return NULL if not found else the node where key was found */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_lookup( -/*=======*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const void* key) /*!< in: key to use for search */ -{ - const ib_rbt_node_t* current = ROOT(tree); - - /* Regular binary search.
*/ - while (current != tree->nil) { - int result; - - if (tree->cmp_arg) { - result = tree->compare_with_arg( - tree->cmp_arg, key, current->value); - } else { - result = tree->compare(key, current->value); - } - - if (result < 0) { - current = current->left; - } else if (result > 0) { - current = current->right; - } else { - break; - } - } - - return(current != tree->nil ? current : NULL); -} - -/**********************************************************************//** -Delete a node identified by key: the node is looked up, removed from the -tree and freed. -@return TRUE if success FALSE if not found */ -UNIV_INTERN -ibool -rbt_delete( -/*=======*/ - ib_rbt_t* tree, /*!< in: rb tree */ - const void* key) /*!< in: key to delete */ -{ - ibool deleted = FALSE; - ib_rbt_node_t* node = (ib_rbt_node_t*) rbt_lookup(tree, key); - - if (node) { - rbt_remove_node_and_rebalance(tree, node); - - ut_free(node); - deleted = TRUE; - } - - return(deleted); -} - -/**********************************************************************//** -Remove a node from the rb tree, the node is not freed, that is the -caller's responsibility. -@return deleted node but without the const */ -UNIV_INTERN -ib_rbt_node_t* -rbt_remove_node( -/*============*/ - ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* const_node) /*!< in: node to delete, this - is a fudge and declared const - because the caller can access - only const nodes */ -{ - /* Cast away the const. */ - rbt_remove_node_and_rebalance(tree, (ib_rbt_node_t*) const_node); - - /* This is to make it easier to do something like this: - ut_free(rbt_remove_node(tree, node)); - */ - - return((ib_rbt_node_t*) const_node); -} - -/**********************************************************************//** -Find the node that has the lowest key that is >= key.
-@return node satisfying the lower bound constraint or NULL */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_lower_bound( -/*============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const void* key) /*!< in: key to search */ -{ - ib_rbt_node_t* lb_node = NULL; - ib_rbt_node_t* current = ROOT(tree); - - while (current != tree->nil) { - int result; - - if (tree->cmp_arg) { - result = tree->compare_with_arg( - tree->cmp_arg, key, current->value); - } else { - result = tree->compare(key, current->value); - } - - if (result > 0) { - /* key compares greater than current: current does not qualify, go right. */ - current = current->right; - - } else if (result < 0) { - /* current qualifies; remember it and look for a lower candidate to the left. */ - lb_node = current; - current = current->left; - - } else { - lb_node = current; - break; - } - } - - return(lb_node); -} - -/**********************************************************************//** -Find the node that has the greatest key that is <= key. -@return node satisfying the upper bound constraint, or NULL when no node -compared less than or equal to key (including the empty tree) */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_upper_bound( -/*============*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const void* key) /*!< in: key to search */ -{ - ib_rbt_node_t* ub_node = NULL; - ib_rbt_node_t* current = ROOT(tree); - - while (current != tree->nil) { - int result; - - if (tree->cmp_arg) { - result = tree->compare_with_arg( - tree->cmp_arg, key, current->value); - } else { - result = tree->compare(key, current->value); - } - - if (result > 0) { - /* current qualifies; remember it and look for a greater candidate to the right. */ - ub_node = current; - current = current->right; - - } else if (result < 0) { - /* key compares less than current: current does not qualify, go left. */ - current = current->left; - - } else { - ub_node = current; - break; - } - } - - return(ub_node); -} - -/**********************************************************************//** -Find the node that has the greatest key that is <= key.
-@return value of result */ -UNIV_INTERN -int -rbt_search( -/*=======*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< out: last is set to the last node visited (NULL if no node was visited), result to the outcome of the last comparison (1 if none was made) */ - const void* key) /*!< in: key to search */ -{ - ib_rbt_node_t* current = ROOT(tree); - - /* Everything is greater than the NULL root. */ - parent->result = 1; - parent->last = NULL; - - while (current != tree->nil) { - - parent->last = current; - - if (tree->cmp_arg) { - parent->result = tree->compare_with_arg( - tree->cmp_arg, key, current->value); - } else { - parent->result = tree->compare(key, current->value); - } - - if (parent->result > 0) { - current = current->right; - } else if (parent->result < 0) { - current = current->left; - } else { - break; - } - } - - return(parent->result); -} - -/**********************************************************************//** -Search the tree for key like rbt_search(), but use the supplied -comparison functions instead of the ones stored in the tree: arg_compare -(called with tree->cmp_arg) if it is non-NULL, compare otherwise. -On return parent->last is the last node visited (NULL if no node was -visited) and parent->result the outcome of the last comparison (1 if none -was made, 0 if key was found). -@return value of result */ -UNIV_INTERN -int -rbt_search_cmp( -/*===========*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - ib_rbt_bound_t* parent, /*!< out: search bounds */ - const void* key, /*!< in: key to search */ - ib_rbt_compare compare, /*!< in: fn to compare items */ - ib_rbt_arg_compare - arg_compare) /*!< in: fn to compare items - with argument */ -{ - ib_rbt_node_t* current = ROOT(tree); - - /* Everything is greater than the NULL root. */ - parent->result = 1; - parent->last = NULL; - - while (current != tree->nil) { - - parent->last = current; - - if (arg_compare) { - ut_ad(tree->cmp_arg); - parent->result = arg_compare( - tree->cmp_arg, key, current->value); - } else { - parent->result = compare(key, current->value); - } - - if (parent->result > 0) { - current = current->right; - } else if (parent->result < 0) { - current = current->left; - } else { - break; - } - } - - return(parent->result); -} - -/**********************************************************************//** -Return the left most node in the tree.
*/ -UNIV_INTERN -const ib_rbt_node_t* -rbt_first( -/*======*/ - /* out: leftmost node, or NULL if ROOT(tree) is nil */ - const ib_rbt_t* tree) /*!< in: rb tree */ -{ - ib_rbt_node_t* first = NULL; - ib_rbt_node_t* current = ROOT(tree); - - while (current != tree->nil) { - first = current; - current = current->left; - } - - return(first); -} - -/**********************************************************************//** -Return the right most node in the tree. -@return the rightmost node, or NULL if ROOT(tree) is nil */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_last( -/*=====*/ - const ib_rbt_t* tree) /*!< in: rb tree */ -{ - ib_rbt_node_t* last = NULL; - ib_rbt_node_t* current = ROOT(tree); - - while (current != tree->nil) { - last = current; - current = current->right; - } - - return(last); -} - -/**********************************************************************//** -Return the next node, as found by rbt_find_successor(). -@return node next from current, or NULL if current is NULL or has no -successor */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_next( -/*=====*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* current) /*!< in: current node, may be NULL */ -{ - return(current ? rbt_find_successor(tree, current) : NULL); -} - -/**********************************************************************//** -Return the previous node, as found by rbt_find_predecessor(). -@return node prev from current, or NULL if current is NULL or has no -predecessor */ -UNIV_INTERN -const ib_rbt_node_t* -rbt_prev( -/*=====*/ - const ib_rbt_t* tree, /*!< in: rb tree */ - const ib_rbt_node_t* current) /*!< in: current node, may be NULL */ -{ - return(current ? rbt_find_predecessor(tree, current) : NULL); -} - -/**********************************************************************//** -Free every node from ROOT(tree) downwards and then reset the tree with -rbt_reset(). The tree object itself is not freed. */ -UNIV_INTERN -void -rbt_clear( -/*======*/ - ib_rbt_t* tree) /*!< in: rb tree */ -{ - rbt_free_node(ROOT(tree), tree->nil); - rbt_reset(tree); -} - -/****************************************************************//** -Clear the tree without deleting and freeing its nodes.
*/ -UNIV_INTERN -void -rbt_reset( -/*======*/ - ib_rbt_t* tree) /*!< in: rb tree */ -{ - tree->n_nodes = 0; - tree->root->left = tree->root->right = tree->nil; -} - -/**********************************************************************//** -Merge the nodes of src into dst: the value of every src node for which -rbt_search() finds no match in dst is copied into a new node of dst. -src is not modified. Nothing is done if src is empty or dst == src. -@return no. of recs merged */ -UNIV_INTERN -ulint -rbt_merge_uniq( -/*===========*/ - ib_rbt_t* dst, /*!< in/out: dst rb tree */ - const ib_rbt_t* src) /*!< in: src rb tree */ -{ - ib_rbt_bound_t parent; - ulint n_merged = 0; - const ib_rbt_node_t* src_node = rbt_first(src); - - if (rbt_empty(src) || dst == src) { - return(0); - } - - for (/* No op */; src_node; src_node = rbt_next(src, src_node)) { - - if (rbt_search(dst, &parent, src_node->value) != 0) { - rbt_add_node(dst, &parent, src_node->value); - ++n_merged; - } - } - - return(n_merged); -} - -/**********************************************************************//** -Merge the nodes of src into dst by moving them: every src node for which -rbt_search() finds no match in dst is removed from src and the very same -node is linked into dst. As a side effect the duplicates will be left -untouched in the src. Nothing is done if src is empty or dst == src. -@return no. of recs merged */ -UNIV_INTERN -ulint -rbt_merge_uniq_destructive( -/*=======================*/ - ib_rbt_t* dst, /*!< in/out: dst rb tree */ - ib_rbt_t* src) /*!< in/out: src rb tree */ -{ - ib_rbt_bound_t parent; - ib_rbt_node_t* src_node; - ulint old_size = rbt_size(dst); - - if (rbt_empty(src) || dst == src) { - return(0); - } - - for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) { - ib_rbt_node_t* prev = src_node; - /* Advance first: prev may be removed from src below. */ - src_node = (ib_rbt_node_t*) rbt_next(src, prev); - - /* Skip duplicates. */ - if (rbt_search(dst, &parent, prev->value) != 0) { - - /* Remove and reset the node but preserve - the node (data) value. */ - rbt_remove_node_and_rebalance(src, prev); - - /* The nil should be taken from the dst tree.
*/ - prev->parent = prev->left = prev->right = dst->nil; - rbt_tree_add_child(dst, &parent, prev); - rbt_balance_tree(dst, prev); - - ++dst->n_nodes; - } - } - -#if defined(IB_RBT_TESTING) - ut_a(rbt_validate(dst)); - ut_a(rbt_validate(src)); -#endif - return(rbt_size(dst) - old_size); -} - -/**********************************************************************//** -Check that every path from the root to the leaves has the same count and -the tree nodes are in order. The count check is done by -rbt_count_black_nodes(), whose result must be greater than 0; the -ordering check by rbt_check_ordering(). -@return TRUE if OK FALSE otherwise */ -UNIV_INTERN -ibool -rbt_validate( -/*=========*/ - const ib_rbt_t* tree) /*!< in: RB tree to validate */ -{ - if (rbt_count_black_nodes(tree, ROOT(tree)) > 0) { - return(rbt_check_ordering(tree)); - } - - return(FALSE); -} - -/**********************************************************************//** -Iterate over the tree in depth first order. The work is delegated to -rbt_print_subtree(), starting at ROOT(tree). */ -UNIV_INTERN -void -rbt_print( -/*======*/ - const ib_rbt_t* tree, /*!< in: tree to traverse */ - ib_rbt_print_node print) /*!< in: print function */ -{ - rbt_print_subtree(tree, ROOT(tree), print); -} diff --git a/storage/xtradb/ut/ut0rnd.cc b/storage/xtradb/ut/ut0rnd.cc deleted file mode 100644 index 3b4d7381181..00000000000 --- a/storage/xtradb/ut/ut0rnd.cc +++ /dev/null @@ -1,97 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/***************************************************************//** -@file ut/ut0rnd.cc -Random numbers and hashing - -Created 5/11/1994 Heikki Tuuri -********************************************************************/ - -#include "ut0rnd.h" - -#ifdef UNIV_NONINL -#include "ut0rnd.ic" -#endif - -/** Multipliers used in ut_find_prime. Despite being called random numbers they are fixed compile-time constants. */ -/*@{*/ -#define UT_RANDOM_1 1.0412321 -#define UT_RANDOM_2 1.1131347 -#define UT_RANDOM_3 1.0132677 -/*@}*/ - -/** Seed value of ut_rnd_gen_ulint(). */ -UNIV_INTERN ulint ut_rnd_ulint_counter = 65654363; - -/***********************************************************//** -Looks for a prime number slightly greater than the given argument. -The prime is chosen so that it is not near any power of 2. -The result is deterministic: it depends on n only. -@return prime */ -UNIV_INTERN -ulint -ut_find_prime( -/*==========*/ - ulint n) /*!< in: positive number > 100 */ -{ - ulint pow2; - ulint i; - - n += 100; - /* Find the largest power of 2 that is less than n. */ - pow2 = 1; - while (pow2 * 2 < n) { - pow2 = 2 * pow2; - } - /* n is less than 5 % above that power of 2: scale it up. */ - if ((double) n < 1.05 * (double) pow2) { - n = (ulint) ((double) n * UT_RANDOM_1); - } - /* Repeat the check against the next power of 2. */ - pow2 = 2 * pow2; - - if ((double) n > 0.95 * (double) pow2) { - n = (ulint) ((double) n * UT_RANDOM_2); - } - - if (n > pow2 - 20) { - n += 30; - } - - /* Now we have n far enough from powers of 2. To make - n more random (especially, if it was not near - a power of 2), we then multiply it by a random number.
*/ - - n = (ulint) ((double) n * UT_RANDOM_3); - /* Test n, n + 1, ... by trial division until a prime is found. */ - for (;; n++) { - i = 2; - while (i * i <= n) { - if (n % i == 0) { - goto next_n; - } - i++; - } - - /* No i with i * i <= n divides n: found a prime */ - break; -next_n: ; - } - - return(n); -} diff --git a/storage/xtradb/ut/ut0timer.cc b/storage/xtradb/ut/ut0timer.cc deleted file mode 100644 index 85292cce28c..00000000000 --- a/storage/xtradb/ut/ut0timer.cc +++ /dev/null @@ -1,92 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved. -Copyright (c) 2014, SkySQL Ab. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/********************************************************************//** -@file ut/ut0timer.cc -Timer routines - -Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com -modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6 -*************************************************************************/ - -#include "data0type.h" -#include <my_rdtsc.h> -#include <ut0timer.h> - -/**************************************************************//** -Initial timer definition: the placeholder that ut_timer_now points to -until ut_init_timer() selects another timer function. -@return 0 */ -static -ulonglong -ut_timer_none(void) -/*===============*/ -{ - return 0; -} - -/**************************************************************//** -Function pointer to the selected timer function. It initially points to -ut_timer_none() and is reassigned by ut_init_timer(). -@return timer current value */ -ulonglong (*ut_timer_now)(void) = &ut_timer_none; - -struct my_timer_unit_info ut_timer; - -/**************************************************************//** -Sets up the data required for use of my_timer_* functions. -Selects the best timer by high frequency, and tight resolution. -Points my_timer_now() to the selected timer function.
-Initializes my_timer struct to contain the info for selected timer.*/ -UNIV_INTERN -void -ut_init_timer(void) -/*===============*/ -{ - MY_TIMER_INFO all_timer_info; - my_timer_init(&all_timer_info); - /* Take the first acceptable timer in the order cycles, nanoseconds, microseconds, milliseconds, ticks. Acceptable means a resolution of 1 and a frequency above the per-timer threshold tested below. */ - if (all_timer_info.cycles.frequency > 1000000 && - all_timer_info.cycles.resolution == 1) { - ut_timer = all_timer_info.cycles; - ut_timer_now = &my_timer_cycles; - } else if (all_timer_info.nanoseconds.frequency > 1000000 && - all_timer_info.nanoseconds.resolution == 1) { - ut_timer = all_timer_info.nanoseconds; - ut_timer_now = &my_timer_nanoseconds; - } else if (all_timer_info.microseconds.frequency >= 1000000 && - all_timer_info.microseconds.resolution == 1) { - ut_timer = all_timer_info.microseconds; - ut_timer_now = &my_timer_microseconds; - - } else if (all_timer_info.milliseconds.frequency >= 1000 && - all_timer_info.milliseconds.resolution == 1) { - ut_timer = all_timer_info.milliseconds; - ut_timer_now = &my_timer_milliseconds; - } else if (all_timer_info.ticks.frequency >= 1000 && - /* Will probably be false */ - all_timer_info.ticks.resolution == 1) { - ut_timer = all_timer_info.ticks; - ut_timer_now = &my_timer_ticks; - } else { - /* None are acceptable, so ut_timer_now is left unchanged and the struct is filled in with placeholder values */ - ut_timer.frequency = 1; /* Avoid div-by-zero */ - ut_timer.overhead = 0; /* Since it doesn't do anything */ - ut_timer.resolution = 10; /* Another sign it's bad */ - ut_timer.routine = 0; /* None */ - } -} diff --git a/storage/xtradb/ut/ut0ut.cc b/storage/xtradb/ut/ut0ut.cc deleted file mode 100644 index fd52537ae11..00000000000 --- a/storage/xtradb/ut/ut0ut.cc +++ /dev/null @@ -1,870 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License.
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/***************************************************************//** -@file ut/ut0ut.cc -Various utilities for Innobase. - -Created 5/11/1994 Heikki Tuuri -********************************************************************/ - -#include "ut0ut.h" - -#ifndef UNIV_INNOCHECKSUM - -#include "ut0sort.h" -#include "os0thread.h" /* thread-ID */ - -#ifdef UNIV_NONINL -#include "ut0ut.ic" -#endif - -#include <stdarg.h> -#include <string.h> -#include <ctype.h> - -#ifndef UNIV_HOTBACKUP -# include "btr0types.h" -# include "trx0trx.h" -# include "ha_prototypes.h" -# include "mysql_com.h" /* NAME_LEN */ -# include <string> -#endif /* !UNIV_HOTBACKUP */ - -#ifdef __WIN__ -/*****************************************************************//** -NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix -epoch starts from 1970/1/1. The constant below is the distance between -the two epochs expressed in microseconds. For selection of constant see: -http://support.microsoft.com/kb/167296/ */ -#define WIN_TO_UNIX_DELTA_USEC ((ib_int64_t) 11644473600000000ULL) - - -/*****************************************************************//** -This is the Windows version of gettimeofday(2).
-@return 0 if all OK else -1 */ -static -int -ut_gettimeofday( -/*============*/ - struct timeval* tv, /*!< out: Values are relative to Unix epoch; if NULL, errno is set to EINVAL and -1 is returned */ - void* tz) /*!< in: not used */ -{ - FILETIME ft; - ib_int64_t tm; - - if (!tv) { - errno = EINVAL; - return(-1); - } - - GetSystemTimeAsFileTime(&ft); - /* Combine the high and low parts of the FILETIME into one 64-bit value. */ - tm = (ib_int64_t) ft.dwHighDateTime << 32; - tm |= ft.dwLowDateTime; - - ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10 - does not work */ - - tm /= 10; /* Convert from 100 nsec periods to usec */ - - /* If we don't convert to the Unix epoch the value for - struct timeval::tv_sec will overflow.*/ - tm -= WIN_TO_UNIX_DELTA_USEC; - - tv->tv_sec = (long) (tm / 1000000L); - tv->tv_usec = (long) (tm % 1000000L); - - return(0); -} -#else -/** An alias for gettimeofday(2). On Microsoft Windows, we have to -reimplement this function. */ -#define ut_gettimeofday gettimeofday -#endif /* __WIN__ */ - -/**********************************************************//** -Returns system time, as returned by time(NULL). We do not specify the -format of the time returned: -the only way to manipulate it is to use the function ut_difftime. -@return system time */ -UNIV_INTERN -ib_time_t -ut_time(void) -/*=========*/ -{ - return(time(NULL)); -} - -#ifndef UNIV_HOTBACKUP -/**********************************************************//** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error.
-@return 0 on success, -1 otherwise */ -UNIV_INTERN -int -ut_usectime( -/*========*/ - ulint* sec, /*!< out: seconds since the Epoch */ - ulint* ms) /*!< out: microseconds since the Epoch+*sec; despite the name this is tv_usec, not milliseconds */ -{ - struct timeval tv; - int ret; - int errno_gettimeofday; - int i; - /* Try up to 10 times; each failure is logged to stderr and followed by a 0.1 sec sleep, errno of the failed call is preserved. */ - for (i = 0; i < 10; i++) { - - ret = ut_gettimeofday(&tv, NULL); - - if (ret == -1) { - errno_gettimeofday = errno; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: gettimeofday(): %s\n", - strerror(errno_gettimeofday)); - os_thread_sleep(100000); /* 0.1 sec */ - errno = errno_gettimeofday; - } else { - break; - } - } - /* If all attempts failed, *sec and *ms are left untouched. */ - if (ret != -1) { - *sec = (ulint) tv.tv_sec; - *ms = (ulint) tv.tv_usec; - } - - return(ret); -} - -/**********************************************************//** -Returns the number of microseconds since epoch. Similar to -time(3), the return value is also stored in *tloc, provided -that tloc is non-NULL. -@return us since epoch */ -UNIV_INTERN -ullint -ut_time_us( -/*=======*/ - ullint* tloc) /*!< out: us since epoch, if non-NULL */ -{ - struct timeval tv; - ullint us; - /* NOTE(review): the return value of ut_gettimeofday() is not checked here; confirm that a failure cannot leave tv unset. */ - ut_gettimeofday(&tv, NULL); - - us = (ullint) tv.tv_sec * 1000000 + tv.tv_usec; - - if (tloc != NULL) { - *tloc = us; - } - - return(us); -} - -/**********************************************************//** -Returns the number of milliseconds since some epoch. The -value may wrap around. It should only be used for heuristic -purposes. -@return ms since epoch */ -UNIV_INTERN -ulint -ut_time_ms(void) -/*============*/ -{ - struct timeval tv; - - ut_gettimeofday(&tv, NULL); - - return((ulint) tv.tv_sec * 1000 + tv.tv_usec / 1000); -} -#endif /* !UNIV_HOTBACKUP */ - -/**********************************************************//** -Returns the difference of two times in seconds.
-@return time2 - time1 expressed in seconds */ -UNIV_INTERN -double -ut_difftime( -/*========*/ - ib_time_t time2, /*!< in: time */ - ib_time_t time1) /*!< in: time */ -{ - return(difftime(time2, time1)); -} - -#endif /* !UNIV_INNOCHECKSUM */ - -/**********************************************************//** -Prints a timestamp to a file. */ -UNIV_INTERN -void -ut_print_timestamp( -/*===============*/ - FILE* file) /*!< in: file where to print */ -{ - ulint thread_id = 0; - -#ifndef UNIV_INNOCHECKSUM - thread_id = os_thread_pf(os_thread_get_curr_id()); -#endif - -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx", - (int) cal_tm.wYear, - (int) cal_tm.wMonth, - (int) cal_tm.wDay, - (int) cal_tm.wHour, - (int) cal_tm.wMinute, - (int) cal_tm.wSecond, - thread_id); -#else - struct tm* cal_tm_ptr; - time_t tm; - -#ifdef HAVE_LOCALTIME_R - struct tm cal_tm; - time(&tm); - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - time(&tm); - cal_tm_ptr = localtime(&tm); -#endif - fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx", - cal_tm_ptr->tm_year + 1900, - cal_tm_ptr->tm_mon + 1, - cal_tm_ptr->tm_mday, - cal_tm_ptr->tm_hour, - cal_tm_ptr->tm_min, - cal_tm_ptr->tm_sec, - thread_id); -#endif -} - -#ifndef UNIV_INNOCHECKSUM - -/**********************************************************//** -Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. 
*/ -UNIV_INTERN -void -ut_sprintf_timestamp( -/*=================*/ - char* buf) /*!< in: buffer where to sprintf */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - sprintf(buf, "%02d%02d%02d %2d:%02d:%02d", - (int) cal_tm.wYear % 100, - (int) cal_tm.wMonth, - (int) cal_tm.wDay, - (int) cal_tm.wHour, - (int) cal_tm.wMinute, - (int) cal_tm.wSecond); -#else - struct tm* cal_tm_ptr; - time_t tm; - -#ifdef HAVE_LOCALTIME_R - struct tm cal_tm; - time(&tm); - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - time(&tm); - cal_tm_ptr = localtime(&tm); -#endif - sprintf(buf, "%02d%02d%02d %2d:%02d:%02d", - cal_tm_ptr->tm_year % 100, - cal_tm_ptr->tm_mon + 1, - cal_tm_ptr->tm_mday, - cal_tm_ptr->tm_hour, - cal_tm_ptr->tm_min, - cal_tm_ptr->tm_sec); -#endif -} - -#ifdef UNIV_HOTBACKUP -/**********************************************************//** -Sprintfs a timestamp to a buffer with no spaces and with ':' characters -replaced by '_'. */ -UNIV_INTERN -void -ut_sprintf_timestamp_without_extra_chars( -/*=====================================*/ - char* buf) /*!< in: buffer where to sprintf */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d", - (int) cal_tm.wYear % 100, - (int) cal_tm.wMonth, - (int) cal_tm.wDay, - (int) cal_tm.wHour, - (int) cal_tm.wMinute, - (int) cal_tm.wSecond); -#else - struct tm* cal_tm_ptr; - time_t tm; - -#ifdef HAVE_LOCALTIME_R - struct tm cal_tm; - time(&tm); - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - time(&tm); - cal_tm_ptr = localtime(&tm); -#endif - sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d", - cal_tm_ptr->tm_year % 100, - cal_tm_ptr->tm_mon + 1, - cal_tm_ptr->tm_mday, - cal_tm_ptr->tm_hour, - cal_tm_ptr->tm_min, - cal_tm_ptr->tm_sec); -#endif -} - -/**********************************************************//** -Returns current year, month, day. 
*/ -UNIV_INTERN -void -ut_get_year_month_day( -/*==================*/ - ulint* year, /*!< out: current year */ - ulint* month, /*!< out: month */ - ulint* day) /*!< out: day */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - *year = (ulint) cal_tm.wYear; - *month = (ulint) cal_tm.wMonth; - *day = (ulint) cal_tm.wDay; -#else - struct tm* cal_tm_ptr; - time_t tm; - -#ifdef HAVE_LOCALTIME_R - struct tm cal_tm; - time(&tm); - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - time(&tm); - cal_tm_ptr = localtime(&tm); -#endif - *year = (ulint) cal_tm_ptr->tm_year + 1900; - *month = (ulint) cal_tm_ptr->tm_mon + 1; - *day = (ulint) cal_tm_ptr->tm_mday; -#endif -} -#endif /* UNIV_HOTBACKUP */ - -#ifndef UNIV_HOTBACKUP -/*************************************************************//** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. -@return dummy value */ -UNIV_INTERN -void -ut_delay( -/*=====*/ - ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */ -{ - ulint i; - - UT_LOW_PRIORITY_CPU(); - - for (i = 0; i < delay * 50; i++) { - UT_RELAX_CPU(); - UT_COMPILER_BARRIER(); - } - - UT_RESUME_PRIORITY_CPU(); -} -#endif /* !UNIV_HOTBACKUP */ - -/*************************************************************//** -Prints the contents of a memory buffer in hex and ascii. */ -UNIV_INTERN -void -ut_print_buf( -/*=========*/ - FILE* file, /*!< in: file where to print */ - const void* buf, /*!< in: memory buffer */ - ulint len) /*!< in: length of the buffer */ -{ - const byte* data; - ulint i; - - UNIV_MEM_ASSERT_RW(buf, len); - - fprintf(file, " len %lu; hex ", len); - - for (data = (const byte*) buf, i = 0; i < len; i++) { - fprintf(file, "%02lx", (ulong)*data++); - } - - fputs("; asc ", file); - - data = (const byte*) buf; - - for (i = 0; i < len; i++) { - int c = (int) *data++; - putc(isprint(c) ? 
c : ' ', file); - } - - putc(';', file); -} - -/**********************************************************************//** -Sort function for ulint arrays. */ -UNIV_INTERN -void -ut_ulint_sort( -/*==========*/ - ulint* arr, /*!< in/out: array to sort */ - ulint* aux_arr, /*!< in/out: aux array to use in sort */ - ulint low, /*!< in: lower bound */ - ulint high) /*!< in: upper bound */ -{ - UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high, - ut_ulint_cmp); -} - -/*************************************************************//** -Calculates fast the number rounded up to the nearest power of 2. -@return first power of 2 which is >= n */ -UNIV_INTERN -ulint -ut_2_power_up( -/*==========*/ - ulint n) /*!< in: number != 0 */ -{ - ulint res; - - res = 1; - - ut_ad(n > 0); - - while (res < n) { - res = res * 2; - } - - return(res); -} - -/**********************************************************************//** -Outputs a NUL-terminated file name, quoted with apostrophes. */ -UNIV_INTERN -void -ut_print_filename( -/*==============*/ - FILE* f, /*!< in: output stream */ - const char* name) /*!< in: name to print */ -{ - putc('\'', f); - for (;;) { - int c = *name++; - switch (c) { - case 0: - goto done; - case '\'': - putc(c, f); - /* fall through */ - default: - putc(c, f); - } - } -done: - putc('\'', f); -} -#ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. 
*/
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
-	FILE*		f,	/*!< in: output stream */
-	const trx_t*	trx,	/*!< in: transaction */
-	ibool		table_id,/*!< in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name)	/*!< in: name to print */
-{
-	/* NUL-terminated convenience wrapper: measure the name and let
-	the length-taking variant do the work. */
-	const ulint	name_length = strlen(name);
-
-	ut_print_namel(f, trx, table_id, name, name_length);
-}
-
-/**********************************************************************//**
-Writes a name of explicit length to a stream after passing it through
-innobase_convert_name(), which quotes it as an SQL identifier. Per the
-original description, a name containing a slash '/' is output as two
-identifiers separated by a period (.), as in SQL
-database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_namel(
-/*===========*/
-	FILE*		f,	/*!< in: output stream */
-	const trx_t*	trx,	/*!< in: transaction (NULL=no quotes) */
-	ibool		table_id,/*!< in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name,	/*!< in: name to print */
-	ulint		namelen)/*!< in: length of name */
-{
-	/* Room for database and table name (2 * NAME_LEN) plus slack
-	for the #mysql50# prefix and the quotes. */
-	char	converted[3 * NAME_LEN];
-
-	const char*	converted_end = innobase_convert_name(
-		converted, sizeof converted,
-		name, namelen,
-		trx != NULL ? trx->mysql_thd : NULL,
-		table_id);
-
-	/* Emit exactly the bytes produced by the conversion; the buffer
-	is not NUL-terminated here. */
-	(void) fwrite(converted, 1, converted_end - converted, f);
-}
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier.
*/
-UNIV_INTERN
-std::string
-ut_get_name(
-/*=========*/
-	const trx_t*	trx,	/*!< in: transaction (NULL=no quotes) */
-	ibool		table_id,/*!< in: TRUE=print a table name,
-				FALSE=print other identifier */
-	const char*	name)	/*!< in: name to print */
-{
-	/* 2 * NAME_LEN for database and table name,
-	and some slack for the #mysql50# prefix and quotes */
-	char		buf[3 * NAME_LEN];
-	const char*	bufend;
-	ulint		namelen = strlen(name);
-
-	bufend = innobase_convert_name(buf, sizeof buf,
-				       name, namelen,
-				       trx ? trx->mysql_thd : NULL,
-				       table_id);
-
-	/* Build the result from the explicit [buf, bufend) range. The
-	conversion may use the whole buffer (ut_format_name() handles
-	that same case), so writing a terminating NUL at *bufend could
-	land one byte past the end of buf. */
-	return(std::string(buf, bufend - buf));
-}
-
-/**********************************************************************//**
-Formats a table or index name, quoted as an SQL identifier. If the name
-contains a slash '/', the result will contain two identifiers separated by
-a period (.), as in SQL database_name.identifier.
-@return pointer to 'formatted' */
-UNIV_INTERN
-char*
-ut_format_name(
-/*===========*/
-	const char*	name,		/*!< in: table or index name, must be
-					'\0'-terminated */
-	ibool		is_table,	/*!< in: if TRUE then 'name' is a table
-					name */
-	char*		formatted,	/*!< out: formatted result, will be
-					'\0'-terminated */
-	ulint		formatted_size)	/*!< out: no more than this number of
-					bytes will be written to 'formatted' */
-{
-	/* Degenerate buffers: with one byte only the terminator fits,
-	with zero bytes nothing may be written at all. */
-	switch (formatted_size) {
-	case 1:
-		formatted[0] = '\0';
-		/* FALL-THROUGH */
-	case 0:
-		return(formatted);
-	}
-
-	char*	end;
-
-	end = innobase_convert_name(formatted, formatted_size,
-				    name, strlen(name), NULL, is_table);
-
-	/* If the space in 'formatted' was completely used, then sacrifice
-	the last character in order to write '\0' at the end. */
-	if ((ulint) (end - formatted) == formatted_size) {
-		end--;
-	}
-
-	ut_a((ulint) (end - formatted) < formatted_size);
-
-	*end = '\0';
-
-	return(formatted);
-}
-
-/**********************************************************************//**
-Catenate files.
*/ -UNIV_INTERN -void -ut_copy_file( -/*=========*/ - FILE* dest, /*!< in: output file */ - FILE* src) /*!< in: input file to be appended to output */ -{ - long len = ftell(src); - char buf[4096]; - - rewind(src); - do { - size_t maxs = len < (long) sizeof buf - ? (size_t) len - : sizeof buf; - size_t size = fread(buf, 1, maxs, src); - (void) fwrite(buf, 1, size, dest); - len -= (long) size; - if (size < maxs) { - break; - } - } while (len > 0); -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef __WIN__ -# include <stdarg.h> -/**********************************************************************//** -A substitute for vsnprintf(3), formatted output conversion into -a limited buffer. Note: this function DOES NOT return the number of -characters that would have been printed if the buffer was unlimited because -VC's _vsnprintf() returns -1 in this case and we would need to call -_vscprintf() in addition to estimate that but we would need another copy -of "ap" for that and VC does not provide va_copy(). */ -UNIV_INTERN -void -ut_vsnprintf( -/*=========*/ - char* str, /*!< out: string */ - size_t size, /*!< in: str size */ - const char* fmt, /*!< in: format */ - va_list ap) /*!< in: format values */ -{ - _vsnprintf(str, size, fmt, ap); - str[size - 1] = '\0'; -} - -/**********************************************************************//** -A substitute for snprintf(3), formatted output conversion into -a limited buffer. -@return number of characters that would have been printed if the size -were unlimited, not including the terminating '\0'. */ -UNIV_INTERN -int -ut_snprintf( -/*========*/ - char* str, /*!< out: string */ - size_t size, /*!< in: str size */ - const char* fmt, /*!< in: format */ - ...) 
/*!< in: format values */ -{ - int res; - va_list ap1; - va_list ap2; - - va_start(ap1, fmt); - va_start(ap2, fmt); - - res = _vscprintf(fmt, ap1); - ut_a(res != -1); - - if (size > 0) { - _vsnprintf(str, size, fmt, ap2); - - if ((size_t) res >= size) { - str[size - 1] = '\0'; - } - } - - va_end(ap1); - va_end(ap2); - - return(res); -} -#endif /* __WIN__ */ - -/*************************************************************//** -Convert an error number to a human readable text message. The -returned string is static and should not be freed or modified. -@return string, describing the error */ -UNIV_INTERN -const char* -ut_strerr( -/*======*/ - dberr_t num) /*!< in: error number */ -{ - switch (num) { - case DB_SUCCESS: - return("Success"); - case DB_SUCCESS_LOCKED_REC: - return("Success, record lock created"); - case DB_ERROR: - return("Generic error"); - case DB_READ_ONLY: - return("Read only transaction"); - case DB_INTERRUPTED: - return("Operation interrupted"); - case DB_OUT_OF_MEMORY: - return("Cannot allocate memory"); - case DB_OUT_OF_FILE_SPACE: - return("Out of disk space"); - case DB_LOCK_WAIT: - return("Lock wait"); - case DB_DEADLOCK: - return("Deadlock"); - case DB_ROLLBACK: - return("Rollback"); - case DB_DUPLICATE_KEY: - return("Duplicate key"); - case DB_QUE_THR_SUSPENDED: - return("The queue thread has been suspended"); - case DB_MISSING_HISTORY: - return("Required history data has been deleted"); - case DB_CLUSTER_NOT_FOUND: - return("Cluster not found"); - case DB_TABLE_NOT_FOUND: - return("Table not found"); - case DB_MUST_GET_MORE_FILE_SPACE: - return("More file space needed"); - case DB_TABLE_IS_BEING_USED: - return("Table is being used"); - case DB_TOO_BIG_RECORD: - return("Record too big"); - case DB_TOO_BIG_INDEX_COL: - return("Index columns size too big"); - case DB_LOCK_WAIT_TIMEOUT: - return("Lock wait timeout"); - case DB_NO_REFERENCED_ROW: - return("Referenced key value not found"); - case DB_ROW_IS_REFERENCED: - return("Row is 
referenced"); - case DB_CANNOT_ADD_CONSTRAINT: - return("Cannot add constraint"); - case DB_CORRUPTION: - return("Data structure corruption"); - case DB_CANNOT_DROP_CONSTRAINT: - return("Cannot drop constraint"); - case DB_NO_SAVEPOINT: - return("No such savepoint"); - case DB_TABLESPACE_EXISTS: - return("Tablespace already exists"); - case DB_TABLESPACE_DELETED: - return("Tablespace deleted or being deleted"); - case DB_TABLESPACE_NOT_FOUND: - return("Tablespace not found"); - case DB_LOCK_TABLE_FULL: - return("Lock structs have exhausted the buffer pool"); - case DB_FOREIGN_DUPLICATE_KEY: - return("Foreign key activated with duplicate keys"); - case DB_FOREIGN_EXCEED_MAX_CASCADE: - return("Foreign key cascade delete/update exceeds max depth"); - case DB_TOO_MANY_CONCURRENT_TRXS: - return("Too many concurrent transactions"); - case DB_UNSUPPORTED: - return("Unsupported"); - case DB_INVALID_NULL: - return("NULL value encountered in NOT NULL column"); - case DB_STATS_DO_NOT_EXIST: - return("Persistent statistics do not exist"); - case DB_FAIL: - return("Failed, retry may succeed"); - case DB_OVERFLOW: - return("Overflow"); - case DB_UNDERFLOW: - return("Underflow"); - case DB_STRONG_FAIL: - return("Failed, retry will not succeed"); - case DB_ZIP_OVERFLOW: - return("Zip overflow"); - case DB_RECORD_NOT_FOUND: - return("Record not found"); - case DB_CHILD_NO_INDEX: - return("No index on referencing keys in referencing table"); - case DB_PARENT_NO_INDEX: - return("No index on referenced keys in referenced table"); - case DB_FTS_INVALID_DOCID: - return("FTS Doc ID cannot be zero"); - case DB_INDEX_CORRUPT: - return("Index corrupted"); - case DB_UNDO_RECORD_TOO_BIG: - return("Undo record too big"); - case DB_END_OF_INDEX: - return("End of index"); - case DB_SEARCH_ABORTED_BY_USER: - return("Operation was interrupted by end user"); - case DB_IO_ERROR: - return("I/O error"); - case DB_TABLE_IN_FK_CHECK: - return("Table is being used in foreign key check"); - case 
DB_DATA_MISMATCH: - return("data mismatch"); - case DB_SCHEMA_NOT_LOCKED: - return("schema not locked"); - case DB_NOT_FOUND: - return("not found"); - case DB_ONLINE_LOG_TOO_BIG: - return("Log size exceeded during online index creation"); - case DB_DICT_CHANGED: - return("Table dictionary has changed"); - case DB_IDENTIFIER_TOO_LONG: - return("Identifier name is too long"); - case DB_FTS_EXCEED_RESULT_CACHE_LIMIT: - return("FTS query exceeds result cache limit"); - case DB_TEMP_FILE_WRITE_FAILURE: - return("Temp file write failure"); - case DB_FTS_TOO_MANY_WORDS_IN_PHRASE: - return("Too many words in a FTS phrase or proximity search"); - case DB_TOO_BIG_FOR_REDO: - return("BLOB record length is greater than 10%% of redo log"); - case DB_DECRYPTION_FAILED: - return("Table is encrypted but decrypt failed."); - case DB_PAGE_CORRUPTED: - return("Page read from tablespace is corrupted."); - - /* do not add default: in order to produce a warning if new code - is added to the enum but not added here */ - } - - /* we abort here because if unknown error code is given, this could - mean that memory corruption has happened and someone's error-code - variable has been overwritten with bogus data */ - ut_error; - - /* NOT REACHED */ - return("Unknown error"); -} -#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/xtradb/ut/ut0vec.cc b/storage/xtradb/ut/ut0vec.cc deleted file mode 100644 index 5842d9f1c0e..00000000000 --- a/storage/xtradb/ut/ut0vec.cc +++ /dev/null @@ -1,78 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. 
- -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/*******************************************************************//** -@file ut/ut0vec.cc -A vector of pointers to data items - -Created 4/6/2006 Osku Salerma -************************************************************************/ - -#include "ut0vec.h" -#ifdef UNIV_NONINL -#include "ut0vec.ic" -#endif -#include "mem0mem.h" - -/******************************************************************** -Create a new vector with the given initial size. */ -UNIV_INTERN -ib_vector_t* -ib_vector_create( -/*=============*/ - /* out: vector */ - ib_alloc_t* allocator, /* in: vector allocator */ - ulint sizeof_value, /* in: size of data item */ - ulint size) /* in: initial size */ -{ - ib_vector_t* vec; - - ut_a(size > 0); - - vec = static_cast<ib_vector_t*>( - allocator->mem_malloc(allocator, sizeof(*vec))); - - vec->used = 0; - vec->total = size; - vec->allocator = allocator; - vec->sizeof_value = sizeof_value; - - vec->data = static_cast<void*>( - allocator->mem_malloc(allocator, vec->sizeof_value * size)); - - return(vec); -} - -/******************************************************************** -Resize the vector, currently the vector can only grow and we -expand the number of elements it can hold by 2 times. 
*/ -UNIV_INTERN -void -ib_vector_resize( -/*=============*/ - ib_vector_t* vec) /* in: vector */ -{ - ulint new_total = vec->total * 2; - ulint old_size = vec->used * vec->sizeof_value; - ulint new_size = new_total * vec->sizeof_value; - - vec->data = static_cast<void*>(vec->allocator->mem_resize( - vec->allocator, vec->data, old_size, new_size)); - - vec->total = new_total; -} diff --git a/storage/xtradb/ut/ut0wqueue.cc b/storage/xtradb/ut/ut0wqueue.cc deleted file mode 100644 index 1607e535a94..00000000000 --- a/storage/xtradb/ut/ut0wqueue.cc +++ /dev/null @@ -1,224 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -#include "ut0wqueue.h" - -/*******************************************************************//** -@file ut/ut0wqueue.cc -A work queue - -Created 4/26/2006 Osku Salerma -************************************************************************/ - -/****************************************************************//** -Create a new work queue. 
-@return work queue */
-UNIV_INTERN
-ib_wqueue_t*
-ib_wqueue_create(void)
-/*===================*/
-{
-	ib_wqueue_t*	queue = static_cast<ib_wqueue_t*>(
-		mem_alloc(sizeof(ib_wqueue_t)));
-
-	/* Function ib_wqueue_create() has not been used anywhere,
-	not necessary to instrument this mutex */
-	mutex_create(PFS_NOT_INSTRUMENTED, &queue->mutex, SYNC_WORK_QUEUE);
-
-	queue->items = ib_list_create();
-	queue->event = os_event_create();
-
-	return(queue);
-}
-
-/****************************************************************//**
-Releases a work queue: its mutex, its item list, its event and finally
-the queue structure itself. */
-UNIV_INTERN
-void
-ib_wqueue_free(
-/*===========*/
-	ib_wqueue_t*	wq)	/*!< in: work queue */
-{
-	/* Members first, the containing structure last. */
-	mutex_free(&wq->mutex);
-	ib_list_free(wq->items);
-	os_event_free(wq->event);
-
-	mem_free(wq);
-}
-
-/****************************************************************//**
-Appends a work item to the queue and sets the queue's event. */
-UNIV_INTERN
-void
-ib_wqueue_add(
-/*==========*/
-	ib_wqueue_t*	wq,	/*!< in: work queue */
-	void*		item,	/*!< in: work item */
-	mem_heap_t*	heap)	/*!< in: memory heap to use for allocating the
-				list node */
-{
-	mutex_enter(&wq->mutex);
-
-	/* Append and signal while still holding the queue mutex. */
-	ib_list_add_last(wq->items, item, heap);
-	os_event_set(wq->event);
-
-	mutex_exit(&wq->mutex);
-}
-
-/****************************************************************//**
-Blocks until the queue holds a work item, then removes and returns the
-first one.
-@return work item */
-UNIV_INTERN
-void*
-ib_wqueue_wait(
-/*===========*/
-	ib_wqueue_t*	wq)	/*!< in: work queue */
-{
-	ib_list_node_t*	first;
-
-	/* Leave the loop with the mutex held and a non-NULL node. */
-	for (;;) {
-		os_event_wait(wq->event);
-
-		mutex_enter(&wq->mutex);
-
-		first = ib_list_get_first(wq->items);
-
-		if (first != NULL) {
-			break;
-		}
-
-		/* Woken up with nothing queued: release and wait again. */
-		mutex_exit(&wq->mutex);
-	}
-
-	ib_list_remove(wq->items, first);
-
-	if (ib_list_get_first(wq->items) == NULL) {
-		/* The list just became empty, so the event must be
-		reset. */
-		os_event_reset(wq->event);
-	}
-
-	mutex_exit(&wq->mutex);
-
-	return(first->data);
-}
-
-
-/********************************************************************
-Wait for a work item to appear in the queue for specified time.
*/ - -void* -ib_wqueue_timedwait( -/*================*/ - /* out: work item or NULL on timeout*/ - ib_wqueue_t* wq, /* in: work queue */ - ib_time_t wait_in_usecs) /* in: wait time in micro seconds */ -{ - ib_list_node_t* node = NULL; - - for (;;) { - ulint error; - ib_int64_t sig_count; - - mutex_enter(&wq->mutex); - - node = ib_list_get_first(wq->items); - - if (node) { - ib_list_remove(wq->items, node); - - mutex_exit(&wq->mutex); - break; - } - - sig_count = os_event_reset(wq->event); - - mutex_exit(&wq->mutex); - - error = os_event_wait_time_low(wq->event, - (ulint) wait_in_usecs, - sig_count); - - if (error == OS_SYNC_TIME_EXCEEDED) { - break; - } - } - - return(node ? node->data : NULL); -} - -/******************************************************************** -Return first item on work queue or NULL if queue is empty -@return work item or NULL */ -void* -ib_wqueue_nowait( -/*=============*/ - ib_wqueue_t* wq) /*<! in: work queue */ -{ - ib_list_node_t* node = NULL; - - mutex_enter(&wq->mutex); - - if(!ib_list_is_empty(wq->items)) { - node = ib_list_get_first(wq->items); - - if (node) { - ib_list_remove(wq->items, node); - - } - } - - /* We must reset the event when the list - gets emptied. */ - if(ib_list_is_empty(wq->items)) { - os_event_reset(wq->event); - } - - mutex_exit(&wq->mutex); - - return (node ? node->data : NULL); -} - -/******************************************************************** -Check if queue is empty. */ - -ibool -ib_wqueue_is_empty( -/*===============*/ - /* out: TRUE if queue empty - else FALSE */ - const ib_wqueue_t* wq) /* in: work queue */ -{ - return(ib_list_is_empty(wq->items)); -} - -/******************************************************************** -Get number of items on queue. -@return number of items on queue */ -ulint -ib_wqueue_len( -/*==========*/ - ib_wqueue_t* wq) /*<! 
in: work queue */ -{ - ulint len = 0; - - mutex_enter(&wq->mutex); - len = ib_list_len(wq->items); - mutex_exit(&wq->mutex); - - return(len); -} |